diff options
| author | Mark Rutland <mark.rutland@arm.com> | 2021-12-17 10:10:07 +0000 |
|---|---|---|
| committer | Mark Rutland <mark.rutland@arm.com> | 2021-12-17 15:39:50 +0000 |
| commit | 488dd36233a4a0d80f516054ee894688fc8f447f (patch) | |
| tree | 589f921c2c9db71cbe7b3aee98059a71d262510a | |
| parent | 0fcfb00b28c0b7884635dacf38e46d60bf3d4eb1 (diff) | |
| download | linux-488dd36233a4a0d80f516054ee894688fc8f447f.tar.gz | |
WIP: add DEBUG_REFCOUNT
In preparation for adding refcounts with a biased value, add checks that
refcounts have been correctly initialised prior to their first use.
NOTE: this relies on modifying the size of sk_buff::cb as otherwise
there's not enough space:
| net/core/skbuff.c: In function 'msg_zerocopy_alloc':
| ././include/linux/compiler_types.h:335:45: error: call to '__compiletime_assert_715' declared with attribute error: BUILD_BUG_ON failed: sizeof(*uarg) > sizeof(skb->cb)
... and I suspect people will not be happy about that growing.
So far this results in a boot-time panic:
| Booting Linux on physical CPU 0x0000000000 [0x431f0af1]
| Linux version 5.16.0-rc4-00002-gca7bbda3895b (mark@lakrids) (aarch64-linux-gcc (GCC) 11.1.0, GNU ld (GNU Binutils) 2.36.1) #24 SMP PREEMPT Fri Dec 17 12:40:24 GMT 2021
| Machine model: linux,dummy-virt
| earlycon: pl11 at MMIO 0x0000000009000000 (options '')
| printk: bootconsole [pl11] enabled
| efi: UEFI not found.
| Kernel panic - not syncing: BUG: refcount 0xffff59a07efed528 has bad magic 0x00000000
| CPU: 0 PID: 0 Comm: swapper Not tainted 5.16.0-rc4-00002-gca7bbda3895b #24
| Hardware name: linux,dummy-virt (DT)
| Call trace:
| dump_backtrace+0x0/0x1c0
| show_stack+0x1c/0x70
| dump_stack_lvl+0x68/0x84
| dump_stack+0x1c/0x38
| panic+0x15c/0x31c
| get_unaligned_be32+0x0/0x14
| kobject_get+0x84/0xf0
| of_node_get+0x24/0x40
| of_find_node_opts_by_path+0x17c/0x1a0
| of_alias_scan+0x40/0x24c
| unflatten_device_tree+0x44/0x54
| setup_arch+0x288/0x5f0
| start_kernel+0x84/0x65c
| __primary_switched+0xc0/0xc8
| ---[ end Kernel panic - not syncing: BUG: refcount 0xffff59a07efed528 has bad magic 0x00000000 ]---
... which is apparently a refcount_inc() on zero:
| [mark@lakrids:~/src/linux]% ARCH=arm64 CROSS_COMPILE=aarch64-linux- usekorg 11.1.0 ./scripts/faddr2line vmlinux kobject_get+0x84/0xf0
| kobject_get+0x84/0xf0:
| arch_static_branch_jump at arch/arm64/include/asm/jump_label.h:38
| (inlined by) system_uses_lse_atomics at arch/arm64/include/asm/lse.h:24
| (inlined by) arch_atomic_fetch_add_relaxed at arch/arm64/include/asm/atomic.h:49
| (inlined by) atomic_fetch_add_relaxed at include/linux/atomic/atomic-instrumented.h:112
| (inlined by) __refcount_add at include/linux/refcount.h:227
| (inlined by) __refcount_inc at include/linux/refcount.h:286
| (inlined by) refcount_inc at include/linux/refcount.h:304
| (inlined by) kref_get at include/linux/kref.h:45
| (inlined by) kobject_get at lib/kobject.c:659
TODO:
* Refactor the __refcount_* and refcount_* functions so that internal functions
don't duplicate the check.
* Add a refcount_init() for dynamically-allocated objects?
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
| -rw-r--r-- | include/linux/refcount.h | 66 | ||||
| -rw-r--r-- | include/linux/skbuff.h | 2 | ||||
| -rw-r--r-- | lib/Kconfig.debug | 5 | ||||
| -rw-r--r-- | lib/refcount.c | 27 |
4 files changed, 92 insertions, 8 deletions
diff --git a/include/linux/refcount.h b/include/linux/refcount.h index b8a6e387f8f94..de22a492a2f31 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -110,9 +110,35 @@ struct mutex; */ typedef struct refcount_struct { atomic_t refs; +#ifdef CONFIG_DEBUG_REFCOUNT + unsigned int magic; +#endif } refcount_t; -#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +#ifdef CONFIG_DEBUG_REFCOUNT +#define REFCOUNT_MAGIC 0x04f10bad + +#define DEBUG_REFCOUNT_INIT() \ + .magic = REFCOUNT_MAGIC, + +void refcount_bad_magic(const refcount_t *r); + +static __always_inline void refcount_debug_before(const refcount_t *r) +{ + if (r->magic != REFCOUNT_MAGIC) + refcount_bad_magic(r); +} +#else +#define DEBUG_REFCOUNT_INIT() +static __always_inline void refcount_debug_before(const refcount_t *r) { } +#endif + +#define REFCOUNT_INIT(n) \ + (refcount_t){ \ + .refs = ATOMIC_INIT(n), \ + DEBUG_REFCOUNT_INIT() \ + } + #define REFCOUNT_MAX INT_MAX #define REFCOUNT_SATURATED (INT_MIN / 2) @@ -133,9 +159,20 @@ void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t); */ static inline void refcount_set(refcount_t *r, int n) { + // TODO: move callers to refcount_init(), and check this atomic_set(&r->refs, n); } +static inline void refcount_init(refcount_t *r, int n) +{ + *r = REFCOUNT_INIT(n); +} + +static inline unsigned int __refcount_internal_read(const refcount_t *r) +{ + return atomic_read(&r->refs); +} + /** * refcount_read - get a refcount's value * @r: the refcount @@ -144,12 +181,17 @@ static inline void refcount_set(refcount_t *r, int n) */ static inline unsigned int refcount_read(const refcount_t *r) { - return atomic_read(&r->refs); + refcount_debug_before(r); + return __refcount_internal_read(r); } static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { - int old = refcount_read(r); + int old; + + refcount_debug_before(r); + + old = __refcount_internal_read(r); do { if (!old) @@ -190,7 +232,11 @@ static 
inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) static inline void __refcount_add(int i, refcount_t *r, int *oldp) { - int old = atomic_fetch_add_relaxed(i, &r->refs); + int old; + + refcount_debug_before(r); + + old = atomic_fetch_add_relaxed(i, &r->refs); if (oldp) *oldp = old; @@ -269,7 +315,11 @@ static inline void refcount_inc(refcount_t *r) static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { - int old = atomic_fetch_sub_release(i, &r->refs); + int old; + + refcount_debug_before(r); + + old = atomic_fetch_sub_release(i, &r->refs); if (oldp) *oldp = old; @@ -335,7 +385,11 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r) static inline void __refcount_dec(refcount_t *r, int *oldp) { - int old = atomic_fetch_sub_release(1, &r->refs); + int old; + + refcount_debug_before(r); + + old = atomic_fetch_sub_release(1, &r->refs); if (oldp) *oldp = old; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c8cb7e697d479..e1d788844629c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -760,7 +760,7 @@ struct sk_buff { * want to keep them across layers you have to do a skb_clone() * first. This is owned by whoever has the skb queued ATM. */ - char cb[48] __aligned(8); + char cb[64] __aligned(8); union { struct { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5c12bde10996c..f18b2182cc083 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1318,6 +1318,11 @@ config LOCK_STAT CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. (CONFIG_LOCKDEP defines "acquire" and "release" events.) +config DEBUG_REFCOUNT + bool "Refcount debugging: basic checks" + help + Say Y here to catch missing refcount_t initialization. 
+ config DEBUG_RT_MUTEXES bool "RT Mutex debugging, deadlock detection" depends on DEBUG_KERNEL && RT_MUTEXES diff --git a/lib/refcount.c b/lib/refcount.c index a207a8f22b3ca..3b37d49568d92 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -3,10 +3,11 @@ * Out-of-line refcount functions. */ +#include <linux/bug.h> #include <linux/mutex.h> +#include <linux/ratelimit.h> #include <linux/refcount.h> #include <linux/spinlock.h> -#include <linux/bug.h> #define REFCOUNT_WARN(str) WARN_ONCE(1, "refcount_t: " str ".\n") @@ -56,6 +57,8 @@ bool refcount_dec_if_one(refcount_t *r) { int val = 1; + refcount_debug_before(r); + return atomic_try_cmpxchg_release(&r->refs, &val, 0); } EXPORT_SYMBOL(refcount_dec_if_one); @@ -75,6 +78,9 @@ bool refcount_dec_not_one(refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); + // TODO: move the read of `val`? + refcount_debug_before(r); + do { if (unlikely(val == REFCOUNT_SATURATED)) return true; @@ -112,6 +118,8 @@ EXPORT_SYMBOL(refcount_dec_not_one); */ bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) { + refcount_debug_before(r); + if (refcount_dec_not_one(r)) return false; @@ -143,6 +151,8 @@ EXPORT_SYMBOL(refcount_dec_and_mutex_lock); */ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) { + refcount_debug_before(r); + if (refcount_dec_not_one(r)) return false; @@ -172,6 +182,8 @@ EXPORT_SYMBOL(refcount_dec_and_lock); bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, unsigned long *flags) { + refcount_debug_before(r); + if (refcount_dec_not_one(r)) return false; @@ -184,3 +196,16 @@ bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, return true; } EXPORT_SYMBOL(refcount_dec_and_lock_irqsave); + +#ifdef CONFIG_DEBUG_REFCOUNT +void refcount_bad_magic(const refcount_t *r) +{ + /* + * This *should* be a BUG() or panic(). This is only a WARN for the + * sake of estimating how many places in the kernel need to be fixed + * up. 
+ */ + WARN_RATELIMIT(1, "BUG: refcount %pS has bad magic 0x%08x\n", + r, READ_ONCE(r->magic)); +} +#endif /* CONFIG_DEBUG_REFCOUNT */ |
