aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Rutland <mark.rutland@arm.com>2021-12-17 10:10:07 +0000
committerMark Rutland <mark.rutland@arm.com>2021-12-17 15:39:50 +0000
commit488dd36233a4a0d80f516054ee894688fc8f447f (patch)
tree589f921c2c9db71cbe7b3aee98059a71d262510a
parent0fcfb00b28c0b7884635dacf38e46d60bf3d4eb1 (diff)
downloadlinux-488dd36233a4a0d80f516054ee894688fc8f447f.tar.gz
WIP: add DEBUG_REFCOUNT
In preparation for adding refcounts with a biased value, add checks that refcounts have been correctly initialised prior to their first use. NOTE: this relies on modifying the size of sk_buff::cb as otherwise there's not enough space: | net/core/skbuff.c: In function 'msg_zerocopy_alloc': | ././include/linux/compiler_types.h:335:45: error: call to '__compiletime_assert_715' declared with attribute error: BUILD_BUG_ON failed: sizeof(*uarg) > sizeof(skb->cb) ... and I suspect people will not be happy about that growing. So far this results in a boot-time panic: | Booting Linux on physical CPU 0x0000000000 [0x431f0af1] | Linux version 5.16.0-rc4-00002-gca7bbda3895b (mark@lakrids) (aarch64-linux-gcc (GCC) 11.1.0, GNU ld (GNU Binutils) 2.36.1) #24 SMP PREEMPT Fri Dec 17 12:40:24 GMT 2021 | Machine model: linux,dummy-virt | earlycon: pl11 at MMIO 0x0000000009000000 (options '') | printk: bootconsole [pl11] enabled | efi: UEFI not found. | Kernel panic - not syncing: BUG: refcount 0xffff59a07efed528 has bad magic 0x00000000 | CPU: 0 PID: 0 Comm: swapper Not tainted 5.16.0-rc4-00002-gca7bbda3895b #24 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x1c0 | show_stack+0x1c/0x70 | dump_stack_lvl+0x68/0x84 | dump_stack+0x1c/0x38 | panic+0x15c/0x31c | get_unaligned_be32+0x0/0x14 | kobject_get+0x84/0xf0 | of_node_get+0x24/0x40 | of_find_node_opts_by_path+0x17c/0x1a0 | of_alias_scan+0x40/0x24c | unflatten_device_tree+0x44/0x54 | setup_arch+0x288/0x5f0 | start_kernel+0x84/0x65c | __primary_switched+0xc0/0xc8 | ---[ end Kernel panic - not syncing: BUG: refcount 0xffff59a07efed528 has bad magic 0x00000000 ]--- ... 
which is apparently a refcount_inc() on zero: | [mark@lakrids:~/src/linux]% ARCH=arm64 CROSS_COMPILE=aarch64-linux- usekorg 11.1.0 ./scripts/faddr2line vmlinux kobject_get+0x84/0xf0 | kobject_get+0x84/0xf0: | arch_static_branch_jump at arch/arm64/include/asm/jump_label.h:38 | (inlined by) system_uses_lse_atomics at arch/arm64/include/asm/lse.h:24 | (inlined by) arch_atomic_fetch_add_relaxed at arch/arm64/include/asm/atomic.h:49 | (inlined by) atomic_fetch_add_relaxed at include/linux/atomic/atomic-instrumented.h:112 | (inlined by) __refcount_add at include/linux/refcount.h:227 | (inlined by) __refcount_inc at include/linux/refcount.h:286 | (inlined by) refcount_inc at include/linux/refcount.h:304 | (inlined by) kref_get at include/linux/kref.h:45 | (inlined by) kobject_get at lib/kobject.c:659 TODO: * Refactor the __refcount_* and refcount_* functions so that internal functions don't duplicate the check. * Add a refcount_init() for dynamically-allocated objects? Signed-off-by: Mark Rutland <mark.rutland@arm.com>
-rw-r--r--include/linux/refcount.h66
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--lib/Kconfig.debug5
-rw-r--r--lib/refcount.c27
4 files changed, 92 insertions, 8 deletions
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index b8a6e387f8f94..de22a492a2f31 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -110,9 +110,35 @@ struct mutex;
*/
typedef struct refcount_struct {
atomic_t refs;
+#ifdef CONFIG_DEBUG_REFCOUNT
+ unsigned int magic;
+#endif
} refcount_t;
-#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
+#ifdef CONFIG_DEBUG_REFCOUNT
+#define REFCOUNT_MAGIC 0x04f10bad
+
+#define DEBUG_REFCOUNT_INIT() \
+ .magic = REFCOUNT_MAGIC,
+
+void refcount_bad_magic(const refcount_t *r);
+
+static __always_inline void refcount_debug_before(const refcount_t *r)
+{
+ if (r->magic != REFCOUNT_MAGIC)
+ refcount_bad_magic(r);
+}
+#else
+#define DEBUG_REFCOUNT_INIT()
+static __always_inline void refcount_debug_before(const refcount_t *r) { }
+#endif
+
+#define REFCOUNT_INIT(n) \
+ (refcount_t){ \
+ .refs = ATOMIC_INIT(n), \
+ DEBUG_REFCOUNT_INIT() \
+ }
+
#define REFCOUNT_MAX INT_MAX
#define REFCOUNT_SATURATED (INT_MIN / 2)
@@ -133,9 +159,20 @@ void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t);
*/
static inline void refcount_set(refcount_t *r, int n)
{
+ // TODO: move callers to refcount_init(), and check this
atomic_set(&r->refs, n);
}
+static inline void refcount_init(refcount_t *r, int n)
+{
+ *r = REFCOUNT_INIT(n);
+}
+
+static inline unsigned int __refcount_internal_read(const refcount_t *r)
+{
+ return atomic_read(&r->refs);
+}
+
/**
* refcount_read - get a refcount's value
* @r: the refcount
@@ -144,12 +181,17 @@ static inline void refcount_set(refcount_t *r, int n)
*/
static inline unsigned int refcount_read(const refcount_t *r)
{
- return atomic_read(&r->refs);
+ refcount_debug_before(r);
+ return __refcount_internal_read(r);
}
static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
{
- int old = refcount_read(r);
+ int old;
+
+ refcount_debug_before(r);
+
+ old = __refcount_internal_read(r);
do {
if (!old)
@@ -190,7 +232,11 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
static inline void __refcount_add(int i, refcount_t *r, int *oldp)
{
- int old = atomic_fetch_add_relaxed(i, &r->refs);
+ int old;
+
+ refcount_debug_before(r);
+
+ old = atomic_fetch_add_relaxed(i, &r->refs);
if (oldp)
*oldp = old;
@@ -269,7 +315,11 @@ static inline void refcount_inc(refcount_t *r)
static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
{
- int old = atomic_fetch_sub_release(i, &r->refs);
+ int old;
+
+ refcount_debug_before(r);
+
+ old = atomic_fetch_sub_release(i, &r->refs);
if (oldp)
*oldp = old;
@@ -335,7 +385,11 @@ static inline __must_check bool refcount_dec_and_test(refcount_t *r)
static inline void __refcount_dec(refcount_t *r, int *oldp)
{
- int old = atomic_fetch_sub_release(1, &r->refs);
+ int old;
+
+ refcount_debug_before(r);
+
+ old = atomic_fetch_sub_release(1, &r->refs);
if (oldp)
*oldp = old;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8cb7e697d479..e1d788844629c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -760,7 +760,7 @@ struct sk_buff {
* want to keep them across layers you have to do a skb_clone()
* first. This is owned by whoever has the skb queued ATM.
*/
- char cb[48] __aligned(8);
+ char cb[64] __aligned(8);
union {
struct {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5c12bde10996c..f18b2182cc083 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1318,6 +1318,11 @@ config LOCK_STAT
CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
(CONFIG_LOCKDEP defines "acquire" and "release" events.)
+config DEBUG_REFCOUNT
+ bool "Refcount debugging: basic checks"
+ help
+ Say Y here to catch missing refcount_t initialization.
+
config DEBUG_RT_MUTEXES
bool "RT Mutex debugging, deadlock detection"
depends on DEBUG_KERNEL && RT_MUTEXES
diff --git a/lib/refcount.c b/lib/refcount.c
index a207a8f22b3ca..3b37d49568d92 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -3,10 +3,11 @@
* Out-of-line refcount functions.
*/
+#include <linux/bug.h>
#include <linux/mutex.h>
+#include <linux/ratelimit.h>
#include <linux/refcount.h>
#include <linux/spinlock.h>
-#include <linux/bug.h>
#define REFCOUNT_WARN(str) WARN_ONCE(1, "refcount_t: " str ".\n")
@@ -56,6 +57,8 @@ bool refcount_dec_if_one(refcount_t *r)
{
int val = 1;
+ refcount_debug_before(r);
+
return atomic_try_cmpxchg_release(&r->refs, &val, 0);
}
EXPORT_SYMBOL(refcount_dec_if_one);
@@ -75,6 +78,9 @@ bool refcount_dec_not_one(refcount_t *r)
{
unsigned int new, val = atomic_read(&r->refs);
+ // TODO: move the read of `val`?
+ refcount_debug_before(r);
+
do {
if (unlikely(val == REFCOUNT_SATURATED))
return true;
@@ -112,6 +118,8 @@ EXPORT_SYMBOL(refcount_dec_not_one);
*/
bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock)
{
+ refcount_debug_before(r);
+
if (refcount_dec_not_one(r))
return false;
@@ -143,6 +151,8 @@ EXPORT_SYMBOL(refcount_dec_and_mutex_lock);
*/
bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
{
+ refcount_debug_before(r);
+
if (refcount_dec_not_one(r))
return false;
@@ -172,6 +182,8 @@ EXPORT_SYMBOL(refcount_dec_and_lock);
bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
unsigned long *flags)
{
+ refcount_debug_before(r);
+
if (refcount_dec_not_one(r))
return false;
@@ -184,3 +196,16 @@ bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
return true;
}
EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
+
+#ifdef CONFIG_DEBUG_REFCOUNT
+void refcount_bad_magic(const refcount_t *r)
+{
+ /*
+ * This *should* be a BUG() or panic(). This is only a WARN for the
+ * sake of estimating how many places in the kernel need to be fixed
+ * up.
+ */
+ WARN_RATELIMIT(1, "BUG: refcount %pS has bad magic 0x%08x\n",
+ r, READ_ONCE(r->magic));
+}
+#endif /* CONFIG_DEBUG_REFCOUNT */