diff options
| author | Luis Chamberlain <mcgrof@kernel.org> | 2021-08-13 14:58:11 -0700 |
|---|---|---|
| committer | Luis Chamberlain <mcgrof@kernel.org> | 2021-08-13 16:14:51 -0700 |
| commit | da898d13309d7b2855a74ee4e49e0fb43a55a2f1 (patch) | |
| tree | e436123dddfadd48205c93b5fdebb4fb94315e23 | |
| parent | cef409dc5a81d3f045208be14d62ef4f2c65afa6 (diff) | |
| download | linux-next-da898d13309d7b2855a74ee4e49e0fb43a55a2f1.tar.gz | |
sysfs: fix deadlock race with module removal
When sysfs attributes use a lock also used on module removal we can
race to deadlock. This happens when for instance a sysfs file on
a driver is used, then at the same time we have module removal call
trigger. The module removal call code holds a lock, and then the sysfs
file entry waits for the same lock. While holding the lock the module
removal tries to remove the sysfs entries, but these cannot be removed
yet as one is waiting for a lock. This won't complete as the lock is
already held. Likewise module removal cannot complete, and so we deadlock.
This can now be easily reproducible with our sysfs selftest as follows:
./tools/testing/selftests/sysfs/sysfs.sh -t 0027
To fix this we just *try* to get a refcount to the module when a shared
lock is used, prior to mucking with a sysfs attribute. If this fails we
just give up right away.
We use a try method as a full lock means we'd then make our sysfs
attributes busy us out from possible module removal, and so userspace
could force denying module removal, a silly form of "DOS" against module
removal. A try lock on the module removal ensures we give priority to
module removal and interacting with sysfs attributes only comes second.
Using a full lock could mean for instance that if you don't stop poking
at sysfs files you cannot remove a module.
Races between removal of sysfs files and the module are not possible
given sysfs files are created by the same module, and when a sysfs file
is being used kernfs prevents removal of the sysfs file. So if module
removal is actually happening the removal would have to wait until
the sysfs file is done being used.
This deadlock was first reported with the zram driver, however the live
patching folks have acknowledged they have observed this as well with
live patching, when a live patch is removed. I was then able to
reproduce easily by creating a dedicated selftests.
A sketch of how this can happen follows:
CPU A CPU B
whatever_store()
module_unload
mutex_lock(foo)
mutex_lock(foo)
del_gendisk(zram->disk);
device_del()
device_remove_groups()
In this situation whatever_store() is waiting for the mutex foo to
become unlocked, but that won't happen until module removal is complete.
But module removal won't complete until the sysfs file being poked
completes which is waiting for a lock already held.
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
| -rw-r--r-- | fs/kernfs/dir.c | 24 | ||||
| -rw-r--r-- | fs/kernfs/file.c | 7 | ||||
| -rw-r--r-- | fs/kernfs/kernfs-internal.h | 1 | ||||
| -rw-r--r-- | fs/kernfs/symlink.c | 3 | ||||
| -rw-r--r-- | fs/sysfs/dir.c | 3 | ||||
| -rw-r--r-- | fs/sysfs/file.c | 4 | ||||
| -rw-r--r-- | fs/sysfs/group.c | 1 | ||||
| -rw-r--r-- | include/linux/kernfs.h | 19 | ||||
| -rw-r--r-- | include/linux/sysfs.h | 52 | ||||
| -rw-r--r-- | kernel/cgroup/cgroup.c | 2 |
10 files changed, 87 insertions, 29 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index ba581429bf7b2..e72d8c71b6600 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/security.h> #include <linux/hash.h> +#include <linux/module.h> #include "kernfs-internal.h" @@ -417,9 +418,15 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) if (unlikely(!kn)) return NULL; - if (!atomic_inc_unless_negative(&kn->active)) + if (unlikely(kn->owner && !try_module_get(kn->owner))) return NULL; + if (!atomic_inc_unless_negative(&kn->active)) { + if (kn->owner) + module_put(kn->owner); + return NULL; + } + if (kernfs_lockdep(kn)) rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); return kn; @@ -442,6 +449,8 @@ void kernfs_put_active(struct kernfs_node *kn) if (kernfs_lockdep(kn)) rwsem_release(&kn->dep_map, _RET_IP_); v = atomic_dec_return(&kn->active); + if (kn->owner) + module_put(kn->owner); if (likely(v != KN_DEACTIVATED_BIAS)) return; @@ -571,6 +580,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, kuid_t uid, kgid_t gid, unsigned flags) { @@ -606,6 +616,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, kn->name = name; kn->mode = mode; + kn->owner = owner; kn->flags = flags; if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) { @@ -639,13 +650,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, kuid_t uid, kgid_t gid, unsigned flags) { struct kernfs_node *kn; kn = __kernfs_new_node(kernfs_root(parent), parent, - name, mode, uid, gid, flags); + name, mode, owner, uid, gid, flags); if (kn) { kernfs_get(parent); kn->parent = parent; @@ -926,7 +938,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, root->id_highbits = 1; kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + NULL, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) { idr_destroy(&root->ino_idr); @@ -965,6 +977,7 @@ void kernfs_destroy_root(struct kernfs_root *root) * @parent: parent in which to create a new directory * @name: name of the new directory * @mode: mode of the new directory + * @owner: if set, the module owner of this directory * @uid: uid of the new directory * @gid: gid of the new directory * @priv: opaque data associated with the new directory @@ -974,6 +987,7 @@ void kernfs_destroy_root(struct kernfs_root *root) */ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, kuid_t uid, kgid_t gid, void *priv, const void *ns) { @@ -982,7 +996,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, /* allocate */ kn = kernfs_new_node(parent, name, mode | S_IFDIR, - uid, gid, KERNFS_DIR); + owner, uid, gid, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); @@ -1014,7 +1028,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, /* allocate */ kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); + NULL, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); if (!kn) return ERR_PTR(-ENOMEM); diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 4479c6580333a..73be39ddc856c 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -974,6 +974,7 @@ const struct file_operations kernfs_file_fops = { * @uid: uid of the file * @gid: gid of the file * @size: size of the file + * @owner: if set, the module owner of the file * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file @@ -983,7 +984,9 @@ const struct file_operations kernfs_file_fops = { */ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, - umode_t mode, kuid_t uid, kgid_t gid, + umode_t mode, + struct module *owner, + kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, @@ -996,7 +999,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, flags = KERNFS_FILE; kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, - uid, gid, flags); + owner, uid, gid, flags); if (!kn) return ERR_PTR(-ENOMEM); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 80fe90a280724..e128a14aa8628 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -133,6 +133,7 @@ void kernfs_put_active(struct kernfs_node *kn); int kernfs_add_one(struct kernfs_node *kn); struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, kuid_t uid, kgid_t gid, unsigned flags); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index c8f8e41b84110..336c9681692a2 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -36,7 +36,8 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, gid = target->iattr->ia_gid; } - kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid, + kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, + target->owner, uid, gid, KERNFS_LINK); if (!kn) return ERR_PTR(-ENOMEM); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 59dffd5ca5178..5aff6ff07392c 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -57,7 +57,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) kobject_get_ownership(kobj, &uid, &gid); kn = kernfs_create_dir_ns(parent, kobject_name(kobj), - S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, + S_IRWXU | S_IRUGO | S_IXUGO, NULL, + uid, gid, kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index d019d6ac6ad09..1fe72b0ff3760 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -314,8 +314,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif - kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, - size, ops, (void *)attr, ns, key); + kn = __kernfs_create_file(parent, attr->name, mode & 0777, attr->owner, + uid, gid, size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index f29d620045272..4031e389f21fc 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -136,6 +136,7 @@ static int internal_create_group(struct kobject *kobj, int update, } else { kn = kernfs_create_dir_ns(kobj->sd, grp->name, S_IRWXU | S_IRUGO | S_IXUGO, + grp->owner, uid, gid, kobj, NULL); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 7c6838ba08e6b..9e744f4899345 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -160,6 +160,7 @@ struct kernfs_node { unsigned short flags; umode_t mode; + struct module *owner; struct kernfs_iattrs *iattr; }; @@ -373,12 +374,14 @@ void kernfs_destroy_root(struct kernfs_root *root); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, kuid_t uid, kgid_t gid, void *priv, const void *ns); struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, const char *name); struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, @@ -475,13 +478,15 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, - umode_t mode, kuid_t uid, kgid_t gid, + umode_t mode, struct module *owner, + kuid_t uid, kgid_t gid, void *priv, const void *ns) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, - umode_t mode, kuid_t uid, kgid_t gid, + umode_t mode, struct module *owner, + kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } @@ -568,14 +573,15 @@ static inline struct kernfs_node * kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode, void *priv) { - return kernfs_create_dir_ns(parent, name, mode, + return kernfs_create_dir_ns(parent, name, mode, parent->owner, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, priv, NULL); } static inline struct kernfs_node * kernfs_create_file_ns(struct kernfs_node *parent, const char *name, - umode_t mode, kuid_t uid, kgid_t gid, + umode_t mode, struct module *owner, + kuid_t uid, kgid_t gid, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns) { @@ -584,15 +590,16 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name, #ifdef CONFIG_DEBUG_LOCK_ALLOC key = (struct lock_class_key *)&ops->lockdep_key; #endif - return __kernfs_create_file(parent, name, mode, uid, gid, + return __kernfs_create_file(parent, name, mode, owner, uid, gid, size, ops, priv, ns, key); } static inline struct kernfs_node * kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, + struct module *owner, loff_t size, const struct kernfs_ops *ops, void *priv) { - return kernfs_create_file_ns(parent, name, mode, + return kernfs_create_file_ns(parent, name, mode, owner, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, size, ops, priv, NULL); } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e3f1e8ac1f85b..b816a497cb646 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -30,6 +30,7 @@ enum kobj_ns_type; struct attribute { const char *name; umode_t mode; + struct module *owner; #ifdef CONFIG_DEBUG_LOCK_ALLOC bool ignore_lockdep:1; struct lock_class_key *key; @@ -80,6 +81,7 @@ do { \ * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. + * @module: If set, module responsible for this attribute group */ struct attribute_group { const char *name; @@ -89,6 +91,7 @@ struct attribute_group { struct bin_attribute *, int); struct attribute **attrs; struct bin_attribute **bin_attrs; + struct module *owner; }; /* @@ -100,38 +103,52 @@ struct attribute_group { #define __ATTR(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + .owner = THIS_MODULE, \ + }, \ .show = _show, \ .store = _store, \ } #define __ATTR_PREALLOC(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ - .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\ + .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode),\ + .owner = THIS_MODULE, \ + }, \ .show = _show, \ .store = _store, \ } #define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .attr = { .name = __stringify(_name), \ + .mode = 0444, \ + .owner = THIS_MODULE, \ + }, \ .show = _name##_show, \ } #define __ATTR_RO_MODE(_name, _mode) { \ .attr = { .name = __stringify(_name), \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + .owner = THIS_MODULE, \ + }, \ .show = _name##_show, \ } #define __ATTR_RW_MODE(_name, _mode) { \ .attr = { .name = __stringify(_name), \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ + .owner = THIS_MODULE, \ + }, \ .show = _name##_show, \ .store = _name##_store, \ } #define __ATTR_WO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0200 }, \ + .attr = { .name = __stringify(_name), \ + .mode = 0200, \ + .owner = THIS_MODULE, \ + }, \ .store = _name##_store, \ } @@ -141,8 +158,11 @@ struct attribute_group { #ifdef CONFIG_DEBUG_LOCK_ALLOC #define __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) { \ - .attr = {.name = __stringify(_name), .mode = _mode, \ - .ignore_lockdep = true }, \ + .attr = {.name = __stringify(_name), \ + .mode = _mode, \ + .ignore_lockdep = true, \ + .owner = THIS_MODULE, \ + }, \ .show = _show, \ .store = _store, \ } @@ -159,6 +179,7 @@ static const struct attribute_group *_name##_groups[] = { \ #define ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ .attrs = _name##_attrs, \ + .owner = THIS_MODULE, \ }; \ __ATTRIBUTE_GROUPS(_name) @@ -199,20 +220,29 @@ struct bin_attribute { /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ - .attr = { .name = __stringify(_name), .mode = _mode }, \ + .attr = { .name = __stringify(_name), \ + .mode = _mode, \ + .owner = THIS_MODULE, \ + }, \ .read = _read, \ .write = _write, \ .size = _size, \ } #define __BIN_ATTR_RO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .attr = { .name = __stringify(_name), \ + .mode = 0444, \ + .owner = THIS_MODULE, \ + }, \ .read = _name##_read, \ .size = _size, \ } #define __BIN_ATTR_WO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0200 }, \ + .attr = { .name = __stringify(_name), \ + .mode = 0200, \ + .owner = THIS_MODULE, \ + }, \ .write = _name##_write, \ .size = _size, \ } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d0725c1a8db5e..bbcf1b14cf429 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3935,7 +3935,7 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, key = &cft->lockdep_key; #endif kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), - cgroup_file_mode(cft), + cgroup_file_mode(cft), NULL, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, cft->kf_ops, cft, NULL, key); |
