aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuis Chamberlain <mcgrof@kernel.org>2021-08-13 14:58:11 -0700
committerLuis Chamberlain <mcgrof@kernel.org>2021-08-13 16:14:51 -0700
commitda898d13309d7b2855a74ee4e49e0fb43a55a2f1 (patch)
treee436123dddfadd48205c93b5fdebb4fb94315e23
parentcef409dc5a81d3f045208be14d62ef4f2c65afa6 (diff)
downloadlinux-next-da898d13309d7b2855a74ee4e49e0fb43a55a2f1.tar.gz
sysfs: fix deadlock race with module removal
When sysfs attributes use a lock also used on module removal we can race to deadlock. This happens when for instance a sysfs file on a driver is used, then at the same time we have module removal call trigger. The module removal call code holds a lock, and then the sysfs file entry waits for the same lock. While holding the lock the module removal tries to remove the sysfs entries, but these cannot be removed yet as one is waiting for a lock. This won't complete as the lock is already held. Likewise module removal cannot complete, and so we deadlock. This can now be easily reproducible with our sysfs selftest as follows: ./tools/testing/selftests/sysfs/sysfs.sh -t 0027 To fix this we just *try* to get a refcount to the module when a shared lock is used, prior to mucking with a sysfs attribute. If this fails we just give up right away. We use a try method as a full lock means we'd then make our sysfs attributes busy us out from possible module removal, and so userspace could force denying module removal, a silly form of "DOS" against module removal. A try lock on the module removal ensures we give priority to module removal and interacting with sysfs attributes only comes second. Using a full lock could mean for instance that if you don't stop poking at sysfs files you cannot remove a module. Races between removal of sysfs files and the module are not possible given sysfs files are created by the same module, and when a sysfs file is being used kernfs prevents removal of the sysfs file. So if module removal is actually happening the removal would have to wait until the sysfs file is done being used. This deadlock was first reported with the zram driver, however the live patching folks have acknowledged they have observed this as well with live patching, when a live patch is removed. I was then able to reproduce easily by creating a dedicated selftests. A sketch of how this can happen follows: CPU A CPU B whatever_store() module_unload mutex_lock(foo) mutex_lock(foo) del_gendisk(zram->disk); device_del() device_remove_groups() In this situation whatever_store() is waiting for the mutex foo to become unlocked, but that won't happen until module removal is complete. But module removal won't complete until the sysfs file being poked completes which is waiting for a lock already held. Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
-rw-r--r--fs/kernfs/dir.c24
-rw-r--r--fs/kernfs/file.c7
-rw-r--r--fs/kernfs/kernfs-internal.h1
-rw-r--r--fs/kernfs/symlink.c3
-rw-r--r--fs/sysfs/dir.c3
-rw-r--r--fs/sysfs/file.c4
-rw-r--r--fs/sysfs/group.c1
-rw-r--r--include/linux/kernfs.h19
-rw-r--r--include/linux/sysfs.h52
-rw-r--r--kernel/cgroup/cgroup.c2
10 files changed, 87 insertions, 29 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index ba581429bf7b2..e72d8c71b6600 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>
+#include <linux/module.h>
#include "kernfs-internal.h"
@@ -417,9 +418,15 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
if (unlikely(!kn))
return NULL;
- if (!atomic_inc_unless_negative(&kn->active))
+ if (unlikely(kn->owner && !try_module_get(kn->owner)))
return NULL;
+ if (!atomic_inc_unless_negative(&kn->active)) {
+ if (kn->owner)
+ module_put(kn->owner);
+ return NULL;
+ }
+
if (kernfs_lockdep(kn))
rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
return kn;
@@ -442,6 +449,8 @@ void kernfs_put_active(struct kernfs_node *kn)
if (kernfs_lockdep(kn))
rwsem_release(&kn->dep_map, _RET_IP_);
v = atomic_dec_return(&kn->active);
+ if (kn->owner)
+ module_put(kn->owner);
if (likely(v != KN_DEACTIVATED_BIAS))
return;
@@ -571,6 +580,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
struct kernfs_node *parent,
const char *name, umode_t mode,
+ struct module *owner,
kuid_t uid, kgid_t gid,
unsigned flags)
{
@@ -606,6 +616,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
kn->name = name;
kn->mode = mode;
+ kn->owner = owner;
kn->flags = flags;
if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
@@ -639,13 +650,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
+ struct module *owner,
kuid_t uid, kgid_t gid,
unsigned flags)
{
struct kernfs_node *kn;
kn = __kernfs_new_node(kernfs_root(parent), parent,
- name, mode, uid, gid, flags);
+ name, mode, owner, uid, gid, flags);
if (kn) {
kernfs_get(parent);
kn->parent = parent;
@@ -926,7 +938,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
root->id_highbits = 1;
kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+ NULL, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
KERNFS_DIR);
if (!kn) {
idr_destroy(&root->ino_idr);
@@ -965,6 +977,7 @@ void kernfs_destroy_root(struct kernfs_root *root)
* @parent: parent in which to create a new directory
* @name: name of the new directory
* @mode: mode of the new directory
+ * @owner: if set, the module owner of this directory
* @uid: uid of the new directory
* @gid: gid of the new directory
* @priv: opaque data associated with the new directory
@@ -974,6 +987,7 @@ void kernfs_destroy_root(struct kernfs_root *root)
*/
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
const char *name, umode_t mode,
+ struct module *owner,
kuid_t uid, kgid_t gid,
void *priv, const void *ns)
{
@@ -982,7 +996,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
/* allocate */
kn = kernfs_new_node(parent, name, mode | S_IFDIR,
- uid, gid, KERNFS_DIR);
+ owner, uid, gid, KERNFS_DIR);
if (!kn)
return ERR_PTR(-ENOMEM);
@@ -1014,7 +1028,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
/* allocate */
kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
- GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
+ NULL, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
if (!kn)
return ERR_PTR(-ENOMEM);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 4479c6580333a..73be39ddc856c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -974,6 +974,7 @@ const struct file_operations kernfs_file_fops = {
* @uid: uid of the file
* @gid: gid of the file
* @size: size of the file
+ * @owner: if set, the module owner of the file
* @ops: kernfs operations for the file
* @priv: private data for the file
* @ns: optional namespace tag of the file
@@ -983,7 +984,9 @@ const struct file_operations kernfs_file_fops = {
*/
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
const char *name,
- umode_t mode, kuid_t uid, kgid_t gid,
+ umode_t mode,
+ struct module *owner,
+ kuid_t uid, kgid_t gid,
loff_t size,
const struct kernfs_ops *ops,
void *priv, const void *ns,
@@ -996,7 +999,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
flags = KERNFS_FILE;
kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
- uid, gid, flags);
+ owner, uid, gid, flags);
if (!kn)
return ERR_PTR(-ENOMEM);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 80fe90a280724..e128a14aa8628 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -133,6 +133,7 @@ void kernfs_put_active(struct kernfs_node *kn);
int kernfs_add_one(struct kernfs_node *kn);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
+ struct module *owner,
kuid_t uid, kgid_t gid,
unsigned flags);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index c8f8e41b84110..336c9681692a2 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -36,7 +36,8 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
gid = target->iattr->ia_gid;
}
- kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid,
+ kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO,
+ target->owner, uid, gid,
KERNFS_LINK);
if (!kn)
return ERR_PTR(-ENOMEM);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 59dffd5ca5178..5aff6ff07392c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -57,7 +57,8 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
kobject_get_ownership(kobj, &uid, &gid);
kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
- S_IRWXU | S_IRUGO | S_IXUGO, uid, gid,
+ S_IRWXU | S_IRUGO | S_IXUGO, NULL,
+ uid, gid,
kobj, ns);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d019d6ac6ad09..1fe72b0ff3760 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -314,8 +314,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
key = attr->key ?: (struct lock_class_key *)&attr->skey;
#endif
- kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
- size, ops, (void *)attr, ns, key);
+ kn = __kernfs_create_file(parent, attr->name, mode & 0777, attr->owner,
+ uid, gid, size, ops, (void *)attr, ns, key);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
sysfs_warn_dup(parent, attr->name);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f29d620045272..4031e389f21fc 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -136,6 +136,7 @@ static int internal_create_group(struct kobject *kobj, int update,
} else {
kn = kernfs_create_dir_ns(kobj->sd, grp->name,
S_IRWXU | S_IRUGO | S_IXUGO,
+ grp->owner,
uid, gid, kobj, NULL);
if (IS_ERR(kn)) {
if (PTR_ERR(kn) == -EEXIST)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 7c6838ba08e6b..9e744f4899345 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -160,6 +160,7 @@ struct kernfs_node {
unsigned short flags;
umode_t mode;
+ struct module *owner;
struct kernfs_iattrs *iattr;
};
@@ -373,12 +374,14 @@ void kernfs_destroy_root(struct kernfs_root *root);
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
const char *name, umode_t mode,
+ struct module *owner,
kuid_t uid, kgid_t gid,
void *priv, const void *ns);
struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
const char *name);
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
const char *name, umode_t mode,
+ struct module *owner,
kuid_t uid, kgid_t gid,
loff_t size,
const struct kernfs_ops *ops,
@@ -475,13 +478,15 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { }
static inline struct kernfs_node *
kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
- umode_t mode, kuid_t uid, kgid_t gid,
+ umode_t mode, struct module *owner,
+ kuid_t uid, kgid_t gid,
void *priv, const void *ns)
{ return ERR_PTR(-ENOSYS); }
static inline struct kernfs_node *
__kernfs_create_file(struct kernfs_node *parent, const char *name,
- umode_t mode, kuid_t uid, kgid_t gid,
+ umode_t mode, struct module *owner,
+ kuid_t uid, kgid_t gid,
loff_t size, const struct kernfs_ops *ops,
void *priv, const void *ns, struct lock_class_key *key)
{ return ERR_PTR(-ENOSYS); }
@@ -568,14 +573,15 @@ static inline struct kernfs_node *
kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
void *priv)
{
- return kernfs_create_dir_ns(parent, name, mode,
+ return kernfs_create_dir_ns(parent, name, mode, parent->owner,
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
priv, NULL);
}
static inline struct kernfs_node *
kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
- umode_t mode, kuid_t uid, kgid_t gid,
+ umode_t mode, struct module *owner,
+ kuid_t uid, kgid_t gid,
loff_t size, const struct kernfs_ops *ops,
void *priv, const void *ns)
{
@@ -584,15 +590,16 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
#ifdef CONFIG_DEBUG_LOCK_ALLOC
key = (struct lock_class_key *)&ops->lockdep_key;
#endif
- return __kernfs_create_file(parent, name, mode, uid, gid,
+ return __kernfs_create_file(parent, name, mode, owner, uid, gid,
size, ops, priv, ns, key);
}
static inline struct kernfs_node *
kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
+ struct module *owner,
loff_t size, const struct kernfs_ops *ops, void *priv)
{
- return kernfs_create_file_ns(parent, name, mode,
+ return kernfs_create_file_ns(parent, name, mode, owner,
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
size, ops, priv, NULL);
}
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index e3f1e8ac1f85b..b816a497cb646 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -30,6 +30,7 @@ enum kobj_ns_type;
struct attribute {
const char *name;
umode_t mode;
+ struct module *owner;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
bool ignore_lockdep:1;
struct lock_class_key *key;
@@ -80,6 +81,7 @@ do { \
* @attrs: Pointer to NULL terminated list of attributes.
* @bin_attrs: Pointer to NULL terminated list of binary attributes.
* Either attrs or bin_attrs or both must be provided.
+ * @module: If set, module responsible for this attribute group
*/
struct attribute_group {
const char *name;
@@ -89,6 +91,7 @@ struct attribute_group {
struct bin_attribute *, int);
struct attribute **attrs;
struct bin_attribute **bin_attrs;
+ struct module *owner;
};
/*
@@ -100,38 +103,52 @@ struct attribute_group {
#define __ATTR(_name, _mode, _show, _store) { \
.attr = {.name = __stringify(_name), \
- .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
+ .owner = THIS_MODULE, \
+ }, \
.show = _show, \
.store = _store, \
}
#define __ATTR_PREALLOC(_name, _mode, _show, _store) { \
.attr = {.name = __stringify(_name), \
- .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode) },\
+ .mode = SYSFS_PREALLOC | VERIFY_OCTAL_PERMISSIONS(_mode),\
+ .owner = THIS_MODULE, \
+ }, \
.show = _show, \
.store = _store, \
}
#define __ATTR_RO(_name) { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
+ .attr = { .name = __stringify(_name), \
+ .mode = 0444, \
+ .owner = THIS_MODULE, \
+ }, \
.show = _name##_show, \
}
#define __ATTR_RO_MODE(_name, _mode) { \
.attr = { .name = __stringify(_name), \
- .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
+ .owner = THIS_MODULE, \
+ }, \
.show = _name##_show, \
}
#define __ATTR_RW_MODE(_name, _mode) { \
.attr = { .name = __stringify(_name), \
- .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \
+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
+ .owner = THIS_MODULE, \
+ }, \
.show = _name##_show, \
.store = _name##_store, \
}
#define __ATTR_WO(_name) { \
- .attr = { .name = __stringify(_name), .mode = 0200 }, \
+ .attr = { .name = __stringify(_name), \
+ .mode = 0200, \
+ .owner = THIS_MODULE, \
+ }, \
.store = _name##_store, \
}
@@ -141,8 +158,11 @@ struct attribute_group {
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) { \
- .attr = {.name = __stringify(_name), .mode = _mode, \
- .ignore_lockdep = true }, \
+ .attr = {.name = __stringify(_name), \
+ .mode = _mode, \
+ .ignore_lockdep = true, \
+ .owner = THIS_MODULE, \
+ }, \
.show = _show, \
.store = _store, \
}
@@ -159,6 +179,7 @@ static const struct attribute_group *_name##_groups[] = { \
#define ATTRIBUTE_GROUPS(_name) \
static const struct attribute_group _name##_group = { \
.attrs = _name##_attrs, \
+ .owner = THIS_MODULE, \
}; \
__ATTRIBUTE_GROUPS(_name)
@@ -199,20 +220,29 @@ struct bin_attribute {
/* macros to create static binary attributes easier */
#define __BIN_ATTR(_name, _mode, _read, _write, _size) { \
- .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .attr = { .name = __stringify(_name), \
+ .mode = _mode, \
+ .owner = THIS_MODULE, \
+ }, \
.read = _read, \
.write = _write, \
.size = _size, \
}
#define __BIN_ATTR_RO(_name, _size) { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
+ .attr = { .name = __stringify(_name), \
+ .mode = 0444, \
+ .owner = THIS_MODULE, \
+ }, \
.read = _name##_read, \
.size = _size, \
}
#define __BIN_ATTR_WO(_name, _size) { \
- .attr = { .name = __stringify(_name), .mode = 0200 }, \
+ .attr = { .name = __stringify(_name), \
+ .mode = 0200, \
+ .owner = THIS_MODULE, \
+ }, \
.write = _name##_write, \
.size = _size, \
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d0725c1a8db5e..bbcf1b14cf429 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3935,7 +3935,7 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
key = &cft->lockdep_key;
#endif
kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
- cgroup_file_mode(cft),
+ cgroup_file_mode(cft), NULL,
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
0, cft->kf_ops, cft,
NULL, key);