author    Mike Snitzer <snitzer@redhat.com>  2016-02-09 19:21:18 -0500
committer Mike Snitzer <snitzer@redhat.com>  2016-02-09 19:21:18 -0500
commit    d80a7e4f8b5be9c81e4d452137623b003fa64745 (patch)
tree      9cc41bbefa63a4e220905beb4c4d34d9d1ad2f3c
parent    aa8c484be790b54494d0360809963135e64b1ebe (diff)
download  linux-d80a7e4f8b5be9c81e4d452137623b003fa64745.tar.gz
dm mpath: brute force switch over to (ab)using RCU
(ab)use of RCU aside, also:
- switched over to using bool instead of unsigned where appropriate
- switched repeat_count over to using percpu_counter
- attempted to make path-selection lockless but failed horribly because
  the path selectors themselves (e.g. service-time) assume locking

In the end if repeat_count is set to 1 there isn't much improvement.
Still taking m->lock every IO; but for a much shorter time.

Not-Signed-off-by: Mike Snitzer <snitzer@redhat.com>
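For context, here is a minimal sketch of the read/update pattern the diff below adopts (hypothetical types and function names, not code from the patch): readers take rcu_read_lock() and rcu_dereference() the paths pointer, while the updater mutates the shared structure in place and then calls synchronize_rcu(). That in-place write is the "(ab)use" the subject line admits to; classic RCU would copy the object, update the copy, and publish it with rcu_assign_pointer().

#include <linux/rcupdate.h>
#include <linux/types.h>

/* Hypothetical stand-ins for 'struct multipath' / 'struct multipath_paths'. */
struct paths_state {
	bool queue_io;
	unsigned nr_valid_paths;
};

struct mpath_like {
	struct paths_state __rcu *paths;
};

/* Reader: lockless snapshot of the path state. */
static bool must_queue_io(struct mpath_like *m)
{
	struct paths_state *paths;
	bool ret;

	rcu_read_lock();
	paths = rcu_dereference(m->paths);
	ret = paths->queue_io || !paths->nr_valid_paths;
	rcu_read_unlock();

	return ret;
}

/* Updater: mutates the shared object in place, then waits for readers. */
static void disable_queueing(struct mpath_like *m)
{
	/* assumes no concurrent updater, hence the '1' condition */
	struct paths_state *paths = rcu_dereference_protected(m->paths, 1);

	paths->queue_io = false;	/* in-place write: the "(ab)use" */
	synchronize_rcu();		/* drain readers that saw the old value */
}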
-rw-r--r--  drivers/md/dm-mpath.c | 588
1 file changed, 359 insertions(+), 229 deletions(-)
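The repeat_count conversion uses the stock percpu_counter API. A small sketch under assumed names (the repeat_count_* wrappers are illustrative; the patch open-codes the same calls in alloc_multipath(), free_multipath(), choose_path_in_pg() and __multipath_map()):

#include <linux/gfp.h>
#include <linux/percpu_counter.h>

/* In the patch this counter lives inside 'struct multipath'. */
static struct percpu_counter repeat_count;

static int repeat_count_setup(void)
{
	/* allocates the per-CPU deltas; returns -ENOMEM on failure */
	return percpu_counter_init(&repeat_count, 0, GFP_KERNEL);
}

/* Fast path, as in __multipath_map(): decrement, then test for zero. */
static bool repeat_count_expired(void)
{
	percpu_counter_dec(&repeat_count);
	/*
	 * percpu_counter_read_positive() is a fast but approximate read
	 * (it ignores other CPUs' unsummed deltas), so the path selector
	 * may be re-run a little early or late.
	 */
	return percpu_counter_read_positive(&repeat_count) == 0;
}

/* The path selector handed back a fresh repeat_count. */
static void repeat_count_reset(unsigned int from_ps)
{
	percpu_counter_set(&repeat_count, from_ps);
}

static void repeat_count_teardown(void)
{
	percpu_counter_destroy(&repeat_count);
}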
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 177a016fe980e..a5c3cb430c287 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -11,6 +11,8 @@
#include "dm-path-selector.h"
#include "dm-uevent.h"
+#include <linux/rcupdate.h>
+#include <linux/percpu_counter.h>
#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/init.h>
@@ -34,7 +36,7 @@ struct pgpath {
struct list_head list;
struct priority_group *pg; /* Owning PG */
- unsigned is_active; /* Path status */
+ bool is_active; /* Path status */
unsigned fail_count; /* Cumulative failure count */
struct dm_path path;
@@ -54,47 +56,52 @@ struct priority_group {
struct path_selector ps;
unsigned pg_num; /* Reference number */
- unsigned bypassed; /* Temporarily bypass this PG? */
+ bool bypassed; /* Temporarily bypass this PG? */
unsigned nr_pgpaths; /* Number of paths in PG */
struct list_head pgpaths;
};
-/* Multipath context */
-struct multipath {
- struct list_head list;
- struct dm_target *ti;
-
- const char *hw_handler_name;
- char *hw_handler_params;
-
- spinlock_t lock;
+struct multipath_paths {
+ unsigned pg_init_in_progress; /* Only one pg_init allowed at once */
unsigned nr_priority_groups;
struct list_head priority_groups;
- wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
-
- unsigned pg_init_required; /* pg_init needs calling? */
- unsigned pg_init_in_progress; /* Only one pg_init allowed at once */
- unsigned pg_init_delay_retry; /* Delay pg_init retry? */
-
unsigned nr_valid_paths; /* Total number of usable paths */
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
- unsigned repeat_count; /* I/Os left before calling PS again */
- unsigned queue_io:1; /* Must we queue all I/O? */
- unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */
- unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
- unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */
- unsigned pg_init_disabled:1; /* pg_init is not currently allowed */
+ bool queue_io:1; /* Must we queue all I/O? */
+ bool queue_if_no_path:1; /* Queue I/O if last path fails? */
+ bool saved_queue_if_no_path:1; /* Saved state during suspension */
+ bool retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */
+ bool pg_init_disabled:1; /* pg_init is not currently allowed */
+ bool pg_init_required:1; /* pg_init needs calling? */
+ bool pg_init_delay_retry:1; /* Delay pg_init retry? */
+};
+
+/* Multipath context */
+struct multipath {
+ struct list_head list;
+ struct dm_target *ti;
+
+ const char *hw_handler_name;
+ char *hw_handler_params;
+
+ spinlock_t lock;
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_count; /* Number of times pg_init called */
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
+ struct multipath_paths *paths;
+
+ wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
+
+ struct percpu_counter repeat_count; /* I/Os left before calling PS again */
+
struct work_struct trigger_event;
/*
@@ -133,7 +140,7 @@ static struct pgpath *alloc_pgpath(void)
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
if (pgpath) {
- pgpath->is_active = 1;
+ pgpath->is_active = true;
INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
}
@@ -188,23 +195,28 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
m = kzalloc(sizeof(*m), GFP_KERNEL);
if (m) {
- INIT_LIST_HEAD(&m->priority_groups);
spin_lock_init(&m->lock);
- m->queue_io = 1;
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
INIT_WORK(&m->trigger_event, trigger_event);
init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
+ m->paths = kzalloc(sizeof(struct multipath_paths), GFP_KERNEL);
+ if (!m->paths)
+ goto out_paths;
+ m->paths->queue_io = true;
+ INIT_LIST_HEAD(&m->paths->priority_groups);
+
+ if (percpu_counter_init(&m->repeat_count, 0, GFP_KERNEL))
+ goto out_percpu_cnt;
+
m->mpio_pool = NULL;
if (!use_blk_mq) {
unsigned min_ios = dm_get_reserved_rq_based_ios();
m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
- if (!m->mpio_pool) {
- kfree(m);
- return NULL;
- }
+ if (!m->mpio_pool)
+ goto out_mpio_pool;
}
m->ti = ti;
@@ -212,13 +224,21 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
}
return m;
+
+out_mpio_pool:
+ percpu_counter_destroy(&m->repeat_count);
+out_percpu_cnt:
+ kfree(m->paths);
+out_paths:
+ kfree(m);
+ return NULL;
}
static void free_multipath(struct multipath *m)
{
struct priority_group *pg, *tmp;
- list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
+ list_for_each_entry_safe(pg, tmp, &m->paths->priority_groups, list) {
list_del(&pg->list);
free_priority_group(pg, m->ti);
}
@@ -226,6 +246,8 @@ static void free_multipath(struct multipath *m)
kfree(m->hw_handler_name);
kfree(m->hw_handler_params);
mempool_destroy(m->mpio_pool);
+ percpu_counter_destroy(&m->repeat_count);
+ kfree(m->paths);
kfree(m);
}
@@ -273,86 +295,121 @@ static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
static int __pg_init_all_paths(struct multipath *m)
{
struct pgpath *pgpath;
+ struct priority_group *pg;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
unsigned long pg_init_delay = 0;
- if (m->pg_init_in_progress || m->pg_init_disabled)
+ if (paths->pg_init_in_progress || paths->pg_init_disabled)
return 0;
m->pg_init_count++;
- m->pg_init_required = 0;
+ paths->pg_init_required = false;
/* Check here to reset pg_init_required */
- if (!m->current_pg)
+ pg = rcu_dereference(paths->current_pg);
+ if (!pg)
return 0;
- if (m->pg_init_delay_retry)
+ if (paths->pg_init_delay_retry)
pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
- list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
+ list_for_each_entry_rcu(pgpath, &pg->pgpaths, list) {
/* Skip failed paths */
if (!pgpath->is_active)
continue;
if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
pg_init_delay))
- m->pg_init_in_progress++;
+ paths->pg_init_in_progress++;
}
- return m->pg_init_in_progress;
+ return paths->pg_init_in_progress;
}
static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
- m->current_pg = pgpath->pg;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
+
+ rcu_assign_pointer(paths->current_pg, pgpath->pg);
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
- m->pg_init_required = 1;
- m->queue_io = 1;
+ paths->pg_init_required = true;
+ paths->queue_io = true;
} else {
- m->pg_init_required = 0;
- m->queue_io = 0;
+ paths->pg_init_required = false;
+ paths->queue_io = false;
}
+ // FIXME: move to 'struct multipath_paths'
m->pg_init_count = 0;
}
-static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
- size_t nr_bytes)
+static struct pgpath *choose_path_in_pg(struct multipath *m,
+ struct priority_group *pg,
+ size_t nr_bytes, bool *changed)
{
+ unsigned long flags;
struct dm_path *path;
+ unsigned repeat_count;
+ struct pgpath *pgpath = NULL;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
- path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
- if (!path)
- return -ENXIO;
+ // FIXME: path selectors aren't lockless...
+ spin_lock_irqsave(&m->lock, flags);
+ path = pg->ps.type->select_path(&pg->ps, &repeat_count, nr_bytes);
+ if (!path) {
+ spin_unlock_irqrestore(&m->lock, flags);
+ return ERR_PTR(-ENXIO);
+ }
+ spin_unlock_irqrestore(&m->lock, flags);
- m->current_pgpath = path_to_pgpath(path);
+ percpu_counter_set(&m->repeat_count, repeat_count);
+ pgpath = path_to_pgpath(path);
- if (m->current_pg != pg)
- __switch_pg(m, m->current_pgpath);
+ if (unlikely(rcu_dereference(paths->current_pg) != pg)) {
+ /* Only update current_pgpath if pg changed */
+ rcu_assign_pointer(paths->current_pgpath, pgpath);
+ *changed = true;
+ spin_lock_irqsave(&m->lock, flags);
+ __switch_pg(m, pgpath);
+ spin_unlock_irqrestore(&m->lock, flags);
+ }
- return 0;
+ return pgpath;
}
-static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
+static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes, bool *changed)
{
+ struct multipath_paths *paths;
struct priority_group *pg;
+ struct pgpath *pgpath = NULL;
unsigned bypassed = 1;
- if (!m->nr_valid_paths) {
- m->queue_io = 0;
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ paths = rcu_dereference(m->paths);
+
+ if (!paths->nr_valid_paths) {
+ paths->queue_io = false;
+ *changed = true;
goto failed;
}
/* Were we instructed to switch PG? */
- if (m->next_pg) {
- pg = m->next_pg;
- m->next_pg = NULL;
- if (!__choose_path_in_pg(m, pg, nr_bytes))
- return;
+ pg = rcu_dereference(paths->next_pg);
+ if (pg) {
+ rcu_assign_pointer(paths->next_pg, NULL);
+ pgpath = choose_path_in_pg(m, pg, nr_bytes, changed);
+ if (!IS_ERR_OR_NULL(pgpath))
+ return pgpath;
}
/* Don't change PG until it has no remaining paths */
- if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
- return;
+ pg = rcu_dereference(paths->current_pg);
+ if (pg) {
+ pgpath = choose_path_in_pg(m, pg, nr_bytes, changed);
+ if (!IS_ERR_OR_NULL(pgpath))
+ return pgpath;
+ }
/*
* Loop through priority groups until we find a valid path.
@@ -361,20 +418,26 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
* pg_init_delay_retry so we do not hammer controllers.
*/
do {
- list_for_each_entry(pg, &m->priority_groups, list) {
- if (pg->bypassed == bypassed)
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+ if (pg->bypassed == !!bypassed)
continue;
- if (!__choose_path_in_pg(m, pg, nr_bytes)) {
- if (!bypassed)
- m->pg_init_delay_retry = 1;
- return;
+ pgpath = choose_path_in_pg(m, pg, nr_bytes, changed);
+ if (!IS_ERR_OR_NULL(pgpath)) {
+ if (!bypassed) {
+ paths->pg_init_delay_retry = true;
+ *changed = true;
+ }
+ return pgpath;
}
}
} while (bypassed--);
failed:
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ rcu_assign_pointer(paths->current_pgpath, NULL);
+ rcu_assign_pointer(paths->current_pg, NULL);
+ *changed = true;
+
+ return NULL;
}
/*
@@ -390,8 +453,12 @@ failed:
*/
static int __must_push_back(struct multipath *m)
{
- return (m->queue_if_no_path ||
- (m->queue_if_no_path != m->saved_queue_if_no_path &&
+ struct multipath_paths *paths = rcu_dereference(m->paths);
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ return (paths->queue_if_no_path ||
+ (paths->queue_if_no_path != paths->saved_queue_if_no_path &&
dm_noflush_suspending(m->ti)));
}
@@ -403,31 +470,42 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
struct request *rq, struct request **__clone)
{
struct multipath *m = ti->private;
+ struct multipath_paths *paths;
int r = DM_MAPIO_REQUEUE;
size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
struct pgpath *pgpath;
struct block_device *bdev;
struct dm_mpath_io *mpio;
+ bool sync_rcu = false;
- spin_lock_irq(&m->lock);
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
/* Do we need to select a new pgpath? */
- if (!m->current_pgpath ||
- (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
- __choose_pgpath(m, nr_bytes);
-
- pgpath = m->current_pgpath;
+ pgpath = rcu_dereference(paths->current_pgpath);
+ if (!pgpath)
+ pgpath = choose_pgpath(m, nr_bytes, &sync_rcu);
+ else if (!paths->queue_io) {
+ percpu_counter_dec(&m->repeat_count);
+ if (percpu_counter_read_positive(&m->repeat_count) == 0)
+ pgpath = choose_pgpath(m, nr_bytes, &sync_rcu);
+ }
if (!pgpath) {
if (!__must_push_back(m))
r = -EIO; /* Failed */
goto out_unlock;
- } else if (m->queue_io || m->pg_init_required) {
+ } else if (paths->queue_io || paths->pg_init_required) {
+ spin_lock_irq(&m->lock);
__pg_init_all_paths(m);
+ spin_unlock_irq(&m->lock);
+ sync_rcu = true;
goto out_unlock;
}
- spin_unlock_irq(&m->lock);
+ rcu_read_unlock();
+ if (sync_rcu)
+ synchronize_rcu();
mpio = set_mpio(m, map_context);
if (!mpio)
@@ -472,7 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
return DM_MAPIO_REMAPPED;
out_unlock:
- spin_unlock_irq(&m->lock);
+ rcu_read_unlock();
+ if (sync_rcu)
+ synchronize_rcu();
return r;
}
@@ -498,24 +578,21 @@ static void multipath_release_clone(struct request *clone)
/*
* If we run out of usable paths, should we queue I/O or error it?
*/
-static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
- unsigned save_old_value)
+static void queue_if_no_path(struct multipath *m, bool queue_if_no_path,
+ bool save_old_value)
{
- unsigned long flags;
-
- spin_lock_irqsave(&m->lock, flags);
+ struct multipath_paths *paths = rcu_dereference(m->paths);
if (save_old_value)
- m->saved_queue_if_no_path = m->queue_if_no_path;
+ paths->saved_queue_if_no_path = paths->queue_if_no_path;
else
- m->saved_queue_if_no_path = queue_if_no_path;
- m->queue_if_no_path = queue_if_no_path;
- spin_unlock_irqrestore(&m->lock, flags);
+ paths->saved_queue_if_no_path = queue_if_no_path;
+ paths->queue_if_no_path = queue_if_no_path;
if (!queue_if_no_path)
dm_table_run_md_queue_async(m->ti->table);
- return 0;
+ synchronize_rcu();
}
/*
@@ -602,10 +679,10 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
goto bad;
}
- if (m->retain_attached_hw_handler || m->hw_handler_name)
+ if (m->paths->retain_attached_hw_handler || m->hw_handler_name)
q = bdev_get_queue(p->path.dev->bdev);
- if (m->retain_attached_hw_handler) {
+ if (m->paths->retain_attached_hw_handler) {
retain:
attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
if (attached_handler_name) {
@@ -805,12 +882,12 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
argc--;
if (!strcasecmp(arg_name, "queue_if_no_path")) {
- r = queue_if_no_path(m, 1, 0);
+ queue_if_no_path(m, 1, 0);
continue;
}
if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
- m->retain_attached_hw_handler = 1;
+ m->paths->retain_attached_hw_handler = true;
continue;
}
@@ -868,7 +945,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
if (r)
goto bad;
- r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
+ r = dm_read_arg(_args, &as, &m->paths->nr_priority_groups, &ti->error);
if (r)
goto bad;
@@ -876,8 +953,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
if (r)
goto bad;
- if ((!m->nr_priority_groups && next_pg_num) ||
- (m->nr_priority_groups && !next_pg_num)) {
+ if ((!m->paths->nr_priority_groups && next_pg_num) ||
+ (m->paths->nr_priority_groups && !next_pg_num)) {
ti->error = "invalid initial priority group";
r = -EINVAL;
goto bad;
@@ -893,15 +970,15 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
goto bad;
}
- m->nr_valid_paths += pg->nr_pgpaths;
- list_add_tail(&pg->list, &m->priority_groups);
+ m->paths->nr_valid_paths += pg->nr_pgpaths;
+ list_add_tail(&pg->list, &m->paths->priority_groups);
pg_count++;
pg->pg_num = pg_count;
if (!--next_pg_num)
- m->next_pg = pg;
+ m->paths->next_pg = pg;
}
- if (pg_count != m->nr_priority_groups) {
+ if (pg_count != m->paths->nr_priority_groups) {
ti->error = "priority group count mismatch";
r = -EINVAL;
goto bad;
@@ -922,20 +999,21 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
+ struct multipath_paths *paths;
DECLARE_WAITQUEUE(wait, current);
- unsigned long flags;
add_wait_queue(&m->pg_init_wait, &wait);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- spin_lock_irqsave(&m->lock, flags);
- if (!m->pg_init_in_progress) {
- spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
+ if (!paths->pg_init_in_progress) {
+ rcu_read_unlock();
break;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_unlock();
io_schedule();
}
@@ -946,20 +1024,18 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
static void flush_multipath_work(struct multipath *m)
{
- unsigned long flags;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
- spin_lock_irqsave(&m->lock, flags);
- m->pg_init_disabled = 1;
- spin_unlock_irqrestore(&m->lock, flags);
+ paths->pg_init_disabled = true;
+ synchronize_rcu();
flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m);
flush_workqueue(kmultipathd);
flush_work(&m->trigger_event);
- spin_lock_irqsave(&m->lock, flags);
- m->pg_init_disabled = 0;
- spin_unlock_irqrestore(&m->lock, flags);
+ paths->pg_init_disabled = false;
+ synchronize_rcu();
}
static void multipath_dtr(struct dm_target *ti)
@@ -976,7 +1052,12 @@ static void multipath_dtr(struct dm_target *ti)
static int fail_path(struct pgpath *pgpath)
{
unsigned long flags;
+ struct multipath_paths *paths;
struct multipath *m = pgpath->pg->m;
+ bool sync_rcu = false;
+
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
spin_lock_irqsave(&m->lock, flags);
@@ -986,21 +1067,24 @@ static int fail_path(struct pgpath *pgpath)
DMWARN("Failing path %s.", pgpath->path.dev->name);
pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
- pgpath->is_active = 0;
+ pgpath->is_active = false;
pgpath->fail_count++;
- m->nr_valid_paths--;
+ paths->nr_valid_paths--;
+ sync_rcu = true;
- if (pgpath == m->current_pgpath)
- m->current_pgpath = NULL;
+ if (pgpath == rcu_dereference(paths->current_pgpath))
+ rcu_assign_pointer(paths->current_pgpath, NULL);
dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
- pgpath->path.dev->name, m->nr_valid_paths);
+ pgpath->path.dev->name, paths->nr_valid_paths);
schedule_work(&m->trigger_event);
-
out:
spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_unlock();
+ if (sync_rcu)
+ synchronize_rcu();
return 0;
}
@@ -1010,9 +1094,15 @@ out:
*/
static int reinstate_path(struct pgpath *pgpath)
{
- int r = 0, run_queue = 0;
+ int r = 0;
unsigned long flags;
struct multipath *m = pgpath->pg->m;
+ struct multipath_paths *paths;
+ bool run_queue = false;
+ bool sync_rcu = false;
+
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
spin_lock_irqsave(&m->lock, flags);
@@ -1030,25 +1120,28 @@ static int reinstate_path(struct pgpath *pgpath)
if (r)
goto out;
- pgpath->is_active = 1;
+ pgpath->is_active = true;
- if (!m->nr_valid_paths++) {
- m->current_pgpath = NULL;
- run_queue = 1;
- } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
+ if (!paths->nr_valid_paths++) {
+ rcu_assign_pointer(paths->current_pgpath, NULL);
+ run_queue = true;
+ } else if (m->hw_handler_name && (rcu_dereference(paths->current_pg) == pgpath->pg)) {
if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
- m->pg_init_in_progress++;
+ paths->pg_init_in_progress++;
}
+ sync_rcu = true;
dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
- pgpath->path.dev->name, m->nr_valid_paths);
+ pgpath->path.dev->name, paths->nr_valid_paths);
schedule_work(&m->trigger_event);
-
out:
spin_unlock_irqrestore(&m->lock, flags);
if (run_queue)
dm_table_run_md_queue_async(m->ti->table);
+ rcu_read_unlock();
+ if (sync_rcu)
+ synchronize_rcu();
return r;
}
@@ -1062,9 +1155,10 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
int r = -EINVAL;
struct pgpath *pgpath;
struct priority_group *pg;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
- list_for_each_entry(pg, &m->priority_groups, list) {
- list_for_each_entry(pgpath, &pg->pgpaths, list) {
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+ list_for_each_entry_rcu(pgpath, &pg->pgpaths, list) {
if (pgpath->path.dev == dev)
r = action(pgpath);
}
@@ -1077,17 +1171,19 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
* Temporarily try to avoid having to use the specified PG
*/
static void bypass_pg(struct multipath *m, struct priority_group *pg,
- int bypassed)
+ bool bypassed)
{
unsigned long flags;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
spin_lock_irqsave(&m->lock, flags);
pg->bypassed = bypassed;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ rcu_assign_pointer(paths->current_pgpath, NULL);
+ rcu_assign_pointer(paths->current_pg, NULL);
spin_unlock_irqrestore(&m->lock, flags);
+ synchronize_rcu();
schedule_work(&m->trigger_event);
}
@@ -1099,26 +1195,25 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
{
struct priority_group *pg;
unsigned pgnum;
- unsigned long flags;
char dummy;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
- (pgnum > m->nr_priority_groups)) {
+ (pgnum > paths->nr_priority_groups)) {
DMWARN("invalid PG number supplied to switch_pg_num");
return -EINVAL;
}
- spin_lock_irqsave(&m->lock, flags);
- list_for_each_entry(pg, &m->priority_groups, list) {
- pg->bypassed = 0;
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+ pg->bypassed = false;
if (--pgnum)
continue;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
- m->next_pg = pg;
+ rcu_assign_pointer(paths->current_pgpath, NULL);
+ rcu_assign_pointer(paths->current_pg, NULL);
+ rcu_assign_pointer(paths->next_pg, pg);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ synchronize_rcu();
schedule_work(&m->trigger_event);
return 0;
@@ -1128,22 +1223,27 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
* Set/clear bypassed status of a PG.
* PGs are numbered upwards from 1 in the order they were declared.
*/
-static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
+static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
{
struct priority_group *pg;
unsigned pgnum;
char dummy;
+ struct multipath_paths *paths;
+
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
- (pgnum > m->nr_priority_groups)) {
+ (pgnum > paths->nr_priority_groups)) {
DMWARN("invalid PG number supplied to bypass_pg");
return -EINVAL;
}
- list_for_each_entry(pg, &m->priority_groups, list) {
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
if (!--pgnum)
break;
}
+ rcu_read_unlock();
bypass_pg(m, pg, bypassed);
return 0;
@@ -1152,20 +1252,27 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
/*
* Should we retry pg_init immediately?
*/
-static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
+static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
unsigned long flags;
- int limit_reached = 0;
+ struct multipath_paths *paths;
+ bool limit_reached = false, sync_rcu = false;
- spin_lock_irqsave(&m->lock, flags);
-
- if (m->pg_init_count <= m->pg_init_retries && !m->pg_init_disabled)
- m->pg_init_required = 1;
- else
- limit_reached = 1;
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
+ spin_lock_irqsave(&m->lock, flags);
+ if (m->pg_init_count <= m->pg_init_retries && !paths->pg_init_disabled) {
+ paths->pg_init_required = true;
+ sync_rcu = true;
+ } else
+ limit_reached = true;
spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_unlock();
+ if (sync_rcu)
+ synchronize_rcu();
+
return limit_reached;
}
@@ -1174,8 +1281,9 @@ static void pg_init_done(void *data, int errors)
struct pgpath *pgpath = data;
struct priority_group *pg = pgpath->pg;
struct multipath *m = pg->m;
+ struct multipath_paths *paths;
unsigned long flags;
- unsigned delay_retry = 0;
+ bool delay_retry = false;
/* device or driver problems */
switch (errors) {
@@ -1202,7 +1310,7 @@ static void pg_init_done(void *data, int errors)
break;
case SCSI_DH_RETRY:
/* Wait before retrying. */
- delay_retry = 1;
+ delay_retry = true;
case SCSI_DH_IMM_RETRY:
case SCSI_DH_RES_TEMP_UNAVAIL:
if (pg_init_limit_reached(m, pgpath))
@@ -1218,34 +1326,36 @@ static void pg_init_done(void *data, int errors)
fail_path(pgpath);
}
+ paths = rcu_dereference(m->paths);
+
spin_lock_irqsave(&m->lock, flags);
if (errors) {
- if (pgpath == m->current_pgpath) {
+ if (pgpath == rcu_dereference(paths->current_pgpath)) {
DMERR("Could not failover device. Error %d.", errors);
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ rcu_assign_pointer(paths->current_pgpath, NULL);
+ rcu_assign_pointer(paths->current_pg, NULL);
}
- } else if (!m->pg_init_required)
- pg->bypassed = 0;
+ } else if (!paths->pg_init_required)
+ pg->bypassed = false;
- if (--m->pg_init_in_progress)
+ if (--paths->pg_init_in_progress)
/* Activations of other paths are still on going */
goto out;
- if (m->pg_init_required) {
- m->pg_init_delay_retry = delay_retry;
+ if (paths->pg_init_required) {
+ paths->pg_init_delay_retry = delay_retry;
if (__pg_init_all_paths(m))
goto out;
}
- m->queue_io = 0;
+ paths->queue_io = false;
/*
* Wake up any thread waiting to suspend.
*/
wake_up(&m->pg_init_wait);
-
out:
spin_unlock_irqrestore(&m->lock, flags);
+ synchronize_rcu();
}
static void activate_path(struct work_struct *work)
@@ -1292,8 +1402,8 @@ static int do_end_io(struct multipath *m, struct request *clone,
* request into dm core, which will remake a clone request and
* clone bios for it and resubmit it later.
*/
+ struct multipath_paths *paths;
int r = DM_ENDIO_REQUEUE;
- unsigned long flags;
if (!error && !clone->errors)
return 0; /* I/O complete */
@@ -1304,9 +1414,10 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (mpio->pgpath)
fail_path(mpio->pgpath);
- spin_lock_irqsave(&m->lock, flags);
- if (!m->nr_valid_paths) {
- if (!m->queue_if_no_path) {
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
+ if (!paths->nr_valid_paths) {
+ if (!paths->queue_if_no_path) {
if (!__must_push_back(m))
r = -EIO;
} else {
@@ -1314,7 +1425,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
r = error;
}
}
- spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_unlock();
return r;
}
@@ -1370,11 +1481,10 @@ static void multipath_postsuspend(struct dm_target *ti)
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = ti->private;
- unsigned long flags;
+ struct multipath_paths *paths = rcu_dereference(m->paths);
- spin_lock_irqsave(&m->lock, flags);
- m->queue_if_no_path = m->saved_queue_if_no_path;
- spin_unlock_irqrestore(&m->lock, flags);
+ paths->queue_if_no_path = paths->saved_queue_if_no_path;
+ synchronize_rcu();
}
/*
@@ -1399,28 +1509,32 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
int sz = 0;
unsigned long flags;
struct multipath *m = ti->private;
- struct priority_group *pg;
+ struct multipath_paths *paths;
+ struct priority_group *pg, *current_pg;
struct pgpath *p;
unsigned pg_num;
char state;
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
+
spin_lock_irqsave(&m->lock, flags);
/* Features */
if (type == STATUSTYPE_INFO)
- DMEMIT("2 %u %u ", m->queue_io, m->pg_init_count);
+ DMEMIT("2 %u %u ", (paths->queue_io ? 1 : 0), m->pg_init_count);
else {
- DMEMIT("%u ", m->queue_if_no_path +
+ DMEMIT("%u ", (paths->queue_if_no_path ? 1 : 0) +
(m->pg_init_retries > 0) * 2 +
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
- m->retain_attached_hw_handler);
- if (m->queue_if_no_path)
+ (m->paths->retain_attached_hw_handler ? 1 : 0));
+ if (paths->queue_if_no_path)
DMEMIT("queue_if_no_path ");
if (m->pg_init_retries)
DMEMIT("pg_init_retries %u ", m->pg_init_retries);
if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
- if (m->retain_attached_hw_handler)
+ if (m->paths->retain_attached_hw_handler)
DMEMIT("retain_attached_hw_handler ");
}
@@ -1429,23 +1543,25 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
else
DMEMIT("1 %s ", m->hw_handler_name);
- DMEMIT("%u ", m->nr_priority_groups);
+ DMEMIT("%u ", paths->nr_priority_groups);
- if (m->next_pg)
- pg_num = m->next_pg->pg_num;
- else if (m->current_pg)
- pg_num = m->current_pg->pg_num;
+ if (rcu_dereference(paths->next_pg))
+ pg_num = rcu_dereference(paths->next_pg)->pg_num;
+ else if (rcu_dereference(paths->current_pg))
+ pg_num = rcu_dereference(paths->current_pg)->pg_num;
else
- pg_num = (m->nr_priority_groups ? 1 : 0);
+ pg_num = (paths->nr_priority_groups ? 1 : 0);
DMEMIT("%u ", pg_num);
+ current_pg = rcu_dereference(paths->current_pg);
+
switch (type) {
case STATUSTYPE_INFO:
- list_for_each_entry(pg, &m->priority_groups, list) {
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
if (pg->bypassed)
state = 'D'; /* Disabled */
- else if (pg == m->current_pg)
+ else if (pg == current_pg)
state = 'A'; /* Currently Active */
else
state = 'E'; /* Enabled */
@@ -1462,7 +1578,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u %u ", pg->nr_pgpaths,
pg->ps.type->info_args);
- list_for_each_entry(p, &pg->pgpaths, list) {
+ list_for_each_entry_rcu(p, &pg->pgpaths, list) {
DMEMIT("%s %s %u ", p->path.dev->name,
p->is_active ? "A" : "F",
p->fail_count);
@@ -1475,7 +1591,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE:
- list_for_each_entry(pg, &m->priority_groups, list) {
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
DMEMIT("%s ", pg->ps.type->name);
if (pg->ps.type->status)
@@ -1488,7 +1604,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u %u ", pg->nr_pgpaths,
pg->ps.type->table_args);
- list_for_each_entry(p, &pg->pgpaths, list) {
+ list_for_each_entry_rcu(p, &pg->pgpaths, list) {
DMEMIT("%s ", p->path.dev->name);
if (pg->ps.type->status)
sz += pg->ps.type->status(&pg->ps,
@@ -1500,6 +1616,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
}
spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_unlock();
}
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
@@ -1518,10 +1635,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
if (argc == 1) {
if (!strcasecmp(argv[0], "queue_if_no_path")) {
- r = queue_if_no_path(m, 1, 0);
+ queue_if_no_path(m, 1, 0);
goto out;
} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
- r = queue_if_no_path(m, 0, 0);
+ queue_if_no_path(m, 0, 0);
goto out;
}
}
@@ -1566,21 +1683,26 @@ out:
}
static int multipath_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev, fmode_t *mode)
+ struct block_device **bdev, fmode_t *mode)
{
struct multipath *m = ti->private;
+ struct multipath_paths *paths;
+ struct pgpath *current_pgpath;
unsigned long flags;
+ bool sync_rcu = false;
int r;
- spin_lock_irqsave(&m->lock, flags);
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
+ current_pgpath = rcu_dereference(paths->current_pgpath);
- if (!m->current_pgpath)
- __choose_pgpath(m, 0);
+ if (!current_pgpath)
+ current_pgpath = choose_pgpath(m, 0, &sync_rcu);
- if (m->current_pgpath) {
- if (!m->queue_io) {
- *bdev = m->current_pgpath->path.dev->bdev;
- *mode = m->current_pgpath->path.dev->mode;
+ if (current_pgpath) {
+ if (!paths->queue_io) {
+ *bdev = current_pgpath->path.dev->bdev;
+ *mode = current_pgpath->path.dev->mode;
r = 0;
} else {
/* pg_init has not started or completed */
@@ -1588,25 +1710,28 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
}
} else {
/* No path is available */
- if (m->queue_if_no_path)
+ if (paths->queue_if_no_path)
r = -ENOTCONN;
else
r = -EIO;
}
- spin_unlock_irqrestore(&m->lock, flags);
-
if (r == -ENOTCONN) {
- spin_lock_irqsave(&m->lock, flags);
- if (!m->current_pg) {
+ if (!rcu_dereference(paths->current_pg)) {
/* Path status changed, redo selection */
- __choose_pgpath(m, 0);
+ (void) choose_pgpath(m, 0, &sync_rcu);
}
- if (m->pg_init_required)
+ if (paths->pg_init_required) {
+ spin_lock_irqsave(&m->lock, flags);
__pg_init_all_paths(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irqrestore(&m->lock, flags);
+ sync_rcu = true;
+ }
dm_table_run_md_queue_async(m->ti->table);
}
+ rcu_read_unlock();
+ if (sync_rcu)
+ synchronize_rcu();
/*
* Only pass ioctls through if the device sizes match exactly.
@@ -1620,19 +1745,23 @@ static int multipath_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
struct multipath *m = ti->private;
+ struct multipath_paths *paths;
struct priority_group *pg;
struct pgpath *p;
int ret = 0;
- list_for_each_entry(pg, &m->priority_groups, list) {
- list_for_each_entry(p, &pg->pgpaths, list) {
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
+
+ list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+ list_for_each_entry_rcu(p, &pg->pgpaths, list) {
ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
if (ret)
goto out;
}
}
-
out:
+ rcu_read_unlock();
return ret;
}
@@ -1653,25 +1782,26 @@ static int __pgpath_busy(struct pgpath *pgpath)
*/
static int multipath_busy(struct dm_target *ti)
{
- int busy = 0, has_active = 0;
+ bool busy = false, has_active = false;
struct multipath *m = ti->private;
+ struct multipath_paths *paths;
struct priority_group *pg;
struct pgpath *pgpath;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
+ rcu_read_lock();
+ paths = rcu_dereference(m->paths);
/* pg_init in progress or no paths available */
- if (m->pg_init_in_progress ||
- (!m->nr_valid_paths && m->queue_if_no_path)) {
- busy = 1;
+ if (paths->pg_init_in_progress ||
+ (!paths->nr_valid_paths && paths->queue_if_no_path)) {
+ busy = true;
goto out;
}
/* Guess which priority_group will be used at next mapping time */
- if (unlikely(!m->current_pgpath && m->next_pg))
- pg = m->next_pg;
- else if (likely(m->current_pg))
- pg = m->current_pg;
+ if (unlikely(!rcu_dereference(paths->current_pgpath) && rcu_dereference(paths->next_pg)))
+ pg = rcu_dereference(paths->next_pg);
+ else if (likely(rcu_dereference(paths->current_pg)))
+ pg = rcu_dereference(paths->current_pg);
else
/*
* We don't know which pg will be used at next mapping time.
@@ -1686,13 +1816,13 @@ static int multipath_busy(struct dm_target *ti)
* If there is one non-busy active path at least, the path selector
* will be able to select it. So we consider such a pg as not busy.
*/
- busy = 1;
- list_for_each_entry(pgpath, &pg->pgpaths, list)
+ busy = true;
+ list_for_each_entry_rcu(pgpath, &pg->pgpaths, list)
if (pgpath->is_active) {
- has_active = 1;
+ has_active = true;
if (!__pgpath_busy(pgpath)) {
- busy = 0;
+ busy = false;
break;
}
}
@@ -1703,10 +1833,10 @@ static int multipath_busy(struct dm_target *ti)
* the current_pg will be changed at next mapping time.
* We need to try mapping to determine it.
*/
- busy = 0;
+ busy = false;
out:
- spin_unlock_irqrestore(&m->lock, flags);
+ rcu_read_unlock();
return busy;
}