| author | Mike Snitzer <snitzer@redhat.com> | 2016-02-09 19:21:18 -0500 |
|---|---|---|
| committer | Mike Snitzer <snitzer@redhat.com> | 2016-02-09 19:21:18 -0500 |
| commit | d80a7e4f8b5be9c81e4d452137623b003fa64745 | |
| tree | 9cc41bbefa63a4e220905beb4c4d34d9d1ad2f3c | |
| parent | aa8c484be790b54494d0360809963135e64b1ebe | |
| download | linux-d80a7e4f8b5be9c81e4d452137623b003fa64745.tar.gz | |
dm mpath: brute force switch over to (ab)using RCU
(ab)use of RCU aside, this patch also:
- switched over to using bool instead of unsigned where appropriate
- switched repeat_count over to using a percpu_counter (see the sketch
  after the diffstat below)
- attempted to make path selection lockless, but failed horribly because
  the path selectors themselves (e.g. service-time) assume locking; the
  RCU sketch below illustrates the pattern being (ab)used

In the end, if repeat_count is set to 1 there isn't much improvement:
m->lock is still taken on every I/O, just for a much shorter time.
Not-Signed-off-by: Mike Snitzer <snitzer@redhat.com>
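
Editor's note: for readers unfamiliar with the pattern being (ab)used, here is a minimal, hypothetical reduction — the `mp`/`mp_paths` names are illustrative, not from the patch. Readers dereference an RCU-protected pointer and load fields without taking `m->lock`; the abuse is that the writer mutates the fields of the live structure in place and then calls `synchronize_rcu()`, instead of the canonical copy/modify/`rcu_assign_pointer()` sequence. That is why `synchronize_rcu()` calls appear after nearly every state change in the diff below: the grace period is used as a crude settle barrier rather than to retire an old copy.

```c
#include <linux/rcupdate.h>
#include <linux/spinlock.h>

/* Hypothetical reduction of the scheme in this patch; names are made up. */
struct mp_paths {
	bool queue_io;
	unsigned nr_valid_paths;
};

struct mp {
	spinlock_t lock;
	struct mp_paths __rcu *paths;
};

/* Read side: no m->lock, only an RCU read-side critical section. */
static bool mp_must_queue(struct mp *m)
{
	struct mp_paths *paths;
	bool ret;

	rcu_read_lock();
	paths = rcu_dereference(m->paths);
	ret = paths->queue_io || !paths->nr_valid_paths;
	rcu_read_unlock();
	return ret;
}

/*
 * "Write" side as (ab)used here: mutate the live structure in place,
 * then wait for a grace period.  Proper RCU would allocate a copy,
 * modify the copy, and publish it with rcu_assign_pointer(), so
 * readers only ever see a complete old or new version.
 */
static void mp_set_queue_io(struct mp *m, bool queue_io)
{
	struct mp_paths *paths = rcu_dereference_protected(m->paths, 1);

	paths->queue_io = queue_io;
	synchronize_rcu();
}
```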
| -rw-r--r-- | drivers/md/dm-mpath.c | 588 |
1 file changed, 359 insertions, 229 deletions
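
Editor's note: the repeat_count conversion relies on the kernel's percpu_counter API (`percpu_counter_init`/`_dec`/`_read_positive`/`_set`/`_destroy` in the diff below). A minimal sketch of that lifecycle follows — the `remaining` counter and function names are hypothetical. Note that `percpu_counter_read_positive()` is an approximate read that does not fold in per-cpu deltas below the batch size, which is part of why a repeat_count of 1 sees little benefit.

```c
#include <linux/percpu_counter.h>

/* Hypothetical counter: I/Os left before re-running path selection. */
static struct percpu_counter remaining;

static int example_setup(void)
{
	/* Allocates per-cpu storage, so it can fail; check the result. */
	return percpu_counter_init(&remaining, 0, GFP_KERNEL);
}

/* Per-I/O accounting: returns true when path selection should re-run. */
static bool example_account_io(void)
{
	percpu_counter_dec(&remaining);	/* cheap per-cpu fast path */

	/*
	 * Approximate, clamped-at-zero read of the global count; per-cpu
	 * deltas below the batch size are not summed, so it may lag.
	 */
	return percpu_counter_read_positive(&remaining) == 0;
}

/* Called after the path selector hands back a fresh repeat_count. */
static void example_reset(unsigned repeat_count)
{
	percpu_counter_set(&remaining, repeat_count);
}

static void example_teardown(void)
{
	percpu_counter_destroy(&remaining);
}
```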
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 177a016fe980e..a5c3cb430c287 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -11,6 +11,8 @@
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
 
+#include <linux/rcupdate.h>
+#include <linux/percpu_counter.h>
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
@@ -34,7 +36,7 @@ struct pgpath {
 	struct list_head list;
 
 	struct priority_group *pg;	/* Owning PG */
-	unsigned is_active;		/* Path status */
+	bool is_active;			/* Path status */
 	unsigned fail_count;		/* Cumulative failure count */
 
 	struct dm_path path;
@@ -54,47 +56,52 @@ struct priority_group {
 	struct path_selector ps;
 
 	unsigned pg_num;		/* Reference number */
-	unsigned bypassed;		/* Temporarily bypass this PG? */
+	bool bypassed;			/* Temporarily bypass this PG? */
 
 	unsigned nr_pgpaths;		/* Number of paths in PG */
 	struct list_head pgpaths;
 };
 
-/* Multipath context */
-struct multipath {
-	struct list_head list;
-	struct dm_target *ti;
-
-	const char *hw_handler_name;
-	char *hw_handler_params;
-
-	spinlock_t lock;
+struct multipath_paths {
+	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
 
 	unsigned nr_priority_groups;
 	struct list_head priority_groups;
 
-	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */
-
-	unsigned pg_init_required;	/* pg_init needs calling? */
-	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
-	unsigned pg_init_delay_retry;	/* Delay pg_init retry? */
-
 	unsigned nr_valid_paths;	/* Total number of usable paths */
 
 	struct pgpath *current_pgpath;
 	struct priority_group *current_pg;
 	struct priority_group *next_pg;	/* Switch to this PG if set */
 
-	unsigned repeat_count;		/* I/Os left before calling PS again */
-
-	unsigned queue_io:1;		/* Must we queue all I/O? */
-	unsigned queue_if_no_path:1;	/* Queue I/O if last path fails? */
-	unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
-	unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */
-	unsigned pg_init_disabled:1;	/* pg_init is not currently allowed */
+	bool queue_io:1;		/* Must we queue all I/O? */
+	bool queue_if_no_path:1;	/* Queue I/O if last path fails? */
+	bool saved_queue_if_no_path:1;	/* Saved state during suspension */
+	bool retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */
+	bool pg_init_disabled:1;	/* pg_init is not currently allowed */
+	bool pg_init_required:1;	/* pg_init needs calling? */
+	bool pg_init_delay_retry:1;	/* Delay pg_init retry? */
+};
+
+/* Multipath context */
+struct multipath {
+	struct list_head list;
+	struct dm_target *ti;
+
+	const char *hw_handler_name;
+	char *hw_handler_params;
+
+	spinlock_t lock;
 
 	unsigned pg_init_retries;	/* Number of times to retry pg_init */
 	unsigned pg_init_count;		/* Number of times pg_init called */
 	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */
 
+	struct multipath_paths *paths;
+
+	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */
+
+	struct percpu_counter repeat_count;	/* I/Os left before calling PS again */
+
 	struct work_struct trigger_event;
 
 	/*
@@ -133,7 +140,7 @@ static struct pgpath *alloc_pgpath(void)
 	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
 
 	if (pgpath) {
-		pgpath->is_active = 1;
+		pgpath->is_active = true;
 		INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
 	}
 
@@ -188,23 +195,28 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
 
 	m = kzalloc(sizeof(*m), GFP_KERNEL);
 	if (m) {
-		INIT_LIST_HEAD(&m->priority_groups);
 		spin_lock_init(&m->lock);
-		m->queue_io = 1;
 		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
 		INIT_WORK(&m->trigger_event, trigger_event);
 		init_waitqueue_head(&m->pg_init_wait);
 		mutex_init(&m->work_mutex);
+		m->paths = kzalloc(sizeof(struct multipath_paths), GFP_KERNEL);
+		if (!m->paths)
+			goto out_paths;
+		m->paths->queue_io = true;
+		INIT_LIST_HEAD(&m->paths->priority_groups);
+
+		if (percpu_counter_init(&m->repeat_count, 0, GFP_KERNEL))
+			goto out_percpu_cnt;
+
 		m->mpio_pool = NULL;
 		if (!use_blk_mq) {
 			unsigned min_ios = dm_get_reserved_rq_based_ios();
 
 			m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
-			if (!m->mpio_pool) {
-				kfree(m);
-				return NULL;
-			}
+			if (!m->mpio_pool)
+				goto out_mpio_pool;
 		}
 
 		m->ti = ti;
@@ -212,13 +224,21 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
 	}
 
 	return m;
+
+out_mpio_pool:
+	percpu_counter_destroy(&m->repeat_count);
+out_percpu_cnt:
+	kfree(m->paths);
+out_paths:
+	kfree(m);
+	return NULL;
 }
 
 static void free_multipath(struct multipath *m)
 {
 	struct priority_group *pg, *tmp;
 
-	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
+	list_for_each_entry_safe(pg, tmp, &m->paths->priority_groups, list) {
 		list_del(&pg->list);
 		free_priority_group(pg, m->ti);
 	}
@@ -226,6 +246,8 @@ static void free_multipath(struct multipath *m)
 	kfree(m->hw_handler_name);
 	kfree(m->hw_handler_params);
 	mempool_destroy(m->mpio_pool);
+	percpu_counter_destroy(&m->repeat_count);
+	kfree(m->paths);
 	kfree(m);
 }
 
@@ -273,86 +295,121 @@ static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
 static int __pg_init_all_paths(struct multipath *m)
 {
 	struct pgpath *pgpath;
+	struct priority_group *pg;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 	unsigned long pg_init_delay = 0;
 
-	if (m->pg_init_in_progress || m->pg_init_disabled)
+	if (paths->pg_init_in_progress || paths->pg_init_disabled)
 		return 0;
 
 	m->pg_init_count++;
-	m->pg_init_required = 0;
+	paths->pg_init_required = false;
 
 	/* Check here to reset pg_init_required */
-	if (!m->current_pg)
+	pg = rcu_dereference(paths->current_pg);
+	if (!pg)
 		return 0;
 
-	if (m->pg_init_delay_retry)
+	if (paths->pg_init_delay_retry)
 		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
 						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
-	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
+	list_for_each_entry_rcu(pgpath, &pg->pgpaths, list) {
 		/* Skip failed paths */
 		if (!pgpath->is_active)
 			continue;
 		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
 				       pg_init_delay))
-			m->pg_init_in_progress++;
+			paths->pg_init_in_progress++;
 	}
-	return m->pg_init_in_progress;
+	return paths->pg_init_in_progress;
 }
 
 static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 {
-	m->current_pg = pgpath->pg;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
+
+	rcu_assign_pointer(paths->current_pg, pgpath->pg);
 
 	/* Must we initialise the PG first, and queue I/O till it's ready? */
 	if (m->hw_handler_name) {
-		m->pg_init_required = 1;
-		m->queue_io = 1;
+		paths->pg_init_required = true;
+		paths->queue_io = true;
 	} else {
-		m->pg_init_required = 0;
-		m->queue_io = 0;
+		paths->pg_init_required = false;
+		paths->queue_io = false;
 	}
 
+	// FIXME: move to 'struct multipath_paths'
 	m->pg_init_count = 0;
 }
 
-static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
-			       size_t nr_bytes)
+static struct pgpath *choose_path_in_pg(struct multipath *m,
+					struct priority_group *pg,
+					size_t nr_bytes, bool *changed)
 {
+	unsigned long flags;
 	struct dm_path *path;
+	unsigned repeat_count;
+	struct pgpath *pgpath = NULL;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
-	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
-	if (!path)
-		return -ENXIO;
+	// FIXME: path selectors aren't lockless...
+	spin_lock_irqsave(&m->lock, flags);
+	path = pg->ps.type->select_path(&pg->ps, &repeat_count, nr_bytes);
+	if (!path) {
+		spin_unlock_irqrestore(&m->lock, flags);
+		return ERR_PTR(-ENXIO);
+	}
+	spin_unlock_irqrestore(&m->lock, flags);
 
-	m->current_pgpath = path_to_pgpath(path);
+	percpu_counter_set(&m->repeat_count, repeat_count);
+	pgpath = path_to_pgpath(path);
 
-	if (m->current_pg != pg)
-		__switch_pg(m, m->current_pgpath);
+	if (unlikely(rcu_dereference(paths->current_pg) != pg)) {
+		/* Only update current_pgpath if pg changed */
+		rcu_assign_pointer(paths->current_pgpath, pgpath);
+		*changed = true;
+		spin_lock_irqsave(&m->lock, flags);
+		__switch_pg(m, pgpath);
+		spin_unlock_irqrestore(&m->lock, flags);
+	}
 
-	return 0;
+	return pgpath;
 }
 
-static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
+static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes, bool *changed)
 {
+	struct multipath_paths *paths;
 	struct priority_group *pg;
+	struct pgpath *pgpath = NULL;
 	unsigned bypassed = 1;
 
-	if (!m->nr_valid_paths) {
-		m->queue_io = 0;
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	paths = rcu_dereference(m->paths);
+
+	if (!paths->nr_valid_paths) {
+		paths->queue_io = false;
+		*changed = true;
 		goto failed;
 	}
 
 	/* Were we instructed to switch PG? */
-	if (m->next_pg) {
-		pg = m->next_pg;
-		m->next_pg = NULL;
-		if (!__choose_path_in_pg(m, pg, nr_bytes))
-			return;
+	pg = rcu_dereference(paths->next_pg);
+	if (pg) {
+		rcu_assign_pointer(paths->next_pg, NULL);
+		pgpath = choose_path_in_pg(m, pg, nr_bytes, changed);
+		if (!IS_ERR_OR_NULL(pgpath))
+			return pgpath;
 	}
 
 	/* Don't change PG until it has no remaining paths */
-	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
-		return;
+	pg = rcu_dereference(paths->current_pg);
+	if (pg) {
+		pgpath = choose_path_in_pg(m, pg, nr_bytes, changed);
+		if (!IS_ERR_OR_NULL(pgpath))
+			return pgpath;
+	}
 
 	/*
 	 * Loop through priority groups until we find a valid path.
@@ -361,20 +418,26 @@ static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
 	 * pg_init_delay_retry so we do not hammer controllers.
 	 */
 	do {
-		list_for_each_entry(pg, &m->priority_groups, list) {
-			if (pg->bypassed == bypassed)
+		list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+			if (pg->bypassed == !!bypassed)
 				continue;
-			if (!__choose_path_in_pg(m, pg, nr_bytes)) {
-				if (!bypassed)
-					m->pg_init_delay_retry = 1;
-				return;
+			pgpath = choose_path_in_pg(m, pg, nr_bytes, changed);
+			if (!IS_ERR_OR_NULL(pgpath)) {
+				if (!bypassed) {
+					paths->pg_init_delay_retry = true;
+					*changed = true;
+				}
+				return pgpath;
 			}
 		}
 	} while (bypassed--);
 
 failed:
-	m->current_pgpath = NULL;
-	m->current_pg = NULL;
+	rcu_assign_pointer(paths->current_pgpath, NULL);
+	rcu_assign_pointer(paths->current_pg, NULL);
+	*changed = true;
+
+	return NULL;
 }
 
 /*
@@ -390,8 +453,12 @@ failed:
  */
 static int __must_push_back(struct multipath *m)
 {
-	return (m->queue_if_no_path ||
-		(m->queue_if_no_path != m->saved_queue_if_no_path &&
+	struct multipath_paths *paths = rcu_dereference(m->paths);
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	return (paths->queue_if_no_path ||
+		(paths->queue_if_no_path != paths->saved_queue_if_no_path &&
 		 dm_noflush_suspending(m->ti)));
 }
 
@@ -403,31 +470,42 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 			   struct request *rq, struct request **__clone)
 {
 	struct multipath *m = ti->private;
+	struct multipath_paths *paths;
 	int r = DM_MAPIO_REQUEUE;
 	size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
 	struct pgpath *pgpath;
 	struct block_device *bdev;
 	struct dm_mpath_io *mpio;
+	bool sync_rcu = false;
 
-	spin_lock_irq(&m->lock);
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
 
 	/* Do we need to select a new pgpath? */
-	if (!m->current_pgpath ||
-	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
-		__choose_pgpath(m, nr_bytes);
-
-	pgpath = m->current_pgpath;
+	pgpath = rcu_dereference(paths->current_pgpath);
+	if (!pgpath)
+		pgpath = choose_pgpath(m, nr_bytes, &sync_rcu);
+	else if (!paths->queue_io) {
+		percpu_counter_dec(&m->repeat_count);
+		if (percpu_counter_read_positive(&m->repeat_count) == 0)
+			pgpath = choose_pgpath(m, nr_bytes, &sync_rcu);
+	}
 
 	if (!pgpath) {
 		if (!__must_push_back(m))
 			r = -EIO;	/* Failed */
 		goto out_unlock;
-	} else if (m->queue_io || m->pg_init_required) {
+	} else if (paths->queue_io || paths->pg_init_required) {
+		spin_lock_irq(&m->lock);
 		__pg_init_all_paths(m);
+		spin_unlock_irq(&m->lock);
+		sync_rcu = true;
 		goto out_unlock;
 	}
 
-	spin_unlock_irq(&m->lock);
+	rcu_read_unlock();
+	if (sync_rcu)
+		synchronize_rcu();
 
 	mpio = set_mpio(m, map_context);
 	if (!mpio)
@@ -472,7 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 	return DM_MAPIO_REMAPPED;
 
 out_unlock:
-	spin_unlock_irq(&m->lock);
+	rcu_read_unlock();
+	if (sync_rcu)
+		synchronize_rcu();
 
 	return r;
 }
@@ -498,24 +578,21 @@ static void multipath_release_clone(struct request *clone)
 /*
  * If we run out of usable paths, should we queue I/O or error it?
  */
-static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
-			    unsigned save_old_value)
+static void queue_if_no_path(struct multipath *m, bool queue_if_no_path,
+			     bool save_old_value)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&m->lock, flags);
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
 	if (save_old_value)
-		m->saved_queue_if_no_path = m->queue_if_no_path;
+		paths->saved_queue_if_no_path = paths->queue_if_no_path;
 	else
-		m->saved_queue_if_no_path = queue_if_no_path;
-	m->queue_if_no_path = queue_if_no_path;
-	spin_unlock_irqrestore(&m->lock, flags);
+		paths->saved_queue_if_no_path = queue_if_no_path;
+	paths->queue_if_no_path = queue_if_no_path;
 
 	if (!queue_if_no_path)
 		dm_table_run_md_queue_async(m->ti->table);
 
-	return 0;
+	synchronize_rcu();
 }
 
 /*
@@ -602,10 +679,10 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
 		goto bad;
 	}
 
-	if (m->retain_attached_hw_handler || m->hw_handler_name)
+	if (m->paths->retain_attached_hw_handler || m->hw_handler_name)
 		q = bdev_get_queue(p->path.dev->bdev);
 
-	if (m->retain_attached_hw_handler) {
+	if (m->paths->retain_attached_hw_handler) {
 retain:
 		attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
 		if (attached_handler_name) {
@@ -805,12 +882,12 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 		argc--;
 
 		if (!strcasecmp(arg_name, "queue_if_no_path")) {
-			r = queue_if_no_path(m, 1, 0);
+			queue_if_no_path(m, 1, 0);
 			continue;
 		}
 
 		if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
-			m->retain_attached_hw_handler = 1;
+			m->paths->retain_attached_hw_handler = true;
 			continue;
 		}
 
@@ -868,7 +945,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	if (r)
 		goto bad;
 
-	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
+	r = dm_read_arg(_args, &as, &m->paths->nr_priority_groups, &ti->error);
 	if (r)
 		goto bad;
 
@@ -876,8 +953,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	if (r)
 		goto bad;
 
-	if ((!m->nr_priority_groups && next_pg_num) ||
-	    (m->nr_priority_groups && !next_pg_num)) {
+	if ((!m->paths->nr_priority_groups && next_pg_num) ||
+	    (m->paths->nr_priority_groups && !next_pg_num)) {
 		ti->error = "invalid initial priority group";
 		r = -EINVAL;
 		goto bad;
@@ -893,15 +970,15 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 			goto bad;
 		}
 
-		m->nr_valid_paths += pg->nr_pgpaths;
-		list_add_tail(&pg->list, &m->priority_groups);
+		m->paths->nr_valid_paths += pg->nr_pgpaths;
+		list_add_tail(&pg->list, &m->paths->priority_groups);
 		pg_count++;
 		pg->pg_num = pg_count;
 		if (!--next_pg_num)
-			m->next_pg = pg;
+			m->paths->next_pg = pg;
 	}
 
-	if (pg_count != m->nr_priority_groups) {
+	if (pg_count != m->paths->nr_priority_groups) {
 		ti->error = "priority group count mismatch";
 		r = -EINVAL;
 		goto bad;
@@ -922,20 +999,21 @@
 static void multipath_wait_for_pg_init_completion(struct multipath *m)
 {
+	struct multipath_paths *paths;
 	DECLARE_WAITQUEUE(wait, current);
-	unsigned long flags;
 
 	add_wait_queue(&m->pg_init_wait, &wait);
 
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 
-		spin_lock_irqsave(&m->lock, flags);
-		if (!m->pg_init_in_progress) {
-			spin_unlock_irqrestore(&m->lock, flags);
+		rcu_read_lock();
+		paths = rcu_dereference(m->paths);
+		if (!paths->pg_init_in_progress) {
+			rcu_read_unlock();
 			break;
 		}
-		spin_unlock_irqrestore(&m->lock, flags);
+		rcu_read_unlock();
 
 		io_schedule();
 	}
@@ -946,20 +1024,18 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
 
 static void flush_multipath_work(struct multipath *m)
 {
-	unsigned long flags;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
-	spin_lock_irqsave(&m->lock, flags);
-	m->pg_init_disabled = 1;
-	spin_unlock_irqrestore(&m->lock, flags);
+	paths->pg_init_disabled = true;
+	synchronize_rcu();
 
 	flush_workqueue(kmpath_handlerd);
 	multipath_wait_for_pg_init_completion(m);
 	flush_workqueue(kmultipathd);
 	flush_work(&m->trigger_event);
 
-	spin_lock_irqsave(&m->lock, flags);
-	m->pg_init_disabled = 0;
-	spin_unlock_irqrestore(&m->lock, flags);
+	paths->pg_init_disabled = false;
+	synchronize_rcu();
 }
 
 static void multipath_dtr(struct dm_target *ti)
@@ -976,7 +1052,12 @@ static void multipath_dtr(struct dm_target *ti)
 static int fail_path(struct pgpath *pgpath)
 {
 	unsigned long flags;
+	struct multipath_paths *paths;
 	struct multipath *m = pgpath->pg->m;
+	bool sync_rcu = false;
+
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
 
 	spin_lock_irqsave(&m->lock, flags);
 
@@ -986,21 +1067,24 @@ static int fail_path(struct pgpath *pgpath)
 	DMWARN("Failing path %s.", pgpath->path.dev->name);
 
 	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
-	pgpath->is_active = 0;
+	pgpath->is_active = false;
 	pgpath->fail_count++;
 
-	m->nr_valid_paths--;
+	paths->nr_valid_paths--;
+	sync_rcu = true;
 
-	if (pgpath == m->current_pgpath)
-		m->current_pgpath = NULL;
+	if (pgpath == rcu_dereference(paths->current_pgpath))
+		rcu_assign_pointer(paths->current_pgpath, NULL);
 
 	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
-		       pgpath->path.dev->name, m->nr_valid_paths);
+		       pgpath->path.dev->name, paths->nr_valid_paths);
 
 	schedule_work(&m->trigger_event);
-
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
+	rcu_read_unlock();
+	if (sync_rcu)
+		synchronize_rcu();
 
 	return 0;
 }
@@ -1010,9 +1094,15 @@ out:
  */
 static int reinstate_path(struct pgpath *pgpath)
 {
-	int r = 0, run_queue = 0;
+	int r = 0;
 	unsigned long flags;
 	struct multipath *m = pgpath->pg->m;
+	struct multipath_paths *paths;
+	bool run_queue = false;
+	bool sync_rcu = false;
+
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
 
 	spin_lock_irqsave(&m->lock, flags);
 
@@ -1030,25 +1120,28 @@ static int reinstate_path(struct pgpath *pgpath)
 	if (r)
 		goto out;
 
-	pgpath->is_active = 1;
+	pgpath->is_active = true;
 
-	if (!m->nr_valid_paths++) {
-		m->current_pgpath = NULL;
-		run_queue = 1;
-	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
+	if (!paths->nr_valid_paths++) {
+		rcu_assign_pointer(paths->current_pgpath, NULL);
+		run_queue = true;
+	} else if (m->hw_handler_name && (rcu_dereference(paths->current_pg) == pgpath->pg)) {
 		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
-			m->pg_init_in_progress++;
+			paths->pg_init_in_progress++;
 	}
+	sync_rcu = true;
 
 	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
-		       pgpath->path.dev->name, m->nr_valid_paths);
+		       pgpath->path.dev->name, paths->nr_valid_paths);
 
 	schedule_work(&m->trigger_event);
-
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
 	if (run_queue)
 		dm_table_run_md_queue_async(m->ti->table);
+	rcu_read_unlock();
+	if (sync_rcu)
+		synchronize_rcu();
 
 	return r;
 }
@@ -1062,9 +1155,10 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
 	int r = -EINVAL;
 	struct pgpath *pgpath;
 	struct priority_group *pg;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
-	list_for_each_entry(pg, &m->priority_groups, list) {
-		list_for_each_entry(pgpath, &pg->pgpaths, list) {
+	list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+		list_for_each_entry_rcu(pgpath, &pg->pgpaths, list) {
 			if (pgpath->path.dev == dev)
 				r = action(pgpath);
 		}
@@ -1077,17 +1171,19 @@ static int action_dev(struct multipath *m, struct dm_dev *dev,
  * Temporarily try to avoid having to use the specified PG
  */
 static void bypass_pg(struct multipath *m, struct priority_group *pg,
-		      int bypassed)
+		      bool bypassed)
 {
 	unsigned long flags;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
 	spin_lock_irqsave(&m->lock, flags);
 
 	pg->bypassed = bypassed;
-	m->current_pgpath = NULL;
-	m->current_pg = NULL;
+	rcu_assign_pointer(paths->current_pgpath, NULL);
+	rcu_assign_pointer(paths->current_pg, NULL);
 
 	spin_unlock_irqrestore(&m->lock, flags);
+	synchronize_rcu();
 
 	schedule_work(&m->trigger_event);
 }
@@ -1099,26 +1195,25 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 {
 	struct priority_group *pg;
 	unsigned pgnum;
-	unsigned long flags;
 	char dummy;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
 	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
-	    (pgnum > m->nr_priority_groups)) {
+	    (pgnum > paths->nr_priority_groups)) {
 		DMWARN("invalid PG number supplied to switch_pg_num");
 		return -EINVAL;
 	}
 
-	spin_lock_irqsave(&m->lock, flags);
-	list_for_each_entry(pg, &m->priority_groups, list) {
-		pg->bypassed = 0;
+	list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+		pg->bypassed = false;
 		if (--pgnum)
 			continue;
 
-		m->current_pgpath = NULL;
-		m->current_pg = NULL;
-		m->next_pg = pg;
+		rcu_assign_pointer(paths->current_pgpath, NULL);
+		rcu_assign_pointer(paths->current_pg, NULL);
+		rcu_assign_pointer(paths->next_pg, pg);
 	}
-	spin_unlock_irqrestore(&m->lock, flags);
+	synchronize_rcu();
 
 	schedule_work(&m->trigger_event);
 	return 0;
@@ -1128,22 +1223,27 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
-static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
+static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
 {
 	struct priority_group *pg;
 	unsigned pgnum;
 	char dummy;
+	struct multipath_paths *paths;
+
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
 
 	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
-	    (pgnum > m->nr_priority_groups)) {
+	    (pgnum > paths->nr_priority_groups)) {
 		DMWARN("invalid PG number supplied to bypass_pg");
 		return -EINVAL;
 	}
 
-	list_for_each_entry(pg, &m->priority_groups, list) {
+	list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
 		if (!--pgnum)
 			break;
 	}
+	rcu_read_unlock();
 
 	bypass_pg(m, pg, bypassed);
 	return 0;
@@ -1152,20 +1252,27 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
 /*
  * Should we retry pg_init immediately?
  */
-static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
+static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
 {
 	unsigned long flags;
-	int limit_reached = 0;
+	struct multipath_paths *paths;
+	bool limit_reached = false, sync_rcu = false;
 
-	spin_lock_irqsave(&m->lock, flags);
-
-	if (m->pg_init_count <= m->pg_init_retries && !m->pg_init_disabled)
-		m->pg_init_required = 1;
-	else
-		limit_reached = 1;
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
 
+	spin_lock_irqsave(&m->lock, flags);
+	if (m->pg_init_count <= m->pg_init_retries && !paths->pg_init_disabled) {
+		paths->pg_init_required = true;
+		sync_rcu = true;
+	} else
+		limit_reached = true;
 	spin_unlock_irqrestore(&m->lock, flags);
 
+	rcu_read_unlock();
+	if (sync_rcu)
+		synchronize_rcu();
+
 	return limit_reached;
 }
 
@@ -1174,8 +1281,9 @@ static void pg_init_done(void *data, int errors)
 	struct pgpath *pgpath = data;
 	struct priority_group *pg = pgpath->pg;
 	struct multipath *m = pg->m;
+	struct multipath_paths *paths;
 	unsigned long flags;
-	unsigned delay_retry = 0;
+	bool delay_retry = false;
 
 	/* device or driver problems */
 	switch (errors) {
@@ -1202,7 +1310,7 @@ static void pg_init_done(void *data, int errors)
 		break;
 	case SCSI_DH_RETRY:
 		/* Wait before retrying. */
-		delay_retry = 1;
+		delay_retry = true;
 	case SCSI_DH_IMM_RETRY:
 	case SCSI_DH_RES_TEMP_UNAVAIL:
 		if (pg_init_limit_reached(m, pgpath))
@@ -1218,34 +1326,36 @@ static void pg_init_done(void *data, int errors)
 		fail_path(pgpath);
 	}
 
+	paths = rcu_dereference(m->paths);
+
 	spin_lock_irqsave(&m->lock, flags);
 	if (errors) {
-		if (pgpath == m->current_pgpath) {
+		if (pgpath == rcu_dereference(paths->current_pgpath)) {
 			DMERR("Could not failover device. Error %d.", errors);
-			m->current_pgpath = NULL;
-			m->current_pg = NULL;
+			rcu_assign_pointer(paths->current_pgpath, NULL);
+			rcu_assign_pointer(paths->current_pg, NULL);
 		}
-	} else if (!m->pg_init_required)
-		pg->bypassed = 0;
+	} else if (!paths->pg_init_required)
+		pg->bypassed = false;
 
-	if (--m->pg_init_in_progress)
+	if (--paths->pg_init_in_progress)
 		/* Activations of other paths are still on going */
 		goto out;
 
-	if (m->pg_init_required) {
-		m->pg_init_delay_retry = delay_retry;
+	if (paths->pg_init_required) {
+		paths->pg_init_delay_retry = delay_retry;
 		if (__pg_init_all_paths(m))
 			goto out;
 	}
-	m->queue_io = 0;
+	paths->queue_io = false;
 
 	/*
 	 * Wake up any thread waiting to suspend.
 	 */
 	wake_up(&m->pg_init_wait);
-
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
+	synchronize_rcu();
 }
 
 static void activate_path(struct work_struct *work)
@@ -1292,8 +1402,8 @@ static int do_end_io(struct multipath *m, struct request *clone,
 	 * request into dm core, which will remake a clone request and
 	 * clone bios for it and resubmit it later.
 	 */
+	struct multipath_paths *paths;
 	int r = DM_ENDIO_REQUEUE;
-	unsigned long flags;
 
 	if (!error && !clone->errors)
 		return 0;	/* I/O complete */
@@ -1304,9 +1414,10 @@ static int do_end_io(struct multipath *m, struct request *clone,
 	if (mpio->pgpath)
 		fail_path(mpio->pgpath);
 
-	spin_lock_irqsave(&m->lock, flags);
-	if (!m->nr_valid_paths) {
-		if (!m->queue_if_no_path) {
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
+	if (!paths->nr_valid_paths) {
+		if (!paths->queue_if_no_path) {
 			if (!__must_push_back(m))
 				r = -EIO;
 		} else {
@@ -1314,7 +1425,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
 			r = error;
 		}
 	}
-	spin_unlock_irqrestore(&m->lock, flags);
+	rcu_read_unlock();
 
 	return r;
 }
@@ -1370,11 +1481,10 @@ static void multipath_postsuspend(struct dm_target *ti)
 
 static void multipath_resume(struct dm_target *ti)
 {
 	struct multipath *m = ti->private;
-	unsigned long flags;
+	struct multipath_paths *paths = rcu_dereference(m->paths);
 
-	spin_lock_irqsave(&m->lock, flags);
-	m->queue_if_no_path = m->saved_queue_if_no_path;
-	spin_unlock_irqrestore(&m->lock, flags);
+	paths->queue_if_no_path = paths->saved_queue_if_no_path;
+	synchronize_rcu();
 }
 
 /*
@@ -1399,28 +1509,32 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 	int sz = 0;
 	unsigned long flags;
 	struct multipath *m = ti->private;
-	struct priority_group *pg;
+	struct multipath_paths *paths;
+	struct priority_group *pg, *current_pg;
 	struct pgpath *p;
 	unsigned pg_num;
 	char state;
 
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
+
 	spin_lock_irqsave(&m->lock, flags);
 
 	/* Features */
 	if (type == STATUSTYPE_INFO)
-		DMEMIT("2 %u %u ", m->queue_io, m->pg_init_count);
+		DMEMIT("2 %u %u ", (paths->queue_io ? 1 : 0), m->pg_init_count);
 	else {
-		DMEMIT("%u ", m->queue_if_no_path +
+		DMEMIT("%u ", (paths->queue_if_no_path ? 1 : 0) +
 			      (m->pg_init_retries > 0) * 2 +
 			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
-			      m->retain_attached_hw_handler);
-		if (m->queue_if_no_path)
+			      (m->paths->retain_attached_hw_handler ? 1 : 0));
+		if (paths->queue_if_no_path)
 			DMEMIT("queue_if_no_path ");
 		if (m->pg_init_retries)
 			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
 		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
 			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
-		if (m->retain_attached_hw_handler)
+		if (m->paths->retain_attached_hw_handler)
 			DMEMIT("retain_attached_hw_handler ");
 	}
@@ -1429,23 +1543,25 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 	else
 		DMEMIT("1 %s ", m->hw_handler_name);
 
-	DMEMIT("%u ", m->nr_priority_groups);
+	DMEMIT("%u ", paths->nr_priority_groups);
 
-	if (m->next_pg)
-		pg_num = m->next_pg->pg_num;
-	else if (m->current_pg)
-		pg_num = m->current_pg->pg_num;
+	if (rcu_dereference(paths->next_pg))
+		pg_num = rcu_dereference(paths->next_pg)->pg_num;
+	else if (rcu_dereference(paths->current_pg))
+		pg_num = rcu_dereference(paths->current_pg)->pg_num;
 	else
-		pg_num = (m->nr_priority_groups ? 1 : 0);
+		pg_num = (paths->nr_priority_groups ? 1 : 0);
 
 	DMEMIT("%u ", pg_num);
 
+	current_pg = rcu_dereference(paths->current_pg);
+
 	switch (type) {
 	case STATUSTYPE_INFO:
-		list_for_each_entry(pg, &m->priority_groups, list) {
+		list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
 			if (pg->bypassed)
 				state = 'D';	/* Disabled */
-			else if (pg == m->current_pg)
+			else if (pg == current_pg)
 				state = 'A';	/* Currently Active */
 			else
 				state = 'E';	/* Enabled */
@@ -1462,7 +1578,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 			DMEMIT("%u %u ", pg->nr_pgpaths,
 			       pg->ps.type->info_args);
 
-			list_for_each_entry(p, &pg->pgpaths, list) {
+			list_for_each_entry_rcu(p, &pg->pgpaths, list) {
 				DMEMIT("%s %s %u ", p->path.dev->name,
 				       p->is_active ? "A" : "F",
 				       p->fail_count);
@@ -1475,7 +1591,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 		break;
 
 	case STATUSTYPE_TABLE:
-		list_for_each_entry(pg, &m->priority_groups, list) {
+		list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
 			DMEMIT("%s ", pg->ps.type->name);
 
 			if (pg->ps.type->status)
@@ -1488,7 +1604,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 			DMEMIT("%u %u ", pg->nr_pgpaths,
 			       pg->ps.type->table_args);
 
-			list_for_each_entry(p, &pg->pgpaths, list) {
+			list_for_each_entry_rcu(p, &pg->pgpaths, list) {
 				DMEMIT("%s ", p->path.dev->name);
 				if (pg->ps.type->status)
 					sz += pg->ps.type->status(&pg->ps,
@@ -1500,6 +1616,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 	}
 
 	spin_unlock_irqrestore(&m->lock, flags);
+	rcu_read_unlock();
 }
 
 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
@@ -1518,10 +1635,10 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 
 	if (argc == 1) {
 		if (!strcasecmp(argv[0], "queue_if_no_path")) {
-			r = queue_if_no_path(m, 1, 0);
+			queue_if_no_path(m, 1, 0);
 			goto out;
 		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
-			r = queue_if_no_path(m, 0, 0);
+			queue_if_no_path(m, 0, 0);
 			goto out;
 		}
 	}
@@ -1566,21 +1683,26 @@ out:
 }
 
 static int multipath_prepare_ioctl(struct dm_target *ti,
-		struct block_device **bdev, fmode_t *mode)
+				   struct block_device **bdev, fmode_t *mode)
 {
 	struct multipath *m = ti->private;
+	struct multipath_paths *paths;
+	struct pgpath *current_pgpath;
 	unsigned long flags;
+	bool sync_rcu = false;
 	int r;
 
-	spin_lock_irqsave(&m->lock, flags);
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
+	current_pgpath = rcu_dereference(paths->current_pgpath);
 
-	if (!m->current_pgpath)
-		__choose_pgpath(m, 0);
+	if (!current_pgpath)
+		current_pgpath = choose_pgpath(m, 0, &sync_rcu);
 
-	if (m->current_pgpath) {
-		if (!m->queue_io) {
-			*bdev = m->current_pgpath->path.dev->bdev;
-			*mode = m->current_pgpath->path.dev->mode;
+	if (current_pgpath) {
+		if (!paths->queue_io) {
+			*bdev = current_pgpath->path.dev->bdev;
+			*mode = current_pgpath->path.dev->mode;
 			r = 0;
 		} else {
 			/* pg_init has not started or completed */
@@ -1588,25 +1710,28 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
 		}
 	} else {
 		/* No path is available */
-		if (m->queue_if_no_path)
+		if (paths->queue_if_no_path)
 			r = -ENOTCONN;
 		else
 			r = -EIO;
 	}
 
-	spin_unlock_irqrestore(&m->lock, flags);
-
 	if (r == -ENOTCONN) {
-		spin_lock_irqsave(&m->lock, flags);
-		if (!m->current_pg) {
+		if (!rcu_dereference(paths->current_pg)) {
 			/* Path status changed, redo selection */
-			__choose_pgpath(m, 0);
+			(void) choose_pgpath(m, 0, &sync_rcu);
 		}
-		if (m->pg_init_required)
+		if (paths->pg_init_required) {
+			spin_lock_irqsave(&m->lock, flags);
 			__pg_init_all_paths(m);
-		spin_unlock_irqrestore(&m->lock, flags);
+			spin_unlock_irqrestore(&m->lock, flags);
+			sync_rcu = true;
+		}
 		dm_table_run_md_queue_async(m->ti->table);
 	}
+	rcu_read_unlock();
+	if (sync_rcu)
+		synchronize_rcu();
 
 	/*
 	 * Only pass ioctls through if the device sizes match exactly.
@@ -1620,19 +1745,23 @@ static int multipath_iterate_devices(struct dm_target *ti,
 				     iterate_devices_callout_fn fn, void *data)
 {
 	struct multipath *m = ti->private;
+	struct multipath_paths *paths;
 	struct priority_group *pg;
 	struct pgpath *p;
 	int ret = 0;
 
-	list_for_each_entry(pg, &m->priority_groups, list) {
-		list_for_each_entry(p, &pg->pgpaths, list) {
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
+
+	list_for_each_entry_rcu(pg, &paths->priority_groups, list) {
+		list_for_each_entry_rcu(p, &pg->pgpaths, list) {
 			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
 			if (ret)
 				goto out;
 		}
 	}
-
 out:
+	rcu_read_unlock();
 	return ret;
 }
@@ -1653,25 +1782,26 @@ static int __pgpath_busy(struct pgpath *pgpath)
  */
 static int multipath_busy(struct dm_target *ti)
 {
-	int busy = 0, has_active = 0;
+	bool busy = false, has_active = false;
 	struct multipath *m = ti->private;
+	struct multipath_paths *paths;
 	struct priority_group *pg;
 	struct pgpath *pgpath;
-	unsigned long flags;
 
-	spin_lock_irqsave(&m->lock, flags);
+	rcu_read_lock();
+	paths = rcu_dereference(m->paths);
 
 	/* pg_init in progress or no paths available */
-	if (m->pg_init_in_progress ||
-	    (!m->nr_valid_paths && m->queue_if_no_path)) {
-		busy = 1;
+	if (paths->pg_init_in_progress ||
+	    (!paths->nr_valid_paths && paths->queue_if_no_path)) {
+		busy = true;
 		goto out;
 	}
 	/* Guess which priority_group will be used at next mapping time */
-	if (unlikely(!m->current_pgpath && m->next_pg))
-		pg = m->next_pg;
-	else if (likely(m->current_pg))
-		pg = m->current_pg;
+	if (unlikely(!rcu_dereference(paths->current_pgpath) && rcu_dereference(paths->next_pg)))
+		pg = rcu_dereference(paths->next_pg);
+	else if (likely(rcu_dereference(paths->current_pg)))
+		pg = rcu_dereference(paths->current_pg);
 	else
 		/*
 		 * We don't know which pg will be used at next mapping time.
@@ -1686,13 +1816,13 @@ static int multipath_busy(struct dm_target *ti)
 	 * If there is one non-busy active path at least, the path selector
 	 * will be able to select it. So we consider such a pg as not busy.
 	 */
-	busy = 1;
-	list_for_each_entry(pgpath, &pg->pgpaths, list)
+	busy = true;
+	list_for_each_entry_rcu(pgpath, &pg->pgpaths, list)
 		if (pgpath->is_active) {
-			has_active = 1;
+			has_active = true;
 
 			if (!__pgpath_busy(pgpath)) {
-				busy = 0;
+				busy = false;
 				break;
 			}
 		}
@@ -1703,10 +1833,10 @@ static int multipath_busy(struct dm_target *ti)
 	 * the current_pg will be changed at next mapping time.
 	 * We need to try mapping to determine it.
 	 */
-		busy = 0;
+		busy = false;
 
 out:
-	spin_unlock_irqrestore(&m->lock, flags);
+	rcu_read_unlock();
 
 	return busy;
 }
