aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.com>2017-09-06 09:43:28 +1000
committerMike Snitzer <snitzer@redhat.com>2017-11-21 11:26:43 -0500
commitb46d6a08f1ae7bf53e4cde28e0ccdf91567d432e (patch)
tree93bf351e1d2a4f17385fcc9c24b7a9e64443a3fa
parent16b219bcc263fff7157cc24b45615231f774053c (diff)
downloadlinux-dm-b46d6a08f1ae7bf53e4cde28e0ccdf91567d432e.tar.gz
dm: ensure bio submission follows a depth-first tree walk
Notice: this object is not reachable from any branch.
A dm device can, in general, represent a tree of targets, each of which handles a sub-range of the range of blocks handled by the parent. The bio sequencing managed by generic_make_request() requires that bios are generated and handled in a depth-first manner. Each call to a make_request_fn() may submit bios to a single member device, and may submit bios for a reduced region of the same device as the make_request_fn. In particular, any bios submitted to member devices must be expected to be processed in order, so a later one must never wait for an earlier one. This ordering is usually achieved by using bio_split() to reduce a bio to a size that can be completely handled by one target, and resubmitting the remainder to the originating device. bio_queue_split() shows the canonical approach. dm doesn't follow this approach, largely because it has needed to split bios since long before bio_split() was available. It currently can submit bios to separate targets within the one dm_make_request() call. Dependencies between these targets, as can happen with dm-snap, can cause deadlocks if either bios gets stuck behind the other in the queues managed by generic_make_request(). This requires the 'rescue' functionality provided by dm_offload_{start,end}. Some of this requirement can be removed by changing the order of bio submission to follow the canonical approach. That is, if dm finds that it needs to split a bio, the remainder should be sent to generic_make_request() rather than being handled immediately. This delays the handling until the first part is completely processed, so the deadlock problems do not occur. __split_and_process_bio() can be called both from dm_make_request() and from dm_wq_work(). When called from dm_wq_work() the current approach is perfectly satisfactory as each bio will be processed immediately. When called from dm_make_request(), current->bio_list will be non-NULL, and in this case it is best to create a separate "clone" bio for the remainder. To provide the clone bio when splitting, we use q->bio_split. This was previously being freed by bio-based dm to avoid having excess rescuer threads. As bio_split bio sets no longer create rescuer threads, there is little cost and much gain from restoring the q->bio_split bio set. Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Notice: this object is not reachable from any branch.
-rw-r--r--drivers/md/dm.c26
1 files changed, 17 insertions, 9 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index fb9e6d808170a..b20febd6cbc7d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1498,8 +1498,22 @@ static void __split_and_process_bio(struct mapped_device *md,
} else {
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
- while (ci.sector_count && !error)
+ while (ci.sector_count && !error) {
error = __split_and_process_non_flush(&ci);
+ if (current->bio_list && ci.sector_count && !error) {
+ /*
+ * Remainder must be passed to generic_make_request()
+ * so that it gets handled *after* bios already submitted
+ * have been completely processed.
+ */
+ struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
+ md->queue->bio_split);
+ bio_advance(b, (bio_sectors(b) - ci.sector_count) << 9);
+ bio_chain(b, bio);
+ generic_make_request(b);
+ break;
+ }
+ }
}
/* drop the extra reference count */
@@ -1510,8 +1524,8 @@ static void __split_and_process_bio(struct mapped_device *md,
*---------------------------------------------------------------*/
/*
- * The request function that just remaps the bio built up by
- * dm_merge_bvec.
+ * The request function that remaps the bio to one target and
+ * splits off any remainder.
*/
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
@@ -2034,12 +2048,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
case DM_TYPE_DAX_BIO_BASED:
dm_init_normal_md_queue(md);
blk_queue_make_request(md->queue, dm_make_request);
- /*
- * DM handles splitting bios as needed. Free the bio_split bioset
- * since it won't be used (saves 1 process per bio-based DM device).
- */
- bioset_free(md->queue->bio_split);
- md->queue->bio_split = NULL;
if (type == DM_TYPE_DAX_BIO_BASED)
queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue);