diff options
| author | Mike Snitzer <snitzer@redhat.com> | 2014-03-21 18:33:41 -0400 |
|---|---|---|
| committer | Mike Snitzer <snitzer@redhat.com> | 2014-03-28 14:42:41 -0400 |
| commit | a43260f01a3b6f5ef33d0abc86e9b0a92096cd84 (patch) | |
| tree | 804a5bcc18f695787c019c99d4f4574f6770aca4 | |
| parent | 844ea73aff9650585e9639b8cc930e151393b480 (diff) | |
| download | linux-dm-a43260f01a3b6f5ef33d0abc86e9b0a92096cd84.tar.gz | |
dm thin: sort the per thin deferred bios using an rb_tree
Notice: this object is not reachable from any branch.
The following illustrates the realized gains/potential offered by
sorting the deferred_bios_list. The collection of .txt files is the
thin_dump output generated for various io and block sizes using the
device-mapper-test-suite's 'multithreaded_layout_reread_throughput'
test. These output files provide a high-level summary of the on-disk
layout's efficiency (less lines is better as it implies more contiguous
block ranges):
unsorted:
234 thin_bs1024_io1024.txt
139 thin_bs1024_io128.txt
249 thin_bs1024_io256.txt
290 thin_bs1024_io512.txt
95 thin_bs128_io1024.txt
112 thin_bs128_io128.txt
98 thin_bs128_io256.txt
96 thin_bs128_io512.txt
103 thin_bs256_io1024.txt
99 thin_bs256_io128.txt
92 thin_bs256_io256.txt
1118 thin_bs256_io512.txt
133 thin_bs512_io1024.txt
148 thin_bs512_io128.txt
124 thin_bs512_io256.txt
133 thin_bs512_io512.txt
3263 total
sorted:
73 thin_bs1024_io1024.txt
72 thin_bs1024_io128.txt
65 thin_bs1024_io256.txt
75 thin_bs1024_io512.txt
76 thin_bs128_io1024.txt
84 thin_bs128_io128.txt
75 thin_bs128_io256.txt
79 thin_bs128_io512.txt
76 thin_bs256_io1024.txt
72 thin_bs256_io128.txt
71 thin_bs256_io256.txt
65 thin_bs256_io512.txt
76 thin_bs512_io1024.txt
74 thin_bs512_io128.txt
68 thin_bs512_io256.txt
73 thin_bs512_io512.txt
1174 total
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Notice: this object is not reachable from any branch.
| -rw-r--r-- | drivers/md/dm-thin.c | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 0eda29a035c81..ccafeb4caa6b5 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/rbtree.h> #define DM_MSG_PREFIX "thin" @@ -230,6 +231,7 @@ struct thin_c { spinlock_t lock; struct bio_list deferred_bio_list; struct bio_list retry_on_resume_list; + struct rb_root sort_bio_list; /* sorted list of deferred bios */ }; /*----------------------------------------------------------------*/ @@ -371,6 +373,7 @@ struct dm_thin_endio_hook { struct dm_deferred_entry *shared_read_entry; struct dm_deferred_entry *all_io_entry; struct dm_thin_new_mapping *overwrite_mapping; + struct rb_node rb_node; }; static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) @@ -1367,12 +1370,77 @@ static int need_commit_due_to_time(struct pool *pool) jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; } +#define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node) +#define thin_bio(pbd) dm_bio_from_per_bio_data((pbd), sizeof(struct dm_thin_endio_hook)) + +static void __thin_bio_rb_add(struct thin_c *tc, struct bio *bio) +{ + struct rb_node **rbp, *parent; + struct dm_thin_endio_hook *pbd; + sector_t bi_sector = bio->bi_iter.bi_sector; + + rbp = &tc->sort_bio_list.rb_node; + parent = NULL; + while (*rbp) { + parent = *rbp; + pbd = thin_pbd(parent); + + if (bi_sector < thin_bio(pbd)->bi_iter.bi_sector) + rbp = &(*rbp)->rb_left; + else + rbp = &(*rbp)->rb_right; + } + + pbd = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); + rb_link_node(&pbd->rb_node, parent, rbp); + rb_insert_color(&pbd->rb_node, &tc->sort_bio_list); +} + +static void __extract_sorted_bios(struct thin_c *tc) +{ + struct rb_node *node; + struct dm_thin_endio_hook *pbd; + struct bio *bio; + + for (node = rb_first(&tc->sort_bio_list); node; node = rb_next(node)) { + pbd = thin_pbd(node); + bio = thin_bio(pbd); + + bio_list_add(&tc->deferred_bio_list, bio); + rb_erase(&pbd->rb_node, &tc->sort_bio_list); + } + + WARN_ON(!RB_EMPTY_ROOT(&tc->sort_bio_list)); +} + +static void __sort_thin_deferred_bios(struct thin_c *tc) +{ + struct bio *bio; + struct bio_list bios; + + bio_list_init(&bios); + bio_list_merge(&bios, &tc->deferred_bio_list); + bio_list_init(&tc->deferred_bio_list); + + /* Sort deferred_bio_list using rb-tree */ + while ((bio = bio_list_pop(&bios))) + __thin_bio_rb_add(tc, bio); + + /* + * Transfer the sorted bios in sort_bio_list back to + * deferred_bio_list to allow lockless submission of + * all bios. + */ + __extract_sorted_bios(tc); +} + static void process_thin_deferred_bios(struct thin_c *tc) { struct pool *pool = tc->pool; unsigned long flags; struct bio *bio; struct bio_list bios; + struct blk_plug plug; if (tc->requeue_mode) { requeue_bio_list(tc, &tc->deferred_bio_list); @@ -1382,10 +1450,24 @@ static void process_thin_deferred_bios(struct thin_c *tc) bio_list_init(&bios); spin_lock_irqsave(&tc->lock, flags); + + if (bio_list_empty(&tc->deferred_bio_list)) { + spin_unlock_irqrestore(&tc->lock, flags); + return; + } + + /* + * FIXME: allow sorting to be enabled/disabled via ctr and/or + * message (and auto-disable if data device is non-rotational?) + */ + __sort_thin_deferred_bios(tc); + bio_list_merge(&bios, &tc->deferred_bio_list); bio_list_init(&tc->deferred_bio_list); + spin_unlock_irqrestore(&tc->lock, flags); + blk_start_plug(&plug); while ((bio = bio_list_pop(&bios))) { /* * If we've got no free new_mapping structs, and processing @@ -1405,6 +1487,7 @@ static void process_thin_deferred_bios(struct thin_c *tc) else pool->process_bio(tc, bio); } + blk_finish_plug(&plug); } static void process_deferred_bios(struct pool *pool) @@ -3040,6 +3123,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) spin_lock_init(&tc->lock); bio_list_init(&tc->deferred_bio_list); bio_list_init(&tc->retry_on_resume_list); + tc->sort_bio_list = RB_ROOT; if (argc == 3) { r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev); |
