diff options
Diffstat (limited to 'fs/btrfs/raid56.c')
| -rw-r--r-- | fs/btrfs/raid56.c | 276 |
1 files changed, 137 insertions, 139 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1e35a2327478..f3d0576dd327 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1,48 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2012 Fusion-io All rights reserved. * Copyright (C) 2012 Intel Corp. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. */ + #include <linux/sched.h> -#include <linux/wait.h> #include <linux/bio.h> #include <linux/slab.h> -#include <linux/buffer_head.h> #include <linux/blkdev.h> -#include <linux/random.h> -#include <linux/iocontext.h> -#include <linux/capability.h> -#include <linux/ratelimit.h> -#include <linux/kthread.h> #include <linux/raid/pq.h> #include <linux/hash.h> #include <linux/list_sort.h> #include <linux/raid/xor.h> #include <linux/mm.h> -#include <asm/div64.h> #include "ctree.h" -#include "extent_map.h" #include "disk-io.h" -#include "transaction.h" -#include "print-tree.h" #include "volumes.h" #include "raid56.h" #include "async-thread.h" -#include "check-integrity.h" -#include "rcu-string.h" /* set when additional merges to this rbio are not allowed */ #define RBIO_RMW_LOCKED_BIT 1 @@ -175,14 +150,18 @@ struct btrfs_raid_bio { * bitmap to record which horizontal stripe has data */ unsigned long *dbitmap; + + /* allocated with real_stripes-many pointers for finish_*() calls */ + void **finish_pointers; + + /* allocated with stripe_npages-many bits for finish_*() calls */ + unsigned long *finish_pbitmap; }; static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); static noinline void finish_rmw(struct btrfs_raid_bio *rbio); static void rmw_work(struct btrfs_work *work); static void read_rebuild_work(struct btrfs_work *work); -static void async_rmw_stripe(struct btrfs_raid_bio *rbio); -static void async_read_rebuild(struct btrfs_raid_bio *rbio); static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio); static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed); static void __free_raid_bio(struct btrfs_raid_bio *rbio); @@ -191,7 +170,13 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check); -static void async_scrub_parity(struct btrfs_raid_bio *rbio); +static void scrub_parity_work(struct btrfs_work *work); + +static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func) +{ + btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL); + btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); +} /* * the stripe hash table is used for locking, and to collect @@ -231,7 +216,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) cur = h + i; INIT_LIST_HEAD(&cur->hash_list); spin_lock_init(&cur->lock); - init_waitqueue_head(&cur->wait); } x = cmpxchg(&info->stripe_hash_table, NULL, table); @@ -267,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) s = kmap(rbio->bio_pages[i]); d = kmap(rbio->stripe_pages[i]); - memcpy(d, s, PAGE_SIZE); + copy_page(d, s); kunmap(rbio->bio_pages[i]); kunmap(rbio->stripe_pages[i]); @@ -523,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len) } /* - * returns true if the bio list inside this rbio - * covers an entire stripe (no rmw required). - * Must be called with the bio list lock held, or - * at a time when you know it is impossible to add - * new bios into the list + * Returns true if the bio list inside this rbio covers an entire stripe (no + * rmw required). */ -static int __rbio_is_full(struct btrfs_raid_bio *rbio) +static int rbio_is_full(struct btrfs_raid_bio *rbio) { + unsigned long flags; unsigned long size = rbio->bio_list_bytes; int ret = 1; + spin_lock_irqsave(&rbio->bio_list_lock, flags); if (size != rbio->nr_data * rbio->stripe_len) ret = 0; - BUG_ON(size > rbio->nr_data * rbio->stripe_len); - return ret; -} - -static int rbio_is_full(struct btrfs_raid_bio *rbio) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&rbio->bio_list_lock, flags); - ret = __rbio_is_full(rbio); spin_unlock_irqrestore(&rbio->bio_list_lock, flags); + return ret; } @@ -595,14 +568,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, * bio list here, anyone else that wants to * change this stripe needs to do their own rmw. */ - if (last->operation == BTRFS_RBIO_PARITY_SCRUB || - cur->operation == BTRFS_RBIO_PARITY_SCRUB) + if (last->operation == BTRFS_RBIO_PARITY_SCRUB) return 0; - if (last->operation == BTRFS_RBIO_REBUILD_MISSING || - cur->operation == BTRFS_RBIO_REBUILD_MISSING) + if (last->operation == BTRFS_RBIO_REBUILD_MISSING) return 0; + if (last->operation == BTRFS_RBIO_READ_REBUILD) { + int fa = last->faila; + int fb = last->failb; + int cur_fa = cur->faila; + int cur_fb = cur->failb; + + if (last->faila >= last->failb) { + fa = last->failb; + fb = last->faila; + } + + if (cur->faila >= cur->failb) { + cur_fa = cur->failb; + cur_fb = cur->faila; + } + + if (fa != cur_fa || fb != cur_fb) + return 0; + } return 1; } @@ -670,7 +660,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio *cur; struct btrfs_raid_bio *pending; unsigned long flags; - DEFINE_WAIT(wait); struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; @@ -803,28 +792,19 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) spin_unlock_irqrestore(&h->lock, flags); if (next->operation == BTRFS_RBIO_READ_REBUILD) - async_read_rebuild(next); + start_async_work(next, read_rebuild_work); else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) { steal_rbio(rbio, next); - async_read_rebuild(next); + start_async_work(next, read_rebuild_work); } else if (next->operation == BTRFS_RBIO_WRITE) { steal_rbio(rbio, next); - async_rmw_stripe(next); + start_async_work(next, rmw_work); } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { steal_rbio(rbio, next); - async_scrub_parity(next); + start_async_work(next, scrub_parity_work); } goto done_nolock; - /* - * The barrier for this waitqueue_active is not needed, - * we're protected by h->lock and can't miss a wakeup. - */ - } else if (waitqueue_active(&h->wait)) { - spin_unlock(&rbio->bio_list_lock); - spin_unlock_irqrestore(&h->lock, flags); - wake_up(&h->wait); - goto done_nolock; } } done: @@ -987,9 +967,14 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); void *p; - rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 + - DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) * - sizeof(long), GFP_NOFS); + rbio = kzalloc(sizeof(*rbio) + + sizeof(*rbio->stripe_pages) * num_pages + + sizeof(*rbio->bio_pages) * num_pages + + sizeof(*rbio->finish_pointers) * real_stripes + + sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) + + sizeof(*rbio->finish_pbitmap) * + BITS_TO_LONGS(stripe_npages), + GFP_NOFS); if (!rbio) return ERR_PTR(-ENOMEM); @@ -1011,13 +996,20 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, atomic_set(&rbio->stripes_pending, 0); /* - * the stripe_pages and bio_pages array point to the extra + * the stripe_pages, bio_pages, etc arrays point to the extra * memory we allocated past the end of the rbio */ p = rbio + 1; - rbio->stripe_pages = p; - rbio->bio_pages = p + sizeof(struct page *) * num_pages; - rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2; +#define CONSUME_ALLOC(ptr, count) do { \ + ptr = p; \ + p = (unsigned char *)p + sizeof(*(ptr)) * (count); \ + } while (0) + CONSUME_ALLOC(rbio->stripe_pages, num_pages); + CONSUME_ALLOC(rbio->bio_pages, num_pages); + CONSUME_ALLOC(rbio->finish_pointers, real_stripes); + CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages)); + CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages)); +#undef CONSUME_ALLOC if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) nr_data = real_stripes - 1; @@ -1186,7 +1178,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) static noinline void finish_rmw(struct btrfs_raid_bio *rbio) { struct btrfs_bio *bbio = rbio->bbio; - void *pointers[rbio->real_stripes]; + void **pointers = rbio->finish_pointers; int nr_data = rbio->nr_data; int stripe; int pagenr; @@ -1263,7 +1255,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) pointers); } else { /* raid5 */ - memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); + copy_page(pointers[nr_data], pointers[0]); run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } @@ -1331,7 +1323,7 @@ write_data: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; submit_bio(bio); } @@ -1339,6 +1331,9 @@ write_data: cleanup: rbio_orig_end_io(rbio, BLK_STS_IOERR); + + while ((bio = bio_list_pop(&bio_list))) + bio_put(bio); } /* @@ -1447,11 +1442,11 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, static void set_bio_pages_uptodate(struct bio *bio) { struct bio_vec *bvec; - int i; + struct bvec_iter_all iter_all; ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, bio, i) + bio_for_each_segment_all(bvec, bio, iter_all) SetPageUptodate(bvec->bv_page); } @@ -1493,20 +1488,6 @@ cleanup: rbio_orig_end_io(rbio, BLK_STS_IOERR); } -static void async_rmw_stripe(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL); - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - -static void async_read_rebuild(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, - read_rebuild_work, NULL, NULL); - - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - /* * the stripe must be locked by the caller. It will * unlock after all the writes are done @@ -1584,7 +1565,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_rmw_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); @@ -1595,6 +1576,10 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) cleanup: rbio_orig_end_io(rbio, BLK_STS_IOERR); + + while ((bio = bio_list_pop(&bio_list))) + bio_put(bio); + return -EIO; finish: @@ -1633,7 +1618,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio) ret = lock_stripe_add(rbio); if (ret == 0) - async_rmw_stripe(rbio); + start_async_work(rbio, rmw_work); return 0; } @@ -1701,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug) list_del_init(&cur->plug_list); if (rbio_is_full(cur)) { + int ret; + /* we have a full stripe, send it down */ - full_stripe_write(cur); + ret = full_stripe_write(cur); + BUG_ON(ret); continue; } if (last) { @@ -1922,9 +1910,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) BUG_ON(failb != -1); pstripe: /* Copy parity block into failed block to start with */ - memcpy(pointers[faila], - pointers[rbio->nr_data], - PAGE_SIZE); + copy_page(pointers[faila], pointers[rbio->nr_data]); /* rearrange the pointer array */ p = pointers[faila]; @@ -1974,15 +1960,34 @@ cleanup: kfree(pointers); cleanup_io: - if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == BLK_STS_OK) + /* + * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a + * valid rbio which is consistent with ondisk content, thus such a + * valid rbio can be cached to avoid further disk reads. + */ + if (rbio->operation == BTRFS_RBIO_READ_REBUILD || + rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { + /* + * - In case of two failures, where rbio->failb != -1: + * + * Do not cache this rbio since the above read reconstruction + * (raid6_datap_recov() or raid6_2data_recov()) may have + * changed some content of stripes which are not identical to + * on-disk content any more, otherwise, a later write/recover + * may steal stripe_pages from this rbio and end up with + * corruptions or rebuild failures. + * + * - In case of single failure, where rbio->failb == -1: + * + * Cache this rbio iff the above read reconstruction is + * executed without problems. + */ + if (err == BLK_STS_OK && rbio->failb < 0) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); rbio_orig_end_io(rbio, err); - } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { - rbio_orig_end_io(rbio, err); } else if (err == BLK_STS_OK) { rbio->faila = -1; rbio->failb = -1; @@ -2107,7 +2112,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid_recover_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); @@ -2120,6 +2125,10 @@ cleanup: if (rbio->operation == BTRFS_RBIO_READ_REBUILD || rbio->operation == BTRFS_RBIO_REBUILD_MISSING) rbio_orig_end_io(rbio, BLK_STS_IOERR); + + while ((bio = bio_list_pop(&bio_list))) + bio_put(bio); + return -EIO; } @@ -2254,12 +2263,18 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, ASSERT(!bio->bi_iter.bi_size); rbio->operation = BTRFS_RBIO_PARITY_SCRUB; - for (i = 0; i < rbio->real_stripes; i++) { + /* + * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted + * to the end position, so this search can start from the first parity + * stripe. + */ + for (i = rbio->nr_data; i < rbio->real_stripes; i++) { if (bbio->stripes[i].dev == scrub_dev) { rbio->scrubp = i; break; } } + ASSERT(i < rbio->real_stripes); /* Now we just support the sectorsize equals to page size */ ASSERT(fs_info->sectorsize == PAGE_SIZE); @@ -2320,8 +2335,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) { struct btrfs_bio *bbio = rbio->bbio; - void *pointers[rbio->real_stripes]; - DECLARE_BITMAP(pbitmap, rbio->stripe_npages); + void **pointers = rbio->finish_pointers; + unsigned long *pbitmap = rbio->finish_pbitmap; int nr_data = rbio->nr_data; int stripe; int pagenr; @@ -2400,7 +2415,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, pointers); } else { /* raid5 */ - memcpy(pointers[nr_data], pointers[0], PAGE_SIZE); + copy_page(pointers[nr_data], pointers[0]); run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); } @@ -2408,7 +2423,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); parity = kmap(p); if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) - memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE); + copy_page(parity, pointers[rbio->scrubp]); else /* Parity is right, needn't writeback */ bitmap_clear(rbio->dbitmap, pagenr, 1); @@ -2470,7 +2485,7 @@ submit_write: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE; submit_bio(bio); } @@ -2478,6 +2493,9 @@ submit_write: cleanup: rbio_orig_end_io(rbio, BLK_STS_IOERR); + + while ((bio = bio_list_pop(&bio_list))) + bio_put(bio); } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) @@ -2587,12 +2605,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) int stripe; struct bio *bio; + bio_list_init(&bio_list); + ret = alloc_rbio_essential_pages(rbio); if (ret) goto cleanup; - bio_list_init(&bio_list); - atomic_set(&rbio->error, 0); /* * build a list of bios to read all the missing parts of this @@ -2649,7 +2667,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) bio->bi_private = rbio; bio->bi_end_io = raid56_parity_scrub_end_io; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio->bi_opf = REQ_OP_READ; btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); @@ -2660,6 +2678,10 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) cleanup: rbio_orig_end_io(rbio, BLK_STS_IOERR); + + while ((bio = bio_list_pop(&bio_list))) + bio_put(bio); + return; finish: @@ -2674,18 +2696,10 @@ static void scrub_parity_work(struct btrfs_work *work) raid56_parity_scrub_stripe(rbio); } -static void async_scrub_parity(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, - scrub_parity_work, NULL, NULL); - - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) { if (!lock_stripe_add(rbio)) - async_scrub_parity(rbio); + start_async_work(rbio, scrub_parity_work); } /* The following code is used for dev replace of a missing RAID 5/6 device. */ @@ -2724,24 +2738,8 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, return rbio; } -static void missing_raid56_work(struct btrfs_work *work) -{ - struct btrfs_raid_bio *rbio; - - rbio = container_of(work, struct btrfs_raid_bio, work); - __raid56_parity_recover(rbio); -} - -static void async_missing_raid56(struct btrfs_raid_bio *rbio) -{ - btrfs_init_work(&rbio->work, btrfs_rmw_helper, - missing_raid56_work, NULL, NULL); - - btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); -} - void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) { if (!lock_stripe_add(rbio)) - async_missing_raid56(rbio); + start_async_work(rbio, read_rebuild_work); } |
