aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/raid56.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r--fs/btrfs/raid56.c309
1 files changed, 153 insertions, 156 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index a7f79254ecca..df41d7049936 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1,48 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2012 Fusion-io All rights reserved.
* Copyright (C) 2012 Intel Corp. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
*/
+
#include <linux/sched.h>
-#include <linux/wait.h>
#include <linux/bio.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <linux/random.h>
-#include <linux/iocontext.h>
-#include <linux/capability.h>
-#include <linux/ratelimit.h>
-#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
-#include <asm/div64.h>
#include "ctree.h"
-#include "extent_map.h"
#include "disk-io.h"
-#include "transaction.h"
-#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
-#include "check-integrity.h"
-#include "rcu-string.h"
/* set when additional merges to this rbio are not allowed */
#define RBIO_RMW_LOCKED_BIT 1
@@ -175,14 +150,18 @@ struct btrfs_raid_bio {
* bitmap to record which horizontal stripe has data
*/
unsigned long *dbitmap;
+
+ /* allocated with real_stripes-many pointers for finish_*() calls */
+ void **finish_pointers;
+
+ /* allocated with stripe_npages-many bits for finish_*() calls */
+ unsigned long *finish_pbitmap;
};
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct btrfs_work *work);
static void read_rebuild_work(struct btrfs_work *work);
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
-static void async_read_rebuild(struct btrfs_raid_bio *rbio);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
@@ -191,7 +170,13 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check);
-static void async_scrub_parity(struct btrfs_raid_bio *rbio);
+static void scrub_parity_work(struct btrfs_work *work);
+
+static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
+{
+ btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+ btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+}
/*
* the stripe hash table is used for locking, and to collect
@@ -231,7 +216,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
cur = h + i;
INIT_LIST_HEAD(&cur->hash_list);
spin_lock_init(&cur->lock);
- init_waitqueue_head(&cur->wait);
}
x = cmpxchg(&info->stripe_hash_table, NULL, table);
@@ -267,7 +251,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
s = kmap(rbio->bio_pages[i]);
d = kmap(rbio->stripe_pages[i]);
- memcpy(d, s, PAGE_SIZE);
+ copy_page(d, s);
kunmap(rbio->bio_pages[i]);
kunmap(rbio->stripe_pages[i]);
@@ -523,32 +507,21 @@ static void run_xor(void **pages, int src_cnt, ssize_t len)
}
/*
- * returns true if the bio list inside this rbio
- * covers an entire stripe (no rmw required).
- * Must be called with the bio list lock held, or
- * at a time when you know it is impossible to add
- * new bios into the list
+ * Returns true if the bio list inside this rbio covers an entire stripe (no
+ * rmw required).
*/
-static int __rbio_is_full(struct btrfs_raid_bio *rbio)
+static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
+ unsigned long flags;
unsigned long size = rbio->bio_list_bytes;
int ret = 1;
+ spin_lock_irqsave(&rbio->bio_list_lock, flags);
if (size != rbio->nr_data * rbio->stripe_len)
ret = 0;
-
BUG_ON(size > rbio->nr_data * rbio->stripe_len);
- return ret;
-}
-
-static int rbio_is_full(struct btrfs_raid_bio *rbio)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&rbio->bio_list_lock, flags);
- ret = __rbio_is_full(rbio);
spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
+
return ret;
}
@@ -595,14 +568,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
* bio list here, anyone else that wants to
* change this stripe needs to do their own rmw.
*/
- if (last->operation == BTRFS_RBIO_PARITY_SCRUB ||
- cur->operation == BTRFS_RBIO_PARITY_SCRUB)
+ if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;
- if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
- cur->operation == BTRFS_RBIO_REBUILD_MISSING)
+ if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
return 0;
+ if (last->operation == BTRFS_RBIO_READ_REBUILD) {
+ int fa = last->faila;
+ int fb = last->failb;
+ int cur_fa = cur->faila;
+ int cur_fb = cur->failb;
+
+ if (last->faila >= last->failb) {
+ fa = last->failb;
+ fb = last->faila;
+ }
+
+ if (cur->faila >= cur->failb) {
+ cur_fa = cur->failb;
+ cur_fb = cur->faila;
+ }
+
+ if (fa != cur_fa || fb != cur_fb)
+ return 0;
+ }
return 1;
}
@@ -670,7 +660,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *pending;
unsigned long flags;
- DEFINE_WAIT(wait);
struct btrfs_raid_bio *freeit = NULL;
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
@@ -803,28 +792,19 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
spin_unlock_irqrestore(&h->lock, flags);
if (next->operation == BTRFS_RBIO_READ_REBUILD)
- async_read_rebuild(next);
+ start_async_work(next, read_rebuild_work);
else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
steal_rbio(rbio, next);
- async_read_rebuild(next);
+ start_async_work(next, read_rebuild_work);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
- async_rmw_stripe(next);
+ start_async_work(next, rmw_work);
} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
steal_rbio(rbio, next);
- async_scrub_parity(next);
+ start_async_work(next, scrub_parity_work);
}
goto done_nolock;
- /*
- * The barrier for this waitqueue_active is not needed,
- * we're protected by h->lock and can't miss a wakeup.
- */
- } else if (waitqueue_active(&h->wait)) {
- spin_unlock(&rbio->bio_list_lock);
- spin_unlock_irqrestore(&h->lock, flags);
- wake_up(&h->wait);
- goto done_nolock;
}
}
done:
@@ -858,10 +838,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
kfree(rbio);
}
-static void free_raid_bio(struct btrfs_raid_bio *rbio)
+static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
{
- unlock_stripe(rbio);
- __free_raid_bio(rbio);
+ struct bio *next;
+
+ while (cur) {
+ next = cur->bi_next;
+ cur->bi_next = NULL;
+ cur->bi_status = err;
+ bio_endio(cur);
+ cur = next;
+ }
}
/*
@@ -871,20 +858,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
{
struct bio *cur = bio_list_get(&rbio->bio_list);
- struct bio *next;
+ struct bio *extra;
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
- free_raid_bio(rbio);
+ /*
+ * At this moment, rbio->bio_list is empty, however since rbio does not
+ * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
+ * hash list, rbio may be merged with others so that rbio->bio_list
+ * becomes non-empty.
+ * Once unlock_stripe() is done, rbio->bio_list will not be updated any
+ * more and we can call bio_endio() on all queued bios.
+ */
+ unlock_stripe(rbio);
+ extra = bio_list_get(&rbio->bio_list);
+ __free_raid_bio(rbio);
- while (cur) {
- next = cur->bi_next;
- cur->bi_next = NULL;
- cur->bi_status = err;
- bio_endio(cur);
- cur = next;
- }
+ rbio_endio_bio_list(cur, err);
+ if (extra)
+ rbio_endio_bio_list(extra, err);
}
/*
@@ -974,9 +967,14 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
void *p;
- rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
- DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
- sizeof(long), GFP_NOFS);
+ rbio = kzalloc(sizeof(*rbio) +
+ sizeof(*rbio->stripe_pages) * num_pages +
+ sizeof(*rbio->bio_pages) * num_pages +
+ sizeof(*rbio->finish_pointers) * real_stripes +
+ sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
+ sizeof(*rbio->finish_pbitmap) *
+ BITS_TO_LONGS(stripe_npages),
+ GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
@@ -998,13 +996,20 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
atomic_set(&rbio->stripes_pending, 0);
/*
- * the stripe_pages and bio_pages array point to the extra
+ * the stripe_pages, bio_pages, etc arrays point to the extra
* memory we allocated past the end of the rbio
*/
p = rbio + 1;
- rbio->stripe_pages = p;
- rbio->bio_pages = p + sizeof(struct page *) * num_pages;
- rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
+#define CONSUME_ALLOC(ptr, count) do { \
+ ptr = p; \
+ p = (unsigned char *)p + sizeof(*(ptr)) * (count); \
+ } while (0)
+ CONSUME_ALLOC(rbio->stripe_pages, num_pages);
+ CONSUME_ALLOC(rbio->bio_pages, num_pages);
+ CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
+ CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
+ CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
+#undef CONSUME_ALLOC
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
nr_data = real_stripes - 1;
@@ -1173,7 +1178,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
struct btrfs_bio *bbio = rbio->bbio;
- void *pointers[rbio->real_stripes];
+ void **pointers = rbio->finish_pointers;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
@@ -1250,7 +1255,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
pointers);
} else {
/* raid5 */
- memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+ copy_page(pointers[nr_data], pointers[0]);
run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -1318,7 +1323,7 @@ write_data:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
submit_bio(bio);
}
@@ -1351,6 +1356,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
stripe_start = stripe->physical;
if (physical >= stripe_start &&
physical < stripe_start + rbio->stripe_len &&
+ stripe->dev->bdev &&
bio->bi_disk == stripe->dev->bdev->bd_disk &&
bio->bi_partno == stripe->dev->bdev->bd_partno) {
return i;
@@ -1435,14 +1441,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
*/
static void set_bio_pages_uptodate(struct bio *bio)
{
- struct bio_vec bvec;
- struct bvec_iter iter;
+ struct bio_vec *bvec;
+ int i;
- if (bio_flagged(bio, BIO_CLONED))
- bio->bi_iter = btrfs_io_bio(bio)->iter;
+ ASSERT(!bio_flagged(bio, BIO_CLONED));
- bio_for_each_segment(bvec, bio, iter)
- SetPageUptodate(bvec.bv_page);
+ bio_for_each_segment_all(bvec, bio, i)
+ SetPageUptodate(bvec->bv_page);
}
/*
@@ -1483,20 +1488,6 @@ cleanup:
rbio_orig_end_io(rbio, BLK_STS_IOERR);
}
-static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper, rmw_work, NULL, NULL);
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
-static void async_read_rebuild(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- read_rebuild_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
/*
* the stripe must be locked by the caller. It will
* unlock after all the writes are done
@@ -1574,7 +1565,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -1627,7 +1618,7 @@ static int partial_stripe_write(struct btrfs_raid_bio *rbio)
ret = lock_stripe_add(rbio);
if (ret == 0)
- async_rmw_stripe(rbio);
+ start_async_work(rbio, rmw_work);
return 0;
}
@@ -1695,8 +1686,11 @@ static void run_plug(struct btrfs_plug_cb *plug)
list_del_init(&cur->plug_list);
if (rbio_is_full(cur)) {
+ int ret;
+
/* we have a full stripe, send it down */
- full_stripe_write(cur);
+ ret = full_stripe_write(cur);
+ BUG_ON(ret);
continue;
}
if (last) {
@@ -1916,9 +1910,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
BUG_ON(failb != -1);
pstripe:
/* Copy parity block into failed block to start with */
- memcpy(pointers[faila],
- pointers[rbio->nr_data],
- PAGE_SIZE);
+ copy_page(pointers[faila], pointers[rbio->nr_data]);
/* rearrange the pointer array */
p = pointers[faila];
@@ -1968,15 +1960,34 @@ cleanup:
kfree(pointers);
cleanup_io:
- if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
- if (err == BLK_STS_OK)
+ /*
+ * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
+ * valid rbio which is consistent with ondisk content, thus such a
+ * valid rbio can be cached to avoid further disk reads.
+ */
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
+ rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+ /*
+ * - In case of two failures, where rbio->failb != -1:
+ *
+ * Do not cache this rbio since the above read reconstruction
+ * (raid6_datap_recov() or raid6_2data_recov()) may have
+ * changed some content of stripes which are not identical to
+ * on-disk content any more, otherwise, a later write/recover
+ * may steal stripe_pages from this rbio and end up with
+ * corruptions or rebuild failures.
+ *
+ * - In case of single failure, where rbio->failb == -1:
+ *
+ * Cache this rbio iff the above read reconstruction is
+ * excuted without problems.
+ */
+ if (err == BLK_STS_OK && rbio->failb < 0)
cache_rbio_pages(rbio);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
rbio_orig_end_io(rbio, err);
- } else if (rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
- rbio_orig_end_io(rbio, err);
} else if (err == BLK_STS_OK) {
rbio->faila = -1;
rbio->failb = -1;
@@ -2101,7 +2112,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -2170,11 +2181,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
}
/*
- * reconstruct from the q stripe if they are
- * asking for mirror 3
+ * Loop retry:
+ * for 'mirror == 2', reconstruct from all other stripes.
+ * for 'mirror_num > 2', select a stripe to fail on every retry.
*/
- if (mirror_num == 3)
- rbio->failb = rbio->real_stripes - 2;
+ if (mirror_num > 2) {
+ /*
+ * 'mirror == 3' is to fail the p stripe and
+ * reconstruct from the q stripe. 'mirror > 3' is to
+ * fail a data stripe and reconstruct from p+q stripe.
+ */
+ rbio->failb = rbio->real_stripes - (mirror_num - 1);
+ ASSERT(rbio->failb > 0);
+ if (rbio->failb <= rbio->faila)
+ rbio->failb--;
+ }
ret = lock_stripe_add(rbio);
@@ -2314,8 +2335,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
struct btrfs_bio *bbio = rbio->bbio;
- void *pointers[rbio->real_stripes];
- DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
+ void **pointers = rbio->finish_pointers;
+ unsigned long *pbitmap = rbio->finish_pbitmap;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
@@ -2394,7 +2415,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
pointers);
} else {
/* raid5 */
- memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
+ copy_page(pointers[nr_data], pointers[0]);
run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -2402,7 +2423,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
parity = kmap(p);
if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
- memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
+ copy_page(parity, pointers[rbio->scrubp]);
else
/* Parity is right, needn't writeback */
bitmap_clear(rbio->dbitmap, pagenr, 1);
@@ -2463,7 +2484,7 @@ submit_write:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+ bio->bi_opf = REQ_OP_WRITE;
submit_bio(bio);
}
@@ -2645,7 +2666,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio->bi_opf = REQ_OP_READ;
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
@@ -2674,18 +2695,10 @@ static void scrub_parity_work(struct btrfs_work *work)
raid56_parity_scrub_stripe(rbio);
}
-static void async_scrub_parity(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- scrub_parity_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_scrub_parity(rbio);
+ start_async_work(rbio, scrub_parity_work);
}
/* The following code is used for dev replace of a missing RAID 5/6 device. */
@@ -2724,24 +2737,8 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
return rbio;
}
-static void missing_raid56_work(struct btrfs_work *work)
-{
- struct btrfs_raid_bio *rbio;
-
- rbio = container_of(work, struct btrfs_raid_bio, work);
- __raid56_parity_recover(rbio);
-}
-
-static void async_missing_raid56(struct btrfs_raid_bio *rbio)
-{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper,
- missing_raid56_work, NULL, NULL);
-
- btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
-}
-
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
{
if (!lock_stripe_add(rbio))
- async_missing_raid56(rbio);
+ start_async_work(rbio, read_rebuild_work);
}