diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
| -rw-r--r-- | fs/btrfs/disk-io.c | 422 |
1 files changed, 244 insertions, 178 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41fb43183406..60ce1190307b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -25,7 +25,6 @@ #include <linux/buffer_head.h> #include <linux/workqueue.h> #include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/slab.h> #include <linux/migrate.h> #include <linux/ratelimit.h> @@ -42,6 +41,7 @@ #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" +#include "free-space-tree.h" #include "inode-map.h" #include "check-integrity.h" #include "rcu-string.h" @@ -49,11 +49,18 @@ #include "raid56.h" #include "sysfs.h" #include "qgroup.h" +#include "compression.h" #ifdef CONFIG_X86 #include <asm/cpufeature.h> #endif +#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ + BTRFS_HEADER_FLAG_RELOC |\ + BTRFS_SUPER_FLAG_ERROR |\ + BTRFS_SUPER_FLAG_SEEDING |\ + BTRFS_SUPER_FLAG_METADUMP) + static const struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); static void free_fs_root(struct btrfs_root *root); @@ -103,8 +110,7 @@ int __init btrfs_end_io_wq_init(void) void btrfs_end_io_wq_exit(void) { - if (btrfs_end_io_wq_cache) - kmem_cache_destroy(btrfs_end_io_wq_cache); + kmem_cache_destroy(btrfs_end_io_wq_cache); } /* @@ -175,6 +181,7 @@ static struct btrfs_lockdep_keyset { { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" }, + { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" }, { .id = 0, .name_stem = "tree" }, }; @@ -295,7 +302,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, err = map_private_extent_buffer(buf, offset, 32, &kaddr, &map_start, &map_len); if (err) - return 1; + return err; cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(kaddr + offset - map_start, crc, cur_len); @@ -305,7 +312,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, if (csum_size > sizeof(inline_result)) { result = kzalloc(csum_size, GFP_NOFS); if (!result) - return 1; + return -ENOMEM; } else { result = (char *)&inline_result; } @@ -326,7 +333,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) kfree(result); - return 1; + return -EUCLEAN; } } else { write_extent_buffer(buf, result, 0, csum_size); @@ -362,7 +369,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, } lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, - 0, &cached_state); + &cached_state); if (extent_buffer_uptodate(eb) && btrfs_header_generation(eb) == parent_transid) { ret = 0; @@ -377,7 +384,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, /* * Things reading via commit roots that don't have normal protection, * like send, can have a really old block in cache that may point at a - * block that has been free'd and re-allocated. So don't clear uptodate + * block that has been freed and re-allocated. So don't clear uptodate * if we find an eb that is under IO (dirty/writeback) because we could * end up reading in the stale data and then writing it back out and * making everybody very sad. @@ -411,7 +418,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb) /* * The super_block structure does not span the whole * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space - * is filled with zeros and is included in the checkum. + * is filled with zeros and is included in the checksum. */ crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); @@ -505,11 +512,21 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page) eb = (struct extent_buffer *)page->private; if (page != eb->pages[0]) return 0; + found_start = btrfs_header_bytenr(eb); - if (WARN_ON(found_start != start || !PageUptodate(page))) - return 0; - csum_tree_block(fs_info, eb, 0); - return 0; + /* + * Please do not consolidate these warnings into a single if. + * It is useful to know what went wrong. + */ + if (WARN_ON(found_start != start)) + return -EUCLEAN; + if (WARN_ON(!PageUptodate(page))) + return -EUCLEAN; + + ASSERT(memcmp_extent_buffer(eb, fs_info->fsid, + btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0); + + return csum_tree_block(fs_info, eb, 0); } static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, @@ -583,7 +600,7 @@ static noinline int check_leaf(struct btrfs_root *root, /* * Check to make sure that we don't point outside of the leaf, - * just incase all the items are consistent to eachother, but + * just in case all the items are consistent to each other, but * all point outside of the leaf. */ if (btrfs_item_end_nr(leaf, slot) > @@ -604,6 +621,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, int found_level; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; int reads_done; @@ -629,21 +647,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu", - found_start, eb->start); + btrfs_err_rl(fs_info, "bad tree block start %llu %llu", + found_start, eb->start); ret = -EIO; goto err; } - if (check_tree_block_fsid(root->fs_info, eb)) { - btrfs_err_rl(eb->fs_info, "bad fsid on block %llu", - eb->start); + if (check_tree_block_fsid(fs_info, eb)) { + btrfs_err_rl(fs_info, "bad fsid on block %llu", + eb->start); ret = -EIO; goto err; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { - btrfs_err(root->fs_info, "bad tree block level %d", - (int)btrfs_header_level(eb)); + btrfs_err(fs_info, "bad tree block level %d", + (int)btrfs_header_level(eb)); ret = -EIO; goto err; } @@ -651,11 +669,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, found_level); - ret = csum_tree_block(root->fs_info, eb, 1); - if (ret) { - ret = -EIO; + ret = csum_tree_block(fs_info, eb, 1); + if (ret) goto err; - } /* * If this is a leaf block and it is corrupt, set the corrupt bit so @@ -672,7 +688,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, ret); + btree_readahead_hook(fs_info, eb, eb->start, ret); if (ret) { /* @@ -691,14 +707,13 @@ out: static int btree_io_failed_hook(struct page *page, int failed_mirror) { struct extent_buffer *eb; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(root, eb, eb->start, -EIO); + btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO); return -EIO; /* we fixed nothing */ } @@ -808,7 +823,7 @@ static void run_one_async_done(struct btrfs_work *work) waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - /* If an error occured we just want to clean up the bio and move on */ + /* If an error occurred we just want to clean up the bio and move on */ if (async->error) { async->bio->bi_error = async->error; bio_endio(async->bio); @@ -923,7 +938,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags) if (bio_flags & EXTENT_BIO_TREE_LOG) return 0; #ifdef CONFIG_X86 - if (cpu_has_xmm4_2) + if (static_cpu_has(X86_FEATURE_XMM4_2)) return 0; #endif return 1; @@ -1047,7 +1062,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset, (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); - page_cache_release(page); + put_page(page); } } @@ -1083,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr) struct inode *btree_inode = root->fs_info->btree_inode; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); @@ -1099,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) + if (IS_ERR(buf)) return 0; set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); @@ -1132,7 +1147,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr); + return alloc_test_extent_buffer(root->fs_info, bytenr, + root->nodesize); return alloc_extent_buffer(root->fs_info, bytenr); } @@ -1156,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, int ret; buf = btrfs_find_create_tree_block(root, bytenr); - if (!buf) - return ERR_PTR(-ENOMEM); + if (IS_ERR(buf)) + return buf; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret) { @@ -1288,9 +1304,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, spin_lock_init(&root->root_item_lock); } -static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) +static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, + gfp_t flags) { - struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); + struct btrfs_root *root = kzalloc(sizeof(*root), flags); if (root) root->fs_info = fs_info; return root; @@ -1298,14 +1315,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(void) +struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize) { struct btrfs_root *root; - root = btrfs_alloc_root(NULL); + root = btrfs_alloc_root(NULL, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, root, NULL, 1); + /* We don't use the stripesize in selftest, set it as sectorsize */ + __setup_root(nodesize, sectorsize, sectorsize, root, NULL, + BTRFS_ROOT_TREE_OBJECTID); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; @@ -1324,7 +1343,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, int ret = 0; uuid_le uuid; - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); @@ -1400,7 +1419,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct extent_buffer *leaf; - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); @@ -1498,7 +1517,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, if (!path) return ERR_PTR(-ENOMEM); - root = btrfs_alloc_root(fs_info); + root = btrfs_alloc_root(fs_info, GFP_NOFS); if (!root) { ret = -ENOMEM; goto alloc_fail; @@ -1624,7 +1643,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, { int ret; - ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); + ret = radix_tree_preload(GFP_NOFS); if (ret) return ret; @@ -1665,6 +1684,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, if (location->objectid == BTRFS_UUID_TREE_OBJECTID) return fs_info->uuid_root ? fs_info->uuid_root : ERR_PTR(-ENOENT); + if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) + return fs_info->free_space_root ? fs_info->free_space_root : + ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) { @@ -1745,7 +1767,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) if (err) return err; - bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; + bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; bdi->congested_fn = btrfs_congested_fn; bdi->congested_data = info; bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; @@ -1784,6 +1806,13 @@ static int cleaner_kthread(void *arg) if (btrfs_need_cleaner_sleep(root)) goto sleep; + /* + * Do not do anything if we might cause open_ctree() to block + * before we have finished mounting the filesystem. + */ + if (!root->fs_info->open) + goto sleep; + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -1819,7 +1848,7 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(root->fs_info); sleep: - if (!try_to_freeze() && !again) { + if (!again) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop()) schedule(); @@ -1909,14 +1938,12 @@ sleep: if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))) btrfs_cleanup_transaction(root); - if (!try_to_freeze()) { - set_current_state(TASK_INTERRUPTIBLE); - if (!kthread_should_stop() && - (!btrfs_transaction_blocked(root->fs_info) || - cannot_commit)) - schedule_timeout(delay); - __set_current_state(TASK_RUNNING); - } + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop() && + (!btrfs_transaction_blocked(root->fs_info) || + cannot_commit)) + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); } while (!kthread_should_stop()); return 0; } @@ -2165,6 +2192,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_root_extent_buffers(info->uuid_root); if (chunk_root) free_root_extent_buffers(info->chunk_root); + free_root_extent_buffers(info->free_space_root); } void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) @@ -2260,9 +2288,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) fs_info->dev_replace.lock_owner = 0; atomic_set(&fs_info->dev_replace.nesting_level, 0); mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); - mutex_init(&fs_info->dev_replace.lock_management_lock); - mutex_init(&fs_info->dev_replace.lock); + rwlock_init(&fs_info->dev_replace.lock); + atomic_set(&fs_info->dev_replace.read_locks, 0); + atomic_set(&fs_info->dev_replace.blocking_readers, 0); init_waitqueue_head(&fs_info->replace_wait); + init_waitqueue_head(&fs_info->dev_replace.read_lock_wq); } static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) @@ -2373,7 +2403,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, return -EIO; } - log_tree_root = btrfs_alloc_root(fs_info); + log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!log_tree_root) return -ENOMEM; @@ -2397,7 +2427,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); if (ret) { - btrfs_std_error(tree_root->fs_info, ret, + btrfs_handle_fs_error(tree_root->fs_info, ret, "Failed to recover log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); @@ -2465,6 +2495,15 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info, fs_info->uuid_root = root; } + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID; + root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(root)) + return PTR_ERR(root); + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); + fs_info->free_space_root = root; + } + return 0; } @@ -2489,8 +2528,8 @@ int open_ctree(struct super_block *sb, int backup_index = 0; int max_active; - tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); - chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); + tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); + chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!tree_root || !chunk_root) { err = -ENOMEM; goto fail; @@ -2513,7 +2552,7 @@ int open_ctree(struct super_block *sb, err = ret; goto fail_bdi; } - fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * + fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); @@ -2582,6 +2621,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->qgroup_op_seq, 0); + atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; @@ -2601,7 +2641,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->ordered_roots); spin_lock_init(&fs_info->ordered_root_lock); fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), - GFP_NOFS); + GFP_KERNEL); if (!fs_info->delayed_root) { err = -ENOMEM; goto fail_iput; @@ -2683,7 +2723,7 @@ int open_ctree(struct super_block *sb, * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). */ if (btrfs_check_super_csum(bh->b_data)) { - printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); + btrfs_err(fs_info, "superblock checksum mismatch"); err = -EINVAL; brelse(bh); goto fail_alloc; @@ -2703,7 +2743,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); if (ret) { - printk(KERN_ERR "BTRFS: superblock contains fatal errors\n"); + btrfs_err(fs_info, "superblock contains fatal errors"); err = -EINVAL; goto fail_alloc; } @@ -2729,7 +2769,7 @@ int open_ctree(struct super_block *sb, */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; - ret = btrfs_parse_options(tree_root, options); + ret = btrfs_parse_options(tree_root, options, sb->s_flags); if (ret) { err = ret; goto fail_alloc; @@ -2738,29 +2778,9 @@ int open_ctree(struct super_block *sb, features = btrfs_super_incompat_flags(disk_super) & ~BTRFS_FEATURE_INCOMPAT_SUPP; if (features) { - printk(KERN_ERR "BTRFS: couldn't mount because of " - "unsupported optional features (%Lx).\n", - features); - err = -EINVAL; - goto fail_alloc; - } - - /* - * Leafsize and nodesize were always equal, this is only a sanity check. - */ - if (le32_to_cpu(disk_super->__unused_leafsize) != - btrfs_super_nodesize(disk_super)) { - printk(KERN_ERR "BTRFS: couldn't mount because metadata " - "blocksizes don't match. node %d leaf %d\n", - btrfs_super_nodesize(disk_super), - le32_to_cpu(disk_super->__unused_leafsize)); - err = -EINVAL; - goto fail_alloc; - } - if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { - printk(KERN_ERR "BTRFS: couldn't mount because metadata " - "blocksize (%d) was too large\n", - btrfs_super_nodesize(disk_super)); + btrfs_err(fs_info, + "cannot mount because of unsupported optional features (%llx)", + features); err = -EINVAL; goto fail_alloc; } @@ -2771,21 +2791,22 @@ int open_ctree(struct super_block *sb, features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) - printk(KERN_INFO "BTRFS: has skinny extents\n"); + btrfs_info(fs_info, "has skinny extents"); /* * flag our filesystem as having big metadata blocks if * they are bigger than the page size */ - if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { + if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) - printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); + btrfs_info(fs_info, + "flagging fs with big metadata feature"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; } nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); - stripesize = btrfs_super_stripesize(disk_super); + stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); @@ -2795,9 +2816,9 @@ int open_ctree(struct super_block *sb, */ if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && (sectorsize != nodesize)) { - printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes " - "are not allowed for mixed block groups on %s\n", - sb->s_id); + btrfs_err(fs_info, +"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", + nodesize, sectorsize); goto fail_alloc; } @@ -2810,8 +2831,8 @@ int open_ctree(struct super_block *sb, features = btrfs_super_compat_ro_flags(disk_super) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " - "unsupported option features (%Lx).\n", + btrfs_err(fs_info, + "cannot mount read-write because of unsupported optional features (%llx)", features); err = -EINVAL; goto fail_alloc; @@ -2827,7 +2848,7 @@ int open_ctree(struct super_block *sb, fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, - 4 * 1024 * 1024 / PAGE_CACHE_SIZE); + SZ_4M / PAGE_SIZE); tree_root->nodesize = nodesize; tree_root->sectorsize = sectorsize; @@ -2836,23 +2857,11 @@ int open_ctree(struct super_block *sb, sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); - if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { - printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id); - goto fail_sb_buffer; - } - - if (sectorsize != PAGE_SIZE) { - printk(KERN_ERR "BTRFS: incompatible sector size (%lu) " - "found on %s\n", (unsigned long)sectorsize, sb->s_id); - goto fail_sb_buffer; - } - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk(KERN_ERR "BTRFS: failed to read the system " - "array on %s\n", sb->s_id); + btrfs_err(fs_info, "failed to read the system array: %d", ret); goto fail_sb_buffer; } @@ -2866,8 +2875,7 @@ int open_ctree(struct super_block *sb, generation); if (IS_ERR(chunk_root->node) || !extent_buffer_uptodate(chunk_root->node)) { - printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", - sb->s_id); + btrfs_err(fs_info, "failed to read chunk root"); if (!IS_ERR(chunk_root->node)) free_extent_buffer(chunk_root->node); chunk_root->node = NULL; @@ -2881,8 +2889,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_read_chunk_tree(chunk_root); if (ret) { - printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n", - sb->s_id); + btrfs_err(fs_info, "failed to read chunk tree: %d", ret); goto fail_tree_roots; } @@ -2893,8 +2900,7 @@ int open_ctree(struct super_block *sb, btrfs_close_extra_devices(fs_devices, 0); if (!fs_devices->latest_bdev) { - printk(KERN_ERR "BTRFS: failed to read devices on %s\n", - sb->s_id); + btrfs_err(fs_info, "failed to read devices"); goto fail_tree_roots; } @@ -2906,8 +2912,7 @@ retry_root_backup: generation); if (IS_ERR(tree_root->node) || !extent_buffer_uptodate(tree_root->node)) { - printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", - sb->s_id); + btrfs_warn(fs_info, "failed to read tree root"); if (!IS_ERR(tree_root->node)) free_extent_buffer(tree_root->node); tree_root->node = NULL; @@ -2939,20 +2944,19 @@ retry_root_backup: ret = btrfs_recover_balance(fs_info); if (ret) { - printk(KERN_ERR "BTRFS: failed to recover balance\n"); + btrfs_err(fs_info, "failed to recover balance: %d", ret); goto fail_block_groups; } ret = btrfs_init_dev_stats(fs_info); if (ret) { - printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n", - ret); + btrfs_err(fs_info, "failed to init dev_stats: %d", ret); goto fail_block_groups; } ret = btrfs_init_dev_replace(fs_info); if (ret) { - pr_err("BTRFS: failed to init dev_replace: %d\n", ret); + btrfs_err(fs_info, "failed to init dev_replace: %d", ret); goto fail_block_groups; } @@ -2960,31 +2964,33 @@ retry_root_backup: ret = btrfs_sysfs_add_fsid(fs_devices, NULL); if (ret) { - pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret); + btrfs_err(fs_info, "failed to init sysfs fsid interface: %d", + ret); goto fail_block_groups; } ret = btrfs_sysfs_add_device(fs_devices); if (ret) { - pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret); + btrfs_err(fs_info, "failed to init sysfs device interface: %d", + ret); goto fail_fsdev_sysfs; } ret = btrfs_sysfs_add_mounted(fs_info); if (ret) { - pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); + btrfs_err(fs_info, "failed to init sysfs interface: %d", ret); goto fail_fsdev_sysfs; } ret = btrfs_init_space_info(fs_info); if (ret) { - printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret); + btrfs_err(fs_info, "failed to initialize space info: %d", ret); goto fail_sysfs; } ret = btrfs_read_block_groups(fs_info->extent_root); if (ret) { - printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); + btrfs_err(fs_info, "failed to read block groups: %d", ret); goto fail_sysfs; } fs_info->num_tolerated_disk_barrier_failures = @@ -2992,7 +2998,8 @@ retry_root_backup: if (fs_info->fs_devices->missing_devices > fs_info->num_tolerated_disk_barrier_failures && !(sb->s_flags & MS_RDONLY)) { - pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n", + btrfs_warn(fs_info, +"missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed", fs_info->fs_devices->missing_devices, fs_info->num_tolerated_disk_barrier_failures); goto fail_sysfs; @@ -3012,13 +3019,12 @@ retry_root_backup: if (!btrfs_test_opt(tree_root, SSD) && !btrfs_test_opt(tree_root, NOSSD) && !fs_info->fs_devices->rotating) { - printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD " - "mode\n"); + btrfs_info(fs_info, "detected SSD devices, enabling SSD mode"); btrfs_set_opt(fs_info->mount_opt, SSD); } /* - * Mount does not set all options immediatelly, we can do it now and do + * Mount does not set all options immediately, we can do it now and do * not have to wait for transaction commit */ btrfs_apply_pending_changes(fs_info); @@ -3031,16 +3037,18 @@ retry_root_backup: 1 : 0, fs_info->check_integrity_print_mask); if (ret) - printk(KERN_WARNING "BTRFS: failed to initialize" - " integrity check module %s\n", sb->s_id); + btrfs_warn(fs_info, + "failed to initialize integrity check module: %d", + ret); } #endif ret = btrfs_read_qgroup_config(fs_info); if (ret) goto fail_trans_kthread; - /* do not make disk changes in broken FS */ - if (btrfs_super_log_root(disk_super) != 0) { + /* do not make disk changes in broken FS or nologreplay is given */ + if (btrfs_super_log_root(disk_super) != 0 && + !btrfs_test_opt(tree_root, NOLOGREPLAY)) { ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3061,8 +3069,8 @@ retry_root_backup: ret = btrfs_recover_relocation(tree_root); mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { - printk(KERN_WARNING - "BTRFS: failed to recover relocation\n"); + btrfs_warn(fs_info, "failed to recover relocation: %d", + ret); err = -EINVAL; goto fail_qgroup; } @@ -3081,6 +3089,18 @@ retry_root_backup: if (sb->s_flags & MS_RDONLY) return 0; + if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && + !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + btrfs_info(fs_info, "creating free space tree"); + ret = btrfs_create_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to create free space tree: %d", ret); + close_ctree(tree_root); + return ret; + } + } + down_read(&fs_info->cleanup_work_sem); if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { @@ -3092,37 +3112,49 @@ retry_root_backup: ret = btrfs_resume_balance_async(fs_info); if (ret) { - printk(KERN_WARNING "BTRFS: failed to resume balance\n"); + btrfs_warn(fs_info, "failed to resume balance: %d", ret); close_ctree(tree_root); return ret; } ret = btrfs_resume_dev_replace_async(fs_info); if (ret) { - pr_warn("BTRFS: failed to resume dev_replace\n"); + btrfs_warn(fs_info, "failed to resume device replace: %d", ret); close_ctree(tree_root); return ret; } btrfs_qgroup_rescan_resume(fs_info); + if (btrfs_test_opt(tree_root, CLEAR_CACHE) && + btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + btrfs_info(fs_info, "clearing free space tree"); + ret = btrfs_clear_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to clear free space tree: %d", ret); + close_ctree(tree_root); + return ret; + } + } + if (!fs_info->uuid_root) { - pr_info("BTRFS: creating UUID tree\n"); + btrfs_info(fs_info, "creating UUID tree"); ret = btrfs_create_uuid_tree(fs_info); if (ret) { - pr_warn("BTRFS: failed to create the UUID tree %d\n", - ret); + btrfs_warn(fs_info, + "failed to create the UUID tree: %d", ret); close_ctree(tree_root); return ret; } } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || fs_info->generation != btrfs_super_uuid_tree_generation(disk_super)) { - pr_info("BTRFS: checking UUID tree\n"); + btrfs_info(fs_info, "checking UUID tree"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { - pr_warn("BTRFS: failed to check the UUID tree %d\n", - ret); + btrfs_warn(fs_info, + "failed to check the UUID tree: %d", ret); close_ctree(tree_root); return ret; } @@ -3132,6 +3164,12 @@ retry_root_backup: fs_info->open = 1; + /* + * backuproot only affect mount behavior, and if open_ctree succeeded, + * no need to keep the flag + */ + btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT); + return 0; fail_qgroup: @@ -3186,7 +3224,7 @@ fail: return err; recovery_tree_root: - if (!btrfs_test_opt(tree_root, RECOVERY)) + if (!btrfs_test_opt(tree_root, USEBACKUPROOT)) goto fail_tree_roots; free_root_pointers(fs_info, 0); @@ -3215,7 +3253,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) btrfs_warn_rl_in_rcu(device->dev_root->fs_info, "lost page write due to IO error on %s", rcu_str_deref(device->name)); - /* note, we dont' set_buffer_write_io_error because we have + /* note, we don't set_buffer_write_io_error because we have * our own ways of dealing with the IO errors */ clear_buffer_uptodate(bh); @@ -3616,7 +3654,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) if (ret) { mutex_unlock( &root->fs_info->fs_devices->device_list_mutex); - btrfs_std_error(root->fs_info, ret, + btrfs_handle_fs_error(root->fs_info, ret, "errors while submitting device barriers."); return ret; } @@ -3656,7 +3694,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); /* FUA is masked off if unsupported and can't be the reason */ - btrfs_std_error(root->fs_info, -EIO, + btrfs_handle_fs_error(root->fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } @@ -3674,7 +3712,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (total_errors > max_errors) { - btrfs_std_error(root->fs_info, -EIO, + btrfs_handle_fs_error(root->fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } @@ -3932,11 +3970,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, return !ret; } -int btrfs_set_buffer_uptodate(struct extent_buffer *buf) -{ - return set_extent_buffer_uptodate(buf); -} - void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { struct btrfs_root *root; @@ -3992,7 +4025,6 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, balance_dirty_pages_ratelimited( root->fs_info->btree_inode->i_mapping); } - return; } void btrfs_btree_balance_dirty(struct btrfs_root *root) @@ -4015,8 +4047,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only) { struct btrfs_super_block *sb = fs_info->super_copy; + u64 nodesize = btrfs_super_nodesize(sb); + u64 sectorsize = btrfs_super_sectorsize(sb); int ret = 0; + if (btrfs_super_magic(sb) != BTRFS_MAGIC) { + printk(KERN_ERR "BTRFS: no valid FS found\n"); + ret = -EINVAL; + } + if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) + printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n", + btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); @@ -4034,31 +4075,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, } /* - * The common minimum, we don't know if we can trust the nodesize/sectorsize - * items yet, they'll be verified later. Issue just a warning. + * Check sectorsize and nodesize first, other check will need it. + * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here. */ - if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) + if (!is_power_of_2(sectorsize) || sectorsize < 4096 || + sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { + printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize); + ret = -EINVAL; + } + /* Only PAGE SIZE is supported yet */ + if (sectorsize != PAGE_SIZE) { + printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n", + sectorsize, PAGE_SIZE); + ret = -EINVAL; + } + if (!is_power_of_2(nodesize) || nodesize < sectorsize || + nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { + printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize); + ret = -EINVAL; + } + if (nodesize != le32_to_cpu(sb->__unused_leafsize)) { + printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n", + le32_to_cpu(sb->__unused_leafsize), + nodesize); + ret = -EINVAL; + } + + /* Root alignment check */ + if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", btrfs_super_root(sb)); - if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) + ret = -EINVAL; + } + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n", btrfs_super_chunk_root(sb)); - if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) - printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", - btrfs_super_log_root(sb)); - - /* - * Check the lower bound, the alignment and other constraints are - * checked later. - */ - if (btrfs_super_nodesize(sb) < 4096) { - printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n", - btrfs_super_nodesize(sb)); ret = -EINVAL; } - if (btrfs_super_sectorsize(sb) < 4096) { - printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n", - btrfs_super_sectorsize(sb)); + if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { + printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n", + btrfs_super_log_root(sb)); ret = -EINVAL; } @@ -4072,6 +4128,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ + if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { + btrfs_err(fs_info, "bytes_used is too small %llu", + btrfs_super_bytes_used(sb)); + ret = -EINVAL; + } + if (!is_power_of_2(btrfs_super_stripesize(sb))) { + btrfs_err(fs_info, "invalid stripesize %u", + btrfs_super_stripesize(sb)); + ret = -EINVAL; + } if (btrfs_super_num_devices(sb) > (1UL << 31)) printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", btrfs_super_num_devices(sb)); @@ -4309,7 +4375,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, if (ret) break; - clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); + clear_extent_bits(dirty_pages, start, end, mark); while (start <= end) { eb = btrfs_find_tree_block(root->fs_info, start); start += root->nodesize; @@ -4344,7 +4410,7 @@ again: if (ret) break; - clear_extent_dirty(unpin, start, end, GFP_NOFS); + clear_extent_dirty(unpin, start, end); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); } |
