about summary refs log tree commit diff stats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 422
1 files changed, 244 insertions, 178 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41fb43183406..60ce1190307b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -25,7 +25,6 @@
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
@@ -42,6 +41,7 @@
#include "locking.h"
#include "tree-log.h"
#include "free-space-cache.h"
+#include "free-space-tree.h"
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
@@ -49,11 +49,18 @@
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "compression.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
#endif
+#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
+ BTRFS_HEADER_FLAG_RELOC |\
+ BTRFS_SUPER_FLAG_ERROR |\
+ BTRFS_SUPER_FLAG_SEEDING |\
+ BTRFS_SUPER_FLAG_METADUMP)
+
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
@@ -103,8 +110,7 @@ int __init btrfs_end_io_wq_init(void)
void btrfs_end_io_wq_exit(void)
{
- if (btrfs_end_io_wq_cache)
- kmem_cache_destroy(btrfs_end_io_wq_cache);
+ kmem_cache_destroy(btrfs_end_io_wq_cache);
}
/*
@@ -175,6 +181,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
+ { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
{ .id = 0, .name_stem = "tree" },
};
@@ -295,7 +302,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
err = map_private_extent_buffer(buf, offset, 32,
&kaddr, &map_start, &map_len);
if (err)
- return 1;
+ return err;
cur_len = min(len, map_len - (offset - map_start));
crc = btrfs_csum_data(kaddr + offset - map_start,
crc, cur_len);
@@ -305,7 +312,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size, GFP_NOFS);
if (!result)
- return 1;
+ return -ENOMEM;
} else {
result = (char *)&inline_result;
}
@@ -326,7 +333,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
- return 1;
+ return -EUCLEAN;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
@@ -362,7 +369,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
}
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
- 0, &cached_state);
+ &cached_state);
if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
@@ -377,7 +384,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
/*
* Things reading via commit roots that don't have normal protection,
* like send, can have a really old block in cache that may point at a
- * block that has been free'd and re-allocated. So don't clear uptodate
+ * block that has been freed and re-allocated. So don't clear uptodate
* if we find an eb that is under IO (dirty/writeback) because we could
* end up reading in the stale data and then writing it back out and
* making everybody very sad.
@@ -411,7 +418,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
/*
* The super_block structure does not span the whole
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
- * is filled with zeros and is included in the checkum.
+ * is filled with zeros and is included in the checksum.
*/
crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
@@ -505,11 +512,21 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
return 0;
+
found_start = btrfs_header_bytenr(eb);
- if (WARN_ON(found_start != start || !PageUptodate(page)))
- return 0;
- csum_tree_block(fs_info, eb, 0);
- return 0;
+ /*
+ * Please do not consolidate these warnings into a single if.
+ * It is useful to know what went wrong.
+ */
+ if (WARN_ON(found_start != start))
+ return -EUCLEAN;
+ if (WARN_ON(!PageUptodate(page)))
+ return -EUCLEAN;
+
+ ASSERT(memcmp_extent_buffer(eb, fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
+
+ return csum_tree_block(fs_info, eb, 0);
}
static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
@@ -583,7 +600,7 @@ static noinline int check_leaf(struct btrfs_root *root,
/*
* Check to make sure that we don't point outside of the leaf,
- * just incase all the items are consistent to eachother, but
+ * just in case all the items are consistent to each other, but
* all point outside of the leaf.
*/
if (btrfs_item_end_nr(leaf, slot) >
@@ -604,6 +621,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
int found_level;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
int reads_done;
@@ -629,21 +647,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
- found_start, eb->start);
+ btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
+ found_start, eb->start);
ret = -EIO;
goto err;
}
- if (check_tree_block_fsid(root->fs_info, eb)) {
- btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
- eb->start);
+ if (check_tree_block_fsid(fs_info, eb)) {
+ btrfs_err_rl(fs_info, "bad fsid on block %llu",
+ eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_err(root->fs_info, "bad tree block level %d",
- (int)btrfs_header_level(eb));
+ btrfs_err(fs_info, "bad tree block level %d",
+ (int)btrfs_header_level(eb));
ret = -EIO;
goto err;
}
@@ -651,11 +669,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
- ret = csum_tree_block(root->fs_info, eb, 1);
- if (ret) {
- ret = -EIO;
+ ret = csum_tree_block(fs_info, eb, 1);
+ if (ret)
goto err;
- }
/*
* If this is a leaf block and it is corrupt, set the corrupt bit so
@@ -672,7 +688,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, ret);
+ btree_readahead_hook(fs_info, eb, eb->start, ret);
if (ret) {
/*
@@ -691,14 +707,13 @@ out:
static int btree_io_failed_hook(struct page *page, int failed_mirror)
{
struct extent_buffer *eb;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = failed_mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, -EIO);
+ btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
}
@@ -808,7 +823,7 @@ static void run_one_async_done(struct btrfs_work *work)
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
- /* If an error occured we just want to clean up the bio and move on */
+ /* If an error occurred we just want to clean up the bio and move on */
if (async->error) {
async->bio->bi_error = async->error;
bio_endio(async->bio);
@@ -923,7 +938,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
#ifdef CONFIG_X86
- if (cpu_has_xmm4_2)
+ if (static_cpu_has(X86_FEATURE_XMM4_2))
return 0;
#endif
return 1;
@@ -1047,7 +1062,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
(unsigned long long)page_offset(page));
ClearPagePrivate(page);
set_page_private(page, 0);
- page_cache_release(page);
+ put_page(page);
}
}
@@ -1083,7 +1098,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
struct inode *btree_inode = root->fs_info->btree_inode;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
+ if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, WAIT_NONE, btree_get_extent, 0);
@@ -1099,7 +1114,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int ret;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
+ if (IS_ERR(buf))
return 0;
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
@@ -1132,7 +1147,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr)
{
if (btrfs_test_is_dummy_root(root))
- return alloc_test_extent_buffer(root->fs_info, bytenr);
+ return alloc_test_extent_buffer(root->fs_info, bytenr,
+ root->nodesize);
return alloc_extent_buffer(root->fs_info, bytenr);
}
@@ -1156,8 +1172,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
int ret;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(buf))
+ return buf;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret) {
@@ -1288,9 +1304,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
spin_lock_init(&root->root_item_lock);
}
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+ gfp_t flags)
{
- struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
+ struct btrfs_root *root = kzalloc(sizeof(*root), flags);
if (root)
root->fs_info = fs_info;
return root;
@@ -1298,14 +1315,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(void)
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
- root = btrfs_alloc_root(NULL);
+ root = btrfs_alloc_root(NULL, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
- __setup_root(4096, 4096, 4096, root, NULL, 1);
+ /* We don't use the stripesize in selftest, set it as sectorsize */
+ __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+ BTRFS_ROOT_TREE_OBJECTID);
set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
@@ -1324,7 +1343,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
int ret = 0;
uuid_le uuid;
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1400,7 +1419,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_NOFS);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1498,7 +1517,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
if (!path)
return ERR_PTR(-ENOMEM);
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_NOFS);
if (!root) {
ret = -ENOMEM;
goto alloc_fail;
@@ -1624,7 +1643,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ ret = radix_tree_preload(GFP_NOFS);
if (ret)
return ret;
@@ -1665,6 +1684,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
return fs_info->uuid_root ? fs_info->uuid_root :
ERR_PTR(-ENOENT);
+ if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+ return fs_info->free_space_root ? fs_info->free_space_root :
+ ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, location->objectid);
if (root) {
@@ -1745,7 +1767,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
if (err)
return err;
- bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+ bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
bdi->congested_fn = btrfs_congested_fn;
bdi->congested_data = info;
bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
@@ -1784,6 +1806,13 @@ static int cleaner_kthread(void *arg)
if (btrfs_need_cleaner_sleep(root))
goto sleep;
+ /*
+ * Do not do anything if we might cause open_ctree() to block
+ * before we have finished mounting the filesystem.
+ */
+ if (!root->fs_info->open)
+ goto sleep;
+
if (!mutex_trylock(&root->fs_info->cleaner_mutex))
goto sleep;
@@ -1819,7 +1848,7 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(root->fs_info);
sleep:
- if (!try_to_freeze() && !again) {
+ if (!again) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
@@ -1909,14 +1938,12 @@ sleep:
if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
&root->fs_info->fs_state)))
btrfs_cleanup_transaction(root);
- if (!try_to_freeze()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!kthread_should_stop() &&
- (!btrfs_transaction_blocked(root->fs_info) ||
- cannot_commit))
- schedule_timeout(delay);
- __set_current_state(TASK_RUNNING);
- }
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!kthread_should_stop() &&
+ (!btrfs_transaction_blocked(root->fs_info) ||
+ cannot_commit))
+ schedule_timeout(delay);
+ __set_current_state(TASK_RUNNING);
} while (!kthread_should_stop());
return 0;
}
@@ -2165,6 +2192,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->uuid_root);
if (chunk_root)
free_root_extent_buffers(info->chunk_root);
+ free_root_extent_buffers(info->free_space_root);
}
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2260,9 +2288,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
- mutex_init(&fs_info->dev_replace.lock_management_lock);
- mutex_init(&fs_info->dev_replace.lock);
+ rwlock_init(&fs_info->dev_replace.lock);
+ atomic_set(&fs_info->dev_replace.read_locks, 0);
+ atomic_set(&fs_info->dev_replace.blocking_readers, 0);
init_waitqueue_head(&fs_info->replace_wait);
+ init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
}
static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
@@ -2373,7 +2403,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
return -EIO;
}
- log_tree_root = btrfs_alloc_root(fs_info);
+ log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!log_tree_root)
return -ENOMEM;
@@ -2397,7 +2427,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
if (ret) {
- btrfs_std_error(tree_root->fs_info, ret,
+ btrfs_handle_fs_error(tree_root->fs_info, ret,
"Failed to recover log tree");
free_extent_buffer(log_tree_root->node);
kfree(log_tree_root);
@@ -2465,6 +2495,15 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
fs_info->uuid_root = root;
}
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
+ root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+ set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+ fs_info->free_space_root = root;
+ }
+
return 0;
}
@@ -2489,8 +2528,8 @@ int open_ctree(struct super_block *sb,
int backup_index = 0;
int max_active;
- tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
- chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
+ tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
+ chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!tree_root || !chunk_root) {
err = -ENOMEM;
goto fail;
@@ -2513,7 +2552,7 @@ int open_ctree(struct super_block *sb,
err = ret;
goto fail_bdi;
}
- fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+ fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
@@ -2582,6 +2621,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
+ atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2601,7 +2641,7 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_roots);
spin_lock_init(&fs_info->ordered_root_lock);
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->delayed_root) {
err = -ENOMEM;
goto fail_iput;
@@ -2683,7 +2723,7 @@ int open_ctree(struct super_block *sb,
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/
if (btrfs_check_super_csum(bh->b_data)) {
- printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
+ btrfs_err(fs_info, "superblock checksum mismatch");
err = -EINVAL;
brelse(bh);
goto fail_alloc;
@@ -2703,7 +2743,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
if (ret) {
- printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
+ btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL;
goto fail_alloc;
}
@@ -2729,7 +2769,7 @@ int open_ctree(struct super_block *sb,
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
- ret = btrfs_parse_options(tree_root, options);
+ ret = btrfs_parse_options(tree_root, options, sb->s_flags);
if (ret) {
err = ret;
goto fail_alloc;
@@ -2738,29 +2778,9 @@ int open_ctree(struct super_block *sb,
features = btrfs_super_incompat_flags(disk_super) &
~BTRFS_FEATURE_INCOMPAT_SUPP;
if (features) {
- printk(KERN_ERR "BTRFS: couldn't mount because of "
- "unsupported optional features (%Lx).\n",
- features);
- err = -EINVAL;
- goto fail_alloc;
- }
-
- /*
- * Leafsize and nodesize were always equal, this is only a sanity check.
- */
- if (le32_to_cpu(disk_super->__unused_leafsize) !=
- btrfs_super_nodesize(disk_super)) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksizes don't match. node %d leaf %d\n",
- btrfs_super_nodesize(disk_super),
- le32_to_cpu(disk_super->__unused_leafsize));
- err = -EINVAL;
- goto fail_alloc;
- }
- if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksize (%d) was too large\n",
- btrfs_super_nodesize(disk_super));
+ btrfs_err(fs_info,
+ "cannot mount because of unsupported optional features (%llx)",
+ features);
err = -EINVAL;
goto fail_alloc;
}
@@ -2771,21 +2791,22 @@ int open_ctree(struct super_block *sb,
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
- printk(KERN_INFO "BTRFS: has skinny extents\n");
+ btrfs_info(fs_info, "has skinny extents");
/*
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
*/
- if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) {
+ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
- printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
+ btrfs_info(fs_info,
+ "flagging fs with big metadata feature");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
}
nodesize = btrfs_super_nodesize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
- stripesize = btrfs_super_stripesize(disk_super);
+ stripesize = sectorsize;
fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
@@ -2795,9 +2816,9 @@ int open_ctree(struct super_block *sb,
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != nodesize)) {
- printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes "
- "are not allowed for mixed block groups on %s\n",
- sb->s_id);
+ btrfs_err(fs_info,
+"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
+ nodesize, sectorsize);
goto fail_alloc;
}
@@ -2810,8 +2831,8 @@ int open_ctree(struct super_block *sb,
features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP;
if (!(sb->s_flags & MS_RDONLY) && features) {
- printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
- "unsupported option features (%Lx).\n",
+ btrfs_err(fs_info,
+ "cannot mount read-write because of unsupported optional features (%llx)",
features);
err = -EINVAL;
goto fail_alloc;
@@ -2827,7 +2848,7 @@ int open_ctree(struct super_block *sb,
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
- 4 * 1024 * 1024 / PAGE_CACHE_SIZE);
+ SZ_4M / PAGE_SIZE);
tree_root->nodesize = nodesize;
tree_root->sectorsize = sectorsize;
@@ -2836,23 +2857,11 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
- goto fail_sb_buffer;
- }
-
- if (sectorsize != PAGE_SIZE) {
- printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
- "found on %s\n", (unsigned long)sectorsize, sb->s_id);
- goto fail_sb_buffer;
- }
-
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
- printk(KERN_ERR "BTRFS: failed to read the system "
- "array on %s\n", sb->s_id);
+ btrfs_err(fs_info, "failed to read the system array: %d", ret);
goto fail_sb_buffer;
}
@@ -2866,8 +2875,7 @@ int open_ctree(struct super_block *sb,
generation);
if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) {
- printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
- sb->s_id);
+ btrfs_err(fs_info, "failed to read chunk root");
if (!IS_ERR(chunk_root->node))
free_extent_buffer(chunk_root->node);
chunk_root->node = NULL;
@@ -2881,8 +2889,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_chunk_tree(chunk_root);
if (ret) {
- printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n",
- sb->s_id);
+ btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
goto fail_tree_roots;
}
@@ -2893,8 +2900,7 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_devices, 0);
if (!fs_devices->latest_bdev) {
- printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
- sb->s_id);
+ btrfs_err(fs_info, "failed to read devices");
goto fail_tree_roots;
}
@@ -2906,8 +2912,7 @@ retry_root_backup:
generation);
if (IS_ERR(tree_root->node) ||
!extent_buffer_uptodate(tree_root->node)) {
- printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
- sb->s_id);
+ btrfs_warn(fs_info, "failed to read tree root");
if (!IS_ERR(tree_root->node))
free_extent_buffer(tree_root->node);
tree_root->node = NULL;
@@ -2939,20 +2944,19 @@ retry_root_backup:
ret = btrfs_recover_balance(fs_info);
if (ret) {
- printk(KERN_ERR "BTRFS: failed to recover balance\n");
+ btrfs_err(fs_info, "failed to recover balance: %d", ret);
goto fail_block_groups;
}
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
- printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
- ret);
+ btrfs_err(fs_info, "failed to init dev_stats: %d", ret);
goto fail_block_groups;
}
ret = btrfs_init_dev_replace(fs_info);
if (ret) {
- pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
+ btrfs_err(fs_info, "failed to init dev_replace: %d", ret);
goto fail_block_groups;
}
@@ -2960,31 +2964,33 @@ retry_root_backup:
ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
if (ret) {
- pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret);
+ btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
+ ret);
goto fail_block_groups;
}
ret = btrfs_sysfs_add_device(fs_devices);
if (ret) {
- pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret);
+ btrfs_err(fs_info, "failed to init sysfs device interface: %d",
+ ret);
goto fail_fsdev_sysfs;
}
ret = btrfs_sysfs_add_mounted(fs_info);
if (ret) {
- pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
+ btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
goto fail_fsdev_sysfs;
}
ret = btrfs_init_space_info(fs_info);
if (ret) {
- printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
+ btrfs_err(fs_info, "failed to initialize space info: %d", ret);
goto fail_sysfs;
}
ret = btrfs_read_block_groups(fs_info->extent_root);
if (ret) {
- printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
+ btrfs_err(fs_info, "failed to read block groups: %d", ret);
goto fail_sysfs;
}
fs_info->num_tolerated_disk_barrier_failures =
@@ -2992,7 +2998,8 @@ retry_root_backup:
if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures &&
!(sb->s_flags & MS_RDONLY)) {
- pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n",
+ btrfs_warn(fs_info,
+"missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed",
fs_info->fs_devices->missing_devices,
fs_info->num_tolerated_disk_barrier_failures);
goto fail_sysfs;
@@ -3012,13 +3019,12 @@ retry_root_backup:
if (!btrfs_test_opt(tree_root, SSD) &&
!btrfs_test_opt(tree_root, NOSSD) &&
!fs_info->fs_devices->rotating) {
- printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
- "mode\n");
+ btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
btrfs_set_opt(fs_info->mount_opt, SSD);
}
/*
- * Mount does not set all options immediatelly, we can do it now and do
+ * Mount does not set all options immediately, we can do it now and do
* not have to wait for transaction commit
*/
btrfs_apply_pending_changes(fs_info);
@@ -3031,16 +3037,18 @@ retry_root_backup:
1 : 0,
fs_info->check_integrity_print_mask);
if (ret)
- printk(KERN_WARNING "BTRFS: failed to initialize"
- " integrity check module %s\n", sb->s_id);
+ btrfs_warn(fs_info,
+ "failed to initialize integrity check module: %d",
+ ret);
}
#endif
ret = btrfs_read_qgroup_config(fs_info);
if (ret)
goto fail_trans_kthread;
- /* do not make disk changes in broken FS */
- if (btrfs_super_log_root(disk_super) != 0) {
+ /* do not make disk changes in broken FS or nologreplay is given */
+ if (btrfs_super_log_root(disk_super) != 0 &&
+ !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3061,8 +3069,8 @@ retry_root_backup:
ret = btrfs_recover_relocation(tree_root);
mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
- printk(KERN_WARNING
- "BTRFS: failed to recover relocation\n");
+ btrfs_warn(fs_info, "failed to recover relocation: %d",
+ ret);
err = -EINVAL;
goto fail_qgroup;
}
@@ -3081,6 +3089,18 @@ retry_root_backup:
if (sb->s_flags & MS_RDONLY)
return 0;
+ if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+ !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ btrfs_info(fs_info, "creating free space tree");
+ ret = btrfs_create_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to create free space tree: %d", ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
down_read(&fs_info->cleanup_work_sem);
if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
(ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
@@ -3092,37 +3112,49 @@ retry_root_backup:
ret = btrfs_resume_balance_async(fs_info);
if (ret) {
- printk(KERN_WARNING "BTRFS: failed to resume balance\n");
+ btrfs_warn(fs_info, "failed to resume balance: %d", ret);
close_ctree(tree_root);
return ret;
}
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
- pr_warn("BTRFS: failed to resume dev_replace\n");
+ btrfs_warn(fs_info, "failed to resume device replace: %d", ret);
close_ctree(tree_root);
return ret;
}
btrfs_qgroup_rescan_resume(fs_info);
+ if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+ btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+ btrfs_info(fs_info, "clearing free space tree");
+ ret = btrfs_clear_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to clear free space tree: %d", ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ }
+
if (!fs_info->uuid_root) {
- pr_info("BTRFS: creating UUID tree\n");
+ btrfs_info(fs_info, "creating UUID tree");
ret = btrfs_create_uuid_tree(fs_info);
if (ret) {
- pr_warn("BTRFS: failed to create the UUID tree %d\n",
- ret);
+ btrfs_warn(fs_info,
+ "failed to create the UUID tree: %d", ret);
close_ctree(tree_root);
return ret;
}
} else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
fs_info->generation !=
btrfs_super_uuid_tree_generation(disk_super)) {
- pr_info("BTRFS: checking UUID tree\n");
+ btrfs_info(fs_info, "checking UUID tree");
ret = btrfs_check_uuid_tree(fs_info);
if (ret) {
- pr_warn("BTRFS: failed to check the UUID tree %d\n",
- ret);
+ btrfs_warn(fs_info,
+ "failed to check the UUID tree: %d", ret);
close_ctree(tree_root);
return ret;
}
@@ -3132,6 +3164,12 @@ retry_root_backup:
fs_info->open = 1;
+ /*
+ * backuproot only affect mount behavior, and if open_ctree succeeded,
+ * no need to keep the flag
+ */
+ btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
+
return 0;
fail_qgroup:
@@ -3186,7 +3224,7 @@ fail:
return err;
recovery_tree_root:
- if (!btrfs_test_opt(tree_root, RECOVERY))
+ if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
goto fail_tree_roots;
free_root_pointers(fs_info, 0);
@@ -3215,7 +3253,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
btrfs_warn_rl_in_rcu(device->dev_root->fs_info,
"lost page write due to IO error on %s",
rcu_str_deref(device->name));
- /* note, we dont' set_buffer_write_io_error because we have
+ /* note, we don't set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
@@ -3616,7 +3654,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (ret) {
mutex_unlock(
&root->fs_info->fs_devices->device_list_mutex);
- btrfs_std_error(root->fs_info, ret,
+ btrfs_handle_fs_error(root->fs_info, ret,
"errors while submitting device barriers.");
return ret;
}
@@ -3656,7 +3694,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
/* FUA is masked off if unsupported and can't be the reason */
- btrfs_std_error(root->fs_info, -EIO,
+ btrfs_handle_fs_error(root->fs_info, -EIO,
"%d errors while writing supers", total_errors);
return -EIO;
}
@@ -3674,7 +3712,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (total_errors > max_errors) {
- btrfs_std_error(root->fs_info, -EIO,
+ btrfs_handle_fs_error(root->fs_info, -EIO,
"%d errors while writing supers", total_errors);
return -EIO;
}
@@ -3932,11 +3970,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
return !ret;
}
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
-{
- return set_extent_buffer_uptodate(buf);
-}
-
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_root *root;
@@ -3992,7 +4025,6 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
balance_dirty_pages_ratelimited(
root->fs_info->btree_inode->i_mapping);
}
- return;
}
void btrfs_btree_balance_dirty(struct btrfs_root *root)
@@ -4015,8 +4047,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
struct btrfs_super_block *sb = fs_info->super_copy;
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+ printk(KERN_ERR "BTRFS: no valid FS found\n");
+ ret = -EINVAL;
+ }
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+ printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
@@ -4034,31 +4075,46 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
}
/*
- * The common minimum, we don't know if we can trust the nodesize/sectorsize
- * items yet, they'll be verified later. Issue just a warning.
+ * Check sectorsize and nodesize first, other check will need it.
+ * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
*/
- if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+ if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+ ret = -EINVAL;
+ }
+ /* Only PAGE SIZE is supported yet */
+ if (sectorsize != PAGE_SIZE) {
+ printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+ sectorsize, PAGE_SIZE);
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+ nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+ ret = -EINVAL;
+ }
+ if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+ printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+ le32_to_cpu(sb->__unused_leafsize),
+ nodesize);
+ ret = -EINVAL;
+ }
+
+ /* Root alignment check */
+ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
btrfs_super_root(sb));
- if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
btrfs_super_chunk_root(sb));
- if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
- printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
- btrfs_super_log_root(sb));
-
- /*
- * Check the lower bound, the alignment and other constraints are
- * checked later.
- */
- if (btrfs_super_nodesize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
- btrfs_super_nodesize(sb));
ret = -EINVAL;
}
- if (btrfs_super_sectorsize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
- btrfs_super_sectorsize(sb));
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+ printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+ btrfs_super_log_root(sb));
ret = -EINVAL;
}
@@ -4072,6 +4128,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
* Hint to catch really bogus numbers, bitflips or so, more exact checks are
* done later
*/
+ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+ btrfs_err(fs_info, "bytes_used is too small %llu",
+ btrfs_super_bytes_used(sb));
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+ btrfs_err(fs_info, "invalid stripesize %u",
+ btrfs_super_stripesize(sb));
+ ret = -EINVAL;
+ }
if (btrfs_super_num_devices(sb) > (1UL << 31))
printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
btrfs_super_num_devices(sb));
@@ -4309,7 +4375,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
if (ret)
break;
- clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
+ clear_extent_bits(dirty_pages, start, end, mark);
while (start <= end) {
eb = btrfs_find_tree_block(root->fs_info, start);
start += root->nodesize;
@@ -4344,7 +4410,7 @@ again:
if (ret)
break;
- clear_extent_dirty(unpin, start, end, GFP_NOFS);
+ clear_extent_dirty(unpin, start, end);
btrfs_error_unpin_extent_range(root, start, end);
cond_resched();
}