diff options
| author | Darrick J. Wong <djwong@kernel.org> | 2021-01-05 17:45:09 -0800 |
|---|---|---|
| committer | Darrick J. Wong <djwong@kernel.org> | 2021-01-15 17:40:44 -0800 |
| commit | 4d8f99e0a4197987823f6df1681efd02e87c1b2a (patch) | |
| tree | 0be60bd1f66b98283f0861aa9192c09b770f1b5e | |
| parent | 182ed4eb067b7b081cb731498f19f51cd1476efb (diff) | |
| download | xfs-linux-4d8f99e0a4197987823f6df1681efd02e87c1b2a.tar.gz | |
xfs: add a ->xchg_file_range handler
Notice: this object is not reachable from any branch.
Add a function to handle file range exchange requests from the vfs.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Notice: this object is not reachable from any branch.
| -rw-r--r-- | fs/xfs/xfs_file.c | 49 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.c | 13 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.h | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 4 | ||||
| -rw-r--r-- | fs/xfs/xfs_xchgrange.c | 331 | ||||
| -rw-r--r-- | fs/xfs/xfs_xchgrange.h | 11 |
6 files changed, 409 insertions, 0 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 6fed82a60a75a..9a027755b7934 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -24,6 +24,7 @@ #include "xfs_pnfs.h" #include "xfs_iomap.h" #include "xfs_reflink.h" +#include "xfs_xchgrange.h" #include <linux/falloc.h> #include <linux/backing-dev.h> @@ -1222,6 +1223,53 @@ use_generic: } STATIC int +xfs_file_xchg_range( + struct file *file1, + struct file *file2, + struct file_xchg_range *fxr) +{ + struct inode *inode1 = file_inode(file1); + struct inode *inode2 = file_inode(file2); + struct xfs_inode *ip1 = XFS_I(inode1); + struct xfs_inode *ip2 = XFS_I(inode2); + struct xfs_mount *mp = ip1->i_mount; + unsigned int priv_flags = 0; + int ret; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + /* Update cmtime if the fd/inode don't forbid it. */ + if (likely(!(file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))) + priv_flags |= XFS_XCHG_RANGE_UPD_CMTIME1; + if (likely(!(file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))) + priv_flags |= XFS_XCHG_RANGE_UPD_CMTIME2; + + /* Lock both files against IO */ + ret = xfs_ilock2_io_mmap(ip1, ip2); + if (ret) + return ret; + + /* Prepare and then exchange file contents. 
*/ + ret = xfs_xchg_range_prep(file1, file2, fxr); + if (ret) + goto out_unlock; + + trace_xfs_file_xchg_range(ip1, fxr->file1_offset, fxr->length, ip2, + fxr->file2_offset); + + ret = xfs_xchg_range(ip1, ip2, fxr, priv_flags); + if (ret) + goto out_unlock; + +out_unlock: + xfs_iunlock2_io_mmap(ip1, ip2); + if (ret) + trace_xfs_file_xchg_range_error(ip2, ret, _RET_IP_); + return ret; +} + +STATIC int xfs_file_open( struct inode *inode, struct file *file) @@ -1485,6 +1533,7 @@ const struct file_operations xfs_file_operations = { .fadvise = xfs_file_fadvise, .copy_file_range = xfs_file_copy_range, .remap_file_range = xfs_file_remap_range, + .xchg_file_range = xfs_file_xchg_range, }; const struct file_operations xfs_dir_file_operations = { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 284219f56ca2c..2a08131ac67d8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3813,3 +3813,16 @@ xfs_iunlock2_io_mmap( if (!same_inode) inode_unlock(VFS_I(ip1)); } + +/* Returns the size of fundamental allocation unit for a file, in bytes. 
*/ +unsigned int +xfs_inode_alloc_unitsize( + struct xfs_inode *ip) +{ + unsigned int blocks = 1; + + if (XFS_IS_REALTIME_INODE(ip)) + blocks = ip->i_mount->m_sb.sb_rextsize; + + return XFS_FSB_TO_B(ip->i_mount, blocks); +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 95fa03e998762..1da7a16e33feb 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -489,5 +489,6 @@ void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); +unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip); #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f6f1ba90ebb37..84fb33376201c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3323,6 +3323,10 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cancel_cow); DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap); DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece); DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error); + +/* swapext tracepoints */ +DEFINE_DOUBLE_IO_EVENT(xfs_file_xchg_range); +DEFINE_INODE_ERROR_EVENT(xfs_file_xchg_range_error); DEFINE_INODE_IREC_EVENT(xfs_swapext_extent1); DEFINE_INODE_IREC_EVENT(xfs_swapext_extent2); DEFINE_ITRUNC_EVENT(xfs_swapext_update_inode_size); diff --git a/fs/xfs/xfs_xchgrange.c b/fs/xfs/xfs_xchgrange.c index 5e7098d5838ed..e53244cc3c071 100644 --- a/fs/xfs/xfs_xchgrange.c +++ b/fs/xfs/xfs_xchgrange.c @@ -13,8 +13,13 @@ #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" +#include "xfs_quota.h" +#include "xfs_bmap_util.h" +#include "xfs_reflink.h" +#include "xfs_trace.h" #include "xfs_swapext.h" #include "xfs_xchgrange.h" +#include "xfs_sb.h" /* Lock (and optionally join) two inodes for a file range exchange. */ void @@ -64,3 +69,329 @@ xfs_xchg_range_estimate( xfs_xchg_range_iunlock(req->ip1, req->ip2); return error; } + +/* Prepare two files to have their data exchanged. 
*/ +int +xfs_xchg_range_prep( + struct file *file1, + struct file *file2, + struct file_xchg_range *fxr) +{ + struct xfs_inode *ip1 = XFS_I(file_inode(file1)); + struct xfs_inode *ip2 = XFS_I(file_inode(file2)); + int ret; + + /* Verify both files are either real-time or non-realtime */ + if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2)) + return -EINVAL; + + /* + * The alignment checks in the VFS helpers cannot deal with allocation + * units that are not powers of 2. This can happen with the realtime + * volume if the extent size is set. Note that alignment checks are + * skipped if FULL_FILES is set. + */ + if (!(fxr->flags & FILE_XCHG_RANGE_FULL_FILES) && + !is_power_of_2(xfs_inode_alloc_unitsize(ip2))) + return -EOPNOTSUPP; + + ret = generic_xchg_file_range_prep(file1, file2, fxr, + xfs_inode_alloc_unitsize(ip2)); + if (ret) + return ret; + + /* Attach dquots to both inodes before changing block maps. */ + ret = xfs_qm_dqattach(ip2); + if (ret) + return ret; + ret = xfs_qm_dqattach(ip1); + if (ret) + return ret; + + /* Flush the relevant ranges of both files. */ + ret = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length); + if (ret) + return ret; + return xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length); +} + +/* Make a particular type of quota reservation. */ +STATIC int +xfs_xchg_range_reserve_quota_blocks( + struct xfs_trans **tpp, + const struct xfs_swapext_req *req, + xfs_filblks_t ip1_mapped, + xfs_filblks_t ip2_mapped, + unsigned int qmopts, + bool *quota_retry) +{ + int error; + + /* + * For each file, compute the net gain in the number of blocks that + * will be mapped into that file and reserve that much quota. The + * quota counts must be able to absorb at least that much space. 
+ */ + if (ip2_mapped > ip1_mapped) { + error = xfs_trans_reserve_quota_nblks(tpp, req->ip1, + ip2_mapped - ip1_mapped, 0, + qmopts, quota_retry); + if (!(*tpp) && req->ip1 != req->ip2) + xfs_iunlock(req->ip2, XFS_ILOCK_EXCL); + if (error || *quota_retry) + return error; + } + + if (ip1_mapped > ip2_mapped) { + error = xfs_trans_reserve_quota_nblks(tpp, req->ip2, + ip1_mapped - ip2_mapped, 0, + qmopts, quota_retry); + if (!(*tpp) && req->ip1 != req->ip2) + xfs_iunlock(req->ip1, XFS_ILOCK_EXCL); + if (error || *quota_retry) + return error; + } + + /* + * For each file, forcibly reserve the gross gain in mapped blocks so + * that we don't trip over any quota block reservation assertions. + * We must reserve the gross gain because the quota code subtracts from + * bcount the number of blocks that we unmap; it does not add that + * quantity back to the quota block reservation. + */ + error = xfs_trans_reserve_quota_nblks(tpp, req->ip1, ip1_mapped, 0, + XFS_QMOPT_FORCE_RES | qmopts, NULL); + if (error) + return error; + + return xfs_trans_reserve_quota_nblks(tpp, req->ip2, ip2_mapped, 0, + XFS_QMOPT_FORCE_RES | qmopts, NULL); +} + +/* + * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip + * this if quota enforcement is disabled or if both inodes' dquots are the + * same. + */ +STATIC int +xfs_xchg_range_reserve_quota( + struct xfs_trans **tpp, + const struct xfs_swapext_req *req, + const struct xfs_swapext_res *res, + bool *quota_retry) +{ + int error; + + /* + * Don't bother with a quota reservation if we're not enforcing them + * or the two inodes have the same dquots. 
+ */ + if (!XFS_IS_QUOTA_ON((*tpp)->t_mountp) || + (req->ip1->i_udquot == req->ip2->i_udquot && + req->ip1->i_gdquot == req->ip2->i_gdquot && + req->ip1->i_pdquot == req->ip2->i_pdquot)) + return 0; + + error = xfs_xchg_range_reserve_quota_blocks(tpp, req, res->ip1_bcount, + res->ip2_bcount, XFS_QMOPT_RES_REGBLKS, quota_retry); + if (error) + return error; + return xfs_xchg_range_reserve_quota_blocks(tpp, req, res->ip1_rtbcount, + res->ip2_rtbcount, XFS_QMOPT_RES_RTBLKS, quota_retry); +} + +/* Enable the atomic file extent swap feature in the primary superblock. */ +STATIC int +xfs_add_atomic_swap( + struct xfs_mount *mp) +{ + if (xfs_sb_version_hasatomicswap(&mp->m_sb)) + return 0; + + /* + * Atomic extent swapping is only supported on filesystems new enough + * to have reflink or rmap support enabled, and only if the filesystem + * isn't configured with realtime support. + */ + if (!xfs_sb_version_canatomicswap(&mp->m_sb) || + xfs_sb_version_hasrealtime(&mp->m_sb)) + return -EOPNOTSUPP; + + xfs_warn(mp, + "EXPERIMENTAL atomic file range swap feature added. Use at your own risk!"); + return xfs_add_incompat_log_feature(mp, + XFS_SB_FEAT_INCOMPAT_LOG_ATOMIC_SWAP); +} + +/* Exchange the contents of two files. */ +int +xfs_xchg_range( + struct xfs_inode *ip1, + struct xfs_inode *ip2, + const struct file_xchg_range *fxr, + unsigned int private_flags) +{ + struct xfs_swapext_req req = { + .ip1 = ip1, + .ip2 = ip2, + .whichfork = XFS_DATA_FORK, + }; + struct xfs_swapext_res res; + struct xfs_mount *mp = ip1->i_mount; + struct xfs_trans *tp; + loff_t req_len; + unsigned int attempts = 4; /* XXX ugly hack */ + bool quota_retry = false; + int error; + + /* We don't support whole-fork swapping yet. 
*/ + if (!xfs_sb_version_canatomicswap(&mp->m_sb)) + return -EOPNOTSUPP; + + if (fxr->flags & FILE_XCHG_RANGE_TO_EOF) + req.flags |= XFS_SWAPEXT_SET_SIZES; + if (fxr->flags & FILE_XCHG_RANGE_SKIP_FILE1_HOLES) + req.flags |= XFS_SWAPEXT_SKIP_FILE1_HOLES; + + req.startoff1 = XFS_B_TO_FSBT(mp, fxr->file1_offset); + req.startoff2 = XFS_B_TO_FSBT(mp, fxr->file2_offset); + + /* + * Round the request length up to the nearest fundamental unit of + * allocation. The prep function already checked that the request + * offsets and length in @fxr are safe to round up. + */ + req_len = round_up(fxr->length, xfs_inode_alloc_unitsize(ip2)); + req.blockcount = XFS_B_TO_FSB(mp, req_len); + + /* + * Cancel CoW fork preallocations for the ranges of both files. The + * prep function should have flushed all the dirty data, so the only + * extents remaining should be speculative. + */ + if (xfs_inode_has_cow_data(ip1)) { + error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset, + fxr->length, true); + if (error) + return error; + } + + if (xfs_inode_has_cow_data(ip2)) { + error = xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset, + fxr->length, true); + if (error) + return error; + } + + error = xfs_xchg_range_estimate(&req, &res); + if (error) + return error; + + /* + * If the caller wanted atomic swap, make sure the feature bit is + * turned on and ready to go. + */ + if (!(fxr->flags & FILE_XCHG_RANGE_NONATOMIC)) { + error = xfs_add_atomic_swap(mp); + if (error) + return error; + } + +retry: + /* Allocate the transaction, lock the inodes, and join them. */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, res.resblks, 0, + XFS_TRANS_RES_FDBLKS, &tp); + if (error) + return error; + + xfs_xchg_range_ilock(tp, ip1, ip2); + + trace_xfs_swap_extent_before(ip2, 0); + trace_xfs_swap_extent_before(ip1, 1); + + /* + * Do all of the inputs checking that we can only do once we've taken + * both ILOCKs. 
+ */ + error = generic_xchg_file_range_check_fresh(VFS_I(ip1), VFS_I(ip2), + fxr); + if (error) + goto out_trans_cancel; + + error = xfs_swapext_check_extents(mp, &req); + if (error) + goto out_trans_cancel; + + /* + * Reserve ourselves some quota if any of them are in enforcing mode. + * In theory we only need enough to satisfy the change in the number + * of blocks between the two ranges being remapped. + */ + error = xfs_xchg_range_reserve_quota(&tp, &req, &res, &quota_retry); + if (error) + return error; + if (quota_retry) { + if (--attempts == 0) { + xfs_emerg(mp, "OY! QUOTA RES FAILED 4x IN XCHGRANGE!"); + return -EL3HLT; + } + goto retry; + } + + /* If we got this far on a dry run, all parameters are ok. */ + if (fxr->flags & FILE_XCHG_RANGE_DRY_RUN) + goto out_trans_cancel; + + /* Update the mtime and ctime of both files. */ + if (private_flags & XFS_XCHG_RANGE_UPD_CMTIME1) + xfs_trans_ichgtime(tp, ip1, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + if (private_flags & XFS_XCHG_RANGE_UPD_CMTIME2) + xfs_trans_ichgtime(tp, ip2, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + /* Exchange the file contents by swapping the block mappings. */ + error = xfs_swapext(&tp, &req); + if (error) + goto out_trans_cancel; + + /* + * If the caller wanted us to exchange the contents of two complete + * files of unequal length, exchange the incore sizes now. This should + * be safe because we flushed both files' page caches and moved all the + * post-eof extents, so there should not be anything to zero. + */ + if (fxr->flags & FILE_XCHG_RANGE_TO_EOF) { + loff_t temp; + + temp = i_size_read(VFS_I(ip2)); + i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1))); + i_size_write(VFS_I(ip1), temp); + } + + /* Relog the inodes to keep transactions moving forward. */ + xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE); + xfs_trans_log_inode(tp, ip2, XFS_ILOG_CORE); + + /* + * Force the log to persist metadata updates if the caller or the + * administrator requires this. 
The VFS prep function already flushed + * the relevant parts of the page cache. + */ + if ((mp->m_flags & XFS_MOUNT_WSYNC) || + (fxr->flags & FILE_XCHG_RANGE_FSYNC)) + xfs_trans_set_sync(tp); + + error = xfs_trans_commit(tp); + + trace_xfs_swap_extent_after(ip2, 0); + trace_xfs_swap_extent_after(ip1, 1); + +out_unlock: + xfs_xchg_range_iunlock(ip1, ip2); + return error; + +out_trans_cancel: + xfs_trans_cancel(tp); + goto out_unlock; +} diff --git a/fs/xfs/xfs_xchgrange.h b/fs/xfs/xfs_xchgrange.h index ddda2bfb6f4b9..cca2970346890 100644 --- a/fs/xfs/xfs_xchgrange.h +++ b/fs/xfs/xfs_xchgrange.h @@ -15,5 +15,16 @@ void xfs_xchg_range_iunlock(struct xfs_inode *ip1, struct xfs_inode *ip2); int xfs_xchg_range_estimate(const struct xfs_swapext_req *req, struct xfs_swapext_res *res); +int xfs_xchg_range_prep(struct file *file1, struct file *file2, + struct file_xchg_range *fxr); + +/* Update ip1's change and mod time. */ +#define XFS_XCHG_RANGE_UPD_CMTIME1 (1 << 0) + +/* Update ip2's change and mod time. */ +#define XFS_XCHG_RANGE_UPD_CMTIME2 (1 << 1) + +int xfs_xchg_range(struct xfs_inode *ip1, struct xfs_inode *ip2, + const struct file_xchg_range *fxr, unsigned int private_flags); #endif /* __XFS_XCHGRANGE_H__ */ |
