summaryrefslogtreecommitdiff
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-17 18:44:00 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-17 18:44:00 -0800
commit0110c350c86d511be2130cb2a30dcbb76c4af750 (patch)
treed343a9e0fcb586a7110b13d411b314d33d404c08 /fs/ocfs2
parentd9cb5bfcc3339f1a63df8fe0af8cece33c83c3af (diff)
parent9763f7a4a5f7b1a7c480fa06d01b2bad25163c0a (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull more vfs updates from Al Viro: "In this pile: - autofs-namespace series - dedupe stuff - more struct path constification" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (40 commits) ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features ocfs2: charge quota for reflinked blocks ocfs2: fix bad pointer cast ocfs2: always unlock when completing dio writes ocfs2: don't eat io errors during _dio_end_io_write ocfs2: budget for extent tree splits when adding refcount flag ocfs2: prohibit refcounted swapfiles ocfs2: add newlines to some error messages ocfs2: convert inode refcount test to a helper simple_write_end(): don't zero in short copy into uptodate exofs: don't mess with simple_write_{begin,end} 9p: saner ->write_end() on failing copy into non-uptodate page fix gfs2_stuffed_write_end() on short copies fix ceph_write_end() nfs_write_end(): fix handling of short copies vfs: refactor clone/dedupe_file_range common functions fs: try to clone files first in vfs_copy_file_range vfs: misc struct path constification namespace.c: constify struct path passed to a bunch of primitives quota: constify struct path in quota_on ...
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/alloc.c7
-rw-r--r--fs/ocfs2/aops.c31
-rw-r--r--fs/ocfs2/file.c42
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/inode.h6
-rw-r--r--fs/ocfs2/move_extents.c10
-rw-r--r--fs/ocfs2/refcounttree.c464
-rw-r--r--fs/ocfs2/refcounttree.h7
-rw-r--r--fs/ocfs2/xattr.c4
9 files changed, 529 insertions, 45 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f72712f6c28d..d4ec0d8961a6 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5194,7 +5194,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
rec = &el->l_recs[index];
if (new_flags && (rec->e_flags & new_flags)) {
mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
- "extent that already had them",
+ "extent that already had them\n",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
new_flags);
goto out;
@@ -5202,7 +5202,7 @@ int ocfs2_change_extent_flag(handle_t *handle,
if (clear_flags && !(rec->e_flags & clear_flags)) {
mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
- "extent that didn't have them",
+ "extent that didn't have them\n",
(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
clear_flags);
goto out;
@@ -5713,8 +5713,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_refcount_tree *ref_tree = NULL;
if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
if (!refcount_tree_locked) {
ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 4d9c6f5ec28a..11556b7d93ec 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -464,6 +464,15 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
trace_ocfs2_bmap((unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)block);
+ /*
+ * The swap code (ab-)uses ->bmap to get a block mapping and then
+ * bypasseѕ the file system for actual I/O. We really can't allow
+ * that on refcounted inodes, so we have to skip out here. And yes,
+ * 0 is the magic code for a bmap error..
+ */
+ if (ocfs2_is_refcount_inode(inode))
+ return 0;
+
/* We don't need to lock journal system files, since they aren't
* accessed concurrently from multiple nodes.
*/
@@ -2253,10 +2262,10 @@ out:
return ret;
}
-static void ocfs2_dio_end_io_write(struct inode *inode,
- struct ocfs2_dio_write_ctxt *dwc,
- loff_t offset,
- ssize_t bytes)
+static int ocfs2_dio_end_io_write(struct inode *inode,
+ struct ocfs2_dio_write_ctxt *dwc,
+ loff_t offset,
+ ssize_t bytes)
{
struct ocfs2_cached_dealloc_ctxt dealloc;
struct ocfs2_extent_tree et;
@@ -2307,7 +2316,7 @@ static void ocfs2_dio_end_io_write(struct inode *inode,
mlog_errno(ret);
}
- di = (struct ocfs2_dinode *)di_bh;
+ di = (struct ocfs2_dinode *)di_bh->b_data;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
@@ -2364,6 +2373,8 @@ out:
if (locked)
inode_unlock(inode);
ocfs2_dio_free_write_ctx(inode, dwc);
+
+ return ret;
}
/*
@@ -2378,21 +2389,19 @@ static int ocfs2_dio_end_io(struct kiocb *iocb,
{
struct inode *inode = file_inode(iocb->ki_filp);
int level;
-
- if (bytes <= 0)
- return 0;
+ int ret = 0;
/* this io's submitter should not have unlocked this before we could */
BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
- if (private)
- ocfs2_dio_end_io_write(inode, private, offset, bytes);
+ if (bytes > 0 && private)
+ ret = ocfs2_dio_end_io_write(inode, private, offset, bytes);
ocfs2_iocb_clear_rw_locked(iocb);
level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
- return 0;
+ return ret;
}
static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 000c234d7bbd..c4889655d32b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1030,7 +1030,7 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
* Only quota files call this without a bh, and they can't be
* refcounted.
*/
- BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!di_bh && ocfs2_is_refcount_inode(inode));
BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
@@ -1667,9 +1667,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
*done = ret;
}
-static int ocfs2_remove_inode_range(struct inode *inode,
- struct buffer_head *di_bh, u64 byte_start,
- u64 byte_len)
+int ocfs2_remove_inode_range(struct inode *inode,
+ struct buffer_head *di_bh, u64 byte_start,
+ u64 byte_len)
{
int ret = 0, flags = 0, done = 0, i;
u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
@@ -1719,8 +1719,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
* within one cluster(means is not exactly aligned to clustersize).
*/
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
-
+ if (ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
if (ret) {
mlog_errno(ret);
@@ -2036,7 +2035,7 @@ int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
struct super_block *sb = inode->i_sb;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
- !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) ||
+ !ocfs2_is_refcount_inode(inode) ||
OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
return 0;
@@ -2440,6 +2439,31 @@ out:
return offset;
}
+static int ocfs2_file_clone_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len)
+{
+ return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+ len, false);
+}
+
+static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
+ u64 loff,
+ u64 len,
+ struct file *dst_file,
+ u64 dst_loff)
+{
+ int error;
+
+ error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+ len, true);
+ if (error)
+ return error;
+ return len;
+}
+
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
@@ -2479,6 +2503,8 @@ const struct file_operations ocfs2_fops = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
+ .clone_file_range = ocfs2_file_clone_range,
+ .dedupe_file_range = ocfs2_file_dedupe_range,
};
const struct file_operations ocfs2_dops = {
@@ -2524,6 +2550,8 @@ const struct file_operations ocfs2_fops_no_plocks = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
+ .clone_file_range = ocfs2_file_clone_range,
+ .dedupe_file_range = ocfs2_file_dedupe_range,
};
const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e8c62f22215c..897fd9a2e51d 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -82,4 +82,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
size_t count);
+int ocfs2_remove_inode_range(struct inode *inode,
+ struct buffer_head *di_bh, u64 byte_start,
+ u64 byte_len);
#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 5af68fcdf9d3..9b955f732bca 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -181,4 +181,10 @@ static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_
return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
}
+/* Does this inode have the reflink flag set? */
+static inline bool ocfs2_is_refcount_inode(struct inode *inode)
+{
+ return (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+}
+
#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 4e8f32eb0bdb..e52a2852d50d 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -235,10 +235,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
-
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
-
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
BUG_ON(!context->refcount_loc);
ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
@@ -581,10 +578,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
-
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
- OCFS2_HAS_REFCOUNT_FL));
-
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
BUG_ON(!context->refcount_loc);
ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 738b4ea8e990..d171d2c53f7f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -34,6 +34,7 @@
#include "xattr.h"
#include "namei.h"
#include "ocfs2_trace.h"
+#include "file.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -410,7 +411,7 @@ static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
goto out;
}
- BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
di = (struct ocfs2_dinode *)di_bh->b_data;
*ref_blkno = le64_to_cpu(di->i_refcount_loc);
@@ -569,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
u32 num_got;
u64 suballoc_loc, first_blkno;
- BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+ BUG_ON(ocfs2_is_refcount_inode(inode));
trace_ocfs2_create_refcount_tree(
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -707,7 +708,7 @@ static int ocfs2_set_refcount_tree(struct inode *inode,
struct ocfs2_refcount_block *rb;
struct ocfs2_refcount_tree *ref_tree;
- BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
+ BUG_ON(ocfs2_is_refcount_inode(inode));
ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
&ref_tree, &ref_root_bh);
@@ -774,7 +775,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
u16 bit = 0;
- if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
+ if (!ocfs2_is_refcount_inode(inode))
return 0;
BUG_ON(!ref_blkno);
@@ -2298,11 +2299,10 @@ int ocfs2_decrease_refcount(struct inode *inode,
{
int ret;
u64 ref_blkno;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_block(inode, &ref_blkno);
if (ret) {
@@ -2532,7 +2532,6 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
int *ref_blocks)
{
int ret;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *tree;
u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
@@ -2543,7 +2542,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
goto out;
}
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
refcount_loc, &tree);
@@ -3411,14 +3410,13 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
{
int ret;
u32 cow_start = 0, cow_len = 0;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct buffer_head *ref_root_bh = NULL;
struct ocfs2_refcount_tree *ref_tree;
struct ocfs2_cow_context *context = NULL;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
cpos, write_len, max_cpos,
@@ -3628,11 +3626,10 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
{
int ret;
struct ocfs2_xattr_value_root *xv = vb->vb_xv;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_cow_context *context = NULL;
u32 cow_start, cow_len;
- BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+ BUG_ON(!ocfs2_is_refcount_inode(inode));
ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
cpos, write_len, UINT_MAX,
@@ -3695,6 +3692,9 @@ int ocfs2_add_refcount_flag(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_alloc_context *meta_ac = NULL;
+ /* We need to be able to handle at least an extent tree split. */
+ ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el);
+
ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
ref_ci, ref_root_bh,
p_cluster, num_clusters,
@@ -3806,7 +3806,7 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
ocfs2_init_dealloc_ctxt(&dealloc);
- if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
+ if (!ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_create_refcount_tree(inode, di_bh);
if (ret) {
mlog_errno(ret);
@@ -3933,6 +3933,13 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
p_cluster, num_clusters,
meta_ac, dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ret = dquot_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, num_clusters));
if (ret)
mlog_errno(ret);
@@ -4441,3 +4448,434 @@ out:
return error;
}
+
+/* Update destination inode size, if necessary. */
+static int ocfs2_reflink_update_dest(struct inode *dest,
+ struct buffer_head *d_bh,
+ loff_t newlen)
+{
+ handle_t *handle;
+ int ret;
+
+ dest->i_blocks = ocfs2_inode_sector_count(dest);
+
+ if (newlen <= i_size_read(dest))
+ return 0;
+
+ handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb),
+ OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ return ret;
+ }
+
+ /* Extend i_size if needed. */
+ spin_lock(&OCFS2_I(dest)->ip_lock);
+ if (newlen > i_size_read(dest))
+ i_size_write(dest, newlen);
+ spin_unlock(&OCFS2_I(dest)->ip_lock);
+ dest->i_ctime = dest->i_mtime = current_time(dest);
+
+ ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+out_commit:
+ ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle);
+ return ret;
+}
+
+/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
+static int ocfs2_reflink_remap_extent(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+ struct ocfs2_extent_tree s_et;
+ struct ocfs2_extent_tree t_et;
+ struct ocfs2_dinode *dis;
+ struct buffer_head *ref_root_bh = NULL;
+ struct ocfs2_refcount_tree *ref_tree;
+ struct ocfs2_super *osb;
+ loff_t pstart, plen;
+ u32 p_cluster, num_clusters, slast, spos, tpos;
+ unsigned int ext_flags;
+ int ret = 0;
+
+ osb = OCFS2_SB(s_inode->i_sb);
+ dis = (struct ocfs2_dinode *)s_bh->b_data;
+ ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh);
+ ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh);
+
+ spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in);
+ tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out);
+ slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len);
+
+ while (spos < slast) {
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
+
+ /* Look up the extent. */
+ ret = ocfs2_get_clusters(s_inode, spos, &p_cluster,
+ &num_clusters, &ext_flags);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ num_clusters = min_t(u32, num_clusters, slast - spos);
+
+ /* Punch out the dest range. */
+ pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos);
+ plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters);
+ ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (p_cluster == 0)
+ goto next_loop;
+
+ /* Lock the refcount btree... */
+ ret = ocfs2_lock_refcount_tree(osb,
+ le64_to_cpu(dis->i_refcount_loc),
+ 1, &ref_tree, &ref_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /* Mark s_inode's extent as refcounted. */
+ if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) {
+ ret = ocfs2_add_refcount_flag(s_inode, &s_et,
+ &ref_tree->rf_ci,
+ ref_root_bh, spos,
+ p_cluster, num_clusters,
+ dealloc, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_refcount;
+ }
+ }
+
+ /* Map in the new extent. */
+ ext_flags |= OCFS2_EXT_REFCOUNTED;
+ ret = ocfs2_add_refcounted_extent(t_inode, &t_et,
+ &ref_tree->rf_ci,
+ ref_root_bh,
+ tpos, p_cluster,
+ num_clusters,
+ ext_flags,
+ dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock_refcount;
+ }
+
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+ brelse(ref_root_bh);
+next_loop:
+ spos += num_clusters;
+ tpos += num_clusters;
+ }
+
+out:
+ return ret;
+out_unlock_refcount:
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
+ brelse(ref_root_bh);
+ return ret;
+}
+
+/* Set up refcount tree and remap s_inode to t_inode. */
+static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ loff_t pos_in,
+ struct inode *t_inode,
+ struct buffer_head *t_bh,
+ loff_t pos_out,
+ loff_t len)
+{
+ struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_super *osb;
+ struct ocfs2_dinode *dis;
+ struct ocfs2_dinode *dit;
+ int ret;
+
+ osb = OCFS2_SB(s_inode->i_sb);
+ dis = (struct ocfs2_dinode *)s_bh->b_data;
+ dit = (struct ocfs2_dinode *)t_bh->b_data;
+ ocfs2_init_dealloc_ctxt(&dealloc);
+
+ /*
+ * If we're reflinking the entire file and the source is inline
+ * data, just copy the contents.
+ */
+ if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) &&
+ i_size_read(t_inode) <= len &&
+ (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) {
+ ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh);
+ if (ret)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ /*
+ * If both inodes belong to two different refcount groups then
+ * forget it because we don't know how (or want) to go merging
+ * refcount trees.
+ */
+ ret = -EOPNOTSUPP;
+ if (ocfs2_is_refcount_inode(s_inode) &&
+ ocfs2_is_refcount_inode(t_inode) &&
+ le64_to_cpu(dis->i_refcount_loc) !=
+ le64_to_cpu(dit->i_refcount_loc))
+ goto out;
+
+ /* Neither inode has a refcount tree. Add one to s_inode. */
+ if (!ocfs2_is_refcount_inode(s_inode) &&
+ !ocfs2_is_refcount_inode(t_inode)) {
+ ret = ocfs2_create_refcount_tree(s_inode, s_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Ensure that both inodes end up with the same refcount tree. */
+ if (!ocfs2_is_refcount_inode(s_inode)) {
+ ret = ocfs2_set_refcount_tree(s_inode, s_bh,
+ le64_to_cpu(dit->i_refcount_loc));
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ if (!ocfs2_is_refcount_inode(t_inode)) {
+ ret = ocfs2_set_refcount_tree(t_inode, t_bh,
+ le64_to_cpu(dis->i_refcount_loc));
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Turn off inline data in the dest file. */
+ if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ /* Actually remap extents now. */
+ ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
+ pos_out, len, &dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+out:
+ if (ocfs2_dealloc_has_cluster(&dealloc)) {
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &dealloc);
+ }
+
+ return ret;
+}
+
+/* Lock an inode and grab a bh pointing to the inode. */
+static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+ struct buffer_head **bh1,
+ struct inode *t_inode,
+ struct buffer_head **bh2)
+{
+ struct inode *inode1;
+ struct inode *inode2;
+ struct ocfs2_inode_info *oi1;
+ struct ocfs2_inode_info *oi2;
+ bool same_inode = (s_inode == t_inode);
+ int status;
+
+ /* First grab the VFS and rw locks. */
+ lock_two_nondirectories(s_inode, t_inode);
+ inode1 = s_inode;
+ inode2 = t_inode;
+ if (inode1->i_ino > inode2->i_ino)
+ swap(inode1, inode2);
+
+ status = ocfs2_rw_lock(inode1, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_i1;
+ }
+ if (!same_inode) {
+ status = ocfs2_rw_lock(inode2, 1);
+ if (status) {
+ mlog_errno(status);
+ goto out_i2;
+ }
+ }
+
+ /* Now go for the cluster locks */
+ oi1 = OCFS2_I(inode1);
+ oi2 = OCFS2_I(inode2);
+
+ trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
+ (unsigned long long)oi2->ip_blkno);
+
+ if (*bh1)
+ *bh1 = NULL;
+ if (*bh2)
+ *bh2 = NULL;
+
+ /* We always want to lock the one with the lower lockid first. */
+ if (oi1->ip_blkno > oi2->ip_blkno)
+ mlog_errno(-ENOLCK);
+
+ /* lock id1 */
+ status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ goto out_rw2;
+ }
+
+ /* lock id2 */
+ if (!same_inode) {
+ status = ocfs2_inode_lock_nested(inode2, bh2, 1,
+ OI_LS_REFLINK_TARGET);
+ if (status < 0) {
+ if (status != -ENOENT)
+ mlog_errno(status);
+ goto out_cl1;
+ }
+ } else
+ *bh2 = *bh1;
+
+ trace_ocfs2_double_lock_end(
+ (unsigned long long)OCFS2_I(inode1)->ip_blkno,
+ (unsigned long long)OCFS2_I(inode2)->ip_blkno);
+
+ return 0;
+
+out_cl1:
+ ocfs2_inode_unlock(inode1, 1);
+ brelse(*bh1);
+ *bh1 = NULL;
+out_rw2:
+ ocfs2_rw_unlock(inode2, 1);
+out_i2:
+ ocfs2_rw_unlock(inode1, 1);
+out_i1:
+ unlock_two_nondirectories(s_inode, t_inode);
+ return status;
+}
+
+/* Unlock both inodes and release buffers. */
+static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+ struct buffer_head *s_bh,
+ struct inode *t_inode,
+ struct buffer_head *t_bh)
+{
+ ocfs2_inode_unlock(s_inode, 1);
+ ocfs2_rw_unlock(s_inode, 1);
+ brelse(s_bh);
+ if (s_inode != t_inode) {
+ ocfs2_inode_unlock(t_inode, 1);
+ ocfs2_rw_unlock(t_inode, 1);
+ brelse(t_bh);
+ }
+ unlock_two_nondirectories(s_inode, t_inode);
+}
+
+/* Link a range of blocks from one file to another. */
+int ocfs2_reflink_remap_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len,
+ bool is_dedupe)
+{
+ struct inode *inode_in = file_inode(file_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
+ struct buffer_head *in_bh = NULL, *out_bh = NULL;
+ bool same_inode = (inode_in == inode_out);
+ ssize_t ret;
+
+ if (!ocfs2_refcount_tree(osb))
+ return -EOPNOTSUPP;
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return -EROFS;
+
+ /* Lock both files against IO */
+ ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
+ if (ret)
+ return ret;
+
+ /* Check file eligibility and prepare for block sharing. */
+ ret = -EINVAL;
+ if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
+ (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
+ goto out_unlock;
+
+ ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+ &len, is_dedupe);
+ if (ret || len == 0)
+ goto out_unlock;
+
+ /* Lock out changes to the allocation maps and remap. */
+ down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
+ SINGLE_DEPTH_NESTING);
+
+ ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
+ out_bh, pos_out, len);
+
+ /* Zap any page cache for the destination file's range. */
+ if (!ret)
+ truncate_inode_pages_range(&inode_out->i_data, pos_out,
+ PAGE_ALIGN(pos_out + len) - 1);
+
+ up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+ if (!same_inode)
+ up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ /*
+ * Empty the extent map so that we may get the right extent
+ * record from the disk.
+ */
+ ocfs2_extent_map_trunc(inode_in, 0);
+ ocfs2_extent_map_trunc(inode_out, 0);
+
+ ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return 0;
+
+out_unlock:
+ ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+ return ret;
+}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 6422bbcdb525..4af55bf4b35b 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -115,4 +115,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
const char __user *oldname,
const char __user *newname,
bool preserve);
+int ocfs2_reflink_remap_range(struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ u64 len,
+ bool is_dedupe);
+
#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index cb157a34a656..3c5384d9b3a5 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2577,7 +2577,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
return 0;
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
+ if (ocfs2_is_refcount_inode(inode)) {
ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
le64_to_cpu(di->i_refcount_loc),
1, &ref_tree, &ref_root_bh);
@@ -3608,7 +3608,7 @@ int ocfs2_xattr_set(struct inode *inode,
}
/* Check whether the value is refcounted and do some preparation. */
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
+ if (ocfs2_is_refcount_inode(inode) &&
(!xis.not_found || !xbs.not_found)) {
ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
&xis, &xbs, &ref_tree,