summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_inode_fork.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2021-03-22 09:52:03 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-03-25 16:47:51 -0700
commite6a688c3323840f3e388ba28fd2db86675b79917 (patch)
treea1413916fb8dc3287c05753d41990229884c0bba /fs/xfs/libxfs/xfs_inode_fork.c
parentb2c2974b8cdf1eb3ef90ff845eb27b19e2187b7e (diff)
xfs: initialise attr fork on inode create
When we allocate a new inode, we often need to add an attribute to the inode as part of the create. This can happen as a result of needing to add default ACLs or security labels before the inode is made visible to userspace. This is highly inefficient right now. We do the create transaction to allocate the inode, then we do an "add attr fork" transaction to modify the just created empty inode to set the inode fork offset to allow attributes to be stored, then we go and do the attribute creation. This means 3 transactions instead of 1 to allocate an inode, and this greatly increases the load on the CIL commit code, resulting in excessive contention on the CIL spin locks and performance degradation: 18.99% [kernel] [k] __pv_queued_spin_lock_slowpath 3.57% [kernel] [k] do_raw_spin_lock 2.51% [kernel] [k] __raw_callee_save___pv_queued_spin_unlock 2.48% [kernel] [k] memcpy 2.34% [kernel] [k] xfs_log_commit_cil The typical profile resulting from running fsmark on a selinux enabled filesytem is adds this overhead to the create path: - 15.30% xfs_init_security - 15.23% security_inode_init_security - 13.05% xfs_initxattrs - 12.94% xfs_attr_set - 6.75% xfs_bmap_add_attrfork - 5.51% xfs_trans_commit - 5.48% __xfs_trans_commit - 5.35% xfs_log_commit_cil - 3.86% _raw_spin_lock - do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.70% xfs_trans_alloc 0.52% xfs_trans_reserve - 5.41% xfs_attr_set_args - 5.39% xfs_attr_set_shortform.constprop.0 - 4.46% xfs_trans_commit - 4.46% __xfs_trans_commit - 4.33% xfs_log_commit_cil - 2.74% _raw_spin_lock - do_raw_spin_lock __pv_queued_spin_lock_slowpath 0.60% xfs_inode_item_format 0.90% xfs_attr_try_sf_addname - 1.99% selinux_inode_init_security - 1.02% security_sid_to_context_force - 1.00% security_sid_to_context_core - 0.92% sidtab_entry_to_string - 0.90% sidtab_sid2str_get 0.59% sidtab_sid2str_put.part.0 - 0.82% selinux_determine_inode_label - 0.77% security_transition_sid 0.70% security_compute_sid.part.0 And fsmark creation rate performance drops by ~25%. The key point to note here is that half the additional overhead comes from adding the attribute fork to the newly created inode. That's crazy, considering we can do this same thing at inode create time with a couple of lines of code and no extra overhead. So, if we know we are going to add an attribute immediately after creating the inode, let's just initialise the attribute fork inside the create transaction and chop that whole chunk of code out of the create fast path. This completely removes the performance drop caused by enabling SELinux, and the profile looks like: - 8.99% xfs_init_security - 9.00% security_inode_init_security - 6.43% xfs_initxattrs - 6.37% xfs_attr_set - 5.45% xfs_attr_set_args - 5.42% xfs_attr_set_shortform.constprop.0 - 4.51% xfs_trans_commit - 4.54% __xfs_trans_commit - 4.59% xfs_log_commit_cil - 2.67% _raw_spin_lock - 3.28% do_raw_spin_lock 3.08% __pv_queued_spin_lock_slowpath 0.66% xfs_inode_item_format - 0.90% xfs_attr_try_sf_addname - 0.60% xfs_trans_alloc - 2.35% selinux_inode_init_security - 1.25% security_sid_to_context_force - 1.21% security_sid_to_context_core - 1.19% sidtab_entry_to_string - 1.20% sidtab_sid2str_get - 0.86% sidtab_sid2str_put.part.0 - 0.62% _raw_spin_lock_irqsave - 0.77% do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.84% selinux_determine_inode_label - 0.83% security_transition_sid 0.86% security_compute_sid.part.0 Which indicates the XFS overhead of creating the selinux xattr has been halved. This doesn't fix the CIL lock contention problem, just means it's not a limiting factor for this workload. Lock contention in the security subsystems is going to be an issue soon, though... Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> [djwong: fix compilation error when CONFIG_SECURITY=n] Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
Diffstat (limited to 'fs/xfs/libxfs/xfs_inode_fork.c')
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c20
1 files changed, 15 insertions, 5 deletions
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 192bcf3e549d..1851d6f266d0 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -282,6 +282,19 @@ xfs_dfork_attr_shortform_size(
return be16_to_cpu(atp->hdr.totsize);
}
+struct xfs_ifork *
+xfs_ifork_alloc(
+ enum xfs_dinode_fmt format,
+ xfs_extnum_t nextents)
+{
+ struct xfs_ifork *ifp;
+
+ ifp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL);
+ ifp->if_format = format;
+ ifp->if_nextents = nextents;
+ return ifp;
+}
+
int
xfs_iformat_attr_fork(
struct xfs_inode *ip,
@@ -293,11 +306,8 @@ xfs_iformat_attr_fork(
* Initialize the extent count early, as the per-format routines may
* depend on it.
*/
- ip->i_afp = kmem_cache_zalloc(xfs_ifork_zone, GFP_NOFS | __GFP_NOFAIL);
- ip->i_afp->if_format = dip->di_aformat;
- if (unlikely(ip->i_afp->if_format == 0)) /* pre IRIX 6.2 file system */
- ip->i_afp->if_format = XFS_DINODE_FMT_EXTENTS;
- ip->i_afp->if_nextents = be16_to_cpu(dip->di_anextents);
+ ip->i_afp = xfs_ifork_alloc(dip->di_aformat,
+ be16_to_cpu(dip->di_anextents));
switch (ip->i_afp->if_format) {
case XFS_DINODE_FMT_LOCAL: