xfs: force buffer writeback before blocking on the ilock in inode reclaim

If we are doing synchronous inode reclaim we block the VM from making progress in memory reclaim. So if we encouter a flush locked inode promote it in the delwri list and wake up xfsbufd to write it out now. Without this we can get hangs of up to 30 seconds during workloads hitting synchronous inode reclaim. The scheme is copied from what we do for dquot reclaims. Reported-by: Simon Kirby <sim@hostway.ca> Signed-off-by: Christoph Hellwig <hch@lst.de> Tested-by: Simon Kirby <sim@hostway.ca> Signed-off-by: Ben Myers <bpm@sgi.com>
author: Christoph Hellwig <hch@lst.de> 2011-11-29 12:06:14 -0600
committer: Ben Myers <bpm@sgi.com> 2011-11-29 12:06:14 -0600
commit: 4dd2cb4a28b7ab1f37163a4eba280926a13a8749 (patch)
tree: f48cbb282ad4d52649b594657f00c1c8be753a47
parent: fa8b18edd752a8b4e9d1ee2cd615b82c93cf8bba (diff)
3 files changed, 33 insertions, 0 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c0237c602f11..755ee8164880 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2835,6 +2835,27 @@ corrupt_out:
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
+void
+xfs_promote_inode(
+	struct xfs_inode	*ip)
+{
+	struct xfs_buf		*bp;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+
+	bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
+			ip->i_imap.im_len, XBF_TRYLOCK);
+	if (!bp)
+		return;
+
+	if (XFS_BUF_ISDELAYWRITE(bp)) {
+		xfs_buf_delwri_promote(bp);
+		wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
+	}
+
+	xfs_buf_relse(bp);
+}
+
 /*
  * Return a pointer to the extent record at file index idx.
  */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 760140d1dd66..b4cd4739f98e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -498,6 +498,7 @@ int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
 void		xfs_iunpin_wait(xfs_inode_t *);
 int		xfs_iflush(xfs_inode_t *, uint);
+void		xfs_promote_inode(struct xfs_inode *);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
 void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index aa3dc1a4d53d..be5c51d8f757 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -770,6 +770,17 @@ restart:
 	if (!xfs_iflock_nowait(ip)) {
 		if (!(sync_mode & SYNC_WAIT))
 			goto out;
+
+		/*
+		 * If we only have a single dirty inode in a cluster there is
+		 * a fair chance that the AIL push may have pushed it into
+		 * the buffer, but xfsbufd won't touch it until 30 seconds
+		 * from now, and thus we will lock up here.
+		 *
+		 * Promote the inode buffer to the front of the delwri list
+		 * and wake up xfsbufd now.
+		 */
+		xfs_promote_inode(ip);
 		xfs_iflock(ip);
 	}
author	Christoph Hellwig <hch@lst.de>	2011-11-29 12:06:14 -0600
committer	Ben Myers <bpm@sgi.com>	2011-11-29 12:06:14 -0600
commit	4dd2cb4a28b7ab1f37163a4eba280926a13a8749 (patch)
tree	f48cbb282ad4d52649b594657f00c1c8be753a47
parent	fa8b18edd752a8b4e9d1ee2cd615b82c93cf8bba (diff)