summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Monakhov <dmonakhov@openvz.org>2014-12-02 18:08:53 -0500
committerTheodore Ts'o <tytso@mit.edu>2014-12-02 18:08:53 -0500
commit14516bb7bb6ffbd49f35389f9ece3b2045ba5815 (patch)
tree23cbe197595d9dc8534dd1dbab7e1237fbb74226
parentd952d69e268f833c85c0bafee9f67f9dba85044b (diff)
ext4: fix suboptimal seek_{data,hole} extents traversial
It is ridiculous practice to scan inode block by block, this technique applicable only for old indirect files. This takes significant amount of time for really large files. Let's reuse ext4_fiemap which already traverse inode-tree in most optimal meaner. TESTCASE: ftruncate64(fd, 0); ftruncate64(fd, 1ULL << 40); /* lseek will spin very long time */ lseek64(fd, 0, SEEK_DATA); lseek64(fd, 0, SEEK_HOLE); Original report: https://lkml.org/lkml/2014/10/16/620 Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/file.c220
2 files changed, 108 insertions, 116 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bed43081720f..e5d3eadf47b1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return generic_block_fiemap(inode, fieinfo, start, len,
- ext4_get_block);
+ return __generic_block_fiemap(inode, fieinfo, start, len,
+ ext4_get_block);
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
return -EBADR;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8131be8c0af3..513c12cf444c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
* we determine this extent as a data or a hole according to whether the
* page cache has data or not.
*/
-static int ext4_find_unwritten_pgoff(struct inode *inode,
- int whence,
- struct ext4_map_blocks *map,
- loff_t *offset)
+static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
+ loff_t endoff, loff_t *offset)
{
struct pagevec pvec;
- unsigned int blkbits;
pgoff_t index;
pgoff_t end;
- loff_t endoff;
loff_t startoff;
loff_t lastoff;
int found = 0;
- blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
- endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+
index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT;
@@ -408,147 +403,144 @@ out:
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
- struct extent_status es;
- ext4_lblk_t start, last, end;
- loff_t dataoff, isize;
- int blkbits;
- int ret = 0;
+ struct fiemap_extent_info fie;
+ struct fiemap_extent ext[2];
+ loff_t next;
+ int i, ret = 0;
mutex_lock(&inode->i_mutex);
-
- isize = i_size_read(inode);
- if (offset >= isize) {
+ if (offset >= inode->i_size) {
mutex_unlock(&inode->i_mutex);
return -ENXIO;
}
-
- blkbits = inode->i_sb->s_blocksize_bits;
- start = offset >> blkbits;
- last = start;
- end = isize >> blkbits;
- dataoff = offset;
-
- do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
+ fie.fi_flags = 0;
+ fie.fi_extents_max = 2;
+ fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
+ while (1) {
+ mm_segment_t old_fs = get_fs();
+
+ fie.fi_extents_mapped = 0;
+ memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
+
+ set_fs(get_ds());
+ ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
+ set_fs(old_fs);
+ if (ret)
break;
- }
- /*
- * If there is a delay extent at this offset,
- * it will be as a data.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
+ /* No extents found, EOF */
+ if (!fie.fi_extents_mapped) {
+ ret = -ENXIO;
break;
}
+ for (i = 0; i < fie.fi_extents_mapped; i++) {
+ next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- &map, &dataoff);
- if (unwritten)
- break;
- }
+ if (offset < (loff_t)ext[i].fe_logical)
+ offset = (loff_t)ext[i].fe_logical;
+ /*
+ * If extent is not unwritten, then it contains valid
+ * data, mapped or delayed.
+ */
+ if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
+ goto out;
- last++;
- dataoff = (loff_t)last << blkbits;
- } while (last <= end);
+ /*
+ * If there is a unwritten extent at this offset,
+ * it will be as a data or a hole according to page
+ * cache that has data or not.
+ */
+ if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+ next, &offset))
+ goto out;
+ if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
+ ret = -ENXIO;
+ goto out;
+ }
+ offset = next;
+ }
+ }
+ if (offset > inode->i_size)
+ offset = inode->i_size;
+out:
mutex_unlock(&inode->i_mutex);
+ if (ret)
+ return ret;
- if (dataoff > isize)
- return -ENXIO;
-
- return vfs_setpos(file, dataoff, maxsize);
+ return vfs_setpos(file, offset, maxsize);
}
/*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
+ * ext4_seek_hole() retrieves the offset for SEEK_HOLE
*/
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
- struct extent_status es;
- ext4_lblk_t start, last, end;
- loff_t holeoff, isize;
- int blkbits;
- int ret = 0;
+ struct fiemap_extent_info fie;
+ struct fiemap_extent ext[2];
+ loff_t next;
+ int i, ret = 0;
mutex_lock(&inode->i_mutex);
-
- isize = i_size_read(inode);
- if (offset >= isize) {
+ if (offset >= inode->i_size) {
mutex_unlock(&inode->i_mutex);
return -ENXIO;
}
- blkbits = inode->i_sb->s_blocksize_bits;
- start = offset >> blkbits;
- last = start;
- end = isize >> blkbits;
- holeoff = offset;
+ fie.fi_flags = 0;
+ fie.fi_extents_max = 2;
+ fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
+ while (1) {
+ mm_segment_t old_fs = get_fs();
- do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
- }
+ fie.fi_extents_mapped = 0;
+ memset(ext, 0, sizeof(*ext));
- /*
- * If there is a delay extent at this offset,
- * we will skip this extent.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- last = es.es_lblk + es.es_len;
- holeoff = (loff_t)last << blkbits;
- continue;
- }
+ set_fs(get_ds());
+ ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
+ set_fs(old_fs);
+ if (ret)
+ break;
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- &map, &holeoff);
- if (!unwritten) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
+ /* No extents found */
+ if (!fie.fi_extents_mapped)
+ break;
+
+ for (i = 0; i < fie.fi_extents_mapped; i++) {
+ next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
+ /*
+ * If extent is not unwritten, then it contains valid
+ * data, mapped or delayed.
+ */
+ if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
+ if (offset < (loff_t)ext[i].fe_logical)
+ goto out;
+ offset = next;
continue;
}
- }
-
- /* find a hole */
- break;
- } while (last <= end);
+ /*
+ * If there is a unwritten extent at this offset,
+ * it will be as a data or a hole according to page
+ * cache that has data or not.
+ */
+ if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+ next, &offset))
+ goto out;
+ offset = next;
+ if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
+ goto out;
+ }
+ }
+ if (offset > inode->i_size)
+ offset = inode->i_size;
+out:
mutex_unlock(&inode->i_mutex);
+ if (ret)
+ return ret;
- if (holeoff > isize)
- holeoff = isize;
-
- return vfs_setpos(file, holeoff, maxsize);
+ return vfs_setpos(file, offset, maxsize);
}
/*