summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-10 15:51:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-10 15:51:48 -0800
commit54c2c5761febcca46c8037d3a81612991e6c209a (patch)
treee6a686088f16e182138bdf8ae13f6cd27b8613b0
parent609eac1c152353ee4cd6f292d49f0aa0b885951a (diff)
parentff9cb1c4eead5e4c292e75cd3170a82d66944101 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Ext4 commits for 3.3 merge window * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (32 commits) ext4: fix undefined behavior in ext4_fill_flex_info() ext4: make more symbols static ext4: make local symbol ext4_initxattrs static jbd2: fix hung processes in jbd2_journal_lock_updates() ext4: reserve new feature flag codepoints ext4: Report max_batch_time option correctly ext4: add missing ext4_resize_end on error paths ext4: let ext4_group_add() use common code ext4: let ext4_group_extend() use common code ext4: add new online resize interface ext4: add a new function which adds a flex group to a fs ext4: add a new function which allocates bitmaps and inode tables ext4: pass verify_reserved_gdb() the number of group decriptors ext4: add a function which updates the super block during online resizing ext4: add a function which sets up a block group descriptors of a flex bg ext4: add a function which sets up group blocks of a flex bg ext4: add a structure which will be used by 64bit-resize interface ext4: add a function which adds a new group descriptors to a fs ext4: add a function which extends a group without checking parameters ext4: use proper little-endian bitops ...
-rw-r--r--Documentation/filesystems/ext4.txt7
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ext4.h29
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/ialloc.c18
-rw-r--r--fs/ext4/inode.c143
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/resize.c1175
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/revoke.c34
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--include/linux/jbd2.h1
-rw-r--r--include/trace/events/ext4.h6
16 files changed, 1079 insertions, 463 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 4917cf24a5e0..10ec4639f152 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
behaviour may change in the future as it is
not necessary and has been done this way only
for sake of simplicity.
+
+ EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number
+ of blocks of resized filesystem is passed in via
+ 64 bit integer argument. The kernel allocates
+ bitmaps and inode table, the userspace tool thus
+ just passes the new number of blocks.
+
..............................................................................
References
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 12ccacda44e0..f9e2cd8cf711 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -23,6 +23,8 @@
#include <trace/events/ext4.h>
+static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+ ext4_group_t block_group);
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
* This function returns the number of file system metadata clusters at
* the beginning of a block group, including the reserved gdt blocks.
*/
-unsigned ext4_num_base_meta_clusters(struct super_block *sb,
+static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1554b15f91bc..513004fc3d84 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,6 +511,14 @@ struct ext4_new_group_data {
__u32 free_blocks_count;
};
+/* Indexes used to index group tables in ext4_new_group_data */
+enum {
+ BLOCK_BITMAP = 0, /* block bitmap */
+ INODE_BITMAP, /* inode bitmap */
+ INODE_TABLE, /* inode tables */
+ GROUP_TABLE_COUNT,
+};
+
/*
* Flags used by ext4_map_blocks()
*/
@@ -575,6 +583,7 @@ struct ext4_new_group_data {
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
+#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -957,12 +966,13 @@ struct ext4_inode_info {
#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
EXT4_MOUNT2_##opt)
-#define ext4_set_bit __test_and_set_bit_le
+#define ext4_test_and_set_bit __test_and_set_bit_le
+#define ext4_set_bit __set_bit_le
#define ext4_set_bit_atomic ext2_set_bit_atomic
-#define ext4_clear_bit __test_and_clear_bit_le
+#define ext4_test_and_clear_bit __test_and_clear_bit_le
+#define ext4_clear_bit __clear_bit_le
#define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le
-#define ext4_find_first_zero_bit find_first_zero_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
+#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
+#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
+#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp);
-extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
- ext4_group_t block_group);
extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp);
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_block_truncate_page(handle_t *handle,
- struct address_space *mapping, loff_t from);
-extern int ext4_block_zero_page_range(handle_t *handle,
- struct address_space *mapping, loff_t from, loff_t length);
extern int ext4_discard_partial_page_buffers(handle_t *handle,
struct address_space *mapping, loff_t from,
loff_t length, int flags);
-extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
- struct inode *inode, struct page *page, loff_t from,
- loff_t length, int flags);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
extern int ext4_group_extend(struct super_block *sb,
struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count);
+extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
/* super.c */
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 841faf5fb785..74f23c292e1b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
ext4_lblk_t i, pg_lblk;
pgoff_t index;
+ if (!test_opt(inode->i_sb, DELALLOC))
+ return 0;
+
/* reverse search wont work if fs block size is less than page size */
if (inode->i_blkbits < PAGE_CACHE_SHIFT)
search_hint_reverse = 0;
@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
int err = 0;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
- ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
- "block %llu, max_blocks %u, flags %d, allocated %u",
+ ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
+ "block %llu, max_blocks %u, flags %x, allocated %u\n",
inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
flags, allocated);
ext4_ext_show_leaf(inode, path);
@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
- ext4_lblk_t rr_cluster_start, rr_cluster_end;
+ ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len = ext4_ext_get_actual_len(ex);
@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
/* The requested region passed into ext4_map_blocks() */
rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
- rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
if ((rr_cluster_start == ex_cluster_end) ||
(rr_cluster_start == ex_cluster_start)) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4637af036d9c..25d8c9781ad9 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
fatal = ext4_journal_get_write_access(handle, bh2);
}
ext4_lock_group(sb, block_group);
- cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
+ cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
if (fatal || !cleared) {
ext4_unlock_group(sb, block_group);
goto out;
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
- unsigned int freei, avefreei;
+ unsigned int freei, avefreei, grp_free;
ext4_fsblk_t freeb, avefreec;
unsigned int ndirs;
int max_dirs, min_inodes;
@@ -477,8 +477,8 @@ fallback_retry:
for (i = 0; i < ngroups; i++) {
grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc(sb, grp, NULL);
- if (desc && ext4_free_inodes_count(sb, desc) &&
- ext4_free_inodes_count(sb, desc) >= avefreei) {
+ grp_free = ext4_free_inodes_count(sb, desc);
+ if (desc && grp_free && grp_free >= avefreei) {
*group = grp;
return 0;
}
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
*/
down_read(&grp->alloc_sem);
ext4_lock_group(sb, group);
- if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
+ if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */
retval = 1;
goto err_ret;
@@ -885,8 +885,12 @@ got:
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
if (insert_inode_locked(inode) < 0) {
- err = -EINVAL;
- goto fail_drop;
+ /*
+ * Likely a bitmap corruption causing inode to be allocated
+ * twice.
+ */
+ err = -EIO;
+ goto fail;
}
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aa8efa6572d6..feaa82fe629d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
+static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+ struct inode *inode, struct page *page, loff_t from,
+ loff_t length, int flags);
/*
* Test whether an inode is a fast symlink.
@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (!io_end || !size)
goto out;
- ext_debug("ext4_end_io_dio(): io_end 0x%p"
+ ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
iocb->private, io_end->inode->i_ino, iocb, offset,
size);
@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
*
* Returns zero on sucess or negative on failure.
*/
-int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
+static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
struct inode *inode, struct page *page, loff_t from,
loff_t length, int flags)
{
@@ -3300,126 +3303,6 @@ next:
return err;
}
-/*
- * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This required during truncate. We need to physically zero the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- */
-int ext4_block_truncate_page(handle_t *handle,
- struct address_space *mapping, loff_t from)
-{
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
- unsigned length;
- unsigned blocksize;
- struct inode *inode = mapping->host;
-
- blocksize = inode->i_sb->s_blocksize;
- length = blocksize - (offset & (blocksize - 1));
-
- return ext4_block_zero_page_range(handle, mapping, from, length);
-}
-
-/*
- * ext4_block_zero_page_range() zeros out a mapping of length 'length'
- * starting from file offset 'from'. The range to be zero'd must
- * be contained with in one block. If the specified range exceeds
- * the end of the block it will be shortened to end of the block
- * that cooresponds to 'from'
- */
-int ext4_block_zero_page_range(handle_t *handle,
- struct address_space *mapping, loff_t from, loff_t length)
-{
- ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
- unsigned blocksize, max, pos;
- ext4_lblk_t iblock;
- struct inode *inode = mapping->host;
- struct buffer_head *bh;
- struct page *page;
- int err = 0;
-
- page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
- mapping_gfp_mask(mapping) & ~__GFP_FS);
- if (!page)
- return -ENOMEM;
-
- blocksize = inode->i_sb->s_blocksize;
- max = blocksize - (offset & (blocksize - 1));
-
- /*
- * correct length if it does not fall between
- * 'from' and the end of the block
- */
- if (length > max || length < 0)
- length = max;
-
- iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
-
- /* Find the buffer that contains "offset" */
- bh = page_buffers(page);
- pos = blocksize;
- while (offset >= pos) {
- bh = bh->b_this_page;
- iblock++;
- pos += blocksize;
- }
-
- err = 0;
- if (buffer_freed(bh)) {
- BUFFER_TRACE(bh, "freed: skip");
- goto unlock;
- }
-
- if (!buffer_mapped(bh)) {
- BUFFER_TRACE(bh, "unmapped");
- ext4_get_block(inode, iblock, bh, 0);
- /* unmapped? It's a hole - nothing to do */
- if (!buffer_mapped(bh)) {
- BUFFER_TRACE(bh, "still unmapped");
- goto unlock;
- }
- }
-
- /* Ok, it's mapped. Make sure it's up-to-date */
- if (PageUptodate(page))
- set_buffer_uptodate(bh);
-
- if (!buffer_uptodate(bh)) {
- err = -EIO;
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- /* Uhhuh. Read error. Complain and punt. */
- if (!buffer_uptodate(bh))
- goto unlock;
- }
-
- if (ext4_should_journal_data(inode)) {
- BUFFER_TRACE(bh, "get write access");
- err = ext4_journal_get_write_access(handle, bh);
- if (err)
- goto unlock;
- }
-
- zero_user(page, offset, length);
-
- BUFFER_TRACE(bh, "zeroed end of block");
-
- err = 0;
- if (ext4_should_journal_data(inode)) {
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- } else
- mark_buffer_dirty(bh);
-
-unlock:
- unlock_page(page);
- page_cache_release(page);
- return err;
-}
-
int ext4_can_truncate(struct inode *inode)
{
if (S_ISREG(inode->i_mode))
@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return 0;
if (is_journal_aborted(journal))
return -EROFS;
+ /* We have to allocate physical blocks for delalloc blocks
+ * before flushing journal. otherwise delalloc blocks can not
+ * be allocated any more. even more truncate on delalloc blocks
+ * could trigger BUG by flushing delalloc blocks in journal.
+ * There is no delalloc block in non-journal data mode.
+ */
+ if (val && test_opt(inode->i_sb, DELALLOC)) {
+ err = ext4_alloc_da_blocks(inode);
+ if (err < 0)
+ return err;
+ }
jbd2_journal_lock_updates(journal);
- jbd2_journal_flush(journal);
/*
* OK, there are no updates running now, and all cached data is
@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
if (val)
ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
- else
+ else {
+ jbd2_journal_flush(journal);
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
+ }
ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e87a932b073b..6eee25591b81 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,8 @@
#include "ext4_jbd2.h"
#include "ext4.h"
+#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = filp->f_dentry->d_inode;
@@ -186,19 +188,22 @@ setversion_out:
if (err)
return err;
- if (get_user(n_blocks_count, (__u32 __user *)arg))
- return -EFAULT;
+ if (get_user(n_blocks_count, (__u32 __user *)arg)) {
+ err = -EFAULT;
+ goto group_extend_out;
+ }
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not supported with bigalloc");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto group_extend_out;
}
err = mnt_want_write_file(filp);
if (err)
- return err;
+ goto group_extend_out;
err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
if (EXT4_SB(sb)->s_journal) {
@@ -209,8 +214,8 @@ setversion_out:
if (err == 0)
err = err2;
mnt_drop_write_file(filp);
+group_extend_out:
ext4_resize_end(sb);
-
return err;
}
@@ -251,8 +256,7 @@ setversion_out:
err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len);
mnt_drop_write_file(filp);
- if (me.moved_len > 0)
- file_remove_suid(donor_filp);
+ mnt_drop_write(filp->f_path.mnt);
if (copy_to_user((struct move_extent __user *)arg,
&me, sizeof(me)))
@@ -271,19 +275,22 @@ mext_out:
return err;
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
- sizeof(input)))
- return -EFAULT;
+ sizeof(input))) {
+ err = -EFAULT;
+ goto group_add_out;
+ }
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not supported with bigalloc");
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto group_add_out;
}
err = mnt_want_write_file(filp);
if (err)
- return err;
+ goto group_add_out;
err = ext4_group_add(sb, &input);
if (EXT4_SB(sb)->s_journal) {
@@ -294,8 +301,8 @@ mext_out:
if (err == 0)
err = err2;
mnt_drop_write_file(filp);
+group_add_out:
ext4_resize_end(sb);
-
return err;
}
@@ -335,6 +342,60 @@ mext_out:
return err;
}
+ case EXT4_IOC_RESIZE_FS: {
+ ext4_fsblk_t n_blocks_count;
+ struct super_block *sb = inode->i_sb;
+ int err = 0, err2 = 0;
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online resizing not (yet) supported with bigalloc");
+ return -EOPNOTSUPP;
+ }
+
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+ EXT4_FEATURE_INCOMPAT_META_BG)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online resizing not (yet) supported with meta_bg");
+ return -EOPNOTSUPP;
+ }
+
+ if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
+ sizeof(__u64))) {
+ return -EFAULT;
+ }
+
+ if (n_blocks_count > MAX_32_NUM &&
+ !EXT4_HAS_INCOMPAT_FEATURE(sb,
+ EXT4_FEATURE_INCOMPAT_64BIT)) {
+ ext4_msg(sb, KERN_ERR,
+ "File system only supports 32-bit block numbers");
+ return -EOPNOTSUPP;
+ }
+
+ err = ext4_resize_begin(sb);
+ if (err)
+ return err;
+
+ err = mnt_want_write(filp->f_path.mnt);
+ if (err)
+ goto resizefs_out;
+
+ err = ext4_resize_fs(sb, n_blocks_count);
+ if (EXT4_SB(sb)->s_journal) {
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ }
+ if (err == 0)
+ err = err2;
+ mnt_drop_write(filp->f_path.mnt);
+resizefs_out:
+ ext4_resize_end(sb);
+ return err;
+ }
+
case FITRIM:
{
struct request_queue *q = bdev_get_queue(sb->s_bdev);
@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
case EXT4_IOC_MOVE_EXT:
case FITRIM:
+ case EXT4_IOC_RESIZE_FS:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e2d8be8f28bf..cb990b21c698 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(pa);
+ trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 996780ab4f4e..f9d948f0eb86 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
return err;
}
+/*
+ * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
+ * group each time.
+ */
+struct ext4_new_flex_group_data {
+ struct ext4_new_group_data *groups; /* new_group_data for groups
+ in the flex group */
+ __u16 *bg_flags; /* block group flags of groups
+ in @groups */
+ ext4_group_t count; /* number of groups in @groups
+ */
+};
+
+/*
+ * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
+ * @flexbg_size.
+ *
+ * Returns NULL on failure otherwise address of the allocated structure.
+ */
+static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
+{
+ struct ext4_new_flex_group_data *flex_gd;
+
+ flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
+ if (flex_gd == NULL)
+ goto out3;
+
+ flex_gd->count = flexbg_size;
+
+ flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
+ flexbg_size, GFP_NOFS);
+ if (flex_gd->groups == NULL)
+ goto out2;
+
+ flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
+ if (flex_gd->bg_flags == NULL)
+ goto out1;
+
+ return flex_gd;
+
+out1:
+ kfree(flex_gd->groups);
+out2:
+ kfree(flex_gd);
+out3:
+ return NULL;
+}
+
+static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
+{
+ kfree(flex_gd->bg_flags);
+ kfree(flex_gd->groups);
+ kfree(flex_gd);
+}
+
+/*
+ * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
+ * and inode tables for a flex group.
+ *
+ * This function is used by 64bit-resize. Note that this function allocates
+ * group tables from the 1st group of groups contained by @flexgd, which may
+ * be a partial of a flex group.
+ *
+ * @sb: super block of fs to which the groups belongs
+ */
+static void ext4_alloc_group_tables(struct super_block *sb,
+ struct ext4_new_flex_group_data *flex_gd,
+ int flexbg_size)
+{
+ struct ext4_new_group_data *group_data = flex_gd->groups;
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ ext4_fsblk_t start_blk;
+ ext4_fsblk_t last_blk;
+ ext4_group_t src_group;
+ ext4_group_t bb_index = 0;
+ ext4_group_t ib_index = 0;
+ ext4_group_t it_index = 0;
+ ext4_group_t group;
+ ext4_group_t last_group;
+ unsigned overhead;
+
+ BUG_ON(flex_gd->count == 0 || group_data == NULL);
+
+ src_group = group_data[0].group;
+ last_group = src_group + flex_gd->count - 1;
+
+ BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
+ (last_group & ~(flexbg_size - 1))));
+next_group:
+ group = group_data[0].group;
+ start_blk = ext4_group_first_block_no(sb, src_group);
+ last_blk = start_blk + group_data[src_group - group].blocks_count;
+
+ overhead = ext4_bg_has_super(sb, src_group) ?
+ (1 + ext4_bg_num_gdb(sb, src_group) +
+ le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
+
+ start_blk += overhead;
+
+ BUG_ON(src_group >= group_data[0].group + flex_gd->count);
+ /* We collect contiguous blocks as much as possible. */
+ src_group++;
+ for (; src_group <= last_group; src_group++)
+ if (!ext4_bg_has_super(sb, src_group))
+ last_blk += group_data[src_group - group].blocks_count;
+ else
+ break;
+
+ /* Allocate block bitmaps */
+ for (; bb_index < flex_gd->count; bb_index++) {
+ if (start_blk >= last_blk)
+ goto next_group;
+ group_data[bb_index].block_bitmap = start_blk++;
+ ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
+ group -= group_data[0].group;
+ group_data[group].free_blocks_count--;
+ if (flexbg_size > 1)
+ flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ }
+
+ /* Allocate inode bitmaps */
+ for (; ib_index < flex_gd->count; ib_index++) {
+ if (start_blk >= last_blk)
+ goto next_group;
+ group_data[ib_index].inode_bitmap = start_blk++;
+ ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
+ group -= group_data[0].group;
+ group_data[group].free_blocks_count--;
+ if (flexbg_size > 1)
+ flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ }
+
+ /* Allocate inode tables */
+ for (; it_index < flex_gd->count; it_index++) {
+ if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
+ goto next_group;
+ group_data[it_index].inode_table = start_blk;
+ ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
+ group -= group_data[0].group;
+ group_data[group].free_blocks_count -=
+ EXT4_SB(sb)->s_itb_per_group;
+ if (flexbg_size > 1)
+ flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+
+ start_blk += EXT4_SB(sb)->s_itb_per_group;
+ }
+
+ if (test_opt(sb, DEBUG)) {
+ int i;
+ group = group_data[0].group;
+
+ printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
+ "%d groups, flexbg size is %d:\n", flex_gd->count,
+ flexbg_size);
+
+ for (i = 0; i < flex_gd->count; i++) {
+ printk(KERN_DEBUG "adding %s group %u: %u "
+ "blocks (%d free)\n",
+ ext4_bg_has_super(sb, group + i) ? "normal" :
+ "no-super", group + i,
+ group_data[i].blocks_count,
+ group_data[i].free_blocks_count);
+ }
+ }
+}
+
static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
ext4_fsblk_t blk)
{
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
}
/*
- * Set up the block and inode bitmaps, and the inode table for the new group.
+ * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
+ *
+ * Helper function for ext4_setup_new_group_blocks() which set .
+ *
+ * @sb: super block
+ * @handle: journal handle
+ * @flex_gd: flex group data
+ */
+static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
+ struct ext4_new_flex_group_data *flex_gd,
+ ext4_fsblk_t block, ext4_group_t count)
+{
+ ext4_group_t count2;
+
+ ext4_debug("mark blocks [%llu/%u] used\n", block, count);
+ for (count2 = count; count > 0; count -= count2, block += count2) {
+ ext4_fsblk_t start;
+ struct buffer_head *bh;
+ ext4_group_t group;
+ int err;
+
+ ext4_get_group_no_and_offset(sb, block, &group, NULL);
+ start = ext4_group_first_block_no(sb, group);
+ group -= flex_gd->groups[0].group;
+
+ count2 = sb->s_blocksize * 8 - (block - start);
+ if (count2 > count)
+ count2 = count;
+
+ if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
+ BUG_ON(flex_gd->count > 1);
+ continue;
+ }
+
+ err = extend_or_restart_transaction(handle, 1);
+ if (err)
+ return err;
+
+ bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
+ if (!bh)
+ return -EIO;
+
+ err = ext4_journal_get_write_access(handle, bh);
+ if (err)
+ return err;
+ ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
+ block - start, count2);
+ ext4_set_bits(bh->b_data, block - start, count2);
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(err))
+ return err;
+ brelse(bh);
+ }
+
+ return 0;
+}
+
+/*
+ * Set up the block and inode bitmaps, and the inode table for the new groups.
* This doesn't need to be part of the main transaction, since we are only
* changing blocks outside the actual filesystem. We still do journaling to
* ensure the recovery is correct in case of a failure just after resize.
* If any part of this fails, we simply abort the resize.
+ *
+ * setup_new_flex_group_blocks handles a flex group as follow:
+ * 1. copy super block and GDT, and initialize group tables if necessary.
+ * In this step, we only set bits in blocks bitmaps for blocks taken by
+ * super block and GDT.
+ * 2. allocate group tables in block bitmaps, that is, set bits in block
+ * bitmap for blocks taken by group tables.
*/
-static int setup_new_group_blocks(struct super_block *sb,
- struct ext4_new_group_data *input)
+static int setup_new_flex_group_blocks(struct super_block *sb,
+ struct ext4_new_flex_group_data *flex_gd)
{
+ int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
+ ext4_fsblk_t start;
+ ext4_fsblk_t block;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group);
- int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
- le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
- unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
- struct buffer_head *bh;
+ struct ext4_super_block *es = sbi->s_es;
+ struct ext4_new_group_data *group_data = flex_gd->groups;
+ __u16 *bg_flags = flex_gd->bg_flags;
handle_t *handle;
- ext4_fsblk_t block;
- ext4_grpblk_t bit;
- int i;
- int err = 0, err2;
+ ext4_group_t group, count;
+ struct buffer_head *bh = NULL;
+ int reserved_gdb, i, j, err = 0, err2;
+
+ BUG_ON(!flex_gd->count || !group_data ||
+ group_data[0].group != sbi->s_groups_count);
+
+ reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
/* This transaction may be extended/restarted along the way */
handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
-
if (IS_ERR(handle))
return PTR_ERR(handle);
- BUG_ON(input->group != sbi->s_groups_count);
+ group = group_data[0].group;
+ for (i = 0; i < flex_gd->count; i++, group++) {
+ unsigned long gdblocks;
- /* Copy all of the GDT blocks into the backup in this group */
- for (i = 0, bit = 1, block = start + 1;
- i < gdblocks; i++, block++, bit++) {
- struct buffer_head *gdb;
+ gdblocks = ext4_bg_num_gdb(sb, group);
+ start = ext4_group_first_block_no(sb, group);
- ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
- err = extend_or_restart_transaction(handle, 1);
- if (err)
- goto exit_journal;
+ /* Copy all of the GDT blocks into the backup in this group */
+ for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
+ struct buffer_head *gdb;
- gdb = sb_getblk(sb, block);
- if (!gdb) {
- err = -EIO;
- goto exit_journal;
- }
- if ((err = ext4_journal_get_write_access(handle, gdb))) {
+ ext4_debug("update backup group %#04llx\n", block);
+ err = extend_or_restart_transaction(handle, 1);
+ if (err)
+ goto out;
+
+ gdb = sb_getblk(sb, block);
+ if (!gdb) {
+ err = -EIO;
+ goto out;
+ }
+
+ err = ext4_journal_get_write_access(handle, gdb);
+ if (err) {
+ brelse(gdb);
+ goto out;
+ }
+ memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
+ gdb->b_size);
+ set_buffer_uptodate(gdb);
+
+ err = ext4_handle_dirty_metadata(handle, NULL, gdb);
+ if (unlikely(err)) {
+ brelse(gdb);
+ goto out;
+ }
brelse(gdb);
- goto exit_journal;
}
- memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
- set_buffer_uptodate(gdb);
- err = ext4_handle_dirty_metadata(handle, NULL, gdb);
- if (unlikely(err)) {
- brelse(gdb);
- goto exit_journal;
+
+ /* Zero out all of the reserved backup group descriptor
+ * table blocks
+ */
+ if (ext4_bg_has_super(sb, group)) {
+ err = sb_issue_zeroout(sb, gdblocks + start + 1,
+ reserved_gdb, GFP_NOFS);
+ if (err)
+ goto out;
}
- brelse(gdb);
- }
- /* Zero out all of the reserved backup group descriptor table blocks */
- ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
- block, sbi->s_itb_per_group);
- err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
- GFP_NOFS);
- if (err)
- goto exit_journal;
+ /* Initialize group tables of the grop @group */
+ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
+ goto handle_bb;
- err = extend_or_restart_transaction(handle, 2);
- if (err)
- goto exit_journal;
+ /* Zero out all of the inode table blocks */
+ block = group_data[i].inode_table;
+ ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
+ block, sbi->s_itb_per_group);
+ err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
+ GFP_NOFS);
+ if (err)
+ goto out;
- bh = bclean(handle, sb, input->block_bitmap);
- if (IS_ERR(bh)) {
- err = PTR_ERR(bh);
- goto exit_journal;
- }
+handle_bb:
+ if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
+ goto handle_ib;
- if (ext4_bg_has_super(sb, input->group)) {
- ext4_debug("mark backup group tables %#04llx (+0)\n", start);
- ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1);
- }
+ /* Initialize block bitmap of the @group */
+ block = group_data[i].block_bitmap;
+ err = extend_or_restart_transaction(handle, 1);
+ if (err)
+ goto out;
- ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
- input->block_bitmap - start);
- ext4_set_bit(input->block_bitmap - start, bh->b_data);
- ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap,
- input->inode_bitmap - start);
- ext4_set_bit(input->inode_bitmap - start, bh->b_data);
-
- /* Zero out all of the inode table blocks */
- block = input->inode_table;
- ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
- block, sbi->s_itb_per_group);
- err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
- if (err)
- goto exit_bh;
- ext4_set_bits(bh->b_data, input->inode_table - start,
- sbi->s_itb_per_group);
+ bh = bclean(handle, sb, block);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ goto out;
+ }
+ if (ext4_bg_has_super(sb, group)) {
+ ext4_debug("mark backup superblock %#04llx (+0)\n",
+ start);
+ ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
+ 1);
+ }
+ ext4_mark_bitmap_end(group_data[i].blocks_count,
+ sb->s_blocksize * 8, bh->b_data);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out;
+ brelse(bh);
+handle_ib:
+ if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
+ continue;
- ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
- bh->b_data);
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (unlikely(err)) {
- ext4_std_error(sb, err);
- goto exit_bh;
+ /* Initialize inode bitmap of the @group */
+ block = group_data[i].inode_bitmap;
+ err = extend_or_restart_transaction(handle, 1);
+ if (err)
+ goto out;
+ /* Mark unused entries in inode bitmap used */
+ bh = bclean(handle, sb, block);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ goto out;
+ }
+
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
+ sb->s_blocksize * 8, bh->b_data);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out;
+ brelse(bh);
}
- brelse(bh);
- /* Mark unused entries in inode bitmap used */
- ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
- input->inode_bitmap, input->inode_bitmap - start);
- if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
- err = PTR_ERR(bh);
- goto exit_journal;
+ bh = NULL;
+
+ /* Mark group tables in block bitmap */
+ for (j = 0; j < GROUP_TABLE_COUNT; j++) {
+ count = group_table_count[j];
+ start = (&group_data[0].block_bitmap)[j];
+ block = start;
+ for (i = 1; i < flex_gd->count; i++) {
+ block += group_table_count[j];
+ if (block == (&group_data[i].block_bitmap)[j]) {
+ count += group_table_count[j];
+ continue;
+ }
+ err = set_flexbg_block_bitmap(sb, handle,
+ flex_gd, start, count);
+ if (err)
+ goto out;
+ count = group_table_count[j];
+ start = group_data[i].block_bitmap;
+ block = start;
+ }
+
+ if (count) {
+ err = set_flexbg_block_bitmap(sb, handle,
+ flex_gd, start, count);
+ if (err)
+ goto out;
+ }
}
- ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
- bh->b_data);
- err = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (unlikely(err))
- ext4_std_error(sb, err);
-exit_bh:
+out:
brelse(bh);
-
-exit_journal:
- if ((err2 = ext4_journal_stop(handle)) && !err)
+ err2 = ext4_journal_stop(handle);
+ if (err2 && !err)
err = err2;
return err;
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
* groups in current filesystem that have BACKUPS, or -ve error code.
*/
static int verify_reserved_gdb(struct super_block *sb,
+ ext4_group_t end,
struct buffer_head *primary)
{
const ext4_fsblk_t blk = primary->b_blocknr;
- const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
unsigned three = 1;
unsigned five = 5;
unsigned seven = 7;
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
if (!gdb_bh)
return -EIO;
- gdbackups = verify_reserved_gdb(sb, gdb_bh);
+ gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
if (gdbackups < 0) {
err = gdbackups;
goto exit_bh;
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
err = -EIO;
goto exit_bh;
}
- if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
+ gdbackups = verify_reserved_gdb(sb, group, primary[res]);
+ if (gdbackups < 0) {
brelse(primary[res]);
err = gdbackups;
goto exit_bh;
@@ -735,6 +1021,348 @@ exit_err:
}
}
+/*
+ * ext4_add_new_descs() adds @count group descriptor of groups
+ * starting at @group
+ *
+ * @handle: journal handle
+ * @sb: super block
+ * @group: the group no. of the first group desc to be added
+ * @resize_inode: the resize inode
+ * @count: number of group descriptors to be added
+ */
+static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
+ ext4_group_t group, struct inode *resize_inode,
+ ext4_group_t count)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head *gdb_bh;
+ int i, gdb_off, gdb_num, err = 0;
+
+ for (i = 0; i < count; i++, group++) {
+ int reserved_gdb = ext4_bg_has_super(sb, group) ?
+ le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
+
+ gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
+ gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
+
+ /*
+ * We will only either add reserved group blocks to a backup group
+ * or remove reserved blocks for the first group in a new group block.
+ * Doing both would be mean more complex code, and sane people don't
+ * use non-sparse filesystems anymore. This is already checked above.
+ */
+ if (gdb_off) {
+ gdb_bh = sbi->s_group_desc[gdb_num];
+ err = ext4_journal_get_write_access(handle, gdb_bh);
+
+ if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
+ err = reserve_backup_gdb(handle, resize_inode, group);
+ } else
+ err = add_new_gdb(handle, resize_inode, group);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+/*
+ * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
+ */
+static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
+ struct ext4_new_flex_group_data *flex_gd)
+{
+ struct ext4_new_group_data *group_data = flex_gd->groups;
+ struct ext4_group_desc *gdp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *gdb_bh;
+ ext4_group_t group;
+ __u16 *bg_flags = flex_gd->bg_flags;
+ int i, gdb_off, gdb_num, err = 0;
+
+
+ for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
+ group = group_data->group;
+
+ gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
+ gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
+
+ /*
+ * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
+ */
+ gdb_bh = sbi->s_group_desc[gdb_num];
+ /* Update group descriptor block for new group */
+ gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
+ gdb_off * EXT4_DESC_SIZE(sb));
+
+ memset(gdp, 0, EXT4_DESC_SIZE(sb));
+ ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
+ ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
+ ext4_inode_table_set(sb, gdp, group_data->inode_table);
+ ext4_free_group_clusters_set(sb, gdp,
+ EXT4_B2C(sbi, group_data->free_blocks_count));
+ ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
+ gdp->bg_flags = cpu_to_le16(*bg_flags);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+
+ err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
+ if (unlikely(err)) {
+ ext4_std_error(sb, err);
+ break;
+ }
+
+ /*
+ * We can allocate memory for mb_alloc based on the new group
+ * descriptor
+ */
+ err = ext4_mb_add_groupinfo(sb, group, gdp);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+/*
+ * ext4_update_super() updates the super block so that the newly added
+ * groups can be seen by the filesystem.
+ *
+ * @sb: super block
+ * @flex_gd: new added groups
+ */
+static void ext4_update_super(struct super_block *sb,
+ struct ext4_new_flex_group_data *flex_gd)
+{
+ ext4_fsblk_t blocks_count = 0;
+ ext4_fsblk_t free_blocks = 0;
+ ext4_fsblk_t reserved_blocks = 0;
+ struct ext4_new_group_data *group_data = flex_gd->groups;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ int i;
+
+ BUG_ON(flex_gd->count == 0 || group_data == NULL);
+ /*
+ * Make the new blocks and inodes valid next. We do this before
+ * increasing the group count so that once the group is enabled,
+ * all of its blocks and inodes are already valid.
+ *
+ * We always allocate group-by-group, then block-by-block or
+ * inode-by-inode within a group, so enabling these
+ * blocks/inodes before the group is live won't actually let us
+ * allocate the new space yet.
+ */
+ for (i = 0; i < flex_gd->count; i++) {
+ blocks_count += group_data[i].blocks_count;
+ free_blocks += group_data[i].free_blocks_count;
+ }
+
+ reserved_blocks = ext4_r_blocks_count(es) * 100;
+ do_div(reserved_blocks, ext4_blocks_count(es));
+ reserved_blocks *= blocks_count;
+ do_div(reserved_blocks, 100);
+
+ ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
+ le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
+ flex_gd->count);
+
+ /*
+ * We need to protect s_groups_count against other CPUs seeing
+ * inconsistent state in the superblock.
+ *
+ * The precise rules we use are:
+ *
+ * * Writers must perform a smp_wmb() after updating all
+ * dependent data and before modifying the groups count
+ *
+ * * Readers must perform an smp_rmb() after reading the groups
+ * count and before reading any dependent data.
+ *
+ * NB. These rules can be relaxed when checking the group count
+ * while freeing data, as we can only allocate from a block
+ * group after serialising against the group count, and we can
+ * only then free after serialising in turn against that
+ * allocation.
+ */
+ smp_wmb();
+
+ /* Update the global fs size fields */
+ sbi->s_groups_count += flex_gd->count;
+
+ /* Update the reserved block counts only once the new group is
+ * active. */
+ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
+ reserved_blocks);
+
+ /* Update the free space counts */
+ percpu_counter_add(&sbi->s_freeclusters_counter,
+ EXT4_B2C(sbi, free_blocks));
+ percpu_counter_add(&sbi->s_freeinodes_counter,
+ EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
+
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+ EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+ sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group;
+ flex_group = ext4_flex_group(sbi, group_data[0].group);
+ atomic_add(EXT4_B2C(sbi, free_blocks),
+ &sbi->s_flex_groups[flex_group].free_clusters);
+ atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
+ &sbi->s_flex_groups[flex_group].free_inodes);
+ }
+
+ if (test_opt(sb, DEBUG))
+ printk(KERN_DEBUG "EXT4-fs: added group %u:"
+ "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
+ blocks_count, free_blocks, reserved_blocks);
+}
+
+/* Add a flex group to an fs. Ensure we handle all possible error conditions
+ * _before_ we start modifying the filesystem, because we cannot abort the
+ * transaction and not have it write the data to disk.
+ */
+static int ext4_flex_group_add(struct super_block *sb,
+ struct inode *resize_inode,
+ struct ext4_new_flex_group_data *flex_gd)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ ext4_fsblk_t o_blocks_count;
+ ext4_grpblk_t last;
+ ext4_group_t group;
+ handle_t *handle;
+ unsigned reserved_gdb;
+ int err = 0, err2 = 0, credit;
+
+ BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
+
+ reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
+ o_blocks_count = ext4_blocks_count(es);
+ ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
+ BUG_ON(last);
+
+ err = setup_new_flex_group_blocks(sb, flex_gd);
+ if (err)
+ goto exit;
+ /*
+ * We will always be modifying at least the superblock and GDT
+ * block. If we are adding a group past the last current GDT block,
+ * we will also modify the inode and the dindirect block. If we
+ * are adding a group with superblock/GDT backups we will also
+ * modify each of the reserved GDT dindirect blocks.
+ */
+ credit = flex_gd->count * 4 + reserved_gdb;
+ handle = ext4_journal_start_sb(sb, credit);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto exit;
+ }
+
+ err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+ if (err)
+ goto exit_journal;
+
+ group = flex_gd->groups[0].group;
+ BUG_ON(group != EXT4_SB(sb)->s_groups_count);
+ err = ext4_add_new_descs(handle, sb, group,
+ resize_inode, flex_gd->count);
+ if (err)
+ goto exit_journal;
+
+ err = ext4_setup_new_descs(handle, sb, flex_gd);
+ if (err)
+ goto exit_journal;
+
+ ext4_update_super(sb, flex_gd);
+
+ err = ext4_handle_dirty_super(handle, sb);
+
+exit_journal:
+ err2 = ext4_journal_stop(handle);
+ if (!err)
+ err = err2;
+
+ if (!err) {
+ int i;
+ update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+ sizeof(struct ext4_super_block));
+ for (i = 0; i < flex_gd->count; i++, group++) {
+ struct buffer_head *gdb_bh;
+ int gdb_num;
+ gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
+ gdb_bh = sbi->s_group_desc[gdb_num];
+ update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
+ gdb_bh->b_size);
+ }
+ }
+exit:
+ return err;
+}
+
+static int ext4_setup_next_flex_gd(struct super_block *sb,
+ struct ext4_new_flex_group_data *flex_gd,
+ ext4_fsblk_t n_blocks_count,
+ unsigned long flexbg_size)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ struct ext4_new_group_data *group_data = flex_gd->groups;
+ ext4_fsblk_t o_blocks_count;
+ ext4_group_t n_group;
+ ext4_group_t group;
+ ext4_group_t last_group;
+ ext4_grpblk_t last;
+ ext4_grpblk_t blocks_per_group;
+ unsigned long i;
+
+ blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
+
+ o_blocks_count = ext4_blocks_count(es);
+
+ if (o_blocks_count == n_blocks_count)
+ return 0;
+
+ ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
+ BUG_ON(last);
+ ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
+
+ last_group = group | (flexbg_size - 1);
+ if (last_group > n_group)
+ last_group = n_group;
+
+ flex_gd->count = last_group - group + 1;
+
+ for (i = 0; i < flex_gd->count; i++) {
+ int overhead;
+
+ group_data[i].group = group + i;
+ group_data[i].blocks_count = blocks_per_group;
+ overhead = ext4_bg_has_super(sb, group + i) ?
+ (1 + ext4_bg_num_gdb(sb, group + i) +
+ le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
+ group_data[i].free_blocks_count = blocks_per_group - overhead;
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+ flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
+ EXT4_BG_INODE_UNINIT;
+ else
+ flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
+ }
+
+ if (last_group == n_group &&
+ EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
+ /* We need to initialize block bitmap of last group. */
+ flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
+
+ if ((last_group == n_group) && (last != blocks_per_group - 1)) {
+ group_data[i - 1].blocks_count = last + 1;
+ group_data[i - 1].free_blocks_count -= blocks_per_group-
+ last - 1;
+ }
+
+ return 1;
+}
+
/* Add group descriptor data to an existing or new group descriptor block.
* Ensure we handle all possible error conditions _before_ we start modifying
* the filesystem, because we cannot abort the transaction and not have it
@@ -750,16 +1378,15 @@ exit_err:
*/
int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
{
+ struct ext4_new_flex_group_data flex_gd;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
- struct buffer_head *primary = NULL;
- struct ext4_group_desc *gdp;
struct inode *inode = NULL;
- handle_t *handle;
int gdb_off, gdb_num;
- int err, err2;
+ int err;
+ __u16 bg_flags = 0;
gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
}
- if ((err = verify_group_input(sb, input)))
- goto exit_put;
+ err = verify_group_input(sb, input);
+ if (err)
+ goto out;
- if ((err = setup_new_group_blocks(sb, input)))
- goto exit_put;
+ flex_gd.count = 1;
+ flex_gd.groups = input;
+ flex_gd.bg_flags = &bg_flags;
+ err = ext4_flex_group_add(sb, inode, &flex_gd);
+out:
+ iput(inode);
+ return err;
+} /* ext4_group_add */
- /*
- * We will always be modifying at least the superblock and a GDT
- * block. If we are adding a group past the last current GDT block,
- * we will also modify the inode and the dindirect block. If we
- * are adding a group with superblock/GDT backups we will also
- * modify each of the reserved GDT dindirect blocks.
+/*
+ * extend a group without checking assuming that checking has been done.
+ */
+static int ext4_group_extend_no_check(struct super_block *sb,
+ ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ handle_t *handle;
+ int err = 0, err2;
+
+ /* We will update the superblock, one block bitmap, and
+ * one group descriptor via ext4_group_add_blocks().
*/
- handle = ext4_journal_start_sb(sb,
- ext4_bg_has_super(sb, input->group) ?
- 3 + reserved_gdb : 4);
+ handle = ext4_journal_start_sb(sb, 3);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
- goto exit_put;
+ ext4_warning(sb, "error %d on journal start", err);
+ return err;
}
- if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
- goto exit_journal;
-
- /*
- * We will only either add reserved group blocks to a backup group
- * or remove reserved blocks for the first group in a new group block.
- * Doing both would be mean more complex code, and sane people don't
- * use non-sparse filesystems anymore. This is already checked above.
- */
- if (gdb_off) {
- primary = sbi->s_group_desc[gdb_num];
- if ((err = ext4_journal_get_write_access(handle, primary)))
- goto exit_journal;
-
- if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
- err = reserve_backup_gdb(handle, inode, input->group);
- if (err)
- goto exit_journal;
- }
- } else {
- /*
- * Note that we can access new group descriptor block safely
- * only if add_new_gdb() succeeds.
- */
- err = add_new_gdb(handle, inode, input->group);
- if (err)
- goto exit_journal;
- primary = sbi->s_group_desc[gdb_num];
+ err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+ if (err) {
+ ext4_warning(sb, "error %d on journal write access", err);
+ goto errout;
}
- /*
- * OK, now we've set up the new group. Time to make it active.
- *
- * so we have to be safe wrt. concurrent accesses the group
- * data. So we need to be careful to set all of the relevant
- * group descriptor data etc. *before* we enable the group.
- *
- * The key field here is sbi->s_groups_count: as long as
- * that retains its old value, nobody is going to access the new
- * group.
- *
- * So first we update all the descriptor metadata for the new
- * group; then we update the total disk blocks count; then we
- * update the groups count to enable the group; then finally we
- * update the free space counts so that the system can start
- * using the new disk blocks.
- */
-
- /* Update group descriptor block for new group */
- gdp = (struct ext4_group_desc *)((char *)primary->b_data +
- gdb_off * EXT4_DESC_SIZE(sb));
-
- memset(gdp, 0, EXT4_DESC_SIZE(sb));
- ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
- ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
- ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
- ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
- ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
- gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
- gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
-
- /*
- * We can allocate memory for mb_alloc based on the new group
- * descriptor
- */
- err = ext4_mb_add_groupinfo(sb, input->group, gdp);
+ ext4_blocks_count_set(es, o_blocks_count + add);
+ ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
+ o_blocks_count + add);
+ /* We add the blocks to the bitmap and set the group need init bit */
+ err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
if (err)
- goto exit_journal;
-
- /*
- * Make the new blocks and inodes valid next. We do this before
- * increasing the group count so that once the group is enabled,
- * all of its blocks and inodes are already valid.
- *
- * We always allocate group-by-group, then block-by-block or
- * inode-by-inode within a group, so enabling these
- * blocks/inodes before the group is live won't actually let us
- * allocate the new space yet.
- */
- ext4_blocks_count_set(es, ext4_blocks_count(es) +
- input->blocks_count);
- le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
-
- /*
- * We need to protect s_groups_count against other CPUs seeing
- * inconsistent state in the superblock.
- *
- * The precise rules we use are:
- *
- * * Writers must perform a smp_wmb() after updating all dependent
- * data and before modifying the groups count
- *
- * * Readers must perform an smp_rmb() after reading the groups count
- * and before reading any dependent data.
- *
- * NB. These rules can be relaxed when checking the group count
- * while freeing data, as we can only allocate from a block
- * group after serialising against the group count, and we can
- * only then free after serialising in turn against that
- * allocation.
- */
- smp_wmb();
-
- /* Update the global fs size fields */
- sbi->s_groups_count++;
-
- err = ext4_handle_dirty_metadata(handle, NULL, primary);
- if (unlikely(err)) {
- ext4_std_error(sb, err);
- goto exit_journal;
- }
-
- /* Update the reserved block counts only once the new group is
- * active. */
- ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
- input->reserved_blocks);
-
- /* Update the free space counts */
- percpu_counter_add(&sbi->s_freeclusters_counter,
- EXT4_B2C(sbi, input->free_blocks_count));
- percpu_counter_add(&sbi->s_freeinodes_counter,
- EXT4_INODES_PER_GROUP(sb));
-
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
- sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group;
- flex_group = ext4_flex_group(sbi, input->group);
- atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
- &sbi->s_flex_groups[flex_group].free_clusters);
- atomic_add(EXT4_INODES_PER_GROUP(sb),
- &sbi->s_flex_groups[flex_group].free_inodes);
- }
-
+ goto errout;
ext4_handle_dirty_super(handle, sb);
-
-exit_journal:
- if ((err2 = ext4_journal_stop(handle)) && !err)
+ ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
+ o_blocks_count + add);
+errout:
+ err2 = ext4_journal_stop(handle);
+ if (err2 && !err)
err = err2;
- if (!err && primary) {
- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+
+ if (!err) {
+ if (test_opt(sb, DEBUG))
+ printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
+ "blocks\n", ext4_blocks_count(es));
+ update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
- update_backups(sb, primary->b_blocknr, primary->b_data,
- primary->b_size);
}
-exit_put:
- iput(inode);
return err;
-} /* ext4_group_add */
+}
/*
* Extend the filesystem to the new number of blocks specified. This entry
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_grpblk_t last;
ext4_grpblk_t add;
struct buffer_head *bh;
- handle_t *handle;
- int err, err2;
+ int err;
ext4_group_t group;
o_blocks_count = ext4_blocks_count(es);
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
}
brelse(bh);
- /* We will update the superblock, one block bitmap, and
- * one group descriptor via ext4_free_blocks().
- */
- handle = ext4_journal_start_sb(sb, 3);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- ext4_warning(sb, "error %d on journal start", err);
- goto exit_put;
+ err = ext4_group_extend_no_check(sb, o_blocks_count, add);
+ return err;
+} /* ext4_group_extend */
+
+/*
+ * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
+ *
+ * @sb: super block of the fs to be resized
+ * @n_blocks_count: the number of blocks resides in the resized fs
+ */
+int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
+{
+ struct ext4_new_flex_group_data *flex_gd = NULL;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ struct buffer_head *bh;
+ struct inode *resize_inode;
+ ext4_fsblk_t o_blocks_count;
+ ext4_group_t o_group;
+ ext4_group_t n_group;
+ ext4_grpblk_t offset;
+ unsigned long n_desc_blocks;
+ unsigned long o_desc_blocks;
+ unsigned long desc_blocks;
+ int err = 0, flexbg_size = 1;
+
+ o_blocks_count = ext4_blocks_count(es);
+
+ if (test_opt(sb, DEBUG))
+ printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
+ "upto %llu blocks\n", o_blocks_count, n_blocks_count);
+
+ if (n_blocks_count < o_blocks_count) {
+ /* On-line shrinking not supported */
+ ext4_warning(sb, "can't shrink FS - resize aborted");
+ return -EINVAL;
}
- if ((err = ext4_journal_get_write_access(handle,
- EXT4_SB(sb)->s_sbh))) {
- ext4_warning(sb, "error %d on journal write access", err);
- ext4_journal_stop(handle);
- goto exit_put;
+ if (n_blocks_count == o_blocks_count)
+ /* Nothing need to do */
+ return 0;
+
+ ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
+ ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
+
+ n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
+ EXT4_DESC_PER_BLOCK(sb);
+ o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
+ EXT4_DESC_PER_BLOCK(sb);
+ desc_blocks = n_desc_blocks - o_desc_blocks;
+
+ if (desc_blocks &&
+ (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
+ le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
+ ext4_warning(sb, "No reserved GDT blocks, can't resize");
+ return -EPERM;
}
- ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
- o_blocks_count + add);
- /* We add the blocks to the bitmap and set the group need init bit */
- err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
- ext4_handle_dirty_super(handle, sb);
- ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
- o_blocks_count + add);
- err2 = ext4_journal_stop(handle);
- if (!err && err2)
- err = err2;
- if (err)
- goto exit_put;
+ resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
+ if (IS_ERR(resize_inode)) {
+ ext4_warning(sb, "Error opening resize inode");
+ return PTR_ERR(resize_inode);
+ }
+ /* See if the device is actually as big as what was requested */
+ bh = sb_bread(sb, n_blocks_count - 1);
+ if (!bh) {
+ ext4_warning(sb, "can't read last block, resize aborted");
+ return -ENOSPC;
+ }
+ brelse(bh);
+
+ if (offset != 0) {
+ /* extend the last group */
+ ext4_grpblk_t add;
+ add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
+ err = ext4_group_extend_no_check(sb, o_blocks_count, add);
+ if (err)
+ goto out;
+ }
+
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
+ es->s_log_groups_per_flex)
+ flexbg_size = 1 << es->s_log_groups_per_flex;
+
+ o_blocks_count = ext4_blocks_count(es);
+ if (o_blocks_count == n_blocks_count)
+ goto out;
+
+ flex_gd = alloc_flex_gd(flexbg_size);
+ if (flex_gd == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* Add flex groups. Note that a regular group is a
+ * flex group with 1 group.
+ */
+ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
+ flexbg_size)) {
+ ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
+ err = ext4_flex_group_add(sb, resize_inode, flex_gd);
+ if (unlikely(err))
+ break;
+ }
+
+out:
+ if (flex_gd)
+ free_flex_gd(flex_gd);
+
+ iput(resize_inode);
if (test_opt(sb, DEBUG))
- printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
- ext4_blocks_count(es));
- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
- sizeof(struct ext4_super_block));
-exit_put:
+ printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
+ "upto %llu blocks\n", o_blocks_count, n_blocks_count);
return err;
-} /* ext4_group_extend */
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ed3ce82e2de4..502c61fd7392 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
}
if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
seq_printf(seq, ",max_batch_time=%u",
- (unsigned) sbi->s_min_batch_time);
+ (unsigned) sbi->s_max_batch_time);
}
/*
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
struct ext4_group_desc *gdp = NULL;
ext4_group_t flex_group_count;
ext4_group_t flex_group;
- int groups_per_flex = 0;
+ unsigned int groups_per_flex = 0;
size_t size;
int i;
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
- groups_per_flex = 1 << sbi->s_log_groups_per_flex;
-
- if (groups_per_flex < 2) {
+ if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
sbi->s_log_groups_per_flex = 0;
return 1;
}
+ groups_per_flex = 1 << sbi->s_log_groups_per_flex;
/* We allocate both existing and potentially added groups */
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* of the filesystem.
*/
if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
- ext4_msg(sb, KERN_WARNING, "bad geometry: first data"
+ ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
"block %u is beyond end of filesystem (%llu)",
le32_to_cpu(es->s_first_data_block),
ext4_blocks_count(es));
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index b60f9f81e33c..d2a200624af5 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
name, value, size, flags);
}
-int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
+static int
+ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
{
const struct xattr *xattr;
handle_t *handle = fs_info;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 68d704db787f..5069b8475150 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -430,6 +430,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD2: commit phase 1\n");
/*
+ * Clear revoked flag to reflect there is no revoked buffers
+ * in the next transaction which is going to be started.
+ */
+ jbd2_clear_buffer_revoked_flags(journal);
+
+ /*
* Switch to a new revoke table.
*/
jbd2_journal_switch_revoke_table(journal);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 69fd93588118..30b2867d6cc9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -47,6 +47,10 @@
* overwriting the new data. We don't even need to clear the revoke
* bit here.
*
+ * We cache revoke status of a buffer in the current transaction in b_states
+ * bits. As the name says, revokevalid flag indicates that the cached revoke
+ * status of a buffer is valid and we can rely on the cached status.
+ *
* Revoke information on buffers is a tri-state value:
*
* RevokeValid clear: no cached revoke status, need to look it up
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
return did_revoke;
}
+/*
+ * journal_clear_revoked_flag clears revoked flag of buffers in
+ * revoke table to reflect there is no revoked buffers in the next
+ * transaction which is going to be started.
+ */
+void jbd2_clear_buffer_revoked_flags(journal_t *journal)
+{
+ struct jbd2_revoke_table_s *revoke = journal->j_revoke;
+ int i = 0;
+
+ for (i = 0; i < revoke->hash_size; i++) {
+ struct list_head *hash_list;
+ struct list_head *list_entry;
+ hash_list = &revoke->hash_table[i];
+
+ list_for_each(list_entry, hash_list) {
+ struct jbd2_revoke_record_s *record;
+ struct buffer_head *bh;
+ record = (struct jbd2_revoke_record_s *)list_entry;
+ bh = __find_get_block(journal->j_fs_dev,
+ record->blocknr,
+ journal->j_blocksize);
+ if (bh) {
+ clear_buffer_revoked(bh);
+ __brelse(bh);
+ }
+ }
+ }
+}
+
/* journal_switch_revoke table select j_revoke for next transaction
* we do not want to suspend any processing until all revokes are
* written -bzzz
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a0e41a4c080e..35ae096bed5d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal)
break;
spin_lock(&transaction->t_handle_lock);
+ prepare_to_wait(&journal->j_wait_updates, &wait,
+ TASK_UNINTERRUPTIBLE);
if (!atomic_read(&transaction->t_updates)) {
spin_unlock(&transaction->t_handle_lock);
+ finish_wait(&journal->j_wait_updates, &wait);
break;
}
- prepare_to_wait(&journal->j_wait_updates, &wait,
- TASK_UNINTERRUPTIBLE);
spin_unlock(&transaction->t_handle_lock);
write_unlock(&journal->j_state_lock);
schedule();
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 2092ea21e469..5557baefed60 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1151,6 +1151,7 @@ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
extern void jbd2_journal_clear_revoke(journal_t *);
extern void jbd2_journal_switch_revoke_table(journal_t *journal);
+extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
/*
* The log thread user interface:
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 748ff7cbe555..319538bf17d2 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
);
TRACE_EVENT(ext4_mb_release_group_pa,
- TP_PROTO(struct ext4_prealloc_space *pa),
+ TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa),
- TP_ARGS(pa),
+ TP_ARGS(sb, pa),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa,
),
TP_fast_assign(
- __entry->dev = pa->pa_inode->i_sb->s_dev;
+ __entry->dev = sb->s_dev;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
),