diff options
author | Omar Sandoval <osandov@fb.com> | 2020-04-16 14:46:14 -0700 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2020-05-25 11:25:25 +0200 |
commit | c7333972b9b571a03bf9aecd1aeecfab81243e8d (patch) | |
tree | 64243b0e65485fb237f416d0d2c5c2c69ea7bba9 /fs/btrfs/extent_io.c | |
parent | c36cac28cb94e58f7e21ff43bdc6064346dab32c (diff) |
btrfs: look at full bi_io_vec for repair decision
Read repair does two things: it finds a good copy of data to return to
the reader, and it corrects the bad copy on disk. If a read of multiple
sectors has an I/O error, repair does an extra "validation" step that
issues a separate read for each sector. This allows us to find the exact
failing sectors and only rewrite those.
This heuristic is implemented in
bio_readpage_error()/btrfs_check_repairable() as:
failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
if (failed_bio_pages > 1)
do validation
However, at this point, bi_iter may have already been advanced. This
means that we'll skip the validation step and rewrite the entire failed
read.
Fix it by getting the actual size from the biovec (which we can do
because this is only called for non-cloned bios, although that will
change in a later commit).
Fixes: 8a2ee44a371c ("btrfs: look at bi_size for repair decisions")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 33 |
1 files changed, 27 insertions, 6 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 39e45b8a5031..fcf2ff9ae4a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2537,8 +2537,9 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, return 0; } -bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages, - struct io_failure_record *failrec, int failed_mirror) +bool btrfs_check_repairable(struct inode *inode, bool needs_validation, + struct io_failure_record *failrec, + int failed_mirror) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int num_copies; @@ -2561,7 +2562,7 @@ bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages, * a) deliver good data to the caller * b) correct the bad sectors on disk */ - if (failed_bio_pages > 1) { + if (needs_validation) { /* * to fulfill b), we need to know the exact failing sectors, as * we don't want to rewrite any more than the failed ones. thus, @@ -2633,6 +2634,24 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, return bio; } +static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio) +{ + struct bio_vec *bvec; + u64 len = 0; + int i; + + /* + * We need to validate each sector individually if the failed I/O was + * for multiple sectors. + */ + bio_for_each_bvec_all(bvec, bio, i) { + len += bvec->bv_len; + if (len > inode->i_sb->s_blocksize) + return true; + } + return false; +} + /* * This is a generic handler for readpage errors. If other copies exist, read * those and write back good data to the failed position. Does not investigate @@ -2647,11 +2666,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct inode *inode = page->mapping->host; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; + bool need_validation; struct bio *bio; int read_mode = 0; blk_status_t status; int ret; - unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2659,13 +2678,15 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, if (ret) return ret; - if (!btrfs_check_repairable(inode, failed_bio_pages, failrec, + need_validation = btrfs_io_needs_validation(inode, failed_bio); + + if (!btrfs_check_repairable(inode, need_validation, failrec, failed_mirror)) { free_io_failure(failure_tree, tree, failrec); return -EIO; } - if (failed_bio_pages > 1) + if (need_validation) read_mode |= REQ_FAILFAST_DEV; phy_offset >>= inode->i_sb->s_blocksize_bits; |