diff options
author | Jens Axboe <axboe@kernel.dk> | 2020-07-22 14:11:38 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-07-22 14:11:38 -0600 |
commit | ef67744e7a4c82e246cd9831208d07249c519d22 (patch) | |
tree | b309aa7ea7f89b1aecdb44844cb1d3a75daecbcf | |
parent | 659bf827ba8f1183b714341d8a1d4b1e446178d9 (diff) | |
parent | fe630de009d0729584d79c78f43121e07c745fdc (diff) |
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.9/drivers
Pull MD for 5.9 from Song.
* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
md/raid10: avoid deadlock on recovery.
raid: md_p.h: drop duplicated word in a comment
md-cluster: fix rmmod issue when md_cluster convert bitmap to none
md-cluster: fix safemode_delay value when converting to clustered bitmap
md/raid5: support config stripe_size by sysfs entry
md/raid5: set default stripe_size as 4096
md/raid456: convert macro STRIPE_* to RAID5_STRIPE_*
raid5: remove the meaningless check in raid5_make_request
raid5: put the comment of clear_batch_ready to the right place
raid5: call clear_batch_ready before set STRIPE_ACTIVE
md: raid10: Fix compilation warning
md: raid5: Fix compilation warning
md: raid5-cache: Remove set but unused variable
md: Fix compilation warning
-rw-r--r-- | drivers/md/md.c | 24 | ||||
-rw-r--r-- | drivers/md/raid10.c | 18 | ||||
-rw-r--r-- | drivers/md/raid5-cache.c | 12 | ||||
-rw-r--r-- | drivers/md/raid5-ppl.c | 11 | ||||
-rw-r--r-- | drivers/md/raid5.c | 340 | ||||
-rw-r--r-- | drivers/md/raid5.h | 53 | ||||
-rw-r--r-- | include/uapi/linux/raid/md_p.h | 2 |
7 files changed, 298 insertions, 162 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index fc33f2f8a415..ea48bc25cce1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -101,6 +101,8 @@ static void mddev_detach(struct mddev *mddev); * count by 2 for every hour elapsed between read errors. */ #define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20 +/* Default safemode delay: 200 msec */ +#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1) /* * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * is 1000 KB/sec, so the extra system load does not show up that much. @@ -2469,8 +2471,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) goto fail; ko = &part_to_dev(rdev->bdev->bd_part)->kobj; - if (sysfs_create_link(&rdev->kobj, ko, "block")) - /* failure here is OK */; + /* failure here is OK */ + err = sysfs_create_link(&rdev->kobj, ko, "block"); rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); rdev->sysfs_unack_badblocks = sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks"); @@ -3238,8 +3240,8 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) return err; } else sysfs_notify_dirent_safe(rdev->sysfs_state); - if (sysfs_link_rdev(rdev->mddev, rdev)) - /* failure here is OK */; + /* failure here is OK */; + sysfs_link_rdev(rdev->mddev, rdev); /* don't wakeup anyone, leave that to userspace. */ } else { if (slot >= rdev->mddev->raid_disks && @@ -6034,7 +6036,7 @@ int md_run(struct mddev *mddev) if (mddev_is_clustered(mddev)) mddev->safemode_delay = 0; else - mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ + mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY; mddev->in_sync = 1; smp_wmb(); spin_lock(&mddev->lock); @@ -7413,6 +7415,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->bitmap_info.nodes = 0; md_cluster_ops->leave(mddev); + module_put(md_cluster_mod); + mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY; } mddev_suspend(mddev); md_bitmap_destroy(mddev); @@ -8408,6 +8412,7 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { + int ret; if (!md_cluster_ops) request_module("md-cluster"); spin_lock(&pers_lock); @@ -8419,7 +8424,10 @@ int md_setup_cluster(struct mddev *mddev, int nodes) } spin_unlock(&pers_lock); - return md_cluster_ops->join(mddev, nodes); + ret = md_cluster_ops->join(mddev, nodes); + if (!ret) + mddev->safemode_delay = 0; + return ret; } void md_cluster_stop(struct mddev *mddev) @@ -9113,8 +9121,8 @@ static int remove_and_add_spares(struct mddev *mddev, rdev->recovery_offset = 0; } if (mddev->pers->hot_add_disk(mddev, rdev) == 0) { - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; + /* failure here is OK */ + sysfs_link_rdev(mddev, rdev); if (!test_bit(Journal, &rdev->flags)) spares++; md_new_event(mddev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 2c47474fa69d..cefda2abd34f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -980,6 +980,7 @@ static void wait_barrier(struct r10conf *conf) { spin_lock_irq(&conf->resync_lock); if (conf->barrier) { + struct bio_list *bio_list = current->bio_list; conf->nr_waiting++; /* Wait for the barrier to drop. * However if there are already pending @@ -994,9 +995,16 @@ static void wait_barrier(struct r10conf *conf) wait_event_lock_irq(conf->wait_barrier, !conf->barrier || (atomic_read(&conf->nr_pending) && - current->bio_list && - (!bio_list_empty(¤t->bio_list[0]) || - !bio_list_empty(¤t->bio_list[1]))), + bio_list && + (!bio_list_empty(&bio_list[0]) || + !bio_list_empty(&bio_list[1]))) || + /* move on if recovery thread is + * blocked by us + */ + (conf->mddev->thread->tsk == current && + test_bit(MD_RECOVERY_RUNNING, + &conf->mddev->recovery) && + conf->nr_queued > 0), conf->resync_lock); conf->nr_waiting--; if (!conf->nr_waiting) @@ -4307,8 +4315,8 @@ out: else rdev->recovery_offset = 0; - if (sysfs_link_rdev(mddev, rdev)) - /* Failure here is OK */; + /* Failure here is OK */ + sysfs_link_rdev(mddev, rdev); } } else if (rdev->raid_disk >= conf->prev.raid_disks && !test_bit(Faulty, &rdev->flags)) { diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 0bea21d81697..82eb4a906e31 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -195,9 +195,7 @@ struct r5l_log { static inline sector_t r5c_tree_index(struct r5conf *conf, sector_t sect) { - sector_t offset; - - offset = sector_div(sect, conf->chunk_sectors); + sector_div(sect, conf->chunk_sectors); return sect; } @@ -298,8 +296,8 @@ r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev) wbi = dev->written; dev->written = NULL; while (wbi && wbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); + dev->sector + RAID5_STRIPE_SECTORS(conf)) { + wbi2 = r5_next_bio(conf, wbi, dev->sector); md_write_end(conf->mddev); bio_endio(wbi); wbi = wbi2; @@ -316,7 +314,7 @@ void r5c_handle_cached_data_endio(struct r5conf *conf, set_bit(R5_UPTODATE, &sh->dev[i].flags); r5c_return_dev_pending_writes(conf, &sh->dev[i]); md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, + RAID5_STRIPE_SECTORS(conf), !test_bit(STRIPE_DEGRADED, &sh->state), 0); } @@ -364,7 +362,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf) */ if (atomic_read(&conf->r5c_cached_full_stripes) >= min(R5C_FULL_STRIPE_FLUSH_BATCH(conf), - conf->chunk_sectors >> STRIPE_SHIFT)) + conf->chunk_sectors >> RAID5_STRIPE_SHIFT(conf))) r5l_wake_reclaim(conf->log, 0); } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index a750f4bbb5d9..d0f540296fe9 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -324,7 +324,7 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh) * be just after the last logged stripe and write to the same * disks. Use bit shift and logarithm to avoid 64-bit division. */ - if ((sh->sector == sh_last->sector + STRIPE_SECTORS) && + if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) && (data_sector >> ilog2(conf->chunk_sectors) == data_sector_last >> ilog2(conf->chunk_sectors)) && ((data_sector - data_sector_last) * data_disks == @@ -844,9 +844,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, /* if start and end is 4k aligned, use a 4k block */ if (block_size == 512 && - (r_sector_first & (STRIPE_SECTORS - 1)) == 0 && - (r_sector_last & (STRIPE_SECTORS - 1)) == 0) - block_size = STRIPE_SIZE; + (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 && + (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0) + block_size = RAID5_STRIPE_SIZE(conf); /* iterate through blocks in strip */ for (i = 0; i < strip_sectors; i += (block_size >> 9)) { @@ -1274,7 +1274,8 @@ static int ppl_validate_rdev(struct md_rdev *rdev) ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9); if (ppl_data_sectors > 0) - ppl_data_sectors = rounddown(ppl_data_sectors, STRIPE_SECTORS); + ppl_data_sectors = rounddown(ppl_data_sectors, + RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private)); if (ppl_data_sectors <= 0) { pr_warn("md/raid:%s: PPL space too small on %s\n", diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a4fbafa5b8f8..40961dd1777b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -69,13 +69,13 @@ static struct workqueue_struct *raid5_wq; static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect) { - int hash = (sect >> STRIPE_SHIFT) & HASH_MASK; + int hash = (sect >> RAID5_STRIPE_SHIFT(conf)) & HASH_MASK; return &conf->stripe_hashtbl[hash]; } -static inline int stripe_hash_locks_hash(sector_t sect) +static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect) { - return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK; + return (sect >> RAID5_STRIPE_SHIFT(conf)) & STRIPE_HASH_LOCKS_MASK; } static inline void lock_device_hash_lock(struct r5conf *conf, int hash) @@ -627,7 +627,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, int previous, int noblock, int noquiesce) { struct stripe_head *sh; - int hash = stripe_hash_locks_hash(sector); + int hash = stripe_hash_locks_hash(conf, sector); int inc_empty_inactive_list_flag; pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); @@ -748,9 +748,9 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh tmp_sec = sh->sector; if (!sector_div(tmp_sec, conf->chunk_sectors)) return; - head_sector = sh->sector - STRIPE_SECTORS; + head_sector = sh->sector - RAID5_STRIPE_SECTORS(conf); - hash = stripe_hash_locks_hash(head_sector); + hash = stripe_hash_locks_hash(conf, head_sector); spin_lock_irq(conf->hash_locks + hash); head = __find_stripe(conf, head_sector, conf->generation); if (head && !atomic_inc_not_zero(&head->count)) { @@ -1057,7 +1057,7 @@ again: test_bit(WriteErrorSeen, &rdev->flags)) { sector_t first_bad; int bad_sectors; - int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + int bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), &first_bad, &bad_sectors); if (!bad) break; @@ -1089,7 +1089,7 @@ again: if (rdev) { if (s->syncing || s->expanding || s->expanded || s->replacing) - md_sync_acct(rdev->bdev, STRIPE_SECTORS); + md_sync_acct(rdev->bdev, RAID5_STRIPE_SECTORS(conf)); set_bit(STRIPE_IO_STARTED, &sh->state); @@ -1129,9 +1129,9 @@ again: else sh->dev[i].vec.bv_page = sh->dev[i].page; bi->bi_vcnt = 1; - bi->bi_io_vec[0].bv_len = STRIPE_SIZE; + bi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); bi->bi_io_vec[0].bv_offset = 0; - bi->bi_iter.bi_size = STRIPE_SIZE; + bi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); bi->bi_write_hint = sh->dev[i].write_hint; if (!rrdev) sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; @@ -1156,7 +1156,7 @@ again: if (rrdev) { if (s->syncing || s->expanding || s->expanded || s->replacing) - md_sync_acct(rrdev->bdev, STRIPE_SECTORS); + md_sync_acct(rrdev->bdev, RAID5_STRIPE_SECTORS(conf)); set_bit(STRIPE_IO_STARTED, &sh->state); @@ -1183,9 +1183,9 @@ again: WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags)); sh->dev[i].rvec.bv_page = sh->dev[i].page; rbi->bi_vcnt = 1; - rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; + rbi->bi_io_vec[0].bv_len = RAID5_STRIPE_SIZE(conf); rbi->bi_io_vec[0].bv_offset = 0; - rbi->bi_iter.bi_size = STRIPE_SIZE; + rbi->bi_iter.bi_size = RAID5_STRIPE_SIZE(conf); rbi->bi_write_hint = sh->dev[i].write_hint; sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET; /* @@ -1235,6 +1235,7 @@ async_copy_data(int frombio, struct bio *bio, struct page **page, int page_offset; struct async_submit_ctl submit; enum async_tx_flags flags = 0; + struct r5conf *conf = sh->raid_conf; if (bio->bi_iter.bi_sector >= sector) page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512; @@ -1256,8 +1257,8 @@ async_copy_data(int frombio, struct bio *bio, struct page **page, len -= b_offset; } - if (len > 0 && page_offset + len > STRIPE_SIZE) - clen = STRIPE_SIZE - page_offset; + if (len > 0 && page_offset + len > RAID5_STRIPE_SIZE(conf)) + clen = RAID5_STRIPE_SIZE(conf) - page_offset; else clen = len; @@ -1265,9 +1266,9 @@ async_copy_data(int frombio, struct bio *bio, struct page **page, b_offset += bvl.bv_offset; bio_page = bvl.bv_page; if (frombio) { - if (sh->raid_conf->skip_copy && + if (conf->skip_copy && b_offset == 0 && page_offset == 0 && - clen == STRIPE_SIZE && + clen == RAID5_STRIPE_SIZE(conf) && !no_skipcopy) *page = bio_page; else @@ -1292,6 +1293,7 @@ static void ops_complete_biofill(void *stripe_head_ref) { struct stripe_head *sh = stripe_head_ref; int i; + struct r5conf *conf = sh->raid_conf; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); @@ -1312,8 +1314,8 @@ static void ops_complete_biofill(void *stripe_head_ref) rbi = dev->read; dev->read = NULL; while (rbi && rbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { - rbi2 = r5_next_bio(rbi, dev->sector); + dev->sector + RAID5_STRIPE_SECTORS(conf)) { + rbi2 = r5_next_bio(conf, rbi, dev->sector); bio_endio(rbi); rbi = rbi2; } @@ -1330,6 +1332,7 @@ static void ops_run_biofill(struct stripe_head *sh) struct dma_async_tx_descriptor *tx = NULL; struct async_submit_ctl submit; int i; + struct r5conf *conf = sh->raid_conf; BUG_ON(sh->batch_head); pr_debug("%s: stripe %llu\n", __func__, @@ -1344,10 +1347,10 @@ static void ops_run_biofill(struct stripe_head *sh) dev->toread = NULL; spin_unlock_irq(&sh->stripe_lock); while (rbi && rbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { + dev->sector + RAID5_STRIPE_SECTORS(conf)) { tx = async_copy_data(0, rbi, &dev->page, dev->sector, tx, sh, 0); - rbi = r5_next_bio(rbi, dev->sector); + rbi = r5_next_bio(conf, rbi, dev->sector); } } } @@ -1429,9 +1432,11 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu) init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); + tx = async_xor(xor_dest, xor_srcs, 0, count, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; } @@ -1522,7 +1527,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) init_async_submit(&submit, ASYNC_TX_FENCE, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + tx = async_gen_syndrome(blocks, 0, count+2, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } else { /* Compute any data- or p-drive using XOR */ count = 0; @@ -1535,7 +1541,8 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu) init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); - tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit); + tx = async_xor(dest, blocks, 0, count, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); } return tx; @@ -1598,7 +1605,8 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); return async_gen_syndrome(blocks, 0, syndrome_disks+2, - STRIPE_SIZE, &submit); + RAID5_STRIPE_SIZE(sh->raid_conf), + &submit); } else { struct page *dest; int data_target; @@ -1621,7 +1629,8 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL, to_addr_conv(sh, percpu, 0)); - tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, + tx = async_xor(dest, blocks, 0, count, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL); @@ -1629,7 +1638,8 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) ops_complete_compute, sh, to_addr_conv(sh, percpu, 0)); return async_gen_syndrome(blocks, 0, count+2, - STRIPE_SIZE, &submit); + RAID5_STRIPE_SIZE(sh->raid_conf), + &submit); } } else { init_async_submit(&submit, ASYNC_TX_FENCE, NULL, @@ -1638,13 +1648,15 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu) if (failb == syndrome_disks) { /* We're missing D+P. */ return async_raid6_datap_recov(syndrome_disks+2, - STRIPE_SIZE, faila, - blocks, &submit); + RAID5_STRIPE_SIZE(sh->raid_conf), + faila, + blocks, &submit); } else { /* We're missing D+D. */ return async_raid6_2data_recov(syndrome_disks+2, - STRIPE_SIZE, faila, failb, - blocks, &submit); + RAID5_STRIPE_SIZE(sh->raid_conf), + faila, failb, + blocks, &submit); } } } @@ -1691,7 +1703,8 @@ ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu, init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); + tx = async_xor(xor_dest, xor_srcs, 0, count, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; } @@ -1711,7 +1724,8 @@ ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu, init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx, ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0)); - tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + tx = async_gen_syndrome(blocks, 0, count+2, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); return tx; } @@ -1752,7 +1766,7 @@ again: WARN_ON(dev->page != dev->orig_page); while (wbi && wbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { + dev->sector + RAID5_STRIPE_SECTORS(conf)) { if (wbi->bi_opf & REQ_FUA) set_bit(R5_WantFUA, &dev->flags); if (wbi->bi_opf & REQ_SYNC) @@ -1770,7 +1784,7 @@ again: clear_bit(R5_OVERWRITE, &dev->flags); } } - wbi = r5_next_bio(wbi, dev->sector); + wbi = r5_next_bio(conf, wbi, dev->sector); } if (head_sh->batch_head) { @@ -1910,9 +1924,11 @@ again: } if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); + tx = async_xor(xor_dest, xor_srcs, 0, count, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); if (!last_stripe) { j++; sh = list_first_entry(&sh->batch_list, struct stripe_head, @@ -1972,7 +1988,8 @@ again: } else init_async_submit(&submit, 0, tx, NULL, NULL, to_addr_conv(sh, percpu, j)); - tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); + tx = async_gen_syndrome(blocks, 0, count+2, + RAID5_STRIPE_SIZE(sh->raid_conf), &submit); if (!last_stripe) { j++; sh = list_first_entry(&sh->batch_list, struct stripe_head, @@ -2020,7 +2037,8 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) init_async_submit(&submit, 0, NULL, NULL, NULL, to_addr_conv(sh, percpu, 0)); - tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + tx = async_xor_val(xor_dest, xor_srcs, 0, count, + RAID5_STRIPE_SIZE(sh->raid_conf), &sh->ops.zero_sum_result, &submit); atomic_inc(&sh->count); @@ -2045,7 +2063,8 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu atomic_inc(&sh->count); init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check, sh, to_addr_conv(sh, percpu, 0)); - async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE, + async_syndrome_val(srcs, 0, count+2, + RAID5_STRIPE_SIZE(sh->raid_conf), &sh->ops.zero_sum_result, percpu->spare_page, &submit); } @@ -2217,9 +2236,9 @@ static int grow_stripes(struct r5conf *conf, int num) /** * scribble_alloc - allocate percpu scribble buffer for required size * of the scribble region - * @percpu - from for_each_present_cpu() of the caller - * @num - total number of disks in the array - * @cnt - scribble objs count for required size of the scribble region + * @percpu: from for_each_present_cpu() of the caller + * @num: total number of disks in the array + * @cnt: scribble objs count for required size of the scribble region * * The scribble buffer size must be enough to contain: * 1/ a struct page pointer for each device in the array +2 @@ -2275,7 +2294,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) percpu = per_cpu_ptr(conf->percpu, cpu); err = scribble_alloc(percpu, new_disks, - new_sectors / STRIPE_SECTORS); + new_sectors / RAID5_STRIPE_SECTORS(conf)); if (err) break; } @@ -2509,10 +2528,10 @@ static void raid5_end_read_request(struct bio * bi) */ pr_info_ratelimited( "md/raid:%s: read error corrected (%lu sectors at %llu on %s)\n", - mdname(conf->mddev), STRIPE_SECTORS, + mdname(conf->mddev), RAID5_STRIPE_SECTORS(conf), (unsigned long long)s, bdevname(rdev->bdev, b)); - atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); + atomic_add(RAID5_STRIPE_SECTORS(conf), &rdev->corrected_errors); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) @@ -2585,7 +2604,7 @@ static void raid5_end_read_request(struct bio * bi) if (!(set_bad && test_bit(In_sync, &rdev->flags) && rdev_set_badblocks( - rdev, sh->sector, STRIPE_SECTORS, 0))) + rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0))) md_error(conf->mddev, rdev); } } @@ -2637,7 +2656,7 @@ static void raid5_end_write_request(struct bio *bi) if (bi->bi_status) md_error(conf->mddev, rdev); else if (is_badblock(rdev, sh->sector, - STRIPE_SECTORS, + RAID5_STRIPE_SECTORS(conf), &first_bad, &bad_sectors)) set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); } else { @@ -2649,7 +2668,7 @@ static void raid5_end_write_request(struct bio *bi) set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); } else if (is_badblock(rdev, sh->sector, - STRIPE_SECTORS, + RAID5_STRIPE_SECTORS(conf), &first_bad, &bad_sectors)) { set_bit(R5_MadeGood, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) @@ -3283,13 +3302,13 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, /* check if page is covered */ sector_t sector = sh->dev[dd_idx].sector; for (bi=sh->dev[dd_idx].towrite; - sector < sh->dev[dd_idx].sector + STRIPE_SECTORS && + sector < sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf) && bi && bi->bi_iter.bi_sector <= sector; - bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) { + bi = r5_next_bio(conf, bi, sh->dev[dd_idx].sector)) { if (bio_end_sector(bi) >= sector) sector = bio_end_sector(bi); } - if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) + if (sector >= sh->dev[dd_idx].sector + RAID5_STRIPE_SECTORS(conf)) if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags)) sh->overwrite_disks++; } @@ -3314,7 +3333,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, set_bit(STRIPE_BITMAP_PENDING, &sh->state); spin_unlock_irq(&sh->stripe_lock); md_bitmap_startwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0); + RAID5_STRIPE_SECTORS(conf), 0); spin_lock_irq(&sh->stripe_lock); clear_bit(STRIPE_BITMAP_PENDING, &sh->state); if (!sh->batch_head) { @@ -3376,7 +3395,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, if (!rdev_set_badblocks( rdev, sh->sector, - STRIPE_SECTORS, 0)) + RAID5_STRIPE_SECTORS(conf), 0)) md_error(conf->mddev, rdev); rdev_dec_pending(rdev, conf->mddev); } @@ -3396,8 +3415,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, wake_up(&conf->wait_for_overlap); while (bi && bi->bi_iter.bi_sector < - sh->dev[i].sector + STRIPE_SECTORS) { - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); + sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { + struct bio *nextbi = r5_next_bio(conf, bi, sh->dev[i].sector); md_write_end(conf->mddev); bio_io_error(bi); @@ -3405,7 +3424,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, } if (bitmap_end) md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); + RAID5_STRIPE_SECTORS(conf), 0, 0); bitmap_end = 0; /* and fail all 'written' */ bi = sh->dev[i].written; @@ -3417,8 +3436,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, if (bi) bitmap_end = 1; while (bi && bi->bi_iter.bi_sector < - sh->dev[i].sector + STRIPE_SECTORS) { - struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); + sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { + struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector); md_write_end(conf->mddev); bio_io_error(bi); @@ -3441,9 +3460,9 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, if (bi) s->to_read--; while (bi && bi->bi_iter.bi_sector < - sh->dev[i].sector + STRIPE_SECTORS) { + sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) { struct bio *nextbi = - r5_next_bio(bi, sh->dev[i].sector); + r5_next_bio(conf, bi, sh->dev[i].sector); bio_io_error(bi); bi = nextbi; @@ -3451,7 +3470,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, } if (bitmap_end) md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0, 0); + RAID5_STRIPE_SECTORS(conf), 0, 0); /* If we were in the middle of a write the parity block might * still be locked - so just clear all R5_LOCKED flags */ @@ -3496,14 +3515,14 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && !rdev_set_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0)) + RAID5_STRIPE_SECTORS(conf), 0)) abort = 1; rdev = rcu_dereference(conf->disks[i].replacement); if (rdev && !test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags) && !rdev_set_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0)) + RAID5_STRIPE_SECTORS(conf), 0)) abort = 1; } rcu_read_unlock(); @@ -3511,7 +3530,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh, conf->recovery_disabled = conf->mddev->recovery_disabled; } - md_done_sync(conf->mddev, STRIPE_SECTORS, !abort); + md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), !abort); } static int want_replace(struct stripe_head *sh, int disk_idx) @@ -3710,7 +3729,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, return 0; } -/** +/* * handle_stripe_fill - read or compute data to satisfy pending requests. */ static void handle_stripe_fill(struct stripe_head *sh, @@ -3785,14 +3804,14 @@ returnbi: wbi = dev->written; dev->written = NULL; while (wbi && wbi->bi_iter.bi_sector < - dev->sector + STRIPE_SECTORS) { - wbi2 = r5_next_bio(wbi, dev->sector); + dev->sector + RAID5_STRIPE_SECTORS(conf)) { + wbi2 = r5_next_bio(conf, wbi, dev->sector); md_write_end(conf->mddev); bio_endio(wbi); wbi = wbi2; } md_bitmap_endwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, + RAID5_STRIPE_SECTORS(conf), !test_bit(STRIPE_DEGRADED, &sh->state), 0); if (head_sh->batch_head) { @@ -4099,7 +4118,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, */ set_bit(STRIPE_INSYNC, &sh->state); else { - atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); + atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); @@ -4107,7 +4126,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, "%llu-%llu\n", mdname(conf->mddev), (unsigned long long) sh->sector, (unsigned long long) sh->sector + - STRIPE_SECTORS); + RAID5_STRIPE_SECTORS(conf)); } else { sh->check_state = check_state_compute_run; set_bit(STRIPE_COMPUTE_RUN, &sh->state); @@ -4264,7 +4283,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, */ } } else { - atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); + atomic64_add(RAID5_STRIPE_SECTORS(conf), &conf->mddev->resync_mismatches); if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); @@ -4272,7 +4291,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, "%llu-%llu\n", mdname(conf->mddev), (unsigned long long) sh->sector, (unsigned long long) sh->sector + - STRIPE_SECTORS); + RAID5_STRIPE_SECTORS(conf)); } else { int *target = &sh->ops.target; @@ -4343,7 +4362,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) /* place all the copies on one channel */ init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, + sh->dev[i].page, 0, 0, RAID5_STRIPE_SIZE(conf), &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); @@ -4442,8 +4461,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) */ rdev = rcu_dereference(conf->disks[i].replacement); if (rdev && !test_bit(Faulty, &rdev->flags) && - rdev->recovery_offset >= sh->sector + STRIPE_SECTORS && - !is_badblock(rdev, sh->sector, STRIPE_SECTORS, + rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) && + !is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), &first_bad, &bad_sectors)) set_bit(R5_ReadRepl, &dev->flags); else { @@ -4457,7 +4476,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (rdev && test_bit(Faulty, &rdev->flags)) rdev = NULL; if (rdev) { - is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), &first_bad, &bad_sectors); if (s->blocked_rdev == NULL && (test_bit(Blocked, &rdev->flags) @@ -4484,7 +4503,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) } } else if (test_bit(In_sync, &rdev->flags)) set_bit(R5_Insync, &dev->flags); - else if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) + else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset) /* in sync if before recovery_offset */ set_bit(R5_Insync, &dev->flags); else if (test_bit(R5_UPTODATE, &dev->flags) && @@ -4573,12 +4592,12 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) rcu_read_unlock(); } +/* + * Return '1' if this is a member of batch, or '0' if it is a lone stripe or + * a head which can now be handled. + */ static int clear_batch_ready(struct stripe_head *sh) { - /* Return '1' if this is a member of batch, or - * '0' if it is a lone stripe or a head which can now be - * handled. - */ struct stripe_head *tmp; if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) return (sh->batch_head && sh->batch_head != sh); @@ -4682,6 +4701,16 @@ static void handle_stripe(struct stripe_head *sh) struct r5dev *pdev, *qdev; clear_bit(STRIPE_HANDLE, &sh->state); + + /* + * handle_stripe should not continue handle the batched stripe, only + * the head of batch list or lone stripe can continue. Otherwise we + * could see break_stripe_batch_list warns about the STRIPE_ACTIVE + * is set for the batched stripe. + */ + if (clear_batch_ready(sh)) + return; + if (test_and_set_bit_lock(STRIPE_ACTIVE, &sh->state)) { /* already being handled, ensure it gets handled * again when current action finishes */ @@ -4689,11 +4718,6 @@ static void handle_stripe(struct stripe_head *sh) return; } - if (clear_batch_ready(sh) ) { - clear_bit_unlock(STRIPE_ACTIVE, &sh->state); - return; - } - if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) break_stripe_batch_list(sh, 0); @@ -4927,7 +4951,7 @@ static void handle_stripe(struct stripe_head *sh) if ((s.syncing || s.replacing) && s.locked == 0 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); + md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); clear_bit(STRIPE_SYNCING, &sh->state); if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) wake_up(&conf->wait_for_overlap); @@ -4995,7 +5019,7 @@ static void handle_stripe(struct stripe_head *sh) clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); wake_up(&conf->wait_for_overlap); - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); + md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1); } if (s.expanding && s.locked == 0 && @@ -5025,14 +5049,14 @@ finish: /* We own a safe reference to the rdev */ rdev = conf->disks[i].rdev; if (!rdev_set_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0)) + RAID5_STRIPE_SECTORS(conf), 0)) md_error(conf->mddev, rdev); rdev_dec_pending(rdev, conf->mddev); } if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0); + RAID5_STRIPE_SECTORS(conf), 0); rdev_dec_pending(rdev, conf->mddev); } if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { @@ -5041,7 +5065,7 @@ finish: /* rdev have been moved down */ rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS, 0); + RAID5_STRIPE_SECTORS(conf), 0); rdev_dec_pending(rdev, conf->mddev); } } @@ -5505,7 +5529,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) /* Skip discard while reshape is happening */ return; - logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1); + logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); last_sector = bio_end_sector(bi); bi->bi_next = NULL; @@ -5520,7 +5544,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) last_sector *= conf->chunk_sectors; for (; logical_sector < last_sector; - logical_sector += STRIPE_SECTORS) { + logical_sector += RAID5_STRIPE_SECTORS(conf)) { DEFINE_WAIT(w); int d; again: @@ -5565,7 +5589,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) d++) md_bitmap_startwrite(mddev->bitmap, sh->sector, - STRIPE_SECTORS, + RAID5_STRIPE_SECTORS(conf), 0); sh->bm_seq = conf->seq_flush + 1; set_bit(STRIPE_BIT_DELAY, &sh->state); @@ -5630,12 +5654,12 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) return true; } - logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1); + logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); last_sector = bio_end_sector(bi); bi->bi_next = NULL; prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); - for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { + for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) { int previous; int seq; @@ -5733,8 +5757,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) do_flush = false; } - if (!sh->batch_head || sh == sh->batch_head) - set_bit(STRIPE_HANDLE, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); if ((!sh->batch_head || sh == sh->batch_head) && (bi->bi_opf & REQ_SYNC) && @@ -5917,7 +5940,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk } INIT_LIST_HEAD(&stripes); - for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { + for (i = 0; i < reshape_sectors; i += RAID5_STRIPE_SECTORS(conf)) { int j; int skipped_disk = 0; sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1); @@ -5938,7 +5961,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk skipped_disk = 1; continue; } - memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE); + memset(page_address(sh->dev[j].page), 0, RAID5_STRIPE_SIZE(conf)); set_bit(R5_Expanded, &sh->dev[j].flags); set_bit(R5_UPTODATE, &sh->dev[j].flags); } @@ -5973,7 +5996,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk set_bit(STRIPE_EXPAND_SOURCE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); - first_sector += STRIPE_SECTORS; + first_sector += RAID5_STRIPE_SECTORS(conf); } /* Now that the sources are clearly marked, we can release * the destination stripes @@ -6079,11 +6102,12 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && !conf->fullsync && !md_bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && - sync_blocks >= STRIPE_SECTORS) { + sync_blocks >= RAID5_STRIPE_SECTORS(conf)) { /* we can skip this block, and probably more */ - sync_blocks /= STRIPE_SECTORS; + sync_blocks /= RAID5_STRIPE_SECTORS(conf); *skipped = 1; - return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ + /* keep things rounded to whole stripes */ + return sync_blocks * RAID5_STRIPE_SECTORS(conf); } md_bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); @@ -6116,7 +6140,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n raid5_release_stripe(sh); - return STRIPE_SECTORS; + return RAID5_STRIPE_SECTORS(conf); } static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, @@ -6139,14 +6163,14 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio, int handled = 0; logical_sector = raid_bio->bi_iter.bi_sector & - ~((sector_t)STRIPE_SECTORS-1); + ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1); sector = raid5_compute_sector(conf, logical_sector, 0, &dd_idx, NULL); last_sector = bio_end_sector(raid_bio); for (; logical_sector < last_sector; - logical_sector += STRIPE_SECTORS, - sector += STRIPE_SECTORS, + logical_sector += RAID5_STRIPE_SECTORS(conf), + sector += RAID5_STRIPE_SECTORS(conf), scnt++) { if (scnt < offset) @@ -6479,6 +6503,77 @@ raid5_rmw_level = __ATTR(rmw_level, S_IRUGO | S_IWUSR, raid5_show_rmw_level, raid5_store_rmw_level); +static ssize_t +raid5_show_stripe_size(struct mddev *mddev, char *page) +{ + struct r5conf *conf; + int ret = 0; + + spin_lock(&mddev->lock); + conf = mddev->private; + if (conf) + ret = sprintf(page, "%lu\n", RAID5_STRIPE_SIZE(conf)); + spin_unlock(&mddev->lock); + return ret; +} + +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE +static ssize_t +raid5_store_stripe_size(struct mddev *mddev, const char *page, size_t len) +{ + struct r5conf *conf; + unsigned long new; + int err; + + if (len >= PAGE_SIZE) + return -EINVAL; + if (kstrtoul(page, 10, &new)) + return -EINVAL; + + /* + * The value should not be bigger than PAGE_SIZE. It requires to + * be multiple of DEFAULT_STRIPE_SIZE. + */ + if (new % DEFAULT_STRIPE_SIZE != 0 || new > PAGE_SIZE || new == 0) + return -EINVAL; + + err = mddev_lock(mddev); + if (err) + return err; + + conf = mddev->private; + if (!conf) { + err = -ENODEV; + goto out_unlock; + } + + if (new == conf->stripe_size) + goto out_unlock; + + pr_debug("md/raid: change stripe_size from %lu to %lu\n", + conf->stripe_size, new); + + mddev_suspend(mddev); + conf->stripe_size = new; + conf->stripe_shift = ilog2(new) - 9; + conf->stripe_sectors = new >> 9; + mddev_resume(mddev); + +out_unlock: + mddev_unlock(mddev); + return err ?: len; +} + +static struct md_sysfs_entry +raid5_stripe_size = __ATTR(stripe_size, 0644, + raid5_show_stripe_size, + raid5_store_stripe_size); +#else +static struct md_sysfs_entry +raid5_stripe_size = __ATTR(stripe_size, 0444, + raid5_show_stripe_size, + NULL); +#endif static ssize_t raid5_show_preread_threshold(struct mddev *mddev, char *page) @@ -6667,6 +6762,7 @@ static struct attribute *raid5_attrs[] = { &raid5_group_thread_cnt.attr, &raid5_skip_copy.attr, &raid5_rmw_level.attr, + &raid5_stripe_size.attr, &r5c_journal_mode.attr, &ppl_write_hint.attr, NULL, @@ -6766,7 +6862,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu conf->previous_raid_disks), max(conf->chunk_sectors, conf->prev_chunk_sectors) - / STRIPE_SECTORS)) { + / RAID5_STRIPE_SECTORS(conf))) { free_scratch_buffer(conf, percpu); return -ENOMEM; } @@ -6918,6 +7014,12 @@ static struct r5conf *setup_conf(struct mddev *mddev) conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL); if (conf == NULL) goto abort; + +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + conf->stripe_size = DEFAULT_STRIPE_SIZE; + conf->stripe_shift = ilog2(DEFAULT_STRIPE_SIZE) - 9; + conf->stripe_sectors = DEFAULT_STRIPE_SIZE >> 9; +#endif INIT_LIST_HEAD(&conf->free_list); INIT_LIST_HEAD(&conf->pending_list); conf->pending_data = kcalloc(PENDING_IO_MAX, @@ -7069,8 +7171,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) conf->min_nr_stripes = NR_STRIPES; if (mddev->reshape_position != MaxSector) { int stripes = max_t(int, - ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, - ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + ((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4, + ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4); conf->min_nr_stripes = max(NR_STRIPES, stripes); if (conf->min_nr_stripes != NR_STRIPES) pr_info("md/raid:%s: force stripe size %d for reshape\n", @@ -7801,14 +7903,14 @@ static int check_stripe_cache(struct mddev *mddev) * stripe_heads first. */ struct r5conf *conf = mddev->private; - if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4 + if (((mddev->chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 > conf->min_nr_stripes || - ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4 + ((mddev->new_chunk_sectors << 9) / RAID5_STRIPE_SIZE(conf)) * 4 > conf->min_nr_stripes) { pr_warn("md/raid:%s: reshape: not enough stripes. Needed %lu\n", mdname(mddev), ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9) - / STRIPE_SIZE)*4); + / RAID5_STRIPE_SIZE(conf))*4); return 0; } return 1; @@ -7944,8 +8046,8 @@ static int raid5_start_reshape(struct mddev *mddev) else rdev->recovery_offset = 0; - if (sysfs_link_rdev(mddev, rdev)) - /* Failure here is OK */; + /* Failure here is OK */ + sysfs_link_rdev(mddev, rdev); } } else if (rdev->raid_disk >= conf->previous_raid_disks && !test_bit(Faulty, &rdev->flags)) { @@ -8140,7 +8242,7 @@ static void *raid5_takeover_raid1(struct mddev *mddev) while (chunksect && (mddev->array_sectors & (chunksect-1))) chunksect >>= 1; - if ((chunksect<<9) < STRIPE_SIZE) + if ((chunksect<<9) < RAID5_STRIPE_SIZE((struct r5conf *)mddev->private)) /* array size does not allow a suitable chunk size */ return ERR_PTR(-EINVAL); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index f90e0704bed9..7fb3b26a181a 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -472,32 +472,20 @@ struct disk_info { */ #define NR_STRIPES 256 +#define DEFAULT_STRIPE_SIZE 4096 + +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE #define STRIPE_SIZE PAGE_SIZE #define STRIPE_SHIFT (PAGE_SHIFT - 9) #define STRIPE_SECTORS (STRIPE_SIZE>>9) +#endif + #define IO_THRESHOLD 1 #define BYPASS_THRESHOLD 1 #define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head)) #define HASH_MASK (NR_HASH - 1) #define MAX_STRIPE_BATCH 8 -/* bio's attached to a stripe+device for I/O are linked together in bi_sector - * order without overlap. There may be several bio's per stripe+device, and - * a bio could span several devices. - * When walking this list for a particular stripe+device, we must never proceed - * beyond a bio that extends past this device, as the next bio might no longer - * be valid. - * This function is used to determine the 'next' bio in the list, given the - * sector of the current stripe+device - */ -static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector) -{ - if (bio_end_sector(bio) < sector + STRIPE_SECTORS) - return bio->bi_next; - else - return NULL; -} - /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64. * This is because we sometimes take all the spinlocks * and creating that much locking depth can cause @@ -574,6 +562,11 @@ struct r5conf { int raid_disks; int max_nr_stripes; int min_nr_stripes; +#if PAGE_SIZE != DEFAULT_STRIPE_SIZE + unsigned long stripe_size; + unsigned int stripe_shift; + unsigned long stripe_sectors; +#endif /* reshape_progress is the leading edge of a 'reshape' * It has value MaxSector when no reshape is happening @@ -690,6 +683,32 @@ struct r5conf { struct r5pending_data *next_pending_data; }; +#if PAGE_SIZE == DEFAULT_STRIPE_SIZE +#define RAID5_STRIPE_SIZE(conf) STRIPE_SIZE +#define RAID5_STRIPE_SHIFT(conf) STRIPE_SHIFT +#define RAID5_STRIPE_SECTORS(conf) STRIPE_SECTORS +#else +#define RAID5_STRIPE_SIZE(conf) ((conf)->stripe_size) +#define RAID5_STRIPE_SHIFT(conf) ((conf)->stripe_shift) +#define RAID5_STRIPE_SECTORS(conf) ((conf)->stripe_sectors) +#endif + +/* bio's attached to a stripe+device for I/O are linked together in bi_sector + * order without overlap. There may be several bio's per stripe+device, and + * a bio could span several devices. + * When walking this list for a particular stripe+device, we must never proceed + * beyond a bio that extends past this device, as the next bio might no longer + * be valid. + * This function is used to determine the 'next' bio in the list, given the + * sector of the current stripe+device + */ +static inline struct bio *r5_next_bio(struct r5conf *conf, struct bio *bio, sector_t sector) +{ + if (bio_end_sector(bio) < sector + RAID5_STRIPE_SECTORS(conf)) + return bio->bi_next; + else + return NULL; +} /* * Our supported algorithms diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h index 1f2d8c81f0e0..e5a98a16f9b0 100644 --- a/include/uapi/linux/raid/md_p.h +++ b/include/uapi/linux/raid/md_p.h @@ -123,7 +123,7 @@ typedef struct mdp_device_descriptor_s { /* * Notes: - * - if an array is being reshaped (restriped) in order to change the + * - if an array is being reshaped (restriped) in order to change * the number of active devices in the array, 'raid_disks' will be * the larger of the old and new numbers. 'delta_disks' will * be the "new - old". So if +ve, raid_disks is the new value, and |