summaryrefslogtreecommitdiff
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c148
1 files changed, 87 insertions, 61 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1ba97fdc6df1..553d54b87052 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
static bool stripe_can_batch(struct stripe_head *sh)
{
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+ !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
is_full_stripe_write(sh);
}
@@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
< IO_THRESHOLD)
md_wakeup_thread(conf->mddev->thread);
+ if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
+ int seq = sh->bm_seq;
+ if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
+ sh->batch_head->bm_seq > seq)
+ seq = sh->batch_head->bm_seq;
+ set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
+ sh->batch_head->bm_seq = seq;
+ }
+
atomic_inc(&sh->count);
unlock_out:
unlock_two_stripes(head, sh);
@@ -1822,7 +1832,7 @@ again:
} else
init_async_submit(&submit, 0, tx, NULL, NULL,
to_addr_conv(sh, percpu, j));
- async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+ tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
if (!last_stripe) {
j++;
sh = list_first_entry(&sh->batch_list, struct stripe_head,
@@ -2987,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
(unsigned long long)(*bip)->bi_iter.bi_sector,
(unsigned long long)sh->sector, dd_idx);
- spin_unlock_irq(&sh->stripe_lock);
if (conf->mddev->bitmap && firstwrite) {
+ /* Cannot hold spinlock over bitmap_startwrite,
+ * but must ensure this isn't added to a batch until
+ * we have added to the bitmap and set bm_seq.
+ * So set STRIPE_BITMAP_PENDING to prevent
+ * batching.
+ * If multiple add_stripe_bio() calls race here they
+ * much all set STRIPE_BITMAP_PENDING. So only the first one
+ * to complete "bitmap_startwrite" gets to set
+ * STRIPE_BIT_DELAY. This is important as once a stripe
+ * is added to a batch, STRIPE_BIT_DELAY cannot be changed
+ * any more.
+ */
+ set_bit(STRIPE_BITMAP_PENDING, &sh->state);
+ spin_unlock_irq(&sh->stripe_lock);
bitmap_startwrite(conf->mddev->bitmap, sh->sector,
STRIPE_SECTORS, 0);
- sh->bm_seq = conf->seq_flush+1;
- set_bit(STRIPE_BIT_DELAY, &sh->state);
+ spin_lock_irq(&sh->stripe_lock);
+ clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
+ if (!sh->batch_head) {
+ sh->bm_seq = conf->seq_flush+1;
+ set_bit(STRIPE_BIT_DELAY, &sh->state);
+ }
}
+ spin_unlock_irq(&sh->stripe_lock);
if (stripe_can_batch(sh))
stripe_add_to_batch_list(conf, sh);
@@ -3392,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh,
set_bit(STRIPE_HANDLE, &sh->state);
}
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+ unsigned long handle_flags);
/* handle_stripe_clean_event
* any written block on an uptodate or failed drive can be returned.
* Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
@@ -3405,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf,
int discard_pending = 0;
struct stripe_head *head_sh = sh;
bool do_endio = false;
- int wakeup_nr = 0;
for (i = disks; i--; )
if (sh->dev[i].written) {
@@ -3494,44 +3523,8 @@ unhash:
if (atomic_dec_and_test(&conf->pending_full_writes))
md_wakeup_thread(conf->mddev->thread);
- if (!head_sh->batch_head || !do_endio)
- return;
- for (i = 0; i < head_sh->disks; i++) {
- if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
- wakeup_nr++;
- }
- while (!list_empty(&head_sh->batch_list)) {
- int i;
- sh = list_first_entry(&head_sh->batch_list,
- struct stripe_head, batch_list);
- list_del_init(&sh->batch_list);
-
- set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
- head_sh->state & ~((1 << STRIPE_ACTIVE) |
- (1 << STRIPE_PREREAD_ACTIVE) |
- STRIPE_EXPAND_SYNC_FLAG));
- sh->check_state = head_sh->check_state;
- sh->reconstruct_state = head_sh->reconstruct_state;
- for (i = 0; i < sh->disks; i++) {
- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- wakeup_nr++;
- sh->dev[i].flags = head_sh->dev[i].flags;
- }
-
- spin_lock_irq(&sh->stripe_lock);
- sh->batch_head = NULL;
- spin_unlock_irq(&sh->stripe_lock);
- if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
- set_bit(STRIPE_HANDLE, &sh->state);
- release_stripe(sh);
- }
-
- spin_lock_irq(&head_sh->stripe_lock);
- head_sh->batch_head = NULL;
- spin_unlock_irq(&head_sh->stripe_lock);
- wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
- if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
- set_bit(STRIPE_HANDLE, &head_sh->state);
+ if (head_sh->batch_head && do_endio)
+ break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
}
static void handle_stripe_dirtying(struct r5conf *conf,
@@ -4172,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
static int clear_batch_ready(struct stripe_head *sh)
{
+ /* Return '1' if this is a member of batch, or
+ * '0' if it is a lone stripe or a head which can now be
+ * handled.
+ */
struct stripe_head *tmp;
if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
- return 0;
+ return (sh->batch_head && sh->batch_head != sh);
spin_lock(&sh->stripe_lock);
if (!sh->batch_head) {
spin_unlock(&sh->stripe_lock);
@@ -4202,38 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh)
return 0;
}
-static void check_break_stripe_batch_list(struct stripe_head *sh)
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+ unsigned long handle_flags)
{
- struct stripe_head *head_sh, *next;
+ struct stripe_head *sh, *next;
int i;
-
- if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
- return;
-
- head_sh = sh;
+ int do_wakeup = 0;
list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
list_del_init(&sh->batch_list);
- set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
- head_sh->state & ~((1 << STRIPE_ACTIVE) |
- (1 << STRIPE_PREREAD_ACTIVE) |
- (1 << STRIPE_DEGRADED) |
- STRIPE_EXPAND_SYNC_FLAG));
+ WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+ (1 << STRIPE_SYNCING) |
+ (1 << STRIPE_REPLACED) |
+ (1 << STRIPE_PREREAD_ACTIVE) |
+ (1 << STRIPE_DELAYED) |
+ (1 << STRIPE_BIT_DELAY) |
+ (1 << STRIPE_FULL_WRITE) |
+ (1 << STRIPE_BIOFILL_RUN) |
+ (1 << STRIPE_COMPUTE_RUN) |
+ (1 << STRIPE_OPS_REQ_PENDING) |
+ (1 << STRIPE_DISCARD) |
+ (1 << STRIPE_BATCH_READY) |
+ (1 << STRIPE_BATCH_ERR) |
+ (1 << STRIPE_BITMAP_PENDING)));
+ WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+ (1 << STRIPE_REPLACED)));
+
+ set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+ (1 << STRIPE_DEGRADED)),
+ head_sh->state & (1 << STRIPE_INSYNC));
+
sh->check_state = head_sh->check_state;
sh->reconstruct_state = head_sh->reconstruct_state;
- for (i = 0; i < sh->disks; i++)
+ for (i = 0; i < sh->disks; i++) {
+ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ do_wakeup = 1;
sh->dev[i].flags = head_sh->dev[i].flags &
(~((1 << R5_WriteError) | (1 << R5_Overlap)));
-
+ }
spin_lock_irq(&sh->stripe_lock);
sh->batch_head = NULL;
spin_unlock_irq(&sh->stripe_lock);
-
- set_bit(STRIPE_HANDLE, &sh->state);
+ if (handle_flags == 0 ||
+ sh->state & handle_flags)
+ set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
}
+ spin_lock_irq(&head_sh->stripe_lock);
+ head_sh->batch_head = NULL;
+ spin_unlock_irq(&head_sh->stripe_lock);
+ for (i = 0; i < head_sh->disks; i++)
+ if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+ do_wakeup = 1;
+ if (head_sh->state & handle_flags)
+ set_bit(STRIPE_HANDLE, &head_sh->state);
+
+ if (do_wakeup)
+ wake_up(&head_sh->raid_conf->wait_for_overlap);
}
static void handle_stripe(struct stripe_head *sh)
@@ -4258,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh)
return;
}
- check_break_stripe_batch_list(sh);
+ if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+ break_stripe_batch_list(sh, 0);
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
spin_lock(&sh->stripe_lock);
@@ -4312,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh)
if (s.failed > conf->max_degraded) {
sh->check_state = 0;
sh->reconstruct_state = 0;
+ break_stripe_batch_list(sh, 0);
if (s.to_read+s.to_write+s.written)
handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
if (s.syncing + s.replacing)