From 19d5f834d6aff7efb1c9353523865c5bce869470 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 10 Sep 2011 17:21:17 +1000
Subject: md/raid10: unify handling of write completion.

A write can complete at two different places:
1/ when the last member-device write completes, through
   raid10_end_write_request
2/ in make_request() when we remove the initial bias from ->remaining.

These two should do exactly the same thing and the comment says they
do, but they don't.

So factor the correct code out into a function and call it in both
places.  This makes the code much more similar to RAID1.

The difference is only significant if there is an error, and they
usually take a while, so it is unlikely that there will be an error
already when make_request is completing, so this is unlikely to cause
real problems.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid10.c | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

(limited to 'drivers/md/raid10.c')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8b29cd4f01c8..f6873fc8e5ee 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -337,6 +337,21 @@ static void close_write(r10bio_t *r10_bio)
 	md_write_end(r10_bio->mddev);
 }
 
+static void one_write_done(r10bio_t *r10_bio)
+{
+	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		if (test_bit(R10BIO_WriteError, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else {
+			close_write(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				raid_end_bio_io(r10_bio);
+		}
+	}
+}
+
 static void raid10_end_write_request(struct bio *bio, int error)
 {
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -387,17 +402,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		if (test_bit(R10BIO_WriteError, &r10_bio->state))
-			reschedule_retry(r10_bio);
-		else {
-			close_write(r10_bio);
-			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
-				reschedule_retry(r10_bio);
-			else
-				raid_end_bio_io(r10_bio);
-		}
-	}
+	one_write_done(r10_bio);
 	if (dec_rdev)
 		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
@@ -1127,15 +1132,8 @@ retry_write:
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		/* This matches the end of raid10_end_write_request() */
-		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
-				r10_bio->sectors,
-				!test_bit(R10BIO_Degraded, &r10_bio->state),
-				0);
-		md_write_end(mddev);
-		raid_end_bio_io(r10_bio);
-	}
+	/* Remove the bias on 'remaining' */
+	one_write_done(r10_bio);
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
-- 
cgit v1.2.3


From 079fa166a2874985ae58b2e21e26e1cbc91127d4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Sat, 10 Sep 2011 17:21:23 +1000
Subject: md/raid1,10: Remove use-after-free bug in make_request.

A single request to RAID1 or RAID10 might result in multiple
requests if there are known bad blocks that need to be avoided.

To detect if we need to submit another write request we test:
 	if (sectors_handled < (bio->bi_size >> 9)) {

However this is after we call **_write_done() so the 'bio' no longer
belongs to us - the writes could have completed and the bio freed.

So move the **_write_done call until after the test against
bio->bi_size.

This addresses https://bugzilla.kernel.org/show_bug.cgi?id=41862

Reported-by: Bruno Wolff III <bruno@wolff.to>
Tested-by: Bruno Wolff III <bruno@wolff.to>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid10.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'drivers/md/raid10.c')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f6873fc8e5ee..d7a8468ddeab 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1132,13 +1132,12 @@ retry_write:
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 
-	/* Remove the bias on 'remaining' */
-	one_write_done(r10_bio);
-
-	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	/* Don't remove the bias on 'remaining' (one_write_done) until
+	 * after checking if we need to go around again.
+	 */
 
 	if (sectors_handled < (bio->bi_size >> 9)) {
+		one_write_done(r10_bio);
 		/* We need another r10_bio.  It has already been counted
 		 * in bio->bi_phys_segments.
 		 */
@@ -1152,6 +1151,10 @@ retry_write:
 		r10_bio->state = 0;
 		goto retry_write;
 	}
+	one_write_done(r10_bio);
+
+	/* In case raid10d snuck in to freeze_array */
+	wake_up(&conf->wait_barrier);
 
 	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
-- 
cgit v1.2.3