drbd: don't send out P_BARRIER with stale information

We must only send P_BARRIER for epochs we actually sent P_DATA in. If we (re-)establish a connection, we reinitialized the send.current_epoch_nr, but forgot to reset send.current_epoch_writes.

This could result in a spurious P_BARRIER with stale epoch information, and a disconnect/reconnect cycle once the then "unexpected" P_BARRIER_ACK is received:

  BAD! BarrierAck #28823 received, expected #28829!

Introduce re_init_if_first_write() and maybe_send_barrier() helpers, and call them appropriately for read/write/set-out-of-sync requests.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2012-08-20 11:05:23 +0200
committer: Philipp Reisner <philipp.reisner@linbit.com> 2012-11-09 14:08:19 +0100
commit: 4eb9b3cba00471a01699cceb0f4b1f0cb8111ee2 (patch)
tree: 55ee26423ac7d71efc76d8436622563ea272a549 /drivers/block
parent: 08332d73250eec349b055843a503d45a9b5c13b6 (diff)
1 files changed, 25 insertions, 24 deletions
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1c9c6fd332c3..c674f17773a6 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1265,6 +1265,27 @@ int w_send_write_hint(struct drbd_work *w, int cancel)
 	return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
 }
 
+static void re_init_if_first_write(struct drbd_tconn *tconn, unsigned int epoch)
+{
+	if (!tconn->send.seen_any_write_yet) {	/* first write seen */
+		tconn->send.seen_any_write_yet = true;
+		tconn->send.current_epoch_nr = epoch;
+		tconn->send.current_epoch_writes = 0;	/* drop stale count */
+	}
+}
+
+static void maybe_send_barrier(struct drbd_tconn *tconn, unsigned int epoch)
+{
+	/* no write seen on this connection yet: there is no epoch to close */
+	if (!tconn->send.seen_any_write_yet)
+		return;
+	if (tconn->send.current_epoch_nr != epoch) {
+		if (tconn->send.current_epoch_writes) /* skip empty epochs */
+			drbd_send_barrier(tconn);
+		tconn->send.current_epoch_nr = epoch;
+	}
+}
+
 int w_send_out_of_sync(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
@@ -1277,19 +1298,11 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
 		return 0;
 	}
 
-	if (!tconn->send.seen_any_write_yet) {
-		tconn->send.seen_any_write_yet = true;
-		tconn->send.current_epoch_nr = req->epoch;
-	}
-	if (tconn->send.current_epoch_nr != req->epoch) {
-		if (tconn->send.current_epoch_writes)
-			drbd_send_barrier(tconn);
-		tconn->send.current_epoch_nr = req->epoch;
-	}
 	/* this time, no tconn->send.current_epoch_writes++;
 	 * If it was sent, it was the closing barrier for the last
 	 * replicated epoch, before we went into AHEAD mode.
 	 * No more barriers will be sent, until we leave AHEAD mode again. */
+	maybe_send_barrier(tconn, req->epoch);
 
 	err = drbd_send_out_of_sync(mdev, req);
 	req_mod(req, OOS_HANDED_TO_NETWORK);
@@ -1315,15 +1328,8 @@ int w_send_dblock(struct drbd_work *w, int cancel)
 		return 0;
 	}
 
-	if (!tconn->send.seen_any_write_yet) {
-		tconn->send.seen_any_write_yet = true;
-		tconn->send.current_epoch_nr = req->epoch;
-	}
-	if (tconn->send.current_epoch_nr != req->epoch) {
-		if (tconn->send.current_epoch_writes)
-			drbd_send_barrier(tconn);
-		tconn->send.current_epoch_nr = req->epoch;
-	}
+	re_init_if_first_write(tconn, req->epoch);
+	maybe_send_barrier(tconn, req->epoch);
 	tconn->send.current_epoch_writes++;
 
 	err = drbd_send_dblock(mdev, req);
@@ -1352,12 +1358,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
 
 	/* Even read requests may close a write epoch,
 	 * if there was any yet. */
-	if (tconn->send.seen_any_write_yet &&
-	    tconn->send.current_epoch_nr != req->epoch) {
-		if (tconn->send.current_epoch_writes)
-			drbd_send_barrier(tconn);
-		tconn->send.current_epoch_nr = req->epoch;
-	}
+	maybe_send_barrier(tconn, req->epoch);
 
 	err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size,
 				 (unsigned long)req);
author	Lars Ellenberg <lars.ellenberg@linbit.com>	2012-08-20 11:05:23 +0200
committer	Philipp Reisner <philipp.reisner@linbit.com>	2012-11-09 14:08:19 +0100
commit	4eb9b3cba00471a01699cceb0f4b1f0cb8111ee2 (patch)
tree	55ee26423ac7d71efc76d8436622563ea272a549 /drivers/block
parent	08332d73250eec349b055843a503d45a9b5c13b6 (diff)