From 197296ffed71b7d5056d8618a07fec145b040303 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 26 Mar 2012 16:47:11 +0200 Subject: drbd: Delay/reject other state changes while establishing a connection Changes to the role and disk state should be delayed or rejected while we establish a connection. This is necessary, since the peer will base its resync decision on the UUIDs and the state we sent in the drbd_connect() function. The most prominent example for this race is becoming primary after sending state and UUIDs and before the state changes to C_WF_CONNECTION. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 13 +++++++++++++ drivers/block/drbd/drbd_nl.c | 2 +- drivers/block/drbd/drbd_receiver.c | 10 +++++++++- 4 files changed, 24 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 746f7933bf86..f215ad430bb8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -850,6 +850,7 @@ enum { NEW_CUR_UUID, /* Create new current UUID when thawing IO */ AL_SUSPENDED, /* Activity logging is currently suspended. */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ + STATE_SENT, /* Do not change state/UUIDs while this is set */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 519f286a4299..deccff3af774 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -841,6 +841,13 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns, if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) rv = SS_IN_TRANSIENT_STATE; + /* While establishing a connection only allow cstate to change. + Delay/refuse role changes, detach attach etc... */ + if (test_bit(STATE_SENT, &mdev->flags) && + !(os.conn == C_WF_REPORT_PARAMS || + (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) + rv = SS_IN_TRANSIENT_STATE; + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) rv = SS_NEED_CONNECTION; @@ -1668,6 +1675,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) drbd_send_state(mdev, ns); + /* Wake up role changes, that were delayed because of connection establishing */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { + clear_bit(STATE_SENT, &mdev->flags); + wake_up(&mdev->state_wait); + } + /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk * failure, or because of connection loss. diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1bbbad302ae7..308beb8c0c1e 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data) */ spin_lock_irq(&mdev->req_lock); ns = mdev->state; - if (ns.conn < C_WF_REPORT_PARAMS) { + if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) { ns.pdsk = nps; _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d601501c336a..9db93ff11c02 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -751,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev) { struct socket *s, *sock, *msock; int try, h, ok; + enum drbd_state_rv rv; D_ASSERT(!mdev->data.socket); @@ -897,6 +898,7 @@ retry: if (drbd_send_protocol(mdev) == -1) return -1; + set_bit(STATE_SENT, &mdev->flags); drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sizes(mdev, 0, 0); drbd_send_uuids(mdev); @@ -904,7 +906,13 @@ retry: clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags); - if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) + spin_lock_irq(&mdev->req_lock); + rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL); + if (mdev->state.conn != C_WF_REPORT_PARAMS) + clear_bit(STATE_SENT, &mdev->flags); + spin_unlock_irq(&mdev->req_lock); + + if (rv < SS_SUCCESS) return 0; drbd_thread_start(&mdev->asender); -- cgit v1.2.3-58-ga151