From: Philipp Reisner Date: Tue, 31 Jul 2012 09:22:58 +0000 (+0200) Subject: drbd: Finish requests that completed while IO was frozen X-Git-Tag: v3.6-rc4~18^2~1^2~1 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=509fc019e534bdf5f3969d78c53184db4cf7ff48;p=~emulex%2Finfiniband.git drbd: Finish requests that completed while IO was frozen Requests of an acked epoch are stored on the barrier_acked_requests list. If the private bio of such a request completes while IO on the drbd device is suspended [req_mod(completed_ok)], the request stays on that list. When thawing IO because the fence_peer handler returned, we use tl_clear() to apply the connection_lost_while_pending event to all requests on the transfer-log and the barrier_acked_requests list. Up to now, the connection_lost_while_pending event was not applied to requests on the barrier_acked_requests list. This is fixed: the connection_lost_while_pending and resend events are now applied to requests on the barrier_acked_requests list. For that to work, the resend event must correctly finish (local only) READs. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2e0e7fc1dbb..136db95b212 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void _tl_clear(struct drbd_conf *mdev); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); @@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) /* Actions operating on the disk state, also want to work on requests that got barrier acked. 
*/ - switch (what) { - case fail_frozen_disk_io: - case restart_frozen_disk_io: - list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { - req = list_entry(le, struct drbd_request, tl_requests); - _req_mod(req, what); - } - case connection_lost_while_pending: - case resend: - break; - default: - dev_err(DEV, "what = %d in _tl_restart()\n", what); + list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); } } @@ -458,12 +450,17 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) * receiver thread and the worker thread. */ void tl_clear(struct drbd_conf *mdev) +{ + spin_lock_irq(&mdev->req_lock); + _tl_clear(mdev); + spin_unlock_irq(&mdev->req_lock); +} + +static void _tl_clear(struct drbd_conf *mdev) { struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); - _tl_restart(mdev, connection_lost_while_pending); /* we expect this list to be empty. */ @@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev) memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - spin_unlock_irq(&mdev->req_lock); } void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) @@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev); if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } spin_lock_irq(&mdev->req_lock); + _tl_clear(mdev); _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 0bb1e41f136..01b2ac641c7 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, 
break; case resend: + /* Simply complete (local only) READs. */ + if (!(req->rq_state & RQ_WRITE) && !req->w.cb) { + _req_may_be_done(req, m); + break; + } + /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK before the connection loss (B&C only); only P_BARRIER_ACK was missing. Trowing them out of the TL here by pretending we got a BARRIER_ACK