From: Arlin Davis Date: Fri, 3 Dec 2010 18:25:46 +0000 (-0800) Subject: cma: disconnect can block for excessive times waiting for rdma_cm DREP timeout X-Git-Tag: dapl-2.0.31-1~12 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=c269c9ab83a72a2b4ffa972697b83572410d9cea;p=~ardavis%2Fdapl.git cma: disconnect can block for excessive times waiting for rdma_cm DREP timeout rdma_cm uses the same timeout values for connect and disconnect request/reply. The abrupt disconnect option allows DAT consumers to request a prompt disconnect with an immediate event. If the remote node goes down or is non-responsive, a CM disconnect event could take minutes. Add a time limit on waiting for the event, then move the EP to the disconnected state to prevent the callback from issuing a duplicate disconnect event. The EP-to-CM linking will clean up/cancel any pending events before destroying the cm_id. Signed-off-by: Arlin Davis --- diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c index 1eb7aed..ff48999 100644 --- a/dapl/openib_cma/cm.c +++ b/dapl/openib_cma/cm.c @@ -623,6 +623,7 @@ DAT_RETURN dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags) { struct dapl_cm_id *conn = dapl_get_cm_from_ep(ep_ptr); + int drep_time = 25; dapl_dbg_log(DAPL_DBG_TYPE_CM, " disconnect(ep %p, conn %p, id %d flags %x)\n", @@ -636,13 +637,29 @@ dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags) /* ABRUPT close, wait for callback and DISCONNECTED state */ if (close_flags == DAT_CLOSE_ABRUPT_FLAG) { + DAPL_EVD *evd = NULL; + DAT_EVENT_NUMBER num = DAT_CONNECTION_EVENT_DISCONNECTED; + dapl_os_lock(&ep_ptr->header.lock); - while (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) { + /* limit DREP waiting, other side could be down */ + while (--drep_time && ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) { dapl_os_unlock(&ep_ptr->header.lock); dapl_os_sleep_usec(10000); dapl_os_lock(&ep_ptr->header.lock); } + if (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) { 
+ dapl_log(DAPL_DBG_TYPE_WARN, + " WARNING: disconnect(ep %p, conn %p, id %d) timed out\n", + ep_ptr, conn, (conn ? conn->cm_id : 0)); + ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED; + evd = (DAPL_EVD *)ep_ptr->param.connect_evd_handle; + } dapl_os_unlock(&ep_ptr->header.lock); + + if (evd) { + dapl_sp_remove_ep(ep_ptr); + dapls_evd_post_connection_event(evd, num, ep_ptr, 0, 0); + } } /*