git.openfabrics.org - ~ardavis/dapl.git/commitdiff
cma: disconnect can block for excessive times waiting for rdma_cm DREP timeout
author Arlin Davis <arlin.r.davis@intel.com>
Fri, 3 Dec 2010 18:25:46 +0000 (10:25 -0800)
committer Arlin Davis <arlin.r.davis@intel.com>
Fri, 3 Dec 2010 18:25:46 +0000 (10:25 -0800)
rdma_cm uses the same timeout values for connect and disconnect
request/reply. The abrupt disconnect option allows DAT consumers to
specify a prompt disconnect with an immediate event. If the remote
node goes down or is non-responsive, a CM disconnect event could
take minutes. Add a time limit on waiting for the event, and move
the EP to the disconnected state so that the CM callback does not
issue a duplicate disconnect event. The EP to CM linking will
clean up/cancel any pending events before destroying the cm_id.

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
dapl/openib_cma/cm.c

index 1eb7aedcf55bda55249baa2db3d99228cde8a130..ff4899905d079c1461245cbc5bc48179cb671b8a 100644 (file)
@@ -623,6 +623,7 @@ DAT_RETURN
 dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
 {
        struct dapl_cm_id *conn = dapl_get_cm_from_ep(ep_ptr);
+       int drep_time = 25;
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " disconnect(ep %p, conn %p, id %d flags %x)\n",
@@ -636,13 +637,29 @@ dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
 
        /* ABRUPT close, wait for callback and DISCONNECTED state */
        if (close_flags == DAT_CLOSE_ABRUPT_FLAG) {
+               DAPL_EVD *evd = NULL;
+               DAT_EVENT_NUMBER num = DAT_CONNECTION_EVENT_DISCONNECTED;
+
                dapl_os_lock(&ep_ptr->header.lock);
-               while (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) {
+               /* limit DREP waiting, other side could be down */
+               while (--drep_time && ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) {
                        dapl_os_unlock(&ep_ptr->header.lock);
                        dapl_os_sleep_usec(10000);
                        dapl_os_lock(&ep_ptr->header.lock);
                }
+               if (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) {
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                " WARNING: disconnect(ep %p, conn %p, id %d) timed out\n",
+                                ep_ptr, conn, (conn ? conn->cm_id : 0));
+                       ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED;
+                       evd = (DAPL_EVD *)ep_ptr->param.connect_evd_handle;
+               }
                dapl_os_unlock(&ep_ptr->header.lock);
+
+               if (evd) {
+                       dapl_sp_remove_ep(ep_ptr);
+                       dapls_evd_post_connection_event(evd, num, ep_ptr, 0, 0);
+               }
        }
 
        /*