]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
ibal: delay QP transition during disconnect phase
authorardavis <ardavis@cst-linux.(none)>
Fri, 24 Sep 2010 17:47:30 +0000 (10:47 -0700)
committerardavis <ardavis@cst-linux.(none)>
Fri, 24 Sep 2010 17:47:30 +0000 (10:47 -0700)
ibal provider calls ib_cm_drep in response to receiving
a dreq.  The result is that the user's QP is transitioned
through the error state, which fails any outstanding send
operations and flushes all receives.  The disconnect request
is then reported to the user.

Since a user can receive errors from the QP before they are
aware of a pending disconnect request, the application may
respond to the errors as, well, actual errors.  Fix this by
delaying the QP transition until the user responds to the
dreq.

This fixes an error with Intel MPI running over the ibal
dapl provider with a 'spawn' test.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
dapl/ibal/dapl_ibal_cm.c
dapl/ibal/dapl_ibal_util.h
dapl/include/dapl.h

index e3c12ffe7e498f3f213da39c907d89b73d1d44d4..37e158bc93dd655f4c9e3a8656092347ee0096b9 100644 (file)
@@ -46,6 +46,9 @@
 int g_dapl_loopback_connection = 0;
 extern dapl_ibal_root_t        dapl_ibal_root;
 
+static void dapli_ib_cm_drep_cb (
+       IN    ib_cm_drep_rec_t          *p_cm_drep_rec );
+
 /*
  * Prototypes
  */
@@ -192,6 +195,28 @@ dapli_ib_cm_lap_cb (
                   "--> DiCLcb: CM callback LAP (Load Alternate Path)\n");
 }
 
+static DAT_RETURN dapli_send_ib_cm_dreq(ib_cm_handle_t h_cm)
+{
+       ib_cm_dreq_t cm_dreq;
+
+       dapl_os_memzero(&cm_dreq, sizeof(ib_cm_dreq_t));
+
+       cm_dreq.flags = IB_FLAGS_SYNC;
+       cm_dreq.qp_type = IB_QPT_RELIABLE_CONN;
+       cm_dreq.h_qp = h_cm.h_qp;
+       cm_dreq.pfn_cm_drep_cb = dapli_ib_cm_drep_cb;
+
+       return dapl_ib_status_convert(ib_cm_dreq(&cm_dreq));
+}
+
+static DAT_RETURN dapli_send_ib_cm_drep(ib_cm_handle_t h_cm)
+{
+    ib_cm_drep_t cm_drep;
+
+    dapl_os_memzero(&cm_drep, sizeof(ib_cm_drep_t));
+       return dapl_ib_status_convert(ib_cm_drep(h_cm, &cm_drep));
+}
+
 /*
  * Connection Disconnect Request callback
  * We received a DREQ, return a DREP (disconnect reply).
@@ -201,9 +226,7 @@ static void
 dapli_ib_cm_dreq_cb (
         IN    ib_cm_dreq_rec_t          *p_cm_dreq_rec )
 {
-    ib_cm_drep_t        cm_drep;
     DAPL_EP             *ep_ptr;
-    int                        bail=10;
     dp_ib_cm_handle_t  cm_ptr;
     
     dapl_os_assert (p_cm_dreq_rec);
@@ -213,25 +236,25 @@ dapli_ib_cm_dreq_cb (
     {
         dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
                       "--> %s: BAD_PTR EP %lx\n", __FUNCTION__, ep_ptr);
-        return;
+        goto send_drep;
     }
     if ( ep_ptr->header.magic != DAPL_MAGIC_EP  )
     {
         if ( ep_ptr->header.magic == DAPL_MAGIC_INVALID )
-            return;
+            goto send_drep;
 
         dapl_dbg_log (DAPL_DBG_TYPE_ERR,
                       "--> %s: EP %p BAD_EP_MAGIC %x != wanted %x\n",
                       __FUNCTION__, ep_ptr, ep_ptr->header.magic,
                       DAPL_MAGIC_EP );
-        return;
+        goto send_drep;
     }
     cm_ptr = dapl_get_cm_from_ep(ep_ptr);
     if (!cm_ptr)
     {
         dapl_dbg_log (DAPL_DBG_TYPE_ERR,
                       "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
-        return;
+        goto send_drep;
     }
     dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_dreq_rec->h_cm_dreq.h_qp);
 
@@ -242,45 +265,34 @@ dapli_ib_cm_dreq_cb (
                   (ep_ptr->sent_discreq == DAT_TRUE ? "TRUE":"FALSE"));
 
     dapl_os_lock (&ep_ptr->header.lock);
-    if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED
-         /*|| ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING
-           && ep_ptr->sent_discreq == DAT_TRUE)*/ )
+    if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED )
     {
-        dapl_os_unlock (&ep_ptr->header.lock);
         dapl_dbg_log (DAPL_DBG_TYPE_CM, 
                       "--> DiCDcb: EP %lx QP %lx already Disconnected\n",
                       ep_ptr, ep_ptr->qp_handle);
-        return;
+        goto unlock;
     }
 
     ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECT_PENDING;
-    ep_ptr->recv_discreq = DAT_TRUE;
-    dapl_os_unlock (&ep_ptr->header.lock);
-
-    dapl_os_memzero (&cm_drep, sizeof(ib_cm_drep_t));
 
-    /* Could fail if we received reply from other side, no need to retry */
-    /* Wait for any send ops in process holding reference */
-    while (dapls_cb_pending(&ep_ptr->req_buffer) && bail-- > 0 )
+    if (cm_ptr->state == IBAL_CM_CONNECT)
     {
-       dapl_dbg_log (DAPL_DBG_TYPE_CM, 
-                  "--> DiCDcb: WAIT for EP=%lx req_count(%d) != 0\n", 
-                  ep_ptr, dapls_cb_pending(&ep_ptr->req_buffer));
-       dapl_os_sleep_usec (100);
+           cm_ptr->state = IBAL_CM_DREQ;
+           cm_ptr->ib_cm = p_cm_dreq_rec->h_cm_dreq;
     }
+    else
+    {
+           dapli_send_ib_cm_drep(p_cm_dreq_rec->h_cm_dreq);
+    }
+    dapl_os_unlock (&ep_ptr->header.lock);
 
-    ib_cm_drep (p_cm_dreq_rec->h_cm_dreq, &cm_drep);
-
-    /* CM puts QP in reset state */
-    ep_ptr->qp_state = IB_QPS_RESET;
-          
     if (ep_ptr->cr_ptr)
     {
         /* passive side */
         dapls_cr_callback ( cm_ptr,
                             IB_CME_DISCONNECTED,
                             (void * __ptr64) p_cm_dreq_rec->p_dreq_pdata,
-                           IB_DREQ_PDATA_SIZE,
+                                                       IB_DREQ_PDATA_SIZE,
                             (void *) (((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr) );
     }
     else
@@ -291,9 +303,15 @@ dapli_ib_cm_dreq_cb (
                                   IB_CME_DISCONNECTED,
                                   (void * __ptr64)
                                   p_cm_dreq_rec->p_dreq_pdata,
-                                 IB_DREQ_PDATA_SIZE,
+                                                                 IB_DREQ_PDATA_SIZE,
                                   p_cm_dreq_rec->qp_context );
     }
+    return;
+
+unlock:
+       dapl_os_unlock (&ep_ptr->header.lock);
+send_drep:
+    dapli_send_ib_cm_drep(p_cm_dreq_rec->h_cm_dreq);
 }
 
 /*
@@ -335,9 +353,8 @@ dapli_ib_cm_drep_cb (
     dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_drep_rec->h_qp);
     
     dapl_dbg_log (DAPL_DBG_TYPE_CM, 
-       "--> DiCDpcb: EP %p state %s cm_hdl %p\n",ep_ptr,
-                  dapl_get_ep_state_str(ep_ptr->param.ep_state),
-                  cm_ptr);
+               "--> DiCDpcb: EP %p state %s cm_hdl %p\n",ep_ptr,
+               dapl_get_ep_state_str(ep_ptr->param.ep_state), cm_ptr);
 
     if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED )
     {
@@ -353,7 +370,7 @@ dapli_ib_cm_drep_cb (
         dapls_cr_callback ( cm_ptr,
                             IB_CME_DISCONNECTED,
                            (void * __ptr64) p_cm_drep_rec->p_drep_pdata,
-                          IB_DREP_PDATA_SIZE,
+                                                  IB_DREP_PDATA_SIZE,
                            (void *) (((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr) );
     }
     else
@@ -363,7 +380,7 @@ dapli_ib_cm_drep_cb (
                                    cm_ptr,
                                    IB_CME_DISCONNECTED,
                                    (void * __ptr64) p_cm_drep_rec->p_drep_pdata,
-                                  IB_DREP_PDATA_SIZE,
+                                                                  IB_DREP_PDATA_SIZE,
                                    p_cm_drep_rec->qp_context );
     }
 }
@@ -1101,88 +1118,59 @@ DAT_RETURN
 dapls_ib_disconnect ( IN   DAPL_EP           *ep_ptr,
                       IN   DAT_CLOSE_FLAGS   disconnect_flags )
 {
-    ib_api_status_t    ib_status = IB_SUCCESS;
-    ib_cm_dreq_t       cm_dreq;
-    dp_ib_cm_handle_t  cm_ptr;
+       DAT_RETURN dat_status = DAT_SUCCESS;
+       ib_cm_dreq_t    cm_dreq;
+       dp_ib_cm_handle_t       cm_ptr;
+       ib_cm_handle_t h_cm;
+       enum dapl_ibal_cm_state state;
 
-    dapl_os_assert(ep_ptr);
+       dapl_os_assert(ep_ptr);
 
-    if ( DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP) )
-    {
-         dapl_dbg_log (DAPL_DBG_TYPE_CM,
-                  "--> %s: BAD EP Magic EP=%lx\n", __FUNCTION__,ep_ptr); 
-        return DAT_SUCCESS;
-    }
-    cm_ptr = dapl_get_cm_from_ep(ep_ptr);
-    if (!cm_ptr)
-    {
-        dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-                      "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
-        return DAT_SUCCESS;
-    }
-    dapl_dbg_log (DAPL_DBG_TYPE_CM,
-       "--> %s() EP %p %s rx_drq %d tx_drq %d Close %s\n", __FUNCTION__,
-       ep_ptr, dapl_get_ep_state_str(ep_ptr->param.ep_state),
-                  ep_ptr->recv_discreq, ep_ptr->sent_discreq,
-       (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG ? "Abrupt":"Graceful"));
-
-    if ( disconnect_flags == DAT_CLOSE_ABRUPT_FLAG )
-    {
-       if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED )
-        return DAT_SUCCESS;
-
-       if ( ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECT_PENDING )
-    {
-           dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                       "%s() calling legacy_post_disconnect()\n",__FUNCTION__);
-            dapl_ep_legacy_post_disconnect(ep_ptr, disconnect_flags);
-            return DAT_SUCCESS;
+       if ( DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP) )
+       {
+               dapl_dbg_log (DAPL_DBG_TYPE_CM,
+                       "--> %s: BAD EP Magic EP=%lx\n", __FUNCTION__,ep_ptr); 
+               return DAT_SUCCESS;
        }
-    }
-
-    dapl_os_memzero(&cm_dreq, sizeof(ib_cm_dreq_t));
 
-    cm_dreq.qp_type        = IB_QPT_RELIABLE_CONN;
-    cm_dreq.h_qp           = ep_ptr->qp_handle;
-    cm_dreq.pfn_cm_drep_cb = dapli_ib_cm_drep_cb;
-    
-    /* 
-     * Currently we do not send any disconnect private data to
-     * the other endpoint because DAT 2.0 does not support it.  
-     */
-    cm_dreq.p_dreq_pdata   = NULL;
-    cm_dreq.flags          = IB_FLAGS_SYNC;
-
-    /*
-     * still need to send DREQ (disconnect request)?
-     */
-    if ( (ep_ptr->recv_discreq == DAT_FALSE)
-          && (ep_ptr->sent_discreq == DAT_FALSE)
-         && (ep_ptr->qp_state != IB_QPS_RESET) )
-    {
-        ep_ptr->sent_discreq = DAT_TRUE;
-        ib_status = ib_cm_dreq ( &cm_dreq );
-
-       if ( ib_status == IB_SUCCESS )
-           dapl_dbg_log (DAPL_DBG_TYPE_CM,
-                       "--> DsD: EP %p  DREQ SENT\n", ep_ptr);
-
-       /* tolerate INVALID_STATE error as the other side can race ahead and
-        * generate a DREQ before we do.
-        */
-       if ( ib_status == IB_INVALID_STATE || ib_status == IB_INVALID_HANDLE )
+       cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+       if (!cm_ptr)
        {
-           ib_status = IB_SUCCESS;
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                       "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+               return DAT_SUCCESS;
        }
-       else if (ib_status)
+       dapl_dbg_log (DAPL_DBG_TYPE_CM,
+               "--> %s() EP %p %s Close %s\n", __FUNCTION__,
+               ep_ptr, dapl_get_ep_state_str(ep_ptr->param.ep_state),
+               (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG ? "Abrupt":"Graceful"));
+
+       //if ( disconnect_flags == DAT_CLOSE_ABRUPT_FLAG )
+       //{
+       //    dapl_dbg_log(DAPL_DBG_TYPE_CM,
+       //              "%s() calling legacy_post_disconnect()\n",__FUNCTION__);
+       //      dapl_ep_legacy_post_disconnect(ep_ptr, disconnect_flags);
+       //}
+
+       dapl_os_lock(&ep_ptr->header.lock);
+       if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED)
        {
-           dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                       "%s() EP %p ib_cm_dreq() status %s\n",
-                       __FUNCTION__,ep_ptr,ib_get_err_str(ib_status));
+               dapl_os_unlock(&ep_ptr->header.lock);
+               return dat_status;
        }
-    }
-    return ib_status;
+
+       h_cm = cm_ptr->ib_cm;
+       state = cm_ptr->state;
+       cm_ptr->state = IBAL_CM_DISCONNECT;
+       dapl_os_unlock(&ep_ptr->header.lock);
+
+       if (state == IBAL_CM_DREQ)
+               dapli_send_ib_cm_drep(h_cm);
+       else
+               dapli_send_ib_cm_dreq(cm_ptr->ib_cm);
+
+       return DAT_SUCCESS;
 }
 
 
@@ -1692,19 +1680,13 @@ dapls_ib_disconnect_clean (
     }
     dapl_os_assert ( ep_ptr->header.magic == DAPL_MAGIC_EP );
     
-    ep_ptr->sent_discreq = DAT_FALSE;
-    ep_ptr->recv_discreq = DAT_FALSE;
-
-    /* 
-     * Query the QP to get the current state */
     ib_status = ib_query_qp ( ep_ptr->qp_handle, &qp_attr );
-                       
     if ( ib_status != IB_SUCCESS )
     {
            dapl_dbg_log ( DAPL_DBG_TYPE_ERR,
                        ">>>DSCONN_CLEAN(%s): Query QP failed = %s\n",
                        (active?"Act":"Pas"),ib_get_err_str(ib_status) );
-       return;
+               return;
     }
     
     ep_ptr->qp_state = qp_attr.state;
index e28bdbe47e47508b29a95c2729eeb30cf79193da..37fcbaa26db6deb6fcfac6b7891f3c239a7545a8 100644 (file)
@@ -40,6 +40,13 @@ typedef  struct ib_cm_handle           *dp_ib_cm_handle_t;
 typedef  struct ib_cm_handle           *ib_cm_srvc_handle_t;
 #else
 
+enum dapl_ibal_cm_state
+{
+       IBAL_CM_CONNECT,
+       IBAL_CM_DREQ,
+       IBAL_CM_DISCONNECT
+};
+
 /* EP-CM linking requires list_entry, protected ref counting */
 typedef struct dapl_ibal_cm
 { 
@@ -49,6 +56,7 @@ typedef struct dapl_ibal_cm
        DAT_SOCK_ADDR6          dst_ip_addr; 
        ib_cm_handle_t          ib_cm;  /* h_al, h_qp, cid */
        DAPL_EP                 *ep;
+       enum dapl_ibal_cm_state state;
 
 } dapl_ibal_cm_t;
 
index a522f150910297af5e167780818be8b30988e6db..68d0ea4207e5563ef126dad2d680830e6827a374 100755 (executable)
@@ -466,10 +466,6 @@ struct dapl_ep
     struct io_buf_track *ibt_base;
     DAPL_RING_BUFFER   ibt_queue;
 #endif /* DAPL_DBG_IO_TRC */
-#if defined(_WIN32) || defined(_WIN64)
-    DAT_BOOLEAN         recv_discreq;
-    DAT_BOOLEAN         sent_discreq;
-#endif
 #ifdef DAPL_COUNTERS
     void               *cntrs;
 #endif