From 4eda455d9bc80c35743b3a2f6773e6c4a500affc Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Wed, 22 Sep 2010 10:35:24 -0700 Subject: [PATCH] ibal: delay QP transition during disconnect phase The ibal provider calls ib_cm_drep in response to receiving a dreq. The result is that the user's QP is transitioned through the error state, which fails any outstanding send operations and flushes all receives. The disconnect request is then reported to the user. Since a user can receive errors from the QP before they are aware of a pending disconnect request, the application may respond to the errors as, well, actual errors. Fix this by delaying the QP transition until the user responds to the dreq. Signed-off-by: Sean Hefty --- --- dapl/ibal/dapl_ibal_cm.c | 194 ++++++++++++++++----------------------- dapl/include/dapl.h | 4 - 2 files changed, 79 insertions(+), 119 deletions(-) diff --git a/dapl/ibal/dapl_ibal_cm.c b/dapl/ibal/dapl_ibal_cm.c index e3c12ff..da445e9 100644 --- a/dapl/ibal/dapl_ibal_cm.c +++ b/dapl/ibal/dapl_ibal_cm.c @@ -46,6 +46,9 @@ int g_dapl_loopback_connection = 0; extern dapl_ibal_root_t dapl_ibal_root; +static void dapli_ib_cm_drep_cb ( + IN ib_cm_drep_rec_t *p_cm_drep_rec ); + /* * Prototypes */ @@ -192,6 +195,28 @@ dapli_ib_cm_lap_cb ( "--> DiCLcb: CM callback LAP (Load Alternate Path)\n"); } +static DAT_RETURN dapli_send_ib_cm_dreq(ib_cm_handle_t h_cm) +{ + ib_cm_dreq_t cm_dreq; + + dapl_os_memzero(&cm_dreq, sizeof(ib_cm_dreq_t)); + + cm_dreq.flags = IB_FLAGS_SYNC; + cm_dreq.qp_type = IB_QPT_RELIABLE_CONN; + cm_dreq.h_qp = h_cm.h_qp; + cm_dreq.pfn_cm_drep_cb = dapli_ib_cm_drep_cb; + + return dapl_ib_status_convert(ib_cm_dreq(&cm_dreq)); +} + +static DAT_RETURN dapli_send_ib_cm_drep(ib_cm_handle_t h_cm) +{ + ib_cm_drep_t cm_drep; + + dapl_os_memzero(&cm_drep, sizeof(ib_cm_drep_t)); + return dapl_ib_status_convert(ib_cm_drep(h_cm, &cm_drep)); +} + /* * Connection Disconnect Request callback * We received a DREQ, return a DREP (disconnect reply). @@ -201,9 +226,7 @@ static void dapli_ib_cm_dreq_cb ( IN ib_cm_dreq_rec_t *p_cm_dreq_rec ) { - ib_cm_drep_t cm_drep; DAPL_EP *ep_ptr; - int bail=10; dp_ib_cm_handle_t cm_ptr; dapl_os_assert (p_cm_dreq_rec); @@ -213,25 +236,25 @@ dapli_ib_cm_dreq_cb ( { dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: BAD_PTR EP %lx\n", __FUNCTION__, ep_ptr); - return; + goto send_drep; } if ( ep_ptr->header.magic != DAPL_MAGIC_EP ) { if ( ep_ptr->header.magic == DAPL_MAGIC_INVALID ) - return; + goto send_drep; dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: EP %p BAD_EP_MAGIC %x != wanted %x\n", __FUNCTION__, ep_ptr, ep_ptr->header.magic, DAPL_MAGIC_EP ); - return; + goto send_drep; } cm_ptr = dapl_get_cm_from_ep(ep_ptr); if (!cm_ptr) { dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr); - return; + goto send_drep; } dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_dreq_rec->h_cm_dreq.h_qp); @@ -242,45 +265,25 @@ dapli_ib_cm_dreq_cb ( (ep_ptr->sent_discreq == DAT_TRUE ? "TRUE":"FALSE")); dapl_os_lock (&ep_ptr->header.lock); - if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED - /*|| ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING - && ep_ptr->sent_discreq == DAT_TRUE)*/ ) + if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ) { - dapl_os_unlock (&ep_ptr->header.lock); dapl_dbg_log (DAPL_DBG_TYPE_CM, "--> DiCDcb: EP %lx QP %lx already Disconnected\n", ep_ptr, ep_ptr->qp_handle); - return; + goto unlock; } ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECT_PENDING; - ep_ptr->recv_discreq = DAT_TRUE; + cm_ptr->ib_cm = p_cm_dreq_rec->h_cm_dreq; dapl_os_unlock (&ep_ptr->header.lock); - dapl_os_memzero (&cm_drep, sizeof(ib_cm_drep_t)); - - /* Could fail if we received reply from other side, no need to retry */ - /* Wait for any send ops in process holding reference */ - while (dapls_cb_pending(&ep_ptr->req_buffer) && bail-- > 0 ) - { - dapl_dbg_log (DAPL_DBG_TYPE_CM, - "--> DiCDcb: WAIT for EP=%lx req_count(%d) != 0\n", - ep_ptr, dapls_cb_pending(&ep_ptr->req_buffer)); - dapl_os_sleep_usec (100); - } - - ib_cm_drep (p_cm_dreq_rec->h_cm_dreq, &cm_drep); - - /* CM puts QP in reset state */ - ep_ptr->qp_state = IB_QPS_RESET; - if (ep_ptr->cr_ptr) { /* passive side */ dapls_cr_callback ( cm_ptr, IB_CME_DISCONNECTED, (void * __ptr64) p_cm_dreq_rec->p_dreq_pdata, - IB_DREQ_PDATA_SIZE, + IB_DREQ_PDATA_SIZE, (void *) (((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr) ); } else @@ -291,9 +294,15 @@ dapli_ib_cm_dreq_cb ( IB_CME_DISCONNECTED, (void * __ptr64) p_cm_dreq_rec->p_dreq_pdata, - IB_DREQ_PDATA_SIZE, + IB_DREQ_PDATA_SIZE, p_cm_dreq_rec->qp_context ); } + return; + +unlock: + dapl_os_unlock (&ep_ptr->header.lock); +send_drep: + dapli_send_ib_cm_drep(p_cm_dreq_rec->h_cm_dreq); } /* @@ -335,9 +344,8 @@ dapli_ib_cm_drep_cb ( dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_drep_rec->h_qp); dapl_dbg_log (DAPL_DBG_TYPE_CM, - "--> DiCDpcb: EP %p state %s cm_hdl %p\n",ep_ptr, - dapl_get_ep_state_str(ep_ptr->param.ep_state), - cm_ptr); + "--> DiCDpcb: EP %p state %s cm_hdl %p\n",ep_ptr, + dapl_get_ep_state_str(ep_ptr->param.ep_state), cm_ptr); if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ) { @@ -353,7 +361,7 @@ dapli_ib_cm_drep_cb ( dapls_cr_callback ( cm_ptr, IB_CME_DISCONNECTED, (void * __ptr64) p_cm_drep_rec->p_drep_pdata, - IB_DREP_PDATA_SIZE, + IB_DREP_PDATA_SIZE, (void *) (((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr) ); } else @@ -363,7 +371,7 @@ dapli_ib_cm_drep_cb ( cm_ptr, IB_CME_DISCONNECTED, (void * __ptr64) p_cm_drep_rec->p_drep_pdata, - IB_DREP_PDATA_SIZE, + IB_DREP_PDATA_SIZE, p_cm_drep_rec->qp_context ); } } @@ -1101,88 +1109,50 @@ DAT_RETURN dapls_ib_disconnect ( IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS disconnect_flags ) { - ib_api_status_t ib_status = IB_SUCCESS; - ib_cm_dreq_t cm_dreq; - dp_ib_cm_handle_t cm_ptr; + DAT_RETURN dat_status = DAT_SUCCESS; + ib_cm_dreq_t cm_dreq; + dp_ib_cm_handle_t cm_ptr; - dapl_os_assert(ep_ptr); + dapl_os_assert(ep_ptr); - if ( DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_CM, - "--> %s: BAD EP Magic EP=%lx\n", __FUNCTION__,ep_ptr); - return DAT_SUCCESS; - } - cm_ptr = dapl_get_cm_from_ep(ep_ptr); - if (!cm_ptr) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr); - return DAT_SUCCESS; - } - - dapl_dbg_log (DAPL_DBG_TYPE_CM, - "--> %s() EP %p %s rx_drq %d tx_drq %d Close %s\n", __FUNCTION__, - ep_ptr, dapl_get_ep_state_str(ep_ptr->param.ep_state), - ep_ptr->recv_discreq, ep_ptr->sent_discreq, - (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG ? "Abrupt":"Graceful")); + if ( DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_CM, + "--> %s: BAD EP Magic EP=%lx\n", __FUNCTION__,ep_ptr); + return DAT_SUCCESS; + } - if ( disconnect_flags == DAT_CLOSE_ABRUPT_FLAG ) - { - if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ) - return DAT_SUCCESS; + cm_ptr = dapl_get_cm_from_ep(ep_ptr); + if (!cm_ptr) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr); + return DAT_SUCCESS; + } + + dapl_dbg_log (DAPL_DBG_TYPE_CM, + "--> %s() EP %p %s Close %s\n", __FUNCTION__, + ep_ptr, dapl_get_ep_state_str(ep_ptr->param.ep_state), + (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG ? "Abrupt":"Graceful")); - if ( ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECT_PENDING ) - { + if ( disconnect_flags == DAT_CLOSE_ABRUPT_FLAG ) + { dapl_dbg_log(DAPL_DBG_TYPE_CM, "%s() calling legacy_post_disconnect()\n",__FUNCTION__); - dapl_ep_legacy_post_disconnect(ep_ptr, disconnect_flags); - return DAT_SUCCESS; + dapl_ep_legacy_post_disconnect(ep_ptr, disconnect_flags); } - } - - dapl_os_memzero(&cm_dreq, sizeof(ib_cm_dreq_t)); - cm_dreq.qp_type = IB_QPT_RELIABLE_CONN; - cm_dreq.h_qp = ep_ptr->qp_handle; - cm_dreq.pfn_cm_drep_cb = dapli_ib_cm_drep_cb; - - /* - * Currently we do not send any disconnect private data to - * the other endpoint because DAT 2.0 does not support it. - */ - cm_dreq.p_dreq_pdata = NULL; - cm_dreq.flags = IB_FLAGS_SYNC; - - /* - * still need to send DREQ (disconnect request)? - */ - if ( (ep_ptr->recv_discreq == DAT_FALSE) - && (ep_ptr->sent_discreq == DAT_FALSE) - && (ep_ptr->qp_state != IB_QPS_RESET) ) - { - ep_ptr->sent_discreq = DAT_TRUE; - ib_status = ib_cm_dreq ( &cm_dreq ); + dapl_os_lock(&ep_ptr->header.lock); + if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ) + goto out; - if ( ib_status == IB_SUCCESS ) - dapl_dbg_log (DAPL_DBG_TYPE_CM, - "--> DsD: EP %p DREQ SENT\n", ep_ptr); + dat_status = dapli_send_ib_cm_dreq(cm_ptr->ib_cm); + if (dat_status != DAT_SUCCESS) + dat_status = dapli_send_ib_cm_drep(cm_ptr->ib_cm); - /* tolerate INVALID_STATE error as the other side can race ahead and - * generate a DREQ before we do. - */ - if ( ib_status == IB_INVALID_STATE || ib_status == IB_INVALID_HANDLE ) - { - ib_status = IB_SUCCESS; - } - else if (ib_status) - { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, - "%s() EP %p ib_cm_dreq() status %s\n", - __FUNCTION__,ep_ptr,ib_get_err_str(ib_status)); - } - } - return ib_status; +out: + dapl_os_unlock(&ep_ptr->header.lock); + return dat_status; } @@ -1692,19 +1662,13 @@ dapls_ib_disconnect_clean ( } dapl_os_assert ( ep_ptr->header.magic == DAPL_MAGIC_EP ); - ep_ptr->sent_discreq = DAT_FALSE; - ep_ptr->recv_discreq = DAT_FALSE; - - /* - * Query the QP to get the current state */ ib_status = ib_query_qp ( ep_ptr->qp_handle, &qp_attr ); - if ( ib_status != IB_SUCCESS ) { dapl_dbg_log ( DAPL_DBG_TYPE_ERR, ">>>DSCONN_CLEAN(%s): Query QP failed = %s\n", (active?"Act":"Pas"),ib_get_err_str(ib_status) ); - return; + return; } ep_ptr->qp_state = qp_attr.state; diff --git a/dapl/include/dapl.h b/dapl/include/dapl.h index a522f15..68d0ea4 100755 --- a/dapl/include/dapl.h +++ b/dapl/include/dapl.h @@ -466,10 +466,6 @@ struct dapl_ep struct io_buf_track *ibt_base; DAPL_RING_BUFFER ibt_queue; #endif /* DAPL_DBG_IO_TRC */ -#if defined(_WIN32) || defined(_WIN64) - DAT_BOOLEAN recv_discreq; - DAT_BOOLEAN sent_discreq; -#endif #ifdef DAPL_COUNTERS void *cntrs; #endif -- 2.41.0