From: Sean Hefty Date: Mon, 4 Jan 2010 19:01:09 +0000 (+0000) Subject: ib/cm: fix handling failed send completions X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=0b13fc88a49309b858bcf71a2eb8eb9815081813;p=~shefty%2Frdma-win.git ib/cm: fix handling failed send completions __cep_mad_send_cb() assumes that the mad being processed is associated with the current state of the CEP. This may not be the case. For example, for a short lived connection, it was observed that a REP mad completed with status canceled. This is normal. However, the user already attempted to disconnect the connection by sending a DREQ. This left the cep in the DREQ_SENT state by the time that the REP mad completed. Since the REP failed, but the state was DREQ_SENT, the code assumed that the DREQ had failed and transitioned the cep into TIMEWAIT. The result is that the DREQ is never matched with a DREP or canceled, but holds a reference on the CEP. Until the DREQ times out (time depends on connection, but easily up to a minute), attempts to destroy the CEP are blocked. Fix this by simply discarding any completed sends that were not sent from the current state of the cep when the completion handler is invoked. Signed-off-by: Sean Hefty git-svn-id: svn://openib.tc.cornell.edu/gen1@2650 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- diff --git a/trunk/core/al/kernel/al_cm.c b/trunk/core/al/kernel/al_cm.c index 48b0cb56..955985a3 100644 --- a/trunk/core/al/kernel/al_cm.c +++ b/trunk/core/al/kernel/al_cm.c @@ -37,7 +37,7 @@ typedef struct _iba_cm_id_priv { iba_cm_id id; - KEVENT destroy_event; + KEVENT destroy_event; } iba_cm_id_priv; diff --git a/trunk/core/al/kernel/al_cm_cep.c b/trunk/core/al/kernel/al_cm_cep.c index 49fa4174..49872074 100644 --- a/trunk/core/al/kernel/al_cm_cep.c +++ b/trunk/core/al/kernel/al_cm_cep.c @@ -27,7 +27,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id$ + * $Id: al_cm_cep.c 2540 2009-11-03 17:23:09Z shefty $ */ @@ -2213,10 +2213,7 @@ __cep_mad_send_cb( p_cep = (kcep_t*)p_mad->context1; - /* - * The connection context is not set when performing immediate responses, - * such as repeating MADS. - */ + /* The cep context is only set for MADs that are retried. */ if( !p_cep ) { ib_put_mad( p_mad ); @@ -2224,94 +2221,71 @@ __cep_mad_send_cb( return; } + CL_ASSERT( p_mad->status != IB_WCS_SUCCESS ); p_mad->context1 = NULL; KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl ); - /* Clear the sent MAD pointer so that we don't try cancelling again. */ - if( p_cep->p_send_mad == p_mad ) - p_cep->p_send_mad = NULL; - - switch( p_mad->status ) + if( p_cep->p_send_mad != p_mad ) { - case IB_WCS_SUCCESS: KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); ib_put_mad( p_mad ); - break; - - case IB_WCS_CANCELED: - if( p_cep->state != CEP_STATE_REQ_SENT && - p_cep->state != CEP_STATE_REQ_MRA_RCVD && - p_cep->state != CEP_STATE_REP_SENT && - p_cep->state != CEP_STATE_REP_MRA_RCVD && - p_cep->state != CEP_STATE_LAP_SENT && - p_cep->state != CEP_STATE_LAP_MRA_RCVD && - p_cep->state != CEP_STATE_DREQ_SENT && - p_cep->state != CEP_STATE_SREQ_SENT ) - { - KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); - ib_put_mad( p_mad ); - break; - } - /* Treat as a timeout so we don't stall the state machine. */ - p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR; - - /* Fall through. */ - case IB_WCS_TIMEOUT_RETRY_ERR: - default: - /* Timeout. Reject the connection. */ - switch( p_cep->state ) - { - case CEP_STATE_REQ_SENT: - case CEP_STATE_REQ_MRA_RCVD: - case CEP_STATE_REP_SENT: - case CEP_STATE_REP_MRA_RCVD: - /* Send the REJ. */ - __reject_timeout( p_port_cep, p_cep, p_mad ); - __remove_cep( p_cep ); - p_cep->state = CEP_STATE_IDLE; - break; - - case CEP_STATE_DREQ_DESTROY: - p_cep->state = CEP_STATE_DESTROY; - __insert_timewait( p_cep ); - /* Fall through. */ + goto done; + } - case CEP_STATE_DESTROY: - KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); - ib_put_mad( p_mad ); - goto done; + /* Clear the sent MAD pointer so that we don't try cancelling again. */ + p_cep->p_send_mad = NULL; - case CEP_STATE_DREQ_SENT: - /* - * Make up a DREP mad so we can respond if we receive - * a DREQ while in timewait. - */ - __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID ); - __format_drep( p_cep, NULL, 0, &p_cep->mads.drep ); - p_cep->state = CEP_STATE_TIMEWAIT; - __insert_timewait( p_cep ); - break; + switch( p_cep->state ) + { + case CEP_STATE_REQ_SENT: + case CEP_STATE_REQ_MRA_RCVD: + case CEP_STATE_REP_SENT: + case CEP_STATE_REP_MRA_RCVD: + /* Send the REJ. */ + __reject_timeout( p_port_cep, p_cep, p_mad ); + __remove_cep( p_cep ); + p_cep->state = CEP_STATE_IDLE; + break; - case CEP_STATE_LAP_SENT: - /* - * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we - * failed to send, we return to that state. - */ - p_cep->state = CEP_STATE_ESTABLISHED; - break; - default: - break; - } + case CEP_STATE_DREQ_DESTROY: + p_cep->state = CEP_STATE_DESTROY; + __insert_timewait( p_cep ); + /* Fall through. */ - status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); + case CEP_STATE_DESTROY: KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); + ib_put_mad( p_mad ); + goto done; - if( status == IB_SUCCESS ) - __process_cep( p_cep ); + case CEP_STATE_DREQ_SENT: + /* + * Make up a DREP mad so we can respond if we receive + * a DREQ while in timewait. + */ + __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID ); + __format_drep( p_cep, NULL, 0, &p_cep->mads.drep ); + p_cep->state = CEP_STATE_TIMEWAIT; + __insert_timewait( p_cep ); + break; + + case CEP_STATE_LAP_SENT: + /* + * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we + * failed to send, we return to that state. + */ + p_cep->state = CEP_STATE_ESTABLISHED; + break; + default: break; } + status = __cep_queue_mad( p_cep, p_mad ); + CL_ASSERT( status != IB_INVALID_STATE ); + KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); + + if( status == IB_SUCCESS ) + __process_cep( p_cep ); + done: pfn_destroy_cb = p_cep->pfn_destroy_cb; cep_context = p_cep->context; @@ -3938,12 +3912,8 @@ __cleanup_cep( CL_ASSERT( KeGetCurrentIrql() == DISPATCH_LEVEL ); /* If we've already come through here, we're done. */ - if( p_cep->state == CEP_STATE_DESTROY || - p_cep->state == CEP_STATE_DREQ_DESTROY ) - { - AL_EXIT( AL_DBG_CM ); - return -1; - } + CL_ASSERT( p_cep->state != CEP_STATE_DESTROY && + p_cep->state != CEP_STATE_DREQ_DESTROY ); /* Cleanup the pending MAD list. */ while( p_cep->p_mad_head )