git.openfabrics.org - ~shefty/rdma-win.git/commitdiff
[IBAL] ib/cm: fix handling failed send completions
author: Stan Smith <stan.smith@intel.com>
Tue, 5 Jan 2010 18:59:21 +0000 (18:59 +0000)
committer: Stan Smith <stan.smith@intel.com>
Tue, 5 Jan 2010 18:59:21 +0000 (18:59 +0000)
__cep_mad_send_cb() assumes that the mad being processed is
associated with the current state of the CEP.  This may not be
the case.
For example, for a short-lived connection, it was observed that
a REP mad completed with status canceled.  This is normal.  However,
the user already attempted to disconnect the connection by sending
a DREQ.  This left the cep in the DREQ_SENT state by the time that
the REP mad completed.  Since the REP status was not success, but the
state was DREQ_SENT, the code assumed that the DREQ had failed and
transitioned the cep into TIMEWAIT.  The result is that the DREQ is never
matched with a DREP or canceled, but holds a reference on the CEP.
Until the DREQ times out (time depends on connection, but easily
up to a minute), attempts to destroy the CEP are blocked.
Fix this by simply discarding any completed sends that were not
sent from the current state of the cep when the completion handler
is invoked.
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1@2652 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

branches/WOF2-2/core/al/kernel/al_cm.c
branches/WOF2-2/core/al/kernel/al_cm_cep.c

index 48b0cb56d7866f848b2116c4c8b3f13533c745d8..955985a346cd4d8b83f0f6ee2a3a2fda22080a52 100644 (file)
@@ -37,7 +37,7 @@
 typedef struct _iba_cm_id_priv\r
 {\r
        iba_cm_id       id;\r
-       KEVENT          destroy_event;  \r
+       KEVENT          destroy_event;\r
 \r
 }      iba_cm_id_priv;\r
 \r
index 49fa4174cef3309084c8269adf1d5e6ad6a2852e..89ffe126c2279ea35f9cf769cbfb56cc81f64960 100644 (file)
@@ -2213,10 +2213,7 @@ __cep_mad_send_cb(
 \r
        p_cep = (kcep_t*)p_mad->context1;\r
 \r
-       /*\r
-        * The connection context is not set when performing immediate responses,\r
-        * such as repeating MADS.\r
-        */\r
+       /* The cep context is only set for MADs that are retried. */\r
        if( !p_cep )\r
        {\r
                ib_put_mad( p_mad );\r
@@ -2224,94 +2221,71 @@ __cep_mad_send_cb(
                return;\r
        }\r
 \r
+       CL_ASSERT( p_mad->status != IB_WCS_SUCCESS );\r
        p_mad->context1 = NULL;\r
 \r
        KeAcquireInStackQueuedSpinLockAtDpcLevel( &gp_cep_mgr->lock, &hdl );\r
-       /* Clear the sent MAD pointer so that we don't try cancelling again. */\r
-       if( p_cep->p_send_mad == p_mad )\r
-               p_cep->p_send_mad = NULL;\r
-\r
-       switch( p_mad->status )\r
+       if( p_cep->p_send_mad != p_mad )\r
        {\r
-       case IB_WCS_SUCCESS:\r
                KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );\r
                ib_put_mad( p_mad );\r
-               break;\r
-\r
-       case IB_WCS_CANCELED:\r
-               if( p_cep->state != CEP_STATE_REQ_SENT &&\r
-                       p_cep->state != CEP_STATE_REQ_MRA_RCVD &&\r
-                       p_cep->state != CEP_STATE_REP_SENT &&\r
-                       p_cep->state != CEP_STATE_REP_MRA_RCVD &&\r
-                       p_cep->state != CEP_STATE_LAP_SENT &&\r
-                       p_cep->state != CEP_STATE_LAP_MRA_RCVD &&\r
-                       p_cep->state != CEP_STATE_DREQ_SENT &&\r
-                       p_cep->state != CEP_STATE_SREQ_SENT )\r
-               {\r
-                       KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );\r
-                       ib_put_mad( p_mad );\r
-                       break;\r
-               }\r
-               /* Treat as a timeout so we don't stall the state machine. */\r
-               p_mad->status = IB_WCS_TIMEOUT_RETRY_ERR;\r
-\r
-               /* Fall through. */\r
-       case IB_WCS_TIMEOUT_RETRY_ERR:\r
-       default:\r
-               /* Timeout.  Reject the connection. */\r
-               switch( p_cep->state )\r
-               {\r
-               case CEP_STATE_REQ_SENT:\r
-               case CEP_STATE_REQ_MRA_RCVD:\r
-               case CEP_STATE_REP_SENT:\r
-               case CEP_STATE_REP_MRA_RCVD:\r
-                       /* Send the REJ. */\r
-                       __reject_timeout( p_port_cep, p_cep, p_mad );\r
-                       __remove_cep( p_cep );\r
-                       p_cep->state = CEP_STATE_IDLE;\r
-                       break;\r
-\r
-               case CEP_STATE_DREQ_DESTROY:\r
-                       p_cep->state = CEP_STATE_DESTROY;\r
-                       __insert_timewait( p_cep );\r
-                       /* Fall through. */\r
+               goto done;\r
+       }\r
 \r
-               case CEP_STATE_DESTROY:\r
-                       KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );\r
-                       ib_put_mad( p_mad );\r
-                       goto done;\r
+       /* Clear the sent MAD pointer so that we don't try cancelling again. */\r
+       p_cep->p_send_mad = NULL;\r
 \r
-               case CEP_STATE_DREQ_SENT:\r
-                       /*\r
-                        * Make up a DREP mad so we can respond if we receive\r
-                        * a DREQ while in timewait.\r
-                        */\r
-                       __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );\r
-                       __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );\r
-                       p_cep->state = CEP_STATE_TIMEWAIT;\r
-                       __insert_timewait( p_cep );\r
-                       break;\r
+       switch( p_cep->state )\r
+       {\r
+       case CEP_STATE_REQ_SENT:\r
+       case CEP_STATE_REQ_MRA_RCVD:\r
+       case CEP_STATE_REP_SENT:\r
+       case CEP_STATE_REP_MRA_RCVD:\r
+               /* Send the REJ. */\r
+               __reject_timeout( p_port_cep, p_cep, p_mad );\r
+               __remove_cep( p_cep );\r
+               p_cep->state = CEP_STATE_IDLE;\r
+               break;\r
 \r
-               case CEP_STATE_LAP_SENT:\r
-                       /*\r
-                        * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we\r
-                        * failed to send, we return to that state.\r
-                        */\r
-                       p_cep->state = CEP_STATE_ESTABLISHED;\r
-                       break;\r
-               default:\r
-                       break;\r
-               }\r
+       case CEP_STATE_DREQ_DESTROY:\r
+               p_cep->state = CEP_STATE_DESTROY;\r
+               __insert_timewait( p_cep );\r
+               /* Fall through. */\r
 \r
-               status = __cep_queue_mad( p_cep, p_mad );\r
-               CL_ASSERT( status != IB_INVALID_STATE );\r
+       case CEP_STATE_DESTROY:\r
                KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );\r
+               ib_put_mad( p_mad );\r
+               goto done;\r
 \r
-               if( status == IB_SUCCESS )\r
-                       __process_cep( p_cep );\r
+       case CEP_STATE_DREQ_SENT:\r
+               /*\r
+                * Make up a DREP mad so we can respond if we receive\r
+                * a DREQ while in timewait.\r
+                */\r
+               __format_mad_hdr( &p_cep->mads.drep.hdr, p_cep, CM_DREP_ATTR_ID );\r
+               __format_drep( p_cep, NULL, 0, &p_cep->mads.drep );\r
+               p_cep->state = CEP_STATE_TIMEWAIT;\r
+               __insert_timewait( p_cep );\r
+               break;\r
+\r
+       case CEP_STATE_LAP_SENT:\r
+               /*\r
+                * Before CEP was sent, we have been in CEP_STATE_ESTABLISHED as we\r
+                * failed to send, we return to that state.\r
+                */\r
+               p_cep->state = CEP_STATE_ESTABLISHED;\r
+               break;\r
+       default:\r
                break;\r
        }\r
 \r
+       status = __cep_queue_mad( p_cep, p_mad );\r
+       CL_ASSERT( status != IB_INVALID_STATE );\r
+       KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl );\r
+\r
+       if( status == IB_SUCCESS )\r
+               __process_cep( p_cep );\r
+\r
 done:\r
        pfn_destroy_cb = p_cep->pfn_destroy_cb;\r
        cep_context = p_cep->context;\r
@@ -3938,12 +3912,8 @@ __cleanup_cep(
        CL_ASSERT( KeGetCurrentIrql() == DISPATCH_LEVEL );\r
 \r
        /* If we've already come through here, we're done. */\r
-       if( p_cep->state == CEP_STATE_DESTROY ||\r
-               p_cep->state == CEP_STATE_DREQ_DESTROY )\r
-       {\r
-               AL_EXIT( AL_DBG_CM );\r
-               return -1;\r
-       }\r
+       CL_ASSERT( p_cep->state != CEP_STATE_DESTROY &&\r
+               p_cep->state != CEP_STATE_DREQ_DESTROY );\r
 \r
        /* Cleanup the pending MAD list. */\r
        while( p_cep->p_mad_head )\r