From 7260c91cdf387ce5a15c01788daadd40dac6acaf Mon Sep 17 00:00:00 2001 From: ftillier Date: Fri, 7 Apr 2006 04:22:58 +0000 Subject: [PATCH] [IBAL] Fix handling of stale connections. git-svn-id: svn://openib.tc.cornell.edu/gen1@290 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- trunk/core/al/kernel/al_cm_cep.c | 325 +++++++++++++++++++------------ 1 file changed, 204 insertions(+), 121 deletions(-) diff --git a/trunk/core/al/kernel/al_cm_cep.c b/trunk/core/al/kernel/al_cm_cep.c index 1667b4f7..7e8e3d02 100644 --- a/trunk/core/al/kernel/al_cm_cep.c +++ b/trunk/core/al/kernel/al_cm_cep.c @@ -456,6 +456,13 @@ static inline void __insert_timewait( IN kcep_t* const p_cep ); +static ib_api_status_t +__cep_get_mad( + IN kcep_t* const p_cep, + IN net16_t attr_id, + OUT cep_agent_t** const pp_port_cep, + OUT ib_mad_element_t** const pp_mad ); + static ib_api_status_t __cep_send_mad( IN cep_agent_t* const p_port_cep, @@ -863,14 +870,155 @@ __repeat_mad( } +static ib_api_status_t +__process_rej( + IN kcep_t* const p_cep, + IN ib_mad_element_t* const p_mad ) +{ + ib_api_status_t status; + mad_cm_rej_t *p_rej; + + AL_ENTER( AL_DBG_CM ); + + ASSERT( p_cep ); + ASSERT( p_mad ); + ASSERT( p_mad->p_mad_buf ); + + p_rej = (mad_cm_rej_t*)p_mad->p_mad_buf; + + switch( p_cep->state ) + { + case CEP_STATE_REQ_SENT: + /* + * Ignore rejects with the status set to IB_REJ_INVALID_SID. We will + * continue to retry (up to max_cm_retries) to connect to the remote + * side. This is required to support peer-to-peer connections and + * clients that try to connect before the server comes up. + */ + if( p_rej->reason == IB_REJ_INVALID_SID ) + { + AL_TRACE( AL_DBG_CM, + ("Request rejected (invalid SID) - retrying.\n") ); + goto err1; + } + + /* Fall through */ + case CEP_STATE_REP_SENT: + case CEP_STATE_REQ_MRA_RCVD: + case CEP_STATE_REP_MRA_RCVD: + /* Cancel any outstanding MAD. 
*/ + if( p_cep->p_send_mad ) + { + ib_cancel_mad( p_cep->h_mad_svc, p_cep->p_send_mad ); + p_cep->p_send_mad = NULL; + } + + /* Fall through */ + case CEP_STATE_REQ_RCVD: + case CEP_STATE_REP_RCVD: + case CEP_STATE_REQ_MRA_SENT: + case CEP_STATE_REP_MRA_SENT: + case CEP_STATE_PRE_REP: + case CEP_STATE_PRE_REP_MRA_SENT: + if( p_cep->state & CEP_STATE_PREP ) + { + CL_ASSERT( p_cep->p_mad ); + ib_put_mad( p_cep->p_mad ); + p_cep->p_mad = NULL; + } + /* Abort connection establishment. No transition to timewait. */ + __remove_cep( p_cep ); + p_cep->state = CEP_STATE_IDLE; + break; + + case CEP_STATE_ESTABLISHED: + case CEP_STATE_LAP_RCVD: + case CEP_STATE_LAP_SENT: + case CEP_STATE_LAP_MRA_RCVD: + case CEP_STATE_LAP_MRA_SENT: + case CEP_STATE_PRE_APR: + case CEP_STATE_PRE_APR_MRA_SENT: + if( p_cep->state & CEP_STATE_PREP ) + { + CL_ASSERT( p_cep->p_mad ); + ib_put_mad( p_cep->p_mad ); + p_cep->p_mad = NULL; + } + p_cep->state = CEP_STATE_TIMEWAIT; + __insert_timewait( p_cep ); + break; + + default: + /* Ignore the REJ. 
*/ + AL_TRACE( AL_DBG_CM, ("REJ received in invalid state.\n") ); +err1: + ib_put_mad( p_mad ); + AL_EXIT( AL_DBG_CM ); + return IB_NO_MATCH; + } + + status = __cep_queue_mad( p_cep, p_mad ); + + AL_EXIT( AL_DBG_CM ); + return status; +} + + +static ib_api_status_t +__process_stale( + IN kcep_t* const p_cep ) +{ + ib_api_status_t status; + cep_agent_t *p_port_cep; + ib_mad_element_t *p_mad; + mad_cm_rej_t *p_rej; + + status = __cep_get_mad( p_cep, CM_REJ_ATTR_ID, &p_port_cep, &p_mad ); + if( status != IB_SUCCESS ) + return status; + + p_rej = ib_get_mad_buf( p_mad ); + + conn_rej_set_ari( NULL, 0, p_rej ); + conn_rej_set_pdata( NULL, 0, p_rej ); + + p_rej->local_comm_id = p_cep->remote_comm_id; + p_rej->remote_comm_id = p_cep->local_comm_id; + p_rej->reason = IB_REJ_STALE_CONN; + + switch( p_cep->state ) + { + case CEP_STATE_REQ_RCVD: + case CEP_STATE_REQ_MRA_SENT: + case CEP_STATE_PRE_REP: + case CEP_STATE_PRE_REP_MRA_SENT: + conn_rej_set_msg_rejected( 0, p_rej ); + break; + + case CEP_STATE_REQ_SENT: + case CEP_STATE_REP_RCVD: + case CEP_STATE_REP_MRA_SENT: + conn_rej_set_msg_rejected( 1, p_rej ); + break; + + default: + conn_rej_set_msg_rejected( 2, p_rej ); + break; + } + conn_rej_clr_rsvd_fields( p_rej ); + + return __process_rej( p_cep, p_mad ); +} + + static void -__process_req( +__req_handler( IN cep_agent_t* const p_port_cep, IN ib_mad_element_t* const p_mad ) { - ib_api_status_t status; + ib_api_status_t status = IB_SUCCESS; mad_cm_req_t *p_req; - kcep_t *p_cep, *p_new_cep, *p_stale_cep; + kcep_t *p_cep, *p_new_cep, *p_stale_cep = NULL; KLOCK_QUEUE_HANDLE hdl; ib_rej_status_t reason; @@ -958,8 +1106,9 @@ __process_req( if( p_stale_cep != p_new_cep ) { /* Duplicate - must be a stale connection. */ - /* TODO: Fail the CEP in p_stale_cep */ reason = IB_REJ_STALE_CONN; + /* Fail the local stale CEP. 
*/ + status = __process_stale( p_stale_cep ); goto unbind; } @@ -1040,6 +1189,10 @@ reject: __reject_req( p_port_cep, p_mad, reason ); KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); + + if( reason == IB_REJ_STALE_CONN && status == IB_SUCCESS ) + __process_cep( p_stale_cep ); + AL_EXIT( AL_DBG_CM ); } @@ -1087,7 +1240,7 @@ __save_wire_rep( static void -__process_mra( +__mra_handler( IN ib_mad_element_t* const p_mad ) { ib_api_status_t status; @@ -1119,13 +1272,13 @@ __process_mra( goto err; } } + /* * Note that we don't update the CEP's remote comm ID - it messes up REP * processing since a non-zero RCID implies the connection is in the RCID * map. Adding it here requires checking there and conditionally adding * it. Ignoring it is a valid thing to do. */ - if( !(p_cep->state & CEP_STATE_SENT) || (1 << conn_mra_get_msg_mraed( p_mra ) != (p_cep->state & CEP_MSG_MASK)) ) @@ -1152,7 +1305,6 @@ __process_mra( p_cep->state |= CEP_STATE_MRA; status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); @@ -1170,7 +1322,7 @@ err: static void -__process_rej( +__rej_handler( IN ib_mad_element_t* const p_mad ) { ib_api_status_t status; @@ -1210,79 +1362,16 @@ __process_rej( if( p_cep->remote_comm_id && p_cep->remote_comm_id != p_rej->local_comm_id ) { - goto err2; - } - - switch( p_cep->state ) - { - case CEP_STATE_REQ_SENT: - /* - * Ignore rejects with the status set to IB_REJ_INVALID_SID. We will - * continue to retry (up to max_cm_retries) to connect to the remote - * side. This is required to support peer-to-peer connections and - * clients that try to connect before the server comes up. - */ - if( p_rej->reason == IB_REJ_INVALID_SID ) - { - AL_TRACE( AL_DBG_CM, - ("Request rejected (invalid SID) - retrying.\n") ); - goto err2; - } - - /* Fall through */ - case CEP_STATE_REP_SENT: - case CEP_STATE_REQ_MRA_RCVD: - case CEP_STATE_REP_MRA_RCVD: - /* Cancel any outstanding MAD. 
*/ - if( p_cep->p_send_mad ) - { - ib_cancel_mad( p_cep->h_mad_svc, p_cep->p_send_mad ); - p_cep->p_send_mad = NULL; - } - - /* Fall through */ - case CEP_STATE_REQ_RCVD: - case CEP_STATE_REP_RCVD: - case CEP_STATE_REQ_MRA_SENT: - case CEP_STATE_REP_MRA_SENT: - case CEP_STATE_PRE_REP: - case CEP_STATE_PRE_REP_MRA_SENT: - if( p_cep->state & CEP_STATE_PREP ) - { - CL_ASSERT( p_cep->p_mad ); - ib_put_mad( p_cep->p_mad ); - p_cep->p_mad = NULL; - } - /* Abort connection establishment. No transition to timewait. */ - __remove_cep( p_cep ); - p_cep->state = CEP_STATE_IDLE; - break; - - case CEP_STATE_ESTABLISHED: - case CEP_STATE_LAP_RCVD: - case CEP_STATE_LAP_SENT: - case CEP_STATE_LAP_MRA_RCVD: - case CEP_STATE_LAP_MRA_SENT: - case CEP_STATE_PRE_APR: - case CEP_STATE_PRE_APR_MRA_SENT: - if( p_cep->state & CEP_STATE_PREP ) - { - CL_ASSERT( p_cep->p_mad ); - ib_put_mad( p_cep->p_mad ); - p_cep->p_mad = NULL; - } - p_cep->state = CEP_STATE_TIMEWAIT; - __insert_timewait( p_cep ); - break; - - default: - /* Ignore the REJ. */ - AL_TRACE( AL_DBG_CM, ("REJ received in invalid state.\n") ); - goto err2; + err2: + KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); + err1: + ib_put_mad( p_mad ); + AL_EXIT( AL_DBG_CM ); + /* The MAD is released and the lock dropped - do not fall through. */ + return; } - status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); + status = __process_rej( p_cep, p_mad ); KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); @@ -1290,18 +1377,11 @@ __process_rej( __process_cep( p_cep ); AL_EXIT( AL_DBG_CM ); - return; - -err2: - KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); -err1: - ib_put_mad( p_mad ); - AL_EXIT( AL_DBG_CM ); } static void -__process_rep( +__rep_handler( IN cep_agent_t* const p_port_cep, IN ib_mad_element_t* const p_mad ) { @@ -1342,25 +1422,25 @@ __process_rep( if( __insert_cep( p_cep ) != p_cep ) { /* Roll back the state change. */ - p_cep->state = old_state; __reject_mad( p_port_cep, p_cep, p_mad, IB_REJ_STALE_CONN ); - /* TODO: Handle stale connection. 
*/ - break; + p_cep->state = old_state; + status = __process_stale( p_cep ); } - - /* - * Cancel any outstanding send. Note that we do this only after - * inserting the CEP - if we failed, then we the send will timeout - * and we'll finish our way through the state machine. - */ - if( p_cep->p_send_mad ) + else { - ib_cancel_mad( p_cep->h_mad_svc, p_cep->p_send_mad ); - p_cep->p_send_mad = NULL; - } + /* + * Cancel any outstanding send. Note that we do this only after + * inserting the CEP - if we failed, then the send will timeout + * and we'll finish our way through the state machine. + */ + if( p_cep->p_send_mad ) + { + ib_cancel_mad( p_cep->h_mad_svc, p_cep->p_send_mad ); + p_cep->p_send_mad = NULL; + } - status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); + status = __cep_queue_mad( p_cep, p_mad ); + } KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); @@ -1393,7 +1473,7 @@ __process_rep( static void -__process_rtu( +__rtu_handler( IN ib_mad_element_t* const p_mad ) { ib_api_status_t status; @@ -1433,7 +1513,6 @@ __process_rtu( p_cep->state = CEP_STATE_ESTABLISHED; status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); /* Update timewait time. */ __calc_timewait( p_cep ); @@ -1459,7 +1538,7 @@ done: static void -__process_dreq( +__dreq_handler( IN cep_agent_t* const p_port_cep, IN ib_mad_element_t* const p_mad ) { @@ -1513,7 +1592,6 @@ __process_dreq( p_cep->state = CEP_STATE_DREQ_RCVD; status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); /* Store the TID for use in the reply DREP. 
*/ p_cep->tid = p_dreq->hdr.trans_id; @@ -1544,7 +1622,7 @@ __process_dreq( static void -__process_drep( +__drep_handler( IN ib_mad_element_t* const p_mad ) { ib_api_status_t status; @@ -1593,7 +1671,6 @@ __process_drep( p_cep->state = CEP_STATE_TIMEWAIT; status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); } else { @@ -1684,7 +1761,7 @@ __format_lap_av( static void -__process_lap( +__lap_handler( IN cep_agent_t* const p_port_cep, IN ib_mad_element_t* const p_mad ) { @@ -1748,7 +1825,6 @@ __process_lap( p_cep->state = CEP_STATE_LAP_RCVD; status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); @@ -1774,7 +1850,7 @@ __process_lap( static void -__process_apr( +__apr_handler( IN ib_mad_element_t* const p_mad ) { ib_api_status_t status; @@ -1819,7 +1895,6 @@ __process_apr( p_cep->state = CEP_STATE_ESTABLISHED; status = __cep_queue_mad( p_cep, p_mad ); - CL_ASSERT( status != IB_INVALID_STATE ); KeReleaseInStackQueuedSpinLockFromDpcLevel( &hdl ); @@ -1869,39 +1944,39 @@ __cep_mad_recv_cb( switch( p_hdr->attr_id ) { case CM_REQ_ATTR_ID: - __process_req( p_port_cep, p_mad ); + __req_handler( p_port_cep, p_mad ); break; case CM_MRA_ATTR_ID: - __process_mra( p_mad ); + __mra_handler( p_mad ); break; case CM_REJ_ATTR_ID: - __process_rej( p_mad ); + __rej_handler( p_mad ); break; case CM_REP_ATTR_ID: - __process_rep( p_port_cep, p_mad ); + __rep_handler( p_port_cep, p_mad ); break; case CM_RTU_ATTR_ID: - __process_rtu( p_mad ); + __rtu_handler( p_mad ); break; case CM_DREQ_ATTR_ID: - __process_dreq( p_port_cep, p_mad ); + __dreq_handler( p_port_cep, p_mad ); break; case CM_DREP_ATTR_ID: - __process_drep( p_mad ); + __drep_handler( p_mad ); break; case CM_LAP_ATTR_ID: - __process_lap( p_port_cep, p_mad ); + __lap_handler( p_port_cep, p_mad ); break; case CM_APR_ATTR_ID: - __process_apr( p_mad ); + __apr_handler( p_mad ); break; case CM_SIDR_REQ_ATTR_ID: @@ -2642,7 
+2717,11 @@ __insert_by_id( else if( p_new_cep->remote_ca_guid > p_cep->remote_ca_guid ) p_item = cl_rbmap_right( p_item ), left = FALSE; else + { + AL_TRACE( AL_DBG_CM | AL_DBG_WARN, + ("WARNING: Duplicate remote CID and CA GUID.\n") ); goto done; + } } cl_rbmap_insert( @@ -2681,7 +2760,11 @@ __insert_by_qpn( else if( p_new_cep->remote_ca_guid > p_cep->remote_ca_guid ) p_item = cl_rbmap_right( p_item ), left = FALSE; else + { + AL_TRACE( AL_DBG_CM | AL_DBG_WARN, + ("WARNING: Duplicate remote QPN and CA GUID.\n") ); goto done; + } } cl_rbmap_insert( -- 2.41.0