From: eitan Date: Wed, 12 Oct 2005 08:14:52 +0000 (+0000) Subject: Fix bug: When there is an error in the osm_vendor_send call, X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=ed955f5beeb67473ba536ebad14c42fb4516209e;p=~shefty%2Frdma-win.git Fix bug: When there is an error in the osm_vendor_send call, need to decrement several mad counters and call the dispatcher, if we reached '0' on the qp0_mads_outstanding. git-svn-id: svn://openib.tc.cornell.edu/gen1@115 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- diff --git a/trunk/ulp/opensm/user/include/opensm/osm_log.h b/trunk/ulp/opensm/user/include/opensm/osm_log.h index 3337794a..55e1695c 100644 --- a/trunk/ulp/opensm/user/include/opensm/osm_log.h +++ b/trunk/ulp/opensm/user/include/opensm/osm_log.h @@ -243,14 +243,14 @@ osm_log_init( else { if (accum_log_file) - p_log->out_port = fopen(log_file,"a+"); + p_log->out_port = fopen(log_file,"a+"); else p_log->out_port = fopen(log_file,"w+"); if (!p_log->out_port) { if (accum_log_file) - printf("Cannot open %s for appending. Permission denied\n", log_file); + printf("Cannot open %s for appending. Permission denied\n", log_file); else printf("Cannot open %s for writing. Permission denied\n", log_file); diff --git a/trunk/ulp/opensm/user/include/opensm/osm_vl15intf.h b/trunk/ulp/opensm/user/include/opensm/osm_vl15intf.h index 2c07ed3a..4257f49c 100644 --- a/trunk/ulp/opensm/user/include/opensm/osm_vl15intf.h +++ b/trunk/ulp/opensm/user/include/opensm/osm_vl15intf.h @@ -52,11 +52,13 @@ #include #include #include +#include #include #include #include #include #include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -134,6 +136,9 @@ typedef struct _osm_vl15 osm_vendor_t *p_vend; osm_log_t *p_log; osm_stats_t *p_stats; + osm_subn_t *p_subn; + cl_disp_reg_handle_t h_disp; + cl_plock_t *p_lock; } osm_vl15_t; /* @@ -173,6 +178,15 @@ typedef struct _osm_vl15 * p_stats * Pointer to the OpenSM statistics block. 
* +* p_subn +* Pointer to the Subnet object for this subnet. +* +* h_disp +* Handle returned from dispatcher registration. +* +* p_lock +* Pointer to the serializing lock. +* * SEE ALSO * VL15 object *********/ @@ -262,7 +276,10 @@ osm_vl15_init( IN osm_vendor_t* const p_vend, IN osm_log_t* const p_log, IN osm_stats_t* const p_stats, - IN const int32_t max_wire_smps ); + IN const int32_t max_wire_smps, + IN osm_subn_t* const p_subn, + IN cl_dispatcher_t* const p_disp, + IN cl_plock_t* const p_lock ); /* * PARAMETERS * p_vl15 @@ -280,6 +297,15 @@ osm_vl15_init( * max_wire_smps * [in] Maximum number of MADs allowed on the wire at one time. * +* p_subn +* [in] Pointer to the subnet object. +* +* p_disp +* [in] Pointer to the dispatcher object. +* +* p_lock +* [in] Pointer to the OpenSM serializing lock. +* * RETURN VALUES * IB_SUCCESS if the VL15 object was initialized successfully. * diff --git a/trunk/ulp/opensm/user/opensm/osm_opensm.c b/trunk/ulp/opensm/user/opensm/osm_opensm.c index 925d227e..7ccfc813 100644 --- a/trunk/ulp/opensm/user/opensm/osm_opensm.c +++ b/trunk/ulp/opensm/user/opensm/osm_opensm.c @@ -254,7 +254,8 @@ osm_opensm_init( status = osm_vl15_init( &p_osm->vl15, p_osm->p_vendor, - &p_osm->log, &p_osm->stats, p_opt->max_wire_smps ); + &p_osm->log, &p_osm->stats, p_opt->max_wire_smps, + &p_osm->subn, &p_osm->disp, &p_osm->lock ); if( status != IB_SUCCESS ) goto Exit; diff --git a/trunk/ulp/opensm/user/opensm/osm_sm_mad_ctrl.c b/trunk/ulp/opensm/user/opensm/osm_sm_mad_ctrl.c index f15836b1..c01b808b 100644 --- a/trunk/ulp/opensm/user/opensm/osm_sm_mad_ctrl.c +++ b/trunk/ulp/opensm/user/opensm/osm_sm_mad_ctrl.c @@ -96,7 +96,17 @@ __osm_sm_mad_ctrl_retire_trans_mad( osm_mad_pool_put( p_ctrl->p_mad_pool, p_madw ); - cl_atomic_dec( &p_ctrl->p_stats->qp0_mads_outstanding ); + if ( !&p_ctrl->p_stats->qp0_mads_outstanding ) + { + osm_log( p_ctrl->p_log, OSM_LOG_ERROR, + "__osm_sm_mad_ctrl_retire_trans_mad: ERR 3120: " + "Trying to dec qp0_mads_outstanding=0. 
" + "Problem with transaction mgr!\n"); + } + else + { + cl_atomic_dec( &p_ctrl->p_stats->qp0_mads_outstanding ); + } if( osm_log_is_active( p_ctrl->p_log, OSM_LOG_DEBUG ) ) { @@ -222,8 +232,19 @@ __osm_sm_mad_ctrl_update_wire_stats( OSM_LOG_ENTER( p_ctrl->p_log, __osm_sm_mad_ctrl_update_wire_stats ); - mads_on_wire = cl_atomic_dec( - &p_ctrl->p_stats->qp0_mads_outstanding_on_wire ); + /* Make sure we are not decrementing below zero */ + if ( !&p_ctrl->p_stats->qp0_mads_outstanding_on_wire ) + { + osm_log( p_ctrl->p_log, OSM_LOG_ERROR, + "__osm_sm_mad_ctrl_update_wire_stats: ERR 3105: " + "Trying to dec qp0_mads_outstanding_on_wire=0. " + "Problem with transaction mgr!\n"); + } + else + { + mads_on_wire = cl_atomic_dec( + &p_ctrl->p_stats->qp0_mads_outstanding_on_wire ); + } if( osm_log_is_active( p_ctrl->p_log, OSM_LOG_DEBUG ) ) { diff --git a/trunk/ulp/opensm/user/opensm/osm_vl15intf.c b/trunk/ulp/opensm/user/opensm/osm_vl15intf.c index 1b9aa205..ae382851 100644 --- a/trunk/ulp/opensm/user/opensm/osm_vl15intf.c +++ b/trunk/ulp/opensm/user/opensm/osm_vl15intf.c @@ -154,6 +154,8 @@ __osm_vl15_poller( if( status != IB_SUCCESS ) { + uint32_t outstanding; + cl_status_t cl_status; osm_log( p_vl->p_log, OSM_LOG_ERROR, "__osm_vl15_poller: ERR 3E03: " "MAD send failed (%s).\n", @@ -161,9 +163,71 @@ __osm_vl15_poller( /* The MAD was never successfully sent, so - Fix-up the pre-incremented count values. + fix up the pre-incremented count values. */ + /* Decrement qp0_mads_sent and qp0_mads_outstanding_on_wire + that was incremented in the code above. */ mads_sent = cl_atomic_dec( &p_vl->p_stats->qp0_mads_sent ); + if( p_madw->resp_expected == TRUE ) + cl_atomic_dec( &p_vl->p_stats->qp0_mads_outstanding_on_wire ); + + /* + The following code is similar to the one in + __osm_sm_mad_ctrl_retire_trans_mad. 
We need to decrement the + qp0_mads_outstanding counter, and if we reached 0 - need to call + the cl_disp_post with OSM_SIGNAL_NO_PENDING_TRANSACTIONS (in order + to wake up the state mgr). + */ + cl_atomic_dec( &p_vl->p_stats->qp0_mads_outstanding ); + + osm_log( p_vl->p_log, OSM_LOG_DEBUG, + "__osm_vl15_poller: " + "%u QP0 MADs outstanding.\n", + p_vl->p_stats->qp0_mads_outstanding ); + + /* + Acquire the lock non-exclusively. + Other modules that send MADs grab this lock exclusively. + These modules that are in the process of sending MADs + will hold the lock until they finish posting all the MADs + they plan to send. While the other module is sending MADs + the outstanding count may temporarily go to zero. + Thus, by grabbing the lock ourselves, we get an accurate + view of whether or not the number of outstanding MADs is + really zero. + */ + CL_PLOCK_ACQUIRE( p_vl->p_lock ); + outstanding = p_vl->p_stats->qp0_mads_outstanding; + CL_PLOCK_RELEASE( p_vl->p_lock ); + + if( outstanding == 0 ) + { + /* + The wire is clean. + Signal the state manager. 
+ */ + if( osm_log_is_active( p_vl->p_log, OSM_LOG_DEBUG ) ) + { + osm_log( p_vl->p_log, OSM_LOG_DEBUG, + "__osm_vl15_poller: " + "Posting Dispatcher message %s.\n", + osm_get_disp_msg_str( OSM_MSG_NO_SMPS_OUTSTANDING ) ); + } + + cl_status = cl_disp_post( p_vl->h_disp, + OSM_MSG_NO_SMPS_OUTSTANDING, + (void *)OSM_SIGNAL_NO_PENDING_TRANSACTIONS, + NULL, + NULL ); + + if( cl_status != CL_SUCCESS ) + { + osm_log( p_vl->p_log, OSM_LOG_ERROR, + "__osm_vl15_poller: ERR 3E06: " + "Dispatcher post message failed (%s).\n", + CL_STATUS_MSG( cl_status ) ); + } + } } else { @@ -171,8 +235,8 @@ __osm_vl15_poller( { osm_log( p_vl->p_log, OSM_LOG_DEBUG, "__osm_vl15_poller: " - "%u on wire, %u outstanding, %u unicasts sent, " - "%u sent total.\n", + "%u QP0 MADs on wire, %u outstanding, %u unicasts sent, " + "%u total sent.\n", p_vl->p_stats->qp0_mads_outstanding_on_wire, p_vl->p_stats->qp0_mads_outstanding, p_vl->p_stats->qp0_unicasts_sent, @@ -229,6 +293,7 @@ osm_vl15_construct( cl_qlist_init( &p_vl->rfifo ); cl_qlist_init( &p_vl->ufifo ); cl_thread_construct( &p_vl->poller ); + p_vl->h_disp = CL_DISP_INVALID_HANDLE; } /********************************************************************** @@ -278,6 +343,8 @@ osm_vl15_destroy( p_vl->state = OSM_VL15_STATE_INIT; cl_spinlock_destroy( &p_vl->lock ); + cl_disp_unregister( p_vl->h_disp ); + OSM_LOG_EXIT( p_vl->p_log ); } @@ -289,7 +356,11 @@ osm_vl15_init( IN osm_vendor_t* const p_vend, IN osm_log_t* const p_log, IN osm_stats_t* const p_stats, - IN const int32_t max_wire_smps ) + IN const int32_t max_wire_smps, + IN osm_subn_t* const p_subn, + IN cl_dispatcher_t* const p_disp, + IN cl_plock_t* const p_lock + ) { ib_api_status_t status = IB_SUCCESS; OSM_LOG_ENTER( p_log, osm_vl15_init ); @@ -298,6 +369,8 @@ osm_vl15_init( p_vl->p_log = p_log; p_vl->p_stats = p_stats; p_vl->max_wire_smps = max_wire_smps; + p_vl->p_subn = p_subn; + p_vl->p_lock = p_lock; status = cl_event_init( &p_vl->signal, FALSE ); if( status != IB_SUCCESS ) @@ 
-318,6 +391,21 @@ osm_vl15_init( if( status != IB_SUCCESS ) goto Exit; + p_vl->h_disp = cl_disp_register( + p_disp, + CL_DISP_MSGID_NONE, + NULL, + NULL ); + + if( p_vl->h_disp == CL_DISP_INVALID_HANDLE ) + { + osm_log( p_log, OSM_LOG_ERROR, + "osm_vl15_init: ERR 3E01: " + "Dispatcher registration failed.\n" ); + status = IB_INSUFFICIENT_RESOURCES; + goto Exit; + } + Exit: OSM_LOG_EXIT( p_log ); return( status ); @@ -396,7 +484,7 @@ osm_vl15_post( { osm_log( p_vl->p_log, OSM_LOG_DEBUG, "osm_vl15_post: " - "%u MADs on wire, %u MADs outstanding.\n", + "%u QP0 MADs on wire, %u QP0 MADs outstanding.\n", p_vl->p_stats->qp0_mads_outstanding_on_wire, p_vl->p_stats->qp0_mads_outstanding ); }