From 83a21e7b9403f3d295eb26308547b3060f08a7b1 Mon Sep 17 00:00:00 2001 From: leonidk Date: Sun, 13 Jul 2008 11:35:53 +0000 Subject: [PATCH] [IBAL] Limit time spent at DISPATCH_LEVEL when processing MADs. In working with some switch SMs, I found that under stress the QP0 MAD processing could get delayed by eternal QP1 MAD processing (from hitting the SA for path queries). This caused the SMs to remove the node from the fabric (multicast group membership, etc) because it appeared unreachable. This patch adds a DPC for QP0 and QP1 to the SMI, and limits the SMI to processing 16 MADs before re-queueing the DPC to the DPC queue. Because QP0 processing is crucial to the fabric operating properly, the QP0 DPC is set to high importance so that it always gets queued to the front of the DPC queue, pre-empting any QP1 processing (among other things). In any case, with this patch all my changes have now been mailed to the list. Signed-off-by: Fab Tillier git-svn-id: svn://openib.tc.cornell.edu/gen1@1393 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- trunk/core/al/kernel/al_smi.c | 146 ++++++++++++++++++++++++++++++---- trunk/core/al/kernel/al_smi.h | 3 + 2 files changed, 132 insertions(+), 17 deletions(-) diff --git a/trunk/core/al/kernel/al_smi.c b/trunk/core/al/kernel/al_smi.c index b3cc3981..255c0c24 100644 --- a/trunk/core/al/kernel/al_smi.c +++ b/trunk/core/al/kernel/al_smi.c @@ -140,6 +140,22 @@ spl_qp_send_comp_cb( IN const ib_cq_handle_t h_cq, IN void *cq_context ); +void +spl_qp_send_dpc_cb( + IN KDPC *p_dpc, + IN void *context, + IN void *arg1, + IN void *arg2 + ); + +void +spl_qp_recv_dpc_cb( + IN KDPC *p_dpc, + IN void *context, + IN void *arg1, + IN void *arg2 + ); + void spl_qp_recv_comp_cb( IN const ib_cq_handle_t h_cq, @@ -554,7 +570,17 @@ create_spl_qp_svc( cl_qlist_init( &p_spl_qp_svc->send_queue ); cl_qlist_init( &p_spl_qp_svc->recv_queue ); cl_spinlock_init(&p_spl_qp_svc->cache_lock); - + + /* Initialize the DPCs. */ + KeInitializeDpc( &p_spl_qp_svc->send_dpc, spl_qp_send_dpc_cb, p_spl_qp_svc ); + KeInitializeDpc( &p_spl_qp_svc->recv_dpc, spl_qp_recv_dpc_cb, p_spl_qp_svc ); + + if( qp_type == IB_QPT_QP0 ) + { + KeSetImportanceDpc( &p_spl_qp_svc->send_dpc, HighImportance ); + KeSetImportanceDpc( &p_spl_qp_svc->recv_dpc, HighImportance ); + } + #if defined( CL_USE_MUTEX ) /* Initialize async callbacks and flags for send/receive processing. */ p_spl_qp_svc->send_async_queued = FALSE; @@ -2461,6 +2487,8 @@ spl_qp_send_comp_cb( AL_ENTER( AL_DBG_SMI ); + UNREFERENCED_PARAMETER( h_cq ); + CL_ASSERT( cq_context ); p_spl_qp_svc = cq_context; @@ -2477,21 +2505,55 @@ spl_qp_send_comp_cb( cl_spinlock_release( &p_spl_qp_svc->obj.lock ); #else + cl_spinlock_acquire( &p_spl_qp_svc->obj.lock ); + if( p_spl_qp_svc->state != SPL_QP_ACTIVE ) + { + cl_spinlock_release( &p_spl_qp_svc->obj.lock ); + AL_EXIT( AL_DBG_SMI ); + return; + } + cl_atomic_inc( &p_spl_qp_svc->in_use_cnt ); + cl_spinlock_release( &p_spl_qp_svc->obj.lock ); - /* Invoke the callback directly. */ + /* Queue the DPC. */ CL_ASSERT( h_cq == p_spl_qp_svc->h_send_cq ); - spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_SEND ); + KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL ); +#endif + + AL_EXIT( AL_DBG_SMI ); +} + + +void +spl_qp_send_dpc_cb( + IN KDPC *p_dpc, + IN void *context, + IN void *arg1, + IN void *arg2 + ) +{ + spl_qp_svc_t* p_spl_qp_svc; + + AL_ENTER( AL_DBG_SMI ); + + CL_ASSERT( context ); + p_spl_qp_svc = context; + + UNREFERENCED_PARAMETER( p_dpc ); + UNREFERENCED_PARAMETER( arg1 ); + UNREFERENCED_PARAMETER( arg2 ); + + spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_send_cq, IB_WC_SEND ); /* Continue processing any queued MADs on the QP. */ special_qp_resume_sends( p_spl_qp_svc->h_qp ); -#endif + cl_atomic_dec( &p_spl_qp_svc->in_use_cnt ); - AL_EXIT( AL_DBG_SMI ); + AL_EXIT( AL_DBG_SMI ); } - #if defined( CL_USE_MUTEX ) void spl_qp_send_async_cb( @@ -2536,6 +2598,8 @@ spl_qp_recv_comp_cb( AL_ENTER( AL_DBG_SMI ); + UNREFERENCED_PARAMETER( h_cq ); + CL_ASSERT( cq_context ); p_spl_qp_svc = cq_context; @@ -2552,16 +2616,51 @@ spl_qp_recv_comp_cb( cl_spinlock_release( &p_spl_qp_svc->obj.lock ); #else + cl_spinlock_acquire( &p_spl_qp_svc->obj.lock ); + if( p_spl_qp_svc->state != SPL_QP_ACTIVE ) + { + cl_spinlock_release( &p_spl_qp_svc->obj.lock ); + AL_EXIT( AL_DBG_SMI ); + return; + } + cl_atomic_inc( &p_spl_qp_svc->in_use_cnt ); + cl_spinlock_release( &p_spl_qp_svc->obj.lock ); + /* Queue the DPC. */ CL_ASSERT( h_cq == p_spl_qp_svc->h_recv_cq ); - spl_qp_comp( p_spl_qp_svc, h_cq, IB_WC_RECV ); - + KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL ); #endif AL_EXIT( AL_DBG_SMI ); } +void +spl_qp_recv_dpc_cb( + IN KDPC *p_dpc, + IN void *context, + IN void *arg1, + IN void *arg2 + ) +{ + spl_qp_svc_t* p_spl_qp_svc; + + AL_ENTER( AL_DBG_SMI ); + + CL_ASSERT( context ); + p_spl_qp_svc = context; + + UNREFERENCED_PARAMETER( p_dpc ); + UNREFERENCED_PARAMETER( arg1 ); + UNREFERENCED_PARAMETER( arg2 ); + + spl_qp_comp( p_spl_qp_svc, p_spl_qp_svc->h_recv_cq, IB_WC_RECV ); + + cl_atomic_dec( &p_spl_qp_svc->in_use_cnt ); + + AL_EXIT( AL_DBG_SMI ); +} + #if defined( CL_USE_MUTEX ) void @@ -2589,7 +2688,7 @@ spl_qp_recv_async_cb( #endif - +#define SPL_QP_MAX_POLL 16 /* * Special QP completion handler. */ @@ -2607,6 +2706,7 @@ spl_qp_comp( ib_mad_element_t* p_mad_element; ib_smp_t* p_smp; ib_api_status_t status; + int max_poll = SPL_QP_MAX_POLL; AL_ENTER( AL_DBG_SMI_CB ); @@ -2625,7 +2725,7 @@ spl_qp_comp( wc.p_next = NULL; /* Process work completions. */ - while( ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS ) + while( max_poll && ib_poll_cq( h_cq, &p_free_wc, &p_done_wc ) == IB_SUCCESS ) { /* Process completions one at a time. */ CL_ASSERT( p_done_wc ); @@ -2713,13 +2813,25 @@ spl_qp_comp( spl_qp_svc_reset( p_spl_qp_svc ); } p_free_wc = &wc; - } - - /* Rearm the CQ. */ - status = ib_rearm_cq( h_cq, FALSE ); - CL_ASSERT( status == IB_SUCCESS ); - - cl_atomic_dec( &p_spl_qp_svc->in_use_cnt ); + --max_poll; + } + + if( max_poll == 0 ) + { + /* We already have an in_use_cnt reference - use it to queue the DPC. */ + if( wc_type == IB_WC_SEND ) + KeInsertQueueDpc( &p_spl_qp_svc->send_dpc, NULL, NULL ); + else + KeInsertQueueDpc( &p_spl_qp_svc->recv_dpc, NULL, NULL ); + } + else + { + /* Rearm the CQ. */ + status = ib_rearm_cq( h_cq, FALSE ); + CL_ASSERT( status == IB_SUCCESS ); + + cl_atomic_dec( &p_spl_qp_svc->in_use_cnt ); + } AL_EXIT( AL_DBG_SMI_CB ); } diff --git a/trunk/core/al/kernel/al_smi.h b/trunk/core/al/kernel/al_smi.h index 95b2718f..62cb0fe2 100644 --- a/trunk/core/al/kernel/al_smi.h +++ b/trunk/core/al/kernel/al_smi.h @@ -157,6 +157,9 @@ typedef struct _spl_qp_svc ib_pool_key_t pool_key; ib_mad_svc_handle_t h_mad_svc; + KDPC send_dpc; + KDPC recv_dpc; + } spl_qp_svc_t; -- 2.46.0