From: ftillier
Date: Tue, 27 Sep 2005 00:34:40 +0000 (+0000)
Subject: [WSD] Add support for multiple CQs to allow more than 3000 sockets per process.
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=f2b3561ef420b4c6fc998d2050b7fcc157bd2eac;p=~shefty%2Frdma-win.git

[WSD] Add support for multiple CQs to allow more than 3000 sockets per process.

Signed-off-by: Fab Tillier (ftillier@silverstorm.com)

git-svn-id: svn://openib.tc.cornell.edu/gen1@91 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86
---
diff --git a/trunk/ulp/wsd/user/ibsp_iblow.c b/trunk/ulp/wsd/user/ibsp_iblow.c
index 130d45d3..a01c3af2 100644
--- a/trunk/ulp/wsd/user/ibsp_iblow.c
+++ b/trunk/ulp/wsd/user/ibsp_iblow.c
@@ -32,9 +32,6 @@
 #include "ibspdll.h"
 
-static void ib_destroy_cq_tinfo( struct cq_thread_info *cq_tinfo );
-
-
 typedef struct _io_comp_info
 {
 	struct ibsp_socket_info *p_socket;
@@ -455,13 +452,13 @@ ib_cq_thread(
 				__LINE__, GetCurrentProcessId(), GetCurrentThreadId()));
 		}
 
-	} while( (cq_tinfo->ib_cq_thread_exit_wanted != TRUE) ||
-			cl_qlist_count( &cq_tinfo->done_wr_list ) );
+	} while( !cq_tinfo->ib_cq_thread_exit_wanted );
 
 	cl_status = cl_waitobj_destroy( cq_tinfo->cq_waitobj );
 	if( cl_status != CL_SUCCESS )
 	{
-		IBSP_ERROR( ("cl_waitobj_destroy() (%d)\n", cl_status) );
+		IBSP_ERROR(
+			("cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status)) );
 	}
 
 	HeapFree( g_ibsp.heap, 0, cq_tinfo );
@@ -471,6 +468,7 @@
 }
 
 
+/* Called with the HCA's CQ lock held. */
 static struct cq_thread_info *
 ib_alloc_cq_tinfo(
 	struct ibsp_hca *hca )
@@ -479,26 +477,26 @@ ib_alloc_cq_tinfo(
 	ib_cq_create_t cq_create;
 	ib_api_status_t status;
 	cl_status_t cl_status;
-	int error;
 
 	IBSP_ENTER( IBSP_DBG_HW );
 
-	cq_tinfo = HeapAlloc( g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );
+	cq_tinfo = HeapAlloc(
+		g_ibsp.heap, HEAP_ZERO_MEMORY, sizeof(struct cq_thread_info) );
 
-	if( cq_tinfo == NULL )
+	if( !cq_tinfo )
 	{
-		IBSP_ERROR( ("HeapAlloc() Failed.\n") );
-		error = TRUE;
-		goto done;
+		IBSP_ERROR_EXIT( ("HeapAlloc() Failed.\n") );
+		return NULL;
 	}
 
 	cl_status = cl_waitobj_create( FALSE, &cq_tinfo->cq_waitobj );
 	if( cl_status != CL_SUCCESS )
 	{
 		cq_tinfo->cq_waitobj = NULL;
-		IBSP_ERROR( ("cl_waitobj_create() (%d)\n", cl_status) );
-		error = TRUE;
-		goto done;
+		ib_destroy_cq_tinfo( cq_tinfo );
+		IBSP_ERROR_EXIT(
+			("cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status)) );
+		return NULL;
 	}
 
 	cq_tinfo->hca = hca;
@@ -509,9 +507,9 @@
 
 	if( cq_tinfo->ib_cq_thread == NULL )
 	{
-		IBSP_ERROR( ("CreateThread failed.") );
-		error = TRUE;
-		goto done;
+		ib_destroy_cq_tinfo( cq_tinfo );
+		IBSP_ERROR_EXIT( ("CreateThread failed (%d)", GetLastError()) );
+		return NULL;
 	}
 
 	STAT_INC( thread_num );
@@ -527,9 +525,10 @@
 		&cq_tinfo->cq );
 	if( status )
 	{
-		IBSP_ERROR( ("ib_create_cq failed (%d)\n", status) );
-		error = TRUE;
-		goto done;
+		ib_destroy_cq_tinfo( cq_tinfo );
+		IBSP_ERROR_EXIT(
+			("ib_create_cq returned %s\n", ib_get_err_str( status )) );
+		return NULL;
 	}
 
 	STAT_INC( cq_num );
@@ -537,34 +536,35 @@
 	status = ib_rearm_cq( cq_tinfo->cq, FALSE );
 	if( status )
 	{
-		IBSP_ERROR( ("ib_rearm_cq failed (%d)\n", status) );
-		error = TRUE;
-		goto done;
+		ib_destroy_cq_tinfo( cq_tinfo );
+		IBSP_ERROR_EXIT(
+			("ib_rearm_cq returned %s\n", ib_get_err_str( status )) );
+		return NULL;
 	}
 
-	cl_spinlock_init( &cq_tinfo->wr_mutex );
-	cl_qlist_init( &cq_tinfo->done_wr_list );
 	cq_tinfo->cqe_size = IB_CQ_SIZE;
 
-	/* Only one CQ per HCA now */
-	hca->cq_tinfo = cq_tinfo;
-
-	error = FALSE;
-
-done:
-	if( error == TRUE )
+	if( hca->cq_tinfo )
 	{
-		ib_destroy_cq_tinfo( cq_tinfo );
-		cq_tinfo = NULL;
+		__cl_primitive_insert(
+			&hca->cq_tinfo->list_item, &cq_tinfo->list_item );
+	}
+	else
+	{
+		/* Setup the list entry to point to itself. */
+		cq_tinfo->list_item.p_next = &cq_tinfo->list_item;
+		cq_tinfo->list_item.p_prev = &cq_tinfo->list_item;
 	}
 
-	IBSP_EXIT( IBSP_DBG_HW );
+	/* Upon allocation, the new CQ becomes the primary. */
+	hca->cq_tinfo = cq_tinfo;
 
+	IBSP_EXIT( IBSP_DBG_HW );
 	return (cq_tinfo);
 }
 
 
-static void
+void
 ib_destroy_cq_tinfo(
 	struct cq_thread_info *cq_tinfo )
@@ -576,29 +576,22 @@ ib_destroy_cq_tinfo(
 
 	IBSP_ENTER( IBSP_DBG_HW );
 
-	if( cq_tinfo == NULL )
-	{
-		return;
-	}
+	CL_ASSERT( cq_tinfo );
+	CL_ASSERT( cq_tinfo->qp_count == 0 );
 
 	if( cq_tinfo->cq )
 	{
 		wclist.p_next = NULL;
 		free_wclist = &wclist;
 
-		while( ib_poll_cq( cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )
+		while( ib_poll_cq(
+			cq_tinfo->cq, &free_wclist, &done_wclist ) == IB_SUCCESS )
 		{
-			IBSP_TRACE( IBSP_DBG_WQ, ("%s():%d:0x%x:0x%x: free=%p, done=%p\n",
-				__FUNCTION__,
-				__LINE__, GetCurrentProcessId(),
-				GetCurrentThreadId(),
-				free_wclist, done_wclist) );
+			IBSP_TRACE1( IBSP_DBG_WQ,
+				("free=%p, done=%p\n", free_wclist, done_wclist) );
 		}
 
-		IBSP_TRACE( IBSP_DBG_WQ, ("%s():%d:0x%x:0x%x: ib_destroy_cq() start..\n",
-			__FUNCTION__,
-			__LINE__, GetCurrentProcessId(),
-			GetCurrentThreadId()) );
+		IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() start..\n") );
 
 		/*
 		 * Called from cleanup thread, okay to block.
@@ -606,13 +599,12 @@ ib_destroy_cq_tinfo(
 		status = ib_destroy_cq( cq_tinfo->cq, ib_sync_destroy );
 		if( status )
 		{
-			IBSP_ERROR( ("ib_destroy_cq failed (%d)\n", status) );
+			IBSP_ERROR(
+				("ib_destroy_cq returned %s\n", ib_get_err_str( status )) );
 		}
 		else
 		{
-			IBSP_TRACE( IBSP_DBG_WQ,
-				("%s():%d:0x%x:0x%x: ib_destroy_cq() finished.\n", __FUNCTION__,
-				__LINE__, GetCurrentProcessId(), GetCurrentThreadId()) );
+			IBSP_TRACE4( IBSP_DBG_WQ, ("ib_destroy_cq() finished.\n") );
 
 			cq_tinfo->cq = NULL;
 
@@ -620,9 +612,6 @@
 		}
 	}
 
-	/* Currently only 1 CQ per HCA */
-	cq_tinfo->hca = NULL;
-
 	if( cq_tinfo->ib_cq_thread )
 	{
 		/* ib_cq_thread() will release the cq_tinfo before exit. Don't
@@ -676,21 +665,21 @@ static struct cq_thread_info *
 ib_acquire_cq_tinfo(
 	struct ibsp_hca *hca )
 {
-	struct cq_thread_info *cq_tinfo = NULL;
-	uint32_t current_cqe_size;
+	struct cq_thread_info	*cq_tinfo = NULL;
+	uint32_t				cqe_size;
+	ib_api_status_t			status;
 
 	IBSP_ENTER( IBSP_DBG_HW );
 
-	/*
-	 * TODO: If future implementations require more than 1 cq_tinfo per HCA, then
-	 * search HCA cq_tinfo list for optimal cq_tinfo
-	 */
-	if( hca->cq_tinfo == NULL )
+	cl_spinlock_acquire( &hca->cq_lock );
+
+	if( !hca->cq_tinfo )
 	{
 		cq_tinfo = ib_alloc_cq_tinfo( hca );
-		if( cq_tinfo == NULL )
+		if( !cq_tinfo )
 		{
-			IBSP_ERROR( ("ib_alloc_cq_tinfo() failed\n") );
+			IBSP_ERROR_EXIT( ("ib_alloc_cq_tinfo() failed\n") );
+			cl_spinlock_release( &hca->cq_lock );
 			return (NULL);
 		}
 	}
@@ -701,38 +690,45 @@
 
 	CL_ASSERT( cq_tinfo != NULL );
 
-	current_cqe_size = cq_tinfo->qp_count * IB_CQ_SIZE;
-
-	cl_atomic_inc( &cq_tinfo->qp_count );
+	cqe_size = (cq_tinfo->qp_count + 1) * IB_CQ_SIZE;
 
-	if( cq_tinfo->cqe_size < current_cqe_size )
+	if( cq_tinfo->cqe_size < cqe_size )
 	{
-		ib_api_status_t status;
-		status = ib_modify_cq( cq_tinfo->cq, &current_cqe_size );
-		if( status )
-		{
-			/*
-			 * TODO: This could mean we are out of cqe and need to have
-			 * more than one cq per HCA in the future.
-			 */
-			cl_atomic_dec( &cq_tinfo->qp_count );
-			IBSP_ERROR_EXIT(
-				("ib_modify_cq() failed. (%d)\n", status) );
-			return (NULL);
-		}
-		else
+		status = ib_modify_cq( cq_tinfo->cq, &cqe_size );
+		switch( status )
 		{
-			cq_tinfo->cqe_size = current_cqe_size;
+		case IB_INVALID_CQ_SIZE:
+			cq_tinfo = ib_alloc_cq_tinfo( hca );
+			if( !cq_tinfo )
+				break;
+
+			cq_tinfo->qp_count++;
+			break;
+
+		case IB_SUCCESS:
+			cq_tinfo->cqe_size = cqe_size;
+
+			cq_tinfo->qp_count++;
 
 			fzprint(("%s():%d:0x%x:0x%x: New cq size=%d.\n", __FUNCTION__,
 				__LINE__, GetCurrentProcessId(), GetCurrentThreadId(), cq_tinfo->cqe_size));
+			break;
 
+		default:
+			IBSP_ERROR_EXIT(
+				("ib_modify_cq() returned %s\n", ib_get_err_str(status)) );
+			cq_tinfo = NULL;
 		}
 	}
+	else
+	{
+		cq_tinfo->qp_count++;
+	}
+	cl_spinlock_release( &hca->cq_lock );
 
 	IBSP_EXIT( IBSP_DBG_HW );
-
 	return (cq_tinfo);
 }
@@ -742,9 +738,14 @@ ib_release_cq_tinfo(
 {
 	IBSP_ENTER( IBSP_DBG_HW );
 
-	cl_atomic_dec( &cq_tinfo->qp_count );
+	CL_ASSERT( cq_tinfo );
+	CL_ASSERT( cq_tinfo->hca );
 
-	/* TODO: downsize the cq */
+	cl_spinlock_acquire( &cq_tinfo->hca->cq_lock );
+	/* If this CQ now has fewer QPs than the primary, make it the primary. */
+	if( --cq_tinfo->qp_count < cq_tinfo->hca->cq_tinfo->qp_count )
+		cq_tinfo->hca->cq_tinfo = cq_tinfo;
+	cl_spinlock_release( &cq_tinfo->hca->cq_lock );
 
 	IBSP_EXIT( IBSP_DBG_HW );
 }
@@ -769,12 +770,6 @@ ib_release(void)
 	{
 		struct ibsp_hca *hca = PARENT_STRUCT(item, struct ibsp_hca, item);
 
-		if( hca->cq_tinfo )
-		{
-			CL_ASSERT( hca->cq_tinfo->qp_count == 0 );
-			ib_destroy_cq_tinfo( hca->cq_tinfo );
-		}
-
 		pnp_ca_remove( hca );
 	}
@@ -788,7 +783,8 @@ ib_release(void)
 			status));
 		if( status != IB_SUCCESS )
 		{
-			IBSP_ERROR( ("ib_close_al failed (%d)\n", status) );
+			IBSP_ERROR(
+				("ib_close_al returned %s\n", ib_get_err_str( status )) );
 		}
 		else
 		{
@@ -907,11 +903,8 @@ int
 ib_create_socket(
 	IN OUT struct ibsp_socket_info *socket_info)
 {
-	struct cq_thread_info *cq_tinfo;
 	ib_qp_create_t qp_create;
 	ib_api_status_t status;
-	int ret;
-	struct ibsp_hca *hca;
 	ib_qp_attr_t qp_attr;
 
 	IBSP_ENTER( IBSP_DBG_EP );
@@ -920,18 +913,15 @@ ib_create_socket(
 	CL_ASSERT( socket_info->port != NULL );
 	CL_ASSERT( socket_info->qp == NULL );
 
-	hca = socket_info->port->hca;
-	socket_info->hca_pd = hca->pd;
+	socket_info->hca_pd = socket_info->port->hca->pd;
 
 	/* Get the completion queue and thread info for this socket */
-	cq_tinfo = ib_acquire_cq_tinfo( hca );
-	if( cq_tinfo == NULL )
+	socket_info->cq_tinfo = ib_acquire_cq_tinfo( socket_info->port->hca );
+	if( !socket_info->cq_tinfo )
 	{
-		IBSP_ERROR( ("ib_acquire_cq_tinfo failed\n") );
-		ret = WSAEPROVIDERFAILEDINIT;
-		goto done;
+		IBSP_ERROR_EXIT( ("ib_acquire_cq_tinfo failed\n") );
+		return WSAENOBUFS;
 	}
-	socket_info->cq_tinfo = cq_tinfo;
 
 	/* Queue pair */
 	qp_create.qp_type = IB_QPT_RELIABLE_CONN;
@@ -939,8 +929,8 @@ ib_create_socket(
 	qp_create.rq_depth = QP_ATTRIB_RQ_DEPTH;
 	qp_create.sq_sge = QP_ATTRIB_SQ_SGE;
 	qp_create.rq_sge = 1;
-	qp_create.h_rq_cq = cq_tinfo->cq;
-	qp_create.h_sq_cq = cq_tinfo->cq;
+	qp_create.h_rq_cq = socket_info->cq_tinfo->cq;
+	qp_create.h_sq_cq = socket_info->cq_tinfo->cq;
 	qp_create.sq_signaled = TRUE;
 
 	status = ib_create_qp( socket_info->hca_pd, &qp_create, socket_info,	/* context */
@@ -948,9 +938,9 @@
 		&socket_info->qp );
 	if( status )
 	{
-		IBSP_ERROR( ("ib_create_qp failed (%d)\n", status));
-		ret = WSAEPROVIDERFAILEDINIT;
-		goto done;
+		IBSP_ERROR_EXIT(
+			("ib_create_qp returned %s\n", ib_get_err_str( status )) );
+		return WSAENOBUFS;
 	}
 
 	status = ib_query_qp( socket_info->qp, &qp_attr );
@@ -960,24 +950,14 @@
 	}
 	else
 	{
-		IBSP_ERROR(
-			("ib_query_qp returned %s\n", ib_get_err_str( status )) );
+		IBSP_ERROR( ("ib_query_qp returned %s\n", ib_get_err_str( status )) );
 		socket_info->max_inline = 0;
 	}
 
 	STAT_INC( qp_num );
 
-	ret = 0;
-
- done:
-	if( ret )
-	{
-		ib_destroy_socket( socket_info );
-	}
-
 	IBSP_EXIT( IBSP_DBG_EP );
-
-	return ret;
+	return 0;
 }
diff --git a/trunk/ulp/wsd/user/ibsp_pnp.c b/trunk/ulp/wsd/user/ibsp_pnp.c
index 77f4074b..a9688563 100644
--- a/trunk/ulp/wsd/user/ibsp_pnp.c
+++ b/trunk/ulp/wsd/user/ibsp_pnp.c
@@ -91,6 +91,7 @@ pnp_ca_add(
 	cl_spinlock_init( &hca->port_lock );
 	cl_qlist_init( &hca->rdma_mem_list.list );
 	cl_spinlock_init( &hca->rdma_mem_list.mutex );
+	cl_spinlock_init( &hca->cq_lock );
 
 	/* HCA handle */
 	IBSP_TRACE( IBSP_DBG_HW,
@@ -143,8 +144,9 @@ void
 pnp_ca_remove(
 	struct ibsp_hca *hca )
 {
-	ib_api_status_t status;
-	cl_list_item_t *p_item;
+	ib_api_status_t			status;
+	cl_list_item_t			*p_item;
+	struct cq_thread_info	*p_cq_tinfo;
 
 	IBSP_ENTER( IBSP_DBG_HW );
 
@@ -161,6 +163,26 @@ pnp_ca_remove(
 	}
 	cl_spinlock_release( &hca->port_lock );
 
+	cl_spinlock_acquire( &hca->cq_lock );
+	while( hca->cq_tinfo )
+	{
+		p_cq_tinfo = hca->cq_tinfo;
+
+		hca->cq_tinfo = PARENT_STRUCT(
+			cl_qlist_next( &hca->cq_tinfo->list_item ),
+			struct cq_thread_info, list_item );
+
+		__cl_primitive_remove( &p_cq_tinfo->list_item );
+
+		if( hca->cq_tinfo == p_cq_tinfo )
+			break;
+
+		cl_spinlock_release( &hca->cq_lock );
+		ib_destroy_cq_tinfo( hca->cq_tinfo );
+		cl_spinlock_acquire( &hca->cq_lock );
+	}
+	cl_spinlock_release( &hca->cq_lock );
+
 	if( hca->pd )
 	{
 		ib_deregister_all_mr( &hca->rdma_mem_list );
@@ -195,6 +217,8 @@ pnp_ca_remove(
 	cl_spinlock_destroy( &hca->port_lock );
 	cl_spinlock_destroy( &hca->rdma_mem_list.mutex );
 
+	cl_spinlock_destroy( &hca->cq_lock );
+
 	HeapFree( g_ibsp.heap, 0, hca );
 
 	IBSP_EXIT( IBSP_DBG_HW );
diff --git a/trunk/ulp/wsd/user/ibspdefines.h b/trunk/ulp/wsd/user/ibspdefines.h
index eea39708..859b6257 100644
--- a/trunk/ulp/wsd/user/ibspdefines.h
+++ b/trunk/ulp/wsd/user/ibspdefines.h
@@ -72,7 +72,7 @@ C_ASSERT( QP_ATTRIB_SQ_DEPTH <= 256 );
 C_ASSERT( QP_ATTRIB_RQ_DEPTH <= 256 );
 
 /* Number of entries in a CQ */
-#define IB_CQ_SIZE (QP_ATTRIB_SQ_DEPTH + QP_ATTRIB_RQ_DEPTH + 1)
+#define IB_CQ_SIZE (QP_ATTRIB_SQ_DEPTH + QP_ATTRIB_RQ_DEPTH)
 
 /* CM timeouts */
 #define CM_MIN_LOCAL_TIMEOUT (18)
diff --git a/trunk/ulp/wsd/user/ibspproto.h b/trunk/ulp/wsd/user/ibspproto.h
index 1fc338e5..fa0a1029 100644
--- a/trunk/ulp/wsd/user/ibspproto.h
+++ b/trunk/ulp/wsd/user/ibspproto.h
@@ -116,6 +116,13 @@ ib_release( void );
 extern int
 ibsp_initialize( void );
 
+void
+ib_release_cq_tinfo(
+	struct cq_thread_info *cq_tinfo );
+void
+ib_destroy_cq_tinfo(
+	struct cq_thread_info *cq_tinfo );
+
 int
 ib_create_socket(
 	IN OUT struct ibsp_socket_info *socket_info );
diff --git a/trunk/ulp/wsd/user/ibspstruct.h b/trunk/ulp/wsd/user/ibspstruct.h
index 4588174a..59316c44 100644
--- a/trunk/ulp/wsd/user/ibspstruct.h
+++ b/trunk/ulp/wsd/user/ibspstruct.h
@@ -183,15 +183,11 @@ struct rdma_memory_desc
 
 struct cq_thread_info
 {
-	/* For future growth if the hca needs a list of cqs */
-	cl_list_item_t item;
+	cl_list_item_t list_item;
 
 	cl_waitobj_handle_t cq_waitobj;
 	ib_cq_handle_t cq;
 
-	cl_spinlock_t wr_mutex;
-	cl_qlist_t done_wr_list;
-
 	/* Number of qp's using this cq */
 	atomic32_t qp_count;
@@ -391,7 +387,11 @@ struct ibsp_hca
 	cl_spinlock_t port_lock;
 	cl_qlist_t port_list;
 
-	/* TODO: Make this a dynamic list if we need more than one cq_tinfo per HCA */
+	/*
+	 * The CQ list is a circular list without an end. The pointer here
+	 * points to the entry that should be used for the next allocation.
+	 */
+	cl_spinlock_t cq_lock;
 	struct cq_thread_info *cq_tinfo;
 };
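
The allocation policy the patch implements can be read from ib_acquire_cq_tinfo() above: each QP consumes IB_CQ_SIZE completion entries, the primary CQ is grown with ib_modify_cq() as QPs are added, and when the verb rejects the larger size with IB_INVALID_CQ_SIZE a fresh CQ is allocated, linked into the HCA's circular list, and made the primary. The stand-alone C sketch below only models that behaviour; fake_cq, fake_hca, acquire_cq and the two size constants are invented stand-ins for illustration, not part of the WSD provider, and the per-CQ limit shown is an arbitrary assumed value rather than a real HCA limit.

/*
 * Minimal model of the multiple-CQ scheme, assuming made-up types and limits.
 * Allocation-failure handling is omitted for brevity.
 */
#include <stdio.h>
#include <stdlib.h>

#define IB_CQ_SIZE	512				/* CQEs needed per QP (illustrative value) */
#define MAX_CQE		(1024 * 1024)	/* assumed per-CQ device limit (illustrative) */

struct fake_cq
{
	struct fake_cq	*next;		/* circular list of CQs on one HCA */
	unsigned		qp_count;	/* QPs currently bound to this CQ */
	unsigned		cqe_size;	/* current number of entries in the CQ */
};

struct fake_hca
{
	struct fake_cq	*primary;	/* CQ to use for the next allocation */
};

/* Model of ib_acquire_cq_tinfo(): grow the primary CQ, or spill to a new one. */
static struct fake_cq *
acquire_cq( struct fake_hca *hca )
{
	struct fake_cq *cq = hca->primary;
	unsigned needed;

	if( !cq )
	{
		cq = calloc( 1, sizeof(*cq) );
		cq->next = cq;				/* a list of one points to itself */
		cq->cqe_size = IB_CQ_SIZE;
		hca->primary = cq;
	}

	needed = (cq->qp_count + 1) * IB_CQ_SIZE;
	if( needed > MAX_CQE )
	{
		/* Stand-in for ib_modify_cq() failing with IB_INVALID_CQ_SIZE:
		 * allocate a fresh CQ, link it into the circular list, and make
		 * it the primary, as ib_alloc_cq_tinfo() does in the patch. */
		struct fake_cq *fresh = calloc( 1, sizeof(*fresh) );
		fresh->cqe_size = IB_CQ_SIZE;
		fresh->next = cq->next;
		cq->next = fresh;
		hca->primary = fresh;
		cq = fresh;
	}
	else if( needed > cq->cqe_size )
	{
		cq->cqe_size = needed;		/* ib_modify_cq() succeeded */
	}

	cq->qp_count++;
	return cq;
}

int main( void )
{
	struct fake_hca hca = { NULL };
	struct fake_cq *first, *cq;
	unsigned i, cqs = 1;

	/* With a single CQ capped at MAX_CQE, only MAX_CQE / IB_CQ_SIZE QPs fit;
	 * spilling onto additional CQs removes that per-process ceiling. */
	for( i = 0; i < 10000; i++ )
		acquire_cq( &hca );

	first = cq = hca.primary;
	while( (cq = cq->next) != first )
		cqs++;

	printf( "10000 QPs spread over %u CQs\n", cqs );
	return 0;
}

Run as written, the model spreads the 10000 simulated QPs over several CQs instead of failing once a single CQ can no longer grow, which is the per-process socket ceiling the commit subject refers to.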