From 143dee51f30a0d59ca799a0e4290ab58ef358a37 Mon Sep 17 00:00:00 2001 From: leonidk Date: Mon, 19 Nov 2007 17:39:55 +0000 Subject: [PATCH] [IBAL] 1) bugfix: missed storing private data in one of scenarios of GetConnectReq; 2) bugfix in releasing QP NDI resources; 3) added allocation of parameter list for REQ in order to prevent problems upon cancelling IRPs; 4) added workaround of real solution for coming DREQ after destroying QP (in __ndi_cm_handler) git-svn-id: svn://openib.tc.cornell.edu/gen1@907 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- branches/Ndi/core/al/al_qp.c | 6 +- branches/Ndi/core/al/kernel/al_cm_cep.c | 36 ++++++-- branches/Ndi/core/al/kernel/al_ndi_cm.c | 101 ++++++++++++++++----- branches/Ndi/core/al/kernel/al_ndi_cm.h | 6 +- branches/Ndi/core/al/kernel/al_proxy_ndi.c | 15 ++- 5 files changed, 129 insertions(+), 35 deletions(-) diff --git a/branches/Ndi/core/al/al_qp.c b/branches/Ndi/core/al/al_qp.c index bce2e577..eecb686a 100644 --- a/branches/Ndi/core/al/al_qp.c +++ b/branches/Ndi/core/al/al_qp.c @@ -1129,7 +1129,7 @@ destroying_qp( } } #ifdef CL_KERNEL - ndi_qp_deinit( h_qp ); + ndi_qp_destroy( h_qp ); #endif /* Fall through. */ @@ -1248,6 +1248,10 @@ free_qp( CL_ASSERT( p_obj ); h_qp = PARENT_STRUCT( p_obj, ib_qp_t, obj ); +#ifdef CL_KERNEL + ndi_qp_free( h_qp ); +#endif + destroy_al_obj( p_obj ); cl_free( h_qp ); } diff --git a/branches/Ndi/core/al/kernel/al_cm_cep.c b/branches/Ndi/core/al/kernel/al_cm_cep.c index 52dc251e..9cc886c4 100644 --- a/branches/Ndi/core/al/kernel/al_cm_cep.c +++ b/branches/Ndi/core/al/kernel/al_cm_cep.c @@ -3403,8 +3403,9 @@ __cep_set_pdata( cl_memclr( p_cep->pdata, sizeof(p_cep->pdata) ); p_cep->psize = min( psize, sizeof(p_cep->pdata) ); memcpy( p_cep->pdata, pdata, p_cep->psize ); - AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM , - ("__cep_set_pdata: set %d of pdata \n", p_cep->psize )); + AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR , + ("__cep_set_pdata: set %d of pdata for cid %d, h_al %p \n", + p_cep->psize, cid, h_al )); AL_EXIT( AL_DBG_CM ); return IB_SUCCESS; @@ -3458,10 +3459,11 @@ __cep_queue_mad( // store REQ private data __cep_set_pdata( (ib_al_handle_t)p_irp->Tail.Overlay.DriverContext[1], - p_new_cep->cid, IB_REQ_PDATA_SIZE, (uint8_t*)p_req->pdata ); - AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM , - ("__cep_queue_mad: set %d of REQ pdata to CEP with cid %d\n", - IB_REJ_PDATA_SIZE, p_new_cep->cid )); + p_new_cep->cid, sizeof(p_req->pdata), (uint8_t*)p_req->pdata ); + AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR , + ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", + sizeof(p_req->pdata), p_new_cep->cid, + (ib_al_handle_t)p_irp->Tail.Overlay.DriverContext[1] )); // complete GetConnectionReq IRP #pragma warning(push, 3) @@ -6317,10 +6319,11 @@ al_cep_get_cid( IN PIRP p_irp ) { - kcep_t *p_cep; + kcep_t *p_cep, *p_new_cep; NTSTATUS nt_status; KLOCK_QUEUE_HANDLE hdl; ib_mad_element_t* p_mad = NULL; + mad_cm_req_t* p_req; AL_ENTER( AL_DBG_NDI ); KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl ); @@ -6354,6 +6357,15 @@ al_cep_get_cid( p_cep->p_mad_head = p_mad->p_next; p_mad->p_next = NULL; + /* store REQ private data */ + p_req = (mad_cm_req_t*)ib_get_mad_buf( p_mad ); + p_new_cep = (kcep_t* __ptr64)p_mad->send_context1; + __cep_set_pdata( h_al, p_new_cep->cid, + sizeof(p_req->pdata), (uint8_t*)p_req->pdata ); + AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR , + ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", + sizeof(p_req->pdata), p_new_cep->cid, h_al )); + /* complete the IRP */ __complete_ndi_irp( p_irp, p_mad ); nt_status = STATUS_EVENT_DONE; @@ -6386,7 +6398,7 @@ al_cep_get_pdata( { KeReleaseInStackQueuedSpinLock( &hdl ); AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, - ("CEP not found\n")); + ("CEP not found for cid %d, h_al %p\n", cid, h_al )); return IB_INVALID_HANDLE; } @@ -6394,7 +6406,8 @@ al_cep_get_pdata( { KeReleaseInStackQueuedSpinLock( &hdl ); AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, - ("Insufficient size: *p_psize %d, max %d\n", *p_psize, p_cep->psize)); + ("Insufficient size: *p_psize %d, max %d, cid %d, h_al %p\n", + *p_psize, p_cep->psize, cid, h_al )); return IB_INVALID_PARAMETER; } @@ -6403,6 +6416,11 @@ al_cep_get_pdata( if (remainder) cl_memclr( &pdata[p_cep->psize], remainder ); *p_psize = p_cep->psize; + if ( !*p_psize ) + { + AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + ("p_cep->psize is zero for cid %d, h_al %p\n", cid, h_al )); + } AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM , ("al_cep_get_pdata: get %d of pdata from CEP with cid %d \n", p_cep->psize, cid )); diff --git a/branches/Ndi/core/al/kernel/al_ndi_cm.c b/branches/Ndi/core/al/kernel/al_ndi_cm.c index d143ef28..85c3892b 100644 --- a/branches/Ndi/core/al/kernel/al_ndi_cm.c +++ b/branches/Ndi/core/al/kernel/al_ndi_cm.c @@ -401,6 +401,9 @@ static inline void __ndi_flush_que( PIRP Irp; while( Irp = IoCsqRemoveNextIrp( &p_ndi_csq->csq, NULL ) ) { + AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, + ("h_qp %I64x, ref_cnt %d\n", (uint64_t)p_ndi_csq->h_qp, + p_ndi_csq->h_qp->obj.ref_cnt ) ); cl_ioctl_complete( Irp, completion_code, 0 ); deref_al_obj( &p_ndi_csq->h_qp->obj ); } @@ -414,6 +417,8 @@ ndi_qp_flush_ques( { AL_ENTER( AL_DBG_NDI ); __ndi_flush_que( h_qp->p_irp_que, STATUS_CANCELLED ); + AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, + ("h_qp %I64x, ref_cnt %d\n", (uint64_t)h_qp, h_qp->obj.ref_cnt ) ); AL_EXIT( AL_DBG_NDI ); } @@ -454,20 +459,39 @@ exit: } void -ndi_qp_deinit( +ndi_qp_destroy( IN ib_qp_handle_t h_qp ) { - if (h_qp->p_irp_que) + AL_ENTER( AL_DBG_NDI ); + + if (h_qp->type == IB_QPT_RELIABLE_CONN && h_qp->p_irp_que) { + AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, + ("Destroying h_qp %I64x. h_ioctl %p\n", + (uint64_t)h_qp, h_qp->p_irp_que->h_ioctl ) ); + /* cancel pending IRPS for NDI type CQ */ ndi_qp_flush_ques( h_qp ); + } + + AL_EXIT( AL_DBG_NDI ); +} + +void +ndi_qp_free( + IN ib_qp_handle_t h_qp ) +{ + AL_ENTER( AL_DBG_NDI ); + if (h_qp->type == IB_QPT_RELIABLE_CONN && h_qp->p_irp_que) + { /* free NDI context */ cl_free( h_qp->p_irp_que ); h_qp->p_irp_que = NULL; } -} + AL_EXIT( AL_DBG_NDI ); +} /******************************************************************* @@ -543,6 +567,9 @@ __ndi_proc_rej( { case NDI_CM_CONNECTING_REQ_SENT: al_cep_set_pdata( p_cm->h_al, p_cm->cid, IB_REJ_PDATA_SIZE, (uint8_t*)p_rej->pdata ); + AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR , + ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", + IB_REJ_PDATA_SIZE, p_cm->cid, p_cm->h_al )); status = (p_rej->reason == IB_REJ_TIMEOUT) ? STATUS_TIMEOUT : STATUS_CONNECTION_REFUSED; __ndi_complete_irp_ex( p_cm->h_qp, status, NDI_CM_CONNECTING_REJ_RCVD ); break; @@ -619,6 +646,9 @@ __ndi_proc_rep( /* fill the rej data */ al_cep_set_pdata( p_cm->h_al, p_cm->cid, IB_REJ_PDATA_SIZE, p_rep->pdata ); + AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR , + ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", + IB_REJ_PDATA_SIZE, p_cm->cid, p_cm->h_al )); __ndi_complete_irp_ex( p_cm->h_qp, STATUS_SUCCESS, NDI_CM_CONNECTING_REP_RCVD ); @@ -752,9 +782,18 @@ __ndi_cm_handler( switch( p_mad->attr_id ) { case CM_REP_ATTR_ID: - CL_ASSERT( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || - ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID ); - __ndi_proc_rep( &h_cm, (mad_cm_rep_t*)p_mad ); + // TODO: how to prevent calling this handler with QP destroyed ? + if( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || + ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID ) + { + __ndi_proc_rep( &h_cm, (mad_cm_rep_t*)p_mad ); + } + else + { + AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + ("REP received when QP destroyed, cid %d:%d .\n", + ((al_conn_qp_t*)h_qp)->cid, cid ) ); + } break; case CM_REJ_ATTR_ID: @@ -762,9 +801,18 @@ __ndi_cm_handler( break; case CM_DREQ_ATTR_ID: - CL_ASSERT( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || - ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID ); - __ndi_proc_dreq( &h_cm, (mad_cm_dreq_t*)p_mad ); + // TODO: how to prevent calling this handler with QP destroyed ? + if( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || + ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID ) + { + __ndi_proc_dreq( &h_cm, (mad_cm_dreq_t*)p_mad ); + } + else + { + AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + ("DREQ received when QP destroyed, cid %d:%d .\n", + ((al_conn_qp_t*)h_qp)->cid, cid ) ); + } break; default: @@ -842,9 +890,9 @@ __ndi_pr_query_cb( AL_ENTER( AL_DBG_NDI ); - AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, - ("status is %d, count is %d\n", p_query_rec->status, - p_query_rec->result_cnt) ); + AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + ("status is %d, count is %d, context %p\n", p_query_rec->status, + p_query_rec->result_cnt, p_query_rec->query_context) ); h_ioctl = IoCsqRemoveNextIrp( &h_qp->p_irp_que->csq, NULL ); if( !h_ioctl || p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt ) @@ -913,6 +961,7 @@ err_irp_complete: deref_al_obj( &h_qp->obj ); /* release the extra reference */ exit: + cl_free( p_req ); if( p_query_rec->p_result_mad ) ib_put_mad( p_query_rec->p_result_mad ); @@ -980,8 +1029,8 @@ __ndi_ats_query_cb( AL_ENTER( AL_DBG_NDI ); AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, - ("status is %d, count is %d\n", p_query_rec->status, - p_query_rec->result_cnt) ); + ("status is %d, count is %d, context %p\n", p_query_rec->status, + p_query_rec->result_cnt, p_query_rec->query_context) ); h_ioctl = IoCsqRemoveNextIrp( &h_qp->p_irp_que->csq, NULL ); if( !h_ioctl || p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt ) @@ -989,6 +1038,7 @@ __ndi_ats_query_cb( h_qp->p_irp_que->state = NDI_CM_IDLE; __ndi_complete_irp( h_qp, h_ioctl, STATUS_HOST_UNREACHABLE ); deref_al_obj( &h_qp->obj ); /* release the extra reference */ + cl_free( p_req ); goto exit; /* ATS request failed */ } @@ -1050,7 +1100,7 @@ __ndi_ats_query( service_record.service_lease = 0xFFFFFFFF; strcpy( (void*)service_record.service_name, ATS_NAME ); - AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, ("ATS:: MAD: method %#x, attr_id %#hx, Service: comp_mask %#I64x, IP %d.%d.%d.%d \n", user_query.method, CL_NTOH16(user_query.attr_id), @@ -1063,7 +1113,9 @@ __ndi_ats_query( h_qp->p_irp_que->state = NDI_CM_CONNECTING_ATS_SENT; /* prevent destroying QP after cancelling of the IRP and before ib_query calback*/ - ref_al_obj( &h_qp->obj ); + AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, + ("h_qp %I64x, ref_cnt %d\n", (uint64_t)h_qp, h_qp->obj.ref_cnt ) ); + ref_al_obj( &h_qp->obj ); /* take the extra reference */ /* insert IRP in the queue */ IoCsqInsertIrp( &h_qp->p_irp_que->csq, h_ioctl, NULL ); @@ -1094,22 +1146,29 @@ ndi_req_cm( /* check outstanding request */ __ndi_acquire_lock( &h_qp->p_irp_que->csq, NULL); - if (__ndi_peek_next_irp( &h_qp->p_irp_que->csq, NULL, NULL )) + if ( h_qp->p_irp_que->h_ioctl ) { + AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + ("STATUS_CONNECTION_ACTIVE: h_qp %I64x, ref_cnt %d\n", + (uint64_t)h_qp, h_qp->obj.ref_cnt ) ); nt_status = STATUS_CONNECTION_ACTIVE; __ndi_release_lock( &h_qp->p_irp_que->csq, 0 ); goto exit; } - __ndi_release_lock( &h_qp->p_irp_que->csq, 0 ); - if ( h_qp->p_irp_que->state != NDI_CM_IDLE ) { + AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, + ("STATUS_INVALID_DEVICE_STATE: h_qp %I64x, ref_cnt %d, state %d\n", + (uint64_t)h_qp, h_qp->obj.ref_cnt, h_qp->p_irp_que->state ) ); nt_status = STATUS_INVALID_DEVICE_STATE; + __ndi_release_lock( &h_qp->p_irp_que->csq, 0 ); goto exit; } - - /* send ATS request */ h_qp->p_irp_que->h_ioctl = h_ioctl; /* mark IRP as present */ + h_qp->p_irp_que->state = NDI_CM_CONNECTING_ATS_SENT; + __ndi_release_lock( &h_qp->p_irp_que->csq, 0 ); + + /* send ATS request */ nt_status = __ndi_ats_query( h_qp, h_ioctl, p_req ); exit: diff --git a/branches/Ndi/core/al/kernel/al_ndi_cm.h b/branches/Ndi/core/al/kernel/al_ndi_cm.h index 805df62f..07b92563 100644 --- a/branches/Ndi/core/al/kernel/al_ndi_cm.h +++ b/branches/Ndi/core/al/kernel/al_ndi_cm.h @@ -133,7 +133,11 @@ ndi_qp_init( IN ib_qp_handle_t h_qp ); void -ndi_qp_deinit( +ndi_qp_destroy( + IN ib_qp_handle_t h_qp ); +void + +ndi_qp_free( IN ib_qp_handle_t h_qp ); #endif diff --git a/branches/Ndi/core/al/kernel/al_proxy_ndi.c b/branches/Ndi/core/al/kernel/al_proxy_ndi.c index d8ac676f..a34ab9ea 100644 --- a/branches/Ndi/core/al/kernel/al_proxy_ndi.c +++ b/branches/Ndi/core/al/kernel/al_proxy_ndi.c @@ -312,7 +312,7 @@ __ndi_req_cm( cl_status_t cl_status; ib_qp_handle_t h_qp = NULL; al_dev_open_context_t *p_context; - ual_ndi_req_cm_ioctl_in_t *p_req = + ual_ndi_req_cm_ioctl_in_t *p_parm, *p_req = (ual_ndi_req_cm_ioctl_in_t*)cl_ioctl_in_buf( h_ioctl ); UNUSED_PARAM(p_ret_bytes); @@ -341,7 +341,6 @@ __ndi_req_cm( cl_status = CL_INVALID_HANDLE; goto exit; } - p_req->h_qp = (uint64_t)h_qp; /* Check psize */ if ( p_req->pdata_size > sizeof(p_req->pdata) ) @@ -349,9 +348,19 @@ __ndi_req_cm( cl_status = CL_INVALID_PARAMETER; goto exit; } + + /* copy request parameters a side to prevent problems from cancelled IRP */ + p_parm = cl_zalloc( sizeof(ual_ndi_req_cm_ioctl_in_t) ); + if (!p_parm ) + { + cl_status = CL_INSUFFICIENT_MEMORY; + goto exit; + } + RtlCopyMemory( p_parm, p_req, sizeof(ual_ndi_req_cm_ioctl_in_t) ); + p_parm->h_qp = (uint64_t)h_qp; /* perform the ioctl */ - cl_status = ndi_req_cm( h_qp, h_ioctl, p_req ); + cl_status = ndi_req_cm( h_qp, h_ioctl, p_parm ); exit: if (h_qp && cl_status != CL_PENDING) -- 2.41.0