]> git.openfabrics.org - ~shefty/rdma-win.git/commitdiff
[IBAL] 1) bugfix: missed storing private data in one of scenarios of GetConnectReq;
authorleonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 19 Nov 2007 17:39:55 +0000 (17:39 +0000)
committerleonidk <leonidk@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Mon, 19 Nov 2007 17:39:55 +0000 (17:39 +0000)
2) bugfix in releasing QP NDI resources;
3) added allocation of parameter list for REQ in order to prevent problems upon cancelling IRPs;
4) added workaround of real solution for coming DREQ after destroying QP (in __ndi_cm_handler)

git-svn-id: svn://openib.tc.cornell.edu/gen1@907 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

branches/Ndi/core/al/al_qp.c
branches/Ndi/core/al/kernel/al_cm_cep.c
branches/Ndi/core/al/kernel/al_ndi_cm.c
branches/Ndi/core/al/kernel/al_ndi_cm.h
branches/Ndi/core/al/kernel/al_proxy_ndi.c

index bce2e577d41c10d862ed30539643569109de3752..eecb686a81fc7e1e8a8ea8926147910f0c700c1f 100644 (file)
@@ -1129,7 +1129,7 @@ destroying_qp(
                        }\r
                }\r
 #ifdef CL_KERNEL\r
-               ndi_qp_deinit( h_qp );\r
+               ndi_qp_destroy( h_qp );\r
 #endif\r
 \r
                /* Fall through. */\r
@@ -1248,6 +1248,10 @@ free_qp(
        CL_ASSERT( p_obj );\r
        h_qp = PARENT_STRUCT( p_obj, ib_qp_t, obj );\r
 \r
+#ifdef CL_KERNEL\r
+       ndi_qp_free( h_qp );\r
+#endif\r
+\r
        destroy_al_obj( p_obj );\r
        cl_free( h_qp );\r
 }\r
index 52dc251e4a48c333360fc765cfb03da1884f7b45..9cc886c4bbd160488cc438320922c8c0809180a6 100644 (file)
@@ -3403,8 +3403,9 @@ __cep_set_pdata(
        cl_memclr( p_cep->pdata, sizeof(p_cep->pdata) );\r
        p_cep->psize = min( psize, sizeof(p_cep->pdata) );\r
        memcpy( p_cep->pdata, pdata, p_cep->psize );\r
-       AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM ,\r
-               ("__cep_set_pdata: set %d of pdata \n", p_cep->psize ));\r
+       AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR ,\r
+               ("__cep_set_pdata: set %d of pdata for cid %d, h_al %p \n", \r
+               p_cep->psize, cid, h_al ));\r
 \r
        AL_EXIT( AL_DBG_CM );\r
        return IB_SUCCESS;\r
@@ -3458,10 +3459,11 @@ __cep_queue_mad(
                        // store REQ private data\r
                        __cep_set_pdata( \r
                                (ib_al_handle_t)p_irp->Tail.Overlay.DriverContext[1], \r
-                               p_new_cep->cid, IB_REQ_PDATA_SIZE, (uint8_t*)p_req->pdata );\r
-                       AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM ,\r
-                               ("__cep_queue_mad: set %d of REQ pdata to CEP with cid %d\n", \r
-                               IB_REJ_PDATA_SIZE, p_new_cep->cid ));\r
+                               p_new_cep->cid, sizeof(p_req->pdata), (uint8_t*)p_req->pdata );\r
+                       AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR ,\r
+                               ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", \r
+                               sizeof(p_req->pdata), p_new_cep->cid, \r
+                               (ib_al_handle_t)p_irp->Tail.Overlay.DriverContext[1] ));\r
 \r
                        // complete GetConnectionReq IRP\r
 #pragma warning(push, 3)\r
@@ -6317,10 +6319,11 @@ al_cep_get_cid(
        IN              PIRP                                                            p_irp\r
        )\r
 {\r
-       kcep_t *p_cep;\r
+       kcep_t *p_cep, *p_new_cep;\r
        NTSTATUS nt_status;\r
        KLOCK_QUEUE_HANDLE hdl;\r
        ib_mad_element_t* p_mad = NULL;\r
+       mad_cm_req_t* p_req;\r
 \r
        AL_ENTER( AL_DBG_NDI );\r
        KeAcquireInStackQueuedSpinLock( &gp_cep_mgr->lock, &hdl );\r
@@ -6354,6 +6357,15 @@ al_cep_get_cid(
        p_cep->p_mad_head = p_mad->p_next;\r
        p_mad->p_next = NULL;\r
 \r
+       /* store REQ private data */\r
+       p_req = (mad_cm_req_t*)ib_get_mad_buf( p_mad );\r
+       p_new_cep = (kcep_t* __ptr64)p_mad->send_context1;\r
+       __cep_set_pdata( h_al, p_new_cep->cid,\r
+               sizeof(p_req->pdata), (uint8_t*)p_req->pdata );\r
+       AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR ,\r
+               ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", \r
+               sizeof(p_req->pdata), p_new_cep->cid, h_al ));\r
+\r
        /* complete the IRP */\r
        __complete_ndi_irp( p_irp, p_mad );\r
        nt_status = STATUS_EVENT_DONE;\r
@@ -6386,7 +6398,7 @@ al_cep_get_pdata(
        {\r
                KeReleaseInStackQueuedSpinLock( &hdl );\r
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, \r
-                       ("CEP not found\n"));\r
+                       ("CEP not found for cid %d, h_al %p\n", cid, h_al ));\r
                return IB_INVALID_HANDLE;\r
        }\r
 \r
@@ -6394,7 +6406,8 @@ al_cep_get_pdata(
        {\r
                KeReleaseInStackQueuedSpinLock( &hdl );\r
                AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, \r
-                       ("Insufficient size: *p_psize %d, max %d\n", *p_psize, p_cep->psize));\r
+                       ("Insufficient size: *p_psize %d, max %d, cid %d, h_al %p\n", \r
+                       *p_psize, p_cep->psize, cid, h_al ));\r
                return IB_INVALID_PARAMETER;\r
        }\r
        \r
@@ -6403,6 +6416,11 @@ al_cep_get_pdata(
        if (remainder)\r
                cl_memclr( &pdata[p_cep->psize], remainder );\r
        *p_psize = p_cep->psize;\r
+       if ( !*p_psize )\r
+       {\r
+               AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, \r
+                       ("p_cep->psize is zero for cid %d, h_al %p\n", cid, h_al ));\r
+       }\r
 \r
        AL_PRINT(TRACE_LEVEL_INFORMATION ,AL_DBG_CM ,\r
                ("al_cep_get_pdata: get %d of pdata from CEP with cid %d \n", p_cep->psize, cid ));\r
index d143ef285156bb8573ab1159280b0c4c107edcfd..85c3892beeb267a30ccbe5f6d6881ad73c4b6503 100644 (file)
@@ -401,6 +401,9 @@ static inline void __ndi_flush_que(
        PIRP Irp;
        while( Irp = IoCsqRemoveNextIrp( &p_ndi_csq->csq, NULL ) )
        {
+               AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, 
+                       ("h_qp %I64x, ref_cnt %d\n", (uint64_t)p_ndi_csq->h_qp, 
+                       p_ndi_csq->h_qp->obj.ref_cnt ) );
                cl_ioctl_complete( Irp, completion_code, 0 );
                deref_al_obj( &p_ndi_csq->h_qp->obj );
        }
@@ -414,6 +417,8 @@ ndi_qp_flush_ques(
 {
        AL_ENTER( AL_DBG_NDI );
        __ndi_flush_que( h_qp->p_irp_que, STATUS_CANCELLED );
+       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, 
+               ("h_qp %I64x, ref_cnt %d\n", (uint64_t)h_qp, h_qp->obj.ref_cnt ) );
        AL_EXIT( AL_DBG_NDI );
 }
 
@@ -454,20 +459,39 @@ exit:
 }
 
 void
-ndi_qp_deinit(
+ndi_qp_destroy(
        IN              ib_qp_handle_t                                  h_qp )
 {
-       if (h_qp->p_irp_que)
+       AL_ENTER( AL_DBG_NDI );
+
+       if (h_qp->type == IB_QPT_RELIABLE_CONN && h_qp->p_irp_que)
        {
+               AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
+                       ("Destroying h_qp %I64x. h_ioctl %p\n", 
+                       (uint64_t)h_qp, h_qp->p_irp_que->h_ioctl ) );
+
                /* cancel pending IRPS for NDI type CQ */
                ndi_qp_flush_ques( h_qp );
+       }
+
+       AL_EXIT( AL_DBG_NDI );
+}
+
+void
+ndi_qp_free(
+       IN              ib_qp_handle_t                                  h_qp )
+{
+       AL_ENTER( AL_DBG_NDI );
 
+       if (h_qp->type == IB_QPT_RELIABLE_CONN && h_qp->p_irp_que)
+       {
                /* free NDI context */
                cl_free( h_qp->p_irp_que );
                h_qp->p_irp_que = NULL;
        }
-}
 
+       AL_EXIT( AL_DBG_NDI );
+}
 
 
 /*******************************************************************
@@ -543,6 +567,9 @@ __ndi_proc_rej(
        {
        case NDI_CM_CONNECTING_REQ_SENT:
                al_cep_set_pdata( p_cm->h_al, p_cm->cid, IB_REJ_PDATA_SIZE, (uint8_t*)p_rej->pdata );
+               AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR ,
+                       ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", 
+                       IB_REJ_PDATA_SIZE, p_cm->cid, p_cm->h_al ));
                status = (p_rej->reason == IB_REJ_TIMEOUT) ? STATUS_TIMEOUT : STATUS_CONNECTION_REFUSED;
                __ndi_complete_irp_ex( p_cm->h_qp, status, NDI_CM_CONNECTING_REJ_RCVD );
                break;
@@ -619,6 +646,9 @@ __ndi_proc_rep(
 
        /* fill the rej data */
        al_cep_set_pdata( p_cm->h_al, p_cm->cid, IB_REJ_PDATA_SIZE, p_rep->pdata );
+       AL_PRINT(TRACE_LEVEL_ERROR ,AL_DBG_ERROR ,
+               ("set %d of REQ pdata to CEP with cid %d, h_al %p\n", 
+               IB_REJ_PDATA_SIZE, p_cm->cid, p_cm->h_al ));
 
        __ndi_complete_irp_ex( p_cm->h_qp, STATUS_SUCCESS, NDI_CM_CONNECTING_REP_RCVD );
 
@@ -752,9 +782,18 @@ __ndi_cm_handler(
                        switch( p_mad->attr_id )
                        {
                        case CM_REP_ATTR_ID:
-                               CL_ASSERT( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || 
-                                       ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID );
-                               __ndi_proc_rep( &h_cm, (mad_cm_rep_t*)p_mad );
+                               // TODO: how to prevent calling this handler with QP destroyed ?
+                               if( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || 
+                                       ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID )
+                               {
+                                       __ndi_proc_rep( &h_cm, (mad_cm_rep_t*)p_mad );
+                               }
+                               else
+                               {
+                                       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                                               ("REP received when QP destroyed, cid %d:%d .\n", 
+                                               ((al_conn_qp_t*)h_qp)->cid, cid ) );
+                               }
                                break;
                        
                        case CM_REJ_ATTR_ID:
@@ -762,9 +801,18 @@ __ndi_cm_handler(
                                break;
                
                        case CM_DREQ_ATTR_ID:
-                               CL_ASSERT( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || 
-                                       ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID );
-                               __ndi_proc_dreq( &h_cm, (mad_cm_dreq_t*)p_mad );
+                               // TODO: how to prevent calling this handler with QP destroyed ?
+                               if( ((al_conn_qp_t*)h_qp)->cid == (int32_t)cid || 
+                                       ((al_conn_qp_t*)h_qp)->cid == AL_INVALID_CID )
+                               {
+                                       __ndi_proc_dreq( &h_cm, (mad_cm_dreq_t*)p_mad );
+                               }
+                               else
+                               {
+                                       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+                                               ("DREQ received when QP destroyed, cid %d:%d .\n", 
+                                               ((al_conn_qp_t*)h_qp)->cid, cid ) );
+                               }
                                break;
                
                        default:
@@ -842,9 +890,9 @@ __ndi_pr_query_cb(
 
        AL_ENTER( AL_DBG_NDI );
 
-       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
-               ("status is %d, count is %d\n", p_query_rec->status,
-               p_query_rec->result_cnt) );
+       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+               ("status is %d, count is %d, context %p\n", p_query_rec->status,
+               p_query_rec->result_cnt, p_query_rec->query_context) );
 
        h_ioctl = IoCsqRemoveNextIrp( &h_qp->p_irp_que->csq, NULL );
        if( !h_ioctl || p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt )
@@ -913,6 +961,7 @@ err_irp_complete:
        deref_al_obj( &h_qp->obj );     /* release the extra reference */
 
 exit:  
+       cl_free( p_req );
        if( p_query_rec->p_result_mad )
                ib_put_mad( p_query_rec->p_result_mad );
 
@@ -980,8 +1029,8 @@ __ndi_ats_query_cb(
        AL_ENTER( AL_DBG_NDI );
 
        AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
-               ("status is %d, count is %d\n", p_query_rec->status,
-               p_query_rec->result_cnt) );
+               ("status is %d, count is %d, context %p\n", p_query_rec->status,
+               p_query_rec->result_cnt, p_query_rec->query_context) );
 
        h_ioctl = IoCsqRemoveNextIrp( &h_qp->p_irp_que->csq, NULL );
        if( !h_ioctl || p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt )
@@ -989,6 +1038,7 @@ __ndi_ats_query_cb(
                h_qp->p_irp_que->state = NDI_CM_IDLE; 
                __ndi_complete_irp( h_qp, h_ioctl, STATUS_HOST_UNREACHABLE );
                deref_al_obj( &h_qp->obj );     /* release the extra reference */
+               cl_free( p_req );
                goto exit;              /* ATS request failed */
        }
 
@@ -1050,7 +1100,7 @@ __ndi_ats_query(
        service_record.service_lease = 0xFFFFFFFF;
        strcpy( (void*)service_record.service_name, ATS_NAME );
        
-       AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR,
+       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI,
                ("ATS:: MAD: method %#x, attr_id %#hx, Service: comp_mask %#I64x, IP %d.%d.%d.%d \n", 
                user_query.method,
                CL_NTOH16(user_query.attr_id),
@@ -1063,7 +1113,9 @@ __ndi_ats_query(
 
        h_qp->p_irp_que->state = NDI_CM_CONNECTING_ATS_SENT;
        /* prevent destroying QP after cancelling of the IRP and before ib_query calback*/
-       ref_al_obj( &h_qp->obj );
+       AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_NDI, 
+               ("h_qp %I64x, ref_cnt %d\n", (uint64_t)h_qp, h_qp->obj.ref_cnt ) );
+       ref_al_obj( &h_qp->obj );               /* take the extra reference */
 
        /* insert IRP in the queue */
        IoCsqInsertIrp( &h_qp->p_irp_que->csq, h_ioctl, NULL );
@@ -1094,22 +1146,29 @@ ndi_req_cm(
 
        /* check outstanding request */
        __ndi_acquire_lock( &h_qp->p_irp_que->csq, NULL);
-       if (__ndi_peek_next_irp( &h_qp->p_irp_que->csq, NULL, NULL ))
+       if ( h_qp->p_irp_que->h_ioctl )
        {
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, 
+                       ("STATUS_CONNECTION_ACTIVE: h_qp %I64x, ref_cnt %d\n",
+                       (uint64_t)h_qp, h_qp->obj.ref_cnt ) );
                nt_status = STATUS_CONNECTION_ACTIVE;
                __ndi_release_lock( &h_qp->p_irp_que->csq, 0 );
                goto exit;
        }
-       __ndi_release_lock( &h_qp->p_irp_que->csq, 0 );
-
        if ( h_qp->p_irp_que->state != NDI_CM_IDLE )
        {
+               AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, 
+                       ("STATUS_INVALID_DEVICE_STATE: h_qp %I64x, ref_cnt %d, state %d\n",
+                       (uint64_t)h_qp, h_qp->obj.ref_cnt, h_qp->p_irp_que->state ) );
                nt_status = STATUS_INVALID_DEVICE_STATE;
+               __ndi_release_lock( &h_qp->p_irp_que->csq, 0 );
                goto exit;
        }
-       
-       /* send ATS request */
        h_qp->p_irp_que->h_ioctl = h_ioctl;     /* mark IRP as present */
+       h_qp->p_irp_que->state = NDI_CM_CONNECTING_ATS_SENT;
+       __ndi_release_lock( &h_qp->p_irp_que->csq, 0 );
+
+       /* send ATS request */
        nt_status = __ndi_ats_query( h_qp, h_ioctl, p_req );
 
 exit:
index 805df62f4f63da706f1e494cf9b1aa78d137b073..07b925637b5348998e5e47999ab90b36db031750 100644 (file)
@@ -133,7 +133,11 @@ ndi_qp_init(
        IN              ib_qp_handle_t                                  h_qp );
 
 void
-ndi_qp_deinit(
+ndi_qp_destroy(
+       IN              ib_qp_handle_t                                  h_qp );
+void
+
+ndi_qp_free(
        IN              ib_qp_handle_t                                  h_qp );
 
 #endif
index d8ac676f60b962ce780bc81a9463f11969ef7501..a34ab9eab1ca4bd12a3bb6bc0845d53a51da833b 100644 (file)
@@ -312,7 +312,7 @@ __ndi_req_cm(
        cl_status_t cl_status;
        ib_qp_handle_t h_qp = NULL;
        al_dev_open_context_t *p_context;
-       ual_ndi_req_cm_ioctl_in_t *p_req = 
+       ual_ndi_req_cm_ioctl_in_t *p_parm, *p_req = 
                (ual_ndi_req_cm_ioctl_in_t*)cl_ioctl_in_buf( h_ioctl );
        UNUSED_PARAM(p_ret_bytes);
        
@@ -341,7 +341,6 @@ __ndi_req_cm(
                cl_status = CL_INVALID_HANDLE;
                goto exit;
        }
-       p_req->h_qp = (uint64_t)h_qp;
 
        /* Check psize */
        if ( p_req->pdata_size > sizeof(p_req->pdata) )
@@ -349,9 +348,19 @@ __ndi_req_cm(
                cl_status = CL_INVALID_PARAMETER;
                goto exit;
        }
+
+       /* copy request parameters a side to prevent problems from cancelled IRP */
+       p_parm = cl_zalloc( sizeof(ual_ndi_req_cm_ioctl_in_t) );
+       if (!p_parm )
+       {
+               cl_status = CL_INSUFFICIENT_MEMORY;
+               goto exit;
+       }
+       RtlCopyMemory( p_parm, p_req, sizeof(ual_ndi_req_cm_ioctl_in_t) );
+       p_parm->h_qp = (uint64_t)h_qp;
        
        /* perform the ioctl */
-       cl_status = ndi_req_cm( h_qp, h_ioctl, p_req );
+       cl_status = ndi_req_cm( h_qp, h_ioctl, p_parm );
 
 exit:
        if (h_qp && cl_status != CL_PENDING)