]> git.openfabrics.org - ~shefty/rdma-win.git/commitdiff
[WinVerbs] fix crash accessing freed memory from async thread
authorstansmith <stansmith@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Fri, 18 Sep 2009 23:02:31 +0000 (23:02 +0000)
committerstansmith <stansmith@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Fri, 18 Sep 2009 23:02:31 +0000 (23:02 +0000)
If an application exits while asynchronous accept processing is queued,
it's possible for the async processing to access the IbCmId after it has
been freed.  A similar problem to this was fixed that dealt with accessing
the verbs QP handle.

A simpler, more generic solution to this problem is to handle application
exit in the same manner as device removal, and lock the winverb provider
lookup lists with exclusive access.  Asynchronous operations that are in
process will run to completion, and future operations will be blocked until
the provider cleanup thread has completed.  Once they run, they will fail
to acquire a reference on the desired object, which should result in a
graceful failure.

This avoids more complicated locking to use handles belonging to the lower
level code.  If a reference on an object can be acquired, the handle will
be available for use until the reference is released.  To handle IB CM
callbacks, additional state checking is required to avoid processing
CM events when we're trying to destroy the endpoint.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1@2448 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

branches/WOF2-1/core/winverbs/kernel/wv_ep.c
branches/WOF2-1/core/winverbs/kernel/wv_provider.c
branches/WOF2-1/core/winverbs/kernel/wv_qp.c

index 9a3350cb95af140f18759b776368e8f55b742dfb..2fbb5b315040cdeee4c34f2ee746eecc3e0e2604 100644 (file)
@@ -57,7 +57,7 @@ WV_ENDPOINT *WvEpAcquire(WV_PROVIDER *pProvider, UINT64 Id)
        KeAcquireGuardedMutex(&pProvider->Lock);\r
        WvProviderDisableRemove(pProvider);\r
        ep = IndexListAt(&pProvider->EpIndex, (SIZE_T) Id);\r
-       if (ep != NULL) {\r
+       if (ep != NULL && ep->State != WvEpDestroying) {\r
                WvEpGet(ep);\r
        } else {\r
                ep = NULL;\r
@@ -185,14 +185,14 @@ void WvEpFree(WV_ENDPOINT *pEndpoint)
        pEndpoint->State = WvEpDestroying;\r
        WdfObjectReleaseLock(pEndpoint->Queue);\r
 \r
-       if (pEndpoint->pIbCmId != NULL) {\r
-               IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId);\r
-       }\r
-\r
        if (InterlockedDecrement(&pEndpoint->Ref) > 0) {\r
                KeWaitForSingleObject(&pEndpoint->Event, Executive, KernelMode, FALSE, NULL);\r
        }\r
 \r
+       if (pEndpoint->pIbCmId != NULL) {\r
+               IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId);\r
+       }\r
+\r
        WdfIoQueuePurgeSynchronously(pEndpoint->Queue);\r
        WdfObjectDelete(pEndpoint->Queue);\r
        ExFreePoolWithTag(pEndpoint, 'pevw');\r
@@ -373,10 +373,6 @@ static void WvEpSaveReject(WV_ENDPOINT *pEndpoint, iba_cm_rej_event *pReject)
        pEndpoint->Attributes.Param.Connect.DataLength = len;\r
 }\r
 \r
-/*\r
- * The QP transition to error may be done from an async worker thread.\r
- * Synchronize against application exit.\r
- */\r
 static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,\r
                                                                UINT8 *pVerbsData, UINT32 VerbsSize)\r
 {\r
@@ -384,12 +380,6 @@ static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,
        ib_api_status_t         ib_status;\r
        NTSTATUS                        status;\r
 \r
-       KeAcquireGuardedMutex(&pQp->Lock);\r
-       if (pQp->hVerbsQp == NULL) {\r
-               status = STATUS_NOT_FOUND;\r
-               goto out;\r
-       }\r
-\r
        attr.req_state = IB_QPS_ERROR;\r
        ib_status = pQp->pVerbs->ndi_modify_qp(pQp->hVerbsQp, &attr, NULL,\r
                                                                                   VerbsSize, pVerbsData);\r
@@ -399,8 +389,6 @@ static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp,
                status = STATUS_UNSUCCESSFUL;\r
        }\r
 \r
-out:\r
-       KeReleaseGuardedMutex(&pQp->Lock);\r
        return status;\r
 }\r
 \r
@@ -505,10 +493,19 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent)
        ep = pId->context;\r
        switch (pEvent->type) {\r
        case iba_cm_req_error:\r
+               WdfObjectAcquireLock(ep->Queue);\r
+               if (ep->State == WvEpActiveConnect) {\r
+                       ep->State = WvEpDisconnected;\r
+                       WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);\r
+               }\r
+               WdfObjectReleaseLock(ep->Queue);\r
+               break;\r
        case iba_cm_rep_error:\r
                WdfObjectAcquireLock(ep->Queue);\r
-               ep->State = WvEpDisconnected;\r
-               WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);\r
+               if (ep->State == WvEpPassiveConnect) {\r
+                       ep->State = WvEpDisconnected;\r
+                       WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT);\r
+               }\r
                WdfObjectReleaseLock(ep->Queue);\r
                break;\r
        case iba_cm_dreq_error:\r
@@ -557,8 +554,10 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent)
                break;\r
        default:\r
                WdfObjectAcquireLock(ep->Queue);\r
-               ep->State = WvEpDisconnected;\r
-               WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED);\r
+               if (ep->State != WvEpDestroying) {\r
+                       ep->State = WvEpDisconnected;\r
+                       WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED);\r
+               }\r
                WdfObjectReleaseLock(ep->Queue);\r
                break;\r
        }\r
index 2998afe7f27595ac2e257d6727b1db2a3441bc38..e61f30e301560bbc4525144579f452747d29740c 100644 (file)
@@ -53,6 +53,38 @@ void WvProviderPut(WV_PROVIDER *pProvider)
        }\r
 }\r
 \r
+// See comment above WvProviderRemoveHandler.\r
+static void WvProviderLockRemove(WV_PROVIDER *pProvider)\r
+{\r
+       KeAcquireGuardedMutex(&pProvider->Lock);\r
+       pProvider->Exclusive++;\r
+       KeClearEvent(&pProvider->SharedEvent);\r
+       while (pProvider->Active > 0) {\r
+               KeReleaseGuardedMutex(&pProvider->Lock);\r
+DbgPrint("WvProviderLockRemove - someone is active - waiting\n");\r
+               KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive, KernelMode,\r
+                                                         FALSE, NULL);\r
+DbgPrint("WvProviderLockRemove - signaled - continuing\n");\r
+               KeAcquireGuardedMutex(&pProvider->Lock);\r
+       }\r
+       pProvider->Active++;\r
+       KeReleaseGuardedMutex(&pProvider->Lock);\r
+}\r
+\r
+// See comment above WvProviderRemoveHandler.\r
+static void WvProviderUnlockRemove(WV_PROVIDER *pProvider)\r
+{\r
+       KeAcquireGuardedMutex(&pProvider->Lock);\r
+       pProvider->Exclusive--;\r
+       pProvider->Active--;\r
+       if (pProvider->Exclusive > 0) {\r
+               KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE);\r
+       } else if (pProvider->Pending > 0) {\r
+               KeSetEvent(&pProvider->SharedEvent, 0, FALSE);\r
+       }\r
+       KeReleaseGuardedMutex(&pProvider->Lock);\r
+}\r
+\r
 NTSTATUS WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider)\r
 {\r
        NTSTATUS status;\r
@@ -83,6 +115,15 @@ NTSTATUS WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider)
        return STATUS_SUCCESS;\r
 }\r
 \r
+/*\r
+ * We could be processing asynchronous requests or have them queued\r
+ * when cleaning up.  To synchronize with async request processing,\r
+ * acquire the provider lock with exclusive access until we're done\r
+ * destroying all resoureces.  This will allow active requests to\r
+ * complete their processing, but prevent queued requests from\r
+ * running until cleanup is done.  At that point, queue requests will\r
+ * be unable to acquire any winverbs resources.\r
+ */\r
 void WvProviderCleanup(WV_PROVIDER *pProvider)\r
 {\r
        WV_DEVICE                               *dev;\r
@@ -95,6 +136,7 @@ void WvProviderCleanup(WV_PROVIDER *pProvider)
        WV_ENDPOINT                             *ep;\r
        SIZE_T                                  i;\r
 \r
+       WvProviderLockRemove(pProvider);\r
        IndexListForEach(&pProvider->EpIndex, i) {\r
                ep = (WV_ENDPOINT *) IndexListAt(&pProvider->EpIndex, i);\r
                if (ep->State == WvEpListening) {\r
@@ -131,6 +173,7 @@ void WvProviderCleanup(WV_PROVIDER *pProvider)
        while ((dev = IndexListRemoveHead(&pProvider->DevIndex)) != NULL) {\r
                WvDeviceFree(dev);\r
        }\r
+       WvProviderUnlockRemove(pProvider);\r
 \r
        if (InterlockedDecrement(&pProvider->Ref) > 0) {\r
                KeWaitForSingleObject(&pProvider->Event, Executive, KernelMode, FALSE, NULL);\r
@@ -147,36 +190,6 @@ void WvProviderCleanup(WV_PROVIDER *pProvider)
        WorkQueueDestroy(&pProvider->WorkQueue);\r
 }\r
 \r
-// See comment above WvProviderRemoveHandler.\r
-static void WvProviderLockRemove(WV_PROVIDER *pProvider)\r
-{\r
-       KeAcquireGuardedMutex(&pProvider->Lock);\r
-       pProvider->Exclusive++;\r
-       KeClearEvent(&pProvider->SharedEvent);\r
-       while (pProvider->Active > 0) {\r
-               KeReleaseGuardedMutex(&pProvider->Lock);\r
-               KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive, KernelMode,\r
-                                                         FALSE, NULL);\r
-               KeAcquireGuardedMutex(&pProvider->Lock);\r
-       }\r
-       pProvider->Active++;\r
-       KeReleaseGuardedMutex(&pProvider->Lock);\r
-}\r
-\r
-// See comment above WvProviderRemoveHandler.\r
-static void WvProviderUnlockRemove(WV_PROVIDER *pProvider)\r
-{\r
-       KeAcquireGuardedMutex(&pProvider->Lock);\r
-       pProvider->Exclusive--;\r
-       pProvider->Active--;\r
-       if (pProvider->Exclusive > 0) {\r
-               KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE);\r
-       } else if (pProvider->Pending > 0) {\r
-               KeSetEvent(&pProvider->SharedEvent, 0, FALSE);\r
-       }\r
-       KeReleaseGuardedMutex(&pProvider->Lock);\r
-}\r
-\r
 /*\r
  * Must hold pProvider->Lock.  Function may release and re-acquire.\r
  * See comment above WvProviderRemoveHandler.\r
index 3f64bd57ca1ba65e58b7cd580d446452e7d4fbc4..8ad87e5b5e67bd80bddbd04724676968772fe64c 100644 (file)
@@ -519,20 +519,10 @@ out:
        WdfRequestComplete(Request, status);\r
 }\r
 \r
-/*\r
- * Synchronize freeing the QP due to application exit with asynchronous\r
- * disconnect processing transitioning the QP into the error state.\r
- */\r
 void WvQpFree(WV_QUEUE_PAIR *pQp)\r
 {\r
        WV_MULTICAST            *mc;\r
        LIST_ENTRY                      *entry;\r
-       ib_qp_handle_t          hqp;\r
-\r
-       KeAcquireGuardedMutex(&pQp->Lock);\r
-       hqp = pQp->hVerbsQp;\r
-       pQp->hVerbsQp = NULL;\r
-       KeReleaseGuardedMutex(&pQp->Lock);\r
 \r
        while ((entry = RemoveHeadList(&pQp->McList)) != &pQp->McList) {\r
                mc = CONTAINING_RECORD(entry, WV_MULTICAST, Entry);\r
@@ -546,8 +536,8 @@ void WvQpFree(WV_QUEUE_PAIR *pQp)
                KeWaitForSingleObject(&pQp->Event, Executive, KernelMode, FALSE, NULL);\r
        }\r
 \r
-       if (hqp != NULL) {\r
-               pQp->pVerbs->destroy_qp(hqp, 0);\r
+       if (pQp->hVerbsQp != NULL) {\r
+               pQp->pVerbs->destroy_qp(pQp->hVerbsQp, 0);\r
        }\r
 \r
        if (pQp->pSrq != NULL) {\r