From 1baf632feb27f363a2e4310fe8317fdef89ea272 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 4 Jan 2010 19:00:54 +0000 Subject: [PATCH] KMDF tracks all requests that pass through an IO queue. Even after a request has been removed from the queue, the request still maintains a reference on the queue. Any attempt to delete the queue will block until all requests holding references on the queue have completed. To avoid deadlock conditions during cleanup, we need to ensure that all requests can complete during cleanup. Modify the asynchronous handling code to queue a separate data structure, so that all requests can remain on the IO queues. Signed-off-by: Sean Hefty git-svn-id: svn://openib.tc.cornell.edu/gen1@2649 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- trunk/core/winverbs/kernel/wv_ep.c | 387 ++++++++++++----------- trunk/core/winverbs/kernel/wv_ep.h | 1 + trunk/core/winverbs/kernel/wv_provider.h | 14 + 3 files changed, 222 insertions(+), 180 deletions(-) diff --git a/trunk/core/winverbs/kernel/wv_ep.c b/trunk/core/winverbs/kernel/wv_ep.c index 2fbb5b31..3d5c6ce3 100644 --- a/trunk/core/winverbs/kernel/wv_ep.c +++ b/trunk/core/winverbs/kernel/wv_ep.c @@ -38,6 +38,8 @@ #define WV_AF_INET 2 #define WV_AF_INET6 23 +static void WvEpWorkHandler(WORK_ENTRY *pWork); + static void WvEpGet(WV_ENDPOINT *pEndpoint) { InterlockedIncrement(&pEndpoint->Ref); @@ -87,6 +89,12 @@ static NTSTATUS WvEpAllocate(WV_PROVIDER *pProvider, UINT16 EpType, } RtlZeroMemory(ep, sizeof(WV_ENDPOINT)); + ep->pWork = ExAllocatePoolWithTag(NonPagedPool, sizeof(WV_WORK_ENTRY), 'wevw'); + if (ep->pWork == NULL) { + status = STATUS_NO_MEMORY; + goto err1; + } + ep->Ref = 1; ep->pProvider = pProvider; ep->EpType = EpType; @@ -97,13 +105,15 @@ static NTSTATUS WvEpAllocate(WV_PROVIDER *pProvider, UINT16 EpType, status = WdfIoQueueCreate(ControlDevice, &config, WDF_NO_OBJECT_ATTRIBUTES, &ep->Queue); if (!NT_SUCCESS(status)) { - goto err; + goto err2; } *ppEndpoint = ep; return STATUS_SUCCESS; -err: +err2: + ExFreePoolWithTag(ep->pWork, 'wevw'); +err1: ExFreePoolWithTag(ep, 'pevw'); return status; } @@ -137,6 +147,7 @@ void WvEpCreate(WV_PROVIDER *pProvider, WDFREQUEST Request) } KeReleaseGuardedMutex(&pProvider->Lock); + WvWorkEntryInit(ep->pWork, *pId, WvEpWorkHandler, pProvider); WdfRequestCompleteWithInformation(Request, status, sizeof(UINT64)); return; @@ -195,6 +206,9 @@ void WvEpFree(WV_ENDPOINT *pEndpoint) WdfIoQueuePurgeSynchronously(pEndpoint->Queue); WdfObjectDelete(pEndpoint->Queue); + if (pEndpoint->pWork != NULL) { + ExFreePoolWithTag(pEndpoint->pWork, 'wevw'); + } ExFreePoolWithTag(pEndpoint, 'pevw'); } @@ -413,77 +427,72 @@ static NTSTATUS WvEpDisconnectQp(WV_PROVIDER *pProvider, UINT64 QpId, return status; } -static void WvEpDisconnectHandler(WORK_ENTRY *pWork) +static NTSTATUS WvEpAsyncDisconnect(WV_ENDPOINT *pEndpoint, WDFREQUEST Request) { - WV_PROVIDER *prov; - WDFREQUEST request; WV_IO_EP_DISCONNECT *pattr; UINT8 *out; size_t outlen = 0; NTSTATUS status; - request = (WDFREQUEST) pWork->Context; - prov = WvProviderGetContext(WdfRequestGetFileObject(request)); - - status = WdfRequestRetrieveInputBuffer(request, sizeof(WV_IO_EP_DISCONNECT), + status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_DISCONNECT), &pattr, NULL); if (!NT_SUCCESS(status)) { - goto complete; + return status; } - status = WdfRequestRetrieveOutputBuffer(request, 0, &out, &outlen); + status = WdfRequestRetrieveOutputBuffer(Request, 0, &out, &outlen); if (!NT_SUCCESS(status) && status != STATUS_BUFFER_TOO_SMALL) { - goto complete; + return status; } - status = (NTSTATUS) WdfRequestGetInformation(request); + status = (NTSTATUS) WdfRequestGetInformation(Request); if (NT_SUCCESS(status)) { - status = WvEpDisconnectQp(prov, pattr->QpId, out, outlen); + status = WvEpDisconnectQp(pEndpoint->pProvider, pattr->QpId, out, outlen); } else { - WvEpDisconnectQp(prov, pattr->QpId, out, outlen); + WvEpDisconnectQp(pEndpoint->pProvider, pattr->QpId, out, outlen); } -complete: - WdfRequestCompleteWithInformation(request, status, outlen); - WvProviderPut(prov); + WdfRequestCompleteWithInformation(Request, status, outlen); + return STATUS_SUCCESS; } -// We use IRP DriverContext to queue the request for further processing, -// but the request/IRP are no longer owned by the framework. static void WvEpCompleteDisconnect(WV_ENDPOINT *pEndpoint, NTSTATUS DiscStatus) { WDFREQUEST request; + WDFREQUEST disc_req = NULL; WDF_REQUEST_PARAMETERS param; - WORK_ENTRY *work; NTSTATUS status; WdfObjectAcquireLock(pEndpoint->Queue); - if (pEndpoint->State == WvEpDestroying) { + if (pEndpoint->State == WvEpDestroying || !pEndpoint->pWork) { goto release; } pEndpoint->State = WvEpDisconnected; status = WdfIoQueueRetrieveNextRequest(pEndpoint->Queue, &request); while (NT_SUCCESS(status)) { - WdfObjectReleaseLock(pEndpoint->Queue); WDF_REQUEST_PARAMETERS_INIT(¶m); WdfRequestGetParameters(request, ¶m); if (param.Parameters.DeviceIoControl.IoControlCode == WV_IOCTL_EP_DISCONNECT) { - work = WorkEntryFromIrp(WdfRequestWdmGetIrp(request)); WdfRequestSetInformation(request, DiscStatus); - WorkEntryInit(work, WvEpDisconnectHandler, request); WvProviderGet(pEndpoint->pProvider); - WorkQueueInsert(&pEndpoint->pProvider->WorkQueue, work); + WorkQueueInsert(&pEndpoint->pProvider->WorkQueue, &pEndpoint->pWork->Work); + pEndpoint->pWork = NULL; + disc_req = request; } else { WdfRequestComplete(request, DiscStatus); } - WdfObjectAcquireLock(pEndpoint->Queue); status = WdfIoQueueRetrieveNextRequest(pEndpoint->Queue, &request); } + + if (disc_req != NULL) { + WdfRequestRequeue(disc_req); + } release: WdfObjectReleaseLock(pEndpoint->Queue); + } static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent) @@ -565,49 +574,31 @@ static NTSTATUS WvEpIbCmHandler(iba_cm_id *pId, iba_cm_event *pEvent) return STATUS_SUCCESS; } -void WvEpConnectHandler(WORK_ENTRY *pWork) +static NTSTATUS WvEpAsyncConnect(WV_ENDPOINT *pEndpoint, WDFREQUEST Request) { - WV_PROVIDER *prov; - WDFREQUEST request; WV_IO_EP_CONNECT *pattr; - WV_ENDPOINT *ep; WV_QUEUE_PAIR *qp; iba_cm_req req; NTSTATUS status; UINT8 data[IB_REQ_PDATA_SIZE]; - request = (WDFREQUEST) pWork->Context; - prov = WvProviderGetContext(WdfRequestGetFileObject(request)); - - status = WdfRequestRetrieveInputBuffer(request, sizeof(WV_IO_EP_CONNECT), + status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_CONNECT), &pattr, NULL); if (!NT_SUCCESS(status)) { - goto complete; - } - - if (pattr->Param.DataLength > sizeof(pattr->Param.Data)) { - status = STATUS_INVALID_BUFFER_SIZE; - goto complete; - } - - ep = WvEpAcquire(prov, pattr->Id); - if (ep == NULL) { - status = STATUS_NOT_FOUND; - goto complete; + return status; } - qp = WvQpAcquire(prov, pattr->QpId); + qp = WvQpAcquire(pEndpoint->pProvider, pattr->QpId); if (qp == NULL) { - status = STATUS_NOT_FOUND; - goto release; + return STATUS_NOT_FOUND; } - ep->Attributes.PeerAddress = pattr->PeerAddress; - WvFormatCmaHeader((IB_CMA_HEADER *) data, &ep->Attributes.LocalAddress, - &ep->Attributes.PeerAddress); + pEndpoint->Attributes.PeerAddress = pattr->PeerAddress; + WvFormatCmaHeader((IB_CMA_HEADER *) data, &pEndpoint->Attributes.LocalAddress, + &pEndpoint->Attributes.PeerAddress); - req.service_id = WvGetServiceId(ep->EpType, &ep->Attributes.PeerAddress); - req.p_primary_path = &ep->Route; + req.service_id = WvGetServiceId(pEndpoint->EpType, &pEndpoint->Attributes.PeerAddress); + req.p_primary_path = &pEndpoint->Route; req.p_alt_path = NULL; req.qpn = qp->Qpn; req.qp_type = IB_QPT_RELIABLE_CONN; @@ -627,54 +618,86 @@ void WvEpConnectHandler(WORK_ENTRY *pWork) req.srq = (qp->pSrq != NULL); WvQpRelease(qp); - RtlCopyMemory(&ep->Attributes.Param.Connect, &pattr->Param, + RtlCopyMemory(&pEndpoint->Attributes.Param.Connect, &pattr->Param, sizeof(pattr->Param)); - WdfObjectAcquireLock(ep->Queue); - if (ep->State != WvEpRouteResolved) { + WdfObjectAcquireLock(pEndpoint->Queue); + if (pEndpoint->State != WvEpRouteResolved) { status = STATUS_NOT_SUPPORTED; - goto unlock; + goto out; } - status = IbCmInterface.CM.create_id(WvEpIbCmHandler, ep, &ep->pIbCmId); + status = IbCmInterface.CM.create_id(WvEpIbCmHandler, pEndpoint, &pEndpoint->pIbCmId); if (!NT_SUCCESS(status)) { - goto unlock; + goto out; } - ep->State = WvEpActiveConnect; - status = IbCmInterface.CM.send_req(ep->pIbCmId, &req); + pEndpoint->State = WvEpActiveConnect; + status = IbCmInterface.CM.send_req(pEndpoint->pIbCmId, &req); if (NT_SUCCESS(status)) { - status = WdfRequestForwardToIoQueue(request, ep->Queue); + status = WdfRequestRequeue(Request); } if (!NT_SUCCESS(status)) { - ep->State = WvEpDisconnected; - } -unlock: - WdfObjectReleaseLock(ep->Queue); -release: - WvEpRelease(ep); -complete: - if (!NT_SUCCESS(status)) { - WdfRequestComplete(request, status); + pEndpoint->State = WvEpDisconnected; } - WvProviderPut(prov); + +out: + WdfObjectReleaseLock(pEndpoint->Queue); + return status; } -static void WvEpProcessAsync(WV_PROVIDER *pProvider, WDFREQUEST Request, - void (*AsyncHandler)(struct _WORK_ENTRY *Work)) +static NTSTATUS WvEpProcessAsync(WV_PROVIDER *pProvider, UINT64 Id, WDFREQUEST Request) { - WORK_ENTRY *work; + WV_ENDPOINT *ep; + NTSTATUS status; + + ep = WvEpAcquire(pProvider, Id); + if (ep == NULL) { + return STATUS_NOT_FOUND; + } + + WdfObjectAcquireLock(ep->Queue); + if (!ep->pWork) { + status = STATUS_TOO_MANY_COMMANDS; + goto out; + } - work = WorkEntryFromIrp(WdfRequestWdmGetIrp(Request)); - WorkEntryInit(work, AsyncHandler, Request); - WvProviderGet(pProvider); - WorkQueueInsert(&pProvider->WorkQueue, work); + status = WdfRequestForwardToIoQueue(Request, ep->Queue); + if (NT_SUCCESS(status)) { + WvProviderGet(pProvider); + WorkQueueInsert(&pProvider->WorkQueue, &ep->pWork->Work); + ep->pWork = NULL; + } + +out: + WdfObjectReleaseLock(ep->Queue); + WvEpRelease(ep); + return status; } void WvEpConnect(WV_PROVIDER *pProvider, WDFREQUEST Request) { - WvEpProcessAsync(pProvider, Request, WvEpConnectHandler); + WV_IO_EP_CONNECT *pattr; + NTSTATUS status; + + status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_CONNECT), + &pattr, NULL); + if (!NT_SUCCESS(status)) { + goto out; + } + + if (pattr->Param.DataLength > sizeof(pattr->Param.Data)) { + status = STATUS_INVALID_BUFFER_SIZE; + goto out; + } + + status = WvEpProcessAsync(pProvider, pattr->Id, Request); + +out: + if (!NT_SUCCESS(status)) { + WdfRequestComplete(Request, status); + } } static NTSTATUS WvEpModifyQpRtr(WV_ENDPOINT *pEndpoint, WV_QUEUE_PAIR *pQp, @@ -823,7 +846,7 @@ static NTSTATUS WvEpAcceptPassive(WDFREQUEST Request, UINT8 *pVerbsData, size_t status = IbCmInterface.CM.send_rep(pEndpoint->pIbCmId, &rep); if (NT_SUCCESS(status)) { - status = WdfRequestForwardToIoQueue(Request, pEndpoint->Queue); + status = WdfRequestRequeue(Request); } if (!NT_SUCCESS(status)) { @@ -836,65 +859,114 @@ out: return status; } -void WvEpAcceptHandler(WORK_ENTRY *pWork) +static NTSTATUS WvEpAsyncAccept(WV_ENDPOINT *pEndpoint, WDFREQUEST Request) { - WV_PROVIDER *prov; - WDFREQUEST request; WV_IO_EP_ACCEPT *pattr; - WV_ENDPOINT *ep; NTSTATUS status; UINT8 *out; size_t outlen; - request = (WDFREQUEST) pWork->Context; - prov = WvProviderGetContext(WdfRequestGetFileObject(request)); - - status = WdfRequestRetrieveInputBuffer(request, sizeof(WV_IO_EP_ACCEPT), + status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_ACCEPT), &pattr, NULL); if (!NT_SUCCESS(status)) { - goto complete; + return status; } - status = WdfRequestRetrieveOutputBuffer(request, 0, &out, &outlen); + status = WdfRequestRetrieveOutputBuffer(Request, 0, &out, &outlen); if (!NT_SUCCESS(status) && status != STATUS_BUFFER_TOO_SMALL) { - goto complete; - } - - if (pattr->Param.DataLength > sizeof(pattr->Param.Data)) { - status = STATUS_INVALID_BUFFER_SIZE; - goto complete; - } - - ep = WvEpAcquire(prov, pattr->Id); - if (ep == NULL) { - status = STATUS_NOT_FOUND; - goto complete; + return status; } /* EP state is re-checked under lock in WvEpAccept* calls */ - switch (ep->State) { + switch (pEndpoint->State) { case WvEpActiveConnect: - status = WvEpAcceptActive(request, out, outlen, ep, pattr); + status = WvEpAcceptActive(Request, out, outlen, pEndpoint, pattr); break; case WvEpPassiveConnect: - status = WvEpAcceptPassive(request, out, outlen, ep, pattr); + status = WvEpAcceptPassive(Request, out, outlen, pEndpoint, pattr); break; default: status = STATUS_NOT_SUPPORTED; break; } - WvEpRelease(ep); -complete: + return status; +} + +static void WvEpWorkHandler(WORK_ENTRY *pWork) +{ + WV_PROVIDER *prov; + WV_ENDPOINT *ep; + WV_WORK_ENTRY *work; + WDFREQUEST request; + WDF_REQUEST_PARAMETERS param; + NTSTATUS status; + + work = CONTAINING_RECORD(pWork, WV_WORK_ENTRY, Work); + prov = (WV_PROVIDER *) pWork->Context; + + ep = WvEpAcquire(prov, work->Id); + if (ep == NULL) { + ExFreePoolWithTag(work, 'wevw'); + goto out; + } + + WdfObjectAcquireLock(ep->Queue); + ep->pWork = work; + status = WdfIoQueueRetrieveNextRequest(ep->Queue, &request); + WdfObjectReleaseLock(ep->Queue); + + if (!NT_SUCCESS(status)) { + goto put; + } + + WDF_REQUEST_PARAMETERS_INIT(¶m); + WdfRequestGetParameters(request, ¶m); + switch (param.Parameters.DeviceIoControl.IoControlCode) { + case WV_IOCTL_EP_CONNECT: + status = WvEpAsyncConnect(ep, request); + break; + case WV_IOCTL_EP_ACCEPT: + status = WvEpAsyncAccept(ep, request); + break; + case WV_IOCTL_EP_DISCONNECT: + status = WvEpAsyncDisconnect(ep, request); + break; + default: + status = STATUS_NOT_IMPLEMENTED; + } + if (!NT_SUCCESS(status)) { WdfRequestComplete(request, status); } +put: + WvEpRelease(ep); +out: WvProviderPut(prov); } void WvEpAccept(WV_PROVIDER *pProvider, WDFREQUEST Request) { - WvEpProcessAsync(pProvider, Request, WvEpAcceptHandler); + WV_IO_EP_ACCEPT *pattr; + NTSTATUS status; + + status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_ACCEPT), + &pattr, NULL); + if (!NT_SUCCESS(status)) { + goto out; + } + + if (pattr->Param.DataLength > sizeof(pattr->Param.Data)) { + status = STATUS_INVALID_BUFFER_SIZE; + goto out; + } + + status = WvEpProcessAsync(pProvider, pattr->Id, Request); + +out: + if (!NT_SUCCESS(status)) { + WdfRequestComplete(Request, status); + } } void WvEpReject(WV_PROVIDER *pProvider, WDFREQUEST Request) @@ -932,69 +1004,7 @@ complete: WdfRequestComplete(Request, status); } -static NTSTATUS WvEpDisconnectActive(WDFREQUEST Request, - UINT8 *pVerbsData, size_t VerbsSize, - WV_ENDPOINT *pEndpoint, - WV_IO_EP_DISCONNECT *pAttr) -{ - NTSTATUS status, failure; - - WdfObjectAcquireLock(pEndpoint->Queue); - if (pEndpoint->State != WvEpConnected) { - status = STATUS_NOT_SUPPORTED; - goto release; - } - - pEndpoint->State = WvEpActiveDisconnect; - IbCmInterface.CM.send_dreq(pEndpoint->pIbCmId, NULL, 0); - - status = WdfRequestForwardToIoQueue(Request, pEndpoint->Queue); - if (!NT_SUCCESS(status)) { - pEndpoint->State = WvEpDisconnected; - WvCompleteRequests(pEndpoint->Queue, STATUS_UNSUCCESSFUL); - WdfObjectReleaseLock(pEndpoint->Queue); - - failure = status; - status = WvEpDisconnectQp(pEndpoint->pProvider, pAttr->QpId, - pVerbsData, VerbsSize); - if (NT_SUCCESS(status)) { - WdfRequestCompleteWithInformation(Request, failure, VerbsSize); - } - return status; - } - -release: - WdfObjectReleaseLock(pEndpoint->Queue); - return status; -} - -static NTSTATUS WvEpDisconnectPassive(WDFREQUEST Request, - UINT8 *pVerbsData, size_t VerbsSize, - WV_ENDPOINT *pEndpoint, - WV_IO_EP_DISCONNECT *pAttr) -{ - NTSTATUS status; - - WdfObjectAcquireLock(pEndpoint->Queue); - if (pEndpoint->State != WvEpPassiveDisconnect) { - WdfObjectReleaseLock(pEndpoint->Queue); - return STATUS_NOT_SUPPORTED; - } - - pEndpoint->State = WvEpDisconnected; - WdfObjectReleaseLock(pEndpoint->Queue); - - IbCmInterface.CM.send_drep(pEndpoint->pIbCmId, NULL, 0); - - status = WvEpDisconnectQp(pEndpoint->pProvider, pAttr->QpId, - pVerbsData, VerbsSize); - if (NT_SUCCESS(status)) { - WdfRequestCompleteWithInformation(Request, status, VerbsSize); - } - - return status; -} - +// The IB CM could have received and processed a DREQ that we haven't seen yet. void WvEpDisconnect(WV_PROVIDER *pProvider, WDFREQUEST Request) { WV_IO_EP_DISCONNECT *pattr; @@ -1020,19 +1030,36 @@ void WvEpDisconnect(WV_PROVIDER *pProvider, WDFREQUEST Request) goto complete; } - /* EP state is re-checked under lock in WvEpDisconnect* calls */ + WdfObjectAcquireLock(ep->Queue); switch (ep->State) { case WvEpConnected: - status = WvEpDisconnectActive(Request, out, outlen, ep, pattr); - break; + status = IbCmInterface.CM.send_dreq(ep->pIbCmId, NULL, 0); + if (NT_SUCCESS(status)) { + status = WdfRequestForwardToIoQueue(Request, ep->Queue); + if (NT_SUCCESS(status)) { + ep->State = WvEpActiveDisconnect; + break; + } + } + /* Fall through to passive disconnect case on failure */ case WvEpPassiveDisconnect: - status = WvEpDisconnectPassive(Request, out, outlen, ep, pattr); - break; + ep->State = WvEpDisconnected; + WdfObjectReleaseLock(ep->Queue); + + IbCmInterface.CM.send_drep(ep->pIbCmId, NULL, 0); + + status = WvEpDisconnectQp(ep->pProvider, pattr->QpId, out, outlen); + if (NT_SUCCESS(status)) { + WdfRequestCompleteWithInformation(Request, status, outlen); + } + goto release; default: - status = STATUS_NOT_SUPPORTED; + status = STATUS_INVALID_DEVICE_STATE; break; } + WdfObjectReleaseLock(ep->Queue); +release: WvEpRelease(ep); complete: if (!NT_SUCCESS(status)) { diff --git a/trunk/core/winverbs/kernel/wv_ep.h b/trunk/core/winverbs/kernel/wv_ep.h index f6d48d3f..650b23c5 100644 --- a/trunk/core/winverbs/kernel/wv_ep.h +++ b/trunk/core/winverbs/kernel/wv_ep.h @@ -72,6 +72,7 @@ typedef struct _WV_ENDPOINT KEVENT Event; LONG Ref; WDFQUEUE Queue; + WV_WORK_ENTRY *pWork; } WV_ENDPOINT; diff --git a/trunk/core/winverbs/kernel/wv_provider.h b/trunk/core/winverbs/kernel/wv_provider.h index 329145f2..bd430fb0 100644 --- a/trunk/core/winverbs/kernel/wv_provider.h +++ b/trunk/core/winverbs/kernel/wv_provider.h @@ -44,6 +44,20 @@ struct _WV_DEVICE; struct _WV_PROTECTION_DOMAIN; +typedef struct _WV_WORK_ENTRY +{ + WORK_ENTRY Work; + UINT64 Id; + +} WV_WORK_ENTRY; + +static void WvWorkEntryInit(WV_WORK_ENTRY *pWork, UINT64 Id, + void (*WorkHandler)(WORK_ENTRY *Work), void *Context) +{ + pWork->Id = Id; + WorkEntryInit(&pWork->Work, WorkHandler, Context); +} + typedef struct _WV_PROVIDER { LIST_ENTRY Entry; -- 2.41.0