From: shefty Date: Wed, 11 Mar 2009 22:07:21 +0000 (+0000) Subject: libibverbs: use comp_channel to enhance scalability X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=a19ee478afb0357544fb8158c5da7b59d9d07dd2;p=~shefty%2Frdma-win.git libibverbs: use comp_channel to enhance scalability Use the COMP_CHANNEL abstraction as a common framework for event reporting and to provide better scalability. Signed-off-by: Sean Hefty git-svn-id: svn://openib.tc.cornell.edu/gen1@2025 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- diff --git a/trunk/ulp/libibverbs/include/infiniband/verbs.h b/trunk/ulp/libibverbs/include/infiniband/verbs.h index c849d759..68414635 100644 --- a/trunk/ulp/libibverbs/include/infiniband/verbs.h +++ b/trunk/ulp/libibverbs/include/infiniband/verbs.h @@ -37,6 +37,8 @@ #include #include +#include +#include #ifdef __cplusplus extern "C" { @@ -51,6 +53,16 @@ typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; +#define EOVERFLOW WV_BUFFER_OVERFLOW +#define EISCONN WV_CONNECTION_ACTIVE +#define ECONNREFUSED WV_CONNECTION_REFUSED +#define ETIMEDOUT WV_TIMEOUT +#define ENETUNREACH WV_HOST_UNREACHABLE +#define EADDRINUSE WV_ADDRESS_ALREADY_EXISTS +#define EALREADY WV_IO_PENDING +#define EAFNOSUPPORT WV_INVALID_ADDRESS +#define EWOULDBLOCK WV_DEVICE_BUSY + union ibv_gid { uint8_t raw[16]; @@ -636,8 +648,8 @@ struct ibv_qp struct ibv_comp_channel { - struct ibv_context *context; - uint32_t timeout; + struct ibv_context *context; + COMP_CHANNEL comp_channel; }; struct ibv_cq @@ -646,9 +658,8 @@ struct ibv_cq struct ibv_comp_channel *channel; void *cq_context; IWVCompletionQueue *handle; - OVERLAPPED overlap; int cqe; - uint32_t comp_events_completed; + COMP_ENTRY comp_entry; }; struct ibv_ah @@ -678,7 +689,7 @@ struct ibv_context { struct ibv_device *device; IWVDevice *cmd_if; - uint32_t timeout; + COMP_CHANNEL channel; }; /** @@ -1103,6 +1114,23 @@ const char *ibv_port_state_str(enum ibv_port_state port_state); __declspec(dllexport) const char *ibv_event_type_str(enum ibv_event_type event); +/* + * Windows specific structures and interfaces + */ +struct ibv_windata +{ + IWVProvider *prov; + COMP_MANAGER *comp_mgr; +}; + +#define IBV_WINDATA_VERSION 1 + +__declspec(dllexport) +int ibv_get_windata(struct ibv_windata *windata, int version); + +__declspec(dllexport) +void ibv_release_windata(struct ibv_windata *windata, int version); + #ifdef __cplusplus } #endif diff --git a/trunk/ulp/libibverbs/src/Sources b/trunk/ulp/libibverbs/src/Sources index aef09468..ee9e40b8 100644 --- a/trunk/ulp/libibverbs/src/Sources +++ b/trunk/ulp/libibverbs/src/Sources @@ -14,7 +14,7 @@ DLLDEF = $(OBJ_PATH)\$O\ibv_exports.def !endif DLLENTRY = DllMain -USE_MSVCRT=1 +USE_MSVCRT = 1 SOURCES = \ ibverbs.rc \ diff --git a/trunk/ulp/libibverbs/src/device.cpp b/trunk/ulp/libibverbs/src/device.cpp index af633386..58a2ec53 100644 --- a/trunk/ulp/libibverbs/src/device.cpp +++ b/trunk/ulp/libibverbs/src/device.cpp @@ -31,8 +31,10 @@ #include #include #include +#include "..\..\..\etc\user\comp_channel.cpp" IWVProvider *prov; +COMP_MANAGER comp_mgr; struct verbs_device { @@ -43,35 +45,53 @@ struct verbs_device struct verbs_port { - OVERLAPPED overlap; + COMP_ENTRY comp_entry; DWORD event_flag; + uint8_t port_num; }; -#define EVENT_PORT_NONE 0xFF - struct verbs_context { struct ibv_context context; struct verbs_device device; - HANDLE *event; struct verbs_port *port; - uint8_t event_port_index; + verbs_port *event_port; }; -__declspec(dllexport) -IWVProvider *ibv_get_winverbs(void) +static int ibv_init(void) { HRESULT hr; if (prov == NULL) { hr = WvGetObject(IID_IWVProvider, (LPVOID*) &prov); if (FAILED(hr)) { - return NULL; + return -1; } + CompManagerOpen(&comp_mgr); + CompManagerMonitor(&comp_mgr, prov->GetFileHandle(), 0); + } + return 0; +} + +__declspec(dllexport) +int ibv_get_windata(struct ibv_windata *windata, int version) +{ + int ret; + + if (version != IBV_WINDATA_VERSION || ibv_init()) { + return -1; } prov->AddRef(); - return prov; + windata->prov = prov; + windata->comp_mgr = &comp_mgr; + return 0; +} + +__declspec(dllexport) +void ibv_release_windata(struct ibv_windata *windata, int version) +{ + windata->prov->Release(); } __declspec(dllexport) @@ -84,11 +104,8 @@ struct ibv_device **ibv_get_device_list(int *num) SIZE_T size, cnt; HRESULT hr; - if (prov == NULL) { - hr = WvGetObject(IID_IWVProvider, (LPVOID*) &prov); - if (FAILED(hr)) { - goto err1; - } + if (ibv_init()) { + goto err1; } cnt = 0; @@ -179,45 +196,30 @@ struct ibv_context *ibv_open_device(struct ibv_device *device) return NULL; } memcpy(&vcontext->device, vdev, sizeof(struct verbs_device)); - vcontext->event_port_index = EVENT_PORT_NONE; - vcontext->context.timeout = INFINITE; + vcontext->event_port = NULL; + CompChannelInit(&comp_mgr, &vcontext->context.channel, INFINITE); vcontext->port = new struct verbs_port[vdev->phys_port_cnt]; if (vcontext->port == NULL) { goto err1; } - vcontext->event = new HANDLE[vdev->phys_port_cnt]; - if (vcontext->event == NULL) { - goto err2; - } - hr = prov->OpenDevice(vdev->guid, &vcontext->context.cmd_if); if (FAILED(hr)) { - goto err3; + goto err2; } for (i = 0; i < vdev->phys_port_cnt; i++) { - vcontext->event[i] = CreateEvent(NULL, FALSE, FALSE, NULL); - if (vcontext->event[i] == NULL) { - goto err4; - } - vcontext->port[i].overlap.hEvent = vcontext->event[i]; + vcontext->port[i].port_num = (uint8_t) i + 1; vcontext->port[i].event_flag = 0; - - vcontext->context.cmd_if->Notify((UINT8) i + 1, - &vcontext->port[i].overlap, + CompEntryInit(&vcontext->context.channel, &vcontext->port[i].comp_entry); + vcontext->context.cmd_if->Notify(vcontext->port[i].port_num, + &vcontext->port[i].comp_entry.Overlap, &vcontext->port[i].event_flag); } return &vcontext->context; -err4: - while (--i >= 0) { - CloseHandle(vcontext->event[i]); - } -err3: - delete vcontext->event; err2: delete vcontext->port; err1: @@ -235,11 +237,10 @@ int ibv_close_device(struct ibv_context *context) context->cmd_if->CancelOverlappedRequests(); for (i = 0; i < vcontext->device.phys_port_cnt; i++) { - CloseHandle(vcontext->event[i]); + CompChannelRemoveEntry(&context->channel, &vcontext->port[i].comp_entry); } context->cmd_if->Release(); - delete vcontext->event; delete vcontext->port; delete vcontext; return 0; @@ -250,7 +251,7 @@ static enum ibv_event_type ibv_get_port_event_state(struct verbs_context *vconte WV_PORT_ATTRIBUTES attr; HRESULT hr; - hr = vcontext->context.cmd_if->QueryPort(vcontext->event_port_index + 1, &attr); + hr = vcontext->context.cmd_if->QueryPort(vcontext->event_port->port_num, &attr); if (FAILED(hr)) { return IBV_EVENT_PORT_ERR; } @@ -265,8 +266,8 @@ static int ibv_report_port_event(struct verbs_context *vcontext, struct verbs_port *port; int ret = 0; - port = &vcontext->port[vcontext->event_port_index]; - event->element.port_num = vcontext->event_port_index + 1; + port = vcontext->event_port; + event->element.port_num = port->port_num; if (port->event_flag & WV_EVENT_ERROR) { event->event_type = IBV_EVENT_DEVICE_FATAL; @@ -289,9 +290,10 @@ static int ibv_report_port_event(struct verbs_context *vcontext, } if (port->event_flag == 0) { - vcontext->context.cmd_if->Notify(vcontext->event_port_index + 1, - &port->overlap, &port->event_flag); - vcontext->event_port_index = EVENT_PORT_NONE; + vcontext->context.cmd_if->Notify(vcontext->event_port->port_num, + &port->comp_entry.Overlap, + &port->event_flag); + vcontext->event_port = NULL; } return ret; } @@ -301,26 +303,23 @@ int ibv_get_async_event(struct ibv_context *context, struct ibv_async_event *event) { struct verbs_context *vcontext; - HRESULT hr; - int i; + COMP_ENTRY *entry; + int ret; vcontext = CONTAINING_RECORD(context, struct verbs_context, context); - if (vcontext->event_port_index != EVENT_PORT_NONE) { + if (vcontext->event_port) { if (ibv_report_port_event(vcontext, event) == 0) { return 0; } } - hr = WaitForMultipleObjects(vcontext->device.phys_port_cnt, - vcontext->event, FALSE, context->timeout); - if (hr == WAIT_TIMEOUT) { - return hr; - } else if (hr == WAIT_FAILED) { - return HRESULT_FROM_WIN32(GetLastError()); + ret = CompChannelPoll(&context->channel, &entry); + if (!ret) { + vcontext->event_port = CONTAINING_RECORD(entry, struct verbs_port, comp_entry); + ret = ibv_report_port_event(vcontext, event); } - vcontext->event_port_index = (UINT8) hr; - return ibv_report_port_event(vcontext, event); + return ret; } __declspec(dllexport) diff --git a/trunk/ulp/libibverbs/src/ibv_exports.src b/trunk/ulp/libibverbs/src/ibv_exports.src index 4698d947..63f274ea 100644 --- a/trunk/ulp/libibverbs/src/ibv_exports.src +++ b/trunk/ulp/libibverbs/src/ibv_exports.src @@ -51,4 +51,6 @@ ibv_detach_mcast ibv_node_type_str ibv_port_state_str ibv_event_type_str +ibv_get_windata +ibv_release_windata #endif diff --git a/trunk/ulp/libibverbs/src/ibverbs.h b/trunk/ulp/libibverbs/src/ibverbs.h index 834b178c..baf012c6 100644 --- a/trunk/ulp/libibverbs/src/ibverbs.h +++ b/trunk/ulp/libibverbs/src/ibverbs.h @@ -1,6 +1,5 @@ /* - * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009 Intel Corp, Inc. All rights reserved. * * This software is available to you under the OpenIB.org BSD license * below: @@ -31,6 +30,8 @@ #ifndef IB_VERBS_H #define IB_VERBS_H +extern COMP_MANAGER comp_mgr; + __inline void* __cdecl operator new(size_t size) { return HeapAlloc(GetProcessHeap(), 0, size); diff --git a/trunk/ulp/libibverbs/src/verbs.cpp b/trunk/ulp/libibverbs/src/verbs.cpp index a25d4307..ca80d1aa 100644 --- a/trunk/ulp/libibverbs/src/verbs.cpp +++ b/trunk/ulp/libibverbs/src/verbs.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2008 Intel Corporation. All rights reserved. + * Copyright (c) 2008-2009 Intel Corporation. All rights reserved. * * This software is available to you under the OpenIB.org BSD license * below: @@ -34,23 +34,9 @@ #include #include +#include #include "ibverbs.h" -struct verbs_cq -{ - struct ibv_cq cq; - int channel_index; -}; - -struct verbs_comp_channel -{ - struct ibv_comp_channel channel; - CRITICAL_SECTION lock; - struct verbs_cq *cq[MAXIMUM_WAIT_OBJECTS]; - HANDLE event[MAXIMUM_WAIT_OBJECTS]; - int count; -}; - __declspec(dllexport) int ibv_rate_to_mult(enum ibv_rate rate) { @@ -64,7 +50,7 @@ int ibv_rate_to_mult(enum ibv_rate rate) case IBV_RATE_60_GBPS: return 24; case IBV_RATE_80_GBPS: return 32; case IBV_RATE_120_GBPS: return 48; - default: return -1; + default: return -1; } } @@ -322,124 +308,60 @@ int ibv_dereg_mr(struct ibv_mr *mr) __declspec(dllexport) struct ibv_comp_channel *ibv_create_comp_channel(struct ibv_context *context) { - struct verbs_comp_channel *vchan; + struct ibv_comp_channel *channel; - vchan = new struct verbs_comp_channel; - if (vchan == NULL) { + channel = new struct ibv_comp_channel; + if (channel == NULL) { return NULL; } - InitializeCriticalSection(&vchan->lock); - vchan->count = 0; - vchan->channel.context = context; - vchan->channel.timeout = INFINITE; - - return &vchan->channel; + CompChannelInit(&comp_mgr, &channel->comp_channel, INFINITE); + channel->context = context; + return channel; } __declspec(dllexport) int ibv_destroy_comp_channel(struct ibv_comp_channel *channel) { - struct verbs_comp_channel *vchan; - - vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel); - if (vchan->count > 0) { - return -1; - } - - DeleteCriticalSection(&vchan->lock); - delete vchan; + CompChannelCleanup(&channel->comp_channel); + delete channel; return 0; } -static int ibv_comp_channel_insert_cq(struct ibv_comp_channel *channel, - struct verbs_cq *cq) -{ - struct verbs_comp_channel *vchan; - int ret = 0; - - vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel); - - EnterCriticalSection(&vchan->lock); - if (vchan->count == MAXIMUM_WAIT_OBJECTS) { - ret = -1; - goto out; - } - - vchan->cq[vchan->count] = cq; - vchan->event[vchan->count] = cq->cq.overlap.hEvent; - cq->channel_index = vchan->count++; -out: - LeaveCriticalSection(&vchan->lock); - return ret; -} - -/* - * TODO: we cannot call ibv_comp_channel_remove_cq() while another - * thread is calling ibv_get_cq_event(). If this is needed, then we - * need to halt the ibv_get_cq_event() thread, modify the event list, - * then restart the ibv_get_cq_event() thread. - */ -static void ibv_comp_channel_remove_cq(struct ibv_comp_channel *channel, - struct verbs_cq *cq) -{ - struct verbs_comp_channel *vchan; - - vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel); - - EnterCriticalSection(&vchan->lock); - vchan->count--; - vchan->cq[cq->channel_index] = vchan->cq[vchan->count]; - vchan->event[cq->channel_index] = vchan->event[vchan->count]; - vchan->cq[cq->channel_index]->channel_index = cq->channel_index; - LeaveCriticalSection(&vchan->lock); -} - __declspec(dllexport) struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe, void *cq_context, struct ibv_comp_channel *channel, int comp_vector) { - struct verbs_cq *vcq; + struct ibv_cq *cq; HRESULT hr; SIZE_T entries; - vcq = new struct verbs_cq; - if (vcq == NULL) { + cq = new struct ibv_cq; + if (cq == NULL) { return NULL; } - vcq->cq.overlap.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); - if (vcq->cq.overlap.hEvent == NULL) { - goto err1; - } - - vcq->cq.context = context; - vcq->cq.channel = channel; - vcq->cq.cq_context = cq_context; - vcq->cq.comp_events_completed = 0; + cq->context = context; + cq->channel = channel; + cq->cq_context = cq_context; entries = cqe; - hr = context->cmd_if->CreateCompletionQueue(&entries, &vcq->cq.handle); + hr = context->cmd_if->CreateCompletionQueue(&entries, &cq->handle); if (FAILED(hr)) { - goto err2; + goto err; } if (channel != NULL) { - hr = ibv_comp_channel_insert_cq(channel, vcq); - if (FAILED(hr)) { - goto err3; - } + CompEntryInit(&channel->comp_channel, &cq->comp_entry); + } else { + memset(&cq->comp_entry, 0, sizeof cq->comp_entry); } - vcq->cq.cqe = (uint32_t) entries; - return &vcq->cq; + cq->cqe = (uint32_t) entries; + return cq; -err3: - vcq->cq.handle->Release(); -err2: - CloseHandle(vcq->cq.overlap.hEvent); -err1: - delete vcq; +err: + delete cq; return NULL; } @@ -460,8 +382,9 @@ __declspec(dllexport) int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only) { HRESULT hr; + hr = cq->handle->Notify(solicited_only ? WvCqSolicited : WvCqNextCompletion, - &cq->overlap); + &cq->comp_entry.Overlap); if (SUCCEEDED(hr) || hr == WV_IO_PENDING) { return 0; } else { @@ -486,19 +409,12 @@ int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc) __declspec(dllexport) int ibv_destroy_cq(struct ibv_cq *cq) { - struct verbs_cq *vcq; - - vcq = CONTAINING_RECORD(cq, struct verbs_cq, cq); - cq->handle->CancelOverlappedRequests(); if (cq->channel != NULL) { - ibv_comp_channel_remove_cq(cq->channel, vcq); + CompChannelRemoveEntry(&cq->channel->comp_channel, &cq->comp_entry); } - while (cq->comp_events_completed > 0) - ; /* twiddle thumbs */ - cq->handle->Release(); delete cq; return 0; @@ -508,42 +424,21 @@ __declspec(dllexport) int ibv_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, void **cq_context) { - struct verbs_comp_channel *vchan; - struct verbs_cq *vcq; - HRESULT hr; + COMP_ENTRY *entry; + DWORD ret; - vchan = CONTAINING_RECORD(channel, struct verbs_comp_channel, channel); - hr = WaitForMultipleObjects(vchan->count, vchan->event, FALSE, - vchan->channel.timeout); - if (hr == WAIT_TIMEOUT) { - return hr; - } else if (hr == WAIT_FAILED) { - return HRESULT_FROM_WIN32(GetLastError()); + ret = CompChannelPoll(&channel->comp_channel, &entry); + if (!ret) { + *cq = CONTAINING_RECORD(entry, struct ibv_cq, comp_entry); + *cq_context = (*cq)->cq_context; } - EnterCriticalSection(&vchan->lock); - vcq = vchan->cq[hr]; - vcq->cq.comp_events_completed++; - LeaveCriticalSection(&vchan->lock); - - *cq = &vcq->cq; - *cq_context = vcq->cq.cq_context; - return 0; + return ret; } __declspec(dllexport) void ibv_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) { - struct verbs_comp_channel *vchan; - - if (cq->channel == NULL) { - return; - } - - vchan = CONTAINING_RECORD(cq->channel, struct verbs_comp_channel, channel); - EnterCriticalSection(&vchan->lock); - cq->comp_events_completed -= nevents; - LeaveCriticalSection(&vchan->lock); } __declspec(dllexport)