git.openfabrics.org - ~shefty/rdma-win.git/commitdiff
[DAPL2]
author Stan Smith <stan.smith@intel.com>
Tue, 26 Jan 2010 20:31:36 +0000 (20:31 +0000)
committer Stan Smith <stan.smith@intel.com>
Tue, 26 Jan 2010 20:31:36 +0000 (20:31 +0000)
libibverbs/device: destroy completion channel when closing device
ibv_close_device should destroy the completion channel
when closing the device and freeing the memory.
 trunk/ulp/libibverbs/src/device.cpp

dapl: move close device after async thread is done using it
Before calling ibv_close_device, wait for the asynchronous
processing thread to finish using the device. This prevents a use-after-free error.
 trunk/ulp/dapl2/dapl/openib_cma/device.c
 trunk/ulp/dapl2/dapl/openib_scm/device.c

librdmacm: set private_data_len
Set the private_data_len for reported events.  This allows DAPL to use the value when copying private data.
 trunk/ulp/librdmacm/include/rdma/rdma_cma.h
 trunk/ulp/librdmacm/src/cma.cpp

dapl: quick fix for wrong private data size
DAPL expects the private data size to be up to 256 bytes, but
on Windows the private data size is limited to 56 bytes.  As
a result, DAPL can access memory beyond the end of the
allocated private data buffer.
A more 'correct' fix is being submitted upstream to dapl.  This
is a simpler fix for the Windows 2.2 release only.
 trunk/ulp/dapl2/dapl/openib_cma/cm.c

dapl/cma: fix referencing freed address
DAPL uses a pointer to reference the local and remote addresses
of an endpoint.  It expects that those addresses are located
in memory that is always accessible.  Typically, for the local
address, the pointer references the address stored with the DAPL
HCA device.  However, for the cma provider, it changes this pointer
to reference the address stored with the rdma_cm_id.

This causes a problem when that endpoint is connected on the
passive side of a connection.  When connect requests are given
to DAPL, a new rdma_cm_id is associated with the request.  The
DAPL code replaces the current rdma_cm_id associated with a
user's endpoint with the new rdma_cm_id.  The old rdma_cm_id is
then deleted.  But the endpoint's local address pointer still
references the address stored with the old rdma_cm_id.  The
result is that any reference to the address will access freed
memory.
Fix this by keeping the local address pointer always pointing
to the address associated with the DAPL HCA device.  This is about
the best that can be done given the DAPL interface design.
 trunk/ulp/dapl2/dapl/openib_common/qp.c

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
git-svn-id: svn://openib.tc.cornell.edu/gen1@2684 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

branches/WOF2-2/ulp/dapl2/dapl/openib_cma/cm.c
branches/WOF2-2/ulp/dapl2/dapl/openib_cma/device.c
branches/WOF2-2/ulp/dapl2/dapl/openib_common/qp.c
branches/WOF2-2/ulp/dapl2/dapl/openib_scm/device.c
branches/WOF2-2/ulp/libibverbs/src/device.cpp
branches/WOF2-2/ulp/librdmacm/include/rdma/rdma_cma.h
branches/WOF2-2/ulp/librdmacm/src/cma.cpp

index c52e71644defc5506175350b4daa793bf35a3ec5..61d30b46d28f0713a146907757ebc8022dea7975 100644 (file)
-/*
- * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. 
- * Copyright (c) 2003 Topspin Corporation.  All rights reserved. 
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_cm.c
- *
- * PURPOSE: The OFED provider - uCMA, name and route resolution
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_cr_util.h"
-#include "dapl_name_service.h"
-#include "dapl_ib_util.h"
-#include "dapl_vendor.h"
-#include "dapl_osd.h"
-
-extern struct rdma_event_channel *g_cm_events;
-
-/* local prototypes */
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
-                                        struct rdma_cm_event *event);
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
-                              struct rdma_cm_event *event);
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
-                               struct rdma_cm_event *event);
-static void dapli_addr_resolve(struct dapl_cm_id *conn);
-static void dapli_route_resolve(struct dapl_cm_id *conn);
-
-/* cma requires 16 bit SID, in network order */
-#define IB_PORT_MOD 32001
-#define IB_PORT_BASE (65535 - IB_PORT_MOD)
-#define SID_TO_PORT(SID) \
-    (SID > 0xffff ? \
-    htons((unsigned short)((SID % IB_PORT_MOD) + IB_PORT_BASE)) :\
-    htons((unsigned short)SID))
-
-#define PORT_TO_SID(p) ntohs(p)
-
-/* private data header to validate consumer rejects versus abnormal events */
-struct dapl_pdata_hdr {
-       DAT_UINT32 version;
-};
-
-static void dapli_addr_resolve(struct dapl_cm_id *conn)
-{
-       int ret;
-#ifdef DAPL_DBG
-       struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
-#endif
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " addr_resolve: cm_id %p SRC %x DST %x\n",
-                    conn->cm_id, ntohl(((struct sockaddr_in *)
-                                        &ipaddr->src_addr)->sin_addr.s_addr),
-                    ntohl(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_addr.s_addr));
-
-       ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);
-       if (ret) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",
-                        ret, strerror(errno));
-               dapl_evd_connection_callback(conn,
-                                            IB_CME_LOCAL_FAILURE,
-                                            NULL, conn->ep);
-       }
-}
-
-static void dapli_route_resolve(struct dapl_cm_id *conn)
-{
-       int ret;
-#ifdef DAPL_DBG
-       struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
-       struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
-#endif
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
-                    conn->cm_id, ntohl(((struct sockaddr_in *)
-                                        &ipaddr->src_addr)->sin_addr.s_addr),
-                    ntohl(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_addr.s_addr),
-                    ntohs(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_port));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: SRC GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    ntohll(ibaddr->sgid.global.subnet_prefix),
-                    (unsigned long long)
-                    ntohll(ibaddr->sgid.global.interface_id));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: DST GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    ntohll(ibaddr->dgid.global.subnet_prefix),
-                    (unsigned long long)
-                    ntohll(ibaddr->dgid.global.interface_id));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
-                    conn->cm_id,
-                    conn->params.private_data,
-                    conn->params.private_data_len,
-                    conn->params.responder_resources,
-                    conn->params.initiator_depth);
-
-       ret = rdma_connect(conn->cm_id, &conn->params);
-       if (ret) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapl_cma_connect: rdma_connect ERR %d %s\n",
-                        ret, strerror(errno));
-               goto bail;
-       }
-       return;
-
-      bail:
-       dapl_evd_connection_callback(conn,
-                                    IB_CME_LOCAL_FAILURE, NULL, conn->ep);
-}
-
-dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
-{
-       dp_ib_cm_handle_t conn;
-       struct rdma_cm_id *cm_id;
-
-       /* Allocate CM and initialize lock */
-       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
-               return NULL;
-
-       dapl_os_memzero(conn, sizeof(*conn));
-       dapl_os_lock_init(&conn->lock);
-       conn->refs++;
-
-       /* create CM_ID, bind to local device, create QP */
-       if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {
-               dapl_os_free(conn, sizeof(*conn));
-               return NULL;
-       }
-       conn->cm_id = cm_id;
-
-       /* setup timers for address and route resolution */
-       conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
-                                               IB_ARP_TIMEOUT);
-       conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
-                                               IB_ARP_RETRY_COUNT);
-       conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
-                                                 IB_ROUTE_TIMEOUT);
-       conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
-                                                 IB_ROUTE_RETRY_COUNT);
-       if (ep != NULL) {
-               conn->ep = ep;
-               conn->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;
-       }
-
-       return conn;
-}
-
-/* 
- * Only called from consumer thread via dat_ep_free()
- * accept, reject, or connect.
- * Cannot be called from callback thread.
- * rdma_destroy_id will block until rdma_get_cm_event is acked.
- */
-void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " destroy_conn: conn %p id %d\n", 
-                    conn, conn->cm_id);
-
-       dapl_os_lock(&conn->lock);
-       conn->refs--;
-       dapl_os_unlock(&conn->lock);
-
-       /* block until event thread complete */
-       while (conn->refs) 
-               dapl_os_sleep_usec(10000);
-       
-       if (ep) {
-               ep->cm_handle = NULL;
-               ep->qp_handle = NULL;
-               ep->qp_state = IB_QP_STATE_ERROR;
-       }
-
-       if (conn->cm_id) {
-               if (conn->cm_id->qp)
-                       rdma_destroy_qp(conn->cm_id);
-               rdma_destroy_id(conn->cm_id);
-       }
-
-       dapl_os_free(conn, sizeof(*conn));
-}
-
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
-                                        struct rdma_cm_event *event)
-{
-       struct dapl_cm_id *new_conn;
-#ifdef DAPL_DBG
-       struct rdma_addr *ipaddr = &event->id->route.addr;
-#endif
-
-       if (conn->sp == NULL) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " dapli_rep_recv: on invalid listen " "handle\n");
-               return NULL;
-       }
-
-       /* allocate new cm_id and merge listen parameters */
-       new_conn = dapl_os_alloc(sizeof(*new_conn));
-       if (new_conn) {
-               (void)dapl_os_memzero(new_conn, sizeof(*new_conn));
-               dapl_os_lock_init(&new_conn->lock);
-               new_conn->cm_id = event->id;    /* provided by uCMA */
-               event->id->context = new_conn;  /* update CM_ID context */
-               new_conn->sp = conn->sp;
-               new_conn->hca = conn->hca;
-               new_conn->refs++;
-
-               /* Get requesters connect data, setup for accept */
-               new_conn->params.responder_resources =
-                   DAPL_MIN(event->param.conn.responder_resources,
-                            conn->hca->ib_trans.rd_atom_in);
-               new_conn->params.initiator_depth =
-                   DAPL_MIN(event->param.conn.initiator_depth,
-                            conn->hca->ib_trans.rd_atom_out);
-
-               new_conn->params.flow_control = event->param.conn.flow_control;
-               new_conn->params.rnr_retry_count =
-                   event->param.conn.rnr_retry_count;
-               new_conn->params.retry_count = event->param.conn.retry_count;
-
-               /* save private data */
-               if (event->param.conn.private_data_len) {
-                       dapl_os_memcpy(new_conn->p_data,
-                                      event->param.conn.private_data,
-                                      event->param.conn.private_data_len);
-                       new_conn->params.private_data = new_conn->p_data;
-                       new_conn->params.private_data_len =
-                           event->param.conn.private_data_len;
-               }
-
-               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
-                            "REQ: SP %p PORT %d LID %d "
-                            "NEW CONN %p ID %p pdata %p,%d\n",
-                            new_conn->sp, ntohs(((struct sockaddr_in *)
-                                                 &ipaddr->src_addr)->sin_port),
-                            event->listen_id, new_conn, event->id,
-                            event->param.conn.private_data,
-                            event->param.conn.private_data_len);
-
-               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
-                            "REQ: IP SRC %x PORT %d DST %x PORT %d "
-                            "rr %d init %d\n", ntohl(((struct sockaddr_in *)
-                                                      &ipaddr->src_addr)->
-                                                     sin_addr.s_addr),
-                            ntohs(((struct sockaddr_in *)
-                                   &ipaddr->src_addr)->sin_port),
-                            ntohl(((struct sockaddr_in *)
-                                   &ipaddr->dst_addr)->sin_addr.s_addr),
-                            ntohs(((struct sockaddr_in *)
-                                   &ipaddr->dst_addr)->sin_port),
-                            new_conn->params.responder_resources,
-                            new_conn->params.initiator_depth);
-       }
-       return new_conn;
-}
-
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
-                              struct rdma_cm_event *event)
-{
-       DAPL_OS_LOCK *lock = &conn->lock;
-       ib_cm_events_t ib_cm_event;
-       const void *pdata = NULL;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " active_cb: conn %p id %d event %d\n",
-                    conn, conn->cm_id, event->event);
-
-       /* There is a chance that we can get events after
-        * the consumer calls disconnect in a pending state
-        * since the IB CM and uDAPL states are not shared.
-        * In some cases, IB CM could generate either a DCONN
-        * or CONN_ERR after the consumer returned from
-        * dapl_ep_disconnect with a DISCONNECTED event
-        * already queued. Check state here and bail to
-        * avoid any events after a disconnect.
-        */
-       if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
-               return;
-
-       dapl_os_lock(&conn->ep->header.lock);
-       if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
-               dapl_os_unlock(&conn->ep->header.lock);
-               return;
-       }
-       if (event->event == RDMA_CM_EVENT_DISCONNECTED)
-               conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-
-       dapl_os_unlock(&conn->ep->header.lock);
-       dapl_os_lock(lock);
-
-       switch (event->event) {
-       case RDMA_CM_EVENT_UNREACHABLE:
-       case RDMA_CM_EVENT_CONNECT_ERROR:
-               dapl_log(DAPL_DBG_TYPE_WARN,
-                        "dapl_cma_active: CONN_ERR event=0x%x"
-                        " status=%d %s DST %s, %d\n",
-                        event->event, event->status,
-                        (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
-                        inet_ntoa(((struct sockaddr_in *)
-                                   &conn->cm_id->route.addr.dst_addr)->
-                                  sin_addr),
-                        ntohs(((struct sockaddr_in *)
-                               &conn->cm_id->route.addr.dst_addr)->
-                              sin_port));
-
-               /* per DAT SPEC provider always returns UNREACHABLE */
-               ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;
-               break;
-       case RDMA_CM_EVENT_REJECTED:
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " dapli_cm_active_handler: REJECTED reason=%d\n",
-                            event->status);
-
-               /* valid REJ from consumer will always contain private data */
-               if (event->status == 28 &&
-                   event->param.conn.private_data_len) {
-                       ib_cm_event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
-                       pdata =
-                           (unsigned char *)event->param.conn.
-                           private_data +
-                           sizeof(struct dapl_pdata_hdr);
-               } else {
-                       ib_cm_event = IB_CME_DESTINATION_REJECT;
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cma_active: non-consumer REJ,"
-                                " reason=%d, DST %s, %d\n",
-                                event->status,
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->cm_id->route.addr.
-                                           dst_addr)->sin_addr),
-                                ntohs(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.
-                                       dst_addr)->sin_port));
-               }
-               break;
-       case RDMA_CM_EVENT_ESTABLISHED:
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " active_cb: cm_id %d PORT %d CONNECTED to %s!\n",
-                            conn->cm_id, ntohs(((struct sockaddr_in *)
-                                                &conn->cm_id->route.addr.
-                                                dst_addr)->sin_port),
-                            inet_ntoa(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.dst_addr)->
-                                      sin_addr));
-
-               /* setup local and remote ports for ep query */
-               conn->ep->param.remote_port_qual =
-                   PORT_TO_SID(rdma_get_dst_port(conn->cm_id));
-               conn->ep->param.local_port_qual =
-                   PORT_TO_SID(rdma_get_src_port(conn->cm_id));
-
-               ib_cm_event = IB_CME_CONNECTED;
-               pdata = event->param.conn.private_data;
-               break;
-       case RDMA_CM_EVENT_DISCONNECTED:
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " active_cb: DISC EVENT - EP %p\n",conn->ep);
-               rdma_disconnect(conn->cm_id);   /* required for DREP */
-               ib_cm_event = IB_CME_DISCONNECTED;
-               /* validate EP handle */
-               if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
-                       conn = NULL;
-               break;
-       default:
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " dapli_cm_active_cb_handler: Unexpected CM "
-                            "event %d on ID 0x%p\n", event->event,
-                            conn->cm_id);
-               conn = NULL;
-               break;
-       }
-
-       dapl_os_unlock(lock);
-       if (conn)
-               dapl_evd_connection_callback(conn, ib_cm_event, pdata, conn->ep);
-}
-
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
-                               struct rdma_cm_event *event)
-{
-       ib_cm_events_t ib_cm_event;
-       struct dapl_cm_id *conn_recv = conn;
-       const void *pdata = NULL;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " passive_cb: conn %p id %d event %d\n",
-                    conn, event->id, event->event);
-
-       dapl_os_lock(&conn->lock);
-
-       switch (event->event) {
-       case RDMA_CM_EVENT_CONNECT_REQUEST:
-               /* create new conn object with new conn_id from event */
-               conn_recv = dapli_req_recv(conn, event);
-               ib_cm_event = IB_CME_CONNECTION_REQUEST_PENDING;
-               pdata = event->param.conn.private_data;
-               break;
-       case RDMA_CM_EVENT_UNREACHABLE:
-       case RDMA_CM_EVENT_CONNECT_ERROR:
-               dapl_log(DAPL_DBG_TYPE_WARN,
-                        "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"
-                        " DST %s,%d\n",
-                        event->event, event->status,
-                        (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
-                        inet_ntoa(((struct sockaddr_in *)
-                                   &conn->cm_id->route.addr.dst_addr)->
-                                  sin_addr), ntohs(((struct sockaddr_in *)
-                                                    &conn->cm_id->route.addr.
-                                                    dst_addr)->sin_port));
-               ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;
-               break;
-       case RDMA_CM_EVENT_REJECTED:
-               /* will alwasys be abnormal NON-consumer from active side */
-               dapl_log(DAPL_DBG_TYPE_WARN,
-                        "dapl_cm_passive: non-consumer REJ, reason=%d,"
-                        " DST %s, %d\n",
-                        event->status,
-                        inet_ntoa(((struct sockaddr_in *)
-                                   &conn->cm_id->route.addr.dst_addr)->
-                                  sin_addr),
-                        ntohs(((struct sockaddr_in *)
-                               &conn->cm_id->route.addr.dst_addr)->
-                              sin_port));
-               ib_cm_event = IB_CME_DESTINATION_REJECT;
-               break;
-       case RDMA_CM_EVENT_ESTABLISHED:
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",
-                            conn->cm_id, ntohs(((struct sockaddr_in *)
-                                                &conn->cm_id->route.addr.
-                                                src_addr)->sin_port),
-                            ntohl(((struct sockaddr_in *)
-                                   &conn->cm_id->route.addr.dst_addr)->
-                                  sin_addr.s_addr));
-               ib_cm_event = IB_CME_CONNECTED;
-               break;
-       case RDMA_CM_EVENT_DISCONNECTED:
-               rdma_disconnect(conn->cm_id);   /* required for DREP */
-               ib_cm_event = IB_CME_DISCONNECTED;
-               /* validate SP handle context */
-               if (DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) &&
-                   DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
-                       conn_recv = NULL;
-               break;
-       default:
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "
-                            "Unexpected CM event %d on ID 0x%p\n",
-                            event->event, conn->cm_id);
-               conn_recv = NULL;
-               break;
-       }
-
-       dapl_os_unlock(&conn->lock);
-       if (conn_recv)
-               dapls_cr_callback(conn_recv, ib_cm_event, pdata, conn_recv->sp);
-}
-
-/************************ DAPL provider entry points **********************/
-
-/*
- * dapls_ib_connect
- *
- * Initiate a connection with the passive listener on another node
- *
- * Input:
- *     ep_handle,
- *     remote_ia_address,
- *     remote_conn_qual,
- *     prd_size                size of private data and structure
- *     prd_prt                 pointer to private data structure
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
-                           IN DAT_IA_ADDRESS_PTR r_addr,
-                           IN DAT_CONN_QUAL r_qual,
-                           IN DAT_COUNT p_size, IN void *p_data)
-{
-       struct dapl_ep *ep_ptr = ep_handle;
-       struct dapl_cm_id *conn = ep_ptr->cm_handle;
-       int ret;
-
-       /* Sanity check */
-       if (NULL == ep_ptr)
-               return DAT_SUCCESS;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
-                    r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
-
-       /* rdma conn and cm_id pre-bound; reference via ep_ptr->cm_handle */
-
-       /* Setup QP/CM parameters and private data in cm_id */
-       (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
-       conn->params.responder_resources =
-           ep_ptr->param.ep_attr.max_rdma_read_in;
-       conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
-       conn->params.flow_control = 1;
-       conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
-       conn->params.retry_count = IB_RC_RETRY_COUNT;
-       if (p_size) {
-               dapl_os_memcpy(conn->p_data, p_data, p_size);
-               conn->params.private_data = conn->p_data;
-               conn->params.private_data_len = p_size;
-       }
-
-       /* copy in remote address, need a copy for retry attempts */
-       dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
-
-       /* Resolve remote address, src already bound during QP create */
-       ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);
-       ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;
-
-       ret = rdma_resolve_addr(conn->cm_id, NULL,
-                               (struct sockaddr *)&conn->r_addr,
-                               conn->arp_timeout);
-       if (ret) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
-                        ret, strerror(errno));
-               return dapl_convert_errno(errno, "ib_connect");
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " connect: resolve_addr: cm_id %p -> %s port %d\n",
-                    conn->cm_id,
-                    inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
-                    ((struct sockaddr_in *)&conn->r_addr)->sin_port);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect
- *
- * Disconnect an EP
- *
- * Input:
- *     ep_handle,
- *     disconnect_flags
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *
- */
-DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
-{
-       dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " disconnect(ep %p, conn %p, id %d flags %x)\n",
-                    ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);
-
-       if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
-               return DAT_SUCCESS;
-
-       /* no graceful half-pipe disconnect option */
-       rdma_disconnect(conn->cm_id);
-
-       /* 
-        * DAT event notification occurs from the callback
-        * Note: will fire even if DREQ goes unanswered on timeout 
-        */
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect_clean
- *
- * Clean up outstanding connection data. This routine is invoked
- * after the final disconnect callback has occurred. Only on the
- * ACTIVE side of a connection.
- *
- * Input:
- *     ep_ptr          DAPL_EP
- *     active          Indicates active side of connection
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- *
- */
-void
-dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
-                         IN DAT_BOOLEAN active,
-                         IN const ib_cm_events_t ib_cm_event)
-{
-       /* nothing to do */
-       return;
-}
-
-/*
- * dapl_ib_setup_conn_listener
- *
- * Have the CM set up a connection listener.
- *
- * Input:
- *     ibm_hca_handle          HCA handle
- *     qp_handle                       QP handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INTERNAL_ERROR
- *     DAT_CONN_QUAL_UNAVAILBLE
- *     DAT_CONN_QUAL_IN_USE
- *
- */
-DAT_RETURN
-dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
-                            IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
-{
-       DAT_RETURN dat_status = DAT_SUCCESS;
-       ib_cm_srvc_handle_t conn;
-       DAT_SOCK_ADDR6 addr;    /* local binding address */
-
-       /* Allocate CM and initialize lock */
-       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       dapl_os_memzero(conn, sizeof(*conn));
-       dapl_os_lock_init(&conn->lock);
-       conn->refs++;
-
-       /* create CM_ID, bind to local device, create QP */
-       if (rdma_create_id
-           (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
-               dapl_os_free(conn, sizeof(*conn));
-               return (dapl_convert_errno(errno, "setup_listener"));
-       }
-
-       /* open identifies the local device; per DAT specification */
-       /* Get family and address then set port to consumer's ServiceID */
-       dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));
-       ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);
-
-       if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {
-               if ((errno == EBUSY) || (errno == EADDRINUSE) || 
-                   (errno == EADDRNOTAVAIL))
-                       dat_status = DAT_CONN_QUAL_IN_USE;
-               else
-                       dat_status =
-                           dapl_convert_errno(errno, "setup_listener");
-               goto bail;
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %d)\n",
-                    ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),
-                    sp_ptr, conn, conn->cm_id);
-
-       sp_ptr->cm_srvc_handle = conn;
-       conn->sp = sp_ptr;
-       conn->hca = ia_ptr->hca_ptr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " listen(conn=%p cm_id=%d)\n",
-                    sp_ptr->cm_srvc_handle, conn->cm_id);
-
-       if (rdma_listen(conn->cm_id, 0)) {      /* max cma backlog */
-
-               if ((errno == EBUSY) || (errno == EADDRINUSE) ||
-                   (errno == EADDRNOTAVAIL))
-                       dat_status = DAT_CONN_QUAL_IN_USE;
-               else
-                       dat_status =
-                           dapl_convert_errno(errno, "setup_listener");
-               goto bail;
-       }
-
-       /* success */
-       return DAT_SUCCESS;
-
-      bail:
-       rdma_destroy_id(conn->cm_id);
-       dapl_os_free(conn, sizeof(*conn));
-       return dat_status;
-}
-
-/*
- * dapls_ib_remove_conn_listener
- *
- * Tear down the CM listener attached to a service point.
- *
- * Input:
- *     ia_ptr          IA the service point belongs to (only logged here)
- *     sp_ptr          service point whose cm_srvc_handle is released
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS     always; a service point without a listener is a no-op
- *
- */
-DAT_RETURN
-dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
-{
-       ib_cm_srvc_handle_t listener = sp_ptr->cm_srvc_handle;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
-                    ia_ptr, sp_ptr, listener);
-
-       if (listener == IB_INVALID_HANDLE)
-               return DAT_SUCCESS;
-
-       /* detach from the SP first, then release the CM object */
-       sp_ptr->cm_srvc_handle = NULL;
-       dapls_ib_cm_free(listener, NULL);
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_accept_connection
- *
- * Accept a pending connection request (CR) on the given endpoint.
- *
- * The EP's QP is handed over from the EP's pre-allocated cm_id to the
- * cm_id that arrived with the request, the EP's old CM object is freed,
- * and rdma_accept() is issued with the consumer's private data.
- *
- * Input:
- *     cr_handle       connection request being accepted
- *     ep_handle       endpoint that will carry the connection
- *     p_size          length of p_data, at most IB_MAX_REP_PDATA_SIZE
- *     p_data          private data returned to the requester
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_LENGTH_ERROR        p_size too large
- *     DAT_INTERNAL_ERROR      EP and CR are on different devices or ports
- *     status of dapls_ib_qp_alloc() or the converted rdma_accept() errno
- *
- * On every failure the request is rejected and its CM object is freed.
- *
- */
-DAT_RETURN
-dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
-                          IN DAT_EP_HANDLE ep_handle,
-                          IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
-{
-       DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
-       DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
-       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
-       struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
-       int ret;
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",
-                    cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);
-
-       /* Obtain size of private data structure & contents */
-       if (p_size > IB_MAX_REP_PDATA_SIZE) {
-               dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
-               goto bail;
-       }
-
-       if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
-               /* 
-                * If we are lazy attaching the QP then we may need to
-                * hook it up here. Typically, we run this code only for
-                * DAT_PSP_PROVIDER_FLAG
-                */
-               dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
-               if (dat_status != DAT_SUCCESS) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " dapl_cma_accept: qp_alloc ERR %d\n",
-                                dat_status);
-                       goto bail;
-               }
-       }
-
-       /* 
-        * Validate device and port in EP cm_id against inbound 
-        * CR cm_id. The pre-allocated EP cm_id is already bound to 
-        * a local device (cm_id and QP) when created. Move the QP
-        * to the new cm_id only if device and port numbers match.
-        */
-       if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
-           ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
-               /* move QP to new cr_conn, remove QP ref in EP cm_id */
-               cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;
-               ep_ptr->cm_handle->cm_id->qp = NULL;
-               /*
-                * NOTE(review): freed with ep == NULL, so ep_ptr->cm_handle
-                * keeps pointing at the released object until it is
-                * reassigned below; a failed rdma_accept() leaves it that
-                * way.  Confirm callers do not use it after a failure.
-                */
-               dapls_ib_cm_free(ep_ptr->cm_handle, NULL);
-       } else {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapl_cma_accept: ERR dev(%p!=%p) or"
-                        " port mismatch(%d!=%d)\n",
-                        ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,
-                        ntohs(ep_ptr->cm_handle->cm_id->port_num),
-                        ntohs(cr_conn->cm_id->port_num));
-               dat_status = DAT_INTERNAL_ERROR;
-               goto bail;
-       }
-
-       cr_ptr->param.local_ep_handle = ep_handle;
-       cr_conn->params.private_data = p_data;
-       cr_conn->params.private_data_len = p_size;
-
-       ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
-       if (ret) {
-               /*
-                * The failure cause is in errno, not in ret; capture it
-                * before logging has a chance to overwrite it.
-                */
-               int err = errno;
-
-               dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
-                        ret, strerror(err));
-               dat_status = dapl_convert_errno(err, "accept");
-               goto bail;
-       }
-
-       /* save accepted conn and EP reference, qp_handle unchanged */
-       ep_ptr->cm_handle = cr_conn;
-       cr_conn->ep = ep_ptr;
-
-       /* setup local and remote ports for ep query */
-       /* Note: port qual in network order */
-       ep_ptr->param.remote_port_qual =
-           PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));
-       ep_ptr->param.local_port_qual =
-           PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));
-
-       return DAT_SUCCESS;
-      bail:
-       /* tell the peer, then drop the CM object that came with the CR */
-       rdma_reject(cr_conn->cm_id, NULL, 0);
-       dapls_ib_cm_free(cr_conn, NULL);
-       return dat_status;
-}
-
-/*
- * dapls_ib_reject_connection
- *
- * Reject a connection request and release its CM object.
- *
- * The reject message always starts with a dapl_pdata_hdr carrying the
- * DAT/provider version, followed by any consumer private data.
- *
- * Input:
- *     cm_handle               CM object of the request being rejected
- *     reason                  only logged by this routine
- *     private_data_size       bytes of consumer private data
- *     private_data            consumer private data, may be empty
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_INVALID_HANDLE      cm_handle is IB_INVALID_HANDLE
- *     DAT_INVALID_PARAMETER   private data exceeds the reject limit
- *     otherwise the rdma_reject() result passed through dapl_convert_errno()
- *
- */
-DAT_RETURN
-dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
-                          IN int reason,
-                          IN DAT_COUNT private_data_size,
-                          IN const DAT_PVOID private_data)
-{
-       struct dapl_pdata_hdr hdr;
-       int hdr_len = sizeof(struct dapl_pdata_hdr);
-       int rc;
-
-       /* version word: DAT major/minor, provider major/minor, one byte each */
-       memset(&hdr, 0, sizeof hdr);
-       hdr.version = htonl((DAT_VERSION_MAJOR << 24) |
-                           (DAT_VERSION_MINOR << 16) |
-                           (VN_PROVIDER_MAJOR << 8) |
-                           (VN_PROVIDER_MINOR));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",
-                    cm_handle, reason, ntohl(hdr.version),
-                    private_data, private_data_size);
-
-       if (cm_handle == IB_INVALID_HANDLE) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " reject: invalid handle: reason %d\n", reason);
-               return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);
-       }
-
-       if (private_data_size >
-           dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,
-                                      cm_handle->hca))
-               return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
-
-       /* header first, consumer bytes right behind it, in the CR buffer */
-       dapl_os_memcpy(cm_handle->p_data, &hdr, hdr_len);
-       if (private_data_size)
-               dapl_os_memcpy(cm_handle->p_data + hdr_len,
-                              private_data, private_data_size);
-
-       /*
-        * The header is sent even without consumer data so the active
-        * peer can tell an application reject from an abnormal
-        * application termination.
-        */
-       rc = rdma_reject(cm_handle->cm_id,
-                        cm_handle->p_data, hdr_len + private_data_size);
-
-       dapls_ib_cm_free(cm_handle, NULL);
-       return dapl_convert_errno(rc, "reject");
-}
-
-/*
- * dapls_ib_cm_remote_addr
- *
- * Obtain the remote IP address given a connection
- *
- * Input:
- *     dat_handle      an EP or a CR handle; told apart by header magic
- *
- * Output:
- *     raddr           receives sizeof(DAT_SOCK_ADDR) bytes of the
- *                     destination address taken from the cm_id route
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_HANDLE      handle is neither EP nor CR, or it has no
- *                             CM object / cm_id attached
- *
- */
-DAT_RETURN
-dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
-{
-       DAPL_HEADER *header;
-       dp_ib_cm_handle_t ib_cm_handle;
-       struct rdma_addr *ipaddr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " remote_addr(cm_handle=%p, r_addr=%p)\n",
-                    dat_handle, raddr);
-
-       header = (DAPL_HEADER *) dat_handle;
-
-       if (header->magic == DAPL_MAGIC_EP)
-               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
-       else if (header->magic == DAPL_MAGIC_CR)
-               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
-       else
-               return DAT_INVALID_HANDLE;
-
-       /*
-        * An EP loses its CM object when dapls_ib_cm_free() is called with
-        * the EP (cm_handle is set to NULL there), so do not assume one.
-        */
-       if (ib_cm_handle == NULL || ib_cm_handle->cm_id == NULL)
-               return DAT_INVALID_HANDLE;
-
-       /* get remote IP address from cm_id route */
-       ipaddr = &ib_cm_handle->cm_id->route.addr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
-                    ib_cm_handle, ib_cm_handle->cm_id,
-                    ntohl(((struct sockaddr_in *)
-                           &ipaddr->src_addr)->sin_addr.s_addr),
-                    ntohl(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_addr.s_addr),
-                    ntohs(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_port));
-
-       dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_private_data_size
- *
- * Return the maximum private data size for a connection op type.
- *
- * Input:
- *     prd_ptr         private data pointer; not consulted by this provider
- *     conn_op         connection operation type
- *     hca_ptr         hca pointer, needed for transport type
- *
- * On an iWARP device one limit applies to every operation:
- * IWARP_MAX_PDATA_SIZE less the dapl_pdata_hdr.  Otherwise the limit is
- * the IB_MAX_*_PDATA_SIZE value for conn_op; the reject limit also has
- * the dapl_pdata_hdr taken out.
- *
- * Output:
- *     None
- *
- * Returns:
- *     length of private data, 0 for an unknown conn_op
- *
- */
-int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
-                              IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
-{
-       if (hca_ptr->ib_hca_handle->device->transport_type
-           == IBV_TRANSPORT_IWARP)
-               return (IWARP_MAX_PDATA_SIZE - sizeof(struct dapl_pdata_hdr));
-
-       switch (conn_op) {
-       case DAPL_PDATA_CONN_REQ:
-               return IB_MAX_REQ_PDATA_SIZE;
-       case DAPL_PDATA_CONN_REP:
-               return IB_MAX_REP_PDATA_SIZE;
-       case DAPL_PDATA_CONN_REJ:
-               return IB_MAX_REJ_PDATA_SIZE - sizeof(struct dapl_pdata_hdr);
-       case DAPL_PDATA_CONN_DREQ:
-               return IB_MAX_DREQ_PDATA_SIZE;
-       case DAPL_PDATA_CONN_DREP:
-               return IB_MAX_DREP_PDATA_SIZE;
-       default:
-               return 0;
-       }
-}
-
-/*
- * Map all CMA event codes to the DAT equivalent.
- *
- * Several CM events share one DAT event, so entry order matters to any
- * lookup that goes from DAT event to CM event and takes the first match.
- */
-#define DAPL_IB_EVENT_CNT      13
-
-static struct ib_cm_event_map {
-       const ib_cm_events_t ib_cm_event;
-       DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
-       /* 00 */ {IB_CME_CONNECTED,
-                 DAT_CONNECTION_EVENT_ESTABLISHED},
-       /* 01 */ {IB_CME_DISCONNECTED,
-                 DAT_CONNECTION_EVENT_DISCONNECTED},
-       /* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
-                 DAT_CONNECTION_EVENT_DISCONNECTED},
-       /* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING,
-                 DAT_CONNECTION_REQUEST_EVENT},
-       /* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
-                 DAT_CONNECTION_REQUEST_EVENT},
-       /* 05 */ {IB_CME_CONNECTION_REQUEST_ACKED,
-                 DAT_CONNECTION_REQUEST_EVENT},
-       /* 06 */ {IB_CME_DESTINATION_REJECT,
-                 DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-       /* 07 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
-                 DAT_CONNECTION_EVENT_PEER_REJECTED},
-       /* 08 */ {IB_CME_DESTINATION_UNREACHABLE,
-                 DAT_CONNECTION_EVENT_UNREACHABLE},
-       /* 09 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
-                 DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-       /* 10 */ {IB_CME_LOCAL_FAILURE,
-                 DAT_CONNECTION_EVENT_BROKEN},
-       /* 11 */ {IB_CME_BROKEN,
-                 DAT_CONNECTION_EVENT_BROKEN},
-       /* 12 */ {IB_CME_TIMEOUT,
-                 DAT_CONNECTION_EVENT_TIMED_OUT},
-};
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- *     ib_cm_event     event provided to the dapl callback routine
- *     active          switch indicating active or passive connection;
- *                     only used in the debug trace
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_EVENT_NUMBER of translated provider value, 0 when the CM event
- *     has no entry in ib_cm_event_map
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
-                      IN DAT_BOOLEAN active)
-{
-       DAT_EVENT_NUMBER dat_event_num = 0;
-       int idx;
-
-       /* keeps the parameter referenced when debug logging is compiled out */
-       (void)active;
-
-       for (idx = 0; idx < DAPL_IB_EVENT_CNT; idx++) {
-               if (ib_cm_event_map[idx].ib_cm_event != ib_cm_event)
-                       continue;
-               dat_event_num = ib_cm_event_map[idx].dat_event_num;
-               break;
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
-                    "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
-                    active ? "active" : "passive", ib_cm_event, dat_event_num);
-
-       return dat_event_num;
-}
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a provider CM event given a DAT connection event.
- * 
- * Input:
- *     dat_event_num   DAT event we need an equivalent CM event for
- *
- * Output:
- *     none
- *
- * Returns:
- *     ib_cm_event of the first ib_cm_event_map entry carrying that DAT
- *     event, 0 when there is none
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
-       int idx = 0;
-
-       while (idx < DAPL_IB_EVENT_CNT) {
-               if (ib_cm_event_map[idx].dat_event_num == dat_event_num)
-                       return ib_cm_event_map[idx].ib_cm_event;
-               idx++;
-       }
-       return 0;
-}
-
-void dapli_cma_event_cb(void)
-{
-       struct rdma_cm_event *event;
-                               
-       /* process one CM event, fairness, non-blocking */
-       if (!rdma_get_cm_event(g_cm_events, &event)) {
-               struct dapl_cm_id *conn;
-
-               /* set proper conn from cm_id context */
-               if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
-                       conn = (struct dapl_cm_id *)event->listen_id->context;
-               else
-                       conn = (struct dapl_cm_id *)event->id->context;
-
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
-                            event->event, event->id, event->listen_id, conn);
-               
-               /* cm_free is blocked waiting for ack  */
-               dapl_os_lock(&conn->lock);
-               if (!conn->refs) {
-                       dapl_os_unlock(&conn->lock);
-                       rdma_ack_cm_event(event);
-                       return;
-               }
-               conn->refs++;
-               dapl_os_unlock(&conn->lock);
-
-               switch (event->event) {
-               case RDMA_CM_EVENT_ADDR_RESOLVED:
-                       dapli_addr_resolve(conn);
-                       break;
-
-               case RDMA_CM_EVENT_ROUTE_RESOLVED:
-                       dapli_route_resolve(conn);
-                       break;
-
-               case RDMA_CM_EVENT_ADDR_ERROR:
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cma_active: CM ADDR ERROR: ->"
-                                " DST %s retry (%d)..\n",
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->r_addr)->sin_addr),
-                                conn->arp_retries);
-
-                       /* retry address resolution */
-                       if ((--conn->arp_retries) &&
-                           (event->status == -ETIMEDOUT)) {
-                               int ret;
-                               ret = rdma_resolve_addr(conn->cm_id, NULL,
-                                                       (struct sockaddr *)
-                                                       &conn->r_addr,
-                                                       conn->arp_timeout);
-                               if (!ret)
-                                       break;
-                               else {
-                                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
-                                                    " ERROR: rdma_resolve_addr = "
-                                                    "%d %s\n",
-                                                    ret, strerror(errno));
-                               }
-                       }
-                       /* retries exhausted or resolve_addr failed */
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                "dapl_cma_active: ARP_ERR, retries(%d)"
-                                " exhausted -> DST %s,%d\n",
-                                IB_ARP_RETRY_COUNT,
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->cm_id->route.addr.dst_addr)->
-                                          sin_addr),
-                                ntohs(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.dst_addr)->
-                                      sin_port));
-
-                       dapl_evd_connection_callback(conn,
-                                                    IB_CME_DESTINATION_UNREACHABLE,
-                                                    NULL, conn->ep);
-                       break;
-
-               case RDMA_CM_EVENT_ROUTE_ERROR:
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cma_active: CM ROUTE ERROR: ->"
-                                " DST %s retry (%d)..\n",
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->r_addr)->sin_addr),
-                                conn->route_retries);
-
-                       /* retry route resolution */
-                       if ((--conn->route_retries) &&
-                           (event->status == -ETIMEDOUT))
-                               dapli_addr_resolve(conn);
-                       else {
-                               dapl_log(DAPL_DBG_TYPE_ERR,
-                                        "dapl_cma_active: PATH_RECORD_ERR,"
-                                        " retries(%d) exhausted, DST %s,%d\n",
-                                        IB_ROUTE_RETRY_COUNT,
-                                        inet_ntoa(((struct sockaddr_in *)
-                                                   &conn->cm_id->route.addr.
-                                                   dst_addr)->sin_addr),
-                                        ntohs(((struct sockaddr_in *)
-                                               &conn->cm_id->route.addr.
-                                               dst_addr)->sin_port));
-
-                               dapl_evd_connection_callback(conn,
-                                                            IB_CME_DESTINATION_UNREACHABLE,
-                                                            NULL, conn->ep);
-                       }
-                       break;
-
-               case RDMA_CM_EVENT_DEVICE_REMOVAL:
-                       dapl_evd_connection_callback(conn,
-                                                    IB_CME_LOCAL_FAILURE,
-                                                    NULL, conn->ep);
-                       break;
-               case RDMA_CM_EVENT_CONNECT_REQUEST:
-               case RDMA_CM_EVENT_CONNECT_ERROR:
-               case RDMA_CM_EVENT_UNREACHABLE:
-               case RDMA_CM_EVENT_REJECTED:
-               case RDMA_CM_EVENT_ESTABLISHED:
-               case RDMA_CM_EVENT_DISCONNECTED:
-                       /* passive or active */
-                       if (conn->sp)
-                               dapli_cm_passive_cb(conn, event);
-                       else
-                               dapli_cm_active_cb(conn, event);
-                       break;
-               case RDMA_CM_EVENT_CONNECT_RESPONSE:
-#ifdef RDMA_CM_EVENT_TIMEWAIT_EXIT
-               case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-#endif
-                       break;
-               default:
-                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                                    " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",
-                                    event->event, event->id,
-                                    event->id->context);
-                       break;
-               }
-               
-               /* ack event, unblocks destroy_cm_id in consumer threads */
-               rdma_ack_cm_event(event);
-
-               dapl_os_lock(&conn->lock);
-                conn->refs--;
-               dapl_os_unlock(&conn->lock);
-       } 
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
+/*\r
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.\r
+ * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.\r
+ * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. \r
+ * Copyright (c) 2003 Topspin Corporation.  All rights reserved. \r
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.\r
+ *\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ *    copy of which is available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+\r
+/**********************************************************************\r
+ *\r
+ * MODULE: dapl_ib_cm.c\r
+ *\r
+ * PURPOSE: The OFED provider - uCMA, name and route resolution\r
+ *\r
+ * $Id: $\r
+ *\r
+ **********************************************************************/\r
+\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+#include "dapl_evd_util.h"\r
+#include "dapl_cr_util.h"\r
+#include "dapl_name_service.h"\r
+#include "dapl_ib_util.h"\r
+#include "dapl_vendor.h"\r
+#include "dapl_osd.h"\r
+\r
+extern struct rdma_event_channel *g_cm_events;\r
+\r
+/* local prototypes */\r
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,\r
+                                        struct rdma_cm_event *event);\r
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,\r
+                              struct rdma_cm_event *event);\r
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,\r
+                               struct rdma_cm_event *event);\r
+static void dapli_addr_resolve(struct dapl_cm_id *conn);\r
+static void dapli_route_resolve(struct dapl_cm_id *conn);\r
+\r
+/*\r
+ * cma requires 16 bit SID, in network order.\r
+ *\r
+ * SID_TO_PORT: a service ID that fits in 16 bits is used as the port\r
+ * unchanged; a larger one is folded into the range starting at\r
+ * IB_PORT_BASE.  The argument is parenthesised at every use so that an\r
+ * expression argument is evaluated as a whole; it is still evaluated\r
+ * more than once, so it must be free of side effects.\r
+ */\r
+#define IB_PORT_MOD 32001\r
+#define IB_PORT_BASE (65535 - IB_PORT_MOD)\r
+#define SID_TO_PORT(SID) \\r
+    ((SID) > 0xffff ? \\r
+    htons((unsigned short)(((SID) % IB_PORT_MOD) + IB_PORT_BASE)) :\\r
+    htons((unsigned short)(SID)))\r
+\r
+#define PORT_TO_SID(p) ntohs(p)\r
+\r
+/*\r
+ * private data header to validate consumer rejects versus abnormal events\r
+ *\r
+ * dapls_ib_reject_connection() places this header in front of the\r
+ * consumer's private data in every reject it sends.\r
+ */\r
+struct dapl_pdata_hdr {\r
+       DAT_UINT32 version;     /* DAT major/minor and provider major/minor, one byte each, stored with htonl() */\r
+};\r
+\r
+static void dapli_addr_resolve(struct dapl_cm_id *conn)\r
+{\r
+       int ret;\r
+#ifdef DAPL_DBG\r
+       struct rdma_addr *ipaddr = &conn->cm_id->route.addr;\r
+#endif\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " addr_resolve: cm_id %p SRC %x DST %x\n",\r
+                    conn->cm_id, ntohl(((struct sockaddr_in *)\r
+                                        &ipaddr->src_addr)->sin_addr.s_addr),\r
+                    ntohl(((struct sockaddr_in *)\r
+                           &ipaddr->dst_addr)->sin_addr.s_addr));\r
+\r
+       ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);\r
+       if (ret) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",\r
+                        ret, strerror(errno));\r
+               dapl_evd_connection_callback(conn,\r
+                                            IB_CME_LOCAL_FAILURE,\r
+                                            NULL, conn->ep);\r
+       }\r
+}\r
+\r
+static void dapli_route_resolve(struct dapl_cm_id *conn)\r
+{\r
+       int ret;\r
+#ifdef DAPL_DBG\r
+       struct rdma_addr *ipaddr = &conn->cm_id->route.addr;\r
+       struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;\r
+#endif\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",\r
+                    conn->cm_id, ntohl(((struct sockaddr_in *)\r
+                                        &ipaddr->src_addr)->sin_addr.s_addr),\r
+                    ntohl(((struct sockaddr_in *)\r
+                           &ipaddr->dst_addr)->sin_addr.s_addr),\r
+                    ntohs(((struct sockaddr_in *)\r
+                           &ipaddr->dst_addr)->sin_port));\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " route_resolve: SRC GID subnet %016llx id %016llx\n",\r
+                    (unsigned long long)\r
+                    ntohll(ibaddr->sgid.global.subnet_prefix),\r
+                    (unsigned long long)\r
+                    ntohll(ibaddr->sgid.global.interface_id));\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " route_resolve: DST GID subnet %016llx id %016llx\n",\r
+                    (unsigned long long)\r
+                    ntohll(ibaddr->dgid.global.subnet_prefix),\r
+                    (unsigned long long)\r
+                    ntohll(ibaddr->dgid.global.interface_id));\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",\r
+                    conn->cm_id,\r
+                    conn->params.private_data,\r
+                    conn->params.private_data_len,\r
+                    conn->params.responder_resources,\r
+                    conn->params.initiator_depth);\r
+\r
+       ret = rdma_connect(conn->cm_id, &conn->params);\r
+       if (ret) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " dapl_cma_connect: rdma_connect ERR %d %s\n",\r
+                        ret, strerror(errno));\r
+               goto bail;\r
+       }\r
+       return;\r
+\r
+      bail:\r
+       dapl_evd_connection_callback(conn,\r
+                                    IB_CME_LOCAL_FAILURE, NULL, conn->ep);\r
+}\r
+\r
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)\r
+{\r
+       dp_ib_cm_handle_t conn;\r
+       struct rdma_cm_id *cm_id;\r
+\r
+       /* Allocate CM and initialize lock */\r
+       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)\r
+               return NULL;\r
+\r
+       dapl_os_memzero(conn, sizeof(*conn));\r
+       dapl_os_lock_init(&conn->lock);\r
+       conn->refs++;\r
+\r
+       /* create CM_ID, bind to local device, create QP */\r
+       if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {\r
+               dapl_os_free(conn, sizeof(*conn));\r
+               return NULL;\r
+       }\r
+       conn->cm_id = cm_id;\r
+\r
+       /* setup timers for address and route resolution */\r
+       conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",\r
+                                               IB_ARP_TIMEOUT);\r
+       conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",\r
+                                               IB_ARP_RETRY_COUNT);\r
+       conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",\r
+                                                 IB_ROUTE_TIMEOUT);\r
+       conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",\r
+                                                 IB_ROUTE_RETRY_COUNT);\r
+       if (ep != NULL) {\r
+               conn->ep = ep;\r
+               conn->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;\r
+       }\r
+\r
+       return conn;\r
+}\r
+\r
+/* \r
+ * Only called from consumer thread via dat_ep_free()\r
+ * accept, reject, or connect.\r
+ * Cannot be called from callback thread.\r
+ * rdma_destroy_id will block until rdma_get_cm_event is acked.\r
+ */\r
+void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " destroy_conn: conn %p id %d\n", \r
+                    conn, conn->cm_id);\r
+\r
+       dapl_os_lock(&conn->lock);\r
+       conn->refs--;\r
+       dapl_os_unlock(&conn->lock);\r
+\r
+       /* block until event thread complete */\r
+       while (conn->refs) \r
+               dapl_os_sleep_usec(10000);\r
+       \r
+       if (ep) {\r
+               ep->cm_handle = NULL;\r
+               ep->qp_handle = NULL;\r
+               ep->qp_state = IB_QP_STATE_ERROR;\r
+       }\r
+\r
+       if (conn->cm_id) {\r
+               if (conn->cm_id->qp)\r
+                       rdma_destroy_qp(conn->cm_id);\r
+               rdma_destroy_id(conn->cm_id);\r
+       }\r
+\r
+       dapl_os_free(conn, sizeof(*conn));\r
+}\r
+\r
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,\r
+                                        struct rdma_cm_event *event)\r
+{\r
+       struct dapl_cm_id *new_conn;\r
+#ifdef DAPL_DBG\r
+       struct rdma_addr *ipaddr = &event->id->route.addr;\r
+#endif\r
+\r
+       if (conn->sp == NULL) {\r
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+                            " dapli_rep_recv: on invalid listen " "handle\n");\r
+               return NULL;\r
+       }\r
+\r
+       /* allocate new cm_id and merge listen parameters */\r
+       new_conn = dapl_os_alloc(sizeof(*new_conn));\r
+       if (new_conn) {\r
+               (void)dapl_os_memzero(new_conn, sizeof(*new_conn));\r
+               dapl_os_lock_init(&new_conn->lock);\r
+               new_conn->cm_id = event->id;    /* provided by uCMA */\r
+               event->id->context = new_conn;  /* update CM_ID context */\r
+               new_conn->sp = conn->sp;\r
+               new_conn->hca = conn->hca;\r
+               new_conn->refs++;\r
+\r
+               /* Get requesters connect data, setup for accept */\r
+               new_conn->params.responder_resources =\r
+                   DAPL_MIN(event->param.conn.responder_resources,\r
+                            conn->hca->ib_trans.rd_atom_in);\r
+               new_conn->params.initiator_depth =\r
+                   DAPL_MIN(event->param.conn.initiator_depth,\r
+                            conn->hca->ib_trans.rd_atom_out);\r
+\r
+               new_conn->params.flow_control = event->param.conn.flow_control;\r
+               new_conn->params.rnr_retry_count =\r
+                   event->param.conn.rnr_retry_count;\r
+               new_conn->params.retry_count = event->param.conn.retry_count;\r
+\r
+               /* save private data */\r
+               if (event->param.conn.private_data_len) {\r
+                       dapl_os_memcpy(new_conn->p_data,\r
+                                      event->param.conn.private_data,\r
+                                      event->param.conn.private_data_len);\r
+                       new_conn->params.private_data = new_conn->p_data;\r
+                       new_conn->params.private_data_len =\r
+                           event->param.conn.private_data_len;\r
+               }\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "\r
+                            "REQ: SP %p PORT %d LID %d "\r
+                            "NEW CONN %p ID %p pdata %p,%d\n",\r
+                            new_conn->sp, ntohs(((struct sockaddr_in *)\r
+                                                 &ipaddr->src_addr)->sin_port),\r
+                            event->listen_id, new_conn, event->id,\r
+                            event->param.conn.private_data,\r
+                            event->param.conn.private_data_len);\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "\r
+                            "REQ: IP SRC %x PORT %d DST %x PORT %d "\r
+                            "rr %d init %d\n", ntohl(((struct sockaddr_in *)\r
+                                                      &ipaddr->src_addr)->\r
+                                                     sin_addr.s_addr),\r
+                            ntohs(((struct sockaddr_in *)\r
+                                   &ipaddr->src_addr)->sin_port),\r
+                            ntohl(((struct sockaddr_in *)\r
+                                   &ipaddr->dst_addr)->sin_addr.s_addr),\r
+                            ntohs(((struct sockaddr_in *)\r
+                                   &ipaddr->dst_addr)->sin_port),\r
+                            new_conn->params.responder_resources,\r
+                            new_conn->params.initiator_depth);\r
+       }\r
+       return new_conn;\r
+}\r
+\r
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,\r
+                              struct rdma_cm_event *event)\r
+{\r
+       DAPL_OS_LOCK *lock = &conn->lock;\r
+       ib_cm_events_t ib_cm_event;\r
+       const void *pdata = NULL;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " active_cb: conn %p id %d event %d\n",\r
+                    conn, conn->cm_id, event->event);\r
+\r
+       /* Active-side CM event handler: maps the rdma_cm event to an\r
+        * ib_cm_events_t code and passes it, with any private data, to\r
+        * dapl_evd_connection_callback() once the conn lock is dropped.\r
+        *\r
+        * There is a chance that we can get events after\r
+        * the consumer calls disconnect in a pending state\r
+        * since the IB CM and uDAPL states are not shared.\r
+        * In some cases, IB CM could generate either a DCONN\r
+        * or CONN_ERR after the consumer returned from\r
+        * dapl_ep_disconnect with a DISCONNECTED event\r
+        * already queued. Check state here and bail to\r
+        * avoid any events after a disconnect.\r
+        *\r
+        * The EP state is examined (and, for a DISCONNECTED event,\r
+        * updated) under the EP header lock; that lock is released\r
+        * before the conn lock is taken, so the two are never held\r
+        * together in this function.\r
+        */\r
+       if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))\r
+               return;\r
+\r
+       dapl_os_lock(&conn->ep->header.lock);\r
+       if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {\r
+               dapl_os_unlock(&conn->ep->header.lock);\r
+               return;\r
+       }\r
+       if (event->event == RDMA_CM_EVENT_DISCONNECTED)\r
+               conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;\r
+\r
+       dapl_os_unlock(&conn->ep->header.lock);\r
+       dapl_os_lock(lock);\r
+\r
+       switch (event->event) {\r
+       case RDMA_CM_EVENT_UNREACHABLE:\r
+       case RDMA_CM_EVENT_CONNECT_ERROR:\r
+               dapl_log(DAPL_DBG_TYPE_WARN,\r
+                        "dapl_cma_active: CONN_ERR event=0x%x"\r
+                        " status=%d %s DST %s, %d\n",\r
+                        event->event, event->status,\r
+                        (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",\r
+                        inet_ntoa(((struct sockaddr_in *)\r
+                                   &conn->cm_id->route.addr.dst_addr)->\r
+                                  sin_addr),\r
+                        ntohs(((struct sockaddr_in *)\r
+                               &conn->cm_id->route.addr.dst_addr)->\r
+                              sin_port));\r
+\r
+               /* per DAT SPEC provider always returns UNREACHABLE */\r
+               ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;\r
+               break;\r
+       case RDMA_CM_EVENT_REJECTED:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                            " dapli_cm_active_handler: REJECTED reason=%d\n",\r
+                            event->status);\r
+\r
+               /* valid REJ from consumer will always contain private data;\r
+                * the consumer payload follows a struct dapl_pdata_hdr, so\r
+                * pdata is advanced past that header.\r
+                * NOTE(review): status 28 is presumably the IB CM\r
+                * "consumer defined" reject reason -- TODO confirm.\r
+                */\r
+               if (event->status == 28 &&\r
+                   event->param.conn.private_data_len) {\r
+                       ib_cm_event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;\r
+                       pdata =\r
+                           (unsigned char *)event->param.conn.\r
+                           private_data +\r
+                           sizeof(struct dapl_pdata_hdr);\r
+               } else {\r
+                       ib_cm_event = IB_CME_DESTINATION_REJECT;\r
+                       dapl_log(DAPL_DBG_TYPE_WARN,\r
+                                "dapl_cma_active: non-consumer REJ,"\r
+                                " reason=%d, DST %s, %d\n",\r
+                                event->status,\r
+                                inet_ntoa(((struct sockaddr_in *)\r
+                                           &conn->cm_id->route.addr.\r
+                                           dst_addr)->sin_addr),\r
+                                ntohs(((struct sockaddr_in *)\r
+                                       &conn->cm_id->route.addr.\r
+                                       dst_addr)->sin_port));\r
+               }\r
+               break;\r
+       case RDMA_CM_EVENT_ESTABLISHED:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                            " active_cb: cm_id %d PORT %d CONNECTED to %s!\n",\r
+                            conn->cm_id, ntohs(((struct sockaddr_in *)\r
+                                                &conn->cm_id->route.addr.\r
+                                                dst_addr)->sin_port),\r
+                            inet_ntoa(((struct sockaddr_in *)\r
+                                       &conn->cm_id->route.addr.dst_addr)->\r
+                                      sin_addr));\r
+\r
+               /* setup local and remote ports for ep query */\r
+               conn->ep->param.remote_port_qual =\r
+                   PORT_TO_SID(rdma_get_dst_port(conn->cm_id));\r
+               conn->ep->param.local_port_qual =\r
+                   PORT_TO_SID(rdma_get_src_port(conn->cm_id));\r
+\r
+               ib_cm_event = IB_CME_CONNECTED;\r
+               pdata = event->param.conn.private_data;\r
+               break;\r
+       case RDMA_CM_EVENT_DISCONNECTED:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                            " active_cb: DISC EVENT - EP %p\n",conn->ep);\r
+               rdma_disconnect(conn->cm_id);   /* required for DREP */\r
+               ib_cm_event = IB_CME_DISCONNECTED;\r
+               /* validate EP handle; clearing conn suppresses the\r
+                * callback at the end of this function */\r
+               if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))\r
+                       conn = NULL;\r
+               break;\r
+       default:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+                            " dapli_cm_active_cb_handler: Unexpected CM "\r
+                            "event %d on ID 0x%p\n", event->event,\r
+                            conn->cm_id);\r
+               conn = NULL;\r
+               break;\r
+       }\r
+\r
+       dapl_os_unlock(lock);\r
+       if (conn)\r
+               dapl_evd_connection_callback(conn, ib_cm_event, pdata, conn->ep);\r
+}\r
+\r
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,\r
+                               struct rdma_cm_event *event)\r
+{\r
+       ib_cm_events_t ib_cm_event;\r
+       struct dapl_cm_id *conn_recv = conn;\r
+       const void *pdata = NULL;\r
+       \r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " passive_cb: conn %p id %d event %d\n",\r
+                    conn, event->id, event->event);\r
+\r
+       dapl_os_lock(&conn->lock);\r
+\r
+       switch (event->event) {\r
+       case RDMA_CM_EVENT_CONNECT_REQUEST:\r
+               /* create new conn object with new conn_id from event;\r
+                * a NULL result leaves conn_recv NULL, which skips the\r
+                * dapls_cr_callback() call at the end of this function */\r
+               conn_recv = dapli_req_recv(conn, event);\r
+               ib_cm_event = IB_CME_CONNECTION_REQUEST_PENDING;\r
+               pdata = event->param.conn.private_data;\r
+               break;\r
+       case RDMA_CM_EVENT_UNREACHABLE:\r
+       case RDMA_CM_EVENT_CONNECT_ERROR:\r
+               dapl_log(DAPL_DBG_TYPE_WARN,\r
+                        "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"\r
+                        " DST %s,%d\n",\r
+                        event->event, event->status,\r
+                        (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",\r
+                        inet_ntoa(((struct sockaddr_in *)\r
+                                   &conn->cm_id->route.addr.dst_addr)->\r
+                                  sin_addr), ntohs(((struct sockaddr_in *)\r
+                                                    &conn->cm_id->route.addr.\r
+                                                    dst_addr)->sin_port));\r
+               ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;\r
+               break;\r
+       case RDMA_CM_EVENT_REJECTED:\r
+               /* will always be an abnormal, non-consumer reject from the\r
+                * active side */\r
+               dapl_log(DAPL_DBG_TYPE_WARN,\r
+                        "dapl_cm_passive: non-consumer REJ, reason=%d,"\r
+                        " DST %s, %d\n",\r
+                        event->status,\r
+                        inet_ntoa(((struct sockaddr_in *)\r
+                                   &conn->cm_id->route.addr.dst_addr)->\r
+                                  sin_addr),\r
+                        ntohs(((struct sockaddr_in *)\r
+                               &conn->cm_id->route.addr.dst_addr)->\r
+                              sin_port));\r
+               ib_cm_event = IB_CME_DESTINATION_REJECT;\r
+               break;\r
+       case RDMA_CM_EVENT_ESTABLISHED:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                            " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",\r
+                            conn->cm_id, ntohs(((struct sockaddr_in *)\r
+                                                &conn->cm_id->route.addr.\r
+                                                src_addr)->sin_port),\r
+                            ntohl(((struct sockaddr_in *)\r
+                                   &conn->cm_id->route.addr.dst_addr)->\r
+                                  sin_addr.s_addr));\r
+               ib_cm_event = IB_CME_CONNECTED;\r
+               break;\r
+       case RDMA_CM_EVENT_DISCONNECTED:\r
+               rdma_disconnect(conn->cm_id);   /* required for DREP */\r
+               ib_cm_event = IB_CME_DISCONNECTED;\r
+               /* validate SP handle context: the callback is suppressed\r
+                * only when the SP is neither a valid PSP nor a valid RSP */\r
+               if (DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) &&\r
+                   DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))\r
+                       conn_recv = NULL;\r
+               break;\r
+       default:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "\r
+                            "Unexpected CM event %d on ID 0x%p\n",\r
+                            event->event, conn->cm_id);\r
+               conn_recv = NULL;\r
+               break;\r
+       }\r
+\r
+       dapl_os_unlock(&conn->lock);\r
+       if (conn_recv)\r
+               dapls_cr_callback(conn_recv, ib_cm_event, pdata, conn_recv->sp);\r
+}\r
+\r
+/************************ DAPL provider entry points **********************/\r
+\r
+/*\r
+ * dapls_ib_connect\r
+ *\r
+ * Initiate a connection with the passive listener on another node.\r
+ * Only address resolution is started here; the request is issued\r
+ * through the cm_id already attached to the endpoint.\r
+ *\r
+ * Input:\r
+ *     ep_handle       endpoint to connect; its cm_handle is used\r
+ *     r_addr          remote IA address (copied for retry attempts)\r
+ *     r_qual          remote connection qualifier, mapped to a port\r
+ *     p_size          size of private data\r
+ *     p_data          pointer to private data, copied into the CM object\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS     also returned when ep_handle is NULL\r
+ *     otherwise the dapl_convert_errno() mapping of the errno left\r
+ *     by a failed rdma_resolve_addr()\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,\r
+                           IN DAT_IA_ADDRESS_PTR r_addr,\r
+                           IN DAT_CONN_QUAL r_qual,\r
+                           IN DAT_COUNT p_size, IN void *p_data)\r
+{\r
+       struct dapl_ep *ep_ptr = ep_handle;\r
+       struct dapl_cm_id *conn;\r
+       int ret;\r
+\r
+       /* Sanity check: must come before any dereference of ep_ptr */\r
+       if (NULL == ep_ptr)\r
+               return DAT_SUCCESS;\r
+\r
+       /* rdma conn and cm_id pre-bound; reference via ep_ptr->cm_handle */\r
+       conn = ep_ptr->cm_handle;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",\r
+                    r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);\r
+\r
+       /* Setup QP/CM parameters and private data in cm_id */\r
+       (void)dapl_os_memzero(&conn->params, sizeof(conn->params));\r
+       conn->params.responder_resources =\r
+           ep_ptr->param.ep_attr.max_rdma_read_in;\r
+       conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;\r
+       conn->params.flow_control = 1;\r
+       conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;\r
+       conn->params.retry_count = IB_RC_RETRY_COUNT;\r
+\r
+       /* NOTE(review): p_size is not checked against the capacity of\r
+        * conn->p_data here -- confirm callers validate it first.\r
+        */\r
+       if (p_size) {\r
+               dapl_os_memcpy(conn->p_data, p_data, p_size);\r
+               conn->params.private_data = conn->p_data;\r
+               conn->params.private_data_len = p_size;\r
+       }\r
+\r
+       /* copy in remote address, need a copy for retry attempts */\r
+       dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));\r
+\r
+       /* Resolve remote address, src already bound during QP create */\r
+       ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);\r
+       ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;\r
+\r
+       ret = rdma_resolve_addr(conn->cm_id, NULL,\r
+                               (struct sockaddr *)&conn->r_addr,\r
+                               conn->arp_timeout);\r
+       if (ret) {\r
+               /* capture errno before logging can overwrite it */\r
+               int err = errno;\r
+\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",\r
+                        ret, strerror(err));\r
+               return dapl_convert_errno(err, "ib_connect");\r
+       }\r
+\r
+       /* sin_port is stored in network order; convert for display */\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " connect: resolve_addr: cm_id %p -> %s port %d\n",\r
+                    conn->cm_id,\r
+                    inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),\r
+                    ntohs(((struct sockaddr_in *)&conn->r_addr)->sin_port));\r
+\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_disconnect\r
+ *\r
+ * Disconnect an EP\r
+ *\r
+ * Input:\r
+ *     ep_handle,\r
+ *     disconnect_flags\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)\r
+{\r
+       dp_ib_cm_handle_t conn = ep_ptr->cm_handle;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " disconnect(ep %p, conn %p, id %d flags %x)\n",\r
+                    ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);\r
+\r
+       if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))\r
+               return DAT_SUCCESS;\r
+\r
+       /* no graceful half-pipe disconnect option */\r
+       rdma_disconnect(conn->cm_id);\r
+\r
+       /* \r
+        * DAT event notification occurs from the callback\r
+        * Note: will fire even if DREQ goes unanswered on timeout \r
+        */\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_disconnect_clean\r
+ *\r
+ * Clean up outstanding connection data. This routine is invoked\r
+ * after the final disconnect callback has occurred. Only on the\r
+ * ACTIVE side of a connection.\r
+ *\r
+ * Input:\r
+ *     ep_ptr          DAPL_EP\r
+ *     active          Indicates active side of connection\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     void\r
+ *\r
+ */\r
+void\r
+dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,\r
+                         IN DAT_BOOLEAN active,\r
+                         IN const ib_cm_events_t ib_cm_event)\r
+{\r
+       /* nothing to do */\r
+       return;\r
+}\r
+\r
+/*\r
+ * dapl_ib_setup_conn_listener\r
+ *\r
+ * Have the CM set up a connection listener.\r
+ *\r
+ * Input:\r
+ *     ibm_hca_handle          HCA handle\r
+ *     qp_handle                       QP handle\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_INSUFFICIENT_RESOURCES\r
+ *     DAT_INTERNAL_ERROR\r
+ *     DAT_CONN_QUAL_UNAVAILBLE\r
+ *     DAT_CONN_QUAL_IN_USE\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,\r
+                            IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)\r
+{\r
+       DAT_RETURN dat_status = DAT_SUCCESS;\r
+       ib_cm_srvc_handle_t conn;\r
+       DAT_SOCK_ADDR6 addr;    /* local binding address */\r
+\r
+       /* Allocate CM and initialize lock */\r
+       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)\r
+               return DAT_INSUFFICIENT_RESOURCES;\r
+\r
+       dapl_os_memzero(conn, sizeof(*conn));\r
+       dapl_os_lock_init(&conn->lock);\r
+       conn->refs++;\r
+\r
+       /* create CM_ID, bind to local device, create QP */\r
+       if (rdma_create_id\r
+           (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {\r
+               dapl_os_free(conn, sizeof(*conn));\r
+               return (dapl_convert_errno(errno, "setup_listener"));\r
+       }\r
+\r
+       /* open identifies the local device; per DAT specification */\r
+       /* Get family and address then set port to consumer's ServiceID */\r
+       dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));\r
+       ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);\r
+\r
+       if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {\r
+               if ((errno == EBUSY) || (errno == EADDRINUSE) || \r
+                   (errno == EADDRNOTAVAIL))\r
+                       dat_status = DAT_CONN_QUAL_IN_USE;\r
+               else\r
+                       dat_status =\r
+                           dapl_convert_errno(errno, "setup_listener");\r
+               goto bail;\r
+       }\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %d)\n",\r
+                    ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),\r
+                    sp_ptr, conn, conn->cm_id);\r
+\r
+       sp_ptr->cm_srvc_handle = conn;\r
+       conn->sp = sp_ptr;\r
+       conn->hca = ia_ptr->hca_ptr;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                    " listen(conn=%p cm_id=%d)\n",\r
+                    sp_ptr->cm_srvc_handle, conn->cm_id);\r
+\r
+       if (rdma_listen(conn->cm_id, 0)) {      /* max cma backlog */\r
+\r
+               if ((errno == EBUSY) || (errno == EADDRINUSE) ||\r
+                   (errno == EADDRNOTAVAIL))\r
+                       dat_status = DAT_CONN_QUAL_IN_USE;\r
+               else\r
+                       dat_status =\r
+                           dapl_convert_errno(errno, "setup_listener");\r
+               goto bail;\r
+       }\r
+\r
+       /* success */\r
+       return DAT_SUCCESS;\r
+\r
+      bail:\r
+       rdma_destroy_id(conn->cm_id);\r
+       dapl_os_free(conn, sizeof(*conn));\r
+       return dat_status;\r
+}\r
+\r
+/*\r
+ * dapl_ib_remove_conn_listener\r
+ *\r
+ * Have the CM remove a connection listener.\r
+ *\r
+ * Input:\r
+ *     ia_handle               IA handle\r
+ *     ServiceID               IB Channel Service ID\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_INVALID_STATE\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)\r
+{\r
+       ib_cm_srvc_handle_t conn = sp_ptr->cm_srvc_handle;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",\r
+                    ia_ptr, sp_ptr, conn);\r
+\r
+       if (conn != IB_INVALID_HANDLE) {\r
+               sp_ptr->cm_srvc_handle = NULL;\r
+               dapls_ib_cm_free(conn, NULL);\r
+       }\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_accept_connection\r
+ *\r
+ * Perform necessary steps to accept a connection: allocate the QP if\r
+ * the EP is still unattached, move the EP's QP onto the cm_id that\r
+ * carried the request, call rdma_accept, and make that CM object the\r
+ * EP's cm_handle.\r
+ *\r
+ * Input:\r
+ *     cr_handle       connection request; supplies ib_cm_handle\r
+ *     ep_handle       endpoint that will own the connection\r
+ *     p_size          private data size, at most IB_MAX_REP_PDATA_SIZE\r
+ *     p_data          private data; referenced, not copied, for the\r
+ *                     rdma_accept call\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_LENGTH_ERROR        p_size exceeds IB_MAX_REP_PDATA_SIZE\r
+ *     DAT_INSUFFICIENT_RESOURCES\r
+ *     DAT_INTERNAL_ERROR      EP and CR cm_ids differ in device or port\r
+ *\r
+ * Every failure path rejects the request with no private data and\r
+ * passes the CR's CM object to dapls_ib_cm_free.\r
+ *\r
+ * NOTE(review): if rdma_accept fails, ep_ptr->cm_handle still refers\r
+ * to the EP CM object already handed to dapls_ib_cm_free, and the QP\r
+ * pointer has already been moved to the CR cm_id -- confirm callers\r
+ * do not reuse the EP after a failed accept.\r
+ * NOTE(review): the failure from rdma_accept is converted from ret,\r
+ * while other call sites in this file convert errno -- confirm which\r
+ * one carries the error code.\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,\r
+                          IN DAT_EP_HANDLE ep_handle,\r
+                          IN DAT_COUNT p_size, IN const DAT_PVOID p_data)\r
+{\r
+       DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;\r
+       DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;\r
+       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;\r
+       struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;\r
+       int ret;\r
+       DAT_RETURN dat_status;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",\r
+                    cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);\r
+\r
+       /* Refuse private data larger than the reply limit */\r
+       if (p_size > IB_MAX_REP_PDATA_SIZE) {\r
+               dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);\r
+               goto bail;\r
+       }\r
+\r
+       if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {\r
+               /* \r
+                * If we are lazy attaching the QP then we may need to\r
+                * hook it up here. Typically, we run this code only for\r
+                * DAT_PSP_PROVIDER_FLAG\r
+                */\r
+               dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);\r
+               if (dat_status != DAT_SUCCESS) {\r
+                       dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                " dapl_cma_accept: qp_alloc ERR %d\n",\r
+                                dat_status);\r
+                       goto bail;\r
+               }\r
+       }\r
+\r
+       /* \r
+        * Validate device and port in EP cm_id against inbound \r
+        * CR cm_id. The pre-allocated EP cm_id is already bound to \r
+        * a local device (cm_id and QP) when created. Move the QP\r
+        * to the new cm_id only if device and port numbers match.\r
+        */\r
+       if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&\r
+           ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {\r
+               /* move QP to new cr_conn, remove QP ref in EP cm_id so\r
+                * the QP pointer is no longer held by the EP cm_id when\r
+                * it is passed to dapls_ib_cm_free */\r
+               cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;\r
+               ep_ptr->cm_handle->cm_id->qp = NULL;\r
+               dapls_ib_cm_free(ep_ptr->cm_handle, NULL);\r
+       } else {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " dapl_cma_accept: ERR dev(%p!=%p) or"\r
+                        " port mismatch(%d!=%d)\n",\r
+                        ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,\r
+                        ntohs(ep_ptr->cm_handle->cm_id->port_num),\r
+                        ntohs(cr_conn->cm_id->port_num));\r
+               dat_status = DAT_INTERNAL_ERROR;\r
+               goto bail;\r
+       }\r
+\r
+       cr_ptr->param.local_ep_handle = ep_handle;\r
+       cr_conn->params.private_data = p_data;\r
+       cr_conn->params.private_data_len = p_size;\r
+\r
+       ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);\r
+       if (ret) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",\r
+                        ret, strerror(errno));\r
+               dat_status = dapl_convert_errno(ret, "accept");\r
+               goto bail;\r
+       }\r
+\r
+       /* save accepted conn and EP reference, qp_handle unchanged */\r
+       ep_ptr->cm_handle = cr_conn;\r
+       cr_conn->ep = ep_ptr;\r
+\r
+       /* setup local and remote ports for ep query */\r
+       /* Note: port qual in network order */\r
+       ep_ptr->param.remote_port_qual =\r
+           PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));\r
+       ep_ptr->param.local_port_qual =\r
+           PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));\r
+\r
+       return DAT_SUCCESS;\r
+      bail:\r
+       rdma_reject(cr_conn->cm_id, NULL, 0);\r
+       dapls_ib_cm_free(cr_conn, NULL);\r
+       return dat_status;\r
+}\r
+\r
+/*\r
+ * dapls_ib_reject_connection\r
+ *\r
+ * Reject a connection\r
+ *\r
+ * Input:\r
+ *     cr_handle\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_INTERNAL_ERROR\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,\r
+                          IN int reason,\r
+                          IN DAT_COUNT private_data_size,\r
+                          IN const DAT_PVOID private_data)\r
+{\r
+       int ret;\r
+       int offset = sizeof(struct dapl_pdata_hdr);\r
+       struct dapl_pdata_hdr pdata_hdr;\r
+\r
+       memset(&pdata_hdr, 0, sizeof pdata_hdr);\r
+       pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |\r
+                                 (DAT_VERSION_MINOR << 16) |\r
+                                 (VN_PROVIDER_MAJOR << 8) |\r
+                                 (VN_PROVIDER_MINOR));\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",\r
+                    cm_handle, reason, ntohl(pdata_hdr.version),\r
+                    private_data, private_data_size);\r
+\r
+       if (cm_handle == IB_INVALID_HANDLE) {\r
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+                            " reject: invalid handle: reason %d\n", reason);\r
+               return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);\r
+       }\r
+\r
+       if (private_data_size >\r
+           dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,\r
+                                      cm_handle->hca))\r
+               return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);\r
+\r
+       /* setup pdata_hdr and users data, in CR pdata buffer */\r
+       dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);\r
+       if (private_data_size)\r
+               dapl_os_memcpy(cm_handle->p_data + offset,\r
+                              private_data, private_data_size);\r
+\r
+       /*\r
+        * Always some private data with reject so active peer can\r
+        * determine real application reject from an abnormal \r
+        * application termination\r
+        */\r
+       ret = rdma_reject(cm_handle->cm_id,\r
+                         cm_handle->p_data, offset + private_data_size);\r
+\r
+       dapls_ib_cm_free(cm_handle, NULL);\r
+       return dapl_convert_errno(ret, "reject");\r
+}\r
+\r
+/*\r
+ * dapls_ib_cm_remote_addr\r
+ *\r
+ * Obtain the remote IP address given a connection\r
+ *\r
+ * Input:\r
+ *     cr_handle\r
+ *\r
+ * Output:\r
+ *     remote_ia_address: where to place the remote address\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_INVALID_HANDLE\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)\r
+{\r
+       DAPL_HEADER *header;\r
+       dp_ib_cm_handle_t ib_cm_handle;\r
+       struct rdma_addr *ipaddr;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                    " remote_addr(cm_handle=%p, r_addr=%p)\n",\r
+                    dat_handle, raddr);\r
+\r
+       header = (DAPL_HEADER *) dat_handle;\r
+\r
+       if (header->magic == DAPL_MAGIC_EP)\r
+               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;\r
+       else if (header->magic == DAPL_MAGIC_CR)\r
+               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;\r
+       else\r
+               return DAT_INVALID_HANDLE;\r
+\r
+       /* get remote IP address from cm_id route */\r
+       ipaddr = &ib_cm_handle->cm_id->route.addr;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                    " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",\r
+                    ib_cm_handle, ib_cm_handle->cm_id,\r
+                    ntohl(((struct sockaddr_in *)\r
+                           &ipaddr->src_addr)->sin_addr.s_addr),\r
+                    ntohl(((struct sockaddr_in *)\r
+                           &ipaddr->dst_addr)->sin_addr.s_addr),\r
+                    ntohs(((struct sockaddr_in *)\r
+                           &ipaddr->dst_addr)->sin_port));\r
+\r
+       dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_private_data_size\r
+ *\r
+ * Return the size of private data given a connection op type\r
+ *\r
+ * Input:\r
+ *     prd_ptr         private data pointer\r
+ *     conn_op         connection operation type\r
+ *      hca_ptr         hca pointer, needed for transport type\r
+ *\r
+ * If prd_ptr is NULL, this is a query for the max size supported by\r
+ * the provider, otherwise it is the actual size of the private data\r
+ * contained in prd_ptr.\r
+ *\r
+ *\r
+ * Output:\r
+ *     None\r
+ *\r
+ * Returns:\r
+ *     length of private data\r
+ *\r
+ */\r
+int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,\r
+                              IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)\r
+{\r
+       return RDMA_MAX_PRIVATE_DATA;\r
+}\r
+\r
+/*\r
+ * Map all CMA event codes to the DAT equivelent.\r
+ */\r
+#define DAPL_IB_EVENT_CNT      13\r
+\r
+static struct ib_cm_event_map {\r
+       const ib_cm_events_t ib_cm_event;\r
+       DAT_EVENT_NUMBER dat_event_num;\r
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {\r
+       /* 00 */  {\r
+       IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},\r
+           /* 01 */  {\r
+       IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},\r
+           /* 02 */  {\r
+       IB_CME_DISCONNECTED_ON_LINK_DOWN,\r
+                   DAT_CONNECTION_EVENT_DISCONNECTED},\r
+           /* 03 */  {\r
+       IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},\r
+           /* 04 */  {\r
+       IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,\r
+                   DAT_CONNECTION_REQUEST_EVENT},\r
+           /* 05 */  {\r
+       IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},\r
+           /* 06 */  {\r
+       IB_CME_DESTINATION_REJECT,\r
+                   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},\r
+           /* 07 */  {\r
+       IB_CME_DESTINATION_REJECT_PRIVATE_DATA,\r
+                   DAT_CONNECTION_EVENT_PEER_REJECTED},\r
+           /* 08 */  {\r
+       IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},\r
+           /* 09 */  {\r
+       IB_CME_TOO_MANY_CONNECTION_REQUESTS,\r
+                   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},\r
+           /* 10 */  {\r
+       IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},\r
+           /* 11 */  {\r
+       IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},\r
+           /* 12 */  {\r
+IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};\r
+\r
+/*\r
+ * dapls_ib_get_cm_event\r
+ *\r
+ * Return a DAT connection event given a provider CM event.\r
+ *\r
+ * Input:\r
+ *     dat_event_num   DAT event we need an equivelent CM event for\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     ib_cm_event of translated DAPL value\r
+ */\r
+DAT_EVENT_NUMBER\r
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,\r
+                      IN DAT_BOOLEAN active)\r
+{\r
+       DAT_EVENT_NUMBER dat_event_num;\r
+       int i;\r
+\r
+       active = active;\r
+\r
+       dat_event_num = 0;\r
+       for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {\r
+               if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {\r
+                       dat_event_num = ib_cm_event_map[i].dat_event_num;\r
+                       break;\r
+               }\r
+       }\r
+       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,\r
+                    "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",\r
+                    active ? "active" : "passive", ib_cm_event, dat_event_num);\r
+\r
+       return dat_event_num;\r
+}\r
+\r
+/*\r
+ * dapls_ib_get_cm_event\r
+ *\r
+ * Return a provider CM event given a DAT connection event.\r
+ *\r
+ * Input:\r
+ *     dat_event_num   DAT event we need an equivalent CM event for\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     ib_cm_event of translated DAPL value,\r
+ *     0 when the DAT event is not in ib_cm_event_map\r
+ */\r
+ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)\r
+{\r
+       ib_cm_events_t ib_cm_event;\r
+       int i;\r
+\r
+       ib_cm_event = 0;\r
+       for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {\r
+               if (dat_event_num == ib_cm_event_map[i].dat_event_num) {\r
+                       ib_cm_event = ib_cm_event_map[i].ib_cm_event;\r
+                       break;\r
+               }\r
+       }\r
+       return ib_cm_event;\r
+}\r
+\r
+void dapli_cma_event_cb(void)\r
+{\r
+       struct rdma_cm_event *event;\r
+                               \r
+       /* process one CM event, fairness, non-blocking */\r
+       if (!rdma_get_cm_event(g_cm_events, &event)) {\r
+               struct dapl_cm_id *conn;\r
+\r
+               /* set proper conn from cm_id context */\r
+               if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)\r
+                       conn = (struct dapl_cm_id *)event->listen_id->context;\r
+               else\r
+                       conn = (struct dapl_cm_id *)event->id->context;\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                            " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",\r
+                            event->event, event->id, event->listen_id, conn);\r
+               \r
+               /* cm_free is blocked waiting for ack  */\r
+               dapl_os_lock(&conn->lock);\r
+               if (!conn->refs) {\r
+                       dapl_os_unlock(&conn->lock);\r
+                       rdma_ack_cm_event(event);\r
+                       return;\r
+               }\r
+               conn->refs++;\r
+               dapl_os_unlock(&conn->lock);\r
+\r
+               switch (event->event) {\r
+               case RDMA_CM_EVENT_ADDR_RESOLVED:\r
+                       dapli_addr_resolve(conn);\r
+                       break;\r
+\r
+               case RDMA_CM_EVENT_ROUTE_RESOLVED:\r
+                       dapli_route_resolve(conn);\r
+                       break;\r
+\r
+               case RDMA_CM_EVENT_ADDR_ERROR:\r
+                       dapl_log(DAPL_DBG_TYPE_WARN,\r
+                                "dapl_cma_active: CM ADDR ERROR: ->"\r
+                                " DST %s retry (%d)..\n",\r
+                                inet_ntoa(((struct sockaddr_in *)\r
+                                           &conn->r_addr)->sin_addr),\r
+                                conn->arp_retries);\r
+\r
+                       /* retry address resolution */\r
+                       if ((--conn->arp_retries) &&\r
+                           (event->status == -ETIMEDOUT)) {\r
+                               int ret;\r
+                               ret = rdma_resolve_addr(conn->cm_id, NULL,\r
+                                                       (struct sockaddr *)\r
+                                                       &conn->r_addr,\r
+                                                       conn->arp_timeout);\r
+                               if (!ret)\r
+                                       break;\r
+                               else {\r
+                                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,\r
+                                                    " ERROR: rdma_resolve_addr = "\r
+                                                    "%d %s\n",\r
+                                                    ret, strerror(errno));\r
+                               }\r
+                       }\r
+                       /* retries exhausted or resolve_addr failed */\r
+                       dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                "dapl_cma_active: ARP_ERR, retries(%d)"\r
+                                " exhausted -> DST %s,%d\n",\r
+                                IB_ARP_RETRY_COUNT,\r
+                                inet_ntoa(((struct sockaddr_in *)\r
+                                           &conn->cm_id->route.addr.dst_addr)->\r
+                                          sin_addr),\r
+                                ntohs(((struct sockaddr_in *)\r
+                                       &conn->cm_id->route.addr.dst_addr)->\r
+                                      sin_port));\r
+\r
+                       dapl_evd_connection_callback(conn,\r
+                                                    IB_CME_DESTINATION_UNREACHABLE,\r
+                                                    NULL, conn->ep);\r
+                       break;\r
+\r
+               case RDMA_CM_EVENT_ROUTE_ERROR:\r
+                       dapl_log(DAPL_DBG_TYPE_WARN,\r
+                                "dapl_cma_active: CM ROUTE ERROR: ->"\r
+                                " DST %s retry (%d)..\n",\r
+                                inet_ntoa(((struct sockaddr_in *)\r
+                                           &conn->r_addr)->sin_addr),\r
+                                conn->route_retries);\r
+\r
+                       /* retry route resolution */\r
+                       if ((--conn->route_retries) &&\r
+                           (event->status == -ETIMEDOUT))\r
+                               dapli_addr_resolve(conn);\r
+                       else {\r
+                               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                        "dapl_cma_active: PATH_RECORD_ERR,"\r
+                                        " retries(%d) exhausted, DST %s,%d\n",\r
+                                        IB_ROUTE_RETRY_COUNT,\r
+                                        inet_ntoa(((struct sockaddr_in *)\r
+                                                   &conn->cm_id->route.addr.\r
+                                                   dst_addr)->sin_addr),\r
+                                        ntohs(((struct sockaddr_in *)\r
+                                               &conn->cm_id->route.addr.\r
+                                               dst_addr)->sin_port));\r
+\r
+                               dapl_evd_connection_callback(conn,\r
+                                                            IB_CME_DESTINATION_UNREACHABLE,\r
+                                                            NULL, conn->ep);\r
+                       }\r
+                       break;\r
+\r
+               case RDMA_CM_EVENT_DEVICE_REMOVAL:\r
+                       dapl_evd_connection_callback(conn,\r
+                                                    IB_CME_LOCAL_FAILURE,\r
+                                                    NULL, conn->ep);\r
+                       break;\r
+               case RDMA_CM_EVENT_CONNECT_REQUEST:\r
+               case RDMA_CM_EVENT_CONNECT_ERROR:\r
+               case RDMA_CM_EVENT_UNREACHABLE:\r
+               case RDMA_CM_EVENT_REJECTED:\r
+               case RDMA_CM_EVENT_ESTABLISHED:\r
+               case RDMA_CM_EVENT_DISCONNECTED:\r
+                       /* passive or active */\r
+                       if (conn->sp)\r
+                               dapli_cm_passive_cb(conn, event);\r
+                       else\r
+                               dapli_cm_active_cb(conn, event);\r
+                       break;\r
+               case RDMA_CM_EVENT_CONNECT_RESPONSE:\r
+#ifdef RDMA_CM_EVENT_TIMEWAIT_EXIT\r
+               case RDMA_CM_EVENT_TIMEWAIT_EXIT:\r
+#endif\r
+                       break;\r
+               default:\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+                                    " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",\r
+                                    event->event, event->id,\r
+                                    event->id->context);\r
+                       break;\r
+               }\r
+               \r
+               /* ack event, unblocks destroy_cm_id in consumer threads */\r
+               rdma_ack_cm_event(event);\r
+\r
+               dapl_os_lock(&conn->lock);\r
+                conn->refs--;\r
+               dapl_os_unlock(&conn->lock);\r
+       } \r
+}\r
+\r
+/*\r
+ * Local variables:\r
+ *  c-indent-level: 4\r
+ *  c-basic-offset: 4\r
+ *  tab-width: 8\r
+ * End:\r
+ */\r
index e9ec7334ab12377770a29ec77e106ee7a260d2cf..32090fbef3c7bf97b695130a848a1de680394f71 100644 (file)
-/*
- * Copyright (c) 2005-2008 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- * 
- * MODULE: dapl_ib_util.c
- *
- * PURPOSE: OFED provider - init, open, close, utilities, work thread
- *
- * $Id:$
- *
- **********************************************************************/
-
-#ifdef RCSID
-static const char rcsid[] = "$Id:  $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-struct rdma_event_channel *g_cm_events = NULL;
-ib_thread_state_t g_ib_thread_state = 0;
-DAPL_OS_THREAD g_ib_thread;
-DAPL_OS_LOCK g_hca_lock;
-struct dapl_llist_entry *g_hca_list;
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include <rdma\winverbs.h>
-
-static COMP_SET ufds;
-
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
-       IWVProvider *prov;
-       WV_DEVICE_ADDRESS devaddr;
-       struct addrinfo *res, *ai;
-       HRESULT hr;
-       int index;
-
-       if (strncmp(name, "rdma_dev", 8)) {
-               return EINVAL;
-       }
-
-       index = atoi(name + 8);
-
-       hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
-       if (FAILED(hr)) {
-               return hr;
-       }
-
-       hr = getaddrinfo("..localmachine", NULL, NULL, &res);
-       if (hr) {
-               goto release;
-       }
-
-       for (ai = res; ai; ai = ai->ai_next) {
-               hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
-               if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
-                       memcpy(addr, ai->ai_addr, ai->ai_addrlen);
-                       goto free;
-               }
-       }
-       hr = ENODEV;
-
-free:
-       freeaddrinfo(res);
-release:
-       prov->lpVtbl->Release(prov);
-       return hr;
-}
-
-static int dapls_os_init(void)
-{
-       return CompSetInit(&ufds);
-}
-
-static void dapls_os_release(void)
-{
-       CompSetCleanup(&ufds);
-}
-
-static int dapls_config_cm_channel(struct rdma_event_channel *channel)
-{
-       channel->channel.Milliseconds = 0;
-       return 0;
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       verbs->channel.Milliseconds = 0;
-       return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       channel->comp_channel.Milliseconds = 0;
-       return 0;
-}
-
-static int dapls_thread_signal(void)
-{
-       CompSetCancel(&ufds);
-       return 0;
-}
-#else                          // _WIN64 || WIN32
-int g_ib_pipe[2];
-
-static int dapls_os_init(void)
-{
-       /* create pipe for waking up work thread */
-       return pipe(g_ib_pipe);
-}
-
-static void dapls_os_release(void)
-{
-       /* close pipe? */
-}
-
-/* Get IP address using network device name */
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
-       struct ifreq ifr;
-       int skfd, ret, len;
-
-       /* Fill in the structure */
-       snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
-       ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
-
-       /* Create a socket fd */
-       skfd = socket(PF_INET, SOCK_STREAM, 0);
-       ret = ioctl(skfd, SIOCGIFADDR, &ifr);
-       if (ret)
-               goto bail;
-
-       switch (ifr.ifr_addr.sa_family) {
-#ifdef AF_INET6
-       case AF_INET6:
-               len = sizeof(struct sockaddr_in6);
-               break;
-#endif
-       case AF_INET:
-       default:
-               len = sizeof(struct sockaddr);
-               break;
-       }
-
-       if (len <= addr_len)
-               memcpy(addr, &ifr.ifr_addr, len);
-       else
-               ret = EINVAL;
-
-      bail:
-       close(skfd);
-       return ret;
-}
-
-static int dapls_config_fd(int fd)
-{
-       int opts;
-
-       opts = fcntl(fd, F_GETFL);
-       if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
-                        fd, opts, strerror(errno));
-               return errno;
-       }
-
-       return 0;
-}
-
-static int dapls_config_cm_channel(struct rdma_event_channel *channel)
-{
-       return dapls_config_fd(channel->fd);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       return dapls_config_fd(verbs->async_fd);
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       return dapls_config_fd(channel->fd);
-}
-
-static int dapls_thread_signal(void)
-{
-       return write(g_ib_pipe[1], "w", sizeof "w");
-}
-#endif
-
-/* Get IP address using network name, address, or device name */
-static int getipaddr(char *name, char *addr, int len)
-{
-       struct addrinfo *res;
-
-       /* assume netdev for first attempt, then network and address type */
-       if (getipaddr_netdev(name, addr, len)) {
-               if (getaddrinfo(name, NULL, NULL, &res)) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " open_hca: getaddr_netdev ERROR:"
-                                " %s. Is %s configured?\n",
-                                strerror(errno), name);
-                       return 1;
-               } else {
-                       if (len >= res->ai_addrlen)
-                               memcpy(addr, res->ai_addr, res->ai_addrlen);
-                       else {
-                               freeaddrinfo(res);
-                               return 1;
-                       }
-                       freeaddrinfo(res);
-               }
-       }
-
-       dapl_dbg_log(
-               DAPL_DBG_TYPE_UTIL,
-               " getipaddr: family %d port %d addr %d.%d.%d.%d\n",
-               ((struct sockaddr_in *)addr)->sin_family,
-               ((struct sockaddr_in *)addr)->sin_port,
-               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
-               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
-               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
-               ((struct sockaddr_in *)addr)->sin_addr.
-                s_addr >> 24 & 0xff);
-
-       return 0;
-}
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- *     none
- *
- * Output:
- *     none
- *
- * Returns:
- *     0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
-
-       /* initialize hca_list lock */
-       dapl_os_lock_init(&g_hca_lock);
-
-       /* initialize hca list for CQ events */
-       dapl_llist_init_head(&g_hca_list);
-
-       if (dapls_os_init())
-               return 1;
-
-       return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
-       dapli_ib_thread_destroy();
-       if (g_cm_events != NULL)
-               rdma_destroy_event_channel(g_cm_events);
-       dapls_os_release();
-       return 0;
-}
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- *      *hca_name         pointer to provider device name
- *      *ib_hca_handle_p  pointer to provide HCA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *      dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
-       struct rdma_cm_id *cm_id = NULL;
-       union ibv_gid *gid;
-       int ret;
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: %s - %p\n", hca_name, hca_ptr);
-
-       /* Setup the global cm event channel */
-       dapl_os_lock(&g_hca_lock);
-       if (g_cm_events == NULL) {
-               g_cm_events = rdma_create_event_channel();
-               if (g_cm_events == NULL) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                                    " open_hca: ERR - RDMA channel %s\n",
-                                    strerror(errno));
-                       dapl_os_unlock(&g_hca_lock);
-                       return DAT_INTERNAL_ERROR;
-               }
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: RDMA channel created (%p)\n", g_cm_events);
-
-       /* HCA name will be hostname or IP address */
-       if (getipaddr((char *)hca_name,
-                     (char *)&hca_ptr->hca_address, 
-                     sizeof(DAT_SOCK_ADDR6)))
-               return DAT_INVALID_ADDRESS;
-
-       /* cm_id will bind local device/GID based on IP address */
-       if (rdma_create_id(g_cm_events, &cm_id, 
-                          (void *)hca_ptr, RDMA_PS_TCP)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: rdma_create ERR %s\n", strerror(errno));
-               return DAT_INTERNAL_ERROR;
-       }
-       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);
-       if ((ret) || (cm_id->verbs == NULL)) {
-               rdma_destroy_id(cm_id);
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: rdma_bind ERR %s."
-                        " Is %s configured?\n", strerror(errno), hca_name);
-               rdma_destroy_id(cm_id);
-               return DAT_INVALID_ADDRESS;
-       }
-
-       /* keep reference to IB device and cm_id */
-       hca_ptr->ib_trans.cm_id = cm_id;
-       hca_ptr->ib_hca_handle = cm_id->verbs;
-       dapls_config_verbs(cm_id->verbs);
-       hca_ptr->port_num = cm_id->port_num;
-       hca_ptr->ib_trans.ib_dev = cm_id->verbs->device;
-       hca_ptr->ib_trans.ib_ctx = cm_id->verbs;
-       gid = &cm_id->route.addr.addr.ibaddr.sgid;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: ctx=%p port=%d GID subnet %016llx"
-                    " id %016llx\n", cm_id->verbs, cm_id->port_num,
-                    (unsigned long long)ntohll(gid->global.subnet_prefix),
-                    (unsigned long long)ntohll(gid->global.interface_id));
-
-       /* support for EVD's with CNO's: one channel via thread */
-       hca_ptr->ib_trans.ib_cq =
-           ibv_create_comp_channel(hca_ptr->ib_hca_handle);
-       if (hca_ptr->ib_trans.ib_cq == NULL) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: ibv_create_comp_channel ERR %s\n",
-                        strerror(errno));
-               rdma_destroy_id(cm_id);
-               return DAT_INTERNAL_ERROR;
-       }
-       if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
-               rdma_destroy_id(cm_id);
-               return DAT_INTERNAL_ERROR;
-       }
-
-       /* set inline max with env or default, get local lid and gid 0 */
-       if (hca_ptr->ib_hca_handle->device->transport_type
-           == IBV_TRANSPORT_IWARP)
-               hca_ptr->ib_trans.max_inline_send =
-                   dapl_os_get_env_val("DAPL_MAX_INLINE",
-                                       INLINE_SEND_IWARP_DEFAULT);
-       else
-               hca_ptr->ib_trans.max_inline_send =
-                   dapl_os_get_env_val("DAPL_MAX_INLINE",
-                                       INLINE_SEND_IB_DEFAULT);
-
-       /* set CM timer defaults */
-       hca_ptr->ib_trans.max_cm_timeout =
-           dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",
-                               IB_CM_RESPONSE_TIMEOUT);
-       hca_ptr->ib_trans.max_cm_retries =
-           dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);
-       
-       /* set default IB MTU */
-       hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048);
-
-       dat_status = dapli_ib_thread_init();
-       if (dat_status != DAT_SUCCESS)
-               return dat_status;
-       /* 
-        * Put new hca_transport on list for async and CQ event processing 
-        * Wakeup work thread to add to polling list
-        */
-       dapl_llist_init_entry((DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry);
-       dapl_os_lock(&g_hca_lock);
-       dapl_llist_add_tail(&g_hca_list,
-                           (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,
-                           &hca_ptr->ib_trans.entry);
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " open_hca: thread wakeup error = %s\n",
-                        strerror(errno));
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,
-                    ((struct sockaddr_in *)
-                    &hca_ptr->hca_address)->sin_family == AF_INET ?
-                    "AF_INET" : "AF_INET6", 
-                    ((struct sockaddr_in *)
-                    &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff, 
-                    ((struct sockaddr_in *)
-                    &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff, 
-                    ((struct sockaddr_in *)
-                    &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff, 
-                    ((struct sockaddr_in *)
-                    &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff, 
-                    hca_ptr->ib_trans.max_inline_send);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Open HCA
- *
- * Input:
- *      DAPL_HCA   provide CA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *     dapl_convert_errno 
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",
-                    hca_ptr, hca_ptr->ib_hca_handle);
-
-       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
-               if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
-                       return (dapl_convert_errno(errno, "ib_close_device"));
-               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-       }
-
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_RUN) {
-               dapl_os_unlock(&g_hca_lock);
-               goto bail;
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       /* 
-        * Remove hca from async event processing list
-        * Wakeup work thread to remove from polling list
-        */
-       hca_ptr->ib_trans.destroy = 1;
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " destroy: thread wakeup error = %s\n",
-                        strerror(errno));
-
-       /* wait for thread to remove HCA references */
-       while (hca_ptr->ib_trans.destroy != 2) {
-               if (dapls_thread_signal() == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_destroy: wait on hca %p destroy\n");
-               dapl_os_sleep_usec(1000);
-       }
-bail:
-       return (DAT_SUCCESS);
-}
-
-
-DAT_RETURN dapli_ib_thread_init(void)
-{
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_init(%d)\n", dapl_os_getpid());
-
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_INIT) {
-               dapl_os_unlock(&g_hca_lock);
-               return DAT_SUCCESS;
-       }
-
-       /* uCMA events non-blocking */
-       if (dapls_config_cm_channel(g_cm_events)) {
-               dapl_os_unlock(&g_hca_lock);
-               return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
-       }
-
-       g_ib_thread_state = IB_THREAD_CREATE;
-       dapl_os_unlock(&g_hca_lock);
-
-       /* create thread to process inbound connect request */
-       dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
-       if (dat_status != DAT_SUCCESS)
-               return (dapl_convert_errno(errno,
-                                          "create_thread ERR:"
-                                          " check resource limits"));
-
-       /* wait for thread to start */
-       dapl_os_lock(&g_hca_lock);
-       while (g_ib_thread_state != IB_THREAD_RUN) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_init: waiting for ib_thread\n");
-               dapl_os_unlock(&g_hca_lock);
-               dapl_os_sleep_usec(1000);
-               dapl_os_lock(&g_hca_lock);
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_init(%d) exit\n", dapl_os_getpid());
-
-       return DAT_SUCCESS;
-}
-
-void dapli_ib_thread_destroy(void)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_destroy(%d)\n", dapl_os_getpid());
-       /* 
-        * wait for async thread to terminate. 
-        * pthread_join would be the correct method
-        * but some applications have some issues
-        */
-
-       /* destroy ib_thread, wait for termination, if not already */
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_RUN)
-               goto bail;
-
-       g_ib_thread_state = IB_THREAD_CANCEL;
-       while ((g_ib_thread_state != IB_THREAD_EXIT)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_destroy: waiting for ib_thread\n");
-               if (dapls_thread_signal() == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-               dapl_os_unlock(&g_hca_lock);
-               dapl_os_sleep_usec(2000);
-               dapl_os_lock(&g_hca_lock);
-       }
-bail:
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
-}
-
-#if defined(_WIN64) || defined(_WIN32)
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
-       struct _ib_hca_transport *hca;
-       struct _ib_hca_transport *uhca[8];
-       COMP_CHANNEL *channel;
-       int ret, idx, cnt;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
-                    dapl_os_getpid(), g_ib_thread);
-
-       dapl_os_lock(&g_hca_lock);
-       for (g_ib_thread_state = IB_THREAD_RUN;
-            g_ib_thread_state == IB_THREAD_RUN; 
-            dapl_os_lock(&g_hca_lock)) {
-
-               CompSetZero(&ufds);
-               CompSetAdd(&g_cm_events->channel, &ufds);
-
-               idx = 0;
-               hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
-                     dapl_llist_peek_head(&g_hca_list);
-
-               while (hca) {
-                       CompSetAdd(&hca->ib_ctx->channel, &ufds);
-                       CompSetAdd(&hca->ib_cq->comp_channel, &ufds);
-                       uhca[idx++] = hca;
-                       hca = dapl_llist_next_entry(&g_hca_list,
-                                                   (DAPL_LLIST_ENTRY *)
-                                                   &hca->entry);
-               }
-               cnt = idx;
-
-               dapl_os_unlock(&g_hca_lock);
-               ret = CompSetPoll(&ufds, INFINITE);
-
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread(%d) poll_event 0x%x\n",
-                            dapl_os_getpid(), ret);
-
-               dapli_cma_event_cb();
-
-               /* check and process ASYNC events, per device */
-               for (idx = 0; idx < cnt; idx++) {
-                       if (uhca[idx]->destroy == 1) {
-                               dapl_os_lock(&g_hca_lock);
-                               dapl_llist_remove_entry(&g_hca_list,
-                                                       (DAPL_LLIST_ENTRY *)
-                                                       &uhca[idx]->entry);
-                               dapl_os_unlock(&g_hca_lock);
-                               uhca[idx]->destroy = 2;
-                       } else {
-                               dapli_cq_event_cb(uhca[idx]);
-                               dapli_async_event_cb(uhca[idx]);
-                       }
-               }
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
-                    dapl_os_getpid());
-       g_ib_thread_state = IB_THREAD_EXIT;
-       dapl_os_unlock(&g_hca_lock);
-}
-#else                          // _WIN64 || WIN32
-
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
-       struct pollfd ufds[__FD_SETSIZE];
-       struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
-       struct _ib_hca_transport *hca;
-       int ret, idx, fds;
-       char rbuf[2];
-
-       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                    " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",
-                    dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],
-                    g_cm_events->fd);
-
-       /* Poll across pipe, CM, AT never changes */
-       dapl_os_lock(&g_hca_lock);
-       g_ib_thread_state = IB_THREAD_RUN;
-
-       ufds[0].fd = g_ib_pipe[0];      /* pipe */
-       ufds[0].events = POLLIN;
-       ufds[1].fd = g_cm_events->fd;   /* uCMA */
-       ufds[1].events = POLLIN;
-
-       while (g_ib_thread_state == IB_THREAD_RUN) {
-
-               /* build ufds after pipe and uCMA events */
-               ufds[0].revents = 0;
-               ufds[1].revents = 0;
-               idx = 1;
-
-               /*  Walk HCA list and setup async and CQ events */
-               if (!dapl_llist_is_empty(&g_hca_list))
-                       hca = dapl_llist_peek_head(&g_hca_list);
-               else
-                       hca = NULL;
-
-               while (hca) {
-
-                       /* uASYNC events */
-                       ufds[++idx].fd = hca->ib_ctx->async_fd;
-                       ufds[idx].events = POLLIN;
-                       ufds[idx].revents = 0;
-                       uhca[idx] = hca;
-
-                       /* CQ events are non-direct with CNO's */
-                       ufds[++idx].fd = hca->ib_cq->fd;
-                       ufds[idx].events = POLLIN;
-                       ufds[idx].revents = 0;
-                       uhca[idx] = hca;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                                    " ib_thread(%d) poll_fd: hca[%d]=%p,"
-                                    " async=%d pipe=%d cm=%d \n",
-                                    dapl_os_getpid(), hca, ufds[idx - 1].fd,
-                                    ufds[0].fd, ufds[1].fd);
-
-                       hca = dapl_llist_next_entry(&g_hca_list,
-                                                   (DAPL_LLIST_ENTRY *)
-                                                   &hca->entry);
-               }
-
-               /* unlock, and setup poll */
-               fds = idx + 1;
-               dapl_os_unlock(&g_hca_lock);
-               ret = poll(ufds, fds, -1);
-               if (ret <= 0) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                                    " ib_thread(%d): ERR %s poll\n",
-                                    dapl_os_getpid(), strerror(errno));
-                       dapl_os_lock(&g_hca_lock);
-                       continue;
-               }
-
-               dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                            " ib_thread(%d) poll_event: "
-                            " async=0x%x pipe=0x%x cm=0x%x \n",
-                            dapl_os_getpid(), ufds[idx].revents,
-                            ufds[0].revents, ufds[1].revents);
-
-               /* uCMA events */
-               if (ufds[1].revents == POLLIN)
-                       dapli_cma_event_cb();
-
-               /* check and process CQ and ASYNC events, per device */
-               for (idx = 2; idx < fds; idx++) {
-                       if (ufds[idx].revents == POLLIN) {
-                               dapli_cq_event_cb(uhca[idx]);
-                               dapli_async_event_cb(uhca[idx]);
-                       }
-               }
-
-               /* check and process user events, PIPE */
-               if (ufds[0].revents == POLLIN) {
-                       if (read(g_ib_pipe[0], rbuf, 2) == -1)
-                               dapl_log(DAPL_DBG_TYPE_THREAD,
-                                        " cr_thread: pipe rd err= %s\n",
-                                        strerror(errno));
-
-                       /* cleanup any device on list marked for destroy */
-                       for (idx = 3; idx < fds; idx++) {
-                               if (uhca[idx] && uhca[idx]->destroy == 1) {
-                                       dapl_os_lock(&g_hca_lock);
-                                       dapl_llist_remove_entry(
-                                               &g_hca_list,
-                                               (DAPL_LLIST_ENTRY*)
-                                               &uhca[idx]->entry);
-                                       dapl_os_unlock(&g_hca_lock);
-                                       uhca[idx]->destroy = 2;
-                               }
-                       }
-               }
-               dapl_os_lock(&g_hca_lock);
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",
-                    dapl_os_getpid());
-       g_ib_thread_state = IB_THREAD_EXIT;
-       dapl_os_unlock(&g_hca_lock);
-}
-#endif
+/*\r
+ * Copyright (c) 2005-2008 Intel Corporation.  All rights reserved.\r
+ *\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ *    copy of which is available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+\r
+/**********************************************************************\r
+ * \r
+ * MODULE: dapl_ib_util.c\r
+ *\r
+ * PURPOSE: OFED provider - init, open, close, utilities, work thread\r
+ *\r
+ * $Id:$\r
+ *\r
+ **********************************************************************/\r
+\r
+#ifdef RCSID\r
+static const char rcsid[] = "$Id:  $";\r
+#endif\r
+\r
+#include "openib_osd.h"\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+#include "dapl_ib_util.h"\r
+#include "dapl_osd.h"\r
+\r
+#include <stdlib.h>\r
+\r
+struct rdma_event_channel *g_cm_events = NULL;\r
+ib_thread_state_t g_ib_thread_state = 0;\r
+DAPL_OS_THREAD g_ib_thread;\r
+DAPL_OS_LOCK g_hca_lock;\r
+struct dapl_llist_entry *g_hca_list;\r
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+#include "..\..\..\..\..\etc\user\comp_channel.cpp"\r
+#include <rdma\winverbs.h>\r
+\r
+static COMP_SET ufds;\r
+\r
+/*\r
+ * Resolve a device name of the form "rdma_dev<N>" to an IP address.\r
+ *\r
+ * The local machine's addresses are enumerated and each one is offered to\r
+ * the WinVerbs provider's TranslateAddress(); the N-th address that both\r
+ * translates successfully and fits in addr_len bytes is copied into addr.\r
+ *\r
+ * Returns EINVAL when the name lacks the "rdma_dev" prefix, the failing\r
+ * HRESULT / getaddrinfo code when a lookup step fails, ENODEV when no\r
+ * matching address exists, otherwise the result of the last\r
+ * TranslateAddress() call.\r
+ */\r
+static int getipaddr_netdev(char *name, char *addr, int addr_len)\r
+{\r
+       IWVProvider *provider;\r
+       WV_DEVICE_ADDRESS dev_addr;\r
+       struct addrinfo *list, *cur;\r
+       HRESULT status;\r
+       int remaining;\r
+\r
+       if (strncmp(name, "rdma_dev", 8) != 0)\r
+               return EINVAL;\r
+\r
+       /* number of usable addresses to skip before accepting one */\r
+       remaining = atoi(name + 8);\r
+\r
+       status = WvGetObject(&IID_IWVProvider, (LPVOID *) &provider);\r
+       if (FAILED(status))\r
+               return status;\r
+\r
+       status = getaddrinfo("..localmachine", NULL, NULL, &list);\r
+       if (status == 0) {\r
+               int found = 0;\r
+\r
+               cur = list;\r
+               while (cur != NULL && !found) {\r
+                       status = provider->lpVtbl->TranslateAddress(provider,\r
+                                                                   cur->ai_addr,\r
+                                                                   &dev_addr);\r
+                       /* the countdown only ticks for addresses that qualify */\r
+                       if (SUCCEEDED(status) &&\r
+                           (cur->ai_addrlen <= addr_len) &&\r
+                           (remaining-- == 0)) {\r
+                               memcpy(addr, cur->ai_addr, cur->ai_addrlen);\r
+                               found = 1;\r
+                       } else {\r
+                               cur = cur->ai_next;\r
+                       }\r
+               }\r
+\r
+               if (!found)\r
+                       status = ENODEV;\r
+\r
+               freeaddrinfo(list);\r
+       }\r
+\r
+       provider->lpVtbl->Release(provider);\r
+       return status;\r
+}\r
+\r
+/* Initialize the file-scope completion set that the work thread polls. */\r
+static int dapls_os_init(void)\r
+{\r
+       int status = CompSetInit(&ufds);\r
+\r
+       return status;\r
+}\r
+\r
+/* Counterpart of dapls_os_init(): clean up the file-scope completion set. */\r
+static void dapls_os_release(void)\r
+{\r
+       CompSetCleanup(&ufds);\r
+}\r
+\r
+/*\r
+ * Zero the Milliseconds field of the CM event channel's embedded channel.\r
+ * NOTE(review): presumably this is the wait timeout, so a zero value makes\r
+ * event reads non-blocking (the non-Windows build sets O_NONBLOCK on the\r
+ * fd instead) - confirm against the channel implementation.\r
+ * Always returns 0.\r
+ */\r
+static int dapls_config_cm_channel(struct rdma_event_channel *channel)\r
+{\r
+       channel->channel.Milliseconds = 0;\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * Zero the Milliseconds field of the verbs context's channel.\r
+ * NOTE(review): presumably a zero wait timeout, i.e. non-blocking async\r
+ * event reads - confirm against the channel implementation.\r
+ * Always returns 0.\r
+ */\r
+static int dapls_config_verbs(struct ibv_context *verbs)\r
+{\r
+       verbs->channel.Milliseconds = 0;\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * Zero the Milliseconds field of the completion channel's comp_channel.\r
+ * NOTE(review): presumably a zero wait timeout, i.e. non-blocking CQ\r
+ * event reads - confirm against the channel implementation.\r
+ * Always returns 0.\r
+ */\r
+static int dapls_config_comp_channel(struct ibv_comp_channel *channel)\r
+{\r
+       channel->comp_channel.Milliseconds = 0;\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * Wake the work thread by cancelling the completion set it waits on in\r
+ * CompSetPoll(). Always returns 0, so callers' "== -1" checks never fire\r
+ * in this build.\r
+ */\r
+static int dapls_thread_signal(void)\r
+{\r
+       CompSetCancel(&ufds);\r
+       return 0;\r
+}\r
+#else                          // _WIN64 || WIN32\r
+int g_ib_pipe[2];\r
+\r
+/*\r
+ * Create the wakeup pipe: the work thread polls g_ib_pipe[0] and\r
+ * dapls_thread_signal() writes to g_ib_pipe[1].\r
+ * Returns the result of pipe().\r
+ */\r
+static int dapls_os_init(void)\r
+{\r
+       int rc = pipe(g_ib_pipe);\r
+\r
+       return rc;\r
+}\r
+\r
+/*\r
+ * Counterpart of dapls_os_init(). Currently a no-op.\r
+ * NOTE(review): the two descriptors created by pipe() in dapls_os_init()\r
+ * are never closed; confirm that nothing can still use them at release\r
+ * time (and that init actually succeeded) before adding a close here.\r
+ */\r
+static void dapls_os_release(void)\r
+{\r
+       /* close pipe? */\r
+}\r
+\r
+/*\r
+ * Get IP address using network device name.\r
+ *\r
+ * Queries the interface address of `name` with SIOCGIFADDR and copies it\r
+ * into `addr` when it fits in `addr_len` bytes.\r
+ *\r
+ * Returns 0 on success, -1 when the socket or ioctl fails (errno is left\r
+ * as set by the failing call), EINVAL when the address does not fit.\r
+ */\r
+static int getipaddr_netdev(char *name, char *addr, int addr_len)\r
+{\r
+       struct ifreq ifr;\r
+       int skfd, ret, len;\r
+\r
+       /* Fill in the structure; start from zero so no stack garbage is passed */\r
+       memset(&ifr, 0, sizeof(ifr));\r
+       snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);\r
+       ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;\r
+\r
+       /* Create a socket fd */\r
+       skfd = socket(PF_INET, SOCK_STREAM, 0);\r
+       if (skfd < 0)\r
+               return -1;      /* nothing to close, keep socket()'s errno */\r
+\r
+       ret = ioctl(skfd, SIOCGIFADDR, &ifr);\r
+       if (ret)\r
+               goto bail;\r
+\r
+       switch (ifr.ifr_addr.sa_family) {\r
+#ifdef AF_INET6\r
+       case AF_INET6:\r
+               len = sizeof(struct sockaddr_in6);\r
+               break;\r
+#endif\r
+       case AF_INET:\r
+       default:\r
+               len = sizeof(struct sockaddr);\r
+               break;\r
+       }\r
+\r
+       if (len <= addr_len)\r
+               memcpy(addr, &ifr.ifr_addr, len);\r
+       else\r
+               ret = EINVAL;\r
+\r
+      bail:\r
+       close(skfd);\r
+       return ret;\r
+}\r
+\r
+/*\r
+ * Add O_NONBLOCK to the file status flags of fd.\r
+ *\r
+ * Returns 0 on success, otherwise the errno of the failing fcntl() call\r
+ * (never 0). errno is restored to that value before returning so callers\r
+ * that inspect errno still see the fcntl() failure.\r
+ */\r
+static int dapls_config_fd(int fd)\r
+{\r
+       int opts, err;\r
+\r
+       opts = fcntl(fd, F_GETFL);\r
+       if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {\r
+               /* capture now: the logging call below may overwrite errno */\r
+               err = errno ? errno : EINVAL;\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " dapls_config_fd: fcntl on fd %d ERR %d %s\n",\r
+                        fd, opts, strerror(err));\r
+               errno = err;\r
+               return err;\r
+       }\r
+\r
+       return 0;\r
+}\r
+\r
+/* Make the CM event channel's fd non-blocking; returns dapls_config_fd(). */\r
+static int dapls_config_cm_channel(struct rdma_event_channel *channel)\r
+{\r
+       int cm_fd = channel->fd;\r
+\r
+       return dapls_config_fd(cm_fd);\r
+}\r
+\r
+/* Make the device context's async event fd non-blocking. */\r
+static int dapls_config_verbs(struct ibv_context *verbs)\r
+{\r
+       int async_fd = verbs->async_fd;\r
+\r
+       return dapls_config_fd(async_fd);\r
+}\r
+\r
+/* Make the completion channel's fd non-blocking. */\r
+static int dapls_config_comp_channel(struct ibv_comp_channel *channel)\r
+{\r
+       int cq_fd = channel->fd;\r
+\r
+       return dapls_config_fd(cq_fd);\r
+}\r
+\r
+/*\r
+ * Wake the work thread by writing to the write end of the wakeup pipe.\r
+ * Two bytes are written ('w' plus its terminating NUL), matching the\r
+ * two-byte read in the work thread. Returns the result of write().\r
+ */\r
+static int dapls_thread_signal(void)\r
+{\r
+       static const char wake[] = "w";\r
+\r
+       return write(g_ib_pipe[1], wake, sizeof wake);\r
+}\r
+#endif\r
+\r
+/*\r
+ * Get IP address using network name, address, or device name.\r
+ *\r
+ * `name` is first tried as a network device name; when that fails it is\r
+ * resolved with getaddrinfo() and the first result is used.\r
+ *\r
+ * Input:\r
+ *     name    device name, host name or address string\r
+ *     addr    buffer receiving the socket address\r
+ *     len     size of addr in bytes\r
+ *\r
+ * Returns:\r
+ *     0 success, 1 when the name cannot be resolved or the address does\r
+ *     not fit in len bytes\r
+ */\r
+static int getipaddr(char *name, char *addr, int len)\r
+{\r
+       struct addrinfo *res;\r
+       int gai_err;\r
+\r
+       /* assume netdev for first attempt, then network and address type */\r
+       if (getipaddr_netdev(name, addr, len)) {\r
+               gai_err = getaddrinfo(name, NULL, NULL, &res);\r
+               if (gai_err) {\r
+                       /* getaddrinfo reports via its return code, not errno */\r
+                       dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                " open_hca: getaddr_netdev ERROR:"\r
+                                " %s. Is %s configured?\n",\r
+                                gai_strerror(gai_err), name);\r
+                       return 1;\r
+               }\r
+\r
+               /* reject negative sizes before the unsigned comparison */\r
+               if (len < 0 || (size_t)len < (size_t)res->ai_addrlen) {\r
+                       freeaddrinfo(res);\r
+                       return 1;\r
+               }\r
+               memcpy(addr, res->ai_addr, res->ai_addrlen);\r
+               freeaddrinfo(res);\r
+       }\r
+\r
+       dapl_dbg_log(\r
+               DAPL_DBG_TYPE_UTIL,\r
+               " getipaddr: family %d port %d addr %d.%d.%d.%d\n",\r
+               ((struct sockaddr_in *)addr)->sin_family,\r
+               ((struct sockaddr_in *)addr)->sin_port,\r
+               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,\r
+               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,\r
+               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,\r
+               ((struct sockaddr_in *)addr)->sin_addr.\r
+                s_addr >> 24 & 0xff);\r
+\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * dapls_ib_init, dapls_ib_release\r
+ *\r
+ * Set up / tear down the provider-wide state used by device open:\r
+ * the HCA list, its lock and the OS specific wakeup mechanism.\r
+ *\r
+ * Input:\r
+ *     none\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     0 success, 1 when the OS specific initialization fails\r
+ *\r
+ */\r
+int32_t dapls_ib_init(void)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");\r
+\r
+       /* lock first, then the list of HCAs it protects */\r
+       dapl_os_lock_init(&g_hca_lock);\r
+       dapl_llist_init_head(&g_hca_list);\r
+\r
+       return dapls_os_init() ? 1 : 0;\r
+}\r
+\r
+/*\r
+ * Tear down provider-wide state: stop the work thread, destroy the global\r
+ * CM event channel (if one was created) and release OS resources.\r
+ * Always returns 0.\r
+ */\r
+int32_t dapls_ib_release(void)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");\r
+       dapli_ib_thread_destroy();\r
+       if (g_cm_events != NULL) {\r
+               rdma_destroy_event_channel(g_cm_events);\r
+               /* open_hca tests this pointer against NULL; do not leave it dangling */\r
+               g_cm_events = NULL;\r
+       }\r
+       dapls_os_release();\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * dapls_ib_open_hca\r
+ *\r
+ * Open HCA: resolve the device name to an address, bind an rdma_cm_id to\r
+ * it to locate the verbs device, create the completion channel used for\r
+ * CNO-style CQ events, start the work thread if needed and put the HCA on\r
+ * the list the work thread polls.\r
+ *\r
+ * Input:\r
+ *      hca_name   provider device name (netdev name, host name or address)\r
+ *      hca_ptr    DAPL HCA object to fill in\r
+ *\r
+ * Output:\r
+ *      none\r
+ *\r
+ * Return:\r
+ *      DAT_SUCCESS\r
+ *      DAT_INVALID_ADDRESS   name not resolvable / not bindable\r
+ *      DAT_INTERNAL_ERROR    channel or id creation failed\r
+ *      error from dapli_ib_thread_init\r
+ *\r
+ * On any failure after the cm_id has been bound, the cm_id and completion\r
+ * channel are destroyed and the handles stored in hca_ptr are reset, so\r
+ * nothing is leaked and no dangling handle is left behind.\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)\r
+{\r
+       struct rdma_cm_id *cm_id = NULL;\r
+       union ibv_gid *gid;\r
+       int ret;\r
+       DAT_RETURN dat_status;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: %s - %p\n", hca_name, hca_ptr);\r
+\r
+       /* Setup the global cm event channel */\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_cm_events == NULL) {\r
+               g_cm_events = rdma_create_event_channel();\r
+               if (g_cm_events == NULL) {\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+                                    " open_hca: ERR - RDMA channel %s\n",\r
+                                    strerror(errno));\r
+                       dapl_os_unlock(&g_hca_lock);\r
+                       return DAT_INTERNAL_ERROR;\r
+               }\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: RDMA channel created (%p)\n", g_cm_events);\r
+\r
+       /* HCA name will be hostname or IP address */\r
+       if (getipaddr((char *)hca_name,\r
+                     (char *)&hca_ptr->hca_address,\r
+                     sizeof(DAT_SOCK_ADDR6)))\r
+               return DAT_INVALID_ADDRESS;\r
+\r
+       /* cm_id will bind local device/GID based on IP address */\r
+       if (rdma_create_id(g_cm_events, &cm_id,\r
+                          (void *)hca_ptr, RDMA_PS_TCP)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: rdma_create ERR %s\n", strerror(errno));\r
+               return DAT_INTERNAL_ERROR;\r
+       }\r
+       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);\r
+       if ((ret) || (cm_id->verbs == NULL)) {\r
+               /* log first (errno still valid), then destroy exactly once */\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: rdma_bind ERR %s."\r
+                        " Is %s configured?\n", strerror(errno), hca_name);\r
+               rdma_destroy_id(cm_id);\r
+               return DAT_INVALID_ADDRESS;\r
+       }\r
+\r
+       /* keep reference to IB device and cm_id */\r
+       hca_ptr->ib_trans.cm_id = cm_id;\r
+       hca_ptr->ib_hca_handle = cm_id->verbs;\r
+       dapls_config_verbs(cm_id->verbs);\r
+       hca_ptr->port_num = cm_id->port_num;\r
+       hca_ptr->ib_trans.ib_dev = cm_id->verbs->device;\r
+       hca_ptr->ib_trans.ib_ctx = cm_id->verbs;\r
+       gid = &cm_id->route.addr.addr.ibaddr.sgid;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: ctx=%p port=%d GID subnet %016llx"\r
+                    " id %016llx\n", cm_id->verbs, cm_id->port_num,\r
+                    (unsigned long long)ntohll(gid->global.subnet_prefix),\r
+                    (unsigned long long)ntohll(gid->global.interface_id));\r
+\r
+       /* support for EVD's with CNO's: one channel via thread */\r
+       hca_ptr->ib_trans.ib_cq =\r
+           ibv_create_comp_channel(hca_ptr->ib_hca_handle);\r
+       if (hca_ptr->ib_trans.ib_cq == NULL) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: ibv_create_comp_channel ERR %s\n",\r
+                        strerror(errno));\r
+               dat_status = DAT_INTERNAL_ERROR;\r
+               goto err_id;\r
+       }\r
+       if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {\r
+               dat_status = DAT_INTERNAL_ERROR;\r
+               goto err_channel;\r
+       }\r
+\r
+       /* set inline max with env or default, get local lid and gid 0 */\r
+       if (hca_ptr->ib_hca_handle->device->transport_type\r
+           == IBV_TRANSPORT_IWARP)\r
+               hca_ptr->ib_trans.max_inline_send =\r
+                   dapl_os_get_env_val("DAPL_MAX_INLINE",\r
+                                       INLINE_SEND_IWARP_DEFAULT);\r
+       else\r
+               hca_ptr->ib_trans.max_inline_send =\r
+                   dapl_os_get_env_val("DAPL_MAX_INLINE",\r
+                                       INLINE_SEND_IB_DEFAULT);\r
+\r
+       /* set CM timer defaults */\r
+       hca_ptr->ib_trans.max_cm_timeout =\r
+           dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",\r
+                               IB_CM_RESPONSE_TIMEOUT);\r
+       hca_ptr->ib_trans.max_cm_retries =\r
+           dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);\r
+\r
+       /* set default IB MTU */\r
+       hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048);\r
+\r
+       dat_status = dapli_ib_thread_init();\r
+       if (dat_status != DAT_SUCCESS)\r
+               goto err_channel;\r
+       /*\r
+        * Put new hca_transport on list for async and CQ event processing\r
+        * Wakeup work thread to add to polling list\r
+        */\r
+       dapl_llist_init_entry((DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry);\r
+       dapl_os_lock(&g_hca_lock);\r
+       dapl_llist_add_tail(&g_hca_list,\r
+                           (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,\r
+                           &hca_ptr->ib_trans.entry);\r
+       if (dapls_thread_signal() == -1)\r
+               dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                        " open_hca: thread wakeup error = %s\n",\r
+                        strerror(errno));\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,\r
+                    ((struct sockaddr_in *)\r
+                    &hca_ptr->hca_address)->sin_family == AF_INET ?\r
+                    "AF_INET" : "AF_INET6",\r
+                    ((struct sockaddr_in *)\r
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,\r
+                    ((struct sockaddr_in *)\r
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,\r
+                    ((struct sockaddr_in *)\r
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,\r
+                    ((struct sockaddr_in *)\r
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff,\r
+                    hca_ptr->ib_trans.max_inline_send);\r
+\r
+       return DAT_SUCCESS;\r
+\r
+err_channel:\r
+       /* no CQ has been attached yet, so the channel can go away safely */\r
+       ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq);\r
+       hca_ptr->ib_trans.ib_cq = NULL;\r
+err_id:\r
+       rdma_destroy_id(cm_id);\r
+       /* the verbs context belonged to the cm_id: drop every reference */\r
+       hca_ptr->ib_trans.cm_id = NULL;\r
+       hca_ptr->ib_trans.ib_dev = NULL;\r
+       hca_ptr->ib_trans.ib_ctx = NULL;\r
+       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+       return dat_status;\r
+}\r
+\r
+/*\r
+ * dapls_ib_close_hca\r
+ *\r
+ * Close HCA: ask the work thread to drop the HCA from its polling list,\r
+ * wait until it has done so, then destroy the rdma_cm_id that owns the\r
+ * device context.\r
+ *\r
+ * Input:\r
+ *      DAPL_HCA   provide CA handle\r
+ *\r
+ * Output:\r
+ *      none\r
+ *\r
+ * Return:\r
+ *      DAT_SUCCESS\r
+ *     dapl_convert_errno\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",\r
+                    hca_ptr, hca_ptr->ib_hca_handle);\r
+\r
+       /* without a running work thread nobody holds a reference to wait for */\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_ib_thread_state != IB_THREAD_RUN) {\r
+               dapl_os_unlock(&g_hca_lock);\r
+               goto bail;\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       /*\r
+        * Remove hca from async event processing list\r
+        * Wakeup work thread to remove from polling list\r
+        */\r
+       hca_ptr->ib_trans.destroy = 1;\r
+       if (dapls_thread_signal() == -1)\r
+               dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                        " destroy: thread wakeup error = %s\n",\r
+                        strerror(errno));\r
+\r
+       /* wait for thread to remove HCA references (it sets destroy to 2) */\r
+       while (hca_ptr->ib_trans.destroy != 2) {\r
+               if (dapls_thread_signal() == -1)\r
+                       dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                                " destroy: thread wakeup error = %s\n",\r
+                                strerror(errno));\r
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                            " ib_thread_destroy: wait on hca %p destroy\n",\r
+                            hca_ptr);\r
+               dapl_os_sleep_usec(1000);\r
+       }\r
+bail:\r
+       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {\r
+               if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))\r
+                       return (dapl_convert_errno(errno, "ib_close_device"));\r
+               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+       }\r
+\r
+       return (DAT_SUCCESS);\r
+}\r
+\r
+\r
+/*\r
+ * Start the provider work thread if it has not been started yet and wait\r
+ * until it reports IB_THREAD_RUN.\r
+ *\r
+ * Returns DAT_SUCCESS when the thread is (already) started, otherwise a\r
+ * converted errno. When thread creation fails the state is put back to\r
+ * IB_THREAD_INIT so a later call can try again instead of reporting\r
+ * success with no thread running.\r
+ */\r
+DAT_RETURN dapli_ib_thread_init(void)\r
+{\r
+       DAT_RETURN dat_status;\r
+       int err;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " ib_thread_init(%d)\n", dapl_os_getpid());\r
+\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_ib_thread_state != IB_THREAD_INIT) {\r
+               dapl_os_unlock(&g_hca_lock);\r
+               return DAT_SUCCESS;\r
+       }\r
+\r
+       /* uCMA events non-blocking */\r
+       if (dapls_config_cm_channel(g_cm_events)) {\r
+               dapl_os_unlock(&g_hca_lock);\r
+               return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));\r
+       }\r
+\r
+       g_ib_thread_state = IB_THREAD_CREATE;\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       /* create thread to process inbound connect request */\r
+       dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);\r
+       if (dat_status != DAT_SUCCESS) {\r
+               /* keep the failure's errno across the lock calls */\r
+               err = errno;\r
+               dapl_os_lock(&g_hca_lock);\r
+               g_ib_thread_state = IB_THREAD_INIT;\r
+               dapl_os_unlock(&g_hca_lock);\r
+               return (dapl_convert_errno(err,\r
+                                          "create_thread ERR:"\r
+                                          " check resource limits"));\r
+       }\r
+\r
+       /* wait for thread to start */\r
+       dapl_os_lock(&g_hca_lock);\r
+       while (g_ib_thread_state != IB_THREAD_RUN) {\r
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                            " ib_thread_init: waiting for ib_thread\n");\r
+               dapl_os_unlock(&g_hca_lock);\r
+               dapl_os_sleep_usec(1000);\r
+               dapl_os_lock(&g_hca_lock);\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " ib_thread_init(%d) exit\n", dapl_os_getpid());\r
+\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * Ask the work thread to stop and wait until it has marked itself\r
+ * IB_THREAD_EXIT. Does nothing when the thread is not in IB_THREAD_RUN.\r
+ *\r
+ * The wait is a poll/sleep loop rather than a join: per the original\r
+ * author, pthread_join would be the correct method but some applications\r
+ * have issues with it.\r
+ */\r
+void dapli_ib_thread_destroy(void)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " ib_thread_destroy(%d)\n", dapl_os_getpid());\r
+\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_ib_thread_state == IB_THREAD_RUN) {\r
+               /* request cancellation, then nudge the thread until it exits */\r
+               g_ib_thread_state = IB_THREAD_CANCEL;\r
+               do {\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                                    " ib_thread_destroy: waiting for ib_thread\n");\r
+                       if (dapls_thread_signal() == -1)\r
+                               dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                                        " destroy: thread wakeup error = %s\n",\r
+                                        strerror(errno));\r
+                       /* drop the lock while sleeping so the thread can progress */\r
+                       dapl_os_unlock(&g_hca_lock);\r
+                       dapl_os_sleep_usec(2000);\r
+                       dapl_os_lock(&g_hca_lock);\r
+               } while (g_ib_thread_state != IB_THREAD_EXIT);\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " ib_thread_destroy(%d) exit\n", dapl_os_getpid());\r
+}\r
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+/*\r
+ * work thread for uAT, uCM, CQ, and async events\r
+ *\r
+ * Each pass rebuilds the completion set from the CM channel plus the async\r
+ * and CQ channels of every HCA on g_hca_list, waits on it, then runs the\r
+ * CM, CQ and async callbacks. HCAs flagged destroy == 1 are unlinked from\r
+ * the list and flagged destroy == 2 so dapls_ib_close_hca() can proceed.\r
+ *\r
+ * g_hca_lock is held while the list is walked and released around the\r
+ * wait and the callbacks; the for-loop's step expression re-takes it.\r
+ *\r
+ * At most sizeof(uhca)/sizeof(uhca[0]) HCAs are serviced per pass; any\r
+ * beyond that are reported and left out instead of overrunning the array.\r
+ */\r
+void dapli_thread(void *arg)\r
+{\r
+       struct _ib_hca_transport *hca;\r
+       struct _ib_hca_transport *uhca[8];\r
+       int ret, idx, cnt;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",\r
+                    dapl_os_getpid(), g_ib_thread);\r
+\r
+       dapl_os_lock(&g_hca_lock);\r
+       for (g_ib_thread_state = IB_THREAD_RUN;\r
+            g_ib_thread_state == IB_THREAD_RUN;\r
+            dapl_os_lock(&g_hca_lock)) {\r
+\r
+               CompSetZero(&ufds);\r
+               CompSetAdd(&g_cm_events->channel, &ufds);\r
+\r
+               idx = 0;\r
+               hca = dapl_llist_is_empty(&g_hca_list) ? NULL :\r
+                     dapl_llist_peek_head(&g_hca_list);\r
+\r
+               while (hca) {\r
+                       /* never write past the snapshot array */\r
+                       if (idx == (int)(sizeof(uhca) / sizeof(uhca[0]))) {\r
+                               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                        " ib_thread(%d): ERR more than %d"\r
+                                        " devices open, extra not serviced\n",\r
+                                        dapl_os_getpid(), idx);\r
+                               break;\r
+                       }\r
+                       CompSetAdd(&hca->ib_ctx->channel, &ufds);\r
+                       CompSetAdd(&hca->ib_cq->comp_channel, &ufds);\r
+                       uhca[idx++] = hca;\r
+                       hca = dapl_llist_next_entry(&g_hca_list,\r
+                                                   (DAPL_LLIST_ENTRY *)\r
+                                                   &hca->entry);\r
+               }\r
+               cnt = idx;\r
+\r
+               dapl_os_unlock(&g_hca_lock);\r
+               ret = CompSetPoll(&ufds, INFINITE);\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                            " ib_thread(%d) poll_event 0x%x\n",\r
+                            dapl_os_getpid(), ret);\r
+\r
+               dapli_cma_event_cb();\r
+\r
+               /* check and process ASYNC events, per device */\r
+               for (idx = 0; idx < cnt; idx++) {\r
+                       if (uhca[idx]->destroy == 1) {\r
+                               dapl_os_lock(&g_hca_lock);\r
+                               dapl_llist_remove_entry(&g_hca_list,\r
+                                                       (DAPL_LLIST_ENTRY *)\r
+                                                       &uhca[idx]->entry);\r
+                               dapl_os_unlock(&g_hca_lock);\r
+                               /* tells close_hca the HCA is off the list */\r
+                               uhca[idx]->destroy = 2;\r
+                       } else {\r
+                               dapli_cq_event_cb(uhca[idx]);\r
+                               dapli_async_event_cb(uhca[idx]);\r
+                       }\r
+               }\r
+       }\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",\r
+                    dapl_os_getpid());\r
+       g_ib_thread_state = IB_THREAD_EXIT;\r
+       dapl_os_unlock(&g_hca_lock);\r
+}\r
+#else                          // _WIN64 || WIN32\r
+\r
+/*\r
+ * work thread for uAT, uCM, CQ, and async events\r
+ *\r
+ * Poll table layout: slot 0 is the wakeup pipe, slot 1 the CM event\r
+ * channel, then two slots per HCA on g_hca_list (async fd, CQ channel fd).\r
+ * uhca[] maps each per-HCA slot back to its HCA.\r
+ *\r
+ * g_hca_lock is held while the table is rebuilt and released around\r
+ * poll() and the callbacks. When the pipe fires, HCAs flagged\r
+ * destroy == 1 are unlinked and flagged destroy == 2 so\r
+ * dapls_ib_close_hca() can proceed.\r
+ */\r
+void dapli_thread(void *arg)\r
+{\r
+       struct pollfd ufds[__FD_SETSIZE];\r
+       struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };\r
+       struct _ib_hca_transport *hca;\r
+       int ret, idx, fds;\r
+       char rbuf[2];\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                    " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",\r
+                    dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],\r
+                    g_cm_events->fd);\r
+\r
+       /* Poll across pipe, CM, AT never changes */\r
+       dapl_os_lock(&g_hca_lock);\r
+       g_ib_thread_state = IB_THREAD_RUN;\r
+\r
+       ufds[0].fd = g_ib_pipe[0];      /* pipe */\r
+       ufds[0].events = POLLIN;\r
+       ufds[1].fd = g_cm_events->fd;   /* uCMA */\r
+       ufds[1].events = POLLIN;\r
+\r
+       while (g_ib_thread_state == IB_THREAD_RUN) {\r
+\r
+               /* build ufds after pipe and uCMA events */\r
+               ufds[0].revents = 0;\r
+               ufds[1].revents = 0;\r
+               idx = 1;\r
+\r
+               /*  Walk HCA list and setup async and CQ events */\r
+               if (!dapl_llist_is_empty(&g_hca_list))\r
+                       hca = dapl_llist_peek_head(&g_hca_list);\r
+               else\r
+                       hca = NULL;\r
+\r
+               while (hca) {\r
+\r
+                       /* each HCA needs two more slots; never overrun */\r
+                       if (idx + 2 >= __FD_SETSIZE) {\r
+                               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                        " ib_thread(%d): ERR poll table full"\r
+                                        " (%d slots), extra devices not"\r
+                                        " serviced\n",\r
+                                        dapl_os_getpid(), __FD_SETSIZE);\r
+                               break;\r
+                       }\r
+\r
+                       /* uASYNC events */\r
+                       ufds[++idx].fd = hca->ib_ctx->async_fd;\r
+                       ufds[idx].events = POLLIN;\r
+                       ufds[idx].revents = 0;\r
+                       uhca[idx] = hca;\r
+\r
+                       /* CQ events are non-direct with CNO's */\r
+                       ufds[++idx].fd = hca->ib_cq->fd;\r
+                       ufds[idx].events = POLLIN;\r
+                       ufds[idx].revents = 0;\r
+                       uhca[idx] = hca;\r
+\r
+                       /* one argument per conversion: pid, slot, hca, fds */\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                                    " ib_thread(%d) poll_fd: hca[%d]=%p,"\r
+                                    " async=%d pipe=%d cm=%d \n",\r
+                                    dapl_os_getpid(), idx, hca,\r
+                                    ufds[idx - 1].fd,\r
+                                    ufds[0].fd, ufds[1].fd);\r
+\r
+                       hca = dapl_llist_next_entry(&g_hca_list,\r
+                                                   (DAPL_LLIST_ENTRY *)\r
+                                                   &hca->entry);\r
+               }\r
+\r
+               /* unlock, and setup poll */\r
+               fds = idx + 1;\r
+               dapl_os_unlock(&g_hca_lock);\r
+               ret = poll(ufds, fds, -1);\r
+               if (ret <= 0) {\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                                    " ib_thread(%d): ERR %s poll\n",\r
+                                    dapl_os_getpid(), strerror(errno));\r
+                       dapl_os_lock(&g_hca_lock);\r
+                       continue;\r
+               }\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                            " ib_thread(%d) poll_event: "\r
+                            " async=0x%x pipe=0x%x cm=0x%x \n",\r
+                            dapl_os_getpid(), ufds[idx].revents,\r
+                            ufds[0].revents, ufds[1].revents);\r
+\r
+               /* uCMA events */\r
+               if (ufds[1].revents == POLLIN)\r
+                       dapli_cma_event_cb();\r
+\r
+               /* check and process CQ and ASYNC events, per device */\r
+               for (idx = 2; idx < fds; idx++) {\r
+                       if (ufds[idx].revents == POLLIN) {\r
+                               dapli_cq_event_cb(uhca[idx]);\r
+                               dapli_async_event_cb(uhca[idx]);\r
+                       }\r
+               }\r
+\r
+               /* check and process user events, PIPE */\r
+               if (ufds[0].revents == POLLIN) {\r
+                       if (read(g_ib_pipe[0], rbuf, 2) == -1)\r
+                               dapl_log(DAPL_DBG_TYPE_THREAD,\r
+                                        " cr_thread: pipe rd err= %s\n",\r
+                                        strerror(errno));\r
+\r
+                       /*\r
+                        * cleanup any device on list marked for destroy;\r
+                        * an HCA owns two slots, the second visit sees\r
+                        * destroy == 2 and is skipped\r
+                        */\r
+                       for (idx = 3; idx < fds; idx++) {\r
+                               if (uhca[idx] && uhca[idx]->destroy == 1) {\r
+                                       dapl_os_lock(&g_hca_lock);\r
+                                       dapl_llist_remove_entry(\r
+                                               &g_hca_list,\r
+                                               (DAPL_LLIST_ENTRY*)\r
+                                               &uhca[idx]->entry);\r
+                                       dapl_os_unlock(&g_hca_lock);\r
+                                       uhca[idx]->destroy = 2;\r
+                               }\r
+                       }\r
+               }\r
+               dapl_os_lock(&g_hca_lock);\r
+       }\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",\r
+                    dapl_os_getpid());\r
+       g_ib_thread_state = IB_THREAD_EXIT;\r
+       dapl_os_unlock(&g_hca_lock);\r
+}\r
+#endif\r
index 143098efefb5564084f61c21636f296c954ddbd3..c2b5c69f1caa43fe51550a8fd724382a7783d87c 100644 (file)
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-
-/*
- * dapl_ib_qp_alloc
- *
- * Alloc a QP
- *
- * Input:
- *     *ep_ptr         pointer to EP INFO
- *     ib_hca_handle   provider HCA handle
- *     ib_pd_handle    provider protection domain handle
- *     cq_recv         provider recv CQ handle
- *     cq_send         provider send CQ handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
-                 IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
-{
-       DAT_EP_ATTR *attr;
-       DAPL_EVD *rcv_evd, *req_evd;
-       ib_cq_handle_t rcv_cq, req_cq;
-       ib_pd_handle_t ib_pd_handle;
-       struct ibv_qp_init_attr qp_create;
-#ifdef _OPENIB_CMA_
-       dp_ib_cm_handle_t conn;
-#endif
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
-                    ia_ptr, ep_ptr, ep_ctx_ptr);
-
-       attr = &ep_ptr->param.ep_attr;
-       ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
-       rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
-       req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
-
-       /* 
-        * DAT allows usage model of EP's with no EVD's but IB does not. 
-        * Create a CQ with zero entries under the covers to support and 
-        * catch any invalid posting. 
-        */
-       if (rcv_evd != DAT_HANDLE_NULL)
-               rcv_cq = rcv_evd->ib_cq_handle;
-       else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
-               rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-       else {
-               struct ibv_comp_channel *channel;
-
-               channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle);
-               if (!channel)
-                       return (dapl_convert_errno(ENOMEM, "create_cq"));
-                 
-               /* Call IB verbs to create CQ */
-               rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                      0, NULL, channel, 0);
-
-               if (rcv_cq == IB_INVALID_HANDLE) {
-                       ibv_destroy_comp_channel(channel);
-                       return (dapl_convert_errno(ENOMEM, "create_cq"));
-               }
-
-               ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
-       }
-       if (req_evd != DAT_HANDLE_NULL)
-               req_cq = req_evd->ib_cq_handle;
-       else
-               req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-
-       /* 
-        * IMPLEMENTATION NOTE:
-        * uDAPL allows consumers to post buffers on the EP after creation
-        * and before a connect request (outbound and inbound). This forces
-        * a binding to a device during the hca_open call and requires the
-        * consumer to predetermine which device to listen on or connect from.
-        * This restriction eliminates any option of listening or connecting 
-        * over multiple devices. uDAPL should add API's to resolve addresses 
-        * and bind to the device at the approriate time (before connect 
-        * and after CR arrives). Discovery should happen at connection time 
-        * based on addressing and not on static configuration during open.
-        */
-
-#ifdef _OPENIB_CMA_
-       /* Allocate CM and initialize lock */
-       if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
-               return (dapl_convert_errno(ENOMEM, "create_cq"));
-
-       /* open identifies the local device; per DAT specification */
-       if (rdma_bind_addr(conn->cm_id,
-                          (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
-               return (dapl_convert_errno(EAFNOSUPPORT, "create_cq"));
-#endif
-       /* Setup attributes and create qp */
-       dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
-       qp_create.send_cq = req_cq;
-       qp_create.cap.max_send_wr = attr->max_request_dtos;
-       qp_create.cap.max_send_sge = attr->max_request_iov;
-       qp_create.cap.max_inline_data =
-           ia_ptr->hca_ptr->ib_trans.max_inline_send;
-       qp_create.qp_type = IBV_QPT_RC;
-       qp_create.qp_context = (void *)ep_ptr;
-
-#ifdef DAT_EXTENSIONS 
-       if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
-#ifdef _OPENIB_CMA_
-               return (DAT_NOT_IMPLEMENTED);
-#endif
-               qp_create.qp_type = IBV_QPT_UD;
-               if (attr->max_message_size >
-                   (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
-                       return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
-               }
-       }
-#endif
-       
-       /* ibv assumes rcv_cq is never NULL, set to req_cq */
-       if (rcv_cq == NULL) {
-               qp_create.recv_cq = req_cq;
-               qp_create.cap.max_recv_wr = 0;
-               qp_create.cap.max_recv_sge = 0;
-       } else {
-               qp_create.recv_cq = rcv_cq;
-               qp_create.cap.max_recv_wr = attr->max_recv_dtos;
-               qp_create.cap.max_recv_sge = attr->max_recv_iov;
-       }
-
-#ifdef _OPENIB_CMA_
-       if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
-               dapls_ib_cm_free(conn, ep_ptr);
-               return (dapl_convert_errno(errno, "create_qp"));
-       }
-       ep_ptr->qp_handle = conn->cm_id->qp;
-       ep_ptr->cm_handle = conn;
-       ep_ptr->qp_state = IBV_QPS_INIT;
-               
-       /* setup up ep->param to reference the bound local address and port */
-       ep_ptr->param.local_ia_address_ptr = 
-               &conn->cm_id->route.addr.src_addr;
-       ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
-#else
-       ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
-       if (!ep_ptr->qp_handle)
-               return (dapl_convert_errno(ENOMEM, "create_qp"));
-               
-       /* Setup QP attributes for INIT state on the way out */
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) {
-               ibv_destroy_qp(ep_ptr->qp_handle);
-               ep_ptr->qp_handle = IB_INVALID_HANDLE;
-               return DAT_INTERNAL_ERROR;
-       }
-#endif
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
-                    ep_ptr->qp_handle->qp_num,
-                    qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
-                    qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_free
- *
- * Free a QP
- *
- * Input:
- *     ia_handle       IA handle
- *     *ep_ptr         pointer to EP INFO
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *  dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",
-                    ep_ptr, ep_ptr->qp_handle);
-
-       if (ep_ptr->cm_handle != NULL) {
-               dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
-       }
-       
-       if (ep_ptr->qp_handle != NULL) {
-               /* force error state to flush queue, then destroy */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);
-
-               if (ibv_destroy_qp(ep_ptr->qp_handle))
-                       return (dapl_convert_errno(errno, "destroy_qp"));
-
-               ep_ptr->qp_handle = NULL;
-       }
-
-#ifdef DAT_EXTENSIONS
-       /* UD endpoints can have many CR associations and will not
-        * set ep->cm_handle. Call provider with cm_ptr null to incidate
-        * UD type multi CR's for this EP. It will parse internal list
-        * and cleanup all associations.
-        */
-       if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD) 
-               dapls_ib_cm_free(NULL, ep_ptr);
-#endif
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_modify
- *
- * Set the QP to the parameters specified in an EP_PARAM
- *
- * The EP_PARAM structure that is provided has been
- * sanitized such that only non-zero values are valid.
- *
- * Input:
- *     ib_hca_handle           HCA handle
- *     qp_handle               QP handle
- *     ep_attr                 Sanitized EP Params
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
-                  IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
-{
-       struct ibv_qp_attr qp_attr;
-
-       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
-               return DAT_INVALID_PARAMETER;
-
-       /* 
-        * EP state, qp_handle state should be an indication
-        * of current state but the only way to be sure is with
-        * a user mode ibv_query_qp call which is NOT available 
-        */
-
-       /* move to error state if necessary */
-       if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
-           (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
-               return (dapls_modify_qp_state(ep_ptr->qp_handle, 
-                                             IBV_QPS_ERR, 0, 0, 0));
-       }
-
-       /*
-        * Check if we have the right qp_state to modify attributes
-        */
-       if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
-           (ep_ptr->qp_handle->state != IBV_QPS_RTS))
-               return DAT_INVALID_STATE;
-
-       /* Adjust to current EP attributes */
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.cap.max_send_wr = attr->max_request_dtos;
-       qp_attr.cap.max_recv_wr = attr->max_recv_dtos;
-       qp_attr.cap.max_send_sge = attr->max_request_iov;
-       qp_attr.cap.max_recv_sge = attr->max_recv_iov;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
-                    ep_ptr->qp_handle,
-                    qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
-                    qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
-
-       if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            "modify_qp: modify ep %p qp %p failed\n",
-                            ep_ptr, ep_ptr->qp_handle);
-               return (dapl_convert_errno(errno, "modify_qp_state"));
-       }
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_reinit_ep
- *
- * Move the QP to INIT state again.
- *
- * Input:
- *     ep_ptr          DAPL_EP
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- *
- */
-#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_)
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
-       /* work around bug in low level driver - 3/24/09 */
-       /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
-       if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
-               dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
-               dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
-       }
-}
-#else                          // _WIN32 || _WIN64
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
-       if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
-           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
-               /* move to RESET state and then to INIT */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0);
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0);
-       }
-}
-#endif                         // _WIN32 || _WIN64
-
-/* 
- * Generic QP modify for init, reset, error, RTS, RTR
- * For UD, create_ah on RTR, qkey on INIT
- * CM msg provides QP attributes, info in network order
- */
-DAT_RETURN
-dapls_modify_qp_state(IN ib_qp_handle_t                qp_handle,
-                     IN ib_qp_state_t          qp_state, 
-                     IN uint32_t               qpn,
-                     IN uint16_t               lid,
-                     IN ib_gid_handle_t        gid)
-{
-       struct ibv_qp_attr qp_attr;
-       enum ibv_qp_attr_mask mask = IBV_QP_STATE;
-       DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;
-       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
-       int ret;
-
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = qp_state;
-       
-       switch (qp_state) {
-       case IBV_QPS_RTR:
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                               " QPS_RTR: type %d qpn 0x%x lid 0x%x"
-                               " port %d ep %p qp_state %d \n",
-                               qp_handle->qp_type, 
-                               ntohl(qpn), ntohs(lid), 
-                               ia_ptr->hca_ptr->port_num,
-                               ep_ptr, ep_ptr->qp_state);
-
-               mask |= IBV_QP_AV |
-                       IBV_QP_PATH_MTU |
-                       IBV_QP_DEST_QPN |
-                       IBV_QP_RQ_PSN |
-                       IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
-
-               qp_attr.dest_qp_num = ntohl(qpn);
-               qp_attr.rq_psn = 1;
-               qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;
-               qp_attr.max_dest_rd_atomic =
-                       ep_ptr->param.ep_attr.max_rdma_read_out;
-               qp_attr.min_rnr_timer =
-                       ia_ptr->hca_ptr->ib_trans.rnr_timer;
-
-               /* address handle. RC and UD */
-               qp_attr.ah_attr.dlid = ntohs(lid);
-               if (ia_ptr->hca_ptr->ib_trans.global) {
-                       qp_attr.ah_attr.is_global = 1;
-                       qp_attr.ah_attr.grh.dgid.global.subnet_prefix = 
-                               ntohll(gid->global.subnet_prefix);
-                       qp_attr.ah_attr.grh.dgid.global.interface_id = 
-                               ntohll(gid->global.interface_id);
-                       qp_attr.ah_attr.grh.hop_limit =
-                               ia_ptr->hca_ptr->ib_trans.hop_limit;
-                       qp_attr.ah_attr.grh.traffic_class =
-                               ia_ptr->hca_ptr->ib_trans.tclass;
-               }
-               qp_attr.ah_attr.sl = 0;
-               qp_attr.ah_attr.src_path_bits = 0;
-               qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
-
-               /* UD: already in RTR, RTS state */
-               if (qp_handle->qp_type == IBV_QPT_UD) {
-                       mask = IBV_QP_STATE;
-                       if (ep_ptr->qp_state == IBV_QPS_RTR ||
-                               ep_ptr->qp_state == IBV_QPS_RTS)
-                               return DAT_SUCCESS;
-               }
-               break;
-       case IBV_QPS_RTS:
-               if (qp_handle->qp_type == IBV_QPT_RC) {
-                       mask |= IBV_QP_SQ_PSN |
-                               IBV_QP_TIMEOUT |
-                               IBV_QP_RETRY_CNT |
-                               IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
-                       qp_attr.timeout =
-                               ia_ptr->hca_ptr->ib_trans.ack_timer;
-                       qp_attr.retry_cnt =
-                               ia_ptr->hca_ptr->ib_trans.ack_retry;
-                       qp_attr.rnr_retry =
-                               ia_ptr->hca_ptr->ib_trans.rnr_retry;
-                       qp_attr.max_rd_atomic =
-                               ep_ptr->param.ep_attr.max_rdma_read_out;
-               }
-               /* RC and UD */
-               qp_attr.qp_state = IBV_QPS_RTS;
-               qp_attr.sq_psn = 1;
-
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                               " QPS_RTS: psn %x rd_atomic %d ack %d "
-                               " retry %d rnr_retry %d ep %p qp_state %d\n",
-                               qp_attr.sq_psn, qp_attr.max_rd_atomic,
-                               qp_attr.timeout, qp_attr.retry_cnt,
-                               qp_attr.rnr_retry, ep_ptr,
-                               ep_ptr->qp_state);
-
-               if (qp_handle->qp_type == IBV_QPT_UD) {
-                       /* already RTS, multi remote AH's on QP */
-                       if (ep_ptr->qp_state == IBV_QPS_RTS)
-                               return DAT_SUCCESS;
-                       else
-                               mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
-               }
-               break;
-       case IBV_QPS_INIT:
-               mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
-               if (qp_handle->qp_type == IBV_QPT_RC) {
-                       mask |= IBV_QP_ACCESS_FLAGS;
-                       qp_attr.qp_access_flags =
-                               IBV_ACCESS_LOCAL_WRITE |
-                               IBV_ACCESS_REMOTE_WRITE |
-                               IBV_ACCESS_REMOTE_READ |
-                               IBV_ACCESS_REMOTE_ATOMIC |
-                               IBV_ACCESS_MW_BIND;
-               }
-
-               if (qp_handle->qp_type == IBV_QPT_UD) {
-                       /* already INIT, multi remote AH's on QP */
-                       if (ep_ptr->qp_state == IBV_QPS_INIT)
-                               return DAT_SUCCESS;
-                       mask |= IBV_QP_QKEY;
-                       qp_attr.qkey = DAT_UD_QKEY;
-               }
-
-               qp_attr.pkey_index = 0;
-               qp_attr.port_num = ia_ptr->hca_ptr->port_num;
-
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                               " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
-                               qp_attr.pkey_index, qp_attr.port_num,
-                               qp_attr.qp_access_flags, qp_attr.qkey);
-               break;
-       default:
-               break;
-       }
-
-       ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
-       if (ret == 0) {
-               ep_ptr->qp_state = qp_state;
-               return DAT_SUCCESS;
-       } else {
-               return (dapl_convert_errno(errno, "modify_qp_state"));
-       }
-}
-
-/* Modify UD type QP from init, rtr, rts, info network order */
-DAT_RETURN 
-dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
-{
-       struct ibv_qp_attr qp_attr;
-
-       /* modify QP, setup and prepost buffers */
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = IBV_QPS_INIT;
-        qp_attr.pkey_index = 0;
-        qp_attr.port_num = hca->port_num;
-        qp_attr.qkey = DAT_UD_QKEY;
-       if (ibv_modify_qp(qp, &qp_attr, 
-                         IBV_QP_STATE          |
-                         IBV_QP_PKEY_INDEX     |
-                          IBV_QP_PORT          |
-                          IBV_QP_QKEY)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                       " modify_ud_qp INIT: ERR %s\n", strerror(errno));
-               return (dapl_convert_errno(errno, "modify_qp"));
-       }
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = IBV_QPS_RTR;
-       if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) {
-               dapl_log(DAPL_DBG_TYPE_ERR, 
-                       " modify_ud_qp RTR: ERR %s\n", strerror(errno));
-               return (dapl_convert_errno(errno, "modify_qp"));
-       }
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = IBV_QPS_RTS;
-       qp_attr.sq_psn = 1;
-       if (ibv_modify_qp(qp, &qp_attr, 
-                         IBV_QP_STATE | IBV_QP_SQ_PSN)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                       " modify_ud_qp RTS: ERR %s\n", strerror(errno));
-               return (dapl_convert_errno(errno, "modify_qp"));
-       }
-       return DAT_SUCCESS;
-}
-
-/* Create address handle for remote QP, info in network order */
-ib_ah_handle_t 
-dapls_create_ah(IN DAPL_HCA            *hca,
-               IN ib_pd_handle_t       pd,
-               IN ib_qp_handle_t       qp,
-               IN uint16_t             lid,
-               IN ib_gid_handle_t      gid)
-{
-       struct ibv_qp_attr qp_attr;
-       ib_ah_handle_t  ah;
-
-       if (qp->qp_type != IBV_QPT_UD)
-               return NULL;
-
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = IBV_QP_STATE;
-
-       /* address handle. RC and UD */
-       qp_attr.ah_attr.dlid = ntohs(lid);
-       if (gid != NULL) {
-               dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n");
-               qp_attr.ah_attr.is_global = 1;
-               qp_attr.ah_attr.grh.dgid.global.subnet_prefix = 
-                               ntohll(gid->global.subnet_prefix);
-               qp_attr.ah_attr.grh.dgid.global.interface_id = 
-                               ntohll(gid->global.interface_id);
-               qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit;
-               qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass;
-       }
-       qp_attr.ah_attr.sl = 0;
-       qp_attr.ah_attr.src_path_bits = 0;
-       qp_attr.ah_attr.port_num = hca->port_num;
-
-       dapl_log(DAPL_DBG_TYPE_CM, 
-                       " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", 
-                       hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);
-
-       /* UD: create AH for remote side */
-       ah = ibv_create_ah(pd, &qp_attr.ah_attr);
-       if (!ah) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                       " create_ah: ERR %s\n", strerror(errno));
-               return NULL;
-       }
-
-       dapl_log(DAPL_DBG_TYPE_CM, 
-                       " dapls_create_ah: AH %p for lid %x\n", 
-                       ah, qp_attr.ah_attr.dlid);
-
-       return ah;
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
+/*\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ *    copy of which is available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+\r
+/*\r
+ * dapls_ib_qp_alloc\r
+ *\r
+ * Create the IB queue pair that backs an endpoint and leave it in INIT.\r
+ *\r
+ * Input:\r
+ *     ia_ptr          IA owning the EP; supplies the HCA handle, transport\r
+ *                     settings and, for CMA, the local address to bind\r
+ *     ep_ptr          EP whose attributes, PZ and EVDs describe the QP;\r
+ *                     qp_handle (and cm_handle/qp_state for CMA) are set\r
+ *     ep_ctx_ptr      only logged here; qp_context is set to ep_ptr\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_INTERNAL_ERROR      non-CMA: moving the new QP to INIT failed\r
+ *     DAT_NOT_IMPLEMENTED     UD service type requested with CMA\r
+ *     DAT_INVALID_PARAMETER   UD max_message_size larger than 128 << mtu\r
+ *     dapl_convert_errno()    value for CQ, CM, bind or QP creation errors\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,\r
+                 IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)\r
+{\r
+       DAT_EP_ATTR *attr;\r
+       DAPL_EVD *rcv_evd, *req_evd;\r
+       ib_cq_handle_t rcv_cq, req_cq;\r
+       ib_pd_handle_t ib_pd_handle;\r
+       struct ibv_qp_init_attr qp_create;\r
+#ifdef _OPENIB_CMA_\r
+       dp_ib_cm_handle_t conn;\r
+#endif\r
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                    " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",\r
+                    ia_ptr, ep_ptr, ep_ctx_ptr);\r
+\r
+       attr = &ep_ptr->param.ep_attr;\r
+       ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;\r
+       rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;\r
+       req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;\r
+\r
+       /*\r
+        * DAT allows usage model of EP's with no EVD's but IB does not.\r
+        * Create a CQ with zero entries under the covers to support and\r
+        * catch any invalid posting.\r
+        *\r
+        * NOTE(review): the ib_cq_empty test below looks inverted - when\r
+        * no shared empty CQ is cached it assigns that (NULL) handle, and\r
+        * the creation branch is only reached once one already exists.\r
+        * Left unchanged here; the rcv_cq == NULL fallback further down\r
+        * covers the NULL case. Confirm the intent before changing it.\r
+        */\r
+       if (rcv_evd != DAT_HANDLE_NULL)\r
+               rcv_cq = rcv_evd->ib_cq_handle;\r
+       else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)\r
+               rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;\r
+       else {\r
+               struct ibv_comp_channel *channel;\r
+\r
+               channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle);\r
+               if (!channel)\r
+                       return (dapl_convert_errno(ENOMEM, "create_cq"));\r
+\r
+               /* Call IB verbs to create CQ */\r
+               rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,\r
+                                      0, NULL, channel, 0);\r
+\r
+               if (rcv_cq == IB_INVALID_HANDLE) {\r
+                       ibv_destroy_comp_channel(channel);\r
+                       return (dapl_convert_errno(ENOMEM, "create_cq"));\r
+               }\r
+\r
+               ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;\r
+       }\r
+       if (req_evd != DAT_HANDLE_NULL)\r
+               req_cq = req_evd->ib_cq_handle;\r
+       else\r
+               req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;\r
+\r
+       /*\r
+        * IMPLEMENTATION NOTE:\r
+        * uDAPL allows consumers to post buffers on the EP after creation\r
+        * and before a connect request (outbound and inbound). This forces\r
+        * a binding to a device during the hca_open call and requires the\r
+        * consumer to predetermine which device to listen on or connect from.\r
+        * This restriction eliminates any option of listening or connecting\r
+        * over multiple devices. uDAPL should add API's to resolve addresses\r
+        * and bind to the device at the appropriate time (before connect\r
+        * and after CR arrives). Discovery should happen at connection time\r
+        * based on addressing and not on static configuration during open.\r
+        */\r
+\r
+#ifdef _OPENIB_CMA_\r
+       /* Allocate CM and initialize lock */\r
+       if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)\r
+               return (dapl_convert_errno(ENOMEM, "create_cq"));\r
+\r
+       /* open identifies the local device; per DAT specification */\r
+       if (rdma_bind_addr(conn->cm_id,\r
+                          (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) {\r
+               /* do not leak the CM object created just above */\r
+               dapls_ib_cm_free(conn, ep_ptr);\r
+               return (dapl_convert_errno(EAFNOSUPPORT, "create_cq"));\r
+       }\r
+#endif\r
+       /* Setup attributes and create qp */\r
+       dapl_os_memzero((void *)&qp_create, sizeof(qp_create));\r
+       qp_create.send_cq = req_cq;\r
+       qp_create.cap.max_send_wr = attr->max_request_dtos;\r
+       qp_create.cap.max_send_sge = attr->max_request_iov;\r
+       qp_create.cap.max_inline_data =\r
+           ia_ptr->hca_ptr->ib_trans.max_inline_send;\r
+       qp_create.qp_type = IBV_QPT_RC;\r
+       qp_create.qp_context = (void *)ep_ptr;\r
+\r
+#ifdef DAT_EXTENSIONS\r
+       if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {\r
+#ifdef _OPENIB_CMA_\r
+               /* UD is rejected for CMA; release the CM object first */\r
+               dapls_ib_cm_free(conn, ep_ptr);\r
+               return (DAT_NOT_IMPLEMENTED);\r
+#endif\r
+               qp_create.qp_type = IBV_QPT_UD;\r
+               if (attr->max_message_size >\r
+                   (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {\r
+                       return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);\r
+               }\r
+       }\r
+#endif\r
+\r
+       /* ibv assumes rcv_cq is never NULL, set to req_cq */\r
+       if (rcv_cq == NULL) {\r
+               qp_create.recv_cq = req_cq;\r
+               qp_create.cap.max_recv_wr = 0;\r
+               qp_create.cap.max_recv_sge = 0;\r
+       } else {\r
+               qp_create.recv_cq = rcv_cq;\r
+               qp_create.cap.max_recv_wr = attr->max_recv_dtos;\r
+               qp_create.cap.max_recv_sge = attr->max_recv_iov;\r
+       }\r
+\r
+#ifdef _OPENIB_CMA_\r
+       if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {\r
+               /* save errno: the cleanup call may overwrite it */\r
+               int err = errno;\r
+\r
+               dapls_ib_cm_free(conn, ep_ptr);\r
+               return (dapl_convert_errno(err, "create_qp"));\r
+       }\r
+       ep_ptr->qp_handle = conn->cm_id->qp;\r
+       ep_ptr->cm_handle = conn;\r
+       ep_ptr->qp_state = IBV_QPS_INIT;\r
+\r
+       ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);\r
+#else\r
+       ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);\r
+       if (!ep_ptr->qp_handle)\r
+               return (dapl_convert_errno(ENOMEM, "create_qp"));\r
+\r
+       /* Setup QP attributes for INIT state on the way out */\r
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,\r
+                                 IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) {\r
+               ibv_destroy_qp(ep_ptr->qp_handle);\r
+               ep_ptr->qp_handle = IB_INVALID_HANDLE;\r
+               return DAT_INTERNAL_ERROR;\r
+       }\r
+#endif\r
+       /* qp_num is an integer QP number, so print it as hex, not as %p */\r
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                    " qp_alloc: qpn 0x%x sq %d,%d rq %d,%d\n",\r
+                    ep_ptr->qp_handle->qp_num,\r
+                    qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,\r
+                    qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);\r
+\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_qp_free\r
+ *\r
+ * Release the CM object and the QP attached to an endpoint.\r
+ *\r
+ * Input:\r
+ *     ia_ptr          IA pointer (not referenced by this function)\r
+ *     *ep_ptr         pointer to EP INFO; cm_handle and qp_handle are read,\r
+ *                     qp_handle is set to NULL once the QP is destroyed\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     dapl_convert_errno(errno) if ibv_destroy_qp fails; in that case\r
+ *     qp_handle is left set and the UD cleanup at the end is skipped\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",\r
+                    ep_ptr, ep_ptr->qp_handle);\r
+\r
+       /*\r
+        * Release the CM object first; qp_handle is read again afterwards.\r
+        * NOTE(review): cm_handle is not cleared here - presumably\r
+        * dapls_ib_cm_free() takes care of that; verify.\r
+        */\r
+       if (ep_ptr->cm_handle != NULL) {\r
+               dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);\r
+       }\r
+       \r
+       if (ep_ptr->qp_handle != NULL) {\r
+               /* force error state to flush queue, then destroy */\r
+               /* (the result of the state change is not checked) */\r
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);\r
+\r
+               if (ibv_destroy_qp(ep_ptr->qp_handle))\r
+                       return (dapl_convert_errno(errno, "destroy_qp"));\r
+\r
+               ep_ptr->qp_handle = NULL;\r
+       }\r
+\r
+#ifdef DAT_EXTENSIONS\r
+       /* UD endpoints can have many CR associations and will not\r
+        * set ep->cm_handle. Call provider with cm_ptr null to indicate\r
+        * UD type multi CR's for this EP. It will parse internal list\r
+        * and cleanup all associations.\r
+        */\r
+       if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD) \r
+               dapls_ib_cm_free(NULL, ep_ptr);\r
+#endif\r
+\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_qp_modify\r
+ *\r
+ * Set the QP to the parameters specified in an EP_PARAM\r
+ *\r
+ * The EP_PARAM structure that is provided has been\r
+ * sanitized such that only non-zero values are valid.\r
+ *\r
+ * Only the QP capabilities are changed (IBV_QP_CAP): send/recv work\r
+ * request depth and send/recv SGE counts, all taken from attr.\r
+ *\r
+ * Input:\r
+ *     ia_ptr                  IA handle (not referenced in this function)\r
+ *     ep_ptr                  EP whose qp_handle is modified\r
+ *     attr                    Sanitized EP Params\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS\r
+ *     DAT_INVALID_PARAMETER   qp_handle is IB_INVALID_HANDLE\r
+ *     DAT_INVALID_STATE       QP is neither in RTR nor in RTS\r
+ *     result of dapls_modify_qp_state when the QP is only moved to ERR\r
+ *     dapl_convert_errno(errno, "modify_qp_state") if ibv_modify_qp fails\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,\r
+                  IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)\r
+{\r
+       struct ibv_qp_attr qp_attr;\r
+\r
+       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)\r
+               return DAT_INVALID_PARAMETER;\r
+\r
+       /* \r
+        * EP state, qp_handle state should be an indication\r
+        * of current state but the only way to be sure is with\r
+        * a user mode ibv_query_qp call which is NOT available \r
+        */\r
+\r
+       /* move to error state if necessary: the EP is tracked as ERROR but\r
+        * the verbs QP is not in ERR yet; return that result without\r
+        * attempting any capability change */\r
+       if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&\r
+           (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {\r
+               return (dapls_modify_qp_state(ep_ptr->qp_handle, \r
+                                             IBV_QPS_ERR, 0, 0, 0));\r
+       }\r
+\r
+       /*\r
+        * Check if we have the right qp_state to modify attributes\r
+        * (only RTR and RTS are accepted)\r
+        */\r
+       if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&\r
+           (ep_ptr->qp_handle->state != IBV_QPS_RTS))\r
+               return DAT_INVALID_STATE;\r
+\r
+       /* Adjust to current EP attributes; every other qp_attr field\r
+        * stays zero and only IBV_QP_CAP is passed in the mask */\r
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+       qp_attr.cap.max_send_wr = attr->max_request_dtos;\r
+       qp_attr.cap.max_recv_wr = attr->max_recv_dtos;\r
+       qp_attr.cap.max_send_sge = attr->max_request_iov;\r
+       qp_attr.cap.max_recv_sge = attr->max_recv_iov;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                    "modify_qp: qp %p sq %d,%d, rq %d,%d\n",\r
+                    ep_ptr->qp_handle,\r
+                    qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,\r
+                    qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);\r
+\r
+       if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {\r
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+                            "modify_qp: modify ep %p qp %p failed\n",\r
+                            ep_ptr, ep_ptr->qp_handle);\r
+               return (dapl_convert_errno(errno, "modify_qp_state"));\r
+       }\r
+\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_reinit_ep\r
+ *\r
+ * Move the QP to INIT state again.\r
+ *\r
+ * Two build variants:\r
+ *   - _WIN32, _WIN64 or _OPENIB_CMA_ defined: the QP is released with\r
+ *     dapls_ib_qp_free and a new one is requested with dapls_ib_qp_alloc\r
+ *   - otherwise: a non-UD QP is moved to RESET and then to INIT\r
+ * In both variants nothing is done when qp_handle is IB_INVALID_HANDLE,\r
+ * and the return values of the calls made are not checked.\r
+ *\r
+ * Input:\r
+ *     ep_ptr          DAPL_EP\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     void\r
+ *\r
+ */\r
+#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_)\r
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)\r
+{\r
+       /* work around bug in low level driver - 3/24/09 */\r
+       /* RTS -> RESET -> INIT -> ERROR QP transition crashes system, so the QP is replaced instead */\r
+       if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {\r
+               dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);\r
+               dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);\r
+       }\r
+}\r
+#else                          // !(_WIN32 || _WIN64 || _OPENIB_CMA_)\r
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)\r
+{\r
+       if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&\r
+           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {\r
+               /* move to RESET state and then to INIT (UD QPs are left as is) */\r
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0);\r
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0);\r
+       }\r
+}\r
+#endif                         // _WIN32 || _WIN64 || _OPENIB_CMA_\r
+\r
+/* \r
+ * Generic QP modify for init, reset, error, RTS, RTR\r
+ * For UD, qkey on INIT; the address attributes filled in for RTR are\r
+ * not applied to a UD QP (its mask is reset to the state bit only)\r
+ * CM msg provides QP attributes, info in network order\r
+ *\r
+ * Input:\r
+ *     qp_handle       QP to modify; its qp_context is used as the DAPL_EP\r
+ *     qp_state        target state\r
+ *     qpn             remote QP number, network order (used for RTR)\r
+ *     lid             remote LID, network order (used for RTR)\r
+ *     gid             remote GID, network order; dereferenced for RTR\r
+ *                     only when ib_trans.global is set\r
+ *\r
+ * Attribute mask per target state (IBV_QP_STATE is always included):\r
+ *     RTR     non-UD: AV, path MTU, dest QPN, RQ PSN, max dest rd atomic,\r
+ *             min RNR timer. UD: state only.\r
+ *     RTS     RC: SQ PSN, timeout, retry count, RNR retry, max rd atomic.\r
+ *             UD: SQ PSN.\r
+ *     INIT    pkey index and port; RC adds access flags, UD adds qkey.\r
+ *     other   state only.\r
+ *\r
+ * For UD QPs the call returns DAT_SUCCESS without calling ibv_modify_qp\r
+ * when ep_ptr->qp_state already equals the target (or is already RTS\r
+ * when RTR is requested).\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS; ep_ptr->qp_state is set to qp_state when\r
+ *     ibv_modify_qp succeeded\r
+ *     dapl_convert_errno(errno, "modify_qp_state") otherwise\r
+ *\r
+ * NOTE(review): the error path reads errno, not the ibv_modify_qp return\r
+ * value - confirm the verbs library in use sets errno on failure.\r
+ */\r
+DAT_RETURN\r
+dapls_modify_qp_state(IN ib_qp_handle_t                qp_handle,\r
+                     IN ib_qp_state_t          qp_state, \r
+                     IN uint32_t               qpn,\r
+                     IN uint16_t               lid,\r
+                     IN ib_gid_handle_t        gid)\r
+{\r
+       struct ibv_qp_attr qp_attr;\r
+       enum ibv_qp_attr_mask mask = IBV_QP_STATE;\r
+       DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;\r
+       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;\r
+       int ret;\r
+\r
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+       qp_attr.qp_state = qp_state;\r
+       \r
+       switch (qp_state) {\r
+       case IBV_QPS_RTR:\r
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                               " QPS_RTR: type %d qpn 0x%x lid 0x%x"\r
+                               " port %d ep %p qp_state %d \n",\r
+                               qp_handle->qp_type, \r
+                               ntohl(qpn), ntohs(lid), \r
+                               ia_ptr->hca_ptr->port_num,\r
+                               ep_ptr, ep_ptr->qp_state);\r
+\r
+               mask |= IBV_QP_AV |\r
+                       IBV_QP_PATH_MTU |\r
+                       IBV_QP_DEST_QPN |\r
+                       IBV_QP_RQ_PSN |\r
+                       IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;\r
+\r
+               qp_attr.dest_qp_num = ntohl(qpn);\r
+               qp_attr.rq_psn = 1;\r
+               qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;\r
+               qp_attr.max_dest_rd_atomic =\r
+                       ep_ptr->param.ep_attr.max_rdma_read_out;\r
+               qp_attr.min_rnr_timer =\r
+                       ia_ptr->hca_ptr->ib_trans.rnr_timer;\r
+\r
+               /* address handle. RC and UD; GRH fields are filled in only\r
+                * when global routing is enabled for this HCA */\r
+               qp_attr.ah_attr.dlid = ntohs(lid);\r
+               if (ia_ptr->hca_ptr->ib_trans.global) {\r
+                       qp_attr.ah_attr.is_global = 1;\r
+                       qp_attr.ah_attr.grh.dgid.global.subnet_prefix = \r
+                               ntohll(gid->global.subnet_prefix);\r
+                       qp_attr.ah_attr.grh.dgid.global.interface_id = \r
+                               ntohll(gid->global.interface_id);\r
+                       qp_attr.ah_attr.grh.hop_limit =\r
+                               ia_ptr->hca_ptr->ib_trans.hop_limit;\r
+                       qp_attr.ah_attr.grh.traffic_class =\r
+                               ia_ptr->hca_ptr->ib_trans.tclass;\r
+               }\r
+               qp_attr.ah_attr.sl = 0;\r
+               qp_attr.ah_attr.src_path_bits = 0;\r
+               qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;\r
+\r
+               /* UD: already in RTR, RTS state -> nothing to do; otherwise\r
+                * only the state bit is applied, the attributes set above\r
+                * are dropped from the mask */\r
+               if (qp_handle->qp_type == IBV_QPT_UD) {\r
+                       mask = IBV_QP_STATE;\r
+                       if (ep_ptr->qp_state == IBV_QPS_RTR ||\r
+                               ep_ptr->qp_state == IBV_QPS_RTS)\r
+                               return DAT_SUCCESS;\r
+               }\r
+               break;\r
+       case IBV_QPS_RTS:\r
+               if (qp_handle->qp_type == IBV_QPT_RC) {\r
+                       mask |= IBV_QP_SQ_PSN |\r
+                               IBV_QP_TIMEOUT |\r
+                               IBV_QP_RETRY_CNT |\r
+                               IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;\r
+                       qp_attr.timeout =\r
+                               ia_ptr->hca_ptr->ib_trans.ack_timer;\r
+                       qp_attr.retry_cnt =\r
+                               ia_ptr->hca_ptr->ib_trans.ack_retry;\r
+                       qp_attr.rnr_retry =\r
+                               ia_ptr->hca_ptr->ib_trans.rnr_retry;\r
+                       qp_attr.max_rd_atomic =\r
+                               ep_ptr->param.ep_attr.max_rdma_read_out;\r
+               }\r
+               /* RC and UD (qp_state repeats the value assigned before\r
+                * the switch) */\r
+               qp_attr.qp_state = IBV_QPS_RTS;\r
+               qp_attr.sq_psn = 1;\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                               " QPS_RTS: psn %x rd_atomic %d ack %d "\r
+                               " retry %d rnr_retry %d ep %p qp_state %d\n",\r
+                               qp_attr.sq_psn, qp_attr.max_rd_atomic,\r
+                               qp_attr.timeout, qp_attr.retry_cnt,\r
+                               qp_attr.rnr_retry, ep_ptr,\r
+                               ep_ptr->qp_state);\r
+\r
+               if (qp_handle->qp_type == IBV_QPT_UD) {\r
+                       /* already RTS, multi remote AH's on QP */\r
+                       if (ep_ptr->qp_state == IBV_QPS_RTS)\r
+                               return DAT_SUCCESS;\r
+                       else\r
+                               mask = IBV_QP_STATE | IBV_QP_SQ_PSN;\r
+               }\r
+               break;\r
+       case IBV_QPS_INIT:\r
+               mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;\r
+               if (qp_handle->qp_type == IBV_QPT_RC) {\r
+                       mask |= IBV_QP_ACCESS_FLAGS;\r
+                       qp_attr.qp_access_flags =\r
+                               IBV_ACCESS_LOCAL_WRITE |\r
+                               IBV_ACCESS_REMOTE_WRITE |\r
+                               IBV_ACCESS_REMOTE_READ |\r
+                               IBV_ACCESS_REMOTE_ATOMIC |\r
+                               IBV_ACCESS_MW_BIND;\r
+               }\r
+\r
+               if (qp_handle->qp_type == IBV_QPT_UD) {\r
+                       /* already INIT, multi remote AH's on QP */\r
+                       if (ep_ptr->qp_state == IBV_QPS_INIT)\r
+                               return DAT_SUCCESS;\r
+                       mask |= IBV_QP_QKEY;\r
+                       qp_attr.qkey = DAT_UD_QKEY;\r
+               }\r
+\r
+               qp_attr.pkey_index = 0;\r
+               qp_attr.port_num = ia_ptr->hca_ptr->port_num;\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+                               " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",\r
+                               qp_attr.pkey_index, qp_attr.port_num,\r
+                               qp_attr.qp_access_flags, qp_attr.qkey);\r
+               break;\r
+       default:\r
+               break;\r
+       }\r
+\r
+       ret = ibv_modify_qp(qp_handle, &qp_attr, mask);\r
+       if (ret == 0) {\r
+               ep_ptr->qp_state = qp_state;\r
+               return DAT_SUCCESS;\r
+       } else {\r
+               return (dapl_convert_errno(errno, "modify_qp_state"));\r
+       }\r
+}\r
+\r
+/*\r
+ * Modify UD type QP from init, rtr, rts\r
+ *\r
+ * Issues three ibv_modify_qp calls in sequence:\r
+ *     INIT    state, pkey index 0, port hca->port_num, qkey DAT_UD_QKEY\r
+ *     RTR     state only\r
+ *     RTS     state and SQ PSN 1\r
+ *\r
+ * Input:\r
+ *     hca     supplies port_num for the INIT step\r
+ *     qp      QP to modify\r
+ *\r
+ * Returns:\r
+ *     DAT_SUCCESS when all three steps succeed\r
+ *     dapl_convert_errno(errno, "modify_qp") at the first failing step,\r
+ *     after the error is logged; later steps are not attempted\r
+ *\r
+ * NOTE(review): errno is read once for the log message and again for the\r
+ * return value - confirm dapl_log cannot change errno in between.\r
+ */\r
+DAT_RETURN \r
+dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)\r
+{\r
+       struct ibv_qp_attr qp_attr;\r
+\r
+       /* INIT step; no buffers are posted by this function */\r
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+       qp_attr.qp_state = IBV_QPS_INIT;\r
+        qp_attr.pkey_index = 0;\r
+        qp_attr.port_num = hca->port_num;\r
+        qp_attr.qkey = DAT_UD_QKEY;\r
+       if (ibv_modify_qp(qp, &qp_attr, \r
+                         IBV_QP_STATE          |\r
+                         IBV_QP_PKEY_INDEX     |\r
+                          IBV_QP_PORT          |\r
+                          IBV_QP_QKEY)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                       " modify_ud_qp INIT: ERR %s\n", strerror(errno));\r
+               return (dapl_convert_errno(errno, "modify_qp"));\r
+       }\r
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+       qp_attr.qp_state = IBV_QPS_RTR;\r
+       if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR, \r
+                       " modify_ud_qp RTR: ERR %s\n", strerror(errno));\r
+               return (dapl_convert_errno(errno, "modify_qp"));\r
+       }\r
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+       qp_attr.qp_state = IBV_QPS_RTS;\r
+       qp_attr.sq_psn = 1;\r
+       if (ibv_modify_qp(qp, &qp_attr, \r
+                         IBV_QP_STATE | IBV_QP_SQ_PSN)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                       " modify_ud_qp RTS: ERR %s\n", strerror(errno));\r
+               return (dapl_convert_errno(errno, "modify_qp"));\r
+       }\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * Create address handle for remote QP, info in network order\r
+ *\r
+ * Input:\r
+ *     hca     supplies port_num and, when a GID is given, the hop limit\r
+ *             and traffic class from ib_trans\r
+ *     pd      protection domain passed to ibv_create_ah\r
+ *     qp      only its qp_type is examined; must be IBV_QPT_UD\r
+ *     lid     remote LID, network order (converted with ntohs)\r
+ *     gid     remote GID, network order; NULL means no GRH is set up\r
+ *\r
+ * Returns:\r
+ *     the new address handle\r
+ *     NULL when qp is not a UD QP, or when ibv_create_ah fails (logged)\r
+ *\r
+ * NOTE(review): qp_attr.qp_state is assigned IBV_QP_STATE, which is an\r
+ * attribute-mask flag rather than a QP state. Only qp_attr.ah_attr is\r
+ * handed to ibv_create_ah, so the value is not consumed in this function.\r
+ */\r
+ib_ah_handle_t \r
+dapls_create_ah(IN DAPL_HCA            *hca,\r
+               IN ib_pd_handle_t       pd,\r
+               IN ib_qp_handle_t       qp,\r
+               IN uint16_t             lid,\r
+               IN ib_gid_handle_t      gid)\r
+{\r
+       struct ibv_qp_attr qp_attr;\r
+       ib_ah_handle_t  ah;\r
+\r
+       if (qp->qp_type != IBV_QPT_UD)\r
+               return NULL;\r
+\r
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+       qp_attr.qp_state = IBV_QP_STATE;\r
+\r
+       /* address handle attributes; only UD QPs reach this point */\r
+       qp_attr.ah_attr.dlid = ntohs(lid);\r
+       if (gid != NULL) {\r
+               dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n");\r
+               qp_attr.ah_attr.is_global = 1;\r
+               qp_attr.ah_attr.grh.dgid.global.subnet_prefix = \r
+                               ntohll(gid->global.subnet_prefix);\r
+               qp_attr.ah_attr.grh.dgid.global.interface_id = \r
+                               ntohll(gid->global.interface_id);\r
+               qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit;\r
+               qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass;\r
+       }\r
+       qp_attr.ah_attr.sl = 0;\r
+       qp_attr.ah_attr.src_path_bits = 0;\r
+       qp_attr.ah_attr.port_num = hca->port_num;\r
+\r
+       dapl_log(DAPL_DBG_TYPE_CM, \r
+                       " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", \r
+                       hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);\r
+\r
+       /* UD: create AH for remote side */\r
+       ah = ibv_create_ah(pd, &qp_attr.ah_attr);\r
+       if (!ah) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                       " create_ah: ERR %s\n", strerror(errno));\r
+               return NULL;\r
+       }\r
+\r
+       dapl_log(DAPL_DBG_TYPE_CM, \r
+                       " dapls_create_ah: AH %p for lid %x\n", \r
+                       ah, qp_attr.ah_attr.dlid);\r
+\r
+       return ah;\r
+}\r
+\r
+/*\r
+ * Local variables:\r
+ *  c-indent-level: 4\r
+ *  c-basic-offset: 4\r
+ *  tab-width: 8\r
+ * End:\r
+ */\r
index dedcb16f3758d2ac8b8d5c14a4567c81c525b269..bb3893a72c3de5ade7fb323216f2f4a75e1f1872 100644 (file)
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_util.c
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The uDAPL openib provider - init, open, close, utilities
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-#ifdef RCSID
-static const char rcsid[] = "$Id:  $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-ib_thread_state_t g_ib_thread_state = 0;
-DAPL_OS_THREAD g_ib_thread;
-DAPL_OS_LOCK g_hca_lock;
-struct dapl_llist_entry *g_hca_list;
-
-void dapli_thread(void *arg);
-DAT_RETURN  dapli_ib_thread_init(void);
-void dapli_ib_thread_destroy(void);
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include <rdma\winverbs.h>
-
-static COMP_SET ufds;
-
-static int dapls_os_init(void)
-{
-       return CompSetInit(&ufds);
-}
-
-static void dapls_os_release(void)
-{
-       CompSetCleanup(&ufds);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       verbs->channel.Milliseconds = 0;
-       return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       channel->comp_channel.Milliseconds = 0;
-       return 0;
-}
-
-static int dapls_thread_signal(void)
-{
-       CompSetCancel(&ufds);
-       return 0;
-}
-#else                          // _WIN64 || WIN32
-int g_ib_pipe[2];
-
-static int dapls_os_init(void)
-{
-       /* create pipe for waking up work thread */
-       return pipe(g_ib_pipe);
-}
-
-static void dapls_os_release(void)
-{
-       /* close pipe? */
-}
-
-static int dapls_config_fd(int fd)
-{
-       int opts;
-
-       opts = fcntl(fd, F_GETFL);
-       if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
-                        fd, opts, strerror(errno));
-               return errno;
-       }
-
-       return 0;
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       return dapls_config_fd(verbs->async_fd);
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       return dapls_config_fd(channel->fd);
-}
-
-static int dapls_thread_signal(void)
-{
-       return write(g_ib_pipe[1], "w", sizeof "w");
-}
-#endif
-
-
-static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
-       DAPL_SOCKET listen_socket;
-       struct sockaddr_in addr;
-       socklen_t addrlen = sizeof(addr);
-       int ret;
-
-       listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-       if (listen_socket == DAPL_INVALID_SOCKET)
-               return 1;
-
-       memset(&addr, 0, sizeof addr);
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = htonl(0x7f000001);
-       ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr);
-       if (ret)
-               goto err1;
-
-       ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen);
-       if (ret)
-               goto err1;
-
-       ret = listen(listen_socket, 0);
-       if (ret)
-               goto err1;
-
-       hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-       if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)
-               goto err1;
-
-       ret = connect(hca_ptr->ib_trans.scm[1], 
-                     (struct sockaddr *)&addr, sizeof(addr));
-       if (ret)
-               goto err2;
-
-       hca_ptr->ib_trans.scm[0] = accept(listen_socket, NULL, NULL);
-       if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)
-               goto err2;
-
-       closesocket(listen_socket);
-       return 0;
-
-      err2:
-       closesocket(hca_ptr->ib_trans.scm[1]);
-      err1:
-       closesocket(listen_socket);
-       return 1;
-}
-
-static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
-       closesocket(hca_ptr->ib_trans.scm[0]);
-       closesocket(hca_ptr->ib_trans.scm[1]);
-}
-
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- *     none
- *
- * Output:
- *     none
- *
- * Returns:
- *     0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
-       /* initialize hca_list */
-       dapl_os_lock_init(&g_hca_lock);
-       dapl_llist_init_head(&g_hca_list);
-
-       if (dapls_os_init())
-               return 1;
-
-       return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
-       dapli_ib_thread_destroy();
-       dapls_os_release();
-       return 0;
-}
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- *      *hca_name         pointer to provider device name
- *      *ib_hca_handle_p  pointer to provide HCA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *      dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
-       struct ibv_device **dev_list;
-       struct ibv_port_attr port_attr;
-       int i;
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: %s - %p\n", hca_name, hca_ptr);
-
-       /* get the IP address of the device */
-       dat_status = getlocalipaddr((DAT_SOCK_ADDR *) &hca_ptr->hca_address,
-                                   sizeof(DAT_SOCK_ADDR6));
-       if (dat_status != DAT_SUCCESS)
-               return dat_status;
-
-#ifdef DAPL_DBG
-       /* DBG: unused port, set process id, lower 16 bits of pid */
-       ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_port = 
-                                       htons((uint16_t)dapl_os_getpid());
-#endif
-        /* Get list of all IB devices, find match, open */
-       dev_list = ibv_get_device_list(NULL);
-       if (!dev_list) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " open_hca: ibv_get_device_list() failed\n",
-                            hca_name);
-               return DAT_INTERNAL_ERROR;
-       }
-
-       for (i = 0; dev_list[i]; ++i) {
-               hca_ptr->ib_trans.ib_dev = dev_list[i];
-               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                           hca_name))
-                       goto found;
-       }
-
-       dapl_log(DAPL_DBG_TYPE_ERR,
-                " open_hca: device %s not found\n", hca_name);
-       goto err;
-
-found:
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",
-                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                    (unsigned long long)
-                    ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
-
-       hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
-       if (!hca_ptr->ib_hca_handle) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: dev open failed for %s, err=%s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                        strerror(errno));
-               goto err;
-       }
-       hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;
-       dapls_config_verbs(hca_ptr->ib_hca_handle);
-
-       /* get lid for this hca-port, network order */
-       if (ibv_query_port(hca_ptr->ib_hca_handle,
-                          (uint8_t) hca_ptr->port_num, &port_attr)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: get lid ERR for %s, err=%s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                        strerror(errno));
-               goto err;
-       } else {
-               hca_ptr->ib_trans.lid = htons(port_attr.lid);
-       }
-
-       /* get gid for this hca-port, network order */
-       if (ibv_query_gid(hca_ptr->ib_hca_handle,
-                         (uint8_t) hca_ptr->port_num,
-                         0, &hca_ptr->ib_trans.gid)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: query GID ERR for %s, err=%s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                        strerror(errno));
-               goto err;
-       }
-
-       /* set RC tunables via enviroment or default */
-       hca_ptr->ib_trans.max_inline_send =
-           dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
-       hca_ptr->ib_trans.ack_retry =
-           dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
-       hca_ptr->ib_trans.ack_timer =
-           dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
-       hca_ptr->ib_trans.rnr_retry =
-           dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
-       hca_ptr->ib_trans.rnr_timer =
-           dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
-       hca_ptr->ib_trans.global =
-           dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);
-       hca_ptr->ib_trans.hop_limit =
-           dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);
-       hca_ptr->ib_trans.tclass =
-           dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);
-       hca_ptr->ib_trans.mtu =
-           dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));
-
-
-       /* EVD events without direct CQ channels, CNO support */
-       hca_ptr->ib_trans.ib_cq =
-           ibv_create_comp_channel(hca_ptr->ib_hca_handle);
-       if (hca_ptr->ib_trans.ib_cq == NULL) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: ibv_create_comp_channel ERR %s\n",
-                        strerror(errno));
-               goto bail;
-       }
-       dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq);
-       
-       dat_status = dapli_ib_thread_init();
-       if (dat_status != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to init cq thread lock\n");
-               goto bail;
-       }
-       /* 
-        * Put new hca_transport on list for async and CQ event processing 
-        * Wakeup work thread to add to polling list
-        */
-       dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&hca_ptr->ib_trans.entry);
-       dapl_os_lock(&g_hca_lock);
-       dapl_llist_add_tail(&g_hca_list,
-                           (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,
-                           &hca_ptr->ib_trans.entry);
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " open_hca: thread wakeup error = %s\n",
-                        strerror(errno));
-       dapl_os_unlock(&g_hca_lock);
-
-       /* initialize cr_list lock */
-       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
-       if (dat_status != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to init cr_list lock\n");
-               goto bail;
-       }
-
-       /* initialize CM list for listens on this HCA */
-       dapl_llist_init_head(&hca_ptr->ib_trans.list);
-
-       /* initialize pipe, user level wakeup on select */
-       if (create_cr_pipe(hca_ptr)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to init cr pipe - %s\n",
-                        strerror(errno));
-               goto bail;
-       }
-
-       /* create thread to process inbound connect request */
-       hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
-       dat_status = dapl_os_thread_create(cr_thread,
-                                          (void *)hca_ptr,
-                                          &hca_ptr->ib_trans.thread);
-       if (dat_status != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to create thread\n");
-               goto bail;
-       }
-
-       /* wait for thread */
-       while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
-               dapl_os_sleep_usec(1000);
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: devname %s, port %d, hostname_IP %s\n",
-                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                    hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)
-                                                  &hca_ptr->hca_address)->
-                                                 sin_addr));
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x
-                    "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)
-                    htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),
-                    (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.
-                                               interface_id));
-
-       ibv_free_device_list(dev_list);
-       return dat_status;
-
-      bail:
-       ibv_close_device(hca_ptr->ib_hca_handle);
-       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-      err:
-       ibv_free_device_list(dev_list);
-       return DAT_INTERNAL_ERROR;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Open HCA
- *
- * Input:
- *      DAPL_HCA   provide CA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *     dapl_convert_errno 
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
-
-       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
-               if (ibv_close_device(hca_ptr->ib_hca_handle))
-                       return (dapl_convert_errno(errno, "ib_close_device"));
-               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-       }
-
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_RUN) {
-               dapl_os_unlock(&g_hca_lock);
-               return (DAT_SUCCESS);
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       /* destroy cr_thread and lock */
-       hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
-       send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
-       while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " close_hca: waiting for cr_thread\n");
-               send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
-               dapl_os_sleep_usec(1000);
-       }
-       dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
-       destroy_cr_pipe(hca_ptr); /* no longer need pipe */
-       
-       /* 
-        * Remove hca from async event processing list
-        * Wakeup work thread to remove from polling list
-        */
-       hca_ptr->ib_trans.destroy = 1;
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " destroy: thread wakeup error = %s\n",
-                        strerror(errno));
-
-       /* wait for thread to remove HCA references */
-       while (hca_ptr->ib_trans.destroy != 2) {
-               if (dapls_thread_signal() == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-               dapl_os_sleep_usec(1000);
-       }
-
-       return (DAT_SUCCESS);
-}
-
-DAT_RETURN dapli_ib_thread_init(void)
-{
-       DAT_RETURN dat_status;
-
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_INIT) {
-               dapl_os_unlock(&g_hca_lock);
-               return DAT_SUCCESS;
-       }
-
-       g_ib_thread_state = IB_THREAD_CREATE;
-       dapl_os_unlock(&g_hca_lock);
-
-       /* create thread to process inbound connect request */
-       dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
-       if (dat_status != DAT_SUCCESS)
-               return (dapl_convert_errno(errno,
-                                          "create_thread ERR:"
-                                          " check resource limits"));
-
-       /* wait for thread to start */
-       dapl_os_lock(&g_hca_lock);
-       while (g_ib_thread_state != IB_THREAD_RUN) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_init: waiting for ib_thread\n");
-               dapl_os_unlock(&g_hca_lock);
-               dapl_os_sleep_usec(1000);
-               dapl_os_lock(&g_hca_lock);
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       return DAT_SUCCESS;
-}
-
-void dapli_ib_thread_destroy(void)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_destroy(%d)\n", dapl_os_getpid());
-       /* 
-        * wait for async thread to terminate. 
-        * pthread_join would be the correct method
-        * but some applications have some issues
-        */
-
-       /* destroy ib_thread, wait for termination, if not already */
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_RUN)
-               goto bail;
-
-       g_ib_thread_state = IB_THREAD_CANCEL;
-       while (g_ib_thread_state != IB_THREAD_EXIT) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_destroy: waiting for ib_thread\n");
-               if (dapls_thread_signal() == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-               dapl_os_unlock(&g_hca_lock);
-               dapl_os_sleep_usec(2000);
-               dapl_os_lock(&g_hca_lock);
-       }
-bail:
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
-}
-
-
-#if defined(_WIN64) || defined(_WIN32)
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
-       struct _ib_hca_transport *hca;
-       struct _ib_hca_transport *uhca[8];
-       int ret, idx, cnt;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
-                    dapl_os_getpid(), g_ib_thread);
-
-       dapl_os_lock(&g_hca_lock);
-       for (g_ib_thread_state = IB_THREAD_RUN;
-            g_ib_thread_state == IB_THREAD_RUN; 
-            dapl_os_lock(&g_hca_lock)) {
-
-               CompSetZero(&ufds);
-               idx = 0;
-               hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
-                     dapl_llist_peek_head(&g_hca_list);
-
-               while (hca) {
-                       CompSetAdd(&hca->ib_ctx->channel, &ufds);
-                       CompSetAdd(&hca->ib_cq->comp_channel, &ufds);
-                       uhca[idx++] = hca;
-                       hca = dapl_llist_next_entry(&g_hca_list,
-                                                   (DAPL_LLIST_ENTRY *)
-                                                   &hca->entry);
-               }
-               cnt = idx;
-
-               dapl_os_unlock(&g_hca_lock);
-               ret = CompSetPoll(&ufds, INFINITE);
-
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread(%d) poll_event 0x%x\n",
-                            dapl_os_getpid(), ret);
-
-
-               /* check and process ASYNC events, per device */
-               for (idx = 0; idx < cnt; idx++) {
-                       if (uhca[idx]->destroy == 1) {
-                               dapl_os_lock(&g_hca_lock);
-                               dapl_llist_remove_entry(&g_hca_list,
-                                                       (DAPL_LLIST_ENTRY *)
-                                                       &uhca[idx]->entry);
-                               dapl_os_unlock(&g_hca_lock);
-                               uhca[idx]->destroy = 2;
-                       } else {
-                               dapli_cq_event_cb(uhca[idx]);
-                               dapli_async_event_cb(uhca[idx]);
-                       }
-               }
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
-                    dapl_os_getpid());
-       g_ib_thread_state = IB_THREAD_EXIT;
-       dapl_os_unlock(&g_hca_lock);
-}
-#else                          // _WIN64 || WIN32
-
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
-       struct pollfd ufds[__FD_SETSIZE];
-       struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
-       struct _ib_hca_transport *hca;
-       int ret, idx, fds;
-       char rbuf[2];
-
-       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                    " ib_thread(%d,0x%x): ENTER: pipe %d \n",
-                    dapl_os_getpid(), g_ib_thread, g_ib_pipe[0]);
-
-       /* Poll across pipe, CM, AT never changes */
-       dapl_os_lock(&g_hca_lock);
-       g_ib_thread_state = IB_THREAD_RUN;
-
-       ufds[0].fd = g_ib_pipe[0];      /* pipe */
-       ufds[0].events = POLLIN;
-
-       while (g_ib_thread_state == IB_THREAD_RUN) {
-
-               /* build ufds after pipe and uCMA events */
-               ufds[0].revents = 0;
-               idx = 0;
-
-               /*  Walk HCA list and setup async and CQ events */
-               if (!dapl_llist_is_empty(&g_hca_list))
-                       hca = dapl_llist_peek_head(&g_hca_list);
-               else
-                       hca = NULL;
-
-               while (hca) {
-
-                       /* uASYNC events */
-                       ufds[++idx].fd = hca->ib_ctx->async_fd;
-                       ufds[idx].events = POLLIN;
-                       ufds[idx].revents = 0;
-                       uhca[idx] = hca;
-
-                       /* CQ events are non-direct with CNO's */
-                       ufds[++idx].fd = hca->ib_cq->fd;
-                       ufds[idx].events = POLLIN;
-                       ufds[idx].revents = 0;
-                       uhca[idx] = hca;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                                    " ib_thread(%d) poll_fd: hca[%d]=%p,"
-                                    " async=%d pipe=%d \n",
-                                    dapl_os_getpid(), hca, ufds[idx - 1].fd,
-                                    ufds[0].fd);
-
-                       hca = dapl_llist_next_entry(&g_hca_list,
-                                                   (DAPL_LLIST_ENTRY *)
-                                                   &hca->entry);
-               }
-
-               /* unlock, and setup poll */
-               fds = idx + 1;
-               dapl_os_unlock(&g_hca_lock);
-               ret = poll(ufds, fds, -1);
-               if (ret <= 0) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                                    " ib_thread(%d): ERR %s poll\n",
-                                    dapl_os_getpid(), strerror(errno));
-                       dapl_os_lock(&g_hca_lock);
-                       continue;
-               }
-
-               dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
-                            " ib_thread(%d) poll_event: "
-                            " async=0x%x pipe=0x%x \n",
-                            dapl_os_getpid(), ufds[idx].revents,
-                            ufds[0].revents);
-
-               /* check and process CQ and ASYNC events, per device */
-               for (idx = 1; idx < fds; idx++) {
-                       if (ufds[idx].revents == POLLIN) {
-                               dapli_cq_event_cb(uhca[idx]);
-                               dapli_async_event_cb(uhca[idx]);
-                       }
-               }
-
-               /* check and process user events, PIPE */
-               if (ufds[0].revents == POLLIN) {
-                       if (read(g_ib_pipe[0], rbuf, 2) == -1)
-                               dapl_log(DAPL_DBG_TYPE_THREAD,
-                                        " cr_thread: pipe rd err= %s\n",
-                                        strerror(errno));
-
-                       /* cleanup any device on list marked for destroy */
-                       for (idx = 1; idx < fds; idx++) {
-                               if (uhca[idx] && uhca[idx]->destroy == 1) {
-                                       dapl_os_lock(&g_hca_lock);
-                                       dapl_llist_remove_entry(
-                                               &g_hca_list,
-                                               (DAPL_LLIST_ENTRY*)
-                                               &uhca[idx]->entry);
-                                       dapl_os_unlock(&g_hca_lock);
-                                       uhca[idx]->destroy = 2;
-                               }
-                       }
-               }
-               dapl_os_lock(&g_hca_lock);
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",
-                    dapl_os_getpid());
-       g_ib_thread_state = IB_THREAD_EXIT;
-       dapl_os_unlock(&g_hca_lock);
-}
-#endif
+/*\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ *    available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ *    copy of which is available from the Open Source Initiative, see\r
+ *    http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+\r
+/***************************************************************************\r
+ *\r
+ *   Module:            uDAPL\r
+ *\r
+ *   Filename:          dapl_ib_util.c\r
+ *\r
+ *   Author:            Arlin Davis\r
+ *\r
+ *   Created:           3/10/2005\r
+ *\r
+ *   Description: \r
+ *\r
+ *   The uDAPL openib provider - init, open, close, utilities\r
+ *\r
+ ****************************************************************************\r
+ *                Source Control System Information\r
+ *\r
+ *    $Id: $\r
+ *\r
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.\r
+ *\r
+ **************************************************************************/\r
+#ifdef RCSID\r
+static const char rcsid[] = "$Id:  $";\r
+#endif\r
+\r
+#include "openib_osd.h"\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+#include "dapl_ib_util.h"\r
+#include "dapl_osd.h"\r
+\r
+#include <stdlib.h>\r
+\r
+ib_thread_state_t g_ib_thread_state = 0;\r
+DAPL_OS_THREAD g_ib_thread;\r
+DAPL_OS_LOCK g_hca_lock;\r
+struct dapl_llist_entry *g_hca_list;\r
+\r
+void dapli_thread(void *arg);\r
+DAT_RETURN  dapli_ib_thread_init(void);\r
+void dapli_ib_thread_destroy(void);\r
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+#include "..\..\..\..\..\etc\user\comp_channel.cpp"\r
+#include <rdma\winverbs.h>\r
+\r
+static COMP_SET ufds;\r
+\r
+/* Prepare the completion set (ufds) that the work thread polls. */\r
+static int dapls_os_init(void)\r
+{\r
+       int rc = CompSetInit(&ufds);\r
+\r
+       return rc;\r
+}\r
+\r
+/* Clean up the completion set (ufds) set up by dapls_os_init(). */\r
+static void dapls_os_release(void)\r
+{\r
+       CompSetCleanup(&ufds);\r
+}\r
+\r
+/*\r
+ * Zero the Milliseconds field of the device context's channel.\r
+ * NOTE(review): presumably a zero wait timeout, i.e. the Windows\r
+ * counterpart of the O_NONBLOCK setup in the other build - confirm.\r
+ * Always returns 0.\r
+ */\r
+static int dapls_config_verbs(struct ibv_context *verbs)\r
+{\r
+       verbs->channel.Milliseconds = 0;\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * Zero the Milliseconds field of the completion channel.\r
+ * NOTE(review): presumably a zero wait timeout, i.e. the Windows\r
+ * counterpart of the O_NONBLOCK setup in the other build - confirm.\r
+ * Always returns 0.\r
+ */\r
+static int dapls_config_comp_channel(struct ibv_comp_channel *channel)\r
+{\r
+       channel->comp_channel.Milliseconds = 0;\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * Cancel the poll on the completion set so the work thread wakes up.\r
+ * Always returns 0, so the callers' "== -1" checks never fire in this build.\r
+ */\r
+static int dapls_thread_signal(void)\r
+{\r
+       CompSetCancel(&ufds);\r
+       return 0;\r
+}\r
+#else                          // _WIN64 || WIN32\r
+/* wakeup pipe for the work thread; -1 marks an end that is not open */\r
+int g_ib_pipe[2] = { -1, -1 };\r
+\r
+/* Create the pipe used to wake the work thread; returns pipe()'s result. */\r
+static int dapls_os_init(void)\r
+{\r
+       /* create pipe for waking up work thread */\r
+       return pipe(g_ib_pipe);\r
+}\r
+\r
+/*\r
+ * Close both ends of the wakeup pipe. Each end is reset to -1 afterwards\r
+ * so a late dapls_thread_signal() fails with EBADF instead of writing to\r
+ * a descriptor number that has since been reused, and so a release with\r
+ * no successful init never closes an unrelated descriptor.\r
+ */\r
+static void dapls_os_release(void)\r
+{\r
+       int i;\r
+\r
+       for (i = 0; i < 2; i++) {\r
+               if (g_ib_pipe[i] >= 0) {\r
+                       close(g_ib_pipe[i]);\r
+                       g_ib_pipe[i] = -1;\r
+               }\r
+       }\r
+}\r
+\r
+/*\r
+ * Switch a descriptor to non-blocking mode.\r
+ *\r
+ * Returns 0 on success, or the errno value of the failing fcntl() call.\r
+ */\r
+static int dapls_config_fd(int fd)\r
+{\r
+       int opts;\r
+       int err;\r
+\r
+       opts = fcntl(fd, F_GETFL);\r
+       if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {\r
+               /* capture errno first: logging may overwrite it */\r
+               err = errno;\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " dapls_config_fd: fcntl on fd %d ERR %d %s\n",\r
+                        fd, opts, strerror(err));\r
+               return err;\r
+       }\r
+\r
+       return 0;\r
+}\r
+\r
+/* Make the device context's async_fd non-blocking; returns dapls_config_fd()'s result. */\r
+static int dapls_config_verbs(struct ibv_context *verbs)\r
+{\r
+       return dapls_config_fd(verbs->async_fd);\r
+}\r
+\r
+/* Make the completion channel's fd non-blocking; returns dapls_config_fd()'s result. */\r
+static int dapls_config_comp_channel(struct ibv_comp_channel *channel)\r
+{\r
+       return dapls_config_fd(channel->fd);\r
+}\r
+\r
+/*\r
+ * Write to the wakeup pipe. sizeof "w" is 2, so the 'w' and its\r
+ * terminating NUL are both written. Returns write()'s result converted\r
+ * to int; callers test it against -1.\r
+ */\r
+static int dapls_thread_signal(void)\r
+{\r
+       return write(g_ib_pipe[1], "w", sizeof "w");\r
+}\r
+#endif\r
+\r
+\r
+/*\r
+ * Build a connected pair of TCP sockets over the loopback address and\r
+ * store them in hca_ptr->ib_trans.scm[]: scm[1] is the connecting end,\r
+ * scm[0] the accepted end. A temporary listening socket bound to\r
+ * 127.0.0.1 (port left 0 in the bind address) is used to pair them and\r
+ * is closed before returning.\r
+ *\r
+ * Returns 0 on success, 1 on any failure.\r
+ */\r
+static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)\r
+{\r
+       struct sockaddr_in loopback;\r
+       socklen_t loopback_len = sizeof(loopback);\r
+       DAPL_SOCKET acceptor;\r
+       int32_t status = 1;\r
+\r
+       acceptor = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);\r
+       if (acceptor == DAPL_INVALID_SOCKET)\r
+               return status;\r
+\r
+       memset(&loopback, 0, sizeof loopback);\r
+       loopback.sin_family = AF_INET;\r
+       loopback.sin_addr.s_addr = htonl(0x7f000001);\r
+\r
+       /* bind, read back the bound address, then listen; stop at the first failure */\r
+       if (bind(acceptor, (struct sockaddr *)&loopback, sizeof loopback) ||\r
+           getsockname(acceptor, (struct sockaddr *)&loopback,\r
+                       &loopback_len) ||\r
+           listen(acceptor, 0))\r
+               goto close_acceptor;\r
+\r
+       hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);\r
+       if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)\r
+               goto close_acceptor;\r
+\r
+       if (connect(hca_ptr->ib_trans.scm[1],\r
+                   (struct sockaddr *)&loopback, sizeof(loopback)))\r
+               goto close_connector;\r
+\r
+       hca_ptr->ib_trans.scm[0] = accept(acceptor, NULL, NULL);\r
+       if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)\r
+               goto close_connector;\r
+\r
+       /* success: only the temporary listening socket is closed */\r
+       status = 0;\r
+       goto close_acceptor;\r
+\r
+      close_connector:\r
+       closesocket(hca_ptr->ib_trans.scm[1]);\r
+      close_acceptor:\r
+       closesocket(acceptor);\r
+       return status;\r
+}\r
+\r
+/* Close both sockets of the pair that create_cr_pipe() stored in scm[]. */\r
+static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)\r
+{\r
+       closesocket(hca_ptr->ib_trans.scm[0]);\r
+       closesocket(hca_ptr->ib_trans.scm[1]);\r
+}\r
+\r
+\r
+/*\r
+ * dapls_ib_init, dapls_ib_release\r
+ *\r
+ * Initialize Verb related items for device open\r
+ *\r
+ * Input:\r
+ *     none\r
+ *\r
+ * Output:\r
+ *     none\r
+ *\r
+ * Returns:\r
+ *     0 success, 1 error (dapls_ib_release always returns 0)\r
+ *\r
+ */\r
+/*\r
+ * Initialize the global HCA list and its lock, then the OS-specific\r
+ * wakeup mechanism. Returns 0 on success, 1 if dapls_os_init() reports\r
+ * a non-zero result.\r
+ */\r
+int32_t dapls_ib_init(void)\r
+{\r
+       int32_t status;\r
+\r
+       /* list head and the lock that guards it */\r
+       dapl_os_lock_init(&g_hca_lock);\r
+       dapl_llist_init_head(&g_hca_list);\r
+\r
+       status = dapls_os_init() ? 1 : 0;\r
+       return status;\r
+}\r
+\r
+/* Stop the work thread first, then release the OS wakeup resources. Always returns 0. */\r
+int32_t dapls_ib_release(void)\r
+{\r
+       dapli_ib_thread_destroy();\r
+       dapls_os_release();\r
+       return 0;\r
+}\r
+\r
+/*\r
+ * dapls_ib_open_hca\r
+ *\r
+ * Open HCA\r
+ *\r
+ * Input:\r
+ *      *hca_name         pointer to provider device name\r
+ *      *hca_ptr          pointer to the DAPL_HCA whose handle and transport state are filled in\r
+ *\r
+ * Output:\r
+ *      none\r
+ *\r
+ * Return:\r
+ *      DAT_SUCCESS\r
+ *      dapl_convert_errno\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)\r
+{\r
+       struct ibv_device **dev_list;\r
+       struct ibv_port_attr port_attr;\r
+       int i;\r
+       DAT_RETURN dat_status;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: %s - %p\n", hca_name, hca_ptr);\r
+\r
+       /* get the IP address of the device */\r
+       dat_status = getlocalipaddr((DAT_SOCK_ADDR *) &hca_ptr->hca_address,\r
+                                   sizeof(DAT_SOCK_ADDR6));\r
+       if (dat_status != DAT_SUCCESS)\r
+               return dat_status;\r
+\r
+#ifdef DAPL_DBG\r
+       /* DBG: unused port, set process id, lower 16 bits of pid */\r
+       ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_port = \r
+                                       htons((uint16_t)dapl_os_getpid());\r
+#endif\r
+        /* Get list of all IB devices, find match, open */\r
+       dev_list = ibv_get_device_list(NULL);\r
+       if (!dev_list) {\r
+               /* the format string has no conversion, so no argument is passed */\r
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+                            " open_hca: ibv_get_device_list() failed\n");\r
+               return DAT_INTERNAL_ERROR;\r
+       }\r
+\r
+       for (i = 0; dev_list[i]; ++i) {\r
+               hca_ptr->ib_trans.ib_dev = dev_list[i];\r
+               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+                           hca_name))\r
+                       goto found;\r
+       }\r
+\r
+       dapl_log(DAPL_DBG_TYPE_ERR,\r
+                " open_hca: device %s not found\n", hca_name);\r
+       goto err;\r
+\r
+found:\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",\r
+                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+                    (unsigned long long)\r
+                    ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));\r
+\r
+       hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);\r
+       if (!hca_ptr->ib_hca_handle) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: dev open failed for %s, err=%s\n",\r
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+                        strerror(errno));\r
+               goto err;\r
+       }\r
+       hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;\r
+       dapls_config_verbs(hca_ptr->ib_hca_handle);\r
+\r
+       /*\r
+        * From here on the device is open: every failure must go through\r
+        * "bail" (or a label above it) so the handle is closed again.\r
+        */\r
+\r
+       /* get lid for this hca-port, network order */\r
+       if (ibv_query_port(hca_ptr->ib_hca_handle,\r
+                          (uint8_t) hca_ptr->port_num, &port_attr)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: get lid ERR for %s, err=%s\n",\r
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+                        strerror(errno));\r
+               goto bail;\r
+       }\r
+       hca_ptr->ib_trans.lid = htons(port_attr.lid);\r
+\r
+       /* get gid for this hca-port, network order */\r
+       if (ibv_query_gid(hca_ptr->ib_hca_handle,\r
+                         (uint8_t) hca_ptr->port_num,\r
+                         0, &hca_ptr->ib_trans.gid)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: query GID ERR for %s, err=%s\n",\r
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+                        strerror(errno));\r
+               goto bail;\r
+       }\r
+\r
+       /* set RC tunables via environment or default */\r
+       hca_ptr->ib_trans.max_inline_send =\r
+           dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);\r
+       hca_ptr->ib_trans.ack_retry =\r
+           dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);\r
+       hca_ptr->ib_trans.ack_timer =\r
+           dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);\r
+       hca_ptr->ib_trans.rnr_retry =\r
+           dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);\r
+       hca_ptr->ib_trans.rnr_timer =\r
+           dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);\r
+       hca_ptr->ib_trans.global =\r
+           dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);\r
+       hca_ptr->ib_trans.hop_limit =\r
+           dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);\r
+       hca_ptr->ib_trans.tclass =\r
+           dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);\r
+       hca_ptr->ib_trans.mtu =\r
+           dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));\r
+\r
+\r
+       /* EVD events without direct CQ channels, CNO support */\r
+       hca_ptr->ib_trans.ib_cq =\r
+           ibv_create_comp_channel(hca_ptr->ib_hca_handle);\r
+       if (hca_ptr->ib_trans.ib_cq == NULL) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: ibv_create_comp_channel ERR %s\n",\r
+                        strerror(errno));\r
+               goto bail;\r
+       }\r
+       dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq);\r
+       \r
+       dat_status = dapli_ib_thread_init();\r
+       if (dat_status != DAT_SUCCESS) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: failed to init cq thread lock\n");\r
+               goto err_chan;\r
+       }\r
+       /* \r
+        * Put new hca_transport on list for async and CQ event processing \r
+        * Wakeup work thread to add to polling list\r
+        */\r
+       dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&hca_ptr->ib_trans.entry);\r
+       dapl_os_lock(&g_hca_lock);\r
+       dapl_llist_add_tail(&g_hca_list,\r
+                           (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,\r
+                           &hca_ptr->ib_trans.entry);\r
+       if (dapls_thread_signal() == -1)\r
+               dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                        " open_hca: thread wakeup error = %s\n",\r
+                        strerror(errno));\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       /* initialize cr_list lock */\r
+       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);\r
+       if (dat_status != DAT_SUCCESS) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: failed to init cr_list lock\n");\r
+               goto err_list;\r
+       }\r
+\r
+       /* initialize CM list for listens on this HCA */\r
+       dapl_llist_init_head(&hca_ptr->ib_trans.list);\r
+\r
+       /* initialize pipe, user level wakeup on select */\r
+       if (create_cr_pipe(hca_ptr)) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: failed to init cr pipe - %s\n",\r
+                        strerror(errno));\r
+               goto err_lock;\r
+       }\r
+\r
+       /* create thread to process inbound connect request */\r
+       hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;\r
+       dat_status = dapl_os_thread_create(cr_thread,\r
+                                          (void *)hca_ptr,\r
+                                          &hca_ptr->ib_trans.thread);\r
+       if (dat_status != DAT_SUCCESS) {\r
+               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                        " open_hca: failed to create thread\n");\r
+               goto err_pipe;\r
+       }\r
+\r
+       /* wait for thread */\r
+       while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {\r
+               dapl_os_sleep_usec(1000);\r
+       }\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: devname %s, port %d, hostname_IP %s\n",\r
+                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+                    hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)\r
+                                                  &hca_ptr->hca_address)->\r
+                                                 sin_addr));\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x\r
+                    "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)\r
+                    htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),\r
+                    (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.\r
+                                               interface_id));\r
+\r
+       ibv_free_device_list(dev_list);\r
+       return dat_status;\r
+\r
+       /*\r
+        * Failure unwinding: each label releases one resource and falls\r
+        * through to the labels for everything acquired before it.\r
+        */\r
+      err_pipe:\r
+       destroy_cr_pipe(hca_ptr);\r
+      err_lock:\r
+       dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);\r
+      err_list:\r
+       /*\r
+        * Same handshake as dapls_ib_close_hca: ask the work thread to\r
+        * drop this HCA from g_hca_list (destroy 1 -> 2) so it no longer\r
+        * polls the channel and device that are released below.\r
+        */\r
+       hca_ptr->ib_trans.destroy = 1;\r
+       while (hca_ptr->ib_trans.destroy != 2) {\r
+               if (dapls_thread_signal() == -1)\r
+                       dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                                " open_hca: thread wakeup error = %s\n",\r
+                                strerror(errno));\r
+               dapl_os_sleep_usec(1000);\r
+       }\r
+       hca_ptr->ib_trans.destroy = 0;\r
+      err_chan:\r
+       ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq);\r
+       hca_ptr->ib_trans.ib_cq = NULL;\r
+      bail:\r
+       ibv_close_device(hca_ptr->ib_hca_handle);\r
+       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+      err:\r
+       ibv_free_device_list(dev_list);\r
+       return DAT_INTERNAL_ERROR;\r
+}\r
+\r
+/*\r
+ * dapls_ib_close_hca\r
+ *\r
+ * Close HCA\r
+ *\r
+ * Input:\r
+ *      DAPL_HCA   provide CA handle\r
+ *\r
+ * Output:\r
+ *      none\r
+ *\r
+ * Return:\r
+ *      DAT_SUCCESS\r
+ *     dapl_convert_errno \r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);\r
+\r
+       /* with no work thread running, skip the thread handshakes below */\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_ib_thread_state != IB_THREAD_RUN) {\r
+               dapl_os_unlock(&g_hca_lock);\r
+               goto out;\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       /* destroy cr_thread and lock */\r
+       hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;\r
+       send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);\r
+       while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {\r
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                            " close_hca: waiting for cr_thread\n");\r
+               send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);\r
+               dapl_os_sleep_usec(1000);\r
+       }\r
+       dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);\r
+       destroy_cr_pipe(hca_ptr); /* no longer need pipe */\r
+       \r
+       /* \r
+        * Remove hca from async event processing list\r
+        * Wakeup work thread to remove from polling list\r
+        */\r
+       hca_ptr->ib_trans.destroy = 1;\r
+       if (dapls_thread_signal() == -1)\r
+               dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                        " destroy: thread wakeup error = %s\n",\r
+                        strerror(errno));\r
+\r
+       /* wait for thread to remove HCA references */\r
+       while (hca_ptr->ib_trans.destroy != 2) {\r
+               if (dapls_thread_signal() == -1)\r
+                       dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                                " destroy: thread wakeup error = %s\n",\r
+                                strerror(errno));\r
+               dapl_os_sleep_usec(1000);\r
+       }\r
+\r
+out:\r
+       /*\r
+        * The device is closed only here, after the work thread has\r
+        * dropped its references to it.\r
+        */\r
+       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {\r
+               /*\r
+                * Release the completion channel created by open_hca while\r
+                * its device is still open.\r
+                * NOTE(review): assumes no other module destroys\r
+                * ib_trans.ib_cq - confirm against the CQ/EVD code.\r
+                */\r
+               if (hca_ptr->ib_trans.ib_cq) {\r
+                       ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq);\r
+                       hca_ptr->ib_trans.ib_cq = NULL;\r
+               }\r
+               if (ibv_close_device(hca_ptr->ib_hca_handle))\r
+                       return (dapl_convert_errno(errno, "ib_close_device"));\r
+               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+       }\r
+       return (DAT_SUCCESS);\r
+}\r
+\r
+/*\r
+ * Start the global work thread if it has not been started yet.\r
+ *\r
+ * Only a caller that finds the state at IB_THREAD_INIT creates the\r
+ * thread; in any other state the call returns DAT_SUCCESS immediately.\r
+ * The creating caller waits until the thread reports IB_THREAD_RUN.\r
+ *\r
+ * Returns DAT_SUCCESS, or the dapl_convert_errno() result when the\r
+ * thread cannot be created.\r
+ */\r
+DAT_RETURN dapli_ib_thread_init(void)\r
+{\r
+       DAT_RETURN dat_status;\r
+       int err;\r
+\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_ib_thread_state != IB_THREAD_INIT) {\r
+               dapl_os_unlock(&g_hca_lock);\r
+               return DAT_SUCCESS;\r
+       }\r
+\r
+       g_ib_thread_state = IB_THREAD_CREATE;\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       /* create thread to process inbound connect request */\r
+       dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);\r
+       if (dat_status != DAT_SUCCESS) {\r
+               /* keep the creation error: the lock calls may change errno */\r
+               err = errno;\r
+\r
+               /*\r
+                * Roll the state back; left at IB_THREAD_CREATE, every\r
+                * later call would return success with no thread running.\r
+                */\r
+               dapl_os_lock(&g_hca_lock);\r
+               g_ib_thread_state = IB_THREAD_INIT;\r
+               dapl_os_unlock(&g_hca_lock);\r
+\r
+               return (dapl_convert_errno(err,\r
+                                          "create_thread ERR:"\r
+                                          " check resource limits"));\r
+       }\r
+\r
+       /* wait for thread to start */\r
+       dapl_os_lock(&g_hca_lock);\r
+       while (g_ib_thread_state != IB_THREAD_RUN) {\r
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                            " ib_thread_init: waiting for ib_thread\n");\r
+               dapl_os_unlock(&g_hca_lock);\r
+               dapl_os_sleep_usec(1000);\r
+               dapl_os_lock(&g_hca_lock);\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * Ask the global work thread to stop and wait until it reports\r
+ * IB_THREAD_EXIT. Nothing is done unless the thread is currently in\r
+ * IB_THREAD_RUN.\r
+ */\r
+void dapli_ib_thread_destroy(void)\r
+{\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " ib_thread_destroy(%d)\n", dapl_os_getpid());\r
+\r
+       /*\r
+        * The wait is a poll on the shared state rather than a join:\r
+        * pthread_join would be the correct method, but some\r
+        * applications have issues with it.\r
+        */\r
+       dapl_os_lock(&g_hca_lock);\r
+       if (g_ib_thread_state == IB_THREAD_RUN) {\r
+               g_ib_thread_state = IB_THREAD_CANCEL;\r
+\r
+               for (;;) {\r
+                       if (g_ib_thread_state == IB_THREAD_EXIT)\r
+                               break;\r
+\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                                    " ib_thread_destroy: waiting for ib_thread\n");\r
+                       if (dapls_thread_signal() == -1)\r
+                               dapl_log(DAPL_DBG_TYPE_UTIL,\r
+                                        " destroy: thread wakeup error = %s\n",\r
+                                        strerror(errno));\r
+\r
+                       /* drop the lock while sleeping so the thread can update the state */\r
+                       dapl_os_unlock(&g_hca_lock);\r
+                       dapl_os_sleep_usec(2000);\r
+                       dapl_os_lock(&g_hca_lock);\r
+               }\r
+       }\r
+       dapl_os_unlock(&g_hca_lock);\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                    " ib_thread_destroy(%d) exit\n", dapl_os_getpid());\r
+}\r
+\r
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+/*\r
+ * work thread for uAT, uCM, CQ, and async events\r
+ *\r
+ * Each pass rebuilds the completion set from g_hca_list under\r
+ * g_hca_lock, polls it with the lock released, then for every polled\r
+ * HCA either completes a pending removal (destroy 1 -> 2) or runs the\r
+ * CQ and async event callbacks. The loop ends once g_ib_thread_state\r
+ * is no longer IB_THREAD_RUN; IB_THREAD_EXIT is published on the way out.\r
+ *\r
+ * At most DAPLI_MAX_POLL_HCAS devices fit in the local table. Devices\r
+ * beyond that are reported and not polled instead of being written\r
+ * past the end of the array.\r
+ * NOTE(review): a close of such an unpolled device would wait forever\r
+ * for destroy == 2; raise the limit if more devices are expected.\r
+ */\r
+void dapli_thread(void *arg)\r
+{\r
+       enum { DAPLI_MAX_POLL_HCAS = 8 };\r
+       struct _ib_hca_transport *hca;\r
+       struct _ib_hca_transport *uhca[DAPLI_MAX_POLL_HCAS];\r
+       int ret, idx, cnt;\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",\r
+                    dapl_os_getpid(), g_ib_thread);\r
+\r
+       /* the lock is held at the top of every pass; the for-step re-takes it */\r
+       dapl_os_lock(&g_hca_lock);\r
+       for (g_ib_thread_state = IB_THREAD_RUN;\r
+            g_ib_thread_state == IB_THREAD_RUN; \r
+            dapl_os_lock(&g_hca_lock)) {\r
+\r
+               CompSetZero(&ufds);\r
+               idx = 0;\r
+               hca = dapl_llist_is_empty(&g_hca_list) ? NULL :\r
+                     dapl_llist_peek_head(&g_hca_list);\r
+\r
+               while (hca) {\r
+                       /* never index past the end of uhca[] */\r
+                       if (idx == DAPLI_MAX_POLL_HCAS) {\r
+                               dapl_log(DAPL_DBG_TYPE_ERR,\r
+                                        " ib_thread(%d): over %d HCAs,"\r
+                                        " extra devices not polled\n",\r
+                                        dapl_os_getpid(),\r
+                                        DAPLI_MAX_POLL_HCAS);\r
+                               break;\r
+                       }\r
+                       CompSetAdd(&hca->ib_ctx->channel, &ufds);\r
+                       CompSetAdd(&hca->ib_cq->comp_channel, &ufds);\r
+                       uhca[idx++] = hca;\r
+                       hca = dapl_llist_next_entry(&g_hca_list,\r
+                                                   (DAPL_LLIST_ENTRY *)\r
+                                                   &hca->entry);\r
+               }\r
+               cnt = idx;\r
+\r
+               dapl_os_unlock(&g_hca_lock);\r
+               ret = CompSetPoll(&ufds, INFINITE);\r
+\r
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+                            " ib_thread(%d) poll_event 0x%x\n",\r
+                            dapl_os_getpid(), ret);\r
+\r
+\r
+               /* check and process ASYNC events, per device */\r
+               for (idx = 0; idx < cnt; idx++) {\r
+                       if (uhca[idx]->destroy == 1) {\r
+                               dapl_os_lock(&g_hca_lock);\r
+                               dapl_llist_remove_entry(&g_hca_list,\r
+                                                       (DAPL_LLIST_ENTRY *)\r
+                                                       &uhca[idx]->entry);\r
+                               dapl_os_unlock(&g_hca_lock);\r
+                               /* last access: the closer waits for this value */\r
+                               uhca[idx]->destroy = 2;\r
+                       } else {\r
+                               dapli_cq_event_cb(uhca[idx]);\r
+                               dapli_async_event_cb(uhca[idx]);\r
+                       }\r
+               }\r
+       }\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",\r
+                    dapl_os_getpid());\r
+       g_ib_thread_state = IB_THREAD_EXIT;\r
+       dapl_os_unlock(&g_hca_lock);\r
+}\r
+#else                          // _WIN64 || WIN32\r
+\r
+/*\r
+ * Work thread for uAT, uCM, CQ, and async events.\r
+ *\r
+ * Runs for as long as g_ib_thread_state is IB_THREAD_RUN.  Each pass\r
+ * rebuilds the poll set (slot 0 is the wakeup pipe, followed by one async\r
+ * fd slot and one CQ fd slot for every HCA on g_hca_list), blocks in\r
+ * poll() with g_hca_lock released, then dispatches the CQ/async callbacks\r
+ * and unlinks any device marked for destroy.\r
+ *\r
+ * arg: not referenced by this function.\r
+ *\r
+ * Locking: g_hca_lock is held whenever the loop condition is evaluated and\r
+ * while the HCA list is walked; every path back to the top of the loop\r
+ * re-acquires it first.  On exit the state is set to IB_THREAD_EXIT before\r
+ * the lock is dropped.\r
+ */\r
+void dapli_thread(void *arg)\r
+{\r
+       struct pollfd ufds[__FD_SETSIZE];\r
+       struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };\r
+       struct _ib_hca_transport *hca;\r
+       int ret, idx, fds;\r
+       char rbuf[2];\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                    " ib_thread(%d,0x%x): ENTER: pipe %d \n",\r
+                    dapl_os_getpid(), g_ib_thread, g_ib_pipe[0]);\r
+\r
+       /* Poll across pipe, CM, AT never changes */\r
+       dapl_os_lock(&g_hca_lock);\r
+       g_ib_thread_state = IB_THREAD_RUN;\r
+\r
+       ufds[0].fd = g_ib_pipe[0];      /* pipe */\r
+       ufds[0].events = POLLIN;\r
+\r
+       while (g_ib_thread_state == IB_THREAD_RUN) {\r
+\r
+               /* build ufds after pipe and uCMA events */\r
+               ufds[0].revents = 0;\r
+               idx = 0;\r
+\r
+               /*  Walk HCA list and setup async and CQ events */\r
+               if (!dapl_llist_is_empty(&g_hca_list))\r
+                       hca = dapl_llist_peek_head(&g_hca_list);\r
+               else\r
+                       hca = NULL;\r
+\r
+               /*\r
+                * Every HCA consumes two slots (async + CQ).  Stop adding\r
+                * devices once the next pair would not fit, rather than\r
+                * writing past the end of ufds[]/uhca[].\r
+                */\r
+               while (hca && idx + 2 < __FD_SETSIZE) {\r
+\r
+                       /* uASYNC events */\r
+                       ufds[++idx].fd = hca->ib_ctx->async_fd;\r
+                       ufds[idx].events = POLLIN;\r
+                       ufds[idx].revents = 0;\r
+                       uhca[idx] = hca;\r
+\r
+                       /* CQ events are non-direct with CNO's */\r
+                       ufds[++idx].fd = hca->ib_cq->fd;\r
+                       ufds[idx].events = POLLIN;\r
+                       ufds[idx].revents = 0;\r
+                       uhca[idx] = hca;\r
+\r
+                       /*\r
+                        * One argument per conversion: the slot index was\r
+                        * previously missing, which shifted every later\r
+                        * argument onto the wrong specifier.\r
+                        */\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                                    " ib_thread(%d) poll_fd: hca[%d]=%p,"\r
+                                    " async=%d pipe=%d \n",\r
+                                    dapl_os_getpid(), idx, (void *)hca,\r
+                                    ufds[idx - 1].fd, ufds[0].fd);\r
+\r
+                       hca = dapl_llist_next_entry(&g_hca_list,\r
+                                                   (DAPL_LLIST_ENTRY *)\r
+                                                   &hca->entry);\r
+               }\r
+\r
+               /* unlock, and setup poll */\r
+               fds = idx + 1;\r
+               dapl_os_unlock(&g_hca_lock);\r
+               ret = poll(ufds, fds, -1);\r
+               if (ret <= 0) {\r
+                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                                    " ib_thread(%d): ERR %s poll\n",\r
+                                    dapl_os_getpid(), strerror(errno));\r
+                       /* loop condition is checked with the lock held */\r
+                       dapl_os_lock(&g_hca_lock);\r
+                       continue;\r
+               }\r
+\r
+               /* idx is still the last slot filled above (0 if no HCAs) */\r
+               dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+                            " ib_thread(%d) poll_event: "\r
+                            " async=0x%x pipe=0x%x \n",\r
+                            dapl_os_getpid(), ufds[idx].revents,\r
+                            ufds[0].revents);\r
+\r
+               /*\r
+                * check and process CQ and ASYNC events, per device\r
+                * NOTE(review): exact match on POLLIN skips a slot that\r
+                * also reports POLLERR/POLLHUP - confirm this is intended.\r
+                */\r
+               for (idx = 1; idx < fds; idx++) {\r
+                       if (ufds[idx].revents == POLLIN) {\r
+                               dapli_cq_event_cb(uhca[idx]);\r
+                               dapli_async_event_cb(uhca[idx]);\r
+                       }\r
+               }\r
+\r
+               /* check and process user events, PIPE */\r
+               if (ufds[0].revents == POLLIN) {\r
+                       if (read(g_ib_pipe[0], rbuf, 2) == -1)\r
+                               dapl_log(DAPL_DBG_TYPE_THREAD,\r
+                                        " cr_thread: pipe rd err= %s\n",\r
+                                        strerror(errno));\r
+\r
+                       /*\r
+                        * cleanup any device on list marked for destroy;\r
+                        * each HCA sits in two slots, the second visit\r
+                        * sees destroy == 2 and is skipped.\r
+                        */\r
+                       for (idx = 1; idx < fds; idx++) {\r
+                               if (uhca[idx] && uhca[idx]->destroy == 1) {\r
+                                       dapl_os_lock(&g_hca_lock);\r
+                                       dapl_llist_remove_entry(\r
+                                               &g_hca_list,\r
+                                               (DAPL_LLIST_ENTRY*)\r
+                                               &uhca[idx]->entry);\r
+                                       dapl_os_unlock(&g_hca_lock);\r
+                                       uhca[idx]->destroy = 2;\r
+                               }\r
+                       }\r
+               }\r
+               dapl_os_lock(&g_hca_lock);\r
+       }\r
+\r
+       dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",\r
+                    dapl_os_getpid());\r
+       g_ib_thread_state = IB_THREAD_EXIT;\r
+       dapl_os_unlock(&g_hca_lock);\r
+}\r
+#endif\r
index a515a16f8b00f8a89bb9c398467793bb774a0db4..498d781f085e73f148a3169bb295c887edb530c2 100644 (file)
@@ -281,6 +281,7 @@ int ibv_close_device(struct ibv_context *context)
        context->cmd_if->Release();\r
        ibv_release();\r
        delete vcontext->port;\r
+       CompChannelCleanup(&vcontext->context.channel);\r
        delete vcontext;\r
        return 0;\r
 }\r
index c3b9ad18135a568a7447dcaf45b3841f72cfc98f..7b9a6e0e5792dc56104da7dd1d829e32759bd852 100644 (file)
@@ -44,6 +44,8 @@ extern "C" {
  * Interfaces based on librdmacm 1.0.8.\r
  */\r
 \r
+#define RDMA_MAX_PRIVATE_DATA  56\r
+\r
 /*\r
  * Upon receiving a device removal event, users must destroy the associated\r
  * RDMA identifier and release all resources allocated with the device.\r
index 2727d8875998818825c0e6e0ae3598b9cc9a9acd..cde309b881321206bbbb73204bd544d8d9989991 100644 (file)
@@ -84,7 +84,7 @@ struct cma_device
 \r
 struct cma_event {\r
        struct rdma_cm_event    event;\r
-       uint8_t                                 private_data[56];\r
+       uint8_t                                 private_data[RDMA_MAX_PRIVATE_DATA];\r
        struct cma_id_private   *id_priv;\r
 };\r
 \r
@@ -1029,6 +1029,8 @@ int rdma_get_cm_event(struct rdma_event_channel *channel,
                evt->id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
                evt->event.id = id;\r
                evt->event.param.conn.private_data = evt->private_data;\r
+               evt->event.param.conn.private_data_len = RDMA_MAX_PRIVATE_DATA;\r
+\r
                evt->event.status = id->ep.connect->\r
                                                        GetOverlappedResult(&entry->Overlap, &bytes, FALSE);\r
 \r