]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
openib_common: reorganize provider code base to share common mem, cq, qp, dto functions
authorArlin Davis <arlin.r.davis@intel.com>
Mon, 29 Jun 2009 15:57:46 +0000 (08:57 -0700)
committerArlin Davis <arlin.r.davis@intel.com>
Mon, 29 Jun 2009 15:57:46 +0000 (08:57 -0700)
Add a new openib_common directory with the cq, qp, util, dto, and mem function
calls and definitions. This leaves only the provider-unique CM and device
definitions and functions in the individual provider directories, openib_scm
and openib_cma.

Modifications to dapl_cr_accept are required: ep->cm_handle is allocated
and managed entirely in the provider, so DAPL common code should not
automatically update ep_ptr->cm_handle from cr_ptr->ib_cm_handle. The
provider should determine which cm_handle is required for the accept.

openib_cma defines _OPENIB_CMA_ and openib_scm defines _OPENIB_SCM_ for provider
specific build needs in common code.

33 files changed:
Makefile.am
dapl/common/dapl_cr_accept.c
dapl/dirs
dapl/include/dapl_debug.h
dapl/openib_cma/SOURCES
dapl/openib_cma/cm.c [new file with mode: 0644]
dapl/openib_cma/dapl_ib_cm.c [deleted file]
dapl/openib_cma/dapl_ib_cq.c [deleted file]
dapl/openib_cma/dapl_ib_dto.h [deleted file]
dapl/openib_cma/dapl_ib_extensions.c [deleted file]
dapl/openib_cma/dapl_ib_mem.c [deleted file]
dapl/openib_cma/dapl_ib_qp.c [deleted file]
dapl/openib_cma/dapl_ib_util.c [deleted file]
dapl/openib_cma/dapl_ib_util.h
dapl/openib_cma/device.c [new file with mode: 0644]
dapl/openib_common/cq.c [new file with mode: 0644]
dapl/openib_common/dapl_ib_common.h [new file with mode: 0644]
dapl/openib_common/dapl_ib_dto.h [new file with mode: 0644]
dapl/openib_common/ib_extensions.c [new file with mode: 0644]
dapl/openib_common/mem.c [new file with mode: 0644]
dapl/openib_common/qp.c [new file with mode: 0644]
dapl/openib_common/util.c [new file with mode: 0644]
dapl/openib_scm/SOURCES
dapl/openib_scm/cm.c [new file with mode: 0644]
dapl/openib_scm/dapl_ib_cm.c [deleted file]
dapl/openib_scm/dapl_ib_cq.c [deleted file]
dapl/openib_scm/dapl_ib_dto.h [deleted file]
dapl/openib_scm/dapl_ib_extensions.c [deleted file]
dapl/openib_scm/dapl_ib_mem.c [deleted file]
dapl/openib_scm/dapl_ib_qp.c [deleted file]
dapl/openib_scm/dapl_ib_util.c [deleted file]
dapl/openib_scm/dapl_ib_util.h
dapl/openib_scm/device.c [new file with mode: 0644]

index fa47165533a96ab6a73ae619c437625b095e4d9d..cf75a88e6eef8205f5524fff760f4a257289a3a3 100755 (executable)
@@ -17,8 +17,8 @@ endif
 
 if EXT_TYPE_IB
 XFLAGS = -DDAT_EXTENSIONS
-XPROGRAMS_CMA = dapl/openib_cma/dapl_ib_extensions.c
-XPROGRAMS_SCM = dapl/openib_scm/dapl_ib_extensions.c
+XPROGRAMS_CMA = dapl/openib_common/ib_extensions.c
+XPROGRAMS_SCM = dapl/openib_common/ib_extensions.c
 else
 XFLAGS =
 XPROGRAMS_CMA =
@@ -47,6 +47,7 @@ dapl_udapl_libdaplofa_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS
                                -DOPENIB -DCQ_WAIT_OBJECT \
                                -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \
                                -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \
+                               -I$(srcdir)/dapl/openib_common \
                                -I$(srcdir)/dapl/openib_cma \
                                -I$(srcdir)/dapl/openib_cma/linux
                
@@ -54,6 +55,7 @@ dapl_udapl_libdaploscm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAG
                                 -DOPENIB -DCQ_WAIT_OBJECT \
                                 -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \
                                 -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \
+                               -I$(srcdir)/dapl/openib_common \
                                 -I$(srcdir)/dapl/openib_scm \
                                -I$(srcdir)/dapl/openib_scm/linux
 
@@ -185,11 +187,12 @@ dapl_udapl_libdaplofa_la_SOURCES = dapl/udapl/dapl_init.c \
         dapl/common/dapl_csp.c                      \
         dapl/common/dapl_ep_post_send_invalidate.c  \
         dapl/common/dapl_ep_post_rdma_read_to_rmr.c \
-        dapl/openib_cma/dapl_ib_util.c              \
-        dapl/openib_cma/dapl_ib_cq.c                \
-        dapl/openib_cma/dapl_ib_qp.c                \
-        dapl/openib_cma/dapl_ib_cm.c                \
-        dapl/openib_cma/dapl_ib_mem.c $(XPROGRAMS_CMA)
+        dapl/openib_common/mem.c                    \
+        dapl/openib_common/cq.c                     \
+        dapl/openib_common/qp.c                     \
+        dapl/openib_common/util.c                   \
+        dapl/openib_cma/cm.c                        \
+        dapl/openib_cma/device.c $(XPROGRAMS_CMA)
 
 dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) \
                                   -Wl,-init,dapl_init -Wl,-fini,dapl_fini \
@@ -298,11 +301,12 @@ dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \
         dapl/common/dapl_csp.c                      \
         dapl/common/dapl_ep_post_send_invalidate.c  \
         dapl/common/dapl_ep_post_rdma_read_to_rmr.c \
-        dapl/openib_scm/dapl_ib_util.c              \
-        dapl/openib_scm/dapl_ib_cq.c                \
-        dapl/openib_scm/dapl_ib_qp.c                \
-        dapl/openib_scm/dapl_ib_cm.c                \
-        dapl/openib_scm/dapl_ib_mem.c $(XPROGRAMS_SCM)
+        dapl/openib_common/mem.c                    \
+        dapl/openib_common/cq.c                     \
+        dapl/openib_common/qp.c                     \
+        dapl/openib_common/util.c                   \
+        dapl/openib_scm/cm.c                        \
+        dapl/openib_scm/device.c $(XPROGRAMS_SCM)
 
 dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \
                                    -Wl,-init,dapl_init -Wl,-fini,dapl_fini \
@@ -365,10 +369,10 @@ EXTRA_DIST = dat/common/dat_dictionary.h \
             dapl/include/dapl_debug.h \
             dapl/include/dapl_ipoib_names.h \
             dapl/include/dapl_vendor.h \
-            dapl/openib_cma/dapl_ib_dto.h \
+            dapl/openib_common/dapl_ib_dto.h \
+            dapl/openib_common/dapl_ib_common.h \
             dapl/openib_cma/dapl_ib_util.h \
             dapl/openib_cma/linux/openib_osd.h \
-            dapl/openib_scm/dapl_ib_dto.h \
             dapl/openib_scm/dapl_ib_util.h \
             dapl/openib_scm/linux/openib_osd.h \
             dat/udat/libdat2.map \
index 76a841eaee45f549c73e78c9c641c82725bd148b..5df9458b9626d7cb5c45e49c3e61ab38f67eb4bd 100644 (file)
@@ -180,7 +180,6 @@ dapl_cr_accept(IN DAT_CR_HANDLE cr_handle,
        entry_ep_state = ep_ptr->param.ep_state;
        entry_ep_handle = cr_ptr->param.local_ep_handle;
        ep_ptr->param.ep_state = DAT_EP_STATE_COMPLETION_PENDING;
-       ep_ptr->cm_handle = cr_ptr->ib_cm_handle;
        ep_ptr->cr_ptr = cr_ptr;
        ep_ptr->param.remote_ia_address_ptr =
            cr_ptr->param.remote_ia_address_ptr;
index e865dfbd40daeb74cfebdc9272ab313f3f1141a2..e721ef5c9b2d3150460ef9ef21e9468ac8fa1161 100644 (file)
--- a/dapl/dirs
+++ b/dapl/dirs
@@ -1 +1 @@
-DIRS = ibal openib_scm openib_cma
+DIRS = ibal openib_common openib_scm openib_cma 
index 92e3d3b7a6435c310394f9382b21da98ec86ee61..37edf90d0efff87add4a13cee8e6f2ed26efab07 100644 (file)
@@ -66,7 +66,8 @@ typedef enum
     DAPL_DBG_TYPE_EXCEPTION    = 0x0400,
     DAPL_DBG_TYPE_SRQ          = 0x0800,
     DAPL_DBG_TYPE_CNTR         = 0x1000,
-    DAPL_DBG_TYPE_CM_LIST      = 0x2000
+    DAPL_DBG_TYPE_CM_LIST      = 0x2000,
+    DAPL_DBG_TYPE_THREAD       = 0x4000
 
 } DAPL_DBG_TYPE;
 
index fd67d07591ebb2b5ec2be80e623bd36db98e70b0..f1c50028b6466d463b40073fcdc0703102026d4c 100644 (file)
@@ -18,16 +18,17 @@ USE_MSVCRT = 1
 \r
 SOURCES = \\r
        udapl.rc \\r
-       ..\dapl_common_src.c    \\r
-       ..\dapl_udapl_src.c             \\r
-       dapl_ib_cq.c                    \\r
-       dapl_ib_extensions.c    \\r
-       dapl_ib_mem.c                   \\r
-       dapl_ib_qp.c                    \\r
-       dapl_ib_util.c                  \\r
-       dapl_ib_cm.c\r
-\r
-INCLUDES = ..\include;..\common;windows;..\..\dat\include;\\r
+       ..\dapl_common_src.c                    \\r
+       ..\dapl_udapl_src.c                     \\r
+       ..\openib_common\mem.c                  \\r
+       ..\openib_common\util.c                 \\r
+       ..\openib_common\cq.c                   \\r
+       ..\openib_common\qp.c                   \\r
+       ..\openib_common\ib_extensions.c        \\r
+       device.c                                \\r
+       cm.c\r
+\r
+INCLUDES = ..\include;..\openib_common;..\common;windows;..\..\dat\include;\\r
                   ..\..\dat\udat\windows;..\udapl\windows;\\r
                   ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include;\\r
                   ..\..\..\librdmacm\include\r
diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
new file mode 100644 (file)
index 0000000..497f78a
--- /dev/null
@@ -0,0 +1,1338 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. 
+ * Copyright (c) 2003 Topspin Corporation.  All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: cm.c (formerly dapl_ib_cm.c)
+ *
+ * PURPOSE: The OFED provider - uCMA, name and route resolution
+ *
+ * $Id: $
+ *
+ **********************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+#include "dapl_vendor.h"
+#include "dapl_osd.h"
+
+extern struct rdma_event_channel *g_cm_events;  /* event channel passed to rdma_create_id() in dapls_ib_cm_create() */
+
+/*
+ * local prototypes: forward declarations for the static helpers
+ * defined later in this file
+ */
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
+                                        struct rdma_cm_event *event);
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,
+                              struct rdma_cm_event *event);
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
+                               struct rdma_cm_event *event);
+static void dapli_addr_resolve(struct dapl_cm_id *conn);
+static void dapli_route_resolve(struct dapl_cm_id *conn);
+
+/*
+ * cma requires 16 bit SID, in network order.
+ *
+ * SID_TO_PORT: a SID that fits in 16 bits is used directly as the port;
+ * a larger SID is folded into the range
+ * [IB_PORT_BASE, IB_PORT_BASE + IB_PORT_MOD - 1]. Either way the result
+ * is passed through htons(). The argument is parenthesized so that an
+ * expression argument expands with the intended precedence; note that it
+ * is still evaluated more than once, so avoid arguments with side effects.
+ *
+ * PORT_TO_SID: converts a network-order port back with ntohs().
+ */
+#define IB_PORT_MOD 32001
+#define IB_PORT_BASE (65535 - IB_PORT_MOD)
+#define SID_TO_PORT(SID) \
+    ((SID) > 0xffff ? \
+    htons((unsigned short)(((SID) % IB_PORT_MOD) + IB_PORT_BASE)) :\
+    htons((unsigned short)(SID)))
+
+#define PORT_TO_SID(p) ntohs(p)
+
+/*
+ * private data header to validate consumer rejects versus abnormal events;
+ * dapli_cm_active_cb() skips sizeof(struct dapl_pdata_hdr) bytes of the
+ * REJ private data before handing it to dapl_evd_connection_callback()
+ */
+struct dapl_pdata_hdr {
+       DAT_UINT32 version;
+};
+
+/*
+ * Logs the source and destination IPv4 addresses recorded in the cm_id
+ * and starts route resolution using the connection's route_timeout.
+ * When rdma_resolve_route() reports an error, the failure is logged and
+ * IB_CME_LOCAL_FAILURE is delivered via dapl_evd_connection_callback().
+ */
+static void dapli_addr_resolve(struct dapl_cm_id *conn)
+{
+#ifdef DAPL_DBG
+       struct rdma_addr *addr = &conn->cm_id->route.addr;
+       struct sockaddr_in *src = (struct sockaddr_in *)&addr->src_addr;
+       struct sockaddr_in *dst = (struct sockaddr_in *)&addr->dst_addr;
+#endif
+       int status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " addr_resolve: cm_id %p SRC %x DST %x\n",
+                    conn->cm_id,
+                    ntohl(src->sin_addr.s_addr),
+                    ntohl(dst->sin_addr.s_addr));
+
+       status = rdma_resolve_route(conn->cm_id, conn->route_timeout);
+       if (status == 0)
+               return;
+
+       /* route resolution could not be started: report a local failure */
+       dapl_log(DAPL_DBG_TYPE_ERR,
+                " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",
+                status, strerror(errno));
+       dapl_evd_connection_callback(conn, IB_CME_LOCAL_FAILURE,
+                                    NULL, conn->ep);
+}
+
+/*
+ * Logs the resolved IP addresses, destination port, source/destination
+ * GIDs and the prepared connection parameters, then issues rdma_connect()
+ * with conn->params. When rdma_connect() reports an error, the failure
+ * is logged and IB_CME_LOCAL_FAILURE is delivered via
+ * dapl_evd_connection_callback().
+ */
+static void dapli_route_resolve(struct dapl_cm_id *conn)
+{
+#ifdef DAPL_DBG
+       struct rdma_addr *addr = &conn->cm_id->route.addr;
+       struct sockaddr_in *src = (struct sockaddr_in *)&addr->src_addr;
+       struct sockaddr_in *dst = (struct sockaddr_in *)&addr->dst_addr;
+       struct ib_addr *gids = &conn->cm_id->route.addr.addr.ibaddr;
+#endif
+       int status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
+                    conn->cm_id,
+                    ntohl(src->sin_addr.s_addr),
+                    ntohl(dst->sin_addr.s_addr),
+                    ntohs(dst->sin_port));
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " route_resolve: SRC GID subnet %016llx id %016llx\n",
+                    (unsigned long long)ntohll(gids->sgid.global.subnet_prefix),
+                    (unsigned long long)ntohll(gids->sgid.global.interface_id));
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " route_resolve: DST GID subnet %016llx id %016llx\n",
+                    (unsigned long long)ntohll(gids->dgid.global.subnet_prefix),
+                    (unsigned long long)ntohll(gids->dgid.global.interface_id));
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
+                    conn->cm_id,
+                    conn->params.private_data,
+                    conn->params.private_data_len,
+                    conn->params.responder_resources,
+                    conn->params.initiator_depth);
+
+       status = rdma_connect(conn->cm_id, &conn->params);
+       if (status == 0)
+               return;
+
+       /* connect request could not be issued: report a local failure */
+       dapl_log(DAPL_DBG_TYPE_ERR,
+                " dapl_cma_connect: rdma_connect ERR %d %s\n",
+                status, strerror(errno));
+       dapl_evd_connection_callback(conn, IB_CME_LOCAL_FAILURE,
+                                    NULL, conn->ep);
+}
+
+/*
+ * Allocate and initialize a CM object bound to a freshly created
+ * rdma_cm_id on the global g_cm_events channel (RDMA_PS_TCP).
+ *
+ * ep: optional endpoint; when non-NULL the CM object records it together
+ *     with the HCA of the endpoint's IA.
+ *
+ * Returns the new CM handle, or NULL when the allocation or
+ * rdma_create_id() fails.
+ */
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
+{
+       dp_ib_cm_handle_t cm;
+       struct rdma_cm_id *id;
+
+       /* Allocate CM and initialize lock */
+       cm = dapl_os_alloc(sizeof(*cm));
+       if (cm == NULL)
+               return NULL;
+
+       dapl_os_memzero(cm, sizeof(*cm));
+       dapl_os_lock_init(&cm->lock);
+
+       /* create CM_ID with the CM object as its context */
+       if (rdma_create_id(g_cm_events, &id, (void *)cm, RDMA_PS_TCP) != 0) {
+               dapl_os_free(cm, sizeof(*cm));
+               return NULL;
+       }
+       cm->cm_id = id;
+
+       /* address and route resolution timers, overridable by environment */
+       cm->arp_timeout =
+           dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS", IB_ARP_TIMEOUT);
+       cm->arp_retries =
+           dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT", IB_ARP_RETRY_COUNT);
+       cm->route_timeout =
+           dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS", IB_ROUTE_TIMEOUT);
+       cm->route_retries =
+           dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
+                               IB_ROUTE_RETRY_COUNT);
+
+       if (ep) {
+               cm->ep = ep;
+               cm->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;
+       }
+
+       return cm;
+}
+
+/*
+ * Tear down a CM object together with its rdma_cm_id (and QP, if any).
+ *
+ * Called from consumer thread via dat_ep_free().
+ * CANNOT be called from the async event processing thread
+ * dapli_cma_event_cb() since a cm_id reference is held and
+ * a deadlock will occur.
+ *
+ * conn: CM object to release; NULL is a no-op.
+ * ep:   optional endpoint; when non-NULL its cm_handle and qp_handle are
+ *       cleared and its qp_state is set to IB_QP_STATE_ERROR.
+ */
+
+void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)
+{
+       struct rdma_cm_id *cm_id;
+
+       if (conn == NULL)
+               return;
+
+       /* cm_id is a pointer, so it is logged with %p (was %d) */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " destroy_conn: conn %p id %p\n", conn, conn->cm_id);
+
+       /*
+        * Mark the object as being destroyed and detach the cm_id under
+        * the lock; the CM callbacks check conn->destroy under the same
+        * lock and return early once it is set.
+        */
+       dapl_os_lock(&conn->lock);
+       conn->destroy = 1;
+
+       if (ep != NULL) {
+               ep->cm_handle = NULL;
+               ep->qp_handle = NULL;
+               ep->qp_state = IB_QP_STATE_ERROR;
+       }
+
+       cm_id = conn->cm_id;
+       conn->cm_id = NULL;
+       dapl_os_unlock(&conn->lock);
+
+       /* 
+        * rdma_destroy_id will force synchronization with async CM event 
+        * thread since it blocks until the in-process event reference
+        * is cleared during our event processing call exit.
+        */
+       if (cm_id) {
+               if (cm_id->qp)
+                       rdma_destroy_qp(cm_id);
+
+               rdma_destroy_id(cm_id);
+       }
+       dapl_os_free(conn, sizeof(*conn));
+}
+
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
+                                        struct rdma_cm_event *event)
+{
+       struct dapl_cm_id *new_conn;
+#ifdef DAPL_DBG
+       struct rdma_addr *ipaddr = &event->id->route.addr;
+#endif
+
+       if (conn->sp == NULL) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " dapli_rep_recv: on invalid listen " "handle\n");
+               return NULL;
+       }
+
+       /* allocate new cm_id and merge listen parameters */
+       new_conn = dapl_os_alloc(sizeof(*new_conn));
+       if (new_conn) {
+               (void)dapl_os_memzero(new_conn, sizeof(*new_conn));
+               dapl_os_lock_init(&new_conn->lock);
+               new_conn->cm_id = event->id;    /* provided by uCMA */
+               event->id->context = new_conn;  /* update CM_ID context */
+               new_conn->sp = conn->sp;
+               new_conn->hca = conn->hca;
+
+               /* Get requesters connect data, setup for accept */
+               new_conn->params.responder_resources =
+                   DAPL_MIN(event->param.conn.responder_resources,
+                            conn->hca->ib_trans.rd_atom_in);
+               new_conn->params.initiator_depth =
+                   DAPL_MIN(event->param.conn.initiator_depth,
+                            conn->hca->ib_trans.rd_atom_out);
+
+               new_conn->params.flow_control = event->param.conn.flow_control;
+               new_conn->params.rnr_retry_count =
+                   event->param.conn.rnr_retry_count;
+               new_conn->params.retry_count = event->param.conn.retry_count;
+
+               /* save private data */
+               if (event->param.conn.private_data_len) {
+                       dapl_os_memcpy(new_conn->p_data,
+                                      event->param.conn.private_data,
+                                      event->param.conn.private_data_len);
+                       new_conn->params.private_data = new_conn->p_data;
+                       new_conn->params.private_data_len =
+                           event->param.conn.private_data_len;
+               }
+
+               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
+                            "REQ: SP %p PORT %d LID %d "
+                            "NEW CONN %p ID %p pdata %p,%d\n",
+                            new_conn->sp, ntohs(((struct sockaddr_in *)
+                                                 &ipaddr->src_addr)->sin_port),
+                            event->listen_id, new_conn, event->id,
+                            event->param.conn.private_data,
+                            event->param.conn.private_data_len);
+
+               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
+                            "REQ: IP SRC %x PORT %d DST %x PORT %d "
+                            "rr %d init %d\n", ntohl(((struct sockaddr_in *)
+                                                      &ipaddr->src_addr)->
+                                                     sin_addr.s_addr),
+                            ntohs(((struct sockaddr_in *)
+                                   &ipaddr->src_addr)->sin_port),
+                            ntohl(((struct sockaddr_in *)
+                                   &ipaddr->dst_addr)->sin_addr.s_addr),
+                            ntohs(((struct sockaddr_in *)
+                                   &ipaddr->dst_addr)->sin_port),
+                            new_conn->params.responder_resources,
+                            new_conn->params.initiator_depth);
+       }
+       return new_conn;
+}
+
+/*
+ * Handle an rdma_cm event for a CM object on the connecting side and
+ * translate it into a DAPL connection event for conn->ep.
+ *
+ * Events are dropped when the CM object is being destroyed, when
+ * conn->ep is not a valid EP handle, or when the EP is already in
+ * DAT_EP_STATE_DISCONNECTED. A DISCONNECTED event moves the EP to
+ * DAT_EP_STATE_DISCONNECTED before the callback is issued.
+ *
+ * Mapping performed below:
+ *   UNREACHABLE / CONNECT_ERROR -> IB_CME_DESTINATION_UNREACHABLE
+ *   REJECTED                    -> IB_CME_DESTINATION_REJECT_PRIVATE_DATA
+ *                                  or IB_CME_DESTINATION_REJECT
+ *   ESTABLISHED                 -> IB_CME_CONNECTED
+ *   DISCONNECTED                -> IB_CME_DISCONNECTED
+ */
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,
+                              struct rdma_cm_event *event)
+{
+       /* cm_id is a pointer, so it is logged with %p (was %d) */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " active_cb: conn %p id %p event %d\n",
+                    conn, conn->cm_id, event->event);
+
+       /* ignore events for a CM object that dapls_ib_cm_free is tearing down */
+       dapl_os_lock(&conn->lock);
+       if (conn->destroy) {
+               dapl_os_unlock(&conn->lock);
+               return;
+       }
+       dapl_os_unlock(&conn->lock);
+
+       /* There is a chance that we can get events after
+        * the consumer calls disconnect in a pending state
+        * since the IB CM and uDAPL states are not shared.
+        * In some cases, IB CM could generate either a DCONN
+        * or CONN_ERR after the consumer returned from
+        * dapl_ep_disconnect with a DISCONNECTED event
+        * already queued. Check state here and bail to
+        * avoid any events after a disconnect.
+        */
+       if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
+               return;
+
+       dapl_os_lock(&conn->ep->header.lock);
+       if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
+               dapl_os_unlock(&conn->ep->header.lock);
+               return;
+       }
+       if (event->event == RDMA_CM_EVENT_DISCONNECTED)
+               conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
+
+       dapl_os_unlock(&conn->ep->header.lock);
+
+       switch (event->event) {
+       case RDMA_CM_EVENT_UNREACHABLE:
+       case RDMA_CM_EVENT_CONNECT_ERROR:
+               {
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                "dapl_cma_active: CONN_ERR event=0x%x"
+                                " status=%d %s DST %s, %d\n",
+                                event->event, event->status,
+                                (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
+                                inet_ntoa(((struct sockaddr_in *)
+                                           &conn->cm_id->route.addr.dst_addr)->
+                                          sin_addr),
+                                ntohs(((struct sockaddr_in *)
+                                       &conn->cm_id->route.addr.dst_addr)->
+                                      sin_port));
+
+                       /* per DAT SPEC provider always returns UNREACHABLE */
+                       dapl_evd_connection_callback(conn,
+                                                    IB_CME_DESTINATION_UNREACHABLE,
+                                                    NULL, conn->ep);
+                       break;
+               }
+       case RDMA_CM_EVENT_REJECTED:
+               {
+                       ib_cm_events_t cm_event;
+                       unsigned char *pdata = NULL;
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                                    " dapli_cm_active_handler: REJECTED reason=%d\n",
+                                    event->status);
+
+                       /*
+                        * valid REJ from consumer will always contain private data
+                        * NOTE(review): 28 is presumably the IB CM
+                        * "consumer defined" reject reason - confirm and
+                        * replace with a named constant.
+                        * NOTE(review): pdata skips the dapl_pdata_hdr
+                        * without checking that private_data_len is at
+                        * least sizeof(struct dapl_pdata_hdr).
+                        */
+                       if (event->status == 28 &&
+                           event->param.conn.private_data_len) {
+                               cm_event =
+                                   IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
+                               pdata =
+                                   (unsigned char *)event->param.conn.
+                                   private_data +
+                                   sizeof(struct dapl_pdata_hdr);
+                       } else {
+                               cm_event = IB_CME_DESTINATION_REJECT;
+                               dapl_log(DAPL_DBG_TYPE_WARN,
+                                        "dapl_cma_active: non-consumer REJ,"
+                                        " reason=%d, DST %s, %d\n",
+                                        event->status,
+                                        inet_ntoa(((struct sockaddr_in *)
+                                                   &conn->cm_id->route.addr.
+                                                   dst_addr)->sin_addr),
+                                        ntohs(((struct sockaddr_in *)
+                                               &conn->cm_id->route.addr.
+                                               dst_addr)->sin_port));
+                       }
+                       dapl_evd_connection_callback(conn, cm_event, pdata,
+                                                    conn->ep);
+                       break;
+               }
+       case RDMA_CM_EVENT_ESTABLISHED:
+               /* cm_id is a pointer, so it is logged with %p (was %d) */
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                            " active_cb: cm_id %p PORT %d CONNECTED to %s!\n",
+                            conn->cm_id, ntohs(((struct sockaddr_in *)
+                                                &conn->cm_id->route.addr.
+                                                dst_addr)->sin_port),
+                            inet_ntoa(((struct sockaddr_in *)
+                                       &conn->cm_id->route.addr.dst_addr)->
+                                      sin_addr));
+
+               /* setup local and remote ports for ep query */
+               conn->ep->param.remote_port_qual =
+                   PORT_TO_SID(rdma_get_dst_port(conn->cm_id));
+               conn->ep->param.local_port_qual =
+                   PORT_TO_SID(rdma_get_src_port(conn->cm_id));
+
+               dapl_evd_connection_callback(conn, IB_CME_CONNECTED,
+                                            event->param.conn.private_data,
+                                            conn->ep);
+               break;
+
+       case RDMA_CM_EVENT_DISCONNECTED:
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                            " active_cb: DISC EVENT - EP %p\n",conn->ep);
+               rdma_disconnect(conn->cm_id);   /* required for DREP */
+               /* validate EP handle */
+               if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
+                       dapl_evd_connection_callback(conn,
+                                                    IB_CME_DISCONNECTED,
+                                                    NULL, conn->ep);
+               break;
+       default:
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " dapli_cm_active_cb_handler: Unexpected CM "
+                            "event %d on ID 0x%p\n", event->event,
+                            conn->cm_id);
+               break;
+       }
+
+       return;
+}
+
+/*
+ * Handle an rdma_cm event for a CM object on the listening side and
+ * translate it into a DAPL CR event for conn->sp.
+ *
+ * Events are dropped when the CM object is being destroyed.
+ *
+ * Mapping performed below:
+ *   CONNECT_REQUEST             -> new CM object via dapli_req_recv(),
+ *                                  then IB_CME_CONNECTION_REQUEST_PENDING
+ *   UNREACHABLE / CONNECT_ERROR -> IB_CME_DESTINATION_UNREACHABLE
+ *   REJECTED                    -> IB_CME_DESTINATION_REJECT
+ *   ESTABLISHED                 -> IB_CME_CONNECTED
+ *   DISCONNECTED                -> IB_CME_DISCONNECTED (only when conn->sp
+ *                                  is a valid PSP or RSP handle)
+ */
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
+                               struct rdma_cm_event *event)
+{
+       struct dapl_cm_id *new_conn;
+
+       /* event->id is a pointer, so it is logged with %p (was %d) */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " passive_cb: conn %p id %p event %d\n",
+                    conn, event->id, event->event);
+
+       /* ignore events for a CM object that dapls_ib_cm_free is tearing down */
+       dapl_os_lock(&conn->lock);
+       if (conn->destroy) {
+               dapl_os_unlock(&conn->lock);
+               return;
+       }
+       dapl_os_unlock(&conn->lock);
+
+       switch (event->event) {
+       case RDMA_CM_EVENT_CONNECT_REQUEST:
+               /* create new conn object with new conn_id from event */
+               new_conn = dapli_req_recv(conn, event);
+
+               if (new_conn)
+                       dapls_cr_callback(new_conn,
+                                         IB_CME_CONNECTION_REQUEST_PENDING,
+                                         event->param.conn.private_data,
+                                         new_conn->sp);
+               break;
+       case RDMA_CM_EVENT_UNREACHABLE:
+       case RDMA_CM_EVENT_CONNECT_ERROR:
+               dapl_log(DAPL_DBG_TYPE_WARN,
+                        "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"
+                        " DST %s,%d\n",
+                        event->event, event->status,
+                        (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
+                        inet_ntoa(((struct sockaddr_in *)
+                                   &conn->cm_id->route.addr.dst_addr)->
+                                  sin_addr), ntohs(((struct sockaddr_in *)
+                                                    &conn->cm_id->route.addr.
+                                                    dst_addr)->sin_port));
+
+               dapls_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
+                                 NULL, conn->sp);
+               break;
+
+       case RDMA_CM_EVENT_REJECTED:
+               {
+                       /* will always be abnormal NON-consumer from active side */
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                "dapl_cm_passive: non-consumer REJ, reason=%d,"
+                                " DST %s, %d\n",
+                                event->status,
+                                inet_ntoa(((struct sockaddr_in *)
+                                           &conn->cm_id->route.addr.dst_addr)->
+                                          sin_addr),
+                                ntohs(((struct sockaddr_in *)
+                                       &conn->cm_id->route.addr.dst_addr)->
+                                      sin_port));
+
+                       dapls_cr_callback(conn, IB_CME_DESTINATION_REJECT,
+                                         NULL, conn->sp);
+                       break;
+               }
+       case RDMA_CM_EVENT_ESTABLISHED:
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                            " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",
+                            conn->cm_id, ntohs(((struct sockaddr_in *)
+                                                &conn->cm_id->route.addr.
+                                                src_addr)->sin_port),
+                            ntohl(((struct sockaddr_in *)
+                                   &conn->cm_id->route.addr.dst_addr)->
+                                  sin_addr.s_addr));
+
+               dapls_cr_callback(conn, IB_CME_CONNECTED, NULL, conn->sp);
+
+               break;
+       case RDMA_CM_EVENT_DISCONNECTED:
+               rdma_disconnect(conn->cm_id);   /* required for DREP */
+               /* validate SP handle context: accept either a PSP or an RSP */
+               if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) ||
+                   !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
+                       dapls_cr_callback(conn,
+                                         IB_CME_DISCONNECTED, NULL, conn->sp);
+               break;
+       default:
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "
+                            "Unexpected CM event %d on ID 0x%p\n",
+                            event->event, conn->cm_id);
+               break;
+       }
+
+       return;
+}
+
+/************************ DAPL provider entry points **********************/
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node.
+ * Only address resolution is started here (rdma_resolve_addr); the
+ * resulting ADDR_RESOLVED / ADDR_ERROR events are handled in
+ * dapli_cma_event_cb.
+ *
+ * Input:
+ *     ep_handle       endpoint; its cm_handle carries the pre-bound cm_id
+ *     r_addr          remote IA address
+ *     r_qual          remote connection qualifier (service ID)
+ *     p_size          size of private data
+ *     p_data          pointer to private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      ep_handle is NULL or has no usable cm_handle
+ *     errno converted by dapl_convert_errno if rdma_resolve_addr fails
+ *
+ */
+DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
+                           IN DAT_IA_ADDRESS_PTR r_addr,
+                           IN DAT_CONN_QUAL r_qual,
+                           IN DAT_COUNT p_size, IN void *p_data)
+{
+       struct dapl_ep *ep_ptr = ep_handle;
+       struct dapl_cm_id *conn;
+       int ret;
+
+       /* Sanity check: must happen before ep_ptr->cm_handle is read */
+       if (NULL == ep_ptr)
+               return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP);
+
+       /* same handle validation as dapls_ib_disconnect */
+       conn = ep_ptr->cm_handle;
+       if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
+               return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
+                    r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
+
+       /* rdma conn and cm_id pre-bound; reference via ep_ptr->cm_handle */
+
+       /* Setup QP/CM parameters and private data in cm_id */
+       (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
+       conn->params.responder_resources =
+           ep_ptr->param.ep_attr.max_rdma_read_in;
+       conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
+       conn->params.flow_control = 1;
+       conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
+       conn->params.retry_count = IB_RC_RETRY_COUNT;
+       /* a negative size must never reach memcpy as a huge length */
+       if (p_size > 0) {
+               dapl_os_memcpy(conn->p_data, p_data, p_size);
+               conn->params.private_data = conn->p_data;
+               conn->params.private_data_len = p_size;
+       }
+
+       /* copy in remote address, need a copy for retry attempts */
+       dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
+
+       /* Resolve remote address, src already bound during QP create */
+       ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);
+       ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;
+
+       ret = rdma_resolve_addr(conn->cm_id, NULL,
+                               (struct sockaddr *)&conn->r_addr,
+                               conn->arp_timeout);
+       if (ret) {
+               int err = errno;        /* save: logging may clobber errno */
+
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
+                        ret, strerror(err));
+               return dapl_convert_errno(err, "ib_connect");
+       }
+       /* sin_port is stored in network order; convert for display */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " connect: resolve_addr: cm_id %p -> %s port %d\n",
+                    conn->cm_id,
+                    inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
+                    ntohs(((struct sockaddr_in *)&conn->r_addr)->sin_port));
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP. A missing cm_handle or cm_id is treated as
+ * "nothing to disconnect" and reported as success.
+ *
+ * Input:
+ *     ep_ptr          endpoint to disconnect
+ *     close_flags     only logged; there is no graceful half-pipe option
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     always; an rdma_disconnect failure is only logged
+ *
+ */
+DAT_RETURN
+dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
+{
+       dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
+       int ret;
+
+       /* cm_id is a pointer, so it must be printed with %p, not %d */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " disconnect(ep %p, conn %p, id %p flags %x)\n",
+                    ep_ptr, conn, (void *)(conn ? conn->cm_id : NULL),
+                    close_flags);
+
+       if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
+               return DAT_SUCCESS;
+
+       /* no graceful half-pipe disconnect option */
+       ret = rdma_disconnect(conn->cm_id);
+       if (ret)
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " disconnect: ID %p ret 0x%x\n",
+                            conn, ret);
+
+       /* 
+        * DAT event notification occurs from the callback
+        * Note: will fire even if DREQ goes unanswered on timeout 
+        */
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Clean up outstanding connection data. This routine is invoked
+ * after the final disconnect callback has occurred. Only on the
+ * ACTIVE side of a connection.
+ *
+ * This provider has nothing to clean up, so the body is an
+ * intentional no-op and every argument is ignored.
+ *
+ * Input:
+ *     ep_ptr          DAPL_EP (unused)
+ *     active          Indicates active side of connection (unused)
+ *     ib_cm_event     provider CM event (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     void
+ *
+ */
+void
+dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
+                         IN DAT_BOOLEAN active,
+                         IN const ib_cm_events_t ib_cm_event)
+{
+       /* no provider state to release */
+       (void)ep_ptr;
+       (void)active;
+       (void)ib_cm_event;
+}
+
+/*
+ * dapls_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener: allocate a CM object,
+ * create an rdma cm_id on the global event channel, bind it to the
+ * HCA's local address with the port derived from ServiceID, and
+ * start listening.
+ *
+ * Input:
+ *     ia_ptr          IA whose hca_ptr supplies the local address
+ *     ServiceID       consumer's service ID, mapped to the listen port
+ *     sp_ptr          service point; receives the listener handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES      CM object allocation failed
+ *     DAT_CONN_QUAL_IN_USE            bind/listen failed, EBUSY/EADDRINUSE
+ *     errno converted by dapl_convert_errno for any other failure
+ *
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
+                            IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
+{
+       DAT_RETURN dat_status = DAT_SUCCESS;
+       ib_cm_srvc_handle_t conn;
+       DAT_SOCK_ADDR6 addr;    /* local binding address */
+
+       /* Allocate CM and initialize lock */
+       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       dapl_os_memzero(conn, sizeof(*conn));
+       dapl_os_lock_init(&conn->lock);
+
+       /* create CM_ID, bind to local device, create QP */
+       if (rdma_create_id
+           (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
+               dapl_os_free(conn, sizeof(*conn));
+               return (dapl_convert_errno(errno, "setup_listener"));
+       }
+
+       /* open identifies the local device; per DAT specification */
+       /* Get family and address then set port to consumer's ServiceID */
+       dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));
+       ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);
+
+       if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {
+               if ((errno == EBUSY) || (errno == EADDRINUSE))
+                       dat_status = DAT_CONN_QUAL_IN_USE;
+               else
+                       dat_status =
+                           dapl_convert_errno(errno, "setup_listener");
+               goto bail;
+       }
+
+       /* cm_id is a pointer, so it must be printed with %p, not %d */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %p)\n",
+                    ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),
+                    sp_ptr, conn, conn->cm_id);
+
+       sp_ptr->cm_srvc_handle = conn;
+       conn->sp = sp_ptr;
+       conn->hca = ia_ptr->hca_ptr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " listen(conn=%p cm_id=%p)\n",
+                    sp_ptr->cm_srvc_handle, conn->cm_id);
+
+       if (rdma_listen(conn->cm_id, 0)) {      /* max cma backlog */
+
+               if ((errno == EBUSY) || (errno == EADDRINUSE))
+                       dat_status = DAT_CONN_QUAL_IN_USE;
+               else
+                       dat_status =
+                           dapl_convert_errno(errno, "setup_listener");
+
+               /*
+                * conn is freed at bail; clear the SP's reference so a
+                * later dapls_ib_remove_conn_listener cannot free it again
+                */
+               sp_ptr->cm_srvc_handle = NULL;
+               goto bail;
+       }
+
+       /* success */
+       return DAT_SUCCESS;
+
+      bail:
+       rdma_destroy_id(conn->cm_id);
+       dapl_os_free(conn, sizeof(*conn));
+       return dat_status;
+}
+
+/*
+ * dapls_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener. The listener handle is
+ * detached from the service point and released with dapls_ib_cm_free.
+ * A service point without a listener is left untouched.
+ *
+ * Input:
+ *     ia_ptr          IA handle (only logged)
+ *     sp_ptr          service point that owns the listener handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     always
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
+{
+       ib_cm_srvc_handle_t listener = sp_ptr->cm_srvc_handle;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+                    ia_ptr, sp_ptr, listener);
+
+       /* nothing registered on this service point */
+       if (listener == IB_INVALID_HANDLE)
+               return DAT_SUCCESS;
+
+       /* drop the SP's reference first, then release the CM object */
+       sp_ptr->cm_srvc_handle = NULL;
+       dapls_ib_cm_free(listener, NULL);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Perform necessary steps to accept a connection: allocate the QP if
+ * the EP is still unattached, move the EP's QP from its pre-allocated
+ * cm_id to the inbound CR cm_id (same device and port required), and
+ * call rdma_accept. On any failure the request is rejected and the
+ * CR's CM object is freed.
+ *
+ * Input:
+ *     cr_handle       connection request being accepted
+ *     ep_handle       endpoint to bind to the connection
+ *     p_size          private data size
+ *     p_data          private data sent with the accept
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_LENGTH_ERROR        p_size exceeds IB_MAX_REP_PDATA_SIZE
+ *     DAT_INTERNAL_ERROR      EP and CR are on different device/port
+ *     status from dapls_ib_qp_alloc, or errno converted by
+ *     dapl_convert_errno when rdma_accept fails
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
+                          IN DAT_EP_HANDLE ep_handle,
+                          IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
+{
+       DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
+       DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
+       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+       struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
+       int ret;
+       DAT_RETURN dat_status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",
+                    cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);
+
+       /* Obtain size of private data structure & contents */
+       if (p_size > IB_MAX_REP_PDATA_SIZE) {
+               dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
+               goto bail;
+       }
+
+       if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
+               /* 
+                * If we are lazy attaching the QP then we may need to
+                * hook it up here. Typically, we run this code only for
+                * DAT_PSP_PROVIDER_FLAG
+                */
+               dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
+               if (dat_status != DAT_SUCCESS) {
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                " dapl_cma_accept: qp_alloc ERR %d\n",
+                                dat_status);
+                       goto bail;
+               }
+       }
+
+       /* 
+        * Validate device and port in EP cm_id against inbound 
+        * CR cm_id. The pre-allocated EP cm_id is already bound to 
+        * a local device (cm_id and QP) when created. Move the QP
+        * to the new cm_id only if device and port numbers match.
+        */
+       if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
+           ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
+               /* move QP to new cr_conn, remove QP ref in EP cm_id */
+               cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;
+               ep_ptr->cm_handle->cm_id->qp = NULL;
+               dapls_ib_cm_free(ep_ptr->cm_handle, NULL);
+               /* never leave the EP pointing at the freed CM object */
+               ep_ptr->cm_handle = IB_INVALID_HANDLE;
+       } else {
+               /* port_num is a device port number, not a network-order
+                * value, so it is printed without ntohs() */
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " dapl_cma_accept: ERR dev(%p!=%p) or"
+                        " port mismatch(%d!=%d)\n",
+                        ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,
+                        ep_ptr->cm_handle->cm_id->port_num,
+                        cr_conn->cm_id->port_num);
+               dat_status = DAT_INTERNAL_ERROR;
+               goto bail;
+       }
+
+       cr_ptr->param.local_ep_handle = ep_handle;
+       cr_conn->params.private_data = p_data;
+       cr_conn->params.private_data_len = p_size;
+
+       ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
+       if (ret) {
+               int err = errno;        /* save: logging may clobber errno */
+
+               dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
+                        ret, strerror(err));
+               /* convert the errno, not the rdma_accept return value */
+               dat_status = dapl_convert_errno(err, "accept");
+               /*
+                * NOTE(review): the EP's QP has already been moved to
+                * cr_conn, which bail frees; confirm how ep_ptr's QP
+                * state should be reset on this path.
+                */
+               goto bail;
+       }
+
+       /* save accepted conn and EP reference, qp_handle unchanged */
+       ep_ptr->cm_handle = cr_conn;
+       cr_conn->ep = ep_ptr;
+
+       /* setup local and remote ports for ep query */
+       /* Note: port qual in network order */
+       ep_ptr->param.remote_port_qual =
+           PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));
+       ep_ptr->param.local_port_qual =
+           PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));
+
+       return DAT_SUCCESS;
+      bail:
+       rdma_reject(cr_conn->cm_id, NULL, 0);
+       dapls_ib_cm_free(cr_conn, NULL);
+       return dat_status;
+}
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection. A dapl_pdata_hdr carrying the DAT/provider
+ * version is always placed in front of the consumer's private data.
+ * Once rdma_reject has been called the CM object is freed, whether
+ * or not the call succeeded.
+ *
+ * Input:
+ *     cm_handle               CM object of the connection request
+ *     reason                  only logged
+ *     private_data_size       size of consumer private data
+ *     private_data            consumer private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      cm_handle is IB_INVALID_HANDLE
+ *     DAT_INVALID_PARAMETER   private_data_size negative or too large
+ *     errno converted by dapl_convert_errno if rdma_reject fails
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
+                          IN int reason,
+                          IN DAT_COUNT private_data_size,
+                          IN const DAT_PVOID private_data)
+{
+       int ret;
+       int err;
+       int offset = sizeof(struct dapl_pdata_hdr);
+       struct dapl_pdata_hdr pdata_hdr;
+
+       memset(&pdata_hdr, 0, sizeof pdata_hdr);
+       pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |
+                                 (DAT_VERSION_MINOR << 16) |
+                                 (VN_PROVIDER_MAJOR << 8) |
+                                 (VN_PROVIDER_MINOR));
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",
+                    cm_handle, reason, ntohl(pdata_hdr.version),
+                    private_data, private_data_size);
+
+       if (cm_handle == IB_INVALID_HANDLE) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " reject: invalid handle: reason %d\n", reason);
+               return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);
+       }
+
+       /* a negative size must never reach memcpy as a huge length */
+       if (private_data_size < 0 ||
+           private_data_size >
+           dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,
+                                      cm_handle->hca))
+               return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
+
+       /* setup pdata_hdr and users data, in CR pdata buffer */
+       dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);
+       if (private_data_size)
+               dapl_os_memcpy(cm_handle->p_data + offset,
+                              private_data, private_data_size);
+
+       /*
+        * Always some private data with reject so active peer can
+        * determine real application reject from an abnormal 
+        * application termination
+        */
+       ret = rdma_reject(cm_handle->cm_id,
+                         cm_handle->p_data, offset + private_data_size);
+       err = errno;            /* save: cm_free may clobber errno */
+
+       dapls_ib_cm_free(cm_handle, NULL);
+
+       /* convert the errno, not the rdma_reject return value */
+       if (ret)
+               return dapl_convert_errno(err, "reject");
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection. The handle may be
+ * an EP or a CR; the address is read from the destination address of
+ * the associated cm_id route.
+ *
+ * Input:
+ *     dat_handle      EP or CR handle, told apart by header magic
+ *
+ * Output:
+ *     raddr           where to place the remote address; only
+ *                     sizeof(DAT_SOCK_ADDR) bytes are written
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      NULL handle, handle that is neither EP
+ *                             nor CR, or handle without a cm_id
+ *
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
+{
+       DAPL_HEADER *header;
+       dp_ib_cm_handle_t ib_cm_handle;
+       struct rdma_addr *ipaddr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " remote_addr(cm_handle=%p, r_addr=%p)\n",
+                    dat_handle, raddr);
+
+       if (dat_handle == NULL)
+               return DAT_INVALID_HANDLE;
+
+       header = (DAPL_HEADER *) dat_handle;
+
+       if (header->magic == DAPL_MAGIC_EP)
+               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+       else if (header->magic == DAPL_MAGIC_CR)
+               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+       else
+               return DAT_INVALID_HANDLE;
+
+       /* an EP/CR without a CM object or cm_id has no route to read */
+       if ((ib_cm_handle == IB_INVALID_HANDLE) ||
+           (ib_cm_handle->cm_id == NULL))
+               return DAT_INVALID_HANDLE;
+
+       /* get remote IP address from cm_id route */
+       ipaddr = &ib_cm_handle->cm_id->route.addr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
+                    ib_cm_handle, ib_cm_handle->cm_id,
+                    ntohl(((struct sockaddr_in *)
+                           &ipaddr->src_addr)->sin_addr.s_addr),
+                    ntohl(((struct sockaddr_in *)
+                           &ipaddr->dst_addr)->sin_addr.s_addr),
+                    ntohs(((struct sockaddr_in *)
+                           &ipaddr->dst_addr)->sin_port));
+
+       dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the maximum private data size for a connection op type.
+ *
+ * Input:
+ *     prd_ptr         private data pointer; not examined by this
+ *                     implementation, the result is always the maximum
+ *     conn_op         connection operation type
+ *     hca_ptr         hca pointer, needed for transport type
+ *
+ * On an iWARP device the same limit applies to every operation:
+ * IWARP_MAX_PDATA_SIZE less the dapl_pdata_hdr. Otherwise the limit
+ * is the IB maximum for conn_op; the reject limit also leaves room
+ * for the dapl_pdata_hdr.
+ *
+ * Output:
+ *     None
+ *
+ * Returns:
+ *     length of private data, 0 for an unknown conn_op
+ *
+ */
+int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
+                              IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
+{
+       /* iWARP: one limit regardless of the operation type */
+       if (hca_ptr->ib_hca_handle->device->transport_type
+           == IBV_TRANSPORT_IWARP)
+               return (IWARP_MAX_PDATA_SIZE - sizeof(struct dapl_pdata_hdr));
+
+       switch (conn_op) {
+       case DAPL_PDATA_CONN_REQ:
+               return IB_MAX_REQ_PDATA_SIZE;
+       case DAPL_PDATA_CONN_REP:
+               return IB_MAX_REP_PDATA_SIZE;
+       case DAPL_PDATA_CONN_REJ:
+               /* reject data is always prefixed with a dapl_pdata_hdr */
+               return IB_MAX_REJ_PDATA_SIZE - sizeof(struct dapl_pdata_hdr);
+       case DAPL_PDATA_CONN_DREQ:
+               return IB_MAX_DREQ_PDATA_SIZE;
+       case DAPL_PDATA_CONN_DREP:
+               return IB_MAX_DREP_PDATA_SIZE;
+       default:
+               break;
+       }
+
+       /* unknown operation type */
+       return 0;
+}
+
+/*
+ * Map all CMA event codes to the DAT equivalent.
+ *
+ * Several CM events share one DAT event, so the reverse lookup in
+ * dapls_ib_get_cm_event returns the first matching row; row order
+ * therefore matters.
+ */
+#define DAPL_IB_EVENT_CNT      13
+
+static struct ib_cm_event_map {
+       const ib_cm_events_t ib_cm_event;
+       DAT_EVENT_NUMBER dat_event_num;
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+       /* 00 */ {IB_CME_CONNECTED,
+                 DAT_CONNECTION_EVENT_ESTABLISHED},
+       /* 01 */ {IB_CME_DISCONNECTED,
+                 DAT_CONNECTION_EVENT_DISCONNECTED},
+       /* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
+                 DAT_CONNECTION_EVENT_DISCONNECTED},
+       /* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING,
+                 DAT_CONNECTION_REQUEST_EVENT},
+       /* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+                 DAT_CONNECTION_REQUEST_EVENT},
+       /* 05 */ {IB_CME_CONNECTION_REQUEST_ACKED,
+                 DAT_CONNECTION_REQUEST_EVENT},
+       /* 06 */ {IB_CME_DESTINATION_REJECT,
+                 DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+       /* 07 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+                 DAT_CONNECTION_EVENT_PEER_REJECTED},
+       /* 08 */ {IB_CME_DESTINATION_UNREACHABLE,
+                 DAT_CONNECTION_EVENT_UNREACHABLE},
+       /* 09 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+                 DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+       /* 10 */ {IB_CME_LOCAL_FAILURE,
+                 DAT_CONNECTION_EVENT_BROKEN},
+       /* 11 */ {IB_CME_BROKEN,
+                 DAT_CONNECTION_EVENT_BROKEN},
+       /* 12 */ {IB_CME_TIMEOUT,
+                 DAT_CONNECTION_EVENT_TIMED_OUT},
+};
+
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *     ib_cm_event     event provided to the dapl callback routine
+ *     active          switch indicating active or passive connection;
+ *                     used only in the debug log
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_EVENT_NUMBER of translated provider value, 0 if the
+ *     event has no entry in ib_cm_event_map
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
+                      IN DAT_BOOLEAN active)
+{
+       const struct ib_cm_event_map *entry = ib_cm_event_map;
+       const struct ib_cm_event_map *end =
+           ib_cm_event_map + DAPL_IB_EVENT_CNT;
+       DAT_EVENT_NUMBER translated = 0;        /* 0: no mapping found */
+
+       /* first row whose CM event matches wins */
+       for (; entry < end; entry++) {
+               if (entry->ib_cm_event == ib_cm_event) {
+                       translated = entry->dat_event_num;
+                       break;
+               }
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
+                    "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
+                    active ? "active" : "passive", ib_cm_event, translated);
+
+       return translated;
+}
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ *
+ * Input:
+ *     dat_event_num   DAT event we need an equivalent CM event for
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     ib_cm_event of translated DAPL value: the first row of
+ *     ib_cm_event_map with that DAT event, or 0 if there is none
+ */
+ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
+{
+       int row = 0;
+
+       /* scan the table in order; the first matching row wins */
+       while (row < DAPL_IB_EVENT_CNT) {
+               if (ib_cm_event_map[row].dat_event_num == dat_event_num)
+                       return ib_cm_event_map[row].ib_cm_event;
+               row++;
+       }
+
+       /* no mapping found */
+       return 0;
+}
+
+/*
+ * dapli_cma_event_cb
+ *
+ * Fetch one event from the global CM event channel (g_cm_events),
+ * dispatch it, and acknowledge it. Only one event is handled per
+ * call, for fairness.
+ *
+ * The dapl_cm_id is taken from the listening cm_id's context for a
+ * CONNECT_REQUEST and from the event cm_id's context otherwise.
+ * Address and route resolution errors are retried while the retry
+ * counters last and the status is -ETIMEDOUT; connection events go
+ * to the passive callback when conn->sp is set, else to the active
+ * callback.
+ */
+void dapli_cma_event_cb(void)
+{
+       struct rdma_cm_event *event;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
+
+       /* process one CM event, fairness */
+       if (!rdma_get_cm_event(g_cm_events, &event)) {
+               struct dapl_cm_id *conn;
+
+               /* set proper conn from cm_id context */
+               if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+                       conn = (struct dapl_cm_id *)event->listen_id->context;
+               else
+                       conn = (struct dapl_cm_id *)event->id->context;
+
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                            " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
+                            event->event, event->id, event->listen_id, conn);
+
+               switch (event->event) {
+               case RDMA_CM_EVENT_ADDR_RESOLVED:
+                       dapli_addr_resolve(conn);
+                       break;
+
+               case RDMA_CM_EVENT_ROUTE_RESOLVED:
+                       dapli_route_resolve(conn);
+                       break;
+
+               case RDMA_CM_EVENT_ADDR_ERROR:
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                "dapl_cma_active: CM ADDR ERROR: ->"
+                                " DST %s retry (%d)..\n",
+                                inet_ntoa(((struct sockaddr_in *)
+                                           &conn->r_addr)->sin_addr),
+                                conn->arp_retries);
+
+                       /* retry address resolution */
+                       if ((--conn->arp_retries) &&
+                           (event->status == -ETIMEDOUT)) {
+                               int ret;
+                               ret = rdma_resolve_addr(conn->cm_id, NULL,
+                                                       (struct sockaddr *)
+                                                       &conn->r_addr,
+                                                       conn->arp_timeout);
+                               if (!ret)
+                                       break;
+                               else {
+                                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+                                                    " ERROR: rdma_resolve_addr = "
+                                                    "%d %s\n",
+                                                    ret, strerror(errno));
+                               }
+                       }
+                       /* retries exhausted or resolve_addr failed */
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                "dapl_cma_active: ARP_ERR, retries(%d)"
+                                " exhausted -> DST %s,%d\n",
+                                IB_ARP_RETRY_COUNT,
+                                inet_ntoa(((struct sockaddr_in *)
+                                           &conn->cm_id->route.addr.dst_addr)->
+                                          sin_addr),
+                                ntohs(((struct sockaddr_in *)
+                                       &conn->cm_id->route.addr.dst_addr)->
+                                      sin_port));
+
+                       dapl_evd_connection_callback(conn,
+                                                    IB_CME_DESTINATION_UNREACHABLE,
+                                                    NULL, conn->ep);
+                       break;
+
+               case RDMA_CM_EVENT_ROUTE_ERROR:
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                "dapl_cma_active: CM ROUTE ERROR: ->"
+                                " DST %s retry (%d)..\n",
+                                inet_ntoa(((struct sockaddr_in *)
+                                           &conn->r_addr)->sin_addr),
+                                conn->route_retries);
+
+                       /* retry route resolution */
+                       if ((--conn->route_retries) &&
+                           (event->status == -ETIMEDOUT))
+                               dapli_addr_resolve(conn);
+                       else {
+                               dapl_log(DAPL_DBG_TYPE_ERR,
+                                        "dapl_cma_active: PATH_RECORD_ERR,"
+                                        " retries(%d) exhausted, DST %s,%d\n",
+                                        IB_ROUTE_RETRY_COUNT,
+                                        inet_ntoa(((struct sockaddr_in *)
+                                                   &conn->cm_id->route.addr.
+                                                   dst_addr)->sin_addr),
+                                        ntohs(((struct sockaddr_in *)
+                                               &conn->cm_id->route.addr.
+                                               dst_addr)->sin_port));
+
+                               dapl_evd_connection_callback(conn,
+                                                            IB_CME_DESTINATION_UNREACHABLE,
+                                                            NULL, conn->ep);
+                       }
+                       break;
+
+               case RDMA_CM_EVENT_DEVICE_REMOVAL:
+                       dapl_evd_connection_callback(conn,
+                                                    IB_CME_LOCAL_FAILURE,
+                                                    NULL, conn->ep);
+                       break;
+               case RDMA_CM_EVENT_CONNECT_REQUEST:
+               case RDMA_CM_EVENT_CONNECT_ERROR:
+               case RDMA_CM_EVENT_UNREACHABLE:
+               case RDMA_CM_EVENT_REJECTED:
+               case RDMA_CM_EVENT_ESTABLISHED:
+               case RDMA_CM_EVENT_DISCONNECTED:
+                       /* passive or active */
+                       if (conn->sp)
+                               dapli_cm_passive_cb(conn, event);
+                       else
+                               dapli_cm_active_cb(conn, event);
+                       break;
+               case RDMA_CM_EVENT_CONNECT_RESPONSE:
+               default:
+                       /* event->event is an integer code: %d, not %p */
+                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+                                    " cm_event: UNEXPECTED EVENT=%d ID=%p CTX=%p\n",
+                                    event->event, event->id,
+                                    event->id->context);
+                       break;
+               }
+               /* ack event, unblocks destroy_cm_id in consumer threads */
+               rdma_ack_cm_event(event);
+       }
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c
deleted file mode 100755 (executable)
index 946cfbd..0000000
+++ /dev/null
@@ -1,1297 +0,0 @@
-/*
- * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. 
- * Copyright (c) 2003 Topspin Corporation.  All rights reserved. 
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_cm.c
- *
- * PURPOSE: The OFED provider - uCMA, name and route resolution
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_cr_util.h"
-#include "dapl_name_service.h"
-#include "dapl_ib_util.h"
-#include "dapl_vendor.h"
-#include "dapl_osd.h"
-
-/* CM event channel, defined outside this file; handed to rdma_create_id() below */
-extern struct rdma_event_channel *g_cm_events;
-
-/* local prototypes: forward declarations of the static helpers defined below */
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
-                                        struct rdma_cm_event *event);
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
-                              struct rdma_cm_event *event);
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
-                               struct rdma_cm_event *event);
-static void dapli_addr_resolve(struct dapl_cm_id *conn);
-static void dapli_route_resolve(struct dapl_cm_id *conn);
-
-/*
- * cma requires 16 bit SID, in network order.
- *
- * SID_TO_PORT maps a service ID to a 16-bit port in network byte order:
- * values that fit in 16 bits are used unchanged, larger values are folded
- * into the range [IB_PORT_BASE, IB_PORT_BASE + IB_PORT_MOD - 1].
- * The argument is parenthesized at every use so that an expression such
- * as "base + n" is evaluated as a unit. It is still expanded more than
- * once, so do not pass arguments with side effects.
- *
- * PORT_TO_SID converts a network-order port back to host order.
- */
-#define IB_PORT_MOD 32001
-#define IB_PORT_BASE (65535 - IB_PORT_MOD)
-#define SID_TO_PORT(SID) \
-    ((SID) > 0xffff ? \
-    htons((unsigned short)(((SID) % IB_PORT_MOD) + IB_PORT_BASE)) :\
-    htons((unsigned short)(SID)))
-
-#define PORT_TO_SID(p) ntohs(p)
-
-/*
- * private data header to validate consumer rejects versus abnormal events.
- * The active-side REJECTED handler skips sizeof(struct dapl_pdata_hdr)
- * bytes of the reject private data before passing it to the consumer.
- */
-struct dapl_pdata_hdr {
-       DAT_UINT32 version;
-};
-
-/*
- * Continue an active connect once the address has been resolved: log the
- * source/destination addresses and start route resolution on the cm_id.
- * If rdma_resolve_route() fails, report IB_CME_LOCAL_FAILURE to the EP.
- */
-static void dapli_addr_resolve(struct dapl_cm_id *conn)
-{
-       int rc;
-#ifdef DAPL_DBG
-       struct sockaddr_in *src =
-           (struct sockaddr_in *)&conn->cm_id->route.addr.src_addr;
-       struct sockaddr_in *dst =
-           (struct sockaddr_in *)&conn->cm_id->route.addr.dst_addr;
-#endif
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " addr_resolve: cm_id %p SRC %x DST %x\n",
-                    conn->cm_id,
-                    ntohl(src->sin_addr.s_addr),
-                    ntohl(dst->sin_addr.s_addr));
-
-       rc = rdma_resolve_route(conn->cm_id, conn->route_timeout);
-       if (rc == 0)
-               return;
-
-       /* route resolution could not be started; fail the connect locally */
-       dapl_log(DAPL_DBG_TYPE_ERR,
-                " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",
-                rc, strerror(errno));
-       dapl_evd_connection_callback(conn, IB_CME_LOCAL_FAILURE,
-                                    NULL, conn->ep);
-}
-
-/*
- * Continue an active connect once the route has been resolved: log the
- * resolved addresses, GIDs and connect parameters, then issue
- * rdma_connect() with the parameters stored in conn->params.
- * If rdma_connect() fails, report IB_CME_LOCAL_FAILURE to the EP.
- */
-static void dapli_route_resolve(struct dapl_cm_id *conn)
-{
-       int rc;
-#ifdef DAPL_DBG
-       struct sockaddr_in *src =
-           (struct sockaddr_in *)&conn->cm_id->route.addr.src_addr;
-       struct sockaddr_in *dst =
-           (struct sockaddr_in *)&conn->cm_id->route.addr.dst_addr;
-       struct ib_addr *gids = &conn->cm_id->route.addr.addr.ibaddr;
-#endif
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
-                    conn->cm_id,
-                    ntohl(src->sin_addr.s_addr),
-                    ntohl(dst->sin_addr.s_addr),
-                    ntohs(dst->sin_port));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: SRC GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    ntohll(gids->sgid.global.subnet_prefix),
-                    (unsigned long long)
-                    ntohll(gids->sgid.global.interface_id));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: DST GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    ntohll(gids->dgid.global.subnet_prefix),
-                    (unsigned long long)
-                    ntohll(gids->dgid.global.interface_id));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
-                    conn->cm_id,
-                    conn->params.private_data,
-                    conn->params.private_data_len,
-                    conn->params.responder_resources,
-                    conn->params.initiator_depth);
-
-       rc = rdma_connect(conn->cm_id, &conn->params);
-       if (rc == 0)
-               return;
-
-       /* the connect request could not be sent; fail the connect locally */
-       dapl_log(DAPL_DBG_TYPE_ERR,
-                " dapl_cma_connect: rdma_connect ERR %d %s\n",
-                rc, strerror(errno));
-       dapl_evd_connection_callback(conn,
-                                    IB_CME_LOCAL_FAILURE, NULL, conn->ep);
-}
-
-/*
- * dapli_destroy_conn
- *
- * Tear down a connection object: mark it destroyed, detach it from its
- * EP (if one is attached), destroy the QP and cm_id it holds, then free
- * the object itself.
- *
- * Called from consumer thread via dat_ep_free().
- * CANNOT be called from the async event processing thread
- * dapli_cma_event_cb() since a cm_id reference is held and
- * a deadlock will occur.
- *
- * Input:
- *     conn            connection object; freed before return
- *
- * Returns:
- *     void
- */
-void dapli_destroy_conn(struct dapl_cm_id *conn)
-{
-       struct rdma_cm_id *cm_id;
-
-       /* cm_id is a pointer, so it is printed with %p rather than %d */
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " destroy_conn: conn %p id %p\n", conn, conn->cm_id);
-
-       /* flag the object and detach it from the EP under the conn lock */
-       dapl_os_lock(&conn->lock);
-       conn->destroy = 1;
-
-       if (conn->ep) {
-               conn->ep->cm_handle = IB_INVALID_HANDLE;
-               conn->ep->qp_handle = IB_INVALID_HANDLE;
-       }
-
-       /* take ownership of the cm_id so it is destroyed outside the lock */
-       cm_id = conn->cm_id;
-       conn->cm_id = NULL;
-       dapl_os_unlock(&conn->lock);
-
-       /* 
-        * rdma_destroy_id will force synchronization with async CM event 
-        * thread since it blocks until the in-process event reference
-        * is cleared during our event processing call exit.
-        */
-       if (cm_id) {
-               if (cm_id->qp)
-                       rdma_destroy_qp(cm_id);
-
-               rdma_destroy_id(cm_id);
-       }
-       dapl_os_free(conn, sizeof(*conn));
-}
-
-/*
- * dapli_req_recv
- *
- * Build the connection object for an inbound connect request. The new
- * object wraps the cm_id delivered with the event, inherits the SP and
- * HCA of the listening connection, and stores the requester's connect
- * parameters and private data for a later accept.
- *
- * Input:
- *     conn            listening connection the request arrived on
- *     event           connect request event from the CM
- *
- * Returns:
- *     new connection object, or NULL if conn has no SP attached or the
- *     allocation fails
- */
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
-                                        struct rdma_cm_event *event)
-{
-       struct dapl_cm_id *new_conn;
-#ifdef DAPL_DBG
-       struct rdma_addr *ipaddr = &event->id->route.addr;
-#endif
-
-       if (conn->sp == NULL) {
-               /* message now names this function (was "dapli_rep_recv") */
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " dapli_req_recv: on invalid listen " "handle\n");
-               return NULL;
-       }
-
-       /* allocate new cm_id and merge listen parameters */
-       new_conn = dapl_os_alloc(sizeof(*new_conn));
-       if (new_conn) {
-               (void)dapl_os_memzero(new_conn, sizeof(*new_conn));
-               dapl_os_lock_init(&new_conn->lock);
-               new_conn->cm_id = event->id;    /* provided by uCMA */
-               event->id->context = new_conn;  /* update CM_ID context */
-               new_conn->sp = conn->sp;
-               new_conn->hca = conn->hca;
-
-               /*
-                * Get requesters connect data, setup for accept.
-                * RDMA read resources are capped at the HCA limits.
-                */
-               new_conn->params.responder_resources =
-                   DAPL_MIN(event->param.conn.responder_resources,
-                            conn->hca->ib_trans.max_rdma_rd_in);
-               new_conn->params.initiator_depth =
-                   DAPL_MIN(event->param.conn.initiator_depth,
-                            conn->hca->ib_trans.max_rdma_rd_out);
-
-               new_conn->params.flow_control = event->param.conn.flow_control;
-               new_conn->params.rnr_retry_count =
-                   event->param.conn.rnr_retry_count;
-               new_conn->params.retry_count = event->param.conn.retry_count;
-
-               /* save private data; the event buffer is not kept by us */
-               if (event->param.conn.private_data_len) {
-                       dapl_os_memcpy(new_conn->p_data,
-                                      event->param.conn.private_data,
-                                      event->param.conn.private_data_len);
-                       new_conn->params.private_data = new_conn->p_data;
-                       new_conn->params.private_data_len =
-                           event->param.conn.private_data_len;
-               }
-
-               /* listen_id is a pointer, so it is printed with %p */
-               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
-                            "REQ: SP %p PORT %d LID %p "
-                            "NEW CONN %p ID %p pD %p,%d\n",
-                            new_conn->sp, ntohs(((struct sockaddr_in *)
-                                                 &ipaddr->src_addr)->sin_port),
-                            event->listen_id, new_conn, event->id,
-                            event->param.conn.private_data,
-                            event->param.conn.private_data_len);
-
-               dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
-                            "REQ: IP SRC %x PORT %d DST %x PORT %d "
-                            "rr %d init %d\n", ntohl(((struct sockaddr_in *)
-                                                      &ipaddr->src_addr)->
-                                                     sin_addr.s_addr),
-                            ntohs(((struct sockaddr_in *)
-                                   &ipaddr->src_addr)->sin_port),
-                            ntohl(((struct sockaddr_in *)
-                                   &ipaddr->dst_addr)->sin_addr.s_addr),
-                            ntohs(((struct sockaddr_in *)
-                                   &ipaddr->dst_addr)->sin_port),
-                            new_conn->params.responder_resources,
-                            new_conn->params.initiator_depth);
-       }
-       return new_conn;
-}
-
-/*
- * dapli_cm_active_cb
- *
- * Handle a CM event for an active-side (no SP attached) connection and
- * translate it into a DAT connection event on the EP.
- *
- * Events are dropped when the connection is being destroyed, when the EP
- * handle is no longer valid, or when the EP is already DISCONNECTED.
- *
- * Input:
- *     conn            connection object the event belongs to
- *     event           CM event to process
- *
- * Returns:
- *     void
- */
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
-                              struct rdma_cm_event *event)
-{
-       /* cm_id is a pointer, so it is printed with %p rather than %d */
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " active_cb: conn %p id %p event %d\n",
-                    conn, conn->cm_id, event->event);
-
-       dapl_os_lock(&conn->lock);
-       if (conn->destroy) {
-               dapl_os_unlock(&conn->lock);
-               return;
-       }
-       dapl_os_unlock(&conn->lock);
-
-       /* There is a chance that we can get events after
-        * the consumer calls disconnect in a pending state
-        * since the IB CM and uDAPL states are not shared.
-        * In some cases, IB CM could generate either a DCONN
-        * or CONN_ERR after the consumer returned from
-        * dapl_ep_disconnect with a DISCONNECTED event
-        * already queued. Check state here and bail to
-        * avoid any events after a disconnect.
-        */
-       if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
-               return;
-
-       dapl_os_lock(&conn->ep->header.lock);
-       if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
-               dapl_os_unlock(&conn->ep->header.lock);
-               return;
-       }
-       /* record the disconnect now so later events for this EP are dropped */
-       if (event->event == RDMA_CM_EVENT_DISCONNECTED)
-               conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
-
-       dapl_os_unlock(&conn->ep->header.lock);
-
-       switch (event->event) {
-       case RDMA_CM_EVENT_UNREACHABLE:
-       case RDMA_CM_EVENT_CONNECT_ERROR:
-               {
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cma_active: CONN_ERR event=0x%x"
-                                " status=%d %s DST %s, %d\n",
-                                event->event, event->status,
-                                (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->cm_id->route.addr.dst_addr)->
-                                          sin_addr),
-                                ntohs(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.dst_addr)->
-                                      sin_port));
-
-                       /* per DAT SPEC provider always returns UNREACHABLE */
-                       dapl_evd_connection_callback(conn,
-                                                    IB_CME_DESTINATION_UNREACHABLE,
-                                                    NULL, conn->ep);
-                       break;
-               }
-       case RDMA_CM_EVENT_REJECTED:
-               {
-                       ib_cm_events_t cm_event;
-                       unsigned char *pdata = NULL;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                                    " dapli_cm_active_handler: REJECTED reason=%d\n",
-                                    event->status);
-
-                       /*
-                        * valid REJ from consumer will always contain private data.
-                        * NOTE(review): 28 is presumably the IB CM "consumer
-                        * defined" reject reason - confirm against the CM headers.
-                        * The consumer data follows the dapl_pdata_hdr header.
-                        */
-                       if (event->status == 28 &&
-                           event->param.conn.private_data_len) {
-                               cm_event =
-                                   IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
-                               pdata =
-                                   (unsigned char *)event->param.conn.
-                                   private_data +
-                                   sizeof(struct dapl_pdata_hdr);
-                       } else {
-                               cm_event = IB_CME_DESTINATION_REJECT;
-                               dapl_log(DAPL_DBG_TYPE_WARN,
-                                        "dapl_cma_active: non-consumer REJ,"
-                                        " reason=%d, DST %s, %d\n",
-                                        event->status,
-                                        inet_ntoa(((struct sockaddr_in *)
-                                                   &conn->cm_id->route.addr.
-                                                   dst_addr)->sin_addr),
-                                        ntohs(((struct sockaddr_in *)
-                                               &conn->cm_id->route.addr.
-                                               dst_addr)->sin_port));
-                       }
-                       dapl_evd_connection_callback(conn, cm_event, pdata,
-                                                    conn->ep);
-                       break;
-               }
-       case RDMA_CM_EVENT_ESTABLISHED:
-               /* cm_id is a pointer, so it is printed with %p */
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " active_cb: cm_id %p PORT %d CONNECTED to %s!\n",
-                            conn->cm_id, ntohs(((struct sockaddr_in *)
-                                                &conn->cm_id->route.addr.
-                                                dst_addr)->sin_port),
-                            inet_ntoa(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.dst_addr)->
-                                      sin_addr));
-
-               /* setup local and remote ports for ep query */
-               conn->ep->param.remote_port_qual =
-                   PORT_TO_SID(rdma_get_dst_port(conn->cm_id));
-               conn->ep->param.local_port_qual =
-                   PORT_TO_SID(rdma_get_src_port(conn->cm_id));
-
-               dapl_evd_connection_callback(conn, IB_CME_CONNECTED,
-                                            event->param.conn.private_data,
-                                            conn->ep);
-               break;
-
-       case RDMA_CM_EVENT_DISCONNECTED:
-               rdma_disconnect(conn->cm_id);   /* required for DREP */
-               /* validate EP handle */
-               if (!DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
-                       dapl_evd_connection_callback(conn,
-                                                    IB_CME_DISCONNECTED,
-                                                    NULL, conn->ep);
-               break;
-       default:
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " dapli_cm_active_cb_handler: Unexpected CM "
-                            "event %d on ID 0x%p\n", event->event,
-                            conn->cm_id);
-               break;
-       }
-
-       return;
-}
-
-/*
- * dapli_cm_passive_cb
- *
- * Handle a CM event for a passive-side (SP attached) connection and
- * translate it into a CR callback on the SP. Events are dropped when
- * the connection is being destroyed.
- *
- * Input:
- *     conn            listening or accepted connection object
- *     event           CM event to process
- *
- * Returns:
- *     void
- */
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
-                               struct rdma_cm_event *event)
-{
-       struct dapl_cm_id *new_conn;
-
-       /* event->id is a pointer, so it is printed with %p rather than %d */
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " passive_cb: conn %p id %p event %d\n",
-                    conn, event->id, event->event);
-
-       dapl_os_lock(&conn->lock);
-       if (conn->destroy) {
-               dapl_os_unlock(&conn->lock);
-               return;
-       }
-       dapl_os_unlock(&conn->lock);
-
-       switch (event->event) {
-       case RDMA_CM_EVENT_CONNECT_REQUEST:
-               /* create new conn object with new conn_id from event */
-               new_conn = dapli_req_recv(conn, event);
-
-               /* a NULL result means the request is silently dropped here */
-               if (new_conn)
-                       dapls_cr_callback(new_conn,
-                                         IB_CME_CONNECTION_REQUEST_PENDING,
-                                         event->param.conn.private_data,
-                                         new_conn->sp);
-               break;
-       case RDMA_CM_EVENT_UNREACHABLE:
-       case RDMA_CM_EVENT_CONNECT_ERROR:
-               dapl_log(DAPL_DBG_TYPE_WARN,
-                        "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"
-                        " DST %s,%d\n",
-                        event->event, event->status,
-                        (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
-                        inet_ntoa(((struct sockaddr_in *)
-                                   &conn->cm_id->route.addr.dst_addr)->
-                                  sin_addr), ntohs(((struct sockaddr_in *)
-                                                    &conn->cm_id->route.addr.
-                                                    dst_addr)->sin_port));
-
-               dapls_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
-                                 NULL, conn->sp);
-               break;
-
-       case RDMA_CM_EVENT_REJECTED:
-               {
-                       /* will always be abnormal NON-consumer from active side */
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cm_passive: non-consumer REJ, reason=%d,"
-                                " DST %s, %d\n",
-                                event->status,
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->cm_id->route.addr.dst_addr)->
-                                          sin_addr),
-                                ntohs(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.dst_addr)->
-                                      sin_port));
-
-                       dapls_cr_callback(conn, IB_CME_DESTINATION_REJECT,
-                                         NULL, conn->sp);
-                       break;
-               }
-       case RDMA_CM_EVENT_ESTABLISHED:
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",
-                            conn->cm_id, ntohs(((struct sockaddr_in *)
-                                                &conn->cm_id->route.addr.
-                                                src_addr)->sin_port),
-                            ntohl(((struct sockaddr_in *)
-                                   &conn->cm_id->route.addr.dst_addr)->
-                                  sin_addr.s_addr));
-
-               dapls_cr_callback(conn, IB_CME_CONNECTED, NULL, conn->sp);
-
-               break;
-       case RDMA_CM_EVENT_DISCONNECTED:
-               rdma_disconnect(conn->cm_id);   /* required for DREP */
-               /* validate SP handle context: accept either a PSP or an RSP */
-               if (!DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) ||
-                   !DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
-                       dapls_cr_callback(conn,
-                                         IB_CME_DISCONNECTED, NULL, conn->sp);
-               break;
-       default:
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "
-                            "Unexpected CM event %d on ID 0x%p\n",
-                            event->event, conn->cm_id);
-               break;
-       }
-
-       return;
-}
-
-/************************ DAPL provider entry points **********************/
-
-/*
- * dapls_ib_connect
- *
- * Initiate a connection with the passive listener on another node.
- * Only address resolution is started here; the connect continues from
- * the CM event handling once the address has been resolved.
- *
- * Input:
- *     ep_handle       endpoint to connect; a NULL handle is accepted
- *                     and treated as a no-op
- *     r_addr          remote IA address
- *     r_qual          remote connection qualifier (service ID), mapped
- *                     to a port with SID_TO_PORT
- *     p_size          size of private data in bytes
- *     p_data          pointer to private data, copied into the cm_id
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     error converted from errno if rdma_resolve_addr fails
- *
- */
-DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
-                           IN DAT_IA_ADDRESS_PTR r_addr,
-                           IN DAT_CONN_QUAL r_qual,
-                           IN DAT_COUNT p_size, IN void *p_data)
-{
-       struct dapl_ep *ep_ptr = ep_handle;
-       struct dapl_cm_id *conn;
-       int ret;
-
-       /* Sanity check */
-       if (NULL == ep_ptr)
-               return DAT_SUCCESS;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
-                    r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
-
-       /* rdma conn and cm_id pre-bound; reference via qp_handle */
-       conn = ep_ptr->cm_handle = ep_ptr->qp_handle;
-
-       /* Setup QP/CM parameters and private data in cm_id */
-       (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
-       conn->params.responder_resources =
-           ep_ptr->param.ep_attr.max_rdma_read_in;
-       conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
-       conn->params.flow_control = 1;
-       conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
-       conn->params.retry_count = IB_RC_RETRY_COUNT;
-       if (p_size) {
-               /* NOTE(review): p_size is not bounded here - confirm callers limit it */
-               dapl_os_memcpy(conn->p_data, p_data, p_size);
-               conn->params.private_data = conn->p_data;
-               conn->params.private_data_len = p_size;
-       }
-
-       /* copy in remote address, need a copy for retry attempts */
-       dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
-
-       /* Resolve remote address, src already bound during QP create */
-       ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);
-       ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;
-
-       ret = rdma_resolve_addr(conn->cm_id, NULL,
-                               (struct sockaddr *)&conn->r_addr,
-                               conn->arp_timeout);
-       if (ret) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
-                        ret, strerror(errno));
-               return dapl_convert_errno(errno, "ib_connect");
-       }
-       /* sin_port is stored in network order; convert it for display */
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " connect: resolve_addr: cm_id %p -> %s port %d\n",
-                    conn->cm_id,
-                    inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
-                    ntohs(((struct sockaddr_in *)&conn->r_addr)->sin_port));
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect
- *
- * Disconnect an EP. Does nothing when the EP has no CM handle or the
- * handle no longer holds a cm_id. A failure of rdma_disconnect is only
- * logged; the call still reports success.
- *
- * Input:
- *     ep_ptr          endpoint to disconnect
- *     close_flags     only logged; there is no graceful half-pipe
- *                     disconnect option
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *
- */
-DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
-{
-       dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
-       int ret;
-
-       /* cm_id is a pointer, so it is printed with %p rather than %d */
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " disconnect(ep %p, conn %p, id %p flags %x)\n",
-                    ep_ptr, conn, (conn ? conn->cm_id : NULL), close_flags);
-
-       if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
-               return DAT_SUCCESS;
-
-       /* no graceful half-pipe disconnect option */
-       ret = rdma_disconnect(conn->cm_id);
-       if (ret)
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " disconnect: ID %p ret 0x%x\n",
-                            ep_ptr->cm_handle, ret);
-
-       /* 
-        * DAT event notification occurs from the callback
-        * Note: will fire even if DREQ goes unanswered on timeout 
-        */
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect_clean
- *
- * Hook invoked after the final disconnect callback has occurred, on the
- * ACTIVE side of a connection only, to clean up outstanding connection
- * data. This provider performs no work here.
- *
- * Input:
- *     ep_ptr          DAPL_EP
- *     active          Indicates active side of connection
- *     ib_cm_event     CM event that ended the connection
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- *
- */
-void
-dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
-                         IN DAT_BOOLEAN active,
-                         IN const ib_cm_events_t ib_cm_event)
-{
-       /* intentionally empty: nothing to do */
-}
-
-/*
- * dapl_ib_setup_conn_listener
- *
- * Have the CM set up a connection listener: allocate a connection
- * object, create its cm_id, bind it to the local HCA address with the
- * port derived from ServiceID, attach it to the SP and start listening.
- *
- * Input:
- *     ia_ptr          IA whose HCA address is used for the local bind
- *     ServiceID       consumer service ID, mapped to a port with SID_TO_PORT
- *     sp_ptr          service point that receives the listener handle
- *
- * Output:
- *     sp_ptr->cm_srvc_handle  set to the new listener on success; reset
- *                             to IB_INVALID_HANDLE if listen fails
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_CONN_QUAL_IN_USE
- *     error converted from errno for any other CM failure
- *
- */
-DAT_RETURN
-dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
-                            IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
-{
-       DAT_RETURN dat_status = DAT_SUCCESS;
-       ib_cm_srvc_handle_t conn;
-       DAT_SOCK_ADDR6 addr;    /* local binding address */
-
-       /* Allocate CM and initialize lock */
-       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       dapl_os_memzero(conn, sizeof(*conn));
-       dapl_os_lock_init(&conn->lock);
-
-       /* create CM_ID, bind to local device, create QP */
-       if (rdma_create_id
-           (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
-               dapl_os_free(conn, sizeof(*conn));
-               return (dapl_convert_errno(errno, "setup_listener"));
-       }
-
-       /* open identifies the local device; per DAT specification */
-       /* Get family and address then set port to consumer's ServiceID */
-       dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));
-       ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);
-
-       if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {
-               if ((errno == EBUSY) || (errno == EADDRINUSE))
-                       dat_status = DAT_CONN_QUAL_IN_USE;
-               else
-                       dat_status =
-                           dapl_convert_errno(errno, "setup_listener");
-               goto bail;
-       }
-
-       /* cm_id is a pointer, so it is printed with %p rather than %d */
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %p)\n",
-                    ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),
-                    sp_ptr, conn, conn->cm_id);
-
-       /* link SP and listener before listening so events find conn->sp */
-       sp_ptr->cm_srvc_handle = conn;
-       conn->sp = sp_ptr;
-       conn->hca = ia_ptr->hca_ptr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " listen(conn=%p cm_id=%p)\n",
-                    sp_ptr->cm_srvc_handle, conn->cm_id);
-
-       if (rdma_listen(conn->cm_id, 0)) {      /* max cma backlog */
-
-               if ((errno == EBUSY) || (errno == EADDRINUSE))
-                       dat_status = DAT_CONN_QUAL_IN_USE;
-               else
-                       dat_status =
-                           dapl_convert_errno(errno, "setup_listener");
-               goto bail;
-       }
-
-       /* success */
-       return DAT_SUCCESS;
-
-      bail:
-       /*
-        * Drop the SP's reference if it was already set, so the SP never
-        * keeps a pointer to the connection object freed below.
-        */
-       if (sp_ptr->cm_srvc_handle == conn)
-               sp_ptr->cm_srvc_handle = IB_INVALID_HANDLE;
-       rdma_destroy_id(conn->cm_id);
-       dapl_os_free(conn, sizeof(*conn));
-       return dat_status;
-}
-
-/*
- * dapl_ib_remove_conn_listener
- *
- * Have the CM remove a connection listener: detach the listener handle
- * from the SP and destroy it. An SP without a listener is left alone.
- *
- * Input:
- *     ia_ptr          IA handle (only logged)
- *     sp_ptr          service point owning the listener
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *
- */
-DAT_RETURN
-dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
-{
-       ib_cm_srvc_handle_t listener = sp_ptr->cm_srvc_handle;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
-                    ia_ptr, sp_ptr, listener);
-
-       /* nothing attached, nothing to tear down */
-       if (listener == IB_INVALID_HANDLE)
-               return DAT_SUCCESS;
-
-       /* clear the SP's reference first, then release the listener */
-       sp_ptr->cm_srvc_handle = NULL;
-       dapli_destroy_conn(listener);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_accept_connection
- *
- * Perform necessary steps to accept a connection: allocate the QP if
- * the EP has none attached, move the EP's QP onto the cm_id of the
- * connection request, and send the accept with the private data.
- * On any failure the request is rejected and its connection object is
- * destroyed.
- *
- * Input:
- *     cr_handle       connection request being accepted
- *     ep_handle       local endpoint to connect
- *     p_size          private data size, at most IB_MAX_REP_PDATA_SIZE
- *     p_data          private data sent with the accept
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_LENGTH_ERROR
- *     DAT_INTERNAL_ERROR
- *     error from dapls_ib_qp_alloc, or converted from errno if
- *     rdma_accept fails
- *
- */
-DAT_RETURN
-dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
-                          IN DAT_EP_HANDLE ep_handle,
-                          IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
-{
-       DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
-       DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
-       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
-       struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
-       int ret;
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",
-                    cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);
-
-       /* Obtain size of private data structure & contents */
-       if (p_size > IB_MAX_REP_PDATA_SIZE) {
-               dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
-               goto bail;
-       }
-
-       if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
-               /* 
-                * If we are lazy attaching the QP then we may need to
-                * hook it up here. Typically, we run this code only for
-                * DAT_PSP_PROVIDER_FLAG
-                */
-               dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
-               if (dat_status != DAT_SUCCESS) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " dapl_cma_accept: qp_alloc ERR %d\n",
-                                dat_status);
-                       goto bail;
-               }
-       }
-
-       /* 
-        * Validate device and port in EP cm_id against inbound 
-        * CR cm_id. The pre-allocated EP cm_id is already bound to 
-        * a local device (cm_id and QP) when created. Move the QP
-        * to the new cm_id only if device and port numbers match.
-        */
-       if (ep_ptr->qp_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
-           ep_ptr->qp_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
-               /* move QP to new cr_conn, remove QP ref in EP cm_id */
-               cr_conn->cm_id->qp = ep_ptr->qp_handle->cm_id->qp;
-               ep_ptr->qp_handle->cm_id->qp = NULL;
-               dapli_destroy_conn(ep_ptr->qp_handle);
-       } else {
-               /* port_num is a plain port number, printed without ntohs */
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapl_cma_accept: ERR dev(%p!=%p) or"
-                        " port mismatch(%d!=%d)\n",
-                        ep_ptr->qp_handle->cm_id->verbs, cr_conn->cm_id->verbs,
-                        ep_ptr->qp_handle->cm_id->port_num,
-                        cr_conn->cm_id->port_num);
-               dat_status = DAT_INTERNAL_ERROR;
-               goto bail;
-       }
-
-       cr_ptr->param.local_ep_handle = ep_handle;
-       cr_conn->params.private_data = p_data;
-       cr_conn->params.private_data_len = p_size;
-
-       ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
-       if (ret) {
-               /* capture errno before logging can overwrite it */
-               int err = errno;
-
-               dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
-                        ret, strerror(err));
-               /* convert errno, not the call's return value */
-               dat_status = dapl_convert_errno(err, "accept");
-               goto bail;
-       }
-
-       /* save accepted conn and EP reference */
-       ep_ptr->qp_handle = cr_conn;
-       ep_ptr->cm_handle = cr_conn;
-       cr_conn->ep = ep_ptr;
-
-       /* setup local and remote ports for ep query */
-       /* Note: port qual in network order */
-       ep_ptr->param.remote_port_qual =
-           PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));
-       ep_ptr->param.local_port_qual =
-           PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));
-
-       return DAT_SUCCESS;
-      bail:
-       /* refuse the request and release its connection object */
-       rdma_reject(cr_conn->cm_id, NULL, 0);
-       dapli_destroy_conn(cr_conn);
-       return dat_status;
-}
-
-/*
- * dapls_ib_reject_connection
- *
- * Reject a connection
- *
- * Input:
- *     cr_handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
-                          IN int reason,
-                          IN DAT_COUNT private_data_size,
-                          IN const DAT_PVOID private_data)
-{
-       int ret;
-       int offset = sizeof(struct dapl_pdata_hdr);
-       struct dapl_pdata_hdr pdata_hdr;
-
-       memset(&pdata_hdr, 0, sizeof pdata_hdr);
-       pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |
-                                 (DAT_VERSION_MINOR << 16) |
-                                 (VN_PROVIDER_MAJOR << 8) |
-                                 (VN_PROVIDER_MINOR));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",
-                    cm_handle, reason, ntohl(pdata_hdr.version),
-                    private_data, private_data_size);
-
-       if (cm_handle == IB_INVALID_HANDLE) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " reject: invalid handle: reason %d\n", reason);
-               return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);
-       }
-
-       if (private_data_size >
-           dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,
-                                      cm_handle->hca))
-               return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);
-
-       /* setup pdata_hdr and users data, in CR pdata buffer */
-       dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);
-       if (private_data_size)
-               dapl_os_memcpy(cm_handle->p_data + offset,
-                              private_data, private_data_size);
-
-       /*
-        * Always some private data with reject so active peer can
-        * determine real application reject from an abnormal 
-        * application termination
-        */
-       ret = rdma_reject(cm_handle->cm_id,
-                         cm_handle->p_data, offset + private_data_size);
-
-       dapli_destroy_conn(cm_handle);
-       return dapl_convert_errno(ret, "reject");
-}
-
-/*
- * dapls_ib_cm_remote_addr
- *
- * Obtain the remote IP address given a connection
- *
- * Input:
- *     cr_handle
- *
- * Output:
- *     remote_ia_address: where to place the remote address
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_HANDLE
- *
- */
-DAT_RETURN
-dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
-{
-       DAPL_HEADER *header;
-       dp_ib_cm_handle_t ib_cm_handle;
-       struct rdma_addr *ipaddr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " remote_addr(cm_handle=%p, r_addr=%p)\n",
-                    dat_handle, raddr);
-
-       header = (DAPL_HEADER *) dat_handle;
-
-       if (header->magic == DAPL_MAGIC_EP)
-               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
-       else if (header->magic == DAPL_MAGIC_CR)
-               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
-       else
-               return DAT_INVALID_HANDLE;
-
-       /* get remote IP address from cm_id route */
-       ipaddr = &ib_cm_handle->cm_id->route.addr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
-                    ib_cm_handle, ib_cm_handle->cm_id,
-                    ntohl(((struct sockaddr_in *)
-                           &ipaddr->src_addr)->sin_addr.s_addr),
-                    ntohl(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_addr.s_addr),
-                    ntohs(((struct sockaddr_in *)
-                           &ipaddr->dst_addr)->sin_port));
-
-       dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_private_data_size
- *
- * Return the size of private data given a connection op type
- *
- * Input:
- *     prd_ptr         private data pointer
- *     conn_op         connection operation type
- *      hca_ptr         hca pointer, needed for transport type
- *
- * If prd_ptr is NULL, this is a query for the max size supported by
- * the provider, otherwise it is the actual size of the private data
- * contained in prd_ptr.
- *
- *
- * Output:
- *     None
- *
- * Returns:
- *     length of private data
- *
- */
-int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
-                              IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
-{
-       int size;
-
-       if (hca_ptr->ib_hca_handle->device->transport_type
-           == IBV_TRANSPORT_IWARP)
-               return (IWARP_MAX_PDATA_SIZE - sizeof(struct dapl_pdata_hdr));
-
-       switch (conn_op) {
-
-       case DAPL_PDATA_CONN_REQ:
-               size = IB_MAX_REQ_PDATA_SIZE;
-               break;
-       case DAPL_PDATA_CONN_REP:
-               size = IB_MAX_REP_PDATA_SIZE;
-               break;
-       case DAPL_PDATA_CONN_REJ:
-               size = IB_MAX_REJ_PDATA_SIZE - sizeof(struct dapl_pdata_hdr);
-               break;
-       case DAPL_PDATA_CONN_DREQ:
-               size = IB_MAX_DREQ_PDATA_SIZE;
-               break;
-       case DAPL_PDATA_CONN_DREP:
-               size = IB_MAX_DREP_PDATA_SIZE;
-               break;
-       default:
-               size = 0;
-
-       }                       /* end case */
-
-       return size;
-}
-
-/*
- * Map all CMA event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT      13
-
-static struct ib_cm_event_map {
-       const ib_cm_events_t ib_cm_event;
-       DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
-       /* 00 */  {
-       IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
-           /* 01 */  {
-       IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
-           /* 02 */  {
-       IB_CME_DISCONNECTED_ON_LINK_DOWN,
-                   DAT_CONNECTION_EVENT_DISCONNECTED},
-           /* 03 */  {
-       IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
-           /* 04 */  {
-       IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
-                   DAT_CONNECTION_REQUEST_EVENT},
-           /* 05 */  {
-       IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
-           /* 06 */  {
-       IB_CME_DESTINATION_REJECT,
-                   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-           /* 07 */  {
-       IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
-                   DAT_CONNECTION_EVENT_PEER_REJECTED},
-           /* 08 */  {
-       IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
-           /* 09 */  {
-       IB_CME_TOO_MANY_CONNECTION_REQUESTS,
-                   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-           /* 10 */  {
-       IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
-           /* 11 */  {
-       IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
-           /* 12 */  {
-IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- *     dat_event_num   DAT event we need an equivelent CM event for
- *
- * Output:
- *     none
- *
- * Returns:
- *     ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
-                      IN DAT_BOOLEAN active)
-{
-       DAT_EVENT_NUMBER dat_event_num;
-       int i;
-
-       active = active;
-
-       dat_event_num = 0;
-       for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
-               if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
-                       dat_event_num = ib_cm_event_map[i].dat_event_num;
-                       break;
-               }
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
-                    "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
-                    active ? "active" : "passive", ib_cm_event, dat_event_num);
-
-       return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- * 
- * Input:
- *     ib_cm_event     event provided to the dapl callback routine
- *     active          switch indicating active or passive connection
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
-       ib_cm_events_t ib_cm_event;
-       int i;
-
-       ib_cm_event = 0;
-       for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
-               if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
-                       ib_cm_event = ib_cm_event_map[i].ib_cm_event;
-                       break;
-               }
-       }
-       return ib_cm_event;
-}
-
-void dapli_cma_event_cb(void)
-{
-       struct rdma_cm_event *event;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
-
-       /* process one CM event, fairness */
-       if (!rdma_get_cm_event(g_cm_events, &event)) {
-               struct dapl_cm_id *conn;
-
-               /* set proper conn from cm_id context */
-               if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
-                       conn = (struct dapl_cm_id *)event->listen_id->context;
-               else
-                       conn = (struct dapl_cm_id *)event->id->context;
-
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
-                            event->event, event->id, event->listen_id, conn);
-
-               switch (event->event) {
-               case RDMA_CM_EVENT_ADDR_RESOLVED:
-                       dapli_addr_resolve(conn);
-                       break;
-
-               case RDMA_CM_EVENT_ROUTE_RESOLVED:
-                       dapli_route_resolve(conn);
-                       break;
-
-               case RDMA_CM_EVENT_ADDR_ERROR:
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cma_active: CM ADDR ERROR: ->"
-                                " DST %s retry (%d)..\n",
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->r_addr)->sin_addr),
-                                conn->arp_retries);
-
-                       /* retry address resolution */
-                       if ((--conn->arp_retries) &&
-                           (event->status == -ETIMEDOUT)) {
-                               int ret;
-                               ret = rdma_resolve_addr(conn->cm_id, NULL,
-                                                       (struct sockaddr *)
-                                                       &conn->r_addr,
-                                                       conn->arp_timeout);
-                               if (!ret)
-                                       break;
-                               else {
-                                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
-                                                    " ERROR: rdma_resolve_addr = "
-                                                    "%d %s\n",
-                                                    ret, strerror(errno));
-                               }
-                       }
-                       /* retries exhausted or resolve_addr failed */
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                "dapl_cma_active: ARP_ERR, retries(%d)"
-                                " exhausted -> DST %s,%d\n",
-                                IB_ARP_RETRY_COUNT,
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->cm_id->route.addr.dst_addr)->
-                                          sin_addr),
-                                ntohs(((struct sockaddr_in *)
-                                       &conn->cm_id->route.addr.dst_addr)->
-                                      sin_port));
-
-                       dapl_evd_connection_callback(conn,
-                                                    IB_CME_DESTINATION_UNREACHABLE,
-                                                    NULL, conn->ep);
-                       break;
-
-               case RDMA_CM_EVENT_ROUTE_ERROR:
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl_cma_active: CM ROUTE ERROR: ->"
-                                " DST %s retry (%d)..\n",
-                                inet_ntoa(((struct sockaddr_in *)
-                                           &conn->r_addr)->sin_addr),
-                                conn->route_retries);
-
-                       /* retry route resolution */
-                       if ((--conn->route_retries) &&
-                           (event->status == -ETIMEDOUT))
-                               dapli_addr_resolve(conn);
-                       else {
-                               dapl_log(DAPL_DBG_TYPE_ERR,
-                                        "dapl_cma_active: PATH_RECORD_ERR,"
-                                        " retries(%d) exhausted, DST %s,%d\n",
-                                        IB_ROUTE_RETRY_COUNT,
-                                        inet_ntoa(((struct sockaddr_in *)
-                                                   &conn->cm_id->route.addr.
-                                                   dst_addr)->sin_addr),
-                                        ntohs(((struct sockaddr_in *)
-                                               &conn->cm_id->route.addr.
-                                               dst_addr)->sin_port));
-
-                               dapl_evd_connection_callback(conn,
-                                                            IB_CME_DESTINATION_UNREACHABLE,
-                                                            NULL, conn->ep);
-                       }
-                       break;
-
-               case RDMA_CM_EVENT_DEVICE_REMOVAL:
-                       dapl_evd_connection_callback(conn,
-                                                    IB_CME_LOCAL_FAILURE,
-                                                    NULL, conn->ep);
-                       break;
-               case RDMA_CM_EVENT_CONNECT_REQUEST:
-               case RDMA_CM_EVENT_CONNECT_ERROR:
-               case RDMA_CM_EVENT_UNREACHABLE:
-               case RDMA_CM_EVENT_REJECTED:
-               case RDMA_CM_EVENT_ESTABLISHED:
-               case RDMA_CM_EVENT_DISCONNECTED:
-                       /* passive or active */
-                       if (conn->sp)
-                               dapli_cm_passive_cb(conn, event);
-                       else
-                               dapli_cm_active_cb(conn, event);
-                       break;
-               case RDMA_CM_EVENT_CONNECT_RESPONSE:
-               default:
-                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
-                                    " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",
-                                    event->event, event->id,
-                                    event->id->context);
-                       break;
-               }
-               /* ack event, unblocks destroy_cm_id in consumer threads */
-               rdma_ack_cm_event(event);
-       }
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_cq.c b/dapl/openib_cma/dapl_ib_cq.c
deleted file mode 100755 (executable)
index 7f67982..0000000
+++ /dev/null
@@ -1,559 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_cq.c
- *
- * PURPOSE: completion queues for OFED IB Verbs
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ring_buffer_util.h"
-
-/* One CQ event channel per HCA */
-void dapli_cq_event_cb(struct _ib_hca_transport *hca)
-{
-       /* check all comp events on this device */
-       struct dapl_evd *evd_ptr = NULL;
-       struct ibv_cq *ibv_cq = NULL;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapli_cq_event_cb(%p)\n", hca);
-
-       if (!ibv_get_cq_event(hca->ib_cq, &ibv_cq, (void *)&evd_ptr)) {
-
-               if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
-                       ibv_ack_cq_events(ibv_cq, 1);
-                       return;
-               }
-
-               /* process DTO event via callback */
-               dapl_evd_dto_callback(hca->cm_id->verbs,
-                                     evd_ptr->ib_cq_handle, (void *)evd_ptr);
-
-               ibv_ack_cq_events(ibv_cq, 1);
-       }
-}
-
-/*
- * Map all verbs DTO completion codes to the DAT equivelent.
- *
- * Not returned by verbs:     DAT_DTO_ERR_PARTIAL_PACKET
- */
-static struct ib_status_map {
-       int ib_status;
-       DAT_DTO_COMPLETION_STATUS dat_status;
-} ib_status_map[] = {
-/* 00 */  {
-       IBV_WC_SUCCESS, DAT_DTO_SUCCESS},
-/* 01 */  {
-       IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},
-/* 02 */  {
-       IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},
-/* 03 */  {
-       IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 04 */  {
-       IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-/* 05 */  {
-       IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},
-/* 06 */  {
-       IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},
-/* 07 */  {
-       IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},
-/* 08 */  {
-       IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-/* 09 */  {
-       IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 10 */  {
-       IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},
-/* 11 */  {
-       IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 12 */  {
-       IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 13 */  {
-       IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
-/* 14 */  {
-       IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-/* 15 */  {
-       IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 16 */  {
-       IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-/* 17 */  {
-       IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 18 */  {
-       IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 19 */  {
-       IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},
-/* 20 */  {
-       IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
-/* 21 */  {
-IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},};
-
-/*
- * dapls_ib_get_dto_status
- *
- * Return the DAT status of a DTO operation
- *
- * Input:
- *     cqe_ptr         pointer to completion queue entry
- *
- * Output:
- *     none
- *
- * Returns:
- *     Value from ib_status_map table above
- */
-
-DAT_DTO_COMPLETION_STATUS
-dapls_ib_get_dto_status(IN ib_work_completion_t * cqe_ptr)
-{
-       uint32_t ib_status;
-       int i;
-
-       ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
-
-       /*
-        * Due to the implementation of verbs completion code, we need to
-        * search the table for the correct value rather than assuming
-        * linear distribution.
-        */
-       for (i = 0; i <= IBV_WC_GENERAL_ERR; i++) {
-               if (ib_status == ib_status_map[i].ib_status) {
-                       if (ib_status != IBV_WC_SUCCESS) {
-                               dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
-                                            " DTO completion ERROR: %d: op %#x\n",
-                                            ib_status,
-                                            DAPL_GET_CQE_OPTYPE(cqe_ptr));
-                       }
-                       return ib_status_map[i].dat_status;
-               }
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
-                    " DTO completion ERROR: %d: op %#x\n",
-                    ib_status, DAPL_GET_CQE_OPTYPE(cqe_ptr));
-
-       return DAT_DTO_FAILURE;
-}
-
-DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record,
-                                   OUT DAT_EVENT_NUMBER * async_event)
-{
-       DAT_RETURN dat_status = DAT_SUCCESS;
-       int err_code = err_record->event_type;
-
-       switch (err_code) {
-               /* OVERFLOW error */
-       case IBV_EVENT_CQ_ERR:
-               *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
-               break;
-               /* INTERNAL errors */
-       case IBV_EVENT_DEVICE_FATAL:
-               *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
-               break;
-               /* CATASTROPHIC errors */
-       case IBV_EVENT_PORT_ERR:
-               *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
-               break;
-               /* BROKEN QP error */
-       case IBV_EVENT_SQ_DRAINED:
-       case IBV_EVENT_QP_FATAL:
-       case IBV_EVENT_QP_REQ_ERR:
-       case IBV_EVENT_QP_ACCESS_ERR:
-               *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
-               break;
-               /* connection completion */
-       case IBV_EVENT_COMM_EST:
-               *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
-               break;
-               /* TODO: process HW state changes */
-       case IBV_EVENT_PATH_MIG:
-       case IBV_EVENT_PATH_MIG_ERR:
-       case IBV_EVENT_PORT_ACTIVE:
-       case IBV_EVENT_LID_CHANGE:
-       case IBV_EVENT_PKEY_CHANGE:
-       case IBV_EVENT_SM_CHANGE:
-       default:
-               dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0);
-       }
-       return dat_status;
-}
-
-/*
- * dapl_ib_cq_alloc
- *
- * Alloc a CQ
- *
- * Input:
- *     ia_handle               IA handle
- *     evd_ptr                 pointer to EVD struct
- *     cqlen                   minimum QLen
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
-                 IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
-       struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
-
-#ifdef CQ_WAIT_OBJECT
-       if (evd_ptr->cq_wait_obj_handle)
-               channel = evd_ptr->cq_wait_obj_handle->events;
-#endif
-
-       /* Call IB verbs to create CQ */
-       evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                             *cqlen, evd_ptr, channel, 0);
-
-       if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE)
-               return (dapl_convert_errno(errno, "create_cq"));
-
-       /* arm cq for events */
-       dapls_set_cq_notify(ia_ptr, evd_ptr);
-
-       /* update with returned cq entry size */
-       *cqlen = evd_ptr->ib_cq_handle->cqe;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
-                    evd_ptr->ib_cq_handle, *cqlen);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_cq_resize
- *
- * Alloc a CQ
- *
- * Input:
- *     ia_handle               IA handle
- *     evd_ptr                 pointer to EVD struct
- *     cqlen                   minimum QLen
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_cq_resize(IN DAPL_IA * ia_ptr,
-                  IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
-       ib_cq_handle_t new_cq;
-       struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
-       /* IB verbs doe not support resize. Try to re-create CQ
-        * with new size. Can only be done if QP is not attached. 
-        * destroy EBUSY == QP still attached.
-        */
-
-#ifdef CQ_WAIT_OBJECT
-       if (evd_ptr->cq_wait_obj_handle)
-               channel = evd_ptr->cq_wait_obj_handle->events;
-#endif
-
-       /* Call IB verbs to create CQ */
-       new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
-                              evd_ptr, channel, 0);
-
-       if (new_cq == IB_INVALID_HANDLE)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       /* destroy the original and replace if successful */
-       if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
-               ibv_destroy_cq(new_cq);
-               return (dapl_convert_errno(errno, "resize_cq"));
-       }
-
-       /* update EVD with new cq handle and size */
-       evd_ptr->ib_cq_handle = new_cq;
-       *cqlen = new_cq->cqe;
-
-       /* arm cq for events */
-       dapls_set_cq_notify(ia_ptr, evd_ptr);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_cq_free
- *
- * destroy a CQ
- *
- * Input:
- *     ia_handle               IA handle
- *     evd_ptr                 pointer to EVD struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
-       if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
-               /* copy all entries on CQ to EVD before destroying */
-               dapls_evd_copy_cq(evd_ptr);
-               if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
-                       return (dapl_convert_errno(errno, "destroy_cq"));
-               evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_set_cq_notify
- *
- * Set the CQ notification for next
- *
- * Input:
- *     hca_handl               hca handle
- *     DAPL_EVD                evd handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     dapl_convert_errno 
- */
-DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
-       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
-               return (dapl_convert_errno(errno, "notify_cq"));
-       else
-               return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_notify
- *
- * Set the CQ notification type
- *
- * Input:
- *     hca_handl               hca handle
- *     evd_ptr                 evd handle
- *     type                    notification type
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     dapl_convert_errno
- */
-DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
-                                     IN DAPL_EVD * evd_ptr,
-                                     IN ib_notification_type_t type)
-{
-       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
-               return (dapl_convert_errno(errno, "notify_cq_type"));
-       else
-               return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_poll
- *
- * CQ poll for completions
- *
- * Input:
- *     hca_handl               hca handle
- *     evd_ptr                 evd handle
- *     wc_ptr                  work completion
- *
- * Output:
- *     none
- *
- * Returns: 
- *     DAT_SUCCESS
- *     DAT_QUEUE_EMPTY
- *     
- */
-DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
-                                   IN DAPL_EVD * evd_ptr,
-                                   IN ib_work_completion_t * wc_ptr)
-{
-       if (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr) == 1)
-               return DAT_SUCCESS;
-
-       return DAT_QUEUE_EMPTY;
-}
-
-#ifdef CQ_WAIT_OBJECT
-
-/* NEW common wait objects for providers with direct CQ wait objects */
-DAT_RETURN
-dapls_ib_wait_object_create(IN DAPL_EVD * evd_ptr,
-                           IN ib_wait_obj_handle_t * p_cq_wait_obj_handle)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_create: (%p,%p)\n",
-                    evd_ptr, p_cq_wait_obj_handle);
-
-       *p_cq_wait_obj_handle =
-           dapl_os_alloc(sizeof(struct _ib_wait_obj_handle));
-
-       if (*p_cq_wait_obj_handle == NULL)
-               return (dapl_convert_errno(ENOMEM, " wait_object_create"));
-
-       dapl_os_memzero(*p_cq_wait_obj_handle,
-                       sizeof(struct _ib_wait_obj_handle));
-
-       /* create pipe for waking up work thread */
-       if (pipe((*p_cq_wait_obj_handle)->pipe))
-               goto bail;
-
-       /* set cq_wait object to evd_ptr */
-       (*p_cq_wait_obj_handle)->events =
-           ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->
-                                   ib_hca_handle);
-
-       if ((*p_cq_wait_obj_handle)->events == NULL)
-               goto bail;
-
-       return DAT_SUCCESS;
-      bail:
-       dapl_os_free(*p_cq_wait_obj_handle, sizeof(struct _ib_wait_obj_handle));
-       *p_cq_wait_obj_handle = NULL;
-       return (dapl_convert_errno(errno, " wait_object_create"));
-}
-
-DAT_RETURN
-dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_destroy: wait_obj=%p\n", p_cq_wait_obj_handle);
-
-       ibv_destroy_comp_channel(p_cq_wait_obj_handle->events);
-
-       dapl_os_free(p_cq_wait_obj_handle, sizeof(struct _ib_wait_obj_handle));
-
-       return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_wakeup(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wakeup: wait_obj=%p\n", p_cq_wait_obj_handle);
-
-       /* write to pipe for wake up */
-       if (write(p_cq_wait_obj_handle->pipe[1], "w", sizeof "w") == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " wait object wakeup write error = %s\n",
-                        strerror(errno));
-       return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
-                         IN u_int32_t timeout)
-{
-       struct dapl_evd *evd_ptr;
-       struct ibv_cq *ibv_cq = NULL;
-       int status = 0;
-       int timeout_ms = -1;
-       struct pollfd ufds[2];
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wait: CQ channel %p time %d\n",
-                    p_cq_wait_obj_handle, timeout);
-
-       /* setup cq event channel and pipe fd for consumer wakeup */
-       ufds[0].fd = p_cq_wait_obj_handle->events->fd;
-       ufds[0].events = POLLIN;
-       ufds[0].revents = 0;
-       ufds[1].fd = p_cq_wait_obj_handle->pipe[0];
-       ufds[1].events = POLLIN;
-       ufds[1].revents = 0;
-
-       /* uDAPL timeout values in usecs */
-       if (timeout != DAT_TIMEOUT_INFINITE)
-               timeout_ms = timeout / 1000;
-
-       /* restart syscall */
-       while ((status = poll(ufds, 2, timeout_ms)) == -1)
-               if (errno == EINTR)
-                       continue;
-
-       /* returned event */
-       if (status > 0) {
-               if (ufds[0].revents == POLLIN) {
-                       if (!ibv_get_cq_event(p_cq_wait_obj_handle->events,
-                                             &ibv_cq, (void *)&evd_ptr)) {
-                               ibv_ack_cq_events(ibv_cq, 1);
-                       }
-               }
-               status = 0;
-
-               /* timeout */
-       } else if (status == 0)
-               status = ETIMEDOUT;
-       else
-               status = errno;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wait: RET evd %p ibv_cq %p %s\n",
-                    evd_ptr, ibv_cq, strerror(errno));
-
-       return (dapl_convert_errno(status, "cq_wait_object_wait"));
-
-}
-#endif
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_dto.h b/dapl/openib_cma/dapl_ib_dto.h
deleted file mode 100644 (file)
index d97c26b..0000000
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_dto.h
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The OpenIB uCMA provider - DTO operations and CQE macros 
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-#ifndef _DAPL_IB_DTO_H_
-#define _DAPL_IB_DTO_H_
-
-#include "dapl_ib_util.h"
-
-#ifdef DAT_EXTENSIONS
-#include <dat2/dat_ib_extensions.h>
-#endif
-
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
-
-/*
- * dapls_ib_post_recv
- *
- * Provider specific Post RECV function
- */
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_post_recv (
-       IN  DAPL_EP             *ep_ptr,
-       IN  DAPL_COOKIE         *cookie,
-       IN  DAT_COUNT           segments,
-       IN  DAT_LMR_TRIPLET     *local_iov )
-{
-       struct ibv_recv_wr wr;
-       struct ibv_recv_wr *bad_wr;
-       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
-       DAT_COUNT i, total_len;
-       int ret;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
-                    ep_ptr, cookie, segments, local_iov);
-
-       /* setup work request */
-       total_len = 0;
-       wr.next = 0;
-       wr.num_sge = segments;
-       wr.wr_id = (uint64_t)(uintptr_t)cookie;
-       wr.sg_list = ds;
-
-       if (cookie != NULL) { 
-               for (i = 0; i < segments; i++) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                                    " post_rcv: l_key 0x%x va %p len %d\n",
-                                    ds->lkey, ds->addr, ds->length);
-                       total_len += ds->length;
-                       ds++;
-               }
-               cookie->val.dto.size = total_len;
-       }
-
-       ret = ibv_post_recv(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr);
-       
-       if (ret)
-               return( dapl_convert_errno(errno,"ibv_recv") );
-
-       DAPL_CNTR(ep_ptr, DCNT_EP_POST_RECV);
-       DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_RECV_DATA, total_len);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_post_send
- *
- * Provider specific Post SEND function
- */
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_post_send (
-       IN  DAPL_EP                     *ep_ptr,
-       IN  ib_send_op_type_t           op_type,
-       IN  DAPL_COOKIE                 *cookie,
-       IN  DAT_COUNT                   segments,
-       IN  DAT_LMR_TRIPLET             *local_iov,
-       IN  const DAT_RMR_TRIPLET       *remote_iov,
-       IN  DAT_COMPLETION_FLAGS        completion_flags)
-{
-       struct ibv_send_wr wr;
-       struct ibv_send_wr *bad_wr;
-       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
-       ib_hca_transport_t *ibt_ptr = 
-               &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
-       DAT_COUNT i, total_len;
-       int ret;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " post_snd: ep %p op %d ck %p sgs",
-                    "%d l_iov %p r_iov %p f %d\n",
-                    ep_ptr, op_type, cookie, segments, local_iov, 
-                    remote_iov, completion_flags);
-
-       /* setup the work request */
-       wr.next = 0;
-       wr.opcode = op_type;
-       wr.num_sge = segments;
-       wr.send_flags = 0;
-       wr.wr_id = (uint64_t)(uintptr_t)cookie;
-       wr.sg_list = ds;
-       total_len = 0;
-
-       if (cookie != NULL) {
-               for (i = 0; i < segments; i++ ) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                                    " post_snd: lkey 0x%x va %p len %d\n",
-                                    ds->lkey, ds->addr, ds->length );
-                       total_len += ds->length;
-                       ds++;
-               }
-               cookie->val.dto.size = total_len;
-       }
-
-       if (wr.num_sge && 
-           (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
-               wr.wr.rdma.remote_addr = remote_iov->virtual_address;
-               wr.wr.rdma.rkey = remote_iov->rmr_context;
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_snd_rdma: rkey 0x%x va %#016Lx\n",
-                            wr.wr.rdma.rkey, wr.wr.rdma.remote_addr);
-       }
-
-       /* inline data for send or write ops */
-       if ((total_len <= ibt_ptr->max_inline_send) && 
-          ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) 
-               wr.send_flags |= IBV_SEND_INLINE;
-       
-       /* set completion flags in work request */
-       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
-                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
-       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
-                               completion_flags) ? IBV_SEND_FENCE : 0;
-       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
-                               completion_flags) ? IBV_SEND_SOLICITED : 0;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                    " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
-                    wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
-       ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr);
-
-       if (ret)
-               return( dapl_convert_errno(errno,"ibv_send") );
-
-#ifdef DAPL_COUNTERS
-       switch (op_type) {
-       case OP_SEND:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_DATA,total_len);
-               break;
-       case OP_RDMA_WRITE:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_WRITE_DATA,total_len);
-               break;  
-       case OP_RDMA_READ:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_READ);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_READ_DATA,total_len);
-               break;
-       default:
-               break;
-       }
-#endif /* DAPL_COUNTERS */
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
-       return DAT_SUCCESS;
-}
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
-{
-       switch (cqe_p->opcode) {
-
-       case IBV_WC_SEND:
-               return (DAT_DTO_SEND);
-       case IBV_WC_RDMA_READ:
-               return (DAT_DTO_RDMA_READ);
-       case IBV_WC_BIND_MW:
-               return (DAT_DTO_BIND_MW);
-#ifdef DAT_EXTENSIONS
-       case IBV_WC_RDMA_WRITE:
-               if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
-                       return (DAT_IB_DTO_RDMA_WRITE_IMMED);
-               else
-                       return (DAT_DTO_RDMA_WRITE);
-       case IBV_WC_COMP_SWAP:
-               return (DAT_IB_DTO_CMP_SWAP);
-       case IBV_WC_FETCH_ADD:
-               return (DAT_IB_DTO_FETCH_ADD);
-       case IBV_WC_RECV_RDMA_WITH_IMM:
-               return (DAT_IB_DTO_RECV_IMMED);
-#else
-       case IBV_WC_RDMA_WRITE:
-               return (DAT_DTO_RDMA_WRITE);
-#endif
-       case IBV_WC_RECV:
-               return (DAT_DTO_RECEIVE);
-       default:
-               return (0xff);
-       }
-}
-#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
-
-
-#ifdef DAT_EXTENSIONS
-/*
- * dapls_ib_post_ext_send
- *
- * Provider specific extended Post SEND function for atomics
- *     OP_COMP_AND_SWAP and OP_FETCH_AND_ADD
- */
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_post_ext_send (
-       IN  DAPL_EP                     *ep_ptr,
-       IN  ib_send_op_type_t           op_type,
-       IN  DAPL_COOKIE                 *cookie,
-       IN  DAT_COUNT                   segments,
-       IN  DAT_LMR_TRIPLET             *local_iov,
-       IN  const DAT_RMR_TRIPLET       *remote_iov,
-       IN  DAT_UINT32                  immed_data,
-       IN  DAT_UINT64                  compare_add,
-       IN  DAT_UINT64                  swap,
-       IN  DAT_COMPLETION_FLAGS        completion_flags)
-{
-       struct ibv_send_wr wr;
-       struct ibv_send_wr *bad_wr;
-       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
-       DAT_COUNT i, total_len;
-       int ret;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " post_ext_snd: ep %p op %d ck %p sgs",
-                    "%d l_iov %p r_iov %p f %d\n",
-                    ep_ptr, op_type, cookie, segments, local_iov, 
-                    remote_iov, completion_flags);
-
-       /* setup the work request */
-       wr.next = 0;
-       wr.opcode = op_type;
-       wr.num_sge = segments;
-       wr.send_flags = 0;
-       wr.wr_id = (uint64_t)(uintptr_t)cookie;
-       wr.sg_list = ds;
-       total_len = 0;
-
-       if (cookie != NULL) {
-               for (i = 0; i < segments; i++ ) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                                    " post_ext_snd: lkey 0x%x va %p ln %d\n",
-                                    ds->lkey, ds->addr, ds->length);
-                       total_len += ds->length;
-                       ds++;
-               }
-               cookie->val.dto.size = total_len;
-       }
-
-       switch (op_type) {
-       case OP_RDMA_WRITE_IMM:
-               /* OP_RDMA_WRITE)IMMED has direct IB wr_type mapping */
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: rkey 0x%x va %#016Lx immed=0x%x\n",
-                            remote_iov?remote_iov->rmr_context:0, 
-                            remote_iov?remote_iov->virtual_address:0,
-                            immed_data);
-
-               wr.imm_data = immed_data;
-               if (wr.num_sge) {
-                       wr.wr.rdma.remote_addr = remote_iov->virtual_address;
-                       wr.wr.rdma.rkey = remote_iov->rmr_context;
-               }
-               break;
-       case OP_COMP_AND_SWAP:
-               /* OP_COMP_AND_SWAP has direct IB wr_type mapping */
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: OP_COMP_AND_SWAP=%lx,"
-                            "%lx rkey 0x%x va %#016Lx\n",
-                            compare_add, swap, remote_iov->rmr_context,
-                            remote_iov->virtual_address);
-               
-               wr.wr.atomic.compare_add = compare_add;
-               wr.wr.atomic.swap = swap;
-               wr.wr.atomic.remote_addr = remote_iov->virtual_address;
-               wr.wr.atomic.rkey = remote_iov->rmr_context;
-               break;
-       case OP_FETCH_AND_ADD:
-               /* OP_FETCH_AND_ADD has direct IB wr_type mapping */
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: OP_FETCH_AND_ADD=%lx,"
-                            "%lx rkey 0x%x va %#016Lx\n",
-                            compare_add, remote_iov->rmr_context,
-                            remote_iov->virtual_address);
-
-               wr.wr.atomic.compare_add = compare_add;
-               wr.wr.atomic.remote_addr = remote_iov->virtual_address;
-               wr.wr.atomic.rkey = remote_iov->rmr_context;
-               break;
-       default:
-               break;
-       }
-
-       /* set completion flags in work request */
-       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
-                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
-       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
-                               completion_flags) ? IBV_SEND_FENCE : 0;
-       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
-                               completion_flags) ? IBV_SEND_SOLICITED : 0;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                    " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
-                    wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
-       ret = ibv_post_send(ep_ptr->qp_handle->cm_id->qp, &wr, &bad_wr);
-
-       if (ret)
-               return( dapl_convert_errno(errno,"ibv_send") );
-
-#ifdef DAPL_COUNTERS
-       switch (op_type) {
-       case OP_RDMA_WRITE_IMM:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE_IMM);
-               DAPL_CNTR_DATA(ep_ptr, 
-                              DCNT_EP_POST_WRITE_IMM_DATA, total_len);
-               break;
-       case OP_COMP_AND_SWAP:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_CMP_SWAP);
-               break;  
-       case OP_FETCH_AND_ADD:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_FETCH_ADD);
-               break;
-       default:
-               break;
-       }
-#endif /* DAPL_COUNTERS */
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
-       return DAT_SUCCESS;
-}
-#endif
-
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_optional_prv_dat(
-       IN  DAPL_CR             *cr_ptr,
-       IN  const void          *event_data,
-       OUT   DAPL_CR           **cr_pp)
-{
-    return DAT_SUCCESS;
-}
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
-{
-#ifdef DAPL_COUNTERS
-       DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t)cqe_p->wr_id;
-#endif /* DAPL_COUNTERS */
-
-       switch (cqe_p->opcode) {
-       case IBV_WC_SEND:
-               return (OP_SEND);
-       case IBV_WC_RDMA_WRITE:
-               if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
-                       return (OP_RDMA_WRITE_IMM);
-               else
-                       return (OP_RDMA_WRITE);
-       case IBV_WC_RDMA_READ:
-               return (OP_RDMA_READ);
-       case IBV_WC_COMP_SWAP:
-               return (OP_COMP_AND_SWAP);
-       case IBV_WC_FETCH_ADD:
-               return (OP_FETCH_AND_ADD);
-       case IBV_WC_BIND_MW:
-               return (OP_BIND_MW);
-       case IBV_WC_RECV:
-               if (cqe_p->wc_flags & IBV_WC_WITH_IMM) {
-                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV_IMM);
-                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_IMM_DATA, 
-                                      cqe_p->byte_len);
-                       return (OP_RECEIVE_IMM);
-               } else {
-                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV);
-                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_DATA, 
-                                      cqe_p->byte_len);
-                       return (OP_RECEIVE);
-               }
-       case IBV_WC_RECV_RDMA_WITH_IMM:
-               DAPL_CNTR(cookie->ep, DCNT_EP_RECV_RDMA_IMM);
-               DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_RDMA_IMM_DATA, 
-                              cqe_p->byte_len);
-               return (OP_RECEIVE_IMM);
-       default:
-               return (OP_INVALID);
-       }
-}
-
-#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
-#define DAPL_GET_CQE_WRID(cqe_p) ((ib_work_completion_t*)cqe_p)->wr_id
-#define DAPL_GET_CQE_STATUS(cqe_p) ((ib_work_completion_t*)cqe_p)->status
-#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) ((ib_work_completion_t*)cqe_p)->vendor_err
-#define DAPL_GET_CQE_BYTESNUM(cqe_p) ((ib_work_completion_t*)cqe_p)->byte_len
-#define DAPL_GET_CQE_IMMED_DATA(cqe_p) ((ib_work_completion_t*)cqe_p)->imm_data
-
-STATIC _INLINE_ char * dapls_dto_op_str(int op)
-{
-    static char *optable[] =
-    {
-        "OP_RDMA_WRITE",
-        "OP_RDMA_WRITE_IMM",
-        "OP_SEND",
-        "OP_SEND_IMM",
-        "OP_RDMA_READ",
-        "OP_COMP_AND_SWAP",
-        "OP_FETCH_AND_ADD",
-        "OP_RECEIVE",
-        "OP_RECEIVE_IMM",
-        "OP_BIND_MW"
-    };
-    return ((op < 0 || op > 9) ? "Invalid CQE OP?" : optable[op]);
-}
-
-static _INLINE_ char *
-dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
-{
-    return dapls_dto_op_str(DAPL_GET_CQE_OPTYPE(cqe_ptr));
-}
-
-#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
-
-#endif /*  _DAPL_IB_DTO_H_ */
diff --git a/dapl/openib_cma/dapl_ib_extensions.c b/dapl/openib_cma/dapl_ib_extensions.c
deleted file mode 100755 (executable)
index 3bcde58..0000000
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright (c) 2007 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- * 
- * MODULE: dapl_ib_extensions.c
- *
- * PURPOSE:  Extensions routines for OpenIB uCMA provider
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_ep_util.h"
-#include "dapl_cookie.h"
-#include <stdarg.h>
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
-              IN DAT_UINT64 cmp_add,
-              IN DAT_UINT64 swap,
-              IN DAT_UINT32 immed_data,
-              IN DAT_COUNT segments,
-              IN DAT_LMR_TRIPLET * local_iov,
-              IN DAT_DTO_COOKIE user_cookie,
-              IN const DAT_RMR_TRIPLET * remote_iov,
-              IN int op_type, IN DAT_COMPLETION_FLAGS flags);
-
-/*
- * dapl_extensions
- *
- * Process extension requests
- *
- * Input:
- *     ext_type,
- *     ...
- *
- * Output:
- *     Depends....
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_NOT_IMPLEMENTED
- *      .....
- *
- */
-DAT_RETURN
-dapl_extensions(IN DAT_HANDLE dat_handle,
-               IN DAT_EXTENDED_OP ext_op, IN va_list args)
-{
-       DAT_EP_HANDLE ep;
-       DAT_LMR_TRIPLET *lmr_p;
-       DAT_DTO_COOKIE cookie;
-       const DAT_RMR_TRIPLET *rmr_p;
-       DAT_UINT64 dat_uint64a, dat_uint64b;
-       DAT_UINT32 dat_uint32;
-       DAT_COUNT segments = 1;
-       DAT_COMPLETION_FLAGS comp_flags;
-       DAT_RETURN status = DAT_NOT_IMPLEMENTED;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_API,
-                    "dapl_extensions(hdl %p operation %d, ...)\n",
-                    dat_handle, ext_op);
-
-       switch ((int)ext_op) {
-
-       case DAT_IB_RDMA_WRITE_IMMED_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " WRITE_IMMED_DATA extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               segments = va_arg(args, DAT_COUNT);     /* num segments */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
-               dat_uint32 = va_arg(args, DAT_UINT32);  /* immed data */
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
-                                       cookie, rmr_p, OP_RDMA_WRITE_IMM,
-                                       comp_flags);
-               break;
-
-       case DAT_IB_CMP_AND_SWAP_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " CMP_AND_SWAP extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               dat_uint64a = va_arg(args, DAT_UINT64); /* cmp_value */
-               dat_uint64b = va_arg(args, DAT_UINT64); /* swap_value */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
-                                       0, segments, lmr_p, cookie, rmr_p,
-                                       OP_COMP_AND_SWAP, comp_flags);
-               break;
-
-       case DAT_IB_FETCH_AND_ADD_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " FETCH_AND_ADD extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               dat_uint64a = va_arg(args, DAT_UINT64); /* add value */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
-                                       lmr_p, cookie, rmr_p,
-                                       OP_FETCH_AND_ADD, comp_flags);
-
-               break;
-
-#ifdef DAPL_COUNTERS
-       case DAT_QUERY_COUNTERS_OP:
-               {
-                       int cntr, reset;
-                       DAT_UINT64 *p_cntr_out;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                                    " Query counter extension call\n");
-
-                       cntr = va_arg(args, int);
-                       p_cntr_out = va_arg(args, DAT_UINT64 *);
-                       reset = va_arg(args, int);
-
-                       status = dapl_query_counter(dat_handle, cntr,
-                                                   p_cntr_out, reset);
-                       break;
-               }
-       case DAT_PRINT_COUNTERS_OP:
-               {
-                       int cntr, reset;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                                    " Print counter extension call\n");
-
-                       cntr = va_arg(args, int);
-                       reset = va_arg(args, int);
-
-                       dapl_print_counter(dat_handle, cntr, reset);
-                       status = DAT_SUCCESS;
-                       break;
-               }
-#endif                         /* DAPL_COUNTERS */
-
-       default:
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            "unsupported extension(%d)\n", (int)ext_op);
-       }
-
-       return (status);
-}
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
-              IN DAT_UINT64 cmp_add,
-              IN DAT_UINT64 swap,
-              IN DAT_UINT32 immed_data,
-              IN DAT_COUNT segments,
-              IN DAT_LMR_TRIPLET * local_iov,
-              IN DAT_DTO_COOKIE user_cookie,
-              IN const DAT_RMR_TRIPLET * remote_iov,
-              IN int op_type, IN DAT_COMPLETION_FLAGS flags)
-{
-       DAPL_EP *ep_ptr;
-       ib_qp_handle_t qp_ptr;
-       DAPL_COOKIE *cookie = NULL;
-       DAT_RETURN dat_status = DAT_SUCCESS;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_API,
-                    " post_ext_op: ep %p cmp_val %d "
-                    "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x\n",
-                    ep_handle, (unsigned)cmp_add, (unsigned)swap,
-                    (unsigned)user_cookie.as_64, remote_iov, flags);
-
-       if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
-               return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-
-       ep_ptr = (DAPL_EP *) ep_handle;
-       qp_ptr = ep_ptr->qp_handle;
-
-       /*
-        * Synchronization ok since this buffer is only used for send
-        * requests, which aren't allowed to race with each other.
-        */
-       dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
-                                           DAPL_DTO_TYPE_EXTENSION,
-                                           user_cookie, &cookie);
-       if (dat_status != DAT_SUCCESS)
-               goto bail;
-
-       /*
-        * Take reference before posting to avoid race conditions with
-        * completions
-        */
-       dapl_os_atomic_inc(&ep_ptr->req_count);
-
-       /*
-        * Invoke provider specific routine to post DTO
-        */
-       dat_status = dapls_ib_post_ext_send(ep_ptr, op_type, cookie, segments,  /* data segments */
-                                           local_iov, remote_iov, immed_data,  /* immed data */
-                                           cmp_add,    /* compare or add */
-                                           swap,       /* swap */
-                                           flags);
-
-       if (dat_status != DAT_SUCCESS) {
-               dapl_os_atomic_dec(&ep_ptr->req_count);
-               dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
-       }
-
-      bail:
-       return dat_status;
-
-}
-
-/* 
- * New provider routine to process extended DTO events 
- */
-void
-dapls_cqe_to_event_extension(IN DAPL_EP * ep_ptr,
-                            IN DAPL_COOKIE * cookie,
-                            IN ib_work_completion_t * cqe_ptr,
-                            IN DAT_EVENT * event_ptr)
-{
-       uint32_t ibtype;
-       DAT_DTO_COMPLETION_EVENT_DATA *dto =
-           &event_ptr->event_data.dto_completion_event_data;
-       DAT_IB_EXTENSION_EVENT_DATA *ext_data = (DAT_IB_EXTENSION_EVENT_DATA *)
-           & event_ptr->event_extension_data[0];
-       DAT_DTO_COMPLETION_STATUS dto_status;
-
-       /* Get status from cqe */
-       dto_status = dapls_ib_get_dto_status(cqe_ptr);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                    " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
-                    dto, ext_data, dto_status);
-
-       event_ptr->event_number = DAT_IB_DTO_EVENT;
-       dto->ep_handle = cookie->ep;
-       dto->user_cookie = cookie->val.dto.cookie;
-       dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr);     /* new for 2.0 */
-       dto->status = ext_data->status = dto_status;
-
-       if (dto_status != DAT_DTO_SUCCESS)
-               return;
-
-       /* 
-        * Get operation type from CQ work completion entry and
-        * if extented operation then set extended event data
-        */
-       ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
-
-       switch (ibtype) {
-
-       case OP_RDMA_WRITE_IMM:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
-
-               /* type and outbound rdma write transfer size */
-               dto->transfered_length = cookie->val.dto.size;
-               ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
-               break;
-       case OP_RECEIVE_IMM:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
-
-               /* immed recvd, type and inbound rdma write transfer size */
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
-               ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
-               break;
-       case OP_COMP_AND_SWAP:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
-
-               /* original data is returned in LMR provided with post */
-               ext_data->type = DAT_IB_CMP_AND_SWAP;
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               break;
-       case OP_FETCH_AND_ADD:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
-
-               /* original data is returned in LMR provided with post */
-               ext_data->type = DAT_IB_FETCH_AND_ADD;
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               break;
-       default:
-               /* not extended operation */
-               ext_data->status = DAT_IB_OP_ERR;
-               dto->status = DAT_DTO_ERR_TRANSPORT;
-               break;
-       }
-}
diff --git a/dapl/openib_cma/dapl_ib_mem.c b/dapl/openib_cma/dapl_ib_mem.c
deleted file mode 100755 (executable)
index 7e73044..0000000
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- * 
- * MODULE: dapl_ib_mem.c
- *
- * PURPOSE: Memory windows, registration, and protection domain 
- *
- * $Id:$
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-
-/*
- * dapls_convert_privileges
- *
- * Convert LMR privileges to provider  
- *
- * Input:
- *     DAT_MEM_PRIV_FLAGS
- *
- * Output:
- *     none
- *
- * Returns:
- *     ibv_access_flags
- *
- */
-STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
-{
-       int access = 0;
-
-       /*
-        * Local read is the default on IB, so there is nothing to set
-        * for DAT_MEM_PRIV_LOCAL_READ_FLAG.
-        */
-       if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
-               access |= IBV_ACCESS_LOCAL_WRITE;
-       if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
-               access |= IBV_ACCESS_REMOTE_WRITE;
-       /* tested once; the former triplicated test was a copy/paste no-op */
-       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
-               access |= IBV_ACCESS_REMOTE_READ;
-#ifdef DAT_EXTENSIONS
-       /* atomic access only exists when the IB extensions are compiled in */
-       if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
-               access |= IBV_ACCESS_REMOTE_ATOMIC;
-#endif
-
-       return access;
-}
-
-/*
- * dapl_ib_pd_alloc
- *
- * Alloc a PD
- *
- * Input:
- *     ia_handle       IA handle
- *     pz              pointer to PZ struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
-{
-       /* ask verbs for a protection domain on this IA's device handle */
-       pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
-
-       if (pz->pd_handle) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " pd_alloc: pd_handle=%p\n", pz->pd_handle);
-               return DAT_SUCCESS;
-       }
-
-       /* a failed allocation is always reported as ENOMEM */
-       return (dapl_convert_errno(ENOMEM, "alloc_pd"));
-}
-
-/*
- * dapl_ib_pd_free
- *
- * Free a PD
- *
- * Input:
- *     ia_handle       IA handle
- *     PZ_ptr          pointer to PZ struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *      DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
-{
-       /* nothing to release when no PD is attached */
-       if (pz->pd_handle == IB_INVALID_HANDLE)
-               return DAT_SUCCESS;
-
-       if (ibv_dealloc_pd(pz->pd_handle))
-               return (dapl_convert_errno(errno, "dealloc_pd"));
-
-       /* mark the PZ as released so a repeated free is a no-op */
-       pz->pd_handle = IB_INVALID_HANDLE;
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register
- *
- * Register a virtual memory region
- *
- * Input:
- *     ia_handle       IA handle
- *     lmr             pointer to dapl_lmr struct
- *     virt_addr       virtual address of beginning of mem region
- *     length          length of memory region
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
-                    IN DAPL_LMR * lmr,
-                    IN DAT_PVOID virt_addr,
-                    IN DAT_VLEN length,
-                    IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
-{
-       ib_pd_handle_t ib_pd_handle;
-       int access;
-
-       /* the PD comes from the PZ the LMR was created against */
-       ib_pd_handle = ((DAPL_PZ *) lmr->param.pz_handle)->pd_handle;
-
-       /* length is a DAT_VLEN; widen explicitly so the format matches */
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " mr_register: ia=%p, lmr=%p va=%p ln=%llu pv=0x%x\n",
-                    ia_ptr, lmr, virt_addr,
-                    (unsigned long long)length, privileges);
-
-       /* TODO: shared memory */
-       if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " mr_register_shared: NOT IMPLEMENTED\n");
-               return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-       }
-
-       /* IB verbs does not support */
-       if (va_type == DAT_VA_TYPE_ZB) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
-               return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-       }
-
-       /* local read is default on IB; convert the rest once and reuse it */
-       access = dapls_convert_privileges(privileges);
-       lmr->mr_handle = ibv_reg_mr(ib_pd_handle, virt_addr, length, access);
-
-       /* a failed registration is always reported as ENOMEM */
-       if (!lmr->mr_handle)
-               return (dapl_convert_errno(ENOMEM, "reg_mr"));
-
-       /* publish the keys and the registered range through the LMR params */
-       lmr->param.lmr_context = lmr->mr_handle->lkey;
-       lmr->param.rmr_context = lmr->mr_handle->rkey;
-       lmr->param.registered_size = length;
-       lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
-
-       /*
-        * One conversion per argument: the length previously had no
-        * conversion of its own and was printed in the priv= slot.
-        */
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " mr_register: mr=%p addr=%p pd %p ctx %p "
-                    "lkey=0x%x rkey=0x%x len=%llu priv=%x\n",
-                    lmr->mr_handle, lmr->mr_handle->addr,
-                    lmr->mr_handle->pd, lmr->mr_handle->context,
-                    lmr->mr_handle->lkey, lmr->mr_handle->rkey,
-                    (unsigned long long)length, access);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_deregister
- *
- * Free a memory region
- *
- * Input:
- *     lmr                     pointer to dapl_lmr struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
-{
-       /* an LMR without a registered MR has nothing to release */
-       if (lmr->mr_handle != IB_INVALID_HANDLE) {
-               /* label names the verb that failed (was mislabeled "dereg_pd") */
-               if (ibv_dereg_mr(lmr->mr_handle))
-                       return (dapl_convert_errno(errno, "dereg_mr"));
-               /* mark released so a repeated deregister is a no-op */
-               lmr->mr_handle = IB_INVALID_HANDLE;
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register_shared
- *
- * Register a virtual memory region
- *
- * Input:
- *     ia_ptr          IA handle
- *     lmr             pointer to dapl_lmr struct
- *     virt_addr       virtual address of beginning of mem region
- *     length          length of memory region
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register_shared(IN DAPL_IA * ia_ptr,
-                           IN DAPL_LMR * lmr,
-                           IN DAT_MEM_PRIV_FLAGS privileges,
-                           IN DAT_VA_TYPE va_type)
-{
-       /* shared registration is a stub: log the attempt and refuse */
-       DAT_RETURN unsupported = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                    " mr_register_shared: NOT IMPLEMENTED\n");
-       return unsupported;
-}
-
-/*
- * dapls_ib_mw_alloc
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_mw_alloc(IN DAPL_RMR * rmr)
-{
-       /* memory windows are a stub here: log the attempt and refuse */
-       DAT_RETURN unsupported = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_alloc: NOT IMPLEMENTED\n");
-       return unsupported;
-}
-
-/*
- * dapls_ib_mw_free
- *
- * Release bindings of a protection domain to a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mw_free(IN DAPL_RMR * rmr)
-{
-       /* memory windows are a stub here: log the attempt and refuse */
-       DAT_RETURN unsupported = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_free: NOT IMPLEMENTED\n");
-       return unsupported;
-}
-
-/*
- * dapls_ib_mw_bind
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER;
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_bind(IN DAPL_RMR * rmr,
-                IN DAPL_LMR * lmr,
-                IN DAPL_EP * ep,
-                IN DAPL_COOKIE * cookie,
-                IN DAT_VADDR virtual_address,
-                IN DAT_VLEN length,
-                IN DAT_MEM_PRIV_FLAGS mem_priv, IN DAT_BOOLEAN is_signaled)
-{
-       /* memory windows are a stub here: log the attempt and refuse */
-       DAT_RETURN unsupported = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_bind: NOT IMPLEMENTED\n");
-       return unsupported;
-}
-
-/*
- * dapls_ib_mw_unbind
- *
- * Unbind a protection domain from a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER;
- *     DAT_INVALID_STATE;
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_unbind(IN DAPL_RMR * rmr,
-                  IN DAPL_EP * ep,
-                  IN DAPL_COOKIE * cookie, IN DAT_BOOLEAN is_signaled)
-{
-       /* memory windows are a stub here: log the attempt and refuse */
-       DAT_RETURN unsupported = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_unbind: NOT IMPLEMENTED\n");
-       return unsupported;
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_qp.c b/dapl/openib_cma/dapl_ib_qp.c
deleted file mode 100755 (executable)
index c9a61c3..0000000
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_qp.c
- *
- * PURPOSE: QP routines for access to OFED IB Verbs
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-
-extern struct rdma_event_channel *g_cm_events;
-
-/*
- * dapl_ib_qp_alloc
- *
- * Alloc a QP
- *
- * Input:
- *     *ep_ptr         pointer to EP INFO
- *     ib_hca_handle   provider HCA handle
- *     ib_pd_handle    provider protection domain handle
- *     cq_recv         provider recv CQ handle
- *     cq_send         provider send CQ handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
-                            IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
-{
-       DAT_EP_ATTR *attr;
-       DAPL_EVD *rcv_evd, *req_evd;
-       ib_cq_handle_t rcv_cq, req_cq;
-       ib_pd_handle_t ib_pd_handle;
-       struct ibv_qp_init_attr qp_create;
-       dp_ib_cm_handle_t conn;
-       struct rdma_cm_id *cm_id;
-       int err;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
-                    ia_ptr, ep_ptr, ep_ctx_ptr);
-
-       attr = &ep_ptr->param.ep_attr;
-       ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
-       rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
-       req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
-
-       /* 
-        * DAT allows usage model of EP's with no EVD's but IB does not. 
-        * Create a CQ with zero entries under the covers to support and 
-        * catch any invalid posting. 
-        */
-       if (rcv_evd != DAT_HANDLE_NULL)
-               rcv_cq = rcv_evd->ib_cq_handle;
-       else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
-               rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-       else {
-               struct ibv_comp_channel *channel =
-                   ia_ptr->hca_ptr->ib_trans.ib_cq;
-#ifdef CQ_WAIT_OBJECT
-               /*
-                * rcv_evd is DAT_HANDLE_NULL on this path, so it must be
-                * checked before use; the unguarded dereference crashed.
-                */
-               if (rcv_evd != DAT_HANDLE_NULL && rcv_evd->cq_wait_obj_handle)
-                       channel = rcv_evd->cq_wait_obj_handle->events;
-#endif
-               /* Call IB verbs to create CQ */
-               rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                      0, NULL, channel, 0);
-
-               if (rcv_cq == IB_INVALID_HANDLE)
-                       return (dapl_convert_errno(ENOMEM, "create_cq"));
-
-               /* cache the empty CQ on the HCA so later EPs can share it */
-               ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
-       }
-       if (req_evd != DAT_HANDLE_NULL)
-               req_cq = req_evd->ib_cq_handle;
-       else
-               req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-
-       /* 
-        * IMPLEMENTATION NOTE:
-        * uDAPL allows consumers to post buffers on the EP after creation
-        * and before a connect request (outbound and inbound). This forces
-        * a binding to a device during the hca_open call and requires the
-        * consumer to predetermine which device to listen on or connect from.
-        * This restriction eliminates any option of listening or connecting 
-        * over multiple devices. uDAPL should add API's to resolve addresses 
-        * and bind to the device at the appropriate time (before connect 
-        * and after CR arrives). Discovery should happen at connection time 
-        * based on addressing and not on static configuration during open.
-        */
-
-       /* Allocate CM and initialize lock */
-       if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
-               return (dapl_convert_errno(ENOMEM, "alloc_conn"));
-
-       dapl_os_memzero(conn, sizeof(*conn));
-       dapl_os_lock_init(&conn->lock);
-
-       /* create CM_ID, bind to local device, create QP */
-       if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {
-               /* capture errno before the free can overwrite it */
-               err = errno;
-               dapl_os_free(conn, sizeof(*conn));
-               return (dapl_convert_errno(err, "create_qp"));
-       }
-
-       /* open identifies the local device; per DAT specification */
-       if (rdma_bind_addr(cm_id,
-                          (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
-               goto bail;
-
-       /* Setup attributes and create qp */
-       dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
-       qp_create.cap.max_send_wr = attr->max_request_dtos;
-       qp_create.cap.max_send_sge = attr->max_request_iov;
-       qp_create.cap.max_inline_data =
-           ia_ptr->hca_ptr->ib_trans.max_inline_send;
-       qp_create.send_cq = req_cq;
-
-       /* ibv assumes rcv_cq is never NULL, set to req_cq */
-       if (rcv_cq == NULL) {
-               qp_create.recv_cq = req_cq;
-               qp_create.cap.max_recv_wr = 0;
-               qp_create.cap.max_recv_sge = 0;
-       } else {
-               qp_create.recv_cq = rcv_cq;
-               qp_create.cap.max_recv_wr = attr->max_recv_dtos;
-               qp_create.cap.max_recv_sge = attr->max_recv_iov;
-       }
-       qp_create.qp_type = IBV_QPT_RC;
-       qp_create.qp_context = (void *)ep_ptr;
-
-       /* Let uCMA transition QP states */
-       if (rdma_create_qp(cm_id, ib_pd_handle, &qp_create))
-               goto bail;
-
-       conn->cm_id = cm_id;
-       conn->ep = ep_ptr;
-       conn->hca = ia_ptr->hca_ptr;
-
-       /* setup timers for address and route resolution */
-       conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
-                                               IB_ARP_TIMEOUT);
-       conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
-                                               IB_ARP_RETRY_COUNT);
-       conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
-                                                 IB_ROUTE_TIMEOUT);
-       conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
-                                                 IB_ROUTE_RETRY_COUNT);
-
-       /* setup up ep->param to reference the bound local address and port */
-       ep_ptr->param.local_ia_address_ptr = &cm_id->route.addr.src_addr;
-       ep_ptr->param.local_port_qual = rdma_get_src_port(cm_id);
-
-       /* the EP's qp_handle is the conn object, which owns cm_id and QP */
-       ep_ptr->qp_handle = conn;
-       ep_ptr->qp_state = IB_QP_STATE_INIT;
-
-       /* qp_num is an integer, so print it with %x rather than %p */
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " qp_alloc: qpn 0x%x sq %d,%d rq %d,%d port=%d\n",
-                    ep_ptr->qp_handle->cm_id->qp->qp_num,
-                    qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
-                    qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge,
-                    ep_ptr->param.local_port_qual);
-
-       return DAT_SUCCESS;
-      bail:
-       /* capture errno before the cleanup calls can overwrite it */
-       err = errno;
-       rdma_destroy_id(cm_id);
-       dapl_os_free(conn, sizeof(*conn));
-       return (dapl_convert_errno(err, "create_qp"));
-}
-
-/*
- * dapl_ib_qp_free
- *
- * Free a QP
- *
- * Input:
- *     ia_handle       IA handle
- *     *ep_ptr         pointer to EP INFO
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *  dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",
-                    ep_ptr, ep_ptr->qp_handle);
-
-       /* an EP without a QP has nothing to tear down */
-       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
-               return DAT_SUCCESS;
-
-       /* qp_handle is the conn object referencing both cm_id and qp */
-       dapli_destroy_conn(ep_ptr->qp_handle);
-
-       ep_ptr->qp_handle = IB_INVALID_HANDLE;
-       ep_ptr->qp_state = IB_QP_STATE_ERROR;
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_modify
- *
- * Set the QP to the parameters specified in an EP_PARAM
- *
- * The EP_PARAM structure that is provided has been
- * sanitized such that only non-zero values are valid.
- *
- * Input:
- *     ib_hca_handle           HCA handle
- *     qp_handle               QP handle
- *     ep_attr                 Sanitized EP Params
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
-                             IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
-{
-       struct ibv_qp_attr new_caps;
-       struct ibv_qp *qp;
-
-       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
-               return DAT_INVALID_PARAMETER;
-
-       qp = ep_ptr->qp_handle->cm_id->qp;
-
-       /* attributes may only be changed while the QP is in RTR or RTS */
-       if (!(qp->state == IBV_QPS_RTR || qp->state == IBV_QPS_RTS))
-               return DAT_INVALID_STATE;
-
-       /* only the capability fields are filled in; the rest stays zero */
-       dapl_os_memzero((void *)&new_caps, sizeof(new_caps));
-       new_caps.cap.max_send_wr = attr->max_request_dtos;
-       new_caps.cap.max_send_sge = attr->max_request_iov;
-       new_caps.cap.max_recv_wr = attr->max_recv_dtos;
-       new_caps.cap.max_recv_sge = attr->max_recv_iov;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
-                    qp,
-                    new_caps.cap.max_send_wr, new_caps.cap.max_send_sge,
-                    new_caps.cap.max_recv_wr, new_caps.cap.max_recv_sge);
-
-       if (ibv_modify_qp(qp, &new_caps, IBV_QP_CAP) == 0)
-               return DAT_SUCCESS;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                    "modify_qp: modify ep %p qp %p failed\n",
-                    ep_ptr, qp);
-       return (dapl_convert_errno(errno, "modify_qp_state"));
-}
-
-/*
- * dapls_ib_reinit_ep
- *
- * Move the QP to INIT state again.
- *
- * Input:
- *     ep_ptr          DAPL_EP
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- *
- */
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
-       DAT_RETURN dat_status;
-
-       /* an EP that never had a QP needs no re-initialization */
-       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
-               return;
-
-       /* uCMA does not allow reuse of CM_ID, destroy and create new one */
-       dapli_destroy_conn(ep_ptr->qp_handle);
-       ep_ptr->qp_handle = IB_INVALID_HANDLE;
-
-       /* create new CM_ID and QP */
-       dat_status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
-
-       /*
-        * This function cannot report failure, so at least log it; the EP
-        * is then left with qp_handle == IB_INVALID_HANDLE.
-        */
-       if (dat_status != DAT_SUCCESS)
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " reinit_ep: ep %p qp_alloc failed 0x%x\n",
-                            ep_ptr, dat_status);
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c
deleted file mode 100755 (executable)
index bf23d43..0000000
+++ /dev/null
@@ -1,1134 +0,0 @@
-/*
- * Copyright (c) 2005-2008 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- * 
- * MODULE: dapl_ib_util.c
- *
- * PURPOSE: OFED provider - init, open, close, utilities, work thread
- *
- * $Id:$
- *
- **********************************************************************/
-
-#ifdef RCSID
-static const char rcsid[] = "$Id:  $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-/* NOTE(review): not referenced in the visible part of this file - confirm use */
-int g_dapl_loopback_connection = 0;
-/*
- * Global CM event channel: created under g_hca_lock on the first
- * dapls_ib_open_hca call and destroyed in dapls_ib_release.
- */
-struct rdma_event_channel *g_cm_events = NULL;
-/* presumably state and handle of the provider work thread - confirm */
-ib_thread_state_t g_ib_thread_state = 0;
-DAPL_OS_THREAD g_ib_thread;
-/* hca_list lock, initialized in dapls_ib_init */
-DAPL_OS_LOCK g_hca_lock;
-/* hca list for CQ events, initialized in dapls_ib_init */
-struct dapl_llist_entry *g_hca_list;
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include "..\..\..\..\..\etc\user\dlist.c"
-#include <rdma\winverbs.h>
-
-struct ibvw_windata windata;
-
-/*
- * Windows variant: resolve a device name of the form "rdma_dev<N>" to the
- * address of the N-th local address that the WinVerbs provider can
- * translate, and copy it into addr (at most addr_len bytes).
- *
- * Returns 0 on success and non-zero otherwise.
- * NOTE(review): the non-zero value is an errno code on some paths and an
- * HRESULT or getaddrinfo code on others.
- */
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
-       IWVProvider *prov;
-       WV_DEVICE_ADDRESS devaddr;
-       struct addrinfo *res, *ai;
-       HRESULT hr;
-       int index;
-
-       /* only names starting with "rdma_dev" are handled here */
-       if (strncmp(name, "rdma_dev", 8)) {
-               return EINVAL;
-       }
-
-       /* the numeric suffix selects which matching address to use */
-       index = atoi(name + 8);
-
-       hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
-       if (FAILED(hr)) {
-               return hr;
-       }
-
-       hr = getaddrinfo("..localmachine", NULL, NULL, &res);
-       if (hr) {
-               goto release;
-       }
-
-       for (ai = res; ai; ai = ai->ai_next) {
-               hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
-               /*
-                * index is decremented only for addresses that translate
-                * and fit in addr (short-circuit evaluation), so it counts
-                * usable addresses, not all local addresses.
-                */
-               if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
-                       memcpy(addr, ai->ai_addr, ai->ai_addrlen);
-                       goto free;
-               }
-       }
-       /* ran off the end of the list without finding the N-th address */
-       hr = ENODEV;
-
-free:
-       freeaddrinfo(res);
-release:
-       prov->lpVtbl->Release(prov);
-       return hr;
-}
-
-static int dapls_os_init(void)
-{
-       /* populate the file-scope windata for IBVW_WINDATA_VERSION */
-       int ret = ibvw_get_windata(&windata, IBVW_WINDATA_VERSION);
-
-       return ret;
-}
-
-static void dapls_os_release(void)
-{
-       /* release only if windata was obtained; always clear comp_mgr */
-       if (windata.comp_mgr != NULL) {
-               ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
-       }
-
-       windata.comp_mgr = NULL;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       /*
-        * Zero wait time on the completion channel; presumably the Windows
-        * analogue of O_NONBLOCK in the POSIX variant - confirm.
-        */
-       channel->comp_channel.Milliseconds = 0;
-
-       return 0;
-}
-
-static int dapls_config_cm_channel(struct rdma_event_channel *channel)
-{
-       /*
-        * Zero wait time on the CM event channel; presumably the Windows
-        * analogue of O_NONBLOCK in the POSIX variant - confirm.
-        */
-       channel->channel.Milliseconds = 0;
-
-       return 0;
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       /*
-        * Zero wait time on the device context channel; presumably the
-        * Windows analogue of O_NONBLOCK on async_fd - confirm.
-        */
-       verbs->channel.Milliseconds = 0;
-
-       return 0;
-}
-
-static int dapls_thread_signal(void)
-{
-       /* Windows counterpart of the POSIX pipe write: cancel on comp_mgr */
-       CompManagerCancel(windata.comp_mgr);
-
-       return 0;
-}
-#else                          // _WIN64 || WIN32
-int g_ib_pipe[2];
-
-static int dapls_os_init(void)
-{
-       int rc;
-
-       /* g_ib_pipe is the pipe used for waking up the work thread */
-       rc = pipe(g_ib_pipe);
-       return rc;
-}
-
-/* POSIX counterpart of the OS release hook; currently does nothing. */
-static void dapls_os_release(void)
-{
-       /* close pipe? */
-       /* NOTE(review): the g_ib_pipe fds from dapls_os_init are not closed here */
-}
-
-/*
- * Get IP address using network device name.
- *
- * name:     interface name, copied (truncated to IFNAMSIZ) into the request
- * addr:     output buffer receiving the interface's socket address
- * addr_len: size of addr in bytes
- *
- * Returns 0 on success; non-zero when the socket cannot be created, the
- * SIOCGIFADDR ioctl fails, or addr is too small (EINVAL).
- */
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
-       struct ifreq ifr;
-       int skfd, ret, len;
-
-       /* start from a zeroed request so no stack garbage reaches the kernel */
-       memset(&ifr, 0, sizeof(ifr));
-       snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
-       ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
-
-       /* any socket will do as a handle for the interface ioctl */
-       skfd = socket(PF_INET, SOCK_STREAM, 0);
-       if (skfd < 0)
-               return -1;      /* no fd to ioctl on or to close */
-
-       ret = ioctl(skfd, SIOCGIFADDR, &ifr);
-       if (ret)
-               goto bail;
-
-       /* size of the returned address depends on its family */
-       switch (ifr.ifr_addr.sa_family) {
-#ifdef AF_INET6
-       case AF_INET6:
-               len = sizeof(struct sockaddr_in6);
-               break;
-#endif
-       case AF_INET:
-       default:
-               len = sizeof(struct sockaddr);
-               break;
-       }
-
-       if (len <= addr_len)
-               memcpy(addr, &ifr.ifr_addr, len);
-       else
-               ret = EINVAL;
-
-      bail:
-       close(skfd);
-       return ret;
-}
-
-/*
- * Switch fd to non-blocking mode.
- * Returns 0 on success, or the errno value of the failing fcntl call.
- */
-static int dapls_config_fd(int fd)
-{
-       int opts, err;
-
-       opts = fcntl(fd, F_GETFL);
-       if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
-               /* save errno first: the logging calls below may change it */
-               err = errno;
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
-                        fd, opts, strerror(err));
-               return err;
-       }
-
-       return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       /* make the completion channel's fd non-blocking */
-       int fd = channel->fd;
-
-       return dapls_config_fd(fd);
-}
-
-static int dapls_config_cm_channel(struct rdma_event_channel *channel)
-{
-       /* make the CM event channel's fd non-blocking */
-       int fd = channel->fd;
-
-       return dapls_config_fd(fd);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       /* make the device context's async event fd non-blocking */
-       int fd = verbs->async_fd;
-
-       return dapls_config_fd(fd);
-}
-
-static int dapls_thread_signal(void)
-{
-       /* sizeof covers the terminating NUL, so two bytes are written */
-       static const char wake[] = "w";
-
-       return write(g_ib_pipe[1], wake, sizeof wake);
-}
-#endif
-
-/*
- * Get IP address using network name, address, or device name.
- *
- * name: device name tried first via getipaddr_netdev, then passed to
- *       getaddrinfo as a host name or address string
- * addr: output buffer receiving the socket address
- * len:  size of addr in bytes
- *
- * Returns 0 on success, 1 when the name cannot be resolved or the
- * resolved address does not fit in addr.
- */
-static int getipaddr(char *name, char *addr, int len)
-{
-       struct addrinfo *res;
-       int rc;
-
-       /* assume netdev for first attempt, then network and address type */
-       if (getipaddr_netdev(name, addr, len)) {
-               rc = getaddrinfo(name, NULL, NULL, &res);
-               if (rc) {
-                       /* getaddrinfo returns its own codes, not errno */
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " open_hca: getaddr_netdev ERROR:"
-                                " %s. Is %s configured?\n",
-                                gai_strerror(rc), name);
-                       return 1;
-               }
-
-               /* reject negative or too-small buffers before copying */
-               if (len < 0 || (size_t)len < (size_t)res->ai_addrlen) {
-                       freeaddrinfo(res);
-                       return 1;
-               }
-               memcpy(addr, res->ai_addr, res->ai_addrlen);
-               freeaddrinfo(res);
-       }
-
-       /* NOTE(review): the debug dump below reads addr as an IPv4 sockaddr_in */
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " getipaddr: family %d port %d addr %d.%d.%d.%d\n",
-                    ((struct sockaddr_in *)addr)->sin_family,
-                    ((struct sockaddr_in *)addr)->sin_port,
-                    ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
-                    ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
-                    ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
-                    ((struct sockaddr_in *)addr)->sin_addr.
-                    s_addr >> 24 & 0xff);
-
-       return 0;
-}
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- *     none
- *
- * Output:
- *     none
- *
- * Returns:
- *     0 success, 1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
-
-       /* hca_list lock, then the hca list used for CQ events */
-       dapl_os_lock_init(&g_hca_lock);
-       dapl_llist_init_head(&g_hca_list);
-
-       /* any OS-specific init failure is reported as 1 */
-       return dapls_os_init() ? 1 : 0;
-}
-
-int32_t dapls_ib_release(void)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
-
-       /* stop the provider thread before tearing down what it uses */
-       dapli_ib_thread_destroy();
-
-       if (g_cm_events != NULL) {
-               rdma_destroy_event_channel(g_cm_events);
-               /*
-                * Reset the pointer: dapls_ib_open_hca creates the channel
-                * only when g_cm_events is NULL, so a stale value would make
-                * a later open reuse the destroyed channel.
-                */
-               g_cm_events = NULL;
-       }
-
-       dapls_os_release();
-       return 0;
-}
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- *      *hca_name         pointer to provider device name
- *      *ib_hca_handle_p  pointer to provide HCA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *      dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
-       struct rdma_cm_id *cm_id = NULL;
-       union ibv_gid *gid;
-       int ret;
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: %s - %p\n", hca_name, hca_ptr);
-
-       /* Setup the global cm event channel */
-       dapl_os_lock(&g_hca_lock);
-       if (g_cm_events == NULL) {
-               g_cm_events = rdma_create_event_channel();
-               if (g_cm_events == NULL) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                                    " open_hca: ERR - RDMA channel %s\n",
-                                    strerror(errno));
-                       return DAT_INTERNAL_ERROR;
-               }
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: RDMA channel created (%p)\n", g_cm_events);
-
-       dat_status = dapli_ib_thread_init();
-       if (dat_status != DAT_SUCCESS)
-               return dat_status;
-
-       /* HCA name will be hostname or IP address */
-       if (getipaddr((char *)hca_name,
-                     (char *)&hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6)))
-               return DAT_INVALID_ADDRESS;
-
-       /* cm_id will bind local device/GID based on IP address */
-       if (rdma_create_id(g_cm_events, &cm_id, (void *)hca_ptr, RDMA_PS_TCP)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: rdma_create_id ERR %s\n", strerror(errno));
-               return DAT_INTERNAL_ERROR;
-       }
-       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);
-       if ((ret) || (cm_id->verbs == NULL)) {
-               rdma_destroy_id(cm_id);
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: rdma_bind ERR %s."
-                        " Is %s configured?\n", strerror(errno), hca_name);
-               return DAT_INVALID_ADDRESS;
-       }
-
-       /* keep reference to IB device and cm_id */
-       hca_ptr->ib_trans.cm_id = cm_id;
-       hca_ptr->ib_hca_handle = cm_id->verbs;
-       dapls_config_verbs(cm_id->verbs);
-       hca_ptr->port_num = cm_id->port_num;
-       gid = &cm_id->route.addr.addr.ibaddr.sgid;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: ctx=%p port=%d GID subnet %016llx id %016llx\n",
-                    cm_id->verbs, cm_id->port_num,
-                    (unsigned long long)ntohll(gid->global.subnet_prefix),
-                    (unsigned long long)ntohll(gid->global.interface_id));
-
-       /* set inline max with env or default, get local lid and gid 0 */
-       if (hca_ptr->ib_hca_handle->device->transport_type
-           == IBV_TRANSPORT_IWARP)
-               hca_ptr->ib_trans.max_inline_send =
-                   dapl_os_get_env_val("DAPL_MAX_INLINE",
-                                       INLINE_SEND_IWARP_DEFAULT);
-       else
-               hca_ptr->ib_trans.max_inline_send =
-                   dapl_os_get_env_val("DAPL_MAX_INLINE",
-                                       INLINE_SEND_IB_DEFAULT);
-
-       /* set CM timer defaults */
-       hca_ptr->ib_trans.max_cm_timeout =
-           dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",
-                               IB_CM_RESPONSE_TIMEOUT);
-       hca_ptr->ib_trans.max_cm_retries =
-           dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);
-
-       /* EVD events without direct CQ channels, non-blocking */
-       hca_ptr->ib_trans.ib_cq =
-           ibv_create_comp_channel(hca_ptr->ib_hca_handle);
-       if (hca_ptr->ib_trans.ib_cq == NULL) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: ibv_create_comp_channel ERR %s\n",
-                        strerror(errno));
-               goto bail;
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: CQ channel created\n");
-
-       if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
-               goto bail;
-       }
-
-       /* 
-        * Put new hca_transport on list for async and CQ event processing 
-        * Wakeup work thread to add to polling list
-        */
-       dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry);
-       dapl_os_lock(&g_hca_lock);
-       dapl_llist_add_tail(&g_hca_list,
-                           (DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry,
-                           &hca_ptr->ib_trans.entry);
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " open_hca: thread wakeup error = %s\n",
-                        strerror(errno));
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,
-                    ((struct sockaddr_in *)
-                     &hca_ptr->hca_address)->sin_family == AF_INET ?
-                    "AF_INET" : "AF_INET6", ((struct sockaddr_in *)
-                                             &hca_ptr->hca_address)->sin_addr.
-                    s_addr >> 0 & 0xff, ((struct sockaddr_in *)
-                                         &hca_ptr->hca_address)->sin_addr.
-                    s_addr >> 8 & 0xff, ((struct sockaddr_in *)
-                                         &hca_ptr->hca_address)->sin_addr.
-                    s_addr >> 16 & 0xff, ((struct sockaddr_in *)
-                                          &hca_ptr->hca_address)->sin_addr.
-                    s_addr >> 24 & 0xff, hca_ptr->ib_trans.max_inline_send);
-
-       hca_ptr->ib_trans.d_hca = hca_ptr;
-       return DAT_SUCCESS;
-      bail:
-       rdma_destroy_id(hca_ptr->ib_trans.cm_id);
-       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-       return DAT_INTERNAL_ERROR;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Open HCA
- *
- * Input:
- *      DAPL_HCA   provide CA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *     dapl_convert_errno 
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",
-                    hca_ptr, hca_ptr->ib_hca_handle);
-
-       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
-               if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
-                       return (dapl_convert_errno(errno, "ib_close_device"));
-               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-       }
-
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_RUN) {
-               dapl_os_unlock(&g_hca_lock);
-               goto bail;
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       /* 
-        * Remove hca from async and CQ event processing list
-        * Wakeup work thread to remove from polling list
-        */
-       hca_ptr->ib_trans.destroy = 1;
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " destroy: thread wakeup error = %s\n",
-                        strerror(errno));
-
-       /* wait for thread to remove HCA references */
-       while (hca_ptr->ib_trans.destroy != 2) {
-               if (dapls_thread_signal() == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_destroy: wait on hca %p destroy\n");
-               dapl_os_sleep_usec(10000);
-       }
-      bail:
-       return (DAT_SUCCESS);
-}
-
-/*
- * dapls_ib_query_hca
- *
- * Query the hca attribute
- *
- * Input:
- *     hca_handl               hca handle      
- *     ia_attr                 attribute of the ia
- *     ep_attr                 attribute of the ep
- *     ip_addr                 ip address of DET NIC
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_HANDLE
- */
-
-DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
-                             OUT DAT_IA_ATTR * ia_attr,
-                             OUT DAT_EP_ATTR * ep_attr,
-                             OUT DAT_SOCK_ADDR6 * ip_addr)
-{
-       struct ibv_device_attr dev_attr;
-       struct ibv_port_attr port_attr;
-
-       if (hca_ptr->ib_hca_handle == NULL) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " query_hca: BAD handle\n");
-               return (DAT_INVALID_HANDLE);
-       }
-
-       /* local IP address of device, set during ia_open */
-       if (ip_addr != NULL)
-               memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
-
-       if (ia_attr == NULL && ep_attr == NULL)
-               return DAT_SUCCESS;
-
-       /* query verbs for this device and port attributes */
-       if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
-           ibv_query_port(hca_ptr->ib_hca_handle,
-                          hca_ptr->port_num, &port_attr))
-               return (dapl_convert_errno(errno, "ib_query_hca"));
-
-       /*
-        * There is no query for inline data so there is no way to 
-        * calculate the impact on sge nor the max inline send. Most 
-        * implementions consume 1 or none so just reduce by 1 until 
-        * we are provided with a query mechanism from verbs.
-        */
-       if (hca_ptr->ib_trans.max_inline_send)
-               dev_attr.max_sge--;
-
-       if (ia_attr != NULL) {
-               (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
-               ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
-               ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
-               ia_attr->ia_address_ptr =
-                   (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address;
-
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        "dapl_query_hca: %s %s %s\n", hca_ptr->name,
-                        ((struct sockaddr_in *)
-                         ia_attr->ia_address_ptr)->sin_family == AF_INET ?
-                        "AF_INET" : "AF_INET6",
-                        inet_ntoa(((struct sockaddr_in *)
-                                   ia_attr->ia_address_ptr)->sin_addr));
-
-               ia_attr->hardware_version_major = dev_attr.hw_ver;
-               ia_attr->max_eps = dev_attr.max_qp;
-               ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
-               ia_attr->max_rdma_read_in = dev_attr.max_res_rd_atom;
-               ia_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
-               ia_attr->max_rdma_read_per_ep_in = dev_attr.max_qp_rd_atom;
-               ia_attr->max_rdma_read_per_ep_out =
-                   dev_attr.max_qp_init_rd_atom;
-               ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
-               ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
-               ia_attr->max_evds = dev_attr.max_cq;
-               ia_attr->max_evd_qlen = dev_attr.max_cqe;
-               ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
-               ia_attr->max_lmrs = dev_attr.max_mr;
-               /* 32bit attribute from 64bit, 4G-1 limit, DAT v2 needs fix */
-               ia_attr->max_lmr_block_size = 
-                   (dev_attr.max_mr_size >> 32) ? ~0 : dev_attr.max_mr_size;
-               ia_attr->max_rmrs = dev_attr.max_mw;
-               ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
-               ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
-               ia_attr->max_pzs = dev_attr.max_pd;
-               ia_attr->max_mtu_size = port_attr.max_msg_sz;
-               ia_attr->max_rdma_size = port_attr.max_msg_sz;
-               ia_attr->num_transport_attr = 0;
-               ia_attr->transport_attr = NULL;
-               ia_attr->num_vendor_attr = 0;
-               ia_attr->vendor_attr = NULL;
-               /* iWARP spec. - 1 sge for RDMA reads */
-               if (hca_ptr->ib_hca_handle->device->transport_type
-                   == IBV_TRANSPORT_IWARP)
-                       ia_attr->max_iov_segments_per_rdma_read = 1;
-               else
-                       ia_attr->max_iov_segments_per_rdma_read =
-                           dev_attr.max_sge;
-
-               ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
-               /* save rd_atom for peer validation during connect requests */
-               hca_ptr->ib_trans.max_rdma_rd_in = dev_attr.max_qp_rd_atom;
-               hca_ptr->ib_trans.max_rdma_rd_out =
-                   dev_attr.max_qp_init_rd_atom;
-#ifdef DAT_EXTENSIONS
-               ia_attr->extension_supported = DAT_EXTENSION_IB;
-               ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
-#endif
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        "dapl_query_hca: (ver=%x) ep's %d ep_q %d"
-                        " evd's %d evd_q %d mr %u\n",
-                        ia_attr->hardware_version_major,
-                        ia_attr->max_eps, ia_attr->max_dto_per_ep,
-                        ia_attr->max_evds, ia_attr->max_evd_qlen, 
-                        ia_attr->max_lmr_block_size);
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        "dapl_query_hca: msg %llu rdma %llu iov's %d"
-                        " lmr %d rmr %d rd_in,out %d,%d inline=%d\n",
-                        ia_attr->max_mtu_size, ia_attr->max_rdma_size,
-                        ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs,
-                        ia_attr->max_rmrs, ia_attr->max_rdma_read_per_ep_in,
-                        ia_attr->max_rdma_read_per_ep_out,
-                        hca_ptr->ib_trans.max_inline_send);
-       }
-
-       if (ep_attr != NULL) {
-               (void)dapl_os_memzero(ep_attr, sizeof(*ep_attr));
-               ep_attr->max_mtu_size = port_attr.max_msg_sz;
-               ep_attr->max_rdma_size = port_attr.max_msg_sz;
-               ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
-               ep_attr->max_request_dtos = dev_attr.max_qp_wr;
-               ep_attr->max_recv_iov = dev_attr.max_sge;
-               ep_attr->max_request_iov = dev_attr.max_sge;
-               ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
-               ep_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
-               /* iWARP spec. - 1 sge for RDMA reads */
-               if (hca_ptr->ib_hca_handle->device->transport_type
-                   == IBV_TRANSPORT_IWARP)
-                       ep_attr->max_rdma_read_iov = 1;
-               else
-                       ep_attr->max_rdma_read_iov = dev_attr.max_sge;
-
-               ep_attr->max_rdma_write_iov = dev_attr.max_sge;
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        "dapl_query_hca: MAX msg %llu dto %d iov %d"
-                        " rdma i%d,o%d\n",
-                        ep_attr->max_mtu_size,
-                        ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
-                        ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_setup_async_callback
- *
- * Set up an asynchronous callbacks of various kinds
- *
- * Input:
- *     ia_handle               IA handle
- *     handler_type            type of handler to set up
- *     callback_handle         handle param for completion callbacks
- *     callback                callback routine pointer
- *     context                 argument for callback routine
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_setup_async_callback(IN DAPL_IA * ia_ptr,
-                                        IN DAPL_ASYNC_HANDLER_TYPE type,
-                                        IN DAPL_EVD * evd_ptr,
-                                        IN ib_async_handler_t callback,
-                                        IN void *context)
-{
-       ib_hca_transport_t *hca_ptr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " setup_async_cb: ia %p type %d hdl %p cb %p ctx %p\n",
-                    ia_ptr, type, evd_ptr, callback, context);
-
-       hca_ptr = &ia_ptr->hca_ptr->ib_trans;
-       switch (type) {
-       case DAPL_ASYNC_UNAFILIATED:
-               hca_ptr->async_unafiliated = (ib_async_handler_t) callback;
-               hca_ptr->async_un_ctx = context;
-               break;
-       case DAPL_ASYNC_CQ_ERROR:
-               hca_ptr->async_cq_error = (ib_async_cq_handler_t) callback;
-               break;
-       case DAPL_ASYNC_CQ_COMPLETION:
-               hca_ptr->async_cq = (ib_async_dto_handler_t) callback;
-               break;
-       case DAPL_ASYNC_QP_ERROR:
-               hca_ptr->async_qp_error = (ib_async_qp_handler_t) callback;
-               break;
-       default:
-               break;
-       }
-       return DAT_SUCCESS;
-}
-
-DAT_RETURN dapli_ib_thread_init(void)
-{
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_init(%d)\n", dapl_os_getpid());
-
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_INIT) {
-               dapl_os_unlock(&g_hca_lock);
-               return DAT_SUCCESS;
-       }
-
-       /* uCMA events non-blocking */
-       if (dapls_config_cm_channel(g_cm_events)) {
-               dapl_os_unlock(&g_hca_lock);
-               return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
-       }
-
-       g_ib_thread_state = IB_THREAD_CREATE;
-       dapl_os_unlock(&g_hca_lock);
-
-       /* create thread to process inbound connect request */
-       dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
-       if (dat_status != DAT_SUCCESS)
-               return (dapl_convert_errno(errno,
-                                          "create_thread ERR:"
-                                          " check resource limits"));
-
-       /* wait for thread to start */
-       dapl_os_lock(&g_hca_lock);
-       while (g_ib_thread_state != IB_THREAD_RUN) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_init: waiting for ib_thread\n");
-               dapl_os_unlock(&g_hca_lock);
-               dapl_os_sleep_usec(2000);
-               dapl_os_lock(&g_hca_lock);
-       }
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_init(%d) exit\n", dapl_os_getpid());
-
-       return DAT_SUCCESS;
-}
-
-void dapli_ib_thread_destroy(void)
-{
-       int retries = 10;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_destroy(%d)\n", dapl_os_getpid());
-       /* 
-        * wait for async thread to terminate. 
-        * pthread_join would be the correct method
-        * but some applications have some issues
-        */
-
-       /* destroy ib_thread, wait for termination, if not already */
-       dapl_os_lock(&g_hca_lock);
-       if (g_ib_thread_state != IB_THREAD_RUN)
-               goto bail;
-
-       g_ib_thread_state = IB_THREAD_CANCEL;
-       if (dapls_thread_signal() == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " destroy: thread wakeup error = %s\n",
-                        strerror(errno));
-       while ((g_ib_thread_state != IB_THREAD_EXIT) && (retries--)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread_destroy: waiting for ib_thread\n");
-               if (dapls_thread_signal() == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-               dapl_os_unlock(&g_hca_lock);
-               dapl_os_sleep_usec(2000);
-               dapl_os_lock(&g_hca_lock);
-       }
-
-      bail:
-       dapl_os_unlock(&g_hca_lock);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
-}
-
-void dapli_async_event_cb(struct _ib_hca_transport *hca)
-{
-       struct ibv_async_event event;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " async_event(%p)\n", hca);
-
-       if (hca->destroy)
-               return;
-
-       if (!ibv_get_async_event(hca->cm_id->verbs, &event)) {
-
-               switch (event.event_type) {
-               case IBV_EVENT_CQ_ERR:
-                       {
-                               struct dapl_ep *evd_ptr =
-                                   event.element.cq->cq_context;
-
-                               dapl_log(DAPL_DBG_TYPE_ERR,
-                                        "dapl async_event CQ (%p) ERR %d\n",
-                                        evd_ptr, event.event_type);
-
-                               /* report up if async callback still setup */
-                               if (hca->async_cq_error)
-                                       hca->async_cq_error(hca->cm_id->verbs,
-                                                           event.element.cq,
-                                                           &event,
-                                                           (void *)evd_ptr);
-                               break;
-                       }
-               case IBV_EVENT_COMM_EST:
-                       {
-                               /* Received msgs on connected QP before RTU */
-                               dapl_log(DAPL_DBG_TYPE_UTIL,
-                                        " async_event COMM_EST(%p) rdata beat RTU\n",
-                                        event.element.qp);
-
-                               break;
-                       }
-               case IBV_EVENT_QP_FATAL:
-               case IBV_EVENT_QP_REQ_ERR:
-               case IBV_EVENT_QP_ACCESS_ERR:
-               case IBV_EVENT_QP_LAST_WQE_REACHED:
-               case IBV_EVENT_SRQ_ERR:
-               case IBV_EVENT_SRQ_LIMIT_REACHED:
-               case IBV_EVENT_SQ_DRAINED:
-                       {
-                               struct dapl_ep *ep_ptr =
-                                   event.element.qp->qp_context;
-
-                               dapl_log(DAPL_DBG_TYPE_ERR,
-                                        "dapl async_event QP (%p) ERR %d\n",
-                                        ep_ptr, event.event_type);
-
-                               /* report up if async callback still setup */
-                               if (hca->async_qp_error)
-                                       hca->async_qp_error(hca->cm_id->verbs,
-                                                           ep_ptr->qp_handle,
-                                                           &event,
-                                                           (void *)ep_ptr);
-                               break;
-                       }
-               case IBV_EVENT_PATH_MIG:
-               case IBV_EVENT_PATH_MIG_ERR:
-               case IBV_EVENT_DEVICE_FATAL:
-               case IBV_EVENT_PORT_ACTIVE:
-               case IBV_EVENT_PORT_ERR:
-               case IBV_EVENT_LID_CHANGE:
-               case IBV_EVENT_PKEY_CHANGE:
-               case IBV_EVENT_SM_CHANGE:
-                       {
-                               dapl_log(DAPL_DBG_TYPE_WARN,
-                                        "dapl async_event: DEV ERR %d\n",
-                                        event.event_type);
-
-                               /* report up if async callback still setup */
-                               if (hca->async_unafiliated)
-                                       hca->async_unafiliated(hca->cm_id->
-                                                              verbs, &event,
-                                                              hca->
-                                                              async_un_ctx);
-                               break;
-                       }
-               case IBV_EVENT_CLIENT_REREGISTER:
-                       /* no need to report this event this time */
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " async_event: IBV_EVENT_CLIENT_REREGISTER\n");
-                       break;
-
-               default:
-                       dapl_log(DAPL_DBG_TYPE_WARN,
-                                "dapl async_event: %d UNKNOWN\n",
-                                event.event_type);
-                       break;
-
-               }
-               ibv_ack_async_event(&event);
-       }
-}
-
-#if defined(_WIN64) || defined(_WIN32)
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
-       struct _ib_hca_transport *hca;
-       struct _ib_hca_transport *uhca[8];
-       COMP_CHANNEL *channel;
-       int ret, idx, cnt;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
-                    dapl_os_getpid(), g_ib_thread);
-
-       dapl_os_lock(&g_hca_lock);
-       for (g_ib_thread_state = IB_THREAD_RUN;
-            g_ib_thread_state == IB_THREAD_RUN; dapl_os_lock(&g_hca_lock)) {
-
-               idx = 0;
-               hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
-                   dapl_llist_peek_head(&g_hca_list);
-
-               while (hca) {
-                       uhca[idx++] = hca;
-                       hca = dapl_llist_next_entry(&g_hca_list,
-                                                   (DAPL_LLIST_ENTRY *) & hca->
-                                                   entry);
-               }
-               cnt = idx;
-
-               dapl_os_unlock(&g_hca_lock);
-               ret = CompManagerPoll(windata.comp_mgr, INFINITE, &channel);
-
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread(%d) poll_event 0x%x\n",
-                            dapl_os_getpid(), ret);
-
-               dapli_cma_event_cb();
-
-               /* check and process CQ and ASYNC events, per device */
-               for (idx = 0; idx < cnt; idx++) {
-                       if (uhca[idx]->destroy == 1) {
-                               dapl_os_lock(&g_hca_lock);
-                               dapl_llist_remove_entry(&g_hca_list,
-                                                       (DAPL_LLIST_ENTRY *) &
-                                                       uhca[idx]->entry);
-                               dapl_os_unlock(&g_hca_lock);
-                               uhca[idx]->destroy = 2;
-                       } else {
-                               dapli_cq_event_cb(uhca[idx]);
-                               dapli_async_event_cb(uhca[idx]);
-                       }
-               }
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
-                    dapl_os_getpid());
-       g_ib_thread_state = IB_THREAD_EXIT;
-       dapl_os_unlock(&g_hca_lock);
-}
-#else                          // _WIN64 || WIN32
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
-       struct pollfd ufds[__FD_SETSIZE];
-       struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
-       struct _ib_hca_transport *hca;
-       int ret, idx, fds;
-       char rbuf[2];
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",
-                    dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],
-                    g_cm_events->fd);
-
-       /* Poll across pipe, CM, AT never changes */
-       dapl_os_lock(&g_hca_lock);
-       g_ib_thread_state = IB_THREAD_RUN;
-
-       ufds[0].fd = g_ib_pipe[0];      /* pipe */
-       ufds[0].events = POLLIN;
-       ufds[1].fd = g_cm_events->fd;   /* uCMA */
-       ufds[1].events = POLLIN;
-
-       while (g_ib_thread_state == IB_THREAD_RUN) {
-
-               /* build ufds after pipe and uCMA events */
-               ufds[0].revents = 0;
-               ufds[1].revents = 0;
-               idx = 1;
-
-               /*  Walk HCA list and setup async and CQ events */
-               if (!dapl_llist_is_empty(&g_hca_list))
-                       hca = dapl_llist_peek_head(&g_hca_list);
-               else
-                       hca = NULL;
-
-               while (hca) {
-
-                       /* uASYNC events */
-                       ufds[++idx].fd = hca->cm_id->verbs->async_fd;
-                       ufds[idx].events = POLLIN;
-                       ufds[idx].revents = 0;
-                       uhca[idx] = hca;
-
-                       /* uCQ, non-direct events */
-                       ufds[++idx].fd = hca->ib_cq->fd;
-                       ufds[idx].events = POLLIN;
-                       ufds[idx].revents = 0;
-                       uhca[idx] = hca;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                                    " ib_thread(%d) poll_fd: hca[%d]=%p, async=%d"
-                                    " pipe=%d cm=%d cq=d\n",
-                                    dapl_os_getpid(), hca, ufds[idx - 1].fd,
-                                    ufds[0].fd, ufds[1].fd, ufds[idx].fd);
-
-                       hca = dapl_llist_next_entry(&g_hca_list,
-                                                   (DAPL_LLIST_ENTRY *) & hca->
-                                                   entry);
-               }
-
-               /* unlock, and setup poll */
-               fds = idx + 1;
-               dapl_os_unlock(&g_hca_lock);
-               ret = poll(ufds, fds, -1);
-               if (ret <= 0) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                                    " ib_thread(%d): ERR %s poll\n",
-                                    dapl_os_getpid(), strerror(errno));
-                       dapl_os_lock(&g_hca_lock);
-                       continue;
-               }
-
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " ib_thread(%d) poll_event: "
-                            " async=0x%x pipe=0x%x cm=0x%x cq=0x%x\n",
-                            dapl_os_getpid(), ufds[idx - 1].revents,
-                            ufds[0].revents, ufds[1].revents,
-                            ufds[idx].revents);
-
-               /* uCMA events */
-               if (ufds[1].revents == POLLIN)
-                       dapli_cma_event_cb();
-
-               /* check and process CQ and ASYNC events, per device */
-               for (idx = 2; idx < fds; idx++) {
-                       if (ufds[idx].revents == POLLIN) {
-                               dapli_cq_event_cb(uhca[idx]);
-                               dapli_async_event_cb(uhca[idx]);
-                       }
-               }
-
-               /* check and process user events, PIPE */
-               if (ufds[0].revents == POLLIN) {
-                       if (read(g_ib_pipe[0], rbuf, 2) == -1)
-                               dapl_log(DAPL_DBG_TYPE_UTIL,
-                                        " cr_thread: pipe rd err= %s\n",
-                                        strerror(errno));
-
-                       /* cleanup any device on list marked for destroy */
-                       for (idx = 3; idx < fds; idx++) {
-                               if (uhca[idx] && uhca[idx]->destroy == 1) {
-                                       dapl_os_lock(&g_hca_lock);
-                                       dapl_llist_remove_entry(&g_hca_list,
-                                                               (DAPL_LLIST_ENTRY
-                                                                *)
-                                                               & uhca[idx]->
-                                                               entry);
-                                       dapl_os_unlock(&g_hca_lock);
-                                       uhca[idx]->destroy = 2;
-                               }
-                       }
-               }
-               dapl_os_lock(&g_hca_lock);
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
-                    dapl_os_getpid());
-       g_ib_thread_state = IB_THREAD_EXIT;
-       dapl_os_unlock(&g_hca_lock);
-}
-#endif
-
-/*
- * dapls_set_provider_specific_attr
- *
- * Input:
- *     attr_ptr        Pointer provider specific attributes
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- */
-DAT_NAMED_ATTR ib_attrs[] = {
-#ifdef DAT_EXTENSIONS
-       {
-        "DAT_EXTENSION_INTERFACE", "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_IMMED_DATA, "TRUE"}
-       ,
-#ifdef DAPL_COUNTERS
-       {
-        DAT_ATTR_COUNTERS, "TRUE"}
-       ,
-#endif                         /* DAPL_COUNTERS */
-#endif
-};
-
-#define SPEC_ATTR_SIZE( x )    (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
-
-void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
-                                       IN DAT_PROVIDER_ATTR * attr_ptr)
-{
-       attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
-       attr_ptr->provider_specific_attr = ib_attrs;
-}
index dde5fac31d2366c71f8745072583efe0518a2ea3..f466c0680813be0c83dc388c91b6b1923da487bb 100755 (executable)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005-2008 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.
  *
  * This Software is licensed under one of the following licenses:
  *
  * notice, one of the license notices in the documentation
  * and/or other materials provided with the distribution.
  */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_util.h
- *
- * PURPOSE: The OFED provider - definitions, prototypes,
- *
- * $Id: $
- *
- **********************************************************************/
-
+/* 
+ * Definitions specific to OpenIB CMA provider.
+ *   Connection manager - rdma_cma, provided in separate library.
+ */
 #ifndef _DAPL_IB_UTIL_H_
 #define _DAPL_IB_UTIL_H_
+#define _OPENIB_CMA_ 
 
-#include "openib_osd.h"
 #include <infiniband/verbs.h>
 #include <rdma/rdma_cma.h>
-
-/* Typedefs to map common DAPL provider types to IB verbs */
-typedef        struct dapl_cm_id       *ib_qp_handle_t;
-typedef        struct ibv_cq           *ib_cq_handle_t;
-typedef        struct ibv_pd           *ib_pd_handle_t;
-typedef        struct ibv_mr           *ib_mr_handle_t;
-typedef        struct ibv_mw           *ib_mw_handle_t;
-typedef        struct ibv_wc           ib_work_completion_t;
-
-/* HCA context type maps to IB verbs  */
-typedef        struct ibv_context      *ib_hca_handle_t;
-typedef ib_hca_handle_t                dapl_ibal_ca_t;
+#include "openib_osd.h"
+#include "dapl_ib_common.h"
 
 #define IB_RC_RETRY_COUNT      7
 #define IB_RNR_RETRY_COUNT     7
@@ -64,56 +47,6 @@ typedef ib_hca_handle_t              dapl_ibal_ca_t;
 #define IB_ROUTE_RETRY_COUNT   15      /* 60 sec total */
 #define IB_MAX_AT_RETRY                3
 
-typedef enum {
-       IB_CME_CONNECTED,
-       IB_CME_DISCONNECTED,
-       IB_CME_DISCONNECTED_ON_LINK_DOWN,
-       IB_CME_CONNECTION_REQUEST_PENDING,
-       IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
-       IB_CME_CONNECTION_REQUEST_ACKED,
-       IB_CME_DESTINATION_REJECT,
-       IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
-       IB_CME_DESTINATION_UNREACHABLE,
-       IB_CME_TOO_MANY_CONNECTION_REQUESTS,
-       IB_CME_LOCAL_FAILURE,
-       IB_CME_BROKEN,
-       IB_CME_TIMEOUT
-} ib_cm_events_t;
-
-/* CQ notifications */
-typedef enum
-{
-       IB_NOTIFY_ON_NEXT_COMP,
-       IB_NOTIFY_ON_SOLIC_COMP
-
-} ib_notification_type_t;
-
-/* other mappings */
-typedef int                    ib_bool_t;
-typedef union ibv_gid          GID;
-typedef char                   *IB_HCA_NAME;
-typedef uint16_t               ib_hca_port_t;
-typedef uint32_t               ib_comp_handle_t;
-
-#ifdef CQ_WAIT_OBJECT
-
-/* CQ event channel, plus pipe to enable consumer wakeup */
-typedef struct _ib_wait_obj_handle
-{ 
-       struct ibv_comp_channel *events;
-       int                     pipe[2];
-
-} *ib_wait_obj_handle_t;
-
-#endif
-
-/* Definitions */
-#define IB_INVALID_HANDLE      NULL
-
-/* inline send rdma threshold */
-#define        INLINE_SEND_IWARP_DEFAULT       64
-#define        INLINE_SEND_IB_DEFAULT          200
-
 /* CMA private data areas */
 #define CMA_PDATA_HDR          36
 #define        IB_MAX_REQ_PDATA_SIZE   (92-CMA_PDATA_HDR)
@@ -123,38 +56,6 @@ typedef struct _ib_wait_obj_handle
 #define        IB_MAX_DREP_PDATA_SIZE  (224-CMA_PDATA_HDR)
 #define        IWARP_MAX_PDATA_SIZE    (512-CMA_PDATA_HDR)
 
-/* DTO OPs, ordered for DAPL ENUM definitions */
-#define OP_RDMA_WRITE           IBV_WR_RDMA_WRITE
-#define OP_RDMA_WRITE_IMM       IBV_WR_RDMA_WRITE_WITH_IMM
-#define OP_SEND                 IBV_WR_SEND
-#define OP_SEND_IMM             IBV_WR_SEND_WITH_IMM
-#define OP_RDMA_READ            IBV_WR_RDMA_READ
-#define OP_COMP_AND_SWAP        IBV_WR_ATOMIC_CMP_AND_SWP
-#define OP_FETCH_AND_ADD        IBV_WR_ATOMIC_FETCH_AND_ADD
-#define OP_RECEIVE              7   /* internal op */
-#define OP_RECEIVE_IMM         8   /* internel op */
-#define OP_BIND_MW              9   /* internal op */
-#define OP_INVALID             0xff
-
-/* Definitions to map QP state */
-#define IB_QP_STATE_RESET      IBV_QPS_RESET
-#define IB_QP_STATE_INIT       IBV_QPS_INIT
-#define IB_QP_STATE_RTR                IBV_QPS_RTR
-#define IB_QP_STATE_RTS                IBV_QPS_RTS
-#define IB_QP_STATE_SQD                IBV_QPS_SQD
-#define IB_QP_STATE_SQE                IBV_QPS_SQE
-#define IB_QP_STATE_ERROR      IBV_QPS_ERR
-
-typedef enum
-{
-       IB_THREAD_INIT,
-       IB_THREAD_CREATE,
-       IB_THREAD_RUN,
-       IB_THREAD_CANCEL,
-       IB_THREAD_EXIT
-
-} ib_thread_state_t;
-
 struct dapl_cm_id {
        DAPL_OS_LOCK                    lock;
        int                             destroy;
@@ -171,67 +72,13 @@ struct dapl_cm_id {
        DAT_SOCK_ADDR6                  r_addr;
        int                             p_len;
        unsigned char                   p_data[256]; /* dapl max private data size */
+       ib_qp_cm_t                      dst; /* dapls_modify_qp_state */
+       struct ibv_ah                   *ah; /* dapls_modify_qp_state */
 };
 
 typedef struct dapl_cm_id      *dp_ib_cm_handle_t;
 typedef struct dapl_cm_id      *ib_cm_srvc_handle_t;
 
-/* Operation and state mappings */
-typedef int    ib_send_op_type_t;
-typedef        struct  ibv_sge         ib_data_segment_t;
-typedef enum   ibv_qp_state    ib_qp_state_t;
-typedef        enum    ibv_event_type  ib_async_event_type;
-typedef struct ibv_async_event ib_error_record_t;
-
-/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
-/* some are errno and some are -n values */
-
-/**
- * ibv_get_device_name - Return kernel device name
- * ibv_get_device_guid - Return device's node GUID
- * ibv_open_device - Return ibv_context or NULL
- * ibv_close_device - Return 0, (errno?)
- * ibv_get_async_event - Return 0, -1 
- * ibv_alloc_pd - Return ibv_pd, NULL
- * ibv_dealloc_pd - Return 0, errno 
- * ibv_reg_mr - Return ibv_mr, NULL
- * ibv_dereg_mr - Return 0, errno
- * ibv_create_cq - Return ibv_cq, NULL
- * ibv_destroy_cq - Return 0, errno
- * ibv_get_cq_event - Return 0 & ibv_cq/context, int
- * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error 
- * ibv_req_notify_cq - Return 0 (void?)
- * ibv_create_qp - Return ibv_qp, NULL
- * ibv_modify_qp - Return 0, errno
- * ibv_destroy_qp - Return 0, errno
- * ibv_post_send - Return 0, -1 & bad_wr
- * ibv_post_recv - Return 0, -1 & bad_wr 
- */
-
-/* async handlers for DTO, CQ, QP, and unafiliated */
-typedef void (*ib_async_dto_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef void (*ib_async_cq_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_cq_handle_t     ib_cq_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef void (*ib_async_qp_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_qp_handle_t     ib_qp_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef void (*ib_async_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-
 /* ib_hca_transport_t, specific to this implementation */
 typedef struct _ib_hca_transport
 { 
@@ -250,79 +97,38 @@ typedef struct _ib_hca_transport
        uint8_t                 max_cm_timeout;
        uint8_t                 max_cm_retries;
        /* device attributes */
-       int                     max_rdma_rd_in;
-       int                     max_rdma_rd_out;
+       int                     rd_atom_in;
+       int                     rd_atom_out;
+       struct  ibv_device      *ib_dev;
+       /* dapls_modify_qp_state */
+       uint16_t                lid;
+       uint8_t                 ack_timer;
+       uint8_t                 ack_retry;
+       uint8_t                 rnr_timer;
+       uint8_t                 rnr_retry;
+       uint8_t                 global;
+       uint8_t                 hop_limit;
+       uint8_t                 tclass;
+       uint8_t                 mtu;
+       DAT_NAMED_ATTR          named_attr;
 
 } ib_hca_transport_t;
 
-/* provider specfic fields for shared memory support */
-typedef uint32_t ib_shm_transport_t;
-
 /* prototypes */
-int32_t        dapls_ib_init (void);
-int32_t        dapls_ib_release (void);
 void dapli_thread(void *arg);
 DAT_RETURN  dapli_ib_thread_init(void);
 void dapli_ib_thread_destroy(void);
 void dapli_cma_event_cb(void);
-void dapli_cq_event_cb(struct _ib_hca_transport *hca);
 void dapli_async_event_cb(struct _ib_hca_transport *hca);
-void dapli_destroy_conn(struct dapl_cm_id *conn);
-
-DAT_RETURN
-dapls_modify_qp_state ( IN ib_qp_handle_t      qp_handle,
-                       IN ib_qp_state_t        qp_state,
-                       IN struct dapl_cm_id    *conn );
-
-/* inline functions */
-STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
-{
-       /* use ascii; name of local device */
-       return dapl_os_strdup(name);
-}
-
-STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
-{
-       return;
-}
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
+void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+                                IN ib_qp_state_t qp_state,
+                                IN dp_ib_cm_handle_t cm);
 
 STATIC _INLINE_ void dapls_print_cm_list(IN DAPL_IA * ia_ptr)
 {
        return;
 }
 
-/*
- *  Convert errno to DAT_RETURN values
- */
-STATIC _INLINE_ DAT_RETURN 
-dapl_convert_errno( IN int err, IN const char *str )
-{
-    if (!err)  return DAT_SUCCESS;
-       
-#if DAPL_DBG
-    if ((err != EAGAIN) && (err != ETIMEDOUT))
-       dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
-#endif 
-
-    switch( err )
-    {
-       case EOVERFLOW  : return DAT_LENGTH_ERROR;
-       case EACCES     : return DAT_PRIVILEGES_VIOLATION;
-       case EPERM      : return DAT_PROTECTION_VIOLATION;                
-       case EINVAL     : return DAT_INVALID_HANDLE;
-       case EISCONN    : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
-       case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
-       case ETIMEDOUT  : return DAT_TIMEOUT_EXPIRED;
-       case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
-       case EADDRINUSE : return DAT_CONN_QUAL_IN_USE;
-       case EALREADY   : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
-       case ENOMEM     : return DAT_INSUFFICIENT_RESOURCES;
-        case EAGAIN    : return DAT_QUEUE_EMPTY;
-       case EINTR      : return DAT_INTERRUPTED_CALL;
-       case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
-       case EFAULT     : 
-       default         : return DAT_INTERNAL_ERROR;
-    }
- }
-
 #endif /*  _DAPL_IB_UTIL_H_ */
diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c
new file mode 100644 (file)
index 0000000..0e974f6
--- /dev/null
@@ -0,0 +1,847 @@
+/*
+ * Copyright (c) 2005-2008 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ * 
+ * MODULE: device.c
+ *
+ * PURPOSE: OFED provider - init, open, close, utilities, work thread
+ *
+ * $Id:$
+ *
+ **********************************************************************/
+
+#ifdef RCSID
+static const char rcsid[] = "$Id:  $";
+#endif
+
+#include "openib_osd.h"
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#include <stdlib.h>
+
+/* rdma_cm event channel shared by all HCAs; created on first open_hca */
+struct rdma_event_channel *g_cm_events = NULL;
+/* work thread lifecycle state; 0 is presumably IB_THREAD_INIT - TODO confirm */
+ib_thread_state_t g_ib_thread_state = 0;
+/* handle of the work thread started by dapli_ib_thread_init */
+DAPL_OS_THREAD g_ib_thread;
+/* taken around g_hca_list updates and g_ib_thread_state checks in this file */
+DAPL_OS_LOCK g_hca_lock;
+/* list of opened hca transports for async and CQ event processing */
+struct dapl_llist_entry *g_hca_list;
+
+#if defined(_WIN64) || defined(_WIN32)
+#include "..\..\..\..\..\etc\user\comp_channel.cpp"
+#include "..\..\..\..\..\etc\user\dlist.c"
+#include <rdma\winverbs.h>
+
+struct ibvw_windata windata;
+
+/*
+ * Get IP address using an "rdma_dev<N>" device name (Windows).
+ *
+ * Walks the addresses returned for "..localmachine" and copies the N-th
+ * one that the winverbs provider can translate to a device and that fits
+ * in addr_len bytes.
+ *
+ * Returns 0 on success; EINVAL if name does not start with "rdma_dev",
+ * ENODEV if no matching address is found, otherwise the failing
+ * HRESULT / getaddrinfo code.
+ */
+static int getipaddr_netdev(char *name, char *addr, int addr_len)
+{
+       IWVProvider *prov;
+       WV_DEVICE_ADDRESS devaddr;
+       struct addrinfo *res, *ai;
+       HRESULT hr;
+       int index;
+
+       if (strncmp(name, "rdma_dev", 8)) {
+               return EINVAL;
+       }
+
+       /* numeric suffix selects which translatable address to use */
+       index = atoi(name + 8);
+
+       hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
+       if (FAILED(hr)) {
+               return hr;
+       }
+
+       hr = getaddrinfo("..localmachine", NULL, NULL, &res);
+       if (hr) {
+               goto release;
+       }
+
+       for (ai = res; ai; ai = ai->ai_next) {
+               hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
+               /* index only counts down for addresses that translate and fit */
+               if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
+                       memcpy(addr, ai->ai_addr, ai->ai_addrlen);
+                       goto free;
+               }
+       }
+       hr = ENODEV;
+
+free:
+       freeaddrinfo(res);
+release:
+       prov->lpVtbl->Release(prov);
+       return hr;
+}
+
+/* Windows: fill the global windata via ibvw_get_windata; returns its result */
+static int dapls_os_init(void)
+{
+       return ibvw_get_windata(&windata, IBVW_WINDATA_VERSION);
+}
+
+/* Windows: release windata if it was obtained, then clear comp_mgr */
+static void dapls_os_release(void)
+{
+       if (windata.comp_mgr)
+               ibvw_release_windata(&windata, IBVW_WINDATA_VERSION);
+       windata.comp_mgr = NULL;
+}
+
+/*
+ * Windows: set the CM event channel timeout to 0 milliseconds
+ * (presumably makes event retrieval non-blocking - TODO confirm).
+ * Always returns 0.
+ */
+static int dapls_config_cm_channel(struct rdma_event_channel *channel)
+{
+       channel->channel.Milliseconds = 0;
+       return 0;
+}
+
+/*
+ * Windows: set the verbs context channel timeout to 0 milliseconds
+ * (presumably makes async event retrieval non-blocking - TODO confirm).
+ * Always returns 0.
+ */
+static int dapls_config_verbs(struct ibv_context *verbs)
+{
+       verbs->channel.Milliseconds = 0;
+       return 0;
+}
+
+/*
+ * Windows: cancel the completion manager wait (presumably wakes the
+ * work thread - TODO confirm). Always returns 0.
+ */
+static int dapls_thread_signal(void)
+{
+       CompManagerCancel(windata.comp_mgr);
+       return 0;
+}
+#else                          // _WIN64 || WIN32
+int g_ib_pipe[2];
+
+/* Linux: OS specific init; returns the result of pipe(), 0 on success */
+static int dapls_os_init(void)
+{
+       /* create pipe for waking up work thread */
+       return pipe(g_ib_pipe);
+}
+
+/*
+ * Linux: OS specific release; currently a no-op.
+ * NOTE(review): g_ib_pipe is never closed here - confirm whether the
+ * work thread can still be using it before adding a close.
+ */
+static void dapls_os_release(void)
+{
+       /* close pipe? */
+}
+
+/*
+ * Get IP address using network device name
+ *
+ * Input:
+ *     name            network device name, copied into ifr_name
+ *     addr            buffer that receives the interface address
+ *     addr_len        size of addr in bytes
+ *
+ * Returns:
+ *     0 on success, non-zero if the socket cannot be created, the
+ *     SIOCGIFADDR ioctl fails, or addr is too small for the address
+ */
+static int getipaddr_netdev(char *name, char *addr, int addr_len)
+{
+       struct ifreq ifr;
+       int skfd, ret, len;
+
+       /* Fill in the structure */
+       snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+       ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
+
+       /* Create a socket fd; never ioctl or close an invalid descriptor */
+       skfd = socket(PF_INET, SOCK_STREAM, 0);
+       if (skfd < 0)
+               return errno;
+
+       ret = ioctl(skfd, SIOCGIFADDR, &ifr);
+       if (ret)
+               goto bail;
+
+       switch (ifr.ifr_addr.sa_family) {
+#ifdef AF_INET6
+       case AF_INET6:
+               len = sizeof(struct sockaddr_in6);
+               break;
+#endif
+       case AF_INET:
+       default:
+               len = sizeof(struct sockaddr);
+               break;
+       }
+
+       if (len <= addr_len)
+               memcpy(addr, &ifr.ifr_addr, len);
+       else
+               ret = EINVAL;
+
+      bail:
+       close(skfd);
+       return ret;
+}
+
+/*
+ * Switch a file descriptor to non-blocking mode.
+ *
+ * Returns 0 on success, or the errno value left by the failing fcntl call.
+ */
+static int dapls_config_fd(int fd)
+{
+       int opts, err;
+
+       opts = fcntl(fd, F_GETFL);
+       if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
+               /* capture errno first; logging may overwrite it */
+               err = errno;
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
+                        fd, opts, strerror(err));
+               return err;
+       }
+
+       return 0;
+}
+
+/* Linux: make the CM event channel fd non-blocking; 0 on success */
+static int dapls_config_cm_channel(struct rdma_event_channel *channel)
+{
+       return dapls_config_fd(channel->fd);
+}
+
+/* Linux: make the verbs async event fd non-blocking; 0 on success */
+static int dapls_config_verbs(struct ibv_context *verbs)
+{
+       return dapls_config_fd(verbs->async_fd);
+}
+
+/*
+ * Linux: wake the work thread by writing to the write end of g_ib_pipe.
+ * sizeof "w" is 2, so the 'w' and its terminating NUL are both written.
+ * Returns the result of write(); callers treat -1 as failure.
+ */
+static int dapls_thread_signal(void)
+{
+       return write(g_ib_pipe[1], "w", sizeof "w");
+}
+#endif
+
+/*
+ * Get IP address using network name, address, or device name
+ *
+ * Input:
+ *     name    network device name, hostname or address string
+ *     addr    buffer that receives the socket address
+ *     len     size of addr in bytes
+ *
+ * Returns:
+ *     0 on success, 1 if the name cannot be resolved or the resolved
+ *     address does not fit in addr
+ */
+static int getipaddr(char *name, char *addr, int len)
+{
+       struct addrinfo *res;
+       int ret;
+
+       /* assume netdev for first attempt, then network and address type */
+       if (getipaddr_netdev(name, addr, len)) {
+               /* getaddrinfo reports EAI_* codes, not errno */
+               ret = getaddrinfo(name, NULL, NULL, &res);
+               if (ret) {
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                " open_hca: getaddr_netdev ERROR:"
+                                " %s. Is %s configured?\n",
+                                gai_strerror(ret), name);
+                       return 1;
+               } else {
+                       if (len >= res->ai_addrlen)
+                               memcpy(addr, res->ai_addr, res->ai_addrlen);
+                       else {
+                               freeaddrinfo(res);
+                               return 1;
+                       }
+                       freeaddrinfo(res);
+               }
+       }
+
+       /* debug print always formats the address as IPv4 octets */
+       dapl_dbg_log(
+               DAPL_DBG_TYPE_UTIL,
+               " getipaddr: family %d port %d addr %d.%d.%d.%d\n",
+               ((struct sockaddr_in *)addr)->sin_family,
+               ((struct sockaddr_in *)addr)->sin_port,
+               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
+               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
+               ((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
+               ((struct sockaddr_in *)addr)->sin_addr.
+                s_addr >> 24 & 0xff);
+
+       return 0;
+}
+
+/*
+ * dapls_ib_init, dapls_ib_release
+ *
+ * Initialize Verb related items for device open
+ *
+ * Input:
+ *     none
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     dapls_ib_init: 0 success, 1 if the OS specific init fails
+ *     dapls_ib_release: always 0
+ *
+ */
+int32_t dapls_ib_init(void)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
+
+       /* initialize hca_list lock */
+       dapl_os_lock_init(&g_hca_lock);
+
+       /* initialize hca list for CQ events */
+       dapl_llist_init_head(&g_hca_list);
+
+       if (dapls_os_init())
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Stop the work thread, destroy the global CM event channel and release
+ * OS specific resources. Always returns 0.
+ */
+int32_t dapls_ib_release(void)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
+       dapli_ib_thread_destroy();
+       if (g_cm_events != NULL) {
+               rdma_destroy_event_channel(g_cm_events);
+               /* open_hca only recreates the channel when this is NULL */
+               g_cm_events = NULL;
+       }
+       dapls_os_release();
+       return 0;
+}
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA
+ *
+ * Input:
+ *      hca_name          provider device name; network device name,
+ *                        hostname or IP address
+ *      hca_ptr           DAPL HCA object to fill in
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *      DAT_INTERNAL_ERROR
+ *      DAT_INVALID_ADDRESS
+ *      status returned by dapli_ib_thread_init
+ *
+ */
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
+{
+       struct rdma_cm_id *cm_id = NULL;
+       union ibv_gid *gid;
+       int ret;
+       DAT_RETURN dat_status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: %s - %p\n", hca_name, hca_ptr);
+
+       /* Setup the global cm event channel */
+       dapl_os_lock(&g_hca_lock);
+       if (g_cm_events == NULL) {
+               g_cm_events = rdma_create_event_channel();
+               if (g_cm_events == NULL) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                                    " open_hca: ERR - RDMA channel %s\n",
+                                    strerror(errno));
+                       /* do not leave g_hca_lock held on the error path */
+                       dapl_os_unlock(&g_hca_lock);
+                       return DAT_INTERNAL_ERROR;
+               }
+       }
+       dapl_os_unlock(&g_hca_lock);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: RDMA channel created (%p)\n", g_cm_events);
+
+       dat_status = dapli_ib_thread_init();
+       if (dat_status != DAT_SUCCESS)
+               return dat_status;
+
+       /* HCA name will be hostname or IP address */
+       if (getipaddr((char *)hca_name,
+                     (char *)&hca_ptr->hca_address, 
+                     sizeof(DAT_SOCK_ADDR6)))
+               return DAT_INVALID_ADDRESS;
+
+       /* cm_id will bind local device/GID based on IP address */
+       if (rdma_create_id(g_cm_events, &cm_id, 
+                          (void *)hca_ptr, RDMA_PS_TCP)) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: rdma_create ERR %s\n", strerror(errno));
+               return DAT_INTERNAL_ERROR;
+       }
+       ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);
+       if ((ret) || (cm_id->verbs == NULL)) {
+               rdma_destroy_id(cm_id);
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: rdma_bind ERR %s."
+                        " Is %s configured?\n", strerror(errno), hca_name);
+               return DAT_INVALID_ADDRESS;
+       }
+
+       /* keep reference to IB device and cm_id */
+       hca_ptr->ib_trans.cm_id = cm_id;
+       hca_ptr->ib_hca_handle = cm_id->verbs;
+       dapls_config_verbs(cm_id->verbs);
+       hca_ptr->port_num = cm_id->port_num;
+       hca_ptr->ib_trans.ib_dev = cm_id->verbs->device;
+       gid = &cm_id->route.addr.addr.ibaddr.sgid;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: ctx=%p port=%d GID subnet %016llx"
+                    " id %016llx\n", cm_id->verbs, cm_id->port_num,
+                    (unsigned long long)ntohll(gid->global.subnet_prefix),
+                    (unsigned long long)ntohll(gid->global.interface_id));
+
+       /* set inline max with env or default, get local lid and gid 0 */
+       if (hca_ptr->ib_hca_handle->device->transport_type
+           == IBV_TRANSPORT_IWARP)
+               hca_ptr->ib_trans.max_inline_send =
+                   dapl_os_get_env_val("DAPL_MAX_INLINE",
+                                       INLINE_SEND_IWARP_DEFAULT);
+       else
+               hca_ptr->ib_trans.max_inline_send =
+                   dapl_os_get_env_val("DAPL_MAX_INLINE",
+                                       INLINE_SEND_IB_DEFAULT);
+
+       /* set CM timer defaults */
+       hca_ptr->ib_trans.max_cm_timeout =
+           dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",
+                               IB_CM_RESPONSE_TIMEOUT);
+       hca_ptr->ib_trans.max_cm_retries =
+           dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);
+       
+       /* set default IB MTU */
+       hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048);
+
+       /* 
+        * Put new hca_transport on list for async and CQ event processing 
+        * Wakeup work thread to add to polling list
+        */
+       dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry);
+       dapl_os_lock(&g_hca_lock);
+       dapl_llist_add_tail(&g_hca_list,
+                           (DAPL_LLIST_ENTRY *) & hca_ptr->ib_trans.entry,
+                           &hca_ptr->ib_trans.entry);
+       if (dapls_thread_signal() == -1)
+               dapl_log(DAPL_DBG_TYPE_UTIL,
+                        " open_hca: thread wakeup error = %s\n",
+                        strerror(errno));
+       dapl_os_unlock(&g_hca_lock);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,
+                    ((struct sockaddr_in *)
+                    &hca_ptr->hca_address)->sin_family == AF_INET ?
+                    "AF_INET" : "AF_INET6", 
+                    ((struct sockaddr_in *)
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff, 
+                    ((struct sockaddr_in *)
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff, 
+                    ((struct sockaddr_in *)
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff, 
+                    ((struct sockaddr_in *)
+                    &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff, 
+                    hca_ptr->ib_trans.max_inline_send);
+
+       hca_ptr->ib_trans.d_hca = hca_ptr;
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_close_hca
+ *
+ * Close HCA
+ *
+ * Input:
+ *      DAPL_HCA   provide CA handle
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *     dapl_convert_errno 
+ *
+ */
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",
+                    hca_ptr, hca_ptr->ib_hca_handle);
+
+       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+               if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
+                       return (dapl_convert_errno(errno, "ib_close_device"));
+               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+       }
+
+       /* without a running work thread nobody will acknowledge the destroy */
+       dapl_os_lock(&g_hca_lock);
+       if (g_ib_thread_state != IB_THREAD_RUN) {
+               dapl_os_unlock(&g_hca_lock);
+               goto bail;
+       }
+       dapl_os_unlock(&g_hca_lock);
+
+       /* 
+        * Remove hca from async event processing list
+        * Wakeup work thread to remove from polling list
+        */
+       hca_ptr->ib_trans.destroy = 1;
+       if (dapls_thread_signal() == -1)
+               dapl_log(DAPL_DBG_TYPE_UTIL,
+                        " destroy: thread wakeup error = %s\n",
+                        strerror(errno));
+
+       /* wait for thread to remove HCA references */
+       while (hca_ptr->ib_trans.destroy != 2) {
+               if (dapls_thread_signal() == -1)
+                       dapl_log(DAPL_DBG_TYPE_UTIL,
+                                " destroy: thread wakeup error = %s\n",
+                                strerror(errno));
+               /* the %p in the format needs its argument */
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " ib_thread_destroy: wait on hca %p destroy\n",
+                            hca_ptr);
+               dapl_os_sleep_usec(1000);
+       }
+bail:
+       return (DAT_SUCCESS);
+}
+
+
+/*
+ * Start the provider work thread if it has not been started yet.
+ *
+ * Makes the CM event channel non-blocking, creates the thread and waits
+ * until it reports IB_THREAD_RUN.
+ *
+ * Returns DAT_SUCCESS if the thread is running or was already past the
+ * IB_THREAD_INIT state, otherwise an error status.
+ */
+DAT_RETURN dapli_ib_thread_init(void)
+{
+       DAT_RETURN dat_status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " ib_thread_init(%d)\n", dapl_os_getpid());
+
+       dapl_os_lock(&g_hca_lock);
+       if (g_ib_thread_state != IB_THREAD_INIT) {
+               dapl_os_unlock(&g_hca_lock);
+               return DAT_SUCCESS;
+       }
+
+       /* uCMA events non-blocking */
+       if (dapls_config_cm_channel(g_cm_events)) {
+               dapl_os_unlock(&g_hca_lock);
+               return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
+       }
+
+       g_ib_thread_state = IB_THREAD_CREATE;
+       dapl_os_unlock(&g_hca_lock);
+
+       /* create thread to process inbound connect request */
+       dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
+       if (dat_status != DAT_SUCCESS) {
+               int err = errno;
+               DAT_RETURN conv_status;
+
+               /* no thread was started; let a later open retry the create */
+               dapl_os_lock(&g_hca_lock);
+               g_ib_thread_state = IB_THREAD_INIT;
+               dapl_os_unlock(&g_hca_lock);
+
+               conv_status = dapl_convert_errno(err,
+                                                "create_thread ERR:"
+                                                " check resource limits");
+               /* errno may be 0 here; never report success on failure */
+               return (conv_status != DAT_SUCCESS) ? conv_status : dat_status;
+       }
+
+       /* wait for thread to start */
+       dapl_os_lock(&g_hca_lock);
+       while (g_ib_thread_state != IB_THREAD_RUN) {
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " ib_thread_init: waiting for ib_thread\n");
+               dapl_os_unlock(&g_hca_lock);
+               dapl_os_sleep_usec(1000);
+               dapl_os_lock(&g_hca_lock);
+       }
+       dapl_os_unlock(&g_hca_lock);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " ib_thread_init(%d) exit\n", dapl_os_getpid());
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * Ask the work thread to exit and wait a bounded time for it.
+ *
+ * Does nothing unless the thread state is IB_THREAD_RUN. Otherwise sets
+ * IB_THREAD_CANCEL, signals the thread, and polls for IB_THREAD_EXIT,
+ * giving up once the retry counter runs out.
+ */
+void dapli_ib_thread_destroy(void)
+{
+       int retries = 10;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " ib_thread_destroy(%d)\n", dapl_os_getpid());
+       /* 
+        * wait for async thread to terminate. 
+        * pthread_join would be the correct method
+        * but some applications have some issues
+        */
+
+       /* destroy ib_thread, wait for termination, if not already */
+       dapl_os_lock(&g_hca_lock);
+       if (g_ib_thread_state != IB_THREAD_RUN)
+               goto bail;
+
+       g_ib_thread_state = IB_THREAD_CANCEL;
+       if (dapls_thread_signal() == -1)
+               dapl_log(DAPL_DBG_TYPE_UTIL,
+                        " destroy: thread wakeup error = %s\n",
+                        strerror(errno));
+       while ((g_ib_thread_state != IB_THREAD_EXIT) && (retries--)) {
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " ib_thread_destroy: waiting for ib_thread\n");
+               if (dapls_thread_signal() == -1)
+                       dapl_log(DAPL_DBG_TYPE_UTIL,
+                                " destroy: thread wakeup error = %s\n",
+                                strerror(errno));
+               /* drop the lock while sleeping so the thread can update state */
+               dapl_os_unlock(&g_hca_lock);
+               dapl_os_sleep_usec(2000);
+               dapl_os_lock(&g_hca_lock);
+       }
+bail:
+       dapl_os_unlock(&g_hca_lock);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
+}
+
+/*
+ * Fetch one async event from the HCA's verbs context and dispatch it.
+ *
+ * Does nothing when the transport is marked for destroy or when
+ * ibv_get_async_event returns non-zero. CQ, QP and unaffiliated events
+ * are logged and passed to the matching callback registered on the
+ * transport, if any; every fetched event is acknowledged.
+ *
+ * Input:
+ *     hca     transport whose cm_id->verbs context is polled
+ */
+void dapli_async_event_cb(struct _ib_hca_transport *hca)
+{
+       struct ibv_async_event event;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " async_event(%p)\n", hca);
+
+       if (hca->destroy)
+               return;
+
+       if (!ibv_get_async_event(hca->cm_id->verbs, &event)) {
+
+               switch (event.event_type) {
+               case IBV_EVENT_CQ_ERR:
+               {
+                       /*
+                        * NOTE(review): cq_context is held in a dapl_ep
+                        * pointer although the name suggests an EVD - confirm.
+                        */
+                       struct dapl_ep *evd_ptr =
+                               event.element.cq->cq_context;
+
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                "dapl async_event CQ (%p) ERR %d\n",
+                                evd_ptr, event.event_type);
+
+                       /* report up if async callback still setup */
+                       if (hca->async_cq_error)
+                               hca->async_cq_error(hca->cm_id->verbs,
+                                                       event.element.cq,
+                                                       &event,
+                                                       (void *)evd_ptr);
+                       break;
+               }
+               case IBV_EVENT_COMM_EST:
+               {
+                       /* Received msgs on connected QP before RTU */
+                       dapl_log(DAPL_DBG_TYPE_UTIL,
+                                " async_event COMM_EST(%p) rdata beat RTU\n",
+                                event.element.qp);
+
+                       break;
+               }
+               /*
+                * NOTE(review): the SRQ events below are read through
+                * event.element.qp; libibverbs documents element.srq for
+                * them - confirm this branch is safe for SRQ events.
+                */
+               case IBV_EVENT_QP_FATAL:
+               case IBV_EVENT_QP_REQ_ERR:
+               case IBV_EVENT_QP_ACCESS_ERR:
+               case IBV_EVENT_QP_LAST_WQE_REACHED:
+               case IBV_EVENT_SRQ_ERR:
+               case IBV_EVENT_SRQ_LIMIT_REACHED:
+               case IBV_EVENT_SQ_DRAINED:
+               {
+                       struct dapl_ep *ep_ptr =
+                               event.element.qp->qp_context;
+
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                "dapl async_event QP (%p) ERR %d\n",
+                                ep_ptr, event.event_type);
+
+                       /* report up if async callback still setup */
+                       if (hca->async_qp_error)
+                               hca->async_qp_error(hca->cm_id->verbs,
+                                                   ep_ptr->qp_handle,
+                                                   &event,
+                                                   (void *)ep_ptr);
+                       break;
+               }
+               case IBV_EVENT_PATH_MIG:
+               case IBV_EVENT_PATH_MIG_ERR:
+               case IBV_EVENT_DEVICE_FATAL:
+               case IBV_EVENT_PORT_ACTIVE:
+               case IBV_EVENT_PORT_ERR:
+               case IBV_EVENT_LID_CHANGE:
+               case IBV_EVENT_PKEY_CHANGE:
+               case IBV_EVENT_SM_CHANGE:
+               {
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                "dapl async_event: DEV ERR %d\n",
+                                event.event_type);
+
+                       /* report up if async callback still setup */
+                       if (hca->async_unafiliated)
+                               hca->async_unafiliated(hca->cm_id->
+                                                       verbs, &event,
+                                                       hca->
+                                                       async_un_ctx);
+                       break;
+               }
+               case IBV_EVENT_CLIENT_REREGISTER:
+                       /* no need to report this event this time */
+                       dapl_log(DAPL_DBG_TYPE_UTIL,
+                                " async_event: IBV_CLIENT_REREGISTER\n");
+                       break;
+
+               default:
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                "dapl async_event: %d UNKNOWN\n",
+                                event.event_type);
+                       break;
+
+               }
+               /* every event obtained from ibv_get_async_event is acked */
+               ibv_ack_async_event(&event);
+       }
+}
+
+#if defined(_WIN64) || defined(_WIN32)
+/*
+ * Work thread for uAT, uCM, CQ, and async events (Windows build).
+ *
+ * Each pass snapshots the devices on g_hca_list into a local array while
+ * holding g_hca_lock, drops the lock, blocks in CompManagerPoll(), then
+ * services CM events followed by per-device async events. A device flagged
+ * destroy == 1 is unlinked from g_hca_list and then marked destroy = 2.
+ * The loop runs for as long as g_ib_thread_state stays IB_THREAD_RUN; on
+ * exit the state is set to IB_THREAD_EXIT.
+ *
+ * Input:
+ *     arg             unused
+ */
+void dapli_thread(void *arg)
+{
+       struct _ib_hca_transport *hca;
+       struct _ib_hca_transport *uhca[8];
+       COMP_CHANNEL *channel;
+       int ret, idx, cnt;
+       const int max_hca = (int)(sizeof(uhca) / sizeof(uhca[0]));
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
+                    dapl_os_getpid(), g_ib_thread);
+
+       /* the lock is re-taken by the for-increment before each new pass */
+       dapl_os_lock(&g_hca_lock);
+       for (g_ib_thread_state = IB_THREAD_RUN;
+            g_ib_thread_state == IB_THREAD_RUN;
+            dapl_os_lock(&g_hca_lock)) {
+
+               idx = 0;
+               hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
+                     dapl_llist_peek_head(&g_hca_list);
+
+               /*
+                * Never write past the snapshot array: devices beyond its
+                * capacity are left out of this pass instead of smashing
+                * the stack.
+                */
+               while (hca && idx < max_hca) {
+                       uhca[idx++] = hca;
+                       hca = dapl_llist_next_entry(&g_hca_list,
+                                                   (DAPL_LLIST_ENTRY *)
+                                                   &hca->entry);
+               }
+               cnt = idx;
+
+               dapl_os_unlock(&g_hca_lock);
+               ret = CompManagerPoll(windata.comp_mgr, INFINITE, &channel);
+
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " ib_thread(%d) poll_event 0x%x\n",
+                            dapl_os_getpid(), ret);
+
+               dapli_cma_event_cb();
+
+               /* check and process ASYNC events, per device */
+               for (idx = 0; idx < cnt; idx++) {
+                       if (uhca[idx]->destroy == 1) {
+                               dapl_os_lock(&g_hca_lock);
+                               dapl_llist_remove_entry(&g_hca_list,
+                                                       (DAPL_LLIST_ENTRY *)
+                                                       &uhca[idx]->entry);
+                               dapl_os_unlock(&g_hca_lock);
+                               uhca[idx]->destroy = 2;
+                       } else {
+                               dapli_async_event_cb(uhca[idx]);
+                       }
+               }
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
+                    dapl_os_getpid());
+       g_ib_thread_state = IB_THREAD_EXIT;
+       dapl_os_unlock(&g_hca_lock);
+}
+#else                          // _WIN64 || WIN32
+/*
+ * Work thread for uAT, uCM, CQ, and async events (poll() based build).
+ *
+ * Descriptor layout: ufds[0] is the wakeup pipe, ufds[1] is the CM event
+ * channel, and ufds[2..] hold one async-event fd per device found on
+ * g_hca_list. The device slots are rebuilt on every pass while holding
+ * g_hca_lock; the lock is dropped before poll() and re-taken at the end
+ * of the pass. A readable pipe is drained and then every polled device
+ * flagged destroy == 1 is unlinked from g_hca_list and marked destroy = 2.
+ * The loop runs for as long as g_ib_thread_state stays IB_THREAD_RUN; on
+ * exit the state is set to IB_THREAD_EXIT.
+ *
+ * Input:
+ *     arg             unused
+ */
+void dapli_thread(void *arg)
+{
+       struct pollfd ufds[__FD_SETSIZE];
+       struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
+       struct _ib_hca_transport *hca;
+       int ret, idx, fds;
+       char rbuf[2];
+
+       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+                    " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",
+                    dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],
+                    g_cm_events->fd);
+
+       /* Poll across pipe, CM, AT never changes */
+       dapl_os_lock(&g_hca_lock);
+       g_ib_thread_state = IB_THREAD_RUN;
+
+       ufds[0].fd = g_ib_pipe[0];      /* pipe */
+       ufds[0].events = POLLIN;
+       ufds[1].fd = g_cm_events->fd;   /* uCMA */
+       ufds[1].events = POLLIN;
+
+       while (g_ib_thread_state == IB_THREAD_RUN) {
+
+               /* build ufds after pipe and uCMA events */
+               ufds[0].revents = 0;
+               ufds[1].revents = 0;
+               idx = 1;
+
+               /*  Walk HCA list and setup async and CQ events */
+               if (!dapl_llist_is_empty(&g_hca_list))
+                       hca = dapl_llist_peek_head(&g_hca_list);
+               else
+                       hca = NULL;
+
+               /*
+                * idx is pre-incremented below, so stop while the next
+                * slot is still inside ufds[]/uhca[].
+                */
+               while (hca && idx < __FD_SETSIZE - 1) {
+
+                       /* uASYNC events */
+                       ufds[++idx].fd = hca->cm_id->verbs->async_fd;
+                       ufds[idx].events = POLLIN;
+                       ufds[idx].revents = 0;
+                       uhca[idx] = hca;
+
+                       /* one argument per conversion, in format order */
+                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+                                    " ib_thread(%d) poll_fd: hca[%d]=%p,"
+                                    " async=%d pipe=%d cm=%d \n",
+                                    dapl_os_getpid(), idx, hca,
+                                    ufds[idx].fd, ufds[0].fd, ufds[1].fd);
+
+                       hca = dapl_llist_next_entry(&g_hca_list,
+                                                   (DAPL_LLIST_ENTRY *)
+                                                   &hca->entry);
+               }
+
+               /* unlock, and setup poll */
+               fds = idx + 1;
+               dapl_os_unlock(&g_hca_lock);
+               ret = poll(ufds, fds, -1);
+               if (ret <= 0) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+                                    " ib_thread(%d): ERR %s poll\n",
+                                    dapl_os_getpid(), strerror(errno));
+                       /* loop condition is evaluated with the lock held */
+                       dapl_os_lock(&g_hca_lock);
+                       continue;
+               }
+
+               dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+                            " ib_thread(%d) poll_event: "
+                            " async=0x%x pipe=0x%x cm=0x%x \n",
+                            dapl_os_getpid(), ufds[idx].revents,
+                            ufds[0].revents, ufds[1].revents);
+
+               /* uCMA events */
+               if (ufds[1].revents == POLLIN)
+                       dapli_cma_event_cb();
+
+               /* check and process ASYNC events, per device */
+               for (idx = 2; idx < fds; idx++) {
+                       if (ufds[idx].revents == POLLIN) {
+                               dapli_async_event_cb(uhca[idx]);
+                       }
+               }
+
+               /* check and process user events, PIPE */
+               if (ufds[0].revents == POLLIN) {
+                       if (read(g_ib_pipe[0], rbuf, 2) == -1)
+                               dapl_log(DAPL_DBG_TYPE_THREAD,
+                                        " cr_thread: pipe rd err= %s\n",
+                                        strerror(errno));
+
+                       /* cleanup any device on list marked for destroy */
+                       for (idx = 2; idx < fds; idx++) {
+                               if (uhca[idx] && uhca[idx]->destroy == 1) {
+                                       dapl_os_lock(&g_hca_lock);
+                                       dapl_llist_remove_entry(
+                                               &g_hca_list,
+                                               (DAPL_LLIST_ENTRY*)
+                                               &uhca[idx]->entry);
+                                       dapl_os_unlock(&g_hca_lock);
+                                       uhca[idx]->destroy = 2;
+                               }
+                       }
+               }
+               dapl_os_lock(&g_hca_lock);
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",
+                    dapl_os_getpid());
+       g_ib_thread_state = IB_THREAD_EXIT;
+       dapl_os_unlock(&g_hca_lock);
+}
+#endif
diff --git a/dapl/openib_common/cq.c b/dapl/openib_common/cq.c
new file mode 100644 (file)
index 0000000..74a5940
--- /dev/null
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2009 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "openib_osd.h"
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ring_buffer_util.h"
+
+/*
+ * Translation table: verbs work-completion status -> DAT DTO completion
+ * status. Consumers search it for a matching ib_status value.
+ *
+ * Not returned by verbs: DAT_DTO_ERR_PARTIAL_PACKET
+ */
+struct ib_status_map {
+       int ib_status;
+       DAT_DTO_COMPLETION_STATUS dat_status;
+};
+
+static struct ib_status_map ib_status_map[] = {
+       {IBV_WC_SUCCESS, DAT_DTO_SUCCESS},                      /* 00 */
+       {IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},         /* 01 */
+       {IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},           /* 02 */
+       {IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},         /* 03 */
+       {IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},    /* 04 */
+       {IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},             /* 05 */
+       {IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},         /* 06 */
+       {IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},        /* 07 */
+       {IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},  /* 08 */
+       {IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER}, /* 09 */
+       {IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},     /* 10 */
+       {IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},      /* 11 */
+       {IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},          /* 12 */
+       {IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY}, /* 13 */
+       {IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION}, /* 14 */
+       {IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER}, /* 15 */
+       {IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},   /* 16 */
+       {IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},           /* 17 */
+       {IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},      /* 18 */
+       {IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},              /* 19 */
+       {IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY}, /* 20 */
+       {IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},            /* 21 */
+};
+
+/*
+ * dapls_ib_get_dto_status
+ *
+ * Return the DAT status of a DTO operation
+ *
+ * Input:
+ *     cqe_ptr         pointer to completion queue entry
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     dat_status of the ib_status_map entry whose ib_status matches the
+ *     completion status, or DAT_DTO_FAILURE when no entry matches.
+ *     Any matched status other than IBV_WC_SUCCESS is logged together
+ *     with the completion op type.
+ */
+
+DAT_DTO_COMPLETION_STATUS
+dapls_ib_get_dto_status(IN ib_work_completion_t * cqe_ptr)
+{
+       /* bound the search by the table itself, not by an enum value */
+       const unsigned int entries =
+           sizeof(ib_status_map) / sizeof(ib_status_map[0]);
+       uint32_t ib_status;
+       unsigned int i;
+
+       ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
+
+       /*
+        * Due to the implementation of verbs completion code, we need to
+        * search the table for the correct value rather than assuming
+        * linear distribution.
+        */
+       for (i = 0; i < entries; i++) {
+               if (ib_status == ib_status_map[i].ib_status) {
+                       if (ib_status != IBV_WC_SUCCESS) {
+                               dapl_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
+                                        " DTO completion ERROR: %d:"
+                                        " op %#x\n",
+                                        ib_status,
+                                        DAPL_GET_CQE_OPTYPE(cqe_ptr));
+                       }
+                       return ib_status_map[i].dat_status;
+               }
+       }
+
+       return DAT_DTO_FAILURE;
+}
+
+/*
+ * dapls_ib_get_async_event
+ *
+ * Translate a verbs async event record into a DAT event number.
+ *
+ * Input:
+ *     err_record      verbs async event; only event_type is examined
+ *
+ * Output:
+ *     async_event     DAT event number, written only for mapped events
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_NOT_IMPLEMENTED     event type has no mapping
+ */
+DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record,
+                                   OUT DAT_EVENT_NUMBER * async_event)
+{
+       int event_type = err_record->event_type;
+
+       switch (event_type) {
+       case IBV_EVENT_CQ_ERR:
+               /* OVERFLOW error */
+               *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
+               return DAT_SUCCESS;
+
+       case IBV_EVENT_DEVICE_FATAL:
+               /* INTERNAL errors */
+               *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
+               return DAT_SUCCESS;
+
+       case IBV_EVENT_PORT_ERR:
+               /* CATASTROPHIC errors */
+               *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
+               return DAT_SUCCESS;
+
+       case IBV_EVENT_SQ_DRAINED:
+       case IBV_EVENT_QP_FATAL:
+       case IBV_EVENT_QP_REQ_ERR:
+       case IBV_EVENT_QP_ACCESS_ERR:
+               /* BROKEN QP error */
+               *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
+               return DAT_SUCCESS;
+
+       case IBV_EVENT_COMM_EST:
+               /* connection completion */
+               *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
+               return DAT_SUCCESS;
+
+       case IBV_EVENT_PATH_MIG:
+       case IBV_EVENT_PATH_MIG_ERR:
+       case IBV_EVENT_PORT_ACTIVE:
+       case IBV_EVENT_LID_CHANGE:
+       case IBV_EVENT_PKEY_CHANGE:
+       case IBV_EVENT_SM_CHANGE:
+               /* TODO: process HW state changes */
+       default:
+               break;
+       }
+
+       /* unmapped event: *async_event is left untouched */
+       return DAT_ERROR(DAT_NOT_IMPLEMENTED, 0);
+}
+
+/*
+ * dapls_ib_cq_alloc
+ *
+ * Create a verbs CQ for an EVD, attach it to the EVD's wait-object
+ * channel, and arm it for completion events.
+ *
+ * Input:
+ *     ia_ptr                  IA whose HCA handle owns the CQ
+ *     evd_ptr                 pointer to EVD struct (also the CQ context)
+ *     cqlen                   minimum QLen requested
+ *
+ * Output:
+ *     cqlen                   CQ entry count reported by verbs
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
+                 IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
+{
+       ib_cq_handle_t cq;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
+
+       /* Call IB verbs to create CQ; the handle is stored even on failure */
+       cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
+                          evd_ptr, evd_ptr->cq_wait_obj_handle, 0);
+       evd_ptr->ib_cq_handle = cq;
+       if (cq == IB_INVALID_HANDLE)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       /* arm cq for events */
+       dapls_set_cq_notify(ia_ptr, evd_ptr);
+
+       /* update with returned cq entry size */
+       *cqlen = cq->cqe;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
+                    cq, *cqlen);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cq_resize
+ *
+ * Resize a CQ by creating a replacement of the requested length and
+ * destroying the original. When the original cannot be destroyed the
+ * replacement is released again and the EVD keeps its current CQ.
+ *
+ * Input:
+ *     ia_ptr                  IA whose HCA handle owns the CQ
+ *     evd_ptr                 pointer to EVD struct
+ *     cqlen                   minimum QLen requested
+ *
+ * Output:
+ *     cqlen                   CQ entry count reported by verbs (on success)
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     dapl_convert_errno of the ibv_destroy_cq failure code
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_resize(IN DAPL_IA * ia_ptr,
+                  IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
+{
+       ib_cq_handle_t new_cq;
+       struct ibv_comp_channel *channel = evd_ptr->cq_wait_obj_handle;
+       int ret;
+
+       /* IB verbs DOES support resize. REDO THIS.
+        * Try to re-create CQ
+        * with new size. Can only be done if QP is not attached.
+        * destroy EBUSY == QP still attached.
+        */
+
+       /* Call IB verbs to create CQ */
+       new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
+                              evd_ptr, channel, 0);
+
+       if (new_cq == IB_INVALID_HANDLE)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       /*
+        * destroy the original and replace if successful. Keep the failure
+        * code returned by verbs: errno may be overwritten by the cleanup
+        * call that follows.
+        */
+       ret = ibv_destroy_cq(evd_ptr->ib_cq_handle);
+       if (ret) {
+               ibv_destroy_cq(new_cq);
+               return (dapl_convert_errno(ret, "resize_cq"));
+       }
+
+       /* update EVD with new cq handle and size */
+       evd_ptr->ib_cq_handle = new_cq;
+       *cqlen = new_cq->cqe;
+
+       /* arm cq for events */
+       dapls_set_cq_notify(ia_ptr, evd_ptr);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cq_free
+ *
+ * destroy a CQ
+ *
+ * Pending work completions and queued EVD events are drained and
+ * discarded before the CQ is destroyed. An EVD without a CQ is a no-op.
+ *
+ * Input:
+ *     ia_ptr                  IA handle (unused)
+ *     evd_ptr                 pointer to EVD struct
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno of the ibv_destroy_cq failure code
+ *
+ */
+DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
+{
+       DAT_EVENT event;
+       ib_work_completion_t wc;
+       int ret;
+
+       if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
+               /* pull off CQ and EVD entries and toss */
+               while (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, &wc) == 1) ;
+               while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ;
+
+               /* verbs hands back the failure code as the return value */
+               ret = ibv_destroy_cq(evd_ptr->ib_cq_handle);
+               if (ret)
+                       return (dapl_convert_errno(ret, "ibv_destroy_cq"));
+               evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+       }
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_set_cq_notify
+ *
+ * Request notification for the next completion on the EVD's CQ
+ * (solicited_only argument of ibv_req_notify_cq is 0).
+ *
+ * Input:
+ *     ia_ptr                  IA handle (unused)
+ *     evd_ptr                 evd handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno of the ibv_req_notify_cq failure code
+ */
+DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
+{
+       /* verbs hands back the failure code as the return value */
+       int ret = ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0);
+
+       if (ret)
+               return (dapl_convert_errno(ret, "notify_cq"));
+       else
+               return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_completion_notify
+ *
+ * Set the CQ notification type
+ *
+ * Input:
+ *     hca_handle              hca handle (unused)
+ *     evd_ptr                 evd handle
+ *     type                    notification type, passed to verbs as the
+ *                             solicited_only argument
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno of the ibv_req_notify_cq failure code
+ */
+DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
+                                     IN DAPL_EVD * evd_ptr,
+                                     IN ib_notification_type_t type)
+{
+       /* verbs hands back the failure code as the return value */
+       int ret = ibv_req_notify_cq(evd_ptr->ib_cq_handle, type);
+
+       if (ret)
+               return (dapl_convert_errno(ret, "notify_cq_type"));
+       else
+               return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_completion_poll
+ *
+ * Poll the EVD's CQ for a single work completion.
+ *
+ * Input:
+ *     hca_ptr                 hca handle (unused)
+ *     evd_ptr                 evd handle
+ *
+ * Output:
+ *     wc_ptr                  work completion, filled in by verbs
+ *
+ * Returns:
+ *     DAT_SUCCESS             exactly one completion was returned
+ *     DAT_QUEUE_EMPTY         any other poll result
+ *
+ */
+DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
+                                   IN DAPL_EVD * evd_ptr,
+                                   IN ib_work_completion_t * wc_ptr)
+{
+       if (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr) != 1)
+               return DAT_QUEUE_EMPTY;
+
+       return DAT_SUCCESS;
+}
+
+/* NEW common wait objects for providers with direct CQ wait objects */
+
+/*
+ * dapls_ib_wait_object_create
+ *
+ * Create the verbs completion channel used as the EVD's CQ wait object,
+ * on the HCA of the IA that owns the EVD.
+ *
+ * Input:
+ *     evd_ptr                 evd handle
+ *
+ * Output:
+ *     p_cq_wait_obj_handle    new completion channel, NULL on failure
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES      channel could not be created
+ */
+DAT_RETURN
+dapls_ib_wait_object_create(IN DAPL_EVD * evd_ptr,
+                           IN ib_wait_obj_handle_t * p_cq_wait_obj_handle)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_create: (%p,%p)\n",
+                    evd_ptr, p_cq_wait_obj_handle);
+
+       /* set cq_wait object to evd_ptr */
+       *p_cq_wait_obj_handle =
+           ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->
+                                   ib_hca_handle);
+
+       /* do not report success for a channel that was never created */
+       if (*p_cq_wait_obj_handle == NULL)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_destroy
+ *
+ * Destroy the completion channel behind a CQ wait object. The verbs
+ * result is not examined; DAT_SUCCESS is always returned.
+ */
+DAT_RETURN
+dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
+{
+       struct ibv_comp_channel *chan = p_cq_wait_obj_handle;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_destroy: wait_obj=%p\n", chan);
+       ibv_destroy_comp_channel(chan);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_wakeup
+ *
+ * Placeholder: only traces the call. There is no wake up mechanism for
+ * this wait object, so DAT_SUCCESS is returned unconditionally.
+ */
+DAT_RETURN
+dapls_ib_wait_object_wakeup(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_wakeup: wait_obj=%p\n",
+                    p_cq_wait_obj_handle);
+
+       return DAT_SUCCESS;
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+/*
+ * dapls_ib_wait_object_wait (Windows build)
+ *
+ * Wait on the completion channel for a CQ event and acknowledge it.
+ *
+ * Input:
+ *     p_cq_wait_obj_handle    completion channel to wait on
+ *     timeout                 uDAPL timeout in usecs; stored on the
+ *                             channel in milliseconds
+ *
+ * Returns:
+ *     dapl_convert_errno of the ibv_get_cq_event result
+ */
+DAT_RETURN
+dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
+                         IN uint32_t timeout)
+{
+       /* initialized: both are logged below even when no event arrives */
+       struct dapl_evd *evd_ptr = NULL;
+       struct ibv_cq *ibv_cq = NULL;
+       int status = 0;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_wait: CQ channel %p time %d\n",
+                    p_cq_wait_obj_handle, timeout);
+
+       /* uDAPL timeout values in usecs */
+       p_cq_wait_obj_handle->comp_channel.Milliseconds = timeout / 1000;
+
+       /* returned event */
+       status = ibv_get_cq_event(p_cq_wait_obj_handle, &ibv_cq,
+                                 (void *)&evd_ptr);
+       if (status == 0) {
+               ibv_ack_cq_events(ibv_cq, 1);
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_wait: RET evd %p ibv_cq %p %s\n",
+                    evd_ptr, ibv_cq, strerror(errno));
+
+       return (dapl_convert_errno(status, "cq_wait_object_wait"));
+}
+#else                  //_WIN32 || _WIN64
+/*
+ * dapls_ib_wait_object_wait (poll() based build)
+ *
+ * Wait for the completion channel fd to become readable, then fetch and
+ * acknowledge one CQ event.
+ *
+ * Input:
+ *     p_cq_wait_obj_handle    completion channel to wait on
+ *     timeout                 uDAPL timeout in usecs, or
+ *                             DAT_TIMEOUT_INFINITE to block indefinitely
+ *
+ * Returns:
+ *     DAT_SUCCESS             the channel became readable
+ *     dapl_convert_errno of ETIMEDOUT when the timeout expired
+ *     dapl_convert_errno of errno when poll() failed
+ */
+DAT_RETURN
+dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
+                         IN uint32_t timeout)
+{
+       /* initialized: both are logged below even when no event arrives */
+       struct dapl_evd *evd_ptr = NULL;
+       struct ibv_cq *ibv_cq = NULL;
+       int status = 0;
+       int timeout_ms = -1;
+       struct pollfd cq_fd = {
+               .fd = p_cq_wait_obj_handle->fd,
+               .events = POLLIN,
+               .revents = 0
+       };
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_wait: CQ channel %p time %d\n",
+                    p_cq_wait_obj_handle, timeout);
+
+       /* uDAPL timeout values in usecs */
+       if (timeout != DAT_TIMEOUT_INFINITE)
+               timeout_ms = timeout / 1000;
+
+       status = poll(&cq_fd, 1, timeout_ms);
+
+       if (status > 0) {
+               /* returned event */
+               if (!ibv_get_cq_event(p_cq_wait_obj_handle,
+                                     &ibv_cq, (void *)&evd_ptr)) {
+                       ibv_ack_cq_events(ibv_cq, 1);
+               }
+               status = 0;
+       } else if (status == 0) {
+               /* timeout */
+               status = ETIMEDOUT;
+       } else {
+               /* poll failed: convert the real cause, not the -1 result */
+               status = errno;
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " cq_object_wait: RET evd %p ibv_cq %p %s\n",
+                    evd_ptr, ibv_cq, strerror(errno));
+
+       return (dapl_convert_errno(status, "cq_wait_object_wait"));
+
+}
+#endif                         //_WIN32 || _WIN64
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h
new file mode 100644 (file)
index 0000000..b61e50e
--- /dev/null
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2009 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/* 
+ * Definitions common to all OpenIB providers, cma, scm, ucm 
+ */
+
+#ifndef _DAPL_IB_COMMON_H_
+#define _DAPL_IB_COMMON_H_
+
+#include <infiniband/verbs.h>
+
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
+#ifndef __cplusplus
+#define false 0
+#define true  1
+#endif /* !__cplusplus: false/true are only defined here for C builds */
+
+/* Typedefs to map common DAPL provider types to IB verbs.
+ * Each *_handle_t is a pointer to the matching libibverbs object;
+ * ib_work_completion_t is a struct ibv_wc held by value.
+ */
+typedef        struct ibv_qp           *ib_qp_handle_t;
+typedef        struct ibv_cq           *ib_cq_handle_t;
+typedef        struct ibv_pd           *ib_pd_handle_t;
+typedef        struct ibv_mr           *ib_mr_handle_t;
+typedef        struct ibv_mw           *ib_mw_handle_t;
+typedef        struct ibv_wc           ib_work_completion_t;
+
+/* HCA context type maps to IB verbs (pointer to struct ibv_context);
+ * dapl_ibal_ca_t is an alias of the same handle type.
+ */
+typedef        struct ibv_context      *ib_hca_handle_t;
+typedef ib_hca_handle_t                dapl_ibal_ca_t;
+
+/* QP info to exchange, wire protocol version for these CM's.
+ * DCM_VER is the protocol version constant; presumably it is what the
+ * 'ver' field carries -- confirm against the CM code.
+ * NOTE(review): this layout is exchanged between peers, so changing
+ * field order or sizes likely requires a DCM_VER bump -- confirm.
+ */
+#define DCM_VER 4
+typedef struct _ib_qp_cm
+{ 
+       uint16_t                ver;
+       uint16_t                rej;
+       uint16_t                lid;
+       uint16_t                port;
+       uint32_t                qpn;
+       uint32_t                p_size;
+       union ibv_gid           gid;
+       DAT_SOCK_ADDR6          ia_address;
+       uint16_t                qp_type; 
+} ib_qp_cm_t;
+
+/* CM events: connection manager event codes shared by the OpenIB
+ * providers. Values are implicit, starting at 0 for IB_CME_CONNECTED.
+ */
+typedef enum {
+       IB_CME_CONNECTED,
+       IB_CME_DISCONNECTED,
+       IB_CME_DISCONNECTED_ON_LINK_DOWN,
+       IB_CME_CONNECTION_REQUEST_PENDING,
+       IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+       IB_CME_CONNECTION_REQUEST_ACKED,
+       IB_CME_DESTINATION_REJECT,
+       IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+       IB_CME_DESTINATION_UNREACHABLE,
+       IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+       IB_CME_LOCAL_FAILURE,
+       IB_CME_BROKEN,
+       IB_CME_TIMEOUT
+} ib_cm_events_t;
+
+/* Operation and state mappings: the send op type is a plain int, while
+ * data segments, QP state, async event type and error records map
+ * directly onto the corresponding verbs types.
+ */
+typedef int ib_send_op_type_t;
+typedef        struct  ibv_sge         ib_data_segment_t;
+typedef enum   ibv_qp_state    ib_qp_state_t;
+typedef        enum    ibv_event_type  ib_async_event_type;
+typedef struct ibv_async_event ib_error_record_t;      
+
+/* CQ notifications. Values are implicit: IB_NOTIFY_ON_NEXT_COMP is 0 and
+ * IB_NOTIFY_ON_SOLIC_COMP is 1.
+ * NOTE(review): presumably chosen to match the solicited_only argument
+ * of ibv_req_notify_cq -- confirm against the callers.
+ */
+typedef enum
+{
+       IB_NOTIFY_ON_NEXT_COMP,
+       IB_NOTIFY_ON_SOLIC_COMP
+
+} ib_notification_type_t;
+
+/* other mappings: scalar and string aliases used by the DAPL common
+ * code, plus the CQ wait object, which is a pointer to a verbs
+ * completion channel.
+ */
+typedef int                    ib_bool_t;
+typedef union ibv_gid          GID;
+typedef char                   *IB_HCA_NAME;
+typedef uint16_t               ib_hca_port_t;
+typedef uint32_t               ib_comp_handle_t;
+
+typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
+
+/* Definitions: the invalid value for every verbs handle is NULL */
+#define IB_INVALID_HANDLE      NULL
+
+/* inline send rdma threshold */
+#define        INLINE_SEND_IWARP_DEFAULT       64
+#define        INLINE_SEND_IB_DEFAULT          200
+
+/* qkey for UD QP's */
+#define DAT_UD_QKEY    0x78654321
+
+/* DTO OPs, ordered for DAPL ENUM definitions.
+ * The first seven map onto verbs work-request opcodes; the numbered
+ * values that follow are internal ops with no verbs opcode.
+ */
+#define OP_RDMA_WRITE           IBV_WR_RDMA_WRITE
+#define OP_RDMA_WRITE_IMM       IBV_WR_RDMA_WRITE_WITH_IMM
+#define OP_SEND                 IBV_WR_SEND
+#define OP_SEND_IMM             IBV_WR_SEND_WITH_IMM
+#define OP_RDMA_READ            IBV_WR_RDMA_READ
+#define OP_COMP_AND_SWAP        IBV_WR_ATOMIC_CMP_AND_SWP
+#define OP_FETCH_AND_ADD        IBV_WR_ATOMIC_FETCH_AND_ADD
+#define OP_RECEIVE              7   /* internal op */
+#define OP_RECEIVE_IMM         8   /* rdma write with immed, internal op */
+#define OP_RECEIVE_MSG_IMM     9   /* recv msg with immed, internal op */
+#define OP_BIND_MW              10   /* internal op */
+#define OP_SEND_UD              11  /* internal op */
+#define OP_RECV_UD              12  /* internal op */
+#define OP_INVALID             0xff
+
+/* Definitions to map QP state */
+#define IB_QP_STATE_RESET      IBV_QPS_RESET
+#define IB_QP_STATE_INIT       IBV_QPS_INIT
+#define IB_QP_STATE_RTR                IBV_QPS_RTR
+#define IB_QP_STATE_RTS                IBV_QPS_RTS
+#define IB_QP_STATE_SQD                IBV_QPS_SQD
+#define IB_QP_STATE_SQE                IBV_QPS_SQE
+#define IB_QP_STATE_ERROR      IBV_QPS_ERR
+
+/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
+/* some are errno and some are -n values */
+
+/**
+ * ibv_get_device_name - Return kernel device name
+ * ibv_get_device_guid - Return device's node GUID
+ * ibv_open_device - Return ibv_context or NULL
+ * ibv_close_device - Return 0, (errno?)
+ * ibv_get_async_event - Return 0, -1 
+ * ibv_alloc_pd - Return ibv_pd, NULL
+ * ibv_dealloc_pd - Return 0, errno 
+ * ibv_reg_mr - Return ibv_mr, NULL
+ * ibv_dereg_mr - Return 0, errno
+ * ibv_create_cq - Return ibv_cq, NULL
+ * ibv_destroy_cq - Return 0, errno
+ * ibv_get_cq_event - Return 0 & ibv_cq/context, int
+ * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error 
+ * ibv_req_notify_cq - Return 0 (void?)
+ * ibv_create_qp - Return ibv_qp, NULL
+ * ibv_modify_qp - Return 0, errno
+ * ibv_destroy_qp - Return 0, errno
+ * ibv_post_send - Return 0, -1 & bad_wr
+ * ibv_post_recv - Return 0, -1 & bad_wr 
+ */
+
+/* async handlers for DTO, CQ, QP, and unaffiliated events.
+ * Every handler receives the HCA handle, the verbs async event record
+ * and a caller context; the CQ and QP variants also receive the handle
+ * of the affected object.
+ */
+typedef void (*ib_async_dto_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_cq_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_cq_handle_t     ib_cq_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_qp_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_qp_handle_t     ib_qp_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef enum /* life-cycle states of the provider work thread */
+{
+       IB_THREAD_INIT,
+       IB_THREAD_CREATE,
+       IB_THREAD_RUN, /* presumably the only state in which the work thread keeps looping -- confirm */
+       IB_THREAD_CANCEL,
+       IB_THREAD_EXIT /* presumably set by the work thread as it returns -- confirm */
+
+} ib_thread_state_t;
+
+
+/* provider specific fields for shared memory support */
+typedef uint32_t ib_shm_transport_t;
+
+/* prototypes: functions declared here for all OpenIB providers; their
+ * definitions are outside this header.
+ */
+int32_t        dapls_ib_init(void);
+int32_t        dapls_ib_release(void);
+enum ibv_mtu dapl_ib_mtu(int mtu);
+char *dapl_ib_mtu_str(enum ibv_mtu mtu);
+DAT_RETURN getlocalipaddr(DAT_SOCK_ADDR *addr, int addr_len);
+
+/* inline functions */
+/*
+ * Convert a device name to the provider's HCA name form. The name is
+ * used as-is (ascii name of the local device); the result is a copy
+ * made with dapl_os_strdup().
+ */
+STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
+{
+       IB_HCA_NAME hca_name = dapl_os_strdup(name);
+
+       return hca_name;
+}
+
+/*
+ * Release hook for an HCA name. Intentionally does nothing here: the
+ * name is neither freed nor modified by this function.
+ */
+STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
+{
+       (void)name;
+}
+
+/*
+ * dapl_convert_errno
+ *
+ * Convert errno to DAT_RETURN values
+ *
+ * Input:
+ *     err             errno style code; 0 means success
+ *     str             caller tag, used only in the debug trace
+ *
+ * Returns:
+ *     DAT_SUCCESS for 0, the mapped DAT code for known values and
+ *     DAT_INTERNAL_ERROR for everything else. In DAPL_DBG builds every
+ *     failure except EAGAIN and ETIMEDOUT is traced.
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapl_convert_errno( IN int err, IN const char *str )
+{
+       if (err == 0)
+               return DAT_SUCCESS;
+
+#if DAPL_DBG
+       if (!(err == EAGAIN || err == ETIMEDOUT))
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
+#endif 
+
+       switch (err) {
+       case EOVERFLOW:
+               return DAT_LENGTH_ERROR;
+       case EACCES:
+               return DAT_PRIVILEGES_VIOLATION;
+       case EPERM:
+               return DAT_PROTECTION_VIOLATION;
+       case EINVAL:
+               return DAT_INVALID_HANDLE;
+       case EISCONN:
+               return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
+       case ECONNREFUSED:
+               return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
+       case ETIMEDOUT:
+               return DAT_TIMEOUT_EXPIRED;
+       case ENETUNREACH:
+               return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
+       case EADDRINUSE:
+               return DAT_CONN_QUAL_IN_USE;
+       case EALREADY:
+               return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
+       case ENOMEM:
+               return DAT_INSUFFICIENT_RESOURCES;
+       case EAGAIN:
+               return DAT_QUEUE_EMPTY;
+       case EINTR:
+               return DAT_INTERRUPTED_CALL;
+       case EAFNOSUPPORT:
+               return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
+       case EFAULT:
+       default:
+               return DAT_INTERNAL_ERROR;
+       }
+}
+
+typedef enum dapl_cm_state /* CM states; values are implicit from 0 and index the name table in dapl_cm_state_str(), so keep both in the same order */
+{
+       DCM_INIT,
+       DCM_LISTEN,
+       DCM_CONN_PENDING,
+       DCM_RTU_PENDING,
+       DCM_ACCEPTING,
+       DCM_ACCEPTING_DATA,
+       DCM_ACCEPTED,
+       DCM_REJECTED,
+       DCM_CONNECTED,
+       DCM_RELEASED,
+       DCM_DISCONNECTED,
+       DCM_DESTROY
+} DAPL_CM_STATE;
+
+/*
+ * dapl_cm_state_str
+ *
+ * Return a printable name for a CM state value.
+ *
+ * Input:
+ *     st              CM state value, used as an index into the name table
+ *
+ * Returns:
+ *     The matching entry of the name table, or "Invalid CM state?" when
+ *     st is negative or beyond the end of the table.
+ */
+STATIC _INLINE_ char * dapl_cm_state_str(IN int st)
+{
+       static char *state[] = {
+               "CM_INIT",
+               "CM_LISTEN",
+               "CM_CONN_PENDING",
+               "CM_RTU_PENDING",
+               "CM_ACCEPTING",
+               "CM_ACCEPTING_DATA",
+               "CM_ACCEPTED",
+               "CM_REJECTED",
+               "CM_CONNECTED",
+               "CM_RELEASED",
+               "CM_DISCONNECTED",
+               "CM_DESTROY"
+        };
+       /* range check follows the table, so adding a name needs no edit */
+       const int count = (int)(sizeof(state) / sizeof(state[0]));
+
+        return ((st < 0 || st >= count) ? "Invalid CM state?" : state[st]);
+}
+
+#endif /*  _DAPL_IB_COMMON_H_ */
diff --git a/dapl/openib_common/dapl_ib_dto.h b/dapl/openib_common/dapl_ib_dto.h
new file mode 100644 (file)
index 0000000..e6c03b2
--- /dev/null
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2009 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#ifndef _DAPL_IB_DTO_H_
+#define _DAPL_IB_DTO_H_
+
+#include "dapl_ib_util.h"
+
+#ifdef DAT_EXTENSIONS
+#include <dat2/dat_ib_extensions.h>
+#endif
+
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
+
+#define CQE_WR_TYPE_UD(id) /* true if the DAPL_COOKIE stored in WR id sits on a UD QP */ \
+       (((DAPL_COOKIE *)(uintptr_t)(id))->ep->qp_handle->qp_type == IBV_QPT_UD)
+
+/*
+ * dapls_ib_post_recv - provider specific Post RECV function.
+ * Posts one receive work request over local_iov[0..segments-1] with the
+ * cookie pointer as WR id; returns DAT_SUCCESS or a converted error code.
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_recv (
+       IN  DAPL_EP             *ep_ptr,
+       IN  DAPL_COOKIE         *cookie,
+       IN  DAT_COUNT           segments,
+       IN  DAT_LMR_TRIPLET     *local_iov )
+{
+       struct ibv_recv_wr wr;
+       struct ibv_recv_wr *bad_wr;
+       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
+       DAT_COUNT i, total_len;
+       int ret;
+       
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
+                    ep_ptr, cookie, segments, local_iov);
+
+       /* setup work request */
+       total_len = 0;
+       wr.next = NULL;
+       wr.num_sge = segments;
+       wr.wr_id = (uint64_t)(uintptr_t)cookie;
+       wr.sg_list = ds;
+
+       if (cookie != NULL) {
+               for (i = 0; i < segments; i++) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                                    " post_rcv: l_key 0x%x va %p len %d\n",
+                                    ds->lkey, (void *)(uintptr_t)ds->addr, ds->length );
+                       total_len += ds->length;
+                       ds++;
+               }
+               cookie->val.dto.size = total_len;
+       }
+
+       ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+       /* verbs hands back the error code itself; errno is only the fallback */
+       if (ret)
+               return(dapl_convert_errno(ret > 0 ? ret : errno,"ibv_recv"));
+
+       DAPL_CNTR(ep_ptr, DCNT_EP_POST_RECV);
+       DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_RECV_DATA, total_len);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_post_send - provider specific Post SEND function.
+ * Posts one send, RDMA write or RDMA read work request over local_iov with
+ * the cookie pointer as WR id; returns DAT_SUCCESS or a converted error code.
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_send (
+       IN  DAPL_EP                     *ep_ptr,
+       IN  ib_send_op_type_t           op_type,
+       IN  DAPL_COOKIE                 *cookie,
+       IN  DAT_COUNT                   segments,
+       IN  DAT_LMR_TRIPLET             *local_iov,
+       IN  const DAT_RMR_TRIPLET       *remote_iov,
+       IN  DAT_COMPLETION_FLAGS        completion_flags)
+{
+       struct ibv_send_wr wr;
+       struct ibv_send_wr *bad_wr;
+       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
+       ib_hca_transport_t *ibt_ptr = 
+               &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
+       DAT_COUNT i, total_len;
+       int ret;
+       
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " post_snd: ep %p op %d ck %p sgs "
+                    "%d l_iov %p r_iov %p f %d\n",
+                    ep_ptr, op_type, cookie, segments, local_iov, 
+                    remote_iov, completion_flags);
+
+#ifdef DAT_EXTENSIONS  
+       if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
+               return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+#endif
+       /* setup the work request */
+       wr.next = NULL;
+       wr.opcode = op_type;
+       wr.num_sge = segments;
+       wr.send_flags = 0;
+       wr.wr_id = (uint64_t)(uintptr_t)cookie;
+       wr.sg_list = ds;
+       total_len = 0;
+
+       if (cookie != NULL) {
+               for (i = 0; i < segments; i++ ) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                                    " post_snd: lkey 0x%x va %p len %d\n",
+                                    ds->lkey, (void *)(uintptr_t)ds->addr, ds->length );
+                       total_len += ds->length;
+                       ds++;
+               }
+               cookie->val.dto.size = total_len;
+       }
+
+       if (wr.num_sge && 
+           (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
+               wr.wr.rdma.remote_addr = remote_iov->virtual_address;
+               wr.wr.rdma.rkey = remote_iov->rmr_context;
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                            " post_snd_rdma: rkey 0x%x va %#016llx\n",
+                            wr.wr.rdma.rkey, (unsigned long long)wr.wr.rdma.remote_addr);
+       }
+
+       /* inline data for send or write ops; total_len is still 0 here
+        * when no cookie was supplied, since the sizing loop was skipped */
+       if ((total_len <= ibt_ptr->max_inline_send) && 
+          ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) 
+               wr.send_flags |= IBV_SEND_INLINE;
+       
+       /* set completion flags in work request */
+       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
+                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
+       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
+                               completion_flags) ? IBV_SEND_FENCE : 0;
+       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
+                               completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                    " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
+                    wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+       /* verbs hands back the error code itself; errno is only the fallback */
+       if (ret)
+               return(dapl_convert_errno(ret > 0 ? ret : errno,"ibv_send"));
+
+#ifdef DAPL_COUNTERS
+       switch (op_type) {
+       case OP_SEND:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND);
+               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_DATA,total_len);
+               break;
+       case OP_RDMA_WRITE:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE);
+               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_WRITE_DATA,total_len);
+               break;  
+       case OP_RDMA_READ:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_READ);
+               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_READ_DATA,total_len);
+               break;
+       default:
+               break;
+       }
+#endif /* DAPL_COUNTERS */
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
+       return DAT_SUCCESS;
+}
+
+/* map a work completion to the DAT_DTOS operation code; unknown opcodes yield 0xff */
+STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
+{
+       switch (cqe_p->opcode) {
+       /* UD and immediate-data variants are only reported with DAT_EXTENSIONS */
+       case IBV_WC_SEND:
+#ifdef DAT_EXTENSIONS
+               if (CQE_WR_TYPE_UD(cqe_p->wr_id))       /* reads wr_id as a DAPL_COOKIE pointer */
+                       return (DAT_IB_DTO_SEND_UD);
+               else    /* pairs with the return that follows the #endif */
+#endif                 
+               return (DAT_DTO_SEND);
+       case IBV_WC_RDMA_READ:
+               return (DAT_DTO_RDMA_READ);
+       case IBV_WC_BIND_MW:
+               return (DAT_DTO_BIND_MW);
+#ifdef DAT_EXTENSIONS
+       case IBV_WC_RDMA_WRITE:
+               if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+                       return (DAT_IB_DTO_RDMA_WRITE_IMMED);
+               else
+                       return (DAT_DTO_RDMA_WRITE);
+       case IBV_WC_COMP_SWAP:
+               return (DAT_IB_DTO_CMP_SWAP);
+       case IBV_WC_FETCH_ADD:
+               return (DAT_IB_DTO_FETCH_ADD);
+       case IBV_WC_RECV_RDMA_WITH_IMM:
+               return (DAT_IB_DTO_RECV_IMMED);
+#else
+       case IBV_WC_RDMA_WRITE:
+               return (DAT_DTO_RDMA_WRITE);
+#endif
+       case IBV_WC_RECV:
+#ifdef DAT_EXTENSIONS
+               if (CQE_WR_TYPE_UD(cqe_p->wr_id)) 
+                       return (DAT_IB_DTO_RECV_UD);
+               else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+                       return (DAT_IB_DTO_RECV_MSG_IMMED);
+               else    /* pairs with the return that follows the #endif */
+#endif 
+               return (DAT_DTO_RECEIVE);
+       default:
+               return (0xff);  /* not a value handled by any case above */
+       }
+}
+#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
+
+
+#ifdef DAT_EXTENSIONS
+/*
+ * dapls_ib_post_ext_send
+ *
+ * Provider specific extended Post SEND function: RDMA write with immediate
+ * data, the atomics OP_COMP_AND_SWAP / OP_FETCH_AND_ADD, and UD send.
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_ext_send (
+       IN  DAPL_EP                     *ep_ptr,
+       IN  ib_send_op_type_t           op_type,
+       IN  DAPL_COOKIE                 *cookie,
+       IN  DAT_COUNT                   segments,
+       IN  DAT_LMR_TRIPLET             *local_iov,
+       IN  const DAT_RMR_TRIPLET       *remote_iov,
+       IN  DAT_UINT32                  immed_data,
+       IN  DAT_UINT64                  compare_add,
+       IN  DAT_UINT64                  swap,
+       IN  DAT_COMPLETION_FLAGS        completion_flags,
+       IN  DAT_IB_ADDR_HANDLE          *remote_ah)
+{
+       struct ibv_send_wr wr;
+       struct ibv_send_wr *bad_wr;
+       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
+       DAT_COUNT i, total_len;
+       int ret;
+       
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " post_ext_snd: ep %p op %d ck %p sgs "
+                    "%d l_iov %p r_iov %p f %d ah %p\n",
+                    ep_ptr, op_type, cookie, segments, local_iov, 
+                    remote_iov, completion_flags, remote_ah);
+
+       /* setup the work request */
+       wr.next = NULL;
+       wr.opcode = op_type;
+       wr.num_sge = segments;
+       wr.send_flags = 0;
+       wr.wr_id = (uint64_t)(uintptr_t)cookie;
+       wr.sg_list = ds;
+       total_len = 0;
+
+       if (cookie != NULL) {
+               for (i = 0; i < segments; i++ ) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                                    " post_snd: lkey 0x%x va %p len %d\n",
+                                    ds->lkey, (void *)(uintptr_t)ds->addr, ds->length );
+                       total_len += ds->length;
+                       ds++;
+               }
+               cookie->val.dto.size = total_len;
+       }
+
+       switch (op_type) {
+       case OP_RDMA_WRITE_IMM:
+               /* OP_RDMA_WRITE_IMM has direct IB wr_type mapping */
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                            " post_ext: rkey 0x%x va %#016llx immed=0x%x\n",
+                            remote_iov?remote_iov->rmr_context:0, 
+                            (unsigned long long)(remote_iov?remote_iov->virtual_address:0),
+                            immed_data);
+
+               wr.imm_data = immed_data;
+               if (wr.num_sge) {
+                       wr.wr.rdma.remote_addr = remote_iov->virtual_address;
+                       wr.wr.rdma.rkey = remote_iov->rmr_context;
+               }
+               break;
+       case OP_COMP_AND_SWAP:
+               /* OP_COMP_AND_SWAP has direct IB wr_type mapping */
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                            " post_ext: OP_COMP_AND_SWAP=%llx,"
+                            "%llx rkey 0x%x va %#016llx\n",
+                            (unsigned long long)compare_add,
+                            (unsigned long long)swap, remote_iov->rmr_context,
+                            (unsigned long long)remote_iov->virtual_address);
+               wr.wr.atomic.compare_add = compare_add;
+               wr.wr.atomic.swap = swap;
+               wr.wr.atomic.remote_addr = remote_iov->virtual_address;
+               wr.wr.atomic.rkey = remote_iov->rmr_context;
+               break;
+       case OP_FETCH_AND_ADD:
+               /* OP_FETCH_AND_ADD has direct IB wr_type mapping */
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                            " post_ext: OP_FETCH_AND_ADD=%llx,"
+                            " rkey 0x%x va %#016llx\n",
+                            (unsigned long long)compare_add, remote_iov->rmr_context,
+                            (unsigned long long)remote_iov->virtual_address);
+
+               wr.wr.atomic.compare_add = compare_add;
+               wr.wr.atomic.remote_addr = remote_iov->virtual_address;
+               wr.wr.atomic.rkey = remote_iov->rmr_context;
+               break;
+       case OP_SEND_UD:
+               /* post must be on EP with service_type of UD */
+               if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
+                       return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                            " post_ext: OP_SEND_UD ah=%p"
+                            " qp_num=0x%x\n",
+                            remote_ah, remote_ah->qpn);
+               /* on the wire a UD send is a plain send plus addressing info */
+               wr.opcode = OP_SEND;
+               wr.wr.ud.ah = remote_ah->ah;
+               wr.wr.ud.remote_qpn = remote_ah->qpn;
+               wr.wr.ud.remote_qkey = DAT_UD_QKEY;
+               break;
+       default:
+               break;
+       }
+
+       /* set completion flags in work request */
+       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
+                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
+       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
+                               completion_flags) ? IBV_SEND_FENCE : 0;
+       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
+                               completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                    " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
+                    wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+       /* verbs hands back the error code itself; errno is only the fallback */
+       if (ret)
+               return( dapl_convert_errno(ret > 0 ? ret : errno,"ibv_send") );
+       
+#ifdef DAPL_COUNTERS
+       switch (op_type) {
+       case OP_RDMA_WRITE_IMM:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE_IMM);
+               DAPL_CNTR_DATA(ep_ptr, 
+                              DCNT_EP_POST_WRITE_IMM_DATA, total_len);
+               break;
+       case OP_COMP_AND_SWAP:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_CMP_SWAP);
+               break;  
+       case OP_FETCH_AND_ADD:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_FETCH_ADD);
+               break;
+       case OP_SEND_UD:
+               DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND_UD);
+               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_UD_DATA, total_len);
+               break;
+       default:
+               break;
+       }
+#endif /* DAPL_COUNTERS */
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
+       return DAT_SUCCESS;
+}
+#endif
+
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_optional_prv_dat(
+       IN  DAPL_CR             *cr_ptr,        /* not read here */
+       IN  const void          *event_data,    /* not read here */
+       OUT   DAPL_CR           **cr_pp)        /* not written here */
+{
+    return DAT_SUCCESS;        /* stub: does nothing and always reports success */
+}
+
+
+/* map a work completion to the provider's OP_* code; OP_INVALID when unrecognised */
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
+{
+#ifdef DAPL_COUNTERS
+       DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t)cqe_p->wr_id;
+#endif /* DAPL_COUNTERS */
+       /* NOTE(review): CQE_WR_TYPE_UD dereferences wr_id as a cookie; assumes it is never 0 here */
+       switch (cqe_p->opcode) {
+       case IBV_WC_SEND:
+               if (CQE_WR_TYPE_UD(cqe_p->wr_id))
+                       return(OP_SEND_UD);
+               else
+                       return (OP_SEND);
+       case IBV_WC_RDMA_WRITE:
+               if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
+                       return (OP_RDMA_WRITE_IMM);
+               else
+                       return (OP_RDMA_WRITE);
+       case IBV_WC_RDMA_READ:
+               return (OP_RDMA_READ);
+       case IBV_WC_COMP_SWAP:
+               return (OP_COMP_AND_SWAP);
+       case IBV_WC_FETCH_ADD:
+               return (OP_FETCH_AND_ADD);
+       case IBV_WC_BIND_MW:
+               return (OP_BIND_MW);
+       case IBV_WC_RECV:
+               if (CQE_WR_TYPE_UD(cqe_p->wr_id)) {
+                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV_UD);
+                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_UD_DATA, 
+                                      cqe_p->byte_len);
+                       return (OP_RECV_UD);
+               }
+               else if (cqe_p->wc_flags & IBV_WC_WITH_IMM) {
+                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV_IMM);
+                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_IMM_DATA, 
+                                      cqe_p->byte_len);
+                       return (OP_RECEIVE_IMM);        /* NOTE(review): same code as the RDMA-with-immediate case below; OP_RECEIVE_MSG_IMM is never returned — confirm intended */
+               } else {
+                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV);
+                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_DATA, 
+                                      cqe_p->byte_len);
+                       return (OP_RECEIVE);
+               }
+       case IBV_WC_RECV_RDMA_WITH_IMM:
+               DAPL_CNTR(cookie->ep, DCNT_EP_RECV_RDMA_IMM);
+               DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_RDMA_IMM_DATA, 
+                              cqe_p->byte_len);
+               return (OP_RECEIVE_IMM);
+       default:
+               return (OP_INVALID);
+       }
+}
+
+#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)     /* field accessors below cast cqe_p to ib_work_completion_t* */
+#define DAPL_GET_CQE_WRID(cqe_p) (((ib_work_completion_t*)(cqe_p))->wr_id)
+#define DAPL_GET_CQE_STATUS(cqe_p) (((ib_work_completion_t*)(cqe_p))->status)
+#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) (((ib_work_completion_t*)(cqe_p))->vendor_err)
+#define DAPL_GET_CQE_BYTESNUM(cqe_p) (((ib_work_completion_t*)(cqe_p))->byte_len)
+#define DAPL_GET_CQE_IMMED_DATA(cqe_p) (((ib_work_completion_t*)(cqe_p))->imm_data)
+
+STATIC _INLINE_ char * dapls_dto_op_str(int op)
+{
+    /* Names looked up by the op code passed in; codes outside the table get a fixed string. */
+    static char *optable[] = {
+        "OP_RDMA_WRITE",
+        "OP_RDMA_WRITE_IMM",
+        "OP_SEND",
+        "OP_SEND_IMM",
+        "OP_RDMA_READ",
+        "OP_COMP_AND_SWAP",
+        "OP_FETCH_AND_ADD",
+        "OP_RECEIVE",
+        "OP_RECEIVE_MSG_IMM",
+        "OP_RECEIVE_RDMA_IMM",
+        "OP_BIND_MW",
+        "OP_SEND_UD",
+        "OP_RECV_UD"
+    };
+    return ((op < 0 || op >= (int)(sizeof(optable) / sizeof(optable[0]))) ? "Invalid CQE OP?" : optable[op]);
+}
+
+static _INLINE_ char *dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
+{
+    int op = DAPL_GET_CQE_OPTYPE(cqe_ptr);     /* op code of this completion */
+    return dapls_dto_op_str(op);               /* its printable name */
+}
+
+#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
+
+#endif /*  _DAPL_IB_DTO_H_ */
diff --git a/dapl/openib_common/ib_extensions.c b/dapl/openib_common/ib_extensions.c
new file mode 100644 (file)
index 0000000..3c418e1
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2007-2009 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
+#include "dapl_cookie.h"
+#include <stdarg.h>
+
+DAT_RETURN
+dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
+              IN DAT_UINT64 cmp_add,
+              IN DAT_UINT64 swap,
+              IN DAT_UINT32 immed_data,
+              IN DAT_COUNT segments,
+              IN DAT_LMR_TRIPLET * local_iov,
+              IN DAT_DTO_COOKIE user_cookie,
+              IN const DAT_RMR_TRIPLET * remote_iov,
+              IN int op_type,
+              IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah);
+
+/*
+ * dapl_extensions
+ *
+ * Process extension requests: decode the variable arguments that belong
+ * to ext_op and dispatch to the matching routine.
+ *
+ * Input:
+ *     dat_handle      used as the EP handle by the post operations;
+ *                     passed through unchanged to the counter routines
+ *     ext_op          extended operation selector
+ *     args            va_list, consumed in the order shown in each case
+ *
+ * Returns:
+ *     status of the dispatched operation
+ *     DAT_SUCCESS for DAT_PRINT_COUNTERS_OP
+ *     DAT_NOT_IMPLEMENTED when ext_op is not handled here
+ *
+ */
+DAT_RETURN
+dapl_extensions(IN DAT_HANDLE dat_handle,
+               IN DAT_EXTENDED_OP ext_op, IN va_list args)
+{
+       DAT_EP_HANDLE ep;
+       DAT_IB_ADDR_HANDLE *ah = NULL;
+       DAT_LMR_TRIPLET *lmr_p;
+       DAT_DTO_COOKIE cookie;
+       const DAT_RMR_TRIPLET *rmr_p;
+       DAT_UINT64 dat_uint64a, dat_uint64b;
+       DAT_UINT32 dat_uint32;
+       DAT_COUNT segments = 1;
+       DAT_COMPLETION_FLAGS comp_flags;
+       DAT_RETURN status = DAT_NOT_IMPLEMENTED;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_API,
+                    "dapl_extensions(hdl %p operation %d, ...)\n",
+                    dat_handle, ext_op);
+
+       switch ((int)ext_op) {
+
+       case DAT_IB_RDMA_WRITE_IMMED_OP:
+               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+                            " WRITE_IMMED_DATA extension call\n");
+
+               ep = dat_handle;        /* ep_handle */
+               segments = va_arg(args, DAT_COUNT);     /* num segments */
+               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+               cookie = va_arg(args, DAT_DTO_COOKIE);
+               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
+               dat_uint32 = va_arg(args, DAT_UINT32);  /* immed data */
+               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+               status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
+                                       cookie, rmr_p, OP_RDMA_WRITE_IMM,
+                                       comp_flags, ah);
+               break;
+
+       case DAT_IB_CMP_AND_SWAP_OP:
+               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+                            " CMP_AND_SWAP extension call\n");
+               /* no segment count argument: segments keeps its initial value of 1 */
+               ep = dat_handle;        /* ep_handle */
+               dat_uint64a = va_arg(args, DAT_UINT64); /* cmp_value */
+               dat_uint64b = va_arg(args, DAT_UINT64); /* swap_value */
+               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+               cookie = va_arg(args, DAT_DTO_COOKIE);
+               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
+               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+               status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
+                                       0, segments, lmr_p, cookie, rmr_p,
+                                       OP_COMP_AND_SWAP, comp_flags, ah);
+               break;
+
+       case DAT_IB_FETCH_AND_ADD_OP:
+               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+                            " FETCH_AND_ADD extension call\n");
+               /* no segment count argument: segments keeps its initial value of 1 */
+               ep = dat_handle;        /* ep_handle */
+               dat_uint64a = va_arg(args, DAT_UINT64); /* add value */
+               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+               cookie = va_arg(args, DAT_DTO_COOKIE);
+               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
+               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+               status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
+                                       lmr_p, cookie, rmr_p,
+                                       OP_FETCH_AND_ADD, comp_flags, ah);
+               break;
+
+       case DAT_IB_UD_SEND_OP:
+               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+                            " UD post_send extension call\n");
+               /* the only case that reads an address handle; no remote iov is passed on */
+               ep = dat_handle;        /* ep_handle */
+               segments = va_arg(args, DAT_COUNT);     /* segments */
+               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
+               ah = va_arg(args, DAT_IB_ADDR_HANDLE *);
+               cookie = va_arg(args, DAT_DTO_COOKIE);
+               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
+
+               status = dapli_post_ext(ep, 0, 0, 0, segments,
+                                       lmr_p, cookie, NULL,
+                                       OP_SEND_UD, comp_flags, ah);
+               break;
+
+#ifdef DAPL_COUNTERS
+       case DAT_QUERY_COUNTERS_OP:
+               {
+                       int cntr, reset;
+                       DAT_UINT64 *p_cntr_out;
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+                                    " Query counter extension call\n");
+
+                       cntr = va_arg(args, int);
+                       p_cntr_out = va_arg(args, DAT_UINT64 *);
+                       reset = va_arg(args, int);
+
+                       status = dapl_query_counter(dat_handle, cntr,
+                                                   p_cntr_out, reset);
+                       break;
+               }
+       case DAT_PRINT_COUNTERS_OP:
+               {
+                       int cntr, reset;
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_RTN,
+                                    " Print counter extension call\n");
+
+                       cntr = va_arg(args, int);
+                       reset = va_arg(args, int);
+
+                       dapl_print_counter(dat_handle, cntr, reset);
+                       status = DAT_SUCCESS;
+                       break;
+               }
+#endif                         /* DAPL_COUNTERS */
+
+       default:        /* status keeps its initial DAT_NOT_IMPLEMENTED */
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            "unsupported extension(%d)\n", (int)ext_op);
+       }
+
+       return (status);
+}
+
+/*
+ * dapli_post_ext
+ *
+ * Common post path for the extended operations: allocate a DTO cookie, take
+ * a request reference, then call dapls_ib_post_ext_send(). On a failed post
+ * the reference and the cookie are released again.
+ */
+DAT_RETURN
+dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
+              IN DAT_UINT64 cmp_add, IN DAT_UINT64 swap,
+              IN DAT_UINT32 immed_data, IN DAT_COUNT segments,
+              IN DAT_LMR_TRIPLET * local_iov, IN DAT_DTO_COOKIE user_cookie,
+              IN const DAT_RMR_TRIPLET * remote_iov, IN int op_type,
+              IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah)
+{
+       DAPL_EP *ep_ptr;
+       DAPL_COOKIE *cookie = NULL;
+       DAT_RETURN dat_status = DAT_SUCCESS;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_API,
+                    " post_ext_op: ep %p cmp_val 0x%llx "
+                    "swap_val 0x%llx cookie 0x%llx, r_iov %p, flags 0x%x, ah %p\n",
+                    ep_handle, (unsigned long long)cmp_add, (unsigned long long)swap,
+                    (unsigned long long)user_cookie.as_64, remote_iov, flags, ah);
+
+       if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
+               return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
+
+       ep_ptr = (DAPL_EP *) ep_handle;
+
+       /*
+        * Synchronization ok since this buffer is only used for send
+        * requests, which aren't allowed to race with each other.
+        */
+       dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
+                                           DAPL_DTO_TYPE_EXTENSION,
+                                           user_cookie, &cookie);
+       if (dat_status != DAT_SUCCESS)
+               goto bail;
+
+       /*
+        * Take reference before posting to avoid race conditions with
+        * completions
+        */
+       dapl_os_atomic_inc(&ep_ptr->req_count);
+
+       /*
+        * Invoke provider specific routine to post DTO
+        */
+       dat_status = dapls_ib_post_ext_send(ep_ptr, op_type, cookie, segments,  /* data segments */
+                                           local_iov, remote_iov, immed_data,  /* immed data */
+                                           cmp_add,    /* compare or add */
+                                           swap,       /* swap */
+                                           flags, ah);
+
+       if (dat_status != DAT_SUCCESS) {
+               dapl_os_atomic_dec(&ep_ptr->req_count);
+               dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
+       }
+
+      bail:
+       return dat_status;
+}
+
+/*
+ * Build a DAT_IB_DTO_EVENT from a CQE; the EP comes from cookie->ep (ep_ptr is not referenced).
+ */
+void
+dapls_cqe_to_event_extension(IN DAPL_EP * ep_ptr,
+                            IN DAPL_COOKIE * cookie,
+                            IN ib_work_completion_t * cqe_ptr,
+                            IN DAT_EVENT * event_ptr)
+{
+       uint32_t ibtype;
+       DAT_DTO_COMPLETION_EVENT_DATA *dto =
+           &event_ptr->event_data.dto_completion_event_data;
+       DAT_IB_EXTENSION_EVENT_DATA *ext_data = (DAT_IB_EXTENSION_EVENT_DATA *)
+           & event_ptr->event_extension_data[0];
+       DAT_DTO_COMPLETION_STATUS dto_status;
+
+       /* Get status from cqe */
+       dto_status = dapls_ib_get_dto_status(cqe_ptr);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+                    " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
+                    dto, ext_data, dto_status);
+
+       event_ptr->event_number = DAT_IB_DTO_EVENT;
+       dto->ep_handle = cookie->ep;
+       dto->user_cookie = cookie->val.dto.cookie;
+       dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr);     /* new for 2.0 */
+       dto->status = ext_data->status = dto_status;
+       /* failed completions get no transfer length or extension type */
+       if (dto_status != DAT_DTO_SUCCESS)
+               return;
+
+       /* 
+        * Get operation type from CQ work completion entry and
+        * if extended operation then set extended event data
+        */
+       ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
+
+       switch (ibtype) {
+
+       case OP_RDMA_WRITE_IMM:
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+                            " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
+
+               /* type and outbound rdma write transfer size */
+               dto->transfered_length = cookie->val.dto.size;
+               ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
+               break;
+       case OP_RECEIVE_IMM:
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+                            " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
+
+               /* immed recvd, type and inbound rdma write transfer size */
+               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+               ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
+               ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+               break;
+       case OP_RECEIVE_MSG_IMM:        /* NOTE(review): dapls_cqe_opcode() in dapl_ib_dto.h does not appear to return this code — confirm reachable */
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+                            " cqe_to_event_ext: OP_RECEIVE_MSG_IMMED\n");
+
+               /* immed recvd, type and inbound recv message transfer size */
+               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+               ext_data->type = DAT_IB_RECV_IMMED_DATA;
+               ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
+               break;
+       case OP_COMP_AND_SWAP:
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+                            " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
+
+               /* original data is returned in LMR provided with post */
+               ext_data->type = DAT_IB_CMP_AND_SWAP;
+               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+               break;
+       case OP_FETCH_AND_ADD:
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
+                            " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
+
+               /* original data is returned in LMR provided with post */
+               ext_data->type = DAT_IB_FETCH_AND_ADD;
+               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+               break;
+       case OP_SEND_UD:
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_SEND\n");
+
+               /* type and outbound send transfer size */
+               ext_data->type = DAT_IB_UD_SEND;
+               dto->transfered_length = cookie->val.dto.size;
+               break;
+       case OP_RECV_UD:
+               dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_RECV\n");
+
+               /* type and inbound recv message transfer size */
+               ext_data->type = DAT_IB_UD_RECV;
+               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
+               break;
+
+       default:
+               /* not extended operation: overwrite the success status set above */
+               ext_data->status = DAT_IB_OP_ERR;
+               dto->status = DAT_DTO_ERR_TRANSPORT;
+               break;
+       }
+}
diff --git a/dapl/openib_common/mem.c b/dapl/openib_common/mem.c
new file mode 100644 (file)
index 0000000..8a3e152
--- /dev/null
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+
+/*
+ * dapls_convert_privileges
+ *
+ * Convert LMR privileges to provider  
+ *
+ * Input:
+ *     DAT_MEM_PRIV_FLAGS
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     ibv_access_flags
+ *
+ */
+STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
+{
+       int access = 0;
+
+       /*
+        * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
+        */
+       if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
+               access |= IBV_ACCESS_LOCAL_WRITE;
+       if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
+               access |= IBV_ACCESS_REMOTE_WRITE;
+       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+               access |= IBV_ACCESS_REMOTE_READ;
+       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+               access |= IBV_ACCESS_REMOTE_READ;
+       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+               access |= IBV_ACCESS_REMOTE_READ;
+#ifdef DAT_EXTENSIONS
+       if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
+               access |= IBV_ACCESS_REMOTE_ATOMIC;
+#endif
+
+       return access;
+}
+
+/*
+ * dapls_ib_pd_alloc
+ *
+ * Allocate a verbs protection domain and store it in the PZ
+ *
+ * Input:
+ *     ia_ptr          IA whose HCA handle is used for the allocation
+ *     pz              PZ whose pd_handle receives the result
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno(ENOMEM, ...) result when ibv_alloc_pd fails
+ *
+ */
+DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
+{
+       /* the handle is stored unconditionally; it is NULL on failure */
+       pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+
+       if (pz->pd_handle) {
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " pd_alloc: pd_handle=%p\n", pz->pd_handle);
+               return DAT_SUCCESS;
+       }
+
+       return (dapl_convert_errno(ENOMEM, "alloc_pd"));
+}
+
+/*
+ * dapls_ib_pd_free
+ *
+ * Release the protection domain held by a PZ, if any
+ *
+ * Input:
+ *     pz              PZ whose pd_handle is released
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     PD released, or no PD was held
+ *     dapl_convert_errno(errno, ...) result when ibv_dealloc_pd fails;
+ *     pd_handle is left untouched in that case
+ *
+ */
+DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
+{
+       /* nothing to release */
+       if (pz->pd_handle == IB_INVALID_HANDLE)
+               return DAT_SUCCESS;
+
+       if (ibv_dealloc_pd(pz->pd_handle))
+               return (dapl_convert_errno(errno, "ibv_dealloc_pd"));
+
+       pz->pd_handle = IB_INVALID_HANDLE;
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_register
+ *
+ * Register a virtual memory region
+ *
+ * Input:
+ *     ia_ptr          IA handle
+ *     lmr             pointer to dapl_lmr struct
+ *     virt_addr       virtual address of beginning of mem region
+ *     length          length of memory region
+ *     privileges      DAT memory privileges, converted to ibv access flags
+ *     va_type         DAT_VA_TYPE_ZB is accepted on iWARP devices only
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_NOT_IMPLEMENTED     shared memory type, or zero based VA on a
+ *                             non-iWARP device
+ *     dapl_convert_errno(ENOMEM, ...) result when ibv_reg_mr fails
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
+                    IN DAPL_LMR * lmr,
+                    IN DAT_PVOID virt_addr,
+                    IN DAT_VLEN length,
+                    IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
+{
+       ib_pd_handle_t ib_pd_handle;
+       struct ibv_device *ibv_dev = ia_ptr->hca_ptr->ib_hca_handle->device;
+       /* converted once; used for both the registration and the log */
+       int access = dapls_convert_privileges(privileges);
+
+       ib_pd_handle = ((DAPL_PZ *) lmr->param.pz_handle)->pd_handle;
+
+       /* length is cast so the conversion matches whatever DAT_VLEN is */
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " mr_register: ia=%p, lmr=%p va=%p ln=%llu pv=0x%x\n",
+                    ia_ptr, lmr, virt_addr,
+                    (unsigned long long)length, privileges);
+
+       /* TODO: shared memory */
+       if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " mr_register_shared: NOT IMPLEMENTED\n");
+               return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       }
+
+       /* iWARP only support */
+       if ((va_type == DAT_VA_TYPE_ZB) &&
+           (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
+               return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       }
+
+       /* local read is default on IB */
+       lmr->mr_handle = ibv_reg_mr(ib_pd_handle, virt_addr, length, access);
+
+       if (!lmr->mr_handle)
+               return (dapl_convert_errno(ENOMEM, "reg_mr"));
+
+       lmr->param.lmr_context = lmr->mr_handle->lkey;
+       lmr->param.rmr_context = lmr->mr_handle->rkey;
+       lmr->param.registered_size = length;
+       lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
+
+       /* one conversion per argument: length now has its own len= field */
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " mr_register: mr=%p addr=%p pd %p ctx %p "
+                    "lkey=0x%x rkey=0x%x len=%llu priv=%x\n",
+                    lmr->mr_handle, lmr->mr_handle->addr,
+                    lmr->mr_handle->pd, lmr->mr_handle->context,
+                    lmr->mr_handle->lkey, lmr->mr_handle->rkey,
+                    (unsigned long long)length, access);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_deregister
+ *
+ * Free a memory region
+ *
+ * Input:
+ *     lmr                     pointer to dapl_lmr struct
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS             region released, or none was registered
+ *     dapl_convert_errno(errno, ...) result when ibv_dereg_mr fails;
+ *     mr_handle is left untouched in that case
+ *
+ */
+DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
+{
+       if (lmr->mr_handle != IB_INVALID_HANDLE) {
+               /* label names the verb that failed (was "dereg_pd") */
+               if (ibv_dereg_mr(lmr->mr_handle))
+                       return (dapl_convert_errno(errno, "dereg_mr"));
+               lmr->mr_handle = IB_INVALID_HANDLE;
+       }
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_register_shared
+ *
+ * Shared memory region registration; not implemented here
+ *
+ * Input:
+ *     ia_ptr          IA handle (unused)
+ *     lmr             pointer to dapl_lmr struct (unused)
+ *     privileges      (unused)
+ *     va_type         (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED, always, after logging the call
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register_shared(IN DAPL_IA * ia_ptr,
+                           IN DAPL_LMR * lmr,
+                           IN DAT_MEM_PRIV_FLAGS privileges,
+                           IN DAT_VA_TYPE va_type)
+{
+       DAT_RETURN status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                    " mr_register_shared: NOT IMPLEMENTED\n");
+
+       status = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       return status;
+}
+
+/*
+ * dapls_ib_mw_alloc
+ *
+ * Memory window allocation; not implemented here
+ *
+ * Input:
+ *     rmr     RMR that would hold the binding handles (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED, always, after logging the call
+ *
+ */
+DAT_RETURN dapls_ib_mw_alloc(IN DAPL_RMR * rmr)
+{
+       DAT_RETURN status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_alloc: NOT IMPLEMENTED\n");
+
+       status = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       return status;
+}
+
+/*
+ * dapls_ib_mw_free
+ *
+ * Memory window release; not implemented here
+ *
+ * Input:
+ *     rmr     RMR holding the binding handles (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED, always, after logging the call
+ *
+ */
+DAT_RETURN dapls_ib_mw_free(IN DAPL_RMR * rmr)
+{
+       DAT_RETURN status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_free: NOT IMPLEMENTED\n");
+
+       status = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       return status;
+}
+
+/*
+ * dapls_ib_mw_bind
+ *
+ * Memory window bind; not implemented here
+ *
+ * Input:
+ *     rmr, lmr, ep, cookie, virtual_address, length, mem_priv,
+ *     is_signaled     all unused
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED, always, after logging the call
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_bind(IN DAPL_RMR * rmr,
+                IN DAPL_LMR * lmr,
+                IN DAPL_EP * ep,
+                IN DAPL_COOKIE * cookie,
+                IN DAT_VADDR virtual_address,
+                IN DAT_VLEN length,
+                IN DAT_MEM_PRIV_FLAGS mem_priv, IN DAT_BOOLEAN is_signaled)
+{
+       DAT_RETURN status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_bind: NOT IMPLEMENTED\n");
+
+       status = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       return status;
+}
+
+/*
+ * dapls_ib_mw_unbind
+ *
+ * Memory window unbind; not implemented here
+ *
+ * Input:
+ *     rmr, ep, cookie, is_signaled    all unused
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED, always, after logging the call
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_unbind(IN DAPL_RMR * rmr,
+                  IN DAPL_EP * ep,
+                  IN DAPL_COOKIE * cookie, IN DAT_BOOLEAN is_signaled)
+{
+       DAT_RETURN status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_unbind: NOT IMPLEMENTED\n");
+
+       status = DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+       return status;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
new file mode 100644 (file)
index 0000000..9fb7c96
--- /dev/null
@@ -0,0 +1,515 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+
+/*
+ * dapl_ib_qp_alloc
+ *
+ * Alloc a QP
+ *
+ * Input:
+ *     *ep_ptr         pointer to EP INFO
+ *     ib_hca_handle   provider HCA handle
+ *     ib_pd_handle    provider protection domain handle
+ *     cq_recv         provider recv CQ handle
+ *     cq_send         provider send CQ handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     DAT_INTERNAL_ERROR
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
+                 IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
+{
+       DAT_EP_ATTR *attr;
+       DAPL_EVD *rcv_evd, *req_evd;
+       ib_cq_handle_t rcv_cq, req_cq;
+       ib_pd_handle_t ib_pd_handle;
+       struct ibv_qp_init_attr qp_create;
+#ifdef _OPENIB_CMA_
+       dp_ib_cm_handle_t conn;
+#endif
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
+                    ia_ptr, ep_ptr, ep_ctx_ptr);
+
+       attr = &ep_ptr->param.ep_attr;
+       ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
+       rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
+       req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
+
+       /* 
+        * DAT allows usage model of EP's with no EVD's but IB does not. 
+        * Create a CQ with zero entries under the covers to support and 
+        * catch any invalid posting. 
+        */
+       if (rcv_evd != DAT_HANDLE_NULL)
+               rcv_cq = rcv_evd->ib_cq_handle;
+       else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
+               rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+       else {
+               struct ibv_comp_channel *channel = 
+                               rcv_evd->cq_wait_obj_handle;
+                 
+               /* Call IB verbs to create CQ */
+               rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                                      0, NULL, channel, 0);
+
+               if (rcv_cq == IB_INVALID_HANDLE)
+                       return (dapl_convert_errno(ENOMEM, "create_cq"));
+
+               ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
+       }
+       if (req_evd != DAT_HANDLE_NULL)
+               req_cq = req_evd->ib_cq_handle;
+       else
+               req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+
+       /* 
+        * IMPLEMENTATION NOTE:
+        * uDAPL allows consumers to post buffers on the EP after creation
+        * and before a connect request (outbound and inbound). This forces
+        * a binding to a device during the hca_open call and requires the
+        * consumer to predetermine which device to listen on or connect from.
+        * This restriction eliminates any option of listening or connecting 
+        * over multiple devices. uDAPL should add API's to resolve addresses 
+        * and bind to the device at the approriate time (before connect 
+        * and after CR arrives). Discovery should happen at connection time 
+        * based on addressing and not on static configuration during open.
+        */
+
+#ifdef _OPENIB_CMA_
+       /* Allocate CM and initialize lock */
+       if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
+               return (dapl_convert_errno(ENOMEM, "create_cq"));
+
+       /* open identifies the local device; per DAT specification */
+       if (rdma_bind_addr(conn->cm_id,
+                          (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
+               return (dapl_convert_errno(EAFNOSUPPORT, "create_cq"));
+#endif
+       /* Setup attributes and create qp */
+       dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
+       qp_create.send_cq = req_cq;
+       qp_create.cap.max_send_wr = attr->max_request_dtos;
+       qp_create.cap.max_send_sge = attr->max_request_iov;
+       qp_create.cap.max_inline_data =
+           ia_ptr->hca_ptr->ib_trans.max_inline_send;
+       qp_create.qp_type = IBV_QPT_RC;
+       qp_create.qp_context = (void *)ep_ptr;
+
+#ifdef DAT_EXTENSIONS 
+       if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
+#ifdef _OPENIB_CMA_
+               return (DAT_NOT_IMPLEMENTED);
+#endif
+               qp_create.qp_type = IBV_QPT_UD;
+               if (attr->max_message_size >
+                   (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
+                       return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
+               }
+       }
+#endif
+       
+       /* ibv assumes rcv_cq is never NULL, set to req_cq */
+       if (rcv_cq == NULL) {
+               qp_create.recv_cq = req_cq;
+               qp_create.cap.max_recv_wr = 0;
+               qp_create.cap.max_recv_sge = 0;
+       } else {
+               qp_create.recv_cq = rcv_cq;
+               qp_create.cap.max_recv_wr = attr->max_recv_dtos;
+               qp_create.cap.max_recv_sge = attr->max_recv_iov;
+       }
+
+#ifdef _OPENIB_CMA_
+       if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
+               dapls_ib_cm_free(conn, ep_ptr);
+               return (dapl_convert_errno(errno, "create_qp"));
+       }
+       ep_ptr->qp_handle = conn->cm_id->qp;
+       ep_ptr->cm_handle = conn;
+       ep_ptr->qp_state = IBV_QPS_INIT;
+               
+       /* setup up ep->param to reference the bound local address and port */
+       ep_ptr->param.local_ia_address_ptr = 
+               &conn->cm_id->route.addr.src_addr;
+       ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
+#else
+       ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
+       if (!ep_ptr->qp_handle)
+               return (dapl_convert_errno(ENOMEM, "create_qp"));
+               
+       /* Setup QP attributes for INIT state on the way out */
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                 IBV_QPS_INIT, NULL) != DAT_SUCCESS) {
+               ibv_destroy_qp(ep_ptr->qp_handle);
+               ep_ptr->qp_handle = IB_INVALID_HANDLE;
+               return DAT_INTERNAL_ERROR;
+       }
+#endif
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
+                    ep_ptr->qp_handle->qp_num,
+                    qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
+                    qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_free
+ *
+ * Free the QP and CM object attached to an endpoint
+ *
+ * Input:
+ *     ia_ptr          IA handle (not used in this function)
+ *     *ep_ptr         pointer to EP INFO
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno result when ibv_destroy_qp fails; the UD
+ *     cleanup below is skipped in that case
+ *
+ */
+DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",
+                    ep_ptr, ep_ptr->qp_handle);
+
+       /* the CM object is released first, before the QP is touched */
+       if (ep_ptr->cm_handle != NULL) {
+               dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
+       }
+       
+       /*
+        * NOTE(review): qp_handle is re-read after dapls_ib_cm_free;
+        * presumably the provider may already have cleared it - confirm.
+        */
+       if (ep_ptr->qp_handle != NULL) {
+               /* force error state to flush queue, then destroy */
+               /* the result of the state change is deliberately ignored */
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, NULL);
+
+               if (ibv_destroy_qp(ep_ptr->qp_handle))
+                       return (dapl_convert_errno(errno, "destroy_qp"));
+
+               ep_ptr->qp_handle = NULL;
+       }
+
+#ifdef DAT_EXTENSIONS
+       /* UD endpoints can have many CR associations and will not
+        * set ep->cm_handle. Call provider with cm_ptr null to indicate
+        * UD type multi CR's for this EP. It will parse internal list
+        * and cleanup all associations.
+        */
+       if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD) 
+               dapls_ib_cm_free(NULL, ep_ptr);
+#endif
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_modify
+ *
+ * Apply the queue sizes from an EP attribute block to the QP
+ *
+ * The EP_PARAM structure that is provided has been
+ * sanitized such that only non-zero values are valid.
+ *
+ * Input:
+ *     ia_ptr          IA handle (not used in this function)
+ *     ep_ptr          endpoint whose QP is modified
+ *     attr            Sanitized EP attributes; the request/recv DTO and
+ *                     IOV limits are copied into the QP capabilities
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_PARAMETER   endpoint has no QP
+ *     DAT_INVALID_STATE       QP is neither RTR nor RTS
+ *     result of dapls_modify_qp_state when the QP is moved to error
+ *     dapl_convert_errno result when ibv_modify_qp fails
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
+                  IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
+{
+       struct ibv_qp_attr new_caps;
+
+       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
+               return DAT_INVALID_PARAMETER;
+
+       /* 
+        * The EP and qp_handle states are only a hint of the real QP
+        * state; being sure would take a user mode ibv_query_qp call,
+        * which is NOT available.
+        */
+
+       /* EP already flagged in error but the QP is not: push it there */
+       if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
+           (ep_ptr->qp_handle->state != IBV_QPS_ERR))
+               return (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                             IBV_QPS_ERR, NULL));
+
+       /* capabilities are only changed on an RTR or RTS queue pair */
+       switch (ep_ptr->qp_handle->state) {
+       case IBV_QPS_RTR:
+       case IBV_QPS_RTS:
+               break;
+       default:
+               return DAT_INVALID_STATE;
+       }
+
+       /* only the capability fields are filled in and applied */
+       dapl_os_memzero((void *)&new_caps, sizeof(new_caps));
+       new_caps.cap.max_send_wr = attr->max_request_dtos;
+       new_caps.cap.max_send_sge = attr->max_request_iov;
+       new_caps.cap.max_recv_wr = attr->max_recv_dtos;
+       new_caps.cap.max_recv_sge = attr->max_recv_iov;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
+                    ep_ptr->qp_handle,
+                    new_caps.cap.max_send_wr, new_caps.cap.max_send_sge,
+                    new_caps.cap.max_recv_wr, new_caps.cap.max_recv_sge);
+
+       if (ibv_modify_qp(ep_ptr->qp_handle, &new_caps, IBV_QP_CAP) == 0)
+               return DAT_SUCCESS;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                    "modify_qp: modify ep %p qp %p failed\n",
+                    ep_ptr, ep_ptr->qp_handle);
+       return (dapl_convert_errno(errno, "modify_qp_state"));
+}
+
+/*
+ * dapls_ib_reinit_ep
+ *
+ * Bring the endpoint's QP back to INIT so it can be used again.
+ * Windows builds free and re-allocate the QP; other builds cycle a
+ * non-UD QP through RESET and INIT. Nothing is done without a QP.
+ *
+ * Input:
+ *     ep_ptr          DAPL_EP
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     void; results of the underlying calls are not checked
+ *
+ */
+#if defined(_WIN32) || defined(_WIN64)
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
+{
+       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
+               return;
+
+       /* work around bug in low level driver - 3/24/09 */
+       /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
+       dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
+       dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
+}
+#else                          // _WIN32 || _WIN64
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
+{
+       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
+               return;
+
+       /* UD queue pairs are left in their current state */
+       if (ep_ptr->qp_handle->qp_type == IBV_QPT_UD)
+               return;
+
+       /* move to RESET state and then to INIT */
+       dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, NULL);
+       dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, NULL);
+}
+#endif                         // _WIN32 || _WIN64
+
+/* 
+ * Generic QP modify for init, reset, error, RTS, RTR
+ * For UD, create_ah on RTR, qkey on INIT
+ */
+DAT_RETURN
+dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+                     IN ib_qp_state_t qp_state, 
+                     IN dp_ib_cm_handle_t cm_ptr)
+{
+       struct ibv_qp_attr qp_attr;
+       enum ibv_qp_attr_mask mask = IBV_QP_STATE;
+       DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;
+       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+       ib_qp_cm_t *qp_cm = &cm_ptr->dst;
+       int ret;
+
+       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+       qp_attr.qp_state = qp_state;
+       switch (qp_state) {
+               /* additional attributes with RTR and RTS */
+       case IBV_QPS_RTR:
+               {
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                                    " QPS_RTR: type %d state %d qpn %x lid %x"
+                                    " port %x ep %p qp_state %d\n",
+                                    qp_handle->qp_type, qp_handle->qp_type,
+                                    qp_cm->qpn, qp_cm->lid, qp_cm->port,
+                                    ep_ptr, ep_ptr->qp_state);
+
+                       mask |= IBV_QP_AV |
+                           IBV_QP_PATH_MTU |
+                           IBV_QP_DEST_QPN |
+                           IBV_QP_RQ_PSN |
+                           IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
+
+                       qp_attr.dest_qp_num = qp_cm->qpn;
+                       qp_attr.rq_psn = 1;
+                       qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;
+                       qp_attr.max_dest_rd_atomic =
+                           ep_ptr->param.ep_attr.max_rdma_read_out;
+                       qp_attr.min_rnr_timer =
+                           ia_ptr->hca_ptr->ib_trans.rnr_timer;
+
+                       /* address handle. RC and UD */
+                       qp_attr.ah_attr.dlid = qp_cm->lid;
+                       if (ia_ptr->hca_ptr->ib_trans.global) {
+                               qp_attr.ah_attr.is_global = 1;
+                               qp_attr.ah_attr.grh.dgid = qp_cm->gid;
+                               qp_attr.ah_attr.grh.hop_limit =
+                                   ia_ptr->hca_ptr->ib_trans.hop_limit;
+                               qp_attr.ah_attr.grh.traffic_class =
+                                   ia_ptr->hca_ptr->ib_trans.tclass;
+                       }
+                       qp_attr.ah_attr.sl = 0;
+                       qp_attr.ah_attr.src_path_bits = 0;
+                       qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
+#ifdef DAT_EXTENSIONS
+                       /* UD: create AH for remote side */
+                       if (qp_handle->qp_type == IBV_QPT_UD) {
+                               ib_pd_handle_t pz;
+                               pz = ((DAPL_PZ *)
+                                     ep_ptr->param.pz_handle)->pd_handle;
+                               mask = IBV_QP_STATE;
+                               cm_ptr->ah = ibv_create_ah(pz,
+                                                          &qp_attr.ah_attr);
+                               if (!cm_ptr->ah)
+                                       return (dapl_convert_errno(errno,
+                                                                  "ibv_ah"));
+
+                               /* already RTR, multi remote AH's on QP */
+                               if (ep_ptr->qp_state == IBV_QPS_RTR ||
+                                   ep_ptr->qp_state == IBV_QPS_RTS)
+                                       return DAT_SUCCESS;
+                       }
+#endif
+                       break;
+               }
+       case IBV_QPS_RTS:
+               {
+                       /* RC only */
+                       if (qp_handle->qp_type == IBV_QPT_RC) {
+                               mask |= IBV_QP_SQ_PSN |
+                                   IBV_QP_TIMEOUT |
+                                   IBV_QP_RETRY_CNT |
+                                   IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
+                               qp_attr.timeout =
+                                   ia_ptr->hca_ptr->ib_trans.ack_timer;
+                               qp_attr.retry_cnt =
+                                   ia_ptr->hca_ptr->ib_trans.ack_retry;
+                               qp_attr.rnr_retry =
+                                   ia_ptr->hca_ptr->ib_trans.rnr_retry;
+                               qp_attr.max_rd_atomic =
+                                   ep_ptr->param.ep_attr.max_rdma_read_out;
+                       }
+                       /* RC and UD */
+                       qp_attr.qp_state = IBV_QPS_RTS;
+                       qp_attr.sq_psn = 1;
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                                    " QPS_RTS: psn %x rd_atomic %d ack %d "
+                                    " retry %d rnr_retry %d ep %p qp_state %d\n",
+                                    qp_attr.sq_psn, qp_attr.max_rd_atomic,
+                                    qp_attr.timeout, qp_attr.retry_cnt,
+                                    qp_attr.rnr_retry, ep_ptr,
+                                    ep_ptr->qp_state);
+#ifdef DAT_EXTENSIONS
+                       if (qp_handle->qp_type == IBV_QPT_UD) {
+                               /* already RTS, multi remote AH's on QP */
+                               if (ep_ptr->qp_state == IBV_QPS_RTS)
+                                       return DAT_SUCCESS;
+                               else
+                                       mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
+                       }
+#endif
+                       break;
+               }
+       case IBV_QPS_INIT:
+               {
+                       mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
+                       if (qp_handle->qp_type == IBV_QPT_RC) {
+                               mask |= IBV_QP_ACCESS_FLAGS;
+                               qp_attr.qp_access_flags =
+                                   IBV_ACCESS_LOCAL_WRITE |
+                                   IBV_ACCESS_REMOTE_WRITE |
+                                   IBV_ACCESS_REMOTE_READ |
+                                   IBV_ACCESS_REMOTE_ATOMIC |
+                                   IBV_ACCESS_MW_BIND;
+                       }
+#ifdef DAT_EXTENSIONS
+                       if (qp_handle->qp_type == IBV_QPT_UD) {
+                               /* already INIT, multi remote AH's on QP */
+                               if (ep_ptr->qp_state == IBV_QPS_INIT)
+                                       return DAT_SUCCESS;
+                               mask |= IBV_QP_QKEY;
+                               qp_attr.qkey = DAT_UD_QKEY;
+                       }
+#endif
+                       qp_attr.pkey_index = 0;
+                       qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                                    " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
+                                    qp_attr.pkey_index, qp_attr.port_num,
+                                    qp_attr.qp_access_flags, qp_attr.qkey);
+                       break;
+               }
+       default:
+               break;
+
+       }
+
+       ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
+       if (ret == 0) {
+               ep_ptr->qp_state = qp_state;
+               return DAT_SUCCESS;
+       } else {
+               return (dapl_convert_errno(errno, "modify_qp_state"));
+       }
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c
new file mode 100644 (file)
index 0000000..da913c5
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#include <stdlib.h>
+
+int g_dapl_loopback_connection = 0;
+
+/*
+ * dapl_ib_mtu
+ *
+ * Map an MTU given in bytes onto the matching enum ibv_mtu constant.
+ * Any size other than 256, 512, 1024, 2048 or 4096 maps to IBV_MTU_1024.
+ */
+enum ibv_mtu dapl_ib_mtu(int mtu)
+{
+       if (mtu == 256)
+               return IBV_MTU_256;
+       if (mtu == 512)
+               return IBV_MTU_512;
+       if (mtu == 2048)
+               return IBV_MTU_2048;
+       if (mtu == 4096)
+               return IBV_MTU_4096;
+
+       /* 1024 itself and every unrecognized size */
+       return IBV_MTU_1024;
+}
+
+/*
+ * dapl_ib_mtu_str
+ *
+ * Return the MTU size in bytes, as a constant decimal string, for an
+ * enum ibv_mtu value. Unrecognized values yield "1024".
+ */
+char *dapl_ib_mtu_str(enum ibv_mtu mtu)
+{
+       char *size_str = "1024";        /* IBV_MTU_1024 and the fallback */
+
+       switch (mtu) {
+       case IBV_MTU_256:
+               size_str = "256";
+               break;
+       case IBV_MTU_512:
+               size_str = "512";
+               break;
+       case IBV_MTU_2048:
+               size_str = "2048";
+               break;
+       case IBV_MTU_4096:
+               size_str = "4096";
+               break;
+       default:
+               break;
+       }
+       return size_str;
+}
+
+/*
+ * getlocalipaddr
+ *
+ * Resolve the local hostname and copy the first IPv4 address that is
+ * not 127.0.0.1 into addr.
+ *
+ * Input:
+ *     addr            buffer that receives a struct sockaddr_in
+ *     addr_len        size of the addr buffer in bytes
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INTERNAL_ERROR      addr_len cannot hold a struct sockaddr_in
+ *     DAT_INVALID_ADDRESS     lookup failed or only loopback was found
+ *     converted errno         gethostname failed
+ */
+DAT_RETURN getlocalipaddr(DAT_SOCK_ADDR * addr, int addr_len)
+{
+       struct sockaddr_in *sin;
+       struct addrinfo *res, hint, *ai;
+       DAT_RETURN status = DAT_INVALID_ADDRESS;
+       int ret;
+       char hostname[256];
+
+       /* check the sign first; a negative int wraps in the size_t compare */
+       if (addr_len < 0 || (size_t)addr_len < sizeof(*sin))
+               return DAT_INTERNAL_ERROR;
+
+       /* gethostname returns -1 and reports the reason through errno */
+       if (gethostname(hostname, sizeof(hostname)))
+               return dapl_convert_errno(errno, "gethostname");
+
+       /* a truncated name is not guaranteed to be NUL terminated */
+       hostname[sizeof(hostname) - 1] = '\0';
+
+       memset(&hint, 0, sizeof hint);
+       hint.ai_flags = AI_PASSIVE;
+       hint.ai_family = AF_INET;
+       hint.ai_socktype = SOCK_STREAM;
+       hint.ai_protocol = IPPROTO_TCP;
+
+       ret = getaddrinfo(hostname, NULL, &hint, &res);
+       if (ret) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " getaddrinfo ERR: %d %s\n", ret, gai_strerror(ret));
+               return DAT_INVALID_ADDRESS;
+       }
+
+       /* take the first non-loopback IPv4 entry */
+       for (ai = res; ai; ai = ai->ai_next) {
+               if (ai->ai_addr == NULL || ai->ai_family != AF_INET)
+                       continue;
+
+               sin = (struct sockaddr_in *)ai->ai_addr;
+               if (sin->sin_addr.s_addr != htonl(0x7f000001)) {
+                       *((struct sockaddr_in *)addr) = *sin;
+                       status = DAT_SUCCESS;
+                       break;
+               }
+       }
+
+       freeaddrinfo(res);
+       return status;
+}
+
+/*
+ * dapls_ib_query_hca
+ *
+ * Query the hca attributes
+ *
+ * Input:
+ *     hca_ptr                 hca handle
+ *
+ * Output:
+ *     ia_attr                 attributes of the ia (optional, may be NULL)
+ *     ep_attr                 attributes of the ep (optional, may be NULL)
+ *     ip_addr                 local IP address of the device (optional)
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE
+ *     converted errno if the verbs device or port query fails
+ */
+
+/*
+ * Fill in whichever of ia_attr, ep_attr and ip_addr are non-NULL from the
+ * verbs device and port attributes of hca_ptr. When ia_attr is requested
+ * the call also updates hca_ptr->ib_trans (rd_atom_in, rd_atom_out, mtu,
+ * ack_timer and named_attr).
+ */
+DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
+                             OUT DAT_IA_ATTR * ia_attr,
+                             OUT DAT_EP_ATTR * ep_attr,
+                             OUT DAT_SOCK_ADDR6 * ip_addr)
+{
+       struct ibv_device_attr dev_attr;
+       struct ibv_port_attr port_attr;
+
+       if (hca_ptr->ib_hca_handle == NULL) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " query_hca: BAD handle\n");
+               return (DAT_INVALID_HANDLE);
+       }
+
+       /* local IP address of device, set during ia_open */
+       if (ip_addr != NULL)
+               memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
+
+       /* only the address was requested, skip the verbs queries */
+       if (ia_attr == NULL && ep_attr == NULL)
+               return DAT_SUCCESS;
+
+       /* query verbs for this device and port attributes */
+       /* NOTE(review): the failure code is taken from errno, not from the
+        * ibv_query_* return value - confirm both carry the same code */
+       if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
+           ibv_query_port(hca_ptr->ib_hca_handle,
+                          hca_ptr->port_num, &port_attr))
+               return (dapl_convert_errno(errno, "ib_query_hca"));
+
+       if (ia_attr != NULL) {
+               /* start from all zeros; fields not assigned below stay 0 */
+               (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
+               ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+               ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+               ia_attr->ia_address_ptr =
+                   (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address;
+
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " query_hca: %s %s \n",
+                            ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                            inet_ntoa(((struct sockaddr_in *)
+                                       &hca_ptr->hca_address)->sin_addr));
+
+               ia_attr->hardware_version_major = dev_attr.hw_ver;
+               /* ia_attr->hardware_version_minor   = dev_attr.fw_ver; */
+               ia_attr->max_eps = dev_attr.max_qp;
+               ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
+               ia_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+               ia_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
+               ia_attr->max_rdma_read_per_ep_in = dev_attr.max_qp_rd_atom;
+               ia_attr->max_rdma_read_per_ep_out =
+                   dev_attr.max_qp_init_rd_atom;
+               ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
+               ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
+               ia_attr->max_evds = dev_attr.max_cq;
+               ia_attr->max_evd_qlen = dev_attr.max_cqe;
+               ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
+               ia_attr->max_lmrs = dev_attr.max_mr;
+               /* 32bit attribute from 64bit, 4G-1 limit, DAT v2 needs fix */
+               /* saturate to all ones when max_mr_size has any upper 32 bits set */
+               ia_attr->max_lmr_block_size = 
+                   (dev_attr.max_mr_size >> 32) ? ~0 : dev_attr.max_mr_size;
+               ia_attr->max_rmrs = dev_attr.max_mw;
+               ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
+               ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
+               ia_attr->max_pzs = dev_attr.max_pd;
+               ia_attr->max_message_size = port_attr.max_msg_sz;
+               ia_attr->max_rdma_size = port_attr.max_msg_sz;
+               /* iWARP spec. - 1 sge for RDMA reads */
+               if (hca_ptr->ib_hca_handle->device->transport_type
+                   == IBV_TRANSPORT_IWARP)
+                       ia_attr->max_iov_segments_per_rdma_read = 1;
+               else
+                       ia_attr->max_iov_segments_per_rdma_read =
+                           dev_attr.max_sge;
+               ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
+               ia_attr->num_transport_attr = 0;
+               ia_attr->transport_attr = NULL;
+               ia_attr->num_vendor_attr = 0;
+               ia_attr->vendor_attr = NULL;
+#ifdef DAT_EXTENSIONS
+               ia_attr->extension_supported = DAT_EXTENSION_IB;
+               ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
+#endif
+               /* save key device attributes for CM exchange */
+               hca_ptr->ib_trans.rd_atom_in  = dev_attr.max_qp_rd_atom;
+               hca_ptr->ib_trans.rd_atom_out = dev_attr.max_qp_init_rd_atom;
+               
+               /* never exceed the port's active MTU; never go below the
+                * device's local ack delay */
+               hca_ptr->ib_trans.mtu = DAPL_MIN(port_attr.active_mtu,
+                                                hca_ptr->ib_trans.mtu);
+               hca_ptr->ib_trans.ack_timer =
+                   DAPL_MAX(dev_attr.local_ca_ack_delay,
+                            hca_ptr->ib_trans.ack_timer);
+
+               /* set MTU in transport specific named attribute */
+               hca_ptr->ib_trans.named_attr.name = "DAT_IB_TRANSPORT_MTU";
+               hca_ptr->ib_trans.named_attr.value =
+                   dapl_ib_mtu_str(hca_ptr->ib_trans.mtu);
+
+               /* 128 << mtu presumably turns the enum ibv_mtu value into
+                * bytes (assumes IBV_MTU_256 == 1) - TODO confirm */
+               dapl_log(DAPL_DBG_TYPE_UTIL,
+                            " query_hca: (%x.%x) eps %d, sz %d evds %d,"
+                            " sz %d mtu %d\n",
+                            ia_attr->hardware_version_major,
+                            ia_attr->hardware_version_minor,
+                            ia_attr->max_eps, ia_attr->max_dto_per_ep,
+                            ia_attr->max_evds, ia_attr->max_evd_qlen,
+                            128 << hca_ptr->ib_trans.mtu);
+
+               dapl_log(DAPL_DBG_TYPE_UTIL,
+                            " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d"
+                            " ack_time %d mr %u\n",
+                            ia_attr->max_message_size, ia_attr->max_rdma_size,
+                            ia_attr->max_iov_segments_per_dto,
+                            ia_attr->max_lmrs, ia_attr->max_rmrs,
+                            hca_ptr->ib_trans.ack_timer,
+                            ia_attr->max_lmr_block_size);
+       }
+
+       if (ep_attr != NULL) {
+               /* endpoint limits come from the same device and port data */
+               (void)dapl_os_memzero(ep_attr, sizeof(*ep_attr));
+               ep_attr->max_message_size = port_attr.max_msg_sz;
+               ep_attr->max_rdma_size = port_attr.max_msg_sz;
+               ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
+               ep_attr->max_request_dtos = dev_attr.max_qp_wr;
+               ep_attr->max_recv_iov = dev_attr.max_sge;
+               ep_attr->max_request_iov = dev_attr.max_sge;
+               ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+               ep_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
+               ep_attr->max_rdma_read_iov = dev_attr.max_sge;
+               ep_attr->max_rdma_write_iov = dev_attr.max_sge;
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " query_hca: MAX msg %llu mtu %d qsz %d iov %d"
+                            " rdma i%d,o%d\n",
+                            ep_attr->max_message_size,
+                            128 << hca_ptr->ib_trans.mtu,
+                            ep_attr->max_recv_dtos, 
+                            ep_attr->max_recv_iov,
+                            ep_attr->max_rdma_read_in,
+                            ep_attr->max_rdma_read_out);
+       }
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_setup_async_callback
+ *
+ * Set up asynchronous callbacks of various kinds
+ *
+ * Input:
+ *     ia_ptr                  IA handle
+ *     handler_type            type of handler to set up
+ *     evd_ptr                 handle param for completion callbacks
+ *     callback                callback routine pointer
+ *     context                 argument for callback routine
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     DAT_INVALID_PARAMETER
+ *
+ */
+/*
+ * Record callback in the HCA transport slot selected by handler_type.
+ * context is stored only for the unaffiliated handler; an unknown
+ * handler_type is ignored. Always returns DAT_SUCCESS.
+ */
+DAT_RETURN dapls_ib_setup_async_callback(IN DAPL_IA * ia_ptr,
+                                        IN DAPL_ASYNC_HANDLER_TYPE
+                                        handler_type, IN DAPL_EVD * evd_ptr,
+                                        IN ib_async_handler_t callback,
+                                        IN void *context)
+{
+       ib_hca_transport_t *trans = &ia_ptr->hca_ptr->ib_trans;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
+                    ia_ptr, handler_type, evd_ptr, callback, context);
+
+       if (handler_type == DAPL_ASYNC_UNAFILIATED) {
+               trans->async_unafiliated = (ib_async_handler_t) callback;
+               trans->async_un_ctx = context;
+       } else if (handler_type == DAPL_ASYNC_CQ_ERROR) {
+               trans->async_cq_error = (ib_async_cq_handler_t) callback;
+       } else if (handler_type == DAPL_ASYNC_CQ_COMPLETION) {
+               trans->async_cq = (ib_async_dto_handler_t) callback;
+       } else if (handler_type == DAPL_ASYNC_QP_ERROR) {
+               trans->async_qp_error = (ib_async_qp_handler_t) callback;
+       }
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_query_provider_specific_attr
+ *
+ * Input:
+ *      ia_ptr          IA whose current MTU setting is reported
+ *      attr_ptr        Pointer to the provider attributes to fill in
+ *
+ * Output:
+ *      none
+ *
+ * Returns:
+ *      void
+ */
+/*
+ * Provider specific named attributes handed out by
+ * dapls_query_provider_specific_attr. Entry 0 must remain the MTU entry:
+ * its "2048" value is only a placeholder that the query function
+ * overwrites with the current setting. The UD entry is compiled in only
+ * when _OPENIB_CMA_ is not defined.
+ */
+DAT_NAMED_ATTR ib_attrs[] = {
+       {
+        "DAT_IB_TRANSPORT_MTU", "2048"}
+       ,
+#ifdef DAT_EXTENSIONS
+       {
+        "DAT_EXTENSION_INTERFACE", "TRUE"}
+       ,
+       {
+        DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"}
+       ,
+       {
+        DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"}
+       ,
+       {
+        DAT_IB_ATTR_IMMED_DATA, "TRUE"}
+       ,
+#ifndef _OPENIB_CMA_
+       {
+        DAT_IB_ATTR_UD, "TRUE"}
+       ,
+#endif
+#ifdef DAPL_COUNTERS
+       {
+        DAT_ATTR_COUNTERS, "TRUE"}
+       ,
+#endif                         /* DAPL_COUNTERS */
+#endif
+};
+
+#define SPEC_ATTR_SIZE( x )     (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
+
+/*
+ * Point attr_ptr at the shared ib_attrs table and report its entry count.
+ * The MTU entry (index 0) is refreshed from this IA's transport named
+ * attribute; the table is a single global, so the value written here is
+ * visible through every pointer to ib_attrs.
+ */
+void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
+                                       IN DAT_PROVIDER_ATTR * attr_ptr)
+{
+       /* set MTU to actual settings */
+       ib_attrs[0].value = ia_ptr->hca_ptr->ib_trans.named_attr.value;
+
+       attr_ptr->provider_specific_attr = ib_attrs;
+       attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
+}
index f9204d95d7a7f5fa5f811305eb7e8853361379cf..5714aa3282be7499876a3b662f7651d23301d0d1 100644 (file)
@@ -18,16 +18,17 @@ USE_MSVCRT = 1
 \r
 SOURCES = \\r
        udapl.rc \\r
-       ..\dapl_common_src.c    \\r
-       ..\dapl_udapl_src.c             \\r
-       dapl_ib_cq.c                    \\r
-       dapl_ib_extensions.c    \\r
-       dapl_ib_mem.c                   \\r
-       dapl_ib_qp.c                    \\r
-       dapl_ib_util.c                  \\r
-       dapl_ib_cm.c\r
-\r
-INCLUDES = ..\include;..\common;windows;..\..\dat\include;\\r
+       ..\dapl_common_src.c             \\r
+       ..\dapl_udapl_src.c              \\r
+        ..\openib_common\mem.c           \\r
+        ..\openib_common\util.c          \\r
+        ..\openib_common\cq.c            \\r
+        ..\openib_common\qp.c            \\r
+        ..\openib_common\ib_extensions.c \\r
+        device.c                         \\r
+        cm.c\r
+\r
+INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\\r
                   ..\..\dat\udat\windows;..\udapl\windows;\\r
                   ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include\r
 \r
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
new file mode 100644 (file)
index 0000000..5708214
--- /dev/null
@@ -0,0 +1,1839 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          cm.c
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - connection management
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#if defined(_WIN32) || defined(_WIN64)
+/* Socket event flags for the Windows (select based) helpers below. */
+enum DAPL_FD_EVENTS {
+       DAPL_FD_READ = 0x1,
+       DAPL_FD_WRITE = 0x2,
+       DAPL_FD_ERROR = 0x4
+};
+
+/* Switch socket s to non-blocking mode; returns the ioctlsocket() result. */
+static int dapl_config_socket(DAPL_SOCKET s)
+{
+       unsigned long enable = 1;
+
+       return ioctlsocket(s, FIONBIO, &enable);
+}
+
+/*
+ * Start a connect on socket s. Returns 0 on success, EAGAIN when the
+ * connect would block, otherwise the WSAGetLastError() code.
+ */
+static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
+                              int addrlen)
+{
+       int rc = connect(s, addr, addrlen);
+
+       if (rc == SOCKET_ERROR)
+               rc = WSAGetLastError();
+
+       if (rc == WSAEWOULDBLOCK)
+               return EAGAIN;
+       return rc;
+}
+
+/*
+ * Three select() sets as used by the helpers below:
+ * set[0] read, set[1] write, set[2] error/exception.
+ */
+struct dapl_fd_set {
+       struct fd_set set[3];
+};
+
+/*
+ * Allocate one dapl_fd_set and return whatever dapl_os_alloc returned.
+ * No initialization is done here; see dapl_fd_zero.
+ */
+static struct dapl_fd_set *dapl_alloc_fd_set(void)
+{
+       struct dapl_fd_set *new_set;
+
+       new_set = dapl_os_alloc(sizeof(struct dapl_fd_set));
+       return new_set;
+}
+
+static void dapl_fd_zero(struct dapl_fd_set *set)
+{
+       FD_ZERO(&set->set[0]);
+       FD_ZERO(&set->set[1]);
+       FD_ZERO(&set->set[2]);
+}
+
+/*
+ * Add socket s to the read set when event is DAPL_FD_READ, otherwise to
+ * the write set, and always to the error set. Always returns 0.
+ */
+static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
+                      enum DAPL_FD_EVENTS event)
+{
+       int slot = (event == DAPL_FD_READ) ? 0 : 1;
+
+       FD_SET(s, &set->set[slot]);
+       FD_SET(s, &set->set[2]);
+       return 0;
+}
+
+/*
+ * Non-blocking check of socket s for a single event.
+ *
+ * Returns 0 when nothing is pending, event when the requested condition
+ * is ready, and DAPL_FD_ERROR when the socket is flagged in the error set
+ * or select() itself fails. A select() failure used to be returned as the
+ * raw WSAGetLastError() code, which is not a DAPL_FD_EVENTS value; it is
+ * now logged and reported as DAPL_FD_ERROR.
+ */
+static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
+{
+       struct fd_set rw_fds;
+       struct fd_set err_fds;
+       struct timeval tv;
+       int ret;
+
+       FD_ZERO(&rw_fds);
+       FD_ZERO(&err_fds);
+       FD_SET(s, &rw_fds);
+       FD_SET(s, &err_fds);
+
+       /* zero timeout: poll and return immediately */
+       tv.tv_sec = 0;
+       tv.tv_usec = 0;
+
+       if (event == DAPL_FD_READ)
+               ret = select(1, &rw_fds, NULL, &err_fds, &tv);
+       else
+               ret = select(1, NULL, &rw_fds, &err_fds, &tv);
+
+       if (ret == 0)
+               return 0;
+
+       if (ret == SOCKET_ERROR) {
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                            " dapl_poll: error 0x%x\n", WSAGetLastError());
+               return DAPL_FD_ERROR;
+       }
+
+       if (FD_ISSET(s, &rw_fds))
+               return event;
+
+       return DAPL_FD_ERROR;
+}
+
+/*
+ * Block in select() with no timeout on the three sets and return the
+ * select() result; a SOCKET_ERROR result is logged before returning.
+ */
+static int dapl_select(struct dapl_fd_set *set)
+{
+       int nready;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n");
+       nready = select(0, &set->set[0], &set->set[1], &set->set[2], NULL);
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n");
+
+       if (nready != SOCKET_ERROR)
+               return nready;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " dapl_select: error 0x%x\n", WSAGetLastError());
+       return nready;
+}
+#else                          // _WIN32 || _WIN64
+/* Socket event flags for the poll() based helpers; values are poll bits. */
+enum DAPL_FD_EVENTS {
+       DAPL_FD_READ = POLLIN,
+       DAPL_FD_WRITE = POLLOUT,
+       DAPL_FD_ERROR = POLLERR
+};
+
+/*
+ * Add O_NONBLOCK to the file status flags of s. Returns the negative
+ * fcntl(F_GETFL) result if that call fails, otherwise the result of
+ * fcntl(F_SETFL).
+ */
+static int dapl_config_socket(DAPL_SOCKET s)
+{
+       int flags = fcntl(s, F_GETFL);
+
+       if (flags < 0)
+               return flags;
+
+       return fcntl(s, F_SETFL, flags | O_NONBLOCK);
+}
+
+/*
+ * Start a connect on socket s.
+ *
+ * Returns 0 when the connect completed, EAGAIN when it is still in
+ * progress (connect failed with EINPROGRESS), otherwise the connect()
+ * result (-1, with errno describing the failure).
+ *
+ * errno is consulted only after connect() has failed; a stale
+ * EINPROGRESS left over from an earlier call must not turn a successful
+ * connect into EAGAIN.
+ */
+static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
+                              int addrlen)
+{
+       int ret;
+
+       ret = connect(s, addr, addrlen);
+       if (ret == -1 && errno == EINPROGRESS)
+               return EAGAIN;
+
+       return ret;
+}
+
+/*
+ * poll() descriptor array; index is the number of entries filled in by
+ * dapl_fd_set since the last dapl_fd_zero.
+ */
+struct dapl_fd_set {
+       int index;
+       struct pollfd set[DAPL_FD_SETSIZE];
+};
+
+/*
+ * Allocate one dapl_fd_set and return whatever dapl_os_alloc returned.
+ * index is not initialized here; see dapl_fd_zero.
+ */
+static struct dapl_fd_set *dapl_alloc_fd_set(void)
+{
+       struct dapl_fd_set *new_set;
+
+       new_set = dapl_os_alloc(sizeof(struct dapl_fd_set));
+       return new_set;
+}
+
+/* Empty the set by resetting the fill index; entries are not cleared. */
+static void dapl_fd_zero(struct dapl_fd_set *set)
+{
+       set->index = 0;
+}
+
+/*
+ * Append socket s with the requested event mask to the poll array.
+ * Returns 0 on success, or -1 (after logging) once index has reached
+ * DAPL_FD_SETSIZE - 1.
+ */
+static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
+                      enum DAPL_FD_EVENTS event)
+{
+       struct pollfd *slot;
+
+       if (set->index == DAPL_FD_SETSIZE - 1) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n",
+                        set->index + 1);
+               return -1;
+       }
+
+       slot = &set->set[set->index];
+       slot->fd = s;
+       slot->revents = 0;
+       slot->events = event;
+       set->index++;
+       return 0;
+}
+
+/*
+ * Non-blocking poll of socket s for event. Returns 0 when poll() reports
+ * nothing ready, DAPL_FD_ERROR when POLLERR, POLLHUP or POLLNVAL is
+ * flagged, otherwise the revents mask reported by poll().
+ */
+static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
+{
+       struct pollfd pfd;
+       int nready;
+
+       pfd.fd = s;
+       pfd.events = event;
+       pfd.revents = 0;
+
+       /* timeout 0: return immediately */
+       nready = poll(&pfd, 1, 0);
+       dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n",
+                s, nready, pfd.revents);
+
+       if (nready == 0)
+               return 0;
+
+       if (pfd.revents & (POLLERR | POLLHUP | POLLNVAL))
+               return DAPL_FD_ERROR;
+
+       return pfd.revents;
+}
+
+/*
+ * Block in poll() with no timeout on every descriptor queued in set and
+ * return the poll() result.
+ */
+static int dapl_select(struct dapl_fd_set *set)
+{
+       int nready;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n",
+                    set->index);
+
+       nready = poll(set->set, set->index, -1);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n",
+                    nready);
+       return nready;
+}
+#endif
+
+/*
+ * Allocate and zero a CM object for ep, initialize its lock, and set the
+ * protocol version (network byte order), an invalid socket and the owning
+ * EP. Returns NULL if the allocation or the lock initialization fails.
+ */
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
+{
+       dp_ib_cm_handle_t cm_ptr = dapl_os_alloc(sizeof(*cm_ptr));
+
+       if (cm_ptr == NULL)
+               return NULL;
+
+       (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
+
+       if (dapl_os_lock_init(&cm_ptr->lock)) {
+               /* lock setup failed, give the memory back */
+               dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+               return NULL;
+       }
+
+       cm_ptr->dst.ver = htons(DCM_VER);
+       cm_ptr->socket = DAPL_INVALID_SOCKET;
+       cm_ptr->ep = ep;
+       return cm_ptr;
+}
+
+/* mark for destroy, remove all references, schedule cleanup */
+/* cm_ptr == NULL (UD), then multi CR's, kill all associated with EP */
+/*
+ * cm_ptr != NULL: release that single CM object. An object still in
+ *                 DCM_INIT is closed and freed immediately; otherwise it is
+ *                 marked DCM_DESTROY, detached from its EP, its socket is
+ *                 closed and the work thread is woken up.
+ * cm_ptr == NULL: walk the HCA work list and mark every CR that belongs
+ *                 to ep as DCM_DESTROY, then wake up the work thread.
+ */
+void dapls_ib_cm_free(dp_ib_cm_handle_t cm_ptr, DAPL_EP *ep)
+{
+       DAPL_IA *ia_ptr;
+       DAPL_HCA *hca_ptr = NULL;
+       dp_ib_cm_handle_t cr, next_cr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " cm_destroy: cm %p ep %p\n", cm_ptr, ep);
+
+       if (cm_ptr == NULL)
+               goto multi_cleanup;
+
+       /* to notify cleanup thread */
+       hca_ptr = cm_ptr->hca;
+
+       /* cleanup, never made it to work queue */
+       /* NOTE(review): cm_ptr->lock is not destroyed on this path and the
+        * work thread is not notified - confirm that is intended */
+       if (cm_ptr->state == DCM_INIT) {
+               if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+                       shutdown(cm_ptr->socket, SHUT_RDWR);
+                       closesocket(cm_ptr->socket);
+               }
+               dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+               return;
+       }
+
+       dapl_os_lock(&cm_ptr->lock);
+       cm_ptr->state = DCM_DESTROY;
+       /* break the EP <-> CM link only if the EP still points at this CM */
+       if ((cm_ptr->ep) && (cm_ptr->ep->cm_handle == cm_ptr)) {
+               cm_ptr->ep->cm_handle = IB_INVALID_HANDLE;
+               cm_ptr->ep = NULL;
+       }
+
+       /* close socket if still active */
+       if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+               shutdown(cm_ptr->socket, SHUT_RDWR);
+               closesocket(cm_ptr->socket);
+               cm_ptr->socket = DAPL_INVALID_SOCKET;
+       }
+       dapl_os_unlock(&cm_ptr->lock);
+       goto notify_thread;
+
+multi_cleanup:
+
+       /*
+        * UD CR objects are kept active because of direct private data references
+        * from CONN events. The cr->socket is closed and marked inactive but the
+        * object remains allocated and queued on the CR resource list. There can
+        * be multiple CR's associated with a given EP. There is no way to determine
+        * when consumer is finished with event until the dat_ep_free.
+        *
+        * Schedule destruction for all CR's associated with this EP, cr_thread will
+        * complete the cleanup with state == DCM_DESTROY.
+        */
+       ia_ptr = ep->header.owner_ia;
+       dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
+       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
+                                 &ia_ptr->hca_ptr->ib_trans.list))
+            next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
+                                           &ia_ptr->hca_ptr->ib_trans.list);
+       else
+           next_cr = NULL;
+
+       /* the successor is fetched before the current entry is touched */
+       while (next_cr) {
+               cr = next_cr;
+               next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
+                                               &ia_ptr->hca_ptr->ib_trans.list,
+                                               (DAPL_LLIST_ENTRY*)&cr->entry);
+               if (cr->ep == ep)  {
+                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                                    " qp_free CR: ep %p cr %p\n", ep, cr);
+                       dapli_socket_disconnect(cr);
+                       dapl_os_lock(&cr->lock);
+                       /* remember an HCA so the work thread is woken below */
+                       hca_ptr = cr->hca;
+                       cr->ep = NULL;
+                       cr->state = DCM_DESTROY;
+                       dapl_os_unlock(&cr->lock);
+               }
+       }
+       dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
+
+notify_thread:
+
+       /* wakeup work thread, if something destroyed */
+       /* sizeof "w" is 2: the character and its terminating NUL are sent */
+       if (hca_ptr != NULL) {
+               if (send(hca_ptr->ib_trans.scm[1], 
+                        "w", sizeof "w", 0) == -1)
+                       dapl_log(DAPL_DBG_TYPE_CM,
+                                " cm_destroy: thread wakeup error = %s\n",
+                                strerror(errno));
+       }
+}
+
+/* queue socket for processing CM work */
+/*
+ * Append cm_ptr to its HCA's work list under the list lock, then write a
+ * wakeup message to the HCA's scm[1] socket; a failed wakeup is only
+ * logged.
+ */
+static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
+{
+       DAPL_LLIST_ENTRY *link = (DAPL_LLIST_ENTRY *) & cm_ptr->entry;
+
+       /* add to work queue for cr thread processing */
+       dapl_llist_init_entry(link);
+
+       dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
+       dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list, link, cm_ptr);
+       dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
+
+       /* wakeup CM work thread */
+       if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) != -1)
+               return;
+
+       dapl_log(DAPL_DBG_TYPE_CM,
+                " cm_queue: thread wakeup error = %s\n",
+                strerror(errno));
+}
+
+/*
+ * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect
+ *                 or from ep_free
+ */
+/*
+ * Send the disconnect marker to the peer, close the socket and move the
+ * CM object to DCM_DISCONNECTED. Nothing is done when the object has no
+ * EP or is already in DCM_INIT, DCM_DISCONNECTED or DCM_DESTROY. For RC
+ * endpoints an IB_CME_DISCONNECTED event is then delivered through the
+ * CR callback (when ep_ptr->cr_ptr is set) or the EVD connection
+ * callback. Always returns DAT_SUCCESS.
+ */
+DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
+{
+       DAPL_EP *ep_ptr = cm_ptr->ep;
+       DAT_UINT32 disc_data = htonl(0xdead);
+
+       if (ep_ptr == NULL)
+               return DAT_SUCCESS;
+
+       dapl_os_lock(&cm_ptr->lock);
+
+       /* nothing to tear down in these states */
+       if (cm_ptr->state == DCM_INIT ||
+           cm_ptr->state == DCM_DISCONNECTED ||
+           cm_ptr->state == DCM_DESTROY) {
+               dapl_os_unlock(&cm_ptr->lock);
+               return DAT_SUCCESS;
+       }
+
+       /* send disc data, close socket, schedule destroy */
+       if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+               if (send(cm_ptr->socket, (char *)&disc_data,
+                        sizeof(disc_data), 0) == -1)
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                " cm_disc: write error = %s\n",
+                                strerror(errno));
+               shutdown(cm_ptr->socket, SHUT_RDWR);
+               closesocket(cm_ptr->socket);
+               cm_ptr->socket = DAPL_INVALID_SOCKET;
+       }
+       cm_ptr->state = DCM_DISCONNECTED;
+       dapl_os_unlock(&cm_ptr->lock);
+
+       /* disconnect events for RC's only */
+       if (ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC)
+               return DAT_SUCCESS;
+
+       if (ep_ptr->cr_ptr)
+               dapls_cr_callback(cm_ptr,
+                                 IB_CME_DISCONNECTED,
+                                 NULL,
+                                 ((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr);
+       else
+               dapl_evd_connection_callback(ep_ptr->cm_handle,
+                                            IB_CME_DISCONNECTED,
+                                            NULL, ep_ptr);
+
+       /* scheduled destroy via disconnect clean in callback */
+       return DAT_SUCCESS;
+}
+
+/*
+ * ACTIVE: socket connected, send QP information to peer 
+ */
+/*
+ * cm_ptr: CM object whose socket connect has finished.
+ * err:    0 on success; -1 is logged as a POLL failure using errno, any
+ *         other value is logged as a SOCKOPT error code.
+ *
+ * On success the local QP information (cm_ptr->dst) and any private data
+ * are written to the peer and the state becomes DCM_RTU_PENDING. On any
+ * failure the CM object is released and an IB_CME_LOCAL_FAILURE event is
+ * posted for the EP.
+ */
+static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
+{
+       int len, opt = 1;
+       struct iovec iov[2];
+       /* captured up front: still needed after cm_ptr is released at bail */
+       struct dapl_ep *ep_ptr = cm_ptr->ep;
+
+       if (err) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_PENDING: %s ERR %s -> %s %d\n",
+                        err == -1 ? "POLL" : "SOCKOPT",
+                        err == -1 ? strerror(errno) : strerror(err), 
+                        inet_ntoa(((struct sockaddr_in *)
+                                  ep_ptr->param.
+                                  remote_ia_address_ptr)->sin_addr), 
+                        ntohs(((struct sockaddr_in *)
+                               &cm_ptr->dst.ia_address)->sin_port));
+               goto bail;
+       }
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " socket connected, write QP and private data\n");
+
+       /* no delay for small packets */
+       setsockopt(cm_ptr->socket, IPPROTO_TCP, TCP_NODELAY,
+                  (char *)&opt, sizeof(opt));
+
+       /* send qp info and pdata to remote peer */
+       /* dst.p_size is kept in network byte order, hence the ntohl below */
+       iov[0].iov_base = (void *)&cm_ptr->dst;
+       iov[0].iov_len = sizeof(ib_qp_cm_t);
+       if (cm_ptr->dst.p_size) {
+               iov[1].iov_base = cm_ptr->p_data;
+               iov[1].iov_len = ntohl(cm_ptr->dst.p_size);
+               len = writev(cm_ptr->socket, iov, 2);
+       } else {
+               len = writev(cm_ptr->socket, iov, 1);
+       }
+
+       /* a short or failed write is treated as a connect failure */
+       if (len != (ntohl(cm_ptr->dst.p_size) + sizeof(ib_qp_cm_t))) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_PENDING write: ERR %s, wcnt=%d -> %s\n",
+                        strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
+                                                         ep_ptr->param.
+                                                         remote_ia_address_ptr)->
+                                                        sin_addr));
+               goto bail;
+       }
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " connected: sending SRC port=0x%x lid=0x%x,"
+                    " qpn=0x%x, psize=%d\n",
+                    ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid),
+                    ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " connected: sending SRC GID subnet %016llx id %016llx\n",
+                    (unsigned long long)
+                    htonll(cm_ptr->dst.gid.global.subnet_prefix),
+                    (unsigned long long)
+                    htonll(cm_ptr->dst.gid.global.interface_id));
+
+       /* queue up to work thread to avoid blocking consumer */
+       cm_ptr->state = DCM_RTU_PENDING;
+       return;
+      bail:
+       /* close socket, free cm structure and post error event */
+       dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+       dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, ep_ptr);
+}
+
+/*
+ * ACTIVE: Create socket, connect, defer exchange QP information to CR thread
+ * to avoid blocking. 
+ */
+DAT_RETURN
+dapli_socket_connect(DAPL_EP * ep_ptr,
+                    DAT_IA_ADDRESS_PTR r_addr,
+                    DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
+{
+       dp_ib_cm_handle_t cm_ptr;
+       int ret;
+       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+       struct sockaddr_in addr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n",
+                    r_qual, p_size);
+
+       cm_ptr = dapls_ib_cm_create(ep_ptr);
+       if (cm_ptr == NULL)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       /* create, connect, sockopt, and exchange QP information */
+       if ((cm_ptr->socket =
+            socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
+               dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+               return DAT_INSUFFICIENT_RESOURCES;
+       }
+
+       ret = dapl_config_socket(cm_ptr->socket);
+       if (ret < 0) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " socket connect: config socket %d ERR %d %s\n",
+                        cm_ptr->socket, ret, strerror(errno));
+               goto bail;
+       }
+
+       dapl_os_memcpy(&addr, r_addr, sizeof(addr));
+       addr.sin_port = htons(r_qual);
+       ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&addr,
+                                 sizeof(addr));
+       if (ret && ret != EAGAIN) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " socket connect ERROR: %s -> %s r_qual %d\n",
+                        strerror(errno),
+                        inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
+               dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+               return DAT_INVALID_ADDRESS;
+       }
+
+       /* Send QP info, IA address, and private data */
+       cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
+#ifdef DAT_EXTENSIONS
+       cm_ptr->dst.qp_type = htons(ep_ptr->qp_handle->qp_type);
+#endif
+       cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
+       cm_ptr->dst.lid = ia_ptr->hca_ptr->ib_trans.lid;
+       cm_ptr->dst.gid = ia_ptr->hca_ptr->ib_trans.gid;
+
+       /* save references */
+       cm_ptr->hca = ia_ptr->hca_ptr;
+       cm_ptr->ep = ep_ptr;
+       cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
+       ((struct sockaddr_in *)
+               &cm_ptr->dst.ia_address)->sin_port = ntohs(r_qual);
+
+       if (p_size) {
+               cm_ptr->dst.p_size = htonl(p_size);
+               dapl_os_memcpy(cm_ptr->p_data, p_data, p_size);
+       }
+
+       /* connected or pending, either way results via async event */
+       if (ret == 0)
+               dapli_socket_connected(cm_ptr, 0);
+       else
+               cm_ptr->state = DCM_CONN_PENDING;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " connect: socket %d to %s r_qual %d pending\n",
+                    cm_ptr->socket,
+                    inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
+
+       dapli_cm_queue(cm_ptr);
+       return DAT_SUCCESS;
+      bail:
+       dapl_log(DAPL_DBG_TYPE_ERR,
+                " socket connect ERROR: %s query lid(0x%x)/gid"
+                " -> %s r_qual %d\n",
+                strerror(errno), ntohs(cm_ptr->dst.lid),
+                inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
+                (unsigned int)r_qual);
+
+       /* close socket, free cm structure */
+       dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+       return DAT_INTERNAL_ERROR;
+}
+
+/*
+ * ACTIVE: exchange QP information, called from CR thread
+ */
+static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
+{
+       DAPL_EP *ep_ptr = cm_ptr->ep;
+       int len;
+       short rtu_data = htons(0x0E0F);
+       ib_cm_events_t event = IB_CME_DESTINATION_REJECT;
+
+       /* read DST information into cm_ptr, overwrite SRC info */
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: recv peer QP data\n");
+
+       len = recv(cm_ptr->socket, (char *)&cm_ptr->dst, sizeof(ib_qp_cm_t), 0);
+       if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) != DCM_VER) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_RTU read: ERR %s, rcnt=%d, ver=%d -> %s\n",
+                        strerror(errno), len, cm_ptr->dst.ver,
+                        inet_ntoa(((struct sockaddr_in *)
+                                   ep_ptr->param.remote_ia_address_ptr)->
+                                  sin_addr));
+               goto bail;
+       }
+
+       /* convert peer response values to host order */
+       cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
+       cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
+       cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
+#ifdef DAT_EXTENSIONS
+       cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
+#endif
+       cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
+
+       /* save remote address information */
+       dapl_os_memcpy(&ep_ptr->remote_ia_address,
+                      &cm_ptr->dst.ia_address,
+                      sizeof(ep_ptr->remote_ia_address));
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " CONN_RTU: DST %s port=0x%x lid=0x%x,"
+                    " qpn=0x%x, qp_type=%d, psize=%d\n",
+                    inet_ntoa(((struct sockaddr_in *)
+                               &cm_ptr->dst.ia_address)->sin_addr),
+                    cm_ptr->dst.port, cm_ptr->dst.lid,
+                    cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
+
+       /* validate private data size before reading */
+       if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_RTU read: psize (%d) wrong -> %s\n",
+                        cm_ptr->dst.p_size, inet_ntoa(((struct sockaddr_in *)
+                                                       ep_ptr->param.
+                                                       remote_ia_address_ptr)->
+                                                      sin_addr));
+               goto bail;
+       }
+
+       /* read private data into cm_handle if any present */
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " socket connected, read private data\n");
+       if (cm_ptr->dst.p_size) {
+               len =
+                   recv(cm_ptr->socket, cm_ptr->p_data, cm_ptr->dst.p_size, 0);
+               if (len != cm_ptr->dst.p_size) {
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                " CONN_RTU read pdata: ERR %s, rcnt=%d -> %s\n",
+                                strerror(errno), len,
+                                inet_ntoa(((struct sockaddr_in *)
+                                           ep_ptr->param.
+                                           remote_ia_address_ptr)->sin_addr));
+                       goto bail;
+               }
+       }
+
+       /* check for consumer reject */
+       if (cm_ptr->dst.rej) {
+               dapl_log(DAPL_DBG_TYPE_CM,
+                        " CONN_RTU read: PEER REJ reason=0x%x -> %s\n",
+                        ntohs(cm_ptr->dst.rej),
+                        inet_ntoa(((struct sockaddr_in *)
+                                   ep_ptr->param.remote_ia_address_ptr)->
+                                  sin_addr));
+               event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
+#ifdef DAT_EXTENSIONS
+               if (cm_ptr->dst.qp_type == IBV_QPT_UD) 
+                       goto ud_bail;
+               else
+#endif
+               goto bail;
+       }
+
+       /* modify QP to RTR and then to RTS with remote info */
+       dapl_os_lock(&ep_ptr->header.lock);
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                 IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_RTU: QPS_RTR ERR %s -> %s\n",
+                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
+                                                    ep_ptr->param.
+                                                    remote_ia_address_ptr)->
+                                                   sin_addr));
+               dapl_os_unlock(&ep_ptr->header.lock);
+               goto bail;
+       }
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                 IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_RTU: QPS_RTS ERR %s -> %s\n",
+                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
+                                                    ep_ptr->param.
+                                                    remote_ia_address_ptr)->
+                                                   sin_addr));
+               dapl_os_unlock(&ep_ptr->header.lock);
+               goto bail;
+       }
+       dapl_os_unlock(&ep_ptr->header.lock);
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
+
+       /* complete handshake after final QP state change */
+       if (send(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0) == -1) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_RTU: write error = %s\n", strerror(errno));
+               goto bail;
+       }
+       /* init cm_handle and post the event with private data */
+       cm_ptr->state = DCM_CONNECTED;
+       event = IB_CME_CONNECTED;
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n");
+
+#ifdef DAT_EXTENSIONS
+ud_bail:
+       if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+               DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+               /* post EVENT, modify_qp created ah */
+               xevent.status = 0;
+               xevent.type = DAT_IB_UD_REMOTE_AH;
+               xevent.remote_ah.ah = cm_ptr->ah;
+               xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+               dapl_os_memcpy(&xevent.remote_ah.ia_addr,
+                              &cm_ptr->dst.ia_address,
+                              sizeof(cm_ptr->dst.ia_address));
+
+               if (event == IB_CME_CONNECTED)
+                       event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
+               else
+                       event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
+
+               dapls_evd_post_connection_event_ext((DAPL_EVD *) ep_ptr->param.
+                                                   connect_evd_handle,
+                                                   event,
+                                                   (DAT_EP_HANDLE) ep_ptr,
+                                                   (DAT_COUNT) cm_ptr->dst.p_size,
+                                                   (DAT_PVOID *) cm_ptr->p_data,
+                                                   (DAT_PVOID *) &xevent);
+
+               /* done with socket, don't destroy cm_ptr, need pdata */
+               closesocket(cm_ptr->socket);
+               cm_ptr->socket = DAPL_INVALID_SOCKET;
+               cm_ptr->state = DCM_RELEASED;
+       } else
+#endif
+       {
+               ep_ptr->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
+               dapl_evd_connection_callback(cm_ptr,
+                                            IB_CME_CONNECTED,
+                                            cm_ptr->p_data, ep_ptr);
+       }
+       return;
+
+bail:
+       /* close socket, and post error event */
+       dapls_ib_reinit_ep(ep_ptr);     /* reset QP state */
+       closesocket(cm_ptr->socket);
+       cm_ptr->socket = DAPL_INVALID_SOCKET;
+       dapl_evd_connection_callback(NULL, event, cm_ptr->p_data, ep_ptr);
+}
+
+/*
+ * PASSIVE: Create socket, listen, accept, exchange QP information
+ *
+ * Creates a listening TCP socket bound to INADDR_ANY on port serviceID,
+ * stores the new cm object in sp_ptr->cm_srvc_handle and queues it in
+ * state DCM_LISTEN so inbound CR's get processed.
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES      cm object or socket creation failed
+ *     DAT_CONN_QUAL_IN_USE            bind/listen failed with EADDRINUSE
+ *     DAT_CONN_QUAL_UNAVAILABLE       bind/listen failed for another reason
+ */
+DAT_RETURN
+dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
+{
+       struct sockaddr_in addr = { 0 };        /* no stale bytes in sin_zero */
+       ib_cm_srvc_handle_t cm_ptr = NULL;
+       int opt = 1;
+       int err;
+       DAT_RETURN dat_status = DAT_SUCCESS;
+
+       /* serviceID is wider than int, cast to match the format */
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+                    ia_ptr, (int)serviceID, sp_ptr);
+
+       cm_ptr = dapls_ib_cm_create(NULL);
+       if (cm_ptr == NULL)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       cm_ptr->sp = sp_ptr;
+       cm_ptr->hca = ia_ptr->hca_ptr;
+
+       /* bind, listen, set sockopt, accept, exchange data */
+       if ((cm_ptr->socket =
+            socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
+               dapl_log(DAPL_DBG_TYPE_ERR, " ERR: listen socket create: %s\n",
+                        strerror(errno));
+               dat_status = DAT_INSUFFICIENT_RESOURCES;
+               goto bail;
+       }
+
+       setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR,
+                  (char *)&opt, sizeof(opt));
+       addr.sin_port = htons(serviceID);
+       addr.sin_family = AF_INET;
+       addr.sin_addr.s_addr = INADDR_ANY;
+
+       if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+           || (listen(cm_ptr->socket, 128) < 0)) {
+               /* capture errno first, the logging call may overwrite it */
+               err = errno;
+               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                            " listen: ERROR %s on conn_qual 0x%x\n",
+                            strerror(err), (unsigned int)serviceID);
+               if (err == EADDRINUSE)
+                       dat_status = DAT_CONN_QUAL_IN_USE;
+               else
+                       dat_status = DAT_CONN_QUAL_UNAVAILABLE;
+               goto bail;
+       }
+
+       /* set cm_handle for this service point, save listen socket */
+       sp_ptr->cm_srvc_handle = cm_ptr;
+
+       /* queue up listen socket to process inbound CR's */
+       cm_ptr->state = DCM_LISTEN;
+       dapli_cm_queue(cm_ptr);
+
+       /* serviceID is host order, print it as is */
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " listen: qual 0x%x cr %p s_fd %d\n",
+                    (unsigned int)serviceID, cm_ptr, cm_ptr->socket);
+
+       return dat_status;
+      bail:
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " listen: ERROR on conn_qual 0x%x\n",
+                    (unsigned int)serviceID);
+       dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+       return dat_status;
+}
+
+/*
+ * PASSIVE: accept socket
+ *
+ * For each pending connection on the listen socket of cm_ptr, allocate
+ * a new cm object that inherits sp and hca from the listener, accept
+ * into it and queue it in state DCM_ACCEPTING. Returns silently when a
+ * cm object cannot be allocated or accept() fails.
+ */
+static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
+{
+       dp_ib_cm_handle_t acm_ptr;
+       socklen_t len;  /* accept() takes socklen_t *, avoid casting int * */
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket_accept\n");
+
+       /* 
+        * Accept all CR's on this port to avoid half-connection (SYN_RCV)
+        * stalls with many to one connection storms
+        */
+       do {
+               /* Allocate accept CM and initialize */
+               if ((acm_ptr = dapls_ib_cm_create(NULL)) == NULL)
+                       return;
+
+               acm_ptr->sp = cm_ptr->sp;
+               acm_ptr->hca = cm_ptr->hca;
+
+               /* peer address lands directly in dst.ia_address */
+               len = sizeof(acm_ptr->dst.ia_address);
+               acm_ptr->socket = accept(cm_ptr->socket,
+                                       (struct sockaddr *)
+                                       &acm_ptr->dst.ia_address,
+                                       &len);
+               if (acm_ptr->socket == DAPL_INVALID_SOCKET) {
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                               " accept: ERR %s on FD %d l_cr %p\n",
+                               strerror(errno), cm_ptr->socket, cm_ptr);
+                       dapls_ib_cm_free(acm_ptr, acm_ptr->ep);
+                       return;
+               }
+
+               acm_ptr->state = DCM_ACCEPTING;
+               dapli_cm_queue(acm_ptr);
+
+       } while (dapl_poll(cm_ptr->socket, DAPL_FD_READ) == DAPL_FD_READ);
+}
+
+/*
+ * PASSIVE: receive peer QP information, private data, post cr_event
+ *
+ * Reads one ib_qp_cm_t from the accepted socket into acm_ptr->dst,
+ * checks the protocol version against DCM_VER, converts the fields to
+ * host order and then reads dst.p_size bytes of private data into
+ * acm_ptr->p_data. On success the state becomes DCM_ACCEPTING_DATA and
+ * a connection request event is posted for acm_ptr->sp. On any failure
+ * the cm object is handed to dapls_ib_cm_free() and no event is posted.
+ */
+static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
+{
+       int len;
+       void *p_data = NULL;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read QP data\n");
+
+       /* read in DST QP info, IA address. check for private data */
+       len =
+           recv(acm_ptr->socket, (char *)&acm_ptr->dst, sizeof(ib_qp_cm_t), 0);
+       if (len != sizeof(ib_qp_cm_t) || ntohs(acm_ptr->dst.ver) != DCM_VER) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " accept read: ERR %s, rcnt=%d, ver=%d\n",
+                        strerror(errno), len, ntohs(acm_ptr->dst.ver));
+               goto bail;
+       }
+
+       /* convert accepted values to host order */
+       acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
+       acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
+       acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
+#ifdef DAT_EXTENSIONS
+       acm_ptr->dst.qp_type = ntohs(acm_ptr->dst.qp_type);
+#endif
+       acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                    inet_ntoa(((struct sockaddr_in *)&acm_ptr->dst.
+                               ia_address)->sin_addr), acm_ptr->dst.port,
+                    acm_ptr->dst.lid, acm_ptr->dst.qpn, acm_ptr->dst.p_size);
+
+       /* validate private data size before reading, it is peer supplied */
+       if (acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " accept read: psize (%d) wrong\n",
+                            acm_ptr->dst.p_size);
+               goto bail;
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read private data\n");
+
+       /* read private data into cm_handle if any present */
+       if (acm_ptr->dst.p_size) {
+               len =
+                   recv(acm_ptr->socket, acm_ptr->p_data, acm_ptr->dst.p_size,
+                        0);
+               if (len != acm_ptr->dst.p_size) {
+                       dapl_log(DAPL_DBG_TYPE_ERR,
+                                " accept read pdata: ERR %s, rcnt=%d\n",
+                                strerror(errno), len);
+                       goto bail;
+               }
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, " accept: psize=%d read\n", len);
+               p_data = acm_ptr->p_data;
+       }
+
+       acm_ptr->state = DCM_ACCEPTING_DATA;
+
+#ifdef DAT_EXTENSIONS
+       if (acm_ptr->dst.qp_type == IBV_QPT_UD) {
+               DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+               /* post extended CR event; only status and type are set */
+               xevent.status = 0;
+               xevent.type = DAT_IB_UD_CONNECT_REQUEST;
+
+               dapls_evd_post_cr_event_ext(acm_ptr->sp,
+                                           DAT_IB_UD_CONNECTION_REQUEST_EVENT,
+                                           acm_ptr,
+                                           (DAT_COUNT) acm_ptr->dst.p_size,
+                                           (DAT_PVOID *) acm_ptr->p_data,
+                                           (DAT_PVOID *) & xevent);
+       } else
+#endif
+               /* trigger CR event; p_data stays NULL when none was sent */
+               dapls_cr_callback(acm_ptr,
+                                 IB_CME_CONNECTION_REQUEST_PENDING,
+                                 p_data, acm_ptr->sp);
+       return;
+      bail:
+       /* close socket, free cm structure, active will see socket close as reject */
+       dapls_ib_cm_free(acm_ptr, acm_ptr->ep);
+       return;
+}
+
+/*
+ * PASSIVE: consumer accept, send local QP information, private data, 
+ * queue on work thread to receive RTU information to avoid blocking
+ * user thread. 
+ */
+DAT_RETURN
+dapli_socket_accept_usr(DAPL_EP * ep_ptr,
+                       DAPL_CR * cr_ptr, DAT_COUNT p_size, DAT_PVOID p_data)
+{
+       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
+       dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
+       ib_qp_cm_t local;
+       struct iovec iov[2];
+       int len;
+
+       if (p_size > IB_MAX_REP_PDATA_SIZE)
+               return DAT_LENGTH_ERROR;
+
+       /* must have a accepted socket */
+       if (cm_ptr->socket == DAPL_INVALID_SOCKET)
+               return DAT_INTERNAL_ERROR;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " ACCEPT_USR: remote port=0x%x lid=0x%x"
+                    " qpn=0x%x qp_type %d, psize=%d\n",
+                    cm_ptr->dst.port, cm_ptr->dst.lid,
+                    cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
+
+#ifdef DAT_EXTENSIONS
+       if (cm_ptr->dst.qp_type == IBV_QPT_UD &&
+           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " ACCEPT_USR: ERR remote QP is UD,"
+                            ", but local QP is not\n");
+               return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
+       }
+#endif
+
+       /* modify QP to RTR and then to RTS with remote info already read */
+       dapl_os_lock(&ep_ptr->header.lock);
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                 IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " ACCEPT_USR: QPS_RTR ERR %s -> %s\n",
+                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
+                                                    &cm_ptr->dst.ia_address)->
+                                                   sin_addr));
+               dapl_os_unlock(&ep_ptr->header.lock);
+               goto bail;
+       }
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                 IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " ACCEPT_USR: QPS_RTS ERR %s -> %s\n",
+                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
+                                                    &cm_ptr->dst.ia_address)->
+                                                   sin_addr));
+               dapl_os_unlock(&ep_ptr->header.lock);
+               goto bail;
+       }
+       dapl_os_unlock(&ep_ptr->header.lock);
+
+       /* save remote address information */
+       dapl_os_memcpy(&ep_ptr->remote_ia_address,
+                      &cm_ptr->dst.ia_address,
+                      sizeof(ep_ptr->remote_ia_address));
+
+       /* send our QP info, IA address, pdata. Don't overwrite dst data */
+       local.ver = htons(DCM_VER);
+       local.rej = 0;
+       local.qpn = htonl(ep_ptr->qp_handle->qp_num);
+       local.qp_type = htons(ep_ptr->qp_handle->qp_type);
+       local.port = htons(ia_ptr->hca_ptr->port_num);
+       local.lid = ia_ptr->hca_ptr->ib_trans.lid;
+       local.gid = ia_ptr->hca_ptr->ib_trans.gid;
+       local.ia_address = ia_ptr->hca_ptr->hca_address;
+       ((struct sockaddr_in *)&local.ia_address)->sin_port = 
+               ntohs(cm_ptr->sp->conn_qual);
+
+       local.p_size = htonl(p_size);
+       iov[0].iov_base = (void *)&local;
+       iov[0].iov_len = sizeof(ib_qp_cm_t);
+       if (p_size) {
+               iov[1].iov_base = p_data;
+               iov[1].iov_len = p_size;
+               len = writev(cm_ptr->socket, iov, 2);
+       } else {
+               len = writev(cm_ptr->socket, iov, 1);
+       }
+
+       if (len != (p_size + sizeof(ib_qp_cm_t))) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " ACCEPT_USR: ERR %s, wcnt=%d -> %s\n",
+                        strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
+                                                         &cm_ptr->dst.
+                                                         ia_address)->
+                                                        sin_addr));
+               goto bail;
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " ACCEPT_USR: local port=0x%x lid=0x%x"
+                    " qpn=0x%x psize=%d\n",
+                    ntohs(local.port), ntohs(local.lid),
+                    ntohl(local.qpn), ntohl(local.p_size));
+       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                    " ACCEPT_USR SRC GID subnet %016llx id %016llx\n",
+                    (unsigned long long)
+                    htonll(local.gid.global.subnet_prefix),
+                    (unsigned long long)
+                    htonll(local.gid.global.interface_id));
+
+       /* save state and reference to EP, queue for RTU data */
+       cm_ptr->ep = ep_ptr;
+       cm_ptr->hca = ia_ptr->hca_ptr;
+       cm_ptr->state = DCM_ACCEPTED;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
+       return DAT_SUCCESS;
+      bail:
+       dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+       dapls_ib_reinit_ep(ep_ptr);     /* reset QP state */
+       return DAT_INTERNAL_ERROR;
+}
+
+/*
+ * PASSIVE: read RTU from active peer, post CONN event
+ */
+void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
+{
+       int len;
+       short rtu_data = 0;
+
+       /* complete handshake after final QP state change */
+       len = recv(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0);
+       if (len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " ACCEPT_RTU: ERR %s, rcnt=%d rdata=%x\n",
+                        strerror(errno), len, ntohs(rtu_data),
+                        inet_ntoa(((struct sockaddr_in *)
+                                   &cm_ptr->dst.ia_address)->sin_addr));
+               goto bail;
+       }
+
+       /* save state and reference to EP, queue for disc event */
+       cm_ptr->state = DCM_CONNECTED;
+
+       /* final data exchange if remote QP state is good to go */
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n");
+
+#ifdef DAT_EXTENSIONS
+       if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
+               DAT_IB_EXTENSION_EVENT_DATA xevent;
+
+               /* post EVENT, modify_qp created ah */
+               xevent.status = 0;
+               xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
+               xevent.remote_ah.ah = cm_ptr->ah;
+               xevent.remote_ah.qpn = cm_ptr->dst.qpn;
+               dapl_os_memcpy(&xevent.remote_ah.ia_addr,
+                              &cm_ptr->dst.ia_address,
+                              sizeof(cm_ptr->dst.ia_address));
+
+               dapls_evd_post_connection_event_ext((DAPL_EVD *) cm_ptr->ep->
+                                                   param.connect_evd_handle,
+                                                   DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
+                                                   (DAT_EP_HANDLE) cm_ptr->ep,
+                                                   (DAT_COUNT) cm_ptr->dst.p_size,
+                                                   (DAT_PVOID *) cm_ptr->p_data,
+                                                   (DAT_PVOID *) &xevent);
+
+                /* done with socket, don't destroy cm_ptr, need pdata */
+                closesocket(cm_ptr->socket);
+                cm_ptr->socket = DAPL_INVALID_SOCKET;
+               cm_ptr->state = DCM_RELEASED;
+       } else {
+#endif
+               cm_ptr->ep->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
+               dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
+       }
+       return;
+      
+bail:
+       dapls_ib_reinit_ep(cm_ptr->ep); /* reset QP state */
+       dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+       dapls_cr_callback(cm_ptr, IB_CME_DESTINATION_REJECT, NULL, cm_ptr->sp);
+}
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node
+ *
+ * Input:
+ *     ep_handle               endpoint to connect, used as a DAPL_EP
+ *     remote_ia_address       address of the remote IA
+ *     remote_conn_qual        remote connection qualifier
+ *     private_data_size       size of private data
+ *     private_data            pointer to private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     the status returned by dapli_socket_connect()
+ *
+ */
+DAT_RETURN
+dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
+                IN DAT_IA_ADDRESS_PTR remote_ia_address,
+                IN DAT_CONN_QUAL remote_conn_qual,
+                IN DAT_COUNT private_data_size, IN void *private_data)
+{
+       DAPL_EP *ep_ptr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " connect(ep_handle %p ....)\n", ep_handle);
+
+       ep_ptr = (DAPL_EP *) ep_handle;
+
+       return (dapli_socket_connect(ep_ptr, remote_ia_address,
+                                    remote_conn_qual,
+                                    private_data_size, private_data));
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP. The QP is always reinitialized; the socket level
+ * disconnect only runs when the EP still has a cm handle and is not
+ * already in the disconnected state.
+ *
+ * Input:
+ *     ep_ptr          endpoint to disconnect
+ *     close_flags     not used by this routine
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS, or the status of dapli_socket_disconnect()
+ */
+DAT_RETURN
+dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    "dapls_ib_disconnect(ep_handle %p ....)\n", ep_ptr);
+
+       /* reinit to modify QP state */
+       dapls_ib_reinit_ep(ep_ptr);
+
+       /* something left to tear down at the socket level? */
+       if (ep_ptr->cm_handle != NULL &&
+           ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED)
+               return dapli_socket_disconnect(ep_ptr->cm_handle);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Clean up outstanding connection data. This routine is invoked
+ * after the final disconnect callback has occurred. Only on the
+ * ACTIVE side of a connection. It is also called if dat_ep_connect
+ * times out using the consumer supplied timeout value.
+ *
+ * Input:
+ *     ep_ptr          DAPL_EP
+ *     active          Indicates active side of connection (not used here)
+ *     ib_cm_event     CM event that triggered the cleanup (not used here)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     void
+ *
+ */
+void
+dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
+                         IN DAT_BOOLEAN active,
+                         IN const ib_cm_events_t ib_cm_event)
+{
+       /* NOTE: SCM will only initialize cm_handle with RC type
+        *
+        * For UD there can be many in-flight CR's, so timed out CR's
+        * cannot be cleaned up from the EP reference alone since they
+        * all share the same EP. The common code that handles the
+        * connection timeout logic needs to be updated for UD support.
+        */
+       if (ep_ptr->cm_handle == NULL)
+               return;
+
+       dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
+}
+
+/*
+ * dapls_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener.
+ *
+ * Input:
+ *     ia_ptr          IA the listener belongs to
+ *     ServiceID       connection qualifier to listen on
+ *     sp_ptr          service point that receives the listen handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     DAT_CONN_QUAL_UNAVAILABLE
+ *     DAT_CONN_QUAL_IN_USE
+ *
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
+                            IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
+{
+       DAT_RETURN status;
+
+       /* all of the work happens in the socket CM listen routine */
+       status = dapli_socket_listen(ia_ptr, ServiceID, sp_ptr);
+       return status;
+}
+
+/*
+ * dapls_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener.
+ *
+ * Input:
+ *     ia_ptr          IA handle (only logged)
+ *     sp_ptr          service point whose listen handle is removed
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     always, also when no listener is attached
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
+{
+       ib_cm_srvc_handle_t listener = sp_ptr->cm_srvc_handle;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+                    ia_ptr, sp_ptr, listener);
+
+       /* nothing attached to this service point */
+       if (listener == NULL)
+               return DAT_SUCCESS;
+
+       /* close the listen socket if it is still open */
+       if (listener->socket != DAPL_INVALID_SOCKET) {
+               shutdown(listener->socket, SHUT_RDWR);
+               closesocket(listener->socket);
+               listener->socket = DAPL_INVALID_SOCKET;
+       }
+
+       /* mark for destruction, cr_thread will free */
+       listener->state = DCM_DESTROY;
+       sp_ptr->cm_srvc_handle = NULL;
+
+       /* wake the work thread through the hca's scm socket */
+       if (send(listener->hca->ib_trans.scm[1],
+                "w", sizeof "w", 0) == -1)
+               dapl_log(DAPL_DBG_TYPE_CM,
+                        " cm_destroy: thread wakeup error = %s\n",
+                        strerror(errno));
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Accept a pending connection request on behalf of the consumer.
+ * An endpoint that has no QP attached yet gets one allocated first;
+ * the accept itself is carried out by dapli_socket_accept_usr().
+ *
+ * Input:
+ *     cr_handle       connection request being accepted (DAPL_CR)
+ *     ep_handle       endpoint to accept on (DAPL_EP)
+ *     p_size          private data size
+ *     p_data          private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     the failure status of dapls_ib_qp_alloc() if the QP allocation
+ *     fails, otherwise the value returned by dapli_socket_accept_usr()
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
+                          IN DAT_EP_HANDLE ep_handle,
+                          IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
+{
+       DAPL_CR *cr = (DAPL_CR *) cr_handle;
+       DAPL_EP *ep = (DAPL_EP *) ep_handle;
+       DAT_RETURN rc;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
+                    cr_handle, ep_handle, p_data, p_size);
+
+       /* an endpoint without a QP needs one before it can accept */
+       if (ep->qp_state == DAPL_QP_STATE_UNATTACHED) {
+               rc = dapls_ib_qp_alloc(ep->header.owner_ia, ep, ep);
+               if (rc != DAT_SUCCESS)
+                       return rc;
+       }
+
+       rc = dapli_socket_accept_usr(ep, cr, p_size, p_data);
+       return rc;
+}
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection.
+ *
+ * If the CM socket is still open, the reject reason and the optional
+ * private data are written to the peer and the socket is closed. The
+ * CM object is then marked DCM_DESTROY and cr_thread is woken up so
+ * that it releases the object.
+ *
+ * Input:
+ *     cm_ptr          CM object of the request being rejected
+ *     reason          reject reason, sent as a 16 bit value
+ *     psize           private data size in bytes
+ *     pdata           private data, only read when psize is non-zero
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_LENGTH_ERROR        psize exceeds IB_MAX_REJ_PDATA_SIZE
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr,
+                          IN int reason,
+                          IN DAT_COUNT psize, IN const DAT_PVOID pdata)
+{
+       struct iovec iov[2];
+       struct dapl_hca *hca_ptr;
+       int iov_cnt;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    " reject(cm %p reason %x, pdata %p, psize %d)\n",
+                    cm_ptr, reason, pdata, psize);
+
+       if (psize > IB_MAX_REJ_PDATA_SIZE)
+               return DAT_LENGTH_ERROR;
+
+       /*
+        * Fetch the HCA up front: cr_thread frees every CM object it
+        * finds in DCM_DESTROY state, so cm_ptr must not be dereferenced
+        * once the state has been written below.
+        */
+       hca_ptr = cm_ptr->hca;
+
+       /* write reject data to indicate reject */
+       if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+               cm_ptr->dst.rej = (uint16_t) reason;
+               cm_ptr->dst.rej = htons(cm_ptr->dst.rej);
+               cm_ptr->dst.p_size = htonl(psize);
+               /* get qp_type from request */
+               cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
+
+               iov[0].iov_base = (void *)&cm_ptr->dst;
+               iov[0].iov_len = sizeof(ib_qp_cm_t);
+               iov_cnt = 1;
+               if (psize) {
+                       iov[1].iov_base = pdata;
+                       iov[1].iov_len = psize;
+                       iov_cnt = 2;
+               }
+
+               /* best effort: the reject still completes locally */
+               if (writev(cm_ptr->socket, iov, iov_cnt) < 0)
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                " reject: write error = %s\n",
+                                strerror(errno));
+
+               shutdown(cm_ptr->socket, SHUT_RDWR);
+               closesocket(cm_ptr->socket);
+               cm_ptr->socket = DAPL_INVALID_SOCKET;
+       }
+
+       /* cr_thread will destroy CR; last access to cm_ptr */
+       cm_ptr->state = DCM_DESTROY;
+       if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
+               dapl_log(DAPL_DBG_TYPE_CM,
+                        " cm_destroy: thread wakeup error = %s\n",
+                        strerror(errno));
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection
+ *
+ * The handle may be either an EP or a CR; the object's magic value
+ * selects which CM object the address is copied from.
+ *
+ * Input:
+ *     dat_handle      EP or CR handle
+ *
+ * Output:
+ *     remote_ia_address: where to place the remote address
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      handle is NULL, is neither an EP nor a
+ *                             CR, or has no CM object attached
+ *
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
+                       OUT DAT_SOCK_ADDR6 * remote_ia_address)
+{
+       DAPL_HEADER *header;
+       dp_ib_cm_handle_t ib_cm_handle;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                    "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
+                    dat_handle);
+
+       header = (DAPL_HEADER *) dat_handle;
+       if (!header)
+               return DAT_INVALID_HANDLE;
+
+       if (header->magic == DAPL_MAGIC_EP)
+               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+       else if (header->magic == DAPL_MAGIC_CR)
+               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+       else
+               return DAT_INVALID_HANDLE;
+
+       /* an object without a CM handle has no peer address to copy */
+       if (!ib_cm_handle)
+               return DAT_INVALID_HANDLE;
+
+       dapl_os_memcpy(remote_ia_address,
+                      &ib_cm_handle->dst.ia_address, sizeof(DAT_SOCK_ADDR6));
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the private data size limit for a connection op type
+ *
+ * Input:
+ *     prd_ptr         private data pointer (not consulted here)
+ *     conn_op         connection operation type
+ *     hca_ptr         HCA (not consulted here)
+ *
+ * This implementation always answers with the provider maximum for
+ * conn_op, regardless of prd_ptr.
+ *
+ * Output:
+ *     None
+ *
+ * Returns:
+ *     IB_MAX_*_PDATA_SIZE matching conn_op, 0 for an unknown conn_op
+ *
+ */
+int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
+                              IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
+{
+       switch (conn_op) {
+       case DAPL_PDATA_CONN_REQ:
+               return IB_MAX_REQ_PDATA_SIZE;
+       case DAPL_PDATA_CONN_REP:
+               return IB_MAX_REP_PDATA_SIZE;
+       case DAPL_PDATA_CONN_REJ:
+               return IB_MAX_REJ_PDATA_SIZE;
+       case DAPL_PDATA_CONN_DREQ:
+               return IB_MAX_DREQ_PDATA_SIZE;
+       case DAPL_PDATA_CONN_DREP:
+               return IB_MAX_DREP_PDATA_SIZE;
+       default:
+               /* unknown operation type: no private data allowed */
+               return 0;
+       }
+}
+
+/*
+ * Map all socket CM event codes to the DAT equivalent.
+ *
+ * Each row pairs one provider CM event with the DAT event it
+ * translates to. Several CM events share the same DAT event, so the
+ * table is not one-to-one: a scan by DAT event that starts at row 0
+ * stops at the first row carrying that DAT event.
+ */
+/* number of rows in ib_cm_event_map */
+#define DAPL_IB_EVENT_CNT      10
+
+static struct ib_cm_event_map {
+       /* provider CM event code */
+       const ib_cm_events_t ib_cm_event;
+       /* DAT event number it translates to */
+       DAT_EVENT_NUMBER dat_event_num;
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+/* 00 */ {IB_CME_CONNECTED, 
+         DAT_CONNECTION_EVENT_ESTABLISHED},
+/* 01 */ {IB_CME_DISCONNECTED, 
+         DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
+         DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING, 
+         DAT_CONNECTION_REQUEST_EVENT},
+/* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+         DAT_CONNECTION_REQUEST_EVENT},
+/* 05 */ {IB_CME_DESTINATION_REJECT,
+         DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+/* 06 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+         DAT_CONNECTION_EVENT_PEER_REJECTED},
+/* 07 */ {IB_CME_DESTINATION_UNREACHABLE, 
+         DAT_CONNECTION_EVENT_UNREACHABLE},
+/* 08 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+         DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+/* 09 */ {IB_CME_LOCAL_FAILURE, 
+         DAT_CONNECTION_EVENT_BROKEN}
+};
+
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *     ib_cm_event     event provided to the dapl callback routine
+ *     active          switch indicating active or passive connection;
+ *                     only shows up in the debug trace
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_EVENT_NUMBER of translated provider value, 0 when the event
+ *     is above IB_CME_LOCAL_FAILURE or has no row in ib_cm_event_map
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
+                      IN DAT_BOOLEAN active)
+{
+       DAT_EVENT_NUMBER translated = 0;
+       int row = 0;
+
+       /* keep the parameter referenced when the trace is compiled out */
+       (void)active;
+
+       if (ib_cm_event > IB_CME_LOCAL_FAILURE)
+               return (DAT_EVENT_NUMBER) 0;
+
+       /* advance to the first row carrying this CM event, if any */
+       while (row < DAPL_IB_EVENT_CNT &&
+              ib_cm_event_map[row].ib_cm_event != ib_cm_event)
+               row++;
+
+       if (row < DAPL_IB_EVENT_CNT)
+               translated = ib_cm_event_map[row].dat_event_num;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
+                    "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
+                    active ? "active" : "passive", ib_cm_event, translated);
+
+       return translated;
+}
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ *
+ * Input:
+ *     dat_event_num   DAT event we need an equivalent CM event for
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     ib_cm_event of the first ib_cm_event_map row whose DAT event
+ *     matches, 0 when no row matches
+ */
+ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
+{
+       const struct ib_cm_event_map *row = ib_cm_event_map;
+       const struct ib_cm_event_map *end = ib_cm_event_map + DAPL_IB_EVENT_CNT;
+
+       /* first match wins; several rows may share a DAT event */
+       for (; row != end; row++) {
+               if (row->dat_event_num == dat_event_num)
+                       return row->ib_cm_event;
+       }
+       return 0;
+}
+
+/*
+ * outbound/inbound CR processing thread to avoid blocking applications
+ *
+ * arg is the struct dapl_hca whose ib_trans.list of CM objects this
+ * thread services. Each pass of the outer loop:
+ *   - rebuilds the fd set from the wakeup socket scm[0] plus the socket
+ *     of every queued CM object,
+ *   - frees CM objects that are in DCM_DESTROY state (or all of them
+ *     once cr_state is no longer IB_THREAD_RUN),
+ *   - polls each remaining socket and dispatches on the CM state,
+ *   - sleeps in dapl_select() and then drains the wakeup socket.
+ * The loop ends when cr_state is no longer IB_THREAD_RUN and the list
+ * is empty; cr_state is set to IB_THREAD_EXIT on the way out.
+ *
+ * ib_trans.lock is held while the list is walked and is released
+ * around dapl_poll() and the per-state handlers.
+ */
+void cr_thread(void *arg)
+{
+       struct dapl_hca *hca_ptr = arg;
+       dp_ib_cm_handle_t cr, next_cr;
+       int opt, ret;
+       socklen_t opt_len;
+       char rbuf[2];
+       struct dapl_fd_set *set;
+       enum DAPL_FD_EVENTS event;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread: ENTER hca %p\n", hca_ptr);
+       set = dapl_alloc_fd_set();
+       if (!set)
+               goto out;
+
+       dapl_os_lock(&hca_ptr->ib_trans.lock);
+       hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
+
+       while (1) {
+               /* start each pass with only the wakeup socket in the set */
+               dapl_fd_zero(set);
+               dapl_fd_set(hca_ptr->ib_trans.scm[0], set, DAPL_FD_READ);
+
+               if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
+                       next_cr = dapl_llist_peek_head(&hca_ptr->ib_trans.list);
+               else
+                       next_cr = NULL;
+
+               while (next_cr) {
+                       /* fetch the successor first; cr may be freed below */
+                       cr = next_cr;
+                       next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+                                                       (DAPL_LLIST_ENTRY *) &
+                                                       cr->entry);
+                       /* destroy requested for this CR, or thread is stopping */
+                       if (cr->state == DCM_DESTROY
+                           || hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+                               dapl_llist_remove_entry(&hca_ptr->ib_trans.list,
+                                                       (DAPL_LLIST_ENTRY *) &
+                                                       cr->entry);
+                               dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+                                            " CR FREE: %p ep=%p st=%d sock=%d\n", 
+                                            cr, cr->ep, cr->state, cr->socket);
+                               dapl_os_free(cr, sizeof(*cr));
+                               continue;
+                       }
+                       /* nothing to poll for a CR whose socket is closed */
+                       if (cr->socket == DAPL_INVALID_SOCKET) 
+                               continue;
+
+                       /* a pending connect is watched for write, all else for read */
+                       event = (cr->state == DCM_CONN_PENDING) ?
+                           DAPL_FD_WRITE : DAPL_FD_READ;
+                       /* non-zero return: socket could not be added to the set */
+                       if (dapl_fd_set(cr->socket, set, event)) {
+                               dapl_log(DAPL_DBG_TYPE_ERR,
+                                        " cr_thread: DESTROY CR st=%d fd %d"
+                                        " -> %s\n", cr->state, cr->socket,
+                                        inet_ntoa(((struct sockaddr_in *)
+                                                   &cr->dst.ia_address)->
+                                                  sin_addr));
+                               dapls_ib_cm_free(cr, cr->ep);
+                               continue;
+                       }
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                                    " poll cr=%p, socket=%d\n", cr,
+                                    cr->socket);
+                       /* lock is released for the poll and the handlers */
+                       dapl_os_unlock(&hca_ptr->ib_trans.lock);
+
+                       ret = dapl_poll(cr->socket, event);
+
+                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
+                                    " poll ret=0x%x cr->state=%d socket=%d\n",
+                                    ret, cr->state, cr->socket);
+
+                       /* data on listen, qp exchange, and on disc req */
+                       if (ret == DAPL_FD_READ) {
+                               if (cr->socket != DAPL_INVALID_SOCKET) {
+                                       switch (cr->state) {
+                                       case DCM_LISTEN:
+                                               dapli_socket_accept(cr);
+                                               break;
+                                       case DCM_ACCEPTING:
+                                               dapli_socket_accept_data(cr);
+                                               break;
+                                       case DCM_ACCEPTED:
+                                               dapli_socket_accept_rtu(cr);
+                                               break;
+                                       case DCM_RTU_PENDING:
+                                               dapli_socket_connect_rtu(cr);
+                                               break;
+                                       case DCM_CONNECTED:
+                                               dapli_socket_disconnect(cr);
+                                               break;
+                                       default:
+                                               break;
+                                       }
+                               }
+                       /* connect socket is writable, check status */
+                       } else if (ret == DAPL_FD_WRITE ||
+                                  (cr->state == DCM_CONN_PENDING && 
+                                   ret == DAPL_FD_ERROR)) {
+                               opt = 0;
+                               opt_len = sizeof(opt);
+                               ret = getsockopt(cr->socket, SOL_SOCKET,
+                                                SO_ERROR, (char *)&opt,
+                                                &opt_len);
+                               /* pass SO_ERROR on success, errno if getsockopt failed */
+                               if (!ret)
+                                       dapli_socket_connected(cr, opt);
+                               else
+                                       dapli_socket_connected(cr, errno);
+                        
+                       /* POLLUP, ERR, NVAL, or poll error - DISC */
+                       } else if (ret < 0 || ret == DAPL_FD_ERROR) {
+                               dapl_log(DAPL_DBG_TYPE_CM,
+                                    " poll=%d cr->st=%s sk=%d ep %p, %d\n",
+                                    ret, dapl_cm_state_str(cr->state), 
+                                    cr->socket, cr->ep,
+                                    cr->ep ? cr->ep->param.ep_state:0);
+                               dapli_socket_disconnect(cr);
+                       }
+                       /* retake the lock before moving on to next_cr */
+                       dapl_os_lock(&hca_ptr->ib_trans.lock);
+               }
+
+               /* set to exit and all resources destroyed */
+               if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
+                   (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
+                       break;
+
+               /* sleep without the lock until a socket in the set is ready */
+               dapl_os_unlock(&hca_ptr->ib_trans.lock);
+               dapl_select(set);
+
+               /* if pipe used to wakeup, consume */
+               while (dapl_poll(hca_ptr->ib_trans.scm[0], 
+                                DAPL_FD_READ) == DAPL_FD_READ) {
+                       if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1)
+                               dapl_log(DAPL_DBG_TYPE_CM,
+                                        " cr_thread: read pipe error = %s\n",
+                                        strerror(errno));
+               }
+               dapl_os_lock(&hca_ptr->ib_trans.lock);
+               
+               /* set to exit and all resources destroyed */
+               if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
+                   (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
+                       break;
+       }
+
+       /* both break statements above leave the loop with the lock held */
+       dapl_os_unlock(&hca_ptr->ib_trans.lock);
+       /* NOTE(review): confirm free() matches the allocator used by dapl_alloc_fd_set() */
+       free(set);
+      out:
+       /* also reached without the lock when the fd set allocation failed */
+       hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread(hca %p) exit\n", hca_ptr);
+}
+
+
+#ifdef DAPL_COUNTERS
+/*
+ * Debug aid: print one line per CM object queued on the IA's HCA,
+ * showing its service point, endpoint, socket, QP type, CM state,
+ * direction, an IP address and a port or connection qualifier.
+ * The walk is done with ib_trans.lock held.
+ */
+void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
+{
+       DAPL_LLIST_HEAD *cm_list =
+               (DAPL_LLIST_HEAD*)&ia_ptr->hca_ptr->ib_trans.list;
+       dp_ib_cm_handle_t conn, following;
+       int count = 0;
+
+       dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
+       following = NULL;
+       if (!dapl_llist_is_empty(cm_list))
+               following = dapl_llist_peek_head(cm_list);
+
+        printf("\n DAPL IA CONNECTIONS IN PROCESS:\n");
+       while (following) {
+               struct sockaddr_in *remote;
+               char *addr_str;
+               int qual;
+
+               conn = following;
+               following = dapl_llist_next_entry(cm_list,
+                               (DAPL_LLIST_ENTRY*)&conn->entry);
+               remote = (struct sockaddr_in *)&conn->dst.ia_address;
+
+               /* a listener is shown with the local HCA address */
+               if (conn->state == DCM_LISTEN)
+                       addr_str = inet_ntoa(((struct sockaddr_in *)
+                               &ia_ptr->hca_ptr->hca_address)->sin_addr);
+               else
+                       addr_str = inet_ntoa(remote->sin_addr);
+
+               /* with a service point show its conn_qual, else the port */
+               if (conn->sp)
+                       qual = (int)conn->sp->conn_qual;
+               else
+                       qual = ntohs(remote->sin_port);
+
+               printf( "  CONN[%d]: sp %p ep %p sock %d %s %s %s %s %d\n",
+                       count, conn->sp, conn->ep, conn->socket,
+                       conn->dst.qp_type == IBV_QPT_RC ? "RC" : "UD",
+                       dapl_cm_state_str(conn->state),
+                       conn->sp ? "<-" : "->",
+                       addr_str, qual);
+               count++;
+       }
+       printf("\n");
+       dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
+}
+#endif
diff --git a/dapl/openib_scm/dapl_ib_cm.c b/dapl/openib_scm/dapl_ib_cm.c
deleted file mode 100644 (file)
index 90d6d27..0000000
+++ /dev/null
@@ -1,1786 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_cm.c
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The uDAPL openib provider - connection management
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_cr_util.h"
-#include "dapl_name_service.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#if defined(_WIN32) || defined(_WIN64)
-enum DAPL_FD_EVENTS {
-       DAPL_FD_READ = 0x1,
-       DAPL_FD_WRITE = 0x2,
-       DAPL_FD_ERROR = 0x4
-};
-
-static int dapl_config_socket(DAPL_SOCKET s)
-{
-       unsigned long nonblocking = 1;
-       return ioctlsocket(s, FIONBIO, &nonblocking);
-}
-
-static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
-                              int addrlen)
-{
-       int err;
-
-       err = connect(s, addr, addrlen);
-       if (err == SOCKET_ERROR)
-               err = WSAGetLastError();
-       return (err == WSAEWOULDBLOCK) ? EAGAIN : err;
-}
-
-struct dapl_fd_set {
-       struct fd_set set[3];
-};
-
-static struct dapl_fd_set *dapl_alloc_fd_set(void)
-{
-       return dapl_os_alloc(sizeof(struct dapl_fd_set));
-}
-
-static void dapl_fd_zero(struct dapl_fd_set *set)
-{
-       FD_ZERO(&set->set[0]);
-       FD_ZERO(&set->set[1]);
-       FD_ZERO(&set->set[2]);
-}
-
-static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
-                      enum DAPL_FD_EVENTS event)
-{
-       FD_SET(s, &set->set[(event == DAPL_FD_READ) ? 0 : 1]);
-       FD_SET(s, &set->set[2]);
-       return 0;
-}
-
-static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
-{
-       struct fd_set rw_fds;
-       struct fd_set err_fds;
-       struct timeval tv;
-       int ret;
-
-       FD_ZERO(&rw_fds);
-       FD_ZERO(&err_fds);
-       FD_SET(s, &rw_fds);
-       FD_SET(s, &err_fds);
-
-       tv.tv_sec = 0;
-       tv.tv_usec = 0;
-
-       if (event == DAPL_FD_READ)
-               ret = select(1, &rw_fds, NULL, &err_fds, &tv);
-       else
-               ret = select(1, NULL, &rw_fds, &err_fds, &tv);
-
-       if (ret == 0)
-               return 0;
-       else if (ret == SOCKET_ERROR)
-               return WSAGetLastError();
-       else if (FD_ISSET(s, &rw_fds))
-               return event;
-       else
-               return DAPL_FD_ERROR;
-}
-
-static int dapl_select(struct dapl_fd_set *set)
-{
-       int ret;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n");
-       ret = select(0, &set->set[0], &set->set[1], &set->set[2], NULL);
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n");
-
-       if (ret == SOCKET_ERROR)
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " dapl_select: error 0x%x\n", WSAGetLastError());
-
-       return ret;
-}
-#else                          // _WIN32 || _WIN64
-enum DAPL_FD_EVENTS {
-       DAPL_FD_READ = POLLIN,
-       DAPL_FD_WRITE = POLLOUT,
-       DAPL_FD_ERROR = POLLERR
-};
-
-static int dapl_config_socket(DAPL_SOCKET s)
-{
-       int ret;
-
-       ret = fcntl(s, F_GETFL);
-       if (ret >= 0)
-               ret = fcntl(s, F_SETFL, ret | O_NONBLOCK);
-       return ret;
-}
-
-static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr,
-                              int addrlen)
-{
-       int ret;
-
-       ret = connect(s, addr, addrlen);
-
-       return (errno == EINPROGRESS) ? EAGAIN : ret;
-}
-
-struct dapl_fd_set {
-       int index;
-       struct pollfd set[DAPL_FD_SETSIZE];
-};
-
-static struct dapl_fd_set *dapl_alloc_fd_set(void)
-{
-       return dapl_os_alloc(sizeof(struct dapl_fd_set));
-}
-
-static void dapl_fd_zero(struct dapl_fd_set *set)
-{
-       set->index = 0;
-}
-
-static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
-                      enum DAPL_FD_EVENTS event)
-{
-       if (set->index == DAPL_FD_SETSIZE - 1) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n",
-                        set->index + 1);
-               return -1;
-       }
-
-       set->set[set->index].fd = s;
-       set->set[set->index].revents = 0;
-       set->set[set->index++].events = event;
-       return 0;
-}
-
-static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
-{
-       struct pollfd fds;
-       int ret;
-
-       fds.fd = s;
-       fds.events = event;
-       fds.revents = 0;
-       ret = poll(&fds, 1, 0);
-       dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n",
-                s, ret, fds.revents);
-       if (ret == 0)
-               return 0;
-       else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL)) 
-               return DAPL_FD_ERROR;
-       else 
-               return fds.revents;
-}
-
-static int dapl_select(struct dapl_fd_set *set)
-{
-       int ret;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n",
-                    set->index);
-       ret = poll(set->set, set->index, -1);
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret);
-       return ret;
-}
-#endif
-
-static struct ib_cm_handle *dapli_cm_create(void)
-{
-       struct ib_cm_handle *cm_ptr;
-
-       /* Allocate CM, init lock, and initialize */
-       if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL)
-               return NULL;
-
-       (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
-       if (dapl_os_lock_init(&cm_ptr->lock))
-               goto bail;
-
-       cm_ptr->dst.ver = htons(DSCM_VER);
-       cm_ptr->socket = DAPL_INVALID_SOCKET;
-       return cm_ptr;
-      bail:
-       dapl_os_free(cm_ptr, sizeof(*cm_ptr));
-       return NULL;
-}
-
-/* mark for destroy, remove all references, schedule cleanup */
-static void dapli_cm_destroy(struct ib_cm_handle *cm_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " cm_destroy: cm %p ep %p\n", cm_ptr, cm_ptr->ep);
-
-       /* cleanup, never made it to work queue */
-       if (cm_ptr->state == SCM_INIT) {
-               if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
-                       shutdown(cm_ptr->socket, SHUT_RDWR);
-                       closesocket(cm_ptr->socket);
-               }
-               dapl_os_free(cm_ptr, sizeof(*cm_ptr));
-               return;
-       }
-
-       dapl_os_lock(&cm_ptr->lock);
-       cm_ptr->state = SCM_DESTROY;
-       if ((cm_ptr->ep) && (cm_ptr->ep->cm_handle == cm_ptr)) {
-               cm_ptr->ep->cm_handle = IB_INVALID_HANDLE;
-               cm_ptr->ep = NULL;
-       }
-
-       /* close socket if still active */
-       if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
-               shutdown(cm_ptr->socket, SHUT_RDWR);
-               closesocket(cm_ptr->socket);
-               cm_ptr->socket = DAPL_INVALID_SOCKET;
-       }
-       dapl_os_unlock(&cm_ptr->lock);
-
-       /* wakeup work thread */
-       if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
-               dapl_log(DAPL_DBG_TYPE_CM,
-                        " cm_destroy: thread wakeup error = %s\n",
-                        strerror(errno));
-}
-
-/* queue socket for processing CM work */
-static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
-{
-       /* add to work queue for cr thread processing */
-       dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & cm_ptr->entry);
-       dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
-       dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list,
-                           (DAPL_LLIST_ENTRY *) & cm_ptr->entry, cm_ptr);
-       dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
-
-       /* wakeup CM work thread */
-       if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
-               dapl_log(DAPL_DBG_TYPE_CM,
-                        " cm_queue: thread wakeup error = %s\n",
-                        strerror(errno));
-}
-
-/*
- * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect
- *                 or from ep_free
- */
-DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
-{
-       DAPL_EP *ep_ptr = cm_ptr->ep;
-       DAT_UINT32 disc_data = htonl(0xdead);
-
-       if (ep_ptr == NULL)
-               return DAT_SUCCESS;
-
-       dapl_os_lock(&cm_ptr->lock);
-       if ((cm_ptr->state == SCM_INIT) ||
-           (cm_ptr->state == SCM_DISCONNECTED) ||
-           (cm_ptr->state == SCM_DESTROY)) {
-               dapl_os_unlock(&cm_ptr->lock);
-               return DAT_SUCCESS;
-       } else {
-               /* send disc date, close socket, schedule destroy */
-               if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
-                       if (send(cm_ptr->socket, (char *)&disc_data,
-                                sizeof(disc_data), 0) == -1)
-                               dapl_log(DAPL_DBG_TYPE_WARN,
-                                        " cm_disc: write error = %s\n",
-                                        strerror(errno));
-                       shutdown(cm_ptr->socket, SHUT_RDWR);
-                       closesocket(cm_ptr->socket);
-                       cm_ptr->socket = DAPL_INVALID_SOCKET;
-               }
-               cm_ptr->state = SCM_DISCONNECTED;
-       }
-       dapl_os_unlock(&cm_ptr->lock);
-
-       /* disconnect events for RC's only */
-       if (ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
-               if (ep_ptr->cr_ptr) {
-                       dapls_cr_callback(cm_ptr,
-                                         IB_CME_DISCONNECTED,
-                                         NULL,
-                                         ((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr);
-               } else {
-                       dapl_evd_connection_callback(ep_ptr->cm_handle,
-                                                    IB_CME_DISCONNECTED,
-                                                    NULL, ep_ptr);
-               }
-       }
-
-       /* scheduled destroy via disconnect clean in callback */
-       return DAT_SUCCESS;
-}
-
-/*
- * ACTIVE: socket connected, send QP information to peer 
- */
-static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
-{
-       int len, opt = 1;
-       struct iovec iov[2];
-       struct dapl_ep *ep_ptr = cm_ptr->ep;
-
-       if (err) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_PENDING: %s ERR %s -> %s %d\n",
-                        err == -1 ? "POLL" : "SOCKOPT",
-                        err == -1 ? strerror(errno) : strerror(err), 
-                        inet_ntoa(((struct sockaddr_in *)
-                                  ep_ptr->param.
-                                  remote_ia_address_ptr)->sin_addr), 
-                        ntohs(((struct sockaddr_in *)
-                               &cm_ptr->dst.ia_address)->sin_port));
-               goto bail;
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " socket connected, write QP and private data\n");
-
-       /* no delay for small packets */
-       setsockopt(cm_ptr->socket, IPPROTO_TCP, TCP_NODELAY,
-                  (char *)&opt, sizeof(opt));
-
-       /* send qp info and pdata to remote peer */
-       iov[0].iov_base = (void *)&cm_ptr->dst;
-       iov[0].iov_len = sizeof(ib_qp_cm_t);
-       if (cm_ptr->dst.p_size) {
-               iov[1].iov_base = cm_ptr->p_data;
-               iov[1].iov_len = ntohl(cm_ptr->dst.p_size);
-               len = writev(cm_ptr->socket, iov, 2);
-       } else {
-               len = writev(cm_ptr->socket, iov, 1);
-       }
-
-       if (len != (ntohl(cm_ptr->dst.p_size) + sizeof(ib_qp_cm_t))) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_PENDING write: ERR %s, wcnt=%d -> %s\n",
-                        strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
-                                                         ep_ptr->param.
-                                                         remote_ia_address_ptr)->
-                                                        sin_addr));
-               goto bail;
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " connected: sending SRC port=0x%x lid=0x%x,"
-                    " qpn=0x%x, psize=%d\n",
-                    ntohs(cm_ptr->dst.port), ntohs(cm_ptr->dst.lid),
-                    ntohl(cm_ptr->dst.qpn), ntohl(cm_ptr->dst.p_size));
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " connected: sending SRC GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    htonll(cm_ptr->dst.gid.global.subnet_prefix),
-                    (unsigned long long)
-                    htonll(cm_ptr->dst.gid.global.interface_id));
-
-       /* queue up to work thread to avoid blocking consumer */
-       cm_ptr->state = SCM_RTU_PENDING;
-       return;
-      bail:
-       /* close socket, free cm structure and post error event */
-       dapli_cm_destroy(cm_ptr);
-       dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, ep_ptr);
-}
-
-/*
- * ACTIVE: Create socket, connect, defer exchange QP information to CR thread
- * to avoid blocking. 
- */
-DAT_RETURN
-dapli_socket_connect(DAPL_EP * ep_ptr,
-                    DAT_IA_ADDRESS_PTR r_addr,
-                    DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
-{
-       dp_ib_cm_handle_t cm_ptr;
-       int ret;
-       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
-       struct sockaddr_in addr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n",
-                    r_qual, p_size);
-
-       cm_ptr = dapli_cm_create();
-       if (cm_ptr == NULL)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       /* create, connect, sockopt, and exchange QP information */
-       if ((cm_ptr->socket =
-            socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
-               dapl_os_free(cm_ptr, sizeof(*cm_ptr));
-               return DAT_INSUFFICIENT_RESOURCES;
-       }
-
-       ret = dapl_config_socket(cm_ptr->socket);
-       if (ret < 0) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " socket connect: config socket %d ERR %d %s\n",
-                        cm_ptr->socket, ret, strerror(errno));
-               goto bail;
-       }
-
-       dapl_os_memcpy(&addr, r_addr, sizeof(addr));
-       addr.sin_port = htons(r_qual);
-       ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&addr,
-                                 sizeof(addr));
-       if (ret && ret != EAGAIN) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " socket connect ERROR: %s -> %s r_qual %d\n",
-                        strerror(errno),
-                        inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
-               dapli_cm_destroy(cm_ptr);
-               return DAT_INVALID_ADDRESS;
-       }
-
-       /* Send QP info, IA address, and private data */
-       cm_ptr->dst.qpn = htonl(ep_ptr->qp_handle->qp_num);
-#ifdef DAT_EXTENSIONS
-       cm_ptr->dst.qp_type = htons(ep_ptr->qp_handle->qp_type);
-#endif
-       cm_ptr->dst.port = htons(ia_ptr->hca_ptr->port_num);
-       cm_ptr->dst.lid = ia_ptr->hca_ptr->ib_trans.lid;
-       cm_ptr->dst.gid = ia_ptr->hca_ptr->ib_trans.gid;
-
-       /* save references */
-       cm_ptr->hca = ia_ptr->hca_ptr;
-       cm_ptr->ep = ep_ptr;
-       cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
-       ((struct sockaddr_in *)
-               &cm_ptr->dst.ia_address)->sin_port = ntohs(r_qual);
-
-       if (p_size) {
-               cm_ptr->dst.p_size = htonl(p_size);
-               dapl_os_memcpy(cm_ptr->p_data, p_data, p_size);
-       }
-
-       /* connected or pending, either way results via async event */
-       if (ret == 0)
-               dapli_socket_connected(cm_ptr, 0);
-       else
-               cm_ptr->state = SCM_CONN_PENDING;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " connect: socket %d to %s r_qual %d pending\n",
-                    cm_ptr->socket,
-                    inet_ntoa(addr.sin_addr), (unsigned int)r_qual);
-
-       dapli_cm_queue(cm_ptr);
-       return DAT_SUCCESS;
-      bail:
-       dapl_log(DAPL_DBG_TYPE_ERR,
-                " socket connect ERROR: %s query lid(0x%x)/gid"
-                " -> %s r_qual %d\n",
-                strerror(errno), ntohs(cm_ptr->dst.lid),
-                inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
-                (unsigned int)r_qual);
-
-       /* close socket, free cm structure */
-       dapli_cm_destroy(cm_ptr);
-       return DAT_INTERNAL_ERROR;
-}
-
-/*
- * ACTIVE: exchange QP information, called from CR thread
- */
-static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
-{
-       DAPL_EP *ep_ptr = cm_ptr->ep;
-       int len;
-       short rtu_data = htons(0x0E0F);
-       ib_cm_events_t event = IB_CME_DESTINATION_REJECT;
-
-       /* read DST information into cm_ptr, overwrite SRC info */
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: recv peer QP data\n");
-
-       len = recv(cm_ptr->socket, (char *)&cm_ptr->dst, sizeof(ib_qp_cm_t), 0);
-       if (len != sizeof(ib_qp_cm_t) || ntohs(cm_ptr->dst.ver) != DSCM_VER) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_RTU read: ERR %s, rcnt=%d, ver=%d -> %s\n",
-                        strerror(errno), len, cm_ptr->dst.ver,
-                        inet_ntoa(((struct sockaddr_in *)
-                                   ep_ptr->param.remote_ia_address_ptr)->
-                                  sin_addr));
-               goto bail;
-       }
-
-       /* convert peer response values to host order */
-       cm_ptr->dst.port = ntohs(cm_ptr->dst.port);
-       cm_ptr->dst.lid = ntohs(cm_ptr->dst.lid);
-       cm_ptr->dst.qpn = ntohl(cm_ptr->dst.qpn);
-#ifdef DAT_EXTENSIONS
-       cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
-#endif
-       cm_ptr->dst.p_size = ntohl(cm_ptr->dst.p_size);
-
-       /* save remote address information */
-       dapl_os_memcpy(&ep_ptr->remote_ia_address,
-                      &cm_ptr->dst.ia_address,
-                      sizeof(ep_ptr->remote_ia_address));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " CONN_RTU: DST %s port=0x%x lid=0x%x,"
-                    " qpn=0x%x, qp_type=%d, psize=%d\n",
-                    inet_ntoa(((struct sockaddr_in *)
-                               &cm_ptr->dst.ia_address)->sin_addr),
-                    cm_ptr->dst.port, cm_ptr->dst.lid,
-                    cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
-
-       /* validate private data size before reading */
-       if (cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_RTU read: psize (%d) wrong -> %s\n",
-                        cm_ptr->dst.p_size, inet_ntoa(((struct sockaddr_in *)
-                                                       ep_ptr->param.
-                                                       remote_ia_address_ptr)->
-                                                      sin_addr));
-               goto bail;
-       }
-
-       /* read private data into cm_handle if any present */
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " socket connected, read private data\n");
-       if (cm_ptr->dst.p_size) {
-               len =
-                   recv(cm_ptr->socket, cm_ptr->p_data, cm_ptr->dst.p_size, 0);
-               if (len != cm_ptr->dst.p_size) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " CONN_RTU read pdata: ERR %s, rcnt=%d -> %s\n",
-                                strerror(errno), len,
-                                inet_ntoa(((struct sockaddr_in *)
-                                           ep_ptr->param.
-                                           remote_ia_address_ptr)->sin_addr));
-                       goto bail;
-               }
-       }
-
-       /* check for consumer reject */
-       if (cm_ptr->dst.rej) {
-               dapl_log(DAPL_DBG_TYPE_CM,
-                        " CONN_RTU read: PEER REJ reason=0x%x -> %s\n",
-                        ntohs(cm_ptr->dst.rej),
-                        inet_ntoa(((struct sockaddr_in *)
-                                   ep_ptr->param.remote_ia_address_ptr)->
-                                  sin_addr));
-               event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
-#ifdef DAT_EXTENSIONS
-               if (cm_ptr->dst.qp_type == IBV_QPT_UD) 
-                       goto ud_bail;
-               else
-#endif
-               goto bail;
-       }
-
-       /* modify QP to RTR and then to RTS with remote info */
-       dapl_os_lock(&ep_ptr->header.lock);
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_RTU: QPS_RTR ERR %s -> %s\n",
-                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
-                                                    ep_ptr->param.
-                                                    remote_ia_address_ptr)->
-                                                   sin_addr));
-               dapl_os_unlock(&ep_ptr->header.lock);
-               goto bail;
-       }
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_RTU: QPS_RTS ERR %s -> %s\n",
-                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
-                                                    ep_ptr->param.
-                                                    remote_ia_address_ptr)->
-                                                   sin_addr));
-               dapl_os_unlock(&ep_ptr->header.lock);
-               goto bail;
-       }
-       dapl_os_unlock(&ep_ptr->header.lock);
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
-
-       /* complete handshake after final QP state change */
-       if (send(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0) == -1) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_RTU: write error = %s\n", strerror(errno));
-               goto bail;
-       }
-       /* init cm_handle and post the event with private data */
-       cm_ptr->state = SCM_CONNECTED;
-       event = IB_CME_CONNECTED;
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n");
-
-#ifdef DAT_EXTENSIONS
-ud_bail:
-       if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
-               DAT_IB_EXTENSION_EVENT_DATA xevent;
-
-               /* post EVENT, modify_qp created ah */
-               xevent.status = 0;
-               xevent.type = DAT_IB_UD_REMOTE_AH;
-               xevent.remote_ah.ah = cm_ptr->ah;
-               xevent.remote_ah.qpn = cm_ptr->dst.qpn;
-               dapl_os_memcpy(&xevent.remote_ah.ia_addr,
-                              &cm_ptr->dst.ia_address,
-                              sizeof(cm_ptr->dst.ia_address));
-
-               if (event == IB_CME_CONNECTED)
-                       event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
-               else
-                       event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
-
-               dapls_evd_post_connection_event_ext((DAPL_EVD *) ep_ptr->param.
-                                                   connect_evd_handle,
-                                                   event,
-                                                   (DAT_EP_HANDLE) ep_ptr,
-                                                   (DAT_COUNT) cm_ptr->dst.p_size,
-                                                   (DAT_PVOID *) cm_ptr->p_data,
-                                                   (DAT_PVOID *) &xevent);
-
-               /* done with socket, don't destroy cm_ptr, need pdata */
-               closesocket(cm_ptr->socket);
-               cm_ptr->socket = DAPL_INVALID_SOCKET;
-               cm_ptr->state = SCM_RELEASED;
-       } else
-#endif
-       {
-               ep_ptr->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
-               dapl_evd_connection_callback(cm_ptr,
-                                            IB_CME_CONNECTED,
-                                            cm_ptr->p_data, ep_ptr);
-       }
-       return;
-
-bail:
-       /* close socket, and post error event */
-       dapls_ib_reinit_ep(ep_ptr);     /* reset QP state */
-       closesocket(cm_ptr->socket);
-       cm_ptr->socket = DAPL_INVALID_SOCKET;
-       dapl_evd_connection_callback(NULL, event, cm_ptr->p_data, ep_ptr);
-}
-
-/*
- * PASSIVE: Create socket, listen, accept, exchange QP information 
- */
-DAT_RETURN
-dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
-{
-       struct sockaddr_in addr;
-       ib_cm_srvc_handle_t cm_ptr = NULL;
-       int opt = 1;
-       DAT_RETURN dat_status = DAT_SUCCESS;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
-                    ia_ptr, serviceID, sp_ptr);
-
-       cm_ptr = dapli_cm_create();
-       if (cm_ptr == NULL)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       cm_ptr->sp = sp_ptr;
-       cm_ptr->hca = ia_ptr->hca_ptr;
-
-       /* bind, listen, set sockopt, accept, exchange data */
-       if ((cm_ptr->socket =
-            socket(AF_INET, SOCK_STREAM, 0)) == DAPL_INVALID_SOCKET) {
-               dapl_log(DAPL_DBG_TYPE_ERR, " ERR: listen socket create: %s\n",
-                        strerror(errno));
-               dat_status = DAT_INSUFFICIENT_RESOURCES;
-               goto bail;
-       }
-
-       setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR,
-                  (char *)&opt, sizeof(opt));
-       addr.sin_port = htons(serviceID);
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = INADDR_ANY;
-
-       if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
-           || (listen(cm_ptr->socket, 128) < 0)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " listen: ERROR %s on conn_qual 0x%x\n",
-                            strerror(errno), serviceID);
-               if (errno == EADDRINUSE)
-                       dat_status = DAT_CONN_QUAL_IN_USE;
-               else
-                       dat_status = DAT_CONN_QUAL_UNAVAILABLE;
-               goto bail;
-       }
-
-       /* set cm_handle for this service point, save listen socket */
-       sp_ptr->cm_srvc_handle = cm_ptr;
-
-       /* queue up listen socket to process inbound CR's */
-       cm_ptr->state = SCM_LISTEN;
-       dapli_cm_queue(cm_ptr);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " listen: qual 0x%x cr %p s_fd %d\n",
-                    ntohs(serviceID), cm_ptr, cm_ptr->socket);
-
-       return dat_status;
-      bail:
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " listen: ERROR on conn_qual 0x%x\n", serviceID);
-       dapli_cm_destroy(cm_ptr);
-       return dat_status;
-}
-
-/*
- * PASSIVE: accept socket 
- */
-static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
-{
-       dp_ib_cm_handle_t acm_ptr;
-       int len;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket_accept\n");
-       
-       /* 
-        * Accept all CR's on this port to avoid half-connection (SYN_RCV)
-        * stalls with many to one connection storms
-        */
-       do {
-               /* Allocate accept CM and initialize */
-               if ((acm_ptr = dapli_cm_create()) == NULL)
-                       return;
-
-               acm_ptr->sp = cm_ptr->sp;
-               acm_ptr->hca = cm_ptr->hca;
-
-               len = sizeof(acm_ptr->dst.ia_address);
-               acm_ptr->socket = accept(cm_ptr->socket,
-                                       (struct sockaddr *)
-                                       &acm_ptr->dst.ia_address,
-                                       (socklen_t *) & len);
-               if (acm_ptr->socket == DAPL_INVALID_SOCKET) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                               " accept: ERR %s on FD %d l_cr %p\n",
-                               strerror(errno), cm_ptr->socket, cm_ptr);
-                       dapli_cm_destroy(acm_ptr);
-                       return;
-               }
-
-               acm_ptr->state = SCM_ACCEPTING;
-               dapli_cm_queue(acm_ptr);
-       
-       } while (dapl_poll(cm_ptr->socket, DAPL_FD_READ) == DAPL_FD_READ);
-}
-
-/*
- * PASSIVE: receive peer QP information, private data, post cr_event 
- */
-static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
-{
-       int len;
-       void *p_data = NULL;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read QP data\n");
-
-       /* read in DST QP info, IA address. check for private data */
-       len =
-           recv(acm_ptr->socket, (char *)&acm_ptr->dst, sizeof(ib_qp_cm_t), 0);
-       if (len != sizeof(ib_qp_cm_t) || ntohs(acm_ptr->dst.ver) != DSCM_VER) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " accept read: ERR %s, rcnt=%d, ver=%d\n",
-                        strerror(errno), len, ntohs(acm_ptr->dst.ver));
-               goto bail;
-       }
-
-       /* convert accepted values to host order */
-       acm_ptr->dst.port = ntohs(acm_ptr->dst.port);
-       acm_ptr->dst.lid = ntohs(acm_ptr->dst.lid);
-       acm_ptr->dst.qpn = ntohl(acm_ptr->dst.qpn);
-#ifdef DAT_EXTENSIONS
-       acm_ptr->dst.qp_type = ntohs(acm_ptr->dst.qp_type);
-#endif
-       acm_ptr->dst.p_size = ntohl(acm_ptr->dst.p_size);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " accept: DST %s port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
-                    inet_ntoa(((struct sockaddr_in *)&acm_ptr->dst.
-                               ia_address)->sin_addr), acm_ptr->dst.port,
-                    acm_ptr->dst.lid, acm_ptr->dst.qpn, acm_ptr->dst.p_size);
-
-       /* validate private data size before reading */
-       if (acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " accept read: psize (%d) wrong\n",
-                            acm_ptr->dst.p_size);
-               goto bail;
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read private data\n");
-
-       /* read private data into cm_handle if any present */
-       if (acm_ptr->dst.p_size) {
-               len =
-                   recv(acm_ptr->socket, acm_ptr->p_data, acm_ptr->dst.p_size,
-                        0);
-               if (len != acm_ptr->dst.p_size) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " accept read pdata: ERR %s, rcnt=%d\n",
-                                strerror(errno), len);
-                       goto bail;
-               }
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, " accept: psize=%d read\n", len);
-               p_data = acm_ptr->p_data;
-       }
-
-       acm_ptr->state = SCM_ACCEPTING_DATA;
-
-#ifdef DAT_EXTENSIONS
-       if (acm_ptr->dst.qp_type == IBV_QPT_UD) {
-               DAT_IB_EXTENSION_EVENT_DATA xevent;
-
-               /* post EVENT, modify_qp created ah */
-               xevent.status = 0;
-               xevent.type = DAT_IB_UD_CONNECT_REQUEST;
-
-               dapls_evd_post_cr_event_ext(acm_ptr->sp,
-                                           DAT_IB_UD_CONNECTION_REQUEST_EVENT,
-                                           acm_ptr,
-                                           (DAT_COUNT) acm_ptr->dst.p_size,
-                                           (DAT_PVOID *) acm_ptr->p_data,
-                                           (DAT_PVOID *) & xevent);
-       } else
-#endif
-               /* trigger CR event and return SUCCESS */
-               dapls_cr_callback(acm_ptr,
-                                 IB_CME_CONNECTION_REQUEST_PENDING,
-                                 p_data, acm_ptr->sp);
-       return;
-      bail:
-       /* close socket, free cm structure, active will see socket close as reject */
-       dapli_cm_destroy(acm_ptr);
-       return;
-}
-
-/*
- * PASSIVE: consumer accept, send local QP information, private data, 
- * queue on work thread to receive RTU information to avoid blocking
- * user thread. 
- */
-DAT_RETURN
-dapli_socket_accept_usr(DAPL_EP * ep_ptr,
-                       DAPL_CR * cr_ptr, DAT_COUNT p_size, DAT_PVOID p_data)
-{
-       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
-       dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle;
-       ib_qp_cm_t local;
-       struct iovec iov[2];
-       int len;
-
-       if (p_size > IB_MAX_REP_PDATA_SIZE)
-               return DAT_LENGTH_ERROR;
-
-       /* must have a accepted socket */
-       if (cm_ptr->socket == DAPL_INVALID_SOCKET)
-               return DAT_INTERNAL_ERROR;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " ACCEPT_USR: remote port=0x%x lid=0x%x"
-                    " qpn=0x%x qp_type %d, psize=%d\n",
-                    cm_ptr->dst.port, cm_ptr->dst.lid,
-                    cm_ptr->dst.qpn, cm_ptr->dst.qp_type, cm_ptr->dst.p_size);
-
-#ifdef DAT_EXTENSIONS
-       if (cm_ptr->dst.qp_type == IBV_QPT_UD &&
-           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " ACCEPT_USR: ERR remote QP is UD,"
-                            ", but local QP is not\n");
-               return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
-       }
-#endif
-
-       /* modify QP to RTR and then to RTS with remote info already read */
-       dapl_os_lock(&ep_ptr->header.lock);
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_RTR, cm_ptr) != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " ACCEPT_USR: QPS_RTR ERR %s -> %s\n",
-                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
-                                                    &cm_ptr->dst.ia_address)->
-                                                   sin_addr));
-               dapl_os_unlock(&ep_ptr->header.lock);
-               goto bail;
-       }
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_RTS, cm_ptr) != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " ACCEPT_USR: QPS_RTS ERR %s -> %s\n",
-                        strerror(errno), inet_ntoa(((struct sockaddr_in *)
-                                                    &cm_ptr->dst.ia_address)->
-                                                   sin_addr));
-               dapl_os_unlock(&ep_ptr->header.lock);
-               goto bail;
-       }
-       dapl_os_unlock(&ep_ptr->header.lock);
-
-       /* save remote address information */
-       dapl_os_memcpy(&ep_ptr->remote_ia_address,
-                      &cm_ptr->dst.ia_address,
-                      sizeof(ep_ptr->remote_ia_address));
-
-       /* send our QP info, IA address, pdata. Don't overwrite dst data */
-       local.ver = htons(DSCM_VER);
-       local.rej = 0;
-       local.qpn = htonl(ep_ptr->qp_handle->qp_num);
-       local.qp_type = htons(ep_ptr->qp_handle->qp_type);
-       local.port = htons(ia_ptr->hca_ptr->port_num);
-       local.lid = ia_ptr->hca_ptr->ib_trans.lid;
-       local.gid = ia_ptr->hca_ptr->ib_trans.gid;
-       local.ia_address = ia_ptr->hca_ptr->hca_address;
-       ((struct sockaddr_in *)&local.ia_address)->sin_port = 
-               ntohs(cm_ptr->sp->conn_qual);
-
-       local.p_size = htonl(p_size);
-       iov[0].iov_base = (void *)&local;
-       iov[0].iov_len = sizeof(ib_qp_cm_t);
-       if (p_size) {
-               iov[1].iov_base = p_data;
-               iov[1].iov_len = p_size;
-               len = writev(cm_ptr->socket, iov, 2);
-       } else {
-               len = writev(cm_ptr->socket, iov, 1);
-       }
-
-       if (len != (p_size + sizeof(ib_qp_cm_t))) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " ACCEPT_USR: ERR %s, wcnt=%d -> %s\n",
-                        strerror(errno), len, inet_ntoa(((struct sockaddr_in *)
-                                                         &cm_ptr->dst.
-                                                         ia_address)->
-                                                        sin_addr));
-               goto bail;
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " ACCEPT_USR: local port=0x%x lid=0x%x"
-                    " qpn=0x%x psize=%d\n",
-                    ntohs(local.port), ntohs(local.lid),
-                    ntohl(local.qpn), ntohl(local.p_size));
-       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " ACCEPT_USR SRC GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    htonll(local.gid.global.subnet_prefix),
-                    (unsigned long long)
-                    htonll(local.gid.global.interface_id));
-
-       /* save state and reference to EP, queue for RTU data */
-       cm_ptr->ep = ep_ptr;
-       cm_ptr->hca = ia_ptr->hca_ptr;
-       cm_ptr->state = SCM_ACCEPTED;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
-       return DAT_SUCCESS;
-      bail:
-       dapli_cm_destroy(cm_ptr);
-       dapls_ib_reinit_ep(ep_ptr);     /* reset QP state */
-       return DAT_INTERNAL_ERROR;
-}
-
-/*
- * PASSIVE: read RTU from active peer, post CONN event
- */
-void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr)
-{
-       int len;
-       short rtu_data = 0;
-
-       /* complete handshake after final QP state change */
-       len = recv(cm_ptr->socket, (char *)&rtu_data, sizeof(rtu_data), 0);
-       if (len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " ACCEPT_RTU: ERR %s, rcnt=%d rdata=%x\n",
-                        strerror(errno), len, ntohs(rtu_data),
-                        inet_ntoa(((struct sockaddr_in *)
-                                   &cm_ptr->dst.ia_address)->sin_addr));
-               goto bail;
-       }
-
-       /* save state and reference to EP, queue for disc event */
-       cm_ptr->state = SCM_CONNECTED;
-
-       /* final data exchange if remote QP state is good to go */
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n");
-
-#ifdef DAT_EXTENSIONS
-       if (cm_ptr->dst.qp_type == IBV_QPT_UD) {
-               DAT_IB_EXTENSION_EVENT_DATA xevent;
-
-               /* post EVENT, modify_qp created ah */
-               xevent.status = 0;
-               xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
-               xevent.remote_ah.ah = cm_ptr->ah;
-               xevent.remote_ah.qpn = cm_ptr->dst.qpn;
-               dapl_os_memcpy(&xevent.remote_ah.ia_addr,
-                              &cm_ptr->dst.ia_address,
-                              sizeof(cm_ptr->dst.ia_address));
-
-               dapls_evd_post_connection_event_ext((DAPL_EVD *) cm_ptr->ep->
-                                                   param.connect_evd_handle,
-                                                   DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
-                                                   (DAT_EP_HANDLE) cm_ptr->ep,
-                                                   (DAT_COUNT) cm_ptr->dst.p_size,
-                                                   (DAT_PVOID *) cm_ptr->p_data,
-                                                   (DAT_PVOID *) &xevent);
-
-                /* done with socket, don't destroy cm_ptr, need pdata */
-                closesocket(cm_ptr->socket);
-                cm_ptr->socket = DAPL_INVALID_SOCKET;
-               cm_ptr->state = SCM_RELEASED;
-       } else
-#endif
-               dapls_cr_callback(cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp);
-       return;
-      bail:
-       dapls_ib_reinit_ep(cm_ptr->ep); /* reset QP state */
-       dapli_cm_destroy(cm_ptr);
-       dapls_cr_callback(cm_ptr, IB_CME_DESTINATION_REJECT, NULL, cm_ptr->sp);
-}
-
-/*
- * dapls_ib_connect
- *
- * Initiate a connection with the passive listener on another node
- *
- * Input:
- *     ep_handle,
- *     remote_ia_address,
- *     remote_conn_qual,
- *     prd_size                size of private data and structure
- *     prd_prt                 pointer to private data structure
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
-                IN DAT_IA_ADDRESS_PTR remote_ia_address,
-                IN DAT_CONN_QUAL remote_conn_qual,
-                IN DAT_COUNT private_data_size, IN void *private_data)
-{
-       DAPL_EP *ep_ptr;
-       ib_qp_handle_t qp_ptr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " connect(ep_handle %p ....)\n", ep_handle);
-
-       ep_ptr = (DAPL_EP *) ep_handle;
-       qp_ptr = ep_ptr->qp_handle;
-
-       return (dapli_socket_connect(ep_ptr, remote_ia_address,
-                                    remote_conn_qual,
-                                    private_data_size, private_data));
-}
-
-/*
- * dapls_ib_disconnect
- *
- * Disconnect an EP
- *
- * Input:
- *     ep_handle,
- *     disconnect_flags
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- */
-DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "dapls_ib_disconnect(ep_handle %p ....)\n", ep_ptr);
-
-       /* reinit to modify QP state */
-       dapls_ib_reinit_ep(ep_ptr);
-
-       if (ep_ptr->cm_handle == NULL ||
-           ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED)
-               return DAT_SUCCESS;
-       else
-               return (dapli_socket_disconnect(ep_ptr->cm_handle));
-}
-
-/*
- * dapls_ib_disconnect_clean
- *
- * Clean up outstanding connection data. This routine is invoked
- * after the final disconnect callback has occurred. Only on the
- * ACTIVE side of a connection. It is also called if dat_ep_connect
- * times out using the consumer supplied timeout value.
- *
- * Input:
- *     ep_ptr          DAPL_EP
- *     active          Indicates active side of connection
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- *
- */
-void
-dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
-                         IN DAT_BOOLEAN active,
-                         IN const ib_cm_events_t ib_cm_event)
-{
-       /* NOTE: SCM will only initialize cm_handle with RC type
-        * 
-        * For UD there can many in-flight CR's so you 
-        * cannot cleanup timed out CR's with EP reference 
-        * alone since they share the same EP. The common
-        * code that handles connection timeout logic needs 
-        * updated for UD support.
-        */
-       if (ep_ptr->cm_handle)
-               dapli_cm_destroy(ep_ptr->cm_handle);
-
-       return;
-}
-
-/*
- * dapl_ib_setup_conn_listener
- *
- * Have the CM set up a connection listener.
- *
- * Input:
- *     ibm_hca_handle          HCA handle
- *     qp_handle                       QP handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INTERNAL_ERROR
- *     DAT_CONN_QUAL_UNAVAILBLE
- *     DAT_CONN_QUAL_IN_USE
- *
- */
-DAT_RETURN
-dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
-                            IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
-{
-       return (dapli_socket_listen(ia_ptr, ServiceID, sp_ptr));
-}
-
-/*
- * dapl_ib_remove_conn_listener
- *
- * Have the CM remove a connection listener.
- *
- * Input:
- *     ia_handle               IA handle
- *     ServiceID               IB Channel Service ID
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_STATE
- *
- */
-DAT_RETURN
-dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
-{
-       ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
-                    ia_ptr, sp_ptr, cm_ptr);
-
-       /* close accepted socket, free cm_srvc_handle and return */
-       if (cm_ptr != NULL) {
-               if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
-                       shutdown(cm_ptr->socket, SHUT_RDWR);
-                       closesocket(cm_ptr->socket);
-                       cm_ptr->socket = DAPL_INVALID_SOCKET;
-               }
-               /* cr_thread will free */
-               cm_ptr->state = SCM_DESTROY;
-               sp_ptr->cm_srvc_handle = NULL;
-               if (send(cm_ptr->hca->ib_trans.scm[1], 
-                        "w", sizeof "w", 0) == -1)
-                       dapl_log(DAPL_DBG_TYPE_CM,
-                                " cm_destroy: thread wakeup error = %s\n",
-                                strerror(errno));
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_accept_connection
- *
- * Perform necessary steps to accept a connection
- *
- * Input:
- *     cr_handle
- *     ep_handle
- *     private_data_size
- *     private_data
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
-                          IN DAT_EP_HANDLE ep_handle,
-                          IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
-{
-       DAPL_CR *cr_ptr;
-       DAPL_EP *ep_ptr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
-                    cr_handle, ep_handle, p_data, p_size);
-
-       cr_ptr = (DAPL_CR *) cr_handle;
-       ep_ptr = (DAPL_EP *) ep_handle;
-
-       /* allocate and attach a QP if necessary */
-       if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
-               DAT_RETURN status;
-               status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia,
-                                          ep_ptr, ep_ptr);
-               if (status != DAT_SUCCESS)
-                       return status;
-       }
-       return (dapli_socket_accept_usr(ep_ptr, cr_ptr, p_size, p_data));
-}
-
-/*
- * dapls_ib_reject_connection
- *
- * Reject a connection
- *
- * Input:
- *     cr_handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr,
-                          IN int reason,
-                          IN DAT_COUNT psize, IN const DAT_PVOID pdata)
-{
-       struct iovec iov[2];
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " reject(cm %p reason %x, pdata %p, psize %d)\n",
-                    cm_ptr, reason, pdata, psize);
-
-        if (psize > IB_MAX_REJ_PDATA_SIZE)
-                return DAT_LENGTH_ERROR;
-
-       /* write reject data to indicate reject */
-       if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
-               cm_ptr->dst.rej = (uint16_t) reason;
-               cm_ptr->dst.rej = htons(cm_ptr->dst.rej);
-               cm_ptr->dst.p_size = htonl(psize);
-               /* get qp_type from request */
-               cm_ptr->dst.qp_type = ntohs(cm_ptr->dst.qp_type);
-
-               iov[0].iov_base = (void *)&cm_ptr->dst;
-               iov[0].iov_len = sizeof(ib_qp_cm_t);
-               if (psize) {
-                       iov[1].iov_base = pdata;
-                       iov[1].iov_len = psize;
-                       writev(cm_ptr->socket, iov, 2);
-               } else {
-                       writev(cm_ptr->socket, iov, 1);
-               }
-
-               shutdown(cm_ptr->socket, SHUT_RDWR);
-               closesocket(cm_ptr->socket);
-               cm_ptr->socket = DAPL_INVALID_SOCKET;
-       }
-
-       /* cr_thread will destroy CR */
-       cm_ptr->state = SCM_DESTROY;
-       if (send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
-               dapl_log(DAPL_DBG_TYPE_CM,
-                        " cm_destroy: thread wakeup error = %s\n",
-                        strerror(errno));
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_cm_remote_addr
- *
- * Obtain the remote IP address given a connection
- *
- * Input:
- *     cr_handle
- *
- * Output:
- *     remote_ia_address: where to place the remote address
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_HANDLE
- *
- */
-DAT_RETURN
-dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
-                       OUT DAT_SOCK_ADDR6 * remote_ia_address)
-{
-       DAPL_HEADER *header;
-       dp_ib_cm_handle_t ib_cm_handle;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
-                    dat_handle);
-
-       header = (DAPL_HEADER *) dat_handle;
-
-       if (header->magic == DAPL_MAGIC_EP)
-               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
-       else if (header->magic == DAPL_MAGIC_CR)
-               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
-       else
-               return DAT_INVALID_HANDLE;
-
-       dapl_os_memcpy(remote_ia_address,
-                      &ib_cm_handle->dst.ia_address, sizeof(DAT_SOCK_ADDR6));
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_private_data_size
- *
- * Return the size of private data given a connection op type
- *
- * Input:
- *     prd_ptr         private data pointer
- *     conn_op         connection operation type
- *
- * If prd_ptr is NULL, this is a query for the max size supported by
- * the provider, otherwise it is the actual size of the private data
- * contained in prd_ptr.
- *
- *
- * Output:
- *     None
- *
- * Returns:
- *     length of private data
- *
- */
-int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
-                              IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
-{
-       int size;
-
-       switch (conn_op) {
-       case DAPL_PDATA_CONN_REQ:
-               {
-                       size = IB_MAX_REQ_PDATA_SIZE;
-                       break;
-               }
-       case DAPL_PDATA_CONN_REP:
-               {
-                       size = IB_MAX_REP_PDATA_SIZE;
-                       break;
-               }
-       case DAPL_PDATA_CONN_REJ:
-               {
-                       size = IB_MAX_REJ_PDATA_SIZE;
-                       break;
-               }
-       case DAPL_PDATA_CONN_DREQ:
-               {
-                       size = IB_MAX_DREQ_PDATA_SIZE;
-                       break;
-               }
-       case DAPL_PDATA_CONN_DREP:
-               {
-                       size = IB_MAX_DREP_PDATA_SIZE;
-                       break;
-               }
-       default:
-               {
-                       size = 0;
-               }
-
-       }                       /* end case */
-
-       return size;
-}
-
-/*
- * Map all socket CM event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT      11
-
-static struct ib_cm_event_map {
-       const ib_cm_events_t ib_cm_event;
-       DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
-       /* 00 */  {
-       IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
-           /* 01 */  {
-       IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
-           /* 02 */  {
-       IB_CME_DISCONNECTED_ON_LINK_DOWN,
-                   DAT_CONNECTION_EVENT_DISCONNECTED},
-           /* 03 */  {
-       IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
-           /* 04 */  {
-       IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
-                   DAT_CONNECTION_REQUEST_EVENT},
-           /* 05 */  {
-       IB_CME_DESTINATION_REJECT,
-                   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-           /* 06 */  {
-       IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
-                   DAT_CONNECTION_EVENT_PEER_REJECTED},
-           /* 07 */  {
-       IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
-           /* 08 */  {
-       IB_CME_TOO_MANY_CONNECTION_REQUESTS,
-                   DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-           /* 09 */  {
-       IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
-           /* 10 */  {
-       IB_CM_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN}
-};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- *     dat_event_num   DAT event we need an equivelent CM event for
- *
- * Output:
- *     none
- *
- * Returns:
- *     ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
-                      IN DAT_BOOLEAN active)
-{
-       DAT_EVENT_NUMBER dat_event_num;
-       int i;
-
-       active = active;
-
-       if (ib_cm_event > IB_CM_LOCAL_FAILURE)
-               return (DAT_EVENT_NUMBER) 0;
-
-       dat_event_num = 0;
-       for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
-               if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
-                       dat_event_num = ib_cm_event_map[i].dat_event_num;
-                       break;
-               }
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
-                    "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
-                    active ? "active" : "passive", ib_cm_event, dat_event_num);
-
-       return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- * 
- * Input:
- *     ib_cm_event     event provided to the dapl callback routine
- *     active          switch indicating active or passive connection
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
-       ib_cm_events_t ib_cm_event;
-       int i;
-
-       ib_cm_event = 0;
-       for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
-               if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
-                       ib_cm_event = ib_cm_event_map[i].ib_cm_event;
-                       break;
-               }
-       }
-       return ib_cm_event;
-}
-
-/* outbound/inbound CR processing thread to avoid blocking applications */
-void cr_thread(void *arg)
-{
-       struct dapl_hca *hca_ptr = arg;
-       dp_ib_cm_handle_t cr, next_cr;
-       int opt, ret;
-       socklen_t opt_len;
-       char rbuf[2];
-       struct dapl_fd_set *set;
-       enum DAPL_FD_EVENTS event;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread: ENTER hca %p\n", hca_ptr);
-       set = dapl_alloc_fd_set();
-       if (!set)
-               goto out;
-
-       dapl_os_lock(&hca_ptr->ib_trans.lock);
-       hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
-
-       while (1) {
-               dapl_fd_zero(set);
-               dapl_fd_set(hca_ptr->ib_trans.scm[0], set, DAPL_FD_READ);
-
-               if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
-                       next_cr = dapl_llist_peek_head(&hca_ptr->ib_trans.list);
-               else
-                       next_cr = NULL;
-
-               while (next_cr) {
-                       cr = next_cr;
-                       next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
-                                                       (DAPL_LLIST_ENTRY *) &
-                                                       cr->entry);
-                       if (cr->state == SCM_DESTROY
-                           || hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
-                               dapl_llist_remove_entry(&hca_ptr->ib_trans.list,
-                                                       (DAPL_LLIST_ENTRY *) &
-                                                       cr->entry);
-                               dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                                            " CR FREE: %p ep=%p st=%d sock=%d\n", 
-                                            cr, cr->ep, cr->state, cr->socket);
-                               dapl_os_free(cr, sizeof(*cr));
-                               continue;
-                       }
-                       if (cr->socket == DAPL_INVALID_SOCKET) 
-                               continue;
-
-                       event = (cr->state == SCM_CONN_PENDING) ?
-                           DAPL_FD_WRITE : DAPL_FD_READ;
-                       if (dapl_fd_set(cr->socket, set, event)) {
-                               dapl_log(DAPL_DBG_TYPE_ERR,
-                                        " cr_thread: DESTROY CR st=%d fd %d"
-                                        " -> %s\n", cr->state, cr->socket,
-                                        inet_ntoa(((struct sockaddr_in *)
-                                                   &cr->dst.ia_address)->
-                                                  sin_addr));
-                               dapli_cm_destroy(cr);
-                               continue;
-                       }
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                                    " poll cr=%p, socket=%d\n", cr,
-                                    cr->socket);
-                       dapl_os_unlock(&hca_ptr->ib_trans.lock);
-
-                       ret = dapl_poll(cr->socket, event);
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                                    " poll ret=0x%x cr->state=%d socket=%d\n",
-                                    ret, cr->state, cr->socket);
-
-                       /* data on listen, qp exchange, and on disc req */
-                       if (ret == DAPL_FD_READ) {
-                               if (cr->socket != DAPL_INVALID_SOCKET) {
-                                       switch (cr->state) {
-                                       case SCM_LISTEN:
-                                               dapli_socket_accept(cr);
-                                               break;
-                                       case SCM_ACCEPTING:
-                                               dapli_socket_accept_data(cr);
-                                               break;
-                                       case SCM_ACCEPTED:
-                                               dapli_socket_accept_rtu(cr);
-                                               break;
-                                       case SCM_RTU_PENDING:
-                                               dapli_socket_connect_rtu(cr);
-                                               break;
-                                       case SCM_CONNECTED:
-                                               dapli_socket_disconnect(cr);
-                                               break;
-                                       default:
-                                               break;
-                                       }
-                               }
-                       /* connect socket is writable, check status */
-                       } else if (ret == DAPL_FD_WRITE ||
-                                  (cr->state == SCM_CONN_PENDING && 
-                                   ret == DAPL_FD_ERROR)) {
-                               opt = 0;
-                               opt_len = sizeof(opt);
-                               ret = getsockopt(cr->socket, SOL_SOCKET,
-                                                SO_ERROR, (char *)&opt,
-                                                &opt_len);
-                               if (!ret)
-                                       dapli_socket_connected(cr, opt);
-                               else
-                                       dapli_socket_connected(cr, errno);
-                        
-                       /* POLLUP, ERR, NVAL, or poll error - DISC */
-                       } else if (ret < 0 || ret == DAPL_FD_ERROR) {
-                               dapl_log(DAPL_DBG_TYPE_WARN,
-                                    " poll=%d cr->st=%s sk=%d ep %p, %d\n",
-                                    ret, dapl_cm_state_str(cr->state), 
-                                    cr->socket, cr->ep,
-                                    cr->ep ? cr->ep->param.ep_state:0);
-                               dapli_socket_disconnect(cr);
-                       }
-                       dapl_os_lock(&hca_ptr->ib_trans.lock);
-               }
-
-               /* set to exit and all resources destroyed */
-               if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
-                   (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
-                       break;
-
-               dapl_os_unlock(&hca_ptr->ib_trans.lock);
-               dapl_select(set);
-
-               /* if pipe used to wakeup, consume */
-               while (dapl_poll(hca_ptr->ib_trans.scm[0], 
-                                DAPL_FD_READ) == DAPL_FD_READ) {
-                       if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1)
-                               dapl_log(DAPL_DBG_TYPE_CM,
-                                        " cr_thread: read pipe error = %s\n",
-                                        strerror(errno));
-               }
-               dapl_os_lock(&hca_ptr->ib_trans.lock);
-               
-               /* set to exit and all resources destroyed */
-               if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) &&
-                   (dapl_llist_is_empty(&hca_ptr->ib_trans.list)))
-                       break;
-       }
-
-       dapl_os_unlock(&hca_ptr->ib_trans.lock);
-       free(set);
-      out:
-       hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread(hca %p) exit\n", hca_ptr);
-}
-
-
-#ifdef DAPL_COUNTERS
-/* Debug aid: List all Connections in process and state */
-void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
-{
-       /* Print in process CR's for this IA, if debug type set */
-       int i = 0;
-       dp_ib_cm_handle_t cr, next_cr;
-
-       dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
-       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
-                                &ia_ptr->hca_ptr->ib_trans.list))
-                                next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
-                                &ia_ptr->hca_ptr->ib_trans.list);
-       else
-               next_cr = NULL;
-
-        printf("\n DAPL IA CONNECTIONS IN PROCESS:\n");
-       while (next_cr) {
-               cr = next_cr;
-               next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
-                                &ia_ptr->hca_ptr->ib_trans.list,
-                               (DAPL_LLIST_ENTRY*)&cr->entry);
-
-               printf( "  CONN[%d]: sp %p ep %p sock %d %s %s %s %s %d\n",
-                       i, cr->sp, cr->ep, cr->socket,
-                       cr->dst.qp_type == IBV_QPT_RC ? "RC" : "UD",
-                       dapl_cm_state_str(cr->state),
-                       cr->sp ? "<-" : "->",
-                       cr->state == SCM_LISTEN ? 
-                       inet_ntoa(((struct sockaddr_in *)
-                               &ia_ptr->hca_ptr->hca_address)->sin_addr) :
-                       inet_ntoa(((struct sockaddr_in *)
-                               &cr->dst.ia_address)->sin_addr),
-                       cr->sp ? (int)cr->sp->conn_qual : 
-                       ntohs(((struct sockaddr_in *)
-                               &cr->dst.ia_address)->sin_port));
-               i++;
-       }
-       printf("\n");
-       dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
-}
-#endif
diff --git a/dapl/openib_scm/dapl_ib_cq.c b/dapl/openib_scm/dapl_ib_cq.c
deleted file mode 100644 (file)
index 2af1889..0000000
+++ /dev/null
@@ -1,705 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_cq.c
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The uDAPL openib provider - completion queue
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ring_buffer_util.h"
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include "..\..\..\..\..\etc\user\dlist.c"
-
-void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%p)\n", hca_ptr);
-
-       if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
-               return;
-
-       /* destroy cr_thread and lock */
-       hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
-       CompChannelCancel(&hca_ptr->ib_trans.ib_cq->comp_channel);
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " cq_thread_destroy(%p) cancel\n",
-                    hca_ptr);
-       while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
-               dapl_os_sleep_usec(20000);
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%d) exit\n",
-                    dapl_os_getpid());
-}
-
-static void cq_thread(void *arg)
-{
-       struct dapl_hca *hca_ptr = arg;
-       struct dapl_evd *evd_ptr;
-       struct ibv_cq *ibv_cq = NULL;
-
-       hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: ENTER hca %p\n", hca_ptr);
-
-       /* wait on DTO event, or signal to abort */
-       while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
-               if (!ibv_get_cq_event
-                   (hca_ptr->ib_trans.ib_cq, &ibv_cq, (void *)&evd_ptr)) {
-
-                       if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
-                               ibv_ack_cq_events(ibv_cq, 1);
-                               return;
-                       }
-
-                       /* process DTO event via callback */
-                       dapl_evd_dto_callback(hca_ptr->ib_hca_handle,
-                                             evd_ptr->ib_cq_handle,
-                                             (void *)evd_ptr);
-
-                       ibv_ack_cq_events(ibv_cq, 1);
-               }
-       }
-       hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: EXIT: hca %p \n",
-                    hca_ptr);
-}
-
-#else                          // _WIN32 || _WIN64
-
-void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%p)\n", hca_ptr);
-
-       if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
-               return;
-
-       /* destroy cr_thread and lock */
-       hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
-       pthread_kill(hca_ptr->ib_trans.cq_thread, SIGUSR1);
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " cq_thread_destroy(%p) cancel\n",
-                    hca_ptr);
-       while (hca_ptr->ib_trans.cq_state != IB_THREAD_EXIT) {
-               dapl_os_sleep_usec(20000);
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_destroy(%d) exit\n",
-                    dapl_os_getpid());
-}
-
-/* catch the signal */
-static void ib_cq_handler(int signum)
-{
-       return;
-}
-
-static void cq_thread(void *arg)
-{
-       struct dapl_hca *hca_ptr = arg;
-       struct dapl_evd *evd_ptr;
-       struct ibv_cq *ibv_cq = NULL;
-       sigset_t sigset;
-
-       sigemptyset(&sigset);
-       sigaddset(&sigset, SIGUSR1);
-       pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
-       signal(SIGUSR1, ib_cq_handler);
-
-       hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: ENTER hca %p\n", hca_ptr);
-
-       /* wait on DTO event, or signal to abort */
-       while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
-               struct pollfd cq_fd = {
-                       .fd = hca_ptr->ib_trans.ib_cq->fd,
-                       .events = POLLIN,
-                       .revents = 0
-               };
-               if ((poll(&cq_fd, 1, -1) == 1) &&
-                   (!ibv_get_cq_event
-                    (hca_ptr->ib_trans.ib_cq, &ibv_cq, (void *)&evd_ptr))) {
-
-                       if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
-                               ibv_ack_cq_events(ibv_cq, 1);
-                               return;
-                       }
-
-                       /* process DTO event via callback */
-                       dapl_evd_dto_callback(hca_ptr->ib_hca_handle,
-                                             evd_ptr->ib_cq_handle,
-                                             (void *)evd_ptr);
-
-                       ibv_ack_cq_events(ibv_cq, 1);
-               }
-       }
-       hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread: EXIT: hca %p \n",
-                    hca_ptr);
-}
-
-#endif                         // _WIN32 || _WIN64
-
-int dapli_cq_thread_init(struct dapl_hca *hca_ptr)
-{
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_init(%p)\n", hca_ptr);
-
-       /* create thread to process inbound connect request */
-       hca_ptr->ib_trans.cq_state = IB_THREAD_INIT;
-       dat_status =
-           dapl_os_thread_create(cq_thread, (void *)hca_ptr,
-                                 &hca_ptr->ib_trans.cq_thread);
-       if (dat_status != DAT_SUCCESS) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " cq_thread_init: failed to create thread\n");
-               return 1;
-       }
-
-       /* wait for thread to start */
-       while (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN) {
-               dapl_os_sleep_usec(20000);
-       }
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cq_thread_init(%d) exit\n",
-                    dapl_os_getpid());
-       return 0;
-}
-
-/*
- * Map all verbs DTO completion codes to the DAT equivelent.
- *
- * Not returned by verbs:     DAT_DTO_ERR_PARTIAL_PACKET
- */
-static struct ib_status_map {
-       int ib_status;
-       DAT_DTO_COMPLETION_STATUS dat_status;
-} ib_status_map[] = {
-       /* 00 */  {
-       IBV_WC_SUCCESS, DAT_DTO_SUCCESS},
-           /* 01 */  {
-       IBV_WC_LOC_LEN_ERR, DAT_DTO_ERR_LOCAL_LENGTH},
-           /* 02 */  {
-       IBV_WC_LOC_QP_OP_ERR, DAT_DTO_ERR_LOCAL_EP},
-           /* 03 */  {
-       IBV_WC_LOC_EEC_OP_ERR, DAT_DTO_ERR_TRANSPORT},
-           /* 04 */  {
-       IBV_WC_LOC_PROT_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-           /* 05 */  {
-       IBV_WC_WR_FLUSH_ERR, DAT_DTO_ERR_FLUSHED},
-           /* 06 */  {
-       IBV_WC_MW_BIND_ERR, DAT_RMR_OPERATION_FAILED},
-           /* 07 */  {
-       IBV_WC_BAD_RESP_ERR, DAT_DTO_ERR_BAD_RESPONSE},
-           /* 08 */  {
-       IBV_WC_LOC_ACCESS_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-           /* 09 */  {
-       IBV_WC_REM_INV_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-           /* 10 */  {
-       IBV_WC_REM_ACCESS_ERR, DAT_DTO_ERR_REMOTE_ACCESS},
-           /* 11 */  {
-       IBV_WC_REM_OP_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-           /* 12 */  {
-       IBV_WC_RETRY_EXC_ERR, DAT_DTO_ERR_TRANSPORT},
-           /* 13 */  {
-       IBV_WC_RNR_RETRY_EXC_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
-           /* 14 */  {
-       IBV_WC_LOC_RDD_VIOL_ERR, DAT_DTO_ERR_LOCAL_PROTECTION},
-           /* 15 */  {
-       IBV_WC_REM_INV_RD_REQ_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-           /* 16 */  {
-       IBV_WC_REM_ABORT_ERR, DAT_DTO_ERR_REMOTE_RESPONDER},
-           /* 17 */  {
-       IBV_WC_INV_EECN_ERR, DAT_DTO_ERR_TRANSPORT},
-           /* 18 */  {
-       IBV_WC_INV_EEC_STATE_ERR, DAT_DTO_ERR_TRANSPORT},
-           /* 19 */  {
-       IBV_WC_FATAL_ERR, DAT_DTO_ERR_TRANSPORT},
-           /* 20 */  {
-       IBV_WC_RESP_TIMEOUT_ERR, DAT_DTO_ERR_RECEIVER_NOT_READY},
-           /* 21 */  {
-IBV_WC_GENERAL_ERR, DAT_DTO_ERR_TRANSPORT},};
-
-/*
- * dapls_ib_get_dto_status
- *
- * Return the DAT status of a DTO operation
- *
- * Input:
- *     cqe_ptr         pointer to completion queue entry
- *
- * Output:
- *     none
- *
- * Returns:
- *     Value from ib_status_map table above
- */
-
-DAT_DTO_COMPLETION_STATUS
-dapls_ib_get_dto_status(IN ib_work_completion_t * cqe_ptr)
-{
-       uint32_t ib_status;
-       int i;
-
-       ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
-
-       /*
-        * Due to the implementation of verbs completion code, we need to
-        * search the table for the correct value rather than assuming
-        * linear distribution.
-        */
-       for (i = 0; i <= IBV_WC_GENERAL_ERR; i++) {
-               if (ib_status == ib_status_map[i].ib_status) {
-                       if (ib_status != IBV_WC_SUCCESS) {
-                               dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
-                                            " DTO completion ERROR: %d: op %#x\n",
-                                            ib_status,
-                                            DAPL_GET_CQE_OPTYPE(cqe_ptr));
-                       }
-                       return ib_status_map[i].dat_status;
-               }
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
-                    " DTO completion ERROR: %d: op %#x\n",
-                    ib_status, DAPL_GET_CQE_OPTYPE(cqe_ptr));
-
-       return DAT_DTO_FAILURE;
-}
-
-DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record,
-                                   OUT DAT_EVENT_NUMBER * async_event)
-{
-       DAT_RETURN dat_status = DAT_SUCCESS;
-       int err_code = err_record->event_type;
-
-       switch (err_code) {
-               /* OVERFLOW error */
-       case IBV_EVENT_CQ_ERR:
-               *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
-               break;
-               /* INTERNAL errors */
-       case IBV_EVENT_DEVICE_FATAL:
-               *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
-               break;
-               /* CATASTROPHIC errors */
-       case IBV_EVENT_PORT_ERR:
-               *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
-               break;
-               /* BROKEN QP error */
-       case IBV_EVENT_SQ_DRAINED:
-       case IBV_EVENT_QP_FATAL:
-       case IBV_EVENT_QP_REQ_ERR:
-       case IBV_EVENT_QP_ACCESS_ERR:
-               *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
-               break;
-
-               /* connection completion */
-       case IBV_EVENT_COMM_EST:
-               *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
-               break;
-
-               /* TODO: process HW state changes */
-       case IBV_EVENT_PATH_MIG:
-       case IBV_EVENT_PATH_MIG_ERR:
-       case IBV_EVENT_PORT_ACTIVE:
-       case IBV_EVENT_LID_CHANGE:
-       case IBV_EVENT_PKEY_CHANGE:
-       case IBV_EVENT_SM_CHANGE:
-       default:
-               dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0);
-       }
-       return dat_status;
-}
-
-/*
- * dapl_ib_cq_alloc
- *
- * Alloc a CQ
- *
- * Input:
- *     ia_handle               IA handle
- *     evd_ptr                 pointer to EVD struct
- *     cqlen                   minimum QLen
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
-                 IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
-       struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
-
-#ifdef CQ_WAIT_OBJECT
-       if (evd_ptr->cq_wait_obj_handle)
-               channel = evd_ptr->cq_wait_obj_handle;
-#endif
-
-       /* Call IB verbs to create CQ */
-       evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                             *cqlen, evd_ptr, channel, 0);
-
-       if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       /* arm cq for events */
-       dapls_set_cq_notify(ia_ptr, evd_ptr);
-
-       /* update with returned cq entry size */
-       *cqlen = evd_ptr->ib_cq_handle->cqe;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
-                    evd_ptr->ib_cq_handle, *cqlen);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_cq_resize
- *
- * Alloc a CQ
- *
- * Input:
- *     ia_handle               IA handle
- *     evd_ptr                 pointer to EVD struct
- *     cqlen                   minimum QLen
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_cq_resize(IN DAPL_IA * ia_ptr,
-                  IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
-{
-       ib_cq_handle_t new_cq;
-       struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
-
-       /* IB verbs doe not support resize. Try to re-create CQ
-        * with new size. Can only be done if QP is not attached. 
-        * destroy EBUSY == QP still attached.
-        */
-
-#ifdef CQ_WAIT_OBJECT
-       if (evd_ptr->cq_wait_obj_handle)
-               channel = evd_ptr->cq_wait_obj_handle;
-#endif
-
-       /* Call IB verbs to create CQ */
-       new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
-                              evd_ptr, channel, 0);
-
-       if (new_cq == IB_INVALID_HANDLE)
-               return DAT_INSUFFICIENT_RESOURCES;
-
-       /* destroy the original and replace if successful */
-       if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
-               ibv_destroy_cq(new_cq);
-               return (dapl_convert_errno(errno, "resize_cq"));
-       }
-
-       /* update EVD with new cq handle and size */
-       evd_ptr->ib_cq_handle = new_cq;
-       *cqlen = new_cq->cqe;
-
-       /* arm cq for events */
-       dapls_set_cq_notify(ia_ptr, evd_ptr);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_cq_free
- *
- * destroy a CQ
- *
- * Input:
- *     ia_handle               IA handle
- *     evd_ptr                 pointer to EVD struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
-       DAT_EVENT event;
-       ib_work_completion_t wc;
-
-       if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
-               /* pull off CQ and EVD entries and toss */
-               while (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, &wc) == 1) ;
-               while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ;
-               if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
-                       return (dapl_convert_errno(errno, "ibv_destroy_cq"));
-               evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_set_cq_notify
- *
- * Set the CQ notification for next
- *
- * Input:
- *     hca_handl               hca handle
- *     DAPL_EVD                evd handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     dapl_convert_errno 
- */
-DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
-{
-       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
-               return (dapl_convert_errno(errno, "notify_cq"));
-       else
-               return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_notify
- *
- * Set the CQ notification type
- *
- * Input:
- *     hca_handl               hca handle
- *     evd_ptr                 evd handle
- *     type                    notification type
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     dapl_convert_errno
- */
-DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
-                                     IN DAPL_EVD * evd_ptr,
-                                     IN ib_notification_type_t type)
-{
-       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
-               return (dapl_convert_errno(errno, "notify_cq_type"));
-       else
-               return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_completion_poll
- *
- * CQ poll for completions
- *
- * Input:
- *     hca_handl               hca handle
- *     evd_ptr                 evd handle
- *     wc_ptr                  work completion
- *
- * Output:
- *     none
- *
- * Returns: 
- *     DAT_SUCCESS
- *     DAT_QUEUE_EMPTY
- *     
- */
-DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
-                                   IN DAPL_EVD * evd_ptr,
-                                   IN ib_work_completion_t * wc_ptr)
-{
-       int ret;
-
-       ret = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
-       if (ret == 1)
-               return DAT_SUCCESS;
-
-       return DAT_QUEUE_EMPTY;
-}
-
-#ifdef CQ_WAIT_OBJECT
-
-/* NEW common wait objects for providers with direct CQ wait objects */
-DAT_RETURN
-dapls_ib_wait_object_create(IN DAPL_EVD * evd_ptr,
-                           IN ib_wait_obj_handle_t * p_cq_wait_obj_handle)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_create: (%p,%p)\n",
-                    evd_ptr, p_cq_wait_obj_handle);
-
-       /* set cq_wait object to evd_ptr */
-       *p_cq_wait_obj_handle =
-           ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->
-                                   ib_hca_handle);
-
-       return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_destroy: wait_obj=%p\n", p_cq_wait_obj_handle);
-
-       ibv_destroy_comp_channel(p_cq_wait_obj_handle);
-
-       return DAT_SUCCESS;
-}
-
-DAT_RETURN
-dapls_ib_wait_object_wakeup(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wakeup: wait_obj=%p\n", p_cq_wait_obj_handle);
-
-       /* no wake up mechanism */
-       return DAT_SUCCESS;
-}
-
-#if defined(_WIN32) || defined(_WIN64)
-DAT_RETURN
-dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
-                         IN uint32_t timeout)
-{
-       struct dapl_evd *evd_ptr;
-       struct ibv_cq *ibv_cq = NULL;
-       int status = 0;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wait: CQ channel %p time %d\n",
-                    p_cq_wait_obj_handle, timeout);
-
-       /* uDAPL timeout values in usecs */
-       p_cq_wait_obj_handle->comp_channel.Milliseconds = timeout / 1000;
-
-       /* returned event */
-       status = ibv_get_cq_event(p_cq_wait_obj_handle, &ibv_cq,
-                                 (void *)&evd_ptr);
-       if (status == 0) {
-               ibv_ack_cq_events(ibv_cq, 1);
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wait: RET evd %p ibv_cq %p %s\n",
-                    evd_ptr, ibv_cq, strerror(errno));
-
-       return (dapl_convert_errno(status, "cq_wait_object_wait"));
-}
-#else                          //_WIN32 || _WIN64
-DAT_RETURN
-dapls_ib_wait_object_wait(IN ib_wait_obj_handle_t p_cq_wait_obj_handle,
-                         IN uint32_t timeout)
-{
-       struct dapl_evd *evd_ptr;
-       struct ibv_cq *ibv_cq = NULL;
-       int status = 0;
-       int timeout_ms = -1;
-       struct pollfd cq_fd = {
-               .fd = p_cq_wait_obj_handle->fd,
-               .events = POLLIN,
-               .revents = 0
-       };
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wait: CQ channel %p time %d\n",
-                    p_cq_wait_obj_handle, timeout);
-
-       /* uDAPL timeout values in usecs */
-       if (timeout != DAT_TIMEOUT_INFINITE)
-               timeout_ms = timeout / 1000;
-
-       status = poll(&cq_fd, 1, timeout_ms);
-
-       /* returned event */
-       if (status > 0) {
-               if (!ibv_get_cq_event(p_cq_wait_obj_handle,
-                                     &ibv_cq, (void *)&evd_ptr)) {
-                       ibv_ack_cq_events(ibv_cq, 1);
-               }
-               status = 0;
-
-               /* timeout */
-       } else if (status == 0)
-               status = ETIMEDOUT;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " cq_object_wait: RET evd %p ibv_cq %p %s\n",
-                    evd_ptr, ibv_cq, strerror(errno));
-
-       return (dapl_convert_errno(status, "cq_wait_object_wait"));
-
-}
-#endif                         //_WIN32 || _WIN64
-#endif                         // CQ_WAIT_OBJECT
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h
deleted file mode 100644 (file)
index 9118b2e..0000000
+++ /dev/null
@@ -1,527 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_dto.h
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The OpenIB uCMA provider - DTO operations and CQE macros 
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-#ifndef _DAPL_IB_DTO_H_
-#define _DAPL_IB_DTO_H_
-
-#include "dapl_ib_util.h"
-
-#ifdef DAT_EXTENSIONS
-#include <dat2/dat_ib_extensions.h>
-#endif
-
-#define        DEFAULT_DS_ENTRIES      8
-
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
-
-#define CQE_WR_TYPE_UD(id) \
-       (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
-
-/*
- * dapls_ib_post_recv
- *
- * Provider specific Post RECV function
- */
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_post_recv (
-       IN  DAPL_EP             *ep_ptr,
-       IN  DAPL_COOKIE         *cookie,
-       IN  DAT_COUNT           segments,
-       IN  DAT_LMR_TRIPLET     *local_iov )
-{
-       struct ibv_recv_wr wr;
-       struct ibv_recv_wr *bad_wr;
-       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
-       DAT_COUNT i, total_len;
-       int ret;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
-                    ep_ptr, cookie, segments, local_iov);
-
-       /* setup work request */
-       total_len = 0;
-       wr.next = 0;
-       wr.num_sge = segments;
-       wr.wr_id = (uint64_t)(uintptr_t)cookie;
-       wr.sg_list = ds;
-
-       if (cookie != NULL) {
-               for (i = 0; i < segments; i++) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                                    " post_rcv: l_key 0x%x va %p len %d\n",
-                                    ds->lkey, ds->addr, ds->length );
-                       total_len += ds->length;
-                       ds++;
-               }
-               cookie->val.dto.size = total_len;
-       }
-
-       ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
-       
-       if (ret)
-               return(dapl_convert_errno(errno,"ibv_recv"));
-
-       DAPL_CNTR(ep_ptr, DCNT_EP_POST_RECV);
-       DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_RECV_DATA, total_len);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_post_send
- *
- * Provider specific Post SEND function
- */
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_post_send (
-       IN  DAPL_EP                     *ep_ptr,
-       IN  ib_send_op_type_t           op_type,
-       IN  DAPL_COOKIE                 *cookie,
-       IN  DAT_COUNT                   segments,
-       IN  DAT_LMR_TRIPLET             *local_iov,
-       IN  const DAT_RMR_TRIPLET       *remote_iov,
-       IN  DAT_COMPLETION_FLAGS        completion_flags)
-{
-       struct ibv_send_wr wr;
-       struct ibv_send_wr *bad_wr;
-       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
-       ib_hca_transport_t *ibt_ptr = 
-               &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
-       DAT_COUNT i, total_len;
-       int ret;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " post_snd: ep %p op %d ck %p sgs",
-                    "%d l_iov %p r_iov %p f %d\n",
-                    ep_ptr, op_type, cookie, segments, local_iov, 
-                    remote_iov, completion_flags);
-
-#ifdef DAT_EXTENSIONS  
-       if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
-               return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-#endif
-       /* setup the work request */
-       wr.next = 0;
-       wr.opcode = op_type;
-       wr.num_sge = segments;
-       wr.send_flags = 0;
-       wr.wr_id = (uint64_t)(uintptr_t)cookie;
-       wr.sg_list = ds;
-       total_len = 0;
-
-       if (cookie != NULL) {
-               for (i = 0; i < segments; i++ ) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                                    " post_snd: lkey 0x%x va %p len %d\n",
-                                    ds->lkey, ds->addr, ds->length );
-                       total_len += ds->length;
-                       ds++;
-               }
-               cookie->val.dto.size = total_len;
-       }
-
-       if (wr.num_sge && 
-           (op_type == OP_RDMA_WRITE || op_type == OP_RDMA_READ)) {
-               wr.wr.rdma.remote_addr = remote_iov->virtual_address;
-               wr.wr.rdma.rkey = remote_iov->rmr_context;
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_snd_rdma: rkey 0x%x va %#016Lx\n",
-                            wr.wr.rdma.rkey, wr.wr.rdma.remote_addr);
-       }
-
-
-       /* inline data for send or write ops */
-       if ((total_len <= ibt_ptr->max_inline_send) && 
-          ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) 
-               wr.send_flags |= IBV_SEND_INLINE;
-       
-       /* set completion flags in work request */
-       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
-                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
-       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
-                               completion_flags) ? IBV_SEND_FENCE : 0;
-       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
-                               completion_flags) ? IBV_SEND_SOLICITED : 0;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                    " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
-                    wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
-       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
-
-       if (ret)
-               return(dapl_convert_errno(errno,"ibv_send"));
-
-#ifdef DAPL_COUNTERS
-       switch (op_type) {
-       case OP_SEND:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_DATA,total_len);
-               break;
-       case OP_RDMA_WRITE:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_WRITE_DATA,total_len);
-               break;  
-       case OP_RDMA_READ:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_READ);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_READ_DATA,total_len);
-               break;
-       default:
-               break;
-       }
-#endif /* DAPL_COUNTERS */
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
-       return DAT_SUCCESS;
-}
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ DAT_DTOS dapls_cqe_dtos_opcode(ib_work_completion_t *cqe_p)
-{
-       switch (cqe_p->opcode) {
-
-       case IBV_WC_SEND:
-#ifdef DAT_EXTENSIONS
-               if (CQE_WR_TYPE_UD(cqe_p->wr_id))
-                       return (DAT_IB_DTO_SEND_UD);
-               else
-#endif                 
-               return (DAT_DTO_SEND);
-       case IBV_WC_RDMA_READ:
-               return (DAT_DTO_RDMA_READ);
-       case IBV_WC_BIND_MW:
-               return (DAT_DTO_BIND_MW);
-#ifdef DAT_EXTENSIONS
-       case IBV_WC_RDMA_WRITE:
-               if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
-                       return (DAT_IB_DTO_RDMA_WRITE_IMMED);
-               else
-                       return (DAT_DTO_RDMA_WRITE);
-       case IBV_WC_COMP_SWAP:
-               return (DAT_IB_DTO_CMP_SWAP);
-       case IBV_WC_FETCH_ADD:
-               return (DAT_IB_DTO_FETCH_ADD);
-       case IBV_WC_RECV_RDMA_WITH_IMM:
-               return (DAT_IB_DTO_RECV_IMMED);
-#else
-       case IBV_WC_RDMA_WRITE:
-               return (DAT_DTO_RDMA_WRITE);
-#endif
-       case IBV_WC_RECV:
-#ifdef DAT_EXTENSIONS
-               if (CQE_WR_TYPE_UD(cqe_p->wr_id)) 
-                       return (DAT_IB_DTO_RECV_UD);
-               else if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
-                       return (DAT_IB_DTO_RECV_MSG_IMMED);
-               else
-#endif 
-               return (DAT_DTO_RECEIVE);
-       default:
-               return (0xff);
-       }
-}
-#define DAPL_GET_CQE_DTOS_OPTYPE(cqe_p) dapls_cqe_dtos_opcode(cqe_p)
-
-
-#ifdef DAT_EXTENSIONS
-/*
- * dapls_ib_post_ext_send
- *
- * Provider specific extended Post SEND function for atomics
- *     OP_COMP_AND_SWAP and OP_FETCH_AND_ADD
- */
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_post_ext_send (
-       IN  DAPL_EP                     *ep_ptr,
-       IN  ib_send_op_type_t           op_type,
-       IN  DAPL_COOKIE                 *cookie,
-       IN  DAT_COUNT                   segments,
-       IN  DAT_LMR_TRIPLET             *local_iov,
-       IN  const DAT_RMR_TRIPLET       *remote_iov,
-       IN  DAT_UINT32                  immed_data,
-       IN  DAT_UINT64                  compare_add,
-       IN  DAT_UINT64                  swap,
-       IN  DAT_COMPLETION_FLAGS        completion_flags,
-       IN  DAT_IB_ADDR_HANDLE          *remote_ah)
-{
-       struct ibv_send_wr wr;
-       struct ibv_send_wr *bad_wr;
-       ib_data_segment_t *ds = (ib_data_segment_t *)local_iov;
-       DAT_COUNT i, total_len;
-       int ret;
-       
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " post_ext_snd: ep %p op %d ck %p sgs",
-                    "%d l_iov %p r_iov %p f %d\n",
-                    ep_ptr, op_type, cookie, segments, local_iov, 
-                    remote_iov, completion_flags, remote_ah);
-
-       /* setup the work request */
-       wr.next = 0;
-       wr.opcode = op_type;
-       wr.num_sge = segments;
-       wr.send_flags = 0;
-       wr.wr_id = (uint64_t)(uintptr_t)cookie;
-       wr.sg_list = ds;
-       total_len = 0;
-
-       if (cookie != NULL) {
-               for (i = 0; i < segments; i++ ) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                                    " post_snd: lkey 0x%x va %p len %d\n",
-                                    ds->lkey, ds->addr, ds->length );
-                       total_len += ds->length;
-                       ds++;
-               }
-               cookie->val.dto.size = total_len;
-       }
-
-       switch (op_type) {
-       case OP_RDMA_WRITE_IMM:
-               /* OP_RDMA_WRITE)IMMED has direct IB wr_type mapping */
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: rkey 0x%x va %#016Lx immed=0x%x\n",
-                            remote_iov?remote_iov->rmr_context:0, 
-                            remote_iov?remote_iov->virtual_address:0,
-                            immed_data);
-
-               wr.imm_data = immed_data;
-               if (wr.num_sge) {
-                       wr.wr.rdma.remote_addr = remote_iov->virtual_address;
-                       wr.wr.rdma.rkey = remote_iov->rmr_context;
-               }
-               break;
-       case OP_COMP_AND_SWAP:
-               /* OP_COMP_AND_SWAP has direct IB wr_type mapping */
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: OP_COMP_AND_SWAP=%lx,"
-                            "%lx rkey 0x%x va %#016Lx\n",
-                            compare_add, swap, remote_iov->rmr_context,
-                            remote_iov->virtual_address);
-               
-               wr.wr.atomic.compare_add = compare_add;
-               wr.wr.atomic.swap = swap;
-               wr.wr.atomic.remote_addr = remote_iov->virtual_address;
-               wr.wr.atomic.rkey = remote_iov->rmr_context;
-               break;
-       case OP_FETCH_AND_ADD:
-               /* OP_FETCH_AND_ADD has direct IB wr_type mapping */
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: OP_FETCH_AND_ADD=%lx,"
-                            "%lx rkey 0x%x va %#016Lx\n",
-                            compare_add, remote_iov->rmr_context,
-                            remote_iov->virtual_address);
-
-               wr.wr.atomic.compare_add = compare_add;
-               wr.wr.atomic.remote_addr = remote_iov->virtual_address;
-               wr.wr.atomic.rkey = remote_iov->rmr_context;
-               break;
-       case OP_SEND_UD:
-               /* post must be on EP with service_type of UD */
-               if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
-                       return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-
-               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                            " post_ext: OP_SEND_UD ah=%p"
-                            " qp_num=0x%x\n",
-                            remote_ah, remote_ah->qpn);
-               
-               wr.opcode = OP_SEND;
-               wr.wr.ud.ah = remote_ah->ah;
-               wr.wr.ud.remote_qpn = remote_ah->qpn;
-               wr.wr.ud.remote_qkey = SCM_UD_QKEY;
-               break;
-       default:
-               break;
-       }
-
-       /* set completion flags in work request */
-       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
-                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
-       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
-                               completion_flags) ? IBV_SEND_FENCE : 0;
-       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
-                               completion_flags) ? IBV_SEND_SOLICITED : 0;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
-                    " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
-                    wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
-
-       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
-
-       if (ret)
-               return( dapl_convert_errno(errno,"ibv_send") );
-       
-#ifdef DAPL_COUNTERS
-       switch (op_type) {
-       case OP_RDMA_WRITE_IMM:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_WRITE_IMM);
-               DAPL_CNTR_DATA(ep_ptr, 
-                              DCNT_EP_POST_WRITE_IMM_DATA, total_len);
-               break;
-       case OP_COMP_AND_SWAP:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_CMP_SWAP);
-               break;  
-       case OP_FETCH_AND_ADD:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_FETCH_ADD);
-               break;
-       case OP_SEND_UD:
-               DAPL_CNTR(ep_ptr, DCNT_EP_POST_SEND_UD);
-               DAPL_CNTR_DATA(ep_ptr, DCNT_EP_POST_SEND_UD_DATA, total_len);
-               break;
-       default:
-               break;
-       }
-#endif /* DAPL_COUNTERS */
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP," post_snd: returned\n");
-        return DAT_SUCCESS;
-}
-#endif
-
-STATIC _INLINE_ DAT_RETURN 
-dapls_ib_optional_prv_dat(
-       IN  DAPL_CR             *cr_ptr,
-       IN  const void          *event_data,
-       OUT   DAPL_CR           **cr_pp)
-{
-    return DAT_SUCCESS;
-}
-
-
-/* map Work Completions to DAPL WR operations */
-STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
-{
-#ifdef DAPL_COUNTERS
-       DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t)cqe_p->wr_id;
-#endif /* DAPL_COUNTERS */
-
-       switch (cqe_p->opcode) {
-       case IBV_WC_SEND:
-               if (CQE_WR_TYPE_UD(cqe_p->wr_id))
-                       return(OP_SEND_UD);
-               else
-                       return (OP_SEND);
-       case IBV_WC_RDMA_WRITE:
-               if (cqe_p->wc_flags & IBV_WC_WITH_IMM)
-                       return (OP_RDMA_WRITE_IMM);
-               else
-                       return (OP_RDMA_WRITE);
-       case IBV_WC_RDMA_READ:
-               return (OP_RDMA_READ);
-       case IBV_WC_COMP_SWAP:
-               return (OP_COMP_AND_SWAP);
-       case IBV_WC_FETCH_ADD:
-               return (OP_FETCH_AND_ADD);
-       case IBV_WC_BIND_MW:
-               return (OP_BIND_MW);
-       case IBV_WC_RECV:
-               if (CQE_WR_TYPE_UD(cqe_p->wr_id)) {
-                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV_UD);
-                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_UD_DATA, 
-                                      cqe_p->byte_len);
-                       return (OP_RECV_UD);
-               }
-               else if (cqe_p->wc_flags & IBV_WC_WITH_IMM) {
-                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV_IMM);
-                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_IMM_DATA, 
-                                      cqe_p->byte_len);
-                       return (OP_RECEIVE_IMM);
-               } else {
-                       DAPL_CNTR(cookie->ep, DCNT_EP_RECV);
-                       DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_DATA, 
-                                      cqe_p->byte_len);
-                       return (OP_RECEIVE);
-               }
-       case IBV_WC_RECV_RDMA_WITH_IMM:
-               DAPL_CNTR(cookie->ep, DCNT_EP_RECV_RDMA_IMM);
-               DAPL_CNTR_DATA(cookie->ep, DCNT_EP_RECV_RDMA_IMM_DATA, 
-                              cqe_p->byte_len);
-               return (OP_RECEIVE_IMM);
-       default:
-               return (OP_INVALID);
-       }
-}
-
-#define DAPL_GET_CQE_OPTYPE(cqe_p) dapls_cqe_opcode(cqe_p)
-#define DAPL_GET_CQE_WRID(cqe_p) ((ib_work_completion_t*)cqe_p)->wr_id
-#define DAPL_GET_CQE_STATUS(cqe_p) ((ib_work_completion_t*)cqe_p)->status
-#define DAPL_GET_CQE_VENDOR_ERR(cqe_p) ((ib_work_completion_t*)cqe_p)->vendor_err
-#define DAPL_GET_CQE_BYTESNUM(cqe_p) ((ib_work_completion_t*)cqe_p)->byte_len
-#define DAPL_GET_CQE_IMMED_DATA(cqe_p) ((ib_work_completion_t*)cqe_p)->imm_data
-
-STATIC _INLINE_ char * dapls_dto_op_str(int op)
-{
-    static char *optable[] =
-    {
-        "OP_RDMA_WRITE",
-        "OP_RDMA_WRITE_IMM",
-        "OP_SEND",
-        "OP_SEND_IMM",
-        "OP_RDMA_READ",
-        "OP_COMP_AND_SWAP",
-        "OP_FETCH_AND_ADD",
-        "OP_RECEIVE",
-        "OP_RECEIVE_MSG_IMM",
-       "OP_RECEIVE_RDMA_IMM",
-        "OP_BIND_MW"
-       "OP_SEND_UD"
-       "OP_RECV_UD"
-    };
-    return ((op < 0 || op > 12) ? "Invalid CQE OP?" : optable[op]);
-}
-
-static _INLINE_ char *
-dapls_cqe_op_str(IN ib_work_completion_t *cqe_ptr)
-{
-    return dapls_dto_op_str(DAPL_GET_CQE_OPTYPE(cqe_ptr));
-}
-
-#define DAPL_GET_CQE_OP_STR(cqe) dapls_cqe_op_str(cqe)
-
-#endif /*  _DAPL_IB_DTO_H_ */
diff --git a/dapl/openib_scm/dapl_ib_extensions.c b/dapl/openib_scm/dapl_ib_extensions.c
deleted file mode 100755 (executable)
index 98a07ec..0000000
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Copyright (c) 2007 Intel Corporation.  All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- * 
- * MODULE: dapl_ib_extensions.c
- *
- * PURPOSE:  Extensions routines for OpenIB uCMA provider
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_ep_util.h"
-#include "dapl_cookie.h"
-#include <stdarg.h>
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
-              IN DAT_UINT64 cmp_add,
-              IN DAT_UINT64 swap,
-              IN DAT_UINT32 immed_data,
-              IN DAT_COUNT segments,
-              IN DAT_LMR_TRIPLET * local_iov,
-              IN DAT_DTO_COOKIE user_cookie,
-              IN const DAT_RMR_TRIPLET * remote_iov,
-              IN int op_type,
-              IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah);
-
-/*
- * dapl_extensions
- *
- * Process extension requests
- *
- * Input:
- *     ext_type,
- *     ...
- *
- * Output:
- *     Depends....
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_NOT_IMPLEMENTED
- *      .....
- *
- */
-DAT_RETURN
-dapl_extensions(IN DAT_HANDLE dat_handle,
-               IN DAT_EXTENDED_OP ext_op, IN va_list args)
-{
-       DAT_EP_HANDLE ep;
-       DAT_IB_ADDR_HANDLE *ah = NULL;
-       DAT_LMR_TRIPLET *lmr_p;
-       DAT_DTO_COOKIE cookie;
-       const DAT_RMR_TRIPLET *rmr_p;
-       DAT_UINT64 dat_uint64a, dat_uint64b;
-       DAT_UINT32 dat_uint32;
-       DAT_COUNT segments = 1;
-       DAT_COMPLETION_FLAGS comp_flags;
-       DAT_RETURN status = DAT_NOT_IMPLEMENTED;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_API,
-                    "dapl_extensions(hdl %p operation %d, ...)\n",
-                    dat_handle, ext_op);
-
-       switch ((int)ext_op) {
-
-       case DAT_IB_RDMA_WRITE_IMMED_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " WRITE_IMMED_DATA extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               segments = va_arg(args, DAT_COUNT);     /* num segments */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
-               dat_uint32 = va_arg(args, DAT_UINT32);  /* immed data */
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, 0, 0, dat_uint32, segments, lmr_p,
-                                       cookie, rmr_p, OP_RDMA_WRITE_IMM,
-                                       comp_flags, ah);
-               break;
-
-       case DAT_IB_CMP_AND_SWAP_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " CMP_AND_SWAP extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               dat_uint64a = va_arg(args, DAT_UINT64); /* cmp_value */
-               dat_uint64b = va_arg(args, DAT_UINT64); /* swap_value */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, dat_uint64a, dat_uint64b,
-                                       0, segments, lmr_p, cookie, rmr_p,
-                                       OP_COMP_AND_SWAP, comp_flags, ah);
-               break;
-
-       case DAT_IB_FETCH_AND_ADD_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " FETCH_AND_ADD extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               dat_uint64a = va_arg(args, DAT_UINT64); /* add value */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               rmr_p = va_arg(args, const DAT_RMR_TRIPLET *);
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, dat_uint64a, 0, 0, segments,
-                                       lmr_p, cookie, rmr_p,
-                                       OP_FETCH_AND_ADD, comp_flags, ah);
-               break;
-
-       case DAT_IB_UD_SEND_OP:
-               dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                            " UD post_send extension call\n");
-
-               ep = dat_handle;        /* ep_handle */
-               segments = va_arg(args, DAT_COUNT);     /* segments */
-               lmr_p = va_arg(args, DAT_LMR_TRIPLET *);
-               ah = va_arg(args, DAT_IB_ADDR_HANDLE *);
-               cookie = va_arg(args, DAT_DTO_COOKIE);
-               comp_flags = va_arg(args, DAT_COMPLETION_FLAGS);
-
-               status = dapli_post_ext(ep, 0, 0, 0, segments,
-                                       lmr_p, cookie, NULL,
-                                       OP_SEND_UD, comp_flags, ah);
-               break;
-
-#ifdef DAPL_COUNTERS
-       case DAT_QUERY_COUNTERS_OP:
-               {
-                       int cntr, reset;
-                       DAT_UINT64 *p_cntr_out;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                                    " Query counter extension call\n");
-
-                       cntr = va_arg(args, int);
-                       p_cntr_out = va_arg(args, DAT_UINT64 *);
-                       reset = va_arg(args, int);
-
-                       status = dapl_query_counter(dat_handle, cntr,
-                                                   p_cntr_out, reset);
-                       break;
-               }
-       case DAT_PRINT_COUNTERS_OP:
-               {
-                       int cntr, reset;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                                    " Print counter extension call\n");
-
-                       cntr = va_arg(args, int);
-                       reset = va_arg(args, int);
-
-                       dapl_print_counter(dat_handle, cntr, reset);
-                       status = DAT_SUCCESS;
-                       break;
-               }
-#endif                         /* DAPL_COUNTERS */
-
-       default:
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            "unsupported extension(%d)\n", (int)ext_op);
-       }
-
-       return (status);
-}
-
-DAT_RETURN
-dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
-              IN DAT_UINT64 cmp_add,
-              IN DAT_UINT64 swap,
-              IN DAT_UINT32 immed_data,
-              IN DAT_COUNT segments,
-              IN DAT_LMR_TRIPLET * local_iov,
-              IN DAT_DTO_COOKIE user_cookie,
-              IN const DAT_RMR_TRIPLET * remote_iov,
-              IN int op_type,
-              IN DAT_COMPLETION_FLAGS flags, IN DAT_IB_ADDR_HANDLE * ah)
-{
-       DAPL_EP *ep_ptr;
-       ib_qp_handle_t qp_ptr;
-       DAPL_COOKIE *cookie = NULL;
-       DAT_RETURN dat_status = DAT_SUCCESS;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_API,
-                    " post_ext_op: ep %p cmp_val %d "
-                    "swap_val %d cookie 0x%x, r_iov %p, flags 0x%x, ah %p\n",
-                    ep_handle, (unsigned)cmp_add, (unsigned)swap,
-                    (unsigned)user_cookie.as_64, remote_iov, flags, ah);
-
-       if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP))
-               return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
-
-       ep_ptr = (DAPL_EP *) ep_handle;
-       qp_ptr = ep_ptr->qp_handle;
-
-       /*
-        * Synchronization ok since this buffer is only used for send
-        * requests, which aren't allowed to race with each other.
-        */
-       dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
-                                           DAPL_DTO_TYPE_EXTENSION,
-                                           user_cookie, &cookie);
-       if (dat_status != DAT_SUCCESS)
-               goto bail;
-
-       /*
-        * Take reference before posting to avoid race conditions with
-        * completions
-        */
-       dapl_os_atomic_inc(&ep_ptr->req_count);
-
-       /*
-        * Invoke provider specific routine to post DTO
-        */
-       dat_status = dapls_ib_post_ext_send(ep_ptr, op_type, cookie, segments,  /* data segments */
-                                           local_iov, remote_iov, immed_data,  /* immed data */
-                                           cmp_add,    /* compare or add */
-                                           swap,       /* swap */
-                                           flags, ah);
-
-       if (dat_status != DAT_SUCCESS) {
-               dapl_os_atomic_dec(&ep_ptr->req_count);
-               dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
-       }
-
-      bail:
-       return dat_status;
-
-}
-
-/* 
- * New provider routine to process extended DTO events 
- */
-void
-dapls_cqe_to_event_extension(IN DAPL_EP * ep_ptr,
-                            IN DAPL_COOKIE * cookie,
-                            IN ib_work_completion_t * cqe_ptr,
-                            IN DAT_EVENT * event_ptr)
-{
-       uint32_t ibtype;
-       DAT_DTO_COMPLETION_EVENT_DATA *dto =
-           &event_ptr->event_data.dto_completion_event_data;
-       DAT_IB_EXTENSION_EVENT_DATA *ext_data = (DAT_IB_EXTENSION_EVENT_DATA *)
-           & event_ptr->event_extension_data[0];
-       DAT_DTO_COMPLETION_STATUS dto_status;
-
-       /* Get status from cqe */
-       dto_status = dapls_ib_get_dto_status(cqe_ptr);
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                    " cqe_to_event_ext: dto_ptr %p ext_ptr %p status %d\n",
-                    dto, ext_data, dto_status);
-
-       event_ptr->event_number = DAT_IB_DTO_EVENT;
-       dto->ep_handle = cookie->ep;
-       dto->user_cookie = cookie->val.dto.cookie;
-       dto->operation = DAPL_GET_CQE_DTOS_OPTYPE(cqe_ptr);     /* new for 2.0 */
-       dto->status = ext_data->status = dto_status;
-
-       if (dto_status != DAT_DTO_SUCCESS)
-               return;
-
-       /* 
-        * Get operation type from CQ work completion entry and
-        * if extented operation then set extended event data
-        */
-       ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
-
-       switch (ibtype) {
-
-       case OP_RDMA_WRITE_IMM:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: OP_RDMA_WRITE_IMMED\n");
-
-               /* type and outbound rdma write transfer size */
-               dto->transfered_length = cookie->val.dto.size;
-               ext_data->type = DAT_IB_RDMA_WRITE_IMMED;
-               break;
-       case OP_RECEIVE_IMM:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: OP_RECEIVE_RDMA_IMMED\n");
-
-               /* immed recvd, type and inbound rdma write transfer size */
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               ext_data->type = DAT_IB_RDMA_WRITE_IMMED_DATA;
-               ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
-               break;
-       case OP_RECEIVE_MSG_IMM:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: OP_RECEIVE_MSG_IMMED\n");
-
-               /* immed recvd, type and inbound recv message transfer size */
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               ext_data->type = DAT_IB_RECV_IMMED_DATA;
-               ext_data->val.immed.data = DAPL_GET_CQE_IMMED_DATA(cqe_ptr);
-               break;
-       case OP_COMP_AND_SWAP:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: COMP_AND_SWAP_RESP\n");
-
-               /* original data is returned in LMR provided with post */
-               ext_data->type = DAT_IB_CMP_AND_SWAP;
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               break;
-       case OP_FETCH_AND_ADD:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD,
-                            " cqe_to_event_ext: FETCH_AND_ADD_RESP\n");
-
-               /* original data is returned in LMR provided with post */
-               ext_data->type = DAT_IB_FETCH_AND_ADD;
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               break;
-       case OP_SEND_UD:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_SEND\n");
-
-               /* type and outbound send transfer size */
-               ext_data->type = DAT_IB_UD_SEND;
-               dto->transfered_length = cookie->val.dto.size;
-               break;
-       case OP_RECV_UD:
-               dapl_dbg_log(DAPL_DBG_TYPE_EVD, " cqe_to_event_ext: UD_RECV\n");
-
-               /* type and inbound recv message transfer size */
-               ext_data->type = DAT_IB_UD_RECV;
-               dto->transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
-               break;
-
-       default:
-               /* not extended operation */
-               ext_data->status = DAT_IB_OP_ERR;
-               dto->status = DAT_DTO_ERR_TRANSPORT;
-               break;
-       }
-}
diff --git a/dapl/openib_scm/dapl_ib_mem.c b/dapl/openib_scm/dapl_ib_mem.c
deleted file mode 100644 (file)
index e45a2b3..0000000
+++ /dev/null
@@ -1,382 +0,0 @@
-       /*
-        * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
-        *
-        * This Software is licensed under one of the following licenses:
-        *
-        * 1) under the terms of the "Common Public License 1.0" a copy of which is
-        *    available from the Open Source Initiative, see
-        *    http://www.opensource.org/licenses/cpl.php.
-        *
-        * 2) under the terms of the "The BSD License" a copy of which is
-        *    available from the Open Source Initiative, see
-        *    http://www.opensource.org/licenses/bsd-license.php.
-        *
-        * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
-        *    copy of which is available from the Open Source Initiative, see
-        *    http://www.opensource.org/licenses/gpl-license.php.
-        *
-        * Licensee has the right to choose one of the above licenses.
-        *
-        * Redistributions of source code must retain the above copyright
-        * notice and one of the license notices.
-        *
-        * Redistributions in binary form must reproduce both the above copyright
-        * notice, one of the license notices in the documentation
-        * and/or other materials provided with the distribution.
-        */
-
-/**********************************************************************
- * 
- * MODULE: dapl_ib_mem.c
- *
- * PURPOSE: Memory windows, registration, and protection domain 
- *
- * $Id:$
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_lmr_util.h"
-
-/*
- * dapls_convert_privileges
- *
- * Convert LMR privileges to provider  
- *
- * Input:
- *     DAT_MEM_PRIV_FLAGS
- *
- * Output:
- *     none
- *
- * Returns:
- *     ibv_access_flags
- *
- */
-STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
-{
-       int access = 0;
-
-       /*
-        * if (DAT_MEM_PRIV_LOCAL_READ_FLAG & privileges) do nothing
-        */
-       if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
-               access |= IBV_ACCESS_LOCAL_WRITE;
-       if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
-               access |= IBV_ACCESS_REMOTE_WRITE;
-       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
-               access |= IBV_ACCESS_REMOTE_READ;
-       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
-               access |= IBV_ACCESS_REMOTE_READ;
-       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
-               access |= IBV_ACCESS_REMOTE_READ;
-#ifdef DAT_EXTENSIONS
-       if (DAT_IB_MEM_PRIV_REMOTE_ATOMIC & privileges)
-               access |= IBV_ACCESS_REMOTE_ATOMIC;
-#endif
-
-       return access;
-}
-
-/*
- * dapl_ib_pd_alloc
- *
- * Alloc a PD
- *
- * Input:
- *     ia_handle       IA handle
- *     pz              pointer to PZ struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
-{
-       /* get a protection domain */
-       pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
-       if (!pz->pd_handle)
-               return (dapl_convert_errno(ENOMEM, "alloc_pd"));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " pd_alloc: pd_handle=%p\n", pz->pd_handle);
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_pd_free
- *
- * Free a PD
- *
- * Input:
- *     ia_handle       IA handle
- *     PZ_ptr          pointer to PZ struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *      DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
-{
-       if (pz->pd_handle != IB_INVALID_HANDLE) {
-               if (ibv_dealloc_pd(pz->pd_handle))
-                       return (dapl_convert_errno(errno, "ibv_dealloc_pd"));
-               pz->pd_handle = IB_INVALID_HANDLE;
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register
- *
- * Register a virtual memory region
- *
- * Input:
- *     ia_handle       IA handle
- *     lmr             pointer to dapl_lmr struct
- *     virt_addr       virtual address of beginning of mem region
- *     length          length of memory region
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
-                    IN DAPL_LMR * lmr,
-                    IN DAT_PVOID virt_addr,
-                    IN DAT_VLEN length,
-                    IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
-{
-       ib_pd_handle_t ib_pd_handle;
-       struct ibv_device *ibv_dev = ia_ptr->hca_ptr->ib_hca_handle->device;
-
-       ib_pd_handle = ((DAPL_PZ *) lmr->param.pz_handle)->pd_handle;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " mr_register: ia=%p, lmr=%p va=%p ln=%d pv=0x%x\n",
-                    ia_ptr, lmr, virt_addr, length, privileges);
-
-       /* TODO: shared memory */
-       if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " mr_register_shared: NOT IMPLEMENTED\n");
-               return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-       }
-
-       /* iWARP only support */
-       if ((va_type == DAT_VA_TYPE_ZB) &&
-           (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
-               return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-       }
-
-       /* local read is default on IB */
-       lmr->mr_handle =
-           ibv_reg_mr(((DAPL_PZ *) lmr->param.pz_handle)->pd_handle,
-                      virt_addr, length, dapls_convert_privileges(privileges));
-
-       if (!lmr->mr_handle)
-               return (dapl_convert_errno(ENOMEM, "reg_mr"));
-
-       lmr->param.lmr_context = lmr->mr_handle->lkey;
-       lmr->param.rmr_context = lmr->mr_handle->rkey;
-       lmr->param.registered_size = length;
-       lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " mr_register: mr=%p addr=%p pd %p ctx %p "
-                    "lkey=0x%x rkey=0x%x priv=%x\n",
-                    lmr->mr_handle, lmr->mr_handle->addr,
-                    lmr->mr_handle->pd, lmr->mr_handle->context,
-                    lmr->mr_handle->lkey, lmr->mr_handle->rkey,
-                    length, dapls_convert_privileges(privileges));
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_deregister
- *
- * Free a memory region
- *
- * Input:
- *     lmr                     pointer to dapl_lmr struct
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
-{
-       if (lmr->mr_handle != IB_INVALID_HANDLE) {
-               if (ibv_dereg_mr(lmr->mr_handle))
-                       return (dapl_convert_errno(errno, "dereg_pd"));
-               lmr->mr_handle = IB_INVALID_HANDLE;
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_mr_register_shared
- *
- * Register a virtual memory region
- *
- * Input:
- *     ia_ptr          IA handle
- *     lmr             pointer to dapl_lmr struct
- *     privileges      
- *     va_type         
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mr_register_shared(IN DAPL_IA * ia_ptr,
-                           IN DAPL_LMR * lmr,
-                           IN DAT_MEM_PRIV_FLAGS privileges,
-                           IN DAT_VA_TYPE va_type)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                    " mr_register_shared: NOT IMPLEMENTED\n");
-
-       return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_alloc
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN dapls_ib_mw_alloc(IN DAPL_RMR * rmr)
-{
-
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_alloc: NOT IMPLEMENTED\n");
-
-       return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_free
- *
- * Release bindings of a protection domain to a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_STATE
- *
- */
-DAT_RETURN dapls_ib_mw_free(IN DAPL_RMR * rmr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_free: NOT IMPLEMENTED\n");
-
-       return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_bind
- *
- * Bind a protection domain to a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER;
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_bind(IN DAPL_RMR * rmr,
-                IN DAPL_LMR * lmr,
-                IN DAPL_EP * ep,
-                IN DAPL_COOKIE * cookie,
-                IN DAT_VADDR virtual_address,
-                IN DAT_VLEN length,
-                IN DAT_MEM_PRIV_FLAGS mem_priv, IN DAT_BOOLEAN is_signaled)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_bind: NOT IMPLEMENTED\n");
-
-       return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * dapls_ib_mw_unbind
- *
- * Unbind a protection domain from a memory window
- *
- * Input:
- *     rmr     Initialized rmr to hold binding handles
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_PARAMETER;
- *     DAT_INVALID_STATE;
- *     DAT_INSUFFICIENT_RESOURCES
- *
- */
-DAT_RETURN
-dapls_ib_mw_unbind(IN DAPL_RMR * rmr,
-                  IN DAPL_EP * ep,
-                  IN DAPL_COOKIE * cookie, IN DAT_BOOLEAN is_signaled)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_ERR, " mw_unbind: NOT IMPLEMENTED\n");
-
-       return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_scm/dapl_ib_qp.c b/dapl/openib_scm/dapl_ib_qp.c
deleted file mode 100644 (file)
index f943ff8..0000000
+++ /dev/null
@@ -1,513 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_qp.c
- *
- * PURPOSE: QP routines for access to ofa rdma verbs 
- *
- * $Id: $
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-
-/*
- * dapl_ib_qp_alloc
- *
- * Alloc a QP
- *
- * Input:
- *     *ep_ptr         pointer to EP INFO
- *     ib_hca_handle   provider HCA handle
- *     ib_pd_handle    provider protection domain handle
- *     cq_recv         provider recv CQ handle
- *     cq_send         provider send CQ handle
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INTERNAL_ERROR
- *
- */
-DAT_RETURN
-dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
-                 IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
-{
-       DAT_EP_ATTR *attr;
-       DAPL_EVD *rcv_evd, *req_evd;
-       ib_cq_handle_t rcv_cq, req_cq;
-       ib_pd_handle_t ib_pd_handle;
-       struct ibv_qp_init_attr qp_create;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
-                    ia_ptr, ep_ptr, ep_ctx_ptr);
-
-       attr = &ep_ptr->param.ep_attr;
-       ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
-       rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
-       req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
-
-       /* 
-        * DAT allows usage model of EP's with no EVD's but IB does not. 
-        * Create a CQ with zero entries under the covers to support and 
-        * catch any invalid posting. 
-        */
-       if (rcv_evd != DAT_HANDLE_NULL)
-               rcv_cq = rcv_evd->ib_cq_handle;
-       else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
-               rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-       else {
-               struct ibv_comp_channel *channel =
-                   ia_ptr->hca_ptr->ib_trans.ib_cq;
-#ifdef CQ_WAIT_OBJECT
-               if (rcv_evd->cq_wait_obj_handle)
-                       channel = rcv_evd->cq_wait_obj_handle;
-#endif
-               /* Call IB verbs to create CQ */
-               rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                      0, NULL, channel, 0);
-
-               if (rcv_cq == IB_INVALID_HANDLE)
-                       return (dapl_convert_errno(ENOMEM, "create_cq"));
-
-               ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
-       }
-       if (req_evd != DAT_HANDLE_NULL)
-               req_cq = req_evd->ib_cq_handle;
-       else
-               req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
-
-       /* Setup attributes and create qp */
-       dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
-       qp_create.send_cq = req_cq;
-       qp_create.cap.max_send_wr = attr->max_request_dtos;
-       qp_create.cap.max_send_sge = attr->max_request_iov;
-       qp_create.cap.max_inline_data =
-           ia_ptr->hca_ptr->ib_trans.max_inline_send;
-       qp_create.qp_type = IBV_QPT_RC;
-
-#ifdef DAT_EXTENSIONS
-       if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
-               qp_create.qp_type = IBV_QPT_UD;
-               if (attr->max_message_size >
-                   (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
-                       return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
-               }
-       }
-#endif
-       qp_create.qp_context = (void *)ep_ptr;
-
-       /* ibv assumes rcv_cq is never NULL, set to req_cq */
-       if (rcv_cq == NULL) {
-               qp_create.recv_cq = req_cq;
-               qp_create.cap.max_recv_wr = 0;
-               qp_create.cap.max_recv_sge = 0;
-       } else {
-               qp_create.recv_cq = rcv_cq;
-               qp_create.cap.max_recv_wr = attr->max_recv_dtos;
-               qp_create.cap.max_recv_sge = attr->max_recv_iov;
-       }
-
-       ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
-       if (!ep_ptr->qp_handle)
-               return (dapl_convert_errno(ENOMEM, "create_qp"));
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
-                    ep_ptr->qp_handle->qp_num,
-                    qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
-                    qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
-
-       /* Setup QP attributes for INIT state on the way out */
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_INIT, NULL) != DAT_SUCCESS) {
-               ibv_destroy_qp(ep_ptr->qp_handle);
-               ep_ptr->qp_handle = IB_INVALID_HANDLE;
-               return DAT_INTERNAL_ERROR;
-       }
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_free
- *
- * Free a QP
- *
- * Input:
- *     ia_handle       IA handle
- *     *ep_ptr         pointer to EP INFO
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *  dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",
-                    ep_ptr, ep_ptr->qp_handle);
-
-       if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
-               /* force error state to flush queue, then destroy */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, NULL);
-
-               if (ibv_destroy_qp(ep_ptr->qp_handle))
-                       return (dapl_convert_errno(errno, "destroy_qp"));
-
-               ep_ptr->qp_handle = IB_INVALID_HANDLE;
-       }
-
-#ifdef DAT_EXTENSIONS
-{
-       dp_ib_cm_handle_t cr, next_cr;
-
-       /* 
-        * UD CR objects are kept active because of direct private data references
-        * from CONN events. The cr->socket is closed and marked inactive but the 
-        * object remains allocated and queued on the CR resource list. There can
-        * be multiple CR's associated with a given EP. There is no way to determine 
-        * when consumer is finished with event until the dat_ep_free.
-        *
-        * Schedule destruction for all CR's associated with this EP, cr_thread will
-        * complete the cleanup with state == SCM_DESTROY. 
-        */ 
-       dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
-       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
-                                 &ia_ptr->hca_ptr->ib_trans.list))
-            next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
-                                           &ia_ptr->hca_ptr->ib_trans.list);
-       else
-           next_cr = NULL;
-
-       while (next_cr) {
-               cr = next_cr;
-               next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
-                                               &ia_ptr->hca_ptr->ib_trans.list,
-                                               (DAPL_LLIST_ENTRY*)&cr->entry);
-               if (cr->ep == ep_ptr)  {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                                    " qp_free CR: ep %p cr %p\n", ep_ptr, cr);
-                       dapli_socket_disconnect(cr);
-                       dapl_os_lock(&cr->lock);
-                       cr->ep = NULL;
-                       cr->state = SCM_DESTROY;
-                       dapl_os_unlock(&cr->lock);
-               }
-       }
-       dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
-       send(ia_ptr->hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
-}
-#endif
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapl_ib_qp_modify
- *
- * Set the QP to the parameters specified in an EP_PARAM
- *
- * The EP_PARAM structure that is provided has been
- * sanitized such that only non-zero values are valid.
- *
- * Input:
- *     ib_hca_handle           HCA handle
- *     qp_handle               QP handle
- *     ep_attr                 Sanitized EP Params
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN
-dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
-                  IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
-{
-       struct ibv_qp_attr qp_attr;
-
-       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
-               return DAT_INVALID_PARAMETER;
-
-       /* 
-        * EP state, qp_handle state should be an indication
-        * of current state but the only way to be sure is with
-        * a user mode ibv_query_qp call which is NOT available 
-        */
-
-       /* move to error state if necessary */
-       if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
-           (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
-               return (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                             IBV_QPS_ERR, NULL));
-       }
-
-       /*
-        * Check if we have the right qp_state to modify attributes
-        */
-       if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
-           (ep_ptr->qp_handle->state != IBV_QPS_RTS))
-               return DAT_INVALID_STATE;
-
-       /* Adjust to current EP attributes */
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.cap.max_send_wr = attr->max_request_dtos;
-       qp_attr.cap.max_recv_wr = attr->max_recv_dtos;
-       qp_attr.cap.max_send_sge = attr->max_request_iov;
-       qp_attr.cap.max_recv_sge = attr->max_recv_iov;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
-                    ep_ptr->qp_handle,
-                    qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
-                    qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
-
-       if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            "modify_qp: modify ep %p qp %p failed\n",
-                            ep_ptr, ep_ptr->qp_handle);
-               return (dapl_convert_errno(errno, "modify_qp_state"));
-       }
-
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_reinit_ep
- *
- * Move the QP to INIT state again.
- *
- * Input:
- *     ep_ptr          DAPL_EP
- *
- * Output:
- *     none
- *
- * Returns:
- *     void
- *
- */
-#if defined(_WIN32) || defined(_WIN64)
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
-       /* work around bug in low level driver - 3/24/09 */
-       /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
-       if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
-               dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
-               dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
-       }
-}
-#else                          // _WIN32 || _WIN64
-void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
-{
-       if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
-           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
-               /* move to RESET state and then to INIT */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0);
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0);
-       }
-}
-#endif                         // _WIN32 || _WIN64
-
-/* 
- * Generic QP modify for init, reset, error, RTS, RTR
- * For UD, create_ah on RTR, qkey on INIT
- */
-DAT_RETURN
-dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
-                     IN ib_qp_state_t qp_state, IN struct ib_cm_handle *cm_ptr)
-{
-       struct ibv_qp_attr qp_attr;
-       enum ibv_qp_attr_mask mask = IBV_QP_STATE;
-       DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;
-       DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
-       ib_qp_cm_t *qp_cm = &cm_ptr->dst;
-       int ret;
-
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = qp_state;
-       switch (qp_state) {
-               /* additional attributes with RTR and RTS */
-       case IBV_QPS_RTR:
-               {
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                                    " QPS_RTR: type %d state %d qpn %x lid %x"
-                                    " port %x ep %p qp_state %d\n",
-                                    qp_handle->qp_type, qp_handle->qp_type,
-                                    qp_cm->qpn, qp_cm->lid, qp_cm->port,
-                                    ep_ptr, ep_ptr->qp_state);
-
-                       mask |= IBV_QP_AV |
-                           IBV_QP_PATH_MTU |
-                           IBV_QP_DEST_QPN |
-                           IBV_QP_RQ_PSN |
-                           IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;
-
-                       qp_attr.dest_qp_num = qp_cm->qpn;
-                       qp_attr.rq_psn = 1;
-                       qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;
-                       qp_attr.max_dest_rd_atomic =
-                           ep_ptr->param.ep_attr.max_rdma_read_out;
-                       qp_attr.min_rnr_timer =
-                           ia_ptr->hca_ptr->ib_trans.rnr_timer;
-
-                       /* address handle. RC and UD */
-                       qp_attr.ah_attr.dlid = qp_cm->lid;
-                       if (ia_ptr->hca_ptr->ib_trans.global) {
-                               qp_attr.ah_attr.is_global = 1;
-                               qp_attr.ah_attr.grh.dgid = qp_cm->gid;
-                               qp_attr.ah_attr.grh.hop_limit =
-                                   ia_ptr->hca_ptr->ib_trans.hop_limit;
-                               qp_attr.ah_attr.grh.traffic_class =
-                                   ia_ptr->hca_ptr->ib_trans.tclass;
-                       }
-                       qp_attr.ah_attr.sl = 0;
-                       qp_attr.ah_attr.src_path_bits = 0;
-                       qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;
-#ifdef DAT_EXTENSIONS
-                       /* UD: create AH for remote side */
-                       if (qp_handle->qp_type == IBV_QPT_UD) {
-                               ib_pd_handle_t pz;
-                               pz = ((DAPL_PZ *)
-                                     ep_ptr->param.pz_handle)->pd_handle;
-                               mask = IBV_QP_STATE;
-                               cm_ptr->ah = ibv_create_ah(pz,
-                                                          &qp_attr.ah_attr);
-                               if (!cm_ptr->ah)
-                                       return (dapl_convert_errno(errno,
-                                                                  "ibv_ah"));
-
-                               /* already RTR, multi remote AH's on QP */
-                               if (ep_ptr->qp_state == IBV_QPS_RTR ||
-                                   ep_ptr->qp_state == IBV_QPS_RTS)
-                                       return DAT_SUCCESS;
-                       }
-#endif
-                       break;
-               }
-       case IBV_QPS_RTS:
-               {
-                       /* RC only */
-                       if (qp_handle->qp_type == IBV_QPT_RC) {
-                               mask |= IBV_QP_SQ_PSN |
-                                   IBV_QP_TIMEOUT |
-                                   IBV_QP_RETRY_CNT |
-                                   IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
-                               qp_attr.timeout =
-                                   ia_ptr->hca_ptr->ib_trans.ack_timer;
-                               qp_attr.retry_cnt =
-                                   ia_ptr->hca_ptr->ib_trans.ack_retry;
-                               qp_attr.rnr_retry =
-                                   ia_ptr->hca_ptr->ib_trans.rnr_retry;
-                               qp_attr.max_rd_atomic =
-                                   ep_ptr->param.ep_attr.max_rdma_read_out;
-                       }
-                       /* RC and UD */
-                       qp_attr.qp_state = IBV_QPS_RTS;
-                       qp_attr.sq_psn = 1;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                                    " QPS_RTS: psn %x rd_atomic %d ack %d "
-                                    " retry %d rnr_retry %d ep %p qp_state %d\n",
-                                    qp_attr.sq_psn, qp_attr.max_rd_atomic,
-                                    qp_attr.timeout, qp_attr.retry_cnt,
-                                    qp_attr.rnr_retry, ep_ptr,
-                                    ep_ptr->qp_state);
-#ifdef DAT_EXTENSIONS
-                       if (qp_handle->qp_type == IBV_QPT_UD) {
-                               /* already RTS, multi remote AH's on QP */
-                               if (ep_ptr->qp_state == IBV_QPS_RTS)
-                                       return DAT_SUCCESS;
-                               else
-                                       mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
-                       }
-#endif
-                       break;
-               }
-       case IBV_QPS_INIT:
-               {
-                       mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
-                       if (qp_handle->qp_type == IBV_QPT_RC) {
-                               mask |= IBV_QP_ACCESS_FLAGS;
-                               qp_attr.qp_access_flags =
-                                   IBV_ACCESS_LOCAL_WRITE |
-                                   IBV_ACCESS_REMOTE_WRITE |
-                                   IBV_ACCESS_REMOTE_READ |
-                                   IBV_ACCESS_REMOTE_ATOMIC |
-                                   IBV_ACCESS_MW_BIND;
-                       }
-#ifdef DAT_EXTENSIONS
-                       if (qp_handle->qp_type == IBV_QPT_UD) {
-                               /* already INIT, multi remote AH's on QP */
-                               if (ep_ptr->qp_state == IBV_QPS_INIT)
-                                       return DAT_SUCCESS;
-                               mask |= IBV_QP_QKEY;
-                               qp_attr.qkey = SCM_UD_QKEY;
-                       }
-#endif
-                       qp_attr.pkey_index = 0;
-                       qp_attr.port_num = ia_ptr->hca_ptr->port_num;
-
-                       dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                                    " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
-                                    qp_attr.pkey_index, qp_attr.port_num,
-                                    qp_attr.qp_access_flags, qp_attr.qkey);
-                       break;
-               }
-       default:
-               break;
-
-       }
-
-       ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
-       if (ret == 0) {
-               ep_ptr->qp_state = qp_state;
-               return DAT_SUCCESS;
-       } else {
-               return (dapl_convert_errno(errno, "modify_qp_state"));
-       }
-}
-
-/*
- * Local variables:
- *  c-indent-level: 4
- *  c-basic-offset: 4
- *  tab-width: 8
- * End:
- */
diff --git a/dapl/openib_scm/dapl_ib_util.c b/dapl/openib_scm/dapl_ib_util.c
deleted file mode 100644 (file)
index ad30f73..0000000
+++ /dev/null
@@ -1,743 +0,0 @@
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- *    available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- *    copy of which is available from the Open Source Initiative, see
- *    http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_util.c
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The uDAPL openib provider - init, open, close, utilities
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-#ifdef RCSID
-static const char rcsid[] = "$Id:  $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-int g_dapl_loopback_connection = 0;
-
-enum ibv_mtu dapl_ib_mtu(int mtu)
-{
-       switch (mtu) {
-       case 256:
-               return IBV_MTU_256;
-       case 512:
-               return IBV_MTU_512;
-       case 1024:
-               return IBV_MTU_1024;
-       case 2048:
-               return IBV_MTU_2048;
-       case 4096:
-               return IBV_MTU_4096;
-       default:
-               return IBV_MTU_1024;
-       }
-}
-
-char *dapl_ib_mtu_str(enum ibv_mtu mtu)
-{
-       switch (mtu) {
-       case IBV_MTU_256:
-               return "256";
-       case IBV_MTU_512:
-               return "512";
-       case IBV_MTU_1024:
-               return "1024";
-       case IBV_MTU_2048:
-               return "2048";
-       case IBV_MTU_4096:
-               return "4096";
-       default:
-               return "1024";
-       }
-}
-
-static DAT_RETURN getlocalipaddr(DAT_SOCK_ADDR * addr, int addr_len)
-{
-       struct sockaddr_in *sin;
-       struct addrinfo *res, hint, *ai;
-       int ret;
-       char hostname[256];
-
-       if (addr_len < sizeof(*sin)) {
-               return DAT_INTERNAL_ERROR;
-       }
-
-       ret = gethostname(hostname, 256);
-       if (ret)
-               return dapl_convert_errno(ret, "gethostname");
-
-       memset(&hint, 0, sizeof hint);
-       hint.ai_flags = AI_PASSIVE;
-       hint.ai_family = AF_INET;
-       hint.ai_socktype = SOCK_STREAM;
-       hint.ai_protocol = IPPROTO_TCP;
-
-       ret = getaddrinfo(hostname, NULL, &hint, &res);
-       if (ret) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " getaddrinfo ERR: %d %s\n", ret, gai_strerror(ret));
-               return DAT_INVALID_ADDRESS;
-       }
-
-       ret = DAT_INVALID_ADDRESS;
-       for (ai = res; ai; ai = ai->ai_next) {
-               sin = (struct sockaddr_in *)ai->ai_addr;
-               if (*((uint32_t *) & sin->sin_addr) != htonl(0x7f000001)) {
-                       *((struct sockaddr_in *)addr) = *sin;
-                       ret = DAT_SUCCESS;
-                       break;
-               }
-       }
-
-       freeaddrinfo(res);
-       return ret;
-}
-
-static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
-       DAPL_SOCKET listen_socket;
-       struct sockaddr_in addr;
-       socklen_t addrlen = sizeof(addr);
-       int ret;
-
-       listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-       if (listen_socket == DAPL_INVALID_SOCKET)
-               return 1;
-
-       memset(&addr, 0, sizeof addr);
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = htonl(0x7f000001);
-       ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr);
-       if (ret)
-               goto err1;
-
-       ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen);
-       if (ret)
-               goto err1;
-
-       ret = listen(listen_socket, 0);
-       if (ret)
-               goto err1;
-
-       hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
-       if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)
-               goto err1;
-
-       ret = connect(hca_ptr->ib_trans.scm[1], 
-                     (struct sockaddr *)&addr, sizeof(addr));
-       if (ret)
-               goto err2;
-
-       hca_ptr->ib_trans.scm[0] = accept(listen_socket, NULL, NULL);
-       if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)
-               goto err2;
-
-       closesocket(listen_socket);
-       return 0;
-
-      err2:
-       closesocket(hca_ptr->ib_trans.scm[1]);
-      err1:
-       closesocket(listen_socket);
-       return 1;
-}
-
-static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
-       closesocket(hca_ptr->ib_trans.scm[0]);
-       closesocket(hca_ptr->ib_trans.scm[1]);
-}
-
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- *     none
- *
- * Output:
- *     none
- *
- * Returns:
- *     0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
-       return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
-       return 0;
-}
-
-#if defined(_WIN64) || defined(_WIN32)
-int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       return 0;
-}
-#else                          // _WIN64 || WIN32
-int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       int opts;
-
-       opts = fcntl(channel->fd, F_GETFL);     /* uCQ */
-       if (opts < 0 || fcntl(channel->fd, F_SETFL, opts | O_NONBLOCK) < 0) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " dapls_create_comp_channel: fcntl on ib_cq->fd %d ERR %d %s\n",
-                        channel->fd, opts, strerror(errno));
-               return errno;
-       }
-
-       return 0;
-}
-#endif
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- *      *hca_name         pointer to provider device name
- *      *ib_hca_handle_p  pointer to provide HCA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *      dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
-       struct ibv_device **dev_list;
-       struct ibv_port_attr port_attr;
-       int i;
-       DAT_RETURN dat_status;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: %s - %p\n", hca_name, hca_ptr);
-
-       /* get the IP address of the device */
-       dat_status = getlocalipaddr((DAT_SOCK_ADDR *) & hca_ptr->hca_address,
-                                   sizeof(DAT_SOCK_ADDR6));
-       if (dat_status != DAT_SUCCESS)
-               return dat_status;
-
-       /* Get list of all IB devices, find match, open */
-       dev_list = ibv_get_device_list(NULL);
-       if (!dev_list) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " open_hca: ibv_get_device_list() failed\n",
-                            hca_name);
-               return DAT_INTERNAL_ERROR;
-       }
-
-       for (i = 0; dev_list[i]; ++i) {
-               hca_ptr->ib_trans.ib_dev = dev_list[i];
-               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                           hca_name))
-                       goto found;
-       }
-
-       dapl_log(DAPL_DBG_TYPE_ERR,
-                " open_hca: device %s not found\n", hca_name);
-       goto err;
-
-      found:
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",
-                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                    (unsigned long long)
-                    ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
-
-       hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
-       if (!hca_ptr->ib_hca_handle) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: dev open failed for %s, err=%s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                        strerror(errno));
-               goto err;
-       }
-
-       /* get lid for this hca-port, network order */
-       if (ibv_query_port(hca_ptr->ib_hca_handle,
-                          (uint8_t) hca_ptr->port_num, &port_attr)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: get lid ERR for %s, err=%s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                        strerror(errno));
-               goto err;
-       } else {
-               hca_ptr->ib_trans.lid = htons(port_attr.lid);
-       }
-
-       /* get gid for this hca-port, network order */
-       if (ibv_query_gid(hca_ptr->ib_hca_handle,
-                         (uint8_t) hca_ptr->port_num,
-                         0, &hca_ptr->ib_trans.gid)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: query GID ERR for %s, err=%s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                        strerror(errno));
-               goto err;
-       }
-
-       /* set RC tunables via enviroment or default */
-       hca_ptr->ib_trans.max_inline_send =
-           dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
-       hca_ptr->ib_trans.ack_retry =
-           dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
-       hca_ptr->ib_trans.ack_timer =
-           dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
-       hca_ptr->ib_trans.rnr_retry =
-           dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
-       hca_ptr->ib_trans.rnr_timer =
-           dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
-       hca_ptr->ib_trans.global =
-           dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);
-       hca_ptr->ib_trans.hop_limit =
-           dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);
-       hca_ptr->ib_trans.tclass =
-           dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);
-       hca_ptr->ib_trans.mtu =
-           dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));
-
-#ifndef CQ_WAIT_OBJECT
-       /* initialize cq_lock */
-       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
-       if (dat_status != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to init cq_lock\n");
-               goto bail;
-       }
-       /* EVD events without direct CQ channels, non-blocking */
-       hca_ptr->ib_trans.ib_cq =
-           ibv_create_comp_channel(hca_ptr->ib_hca_handle);
-       if (hca_ptr->ib_trans.ib_cq == NULL) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: ibv_create_comp_channel ERR %s\n",
-                        strerror(errno));
-               goto bail;
-       }
-
-       if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
-               goto bail;
-       }
-
-       if (dapli_cq_thread_init(hca_ptr)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: cq_thread_init failed for %s\n",
-                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
-               goto bail;
-       }
-#endif                         /* CQ_WAIT_OBJECT */
-
-       /* initialize cr_list lock */
-       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
-       if (dat_status != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to init cr_list lock\n");
-               goto bail;
-       }
-
-       /* initialize CM list for listens on this HCA */
-       dapl_llist_init_head(&hca_ptr->ib_trans.list);
-
-       /* initialize pipe, user level wakeup on select */
-       if (create_cr_pipe(hca_ptr)) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to init cr pipe - %s\n",
-                        strerror(errno));
-               goto bail;
-       }
-
-       /* create thread to process inbound connect request */
-       hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
-       dat_status = dapl_os_thread_create(cr_thread,
-                                          (void *)hca_ptr,
-                                          &hca_ptr->ib_trans.thread);
-       if (dat_status != DAT_SUCCESS) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " open_hca: failed to create thread\n");
-               goto bail;
-       }
-
-       /* wait for thread */
-       while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
-               dapl_os_sleep_usec(2000);
-       }
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: devname %s, port %d, hostname_IP %s\n",
-                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                    hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)
-                                                  &hca_ptr->hca_address)->
-                                                 sin_addr));
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x
-                    "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)
-                    htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),
-                    (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.
-                                               interface_id));
-
-       ibv_free_device_list(dev_list);
-       return dat_status;
-
-      bail:
-       ibv_close_device(hca_ptr->ib_hca_handle);
-       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-      err:
-       ibv_free_device_list(dev_list);
-       return DAT_INTERNAL_ERROR;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Open HCA
- *
- * Input:
- *      DAPL_HCA   provide CA handle
- *
- * Output:
- *      none
- *
- * Return:
- *      DAT_SUCCESS
- *     dapl_convert_errno 
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
-
-#ifndef CQ_WAIT_OBJECT
-       dapli_cq_thread_destroy(hca_ptr);
-       dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
-#endif                         /* CQ_WAIT_OBJECT */
-
-       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
-               if (ibv_close_device(hca_ptr->ib_hca_handle))
-                       return (dapl_convert_errno(errno, "ib_close_device"));
-               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
-       }
-
-       /* destroy cr_thread and lock */
-       hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
-       if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " thread_destroy: thread wakeup err = %s\n",
-                        strerror(errno));
-       while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " close_hca: waiting for cr_thread\n");
-               if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
-                       dapl_log(DAPL_DBG_TYPE_UTIL,
-                                " thread_destroy: thread wakeup err = %s\n",
-                                strerror(errno));
-               dapl_os_sleep_usec(2000);
-       }
-       dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
-       destroy_cr_pipe(hca_ptr); /* no longer need pipe */
-       return (DAT_SUCCESS);
-}
-
-/*
- * dapls_ib_query_hca
- *
- * Query the hca attribute
- *
- * Input:
- *     hca_handl               hca handle      
- *     ia_attr                 attribute of the ia
- *     ep_attr                 attribute of the ep
- *     ip_addr                 ip address of DET NIC
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INVALID_HANDLE
- */
-
-DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
-                             OUT DAT_IA_ATTR * ia_attr,
-                             OUT DAT_EP_ATTR * ep_attr,
-                             OUT DAT_SOCK_ADDR6 * ip_addr)
-{
-       struct ibv_device_attr dev_attr;
-       struct ibv_port_attr port_attr;
-
-       if (hca_ptr->ib_hca_handle == NULL) {
-               dapl_dbg_log(DAPL_DBG_TYPE_ERR, " query_hca: BAD handle\n");
-               return (DAT_INVALID_HANDLE);
-       }
-
-       /* local IP address of device, set during ia_open */
-       if (ip_addr != NULL)
-               memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
-
-       if (ia_attr == NULL && ep_attr == NULL)
-               return DAT_SUCCESS;
-
-       /* query verbs for this device and port attributes */
-       if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
-           ibv_query_port(hca_ptr->ib_hca_handle,
-                          hca_ptr->port_num, &port_attr))
-               return (dapl_convert_errno(errno, "ib_query_hca"));
-
-       if (ia_attr != NULL) {
-               (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
-               ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
-               ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
-               ia_attr->ia_address_ptr =
-                   (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address;
-
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " query_hca: %s %s \n",
-                            ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                            inet_ntoa(((struct sockaddr_in *)
-                                       &hca_ptr->hca_address)->sin_addr));
-
-               ia_attr->hardware_version_major = dev_attr.hw_ver;
-               /* ia_attr->hardware_version_minor   = dev_attr.fw_ver; */
-               ia_attr->max_eps = dev_attr.max_qp;
-               ia_attr->max_dto_per_ep = dev_attr.max_qp_wr;
-               ia_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
-               ia_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
-               ia_attr->max_rdma_read_per_ep_in = dev_attr.max_qp_rd_atom;
-               ia_attr->max_rdma_read_per_ep_out =
-                   dev_attr.max_qp_init_rd_atom;
-               ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
-               ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
-               ia_attr->max_evds = dev_attr.max_cq;
-               ia_attr->max_evd_qlen = dev_attr.max_cqe;
-               ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
-               ia_attr->max_lmrs = dev_attr.max_mr;
-               /* 32bit attribute from 64bit, 4G-1 limit, DAT v2 needs fix */
-               ia_attr->max_lmr_block_size = 
-                   (dev_attr.max_mr_size >> 32) ? ~0 : dev_attr.max_mr_size;
-               ia_attr->max_rmrs = dev_attr.max_mw;
-               ia_attr->max_lmr_virtual_address = dev_attr.max_mr_size;
-               ia_attr->max_rmr_target_address = dev_attr.max_mr_size;
-               ia_attr->max_pzs = dev_attr.max_pd;
-               ia_attr->max_message_size = port_attr.max_msg_sz;
-               ia_attr->max_rdma_size = port_attr.max_msg_sz;
-               ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
-               ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
-               ia_attr->num_transport_attr = 0;
-               ia_attr->transport_attr = NULL;
-               ia_attr->num_vendor_attr = 0;
-               ia_attr->vendor_attr = NULL;
-#ifdef DAT_EXTENSIONS
-               ia_attr->extension_supported = DAT_EXTENSION_IB;
-               ia_attr->extension_version = DAT_IB_EXTENSION_VERSION;
-#endif
-               hca_ptr->ib_trans.mtu = DAPL_MIN(port_attr.active_mtu,
-                                                hca_ptr->ib_trans.mtu);
-               hca_ptr->ib_trans.ack_timer =
-                   DAPL_MAX(dev_attr.local_ca_ack_delay,
-                            hca_ptr->ib_trans.ack_timer);
-
-               /* set MTU in transport specific named attribute */
-               hca_ptr->ib_trans.named_attr.name = "DAT_IB_TRANSPORT_MTU";
-               hca_ptr->ib_trans.named_attr.value =
-                   dapl_ib_mtu_str(hca_ptr->ib_trans.mtu);
-
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                            " query_hca: (%x.%x) ep %d ep_q %d evd %d"
-                            " evd_q %d mtu %d\n",
-                            ia_attr->hardware_version_major,
-                            ia_attr->hardware_version_minor,
-                            ia_attr->max_eps, ia_attr->max_dto_per_ep,
-                            ia_attr->max_evds, ia_attr->max_evd_qlen,
-                            128 << hca_ptr->ib_trans.mtu);
-
-               dapl_log(DAPL_DBG_TYPE_UTIL,
-                            " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d"
-                            " ack_time %d mr %u\n",
-                            ia_attr->max_message_size, ia_attr->max_rdma_size,
-                            ia_attr->max_iov_segments_per_dto,
-                            ia_attr->max_lmrs, ia_attr->max_rmrs,
-                            hca_ptr->ib_trans.ack_timer,
-                            ia_attr->max_lmr_block_size);
-       }
-
-       if (ep_attr != NULL) {
-               (void)dapl_os_memzero(ep_attr, sizeof(*ep_attr));
-               ep_attr->max_message_size = port_attr.max_msg_sz;
-               ep_attr->max_rdma_size = port_attr.max_msg_sz;
-               ep_attr->max_recv_dtos = dev_attr.max_qp_wr;
-               ep_attr->max_request_dtos = dev_attr.max_qp_wr;
-               ep_attr->max_recv_iov = dev_attr.max_sge;
-               ep_attr->max_request_iov = dev_attr.max_sge;
-               ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
-               ep_attr->max_rdma_read_out = dev_attr.max_qp_init_rd_atom;
-               ep_attr->max_rdma_read_iov = dev_attr.max_sge;
-               ep_attr->max_rdma_write_iov = dev_attr.max_sge;
-               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                            " query_hca: MAX msg %llu mtu %d dto %d iov %d"
-                            " rdma i%d,o%d\n",
-                            ep_attr->max_message_size,
-                            ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
-                            ep_attr->max_rdma_read_in,
-                            ep_attr->max_rdma_read_out);
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_setup_async_callback
- *
- * Set up an asynchronous callbacks of various kinds
- *
- * Input:
- *     ia_handle               IA handle
- *     handler_type            type of handler to set up
- *     callback_handle         handle param for completion callbacks
- *     callback                callback routine pointer
- *     context                 argument for callback routine
- *
- * Output:
- *     none
- *
- * Returns:
- *     DAT_SUCCESS
- *     DAT_INSUFFICIENT_RESOURCES
- *     DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_setup_async_callback(IN DAPL_IA * ia_ptr,
-                                        IN DAPL_ASYNC_HANDLER_TYPE
-                                        handler_type, IN DAPL_EVD * evd_ptr,
-                                        IN ib_async_handler_t callback,
-                                        IN void *context)
-{
-       ib_hca_transport_t *hca_ptr;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
-                    ia_ptr, handler_type, evd_ptr, callback, context);
-
-       hca_ptr = &ia_ptr->hca_ptr->ib_trans;
-       switch (handler_type) {
-       case DAPL_ASYNC_UNAFILIATED:
-               hca_ptr->async_unafiliated = (ib_async_handler_t) callback;
-               hca_ptr->async_un_ctx = context;
-               break;
-       case DAPL_ASYNC_CQ_ERROR:
-               hca_ptr->async_cq_error = (ib_async_cq_handler_t) callback;
-               break;
-       case DAPL_ASYNC_CQ_COMPLETION:
-               hca_ptr->async_cq = (ib_async_dto_handler_t) callback;
-               break;
-       case DAPL_ASYNC_QP_ERROR:
-               hca_ptr->async_qp_error = (ib_async_qp_handler_t) callback;
-               break;
-       default:
-               break;
-       }
-       return DAT_SUCCESS;
-}
-
-/*
- * dapls_set_provider_specific_attr
- *
- * Input:
- *      attr_ptr        Pointer provider specific attributes
- *
- * Output:
- *      none
- *
- * Returns:
- *      void
- */
-DAT_NAMED_ATTR ib_attrs[] = {
-       {
-        "DAT_IB_TRANSPORT_MTU", "2048"}
-       ,
-#ifdef DAT_EXTENSIONS
-       {
-        "DAT_EXTENSION_INTERFACE", "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_FETCH_AND_ADD, "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_CMP_AND_SWAP, "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_IMMED_DATA, "TRUE"}
-       ,
-       {
-        DAT_IB_ATTR_UD, "TRUE"}
-       ,
-#ifdef DAPL_COUNTERS
-       {
-        DAT_ATTR_COUNTERS, "TRUE"}
-       ,
-#endif                         /* DAPL_COUNTERS */
-#endif
-};
-
-#define SPEC_ATTR_SIZE( x )     (sizeof( x ) / sizeof( DAT_NAMED_ATTR))
-
-void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
-                                       IN DAT_PROVIDER_ATTR * attr_ptr)
-{
-       attr_ptr->num_provider_specific_attr = SPEC_ATTR_SIZE(ib_attrs);
-       attr_ptr->provider_specific_attr = ib_attrs;
-
-       /* set MTU to actual settings */
-       ib_attrs[0].value = ia_ptr->hca_ptr->ib_trans.named_attr.value;
-}
index a668af70a2d7750765c7d4624f5e1c00d480cc0a..a5e734e5c29a0b9cee5a2438df127b4c4422138f 100644 (file)
  * and/or other materials provided with the distribution.
  */
 
-/***************************************************************************
- *
- *   Module:            uDAPL
- *
- *   Filename:          dapl_ib_util.h
- *
- *   Author:            Arlin Davis
- *
- *   Created:           3/10/2005
- *
- *   Description: 
- *
- *   The uDAPL openib provider - definitions, prototypes,
- *
- ****************************************************************************
- *                Source Control System Information
- *
- *    $Id: $
- *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- **************************************************************************/
-
 #ifndef _DAPL_IB_UTIL_H_
 #define _DAPL_IB_UTIL_H_
+#define _OPENIB_SCM_ 
 
-#include "openib_osd.h"
 #include <infiniband/verbs.h>
-
-#ifdef DAT_EXTENSIONS
-#include <dat2/dat_ib_extensions.h>
-#endif
-
-#ifndef __cplusplus
-#define false 0
-#define true  1
-#endif /*__cplusplus */
-
-/* Typedefs to map common DAPL provider types to IB verbs */
-typedef        struct ibv_qp           *ib_qp_handle_t;
-typedef        struct ibv_cq           *ib_cq_handle_t;
-typedef        struct ibv_pd           *ib_pd_handle_t;
-typedef        struct ibv_mr           *ib_mr_handle_t;
-typedef        struct ibv_mw           *ib_mw_handle_t;
-typedef        struct ibv_wc           ib_work_completion_t;
-
-/* HCA context type maps to IB verbs  */
-typedef        struct ibv_context      *ib_hca_handle_t;
-typedef ib_hca_handle_t                dapl_ibal_ca_t;
-
-/* destination info to exchange, define wire protocol version */
-#define DSCM_VER 4
-typedef struct _ib_qp_cm
-{ 
-       uint16_t                ver;
-       uint16_t                rej;
-       uint16_t                lid;
-       uint16_t                port;
-       uint32_t                qpn;
-       uint32_t                p_size;
-       union ibv_gid           gid;
-       DAT_SOCK_ADDR6          ia_address;
-       uint16_t                qp_type; 
-} ib_qp_cm_t;
-
-typedef enum scm_state 
-{
-       SCM_INIT,
-       SCM_LISTEN,
-       SCM_CONN_PENDING,
-       SCM_RTU_PENDING,
-       SCM_ACCEPTING,
-       SCM_ACCEPTING_DATA,
-       SCM_ACCEPTED,
-       SCM_REJECTED,
-       SCM_CONNECTED,
-       SCM_RELEASED,
-       SCM_DISCONNECTED,
-       SCM_DESTROY
-} SCM_STATE;
+#include "openib_osd.h"
+#include "dapl_ib_common.h"
 
 struct ib_cm_handle
 { 
        struct dapl_llist_entry entry;
        DAPL_OS_LOCK            lock;
-       SCM_STATE               state;
+       int                     state;
        DAPL_SOCKET             socket;
        struct dapl_hca         *hca;
        struct dapl_sp          *sp;    
@@ -121,58 +48,12 @@ struct ib_cm_handle
 typedef struct ib_cm_handle    *dp_ib_cm_handle_t;
 typedef dp_ib_cm_handle_t      ib_cm_srvc_handle_t;
 
-/* CM events */
-typedef enum 
-{
-    IB_CME_CONNECTED,
-    IB_CME_DISCONNECTED,
-    IB_CME_DISCONNECTED_ON_LINK_DOWN,
-    IB_CME_CONNECTION_REQUEST_PENDING,
-    IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
-    IB_CME_DESTINATION_REJECT,
-    IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
-    IB_CME_DESTINATION_UNREACHABLE,
-    IB_CME_TOO_MANY_CONNECTION_REQUESTS,
-    IB_CME_LOCAL_FAILURE,
-    IB_CM_LOCAL_FAILURE
-
-} ib_cm_events_t;
-
-/* Operation and state mappings */
-typedef int ib_send_op_type_t;
-typedef        struct  ibv_sge         ib_data_segment_t;
-typedef enum   ibv_qp_state    ib_qp_state_t;
-typedef        enum    ibv_event_type  ib_async_event_type;
-typedef struct ibv_async_event ib_error_record_t;      
-
-/* CQ notifications */
-typedef enum
-{
-       IB_NOTIFY_ON_NEXT_COMP,
-       IB_NOTIFY_ON_SOLIC_COMP
-
-} ib_notification_type_t;
-
-/* other mappings */
-typedef int                    ib_bool_t;
-typedef union ibv_gid          GID;
-typedef char                   *IB_HCA_NAME;
-typedef uint16_t               ib_hca_port_t;
-typedef uint32_t               ib_comp_handle_t;
-
-#ifdef CQ_WAIT_OBJECT
-typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
-#endif
-
 /* Definitions */
 #define IB_INVALID_HANDLE      NULL
 
 /* inline send rdma threshold */
 #define        INLINE_SEND_DEFAULT     200
 
-/* qkey for UD QP's */
-#define SCM_UD_QKEY    0x78654321
-
 /* RC timer - retry count defaults */
 #define SCM_ACK_TIMER 16 /* 5 bits, 4.096us*2^ack_timer. 16== 268ms */
 #define SCM_ACK_RETRY 7  /* 3 bits, 7 * 268ms = 1.8 seconds */
@@ -193,87 +74,6 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
 #define        IB_MAX_DREP_PDATA_SIZE  224
 #define        IB_MAX_RTU_PDATA_SIZE   224
 
-/* DTO OPs, ordered for DAPL ENUM definitions */
-#define OP_RDMA_WRITE           IBV_WR_RDMA_WRITE
-#define OP_RDMA_WRITE_IMM       IBV_WR_RDMA_WRITE_WITH_IMM
-#define OP_SEND                 IBV_WR_SEND
-#define OP_SEND_IMM             IBV_WR_SEND_WITH_IMM
-#define OP_RDMA_READ            IBV_WR_RDMA_READ
-#define OP_COMP_AND_SWAP        IBV_WR_ATOMIC_CMP_AND_SWP
-#define OP_FETCH_AND_ADD        IBV_WR_ATOMIC_FETCH_AND_ADD
-#define OP_RECEIVE              7   /* internal op */
-#define OP_RECEIVE_IMM         8   /* rdma write with immed, internel op */
-#define OP_RECEIVE_MSG_IMM     9   /* recv msg with immed, internel op */
-#define OP_BIND_MW              10   /* internal op */
-#define OP_SEND_UD              11  /* internal op */
-#define OP_RECV_UD              12  /* internal op */
-#define OP_INVALID             0xff
-
-/* Definitions to map QP state */
-#define IB_QP_STATE_RESET      IBV_QPS_RESET
-#define IB_QP_STATE_INIT       IBV_QPS_INIT
-#define IB_QP_STATE_RTR                IBV_QPS_RTR
-#define IB_QP_STATE_RTS                IBV_QPS_RTS
-#define IB_QP_STATE_SQD                IBV_QPS_SQD
-#define IB_QP_STATE_SQE                IBV_QPS_SQE
-#define IB_QP_STATE_ERROR      IBV_QPS_ERR
-
-/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
-/* some are errno and some are -n values */
-
-/**
- * ibv_get_device_name - Return kernel device name
- * ibv_get_device_guid - Return device's node GUID
- * ibv_open_device - Return ibv_context or NULL
- * ibv_close_device - Return 0, (errno?)
- * ibv_get_async_event - Return 0, -1 
- * ibv_alloc_pd - Return ibv_pd, NULL
- * ibv_dealloc_pd - Return 0, errno 
- * ibv_reg_mr - Return ibv_mr, NULL
- * ibv_dereg_mr - Return 0, errno
- * ibv_create_cq - Return ibv_cq, NULL
- * ibv_destroy_cq - Return 0, errno
- * ibv_get_cq_event - Return 0 & ibv_cq/context, int
- * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error 
- * ibv_req_notify_cq - Return 0 (void?)
- * ibv_create_qp - Return ibv_qp, NULL
- * ibv_modify_qp - Return 0, errno
- * ibv_destroy_qp - Return 0, errno
- * ibv_post_send - Return 0, -1 & bad_wr
- * ibv_post_recv - Return 0, -1 & bad_wr 
- */
-
-/* async handler for DTO, CQ, QP, and unafiliated */
-typedef void (*ib_async_dto_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef void (*ib_async_cq_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_cq_handle_t     ib_cq_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef void (*ib_async_qp_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_qp_handle_t     ib_qp_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef void (*ib_async_handler_t)(
-    IN    ib_hca_handle_t    ib_hca_handle,
-    IN    ib_error_record_t  *err_code,
-    IN    void               *context);
-
-typedef enum
-{
-       IB_THREAD_INIT,
-       IB_THREAD_RUN,
-       IB_THREAD_CANCEL,
-       IB_THREAD_EXIT
-
-} ib_thread_state_t;
 
 /* ib_hca_transport_t, specific to this implementation */
 typedef struct _ib_hca_transport
@@ -295,6 +95,8 @@ typedef struct _ib_hca_transport
        ib_async_cq_handler_t   async_cq_error;
        ib_async_dto_handler_t  async_cq;
        ib_async_qp_handler_t   async_qp_error;
+       int                     rd_atom_in;
+       int                     rd_atom_out;
        uint16_t                lid;
        uint8_t                 ack_timer;
        uint8_t                 ack_retry;
@@ -308,96 +110,16 @@ typedef struct _ib_hca_transport
        DAPL_SOCKET             scm[2];
 } ib_hca_transport_t;
 
-/* provider specfic fields for shared memory support */
-typedef uint32_t ib_shm_transport_t;
-
 /* prototypes */
-int32_t        dapls_ib_init (void);
-int32_t        dapls_ib_release (void);
 void cr_thread(void *arg);
 int dapli_cq_thread_init(struct dapl_hca *hca_ptr);
 void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
 DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr);
 void dapls_print_cm_list(IN DAPL_IA *ia_ptr);
-
-DAT_RETURN
-dapls_modify_qp_state ( IN ib_qp_handle_t      qp_handle,
-                       IN ib_qp_state_t        qp_state,
-                       IN struct ib_cm_handle  *cm_ptr );
-
-/* inline functions */
-STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
-{
-       /* use ascii; name of local device */
-       return dapl_os_strdup(name);
-}
-
-STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
-{
-       return;
-}
-
-/*
- *  Convert errno to DAT_RETURN values
- */
-STATIC _INLINE_ DAT_RETURN 
-dapl_convert_errno( IN int err, IN const char *str )
-{
-    if (!err)  return DAT_SUCCESS;
-       
-#if DAPL_DBG
-    if ((err != EAGAIN) && (err != ETIMEDOUT))
-       dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
-#endif 
-
-    switch( err )
-    {
-       case EOVERFLOW  : return DAT_LENGTH_ERROR;
-       case EACCES     : return DAT_PRIVILEGES_VIOLATION;
-       case EPERM      : return DAT_PROTECTION_VIOLATION;                
-       case EINVAL     : return DAT_INVALID_HANDLE;
-       case EISCONN    : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
-       case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
-       case ETIMEDOUT  : return DAT_TIMEOUT_EXPIRED;
-       case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
-       case EADDRINUSE : return DAT_CONN_QUAL_IN_USE;
-       case EALREADY   : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
-       case ENOMEM     : return DAT_INSUFFICIENT_RESOURCES;
-        case EAGAIN    : return DAT_QUEUE_EMPTY;
-       case EINTR      : return DAT_INTERRUPTED_CALL;
-       case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
-       case EFAULT     : 
-       default         : return DAT_INTERNAL_ERROR;
-    }
- }
-
-STATIC _INLINE_ char * dapl_cm_state_str(IN int st)
-{
-       static char *cm_state[] = {
-               "SCM_INIT",
-               "SCM_LISTEN",
-               "SCM_CONN_PENDING",
-               "SCM_RTU_PENDING",
-               "SCM_ACCEPTING",
-               "SCM_ACCEPTING_DATA",
-               "SCM_ACCEPTED",
-               "SCM_REJECTED",
-               "SCM_CONNECTED",
-               "SCM_RELEASED",
-               "SCM_DISCONNECTED",
-               "SCM_DESTROY"
-        };
-        return ((st < 0 || st > 11) ? "Invalid CM state?" : cm_state[st]);
-}
-
-/*
- * Definitions required only for DAT 1.1 builds
- */
-#define IB_ACCESS_LOCAL_READ    IBV_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_LOCAL_WRITE   IBV_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_REMOTE_READ   IBV_ACCESS_REMOTE_READ
-#define IB_ACCESS_REMOTE_WRITE  IBV_ACCESS_REMOTE_WRITE
-#define IB_ACCESS_MW_BIND       IBV_ACCESS_LOCAL_WRITE
-#define IB_ACCESS_ATOMIC       
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
+void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+                                IN ib_qp_state_t qp_state,
+                                IN dp_ib_cm_handle_t cm);
 
 #endif /*  _DAPL_IB_UTIL_H_ */
diff --git a/dapl/openib_scm/device.c b/dapl/openib_scm/device.c
new file mode 100644 (file)
index 0000000..d5089aa
--- /dev/null
@@ -0,0 +1,412 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          device.c
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - init, open, close, utilities
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+#ifdef RCSID
+static const char rcsid[] = "$Id:  $";
+#endif
+
+#include "openib_osd.h"
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+#include "dapl_osd.h"
+
+#include <stdlib.h>
+
+/*
+ * create_cr_pipe
+ *
+ * Build a connected pair of loopback TCP sockets and store them in
+ * hca_ptr->ib_trans.scm[]: scm[1] is the connecting end, scm[0] the
+ * accepted end.  The temporary listening socket is always closed
+ * before returning.
+ *
+ * Returns 0 on success, 1 if any socket call fails.
+ */
+static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)
+{
+       DAPL_SOCKET lsock;
+       struct sockaddr_in loop;
+       struct sockaddr *sa = (struct sockaddr *)&loop;
+       socklen_t loop_len = sizeof(loop);
+
+       lsock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+       if (lsock == DAPL_INVALID_SOCKET)
+               return 1;
+
+       /* 127.0.0.1 with the port left zero by the memset */
+       memset(&loop, 0, sizeof loop);
+       loop.sin_family = AF_INET;
+       loop.sin_addr.s_addr = htonl(0x7f000001);
+
+       /* bind, read back the bound address, then start listening */
+       if (bind(lsock, sa, sizeof loop) ||
+           getsockname(lsock, sa, &loop_len) ||
+           listen(lsock, 0))
+               goto close_listener;
+
+       /* connecting end of the pair */
+       hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+       if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)
+               goto close_listener;
+
+       if (connect(hca_ptr->ib_trans.scm[1], sa, sizeof(loop)))
+               goto close_connector;
+
+       /* accepted end of the pair */
+       hca_ptr->ib_trans.scm[0] = accept(lsock, NULL, NULL);
+       if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)
+               goto close_connector;
+
+       closesocket(lsock);
+       return 0;
+
+      close_connector:
+       closesocket(hca_ptr->ib_trans.scm[1]);
+      close_listener:
+       closesocket(lsock);
+       return 1;
+}
+
+/* Close both sockets stored in hca_ptr->ib_trans.scm[], index 0 first. */
+static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
+{
+       int end;
+
+       for (end = 0; end < 2; end++)
+               closesocket(hca_ptr->ib_trans.scm[end]);
+}
+
+
+/*
+ * dapls_ib_init, dapls_ib_release
+ *
+ * Initialize Verb related items for device open
+ *
+ * Input:
+ *     none
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     0 success, -1 error
+ *
+ */
+int32_t dapls_ib_init(void)
+{
+       /* no provider-wide setup is performed here; always reports success */
+       const int32_t status = 0;
+
+       return status;
+}
+
+int32_t dapls_ib_release(void)
+{
+       /* no provider-wide teardown is performed here; always reports success */
+       const int32_t status = 0;
+
+       return status;
+}
+
+#if defined(_WIN64) || defined(_WIN32)
+/* Windows build: no channel configuration is performed; always returns 0. */
+int dapls_config_comp_channel(struct ibv_comp_channel *channel)
+{
+       return 0;
+}
+#else                          // _WIN64 || WIN32
+/*
+ * dapls_config_comp_channel
+ *
+ * Switch the completion channel's file descriptor to non-blocking mode
+ * by adding O_NONBLOCK to its current file status flags.
+ *
+ * Input:
+ *      channel         completion channel whose fd is reconfigured
+ *
+ * Returns:
+ *      0 on success, otherwise the errno value of the failing fcntl()
+ */
+int dapls_config_comp_channel(struct ibv_comp_channel *channel)
+{
+       int opts;
+       int err;
+
+       opts = fcntl(channel->fd, F_GETFL);     /* uCQ */
+       if (opts < 0 || fcntl(channel->fd, F_SETFL, opts | O_NONBLOCK) < 0) {
+               /* capture errno first: the logging calls may overwrite it */
+               err = errno;
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " dapls_create_comp_channel: fcntl on ib_cq->fd %d ERR %d %s\n",
+                        channel->fd, opts, strerror(err));
+               return err;
+       }
+
+       return 0;
+}
+#endif
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA
+ *
+ * Input:
+ *      *hca_name         pointer to provider device name
+ *      *ib_hca_handle_p  pointer to provide HCA handle
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *      dapl_convert_errno
+ *
+ */
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
+{
+       struct ibv_device **dev_list;
+       struct ibv_port_attr port_attr;
+       int i;
+       DAT_RETURN dat_status;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: %s - %p\n", hca_name, hca_ptr);
+
+       /* get the IP address of the device */
+       dat_status = getlocalipaddr((DAT_SOCK_ADDR *) &hca_ptr->hca_address,
+                                   sizeof(DAT_SOCK_ADDR6));
+       if (dat_status != DAT_SUCCESS)
+               return dat_status;
+
+       /* Get list of all IB devices, find match, open */
+       dev_list = ibv_get_device_list(NULL);
+       if (!dev_list) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " open_hca: ibv_get_device_list() failed\n",
+                            hca_name);
+               return DAT_INTERNAL_ERROR;
+       }
+
+       for (i = 0; dev_list[i]; ++i) {
+               hca_ptr->ib_trans.ib_dev = dev_list[i];
+               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                           hca_name))
+                       goto found;
+       }
+
+       dapl_log(DAPL_DBG_TYPE_ERR,
+                " open_hca: device %s not found\n", hca_name);
+       goto err;
+
+      found:
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",
+                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                    (unsigned long long)
+                    ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+
+       hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
+       if (!hca_ptr->ib_hca_handle) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: dev open failed for %s, err=%s\n",
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                        strerror(errno));
+               goto err;
+       }
+
+       /* get lid for this hca-port, network order */
+       if (ibv_query_port(hca_ptr->ib_hca_handle,
+                          (uint8_t) hca_ptr->port_num, &port_attr)) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: get lid ERR for %s, err=%s\n",
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                        strerror(errno));
+               goto err;
+       } else {
+               hca_ptr->ib_trans.lid = htons(port_attr.lid);
+       }
+
+       /* get gid for this hca-port, network order */
+       if (ibv_query_gid(hca_ptr->ib_hca_handle,
+                         (uint8_t) hca_ptr->port_num,
+                         0, &hca_ptr->ib_trans.gid)) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: query GID ERR for %s, err=%s\n",
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                        strerror(errno));
+               goto err;
+       }
+
+       /* set RC tunables via enviroment or default */
+       hca_ptr->ib_trans.max_inline_send =
+           dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
+       hca_ptr->ib_trans.ack_retry =
+           dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
+       hca_ptr->ib_trans.ack_timer =
+           dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
+       hca_ptr->ib_trans.rnr_retry =
+           dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
+       hca_ptr->ib_trans.rnr_timer =
+           dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
+       hca_ptr->ib_trans.global =
+           dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);
+       hca_ptr->ib_trans.hop_limit =
+           dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);
+       hca_ptr->ib_trans.tclass =
+           dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);
+       hca_ptr->ib_trans.mtu =
+           dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));
+
+#ifndef CQ_WAIT_OBJECT
+       /* initialize cq_lock */
+       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
+       if (dat_status != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: failed to init cq_lock\n");
+               goto bail;
+       }
+       /* EVD events without direct CQ channels, non-blocking */
+       hca_ptr->ib_trans.ib_cq =
+           ibv_create_comp_channel(hca_ptr->ib_hca_handle);
+       if (hca_ptr->ib_trans.ib_cq == NULL) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: ibv_create_comp_channel ERR %s\n",
+                        strerror(errno));
+               goto bail;
+       }
+
+       if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
+               goto bail;
+       }
+
+       if (dapli_cq_thread_init(hca_ptr)) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: cq_thread_init failed for %s\n",
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
+               goto bail;
+       }
+#endif                         /* CQ_WAIT_OBJECT */
+
+       /* initialize cr_list lock */
+       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
+       if (dat_status != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: failed to init cr_list lock\n");
+               goto bail;
+       }
+
+       /* initialize CM list for listens on this HCA */
+       dapl_llist_init_head(&hca_ptr->ib_trans.list);
+
+       /* initialize pipe, user level wakeup on select */
+       if (create_cr_pipe(hca_ptr)) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: failed to init cr pipe - %s\n",
+                        strerror(errno));
+               goto bail;
+       }
+
+       /* create thread to process inbound connect request */
+       hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
+       dat_status = dapl_os_thread_create(cr_thread,
+                                          (void *)hca_ptr,
+                                          &hca_ptr->ib_trans.thread);
+       if (dat_status != DAT_SUCCESS) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: failed to create thread\n");
+               goto bail;
+       }
+
+       /* wait for thread */
+       while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+               dapl_os_sleep_usec(2000);
+       }
+
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: devname %s, port %d, hostname_IP %s\n",
+                    ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                    hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)
+                                                  &hca_ptr->hca_address)->
+                                                 sin_addr));
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x
+                    "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)
+                    htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),
+                    (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.
+                                               interface_id));
+
+       ibv_free_device_list(dev_list);
+       return dat_status;
+
+      bail:
+       ibv_close_device(hca_ptr->ib_hca_handle);
+       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+      err:
+       ibv_free_device_list(dev_list);
+       return DAT_INTERNAL_ERROR;
+}
+
+/*
+ * dapls_ib_close_hca
+ *
+ * Close HCA
+ *
+ * Input:
+ *      DAPL_HCA   provide CA handle
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *     dapl_convert_errno 
+ *
+ */
+
+/* Send one wakeup message on scm[1]; a failed send is only logged. */
+static void wakeup_cr_thread(IN DAPL_HCA * hca_ptr)
+{
+       if (send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0) == -1)
+               dapl_log(DAPL_DBG_TYPE_UTIL,
+                        " thread_destroy: thread wakeup err = %s\n",
+                        strerror(errno));
+}
+
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
+{
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
+
+#ifndef CQ_WAIT_OBJECT
+       /* stop the cq thread before its lock goes away */
+       dapli_cq_thread_destroy(hca_ptr);
+       dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
+#endif                         /* CQ_WAIT_OBJECT */
+
+       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+               /*
+                * NOTE(review): a close failure returns here, before the
+                * cr thread, cr_list lock and pipe below are torn down -
+                * confirm callers expect that.
+                */
+               if (ibv_close_device(hca_ptr->ib_hca_handle))
+                       return (dapl_convert_errno(errno, "ib_close_device"));
+               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+       }
+
+       /* ask the cr_thread to exit, then poll until it reports EXIT */
+       hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
+       wakeup_cr_thread(hca_ptr);
+       for (;;) {
+               if (hca_ptr->ib_trans.cr_state == IB_THREAD_EXIT)
+                       break;
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                            " close_hca: waiting for cr_thread\n");
+               wakeup_cr_thread(hca_ptr);
+               dapl_os_sleep_usec(2000);
+       }
+
+       /* thread has exited: release its lock and the wakeup pipe */
+       dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
+       destroy_cr_pipe(hca_ptr); /* no longer need pipe */
+       return (DAT_SUCCESS);
+}