git.openfabrics.org - ~ardavis/dapl.git/commitdiff
updated for QP, CM services
author Arlin Davis <arlin.r.davis@intel.com>
Tue, 26 Jun 2012 18:35:53 +0000 (11:35 -0700)
committer Arlin Davis <arlin.r.davis@intel.com>
Tue, 26 Jun 2012 18:35:53 +0000 (11:35 -0700)
18 files changed:
Makefile.am
dapl/openib_cma/device.c
dapl/openib_common/cq.c
dapl/openib_common/dapl_ib_common.h
dapl/openib_common/dapl_ib_dto.h
dapl/openib_common/qp.c
dapl/openib_common/util.c
dapl/openib_mcm/cm.c
dapl/openib_mcm/dapl_ib_util.h
dapl/openib_mcm/device.c
dapl/openib_mcm/mix.c [new file with mode: 0644]
dapl/openib_scm/cm.c
dapl/openib_scm/device.c
dapl/openib_ucm/cm.c
dapl/openib_ucm/device.c
dapl/svc/mpxyd.c
dat/include/dat2/dat_mic_extensions.h
test/dtest/dtest.c

index 9cc7d65a3dc23422014281f11905f64ef434e344..04d676e051ee71137626cd50f8315de84c827156 100755 (executable)
@@ -627,6 +627,7 @@ dapl_udapl_libdaplomcm_la_SOURCES = dapl/udapl/dapl_init.c \
         dapl/openib_common/qp.c                     \
         dapl/openib_common/util.c                   \
         dapl/openib_mcm/cm.c                        \
+        dapl/openib_mcm/mix.c                       \
         dapl/openib_mcm/device.c $(XPROGRAMS)
 
 dapl_udapl_libdaplomcm_la_LDFLAGS = -version-info 2:0:0 $(daplomcm_version_script) \
index d1a3ab635a81b32d246f3d2b2622510e8642cbf2..3a6e87e60ec47535fedd7f4776dfb594394c4647 100644 (file)
@@ -447,8 +447,8 @@ bail:
 
        if (hca_ptr->ib_trans.ib_cq_empty) {
                struct ibv_comp_channel *channel;
-               channel = hca_ptr->ib_trans.ib_cq_empty->channel;
-               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+               channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
                ibv_destroy_comp_channel(channel);
        }
 
index f03400fddc4c27942b2d03bc872258e7b6c5344b..3ac0ed460d81e53b920433095c2633807b479f6d 100644 (file)
@@ -175,33 +175,49 @@ DAT_RETURN
 dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
                  IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
 {
-       struct ibv_comp_channel *channel;
-       DAT_RETURN ret;
+       struct ibv_comp_channel *channel = NULL;
+       int ret = ENOMEM;
 
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                     "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
 
+       /* create CQ object */
+       evd_ptr->ib_cq_handle = dapl_os_alloc(sizeof(struct dcm_ib_cq));
+       if (!evd_ptr->ib_cq_handle)
+               goto err;
+
+       dapl_os_memzero(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq));
+       evd_ptr->ib_cq_handle->tp = &ia_ptr->hca_ptr->ib_trans;
+       evd_ptr->ib_cq_handle->evd = evd_ptr;
+
        if (!evd_ptr->cno_ptr)
                channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle);
        else
                channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
 
        if (!channel)
-               return DAT_INSUFFICIENT_RESOURCES;
+               goto err;
 
-       evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                             *cqlen, evd_ptr, channel, 0);
+       evd_ptr->ib_cq_handle->ib_cq =
+               ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                             *cqlen, evd_ptr, channel, 0);
 
-       if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE) {
-               ret = DAT_INSUFFICIENT_RESOURCES;
+       if (!evd_ptr->ib_cq_handle->ib_cq)
                goto err;
-       }
 
        /* arm cq for events */
        dapls_set_cq_notify(ia_ptr, evd_ptr);
 
        /* update with returned cq entry size */
-       *cqlen = evd_ptr->ib_cq_handle->cqe;
+       *cqlen = evd_ptr->ib_cq_handle->ib_cq->cqe;
+
+#ifdef _OPENIB_MCM_
+       /* shadow support, MPXYD; without a proxy (scif_ep) there is nothing to fail */
+       ret = ia_ptr->hca_ptr->ib_trans.scif_ep ?
+               dapli_mix_cq_create(evd_ptr->ib_cq_handle) : 0;
+       if (ret)
+               goto err;
+#endif
 
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                     "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
@@ -210,9 +226,16 @@ dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
        return DAT_SUCCESS;
 
 err:
-       if (!evd_ptr->cno_ptr)
+       /* CQ must go before its channel; clear handle so cq_free skips it */
+       if (evd_ptr->ib_cq_handle) {
+               if (evd_ptr->ib_cq_handle->ib_cq)
+                       ibv_destroy_cq(evd_ptr->ib_cq_handle->ib_cq);
+               dapl_os_free(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq));
+               evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+       }
+       if (!evd_ptr->cno_ptr && channel)
                ibv_destroy_comp_channel(channel);
-       return ret;
+       return dapl_convert_errno(ret, "cq_allocate" );
 }
 
 /*
@@ -240,14 +260,20 @@ DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
 
        if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
                /* pull off CQ and EVD entries and toss */
-               while (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, &wc) == 1) ;
+               while (ibv_poll_cq(evd_ptr->ib_cq_handle->ib_cq, 1, &wc) == 1) ;
                while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ;
 
-               channel = evd_ptr->ib_cq_handle->channel;
-               if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
+               channel = evd_ptr->ib_cq_handle->ib_cq->channel;
+               if (ibv_destroy_cq(evd_ptr->ib_cq_handle->ib_cq))
                        return (dapl_convert_errno(errno, "ibv_destroy_cq"));
                if (!evd_ptr->cno_ptr)
                        ibv_destroy_comp_channel(channel);
+#ifdef _OPENIB_MCM_
+               /* shadow support, MPXYD */
+               if (ia_ptr->hca_ptr->ib_trans.scif_ep)
+                       dapli_mix_cq_free(evd_ptr->ib_cq_handle);
+#endif
+               dapl_os_free(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq));
                evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
        }
        return DAT_SUCCESS;
@@ -303,7 +329,7 @@ dapls_wait_comp_channel(IN struct ibv_comp_channel *channel, IN uint32_t timeout
 DAT_RETURN
 dapls_evd_dto_wait(IN DAPL_EVD * evd_ptr, IN uint32_t timeout)
 {
-       struct ibv_comp_channel *channel = evd_ptr->ib_cq_handle->channel;
+       struct ibv_comp_channel *channel = evd_ptr->ib_cq_handle->ib_cq->channel;
        struct ibv_cq *ibv_cq = NULL;
        void *context;
        int status;
@@ -405,7 +431,7 @@ err:
  */
 DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
 {
-       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
+       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, 0))
                return (dapl_convert_errno(errno, "notify_cq"));
        else
                return DAT_SUCCESS;
@@ -432,7 +458,7 @@ DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle,
                                      IN DAPL_EVD * evd_ptr,
                                      IN ib_notification_type_t type)
 {
-       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
+       if (ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, type))
                return (dapl_convert_errno(errno, "notify_cq_type"));
        else
                return DAT_SUCCESS;
@@ -462,7 +488,7 @@ DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
 {
        int ret;
 
-       ret = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
+       ret = ibv_poll_cq(evd_ptr->ib_cq_handle->ib_cq, 1, wc_ptr);
        if (ret == 1)
                return DAT_SUCCESS;
 
index ba805d0b4bd2e4c65de50fad97017d259c4f76ea..f52ec8cea3f2ccc2fef901428f91514dabaf1d0a 100644 (file)
 #endif /*__cplusplus */
 
 /* Typedefs to map common DAPL provider types to IB verbs */
-typedef        struct ibv_qp           *ib_qp_handle_t;
-typedef        struct ibv_cq           *ib_cq_handle_t;
+struct dcm_ib_qp { /* provider QP handle: local verbs QP plus shadow (MPXYD) QP state */
+       uint64_t                qp_ctx;  /* local context; the MCM build stores the owning EP pointer here */
+       uint64_t                sqp_ctx; /* shadow context; presumably supplied by the proxy - TODO confirm */
+       struct _ib_hca_transport *tp; /* transport of the owning HCA, set when the handle is allocated */
+       struct dapl_ep          *ep; /* back-pointer to the owning endpoint */
+       struct ibv_qp           *qp;    /* local verbs QP used for post/modify calls */
+       struct ibv_qp           *sqp;   /* shadow verbs QP */
+       uint32_t                qp_id;  /* local id; the MCM build currently sets this to 0 */
+       uint32_t                sqp_id; /* shadow id */
+};
+
+struct dcm_ib_cq { /* provider CQ handle: local verbs CQ plus shadow (MPXYD) CQ state */
+       uint64_t                cq_ctx; /* local context */
+       uint64_t                scq_ctx; /* shadow context */
+       struct _ib_hca_transport *tp; /* transport of the owning HCA, set when the handle is allocated */
+       struct dapl_evd         *evd; /* back-pointer to the owning EVD */
+       struct ibv_cq           *ib_cq; /* local verbs CQ returned by ibv_create_cq */
+       struct ibv_comp_channel *ib_ch; /* NOTE(review): presumably the completion channel; not assigned in the code shown here */
+       uint32_t                cq_id; /* local id */
+       uint32_t                scq_id; /* shadow id */
+};
+
+typedef        struct dcm_ib_cq        *ib_cq_handle_t;
+typedef        struct dcm_ib_qp        *ib_qp_handle_t;
 typedef        struct ibv_pd           *ib_pd_handle_t;
 typedef        struct ibv_mr           *ib_mr_handle_t;
 typedef        struct ibv_mw           *ib_mw_handle_t;
@@ -309,15 +331,15 @@ int getipaddr_netdev(char *name, char *addr, int addr_len);
 DAT_RETURN getlocalipaddr(char *addr, int addr_len);
 
 /* qp.c */
-DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp);
-DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t     qp_handle,
+DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN struct ibv_qp *qp);
+DAT_RETURN dapls_modify_qp_state(IN struct ibv_qp      *qp_handle,
                                 IN ib_qp_state_t       qp_state,
                                 IN uint32_t            qpn,
                                 IN uint16_t            lid,
                                 IN ib_gid_handle_t     gid);
 ib_ah_handle_t dapls_create_ah( IN DAPL_HCA            *hca,
                                IN ib_pd_handle_t       pd,
-                               IN ib_qp_handle_t       qp,
+                               IN struct ibv_qp        *qp,
                                IN uint16_t             lid,
                                IN ib_gid_handle_t      gid);
 
index b93565c64ea09d9f04e19df48900d9187a0b6350..e49c4e4fec51d1ab6f5bd42bb8309cb960199636 100644 (file)
@@ -36,7 +36,7 @@
 STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
 
 #define CQE_WR_TYPE_UD(id) \
-       (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
+       (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp->qp_type == IBV_QPT_UD)
 
 /*
  * dapls_ib_post_recv
@@ -78,7 +78,7 @@ dapls_ib_post_recv (
                cookie->val.dto.size = total_len;
        }
 
-       ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+       ret = ibv_post_recv(ep_ptr->qp_handle->qp, &wr, &bad_wr);
        
        if (ret)
                return(dapl_convert_errno(errno,"ibv_recv"));
@@ -119,7 +119,7 @@ dapls_ib_post_send (
                     remote_iov, completion_flags);
 
 #ifdef DAT_EXTENSIONS  
-       if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
+       if (ep_ptr->qp_handle->qp->qp_type != IBV_QPT_RC)
                return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
 #endif
        /* setup the work request */
@@ -169,7 +169,7 @@ dapls_ib_post_send (
                     " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
                     wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
 
-       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+       ret = ibv_post_send(ep_ptr->qp_handle->qp, &wr, &bad_wr);
 
        if (ret)
                return(dapl_convert_errno(errno,"ibv_send"));
@@ -340,7 +340,7 @@ dapls_ib_post_ext_send (
                break;
        case OP_SEND_UD:
                /* post must be on EP with service_type of UD */
-               if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
+               if (ep_ptr->qp_handle->qp->qp_type != IBV_QPT_UD)
                        return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
 
                dapl_dbg_log(DAPL_DBG_TYPE_EP, 
@@ -369,7 +369,7 @@ dapls_ib_post_ext_send (
                     " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
                     wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
 
-       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+       ret = ibv_post_send(ep_ptr->qp_handle->qp, &wr, &bad_wr);
 
        if (ret)
                return( dapl_convert_errno(errno,"ibv_send") );
index a8cc56efb7656913ec7dd39cc5ca6b9109cd0ac0..7c561f246d154e79cdbe225e2327140a7f6f2395 100644 (file)
@@ -61,6 +61,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
        DAPL_EVD *rcv_evd, *req_evd;
        ib_cq_handle_t rcv_cq, req_cq;
        ib_pd_handle_t ib_pd_handle;
+       int ret = EINVAL;
        struct ibv_qp_init_attr qp_create;
 #ifdef _OPENIB_CMA_
        dp_ib_cm_handle_t conn;
@@ -87,10 +88,16 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
                        return (dapl_convert_errno(ENOMEM, "create_cq_chan"));
                  
                /* Call IB verbs to create CQ */
-               rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
-                                      1, NULL, channel, 0);
+               rcv_cq = dapl_os_alloc(sizeof(struct dcm_ib_cq));
+               if (!rcv_cq)
+                       return (dapl_convert_errno(ENOMEM, " alloc cq"));
 
-               if (rcv_cq == IB_INVALID_HANDLE) {
+               dapl_os_memzero(rcv_cq, sizeof(struct dcm_ib_cq));
+
+               rcv_cq->ib_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                                              1, NULL, channel, 0);
+
+               if (!rcv_cq->ib_cq) {
                        ibv_destroy_comp_channel(channel);
                        return (dapl_convert_errno(ENOMEM, "create_cq"));
                }
@@ -107,6 +114,15 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
        else
                req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
 
+       /* create QP object */
+       ep_ptr->qp_handle = dapl_os_alloc(sizeof(struct dcm_ib_qp));
+       if (!ep_ptr->qp_handle)
+               return (dapl_convert_errno(ENOMEM, "create_qp"));
+
+       dapl_os_memzero(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp));
+       ep_ptr->qp_handle->tp = &ia_ptr->hca_ptr->ib_trans;
+       ep_ptr->qp_handle->ep = ep_ptr;
+
        /* 
         * IMPLEMENTATION NOTE:
         * uDAPL allows consumers to post buffers on the EP after creation
@@ -122,22 +138,24 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
 
 #ifdef _OPENIB_CMA_
        /* Allocate CM and initialize lock */
-       if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
-               return (dapl_convert_errno(ENOMEM, "cm_create"));
-
+       if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL) {
+               ret = ENOMEM;
+               goto err;
+       }
        /* open identifies the local device; per DAT specification */
        if (rdma_bind_addr(conn->cm_id,
                (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) {
                dapls_cm_free(conn);
-               return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr"));
+               ret = EAFNOSUPPORT;
+               goto err;
        }
 #endif
        /* Setup attributes and create qp */
        dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
-       qp_create.recv_cq = rcv_cq;
+       qp_create.recv_cq = rcv_cq->ib_cq;
        qp_create.cap.max_recv_wr = rcv_evd ? attr->max_recv_dtos:0;
        qp_create.cap.max_recv_sge = rcv_evd ? attr->max_recv_iov:0;
-       qp_create.send_cq = req_cq;
+       qp_create.send_cq = req_cq->ib_cq;
        qp_create.cap.max_send_wr = req_evd ? attr->max_request_dtos:0;
        qp_create.cap.max_send_sge = req_evd ? attr->max_request_iov:0;
        qp_create.cap.max_inline_data =
@@ -148,12 +166,12 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
 #ifdef DAT_EXTENSIONS 
        if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
 #ifdef _OPENIB_CMA_
-               return (DAT_NOT_IMPLEMENTED);
+               goto err;
 #endif
                qp_create.qp_type = IBV_QPT_UD;
                if (attr->max_message_size >
                    (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
-                       return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
+                       goto err;
                }
        }
 #endif
@@ -161,32 +179,51 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
 #ifdef _OPENIB_CMA_
        if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
                dapls_cm_free(conn);
-               return (dapl_convert_errno(errno, "rdma_create_qp"));
+               ret = errno;
+               goto err;
        }
-       ep_ptr->qp_handle = conn->cm_id->qp;
+       ep_ptr->qp_handle->qp = conn->cm_id->qp;
        ep_ptr->qp_state = IBV_QPS_INIT;
 
        ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
 #else
-       ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
-       if (!ep_ptr->qp_handle)
-               return (dapl_convert_errno(ENOMEM, "create_qp"));
+       ep_ptr->qp_handle->qp = ibv_create_qp(ib_pd_handle, &qp_create);
+       if (!ep_ptr->qp_handle->qp) {
+               ret = errno;
+               goto err;
+       }
+
+#ifdef _OPENIB_MCM_
+       /* shadow support, MPXYD */
+       ep_ptr->qp_handle->qp_ctx = (uint64_t)ep_ptr;
+       ep_ptr->qp_handle->qp_id = 0; /* ??? */
+       if (ia_ptr->hca_ptr->ib_trans.scif_ep)
+               dapli_mix_qp_create(ep_ptr->qp_handle, &qp_create);
+#endif
 
        /* Setup QP attributes for INIT state on the way out */
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+       if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                  IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) {
-               ibv_destroy_qp(ep_ptr->qp_handle);
-               ep_ptr->qp_handle = IB_INVALID_HANDLE;
-               return DAT_INTERNAL_ERROR;
+               ibv_destroy_qp(ep_ptr->qp_handle->qp);
+               ret = errno;
+               goto err;
        }
 #endif
        dapl_dbg_log(DAPL_DBG_TYPE_EP,
                     " qp_alloc: qpn 0x%x type %d sq %d,%d rq %d,%d\n",
-                    ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type,
+                    ep_ptr->qp_handle->qp->qp_num,
+                    ep_ptr->qp_handle->qp->qp_type,
                     qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
                     qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
 
        return DAT_SUCCESS;
+err:
+       if (ep_ptr->qp_handle)
+               dapl_os_free(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp));
+
+       ep_ptr->qp_handle = IB_INVALID_HANDLE;
+
+       return (dapl_convert_errno(ret, "create_qp"));
 }
 
 /*
@@ -219,14 +256,13 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
 
        dapl_os_lock(&ep_ptr->header.lock);
        if (ep_ptr->qp_handle != NULL) {
-               qp = ep_ptr->qp_handle;
+               qp = ep_ptr->qp_handle->qp;
                dapl_os_unlock(&ep_ptr->header.lock);
 
                qp_attr.qp_state = IBV_QPS_ERR;
                ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
                dapls_ep_flush_cqs(ep_ptr);
 
-               ep_ptr->qp_handle = NULL;
 #ifdef _OPENIB_CMA_
                rdma_destroy_qp(cm_ptr->cm_id);
                cm_ptr->cm_id->qp = NULL;
@@ -237,9 +273,24 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
                                 strerror(errno));
                }
 #endif
+
+#ifdef _OPENIB_MCM_
+               /* shadow support, MPXYD */
+               if (ia_ptr->hca_ptr->ib_trans.scif_ep)
+                       dapli_mix_qp_free(ep_ptr->qp_handle);
+
+               /* TODO: flush shadow CQ on MPXYD */
+
+#endif
+               /* free the wrapper while the pointer is still valid, then clear it */
+               dapl_os_free(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp));
+               ep_ptr->qp_handle = NULL;
+
        } else {
                dapl_os_unlock(&ep_ptr->header.lock);
        }
+
+
        return DAT_SUCCESS;
 }
 
@@ -282,22 +333,22 @@ dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
 
        /* move to error state if necessary */
        if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
-           (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
-               return (dapls_modify_qp_state(ep_ptr->qp_handle
+           (ep_ptr->qp_handle->qp->state != IBV_QPS_ERR)) {
+               return (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                              IBV_QPS_ERR, 0, 0, 0));
        }
 
        /* consumer ep_modify, init state */
-       if (ep_ptr->qp_handle->state == IBV_QPS_INIT) {
-               return (dapls_modify_qp_state(ep_ptr->qp_handle,
+       if (ep_ptr->qp_handle->qp->state == IBV_QPS_INIT) {
+               return (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                              IBV_QPS_INIT, 0, 0, 0));
        }
 
        /*
         * Check if we have the right qp_state to modify attributes
         */
-       if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
-           (ep_ptr->qp_handle->state != IBV_QPS_RTS))
+       if ((ep_ptr->qp_handle->qp->state != IBV_QPS_RTR) &&
+           (ep_ptr->qp_handle->qp->state != IBV_QPS_RTS))
                return DAT_INVALID_STATE;
 
        /* Adjust to current EP attributes */
@@ -308,15 +359,15 @@ dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
        qp_attr.cap.max_recv_sge = attr->max_recv_iov;
 
        dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                    "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
+                    "modify_qp: qp_h %p sq %d,%d, rq %d,%d\n",
                     ep_ptr->qp_handle,
                     qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
                     qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
 
-       if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
+       if (ibv_modify_qp(ep_ptr->qp_handle->qp, &qp_attr, IBV_QP_CAP)) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            "modify_qp: modify ep %p qp %p failed\n",
-                            ep_ptr, ep_ptr->qp_handle);
+                            "modify_qp: modify ep %p qp_h %p failed\n",
+                            ep_ptr, ep_ptr->qp_handle->qp);
                return (dapl_convert_errno(errno, "modify_qp_state"));
        }
 
@@ -364,10 +415,10 @@ void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
 void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
 {
        if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
-           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+           ep_ptr->qp_handle->qp->qp_type != IBV_QPT_UD) {
                /* move to RESET state and then to INIT */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0);
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0);
+               dapls_modify_qp_state(ep_ptr->qp_handle->qp, IBV_QPS_RESET,0,0,0);
+               dapls_modify_qp_state(ep_ptr->qp_handle->qp, IBV_QPS_INIT,0,0,0);
        }
 }
 #endif                         // _WIN32 || _WIN64
@@ -422,7 +473,7 @@ uint8_t dapls_get_sl(DAPL_HCA *hca_ptr, uint16_t dlid)
  * CM msg provides QP attributes, info in network order
  */
 DAT_RETURN
-dapls_modify_qp_state(IN ib_qp_handle_t                qp_handle,
+dapls_modify_qp_state(IN struct ibv_qp         *qp_handle,
                      IN ib_qp_state_t          qp_state, 
                      IN uint32_t               qpn,
                      IN uint16_t               lid,
@@ -442,7 +493,7 @@ dapls_modify_qp_state(IN ib_qp_handle_t             qp_handle,
                dapl_dbg_log(DAPL_DBG_TYPE_EP,
                                " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x"
                                " port %d ep %p qp_state %d rd_atomic %d\n",
-                               qp_handle->qp_type, ntohl(qpn), gid, 
+                               qp_handle->qp_type, ntohl(qpn), gid,
                                ia_ptr->hca_ptr->ib_trans.global,
                                ntohs(lid), ia_ptr->hca_ptr->port_num,
                                ep_ptr, ep_ptr->qp_state,
@@ -580,7 +631,7 @@ dapls_modify_qp_state(IN ib_qp_handle_t             qp_handle,
 
 /* Modify UD type QP from init, rtr, rts, info network order */
 DAT_RETURN 
-dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
+dapls_modify_qp_ud(IN DAPL_HCA *hca, IN struct ibv_qp *qp)
 {
        struct ibv_qp_attr qp_attr;
 
@@ -590,7 +641,7 @@ dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
         qp_attr.pkey_index = hca->ib_trans.pkey_idx;
         qp_attr.port_num = hca->port_num;
         qp_attr.qkey = DAT_UD_QKEY;
-       if (ibv_modify_qp(qp, &qp_attr, 
+       if (ibv_modify_qp(qp, &qp_attr,
                          IBV_QP_STATE          |
                          IBV_QP_PKEY_INDEX     |
                           IBV_QP_PORT          |
@@ -609,7 +660,7 @@ dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
        dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
        qp_attr.qp_state = IBV_QPS_RTS;
        qp_attr.sq_psn = 1;
-       if (ibv_modify_qp(qp, &qp_attr, 
+       if (ibv_modify_qp(qp, &qp_attr,
                          IBV_QP_STATE | IBV_QP_SQ_PSN)) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                        " modify_ud_qp RTS: ERR %s\n", strerror(errno));
@@ -622,7 +673,7 @@ dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
 ib_ah_handle_t 
 dapls_create_ah(IN DAPL_HCA            *hca,
                IN ib_pd_handle_t       pd,
-               IN ib_qp_handle_t       qp,
+               IN struct ibv_qp        *qp,
                IN uint16_t             lid,
                IN ib_gid_handle_t      gid)
 {
index 33629b8f72bce14076a0d1abd263cf840f28ecac..1cda009ee0d94e6b45789e480df3404f3c8a20a3 100644 (file)
@@ -544,7 +544,7 @@ void dapli_async_event_cb(struct _ib_hca_transport *hca)
                switch (event.event_type) {
                case IBV_EVENT_CQ_ERR:
                {
-                       struct dapl_ep *evd_ptr =
+                       struct dapl_evd *evd_ptr =
                                event.element.cq->cq_context;
 
                        dapl_log(DAPL_DBG_TYPE_ERR,
@@ -554,7 +554,7 @@ void dapli_async_event_cb(struct _ib_hca_transport *hca)
                        /* report up if async callback still setup */
                        if (hca->async_cq_error)
                                hca->async_cq_error(hca->ib_ctx,
-                                                   event.element.cq,
+                                                   evd_ptr->ib_cq_handle,
                                                    &event,
                                                    (void *)evd_ptr);
                        break;
index 4e6c527a776c14ab41192dc531abcbb1a090308d..40b2dae1a9fc8be73273003c11afb574dd001e72 100644 (file)
 #include "dapl_ep_util.h"
 #include "dapl_osd.h"
 
-
-#if defined(_WIN32)
-#include <rdma\winverbs.h>
-#else                          // _WIN32
 enum DAPL_FD_EVENTS {
        DAPL_FD_READ = POLLIN,
        DAPL_FD_WRITE = POLLOUT,
@@ -62,6 +58,9 @@ static void dapl_fd_zero(struct dapl_fd_set *set)
 static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
                       enum DAPL_FD_EVENTS event)
 {
+       if (!s)
+               return 0;
+
        if (set->index == DAPL_FD_SETSIZE - 1) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n",
@@ -80,6 +79,9 @@ static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
        struct pollfd fds;
        int ret;
 
+       if (!s)
+               return 0;
+
        fds.fd = s;
        fds.events = event;
        fds.revents = 0;
@@ -104,20 +106,19 @@ static int dapl_select(struct dapl_fd_set *set, int time_ms)
        dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret);
        return ret;
 }
-#endif
 
 /* forward declarations */
-static int ucm_reply(dp_ib_cm_handle_t cm);
-static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg);
-static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg);
-static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg);
-static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size);
-static void ucm_disconnect_final(dp_ib_cm_handle_t cm);
+static int mcm_reply(dp_ib_cm_handle_t cm);
+static void mcm_accept(ib_cm_srvc_handle_t cm, dat_mcm_msg_t *msg);
+static void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg);
+static void mcm_accept_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg);
+static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size);
+static void mcm_disconnect_final(dp_ib_cm_handle_t cm);
 DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm);
 DAT_RETURN dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm);
 
 /* Service ids - port space */
-static uint16_t ucm_get_port(ib_hca_transport_t *tp, uint16_t port)
+static uint16_t mcm_get_port(ib_hca_transport_t *tp, uint16_t port)
 {
        int i = 0;
        
@@ -143,14 +144,14 @@ done:
        return i;
 }
 
-static void ucm_free_port(ib_hca_transport_t *tp, uint16_t port)
+static void mcm_free_port(ib_hca_transport_t *tp, uint16_t port)
 {
        dapl_os_lock(&tp->plock);
        tp->sid[port] = 0;
        dapl_os_unlock(&tp->plock);
 }
 
-static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
+static void mcm_check_timers(dp_ib_cm_handle_t cm, int *timer)
 {
        DAPL_OS_TIMEVAL time;
 
@@ -166,10 +167,10 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                                 " CM_REQ retry %p %d [lid, port, cqp, iqp]:"
                                 " %x %x %x %x -> %x %x %x %x Time(ms) %d > %d\n",
                                 cm, cm->retries+1,
-                                ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
-                                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+                                ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+                                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
                                 (time - cm->timer)/1000,
                                 cm->hca->ib_trans.rep_time << cm->retries);
                        cm->retries++;
@@ -188,17 +189,17 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                                 " %x %x %x %x -> %x %x %x %x r_pid %x Time(ms) %d > %d\n",
                                 cm->retries+1,
                                 dapl_cm_op_str(ntohs(cm->msg.op)),
-                                ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
-                                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+                                ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+                                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
                                 ntohl(cm->msg.d_id),
                                 (time - cm->timer)/1000, 
                                 cm->hca->ib_trans.rtu_time << cm->retries);
                        cm->retries++;
                        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_REP_RETRY);
                        dapl_os_unlock(&cm->lock);
-                       ucm_reply(cm);
+                       mcm_reply(cm);
                        return;
                }
                break;
@@ -211,10 +212,10 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                                 " CM_DREQ retry %d [lid, port, cqp, iqp]:"
                                 " %x %x %x %x -> %x %x %x %x r_pid %x Time(ms) %d > %d\n",
                                 cm->retries+1,
-                                ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
-                                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+                                ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+                                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
                                 ntohl(cm->msg.d_id),
                                 (time - cm->timer)/1000, 
                                 cm->hca->ib_trans.rtu_time << cm->retries);
@@ -234,9 +235,9 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
 /* SEND CM MESSAGE PROCESSING */
 
 /* Get CM UD message from send queue, called with s_lock held */
-static ib_cm_msg_t *ucm_get_smsg(ib_hca_transport_t *tp)
+static dat_mcm_msg_t *mcm_get_smsg(ib_hca_transport_t *tp)
 {
-       ib_cm_msg_t *msg = NULL; 
+       dat_mcm_msg_t *msg = NULL;
        int ret, polled = 1, hd = tp->s_hd;
 
        hd++;
@@ -248,7 +249,7 @@ retry:
                msg = NULL;
                if (polled % 1000000 == 0)
                        dapl_log(DAPL_DBG_TYPE_WARN,
-                                " ucm_get_smsg: FULLq hd %d == tl %d,"
+                                " mcm_get_smsg: FULLq hd %d == tl %d,"
                                 " completions stalled, polls=%d\n",
                                 hd, tp->s_tl, polled);
        }
@@ -261,7 +262,7 @@ retry:
        if (msg == NULL) {
                struct ibv_wc wc;
 
-               /* process completions, based on UCM_TX_BURST */
+               /* process completions, based on mcm_TX_BURST */
                ret = ibv_poll_cq(tp->scq, 1, &wc);
                if (ret < 0) {
                        dapl_log(DAPL_DBG_TYPE_WARN,
@@ -283,7 +284,7 @@ retry:
 
 /* RECEIVE CM MESSAGE PROCESSING */
 
-static int ucm_post_rmsg(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
+static int mcm_post_rmsg(ib_hca_transport_t *tp, dat_mcm_msg_t *msg)
 {      
        struct ibv_recv_wr recv_wr, *recv_err;
        struct ibv_sge sge;
@@ -292,16 +293,16 @@ static int ucm_post_rmsg(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
        recv_wr.sg_list = &sge;
        recv_wr.num_sge = 1;
        recv_wr.wr_id = (uint64_t)(uintptr_t) msg;
-       sge.length = sizeof(ib_cm_msg_t) + sizeof(struct ibv_grh);
+       sge.length = sizeof(dat_mcm_msg_t) + sizeof(struct ibv_grh);
        sge.lkey = tp->mr_rbuf->lkey;
        sge.addr = (uintptr_t)((char *)msg - sizeof(struct ibv_grh));
        
        return (ibv_post_recv(tp->qp, &recv_wr, &recv_err));
 }
 
-static int ucm_reject(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
+static int mcm_reject(ib_hca_transport_t *tp, dat_mcm_msg_t *msg)
 {
-       ib_cm_msg_t     smsg;
+       dat_mcm_msg_t   smsg;
 
        /* setup op, rearrange the src, dst cm and addr info */
        (void)dapl_os_memzero(&smsg, sizeof(smsg));
@@ -312,42 +313,42 @@ static int ucm_reject(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
        smsg.sport = msg->dport; 
        smsg.sqpn = msg->dqpn;
 
-       dapl_os_memcpy(&smsg.daddr, &msg->saddr, sizeof(union dcm_addr));
+       dapl_os_memcpy(&smsg.daddr, &msg->saddr, sizeof(dat_mcm_addr_t));
        
        /* no dst_addr IB info in REQ, init lid, gid, get type from saddr */
-       smsg.saddr.ib.lid = tp->addr.ib.lid; 
-       smsg.saddr.ib.qp_type = msg->saddr.ib.qp_type;
-       dapl_os_memcpy(&smsg.saddr.ib.gid[0],
-                      &tp->addr.ib.gid, 16); 
+       smsg.saddr.lid = tp->addr.lid;
+       smsg.saddr.qp_type = msg->saddr.qp_type;
+       dapl_os_memcpy(&smsg.saddr.gid[0],
+                      &tp->addr.gid, 16);
 
-       dapl_os_memcpy(&smsg.saddr, &msg->daddr, sizeof(union dcm_addr));
+       dapl_os_memcpy(&smsg.saddr, &msg->daddr, sizeof(dat_mcm_addr_t));
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
                     " CM reject -> LID %x, QPN %x PORT %x\n", 
-                    ntohs(smsg.daddr.ib.lid),
+                    ntohs(smsg.daddr.lid),
                     ntohl(smsg.dqpn), ntohs(smsg.dport));
 
        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&tp->hca->ia_list_head)), DCNT_IA_CM_ERR_REJ_TX);
-       return (ucm_send(tp, &smsg, NULL, 0));
+       return (mcm_send(tp, &smsg, NULL, 0));
 }
 
-static void ucm_process_recv(ib_hca_transport_t *tp, 
-                            ib_cm_msg_t *msg, 
+static void mcm_process_recv(ib_hca_transport_t *tp,
+                            dat_mcm_msg_t *msg,
                             dp_ib_cm_handle_t cm)
 {
        dapl_os_lock(&cm->lock);
        switch (cm->state) {
        case DCM_LISTEN: /* passive */
                dapl_os_unlock(&cm->lock);
-               ucm_accept(cm, msg);
+               mcm_accept(cm, msg);
                break;
        case DCM_RTU_PENDING: /* passive */
                dapl_os_unlock(&cm->lock);
-               ucm_accept_rtu(cm, msg);
+               mcm_accept_rtu(cm, msg);
                break;
        case DCM_REP_PENDING: /* active */
                dapl_os_unlock(&cm->lock);
-               ucm_connect_rtu(cm, msg);
+               mcm_connect_rtu(cm, msg);
                break;
        case DCM_CONNECTED: /* active and passive */
                /* DREQ, change state and process */
@@ -365,14 +366,14 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
                                 " %x %x %x %x -> %x %x %x %x r_pid %x\n",
                                  dapl_cm_op_str(ntohs(cm->msg.op)),
                                  dapl_cm_state_str(cm->state),
-                                ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
-                                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+                                ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+                                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
                                 ntohl(cm->msg.d_id));
 
                        cm->msg.op = htons(DCM_RTU);
-                       ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
+                       mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
 
                        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_RTU_RETRY);
                }
@@ -381,7 +382,7 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
        case DCM_DISC_PENDING: /* active and passive */
                /* DREQ or DREP, finalize */
                dapl_os_unlock(&cm->lock);
-               ucm_disconnect_final(cm);
+               mcm_disconnect_final(cm);
                break;
        case DCM_DISCONNECTED:
        case DCM_FREE:
@@ -392,21 +393,21 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
                                " %x %x %x -> %x %x %x\n", 
                                dapl_cm_op_str(ntohs(msg->op)), 
                                dapl_cm_state_str(cm->state),
-                               ntohs(msg->saddr.ib.lid), 
+                               ntohs(msg->saddr.lid),
                                ntohs(msg->sport),
-                               ntohl(msg->saddr.ib.qpn), 
-                               ntohs(msg->daddr.ib.lid), 
+                               ntohl(msg->saddr.qpn),
+                               ntohs(msg->daddr.lid),
                                ntohs(msg->dport),
-                               ntohl(msg->daddr.ib.qpn));  
+                               ntohl(msg->daddr.qpn));
                        cm->msg.op = htons(DCM_DREP);
-                       ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); 
+                       mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
                        
                        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_DREP_RETRY);
 
                } else if (ntohs(msg->op) != DCM_DREP){
                        /* DREP ok to ignore, any other print warning */
                        dapl_log(DAPL_DBG_TYPE_WARN,
-                               " ucm_recv: UNEXPECTED MSG on cm %p"
+                               " mcm_recv: UNEXPECTED MSG on cm %p"
                                " <- op %s, st %s spsp %x sqpn %x\n", 
                                cm, dapl_cm_op_str(ntohs(msg->op)),
                                dapl_cm_state_str(cm->state),
@@ -423,19 +424,19 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
                }
        default:
                dapl_log(DAPL_DBG_TYPE_WARN,
-                       " ucm_recv: Warning, UNKNOWN state"
+                       " mcm_recv: Warning, UNKNOWN state"
                        " <- op %s, %s spsp %x sqpn %x slid %x\n",
                        dapl_cm_op_str(ntohs(msg->op)),
                        dapl_cm_state_str(cm->state),
                        ntohs(msg->sport), ntohl(msg->sqpn),
-                       ntohs(msg->saddr.ib.lid));
+                       ntohs(msg->saddr.lid));
                dapl_os_unlock(&cm->lock);
                break;
        }
 }
 
 /* Find matching CM object for this receive message, return CM reference, timer */
-dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
+dp_ib_cm_handle_t mcm_cm_find(ib_hca_transport_t *tp, dat_mcm_msg_t *msg)
 {
        dp_ib_cm_handle_t cm = NULL, next, found = NULL;
        struct dapl_llist_entry **list;
@@ -471,7 +472,7 @@ retry_listenq:
                if (!listenq && 
                    cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn && 
                    cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn && 
-                   cm->msg.daddr.ib.lid == msg->saddr.ib.lid) {
+                   cm->msg.daddr.lid == msg->saddr.lid) {
                        if (ntohs(msg->op) != DCM_REQ) {
                                found = cm;
                                break; 
@@ -486,14 +487,14 @@ retry_listenq:
                                         cm, dapl_cm_op_str(ntohs(msg->op)),
                                         dapl_cm_op_str(ntohs(cm->msg.op)),
                                         dapl_cm_state_str(cm->state),
-                                        ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                        ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
-                                        ntohs(msg->saddr.ib.lid), ntohs(msg->sport),
-                                        ntohl(msg->sqpn), ntohl(msg->saddr.ib.qpn),
-                                        ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
-                                        ntohl(msg->dqpn), ntohl(msg->daddr.ib.qpn),
-                                        ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                        ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn));
+                                        ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                        ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
+                                        ntohs(msg->saddr.lid), ntohs(msg->sport),
+                                        ntohl(msg->sqpn), ntohl(msg->saddr.qpn),
+                                        ntohs(msg->daddr.lid), ntohs(msg->dport),
+                                        ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+                                        ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                        ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn));
 
                                        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_REQ_DUP);
 
@@ -514,15 +515,15 @@ retry_listenq:
        /* not match on listenq for valid request, send reject */
        if (ntohs(msg->op) == DCM_REQ && !found) {
                dapl_log(DAPL_DBG_TYPE_WARN,
-                       " ucm_recv: NO LISTENER for %s %x %x i%x c%x"
+                       " mcm_recv: NO LISTENER for %s %x %x i%x c%x"
                        " < %x %x %x, sending reject\n", 
                        dapl_cm_op_str(ntohs(msg->op)), 
-                       ntohs(msg->daddr.ib.lid), ntohs(msg->dport), 
-                       ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn),
-                       ntohs(msg->saddr.ib.lid), ntohs(msg->sport), 
-                       ntohl(msg->saddr.ib.qpn));
+                       ntohs(msg->daddr.lid), ntohs(msg->dport),
+                       ntohl(msg->daddr.qpn), ntohl(msg->sqpn),
+                       ntohs(msg->saddr.lid), ntohs(msg->sport),
+                       ntohl(msg->saddr.qpn));
 
-               ucm_reject(tp, msg);
+               mcm_reject(tp, msg);
        }
 
        if (!found) {
@@ -530,11 +531,11 @@ retry_listenq:
                         " NO MATCH: op %s [lid, port, cqp, iqp, pid]:"
                         " %x %x %x %x %x <- %x %x %x %x l_pid %x r_pid %x\n",
                         dapl_cm_op_str(ntohs(msg->op)),
-                        ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
-                        ntohl(msg->dqpn), ntohl(msg->daddr.ib.qpn),
-                        ntohl(msg->d_id), ntohs(msg->saddr.ib.lid),
+                        ntohs(msg->daddr.lid), ntohs(msg->dport),
+                        ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+                        ntohl(msg->d_id), ntohs(msg->saddr.lid),
                         ntohs(msg->sport), ntohl(msg->sqpn),
-                        ntohl(msg->saddr.ib.qpn), ntohl(msg->s_id),
+                        ntohl(msg->saddr.qpn), ntohl(msg->s_id),
                         ntohl(msg->d_id));
 
                if (ntohs(msg->op) == DCM_DREP) {
@@ -546,10 +547,10 @@ retry_listenq:
 }
 
 /* Get rmsgs from CM completion queue, 10 at a time */
-static void ucm_recv(ib_hca_transport_t *tp)
+static void mcm_recv(ib_hca_transport_t *tp)
 {
        struct ibv_wc wc[10];
-       ib_cm_msg_t *msg;
+       dat_mcm_msg_t *msg;
        dp_ib_cm_handle_t cm;
        int i, ret, notify = 0;
        struct ibv_cq *ibv_cq = NULL;
@@ -573,10 +574,10 @@ retry:
                notify = 0;
        
        for (i = 0; i < ret; i++) {
-               msg = (ib_cm_msg_t*) (uintptr_t) wc[i].wr_id;
+               msg = (dat_mcm_msg_t*) (uintptr_t) wc[i].wr_id;
 
                dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                            " ucm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n",
+                            " mcm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n",
                             wc[i].status, dapl_cm_op_str(ntohs(msg->op)),
                             wc[i].byte_len,
                             (void*)wc[i].wr_id, wc[i].src_qp);
@@ -584,19 +585,19 @@ retry:
                /* validate CM message, version */
                if (ntohs(msg->ver) < DCM_VER_MIN) {
                        dapl_log(DAPL_DBG_TYPE_WARN,
-                                " ucm_recv: UNKNOWN msg %p, ver %d\n", 
+                                " mcm_recv: UNKNOWN msg %p, ver %d\n",
                                 msg, msg->ver);
-                       ucm_post_rmsg(tp, msg);
+                       mcm_post_rmsg(tp, msg);
                        continue;
                }
-               if (!(cm = ucm_cm_find(tp, msg))) {
-                       ucm_post_rmsg(tp, msg);
+               if (!(cm = mcm_cm_find(tp, msg))) {
+                       mcm_post_rmsg(tp, msg);
                        continue;
                }
                
                /* match, process it */
-               ucm_process_recv(tp, msg, cm);
-               ucm_post_rmsg(tp, msg);
+               mcm_process_recv(tp, msg, cm);
+               mcm_post_rmsg(tp, msg);
        }
        
        /* finished this batch of WC's, poll and rearm */
@@ -604,19 +605,19 @@ retry:
 }
 
 /* ACTIVE/PASSIVE: build and send CM message out of CM object */
-static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size)
+static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size)
 {
-       ib_cm_msg_t *smsg = NULL;
+       dat_mcm_msg_t *smsg = NULL;
        struct ibv_send_wr wr, *bad_wr;
        struct ibv_sge sge;
        int len, ret = -1;
-       uint16_t dlid = ntohs(msg->daddr.ib.lid);
+       uint16_t dlid = ntohs(msg->daddr.lid);
 
        /* Get message from send queue, copy data, and send */
        dapl_os_lock(&tp->slock);
-       if ((smsg = ucm_get_smsg(tp)) == NULL) {
+       if ((smsg = mcm_get_smsg(tp)) == NULL) {
                dapl_log(DAPL_DBG_TYPE_ERR,
-                       " ucm_send ERR: get_smsg(hd=%d,tl=%d) \n",
+                       " mcm_send ERR: get_smsg(hd=%d,tl=%d) \n",
                        tp->s_hd, tp->s_tl);
                goto bail;
        }
@@ -642,9 +643,9 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data,
         sge.addr = (uintptr_t)smsg;
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-               " ucm_send: op %s ln %d lid %x c_qpn %x rport %x\n",
+               " mcm_send: op %s ln %d lid %x c_qpn %x rport %x\n",
                dapl_cm_op_str(ntohs(smsg->op)), 
-               sge.length, htons(smsg->daddr.ib.lid), 
+               sge.length, htons(smsg->daddr.lid),
                htonl(smsg->dqpn), htons(smsg->dport));
 
        /* empty slot, then create AH */
@@ -663,7 +664,7 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data,
        ret = ibv_post_send(tp->qp, &wr, &bad_wr);
        if (ret) {
                dapl_log(DAPL_DBG_TYPE_ERR,
-                        " ucm_send ERR: post_send() %s\n",
+                        " mcm_send ERR: post_send() %s\n",
                         strerror(errno) );
        }
 
@@ -701,7 +702,7 @@ void dapls_cm_release(dp_ib_cm_handle_t cm)
        }
        /* client, release local conn id port */
        if (!cm->sp && cm->msg.sport)
-               ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+               mcm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
 
        /* clean up any UD address handles */
        if (cm->ah) {
@@ -742,7 +743,7 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
        if (ep) {
                DAPL_HCA *hca = ep->header.owner_ia->hca_ptr;
 
-               cm->msg.sport = htons(ucm_get_port(&hca->ib_trans, 0));
+               cm->msg.sport = htons(mcm_get_port(&hca->ib_trans, 0));
                if (!cm->msg.sport) {
                        dapl_os_wait_object_destroy(&cm->f_event);
                        dapl_os_wait_object_destroy(&cm->d_event);
@@ -756,11 +757,11 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
 
                /* IB info in network order */
                cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */
-               cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */
-               cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
-                cm->msg.saddr.ib.lid = hca->ib_trans.addr.ib.lid; 
-               dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], 
-                              &hca->ib_trans.addr.ib.gid, 16);
+               cm->msg.saddr.qpn = htonl(ep->qp_handle->qp->qp_num); /* ep */
+               cm->msg.saddr.qp_type = ep->qp_handle->qp->qp_type;
+                cm->msg.saddr.lid = hca->ib_trans.addr.lid;
+               dapl_os_memcpy(&cm->msg.saddr.gid[0],
+                              &hca->ib_trans.addr.gid, 16);
         }
        return cm;
 bail:
@@ -855,7 +856,7 @@ static void dapli_cm_dequeue(dp_ib_cm_handle_t cm)
        dapls_cm_release(cm);
 }
 
-static void ucm_disconnect_final(dp_ib_cm_handle_t cm) 
+static void mcm_disconnect_final(dp_ib_cm_handle_t cm)
 {
        /* no EP attachment or not RC, nothing to process */
        if (cm->ep == NULL ||
@@ -893,8 +894,8 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
        switch (cm->state) {
        case DCM_CONNECTED:
                /* CONSUMER: move to err state to flush, if not UD */
-               if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) 
-                       dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+               if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+                       dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
 
                /* send DREQ, event after DREP or DREQ timeout */
                cm->state = DCM_DISC_PENDING;
@@ -910,10 +911,10 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
                        dapl_log(DAPL_DBG_TYPE_ERR, 
                                " CM_DREQ: RETRIES EXHAUSTED:"
                                " %x %x %x -> %x %x %x\n",
-                               htons(cm->msg.saddr.ib.lid), 
-                               htonl(cm->msg.saddr.ib.qpn), 
+                               htons(cm->msg.saddr.lid),
+                               htonl(cm->msg.saddr.qpn),
                                htons(cm->msg.sport), 
-                               htons(cm->msg.daddr.ib.lid), 
+                               htons(cm->msg.daddr.lid),
                                htonl(cm->msg.dqpn), 
                                htons(cm->msg.dport));
                        finalize = 1;
@@ -922,8 +923,8 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
                break;
        case DCM_DISC_RECV:
                /* CM_THREAD: move to err state to flush, if not UD */
-               if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) 
-                       dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+               if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+                       dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
 
                /* DREQ received, send DREP and schedule event, finalize */
                cm->msg.op = htons(DCM_DREP);
@@ -937,15 +938,15 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
                        "  disconnect UNKNOWN state: ep %p cm %p %s %s"
                        "  %x %x %x %s %x %x %x r_id %x l_id %x\n",
                        cm->ep, cm,
-                       cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
+                       cm->msg.saddr.qp_type == IBV_QPT_RC ? "RC" : "UD",
                        dapl_cm_state_str(cm->state),
-                       ntohs(cm->msg.saddr.ib.lid),
+                       ntohs(cm->msg.saddr.lid),
                        ntohs(cm->msg.sport),
-                       ntohl(cm->msg.saddr.ib.qpn),    
+                       ntohl(cm->msg.saddr.qpn),
                        cm->sp ? "<-" : "->",
-                       ntohs(cm->msg.daddr.ib.lid),
+                       ntohs(cm->msg.daddr.lid),
                        ntohs(cm->msg.dport),
-                       ntohl(cm->msg.daddr.ib.qpn),
+                       ntohl(cm->msg.daddr.qpn),
                        ntohl(cm->msg.d_id),
                        ntohl(cm->msg.s_id));
 
@@ -954,14 +955,14 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
        }
        
        dapl_os_get_time(&cm->timer); /* reply expected */
-       ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); 
+       mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
        dapl_os_unlock(&cm->lock);
        
        if (wakeup)
                dapls_thread_signal(&cm->hca->ib_trans.signal);
 
        if (finalize) 
-               ucm_disconnect_final(cm);
+               mcm_disconnect_final(cm);
        
        return DAT_SUCCESS;
 }
@@ -976,9 +977,9 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
        dapl_log(DAPL_DBG_TYPE_EP, 
                 " connect: lid %x i_qpn %x lport %x p_sz=%d -> "
                 " lid %x c_qpn %x rport %x\n",
-                htons(cm->msg.saddr.ib.lid), htonl(cm->msg.saddr.ib.qpn),
+                htons(cm->msg.saddr.lid), htonl(cm->msg.saddr.qpn),
                 htons(cm->msg.sport), htons(cm->msg.p_size),
-                htons(cm->msg.daddr.ib.lid), htonl(cm->msg.dqpn),
+                htons(cm->msg.daddr.lid), htonl(cm->msg.dqpn),
                 htons(cm->msg.dport));
 
        dapl_os_lock(&cm->lock);
@@ -991,10 +992,10 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
                dapl_log(DAPL_DBG_TYPE_ERR, 
                        " CM_REQ: RETRIES EXHAUSTED:"
                         " 0x%x %x 0x%x -> 0x%x %x 0x%x\n",
-                        htons(cm->msg.saddr.ib.lid), 
-                        htonl(cm->msg.saddr.ib.qpn), 
+                        htons(cm->msg.saddr.lid),
+                        htonl(cm->msg.saddr.qpn),
                         htons(cm->msg.sport), 
-                        htons(cm->msg.daddr.ib.lid), 
+                        htons(cm->msg.daddr.lid),
                         htonl(cm->msg.dqpn), 
                         htons(cm->msg.dport));
 
@@ -1019,7 +1020,7 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
        cm->state = DCM_REP_PENDING;
        cm->msg.op = htons(DCM_REQ);
        dapl_os_get_time(&cm->timer); /* reset reply timer */
-       if (ucm_send(&cm->hca->ib_trans, &cm->msg, 
+       if (mcm_send(&cm->hca->ib_trans, &cm->msg,
                     &cm->msg.p_data, ntohs(cm->msg.p_size))) {
                dapl_os_unlock(&cm->lock);
                goto bail;
@@ -1034,7 +1035,7 @@ bail:
        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR);
        dapl_log(DAPL_DBG_TYPE_WARN, 
                 " connect: snd ERR -> cm_lid %x cm_qpn %x r_psp %x p_sz=%d\n",
-                htons(cm->msg.daddr.ib.lid),
+                htons(cm->msg.daddr.lid),
                 htonl(cm->msg.dqpn), htons(cm->msg.dport), 
                 htons(cm->msg.p_size));
 
@@ -1045,7 +1046,7 @@ bail:
 /*
  * ACTIVE: exchange QP information, called from CR thread
  */
-static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
+static void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg)
 {
        DAPL_EP *ep = cm->ep;
        ib_cm_events_t event = IB_CME_CONNECTED;
@@ -1057,7 +1058,7 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                         " op %s, st %s <- lid %x sqpn %x sport %x\n", 
                         dapl_cm_op_str(ntohs(msg->op)), 
                         dapl_cm_state_str(cm->state), 
-                        ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn), 
+                        ntohs(msg->saddr.lid), ntohl(msg->saddr.qpn),
                         ntohs(msg->sport));
                dapl_os_unlock(&cm->lock);
                return;
@@ -1066,9 +1067,9 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
        /* save remote address information to EP and CM */
        cm->msg.d_id = msg->s_id;
        dapl_os_memcpy(&ep->remote_ia_address,
-                      &msg->saddr, sizeof(union dcm_addr));
+                      &msg->saddr, sizeof(dat_mcm_addr_t));
        dapl_os_memcpy(&cm->msg.daddr, 
-                      &msg->saddr, sizeof(union dcm_addr));
+                      &msg->saddr, sizeof(dat_mcm_addr_t));
 
        /* validate private data size, and copy if necessary */
        if (msg->p_size) {
@@ -1078,8 +1079,8 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                                 " st %s <- lid %x sqpn %x spsp %x\n", 
                                 ntohs(msg->p_size), 
                                 dapl_cm_state_str(cm->state), 
-                                ntohs(msg->saddr.ib.lid), 
-                                ntohl(msg->saddr.ib.qpn), 
+                                ntohs(msg->saddr.lid),
+                                ntohl(msg->saddr.qpn),
                                 ntohs(msg->sport));
                        dapl_os_unlock(&cm->lock);
                        goto bail;
@@ -1091,8 +1092,8 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " CONN_RTU: DST lid=%x,"
                     " iqp=%x, qp_type=%d, port=%x psize=%d\n",
-                    ntohs(cm->msg.daddr.ib.lid),
-                    ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type,
+                    ntohs(cm->msg.daddr.lid),
+                    ntohl(cm->msg.daddr.qpn), cm->msg.daddr.qp_type,
                     ntohs(msg->sport), ntohs(msg->p_size));
 
        if (ntohs(msg->op) == DCM_REP)
@@ -1106,9 +1107,9 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                         " slid %x iqp %x port %x\n", cm,
                         dapl_cm_op_str(ntohs(msg->op)),
                         dapl_cm_state_str(cm->state),
-                        ntohs(msg->daddr.ib.lid), ntohl(msg->daddr.ib.qpn),
-                        ntohs(msg->dport), ntohs(msg->saddr.ib.lid),
-                        ntohl(msg->saddr.ib.qpn), ntohs(msg->sport));
+                        ntohs(msg->daddr.lid), ntohl(msg->daddr.qpn),
+                        ntohs(msg->dport), ntohs(msg->saddr.lid),
+                        ntohl(msg->saddr.qpn), ntohs(msg->sport));
                DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_REJ_RX);
                event = IB_CME_DESTINATION_REJECT;
        }
@@ -1119,18 +1120,12 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                         " slid %x iqp %x port %x\n", cm,
                         dapl_cm_op_str(ntohs(msg->op)), 
                         dapl_cm_state_str(cm->state), 
-                        ntohs(msg->daddr.ib.lid), ntohl(msg->daddr.ib.qpn), 
-                        ntohs(msg->dport), ntohs(msg->saddr.ib.lid), 
-                        ntohl(msg->saddr.ib.qpn), ntohs(msg->sport));
+                        ntohs(msg->daddr.lid), ntohl(msg->daddr.qpn),
+                        ntohs(msg->dport), ntohs(msg->saddr.lid),
+                        ntohl(msg->saddr.qpn), ntohs(msg->sport));
 
                cm->state = DCM_REJECTED;
                dapl_os_unlock(&cm->lock);
-
-#ifdef DAT_EXTENSIONS
-               if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) 
-                       goto ud_bail;
-               else
-#endif
                goto bail;
        }
        dapl_os_unlock(&cm->lock);
@@ -1143,28 +1138,28 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
 
        /* modify QP to RTR and then to RTS with remote info */
        dapl_os_lock(&cm->ep->header.lock);
-       if (dapls_modify_qp_state(cm->ep->qp_handle,
+       if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
                                  IBV_QPS_RTR, 
-                                 cm->msg.daddr.ib.qpn,
-                                 cm->msg.daddr.ib.lid,
-                                 (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) {
+                                 cm->msg.daddr.qpn,
+                                 cm->msg.daddr.lid,
+                                 (ib_gid_handle_t)cm->msg.daddr.gid) != DAT_SUCCESS) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " CONN_RTU: QPS_RTR ERR %s <- lid %x iqp %x\n",
-                        strerror(errno), ntohs(cm->msg.daddr.ib.lid),
-                        ntohl(cm->msg.daddr.ib.qpn));
+                        strerror(errno), ntohs(cm->msg.daddr.lid),
+                        ntohl(cm->msg.daddr.qpn));
                dapl_os_unlock(&cm->ep->header.lock);
                event = IB_CME_LOCAL_FAILURE;
                goto bail;
        }
-       if (dapls_modify_qp_state(cm->ep->qp_handle,
+       if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
                                  IBV_QPS_RTS, 
-                                 cm->msg.daddr.ib.qpn,
-                                 cm->msg.daddr.ib.lid,
+                                 cm->msg.daddr.qpn,
+                                 cm->msg.daddr.lid,
                                  NULL) != DAT_SUCCESS) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " CONN_RTU: QPS_RTS ERR %s <- lid %x iqp %x\n",
-                        strerror(errno), ntohs(cm->msg.daddr.ib.lid),
-                        ntohl(cm->msg.daddr.ib.qpn));
+                        strerror(errno), ntohs(cm->msg.daddr.lid),
+                        ntohl(cm->msg.daddr.qpn));
                dapl_os_unlock(&cm->ep->header.lock);
                event = IB_CME_LOCAL_FAILURE;
                goto bail;
@@ -1176,7 +1171,7 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
        
        dapl_os_lock(&cm->lock);
        cm->state = DCM_CONNECTED;
-       if (ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0)) {
+       if (mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0)) {
                dapl_os_unlock(&cm->lock);
                goto bail;
        }
@@ -1185,88 +1180,16 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
 
        /* init cm_handle and post the event with private data */
        dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n");
+       DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ACTIVE_EST);
+       dapl_evd_connection_callback(cm,
+                                    IB_CME_CONNECTED,
+                                    cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep);
 
-#ifdef DAT_EXTENSIONS
-ud_bail:
-       if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
-               DAT_IB_EXTENSION_EVENT_DATA xevent;
-               uint16_t lid = ntohs(cm->msg.daddr.ib.lid);
-               
-               /* post EVENT, modify_qp, AH already created, ucm msg */
-               xevent.status = 0;
-               xevent.type = DAT_IB_UD_REMOTE_AH;
-               xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
-               xevent.remote_ah.ah = dapls_create_ah(cm->hca, 
-                                                     cm->ep->qp_handle->pd, 
-                                                     cm->ep->qp_handle, 
-                                                     htons(lid), 
-                                                     NULL);
-               if (xevent.remote_ah.ah == NULL) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " active UD RTU: ERR create_ah"
-                                " for qpn 0x%x lid 0x%x\n",
-                                xevent.remote_ah.qpn, lid);
-                       event = IB_CME_LOCAL_FAILURE;
-                       goto bail;
-               }
-               cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */
-
-               dapl_os_memcpy(&xevent.remote_ah.ia_addr,
-                              &cm->msg.daddr,
-                              sizeof(union dcm_addr));
-
-               /* remote ia_addr reference includes ucm qpn, not IB qpn */
-               ((union dcm_addr*)
-                       &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
-
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                            " ACTIVE: UD xevent ah %p qpn %x lid %x\n",
-                            xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                            " ACTIVE: UD xevent ia_addr qp_type %d"
-                            " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
-                            ((union dcm_addr*)
-                               &xevent.remote_ah.ia_addr)->ib.qp_type,
-                            ntohs(((union dcm_addr*)
-                               &xevent.remote_ah.ia_addr)->ib.lid),
-                            ntohl(((union dcm_addr*)
-                               &xevent.remote_ah.ia_addr)->ib.qpn),
-                            ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]),
-                            ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8]));
-
-               if (event == IB_CME_CONNECTED)
-                       event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
-               else {
-                       xevent.type = DAT_IB_UD_CONNECT_REJECT;
-                       event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
-               }
-
-               dapls_evd_post_connection_event_ext(
-                               (DAPL_EVD *)cm->ep->param.connect_evd_handle,
-                               event,
-                               (DAT_EP_HANDLE)ep,
-                               (DAT_COUNT)ntohs(cm->msg.p_size),
-                               (DAT_PVOID *)cm->msg.p_data,
-                               (DAT_PVOID *)&xevent);
-
-               if (event != DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED)
-                       dapli_cm_free(cm);
-
-               DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_RESOLVED);
-
-       } else
-#endif
-       {
-               DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ACTIVE_EST);
-               dapl_evd_connection_callback(cm,
-                                            IB_CME_CONNECTED,
-                                            cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep);
-       }
        dapl_log(DAPL_DBG_TYPE_CM_EST,
-                " UCM_ACTIVE_CONN %p %d [lid port qpn] %x %x %x -> %x %x %x\n",
-                cm->hca, cm->retries, ntohs(cm->msg.saddr.ib.lid),
-                ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn),
-                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
+                " mcm_ACTIVE_CONN %p %d [lid port qpn] %x %x %x -> %x %x %x\n",
+                cm->hca, cm->retries, ntohs(cm->msg.saddr.lid),
+                ntohs(cm->msg.sport), ntohl(cm->msg.saddr.qpn),
+                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
                 ntohl(cm->msg.dqpn));
        return;
 bail:
@@ -1280,7 +1203,7 @@ bail:
  *         receive peer QP information, private data, 
  *         and post cr_event 
  */
-static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
+static void mcm_accept(ib_cm_srvc_handle_t cm, dat_mcm_msg_t *msg)
 {
        dp_ib_cm_handle_t acm;
 
@@ -1303,12 +1226,12 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
        acm->msg.rd_in = msg->rd_in;
 
        /* CR saddr is CM daddr info, need EP for local saddr */
-       dapl_os_memcpy(&acm->msg.daddr, &msg->saddr, sizeof(union dcm_addr));
+       dapl_os_memcpy(&acm->msg.daddr, &msg->saddr, sizeof(dat_mcm_addr_t));
        
        dapl_log(DAPL_DBG_TYPE_CM,
                 " accept: DST port=%x lid=%x, iqp=%x, psize=%d\n",
-                ntohs(acm->msg.dport), ntohs(acm->msg.daddr.ib.lid), 
-                htonl(acm->msg.daddr.ib.qpn), htons(acm->msg.p_size));
+                ntohs(acm->msg.dport), ntohs(acm->msg.daddr.lid),
+                htonl(acm->msg.daddr.qpn), htons(acm->msg.p_size));
 
        /* validate private data size before reading */
        if (ntohs(msg->p_size) > DCM_MAX_PDATA_SIZE) {
@@ -1325,29 +1248,11 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
        acm->state = DCM_ACCEPTING;
        dapli_queue_conn(acm);
 
-#ifdef DAT_EXTENSIONS
-       if (acm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
-               DAT_IB_EXTENSION_EVENT_DATA xevent;
-
-               /* post EVENT, modify_qp created ah */
-               xevent.status = 0;
-               xevent.type = DAT_IB_UD_CONNECT_REQUEST;
-
-               dapls_evd_post_cr_event_ext(acm->sp,
-                                           DAT_IB_UD_CONNECTION_REQUEST_EVENT,
-                                           acm,
-                                           (DAT_COUNT)ntohs(acm->msg.p_size),
-                                           (DAT_PVOID *)acm->msg.p_data,
-                                           (DAT_PVOID *)&xevent);
-               DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_REQ_TX);
-       } else
-#endif
-               /* trigger CR event and return SUCCESS */
-               dapls_cr_callback(acm,
-                                 IB_CME_CONNECTION_REQUEST_PENDING,
-                                 acm->msg.p_data, ntohs(msg->p_size), acm->sp);
+       /* trigger CR event and return SUCCESS */
+       dapls_cr_callback(acm,
+                         IB_CME_CONNECTION_REQUEST_PENDING,
+                         acm->msg.p_data, ntohs(msg->p_size), acm->sp);
        return;
-
 bail:
        /* schedule work thread cleanup */
        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR);
@@ -1358,7 +1263,7 @@ bail:
 /*
  * PASSIVE: read RTU from active peer, post CONN event
  */
-static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
+static void mcm_accept_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg)
 {
        dapl_os_lock(&cm->lock);
        if ((ntohs(msg->op) != DCM_RTU) || (cm->state != DCM_RTU_PENDING)) {
@@ -1367,7 +1272,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                         " op %s, st %s <- lid %x iqp %x sport %x\n", 
                         dapl_cm_op_str(ntohs(msg->op)), 
                         dapl_cm_state_str(cm->state), 
-                        ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn), 
+                        ntohs(msg->saddr.lid), ntohl(msg->saddr.qpn),
                         ntohs(msg->sport));
                dapl_os_unlock(&cm->lock);
                goto bail;
@@ -1378,71 +1283,15 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
        /* final data exchange if remote QP state is good to go */
        dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: connected!\n");
 
-#ifdef DAT_EXTENSIONS
-       if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
-               DAT_IB_EXTENSION_EVENT_DATA xevent;
-               uint16_t lid = ntohs(cm->msg.daddr.ib.lid);
-               
-               /* post EVENT, modify_qp, AH already created, ucm msg */
-               xevent.status = 0;
-               xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
-               xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
-               xevent.remote_ah.ah = dapls_create_ah(cm->hca, 
-                                                     cm->ep->qp_handle->pd, 
-                                                     cm->ep->qp_handle, 
-                                                     htons(lid), 
-                                                     NULL);
-               if (xevent.remote_ah.ah == NULL) {
-                       dapl_log(DAPL_DBG_TYPE_ERR,
-                                " passive UD RTU: ERR create_ah"
-                                " for qpn 0x%x lid 0x%x\n",
-                                xevent.remote_ah.qpn, lid);
-                       goto bail;
-               }
-               cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */
-               dapl_os_memcpy(&xevent.remote_ah.ia_addr,
-                              &cm->msg.daddr,
-                               sizeof(union dcm_addr));
+       DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_PASSIVE_EST);
 
-               /* remote ia_addr reference includes ucm qpn, not IB qpn */
-               ((union dcm_addr*)
-                       &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+       dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp);
 
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                            " PASSIVE: UD xevent ah %p qpn %x lid %x\n",
-                            xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
-               dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                            " PASSIVE: UD xevent ia_addr qp_type %d"
-                            " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
-                            ((union dcm_addr*)
-                               &xevent.remote_ah.ia_addr)->ib.qp_type,
-                            ntohs(((union dcm_addr*)
-                               &xevent.remote_ah.ia_addr)->ib.lid),
-                            ntohl(((union dcm_addr*)
-                               &xevent.remote_ah.ia_addr)->ib.qpn),
-                            ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]),
-                            ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8]));
-
-               dapls_evd_post_connection_event_ext(
-                               (DAPL_EVD *)cm->ep->param.connect_evd_handle,
-                               DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
-                               (DAT_EP_HANDLE)cm->ep,
-                               (DAT_COUNT)ntohs(cm->msg.p_size),
-                               (DAT_PVOID *)cm->msg.p_data,
-                               (DAT_PVOID *)&xevent);
-
-               DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_RESOLVED);
-               dapli_cm_free(cm); /* still attached to EP */
-       } else {
-#endif
-               DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_PASSIVE_EST);
-               dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp);
-       }
        dapl_log(DAPL_DBG_TYPE_CM_EST,
-                " UCM_PASSIVE_CONN %p %d [lid port qpn] %x %x %x <- %x %x %x\n",
-                cm->hca, cm->retries, ntohs(cm->msg.saddr.ib.lid),
-                ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn),
-                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
+                " PASSIVE_CONN %p %d [lid port qpn] %x %x %x <- %x %x %x\n",
+                cm->hca, cm->retries, ntohs(cm->msg.saddr.lid),
+                ntohs(cm->msg.sport), ntohl(cm->msg.saddr.qpn),
+                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
                 ntohl(cm->msg.dqpn));
        return;
 bail:
@@ -1454,7 +1303,7 @@ bail:
 /*
  * PASSIVE: user accepted, check and re-send reply message, called from cm_thread.
  */
-static int ucm_reply(dp_ib_cm_handle_t cm)
+static int mcm_reply(dp_ib_cm_handle_t cm)
 {
        dapl_os_lock(&cm->lock);
        if (cm->state != DCM_RTU_PENDING) {
@@ -1463,12 +1312,12 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
                         " %x %x i_%x -> %x %x i_%x l_pid %x r_pid %x\n",
                         cm->ep, cm, dapl_cm_state_str(cm->state),
                         cm->ref_count,
-                        htons(cm->msg.saddr.ib.lid),
+                        htons(cm->msg.saddr.lid),
                         htons(cm->msg.sport),
-                        htonl(cm->msg.saddr.ib.qpn),
-                        htons(cm->msg.daddr.ib.lid),
+                        htonl(cm->msg.saddr.qpn),
+                        htons(cm->msg.daddr.lid),
                         htons(cm->msg.dport),
-                        htonl(cm->msg.daddr.ib.qpn),
+                        htonl(cm->msg.daddr.qpn),
                         ntohl(cm->msg.s_id),
                         ntohl(cm->msg.d_id));
                dapl_os_unlock(&cm->lock);
@@ -1479,14 +1328,15 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
                dapl_log(DAPL_DBG_TYPE_ERR, 
                        " CM_REPLY: RETRIES EXHAUSTED (lid port qpn)"
                         " %x %x %x -> %x %x %x\n",
-                        htons(cm->msg.saddr.ib.lid), 
+                        htons(cm->msg.saddr.lid),
                         htons(cm->msg.sport), 
-                        htonl(cm->msg.saddr.ib.qpn), 
-                        htons(cm->msg.daddr.ib.lid), 
+                        htonl(cm->msg.saddr.qpn),
+                        htons(cm->msg.daddr.lid),
                         htons(cm->msg.dport), 
-                        htonl(cm->msg.daddr.ib.qpn));
+                        htonl(cm->msg.daddr.qpn));
                        
                dapl_os_unlock(&cm->lock);
+
 #ifdef DAPL_COUNTERS
                if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) {
                        dapl_os_unlock(&cm->hca->ib_trans.lock);
@@ -1494,30 +1344,13 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
                        dapl_os_lock(&cm->hca->ib_trans.lock);
                }
 #endif
-#ifdef DAT_EXTENSIONS
-               if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
-                       DAT_IB_EXTENSION_EVENT_DATA xevent;
-                                       
-                       /* post REJECT event with CONN_REQ p_data */
-                       xevent.status = 0;
-                       xevent.type = DAT_IB_UD_CONNECT_ERROR;
-                                       
-                       dapls_evd_post_connection_event_ext(
-                               (DAPL_EVD *)cm->ep->param.connect_evd_handle,
-                               DAT_IB_UD_CONNECTION_ERROR_EVENT,
-                               (DAT_EP_HANDLE)cm->ep,
-                               (DAT_COUNT)ntohs(cm->msg.p_size),
-                               (DAT_PVOID *)cm->msg.p_data,
-                               (DAT_PVOID *)&xevent);
-               } else 
-#endif
-                       dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, 
-                                         NULL, 0, cm->sp);
+
+               dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, NULL, 0, cm->sp);
                return -1;
        }
 
        dapl_os_get_time(&cm->timer); /* RTU expected */
-       if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
+       if (mcm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
                dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n");
                dapl_os_unlock(&cm->lock);
                return -1;
@@ -1548,12 +1381,12 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
                         " %x %x i_%x -> %x %x i_%x l_pid %x r_pid %x\n",
                         cm->ep, cm, dapl_cm_state_str(cm->state),
                         cm->ref_count,
-                        htons(cm->hca->ib_trans.addr.ib.lid),
+                        htons(cm->hca->ib_trans.addr.lid),
                         htons(cm->msg.sport),
-                        htonl(ep->qp_handle->qp_num),
-                        htons(cm->msg.daddr.ib.lid),
+                        htonl(ep->qp_handle->qp->qp_num),
+                        htons(cm->msg.daddr.lid),
                         htons(cm->msg.dport),
-                        htonl(cm->msg.daddr.ib.qpn),
+                        htonl(cm->msg.daddr.qpn),
                         ntohl(cm->msg.s_id),
                         ntohl(cm->msg.d_id));
                dapl_os_unlock(&cm->lock);
@@ -1564,26 +1397,14 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " ACCEPT_USR: remote lid=%x"
                     " iqp=%x qp_type %d, psize=%d\n",
-                    ntohs(cm->msg.daddr.ib.lid),
-                    ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type, 
+                    ntohs(cm->msg.daddr.lid),
+                    ntohl(cm->msg.daddr.qpn), cm->msg.daddr.qp_type,
                     p_size);
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " ACCEPT_USR: remote GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]),
-                    (unsigned long long)
-                    htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[8]));
-
-#ifdef DAT_EXTENSIONS
-       if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD &&
-           ep->qp_handle->qp_type != IBV_QPT_UD) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                            " ACCEPT_USR: ERR remote QP is UD,"
-                            ", but local QP is not\n");
-               return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
-       }
-#endif
+                    (unsigned long long)htonll(*(uint64_t*)&cm->msg.daddr.gid[0]),
+                    (unsigned long long)htonll(*(uint64_t*)&cm->msg.daddr.gid[8]));
 
         /* rdma_out, initiator, cannot exceed remote rdma_in max */
        if (ntohs(cm->msg.ver) >= 7)
@@ -1593,27 +1414,27 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 
        /* modify QP to RTR and then to RTS with remote info already read */
        dapl_os_lock(&ep->header.lock);
-       if (dapls_modify_qp_state(ep->qp_handle,
+       if (dapls_modify_qp_state(ep->qp_handle->qp,
                                  IBV_QPS_RTR, 
-                                 cm->msg.daddr.ib.qpn,
-                                 cm->msg.daddr.ib.lid,
-                                 (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) {
+                                 cm->msg.daddr.qpn,
+                                 cm->msg.daddr.lid,
+                                 (ib_gid_handle_t)&cm->msg.daddr.gid[0]) != DAT_SUCCESS) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " ACCEPT_USR: QPS_RTR ERR %s -> lid %x qpn %x\n",
-                        strerror(errno), ntohs(cm->msg.daddr.ib.lid),
-                        ntohl(cm->msg.daddr.ib.qpn));
+                        strerror(errno), ntohs(cm->msg.daddr.lid),
+                        ntohl(cm->msg.daddr.qpn));
                dapl_os_unlock(&ep->header.lock);
                goto bail;
        }
-       if (dapls_modify_qp_state(ep->qp_handle,
+       if (dapls_modify_qp_state(ep->qp_handle->qp,
                                  IBV_QPS_RTS, 
-                                 cm->msg.daddr.ib.qpn,
-                                 cm->msg.daddr.ib.lid,
+                                 cm->msg.daddr.qpn,
+                                 cm->msg.daddr.lid,
                                  NULL) != DAT_SUCCESS) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " ACCEPT_USR: QPS_RTS ERR %s -> lid %x qpn %x\n",
-                        strerror(errno), ntohs(cm->msg.daddr.ib.lid),
-                        ntohl(cm->msg.daddr.ib.qpn));
+                        strerror(errno), ntohs(cm->msg.daddr.lid),
+                        ntohl(cm->msg.daddr.qpn));
                dapl_os_unlock(&ep->header.lock);
                goto bail;
        }
@@ -1621,16 +1442,16 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 
        /* save remote address information */
        dapl_os_memcpy(&ep->remote_ia_address,
-                      &cm->msg.saddr, sizeof(union dcm_addr));
+                      &cm->msg.saddr, sizeof(dat_mcm_addr_t));
 
        /* setup local QP info and type from EP, copy pdata, for reply */
        cm->msg.op = htons(DCM_REP);
        cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
-       cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num);
-       cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
-       cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; 
-       dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
-                      &cm->hca->ib_trans.addr.ib.gid, 16); 
+       cm->msg.saddr.qpn = htonl(ep->qp_handle->qp->qp_num);
+       cm->msg.saddr.qp_type = ep->qp_handle->qp->qp_type;
+       cm->msg.saddr.lid = cm->hca->ib_trans.addr.lid;
+       dapl_os_memcpy(&cm->msg.saddr.gid[0],
+                      &cm->hca->ib_trans.addr.gid, 16);
 
        /*
         * UD: deliver p_data with REQ and EST event, keep REQ p_data in
@@ -1649,7 +1470,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
        dapl_os_lock(&cm->lock);
        cm->state = DCM_RTU_PENDING;
        dapl_os_get_time(&cm->timer); /* RTU expected */
-       if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
+       if (mcm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
                dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n");
                dapl_os_unlock(&cm->lock);
                dapl_ep_unlink_cm(ep, cm);
@@ -1704,12 +1525,12 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
                return DAT_INSUFFICIENT_RESOURCES;
 
        /* remote hca and port: lid, gid, network order */
-       dapl_os_memcpy(&cm->msg.daddr, r_addr, sizeof(union dcm_addr));
+       dapl_os_memcpy(&cm->msg.daddr, r_addr, sizeof(dat_mcm_addr_t));
 
        /* remote uCM information, comes from consumer provider r_addr */
        cm->msg.dport = htons((uint16_t)r_psp);
-       cm->msg.dqpn = cm->msg.daddr.ib.qpn;
-       cm->msg.daddr.ib.qpn = 0; /* don't have a remote qpn until reply */
+       cm->msg.dqpn = cm->msg.daddr.qpn;
+       cm->msg.daddr.qpn = 0; /* don't have a remote qpn until reply */
        
         /* set max rdma inbound requests */
         cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
@@ -1837,20 +1658,13 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia,
                             IN DAT_UINT64 sid, 
                             IN DAPL_SP *sp)
 {
-       ib_cm_srvc_handle_t cm = NULL;
+       dp_ib_cm_handle_t cm = NULL;
+       int ret;
 
        dapl_dbg_log(DAPL_DBG_TYPE_EP,
                     " listen(ia %p ServiceID %x sp %p)\n",
                     ia, sid, sp);
 
-       /* reserve local port, then allocate CM object */
-       if (!ucm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_WARN,
-                            " listen: ERROR %s on conn_qual %x\n",
-                            strerror(errno), sid);
-               return DAT_CONN_QUAL_IN_USE;
-       }
-
        /* cm_create will setup saddr for listen server */
        if ((cm = dapls_ib_cm_create(NULL)) == NULL)
                return DAT_INSUFFICIENT_RESOURCES;
@@ -1858,19 +1672,41 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia,
        /* LISTEN: init DST address and QP info to local CM server info */
        cm->sp = sp;
        cm->hca = ia->hca_ptr;
-       cm->msg.sport = htons((uint16_t)sid);
-       cm->msg.sqpn = htonl(ia->hca_ptr->ib_trans.qp->qp_num);
-       cm->msg.saddr.ib.qp_type = IBV_QPT_UD;
-        cm->msg.saddr.ib.lid = ia->hca_ptr->ib_trans.addr.ib.lid; 
-       dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
-                      &cm->hca->ib_trans.addr.ib.gid, 16); 
-       
+
        /* save cm_handle reference in service point */
        sp->cm_srvc_handle = cm;
 
+       /* proxy CM service: send listen over to MPXYD */
+       if (ia->hca_ptr->ib_trans.scif_ep) {
+               ret = dapli_mix_listen(cm, sid);
+               if (ret) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+                                    " listen: ERROR %s on conn_qual %x\n",
+                                    strerror(ret), sid);
+                       dapli_cm_free(cm);
+                       return dapl_convert_errno(ret, "mix_listen" );
+               }
+       } else {
+               /* local CM service, reserve local port and setup addr info */
+               if (!mcm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+                                    " listen: ERROR %s on conn_qual %x\n",
+                                    strerror(errno), sid);
+                       dapli_cm_free(cm);
+                       return DAT_CONN_QUAL_IN_USE;
+               }
+               cm->msg.sport = htons((uint16_t)sid);
+               cm->msg.sqpn = htonl(ia->hca_ptr->ib_trans.qp->qp_num);
+               cm->msg.saddr.qp_type = IBV_QPT_UD;
+               cm->msg.saddr.lid = ia->hca_ptr->ib_trans.addr.lid;
+               dapl_os_memcpy(&cm->msg.saddr.gid[0],
+                              &cm->hca->ib_trans.addr.gid, 16);
+       }
+       
        /* queue up listen socket to process inbound CR's */
        cm->state = DCM_LISTEN;
        dapli_queue_listen(cm);
+
        DAPL_CNTR(ia, DCNT_IA_CM_LISTEN);
 
        return DAT_SUCCESS;
@@ -1897,17 +1733,23 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia,
 DAT_RETURN
 dapls_ib_remove_conn_listener(IN DAPL_IA *ia, IN DAPL_SP *sp)
 {
-       ib_cm_srvc_handle_t cm = sp->cm_srvc_handle;
+       dp_ib_cm_handle_t cm = sp->cm_srvc_handle;
 
        /* free cm_srvc_handle and port, and mark CM for cleanup */
        if (cm) {
                dapl_dbg_log(DAPL_DBG_TYPE_EP,
                     " remove_listener(ia %p sp %p cm %p psp=%x)\n",
-                    ia, sp, cm, ntohs(cm->msg.dport));
+                    ia, sp, cm, ntohs(cm->msg.sport));
 
                sp->cm_srvc_handle = NULL;
                dapli_dequeue_listen(cm);  
-               ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+
+               /* clean up proxy listen, otherwise local port space */
+               if (cm->hca->ib_trans.scif_ep)
+                       dapli_mix_listen_free(cm);
+               else
+                       mcm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+
                dapls_cm_release(cm);  /* last ref, dealloc */
        }
        return DAT_SUCCESS;
@@ -1993,16 +1835,16 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm,
                 " dlid %x iqp %x port %x\n", cm,
                 dapl_cm_op_str(ntohs(cm->msg.op)), 
                 dapl_cm_state_str(cm->state), 
-                ntohs(cm->hca->ib_trans.addr.ib.lid), 
-                ntohl(cm->msg.saddr.ib.qpn), 
-                ntohs(cm->msg.sport), ntohs(cm->msg.daddr.ib.lid), 
-                ntohl(cm->msg.daddr.ib.qpn), ntohs(cm->msg.dport));
+                ntohs(cm->hca->ib_trans.addr.lid),
+                ntohl(cm->msg.saddr.qpn),
+                ntohs(cm->msg.sport), ntohs(cm->msg.daddr.lid),
+                ntohl(cm->msg.daddr.qpn), ntohs(cm->msg.dport));
 
        cm->state = DCM_REJECTED;
-       cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; 
-       cm->msg.saddr.ib.qp_type = cm->msg.daddr.ib.qp_type;
-       dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
-                      &cm->hca->ib_trans.addr.ib.gid, 16); 
+       cm->msg.saddr.lid = cm->hca->ib_trans.addr.lid;
+       cm->msg.saddr.qp_type = cm->msg.daddr.qp_type;
+       dapl_os_memcpy(&cm->msg.saddr.gid[0],
+                      &cm->hca->ib_trans.addr.gid, 16);
        
        if (reason == IB_CM_REJ_REASON_CONSUMER_REJ)
                cm->msg.op = htons(DCM_REJ_USER);
@@ -2013,7 +1855,7 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm,
                  reason == IB_CM_REJ_REASON_CONSUMER_REJ ?
                  DCNT_IA_CM_USER_REJ_TX : DCNT_IA_CM_ERR_REJ_TX);
 
-       if (ucm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) {
+       if (mcm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) {
                dapl_log(DAPL_DBG_TYPE_WARN,
                         " cm_reject: send ERR: %s\n", strerror(errno));
                dapl_os_unlock(&cm->lock);
@@ -2072,77 +1914,6 @@ int dapls_ib_private_data_size(
        return DCM_MAX_PDATA_SIZE;
 }
 
-#if defined(_WIN32) || defined(_WIN64)
-
-void cm_thread(void *arg)
-{
-       struct dapl_hca *hca = arg;
-       dp_ib_cm_handle_t cm, next;
-       DWORD time_ms;
-
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread: ENTER hca %p\n", hca);
-       dapl_os_lock(&hca->ib_trans.lock);
-       for (hca->ib_trans.cm_state = IB_THREAD_RUN;
-            hca->ib_trans.cm_state == IB_THREAD_RUN ||
-            !dapl_llist_is_empty(&hca->ib_trans.list);
-            dapl_os_lock(&hca->ib_trans.lock)) {
-
-               time_ms = INFINITE;
-               CompSetZero(&hca->ib_trans.signal.set);
-               CompSetAdd(&hca->ib_hca_handle->channel, &hca->ib_trans.signal.set);
-               CompSetAdd(&hca->ib_trans.rch->comp_channel, &hca->ib_trans.signal.set);
-               CompSetAdd(&hca->ib_trans.ib_cq->comp_channel, &hca->ib_trans.signal.set);
-
-               next = dapl_llist_is_empty(&hca->ib_trans.list) ? NULL :
-                       dapl_llist_peek_head(&hca->ib_trans.list);
-
-               while (next) {
-                       cm = next;
-                       next = dapl_llist_next_entry(&hca->ib_trans.list,
-                                                    (DAPL_LLIST_ENTRY *)&cm->local_entry);
-                       dapls_cm_acquire(cm); /* hold thread ref */
-                       dapl_os_lock(&cm->lock);
-                       if (cm->state == DCM_FREE || 
-                           hca->ib_trans.cm_state != IB_THREAD_RUN) {
-                               dapl_os_unlock(&cm->lock);
-                               dapl_log(DAPL_DBG_TYPE_CM, 
-                                        " CM FREE: %p ep=%p st=%s refs=%d\n", 
-                                        cm, cm->ep, dapl_cm_state_str(cm->state), 
-                                        cm->ref_count);
-
-                               dapls_cm_release(cm); /* release alloc ref */
-                               dapli_cm_dequeue(cm); /* release workq ref */
-                               dapls_cm_release(cm); /* release thread ref */
-                               continue;
-                       }
-                       dapl_os_unlock(&cm->lock);
-                       ucm_check_timers(cm, &time_ms);
-                       dapls_cm_release(cm); /* release thread ref */
-               }
-
-               dapl_os_unlock(&hca->ib_trans.lock);
-
-               hca->ib_hca_handle->channel.Milliseconds = time_ms;
-               hca->ib_trans.rch->comp_channel.Milliseconds = time_ms;
-               hca->ib_trans.ib_cq->comp_channel.Milliseconds = time_ms;
-               CompSetPoll(&hca->ib_trans.signal.set, time_ms);
-
-               hca->ib_hca_handle->channel.Milliseconds = 0;
-               hca->ib_trans.rch->comp_channel.Milliseconds = 0;
-               hca->ib_trans.ib_cq->comp_channel.Milliseconds = 0;
-
-               ucm_recv(&hca->ib_trans);
-               ucm_async_event(hca);
-               dapli_cq_event_cb(&hca->ib_trans);
-       }
-
-       dapl_os_unlock(&hca->ib_trans.lock);
-       hca->ib_trans.cm_state = IB_THREAD_EXIT;
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca);
-}
-
-#else                          // _WIN32 || _WIN64
-
 void cm_thread(void *arg)
 {
        struct dapl_hca *hca = arg;
@@ -2164,7 +1935,8 @@ void cm_thread(void *arg)
                dapl_fd_zero(set);
                dapl_fd_set(hca->ib_trans.signal.scm[0], set, DAPL_FD_READ);    
                dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ);
-               dapl_fd_set(hca->ib_trans.rch->fd, set, DAPL_FD_READ);
+               dapl_fd_set(hca->ib_trans.rch_fd, set, DAPL_FD_READ);
+               dapl_fd_set(hca->ib_trans.scif_ep, set, DAPL_FD_READ);
                dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ);
                
                if (!dapl_llist_is_empty(&hca->ib_trans.list))
@@ -2193,7 +1965,7 @@ void cm_thread(void *arg)
                                continue;
                        }
                        dapl_os_unlock(&cm->lock);
-                       ucm_check_timers(cm, &time_ms);
+                       mcm_check_timers(cm, &time_ms);
                        dapls_cm_release(cm); /* release thread ref */
                }
 
@@ -2205,14 +1977,17 @@ void cm_thread(void *arg)
                dapl_os_unlock(&hca->ib_trans.lock);
                dapl_select(set, time_ms);
 
-               /* Process events: CM, ASYNC, NOTIFY THREAD */
-               if (dapl_poll(hca->ib_trans.rch->fd, 
-                             DAPL_FD_READ) == DAPL_FD_READ) {
-                       ucm_recv(&hca->ib_trans);
+               if (dapl_poll(hca->ib_trans.rch_fd,
+                                       DAPL_FD_READ) == DAPL_FD_READ) {
+                       mcm_recv(&hca->ib_trans);
+               }
+               if (dapl_poll(hca->ib_trans.scif_ep,
+                                     DAPL_FD_READ) == DAPL_FD_READ) {
+                       dapli_mix_recv(hca, hca->ib_trans.scif_ep);
                }
                if (dapl_poll(hca->ib_hca_handle->async_fd, 
                              DAPL_FD_READ) == DAPL_FD_READ) {
-                       ucm_async_event(hca);
+                       mcm_async_event(hca);
                }
                if (dapl_poll(hca->ib_trans.ib_cq->fd, 
                              DAPL_FD_READ) == DAPL_FD_READ) {
@@ -2236,132 +2011,4 @@ out:
        hca->ib_trans.cm_state = IB_THREAD_EXIT;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca);
 }
-#endif
-
-#ifdef DAPL_COUNTERS
-static char _ctr_host_[128];
-/* Debug aid: List all Connections in process and state */
-void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
-{
-       /* Print in process CM's for this IA, if debug type set */
-       int i = 0;
-       dp_ib_cm_handle_t cm, next_cm;
-       struct dapl_llist_entry **list;
-       DAPL_OS_LOCK *lock;
-       
-       /* LISTEN LIST */
-       list = &ia_ptr->hca_ptr->ib_trans.llist;
-       lock = &ia_ptr->hca_ptr->ib_trans.llock;
 
-       dapl_os_lock(lock);
-       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list))
-               next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list);
-       else
-               next_cm = NULL;
-
-       gethostname(_ctr_host_, sizeof(_ctr_host_));
-       printf("\n [%s:%x] DAPL IA LISTEN/CONNECTIONS IN PROCESS:\n", 
-               _ctr_host_ , dapl_os_getpid());
-
-       while (next_cm) {
-               cm = next_cm;
-               next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
-                                               (DAPL_LLIST_ENTRY*)&cm->local_entry);
-
-               printf( "  LISTEN[%d]: sp %p %s uCM_QP: %x %x c_%x l_pid %x \n",
-                       i, cm->sp, dapl_cm_state_str(cm->state),
-                       ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                       ntohl(cm->msg.sqpn),
-                       ntohl(cm->msg.s_id));
-               i++;
-       }
-       dapl_os_unlock(lock);
-
-       /* CONNECTION LIST */
-       list = &ia_ptr->hca_ptr->ib_trans.list;
-       lock = &ia_ptr->hca_ptr->ib_trans.lock;
-
-       dapl_os_lock(lock);
-       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list))
-               next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list);
-       else
-               next_cm = NULL;
-
-        while (next_cm) {
-               cm = next_cm;
-               next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
-                                               (DAPL_LLIST_ENTRY*)&cm->local_entry);
-
-               printf( "  CONN[%d]: ep %p cm %p %s %s"
-                       "  %x %x c_%x i_%x %s %x %x c_%x i_%x r_pid %x\n",
-                       i, cm->ep, cm,
-                       cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
-                       dapl_cm_state_str(cm->state),
-                       ntohs(cm->msg.saddr.ib.lid),
-                       ntohs(cm->msg.sport),
-                       ntohl(cm->msg.sqpn),
-                       ntohl(cm->msg.saddr.ib.qpn),    
-                       cm->sp ? "<-" : "->",
-                       ntohs(cm->msg.daddr.ib.lid),
-                       ntohs(cm->msg.dport),
-                       ntohl(cm->msg.dqpn),
-                       ntohl(cm->msg.daddr.ib.qpn),
-                       ntohl(cm->msg.d_id));
-               i++;
-       }
-       printf("\n");
-       dapl_os_unlock(lock);
-}
-
-void dapls_print_cm_free_list(IN DAPL_IA *ia_ptr)
-{
-       DAPL_EP *ep, *next_ep;
-       dp_ib_cm_handle_t cm, next_cm;
-       int i = 0;
-
-       gethostname(_ctr_host_, sizeof(_ctr_host_));
-       printf("\n [%s:%x] DAPL EP CM FREE LIST:\n",
-               _ctr_host_ , dapl_os_getpid());
-
-       dapl_os_lock(&ia_ptr->header.lock);
-       ep = (dapl_llist_is_empty(&ia_ptr->ep_list_head) ?
-               NULL : dapl_llist_peek_head(&ia_ptr->ep_list_head));
-       while (ep != NULL) {
-               next_ep = dapl_llist_next_entry(&ia_ptr->ep_list_head,
-                                               &ep->header.ia_list_entry);
-               dapl_os_lock(&ep->header.lock);
-               cm = (dapl_llist_is_empty(&ep->cm_list_head) ?
-                       NULL : dapl_llist_peek_head(&ep->cm_list_head));
-               while (cm) {
-                       dapl_os_lock(&cm->lock);
-                       next_cm = dapl_llist_next_entry(&ep->cm_list_head,
-                                                       &cm->list_entry);
-                       if (cm->state == DCM_FREE) {
-                               printf( "  CONN[%d]: ep %p cm %p %s %s"
-                                       " %x %x c_%x i_%x l_pid %x %s"
-                                       " %x %x c_%x i_%x r_pid %x\n",
-                                       i, cm->ep, cm,
-                                       cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
-                                       dapl_cm_state_str(cm->state),
-                                       ntohs(cm->msg.saddr.ib.lid),
-                                       ntohs(cm->msg.sport),
-                                       ntohl(cm->msg.sqpn),
-                                       ntohl(cm->msg.saddr.ib.qpn),
-                                       ntohl(cm->msg.s_id),
-                                       cm->sp ? "<-" : "->",
-                                       ntohs(cm->msg.daddr.ib.lid),
-                                       ntohs(cm->msg.dport),
-                                       ntohl(cm->msg.dqpn),
-                                       ntohl(cm->msg.daddr.ib.qpn),
-                                       ntohl(cm->msg.d_id));
-                               i++;
-                       }
-                       dapl_os_unlock(&cm->lock);
-                       cm = next_cm;
-               }
-               dapl_os_unlock(&ep->header.lock);
-               ep = next_ep;
-       }
-       dapl_os_unlock(&ia_ptr->header.lock);
-}
-#endif
index b5bd082ed39833d72b46158cecba22e76b306a2c..5b81d64f9f5ba002db20a3e9fc3e94c0d7569330 100644 (file)
 
 #ifndef _DAPL_IB_UTIL_H_
 #define _DAPL_IB_UTIL_H_
-#define _OPENIB_SCM_ 
+#define _OPENIB_MCM_
 
 #include <infiniband/verbs.h>
+#include <scif.h>
 #include "openib_osd.h"
 #include "dapl_ib_common.h"
+#include <dat2/dat_mic_extensions.h>
+
 
 /* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */
 struct ib_cm_handle
 { 
-       struct dapl_llist_entry list_entry;
-       struct dapl_llist_entry local_entry;
-       DAPL_OS_WAIT_OBJECT     d_event;
-       DAPL_OS_WAIT_OBJECT     f_event;
-       DAPL_OS_LOCK            lock;
-       DAPL_OS_TIMEVAL         timer;
-        int                    ref_count;
-       int                     state;
-       int                     retries;
-       struct dapl_hca         *hca;
-       struct dapl_sp          *sp;    
-       struct dapl_ep          *ep;
-       struct ibv_ah           *ah;
-       uint16_t                p_size; /* accept p_data, for retries */
-       uint8_t                 p_data[DCM_MAX_PDATA_SIZE];
-       ib_cm_msg_t             msg;
+       struct dapl_llist_entry         list_entry;
+       struct dapl_llist_entry         local_entry;
+       DAPL_OS_WAIT_OBJECT             d_event;
+       DAPL_OS_WAIT_OBJECT             f_event;
+       DAPL_OS_LOCK                    lock;
+       DAPL_OS_TIMEVAL                 timer;
+       uint32_t                        cm_id;  /* local id */
+       uint32_t                        scm_id; /* shadow id */
+       uint64_t                        cm_ctx; /* local context */
+       uint64_t                        scm_ctx;        /* shadow context */
+       int                             ref_count;
+       int                             state;
+       int                             retries;
+       struct _ib_hca_transport        *tp;
+       struct dapl_hca                 *hca;
+       struct dapl_sp                  *sp;
+       struct dapl_ep                  *ep;
+       struct ibv_ah                   *ah;
+       uint16_t                        p_size; /* accept p_data, for retries */
+       uint8_t                         p_data[DAT_MCM_PDATA_SIZE];
+       dat_mcm_msg_t                   msg;
 };
 
 typedef struct ib_cm_handle    *dp_ib_cm_handle_t;
@@ -80,10 +88,10 @@ typedef struct _ib_hca_transport
        ib_async_cq_handler_t   async_cq_error;
        ib_async_dto_handler_t  async_cq;
        ib_async_qp_handler_t   async_qp_error;
-       union dcm_addr          addr;   /* lid, port, qp_num, gid */
-       int                     max_inline_send;
-       int                     rd_atom_in;
-       int                     rd_atom_out;
+       dat_mcm_addr_t          addr;   /* lid, port, qp_num, gid */
+       DAT_NAMED_ATTR          named_attr;
+       struct dapl_thread_signal signal;
+       /* dat_mix_dev_attr_t */
        uint8_t                 ack_timer;
        uint8_t                 ack_retry;
        uint8_t                 rnr_timer;
@@ -91,9 +99,14 @@ typedef struct _ib_hca_transport
        uint8_t                 global;
        uint8_t                 hop_limit;
        uint8_t                 tclass;
+       uint8_t                 sl;
        uint8_t                 mtu;
-       DAT_NAMED_ATTR          named_attr;
-       struct dapl_thread_signal signal;
+       uint8_t                 rd_atom_in;
+       uint8_t                 rd_atom_out;
+       uint8_t                 pkey_idx;
+       uint16_t                pkey;
+       uint16_t                max_inline_send;
+       /* dat_mix_dev_attr_t */
        int                     cqe;
        int                     qpe;
        int                     burst;
@@ -110,44 +123,47 @@ typedef struct _ib_hca_transport
        struct ibv_qp           *qp;
        struct ibv_mr           *mr_rbuf;
        struct ibv_mr           *mr_sbuf;
-       ib_cm_msg_t             *sbuf;
-       ib_cm_msg_t             *rbuf;
+       dat_mcm_msg_t           *sbuf;
+       dat_mcm_msg_t           *rbuf;
        struct ibv_comp_channel *rch;
+       int                     rch_fd;
        struct ibv_ah           **ah;  
        DAPL_OS_LOCK            plock;
        uint16_t                lid;
        uint8_t                 *sid;  /* Sevice IDs, port space, bitarray? */
-       uint8_t                 sl;
-       uint16_t                pkey;
-       int                     pkey_idx;
-#ifdef DAT_IB_COLLECTIVES
-       /* Collective member device and address information */
-       ib_thread_state_t       coll_thread_state;
-       DAPL_OS_THREAD          coll_thread;
-       DAPL_OS_LOCK            coll_lock;
-       DAPL_OS_WAIT_OBJECT     coll_event;
-       struct dapl_llist_entry *grp_list;
-       user_progress_func_t    *user_func;
-       int                     l_sock;
-       struct sockaddr_in      m_addr;
-       void                    *m_ctx;
-       void                    *m_info;
-       void                    *f_info;
-       int                     m_size;
-       int                     f_size;
-       int                     t_id;
-#endif
+
+       /* SCIF MIC indirect, EP to MPXYD services, if running on MIC */
+       struct scif_portID      self;
+       scif_epd_t              scif_ep;        /* FD operation processing */
+       scif_epd_t              scif_cm_ep;     /* FD CM packet processing */
+       struct scif_portID      peer;           /* MPXYD op proxy addr info */
+       struct scif_portID      peer_cm;        /* MPXYD cm proxy addr info */
+       off_t                   scif_adr;       /* MPXYD RDMA memory pool */
+       off_t                   scif_off;
+       int                     scif_len;
 
 } ib_hca_transport_t;
 
 /* prototypes */
 void cm_thread(void *arg);
-void ucm_async_event(struct dapl_hca *hca);
+void mcm_async_event(struct dapl_hca *hca);
 void dapli_cq_event_cb(struct _ib_hca_transport *tp);
 void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr);
 void dapls_cm_release(dp_ib_cm_handle_t cm_ptr);
 void dapls_cm_free(dp_ib_cm_handle_t cm_ptr);
 
+/* MIC indirect eXchange (MIX) operations */
+int  dapli_mix_open(ib_hca_transport_t *tp, char *name, int port);
+void dapli_mix_close(ib_hca_transport_t *tp);
+int  dapli_mix_listen(dp_ib_cm_handle_t cm, uint16_t sid);
+int  dapli_mix_listen_free(dp_ib_cm_handle_t cm);
+int  dapli_mix_qp_create(ib_qp_handle_t m_qp, struct ibv_qp_init_attr *attr);
+int  dapli_mix_qp_free(ib_qp_handle_t m_qp);
+int  dapli_mix_cq_create(ib_cq_handle_t m_cq);
+int  dapli_mix_cq_free(ib_cq_handle_t m_cq);
+int  dapli_mix_recv(DAPL_HCA *hca, int scif_ep);
+
+
 #ifdef DAPL_COUNTERS
 void dapls_print_cm_list(IN DAPL_IA *ia_ptr);
 #endif
index a07d886a498e990200af2d10051b62b0fcdeed81..83d231da603014925f76de21114fc450e500534b 100644 (file)
 #include "dapl_adapter_util.h"
 #include "dapl_ib_util.h"
 #include "dapl_osd.h"
-
 #include <stdlib.h>
 
-#ifdef DAT_IB_COLLECTIVES
-#include <collectives/ib_collectives.h>
-#endif
 
 static void ucm_service_destroy(IN DAPL_HCA *hca);
 static int  ucm_service_create(IN DAPL_HCA *hca);
 
-#if defined (_WIN32)
-#include <rdma\winverbs.h>
-
-static int32_t create_os_signal(IN DAPL_HCA * hca_ptr)
-{
-       return CompSetInit(&hca_ptr->ib_trans.signal.set);
-}
-
-static void destroy_os_signal(IN DAPL_HCA * hca_ptr)
-{
-       CompSetCleanup(&hca_ptr->ib_trans.signal.set);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
-       verbs->channel.Milliseconds = 0;
-       return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
-       channel->comp_channel.Milliseconds = 0;
-       return 0;
-}
-
-#else // _WIN32
-
 static int32_t create_os_signal(IN DAPL_HCA * hca_ptr)
 {
        DAPL_SOCKET listen_socket;
@@ -147,8 +116,6 @@ static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
        return dapls_config_fd(channel->fd);
 }
 
-#endif
-
 /*
  * dapls_ib_init, dapls_ib_release
  *
@@ -219,7 +186,6 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
        goto err;
 
 found:
-
        hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
        if (!hca_ptr->ib_hca_handle) {
                dapl_log(DAPL_DBG_TYPE_ERR,
@@ -240,14 +206,14 @@ found:
                         strerror(errno));
                goto err;
        } else {
-               hca_ptr->ib_trans.addr.ib.lid = htons(port_attr.lid);
+               hca_ptr->ib_trans.addr.lid = htons(port_attr.lid);
                hca_ptr->ib_trans.lid = htons(port_attr.lid);
        }
 
        /* get gid for this hca-port, network order */
        if (ibv_query_gid(hca_ptr->ib_hca_handle,
                          (uint8_t) hca_ptr->port_num, 0,
-                         (union ibv_gid *)&hca_ptr->ib_trans.addr.ib.gid)) {
+                         (union ibv_gid *)&hca_ptr->ib_trans.addr.gid)) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " open_hca: query GID ERR for %s, err=%s\n",
                         ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
@@ -275,6 +241,14 @@ found:
        hca_ptr->ib_trans.mtu =
            dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU));
 
+       if (dapli_mix_open(&hca_ptr->ib_trans, hca_name, hca_ptr->port_num)) {
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " open_hca: SCIF init ERR for %s, err=%s\n",
+                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                        strerror(errno));
+               goto err;
+       }
+
        /* initialize CM list, LISTEN, SND queue, PSP array, locks */
        if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS)
                goto err;
@@ -335,27 +309,22 @@ found:
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                     " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x ""
                     " ID 0x" F64x "\n", 
-                    ntohl(hca_ptr->ib_trans.addr.ib.qpn),
-                    ntohs(hca_ptr->ib_trans.addr.ib.lid), 
+                    ntohl(hca_ptr->ib_trans.addr.qpn),
+                    ntohs(hca_ptr->ib_trans.addr.lid),
                     (unsigned long long)
-                    ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]),
+                    ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.gid[0]),
                     (unsigned long long)
-                    ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8]));
+                    ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.gid[8]));
 
        /* save LID, GID, QPN, PORT address information, for ia_queries */
        /* Set AF_INET6 to insure callee address storage of 28 bytes */
        hca_ptr->ib_trans.hca = hca_ptr;
-       hca_ptr->ib_trans.addr.ib.family = AF_INET6; 
-       hca_ptr->ib_trans.addr.ib.qp_type = IBV_QPT_UD;
+       hca_ptr->ib_trans.addr.family = AF_INET6;
+       hca_ptr->ib_trans.addr.qp_type = IBV_QPT_UD;
        memcpy(&hca_ptr->hca_address, 
               &hca_ptr->ib_trans.addr, 
               sizeof(union dcm_addr));
 
-#ifdef DAT_IB_COLLECTIVES
-       if (dapli_create_collective_service(hca_ptr))
-               goto bail;
-#endif
-
        ibv_free_device_list(dev_list);
 
        /* wait for cm_thread */
@@ -394,10 +363,6 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
 {
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
 
-#ifdef DAT_IB_COLLECTIVES
-       dapli_free_collective_service(hca_ptr);
-#endif
-
        if (hca_ptr->ib_trans.cm_state == IB_THREAD_RUN) {
                hca_ptr->ib_trans.cm_state = IB_THREAD_CANCEL;
                dapls_thread_signal(&hca_ptr->ib_trans.signal);
@@ -409,6 +374,8 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
                }
        }
 
+       dapli_mix_close(&hca_ptr->ib_trans);
+
        dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
        dapl_os_lock_destroy(&hca_ptr->ib_trans.llock);
        destroy_os_signal(hca_ptr);
@@ -419,8 +386,8 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
 
        if (hca_ptr->ib_trans.ib_cq_empty) {
                struct ibv_comp_channel *channel;
-               channel = hca_ptr->ib_trans.ib_cq_empty->channel;
-               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+               channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
                ibv_destroy_comp_channel(channel);
        }
 
@@ -454,8 +421,10 @@ static void ucm_service_destroy(IN DAPL_HCA *hca)
        if (tp->rcq)
                ibv_destroy_cq(tp->rcq);
 
-       if (tp->rch)
+       if (tp->rch) {
+               tp->rch_fd = 0;
                ibv_destroy_comp_channel(tp->rch);
+       }
 
        if (tp->ah) {
                int i;
@@ -491,6 +460,10 @@ static int ucm_service_create(IN DAPL_HCA *hca)
 
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n");
 
+       /* CM service via MPXYD, no need for local IB UD CM service */
+       if (tp->scif_ep)
+               return 0;
+
        /* setup CM timers and queue sizes */
        tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT);
        tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME);
@@ -511,6 +484,7 @@ static int ucm_service_create(IN DAPL_HCA *hca)
        if (!tp->rch) 
                goto bail;
        dapls_config_comp_channel(tp->rch);
+       tp->rch_fd = tp->rch->fd;
 
        tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
        if (!tp->scq) 
@@ -584,7 +558,7 @@ static int ucm_service_create(IN DAPL_HCA *hca)
        }
 
        /* save qp_num as part of ia_address, network order */
-       tp->addr.ib.qpn = htonl(tp->qp->qp_num);
+       tp->addr.qpn = htonl(tp->qp->qp_num);
         return 0;
 bail:
        dapl_log(DAPL_DBG_TYPE_ERR,
@@ -593,7 +567,7 @@ bail:
        return -1;
 }
 
-void ucm_async_event(struct dapl_hca *hca)
+void mcm_async_event(struct dapl_hca *hca)
 {
        struct ibv_async_event event;
        struct _ib_hca_transport *tp = &hca->ib_trans;
diff --git a/dapl/openib_mcm/mix.c b/dapl/openib_mcm/mix.c
new file mode 100644 (file)
index 0000000..2a56a2f
--- /dev/null
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2009 Intel Corporation.  All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
+#include "dapl_osd.h"
+
+/*
+ * CM proxy services, MCM on MIC to MPXYD via SCIF
+ *  NOTE: all sync MIX operations for now, TODO async?
+ *
+ * MIX_IA_OPEN: connect the op and CM endpoints to MPXYD, send device name,
+ * port and attributes, save the returned address in tp; 0 on success, else -1
+ */
+int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port)
+{
+       int ret, len = sizeof(dat_mix_open_t);
+       dat_mix_open_t msg;
+
+       ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
+       if (ret < 0) {
+               dapl_log(1, " scif_get_nodeIDs() failed with error %d\n", errno);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," SCIF node_id: %d\n", (uint16_t)tp->self.node);
+
+#if 0 /* let run on Xeon for testing */
+       if (tp->self.node == 0) {
+               dapl_log(DAPL_DBG_TYPE_EXTENSION," Not running on MIC, no MPXY connect required\n");
+               tp->scif_ep = 0;
+               return 0;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Running on MIC, MPXY connect required\n");
+#endif
+       /* MPXYD is running on node 0 and well-known OFED port */
+       tp->peer.node = 0;
+       tp->peer.port = SCIF_OFED_PORT_8;
+
+       tp->scif_ep = scif_open();
+       if (tp->scif_ep < 0) {
+               dapl_log(1, "scif_open() failed with error %d\n", errno);
+               return -1;
+       }
+       ret = scif_connect(tp->scif_ep, &tp->peer);
+       if (ret < 0) {
+               dapl_log(1, "scif_connect() OP EP failed with error %d\n", errno);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION,"Connected to node 0 for operations\n");
+
+       tp->scif_cm_ep = scif_open();
+       if (tp->scif_cm_ep < 0) {
+               dapl_log(1, "scif_open() for cm_ep failed with error %d\n", errno);
+               return -1;
+       }
+       ret = scif_connect(tp->scif_cm_ep, &tp->peer);
+       if (ret < 0) {
+               dapl_log(1, "scif_connect() CM EP to port %d failed with error %d\n", tp->peer.port, errno);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION,"Connected to node 0 for CM messages \n");
+
+       /* MIX_IA_OPEN: device name, port, and attrs copied from tp starting at ack_timer */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_IA_OPEN;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+       msg.port = port;
+       strncpy((char*)&msg.name, name, sizeof(msg.name) - 1);
+       msg.name[sizeof(msg.name) - 1] = '\0'; /* strncpy does not terminate on truncation */
+       memcpy(&msg.dev_attr, (void*)&tp->ack_timer, sizeof(dat_mix_dev_attr_t));
+
+       ret = scif_send(tp->scif_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", tp->scif_ep, ret, len);
+               return -1; /* no request went out, do not block waiting for a reply */
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent open request on SCIF EP\n");
+
+       /* MIX_IA_OPEN: reply includes addr info */
+       msg.hdr.status = 1; /* make sure we update status from response */
+       ret = scif_recv(tp->scif_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", tp->scif_ep, ret, len);
+               return -1;
+       }
+
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_IA_OPEN ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+       /* save address to transport object, keeps IA queries local */
+       memcpy((void*)&tp->addr, (void*)&msg.dev_addr, sizeof(dat_mcm_addr_t));
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Received valid open reply on SCIF EP\n");
+       return 0;
+}
+
+/* MIX_IA_CLOSE - no MIX message, just close the op endpoint. NOTE(review): scif_cm_ep is not closed here - confirm */
+void dapli_mix_close(ib_hca_transport_t *tp)
+{
+       if (tp->scif_ep != 0) {
+               scif_close(tp->scif_ep);
+               tp->scif_ep = 0;
+       }
+}
+
+/* MIX_LISTEN: request a proxy listen on sid (backlog 64) over the HCA's SCIF EP; 0 on success, else -1 */
+int dapli_mix_listen(dp_ib_cm_handle_t cm, uint16_t sid)
+{
+       dat_mix_listen_t msg;
+       scif_epd_t mix_ep = cm->hca->ib_trans.scif_ep;
+       int ret, len = sizeof(dat_mix_listen_t);
+
+       /* listen request: sid and backlog */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_LISTEN;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+       msg.sid = sid;
+       msg.backlog = 64;
+
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1; /* no request went out, do not block waiting for a reply */
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+       /* listen response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_LISTEN ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERROR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," received successful reply on SCIF EP\n");
+       return 0;
+}
+
+/*
+ * MIX_LISTEN_FREE - header-only request; the connection qualifier of
+ * cm's service point is carried in req_id (truncated to 16 bits).
+ *
+ * Returns 0 when a MIX_LISTEN_FREE response with MIX_SUCCESS status
+ * arrives, -1 if the send or receive is short/failed or the reply header
+ * is not the expected one.
+ */
+int dapli_mix_listen_free(dp_ib_cm_handle_t cm)
+{
+       dat_mix_hdr_t msg;
+       scif_epd_t mix_ep = cm->hca->ib_trans.scif_ep;
+       int ret, len;
+
+       /* listen free request */
+       msg.ver = DAT_MIX_VER;
+       msg.op = MIX_LISTEN_FREE;
+       msg.status = 0;
+       msg.flags = MIX_OP_REQ;
+       msg.req_id = (uint16_t)cm->sp->conn_qual;
+
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.op);
+
+       /* listen free response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.ver != DAT_MIX_VER || msg.op != MIX_LISTEN_FREE ||
+           msg.flags != MIX_OP_RSP || msg.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.ver, msg.op, msg.flags, msg.status);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," received successful reply on SCIF EP\n");
+       return 0;
+}
+
+/*
+ * MIX_MR_CREATE - send a memory-region create request (id, length,
+ * offset, context) on the transport's SCIF endpoint and wait for the
+ * reply. The reply is header-only, so only sizeof(dat_mix_hdr_t) bytes
+ * are received back into the start of the message buffer.
+ *
+ * Returns 0 when a MIX_MR_CREATE response with MIX_SUCCESS status
+ * arrives, -1 if the send or receive is short/failed or the reply header
+ * is not the expected one.
+ */
+int dapli_mix_mr_create(ib_hca_transport_t *tp, uint32_t id, uint32_t mr_len, uint64_t off, uint64_t ctx)
+{
+       dat_mix_mr_t msg;
+       scif_epd_t mix_ep = tp->scif_ep;
+       int ret, len;
+
+       /* zero first so unset fields and padding are not sent uninitialized */
+       memset(&msg, 0, sizeof(msg));
+
+       /* request: */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_MR_CREATE;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+       msg.mr_id = id;
+       msg.len = mr_len;
+       msg.off = off;
+       msg.ctx = ctx;
+
+       len = sizeof(dat_mix_mr_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+       /* response, just status */
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_MR_CREATE ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," received successful reply on SCIF EP\n");
+       return 0;
+}
+
+/*
+ * MIX_MR_FREE - send a memory-region free request for 'id' on the SCIF
+ * endpoint of cm's HCA transport and wait for the header-only reply.
+ *
+ * Returns 0 when a MIX_MR_FREE response with MIX_SUCCESS status arrives,
+ * -1 if the send or receive is short/failed or the reply header is not
+ * the expected one.
+ */
+int dapli_mix_mr_free(dp_ib_cm_handle_t cm, uint32_t id)
+{
+       dat_mix_mr_t msg;
+       scif_epd_t mix_ep = cm->hca->ib_trans.scif_ep;
+       int ret, len;
+
+       /* only hdr and mr_id are set below; zero the rest (len, off, ctx)
+        * so stack garbage is not sent with the request */
+       memset(&msg, 0, sizeof(msg));
+
+       /* request */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_MR_FREE;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+       msg.mr_id = id;
+
+       len = sizeof(dat_mix_mr_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+       /* response, status only */
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_MR_FREE ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n");
+       return 0;
+}
+
+
+/*
+ * MIX_QP_CREATE - describe the local receive QP (qp_r) and the requested
+ * send-side QP (qp_t) to the peer on the SCIF endpoint and wait for the
+ * reply. On success the qp_t id and ctx from the reply are stored in
+ * m_qp->sqp_id and m_qp->sqp_ctx.
+ *
+ * Returns 0 when a MIX_QP_CREATE response with MIX_SUCCESS status
+ * arrives, -1 if the send or receive is short/failed or the reply header
+ * is not the expected one.
+ */
+int dapli_mix_qp_create(ib_qp_handle_t m_qp, struct ibv_qp_init_attr *attr)
+{
+       dat_mix_qp_t msg;
+       scif_epd_t mix_ep = m_qp->tp->scif_ep;
+       int ret, len;
+
+       /* zero first so reply-only fields and padding are not sent uninitialized */
+       memset(&msg, 0, sizeof(msg));
+
+       /* request: QP_r local, QP_t shadowed */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_QP_CREATE;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+
+       /* QP_r: type comes from the created QP itself (the earlier store of
+        * attr->qp_type was immediately overwritten, so it is dropped) */
+       msg.qp_r.qp_num = m_qp->qp->qp_num;
+       msg.qp_r.qp_type = m_qp->qp->qp_type;
+       msg.qp_r.state = m_qp->qp->state;
+       msg.qp_r.max_recv_wr = attr->cap.max_recv_wr;
+       msg.qp_r.max_recv_sge = attr->cap.max_recv_sge;
+       msg.qp_r.rcq_id = attr->recv_cq->handle; /* ??? */
+
+       msg.qp_t.qp_type = attr->qp_type;
+       msg.qp_t.max_inline_data = attr->cap.max_inline_data;
+       msg.qp_t.max_send_wr = attr->cap.max_send_wr;
+       msg.qp_t.max_send_sge = attr->cap.max_send_sge;
+       msg.qp_t.scq_id = attr->send_cq->handle;  /* ??? */
+
+       len = sizeof(dat_mix_qp_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+       /* wait for response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_QP_CREATE ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+
+       /* save QP_t id and ctx, needed for posting WR */
+       m_qp->sqp_id = msg.qp_t.qp_id;
+       m_qp->sqp_ctx = msg.qp_t.ctx;
+
+       dapl_log(DAPL_DBG_TYPE_EXTENSION,
+               " reply on SCIF EP -> sqp_id 0x%x, ctx %p\n",
+               m_qp->sqp_id, (void*)m_qp->sqp_ctx );
+
+       return 0;
+}
+
+/*
+ * MIX_QP_FREE - header-only request; the id saved from QP create
+ * (m_qp->sqp_id) is carried in req_id.
+ *
+ * Returns 0 when a MIX_QP_FREE response with MIX_SUCCESS status arrives,
+ * -1 if the send or receive is short/failed or the reply header is not
+ * the expected one.
+ */
+int dapli_mix_qp_free(ib_qp_handle_t m_qp)
+{
+       dat_mix_hdr_t msg;
+       scif_epd_t mix_ep = m_qp->tp->scif_ep;
+       int ret, len;
+
+       /* request */
+       msg.ver = DAT_MIX_VER;
+       msg.op = MIX_QP_FREE;
+       msg.status = 0;
+       msg.flags = MIX_OP_REQ;
+       msg.req_id = m_qp->sqp_id;
+
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.op);
+
+       /* response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.ver != DAT_MIX_VER || msg.op != MIX_QP_FREE ||
+           msg.flags != MIX_OP_RSP || msg.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.ver, msg.op, msg.flags, msg.status);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n");
+       return 0;
+}
+
+/*
+ * MIX_CQ_CREATE - send a CQ create request carrying the local CQ depth
+ * (m_cq->ib_cq->cqe) on the SCIF endpoint and wait for the reply. On
+ * success the cq_id and cq_ctx from the reply are stored in m_cq.
+ *
+ * Returns 0 when a MIX_CQ_CREATE response with MIX_SUCCESS status
+ * arrives, -1 if the send or receive is short/failed or the reply header
+ * is not the expected one.
+ */
+int dapli_mix_cq_create(ib_cq_handle_t m_cq)
+{
+       dat_mix_cq_t msg;
+       scif_epd_t mix_ep = m_cq->tp->scif_ep;
+       int ret, len;
+
+       /* zero first so reply-only fields and padding are not sent uninitialized */
+       memset(&msg, 0, sizeof(msg));
+
+       /* request: CQ length taken from the local CQ */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_CQ_CREATE;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+       msg.cq_len = m_cq->ib_cq->cqe;
+
+       len = sizeof(dat_mix_cq_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+       /* wait for response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_CQ_CREATE ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+
+       /* save CQ_t id and ctx, needed for polling */
+       m_cq->cq_id = msg.cq_id;
+       m_cq->cq_ctx = msg.cq_ctx;
+
+       dapl_log(DAPL_DBG_TYPE_EXTENSION,
+               " reply on SCIF EP -> cq_id 0x%x, ctx %p\n",
+               m_cq->cq_id, (void*)m_cq->cq_ctx );
+
+       return 0;
+}
+
+/*
+ * MIX_CQ_FREE - header-only request; the id saved from CQ create
+ * (m_cq->cq_id) is carried in req_id.
+ *
+ * Returns 0 when a MIX_CQ_FREE response with MIX_SUCCESS status arrives,
+ * -1 if the send or receive is short/failed or the reply header is not
+ * the expected one.
+ */
+int dapli_mix_cq_free(ib_cq_handle_t m_cq)
+{
+       dat_mix_hdr_t msg;
+       scif_epd_t mix_ep = m_cq->tp->scif_ep;
+       int ret, len;
+
+       /* request */
+       msg.ver = DAT_MIX_VER;
+       msg.op = MIX_CQ_FREE;
+       msg.status = 0;
+       msg.flags = MIX_OP_REQ;
+       msg.req_id = m_cq->cq_id;
+
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.op);
+
+       /* response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.ver != DAT_MIX_VER || msg.op != MIX_CQ_FREE ||
+           msg.flags != MIX_OP_RSP || msg.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.ver, msg.op, msg.flags, msg.status);
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n");
+       return 0;
+}
+
+/*
+ * MIX_CM_REQ - send a connect request on the SCIF endpoint of m_cm's
+ * transport and wait for the reply. The address of m_cm is passed in
+ * cm_ctx; on success the cm_id and cm_ctx from the reply are stored in
+ * m_cm->scm_id and m_cm->scm_ctx.
+ *
+ * Returns 0 when a MIX_CM_REQ response with MIX_SUCCESS status arrives,
+ * -1 if the send or receive is short/failed or the reply header is not
+ * the expected one.
+ */
+int dapli_mix_connect(dp_ib_cm_handle_t m_cm)
+{
+       dat_mix_cm_t msg;
+       scif_epd_t mix_ep = m_cm->tp->scif_ep;
+       int ret, len;
+
+       /* zero first so unset fields and padding are not sent uninitialized */
+       memset(&msg, 0, sizeof(msg));
+
+       /* request: local CM object address is passed as the context */
+       msg.hdr.ver = DAT_MIX_VER;
+       msg.hdr.op = MIX_CM_REQ;
+       msg.hdr.status = 0;
+       msg.hdr.flags = MIX_OP_REQ;
+       msg.cm_ctx = (uint64_t)(uintptr_t)m_cm;
+
+       /* length must be that of the CM message actually held in msg; using
+        * the CQ message size would truncate the request or run past the
+        * buffer on send/receive */
+       len = sizeof(dat_mix_cm_t);
+       ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+               /* request never went out, do not block waiting for a reply */
+               return -1;
+       }
+       dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+       /* wait for response */
+       ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+               return -1;
+       }
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_CM_REQ ||
+           msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+               dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+                        msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+               return -1;
+       }
+
+       /* save CM id and ctx returned in the reply */
+       m_cm->scm_id = msg.cm_id;
+       m_cm->scm_ctx = msg.cm_ctx;
+
+       dapl_log(DAPL_DBG_TYPE_EXTENSION,
+               " reply on SCIF EP -> cm_id 0x%x, ctx %p\n",
+               m_cm->scm_id, (void*)m_cm->scm_ctx );
+
+       return 0;
+}
+
+/*
+ * MIX recv, messages from MPXYD.
+ * Currently an empty placeholder: no data is read and 0 is always returned.
+ */
+int dapli_mix_recv(DAPL_HCA *hca, int scif_ep)
+{
+       /* parameters are not used yet */
+       (void)hca;
+       (void)scif_ep;
+
+       return 0;
+}
+
+
+
+
+
+
+
+
+
+
+
+
index b095c2f673934ebaf4d5dd1a1f4fea0500700f90..514944985d3c5ba735b114be4f2a3214722458d2 100644 (file)
@@ -475,7 +475,7 @@ DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
        /* disconnect events for RC's only */
        if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
                dapl_os_lock(&cm_ptr->ep->header.lock);
-               dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0);
+               dapls_modify_qp_state(cm_ptr->ep->qp_handle->qp, IBV_QPS_ERR, 0,0,0);
                dapl_os_unlock(&cm_ptr->ep->header.lock);
                if (cm_ptr->ep->cr_ptr) {
                        dapls_cr_callback(cm_ptr,
@@ -653,8 +653,8 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
        /* REQ: QP info in msg.saddr, IA address in msg.daddr, and pdata */
        cm_ptr->hca = ia_ptr->hca_ptr;
        cm_ptr->msg.op = ntohs(DCM_REQ);
-       cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num);
-       cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type;
+       cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp->qp_num);
+       cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp->qp_type;
        cm_ptr->msg.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid;
        dapl_os_memcpy(&cm_ptr->msg.saddr.ib.gid[0], 
                       &ia_ptr->hca_ptr->ib_trans.gid, 16);
@@ -824,15 +824,15 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
 
        /* modify QP to RTR and then to RTS with remote info */
        dapl_os_lock(&ep_ptr->header.lock);
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+       if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                  IBV_QPS_RTR, 
                                  cm_ptr->msg.saddr.ib.qpn,
                                  cm_ptr->msg.saddr.ib.lid,
                                  (ib_gid_handle_t)cm_ptr->msg.saddr.ib.gid) != DAT_SUCCESS) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " CONN_RTU: QPS_RTR ERR %s (%d,%d,%x,%x,%x) -> %s %x\n",
-                        strerror(errno), ep_ptr->qp_handle->qp_type,
-                        ep_ptr->qp_state, ep_ptr->qp_handle->qp_num,
+                        strerror(errno), ep_ptr->qp_handle->qp->qp_type,
+                        ep_ptr->qp_state, ep_ptr->qp_handle->qp->qp_num,
                         ntohl(cm_ptr->msg.saddr.ib.qpn), 
                         ntohs(cm_ptr->msg.saddr.ib.lid),
                         inet_ntoa(((struct sockaddr_in *)
@@ -842,15 +842,15 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
                dapl_os_unlock(&ep_ptr->header.lock);
                goto bail;
        }
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+       if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                  IBV_QPS_RTS, 
                                  cm_ptr->msg.saddr.ib.qpn,
                                  cm_ptr->msg.saddr.ib.lid,
                                  NULL) != DAT_SUCCESS) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " CONN_RTU: QPS_RTS ERR %s (%d,%d,%x,%x,%x) -> %s %x\n",
-                        strerror(errno), ep_ptr->qp_handle->qp_type,
-                        ep_ptr->qp_state, ep_ptr->qp_handle->qp_num,
+                        strerror(errno), ep_ptr->qp_handle->qp->qp_type,
+                        ep_ptr->qp_state, ep_ptr->qp_handle->qp->qp_num,
                         ntohl(cm_ptr->msg.saddr.ib.qpn), 
                         ntohs(cm_ptr->msg.saddr.ib.lid),
                         inet_ntoa(((struct sockaddr_in *)
@@ -891,7 +891,7 @@ ud_bail:
 
                if (event == IB_CME_CONNECTED) {
                        cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle,
-                                                    ep_ptr->qp_handle,
+                                                    ep_ptr->qp_handle->qp,
                                                     cm_ptr->msg.saddr.ib.lid, 
                                                     NULL);
                        if (cm_ptr->ah) {
@@ -1225,7 +1225,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
 
 #ifdef DAT_EXTENSIONS
        if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD &&
-           ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+           ep_ptr->qp_handle->qp->qp_type != IBV_QPT_UD) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                         " ACCEPT_USR: ERR remote QP is UD,"
                         ", but local QP is not\n");
@@ -1241,7 +1241,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
 
        /* modify QP to RTR and then to RTS with remote info already read */
        dapl_os_lock(&ep_ptr->header.lock);
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+       if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                  IBV_QPS_RTR, 
                                  cm_ptr->msg.saddr.ib.qpn,
                                  cm_ptr->msg.saddr.ib.lid,
@@ -1254,7 +1254,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
                dapl_os_unlock(&ep_ptr->header.lock);
                goto bail;
        }
-       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+       if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
                                  IBV_QPS_RTS, 
                                  cm_ptr->msg.saddr.ib.qpn,
                                  cm_ptr->msg.saddr.ib.lid,
@@ -1278,8 +1278,8 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
        local.ver = htons(DCM_VER);
        local.op = htons(DCM_REP);
        local.rd_in = ep_ptr->param.ep_attr.max_rdma_read_in;
-       local.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num);
-       local.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type;
+       local.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp->qp_num);
+       local.saddr.ib.qp_type = ep_ptr->qp_handle->qp->qp_type;
        local.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid;
        dapl_os_memcpy(&local.saddr.ib.gid[0], 
                       &ia_ptr->hca_ptr->ib_trans.gid, 16);
@@ -1332,10 +1332,8 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
                     ntohl(local.saddr.ib.qpn), ntohs(local.p_size));
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " ACCEPT_USR: local GID subnet %016llx id %016llx\n",
-                    (unsigned long long)
-                    htonll(*(uint64_t*)&local.saddr.ib.gid[0]),
-                    (unsigned long long)
-                    htonll(*(uint64_t*)&local.saddr.ib.gid[8]));
+                    (unsigned long long)htonll(*(uint64_t*)&local.saddr.ib.gid[0]),
+                    (unsigned long long)htonll(*(uint64_t*)&local.saddr.ib.gid[8]));
 
        dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
 
@@ -1388,7 +1386,7 @@ ud_bail:
                
                if (event == IB_CME_CONNECTED) {
                        cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle,
-                                               cm_ptr->ep->qp_handle,
+                                               cm_ptr->ep->qp_handle->qp,
                                                cm_ptr->msg.saddr.ib.lid, 
                                                NULL);
                        if (cm_ptr->ah) { 
index 77c64e3fca24f809c1bed02c4630273336109c3e..2d07da56b9d579938305066aa4aa373f351e9102 100644 (file)
@@ -529,8 +529,8 @@ out:
 
        if (hca_ptr->ib_trans.ib_cq_empty) {
                struct ibv_comp_channel *channel;
-               channel = hca_ptr->ib_trans.ib_cq_empty->channel;
-               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+               channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
                ibv_destroy_comp_channel(channel);
        }
 
index 4e6c527a776c14ab41192dc531abcbb1a090308d..f6f53a61fd8fa29a4a9136aceb56ce804594cb93 100644 (file)
@@ -756,8 +756,8 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
 
                /* IB info in network order */
                cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */
-               cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */
-               cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
+               cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp->qp_num); /* ep */
+               cm->msg.saddr.ib.qp_type = ep->qp_handle->qp->qp_type;
                 cm->msg.saddr.ib.lid = hca->ib_trans.addr.ib.lid; 
                dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], 
                               &hca->ib_trans.addr.ib.gid, 16);
@@ -893,8 +893,8 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
        switch (cm->state) {
        case DCM_CONNECTED:
                /* CONSUMER: move to err state to flush, if not UD */
-               if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) 
-                       dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+               if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+                       dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
 
                /* send DREQ, event after DREP or DREQ timeout */
                cm->state = DCM_DISC_PENDING;
@@ -922,8 +922,8 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
                break;
        case DCM_DISC_RECV:
                /* CM_THREAD: move to err state to flush, if not UD */
-               if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) 
-                       dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+               if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+                       dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
 
                /* DREQ received, send DREP and schedule event, finalize */
                cm->msg.op = htons(DCM_DREP);
@@ -1026,7 +1026,7 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
        }
        dapl_os_unlock(&cm->lock);
        DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)),
-                 ep->qp_handle->qp_type == IBV_QPT_UD ? DCNT_IA_CM_AH_REQ_TX : DCNT_IA_CM_REQ_TX);
+                 ep->qp_handle->qp->qp_type == IBV_QPT_UD ? DCNT_IA_CM_AH_REQ_TX : DCNT_IA_CM_REQ_TX);
 
        return DAT_SUCCESS;
 
@@ -1143,7 +1143,7 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
 
        /* modify QP to RTR and then to RTS with remote info */
        dapl_os_lock(&cm->ep->header.lock);
-       if (dapls_modify_qp_state(cm->ep->qp_handle,
+       if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
                                  IBV_QPS_RTR, 
                                  cm->msg.daddr.ib.qpn,
                                  cm->msg.daddr.ib.lid,
@@ -1156,7 +1156,7 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                event = IB_CME_LOCAL_FAILURE;
                goto bail;
        }
-       if (dapls_modify_qp_state(cm->ep->qp_handle,
+       if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
                                  IBV_QPS_RTS, 
                                  cm->msg.daddr.ib.qpn,
                                  cm->msg.daddr.ib.lid,
@@ -1197,8 +1197,8 @@ ud_bail:
                xevent.type = DAT_IB_UD_REMOTE_AH;
                xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
                xevent.remote_ah.ah = dapls_create_ah(cm->hca, 
-                                                     cm->ep->qp_handle->pd, 
-                                                     cm->ep->qp_handle
+                                                     cm->ep->qp_handle->qp->pd,
+                                                     cm->ep->qp_handle->qp,
                                                      htons(lid), 
                                                      NULL);
                if (xevent.remote_ah.ah == NULL) {
@@ -1388,8 +1388,8 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
                xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
                xevent.remote_ah.ah = dapls_create_ah(cm->hca, 
-                                                     cm->ep->qp_handle->pd, 
-                                                     cm->ep->qp_handle
+                                                     cm->ep->qp_handle->qp->pd,
+                                                     cm->ep->qp_handle->qp,
                                                      htons(lid), 
                                                      NULL);
                if (xevent.remote_ah.ah == NULL) {
@@ -1405,8 +1405,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                                sizeof(union dcm_addr));
 
                /* remote ia_addr reference includes ucm qpn, not IB qpn */
-               ((union dcm_addr*)
-                       &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+               ((union dcm_addr*)&xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
 
                dapl_dbg_log(DAPL_DBG_TYPE_EP,
                             " PASSIVE: UD xevent ah %p qpn %x lid %x\n",
@@ -1550,7 +1549,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
                         cm->ref_count,
                         htons(cm->hca->ib_trans.addr.ib.lid),
                         htons(cm->msg.sport),
-                        htonl(ep->qp_handle->qp_num),
+                        htonl(ep->qp_handle->qp->qp_num),
                         htons(cm->msg.daddr.ib.lid),
                         htons(cm->msg.dport),
                         htonl(cm->msg.daddr.ib.qpn),
@@ -1577,7 +1576,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 
 #ifdef DAT_EXTENSIONS
        if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD &&
-           ep->qp_handle->qp_type != IBV_QPT_UD) {
+           ep->qp_handle->qp->qp_type != IBV_QPT_UD) {
                dapl_log(DAPL_DBG_TYPE_ERR,
                             " ACCEPT_USR: ERR remote QP is UD,"
                             ", but local QP is not\n");
@@ -1593,7 +1592,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 
        /* modify QP to RTR and then to RTS with remote info already read */
        dapl_os_lock(&ep->header.lock);
-       if (dapls_modify_qp_state(ep->qp_handle,
+       if (dapls_modify_qp_state(ep->qp_handle->qp,
                                  IBV_QPS_RTR, 
                                  cm->msg.daddr.ib.qpn,
                                  cm->msg.daddr.ib.lid,
@@ -1605,7 +1604,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
                dapl_os_unlock(&ep->header.lock);
                goto bail;
        }
-       if (dapls_modify_qp_state(ep->qp_handle,
+       if (dapls_modify_qp_state(ep->qp_handle->qp,
                                  IBV_QPS_RTS, 
                                  cm->msg.daddr.ib.qpn,
                                  cm->msg.daddr.ib.lid,
@@ -1626,8 +1625,8 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
        /* setup local QP info and type from EP, copy pdata, for reply */
        cm->msg.op = htons(DCM_REP);
        cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
-       cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num);
-       cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
+       cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp->qp_num);
+       cm->msg.saddr.ib.qp_type = ep->qp_handle->qp->qp_type;
        cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; 
        dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
                       &cm->hca->ib_trans.addr.ib.gid, 16); 
index a07d886a498e990200af2d10051b62b0fcdeed81..dfb31e0a689c397cb7a43c2bcbdcd7621ea656cc 100644 (file)
@@ -32,6 +32,7 @@
 #include "dapl_osd.h"
 
 #include <stdlib.h>
+#include <arpa/inet.h>
 
 #ifdef DAT_IB_COLLECTIVES
 #include <collectives/ib_collectives.h>
@@ -197,6 +198,7 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
        struct ibv_port_attr port_attr;
        int i;
        DAT_RETURN dat_status;
+       char gid_str[INET6_ADDRSTRLEN];
 
        /* Get list of all IB devices, find match, open */
        dev_list = ibv_get_device_list(NULL);
@@ -332,15 +334,12 @@ found:
                     hca_ptr->port_num, 
                     inet_ntoa(((struct sockaddr_in *)
                               &hca_ptr->hca_address)->sin_addr));
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x ""
-                    " ID 0x" F64x "\n", 
+       dapl_log(DAPL_DBG_TYPE_UTIL,
+                    " open_hca: QPN 0x%x LID 0x%x GID %s\n",
                     ntohl(hca_ptr->ib_trans.addr.ib.qpn),
                     ntohs(hca_ptr->ib_trans.addr.ib.lid), 
-                    (unsigned long long)
-                    ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]),
-                    (unsigned long long)
-                    ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8]));
+                    inet_ntop(AF_INET6, hca_ptr->ib_trans.addr.ib.gid,
+                              gid_str, sizeof(gid_str)));
 
        /* save LID, GID, QPN, PORT address information, for ia_queries */
        /* Set AF_INET6 to insure callee address storage of 28 bytes */
@@ -419,8 +418,8 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
 
        if (hca_ptr->ib_trans.ib_cq_empty) {
                struct ibv_comp_channel *channel;
-               channel = hca_ptr->ib_trans.ib_cq_empty->channel;
-               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+               channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+               ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
                ibv_destroy_comp_channel(channel);
        }
 
@@ -603,7 +602,7 @@ void ucm_async_event(struct dapl_hca *hca)
                switch (event.event_type) {
                case IBV_EVENT_CQ_ERR:
                {
-                       struct dapl_ep *evd_ptr =
+                       struct dapl_evd *evd_ptr =
                                event.element.cq->cq_context;
 
                        dapl_log(DAPL_DBG_TYPE_ERR,
@@ -613,7 +612,7 @@ void ucm_async_event(struct dapl_hca *hca)
                        /* report up if async callback still setup */
                        if (tp->async_cq_error)
                                tp->async_cq_error(hca->ib_hca_handle,
-                                                  event.element.cq,
+                                                  evd_ptr->ib_cq_handle,
                                                   &event, (void *)evd_ptr);
                        break;
                }
index b8f5646a47b1ef67ea3d0321a329cd23de3ffae4..873bf6d0c61837b13bd047f23363f316fe429f65 100644 (file)
 #include <getopt.h>
 #include <fcntl.h>
 #include <scif.h>
+#include <infiniband/verbs.h>
 #include "dat2/udat.h"
 #include "dat2/dat_mic_extensions.h"
 
+#define min(a, b) (((a) < (b)) ? (a) : (b)) /* args parenthesized; still evaluated twice */
+#define max(a, b) (((a) > (b)) ? (a) : (b)) /* args parenthesized; still evaluated twice */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define htonll(x) (x)
+#define ntohll(x) (x)
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x)  bswap_64(x)
+#define ntohll(x)  bswap_64(x)
+#endif
+
 /*
  * Service options - set through mpxyd.conf file.
  */
@@ -57,13 +69,12 @@ static char *opts_file = MPXYD_CONF;
 static char log_file[128] = "stdout";
 static int log_level = 0;
 static char lock_file[128] = "/var/run/mpxyd.pid";
-static char scif_dev[32] = "scif";
-static short scif_sport = SCIF_OFED_PORT_7;
-static scif_epd_t scif_ep;
+static short scif_sport = SCIF_OFED_PORT_8;
+static scif_epd_t scif_listen_ep;
 static struct scif_portID scif_id;
 
 /* scif-rdma cmd and data channel parameters */
-static int mix_buffer_mb = 64;
+static int mix_buffer_mb = 4;
 static int mix_buffer_sg = 128 * 1024;
 static int mix_cmd_depth = 50;
 static int mix_cmd_size = 256;
@@ -72,22 +83,25 @@ static int mix_cmd_size = 256;
 static int mcm_depth = 500;
 static int mcm_size = 256;
 static int mcm_signal = 100;
-static int mcm_retry_cnt = 10;
+static int mcm_retry = 10;
 static int mcm_rep_ms = 800;
 static int mcm_rtu_ms = 400;
 
 static FILE *logfile;
-static pthread_t mpxy_thread;
 static pthread_mutex_t flock;
 
-/* lists, fds, etc  */
-static struct llist_entry
+/* lists, fds, etc., include tid for lists  */
+typedef struct _llist_entry
 {
-    struct llist_entry *next;
-    struct llist_entry *prev;
+    struct _llist_entry        *next;
+    struct _llist_entry        *prev;
+    struct _llist_entry        *head;
     void               *data;
-};
+    uint32_t           tid;
+
+} LLIST_ENTRY;
 
+#define MCM_PORT_SPACE 0xffff
 #define MCM_FD_SETSIZE 1024
 struct mcm_fd_set {
        int index;
@@ -95,48 +109,157 @@ struct mcm_fd_set {
 };
 
 /* IB verbs device lists */
-static struct ibv_device **iblist;
-static struct llist_entry mcm_llist;
+static LLIST_ENTRY mcm_list;
 static pthread_mutex_t mcm_llock;
 
+typedef enum mcm_state /* CM object states; order mirrors the state[] name table in mcm_state_str() */
+{
+       MCM_INIT,
+       MCM_LISTEN,
+       MCM_CONN_PENDING,
+       MCM_REP_PENDING,
+       MCM_ACCEPTING,
+       MCM_ACCEPTING_DATA,
+       MCM_ACCEPTED,
+       MCM_REJECTING,
+       MCM_REJECTED,
+       MCM_CONNECTED,
+       MCM_RELEASE,
+       MCM_DISC_PENDING,
+       MCM_DISCONNECTED,
+       MCM_DESTROY,
+       MCM_RTU_PENDING,
+       MCM_DISC_RECV,
+       MCM_FREE,
+
+} MCM_STATE;
+
 /* Support for IB devices - One service per device: UD QP for fabric CM services */
-static struct mcm_ib_dev {
-       DLIST_ENTRY             entry;
-       DLIST_ENTRY             mix_list; /* MIC client open instances */
-       pthread_mutex_t         mix_lock;
+typedef struct mcm_ib_dev {
+       LLIST_ENTRY             entry;
+       LLIST_ENTRY             smd_list;       /* MIC client open instances */
+       pthread_mutex_t         slock;          /* SCIF client device lock */
+       pthread_mutex_t         plock;          /* port space lock */
        /* MCM - IB Device Resources */
-       ibv_context             *ib_dev;
-       uint16_t                port;
+       struct ibv_device       *ibdev;
+       struct ibv_context      *ibctx;
+       int                     ref_count;
+       char                    name[IBV_SYSFS_NAME_MAX];
+       uint16_t                port;           /* IB device port */
        struct ibv_pd           *pd;
        struct ibv_cq           *scq;
        struct ibv_cq           *rcq;
        struct ibv_qp           *qp;
        struct ibv_mr           *mr_rbuf;
        struct ibv_mr           *mr_sbuf;
-       ib_cm_msg_t             *sbuf;
-       ib_cm_msg_t             *rbuf;
        struct ibv_comp_channel *rch;
        struct ibv_ah           **ah;
-       union dat_mcm_addr      addr;
+       dat_mcm_msg_t           *sbuf;
+       dat_mcm_msg_t           *rbuf;
+       uint64_t                *ports; /* SCIF device open clients, cm_id*/
+       dat_mcm_addr_t          addr;
        uint16_t                lid;
-       uint8_t                 sl;
-       uint16_t                pkey;
-       int                     pkey_idx;
-};
-
-/* per MIC MCM client open, SCIF device: TODO share message resources across clients? */
-static struct mcm_scif_dev {
-       struct list_entry       entry;
+       dat_mix_dev_attr_t      dev_attr; /* provided with mix_open */
+       int                     s_hd;
+       int                     s_tl;
+       int                     cqe;
+       int                     qpe;
+       int                     signal;
+       int                     retries;
+       int                     cm_timer;
+       int                     rep_time;
+       int                     rtu_time;
+
+} mcm_ib_dev_t;
+
+/*  DAPL MCM QP object, id in entry */
+typedef struct mcm_qp {
+       LLIST_ENTRY             entry;
+       struct mcm_scif_dev     *smd;
+       struct mcm_cm           *cm;
+       struct ibv_qp           *ib_qp;
+       dat_mix_qp_attr_t       qp_t;
+       dat_mix_qp_attr_t       qp_r;
+
+} mcm_qp_t;
+
+/*  DAPL MCM CQ object, id in entry */
+typedef struct mcm_cq {
+       LLIST_ENTRY             entry;
+       struct mcm_scif_dev     *smd;
+       struct ibv_cq           *ib_cq;
+       struct ibv_comp_channel *ib_ch;
+       uint32_t                cq_len;
+
+} mcm_cq_t;
+
+/*  DAPL MCM MR object, id in entry */
+typedef struct mcm_mr {
+       LLIST_ENTRY             entry;
+       struct mcm_scif_dev     *smd;
+       uint32_t                len;
+       uint32_t                ib_lkey;
+       uint32_t                ib_rkey;
+       off_t                   scif_off;
+
+} mcm_mr_t;
+
+/*  DAPL MCM Connection/Listen object */
+typedef struct mcm_cm {
+       LLIST_ENTRY             entry;
        pthread_mutex_t         lock;
-       struct mcm_ib_dev       *mcm_dev;
-       scif_epd_t              ep;
-       struct scif_portID      peer;
-       off_t                   r_address;
-       off_t                   r_offset;
-       int                     r_len;
-       dat_mix_msg_t           *sbuf;
-       dat_mix_msg_t           *rbuf;
-};
+       struct mcm_ib_dev       *md;    /* mcm_ib_dev parent reference */
+       struct mcm_scif_dev     *smd;   /* mcm_scif_dev parent reference */
+       struct mcm_cm           *l_ep;  /* listen reference, passive */
+       uint16_t                sid;    /* service ID for endpoint */
+       uint64_t                timer;
+        int                    ref_count;
+       int                     state;
+       int                     retries;
+       struct ibv_comp_channel *ib_ch;
+       struct ibv_pd           *pd;
+       struct ibv_cq           *scq;
+       struct ibv_cq           *rcq;
+       struct mcm_qp           *m_qp;  /* pair of QP's, qp_t and qp_r */
+       uint16_t                p_size; /* accept p_data, for retries */
+       uint8_t                 p_data[DAT_MCM_PDATA_SIZE];
+       struct dat_mcm_msg      msg;
+
+} mcm_cm_t;
+
+/* per MIC MCM client open, SCIF device object:
+ *
+ * TODO share message resources across clients?
+ *     or maybe NOT share IB device, create new thread with each SCIF client?
+ */
+typedef struct mcm_scif_dev {
+       LLIST_ENTRY             entry;
+       LLIST_ENTRY             clist;          /* LISTS: cm list */
+       LLIST_ENTRY             llist;          /* listen list */
+       LLIST_ENTRY             qplist;         /* qp list */
+       LLIST_ENTRY             cqlist;         /* cq list */
+       LLIST_ENTRY             mrlist;         /* mr list */
+       pthread_mutex_t         clock;          /* LOCKS: cm lock */
+       pthread_mutex_t         llock;          /* listen lock */
+       pthread_mutex_t         plock;          /* port space lock */
+       pthread_mutex_t         qplock;         /* qp lock */
+       pthread_mutex_t         cqlock;         /* cq lock */
+       pthread_mutex_t         mrlock;         /* mr lock */
+       int                     ref_count;      /* references */
+       struct mcm_ib_dev       *md;            /* mcm_ib_dev, parent */
+       uint16_t                cm_id;          /* port ID MIC client, md->ports */
+       uint64_t                *ports;         /* EP port space MIC client */
+       scif_epd_t              scif_ep;        /* SCIF EP, MIX device operations */
+       scif_epd_t              scif_cm_ep;     /* SCIF CM EP, MIX device CM messages */
+       struct scif_portID      peer;           /* SCIF EP peer, MIC adapter */
+       struct scif_portID      peer_cm;        /* SCIF CM EP peer, MIC adapter */
+       char                    *m_buf;         /* MIC proxy buffer, SCIF and IB  */
+       struct ibv_mr           *m_mr;          /* ib registration */
+       off_t                   m_offset;       /* SCIF registration */
+       int                     m_len;          /* buffer size */
+       int                     m_seg;          /* segment size */
+
+} mcm_scif_dev_t;
 
 #define mlog(level, format, ...) \
        mpxy_write(level, "%s: "format, __func__, ## __VA_ARGS__)
@@ -158,54 +281,66 @@ static void mpxy_write(int level, const char *format, ...)
        pthread_mutex_unlock(&flock);
        va_end(args);
 }
-/* link list  helper resources */
-static void init_list(struct llist_entry *head)
+
+/* link list helper resources */
+static void init_list(LLIST_ENTRY *head)
 {
         head->next = head;
         head->prev = head;
         head->data = NULL;
+        head->tid = 0;
 }
 
-static int list_empty(struct llist_entry *head)
+static int list_empty(LLIST_ENTRY *head)
 {
         return head->next == head;
 }
 
-static void *get_head_entry(struct llist_entry *head)
+static void *get_head_entry(LLIST_ENTRY *head)
 {
        if (list_empty(head))
                return NULL;
        else
-               return head->data;
+               return head->next->data;
 }
 
-static void *get_next_entry(struct llist_entry *entry, struct lllist_entry *head)
+static void *get_next_entry(LLIST_ENTRY *entry, LLIST_ENTRY *head)
 {
        if (entry->next == head)
                return NULL;
        else
-               return entry->data;
+               return entry->next->data;
 }
 
-static void insert_head(struct llist_entry *entry, struct llist_entry *head, void *data)
+static void insert_head(LLIST_ENTRY *entry, LLIST_ENTRY *head, void *data)
 {
+       head->tid++;    /* each insertion gets unique ID */
+       entry->tid = head->tid;
        entry->next = head->next;
        entry->prev = head;
        entry->data = data;
-        head->next->Prev = entry;
-        head->next = entry;
+       head->next->prev = entry;
+       head->next = entry;
 }
 
-static void insert_tail(struct llist_entry *entry, struct llist_entry *head, void *data)
+static void insert_tail(LLIST_ENTRY *entry, LLIST_ENTRY *head, void *data)
 {
-        insert_head(entry, head->prev, data);
+       head->tid++;    /* each insertion gets unique ID */
+       entry->tid = head->tid;
+       entry->data = data;
+       entry->next = head->prev->next;
+       entry->prev = head->prev;
+       head->prev->next = entry;
+       head->prev = entry;
+
 }
 
-static void remove_entry(struct llist_entry *entry)
+static void remove_entry(LLIST_ENTRY *entry)
 {
-        entry->prev->next = entry->Next;
-        entry->next->prev = entry->Prev;
+        entry->prev->next = entry->next;
+        entry->next->prev = entry->prev;
         entry->data = NULL;
+        entry->tid = 0;
 }
 
 /* FD helper resources */
@@ -219,8 +354,7 @@ static void mcm_fd_zero(struct mcm_fd_set *set)
        set->index = 0;
 }
 
-static int mcm_fd_set(int fd, struct dapl_fd_set *set,
-                      enum DAPL_FD_EVENTS event)
+static int mcm_fd_set(int fd, struct mcm_fd_set *set, int event)
 {
        if (set->index == MCM_FD_SETSIZE - 1) {
                mlog(0," mcm exceeded FD_SETSIZE %d\n", set->index + 1);
@@ -237,11 +371,11 @@ static int mcm_poll(int fd, int event)
        struct pollfd fds;
        int ret;
 
-       fds.fd = s;
+       fds.fd = fd;
        fds.events = event;
        fds.revents = 0;
        ret = poll(&fds, 1, 0);
-       mlog(0, " poll: fd=%d ret=%d, event=0x%x\n", s, ret, fds.revents);
+       mlog(0, " poll: fd=%d ret=%d, event=0x%x\n", fd, ret, fds.revents);
        if (ret == 0)
                return 0;
        else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL))
@@ -257,9 +391,86 @@ static int mcm_select(struct mcm_fd_set *set, int time_ms)
        mlog(1, " select: sleep, fds=%d\n", set->index);
        ret = poll(set->set, set->index, time_ms);
        mlog(1, " select: wakeup, ret=0x%x\n", ret);
+
        return ret;
 }
 
+/* MCM 16-bit port space: claim slot 'port' (0 = highest free slot) for ctx; returns the slot, or 0 if unavailable */
+static uint16_t mcm_get_port(uint64_t *p_port, uint16_t port, uint64_t ctx)
+{
+       int i = 0;
+
+       /* get specific port; index MCM_PORT_SPACE is one past a 0xffff-entry table, so refuse it */
+       if (port) {
+               if (port < MCM_PORT_SPACE && p_port[port] == 0) {
+                       p_port[port] = ctx;
+                       i = port;
+               }
+               goto done;
+       }
+
+       /* get first free port, from the last valid index down; slot 0 is never handed out */
+       for (i = MCM_PORT_SPACE - 1; i > 0; i--) {
+               if (p_port[i] == 0) {
+                       p_port[i] = ctx;
+                       break;
+               }
+       }
+done:
+       return i;
+}
+
+static void mcm_free_port(uint64_t *p_port, uint16_t port)
+{
+       p_port[port] = 0; /* 0 is what mcm_get_port() treats as a free slot */
+}
+
+static uint64_t mcm_get_port_ctx(uint64_t *p_port, uint16_t port)
+{
+       return p_port[port]; /* ctx value stored by mcm_get_port(); 0 while the slot is free */
+}
+
+/* operation, state  strings  */
+static char * mcm_op_str(IN int op)
+{
+       /* op code -> printable name; slot 0 is a placeholder and is never returned */
+       static char *op_names[] = {
+               "INVALID", "REQ", "REP", "REJ_USER",
+               "REJ_CM", "RTU", "DREQ", "DREP",
+       };
+       const int last_op = (int)(sizeof(op_names) / sizeof(op_names[0])) - 1;
+
+       /* only codes 1..last_op have a name */
+       if (op >= 1 && op <= last_op)
+               return op_names[op];
+       return "Invalid OP?";
+}
+
+static char * mcm_state_str(IN int st)
+{
+       /* Printable names for CM states, indexed by the state's numeric value;
+        * same order as the MCM_STATE enum, MCM_INIT first, MCM_FREE last. */
+       static char *st_names[] = {
+               "CM_INIT", "CM_LISTEN",
+               "CM_CONN_PENDING", "CM_REP_PENDING",
+               "CM_ACCEPTING", "CM_ACCEPTING_DATA",
+               "CM_ACCEPTED", "CM_REJECTING",
+               "CM_REJECTED", "CM_CONNECTED",
+               "CM_RELEASE", "CM_DISC_PENDING",
+               "CM_DISCONNECTED", "CM_DESTROY",
+               "CM_RTU_PENDING", "CM_DISC_RECV",
+               "CM_FREE"
+       };
+       const int n_states = (int)(sizeof(st_names) / sizeof(st_names[0]));
+
+       /* values outside the table get a fixed fallback string */
+       if (st < 0 || st >= n_states)
+               return "Invalid CM state?";
+
+       return st_names[st];
+}
+
+
 static FILE *mpxy_open_log(void)
 {
        FILE *f;
@@ -298,16 +509,14 @@ static void mpxy_set_options(void)
                        log_level = atoi(value);
                else if (!strcasecmp("lock_file", opt))
                        strcpy(lock_file, value);
-               else if (!strcasecmp("rdma_buffer_kb", opt))
-                       rdma_buffer_size = atoi(value);
-               else if (!strcasecmp("cm_msg_depth", opt))
-                       cm_msg_depth = atoi(value);
+               else if (!strcasecmp("buffer_pool_mb", opt))
+                       mix_buffer_mb = atoi(value);
+               else if (!strcasecmp("mcm_depth", opt))
+                       mcm_depth = atoi(value);
                else if (!strcasecmp("scif_port_id", opt))
                        scif_sport = (short) atoi(value);
-               else if (!strcasecmp("tx_depth", opt))
-                       tx_depth = atoi(value);
-               else if (!strcasecmp("tx_signal_rate", opt))
-                       tx_signal = atoi(value);
+               else if (!strcasecmp("mcm_signal_rate", opt))
+                       mcm_signal = atoi(value);
        }
 
        fclose(f);
@@ -318,10 +527,9 @@ static void mpxy_log_options(void)
        mlog(0, "log level %d\n", log_level);
        mlog(0, "lock file %s\n", lock_file);
        mlog(0, "SCIF server_port %d\n", scif_sport);
-       mlog(0, "rdma buffer pool size %d\n", buffer_pool_mb);
-       mlog(0, "transmit queue depth %d\n", tx_depth);
-       mlog(0, "transmit completion signal rate %d\n", tx_signal);
-       mlog(0, "uDAPL provider/device - %s\n", dapl_dev);
+       mlog(0, "rdma buffer pool size %d\n", mix_buffer_mb);
+       mlog(0, "mcm msg queue depth %d\n", mcm_depth);
+       mlog(0, "mcm msg completion signal rate %d\n", mcm_signal);
 }
 
 static int mpxy_open_lock_file(void)
@@ -380,26 +588,26 @@ static int init_scif()
                return -1;
        }
 
-       scif_ep = scif_open();
-       if (scif_ep < 0) {
+       scif_listen_ep = scif_open();
+       if (scif_listen_ep < 0) {
                mlog(0, "scif_open() failed with error %d\n", errno);
                return -1;
        }
-       mlog(1,"Opened SCIF endpoint for listening\n");
+       mlog(1,"Opened SCIF endpoint for OPERATIONS listening, ep = %d\n", scif_listen_ep);
 
-       ret = scif_bind(scif_ep, scif_sport);
+       ret = scif_bind(scif_listen_ep, scif_sport);
        if (ret < 0) {
-               fprintf(stderr, "scif_bind() failed with error %d\n", errno);
-               scif_close(scif_ep);
+               mlog(0, "scif_bind() to %d failed with error %s\n", scif_sport, strerror(errno));
+               scif_close(scif_listen_ep);
                return -1;
        }
-       scif_id.port = ret;
-       mlog(1,"Bind to reserved SCIF OFED port %d\n", (uint16_t)scif_id.port);
 
-       ret = scif_listen(scif_ep, 5);
+       scif_id.port = ret;
+       mlog(1,"Bound to reserved SCIF OFED port %d\n", (uint16_t)scif_id.port);
+       ret = scif_listen(scif_listen_ep, 5);
        if (ret < 0) {
                mlog(0, "scif_listen() failed with error %d\n", errno);
-               scif_close(scif_ep);
+               scif_close(scif_listen_ep);
                return -1;
        }
 
@@ -408,7 +616,13 @@ static int init_scif()
 
 static void close_scif()
 {
-       scif_close(scif_ep);
+       scif_close(scif_listen_ep);
+}
+
+static void close_ib()
+{
+       /* deliberately a no-op for now: IB cleanup is expected to be done by the server thread -- TODO confirm */
+       return;
+}
 
 static int config_fd(int fd)
@@ -424,17 +638,58 @@ static int config_fd(int fd)
        return 0;
 }
 
+/* Create an address handle on pd for remote lid (plus GRH when gid != NULL); returns the AH, or NULL after logging */
+static struct ibv_ah *mcm_create_ah(mcm_ib_dev_t *md,
+                                   struct ibv_pd *pd,
+                                   struct ibv_qp *qp, /* not referenced in this function */
+                                   uint16_t lid,
+                                   union ibv_gid *gid)
+{
+       struct ibv_qp_attr qp_attr;
+       struct ibv_ah *ah;
+
+       memset((void *)&qp_attr, 0, sizeof(qp_attr));
+       qp_attr.qp_state = IBV_QP_STATE; /* NOTE(review): attr-mask constant stored in a state field; only ah_attr is passed on below */
+       qp_attr.ah_attr.dlid = lid; /* lid is used exactly as passed, no byte swap here */
+       if (gid != NULL) {
+               mlog(2, "create_ah: with GID\n");
+               qp_attr.ah_attr.is_global = 1;
+               qp_attr.ah_attr.grh.dgid.global.subnet_prefix =
+                               ntohll(gid->global.subnet_prefix);
+               qp_attr.ah_attr.grh.dgid.global.interface_id =
+                               ntohll(gid->global.interface_id);
+               qp_attr.ah_attr.grh.hop_limit = md->dev_attr.hop_limit;
+               qp_attr.ah_attr.grh.traffic_class = md->dev_attr.tclass;
+       }
+       qp_attr.ah_attr.sl = md->dev_attr.sl;
+       qp_attr.ah_attr.src_path_bits = 0;
+       qp_attr.ah_attr.port_num = md->port;
+
+       mlog(2, "create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n",
+               md->port, qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);
+
+       /* UD: create AH for remote side, from the ah_attr part of qp_attr only */
+       ah = ibv_create_ah(pd, &qp_attr.ah_attr);
+       if (!ah) {
+               mlog(0, " create_ah: ERR %s\n", strerror(errno));
+               return NULL;
+       }
+
+       mlog(2, "create_ah: AH %p for lid %x\n", ah, qp_attr.ah_attr.dlid);
+       return ah;
+}
+
 /* Modify UD-QP from init, rtr, rts, info network order */
-static int modify_ud_qp(struct mcm_dev md, struct ibv_qp qp)
+static int modify_ud_qp(mcm_ib_dev_t *md, struct ibv_qp *qp)
 {
        struct ibv_qp_attr qp_attr;
 
        /* modify QP, setup and prepost buffers */
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+       memset((void *)&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IBV_QPS_INIT;
-        qp_attr.pkey_index = md->pkey_idx;
+        qp_attr.pkey_index = md->dev_attr.pkey_idx;
         qp_attr.port_num = md->port;
-        qp_attr.qkey = DAT_UD_QKEY;
+        qp_attr.qkey = DAT_MCM_UD_QKEY;
        if (ibv_modify_qp(qp, &qp_attr,
                          IBV_QP_STATE          |
                          IBV_QP_PKEY_INDEX     |
@@ -443,13 +698,13 @@ static int modify_ud_qp(struct mcm_dev md, struct ibv_qp qp)
                mlog(0, " modify_ud_qp INIT: ERR %s\n", strerror(errno));
                return 1;
        }
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+       memset((void *)&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IBV_QPS_RTR;
-       if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) {
+       if (ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE)) {
                mlog(0, " modify_ud_qp RTR: ERR %s\n", strerror(errno));
                return 1;
        }
-       dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+       memset((void *)&qp_attr, 0, sizeof(qp_attr));
        qp_attr.qp_state = IBV_QPS_RTS;
        qp_attr.sq_psn = 1;
        if (ibv_modify_qp(qp, &qp_attr,
@@ -460,70 +715,310 @@ static int modify_ud_qp(struct mcm_dev md, struct ibv_qp qp)
        return 0;
 }
 
-static int init_ib()
+static int modify_qp(struct ibv_qp     *qp_handle,
+                    enum ibv_qp_state  qp_state,
+                    uint32_t           qpn,
+                    uint16_t           lid,
+                    union ibv_gid      *gid)
+{      /* move qp_handle to qp_state; INIT, RTR and RTS take extra attributes from smd->md, other states change state only */
+       struct ibv_qp_attr qp_attr;
+       enum ibv_qp_attr_mask mask = IBV_QP_STATE;
+       mcm_qp_t *m_qp = (mcm_qp_t *)qp_handle->qp_context;
+       int ret;
+       /* qpn/lid are converted with ntohl/ntohs; gid is only logged (NOTE(review): no GRH set). Returns ibv_modify_qp() result, 0 == ok */
+       memset((void *)&qp_attr, 0, sizeof(qp_attr));
+       qp_attr.qp_state = qp_state;
+
+       switch (qp_state) {
+       case IBV_QPS_RTR:
+               mlog(1, " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x"
+                       " port %d ep %p qp_state %d \n",
+                       qp_handle->qp_type, ntohl(qpn), gid,
+                       m_qp->smd->md->dev_attr.global,
+                       ntohs(lid), m_qp->smd->md->port,
+                       m_qp, m_qp->qp_t.cur_state);
+
+               mask |= IBV_QP_AV |
+                       IBV_QP_PATH_MTU |
+                       IBV_QP_DEST_QPN |
+                       IBV_QP_RQ_PSN |
+                       IBV_QP_MIN_RNR_TIMER;
+
+               qp_attr.dest_qp_num = ntohl(qpn);
+               qp_attr.rq_psn = 1;
+               qp_attr.path_mtu = m_qp->smd->md->dev_attr.mtu;
+               qp_attr.max_dest_rd_atomic = 0;
+               qp_attr.min_rnr_timer = m_qp->smd->md->dev_attr.rnr_timer;
+
+               /* address handle. RC and UD */
+               qp_attr.ah_attr.dlid = ntohs(lid);
+               qp_attr.ah_attr.sl = m_qp->smd->md->dev_attr.sl;
+               qp_attr.ah_attr.src_path_bits = 0;
+               qp_attr.ah_attr.port_num = m_qp->smd->md->port;
+               break;
+
+       case IBV_QPS_RTS:
+               mask |= IBV_QP_SQ_PSN |
+                       IBV_QP_TIMEOUT |
+                       IBV_QP_RETRY_CNT |
+                       IBV_QP_RNR_RETRY;
+               qp_attr.sq_psn = 1;
+               qp_attr.timeout = m_qp->smd->md->dev_attr.ack_timer;
+               qp_attr.retry_cnt = m_qp->smd->md->dev_attr.ack_retry;
+               qp_attr.rnr_retry = m_qp->smd->md->dev_attr.rnr_retry;
+
+               mlog(1, " QPS_RTS: psn %x rd_atomic %d ack %d "
+                       " retry %d rnr_retry %d m_qp %p qp_state %d\n",
+                       qp_attr.sq_psn, qp_attr.max_rd_atomic,
+                       qp_attr.timeout, qp_attr.retry_cnt,
+                       qp_attr.rnr_retry, m_qp, m_qp->qp_t.cur_state);
+               break;
+
+       case IBV_QPS_INIT:
+               mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
+               qp_attr.qp_access_flags =
+                               IBV_ACCESS_LOCAL_WRITE |
+                               IBV_ACCESS_REMOTE_WRITE |
+                               IBV_ACCESS_REMOTE_READ;
+               qp_attr.pkey_index = m_qp->smd->md->dev_attr.pkey_idx;
+               qp_attr.port_num = m_qp->smd->md->port;
+
+               mlog(1, " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
+                       qp_attr.pkey_index, qp_attr.port_num,
+                       qp_attr.qp_access_flags, qp_attr.qkey);
+               break;
+
+       default:
+               break;
+       }
+
+       ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
+       if (ret == 0) {
+               m_qp->qp_t.cur_state = m_qp->qp_t.state = qp_state;
+               return 0;
+       } else {
+               mlog(0, " modify_qp ERR (%s): type %d qpn 0x%x lid 0x%x"
+                       " port %d state %d mtu %d rd %d rnr %d sl %d\n",
+                       strerror(ret), qp_handle->qp_type, ntohl(qpn),
+                       ntohs(lid), m_qp->smd->md->port,
+                       m_qp->qp_t.cur_state,
+                       qp_attr.path_mtu, qp_attr.max_dest_rd_atomic,
+                       qp_attr.min_rnr_timer, qp_attr.ah_attr.sl);
+       }
+       return ret;
+}
+
+/* MCM Endpoint CM objects */
+void mcm_cm_free(mcm_cm_t *cm)
+{
+       /* no listen reference (client) and a source port set: give that port back to the smd port space */
+       if (!cm->l_ep && cm->msg.sport)
+               mcm_free_port(cm->smd->ports, ntohs(cm->msg.sport));
+
+       pthread_mutex_destroy(&cm->lock);
+       free(cm);
+}
+
+mcm_cm_t *mcm_cm_create(mcm_scif_dev_t *smd, mcm_qp_t *m_qp)
 {
+       mcm_cm_t *cm;
+
+       /* Allocate a zeroed CM and init its list entry and lock; every failure path returns NULL */
+       if ((cm = malloc(sizeof(*cm))) == NULL)
+               return NULL;
+       memset(cm, 0, sizeof(*cm));
+
+       init_list(&cm->entry);
+       if (pthread_mutex_init(&cm->lock, NULL))
+               goto bail;
+
+       cm->smd = smd;
+       cm->msg.ver = htons(DAT_MCM_VER);
+       cm->msg.sqpn = htonl(smd->md->qp->qp_num); /* ucm */
+
+       /* ACTIVE (m_qp given): take a source port and fill source address QP info from MPXYD and MIC client */
+       if (m_qp) {
+               cm->msg.sport = htons(mcm_get_port(smd->ports, 0, (uint64_t)smd));
+               if (!cm->msg.sport) { /* mcm_get_port() returned 0: no free port */
+                       pthread_mutex_destroy(&cm->lock);
+                       goto bail;
+               }
+               cm->m_qp = m_qp;
+
+               /* MPXYD src IB info in network order, QP snd */
+               cm->msg.saddr.qpn = htonl(cm->m_qp->qp_t.qp_num); /* ep */
+               cm->msg.saddr.qp_type = cm->m_qp->qp_t.qp_type;
+                cm->msg.saddr.lid = smd->md->addr.lid;
+               memcpy(&cm->msg.saddr.gid[0], &smd->md->addr.gid, 16);
+
+               /* MIC src IB info in network order,  QP rcv */
+               cm->msg.saddr2.qpn = htonl(cm->m_qp->qp_r.qp_num); /* ep */
+               cm->msg.saddr2.qp_type = cm->m_qp->qp_r.qp_type;
+                cm->msg.saddr2.lid = smd->md->addr.lid;
+               memcpy(&cm->msg.saddr2.gid[0], &smd->md->addr.gid, 16);
+
+        }
+       return cm;
+bail:
+       free(cm);
+       return NULL;
+}
+
+/* queue connection object cm at the tail of smd's CM list (clist), under smd->clock */
+static void mcm_qconn(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+       /* add to CONN work queue, list, for mcm fabric CM; the entry's data pointer is set to cm */
+       pthread_mutex_lock(&smd->clock);
+       insert_tail(&cm->entry, &smd->clist, (void *)cm);
+       pthread_mutex_unlock(&smd->clock);
+}
+/* dequeue connection object cm from smd's CM list, under smd->clock */
+static void mcm_dqconn(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+       /* Remove from work queue; remove_entry() also clears the entry's data and tid */
+       pthread_mutex_lock(&smd->clock);
+       remove_entry(&cm->entry);
+       pthread_mutex_unlock(&smd->clock);
+
+}
+/* queue listen object cm at the tail of smd's listen list (llist), under smd->llock */
+static void mcm_qlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+       /* add to LISTEN work queue, list, for mcm fabric CM; the entry's data pointer is set to cm */
+       pthread_mutex_lock(&smd->llock);
+       insert_tail(&cm->entry, &smd->llist, (void *)cm);
+       pthread_mutex_unlock(&smd->llock);
+}
+/* dequeue listen object cm from smd's listen list, under smd->llock */
+static void mcm_dqlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+       pthread_mutex_lock(&smd->llock);
+       remove_entry(&cm->entry); /* unlinks cm and clears the entry's data and tid */
+       pthread_mutex_unlock(&smd->llock);
+}
+
+/* Open the IB device called 'name' and check that 'port' can be queried on it.
+ * Returns the open context; NULL if the list call, the name match, the open or the port query fails.
+ */
+static struct ibv_context *open_ib_device(char *name, int port)
+{
+       int i, ibcnt;
+       struct ibv_device **iblist;
+       struct ibv_context *ibctx = NULL;
        struct ibv_port_attr port_attr;
-       int i, num_devices;
 
        /* get list of all IB devices, open 1st IB type by default */
-       iblist = ibv_get_device_list(&num_devices);
+       iblist = ibv_get_device_list(&ibcnt);
        if (!iblist) {
-               mlog(0, " ibv_get_dev_list() failed - %d\n", errno);
-               return 1;
+               mlog(0,"ERR ibv_get_dev_list, %s\n", strerror(errno));
+               return NULL;
        }
 
-       for (i=0; i < num_devices; ++i) {
-               if (iblist[i].transport_type != IBV_TRANSPORT_IB)
-                       continue;
+       for (i=0; i < ibcnt; ++i) {
+               if (!strcmp(iblist[i]->name, name)) {
+                       ibctx = ibv_open_device(iblist[i]);
+                       if (!ibctx) {
+                               mlog(0,"ERR ibv_open, %s\n", strerror(errno));
+                               goto bail;
+                       }
+                       if (ibv_query_port(ibctx, port, &port_attr)) {
+                               mlog(0,"ERR ibv_query, %s\n", strerror(errno));
+                               ibv_close_device(ibctx); /* port unusable: neither return nor leak the open context */
+                               ibctx = NULL;
+                       }
+                       break; /* name matched: stop scanning, ibctx is the open context or NULL */
+               }
                else {
-                       mlog(1, " opening 1st IB device found - %s\n",
-                            ibv_get_device_name(iblist[i]));
-                       break;
+                       continue;
                }
        }
-       if (i == num_devices) {
-               mlog(1, " no IB devices found, exit\n");
-               ibv_free_device_list(iblist);
-               return 1;
-       }
-
-       return 0;
+bail:
+       ibv_free_device_list(iblist);
+       return ibctx;
 }
 
-static void close_ib()
+static void mcm_destroy(struct mcm_ib_dev *md) /* release whichever IB objects and buffers of md are set; md itself is not freed here */
 {
-       ibv_free_device_list(iblist);
+       if (md->mr_sbuf)
+               ibv_dereg_mr(md->mr_sbuf);
+
+       if (md->mr_rbuf)
+               ibv_dereg_mr(md->mr_rbuf);
+
+       if (md->qp)
+               ibv_destroy_qp(md->qp);
+
+       if (md->scq)
+               ibv_destroy_cq(md->scq);
+
+       if (md->rcq)
+               ibv_destroy_cq(md->rcq);
+
+       if (md->rch)
+               ibv_destroy_comp_channel(md->rch);
+
+       if (md->ah) {
+               int i;
+
+               for (i = 0;i < 0xffff; i++) { /* 0xffff == number of AH slots allocated in init_mcm_service() */
+                       if (md->ah[i])
+                               ibv_destroy_ah(md->ah[i]);
+               }
+               free(md->ah);
+       }
+
+       if (md->pd)
+               ibv_dealloc_pd(md->pd);
+
+       if (md->ports)
+               free(md->ports);
+
+       if (md->rbuf)
+               free(md->rbuf);
+
+       if (md->sbuf)
+               free(md->sbuf);
+
        return;
 }
 
-static int init_mcm_service(struct mcm_ib_dev *md)
+static int init_mcm_service(mcm_ib_dev_t *md)
 {
         struct ibv_qp_init_attr qp_create;
        struct ibv_recv_wr recv_wr, *recv_err;
         struct ibv_sge sge;
        int i, mlen = 256; /* overhead for mcm_msg & ibv_grh */
 
-       mlog(1, " create MCM services.. \n");
+       mlog(1, " create MCM services.. %p\n", md);
+
+       /* setup CM msg attributes and timers */
+       md->retries = mcm_retry;
+       md->rep_time = mcm_rep_ms;
+       md->rtu_time = mcm_rtu_ms;
+       md->cm_timer = min(md->rep_time, md->rtu_time);
+       md->qpe = mcm_depth;
+       md->cqe = mcm_depth;
+       md->signal = mcm_signal;
 
        /* setup CM timers and queue sizes */
-       md->pd = ibv_alloc_pd(md->ibdev);
+       md->pd = ibv_alloc_pd(md->ibctx);
         if (!md->pd)
                 goto bail;
 
         mlog(1, " allocated PD\n");
 
-       md->rch = ibv_create_comp_channel(md->ibdev);
+       md->rch = ibv_create_comp_channel(md->ibctx);
        if (!md->rch)
                goto bail;
        config_fd(md->rch->fd);
 
        mlog(1, " allocated rx completion channel\n");
 
-       md->scq = ibv_create_cq(md->ibdev, md->cqe, md, NULL, 0);
+       md->scq = ibv_create_cq(md->ibctx, md->cqe, md, NULL, 0);
        if (!md->scq)
                goto bail;
 
-       md->rcq = ibv_create_cq(md->ibdev, md->cqe, md, md->rch, 0);
+       md->rcq = ibv_create_cq(md->ibctx, md->cqe, md, md->rch, 0);
        if (!md->rcq)
                goto bail;
 
@@ -538,7 +1033,7 @@ static int init_mcm_service(struct mcm_ib_dev *md)
        qp_create.recv_cq = md->rcq;
        qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = md->qpe;
        qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1;
-       qp_create.cap.max_inline_data = md->max_inline_send;
+       qp_create.cap.max_inline_data = 256; /* best latency for CM messages */
        qp_create.qp_context = (void *)md;
 
        md->qp = ibv_create_qp(md->pd, &qp_create);
@@ -547,18 +1042,18 @@ static int init_mcm_service(struct mcm_ib_dev *md)
 
        mlog(1, " created QP\n");
 
-       md->ah = (ib_ah_handle_t*) malloc(sizeof(ib_ah_handle_t) * 0xffff);
-       md->sid = (uint8_t*) malloc(sizeof(uint8_t) * 0xffff);
-       md->rbuf = (void*) malloc(mlen * md->qpe);
-       md->sbuf = (void*) malloc(mlen * md->qpe);
+       md->ah = (struct ibv_ah **) malloc(sizeof(struct ibv_ah *) * 0xffff);
+       md->ports = (uint64_t*) malloc(sizeof(uint64_t) * 0xffff);
+       md->rbuf = malloc(mlen * md->qpe);
+       md->sbuf = malloc(mlen * md->qpe);
        md->s_hd = md->s_tl = 0;
 
-       if (!md->ah || !md->rbuf || !md->sbuf || !md->sid)
+       if (!md->ah || !md->rbuf || !md->sbuf || !md->ports)
                goto bail;
 
-       (void)memset(md->ah, 0, (sizeof(ib_ah_handle_t) * 0xffff));
-       (void)memset(md->sid, 0, (sizeof(uint8_t) * 0xffff));
-       md->sid[0] = 1; /* resv slot 0, 0 == no ports available */
+       (void)memset(md->ah, 0, (sizeof(struct ibv_ah *) * 0xffff));
+       (void)memset(md->ports, 0, (sizeof(uint64_t) * 0xffff));
+       md->ports[0] = 1; /* resv slot 0, 0 == no ports available */
        (void)memset(md->rbuf, 0, (mlen * md->qpe));
        (void)memset(md->sbuf, 0, (mlen * md->qpe));
 
@@ -569,7 +1064,7 @@ static int init_mcm_service(struct mcm_ib_dev *md)
                goto bail;
 
        md->mr_rbuf = ibv_reg_mr(md->pd, md->rbuf,
-                                ((mlen + hlen) * md->qpe),
+                                (mlen * md->qpe),
                                 IBV_ACCESS_LOCAL_WRITE);
        if (!md->mr_rbuf)
                goto bail;
@@ -584,7 +1079,7 @@ static int init_mcm_service(struct mcm_ib_dev *md)
        recv_wr.next = NULL;
        recv_wr.sg_list = &sge;
        recv_wr.num_sge = 1;
-       sge.length = mlen + hlen;
+       sge.length = mlen;
        sge.lkey = md->mr_rbuf->lkey;
 
        for (i = 0; i < md->qpe; i++) {
@@ -597,7 +1092,7 @@ static int init_mcm_service(struct mcm_ib_dev *md)
        }
 
        /* save qp_num as part of ia_address, network order */
-       md->addr.ib.qpn = htonl(md->qp->qp_num);
+       md->addr.qpn = htonl(md->qp->qp_num);
         return 0;
 
 bail:
@@ -606,21 +1101,135 @@ bail:
        return -1;
 }
 
-/****************  MIX operations ********************************/
+/*
+ * Destroy a SCIF client device (SMD); caller holds md->slock.
+ *
+ * Frees the client port space, unlinks the SMD from the list it is queued
+ * on, releases the RDMA proxy buffer (IB registration, then the memory),
+ * destroys the per-SMD mutexes and finally frees the object itself.
+ * The visible caller, mix_close_device(), closes both SCIF endpoints
+ * before calling in here.
+ */
+static void mcm_destroy_smd(mcm_scif_dev_t *smd)
+{
+       int ret = 0;
+
+       /* free port space, under lock */
+       pthread_mutex_lock(&smd->plock);
+       if (smd->ports) {
+               free(smd->ports);
+               smd->ports = NULL;
+       }
+       pthread_mutex_unlock(&smd->plock);
 
-/* open MCM device, MIC clients via SCIF well known port - SCIF_OFED_PORT_7 */
-static struct scif_mic_dev *open_mcm_device(char *name, int port, scif_epd_t listen_ep)
+       /* TODO: walk all lists and cleanup resources, right now assume they are gone */
+       if (smd->ref_count) {
+               mlog(1, " WARNING: ref_count not 0, = %d \n", smd->ref_count);
+       }
+       remove_entry(&smd->entry);
+
+       /* drop the IB registration of the proxy buffer allocated in mcm_create_smd() */
+       if (smd->m_mr) {
+               ret = ibv_dereg_mr(smd->m_mr);
+               if (ret)
+                       mlog(0, " ERR: ibv_dereg_mr %p, %s\n", smd->m_mr, strerror(ret));
+               smd->m_mr = NULL;
+       }
+
+       /* only hand the memory back when it is no longer registered with the HCA */
+       if (!ret && smd->m_buf) {
+               free(smd->m_buf);
+               smd->m_buf = NULL;
+       }
+
+       /* destroy all mutex resources */
+       pthread_mutex_destroy(&smd->plock);
+       pthread_mutex_destroy(&smd->clock);
+       pthread_mutex_destroy(&smd->llock);
+       pthread_mutex_destroy(&smd->qplock);
+       pthread_mutex_destroy(&smd->cqlock);
+       pthread_mutex_destroy(&smd->mrlock);
+
+       smd->md = NULL;
+       free(smd);
+}
+
+/*
+ * Create a SCIF client device (SMD) on IB device md, one per MIC client.
+ *
+ * Allocates the object, allocates the RDMA proxy buffer and registers it
+ * with SCIF (on op_ep) and with IB (on md->pd), allocates the client port
+ * space, reserves a cm_id in md->ports under md->plock, then initializes
+ * the per-SMD mutexes and lists.
+ *
+ * Returns the new SMD, or NULL on failure with everything acquired so far
+ * released again. The endpoints op_ep and cm_ep are never closed here.
+ */
+static mcm_scif_dev_t *mcm_create_smd(mcm_ib_dev_t *md, scif_epd_t op_ep, scif_epd_t cm_ep)
 {
-       int i;
-       struct mcm_ib_dev *md;
-       struct mcm_scif_dev *smd = NULL;
+       mcm_scif_dev_t  *smd = NULL;
+       int ret;
+       int scif_reg = 0;       /* set once the SCIF window is registered */
 
+       /* SCIF device object, allocate and init resources, one per MIC client */
+       smd = malloc(sizeof(*smd));
+       if (!smd)
+               goto err;
+       memset(smd, 0, sizeof(*smd));
+       smd->md = md;
+
+       /* RDMA buffers, register with SCIF and IB */
+       smd->m_len = mix_buffer_mb * (1024 * 1024);
+       ret = posix_memalign((void **)&smd->m_buf, 4096, smd->m_len);
+       if (ret) {
+               smd->m_buf = NULL;      /* do not rely on memptr after a failure */
+               goto err;
+       }
+       mlog(1, " Allocate/Register RDMA Proxy buffer %p, ln=%d\n", smd->m_buf, smd->m_len);
+
+       smd->m_offset = scif_register(op_ep, smd->m_buf, smd->m_len,
+                                     (off_t)0, SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
+       if (smd->m_offset == (off_t)(-1)) {
+               mlog(1, " scif_register addr=%p,%d failed %s\n", smd->m_buf, smd->m_len, strerror(errno));
+               goto err;
+       }
+       scif_reg = 1;
+       mlog(1, " SCIF addr=%p, offset=0x%llx, len %d\n", smd->m_buf, smd->m_offset, smd->m_len);
+
+       smd->m_mr = ibv_reg_mr(smd->md->pd, smd->m_buf, smd->m_len,
+                              IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
+       if (smd->m_mr == NULL) {
+               mlog(1, " IB addr=%p,%d failed %s\n", smd->m_buf, smd->m_len, strerror(errno));
+               goto err;
+       }
+       mlog(1, " IB registered addr=%p,%d, mr_addr=%p handle=0x%x, lkey=0x%x rkey=0x%x \n",
+            smd->m_buf, smd->m_len, smd->m_mr->addr, smd->m_mr->handle, smd->m_mr->lkey, smd->m_mr->rkey);
+
+       /* SCIF device client port space */
+       smd->ports = (uint64_t*) malloc(sizeof(uint64_t) * 0xffff);
+       if (!smd->ports)
+               goto err;
+       memset(smd->ports, 0, sizeof(uint64_t) * 0xffff);
+
+       pthread_mutex_lock(&md->plock);
+
+       smd->scif_ep = op_ep;
+       smd->scif_cm_ep = cm_ep;
+
+       smd->cm_id = mcm_get_port(md->ports, 0, (uint64_t)smd);
+       pthread_mutex_unlock(&md->plock);
+       if (!smd->cm_id)
+               goto err;
+
+       pthread_mutex_init(&smd->plock, NULL);   /* port space for EP's */
+       pthread_mutex_init(&smd->clock, NULL);   /* connect list */
+       pthread_mutex_init(&smd->llock, NULL);   /* listen list */
+       pthread_mutex_init(&smd->qplock, NULL);  /* qp list */
+       pthread_mutex_init(&smd->cqlock, NULL);  /* cq list */
+       pthread_mutex_init(&smd->mrlock, NULL);  /* mr list */
+
+       init_list(&smd->entry);
+       init_list(&smd->clist);
+       init_list(&smd->llist);
+       init_list(&smd->qplist);
+       init_list(&smd->cqlist);
+       init_list(&smd->mrlist);
+
+       return smd;
+err:
+       if (smd) {
+               /* undo in reverse order of acquisition */
+               if (smd->m_mr)
+                       ibv_dereg_mr(smd->m_mr);
+               /* use op_ep: smd->scif_ep may not have been assigned yet */
+               if (scif_reg)
+                       scif_unregister(op_ep, smd->m_offset, smd->m_len);
+               free(smd->m_buf);
+               free(smd->ports);
+               free(smd);
+       }
+       return NULL;
+}
+
+/*
+ *
+ *   Platform side - MIC Indirect eXchange (MIX) operations, SCIF
+ *
+ */
+
+/* open MCM device, New MIC clients via SCIF listen on well known port, new ep from accept */
+static mcm_scif_dev_t *mix_open_device(char *name, int port, scif_epd_t op_ep, scif_epd_t cm_ep)
+{
+       mcm_ib_dev_t *md;
+       mcm_scif_dev_t *smd = NULL;
+
+       mlog(1, " name - %s, port %d\n", name, port);
        pthread_mutex_lock(&mcm_llock);
-       md = get_list_head(&mcm_llist);
+       md = get_head_entry(&mcm_list);
 
        while (md) {
-               if ((!strcmp(ibv_get_device_name(md->ib_dev, name))
-                   && md->port == port))
+               mlog(1, " md %p -> %s port %d\n", md, md->name, md->port);
+               if (!strcmp(md->name, name) && md->port == port)
                        goto found;
                else
                        md = get_next_entry(&md->entry, &mcm_list);
@@ -629,63 +1238,1021 @@ static struct scif_mic_dev *open_mcm_device(char *name, int port, scif_epd_t lis
        /* no IB device object, allocate and init, one per IB device */
        md = malloc(sizeof(*md));
        if (md == NULL)
-               goto done;
+               goto err;
        memset(md, 0, sizeof(*md));
-       init_list(&md->list);
 
-       if (init_mcm_service(md)) {
+       init_list(&md->entry);
+       init_list(&md->smd_list);
+       pthread_mutex_init(&md->slock, NULL);
+       pthread_mutex_init(&md->plock, NULL);
+       strcpy(md->name, name);
+       md->port = port;
+       md->ibctx = open_ib_device(name, port);
+
+       if ((!md->ibctx) || init_mcm_service(md)) {
                free(md);
-               md = NULL;
-               goto done;
+               goto err;
        }
+
        /* queue on active device list */
-       insert_tail(&md->list, &mcm_llist, md);
+       insert_tail(&md->entry, &mcm_list, md);
 
 found:
-       /* SCIF MIX device object, allocate and init, one per MIC client */
-       smd = malloc(sizeof(*smd));
+       /* create SCIF client device on this IB device */
+       smd = mcm_create_smd(md, op_ep, cm_ep);
        if (!smd)
-               goto done;
-       memset(smd, 0, sizeof(*smd));
-
-       /* Accept new MIX message connection */
-       scif_accept(listen_ep, &smd->peer, &smd->ep, SCIF_ACCEPT_SYNC);
-
-
-       smd->mcm_dev = md;
-       pthread_mutex_init(&smd->lock, NULL);
-       init_list(&smd->entry);
+               goto err;
 
        /* insert on active MIX device list */
-       pthread_mutex_lock(&md->mix_lock);
-       insert_tail(&smd->entry, &md->mix_list, (void *)smd);
-       pthread_mutex_unlock(&md->mix_lock);
-
-done:
+       pthread_mutex_lock(&md->slock);
+       insert_tail(&smd->entry, &md->smd_list, (void *)smd);
+       pthread_mutex_unlock(&md->slock);
+err:
        pthread_mutex_unlock(&mcm_llock);
        return smd;
 }
 
-/* close MCM device, MIC clients via MIX */
-static void close_mcm_device(struct mcm_dev *mdev)
+/*
+ * Close MCM device for one MIC client, md->slock held.
+ *
+ * Closes the OP and CM SCIF endpoints of smd (when set) and destroys the
+ * SMD object; the parent mcm_ib_dev md is left open. md is only used for
+ * logging here.
+ */
+static void mix_close_device(mcm_ib_dev_t *md, mcm_scif_dev_t *smd)
 {
+       mlog(1, " md %p smd %p\n", md, smd);
+
+       /* close and remove scif MIX client, leave parent mcm_ib_dev open */
+       if (smd->scif_ep) {
+               scif_close(smd->scif_ep);
+               smd->scif_ep = 0;
+       }
+       if (smd->scif_cm_ep) {
+               scif_close(smd->scif_cm_ep);
+               smd->scif_cm_ep = 0;
+       }
+
+       mcm_destroy_smd(smd);
+       /* smd is gone, the pointer value is printed for tracing only */
+       mlog(1, " freed smd %p\n", smd);
+
        return;
 }
 
-/*  DAPL MCM message */
-static void mcm_rcv_evd(struct mcm_ib_dev *md)
+/*
+ * Accept SCIF endpoint connect request.
+ *
+ * Accepts two connections on listen_ep (OP channel first, then CM channel),
+ * reads the MIX open command from the OP channel, opens the requested
+ * device via mix_open_device() and sends the response back on the OP
+ * channel. Once a device was opened the endpoints stay with its SMD;
+ * on every other path the endpoints accepted so far are closed again.
+ */
+static void mix_scif_accept(scif_epd_t listen_ep)
 {
-       return;
+       struct scif_portID peer, peer_cm;
+       scif_epd_t op_ep, cm_ep;
+       int ret, len;
+       dat_mix_open_t msg;
+       mcm_scif_dev_t  *smd;
+
+       /* 2 channels created with clients, OP and CM processing */
+       ret = scif_accept(listen_ep, &peer, &op_ep, SCIF_ACCEPT_SYNC);
+       if (ret) {
+               /* failure cause is reported via errno, not the return value */
+               mlog(0, " ERR: scif_accept on OP ep %d, ret = %s\n", listen_ep, strerror(errno));
+               return;
+       }
+       ret = scif_accept(listen_ep, &peer_cm, &cm_ep, SCIF_ACCEPT_SYNC);
+       if (ret) {
+               mlog(0, " ERR: scif_accept on CM ep %d, ret = %s\n", listen_ep, strerror(errno));
+               goto close_op;
+       }
+
+       /* connect is followed immediately by MIX open command on OP channel */
+       len = sizeof(msg);
+       ret = scif_recv(op_ep, &msg, len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", op_ep, ret, len);
+               goto close_both;
+       }
+
+       if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_IA_OPEN) {
+               mlog(0, " ERR: mix msg ver (%d) or op (%d) wrong\n", msg.hdr.ver, msg.hdr.op);
+               goto close_both;
+       }
+
+       /* open new device with hca name and port info, send response with addr info */
+       smd = mix_open_device(msg.name, msg.port, op_ep, cm_ep);
+       msg.hdr.flags = MIX_OP_RSP;
+       if (smd) {
+               msg.hdr.status = MIX_SUCCESS;
+               memcpy(&smd->md->dev_attr, &msg.dev_attr, sizeof(dat_mix_dev_attr_t));
+               memcpy(&msg.dev_addr, &smd->md->addr, sizeof(dat_mcm_addr_t));
+       } else {
+               msg.hdr.status = MIX_ENODEV;
+       }
+
+       /* send back response */
+       len = sizeof(msg);
+       ret = scif_send(op_ep, &msg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", op_ep, ret, len);
+               /* remove this created SMD from MCM device ??? */
+       }
+
+       /* endpoints are referenced by the SMD from here on, keep them open */
+       if (smd)
+               return;
+
+close_both:
+       scif_close(cm_ep);
+close_op:
+       scif_close(op_ep);
+}
+
+/*
+ * MIX_LISTEN_FREE: remove the listen CM object whose sid matches
+ * pmsg->req_id from smd->llist, release its port and free it, then send
+ * the header back as response (MIX_SUCCESS when found, else MIX_EINVAL).
+ *
+ * Returns 0 when the response was sent completely, otherwise the
+ * scif_send() result.
+ */
+static int mix_listen_free(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg)
+{
+       int len, ret;
+       int found = 0;  /* do not test cm after it has been freed */
+       mcm_cm_t *cm;
+
+       mlog(1, " MIX_LISTEN_FREE: sid 0x%x \n", pmsg->req_id);
+
+       pthread_mutex_lock(&smd->llock);
+       cm = get_head_entry(&smd->llist);
+       while (cm) {
+               if (cm->sid == (uint16_t)pmsg->req_id) {
+                       remove_entry(&cm->entry);
+                       mcm_free_port(smd->ports, (uint16_t)pmsg->req_id);
+                       mcm_cm_free(cm);
+                       found = 1;
+                       break;
+               }
+               cm = get_next_entry(&cm->entry, &smd->llist);
+       }
+       pthread_mutex_unlock(&smd->llock);
+
+       pmsg->status = found ? MIX_SUCCESS : MIX_EINVAL;
+
+       /* send back response */
+       pmsg->flags = MIX_OP_RSP;
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+               return ret;
+       }
+       return 0;
+}
+
+/*
+ * MIX_LISTEN: read the rest of the listen request (the header was already
+ * consumed by the caller), reserve the requested SID in smd->ports and
+ * queue a CM object in MCM_LISTEN state for it, then send the whole
+ * message back as response.
+ *
+ * Status: MIX_SUCCESS, MIX_ENOMEM when no CM object could be created,
+ * MIX_EADDRINUSE when the port returned differs from the requested SID.
+ * Returns 0 on success, otherwise the failing scif_recv()/scif_send()
+ * result.
+ */
+static int mix_listen(mcm_scif_dev_t *smd, dat_mix_listen_t *pmsg)
+{
+       char *payload = (char *)pmsg + sizeof(dat_mix_hdr_t);
+       int want, got;
+       uint16_t port;
+       mcm_cm_t *lcm;
+
+       /* operation data follows the header in the same buffer */
+       want = sizeof(dat_mix_listen_t) - sizeof(dat_mix_hdr_t);
+       got = scif_recv(smd->scif_ep, payload, want, SCIF_RECV_BLOCK);
+       if (got != want) {
+               mlog(0, " ERR: ret %d, exp %d\n", got, want);
+               return got;
+       }
+       mlog(1, " MIX_LISTEN: sid 0x%x, backlog %d\n", pmsg->sid, pmsg->backlog);
+
+       /* listen EP is only created when exactly the requested SID was granted */
+       port = mcm_get_port(smd->ports, pmsg->sid, (uint64_t)smd);
+       if (port != pmsg->sid) {
+               pmsg->hdr.status = MIX_EADDRINUSE;
+       } else if ((lcm = mcm_cm_create(smd, NULL)) == NULL) {
+               pmsg->hdr.status = MIX_ENOMEM;
+               mcm_free_port(smd->ports, port);
+       } else {
+               lcm->state = MCM_LISTEN;
+               lcm->sid = port;
+               mcm_qlisten(smd, lcm);
+               pmsg->hdr.status = MIX_SUCCESS;
+       }
+
+       /* response carries the complete listen message */
+       pmsg->hdr.flags = MIX_OP_RSP;
+       want = sizeof(dat_mix_listen_t);
+       got = scif_send(smd->scif_ep, pmsg, want, SCIF_SEND_BLOCK);
+       if (got == want)
+               return 0;
+
+       mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", smd->scif_ep, got, want);
+       return got;
+}
+
+/*
+ * Locate CQ object: walk smd->cqlist under smd->cqlock and return the
+ * first entry whose list tid equals tid, or NULL when none matches.
+ * The lock is dropped again before returning.
+ */
+mcm_cq_t *mix_get_cq(mcm_scif_dev_t *smd, uint32_t tid)
+{
+       mcm_cq_t *it;
+
+       pthread_mutex_lock(&smd->cqlock);
+       for (it = get_head_entry(&smd->cqlist); it != NULL;
+            it = get_next_entry(&it->entry, &smd->cqlist)) {
+               if (it->entry.tid == tid)
+                       break;
+       }
+       pthread_mutex_unlock(&smd->cqlock);
+
+       return it;
+}
+
+/*
+ * Locate QP object: walk smd->qplist under smd->qplock and return the
+ * first entry whose list tid equals tid, or NULL when none matches.
+ * The lock is dropped again before returning.
+ */
+mcm_qp_t *mix_get_qp(mcm_scif_dev_t *smd, uint32_t tid)
+{
+       mcm_qp_t *it;
+
+       pthread_mutex_lock(&smd->qplock);
+       for (it = get_head_entry(&smd->qplist); it != NULL;
+            it = get_next_entry(&it->entry, &smd->qplist)) {
+               if (it->entry.tid == tid)
+                       break;
+       }
+       pthread_mutex_unlock(&smd->qplock);
+
+       return it;
+}
+
+/*
+ * Destroy proxy CQ, request fits in header (cq id in pmsg->req_id).
+ *
+ * Looks the CQ up, destroys the IB CQ and its completion channel, unlinks
+ * and frees the tracking object and sends the header back as response.
+ * When the CQ is unknown, or the IB CQ cannot be destroyed, the object is
+ * left untouched and MIX_EINVAL is reported.
+ *
+ * Returns 0 when the response was sent completely, otherwise the
+ * scif_send() result.
+ */
+static int mix_cq_destroy(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg)
+{
+       int len, ret;
+       struct mcm_cq *m_cq;
+
+       mlog(1, " MIX_CQ_DESTROY: cq_id 0x%x\n", pmsg->req_id);
+
+       /* Find the CQ */
+       m_cq = mix_get_cq(smd, pmsg->req_id);
+       if (!m_cq) {
+               mlog(0, " ERR: mix_get_cq, id %d, not found\n", pmsg->req_id);
+               pmsg->status = MIX_EINVAL;
+               goto resp;
+       }
+
+       /* ibv_destroy_cq reports failure via its return value; keep the
+        * tracking object and channel while the CQ itself still exists */
+       ret = ibv_destroy_cq(m_cq->ib_cq);
+       if (ret) {
+               mlog(0, " ERR: ibv_destroy_cq, id %d, %s\n", pmsg->req_id, strerror(ret));
+               pmsg->status = MIX_EINVAL;
+               goto resp;
+       }
+       m_cq->ib_cq = NULL;
+       ibv_destroy_comp_channel(m_cq->ib_ch);
+
+       pthread_mutex_lock(&smd->cqlock);
+       remove_entry(&m_cq->entry);
+       pthread_mutex_unlock(&smd->cqlock);
+       free(m_cq);
+
+       pmsg->status = MIX_SUCCESS;
+resp:
+       /* send back response */
+       pmsg->flags = MIX_OP_RSP;
+       len = sizeof(dat_mix_hdr_t);
+       ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+               return ret;
+       }
+       return 0;
+}
+
+/*
+ * Create new proxy CQ (MIX_CQ_CREATE).
+ *
+ * Reads the rest of the request (the header was already consumed by the
+ * caller), creates a completion channel and a CQ of pmsg->cq_len entries
+ * on the IB device, arms notification, queues the tracking object on
+ * smd->cqlist and returns its list tid in pmsg->cq_id. On any failure all
+ * resources created so far are destroyed and MIX_EINVAL is reported.
+ *
+ * Returns 0 when the response was sent completely, otherwise the failing
+ * scif_recv()/scif_send() result.
+ */
+static int mix_cq_create(mcm_scif_dev_t *smd, dat_mix_cq_t *pmsg)
+{
+       int len, ret;
+       struct mcm_cq *m_cq;
+
+       /* hdr already read, get operation data */
+       len = sizeof(dat_mix_cq_t) - sizeof(dat_mix_hdr_t);
+       ret = scif_recv(smd->scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+               return ret;
+       }
+       mlog(1, " MIX_CQ_CREATE: cq_len = %d\n", pmsg->cq_len);
+
+       /* Create CQ object */
+       m_cq = malloc(sizeof(mcm_cq_t));
+       if (!m_cq)
+               goto err;
+       memset(m_cq, 0, sizeof(mcm_cq_t));
+       init_list(&m_cq->entry);
+       m_cq->smd = smd;
+
+       m_cq->ib_ch = ibv_create_comp_channel(smd->md->ibctx);
+       if (!m_cq->ib_ch)
+               goto err;
+       mlog(1, " created comp channel\n");
+       m_cq->ib_cq = ibv_create_cq(smd->md->ibctx, pmsg->cq_len, m_cq, m_cq->ib_ch, 0);
+       if (!m_cq->ib_cq)
+               goto err;
+       mlog(1, " created cq\n");
+       ret = ibv_req_notify_cq(m_cq->ib_cq, 0);
+       if (ret)
+               goto err;
+       mlog(1, " notify cq\n");
+
+       /* insert on cq list, update object tid */
+       pthread_mutex_lock(&smd->cqlock);
+       insert_tail(&m_cq->entry, &smd->cqlist, m_cq);
+       pmsg->cq_id = m_cq->entry.tid;
+       pthread_mutex_unlock(&smd->cqlock);
+
+       mlog(1, " new cq_id %d\n", pmsg->cq_id);
+
+       pmsg->hdr.status = MIX_SUCCESS;
+       goto resp;
+
+err:
+       /* log first, the cleanup calls below may change errno */
+       mlog(0, " ERR: %s\n", strerror(errno));
+       if (m_cq) {
+               /* CQ before its channel; both are NULL when not yet created */
+               if (m_cq->ib_cq)
+                       ibv_destroy_cq(m_cq->ib_cq);
+               if (m_cq->ib_ch)
+                       ibv_destroy_comp_channel(m_cq->ib_ch);
+               free(m_cq);
+       }
+
+       pmsg->hdr.status = MIX_EINVAL;
+resp:
+       /* send back response */
+       pmsg->hdr.flags = MIX_OP_RSP;
+       len = sizeof(dat_mix_cq_t);
+       ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+               return ret;
+       }
+       return 0;
+}
+
+/*
+ * Destroy proxy QP, request fits in hdr (qp id in pmsg->req_id).
+ *
+ * A known QP has its IB QP destroyed and its tracking object unlinked
+ * from smd->qplist and freed (MIX_SUCCESS); an unknown id is logged and
+ * answered with MIX_EINVAL. The header is sent back as response.
+ *
+ * Returns 0 when the response was sent completely, otherwise the
+ * scif_send() result.
+ */
+static int mix_qp_destroy(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg)
+{
+       struct mcm_qp *qp;
+       int want, sent;
+
+       mlog(1, " MIX_QP_DESTROY: QP_t - id 0x%x\n", pmsg->req_id );
+
+       qp = mix_get_qp(smd, pmsg->req_id);
+       if (qp == NULL) {
+               mlog(0, " ERR: mix_get_qp, id %d, not found\n", pmsg->req_id);
+               mlog(0, " ERR: %s\n", strerror(errno));
+               pmsg->status = MIX_EINVAL;
+       } else {
+               ibv_destroy_qp(qp->ib_qp);
+               qp->ib_qp = NULL;
+
+               /* unlink under the list lock, then release the object */
+               pthread_mutex_lock(&smd->qplock);
+               remove_entry(&qp->entry);
+               pthread_mutex_unlock(&smd->qplock);
+               free(qp);
+
+               pmsg->status = MIX_SUCCESS;
+       }
+
+       /* the request header doubles as the response */
+       pmsg->flags = MIX_OP_RSP;
+       want = sizeof(dat_mix_hdr_t);
+       sent = scif_send(smd->scif_ep, pmsg, want, SCIF_SEND_BLOCK);
+       if (sent == want)
+               return 0;
+
+       mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, sent, want);
+       return sent;
+}
+
+/*
+ * MIX_QP_MODIFY placeholder: nothing is read from the endpoint and no
+ * response is sent yet; always returns 0.
+ */
+static int mix_qp_modify(mcm_scif_dev_t *smd, dat_mix_qp_t *pmsg)
+{
+       /* TODO */
+       (void)smd;
+       (void)pmsg;
+
+       return 0;
+}
+
+/*
+ * Create new proxy QP (MIX_QP_CREATE).
+ *
+ * Reads the rest of the request (the header was already consumed by the
+ * caller), allocates the tracking object, looks up the CQ named by
+ * qp_t.scq_id, creates an RC QP for TX services on smd->md->pd, moves it
+ * to INIT and queues it on smd->qplist. The list tid and the object
+ * address are returned to the client in qp_t.qp_id and qp_t.ctx.
+ * On failure the object is freed and MIX_EINVAL is reported.
+ *
+ * Returns 0 when the response was sent completely, otherwise the failing
+ * scif_recv()/scif_send() result.
+ */
+static int mix_qp_create(mcm_scif_dev_t *smd, dat_mix_qp_t *pmsg)
+{
+       int len, ret;
+       struct mcm_qp *m_qp;
+       struct mcm_cq *m_cq;
+       struct ibv_qp_init_attr qp_create;
+
+       /* hdr already read, get operation data */
+       len = sizeof(dat_mix_qp_t) - sizeof(dat_mix_hdr_t);
+       ret = scif_recv(smd->scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+               return ret;
+       }
+       mlog(1, " MIX_QP_CREATE: QP_r - qpn 0x%x, id 0x%x, s_q %d,%d r_q %d,%d inline=%d cq_id %d\n",
+               pmsg->qp_r.qp_num, pmsg->qp_r.qp_id, pmsg->qp_t.max_send_wr,
+               pmsg->qp_t.max_send_sge, pmsg->qp_r.max_recv_wr, pmsg->qp_r.max_recv_sge,
+               pmsg->qp_r.max_inline_data, pmsg->qp_t.scq_id);
+
+       /* Create QP object */
+       m_qp = malloc(sizeof(mcm_qp_t));
+       if (!m_qp)
+               goto err;
+       memset(m_qp, 0, sizeof(mcm_qp_t));
+       init_list(&m_qp->entry);
+       m_qp->smd = smd;
+       memcpy(&m_qp->qp_r, &pmsg->qp_r, sizeof(dat_mix_qp_attr_t));
+       memcpy(&m_qp->qp_t, &pmsg->qp_t, sizeof(dat_mix_qp_attr_t));
+
+       /* Find the CQ's for this QP for transmitting */
+       m_cq = mix_get_cq(smd, pmsg->qp_t.scq_id);
+       if (!m_cq) {
+               mlog(0, " ERR: mcm_get_cq, id %d, not found\n", pmsg->qp_t.scq_id);
+               goto err;
+       }
+
+       /* Setup attributes and create qp, for TX services */
+       memset((void *)&qp_create, 0, sizeof(qp_create));
+       qp_create.recv_cq = m_cq->ib_cq;
+       qp_create.cap.max_recv_wr = 1;
+       qp_create.cap.max_recv_sge = 0;
+       qp_create.send_cq = m_cq->ib_cq;
+       qp_create.cap.max_send_wr = pmsg->qp_t.max_send_wr;
+       qp_create.cap.max_send_sge = pmsg->qp_t.max_send_sge;
+       qp_create.cap.max_inline_data = pmsg->qp_t.max_inline_data;
+       qp_create.qp_type = IBV_QPT_RC;
+       qp_create.qp_context = (void *)m_qp;
+
+       m_qp->ib_qp = ibv_create_qp(smd->md->pd, &qp_create);
+       if (!m_qp->ib_qp)
+               goto err;
+
+       /* set to INIT state */
+       ret = modify_qp(m_qp->ib_qp, IBV_QPS_INIT, 0, 0, NULL);
+       if (ret) {
+               ibv_destroy_qp(m_qp->ib_qp);
+               m_qp->ib_qp = NULL;
+               goto err;
+       }
+
+       /* insert on qp list, update proxy qp object tid */
+       pthread_mutex_lock(&smd->qplock);
+       insert_tail(&m_qp->entry, &smd->qplist, m_qp);
+       pmsg->qp_t.qp_id = m_qp->entry.tid;
+       pmsg->qp_t.ctx = (uint64_t)m_qp;
+       pthread_mutex_unlock(&smd->qplock);
+
+       pmsg->hdr.status = MIX_SUCCESS;
+       goto resp;
+err:
+       mlog(0, " ERR: %s\n", strerror(errno));
+       if (m_qp) {
+               free(m_qp);
+               m_qp = NULL;    /* marks the failure for the trace below */
+       }
+
+       pmsg->hdr.status = MIX_EINVAL;
+resp:
+       /* send back response */
+       pmsg->hdr.flags = MIX_OP_RSP;
+       len = sizeof(dat_mix_qp_t);
+       ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+       if (ret != len) {
+               mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+               return ret;
+       }
+       /* m_qp and its ib_qp only exist on the success path */
+       if (m_qp)
+               mlog(0, " MIX_QP_CREATE: QP_t - qpn 0x%x id 0x%x, ctx %p \n", m_qp->ib_qp->qp_num, pmsg->qp_t.qp_id, m_qp);
+       return 0;
+}
+
+/*
+ * Receive MIX operations on connected SCIF endpoint.
+ *
+ * Reads one message header from smd->scif_ep, checks the protocol version
+ * and dispatches on the operation code; the handlers read any remaining
+ * operation data themselves and send the response.
+ *
+ * Returns the handler result, or -1 when the header could not be read,
+ * the version does not match, or the operation has no handler.
+ */
+static int mix_scif_recv(mcm_scif_dev_t *smd)
+{
+       char cmd[DAT_MIX_MSG_MAX];
+       dat_mix_hdr_t *phdr = (dat_mix_hdr_t *)cmd;
+       int ret, len;
+
+       len = sizeof(*phdr);
+       ret = scif_recv(smd->scif_ep, phdr, len, SCIF_RECV_BLOCK);
+       if ((ret != len) || (phdr->ver != DAT_MIX_VER)) {
+               mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d, VER=%d\n",
+                    smd->scif_ep, ret, len, phdr->ver);
+               return -1;
+       }
+
+       mlog(0, " ver %d, op %d, flags %d\n", phdr->ver, phdr->op, phdr->flags);
+
+       switch (phdr->op) {
+       case MIX_QP_CREATE:
+               ret = mix_qp_create(smd, (dat_mix_qp_t *)phdr);
+               break;
+       case MIX_QP_MODIFY:
+               ret = mix_qp_modify(smd, (dat_mix_qp_t *)phdr);
+               break;
+       case MIX_QP_FREE:
+               ret = mix_qp_destroy(smd, phdr);
+               break;
+       case MIX_CQ_CREATE:
+               ret = mix_cq_create(smd, (dat_mix_cq_t *)phdr);
+               break;
+       case MIX_CQ_FREE:
+               ret = mix_cq_destroy(smd, phdr);
+               break;
+       case MIX_LISTEN:
+               ret = mix_listen(smd, (dat_mix_listen_t *)phdr);
+               break;
+       case MIX_LISTEN_FREE:
+               ret = mix_listen_free(smd, phdr);
+               break;
+       /*
+        * Operations without a handler yet: reject them instead of letting
+        * them fall into an unrelated handler that would parse the message
+        * with the wrong layout.
+        */
+       case MIX_MR_CREATE:
+       case MIX_MR_FREE:
+       case MIX_WRITE:
+       case MIX_SEND:
+       case MIX_CM_REQ:
+       case MIX_CM_REP:
+       case MIX_CM_ACCEPT:
+       case MIX_CM_REJECT:
+       case MIX_CM_RTU:
+       case MIX_CM_EST:
+       case MIX_CM_DISC:
+       case MIX_CM_REPLY:
+       default:
+               mlog(0, " ERROR!!! unknown MIX operation: %d\n", phdr->op);
+               return -1;
+       }
+
+       return ret;
 }
+
+/*
+ * Receive MIX CM messages on connected SCIF endpoint.
+ *
+ * Reads one message header and validates its length and version. No CM
+ * message is handled yet: every operation code, MIX_CM_REQ included, is
+ * logged as unknown and -1 is returned.
+ */
+static int mix_scif_recv_cm(mcm_scif_dev_t *smd)
+{
+       char buf[DAT_MIX_MSG_MAX];
+       dat_mix_hdr_t *hdr = (dat_mix_hdr_t *)buf;
+       int want = sizeof(*hdr);
+       int got;
+
+       /* NOTE(review): this reads smd->scif_ep, the same endpoint as
+        * mix_scif_recv(); confirm whether smd->scif_cm_ep was intended */
+       got = scif_recv(smd->scif_ep, hdr, want, SCIF_RECV_BLOCK);
+       if (got != want || hdr->ver != DAT_MIX_VER) {
+               mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d, VER=%d\n",
+                    smd->scif_ep, got, want, hdr->ver);
+               return -1;
+       }
+
+       mlog(0, " ver %d, op %d, flags %d\n", hdr->ver, hdr->op, hdr->flags);
+
+       switch (hdr->op) {
+       case MIX_CM_REQ:        /* no handler yet, shares the error path */
+       default:
+               mlog(0, " ERROR!!! unknown MIX CM message: %d\n", hdr->op);
+               return -1;
+       }
+
+       return got;
+}
+
+
+/*
+ *
+ * Fabric side MCM messages, IB UD QP
+ *
+ */
+
 /*  IB async device event */
-static void mcm_async_evd(struct mcm_ib_dev *md)
+static void mcm_ib_async_event(struct mcm_ib_dev *md)
 {
-       return;
+       struct ibv_async_event event;
+
+       /*
+        * Fetch one async event from the device context, log it by class
+        * (CQ, QP, SRQ, device/port) and acknowledge it. Nothing is done
+        * when no event could be retrieved. event.element is a union, so
+        * only the member matching the event class may be read.
+        */
+       if (!ibv_get_async_event(md->ibctx, &event)) {
+               switch (event.event_type) {
+               case IBV_EVENT_CQ_ERR:
+                       mlog(0, "CQ ERR ctx(%p) = %d\n",
+                            event.element.cq->cq_context, event.event_type);
+                       break;
+               case IBV_EVENT_COMM_EST:
+                       mlog(0, "COMM_EST(QP=%p) rdata beat RTU\n", event.element.qp);
+                       break;
+               case IBV_EVENT_QP_FATAL:
+               case IBV_EVENT_QP_REQ_ERR:
+               case IBV_EVENT_QP_ACCESS_ERR:
+               case IBV_EVENT_QP_LAST_WQE_REACHED:
+               case IBV_EVENT_SQ_DRAINED:
+                       mlog(0, "QP (%p) ERR = %d\n",
+                            event.element.qp->qp_context, event.event_type);
+                       break;
+               case IBV_EVENT_SRQ_ERR:
+               case IBV_EVENT_SRQ_LIMIT_REACHED:
+                       /* SRQ events carry element.srq, not element.qp */
+                       mlog(0, "SRQ (%p) ERR = %d\n",
+                            event.element.srq->srq_context, event.event_type);
+                       break;
+               case IBV_EVENT_PATH_MIG:
+               case IBV_EVENT_PATH_MIG_ERR:
+               case IBV_EVENT_DEVICE_FATAL:
+               case IBV_EVENT_PORT_ACTIVE:
+               case IBV_EVENT_PORT_ERR:
+               case IBV_EVENT_LID_CHANGE:
+               case IBV_EVENT_PKEY_CHANGE:
+               case IBV_EVENT_SM_CHANGE:
+                       mlog(0, "Device Error = %d\n", event.event_type);
+                       break;
+               case IBV_EVENT_CLIENT_REREGISTER:
+                       mlog(0, "IBV_CLIENT_REREGISTER\n");
+                       break;
+               default:
+                       mlog(0, "%d UNKNOWN\n", event.event_type);
+                       break;
+               }
+               /* every retrieved event must be acknowledged */
+               ibv_ack_async_event(&event);
+       }
 }
-/* SCIF MIX message */
-static void mix_rcv_evd(struct mcm_scif_dev *md)
+
+/*
+ * Get CM UD message from send queue, called with s_lock held.
+ *
+ * The slot after md->s_hd (wrapping at md->qpe) is handed out and becomes
+ * the new head as soon as it differs from the tail md->s_tl. While the
+ * ring is full the send CQ is polled one completion at a time; each
+ * completion moves the tail to its wr_id. Returns NULL only when polling
+ * the CQ fails.
+ */
+static dat_mcm_msg_t *mcm_get_smsg(mcm_ib_dev_t *md)
 {
-       return;
+       struct ibv_wc wc;
+       int rc, polls = 1;
+       int next = md->s_hd + 1;
+
+       /* ring wrap */
+       if (next == md->qpe)
+               next = 0;
+
+       for (;;) {
+               if (next != md->s_tl) {
+                       md->s_hd = next; /* new hd */
+                       return &md->sbuf[next];
+               }
+
+               /* ring is full, report a stall every million polls */
+               if (polls % 1000000 == 0)
+                       mlog(1,  " ucm_get_smsg: FULLq hd %d == tl %d,"
+                                " completions stalled, polls=%d\n",
+                                next, md->s_tl, polls);
+
+               /* reap one completion to make room */
+               rc = ibv_poll_cq(md->scq, 1, &wc);
+               if (rc < 0) {
+                       mlog(1, " get_smsg: cq %p %s\n", md->scq, strerror(errno));
+                       return NULL;
+               }
+               /* completed sends free everything up to their slot */
+               if (rc > 0)
+                       md->s_tl = (int)wc.wr_id;
+
+               polls++;
+       }
+}
+
+/*
+ * ACTIVE/PASSIVE: build and send CM message out of CM object.
+ *
+ * Copies msg (without its private data area) plus p_size bytes of p_data
+ * into the next free send slot and posts it inline on the UD QP towards
+ * the LID in msg->daddr, creating and caching the address handle for that
+ * LID on first use. md->slock is taken for the duration of the send.
+ *
+ * Returns 0 on success, -1 when the arguments are out of range, no send
+ * slot or address handle is available, or the ibv_post_send() error.
+ */
+static int mcm_send(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size)
+{
+       dat_mcm_msg_t *smsg = NULL;
+       struct ibv_send_wr wr, *bad_wr;
+       struct ibv_sge sge;
+       int len, ret = -1;
+       uint16_t dlid = ntohs(msg->daddr.lid);
+
+       /* private data is copied into the p_data area, it must fit */
+       if (p_size < 0 || p_size > DAT_MCM_PDATA_SIZE) {
+               mlog(0, " mcm_send ERR: p_size %d out of range\n", (int)p_size);
+               return -1;
+       }
+       /* md->ah[] is allocated with 0xffff entries, last valid index is 0xfffe */
+       if (dlid >= 0xffff) {
+               mlog(0, " mcm_send ERR: dlid %x out of range\n", dlid);
+               return -1;
+       }
+
+       /* Get message from send queue, copy data, and send */
+       pthread_mutex_lock(&md->slock);
+       if ((smsg = mcm_get_smsg(md)) == NULL) {
+               mlog(0, " mcm_send ERR: get_smsg(hd=%d,tl=%d) \n", md->s_hd, md->s_tl);
+               goto bail;
+       }
+
+       len = (sizeof(*msg) - DAT_MCM_PDATA_SIZE);
+       memcpy(smsg, msg, len);
+       if (p_size) {
+               smsg->p_size = htons(p_size);   /* host to network order */
+               memcpy(&smsg->p_data, p_data, p_size);
+       }
+
+       memset(&wr, 0, sizeof(wr));
+       wr.next = NULL;
+       wr.sg_list = &sge;
+       wr.num_sge = 1;
+       wr.opcode = IBV_WR_SEND;
+       wr.wr_id = (unsigned long)md->s_hd;
+       /* signal every md->signal-th slot; signal all when it is 0 */
+       wr.send_flags = (md->signal && (wr.wr_id % md->signal)) ? 0 : IBV_SEND_SIGNALED;
+       wr.send_flags |= IBV_SEND_INLINE;
+
+       sge.length = len + p_size;
+       sge.lkey = md->mr_sbuf->lkey;
+       sge.addr = (uintptr_t)smsg;
+
+       mlog(2," mcm_send: op %s ln %d lid %x c_qpn %x rport %x\n",
+               mcm_op_str(ntohs(smsg->op)),
+               sge.length, htons(smsg->daddr.lid),
+               htonl(smsg->dqpn), htons(smsg->dport));
+
+       /* empty slot, then create AH */
+       if (!md->ah[dlid]) {
+               md->ah[dlid] =
+                       mcm_create_ah(md, md->pd, md->qp, dlid, NULL);
+               if (!md->ah[dlid])
+                       goto bail;
+       }
+
+       wr.wr.ud.ah = md->ah[dlid];
+       wr.wr.ud.remote_qpn = ntohl(smsg->dqpn);
+       wr.wr.ud.remote_qkey = DAT_MCM_UD_QKEY;
+
+       /* ibv_post_send reports the error code via its return value */
+       ret = ibv_post_send(md->qp, &wr, &bad_wr);
+       if (ret)
+               mlog(0, " mcm_send ERR: post_send() %s\n", strerror(ret));
+bail:
+       pthread_mutex_unlock(&md->slock);
+       return ret;
+}
+
+/*
+ * Post one receive buffer on the CM QP.
+ *
+ * The buffer handed to the HCA begins sizeof(struct ibv_grh) bytes in front
+ * of msg and is one GRH plus one dat_mcm_msg_t long; msg itself is stored
+ * as the work request id. Returns the ibv_post_recv() result.
+ */
+static int mcm_post_rmsg(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+       struct ibv_sge rsge;
+       struct ibv_recv_wr rwr, *bad_rwr;
+
+       /* scatter entry: GRH space followed by the message */
+       rsge.addr = (uintptr_t)((char *)msg - sizeof(struct ibv_grh));
+       rsge.length = sizeof(dat_mcm_msg_t) + sizeof(struct ibv_grh);
+       rsge.lkey = md->mr_rbuf->lkey;
+
+       /* single WR, tagged with the message pointer */
+       rwr.wr_id = (uint64_t)(uintptr_t) msg;
+       rwr.sg_list = &rsge;
+       rwr.num_sge = 1;
+       rwr.next = NULL;
+
+       return ibv_post_recv(md->qp, &rwr, &bad_rwr);
+}
+
+/*
+ * Send a CM-level reject (MCM_REJ_CM) back to the sender of msg.
+ *
+ * The reply is built on the stack: source and destination port/QPN are
+ * swapped, the sender's address becomes the destination address, and the
+ * message goes out through mcm_send() without private data.
+ * Returns whatever mcm_send() returns.
+ */
+static int mcm_reject(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+       dat_mcm_msg_t smsg;
+
+       /* setup op, rearrange the src, dst cm and addr info */
+       memset(&smsg, 0, sizeof(smsg));
+       smsg.ver = htons(DAT_MCM_VER);
+       smsg.op = htons(MCM_REJ_CM);
+       smsg.dport = msg->sport;
+       smsg.dqpn = msg->sqpn;
+       smsg.sport = msg->dport;
+       smsg.sqpn = msg->dqpn;
+       memcpy(&smsg.daddr, &msg->saddr, sizeof(dat_mcm_addr_t));
+
+       /* no dst_addr IB info in REQ, init lid, gid, get type from saddr */
+       smsg.saddr.lid = md->addr.lid;
+       smsg.saddr.qp_type = msg->saddr.qp_type;
+       memcpy(&smsg.saddr.gid[0], &md->addr.gid, 16);
+       /*
+        * NOTE(review): the full-struct copy below overwrites the lid, qp_type
+        * and gid stored just above, so the reject always carries msg->daddr
+        * as its source address. If the local values were meant to win, this
+        * copy has to come first -- confirm the intended order.
+        */
+       memcpy(&smsg.saddr, &msg->daddr, sizeof(dat_mcm_addr_t));
+
+       mlog(2," CM reject -> LID %x, QPN %x PORT %x\n",
+            ntohs(smsg.daddr.lid),
+            ntohl(smsg.dqpn), ntohs(smsg.dport));
+
+       return (mcm_send(md, &smsg, NULL, 0));
+}
+
+/*
+ * Dispatch one received CM message according to the state of its CM object.
+ *
+ * cm->lock is taken on entry and released on every path before returning.
+ * In this revision the LISTEN, RTU_PENDING, REP_PENDING and DISC_PENDING
+ * handlers are still commented out, so those states only drop the lock.
+ * The md argument is not referenced; resends go out through cm->smd->md.
+ */
+static void mcm_process_recv(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, mcm_cm_t *cm)
+{
+       pthread_mutex_lock(&cm->lock);
+       switch (cm->state) {
+       case MCM_LISTEN: /* passive */
+               pthread_mutex_unlock(&cm->lock);
+               //mcm_accept(cm, msg);
+               break;
+       case MCM_RTU_PENDING: /* passive */
+               pthread_mutex_unlock(&cm->lock);
+               //mcm_accept_rtu(cm, msg);
+               break;
+       case MCM_REP_PENDING: /* active */
+               pthread_mutex_unlock(&cm->lock);
+               //mcm_connect_rtu(cm, msg);
+               break;
+       case MCM_CONNECTED: /* active and passive */
+               /* DREQ, change state and process */
+               cm->retries = 2;
+               if (ntohs(msg->op) == MCM_DREQ) {
+                       cm->state = MCM_DISC_RECV;
+                       pthread_mutex_unlock(&cm->lock);
+                       //mcm_disconnect(cm);
+                       break;
+               }
+               /* active: RTU was dropped, resend */
+               if (ntohs(msg->op) == MCM_REP) {
+                       mlog(1,  " RESEND RTU: op %s st %s [lid, port, cqp, iqp]:"
+                                " %x %x %x %x -> %x %x %x %x r_pid %x\n",
+                                 mcm_op_str(ntohs(cm->msg.op)),
+                                 mcm_state_str(cm->state),
+                                ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+                                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
+                                ntohl(cm->msg.d_id));
+
+                       /* the stored message is reused as the RTU; sent with cm->lock held */
+                       cm->msg.op = htons(MCM_RTU);
+                       mcm_send(cm->smd->md, &cm->msg, NULL, 0);
+               }
+               pthread_mutex_unlock(&cm->lock);
+               break;
+       case MCM_DISC_PENDING: /* active and passive */
+               /* DREQ or DREP, finalize */
+               pthread_mutex_unlock(&cm->lock);
+               //mcm_disconnect_final(cm);
+               break;
+       case MCM_DISCONNECTED:
+       case MCM_FREE:
+               /* DREQ dropped, resend */
+               if (ntohs(msg->op) == MCM_DREQ) {
+                       mlog(1, " RESEND DREP: op %s st %s [lid, port, qpn]:"
+                               " %x %x %x -> %x %x %x\n",
+                               mcm_op_str(ntohs(msg->op)),
+                               mcm_state_str(cm->state),
+                               ntohs(msg->saddr.lid),
+                               ntohs(msg->sport),
+                               ntohl(msg->saddr.qpn),
+                               ntohs(msg->daddr.lid),
+                               ntohs(msg->dport),
+                               ntohl(msg->daddr.qpn));
+                       /* the stored message is reused as the DREP; sent with cm->lock held */
+                       cm->msg.op = htons(MCM_DREP);
+                       mcm_send(cm->smd->md, &cm->msg, NULL, 0);
+
+               } else if (ntohs(msg->op) != MCM_DREP){
+                       /* DREP ok to ignore, any other print warning */
+                       mlog(1, " mcm_recv: UNEXPECTED MSG on cm %p"
+                               " <- op %s, st %s spsp %x sqpn %x\n",
+                               cm, mcm_op_str(ntohs(msg->op)),
+                               mcm_state_str(cm->state),
+                               ntohs(msg->sport), ntohl(msg->sqpn));
+               }
+               pthread_mutex_unlock(&cm->lock);
+               break;
+       case MCM_REJECTED:
+               /* a user reject arriving in REJECTED state is dropped silently */
+               if (ntohs(msg->op) == MCM_REJ_USER) {
+                       pthread_mutex_unlock(&cm->lock);
+                       break;
+               }
+               /* any other op: fall through to the warning below */
+       default:
+               mlog(0, " mcm_recv: Warning, UNKNOWN state"
+                       " <- op %s, %s spsp %x sqpn %x slid %x\n",
+                       mcm_op_str(ntohs(msg->op)), mcm_state_str(cm->state),
+                       ntohs(msg->sport), ntohl(msg->sqpn), ntohs(msg->saddr.lid));
+
+               pthread_mutex_unlock(&cm->lock);
+               break;
+       }
+}
+
+/*
+ * Search one SCIF client for the CM object a received message belongs to.
+ *
+ * The connection list (smd->clist) is searched first; a message matches a
+ * connection when ports, CM QPNs and the remote LID all line up. An MCM_REQ
+ * that matches an existing connection is a retransmission: it is logged,
+ * *dup is set to 1 and NULL is returned. Only an MCM_REQ without a match on
+ * the connection list goes on to the listen list (smd->llist), where local
+ * port and QPN alone decide. Entries in MCM_DESTROY or MCM_FREE state are
+ * skipped on both lists.
+ *
+ * Pure lookup: nothing is sent from here, so it may be called with
+ * md->slock held. Each list is walked under its own lock (clock / llock).
+ * Returns the matching CM object or NULL; *dup is always written.
+ */
+static mcm_cm_t *mcm_lookup_smd_cm(mcm_scif_dev_t *smd, dat_mcm_msg_t *msg, int *dup)
+{
+       mcm_cm_t *cm = NULL, *next, *found = NULL;
+       LLIST_ENTRY *list;
+       pthread_mutex_t *lock;
+       int listenq = 0;
+
+       *dup = 0;
+
+       /* conn list first, duplicate requests for MCM_REQ */
+       list = &smd->clist;
+       lock = &smd->clock;
+
+retry_listenq:
+       pthread_mutex_lock(lock);
+       next = get_head_entry(list);
+
+       while (next) {
+               cm = next;
+               next = get_next_entry(&cm->entry, list);
+               if (cm->state == MCM_DESTROY || cm->state == MCM_FREE)
+                       continue;
+
+               /* CM sPORT + QPN, match is good enough for listenq */
+               if (listenq &&
+                   cm->msg.sport == msg->dport &&
+                   cm->msg.sqpn == msg->dqpn) {
+                       found = cm;
+                       break;
+               }
+               /* connectq, check src and dst plus id's, check duplicate conn_reqs */
+               if (!listenq &&
+                   cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn &&
+                   cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn &&
+                   cm->msg.daddr.lid == msg->saddr.lid) {
+                       if (ntohs(msg->op) != MCM_REQ) {
+                               found = cm;
+                               break;
+                       }
+                       /*
+                        * duplicate; bail and throw away. cm is only known to
+                        * be on the list while the list lock is held, so log
+                        * before unlocking.
+                        */
+                       mlog(1,  " DUPLICATE: cm %p op %s (%s) st %s"
+                                " [lid, port, cqp, iqp]:"
+                                " %x %x %x %x <- (%x %x %x %x :"
+                                " %x %x %x %x) -> %x %x %x %x\n",
+                                cm, mcm_op_str(ntohs(msg->op)),
+                                mcm_op_str(ntohs(cm->msg.op)),
+                                mcm_state_str(cm->state),
+                                ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
+                                ntohs(msg->saddr.lid), ntohs(msg->sport),
+                                ntohl(msg->sqpn), ntohl(msg->saddr.qpn),
+                                ntohs(msg->daddr.lid), ntohs(msg->dport),
+                                ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+                                ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn));
+                       pthread_mutex_unlock(lock);
+                       *dup = 1;
+                       return NULL;
+               }
+       }
+       pthread_mutex_unlock(lock);
+
+       /* no duplicate request on connq, check listenq for new request */
+       if (ntohs(msg->op) == MCM_REQ && !listenq && !found) {
+               listenq = 1;
+               list = &smd->llist;
+               lock = &smd->llock;
+               goto retry_listenq;
+       }
+
+       return found;
+}
+
+/*
+ * Handle a message that matched no CM object: an MCM_REQ is answered with a
+ * CM reject, every unmatched message is logged.
+ *
+ * mcm_reject() ends in mcm_send(), which takes md->slock; call this only
+ * after md->slock has been released.
+ */
+static void mcm_report_no_match(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+       /* not match on listenq for valid request, send reject */
+       if (ntohs(msg->op) == MCM_REQ) {
+               mlog(1, " mcm_recv: NO LISTENER for %s %x %x i%x c%x"
+                       " < %x %x %x, sending reject\n",
+                       mcm_op_str(ntohs(msg->op)),
+                       ntohs(msg->daddr.lid), ntohs(msg->dport),
+                       ntohl(msg->daddr.qpn), ntohl(msg->sqpn),
+                       ntohs(msg->saddr.lid), ntohs(msg->sport),
+                       ntohl(msg->saddr.qpn));
+
+               mcm_reject(md, msg);
+       }
+
+       /* an unmatched MCM_DREP is a duplicate DREP; nothing more to do for it */
+       mlog(1,  " NO MATCH: op %s [lid, port, cqp, iqp, pid]:"
+                " %x %x %x %x %x <- %x %x %x %x l_pid %x r_pid %x\n",
+                mcm_op_str(ntohs(msg->op)),
+                ntohs(msg->daddr.lid), ntohs(msg->dport),
+                ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+                ntohl(msg->d_id), ntohs(msg->saddr.lid),
+                ntohs(msg->sport), ntohl(msg->sqpn),
+                ntohl(msg->saddr.qpn), ntohl(msg->s_id),
+                ntohl(msg->d_id));
+}
+
+/*
+ * Find matching CM object for this receive message on one SCIF client.
+ *
+ * Returns the CM object, or NULL when there is none. When nothing matched
+ * and the message is not a duplicate MCM_REQ, the miss is logged and an
+ * MCM_REQ is rejected through smd->md. Because the reject path takes
+ * md->slock, do not call this with that lock held; mcm_get_cm() uses the
+ * lookup helper directly for that reason.
+ */
+mcm_cm_t *mcm_get_smd_cm(mcm_scif_dev_t *smd, dat_mcm_msg_t *msg)
+{
+       mcm_cm_t *found;
+       int dup;
+
+       found = mcm_lookup_smd_cm(smd, msg, &dup);
+       if (!found && !dup)
+               mcm_report_no_match(smd->md, msg);
+
+       return found;
+}
+
+/*
+ * locate CM object for msg, walk all SCIF clients for MD
+ *
+ * The client list is walked under md->slock using the send-free lookup.
+ * The walk stops at the first match or at the first client that recognises
+ * the message as a duplicate MCM_REQ. Logging and the reject for an
+ * unmatched MCM_REQ happen once, after every client has been searched and
+ * after md->slock has been dropped; this also covers an empty client list.
+ * Returns the CM object or NULL.
+ */
+mcm_cm_t *mcm_get_cm(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+       mcm_cm_t *cm = NULL;
+       mcm_scif_dev_t *smd;
+       int dup = 0;
+
+       /* Walk scif device client list */
+       pthread_mutex_lock(&md->slock);
+       smd = get_head_entry(&md->smd_list);
+       while (smd) {
+               cm = mcm_lookup_smd_cm(smd, msg, &dup);
+               if (cm || dup)
+                       break;
+               smd = get_next_entry(&smd->entry, &md->smd_list);
+       }
+       pthread_mutex_unlock(&md->slock);
+
+       /* md->slock is free again, the reject may now go through mcm_send() */
+       if (!cm && !dup)
+               mcm_report_no_match(md, msg);
+
+       return cm;
+}
+
+/*
+ * Get rmsgs from CM completion queue, 10 at a time
+ *
+ * Called after POLLIN on the receive completion channel: consumes and acks
+ * one CQ event, then drains md->rcq. Each completion's wr_id is the
+ * dat_mcm_msg_t buffer that was posted; messages with the wrong version or
+ * without a matching CM object are dropped, the rest go to
+ * mcm_process_recv(). Every buffer is re-posted afterwards. When the CQ
+ * polls empty it is re-armed and polled once more to catch completions
+ * that slipped in before the re-arm.
+ */
+static void mcm_ib_recv(mcm_ib_dev_t *md)
+{
+       struct ibv_wc wc[10];
+       dat_mcm_msg_t *msg;
+       mcm_cm_t *cm;
+       int i, ret, notify = 0;
+       struct ibv_cq *ibv_cq = NULL;
+       void *cq_ctx = NULL;
+
+       /*
+        * POLLIN on channel FD. The CQ context is returned into a scratch
+        * variable; it must not overwrite md, which is used below.
+        */
+       ret = ibv_get_cq_event(md->rch, &ibv_cq, &cq_ctx);
+       if (ret == 0) {
+               ibv_ack_cq_events(ibv_cq, 1);
+       }
+retry:
+       ret = ibv_poll_cq(md->rcq, 10, wc);
+       if (ret <= 0) {
+               /* empty: re-arm once, then poll again before giving up */
+               if (!ret && !notify) {
+                       ibv_req_notify_cq(md->rcq, 0);
+                       notify = 1;
+                       goto retry;
+               }
+               return;
+       } else
+               notify = 0;
+
+       for (i = 0; i < ret; i++) {
+               msg = (dat_mcm_msg_t*) (uintptr_t) wc[i].wr_id;
+
+               mlog(2, " mcm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n",
+                    wc[i].status, mcm_op_str(ntohs(msg->op)),
+                    wc[i].byte_len,
+                    (void *)msg, wc[i].src_qp);
+
+               /* validate CM message, version; on match, process it */
+               if (ntohs(msg->ver) != DAT_MCM_VER) {
+                       mlog(1, " mcm_recv: UNKNOWN msg %p, ver %d\n", msg, msg->ver);
+               } else if ((cm = mcm_get_cm(md, msg)) != NULL) {
+                       mcm_process_recv(md, msg, cm);
+               }
+
+               /* hand the buffer back in every case; a failure shrinks the RQ */
+               if (mcm_post_rmsg(md, msg))
+                       mlog(0, " mcm_recv ERR: post_recv() msg %p\n", msg);
+       }
+
+       /* finished this batch of WC's, poll and rearm */
+       goto retry;
 }
 
 /*
@@ -707,14 +2274,13 @@ static void mpxy_server(void)
 {
        struct mcm_fd_set *set;
        struct mcm_ib_dev *md;
-       struct mcm_scif_dev *smd;
-       int time_ms;
-       int i, n, ret;
+       struct mcm_scif_dev *smd, *next;
+       int time_ms, ret;
 
        /* FD array */
        set = mcm_alloc_fd_set();
        if (!set)
-               goto out;
+               return;
 
        mlog(0, "server started\n");
 
@@ -722,53 +2288,68 @@ static void mpxy_server(void)
                time_ms = -1; /* blocking */
                mcm_fd_zero(set);
 
+               /* SCIF listen EP, MIC client open requests */
+               mcm_fd_set(scif_listen_ep, set, POLLIN);
+
                /* trigger on all active IB devices */
                pthread_mutex_lock(&mcm_llock);
-               md = get_list_head(&mcm_llist);
+               md = get_head_entry(&mcm_list);
                while (md) {
-                       mcm_fd_set(md->ib_dev->async_fd, set, POLLIN);
+                       mcm_fd_set(md->ibctx->async_fd, set, POLLIN);
                        mcm_fd_set(md->rch->fd, set, POLLIN);
 
                        /* trigger on all active SCIF ep's */
-                       pthread_mutex_lock(&md->mix_lock);
-                       smd = get_list_head(&md->mix_list);
+                       pthread_mutex_lock(&md->slock);
+                       smd = get_head_entry(&md->smd_list);
                        while (smd) {
-                               mcm_fd_set(smd->ep, set, POLLIN);
-                               smd = get_next_entry(&smd->entry, &md->mix_list);
+                               mcm_fd_set(smd->scif_ep, set, POLLIN);
+                               mcm_fd_set(smd->scif_cm_ep, set, POLLIN);
+                               smd = get_next_entry(&smd->entry, &md->smd_list);
                        }
-                       pthread_mutex_unlock(&md->mix_lock);
+                       pthread_mutex_unlock(&md->slock);
                        md = get_next_entry(&md->entry, &mcm_list);
                }
                pthread_mutex_unlock(&mcm_llock);
 
                mcm_select(set, time_ms); /* wait, DAPL MCM or SCIF MIX msgs */
 
+               /* process listens */
+               if (mcm_poll(scif_listen_ep, POLLIN) == POLLIN)
+                       mix_scif_accept(scif_listen_ep);
+
                pthread_mutex_lock(&mcm_llock);
-               md = get_list_head(&mcm_llist);
+               md = get_head_entry(&mcm_list);
                while (md) {
                        /* process MCM events: async device and CM msgs */
                        if (mcm_poll(md->rch->fd, POLLIN) == POLLIN)
-                               mcm_rcv_evd(md);
+                               mcm_ib_recv(md);
 
-                       if (mcm_poll(md->ib_dev->async_fd, POLLIN) == POLLIN)
-                               mcm_async_evd(md);
+                       if (mcm_poll(md->ibctx->async_fd, POLLIN) == POLLIN)
+                               mcm_ib_async_event(md);
 
-                       /* process SCIF cmd channels */
-                       pthread_mutex_lock(&md->mix_lock);
-                       smd = get_list_head(&md->mix_list);
+                       /* process SCIF operation and CM channels */
+                       pthread_mutex_lock(&md->slock);
+                       smd = get_head_entry(&md->smd_list);
                        while (smd) {
-                               if (mcm_poll(smd->ep, POLLIN) == POLLIN)
-                                       mix_rcv_evd(smd);
+                               ret = mcm_poll(smd->scif_ep, POLLIN); /* OP */
+                               if (ret == POLLIN)
+                                       ret = mix_scif_recv(smd);
+
+                               ret = mcm_poll(smd->scif_cm_ep, POLLIN); /* CM */
+                               if (ret == POLLIN)
+                                       ret = mix_scif_recv_cm(smd);
 
-                               smd = get_next_entry(&smd->entry, &md->mix_list);
+                               next = get_next_entry(&smd->entry, &md->smd_list);
+                               if (ret)
+                                       mix_close_device(md, smd);
+
+                               smd = next;
                        }
-                       pthread_mutex_unlock(&md->mix_lock);
+                       pthread_mutex_unlock(&md->slock);
                        md = get_next_entry(&md->entry, &mcm_list);
                }
                pthread_mutex_unlock(&mcm_llock);
        }
-
-
 }
 
 static void show_usage(char *program)
@@ -811,7 +2392,7 @@ int main(int argc, char **argv)
        pthread_mutex_init(&mcm_llock, NULL);
 
        /* init MCM device list */
-       init_list(&mcm_llist);
+       init_list(&mcm_list);
 
        logfile = mpxy_open_log();
 
@@ -823,11 +2404,6 @@ int main(int argc, char **argv)
                return -1;
        }
 
-       if (init_ib()) {
-               mlog(0, "ERROR - unable to open/init IB device\n");
-               return -1;
-       }
-
        mlog(1, "starting server\n");
        mpxy_server();
        mlog(0, "shutting down\n");
index ad8ffec36cedc793e2d2079b9146e5126cfb6ce4..b060b39c79f1ce752e324dcc041abc4874b6157c 100755 (executable)
  * PURPOSE: extensions to the DAT API for MIC Proxy RDMA services
  *
  *
- **********************************************************************/
+ *     This extension/service enables MIC based DAPL providers to use a
+ *     proxy service for sends and RDMA write operations. RDMA reads and
+ *     receives are NOT supported. The service communicates within a
+ *     server platform over the PCI-E bus using SCIF and a new MIC
+ *     Indirect Exchange (MIX) messaging protocol. The MCM provider uses
+ *     DAPL CM messaging protocols on the wire. The MIX protocol is
+ *     defined as part of the new MIC extensions.
+ *
+ ***********************************************************************/
 #ifndef _DAT_MIC_EXTENSIONS_H_
 #define _DAT_MIC_EXTENSIONS_H_
 
+#include <sys/socket.h>
+#include <netinet/in.h>
+
 #define DAT_MIC_EXTENSION_VERSION      1
 #define DAT_MIC_ATTR_MIC               "DAT_MIC_SUPPORT"
 
-/* Wire protocol version for MIC Indirect Exchange (MIX) protocol over SCIF */
-#define DAT_MIX_VER 1
+/***** MIC Indirect CM (MCM) protocol over IB fabrics *****/
+#define DAT_MCM_VER            1
+#define DAT_MCM_UD_QKEY                0x78655322
+#define DAT_MCM_PDATA_SIZE      64
 
-typedef enum _dat_mix_ops
+typedef enum dat_mcm_op
 {
-       DAT_MIX_IA_OPEN = 1,
-       DAT_MIX_IA_CLOSE,
-       DAT_MIX_IA_QUERY,
-       DAT_MIX_IA_MR,
-       DAT_MIX_EP_CREATE,
-       DAT_MIX_EP_QUERY,
-       DAT_MIX_WRITE,
-       DAT_MIX_SEND,
-       DAT_MIX_READ,
-       DAT_MIX_LISTEN,
-       DAT_MIX_CM_REQ,
-       DAT_MIX_CM_REP,
-       DAT_MIX_CM_ACCEPT,
-       DAT_MIX_CM_REJECT,
-       DAT_MIX_CM_RTU,
-       DAT_MIX_CM_EST,
-       DAT_MIX_CM_DISC,
-       DAT_MIX_CM_REPLY,
-} dat_mix_ops_t;
+       MCM_REQ = 1,
+       MCM_REP,
+       MCM_REJ_USER, /* user reject */
+       MCM_REJ_CM,   /* cm reject */
+       MCM_RTU,
+       MCM_DREQ,
+       MCM_DREP
 
-/* MIC Indirect CM (MCM) protocol over IB fabric */
-#define DAT_MCM_PDATA_SIZE      64
-union dat_mcm_addr {
-       DAT_SOCK_ADDR6          so;
-       struct {
-               uint16_t        family;  /* sin6_family */
-               uint16_t        lid;     /* sin6_port */
-               uint32_t        qpn;     /* sin6_flowinfo */
-               uint8_t         gid[16]; /* sin6_addr */
-               uint16_t        port;    /* sin6_scope_id */
-               uint8_t         sl;
-               uint8_t         qp_type;
-       } ib;
-};
+} DAT_MCM_OP;
+
+/*
+ * MCM address, 28 bytes (2 + 2 + 4 + 16 + 2 + 1 + 1; every member sits on
+ * its natural alignment, so no padding is needed).
+ * The mpxyd CM code reads lid through ntohs() and qpn through ntohl(),
+ * i.e. it treats both as network byte order.
+ */
+typedef struct dat_mcm_addr
+{
+       uint16_t        family;
+       uint16_t        lid;
+       uint32_t        qpn;
+       uint8_t         gid[16];
+       uint16_t        port;
+       uint8_t         sl;
+       uint8_t         qp_type;
+} dat_mcm_addr_t;
 
 /* MCM message, 208 bytes */
-typedef struct _dat_mcm_msg
+typedef struct dat_mcm_msg
 {
        uint16_t                ver;
        uint16_t                op;
@@ -95,29 +95,206 @@ typedef struct _dat_mcm_msg
        uint32_t                s_id;  /* src pid */
        uint32_t                d_id;  /* dst pid */
        uint8_t                 rd_in; /* atomic_rd_in */
-       uint8_t                 resv[5];
-       union dat_mcm_addr      saddr;
-       union dat_mcm_addr      daddr;
-       union dat_mcm_addr      saddr_alt;
-       union dat_mcm_addr      daddr_alt;
+       uint8_t                 resv[5];        /* 2 connections for MCM endpoints */
+       dat_mcm_addr_t          saddr;          /* 1st RC - local MPXY QP  ->  */
+       dat_mcm_addr_t          daddr;          /*          <- remote MIC QP   */
+       dat_mcm_addr_t          saddr2;         /* 2nd RC - local MIC QP   ->  */
+       dat_mcm_addr_t          daddr2;         /*          <- remote MPXY QP  */
        uint8_t                 p_data[DAT_MCM_PDATA_SIZE];
 
 } dat_mcm_msg_t;
 
-typedef struct _dat_mix_open_op {
+/***** MIC Indirect Exchange (MIX) protocol over SCIF ****/
+#define DAT_MIX_VER            1
+#define DAT_MIX_MSG_MAX        256
+
+/*
+ * MIX operation codes. Numbering starts at 1 and runs consecutively, which
+ * makes MIX_SEND 21; inserting or reordering entries renumbers everything
+ * after the change.
+ * NOTE(review): presumably carried in dat_mix_hdr_t.op (uint8_t) -- confirm.
+ */
+typedef enum dat_mix_ops
+{
+       MIX_IA_OPEN = 1,
+       MIX_IA_CLOSE,
+       MIX_LISTEN,
+       MIX_LISTEN_FREE,
+       MIX_MR_CREATE,
+       MIX_MR_FREE,
+       MIX_QP_CREATE,
+       MIX_QP_MODIFY,
+       MIX_QP_FREE,
+       MIX_CQ_CREATE,
+       MIX_CQ_FREE,
+       MIX_CM_REQ,
+       MIX_CM_REP,
+       MIX_CM_ACCEPT,
+       MIX_CM_REJECT,
+       MIX_CM_RTU,
+       MIX_CM_EST,
+       MIX_CM_DISC,
+       MIX_CM_REPLY,
+       MIX_WRITE,
+       MIX_SEND,
+
+} dat_mix_ops_t;
+
+/*
+ * MIX operation flags. MIX_OP_RSP, MIX_OP_SYNC and MIX_OP_ASYNC are distinct
+ * bits; MIX_OP_REQ is 0 and therefore cannot be detected with a bitwise AND.
+ */
+typedef enum dat_mix_op_flags
+{
+    MIX_OP_REQ   = 0x00,
+    MIX_OP_RSP   = 0x01,
+    MIX_OP_SYNC  = 0x02,
+    MIX_OP_ASYNC = 0x04,
+
+} dat_mix_op_flags_t;
+
+/*
+ * MIX operation status. MIX_SUCCESS is 0 and the remaining codes count up
+ * consecutively to MIX_EOVERFLOW (17). The names follow errno constants but
+ * the values are private to MIX (MIX_EFAULT is 1), not host errno numbers.
+ * NOTE(review): presumably carried in dat_mix_hdr_t.status -- confirm.
+ */
+typedef enum dat_mix_op_status
+{
+    MIX_SUCCESS = 0,
+    MIX_EFAULT,                /* internal error */
+    MIX_ENOMEM,                /* no space */
+    MIX_EINVAL,                /* invalid parameter */
+    MIX_ENOTCONN,      /* no active RDMA channels */
+    MIX_ENODEV,                /* no device available */
+    MIX_ECONNRESET,    /* RDMA channel reset */
+    MIX_EBADF,         /* RDMA channel or CM id invalid */
+    MIX_EAGAIN,                /* busy */
+    MIX_EADDRINUSE,    /* port or address in use */
+    MIX_ENETUNREACH,   /* remote address unreachable */
+    MIX_ETIMEDOUT,     /* connection time out */
+    MIX_EAFNOSUPPORT,  /* invalid address */
+    MIX_EPERM,         /* invalid permission */
+    MIX_EALREADY,      /* invalid state */
+    MIX_ECONNREFUSED,  /* connection rejected */
+    MIX_EISCONN,       /* already connected */
+    MIX_EOVERFLOW,     /* length error */
+
+} dat_mix_op_status_t;
+
+/*
+ * MIX message header, 8 bytes (four uint8_t members followed by one
+ * uint32_t). The dat_mix_mr, dat_mix_listen, dat_mix_qp, dat_mix_cq and
+ * dat_mix_cm messages in this header all start with it.
+ * NOTE(review): op, flags and status presumably hold dat_mix_ops_t,
+ * dat_mix_op_flags_t and dat_mix_op_status_t values -- confirm.
+ */
+typedef struct dat_mix_hdr
+{
+       uint8_t         ver;            /* version */
+       uint8_t         op;             /* operation type */
+       uint8_t         flags;          /* operation flags */
+       uint8_t         status;         /* operation status */
+       uint32_t        req_id;         /* operation id, multiple operations */
+
+} dat_mix_hdr_t;
+
+/**** MIX device open  *****/
+typedef struct dat_mix_dev_attr
+{
+       uint8_t         ack_timer;
+       uint8_t         ack_retry;
+       uint8_t         rnr_timer;
+       uint8_t         rnr_retry;
+       uint8_t         global;
+       uint8_t         hop_limit;
+       uint8_t         tclass;
+       uint8_t         sl;
+       uint8_t         mtu;
+       uint8_t         rd_atom_in;
+       uint8_t         rd_atom_out;
+       uint8_t         pkey_idx;
+       uint16_t        pkey;
+       uint16_t        max_inline;
 
-};
+} dat_mix_dev_attr_t;
 
-/* MIX message, 256 bytes */
-typedef struct _dat_mix_msg
+/***** MIX open, device address info returned */
+typedef struct dat_mix_open
 {
-       uint16_t                ver;            /* version */
-       uint16_t                op;             /* operation type */
-       uint32_t                len;            /* operation data length */
-       uint64_t                hdl;            /* handle */
-       uint64_t                ctx;            /* context */
-       uint8_t                 data[232];      /* operation data */
-       
-} dat_mix_msg_t;
+       dat_mix_hdr_t           hdr;
+       char                    name[64];
+       uint16_t                port;           /* ib physical port number */
+       dat_mix_dev_attr_t      dev_attr;
+       dat_mcm_addr_t          dev_addr;
+
+} dat_mix_open_t;
+
+/***** MIX memory registration *****/
+/*
+ * Header followed by mr_id, len, off and ctx; 32 bytes with every member on
+ * its natural alignment (off starts at offset 16, ctx at offset 24).
+ * NOTE(review): units of len/off and the owner of ctx are not visible
+ * here -- confirm against the MIX memory registration code.
+ */
+typedef struct dat_mix_mr
+{
+       dat_mix_hdr_t           hdr;
+       uint32_t                mr_id;
+       uint32_t                len;
+       uint64_t                off;
+       uint64_t                ctx;
+
+} dat_mix_mr_t;
+
+/***** MIX listen, status returned, no data *****/
+/* Header plus two 16-bit members, 12 bytes in total. */
+typedef struct dat_mix_listen
+{
+       dat_mix_hdr_t           hdr;
+       uint16_t                sid;
+       uint16_t                backlog;
+
+} dat_mix_listen_t;
+
+/***** MIX create QP *****/
+/*
+ * QP attributes exchanged in dat_mix_qp_t. Four uint8_t members and ten
+ * uint32_t members occupy 44 bytes; on ABIs that align uint64_t to 8 bytes
+ * ctx then starts at offset 48 behind 4 bytes of implicit padding, for a
+ * total of 56 bytes. Both ends of the exchange must agree on that layout.
+ */
+typedef struct dat_mix_qp_attr
+{
+       uint8_t         qp_type;
+       uint8_t         state;
+       uint8_t         cur_state;
+       uint8_t         sq_sig_all;
+       uint32_t        qp_num;
+       uint32_t        qkey;
+       uint32_t        max_send_wr;
+       uint32_t        max_recv_wr;
+       uint32_t        max_send_sge;
+       uint32_t        max_recv_sge;
+       uint32_t        max_inline_data;
+       uint32_t        qp_id;
+       uint32_t        scq_id;
+       uint32_t        rcq_id;
+       uint64_t        ctx;
+
+} dat_mix_qp_attr_t;
+
+/*
+ * MIX QP message: header followed by two attribute sets, one for the QP on
+ * the proxy (qp_t) and one for the QP on the MIC (qp_r).
+ *
+ * todo, move posting WR's to aperture windows ??
+ * ok for now since we can post async and queue them up.
+ * For initial prototyping write streams we don't have many
+ * completions. SCIF should be 2x speeds so once we pipeline
+ * it will keep up with IB speeds.
+ */
+typedef struct dat_mix_qp
+{
+       dat_mix_hdr_t           hdr;
+       dat_mix_qp_attr_t       qp_t;   /* on Proxy */
+       dat_mix_qp_attr_t       qp_r;   /* on MIC */
+
+} dat_mix_qp_t;
+
+/***** MIX CQ operations, create, free, poll, event *****/
+/*
+ * todo, move polling WC's and notifications to aperture windows
+ * Might not be needed unless signaling lots of TX WR's
+ *
+ * Layout: header, 64-bit cq_ctx, cq_len, cq_id, 64-bit wr_id, then seven
+ * uint32_t members; that is 60 bytes of members, rounded up to 64 on ABIs
+ * that align uint64_t to 8 bytes.
+ * NOTE(review): wr_id .. wc_flags carry the same names as members of
+ * struct ibv_wc, presumably a relayed work completion -- confirm.
+ */
+typedef struct dat_mix_cq
+{
+       dat_mix_hdr_t           hdr;
+       uint64_t                cq_ctx;
+       uint32_t                cq_len;
+       uint32_t                cq_id;
+       uint64_t                wr_id;
+       uint32_t                status;
+       uint32_t                opcode;
+       uint32_t                vendor_err;
+       uint32_t                byte_len;
+       uint32_t                qp_num;
+       uint32_t                src_qp;
+       uint32_t                wc_flags;
+
+} dat_mix_cq_t;
+
+/*
+ * MIX CM message: header, a 64-bit context, a 32-bit CM id and a complete
+ * embedded MCM wire message (dat_mcm_msg_t).
+ */
+typedef struct dat_mix_cm
+{
+       dat_mix_hdr_t           hdr;
+       uint64_t                cm_ctx;
+       uint32_t                cm_id;
+       dat_mcm_msg_t           msg;
+
+} dat_mix_cm_t;
+
 
 #endif         /* _DAT_MIC_EXTENSIONS_H_ */
index d43a09b35fad63591a52670fe55f0dd15ef7e91c..d7a28640b7064885e20cebfe84c2e05799701004 100755 (executable)
 #define CNO_TIMEOUT       (1000*1000*1)
 #define DTO_FLUSH_TIMEOUT (1000*1000*2)
 #define CONN_TIMEOUT      (1000*1000*100)
-#define SERVER_TIMEOUT    DAT_TIMEOUT_INFINITE
+#define SERVER_TIMEOUT    10000000
 #define RDMA_BUFFER_SIZE  (64)
 
 /* Global DAT vars */