dapl/openib_common/qp.c \
dapl/openib_common/util.c \
dapl/openib_mcm/cm.c \
+ dapl/openib_mcm/mix.c \
dapl/openib_mcm/device.c $(XPROGRAMS)
dapl_udapl_libdaplomcm_la_LDFLAGS = -version-info 2:0:0 $(daplomcm_version_script) \
if (hca_ptr->ib_trans.ib_cq_empty) {
struct ibv_comp_channel *channel;
- channel = hca_ptr->ib_trans.ib_cq_empty->channel;
- ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+ channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+ ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
ibv_destroy_comp_channel(channel);
}
dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
IN DAPL_EVD * evd_ptr, IN DAT_COUNT * cqlen)
{
- struct ibv_comp_channel *channel;
- DAT_RETURN ret;
+ struct ibv_comp_channel *channel = NULL;
+ int ret = ENOMEM;
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
"dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen);
+ /* create CQ object */
+ evd_ptr->ib_cq_handle = dapl_os_alloc(sizeof(struct dcm_ib_cq));
+ if (!evd_ptr->ib_cq_handle)
+ goto err;
+
+ dapl_os_memzero(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq));
+ evd_ptr->ib_cq_handle->tp = &ia_ptr->hca_ptr->ib_trans;
+ evd_ptr->ib_cq_handle->evd = evd_ptr;
+
if (!evd_ptr->cno_ptr)
channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle);
else
channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
if (!channel)
- return DAT_INSUFFICIENT_RESOURCES;
+ goto err;
- evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
- *cqlen, evd_ptr, channel, 0);
+ evd_ptr->ib_cq_handle->ib_cq =
+ ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+ *cqlen, evd_ptr, channel, 0);
- if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE) {
- ret = DAT_INSUFFICIENT_RESOURCES;
+ if (!evd_ptr->ib_cq_handle->ib_cq)
goto err;
- }
/* arm cq for events */
dapls_set_cq_notify(ia_ptr, evd_ptr);
/* update with returned cq entry size */
- *cqlen = evd_ptr->ib_cq_handle->cqe;
+ *cqlen = evd_ptr->ib_cq_handle->ib_cq->cqe;
+#ifdef _OPENIB_MCM_
+	/* shadow support, MPXYD; ret is pre-set to ENOMEM, so only
+	 * test it when dapli_mix_cq_create() actually ran */
+	if (ia_ptr->hca_ptr->ib_trans.scif_ep) {
+		ret = dapli_mix_cq_create(evd_ptr->ib_cq_handle);
+		if (ret)
+			goto err;
+	}
+#endif
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
"dapls_ib_cq_alloc: new_cq %p cqlen=%d \n",
return DAT_SUCCESS;
err:
-	if (!evd_ptr->cno_ptr)
+	if (evd_ptr->ib_cq_handle)
+		dapl_os_free(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq));
+	evd_ptr->ib_cq_handle = NULL; /* don't leave a dangling handle */
+	if (!evd_ptr->cno_ptr && channel)
		ibv_destroy_comp_channel(channel);
- return ret;
+
+ return dapl_convert_errno(ret, "cq_allocate" );
}
/*
if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
/* pull off CQ and EVD entries and toss */
- while (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, &wc) == 1) ;
+ while (ibv_poll_cq(evd_ptr->ib_cq_handle->ib_cq, 1, &wc) == 1) ;
while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ;
- channel = evd_ptr->ib_cq_handle->channel;
- if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
+ channel = evd_ptr->ib_cq_handle->ib_cq->channel;
+ if (ibv_destroy_cq(evd_ptr->ib_cq_handle->ib_cq))
return (dapl_convert_errno(errno, "ibv_destroy_cq"));
if (!evd_ptr->cno_ptr)
ibv_destroy_comp_channel(channel);
+#ifdef _OPENIB_MCM_
+ /* shadow support, MPXYD */
+ if (ia_ptr->hca_ptr->ib_trans.scif_ep)
+ dapli_mix_cq_free(evd_ptr->ib_cq_handle);
+#endif
+ dapl_os_free(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq));
evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
}
return DAT_SUCCESS;
DAT_RETURN
dapls_evd_dto_wait(IN DAPL_EVD * evd_ptr, IN uint32_t timeout)
{
- struct ibv_comp_channel *channel = evd_ptr->ib_cq_handle->channel;
+ struct ibv_comp_channel *channel = evd_ptr->ib_cq_handle->ib_cq->channel;
struct ibv_cq *ibv_cq = NULL;
void *context;
int status;
*/
DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr)
{
- if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, 0))
+ if (ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, 0))
return (dapl_convert_errno(errno, "notify_cq"));
else
return DAT_SUCCESS;
IN DAPL_EVD * evd_ptr,
IN ib_notification_type_t type)
{
- if (ibv_req_notify_cq(evd_ptr->ib_cq_handle, type))
+ if (ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, type))
return (dapl_convert_errno(errno, "notify_cq_type"));
else
return DAT_SUCCESS;
{
int ret;
- ret = ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr);
+ ret = ibv_poll_cq(evd_ptr->ib_cq_handle->ib_cq, 1, wc_ptr);
if (ret == 1)
return DAT_SUCCESS;
#endif /*__cplusplus */
/* Typedefs to map common DAPL provider types to IB verbs */
-typedef struct ibv_qp *ib_qp_handle_t;
-typedef struct ibv_cq *ib_cq_handle_t;
+struct dcm_ib_qp {
+ uint64_t qp_ctx; /* local */
+ uint64_t sqp_ctx; /* shadow */
+ struct _ib_hca_transport *tp;
+ struct dapl_ep *ep;
+ struct ibv_qp *qp; /* local */
+ struct ibv_qp *sqp; /* shadow */
+ uint32_t qp_id; /* local */
+ uint32_t sqp_id; /* shadow */
+};
+
+struct dcm_ib_cq {
+ uint64_t cq_ctx; /* local */
+ uint64_t scq_ctx; /* shadow */
+ struct _ib_hca_transport *tp;
+ struct dapl_evd *evd;
+ struct ibv_cq *ib_cq;
+ struct ibv_comp_channel *ib_ch;
+ uint32_t cq_id; /* local */
+ uint32_t scq_id; /* shadow */
+};
+
+typedef struct dcm_ib_cq *ib_cq_handle_t;
+typedef struct dcm_ib_qp *ib_qp_handle_t;
typedef struct ibv_pd *ib_pd_handle_t;
typedef struct ibv_mr *ib_mr_handle_t;
typedef struct ibv_mw *ib_mw_handle_t;
DAT_RETURN getlocalipaddr(char *addr, int addr_len);
/* qp.c */
-DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp);
-DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN struct ibv_qp *qp);
+DAT_RETURN dapls_modify_qp_state(IN struct ibv_qp *qp_handle,
IN ib_qp_state_t qp_state,
IN uint32_t qpn,
IN uint16_t lid,
IN ib_gid_handle_t gid);
ib_ah_handle_t dapls_create_ah( IN DAPL_HCA *hca,
IN ib_pd_handle_t pd,
- IN ib_qp_handle_t qp,
+ IN struct ibv_qp *qp,
IN uint16_t lid,
IN ib_gid_handle_t gid);
STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
#define CQE_WR_TYPE_UD(id) \
- (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp_type == IBV_QPT_UD)
+ (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp->qp_type == IBV_QPT_UD)
/*
* dapls_ib_post_recv
cookie->val.dto.size = total_len;
}
- ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+ ret = ibv_post_recv(ep_ptr->qp_handle->qp, &wr, &bad_wr);
if (ret)
return(dapl_convert_errno(errno,"ibv_recv"));
remote_iov, completion_flags);
#ifdef DAT_EXTENSIONS
- if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC)
+ if (ep_ptr->qp_handle->qp->qp_type != IBV_QPT_RC)
return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
#endif
/* setup the work request */
" post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
- ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+ ret = ibv_post_send(ep_ptr->qp_handle->qp, &wr, &bad_wr);
if (ret)
return(dapl_convert_errno(errno,"ibv_send"));
break;
case OP_SEND_UD:
/* post must be on EP with service_type of UD */
- if (ep_ptr->qp_handle->qp_type != IBV_QPT_UD)
+ if (ep_ptr->qp_handle->qp->qp_type != IBV_QPT_UD)
return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP));
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" post_snd: op 0x%x flags 0x%x sglist %p, %d\n",
wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
- ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+ ret = ibv_post_send(ep_ptr->qp_handle->qp, &wr, &bad_wr);
if (ret)
return( dapl_convert_errno(errno,"ibv_send") );
DAPL_EVD *rcv_evd, *req_evd;
ib_cq_handle_t rcv_cq, req_cq;
ib_pd_handle_t ib_pd_handle;
+ int ret = EINVAL;
struct ibv_qp_init_attr qp_create;
#ifdef _OPENIB_CMA_
dp_ib_cm_handle_t conn;
return (dapl_convert_errno(ENOMEM, "create_cq_chan"));
/* Call IB verbs to create CQ */
- rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
- 1, NULL, channel, 0);
+ rcv_cq = dapl_os_alloc(sizeof(struct dcm_ib_cq));
+ if (!rcv_cq)
+ return (dapl_convert_errno(ENOMEM, " alloc cq"));
- if (rcv_cq == IB_INVALID_HANDLE) {
+ dapl_os_memzero(rcv_cq, sizeof(struct dcm_ib_cq));
+
+ rcv_cq->ib_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+ 1, NULL, channel, 0);
+
+ if (!rcv_cq->ib_cq) {
ibv_destroy_comp_channel(channel);
return (dapl_convert_errno(ENOMEM, "create_cq"));
}
else
req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+ /* create QP object */
+ ep_ptr->qp_handle = dapl_os_alloc(sizeof(struct dcm_ib_qp));
+ if (!ep_ptr->qp_handle)
+ return (dapl_convert_errno(ENOMEM, "create_qp"));
+
+ dapl_os_memzero(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp));
+ ep_ptr->qp_handle->tp = &ia_ptr->hca_ptr->ib_trans;
+ ep_ptr->qp_handle->ep = ep_ptr;
+
/*
* IMPLEMENTATION NOTE:
* uDAPL allows consumers to post buffers on the EP after creation
#ifdef _OPENIB_CMA_
/* Allocate CM and initialize lock */
- if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
- return (dapl_convert_errno(ENOMEM, "cm_create"));
-
+ if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL) {
+ ret = ENOMEM;
+ goto err;
+ }
/* open identifies the local device; per DAT specification */
if (rdma_bind_addr(conn->cm_id,
(struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) {
dapls_cm_free(conn);
- return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr"));
+ ret = EAFNOSUPPORT;
+ goto err;
}
#endif
/* Setup attributes and create qp */
dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
- qp_create.recv_cq = rcv_cq;
+ qp_create.recv_cq = rcv_cq->ib_cq;
qp_create.cap.max_recv_wr = rcv_evd ? attr->max_recv_dtos:0;
qp_create.cap.max_recv_sge = rcv_evd ? attr->max_recv_iov:0;
- qp_create.send_cq = req_cq;
+ qp_create.send_cq = req_cq->ib_cq;
qp_create.cap.max_send_wr = req_evd ? attr->max_request_dtos:0;
qp_create.cap.max_send_sge = req_evd ? attr->max_request_iov:0;
qp_create.cap.max_inline_data =
#ifdef DAT_EXTENSIONS
if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
#ifdef _OPENIB_CMA_
- return (DAT_NOT_IMPLEMENTED);
+ goto err;
#endif
qp_create.qp_type = IBV_QPT_UD;
if (attr->max_message_size >
(128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
- return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
+ goto err;
}
}
#endif
#ifdef _OPENIB_CMA_
if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
dapls_cm_free(conn);
- return (dapl_convert_errno(errno, "rdma_create_qp"));
+ ret = errno;
+ goto err;
}
- ep_ptr->qp_handle = conn->cm_id->qp;
+ ep_ptr->qp_handle->qp = conn->cm_id->qp;
ep_ptr->qp_state = IBV_QPS_INIT;
ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
#else
- ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
- if (!ep_ptr->qp_handle)
- return (dapl_convert_errno(ENOMEM, "create_qp"));
+ ep_ptr->qp_handle->qp = ibv_create_qp(ib_pd_handle, &qp_create);
+ if (!ep_ptr->qp_handle->qp) {
+ ret = errno;
+ goto err;
+ }
+
+#ifdef _OPENIB_MCM_
+ /* shadow support, MPXYD */
+ ep_ptr->qp_handle->qp_ctx = (uint64_t)ep_ptr;
+ ep_ptr->qp_handle->qp_id = 0; /* ??? */
+ if (ia_ptr->hca_ptr->ib_trans.scif_ep)
+ dapli_mix_qp_create(ep_ptr->qp_handle, &qp_create);
+#endif
/* Setup QP attributes for INIT state on the way out */
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) {
- ibv_destroy_qp(ep_ptr->qp_handle);
- ep_ptr->qp_handle = IB_INVALID_HANDLE;
- return DAT_INTERNAL_ERROR;
+ ibv_destroy_qp(ep_ptr->qp_handle->qp);
+ ret = errno;
+ goto err;
}
#endif
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" qp_alloc: qpn 0x%x type %d sq %d,%d rq %d,%d\n",
- ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type,
+ ep_ptr->qp_handle->qp->qp_num,
+ ep_ptr->qp_handle->qp->qp_type,
qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
return DAT_SUCCESS;
+err:
+ if (ep_ptr->qp_handle)
+ dapl_os_free(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp));
+
+ ep_ptr->qp_handle = IB_INVALID_HANDLE;
+
+ return (dapl_convert_errno(ret, "create_qp"));
}
/*
dapl_os_lock(&ep_ptr->header.lock);
if (ep_ptr->qp_handle != NULL) {
- qp = ep_ptr->qp_handle;
+ qp = ep_ptr->qp_handle->qp;
dapl_os_unlock(&ep_ptr->header.lock);
qp_attr.qp_state = IBV_QPS_ERR;
ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
dapls_ep_flush_cqs(ep_ptr);
- ep_ptr->qp_handle = NULL;
#ifdef _OPENIB_CMA_
rdma_destroy_qp(cm_ptr->cm_id);
cm_ptr->cm_id->qp = NULL;
strerror(errno));
}
#endif
+
+#ifdef _OPENIB_MCM_
+	/* shadow support, MPXYD */
+	if (ia_ptr->hca_ptr->ib_trans.scif_ep)
+		dapli_mix_qp_free(ep_ptr->qp_handle);
+
+	/* TODO: flush shadow CQ on MPXYD */
+
+#endif
+	/* free the QP container BEFORE clearing the EP's reference;
+	 * the previous order always passed NULL to dapl_os_free() */
+	dapl_os_free(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp));
+	ep_ptr->qp_handle = NULL;
	} else {
		dapl_os_unlock(&ep_ptr->header.lock);
	}
+
+
return DAT_SUCCESS;
}
/* move to error state if necessary */
if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
- (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
- return (dapls_modify_qp_state(ep_ptr->qp_handle,
+ (ep_ptr->qp_handle->qp->state != IBV_QPS_ERR)) {
+ return (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_ERR, 0, 0, 0));
}
/* consumer ep_modify, init state */
- if (ep_ptr->qp_handle->state == IBV_QPS_INIT) {
- return (dapls_modify_qp_state(ep_ptr->qp_handle,
+ if (ep_ptr->qp_handle->qp->state == IBV_QPS_INIT) {
+ return (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_INIT, 0, 0, 0));
}
/*
* Check if we have the right qp_state to modify attributes
*/
- if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
- (ep_ptr->qp_handle->state != IBV_QPS_RTS))
+ if ((ep_ptr->qp_handle->qp->state != IBV_QPS_RTR) &&
+ (ep_ptr->qp_handle->qp->state != IBV_QPS_RTS))
return DAT_INVALID_STATE;
/* Adjust to current EP attributes */
qp_attr.cap.max_recv_sge = attr->max_recv_iov;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
+ "modify_qp: qp_h %p sq %d,%d, rq %d,%d\n",
ep_ptr->qp_handle,
qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);
- if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
+ if (ibv_modify_qp(ep_ptr->qp_handle->qp, &qp_attr, IBV_QP_CAP)) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- "modify_qp: modify ep %p qp %p failed\n",
- ep_ptr, ep_ptr->qp_handle);
+ "modify_qp: modify ep %p qp_h %p failed\n",
+ ep_ptr, ep_ptr->qp_handle->qp);
return (dapl_convert_errno(errno, "modify_qp_state"));
}
void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
{
if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
- ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+ ep_ptr->qp_handle->qp->qp_type != IBV_QPT_UD) {
/* move to RESET state and then to INIT */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0);
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0);
+ dapls_modify_qp_state(ep_ptr->qp_handle->qp, IBV_QPS_RESET,0,0,0);
+ dapls_modify_qp_state(ep_ptr->qp_handle->qp, IBV_QPS_INIT,0,0,0);
}
}
#endif // _WIN32 || _WIN64
* CM msg provides QP attributes, info in network order
*/
DAT_RETURN
-dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
+dapls_modify_qp_state(IN struct ibv_qp *qp_handle,
IN ib_qp_state_t qp_state,
IN uint32_t qpn,
IN uint16_t lid,
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x"
" port %d ep %p qp_state %d rd_atomic %d\n",
- qp_handle->qp_type, ntohl(qpn), gid,
+ qp_handle->qp_type, ntohl(qpn), gid,
ia_ptr->hca_ptr->ib_trans.global,
ntohs(lid), ia_ptr->hca_ptr->port_num,
ep_ptr, ep_ptr->qp_state,
/* Modify UD type QP from init, rtr, rts, info network order */
DAT_RETURN
-dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
+dapls_modify_qp_ud(IN DAPL_HCA *hca, IN struct ibv_qp *qp)
{
struct ibv_qp_attr qp_attr;
qp_attr.pkey_index = hca->ib_trans.pkey_idx;
qp_attr.port_num = hca->port_num;
qp_attr.qkey = DAT_UD_QKEY;
- if (ibv_modify_qp(qp, &qp_attr,
+ if (ibv_modify_qp(qp, &qp_attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
IBV_QP_PORT |
dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_RTS;
qp_attr.sq_psn = 1;
- if (ibv_modify_qp(qp, &qp_attr,
+ if (ibv_modify_qp(qp, &qp_attr,
IBV_QP_STATE | IBV_QP_SQ_PSN)) {
dapl_log(DAPL_DBG_TYPE_ERR,
" modify_ud_qp RTS: ERR %s\n", strerror(errno));
ib_ah_handle_t
dapls_create_ah(IN DAPL_HCA *hca,
IN ib_pd_handle_t pd,
- IN ib_qp_handle_t qp,
+ IN struct ibv_qp *qp,
IN uint16_t lid,
IN ib_gid_handle_t gid)
{
switch (event.event_type) {
case IBV_EVENT_CQ_ERR:
{
- struct dapl_ep *evd_ptr =
+ struct dapl_evd *evd_ptr =
event.element.cq->cq_context;
dapl_log(DAPL_DBG_TYPE_ERR,
/* report up if async callback still setup */
if (hca->async_cq_error)
hca->async_cq_error(hca->ib_ctx,
- event.element.cq,
+ evd_ptr->ib_cq_handle,
&event,
(void *)evd_ptr);
break;
#include "dapl_ep_util.h"
#include "dapl_osd.h"
-
-#if defined(_WIN32)
-#include <rdma\winverbs.h>
-#else // _WIN32
enum DAPL_FD_EVENTS {
DAPL_FD_READ = POLLIN,
DAPL_FD_WRITE = POLLOUT,
static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set,
enum DAPL_FD_EVENTS event)
{
+ if (!s)
+ return 0;
+
if (set->index == DAPL_FD_SETSIZE - 1) {
dapl_log(DAPL_DBG_TYPE_ERR,
"SCM ERR: cm_thread exceeded FD_SETSIZE %d\n",
struct pollfd fds;
int ret;
+ if (!s)
+ return 0;
+
fds.fd = s;
fds.events = event;
fds.revents = 0;
dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret);
return ret;
}
-#endif
/* forward declarations */
-static int ucm_reply(dp_ib_cm_handle_t cm);
-static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg);
-static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg);
-static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg);
-static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size);
-static void ucm_disconnect_final(dp_ib_cm_handle_t cm);
+static int mcm_reply(dp_ib_cm_handle_t cm);
+static void mcm_accept(ib_cm_srvc_handle_t cm, dat_mcm_msg_t *msg);
+static void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg);
+static void mcm_accept_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg);
+static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size);
+static void mcm_disconnect_final(dp_ib_cm_handle_t cm);
DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm);
DAT_RETURN dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm);
/* Service ids - port space */
-static uint16_t ucm_get_port(ib_hca_transport_t *tp, uint16_t port)
+static uint16_t mcm_get_port(ib_hca_transport_t *tp, uint16_t port)
{
int i = 0;
return i;
}
-static void ucm_free_port(ib_hca_transport_t *tp, uint16_t port)
+static void mcm_free_port(ib_hca_transport_t *tp, uint16_t port)
{
dapl_os_lock(&tp->plock);
tp->sid[port] = 0;
dapl_os_unlock(&tp->plock);
}
-static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
+static void mcm_check_timers(dp_ib_cm_handle_t cm, int *timer)
{
DAPL_OS_TIMEVAL time;
" CM_REQ retry %p %d [lid, port, cqp, iqp]:"
" %x %x %x %x -> %x %x %x %x Time(ms) %d > %d\n",
cm, cm->retries+1,
- ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+ ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+ ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+ ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
(time - cm->timer)/1000,
cm->hca->ib_trans.rep_time << cm->retries);
cm->retries++;
" %x %x %x %x -> %x %x %x %x r_pid %x Time(ms) %d > %d\n",
cm->retries+1,
dapl_cm_op_str(ntohs(cm->msg.op)),
- ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+ ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+ ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+ ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
ntohl(cm->msg.d_id),
(time - cm->timer)/1000,
cm->hca->ib_trans.rtu_time << cm->retries);
cm->retries++;
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_REP_RETRY);
dapl_os_unlock(&cm->lock);
- ucm_reply(cm);
+ mcm_reply(cm);
return;
}
break;
" CM_DREQ retry %d [lid, port, cqp, iqp]:"
" %x %x %x %x -> %x %x %x %x r_pid %x Time(ms) %d > %d\n",
cm->retries+1,
- ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+ ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+ ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+ ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
ntohl(cm->msg.d_id),
(time - cm->timer)/1000,
cm->hca->ib_trans.rtu_time << cm->retries);
/* SEND CM MESSAGE PROCESSING */
/* Get CM UD message from send queue, called with s_lock held */
-static ib_cm_msg_t *ucm_get_smsg(ib_hca_transport_t *tp)
+static dat_mcm_msg_t *mcm_get_smsg(ib_hca_transport_t *tp)
{
- ib_cm_msg_t *msg = NULL;
+ dat_mcm_msg_t *msg = NULL;
int ret, polled = 1, hd = tp->s_hd;
hd++;
msg = NULL;
if (polled % 1000000 == 0)
dapl_log(DAPL_DBG_TYPE_WARN,
- " ucm_get_smsg: FULLq hd %d == tl %d,"
+ " mcm_get_smsg: FULLq hd %d == tl %d,"
" completions stalled, polls=%d\n",
hd, tp->s_tl, polled);
}
if (msg == NULL) {
struct ibv_wc wc;
- /* process completions, based on UCM_TX_BURST */
+ /* process completions, based on mcm_TX_BURST */
ret = ibv_poll_cq(tp->scq, 1, &wc);
if (ret < 0) {
dapl_log(DAPL_DBG_TYPE_WARN,
/* RECEIVE CM MESSAGE PROCESSING */
-static int ucm_post_rmsg(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
+static int mcm_post_rmsg(ib_hca_transport_t *tp, dat_mcm_msg_t *msg)
{
struct ibv_recv_wr recv_wr, *recv_err;
struct ibv_sge sge;
recv_wr.sg_list = &sge;
recv_wr.num_sge = 1;
recv_wr.wr_id = (uint64_t)(uintptr_t) msg;
- sge.length = sizeof(ib_cm_msg_t) + sizeof(struct ibv_grh);
+ sge.length = sizeof(dat_mcm_msg_t) + sizeof(struct ibv_grh);
sge.lkey = tp->mr_rbuf->lkey;
sge.addr = (uintptr_t)((char *)msg - sizeof(struct ibv_grh));
return (ibv_post_recv(tp->qp, &recv_wr, &recv_err));
}
-static int ucm_reject(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
+static int mcm_reject(ib_hca_transport_t *tp, dat_mcm_msg_t *msg)
{
- ib_cm_msg_t smsg;
+ dat_mcm_msg_t smsg;
/* setup op, rearrange the src, dst cm and addr info */
(void)dapl_os_memzero(&smsg, sizeof(smsg));
smsg.sport = msg->dport;
smsg.sqpn = msg->dqpn;
- dapl_os_memcpy(&smsg.daddr, &msg->saddr, sizeof(union dcm_addr));
+ dapl_os_memcpy(&smsg.daddr, &msg->saddr, sizeof(dat_mcm_addr_t));
/* no dst_addr IB info in REQ, init lid, gid, get type from saddr */
- smsg.saddr.ib.lid = tp->addr.ib.lid;
- smsg.saddr.ib.qp_type = msg->saddr.ib.qp_type;
- dapl_os_memcpy(&smsg.saddr.ib.gid[0],
- &tp->addr.ib.gid, 16);
+ smsg.saddr.lid = tp->addr.lid;
+ smsg.saddr.qp_type = msg->saddr.qp_type;
+ dapl_os_memcpy(&smsg.saddr.gid[0],
+ &tp->addr.gid, 16);
- dapl_os_memcpy(&smsg.saddr, &msg->daddr, sizeof(union dcm_addr));
+ dapl_os_memcpy(&smsg.saddr, &msg->daddr, sizeof(dat_mcm_addr_t));
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" CM reject -> LID %x, QPN %x PORT %x\n",
- ntohs(smsg.daddr.ib.lid),
+ ntohs(smsg.daddr.lid),
ntohl(smsg.dqpn), ntohs(smsg.dport));
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&tp->hca->ia_list_head)), DCNT_IA_CM_ERR_REJ_TX);
- return (ucm_send(tp, &smsg, NULL, 0));
+ return (mcm_send(tp, &smsg, NULL, 0));
}
-static void ucm_process_recv(ib_hca_transport_t *tp,
- ib_cm_msg_t *msg,
+static void mcm_process_recv(ib_hca_transport_t *tp,
+ dat_mcm_msg_t *msg,
dp_ib_cm_handle_t cm)
{
dapl_os_lock(&cm->lock);
switch (cm->state) {
case DCM_LISTEN: /* passive */
dapl_os_unlock(&cm->lock);
- ucm_accept(cm, msg);
+ mcm_accept(cm, msg);
break;
case DCM_RTU_PENDING: /* passive */
dapl_os_unlock(&cm->lock);
- ucm_accept_rtu(cm, msg);
+ mcm_accept_rtu(cm, msg);
break;
case DCM_REP_PENDING: /* active */
dapl_os_unlock(&cm->lock);
- ucm_connect_rtu(cm, msg);
+ mcm_connect_rtu(cm, msg);
break;
case DCM_CONNECTED: /* active and passive */
/* DREQ, change state and process */
" %x %x %x %x -> %x %x %x %x r_pid %x\n",
dapl_cm_op_str(ntohs(cm->msg.op)),
dapl_cm_state_str(cm->state),
- ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+ ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+ ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+ ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
ntohl(cm->msg.d_id));
cm->msg.op = htons(DCM_RTU);
- ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
+ mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_RTU_RETRY);
}
case DCM_DISC_PENDING: /* active and passive */
/* DREQ or DREP, finalize */
dapl_os_unlock(&cm->lock);
- ucm_disconnect_final(cm);
+ mcm_disconnect_final(cm);
break;
case DCM_DISCONNECTED:
case DCM_FREE:
" %x %x %x -> %x %x %x\n",
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
- ntohs(msg->saddr.ib.lid),
+ ntohs(msg->saddr.lid),
ntohs(msg->sport),
- ntohl(msg->saddr.ib.qpn),
- ntohs(msg->daddr.ib.lid),
+ ntohl(msg->saddr.qpn),
+ ntohs(msg->daddr.lid),
ntohs(msg->dport),
- ntohl(msg->daddr.ib.qpn));
+ ntohl(msg->daddr.qpn));
cm->msg.op = htons(DCM_DREP);
- ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
+ mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_DREP_RETRY);
} else if (ntohs(msg->op) != DCM_DREP){
/* DREP ok to ignore, any other print warning */
dapl_log(DAPL_DBG_TYPE_WARN,
- " ucm_recv: UNEXPECTED MSG on cm %p"
+ " mcm_recv: UNEXPECTED MSG on cm %p"
" <- op %s, st %s spsp %x sqpn %x\n",
cm, dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
}
default:
dapl_log(DAPL_DBG_TYPE_WARN,
- " ucm_recv: Warning, UNKNOWN state"
+ " mcm_recv: Warning, UNKNOWN state"
" <- op %s, %s spsp %x sqpn %x slid %x\n",
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
ntohs(msg->sport), ntohl(msg->sqpn),
- ntohs(msg->saddr.ib.lid));
+ ntohs(msg->saddr.lid));
dapl_os_unlock(&cm->lock);
break;
}
}
/* Find matching CM object for this receive message, return CM reference, timer */
-dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
+dp_ib_cm_handle_t mcm_cm_find(ib_hca_transport_t *tp, dat_mcm_msg_t *msg)
{
dp_ib_cm_handle_t cm = NULL, next, found = NULL;
struct dapl_llist_entry **list;
if (!listenq &&
cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn &&
cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn &&
- cm->msg.daddr.ib.lid == msg->saddr.ib.lid) {
+ cm->msg.daddr.lid == msg->saddr.lid) {
if (ntohs(msg->op) != DCM_REQ) {
found = cm;
break;
cm, dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_op_str(ntohs(cm->msg.op)),
dapl_cm_state_str(cm->state),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
- ntohs(msg->saddr.ib.lid), ntohs(msg->sport),
- ntohl(msg->sqpn), ntohl(msg->saddr.ib.qpn),
- ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
- ntohl(msg->dqpn), ntohl(msg->daddr.ib.qpn),
- ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn));
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+ ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
+ ntohs(msg->saddr.lid), ntohs(msg->sport),
+ ntohl(msg->sqpn), ntohl(msg->saddr.qpn),
+ ntohs(msg->daddr.lid), ntohs(msg->dport),
+ ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+ ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+ ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn));
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_REQ_DUP);
/* not match on listenq for valid request, send reject */
if (ntohs(msg->op) == DCM_REQ && !found) {
dapl_log(DAPL_DBG_TYPE_WARN,
- " ucm_recv: NO LISTENER for %s %x %x i%x c%x"
+ " mcm_recv: NO LISTENER for %s %x %x i%x c%x"
" < %x %x %x, sending reject\n",
dapl_cm_op_str(ntohs(msg->op)),
- ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
- ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn),
- ntohs(msg->saddr.ib.lid), ntohs(msg->sport),
- ntohl(msg->saddr.ib.qpn));
+ ntohs(msg->daddr.lid), ntohs(msg->dport),
+ ntohl(msg->daddr.qpn), ntohl(msg->sqpn),
+ ntohs(msg->saddr.lid), ntohs(msg->sport),
+ ntohl(msg->saddr.qpn));
- ucm_reject(tp, msg);
+ mcm_reject(tp, msg);
}
if (!found) {
" NO MATCH: op %s [lid, port, cqp, iqp, pid]:"
" %x %x %x %x %x <- %x %x %x %x l_pid %x r_pid %x\n",
dapl_cm_op_str(ntohs(msg->op)),
- ntohs(msg->daddr.ib.lid), ntohs(msg->dport),
- ntohl(msg->dqpn), ntohl(msg->daddr.ib.qpn),
- ntohl(msg->d_id), ntohs(msg->saddr.ib.lid),
+ ntohs(msg->daddr.lid), ntohs(msg->dport),
+ ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+ ntohl(msg->d_id), ntohs(msg->saddr.lid),
ntohs(msg->sport), ntohl(msg->sqpn),
- ntohl(msg->saddr.ib.qpn), ntohl(msg->s_id),
+ ntohl(msg->saddr.qpn), ntohl(msg->s_id),
ntohl(msg->d_id));
if (ntohs(msg->op) == DCM_DREP) {
}
/* Get rmsgs from CM completion queue, 10 at a time */
-static void ucm_recv(ib_hca_transport_t *tp)
+static void mcm_recv(ib_hca_transport_t *tp)
{
struct ibv_wc wc[10];
- ib_cm_msg_t *msg;
+ dat_mcm_msg_t *msg;
dp_ib_cm_handle_t cm;
int i, ret, notify = 0;
struct ibv_cq *ibv_cq = NULL;
notify = 0;
for (i = 0; i < ret; i++) {
- msg = (ib_cm_msg_t*) (uintptr_t) wc[i].wr_id;
+ msg = (dat_mcm_msg_t*) (uintptr_t) wc[i].wr_id;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " ucm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n",
+ " mcm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n",
wc[i].status, dapl_cm_op_str(ntohs(msg->op)),
wc[i].byte_len,
(void*)wc[i].wr_id, wc[i].src_qp);
/* validate CM message, version */
if (ntohs(msg->ver) < DCM_VER_MIN) {
dapl_log(DAPL_DBG_TYPE_WARN,
- " ucm_recv: UNKNOWN msg %p, ver %d\n",
+ " mcm_recv: UNKNOWN msg %p, ver %d\n",
msg, msg->ver);
- ucm_post_rmsg(tp, msg);
+ mcm_post_rmsg(tp, msg);
continue;
}
- if (!(cm = ucm_cm_find(tp, msg))) {
- ucm_post_rmsg(tp, msg);
+ if (!(cm = mcm_cm_find(tp, msg))) {
+ mcm_post_rmsg(tp, msg);
continue;
}
/* match, process it */
- ucm_process_recv(tp, msg, cm);
- ucm_post_rmsg(tp, msg);
+ mcm_process_recv(tp, msg, cm);
+ mcm_post_rmsg(tp, msg);
}
/* finished this batch of WC's, poll and rearm */
}
/* ACTIVE/PASSIVE: build and send CM message out of CM object */
-static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size)
+static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size)
{
- ib_cm_msg_t *smsg = NULL;
+ dat_mcm_msg_t *smsg = NULL;
struct ibv_send_wr wr, *bad_wr;
struct ibv_sge sge;
int len, ret = -1;
- uint16_t dlid = ntohs(msg->daddr.ib.lid);
+ uint16_t dlid = ntohs(msg->daddr.lid);
/* Get message from send queue, copy data, and send */
dapl_os_lock(&tp->slock);
- if ((smsg = ucm_get_smsg(tp)) == NULL) {
+ if ((smsg = mcm_get_smsg(tp)) == NULL) {
dapl_log(DAPL_DBG_TYPE_ERR,
- " ucm_send ERR: get_smsg(hd=%d,tl=%d) \n",
+ " mcm_send ERR: get_smsg(hd=%d,tl=%d) \n",
tp->s_hd, tp->s_tl);
goto bail;
}
sge.addr = (uintptr_t)smsg;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " ucm_send: op %s ln %d lid %x c_qpn %x rport %x\n",
+ " mcm_send: op %s ln %d lid %x c_qpn %x rport %x\n",
dapl_cm_op_str(ntohs(smsg->op)),
- sge.length, htons(smsg->daddr.ib.lid),
+ sge.length, htons(smsg->daddr.lid),
htonl(smsg->dqpn), htons(smsg->dport));
/* empty slot, then create AH */
ret = ibv_post_send(tp->qp, &wr, &bad_wr);
if (ret) {
dapl_log(DAPL_DBG_TYPE_ERR,
- " ucm_send ERR: post_send() %s\n",
+ " mcm_send ERR: post_send() %s\n",
strerror(errno) );
}
}
/* client, release local conn id port */
if (!cm->sp && cm->msg.sport)
- ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+ mcm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
/* clean up any UD address handles */
if (cm->ah) {
if (ep) {
DAPL_HCA *hca = ep->header.owner_ia->hca_ptr;
- cm->msg.sport = htons(ucm_get_port(&hca->ib_trans, 0));
+ cm->msg.sport = htons(mcm_get_port(&hca->ib_trans, 0));
if (!cm->msg.sport) {
dapl_os_wait_object_destroy(&cm->f_event);
dapl_os_wait_object_destroy(&cm->d_event);
/* IB info in network order */
cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */
- cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */
- cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
- cm->msg.saddr.ib.lid = hca->ib_trans.addr.ib.lid;
- dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
- &hca->ib_trans.addr.ib.gid, 16);
+ cm->msg.saddr.qpn = htonl(ep->qp_handle->qp->qp_num); /* ep */
+ cm->msg.saddr.qp_type = ep->qp_handle->qp->qp_type;
+ cm->msg.saddr.lid = hca->ib_trans.addr.lid;
+ dapl_os_memcpy(&cm->msg.saddr.gid[0],
+ &hca->ib_trans.addr.gid, 16);
}
return cm;
bail:
dapls_cm_release(cm);
}
-static void ucm_disconnect_final(dp_ib_cm_handle_t cm)
+static void mcm_disconnect_final(dp_ib_cm_handle_t cm)
{
/* no EP attachment or not RC, nothing to process */
if (cm->ep == NULL ||
switch (cm->state) {
case DCM_CONNECTED:
/* CONSUMER: move to err state to flush, if not UD */
- if (cm->ep->qp_handle->qp_type != IBV_QPT_UD)
- dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+ if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+ dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
/* send DREQ, event after DREP or DREQ timeout */
cm->state = DCM_DISC_PENDING;
dapl_log(DAPL_DBG_TYPE_ERR,
" CM_DREQ: RETRIES EXHAUSTED:"
" %x %x %x -> %x %x %x\n",
- htons(cm->msg.saddr.ib.lid),
- htonl(cm->msg.saddr.ib.qpn),
+ htons(cm->msg.saddr.lid),
+ htonl(cm->msg.saddr.qpn),
htons(cm->msg.sport),
- htons(cm->msg.daddr.ib.lid),
+ htons(cm->msg.daddr.lid),
htonl(cm->msg.dqpn),
htons(cm->msg.dport));
finalize = 1;
break;
case DCM_DISC_RECV:
/* CM_THREAD: move to err state to flush, if not UD */
- if (cm->ep->qp_handle->qp_type != IBV_QPT_UD)
- dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+ if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+ dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
/* DREQ received, send DREP and schedule event, finalize */
cm->msg.op = htons(DCM_DREP);
" disconnect UNKNOWN state: ep %p cm %p %s %s"
" %x %x %x %s %x %x %x r_id %x l_id %x\n",
cm->ep, cm,
- cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
+ cm->msg.saddr.qp_type == IBV_QPT_RC ? "RC" : "UD",
dapl_cm_state_str(cm->state),
- ntohs(cm->msg.saddr.ib.lid),
+ ntohs(cm->msg.saddr.lid),
ntohs(cm->msg.sport),
- ntohl(cm->msg.saddr.ib.qpn),
+ ntohl(cm->msg.saddr.qpn),
cm->sp ? "<-" : "->",
- ntohs(cm->msg.daddr.ib.lid),
+ ntohs(cm->msg.daddr.lid),
ntohs(cm->msg.dport),
- ntohl(cm->msg.daddr.ib.qpn),
+ ntohl(cm->msg.daddr.qpn),
ntohl(cm->msg.d_id),
ntohl(cm->msg.s_id));
}
dapl_os_get_time(&cm->timer); /* reply expected */
- ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
+ mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
dapl_os_unlock(&cm->lock);
if (wakeup)
dapls_thread_signal(&cm->hca->ib_trans.signal);
if (finalize)
- ucm_disconnect_final(cm);
+ mcm_disconnect_final(cm);
return DAT_SUCCESS;
}
dapl_log(DAPL_DBG_TYPE_EP,
" connect: lid %x i_qpn %x lport %x p_sz=%d -> "
" lid %x c_qpn %x rport %x\n",
- htons(cm->msg.saddr.ib.lid), htonl(cm->msg.saddr.ib.qpn),
+ htons(cm->msg.saddr.lid), htonl(cm->msg.saddr.qpn),
htons(cm->msg.sport), htons(cm->msg.p_size),
- htons(cm->msg.daddr.ib.lid), htonl(cm->msg.dqpn),
+ htons(cm->msg.daddr.lid), htonl(cm->msg.dqpn),
htons(cm->msg.dport));
dapl_os_lock(&cm->lock);
dapl_log(DAPL_DBG_TYPE_ERR,
" CM_REQ: RETRIES EXHAUSTED:"
" 0x%x %x 0x%x -> 0x%x %x 0x%x\n",
- htons(cm->msg.saddr.ib.lid),
- htonl(cm->msg.saddr.ib.qpn),
+ htons(cm->msg.saddr.lid),
+ htonl(cm->msg.saddr.qpn),
htons(cm->msg.sport),
- htons(cm->msg.daddr.ib.lid),
+ htons(cm->msg.daddr.lid),
htonl(cm->msg.dqpn),
htons(cm->msg.dport));
cm->state = DCM_REP_PENDING;
cm->msg.op = htons(DCM_REQ);
dapl_os_get_time(&cm->timer); /* reset reply timer */
- if (ucm_send(&cm->hca->ib_trans, &cm->msg,
+ if (mcm_send(&cm->hca->ib_trans, &cm->msg,
&cm->msg.p_data, ntohs(cm->msg.p_size))) {
dapl_os_unlock(&cm->lock);
goto bail;
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR);
dapl_log(DAPL_DBG_TYPE_WARN,
" connect: snd ERR -> cm_lid %x cm_qpn %x r_psp %x p_sz=%d\n",
- htons(cm->msg.daddr.ib.lid),
+ htons(cm->msg.daddr.lid),
htonl(cm->msg.dqpn), htons(cm->msg.dport),
htons(cm->msg.p_size));
/*
* ACTIVE: exchange QP information, called from CR thread
*/
-static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
+static void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg)
{
DAPL_EP *ep = cm->ep;
ib_cm_events_t event = IB_CME_CONNECTED;
" op %s, st %s <- lid %x sqpn %x sport %x\n",
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
- ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn),
+ ntohs(msg->saddr.lid), ntohl(msg->saddr.qpn),
ntohs(msg->sport));
dapl_os_unlock(&cm->lock);
return;
/* save remote address information to EP and CM */
cm->msg.d_id = msg->s_id;
dapl_os_memcpy(&ep->remote_ia_address,
- &msg->saddr, sizeof(union dcm_addr));
+ &msg->saddr, sizeof(dat_mcm_addr_t));
dapl_os_memcpy(&cm->msg.daddr,
- &msg->saddr, sizeof(union dcm_addr));
+ &msg->saddr, sizeof(dat_mcm_addr_t));
/* validate private data size, and copy if necessary */
if (msg->p_size) {
" st %s <- lid %x sqpn %x spsp %x\n",
ntohs(msg->p_size),
dapl_cm_state_str(cm->state),
- ntohs(msg->saddr.ib.lid),
- ntohl(msg->saddr.ib.qpn),
+ ntohs(msg->saddr.lid),
+ ntohl(msg->saddr.qpn),
ntohs(msg->sport));
dapl_os_unlock(&cm->lock);
goto bail;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" CONN_RTU: DST lid=%x,"
" iqp=%x, qp_type=%d, port=%x psize=%d\n",
- ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type,
+ ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn), cm->msg.daddr.qp_type,
ntohs(msg->sport), ntohs(msg->p_size));
if (ntohs(msg->op) == DCM_REP)
" slid %x iqp %x port %x\n", cm,
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
- ntohs(msg->daddr.ib.lid), ntohl(msg->daddr.ib.qpn),
- ntohs(msg->dport), ntohs(msg->saddr.ib.lid),
- ntohl(msg->saddr.ib.qpn), ntohs(msg->sport));
+ ntohs(msg->daddr.lid), ntohl(msg->daddr.qpn),
+ ntohs(msg->dport), ntohs(msg->saddr.lid),
+ ntohl(msg->saddr.qpn), ntohs(msg->sport));
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR_REJ_RX);
event = IB_CME_DESTINATION_REJECT;
}
" slid %x iqp %x port %x\n", cm,
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
- ntohs(msg->daddr.ib.lid), ntohl(msg->daddr.ib.qpn),
- ntohs(msg->dport), ntohs(msg->saddr.ib.lid),
- ntohl(msg->saddr.ib.qpn), ntohs(msg->sport));
+ ntohs(msg->daddr.lid), ntohl(msg->daddr.qpn),
+ ntohs(msg->dport), ntohs(msg->saddr.lid),
+ ntohl(msg->saddr.qpn), ntohs(msg->sport));
cm->state = DCM_REJECTED;
dapl_os_unlock(&cm->lock);
-
-#ifdef DAT_EXTENSIONS
- if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD)
- goto ud_bail;
- else
-#endif
goto bail;
}
dapl_os_unlock(&cm->lock);
/* modify QP to RTR and then to RTS with remote info */
dapl_os_lock(&cm->ep->header.lock);
- if (dapls_modify_qp_state(cm->ep->qp_handle,
+ if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
IBV_QPS_RTR,
- cm->msg.daddr.ib.qpn,
- cm->msg.daddr.ib.lid,
- (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) {
+ cm->msg.daddr.qpn,
+ cm->msg.daddr.lid,
+ (ib_gid_handle_t)cm->msg.daddr.gid) != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" CONN_RTU: QPS_RTR ERR %s <- lid %x iqp %x\n",
- strerror(errno), ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn));
+ strerror(errno), ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn));
dapl_os_unlock(&cm->ep->header.lock);
event = IB_CME_LOCAL_FAILURE;
goto bail;
}
- if (dapls_modify_qp_state(cm->ep->qp_handle,
+ if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
IBV_QPS_RTS,
- cm->msg.daddr.ib.qpn,
- cm->msg.daddr.ib.lid,
+ cm->msg.daddr.qpn,
+ cm->msg.daddr.lid,
NULL) != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" CONN_RTU: QPS_RTS ERR %s <- lid %x iqp %x\n",
- strerror(errno), ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn));
+ strerror(errno), ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn));
dapl_os_unlock(&cm->ep->header.lock);
event = IB_CME_LOCAL_FAILURE;
goto bail;
dapl_os_lock(&cm->lock);
cm->state = DCM_CONNECTED;
- if (ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0)) {
+ if (mcm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0)) {
dapl_os_unlock(&cm->lock);
goto bail;
}
/* init cm_handle and post the event with private data */
dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n");
+ DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ACTIVE_EST);
+ dapl_evd_connection_callback(cm,
+ IB_CME_CONNECTED,
+ cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep);
-#ifdef DAT_EXTENSIONS
-ud_bail:
- if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
- uint16_t lid = ntohs(cm->msg.daddr.ib.lid);
-
- /* post EVENT, modify_qp, AH already created, ucm msg */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_REMOTE_AH;
- xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
- xevent.remote_ah.ah = dapls_create_ah(cm->hca,
- cm->ep->qp_handle->pd,
- cm->ep->qp_handle,
- htons(lid),
- NULL);
- if (xevent.remote_ah.ah == NULL) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " active UD RTU: ERR create_ah"
- " for qpn 0x%x lid 0x%x\n",
- xevent.remote_ah.qpn, lid);
- event = IB_CME_LOCAL_FAILURE;
- goto bail;
- }
- cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */
-
- dapl_os_memcpy(&xevent.remote_ah.ia_addr,
- &cm->msg.daddr,
- sizeof(union dcm_addr));
-
- /* remote ia_addr reference includes ucm qpn, not IB qpn */
- ((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " ACTIVE: UD xevent ah %p qpn %x lid %x\n",
- xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " ACTIVE: UD xevent ia_addr qp_type %d"
- " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
- ((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qp_type,
- ntohs(((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.lid),
- ntohl(((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qpn),
- ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]),
- ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8]));
-
- if (event == IB_CME_CONNECTED)
- event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
- else {
- xevent.type = DAT_IB_UD_CONNECT_REJECT;
- event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
- }
-
- dapls_evd_post_connection_event_ext(
- (DAPL_EVD *)cm->ep->param.connect_evd_handle,
- event,
- (DAT_EP_HANDLE)ep,
- (DAT_COUNT)ntohs(cm->msg.p_size),
- (DAT_PVOID *)cm->msg.p_data,
- (DAT_PVOID *)&xevent);
-
- if (event != DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED)
- dapli_cm_free(cm);
-
- DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_RESOLVED);
-
- } else
-#endif
- {
- DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ACTIVE_EST);
- dapl_evd_connection_callback(cm,
- IB_CME_CONNECTED,
- cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep);
- }
dapl_log(DAPL_DBG_TYPE_CM_EST,
- " UCM_ACTIVE_CONN %p %d [lid port qpn] %x %x %x -> %x %x %x\n",
- cm->hca, cm->retries, ntohs(cm->msg.saddr.ib.lid),
- ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
+ " mcm_ACTIVE_CONN %p %d [lid port qpn] %x %x %x -> %x %x %x\n",
+ cm->hca, cm->retries, ntohs(cm->msg.saddr.lid),
+ ntohs(cm->msg.sport), ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
ntohl(cm->msg.dqpn));
return;
bail:
* receive peer QP information, private data,
* and post cr_event
*/
-static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
+static void mcm_accept(ib_cm_srvc_handle_t cm, dat_mcm_msg_t *msg)
{
dp_ib_cm_handle_t acm;
acm->msg.rd_in = msg->rd_in;
/* CR saddr is CM daddr info, need EP for local saddr */
- dapl_os_memcpy(&acm->msg.daddr, &msg->saddr, sizeof(union dcm_addr));
+ dapl_os_memcpy(&acm->msg.daddr, &msg->saddr, sizeof(dat_mcm_addr_t));
dapl_log(DAPL_DBG_TYPE_CM,
" accept: DST port=%x lid=%x, iqp=%x, psize=%d\n",
- ntohs(acm->msg.dport), ntohs(acm->msg.daddr.ib.lid),
- htonl(acm->msg.daddr.ib.qpn), htons(acm->msg.p_size));
+ ntohs(acm->msg.dport), ntohs(acm->msg.daddr.lid),
+ htonl(acm->msg.daddr.qpn), htons(acm->msg.p_size));
/* validate private data size before reading */
if (ntohs(msg->p_size) > DCM_MAX_PDATA_SIZE) {
acm->state = DCM_ACCEPTING;
dapli_queue_conn(acm);
-#ifdef DAT_EXTENSIONS
- if (acm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
-
- /* post EVENT, modify_qp created ah */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_CONNECT_REQUEST;
-
- dapls_evd_post_cr_event_ext(acm->sp,
- DAT_IB_UD_CONNECTION_REQUEST_EVENT,
- acm,
- (DAT_COUNT)ntohs(acm->msg.p_size),
- (DAT_PVOID *)acm->msg.p_data,
- (DAT_PVOID *)&xevent);
- DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_REQ_TX);
- } else
-#endif
- /* trigger CR event and return SUCCESS */
- dapls_cr_callback(acm,
- IB_CME_CONNECTION_REQUEST_PENDING,
- acm->msg.p_data, ntohs(msg->p_size), acm->sp);
+ /* trigger CR event and return SUCCESS */
+ dapls_cr_callback(acm,
+ IB_CME_CONNECTION_REQUEST_PENDING,
+ acm->msg.p_data, ntohs(msg->p_size), acm->sp);
return;
-
bail:
/* schedule work thread cleanup */
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_ERR);
/*
* PASSIVE: read RTU from active peer, post CONN event
*/
-static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
+static void mcm_accept_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg)
{
dapl_os_lock(&cm->lock);
if ((ntohs(msg->op) != DCM_RTU) || (cm->state != DCM_RTU_PENDING)) {
" op %s, st %s <- lid %x iqp %x sport %x\n",
dapl_cm_op_str(ntohs(msg->op)),
dapl_cm_state_str(cm->state),
- ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn),
+ ntohs(msg->saddr.lid), ntohl(msg->saddr.qpn),
ntohs(msg->sport));
dapl_os_unlock(&cm->lock);
goto bail;
/* final data exchange if remote QP state is good to go */
dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: connected!\n");
-#ifdef DAT_EXTENSIONS
- if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
- uint16_t lid = ntohs(cm->msg.daddr.ib.lid);
-
- /* post EVENT, modify_qp, AH already created, ucm msg */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
- xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
- xevent.remote_ah.ah = dapls_create_ah(cm->hca,
- cm->ep->qp_handle->pd,
- cm->ep->qp_handle,
- htons(lid),
- NULL);
- if (xevent.remote_ah.ah == NULL) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " passive UD RTU: ERR create_ah"
- " for qpn 0x%x lid 0x%x\n",
- xevent.remote_ah.qpn, lid);
- goto bail;
- }
- cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */
- dapl_os_memcpy(&xevent.remote_ah.ia_addr,
- &cm->msg.daddr,
- sizeof(union dcm_addr));
+ DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_PASSIVE_EST);
- /* remote ia_addr reference includes ucm qpn, not IB qpn */
- ((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+ dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp);
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " PASSIVE: UD xevent ah %p qpn %x lid %x\n",
- xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " PASSIVE: UD xevent ia_addr qp_type %d"
- " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
- ((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qp_type,
- ntohs(((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.lid),
- ntohl(((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qpn),
- ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]),
- ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8]));
-
- dapls_evd_post_connection_event_ext(
- (DAPL_EVD *)cm->ep->param.connect_evd_handle,
- DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
- (DAT_EP_HANDLE)cm->ep,
- (DAT_COUNT)ntohs(cm->msg.p_size),
- (DAT_PVOID *)cm->msg.p_data,
- (DAT_PVOID *)&xevent);
-
- DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_RESOLVED);
- dapli_cm_free(cm); /* still attached to EP */
- } else {
-#endif
- DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_PASSIVE_EST);
- dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp);
- }
dapl_log(DAPL_DBG_TYPE_CM_EST,
- " UCM_PASSIVE_CONN %p %d [lid port qpn] %x %x %x <- %x %x %x\n",
- cm->hca, cm->retries, ntohs(cm->msg.saddr.ib.lid),
- ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
+ " PASSIVE_CONN %p %d [lid port qpn] %x %x %x <- %x %x %x\n",
+ cm->hca, cm->retries, ntohs(cm->msg.saddr.lid),
+ ntohs(cm->msg.sport), ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
ntohl(cm->msg.dqpn));
return;
bail:
/*
* PASSIVE: user accepted, check and re-send reply message, called from cm_thread.
*/
-static int ucm_reply(dp_ib_cm_handle_t cm)
+static int mcm_reply(dp_ib_cm_handle_t cm)
{
dapl_os_lock(&cm->lock);
if (cm->state != DCM_RTU_PENDING) {
" %x %x i_%x -> %x %x i_%x l_pid %x r_pid %x\n",
cm->ep, cm, dapl_cm_state_str(cm->state),
cm->ref_count,
- htons(cm->msg.saddr.ib.lid),
+ htons(cm->msg.saddr.lid),
htons(cm->msg.sport),
- htonl(cm->msg.saddr.ib.qpn),
- htons(cm->msg.daddr.ib.lid),
+ htonl(cm->msg.saddr.qpn),
+ htons(cm->msg.daddr.lid),
htons(cm->msg.dport),
- htonl(cm->msg.daddr.ib.qpn),
+ htonl(cm->msg.daddr.qpn),
ntohl(cm->msg.s_id),
ntohl(cm->msg.d_id));
dapl_os_unlock(&cm->lock);
dapl_log(DAPL_DBG_TYPE_ERR,
" CM_REPLY: RETRIES EXHAUSTED (lid port qpn)"
" %x %x %x -> %x %x %x\n",
- htons(cm->msg.saddr.ib.lid),
+ htons(cm->msg.saddr.lid),
htons(cm->msg.sport),
- htonl(cm->msg.saddr.ib.qpn),
- htons(cm->msg.daddr.ib.lid),
+ htonl(cm->msg.saddr.qpn),
+ htons(cm->msg.daddr.lid),
htons(cm->msg.dport),
- htonl(cm->msg.daddr.ib.qpn));
+ htonl(cm->msg.daddr.qpn));
dapl_os_unlock(&cm->lock);
+
#ifdef DAPL_COUNTERS
if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) {
dapl_os_unlock(&cm->hca->ib_trans.lock);
dapl_os_lock(&cm->hca->ib_trans.lock);
}
#endif
-#ifdef DAT_EXTENSIONS
- if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
- DAT_IB_EXTENSION_EVENT_DATA xevent;
-
- /* post REJECT event with CONN_REQ p_data */
- xevent.status = 0;
- xevent.type = DAT_IB_UD_CONNECT_ERROR;
-
- dapls_evd_post_connection_event_ext(
- (DAPL_EVD *)cm->ep->param.connect_evd_handle,
- DAT_IB_UD_CONNECTION_ERROR_EVENT,
- (DAT_EP_HANDLE)cm->ep,
- (DAT_COUNT)ntohs(cm->msg.p_size),
- (DAT_PVOID *)cm->msg.p_data,
- (DAT_PVOID *)&xevent);
- } else
-#endif
- dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE,
- NULL, 0, cm->sp);
+
+ dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, NULL, 0, cm->sp);
return -1;
}
dapl_os_get_time(&cm->timer); /* RTU expected */
- if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
+ if (mcm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n");
dapl_os_unlock(&cm->lock);
return -1;
" %x %x i_%x -> %x %x i_%x l_pid %x r_pid %x\n",
cm->ep, cm, dapl_cm_state_str(cm->state),
cm->ref_count,
- htons(cm->hca->ib_trans.addr.ib.lid),
+ htons(cm->hca->ib_trans.addr.lid),
htons(cm->msg.sport),
- htonl(ep->qp_handle->qp_num),
- htons(cm->msg.daddr.ib.lid),
+ htonl(ep->qp_handle->qp->qp_num),
+ htons(cm->msg.daddr.lid),
htons(cm->msg.dport),
- htonl(cm->msg.daddr.ib.qpn),
+ htonl(cm->msg.daddr.qpn),
ntohl(cm->msg.s_id),
ntohl(cm->msg.d_id));
dapl_os_unlock(&cm->lock);
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" ACCEPT_USR: remote lid=%x"
" iqp=%x qp_type %d, psize=%d\n",
- ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type,
+ ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn), cm->msg.daddr.qp_type,
p_size);
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" ACCEPT_USR: remote GID subnet %016llx id %016llx\n",
- (unsigned long long)
- htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]),
- (unsigned long long)
- htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[8]));
-
-#ifdef DAT_EXTENSIONS
- if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD &&
- ep->qp_handle->qp_type != IBV_QPT_UD) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT_USR: ERR remote QP is UD,"
- ", but local QP is not\n");
- return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP);
- }
-#endif
+ (unsigned long long)htonll(*(uint64_t*)&cm->msg.daddr.gid[0]),
+ (unsigned long long)htonll(*(uint64_t*)&cm->msg.daddr.gid[8]));
/* rdma_out, initiator, cannot exceed remote rdma_in max */
if (ntohs(cm->msg.ver) >= 7)
/* modify QP to RTR and then to RTS with remote info already read */
dapl_os_lock(&ep->header.lock);
- if (dapls_modify_qp_state(ep->qp_handle,
+ if (dapls_modify_qp_state(ep->qp_handle->qp,
IBV_QPS_RTR,
- cm->msg.daddr.ib.qpn,
- cm->msg.daddr.ib.lid,
- (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) {
+ cm->msg.daddr.qpn,
+ cm->msg.daddr.lid,
+ (ib_gid_handle_t)&cm->msg.daddr.gid[0]) != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" ACCEPT_USR: QPS_RTR ERR %s -> lid %x qpn %x\n",
- strerror(errno), ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn));
+ strerror(errno), ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn));
dapl_os_unlock(&ep->header.lock);
goto bail;
}
- if (dapls_modify_qp_state(ep->qp_handle,
+ if (dapls_modify_qp_state(ep->qp_handle->qp,
IBV_QPS_RTS,
- cm->msg.daddr.ib.qpn,
- cm->msg.daddr.ib.lid,
+ cm->msg.daddr.qpn,
+ cm->msg.daddr.lid,
NULL) != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" ACCEPT_USR: QPS_RTS ERR %s -> lid %x qpn %x\n",
- strerror(errno), ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn));
+ strerror(errno), ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn));
dapl_os_unlock(&ep->header.lock);
goto bail;
}
/* save remote address information */
dapl_os_memcpy(&ep->remote_ia_address,
- &cm->msg.saddr, sizeof(union dcm_addr));
+ &cm->msg.saddr, sizeof(dat_mcm_addr_t));
/* setup local QP info and type from EP, copy pdata, for reply */
cm->msg.op = htons(DCM_REP);
cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
- cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num);
- cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
- cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid;
- dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
- &cm->hca->ib_trans.addr.ib.gid, 16);
+ cm->msg.saddr.qpn = htonl(ep->qp_handle->qp->qp_num);
+ cm->msg.saddr.qp_type = ep->qp_handle->qp->qp_type;
+ cm->msg.saddr.lid = cm->hca->ib_trans.addr.lid;
+ dapl_os_memcpy(&cm->msg.saddr.gid[0],
+ &cm->hca->ib_trans.addr.gid, 16);
/*
* UD: deliver p_data with REQ and EST event, keep REQ p_data in
dapl_os_lock(&cm->lock);
cm->state = DCM_RTU_PENDING;
dapl_os_get_time(&cm->timer); /* RTU expected */
- if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
+ if (mcm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n");
dapl_os_unlock(&cm->lock);
dapl_ep_unlink_cm(ep, cm);
return DAT_INSUFFICIENT_RESOURCES;
/* remote hca and port: lid, gid, network order */
- dapl_os_memcpy(&cm->msg.daddr, r_addr, sizeof(union dcm_addr));
+ dapl_os_memcpy(&cm->msg.daddr, r_addr, sizeof(dat_mcm_addr_t));
/* remote uCM information, comes from consumer provider r_addr */
cm->msg.dport = htons((uint16_t)r_psp);
- cm->msg.dqpn = cm->msg.daddr.ib.qpn;
- cm->msg.daddr.ib.qpn = 0; /* don't have a remote qpn until reply */
+ cm->msg.dqpn = cm->msg.daddr.qpn;
+ cm->msg.daddr.qpn = 0; /* don't have a remote qpn until reply */
/* set max rdma inbound requests */
cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
IN DAT_UINT64 sid,
IN DAPL_SP *sp)
{
- ib_cm_srvc_handle_t cm = NULL;
+ dp_ib_cm_handle_t cm = NULL;
+ int ret;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" listen(ia %p ServiceID %x sp %p)\n",
ia, sid, sp);
- /* reserve local port, then allocate CM object */
- if (!ucm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) {
- dapl_dbg_log(DAPL_DBG_TYPE_WARN,
- " listen: ERROR %s on conn_qual %x\n",
- strerror(errno), sid);
- return DAT_CONN_QUAL_IN_USE;
- }
-
/* cm_create will setup saddr for listen server */
if ((cm = dapls_ib_cm_create(NULL)) == NULL)
return DAT_INSUFFICIENT_RESOURCES;
/* LISTEN: init DST address and QP info to local CM server info */
cm->sp = sp;
cm->hca = ia->hca_ptr;
- cm->msg.sport = htons((uint16_t)sid);
- cm->msg.sqpn = htonl(ia->hca_ptr->ib_trans.qp->qp_num);
- cm->msg.saddr.ib.qp_type = IBV_QPT_UD;
- cm->msg.saddr.ib.lid = ia->hca_ptr->ib_trans.addr.ib.lid;
- dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
- &cm->hca->ib_trans.addr.ib.gid, 16);
-
+
/* save cm_handle reference in service point */
sp->cm_srvc_handle = cm;
+ /* proxy CM service: send listen over to MPXYD */
+ if (ia->hca_ptr->ib_trans.scif_ep) {
+ ret = dapli_mix_listen(cm, sid);
+ if (ret) {
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " listen: ERROR %s on conn_qual %x\n",
+ strerror(ret), sid);
+ dapli_cm_free(cm);
+ return dapl_convert_errno(ret, "mix_listen" );
+ }
+ } else {
+ /* local CM service, reserve local port and setup addr info */
+ if (!mcm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) {
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,
+ " listen: ERROR %s on conn_qual %x\n",
+ strerror(errno), sid);
+ dapli_cm_free(cm);
+ return DAT_CONN_QUAL_IN_USE;
+ }
+ cm->msg.sport = htons((uint16_t)sid);
+ cm->msg.sqpn = htonl(ia->hca_ptr->ib_trans.qp->qp_num);
+ cm->msg.saddr.qp_type = IBV_QPT_UD;
+ cm->msg.saddr.lid = ia->hca_ptr->ib_trans.addr.lid;
+ dapl_os_memcpy(&cm->msg.saddr.gid[0],
+ &cm->hca->ib_trans.addr.gid, 16);
+ }
+
/* queue up listen socket to process inbound CR's */
cm->state = DCM_LISTEN;
dapli_queue_listen(cm);
+
DAPL_CNTR(ia, DCNT_IA_CM_LISTEN);
return DAT_SUCCESS;
DAT_RETURN
dapls_ib_remove_conn_listener(IN DAPL_IA *ia, IN DAPL_SP *sp)
{
- ib_cm_srvc_handle_t cm = sp->cm_srvc_handle;
+ dp_ib_cm_handle_t cm = sp->cm_srvc_handle;
/* free cm_srvc_handle and port, and mark CM for cleanup */
if (cm) {
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" remove_listener(ia %p sp %p cm %p psp=%x)\n",
- ia, sp, cm, ntohs(cm->msg.dport));
+ ia, sp, cm, ntohs(cm->msg.sport));
sp->cm_srvc_handle = NULL;
dapli_dequeue_listen(cm);
- ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+
+ /* clean up proxy listen, otherwise local port space */
+ if (cm->hca->ib_trans.scif_ep)
+ dapli_mix_listen_free(cm);
+ else
+ mcm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+
dapls_cm_release(cm); /* last ref, dealloc */
}
return DAT_SUCCESS;
" dlid %x iqp %x port %x\n", cm,
dapl_cm_op_str(ntohs(cm->msg.op)),
dapl_cm_state_str(cm->state),
- ntohs(cm->hca->ib_trans.addr.ib.lid),
- ntohl(cm->msg.saddr.ib.qpn),
- ntohs(cm->msg.sport), ntohs(cm->msg.daddr.ib.lid),
- ntohl(cm->msg.daddr.ib.qpn), ntohs(cm->msg.dport));
+ ntohs(cm->hca->ib_trans.addr.lid),
+ ntohl(cm->msg.saddr.qpn),
+ ntohs(cm->msg.sport), ntohs(cm->msg.daddr.lid),
+ ntohl(cm->msg.daddr.qpn), ntohs(cm->msg.dport));
cm->state = DCM_REJECTED;
- cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid;
- cm->msg.saddr.ib.qp_type = cm->msg.daddr.ib.qp_type;
- dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
- &cm->hca->ib_trans.addr.ib.gid, 16);
+ cm->msg.saddr.lid = cm->hca->ib_trans.addr.lid;
+ cm->msg.saddr.qp_type = cm->msg.daddr.qp_type;
+ dapl_os_memcpy(&cm->msg.saddr.gid[0],
+ &cm->hca->ib_trans.addr.gid, 16);
if (reason == IB_CM_REJ_REASON_CONSUMER_REJ)
cm->msg.op = htons(DCM_REJ_USER);
reason == IB_CM_REJ_REASON_CONSUMER_REJ ?
DCNT_IA_CM_USER_REJ_TX : DCNT_IA_CM_ERR_REJ_TX);
- if (ucm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) {
+ if (mcm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) {
dapl_log(DAPL_DBG_TYPE_WARN,
" cm_reject: send ERR: %s\n", strerror(errno));
dapl_os_unlock(&cm->lock);
return DCM_MAX_PDATA_SIZE;
}
-#if defined(_WIN32) || defined(_WIN64)
-
-void cm_thread(void *arg)
-{
- struct dapl_hca *hca = arg;
- dp_ib_cm_handle_t cm, next;
- DWORD time_ms;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread: ENTER hca %p\n", hca);
- dapl_os_lock(&hca->ib_trans.lock);
- for (hca->ib_trans.cm_state = IB_THREAD_RUN;
- hca->ib_trans.cm_state == IB_THREAD_RUN ||
- !dapl_llist_is_empty(&hca->ib_trans.list);
- dapl_os_lock(&hca->ib_trans.lock)) {
-
- time_ms = INFINITE;
- CompSetZero(&hca->ib_trans.signal.set);
- CompSetAdd(&hca->ib_hca_handle->channel, &hca->ib_trans.signal.set);
- CompSetAdd(&hca->ib_trans.rch->comp_channel, &hca->ib_trans.signal.set);
- CompSetAdd(&hca->ib_trans.ib_cq->comp_channel, &hca->ib_trans.signal.set);
-
- next = dapl_llist_is_empty(&hca->ib_trans.list) ? NULL :
- dapl_llist_peek_head(&hca->ib_trans.list);
-
- while (next) {
- cm = next;
- next = dapl_llist_next_entry(&hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *)&cm->local_entry);
- dapls_cm_acquire(cm); /* hold thread ref */
- dapl_os_lock(&cm->lock);
- if (cm->state == DCM_FREE ||
- hca->ib_trans.cm_state != IB_THREAD_RUN) {
- dapl_os_unlock(&cm->lock);
- dapl_log(DAPL_DBG_TYPE_CM,
- " CM FREE: %p ep=%p st=%s refs=%d\n",
- cm, cm->ep, dapl_cm_state_str(cm->state),
- cm->ref_count);
-
- dapls_cm_release(cm); /* release alloc ref */
- dapli_cm_dequeue(cm); /* release workq ref */
- dapls_cm_release(cm); /* release thread ref */
- continue;
- }
- dapl_os_unlock(&cm->lock);
- ucm_check_timers(cm, &time_ms);
- dapls_cm_release(cm); /* release thread ref */
- }
-
- dapl_os_unlock(&hca->ib_trans.lock);
-
- hca->ib_hca_handle->channel.Milliseconds = time_ms;
- hca->ib_trans.rch->comp_channel.Milliseconds = time_ms;
- hca->ib_trans.ib_cq->comp_channel.Milliseconds = time_ms;
- CompSetPoll(&hca->ib_trans.signal.set, time_ms);
-
- hca->ib_hca_handle->channel.Milliseconds = 0;
- hca->ib_trans.rch->comp_channel.Milliseconds = 0;
- hca->ib_trans.ib_cq->comp_channel.Milliseconds = 0;
-
- ucm_recv(&hca->ib_trans);
- ucm_async_event(hca);
- dapli_cq_event_cb(&hca->ib_trans);
- }
-
- dapl_os_unlock(&hca->ib_trans.lock);
- hca->ib_trans.cm_state = IB_THREAD_EXIT;
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca);
-}
-
-#else // _WIN32 || _WIN64
-
void cm_thread(void *arg)
{
struct dapl_hca *hca = arg;
dapl_fd_zero(set);
dapl_fd_set(hca->ib_trans.signal.scm[0], set, DAPL_FD_READ);
dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ);
- dapl_fd_set(hca->ib_trans.rch->fd, set, DAPL_FD_READ);
+ dapl_fd_set(hca->ib_trans.rch_fd, set, DAPL_FD_READ);
+ dapl_fd_set(hca->ib_trans.scif_ep, set, DAPL_FD_READ);
dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ);
if (!dapl_llist_is_empty(&hca->ib_trans.list))
continue;
}
dapl_os_unlock(&cm->lock);
- ucm_check_timers(cm, &time_ms);
+ mcm_check_timers(cm, &time_ms);
dapls_cm_release(cm); /* release thread ref */
}
dapl_os_unlock(&hca->ib_trans.lock);
dapl_select(set, time_ms);
- /* Process events: CM, ASYNC, NOTIFY THREAD */
- if (dapl_poll(hca->ib_trans.rch->fd,
- DAPL_FD_READ) == DAPL_FD_READ) {
- ucm_recv(&hca->ib_trans);
+ if (dapl_poll(hca->ib_trans.rch_fd,
+ DAPL_FD_READ) == DAPL_FD_READ) {
+ mcm_recv(&hca->ib_trans);
+ }
+ if (dapl_poll(hca->ib_trans.scif_ep,
+ DAPL_FD_READ) == DAPL_FD_READ) {
+ dapli_mix_recv(hca, hca->ib_trans.scif_ep);
}
if (dapl_poll(hca->ib_hca_handle->async_fd,
DAPL_FD_READ) == DAPL_FD_READ) {
- ucm_async_event(hca);
+ mcm_async_event(hca);
}
if (dapl_poll(hca->ib_trans.ib_cq->fd,
DAPL_FD_READ) == DAPL_FD_READ) {
hca->ib_trans.cm_state = IB_THREAD_EXIT;
dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca);
}
-#endif
-
-#ifdef DAPL_COUNTERS
-static char _ctr_host_[128];
-/* Debug aid: List all Connections in process and state */
-void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
-{
- /* Print in process CM's for this IA, if debug type set */
- int i = 0;
- dp_ib_cm_handle_t cm, next_cm;
- struct dapl_llist_entry **list;
- DAPL_OS_LOCK *lock;
-
- /* LISTEN LIST */
- list = &ia_ptr->hca_ptr->ib_trans.llist;
- lock = &ia_ptr->hca_ptr->ib_trans.llock;
- dapl_os_lock(lock);
- if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list))
- next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list);
- else
- next_cm = NULL;
-
- gethostname(_ctr_host_, sizeof(_ctr_host_));
- printf("\n [%s:%x] DAPL IA LISTEN/CONNECTIONS IN PROCESS:\n",
- _ctr_host_ , dapl_os_getpid());
-
- while (next_cm) {
- cm = next_cm;
- next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
- (DAPL_LLIST_ENTRY*)&cm->local_entry);
-
- printf( " LISTEN[%d]: sp %p %s uCM_QP: %x %x c_%x l_pid %x \n",
- i, cm->sp, dapl_cm_state_str(cm->state),
- ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn),
- ntohl(cm->msg.s_id));
- i++;
- }
- dapl_os_unlock(lock);
-
- /* CONNECTION LIST */
- list = &ia_ptr->hca_ptr->ib_trans.list;
- lock = &ia_ptr->hca_ptr->ib_trans.lock;
-
- dapl_os_lock(lock);
- if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list))
- next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list);
- else
- next_cm = NULL;
-
- while (next_cm) {
- cm = next_cm;
- next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
- (DAPL_LLIST_ENTRY*)&cm->local_entry);
-
- printf( " CONN[%d]: ep %p cm %p %s %s"
- " %x %x c_%x i_%x %s %x %x c_%x i_%x r_pid %x\n",
- i, cm->ep, cm,
- cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
- dapl_cm_state_str(cm->state),
- ntohs(cm->msg.saddr.ib.lid),
- ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn),
- ntohl(cm->msg.saddr.ib.qpn),
- cm->sp ? "<-" : "->",
- ntohs(cm->msg.daddr.ib.lid),
- ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn),
- ntohl(cm->msg.daddr.ib.qpn),
- ntohl(cm->msg.d_id));
- i++;
- }
- printf("\n");
- dapl_os_unlock(lock);
-}
-
-void dapls_print_cm_free_list(IN DAPL_IA *ia_ptr)
-{
- DAPL_EP *ep, *next_ep;
- dp_ib_cm_handle_t cm, next_cm;
- int i = 0;
-
- gethostname(_ctr_host_, sizeof(_ctr_host_));
- printf("\n [%s:%x] DAPL EP CM FREE LIST:\n",
- _ctr_host_ , dapl_os_getpid());
-
- dapl_os_lock(&ia_ptr->header.lock);
- ep = (dapl_llist_is_empty(&ia_ptr->ep_list_head) ?
- NULL : dapl_llist_peek_head(&ia_ptr->ep_list_head));
- while (ep != NULL) {
- next_ep = dapl_llist_next_entry(&ia_ptr->ep_list_head,
- &ep->header.ia_list_entry);
- dapl_os_lock(&ep->header.lock);
- cm = (dapl_llist_is_empty(&ep->cm_list_head) ?
- NULL : dapl_llist_peek_head(&ep->cm_list_head));
- while (cm) {
- dapl_os_lock(&cm->lock);
- next_cm = dapl_llist_next_entry(&ep->cm_list_head,
- &cm->list_entry);
- if (cm->state == DCM_FREE) {
- printf( " CONN[%d]: ep %p cm %p %s %s"
- " %x %x c_%x i_%x l_pid %x %s"
- " %x %x c_%x i_%x r_pid %x\n",
- i, cm->ep, cm,
- cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
- dapl_cm_state_str(cm->state),
- ntohs(cm->msg.saddr.ib.lid),
- ntohs(cm->msg.sport),
- ntohl(cm->msg.sqpn),
- ntohl(cm->msg.saddr.ib.qpn),
- ntohl(cm->msg.s_id),
- cm->sp ? "<-" : "->",
- ntohs(cm->msg.daddr.ib.lid),
- ntohs(cm->msg.dport),
- ntohl(cm->msg.dqpn),
- ntohl(cm->msg.daddr.ib.qpn),
- ntohl(cm->msg.d_id));
- i++;
- }
- dapl_os_unlock(&cm->lock);
- cm = next_cm;
- }
- dapl_os_unlock(&ep->header.lock);
- ep = next_ep;
- }
- dapl_os_unlock(&ia_ptr->header.lock);
-}
-#endif
#ifndef _DAPL_IB_UTIL_H_
#define _DAPL_IB_UTIL_H_
-#define _OPENIB_SCM_
+#define _OPENIB_MCM_
#include <infiniband/verbs.h>
+#include <scif.h>
#include "openib_osd.h"
#include "dapl_ib_common.h"
+#include <dat2/dat_mic_extensions.h>
+
/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */
struct ib_cm_handle
{
- struct dapl_llist_entry list_entry;
- struct dapl_llist_entry local_entry;
- DAPL_OS_WAIT_OBJECT d_event;
- DAPL_OS_WAIT_OBJECT f_event;
- DAPL_OS_LOCK lock;
- DAPL_OS_TIMEVAL timer;
- int ref_count;
- int state;
- int retries;
- struct dapl_hca *hca;
- struct dapl_sp *sp;
- struct dapl_ep *ep;
- struct ibv_ah *ah;
- uint16_t p_size; /* accept p_data, for retries */
- uint8_t p_data[DCM_MAX_PDATA_SIZE];
- ib_cm_msg_t msg;
+ struct dapl_llist_entry list_entry;
+ struct dapl_llist_entry local_entry;
+ DAPL_OS_WAIT_OBJECT d_event;
+ DAPL_OS_WAIT_OBJECT f_event;
+ DAPL_OS_LOCK lock;
+ DAPL_OS_TIMEVAL timer;
+ uint32_t cm_id; /* local id */
+ uint32_t scm_id; /* shadow id */
+ uint64_t cm_ctx; /* local context */
+ uint64_t scm_ctx; /* shadow context */
+ int ref_count;
+ int state;
+ int retries;
+ struct _ib_hca_transport *tp;
+ struct dapl_hca *hca;
+ struct dapl_sp *sp;
+ struct dapl_ep *ep;
+ struct ibv_ah *ah;
+ uint16_t p_size; /* accept p_data, for retries */
+ uint8_t p_data[DAT_MCM_PDATA_SIZE];
+ dat_mcm_msg_t msg;
};
typedef struct ib_cm_handle *dp_ib_cm_handle_t;
ib_async_cq_handler_t async_cq_error;
ib_async_dto_handler_t async_cq;
ib_async_qp_handler_t async_qp_error;
- union dcm_addr addr; /* lid, port, qp_num, gid */
- int max_inline_send;
- int rd_atom_in;
- int rd_atom_out;
+ dat_mcm_addr_t addr; /* lid, port, qp_num, gid */
+ DAT_NAMED_ATTR named_attr;
+ struct dapl_thread_signal signal;
+ /* dat_mix_dev_attr_t */
uint8_t ack_timer;
uint8_t ack_retry;
uint8_t rnr_timer;
uint8_t global;
uint8_t hop_limit;
uint8_t tclass;
+ uint8_t sl;
uint8_t mtu;
- DAT_NAMED_ATTR named_attr;
- struct dapl_thread_signal signal;
+ uint8_t rd_atom_in;
+ uint8_t rd_atom_out;
+ uint8_t pkey_idx;
+ uint16_t pkey;
+ uint16_t max_inline_send;
+ /* dat_mix_dev_attr_t */
int cqe;
int qpe;
int burst;
struct ibv_qp *qp;
struct ibv_mr *mr_rbuf;
struct ibv_mr *mr_sbuf;
- ib_cm_msg_t *sbuf;
- ib_cm_msg_t *rbuf;
+ dat_mcm_msg_t *sbuf;
+ dat_mcm_msg_t *rbuf;
struct ibv_comp_channel *rch;
+ int rch_fd;
struct ibv_ah **ah;
DAPL_OS_LOCK plock;
uint16_t lid;
uint8_t *sid; /* Sevice IDs, port space, bitarray? */
- uint8_t sl;
- uint16_t pkey;
- int pkey_idx;
-#ifdef DAT_IB_COLLECTIVES
- /* Collective member device and address information */
- ib_thread_state_t coll_thread_state;
- DAPL_OS_THREAD coll_thread;
- DAPL_OS_LOCK coll_lock;
- DAPL_OS_WAIT_OBJECT coll_event;
- struct dapl_llist_entry *grp_list;
- user_progress_func_t *user_func;
- int l_sock;
- struct sockaddr_in m_addr;
- void *m_ctx;
- void *m_info;
- void *f_info;
- int m_size;
- int f_size;
- int t_id;
-#endif
+
+ /* SCIF MIC indirect, EP to MPXYD services, if running on MIC */
+ struct scif_portID self;
+ scif_epd_t scif_ep; /* FD operation processing */
+ scif_epd_t scif_cm_ep; /* FD CM packet processing */
+ struct scif_portID peer; /* MPXYD op proxy addr info */
+ struct scif_portID peer_cm; /* MPXYD cm proxy addr info */
+ off_t scif_adr; /* MPXYD RDMA memory pool */
+ off_t scif_off;
+ int scif_len;
} ib_hca_transport_t;
/* prototypes */
void cm_thread(void *arg);
-void ucm_async_event(struct dapl_hca *hca);
+void mcm_async_event(struct dapl_hca *hca);
void dapli_cq_event_cb(struct _ib_hca_transport *tp);
void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr);
void dapls_cm_release(dp_ib_cm_handle_t cm_ptr);
void dapls_cm_free(dp_ib_cm_handle_t cm_ptr);
+/* MIC indirect eXchange (MIX) operations */
+int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port);
+void dapli_mix_close(ib_hca_transport_t *tp);
+int dapli_mix_listen(dp_ib_cm_handle_t cm, uint16_t sid);
+int dapli_mix_listen_free(dp_ib_cm_handle_t cm);
+int dapli_mix_qp_create(ib_qp_handle_t m_qp, struct ibv_qp_init_attr *attr);
+int dapli_mix_qp_free(ib_qp_handle_t m_qp);
+int dapli_mix_cq_create(ib_cq_handle_t m_cq);
+int dapli_mix_cq_free(ib_cq_handle_t m_cq);
+int dapli_mix_recv(DAPL_HCA *hca, int scif_ep);
+
+
#ifdef DAPL_COUNTERS
void dapls_print_cm_list(IN DAPL_IA *ia_ptr);
#endif
#include "dapl_adapter_util.h"
#include "dapl_ib_util.h"
#include "dapl_osd.h"
-
#include <stdlib.h>
-#ifdef DAT_IB_COLLECTIVES
-#include <collectives/ib_collectives.h>
-#endif
static void ucm_service_destroy(IN DAPL_HCA *hca);
static int ucm_service_create(IN DAPL_HCA *hca);
-#if defined (_WIN32)
-#include <rdma\winverbs.h>
-
-static int32_t create_os_signal(IN DAPL_HCA * hca_ptr)
-{
- return CompSetInit(&hca_ptr->ib_trans.signal.set);
-}
-
-static void destroy_os_signal(IN DAPL_HCA * hca_ptr)
-{
- CompSetCleanup(&hca_ptr->ib_trans.signal.set);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
- verbs->channel.Milliseconds = 0;
- return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- channel->comp_channel.Milliseconds = 0;
- return 0;
-}
-
-#else // _WIN32
-
static int32_t create_os_signal(IN DAPL_HCA * hca_ptr)
{
DAPL_SOCKET listen_socket;
return dapls_config_fd(channel->fd);
}
-#endif
-
/*
* dapls_ib_init, dapls_ib_release
*
goto err;
found:
-
hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
if (!hca_ptr->ib_hca_handle) {
dapl_log(DAPL_DBG_TYPE_ERR,
strerror(errno));
goto err;
} else {
- hca_ptr->ib_trans.addr.ib.lid = htons(port_attr.lid);
+ hca_ptr->ib_trans.addr.lid = htons(port_attr.lid);
hca_ptr->ib_trans.lid = htons(port_attr.lid);
}
/* get gid for this hca-port, network order */
if (ibv_query_gid(hca_ptr->ib_hca_handle,
(uint8_t) hca_ptr->port_num, 0,
- (union ibv_gid *)&hca_ptr->ib_trans.addr.ib.gid)) {
+ (union ibv_gid *)&hca_ptr->ib_trans.addr.gid)) {
dapl_log(DAPL_DBG_TYPE_ERR,
" open_hca: query GID ERR for %s, err=%s\n",
ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
hca_ptr->ib_trans.mtu =
dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU));
+ if (dapli_mix_open(&hca_ptr->ib_trans, hca_name, hca_ptr->port_num)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: SCIF init ERR for %s, err=%s\n",
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+ strerror(errno));
+ goto err;
+ }
+
/* initialize CM list, LISTEN, SND queue, PSP array, locks */
if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS)
goto err;
dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
" open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x ""
" ID 0x" F64x "\n",
- ntohl(hca_ptr->ib_trans.addr.ib.qpn),
- ntohs(hca_ptr->ib_trans.addr.ib.lid),
+ ntohl(hca_ptr->ib_trans.addr.qpn),
+ ntohs(hca_ptr->ib_trans.addr.lid),
(unsigned long long)
- ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]),
+ ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.gid[0]),
(unsigned long long)
- ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8]));
+ ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.gid[8]));
/* save LID, GID, QPN, PORT address information, for ia_queries */
/* Set AF_INET6 to insure callee address storage of 28 bytes */
hca_ptr->ib_trans.hca = hca_ptr;
- hca_ptr->ib_trans.addr.ib.family = AF_INET6;
- hca_ptr->ib_trans.addr.ib.qp_type = IBV_QPT_UD;
+ hca_ptr->ib_trans.addr.family = AF_INET6;
+ hca_ptr->ib_trans.addr.qp_type = IBV_QPT_UD;
memcpy(&hca_ptr->hca_address,
&hca_ptr->ib_trans.addr,
sizeof(union dcm_addr));
-#ifdef DAT_IB_COLLECTIVES
- if (dapli_create_collective_service(hca_ptr))
- goto bail;
-#endif
-
ibv_free_device_list(dev_list);
/* wait for cm_thread */
{
dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
-#ifdef DAT_IB_COLLECTIVES
- dapli_free_collective_service(hca_ptr);
-#endif
-
if (hca_ptr->ib_trans.cm_state == IB_THREAD_RUN) {
hca_ptr->ib_trans.cm_state = IB_THREAD_CANCEL;
dapls_thread_signal(&hca_ptr->ib_trans.signal);
}
}
+ dapli_mix_close(&hca_ptr->ib_trans);
+
dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
dapl_os_lock_destroy(&hca_ptr->ib_trans.llock);
destroy_os_signal(hca_ptr);
if (hca_ptr->ib_trans.ib_cq_empty) {
struct ibv_comp_channel *channel;
- channel = hca_ptr->ib_trans.ib_cq_empty->channel;
- ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+ channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+ ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
ibv_destroy_comp_channel(channel);
}
if (tp->rcq)
ibv_destroy_cq(tp->rcq);
- if (tp->rch)
+ if (tp->rch) {
+ tp->rch_fd = 0;
ibv_destroy_comp_channel(tp->rch);
+ }
if (tp->ah) {
int i;
dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n");
+ /* CM service via MPXYD, no need for local IB UD CM service */
+ if (tp->scif_ep)
+ return 0;
+
/* setup CM timers and queue sizes */
tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT);
tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME);
if (!tp->rch)
goto bail;
dapls_config_comp_channel(tp->rch);
+ tp->rch_fd = tp->rch->fd;
tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
if (!tp->scq)
}
/* save qp_num as part of ia_address, network order */
- tp->addr.ib.qpn = htonl(tp->qp->qp_num);
+ tp->addr.qpn = htonl(tp->qp->qp_num);
return 0;
bail:
dapl_log(DAPL_DBG_TYPE_ERR,
return -1;
}
-void ucm_async_event(struct dapl_hca *hca)
+void mcm_async_event(struct dapl_hca *hca)
{
struct ibv_async_event event;
struct _ib_hca_transport *tp = &hca->ib_trans;
--- /dev/null
+/*
+ * Copyright (c) 2009 Intel Corporation. All rights reserved.
+ *
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ * available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ * copy of which is available from the Open Source Initiative, see
+ * http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
+#include "dapl_osd.h"
+
+/*
+ * CM proxy services, MCM on MIC to MPXYD via SCIF
+ *
+ * NOTE: all sync MIX operations for now, TODO async?
+ *
+ * MIX_IA_OPEN
+ */
+/*
+ * dapli_mix_open - open SCIF channels to MPXYD and exchange MIX_IA_OPEN.
+ *
+ * tp:   HCA transport object; scif_ep, scif_cm_ep and addr are filled in
+ * name: verbs device name forwarded to MPXYD
+ * port: HCA port number forwarded to MPXYD
+ * Returns 0 on success, -1 on failure (endpoints are closed and zeroed).
+ */
+int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port)
+{
+	int ret, len;
+	dat_mix_open_t msg;
+
+	ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
+	if (ret < 0) {
+		dapl_log(1, " scif_get_nodeIDs() failed with error %d\n", errno);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," SCIF node_id: %d\n", (uint16_t)tp->self.node);
+
+#if 0 /* let run on Xeon for testing */
+	if (tp->self.node == 0) {
+		dapl_log(DAPL_DBG_TYPE_EXTENSION," Not running on MIC, no MPXY connect required\n");
+		tp->scif_ep = 0;
+		return 0;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Running on MIC, MPXY connect required\n");
+#endif
+	/* MPXYD is running on node 0 and well-known OFED port */
+	tp->peer.node = 0;
+	tp->peer.port = SCIF_OFED_PORT_8;
+
+	/* operations channel */
+	tp->scif_ep = scif_open();
+	if (tp->scif_ep < 0) {
+		dapl_log(1, "scif_open() failed with error %d\n", errno);
+		tp->scif_ep = 0; /* callers treat 0 as "no endpoint" */
+		return -1;
+	}
+	ret = scif_connect(tp->scif_ep, &tp->peer);
+	if (ret < 0) {
+		dapl_log(1, "scif_connect() OP EP failed with error %d\n", errno);
+		goto err;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,"Connected to node 0 for operations\n");
+
+	/* CM message channel */
+	tp->scif_cm_ep = scif_open();
+	if (tp->scif_cm_ep < 0) {
+		dapl_log(1, "scif_open() for cm_ep failed with error %d\n", errno);
+		tp->scif_cm_ep = 0;
+		goto err;
+	}
+	ret = scif_connect(tp->scif_cm_ep, &tp->peer);
+	if (ret < 0) {
+		/* bug fix: format has two %d conversions; port arg was missing */
+		dapl_log(1, "scif_connect() CM EP to port %d failed with error %d\n",
+			 tp->peer.port, errno);
+		goto err;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,"Connected to node 0 for CM messages \n");
+
+	/* MIX_IA_OPEN: device name and port */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_IA_OPEN;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.port = port;
+	/* bounded copy, always NUL terminated; over-long names are truncated */
+	snprintf((char *)&msg.name, sizeof(msg.name), "%s", name);
+	memcpy(&msg.dev_attr, (void*)&tp->ack_timer, sizeof(dat_mix_dev_attr_t));
+
+	len = sizeof(dat_mix_open_t);
+	ret = scif_send(tp->scif_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* fail fast; a partial request would desynchronize the stream */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", tp->scif_ep, ret, len);
+		goto err;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent open request on SCIF EP\n");
+
+	/* MIX_IA_OPEN: reply includes addr info */
+	msg.hdr.status = 1; /* make sure we update status from response */
+	ret = scif_recv(tp->scif_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", tp->scif_ep, ret, len);
+		goto err;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_IA_OPEN ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		/* bug fix: status argument was missing for the 4th %d */
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		goto err;
+	}
+	/* save address to transport object, keeps IA queries local */
+	memcpy((void*)&tp->addr, (void*)&msg.dev_addr, sizeof(dat_mcm_addr_t));
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Received valid open reply on SCIF EP\n");
+	return 0;
+
+err:
+	/* release whatever was opened so far; leave both EPs marked unset */
+	if (tp->scif_cm_ep > 0)
+		scif_close(tp->scif_cm_ep);
+	if (tp->scif_ep > 0)
+		scif_close(tp->scif_ep);
+	tp->scif_cm_ep = 0;
+	tp->scif_ep = 0;
+	return -1;
+}
+
+/* MIX_IA_CLOSE - no wire operation, just shutdown both SCIF endpoints */
+void dapli_mix_close(ib_hca_transport_t *tp)
+{
+	/* operations channel */
+	if (tp->scif_ep)
+		scif_close(tp->scif_ep);
+	tp->scif_ep = 0;
+
+	/* bug fix: the CM channel opened in dapli_mix_open was leaked here */
+	if (tp->scif_cm_ep)
+		scif_close(tp->scif_cm_ep);
+	tp->scif_cm_ep = 0;
+}
+
+/* MIX_LISTEN */
+/*
+ * Forward a listen request to MPXYD over the SCIF operations endpoint and
+ * block for the reply.
+ *
+ * cm:  connection handle; used only to reach the HCA transport's SCIF EP
+ * sid: service id to listen on
+ * Returns 0 on success, -1 on transport or protocol-validation failure.
+ */
+int dapli_mix_listen(dp_ib_cm_handle_t cm, uint16_t sid)
+{
+	dat_mix_listen_t msg;
+	scif_epd_t mix_ep = cm->hca->ib_trans.scif_ep;
+	int ret, len;
+
+	/* listen request: sid and backlog */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_LISTEN;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.sid = sid;
+	msg.backlog = 64;	/* fixed backlog; MPXYD owns the real queue depth */
+
+	len = sizeof(dat_mix_listen_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* NOTE(review): a short send is only logged; the recv below may
+		 * then stall on a partial request - consider failing fast */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+	/* listen response (synchronous; reuses the request buffer) */
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	/* validate version, opcode echo, response flag and status */
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_LISTEN ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERROR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," received successful reply on SCIF EP\n");
+	return 0;
+}
+
+/* MIX_LISTEN_FREE */
+/* Tear down a proxied listen on MPXYD; request and reply are bare headers. */
+int dapli_mix_listen_free(dp_ib_cm_handle_t cm)
+{
+	scif_epd_t mix_ep = cm->hca->ib_trans.scif_ep;
+	dat_mix_hdr_t hdr;
+	const int hlen = sizeof(dat_mix_hdr_t);
+	int rc;
+
+	/* listen free request, keyed by the service point's conn qualifier */
+	hdr.ver = DAT_MIX_VER;
+	hdr.op = MIX_LISTEN_FREE;
+	hdr.status = 0;
+	hdr.flags = MIX_OP_REQ;
+	hdr.req_id = (uint16_t)cm->sp->conn_qual;
+
+	rc = scif_send(mix_ep, &hdr, hlen, SCIF_SEND_BLOCK);
+	if (rc != hlen) {
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, rc, hlen);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", hdr.op);
+
+	/* listen free response */
+	rc = scif_recv(mix_ep, &hdr, hlen, SCIF_RECV_BLOCK);
+	if (rc != hlen) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, rc, hlen);
+		return -1;
+	}
+	if (hdr.ver != DAT_MIX_VER || hdr.op != MIX_LISTEN_FREE ||
+	    hdr.flags != MIX_OP_RSP || hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 hdr.ver, hdr.op, hdr.flags, hdr.status);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," received successful reply on SCIF EP\n");
+	return 0;
+}
+
+/* MIX_MR_CREATE */
+/*
+ * Tell MPXYD about a local memory region, presumably so the proxy can
+ * reach it through the SCIF registered offset - confirm against MPXYD.
+ *
+ * tp:     HCA transport (SCIF operations endpoint)
+ * id:     local MR identifier forwarded to MPXYD
+ * mr_len: region length in bytes
+ * off:    SCIF registered offset of the region
+ * ctx:    opaque local context passed to the proxy
+ * Returns 0 on success, -1 on failure.
+ */
+int dapli_mix_mr_create(ib_hca_transport_t *tp, uint32_t id, uint32_t mr_len, uint64_t off, uint64_t ctx)
+{
+	dat_mix_mr_t msg;
+	scif_epd_t mix_ep = tp->scif_ep;
+	int ret, len;
+
+	/* request: */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_MR_CREATE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.mr_id = id;
+	msg.len = mr_len;
+	msg.off = off;
+	msg.ctx = ctx;
+
+	len = sizeof(dat_mix_mr_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* NOTE(review): short send only logged; recv below may stall */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+	/* response, just status: only a header's worth of bytes comes back */
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_MR_CREATE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," received successful reply on SCIF EP\n");
+	return 0;
+}
+
+/* MIX_MR_FREE */
+/*
+ * Release a previously proxied memory region on MPXYD.
+ *
+ * cm: connection handle; used only to reach the transport's SCIF EP
+ * id: MR identifier previously sent via dapli_mix_mr_create
+ * Returns 0 on success, -1 on failure.
+ */
+int dapli_mix_mr_free(dp_ib_cm_handle_t cm, uint32_t id)
+{
+	dat_mix_mr_t msg;
+	scif_epd_t mix_ep = cm->hca->ib_trans.scif_ep;
+	int ret, len;
+
+	/* request */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_MR_FREE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.mr_id = id;
+
+	len = sizeof(dat_mix_mr_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* NOTE(review): short send only logged; recv below may stall */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+	/* response, status only: header-sized read into the message buffer */
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_MR_FREE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n");
+	return 0;
+}
+
+
+/* MIX_QP_CREATE */
+/*
+ * Create the shadow send-side QP on MPXYD.  The receive side (QP_r) stays
+ * local; the transmit side (QP_t) is shadowed by the proxy.
+ *
+ * m_qp: local QP object; sqp_id/sqp_ctx are filled from the reply and are
+ *       needed later when posting send WRs through the proxy.
+ * attr: verbs init attributes used for the local QP
+ * Returns 0 on success, -1 on failure.
+ */
+int dapli_mix_qp_create(ib_qp_handle_t m_qp, struct ibv_qp_init_attr *attr)
+{
+	dat_mix_qp_t msg;
+	scif_epd_t mix_ep = m_qp->tp->scif_ep;
+	int ret, len;
+
+	/* zero first so unset fields go out as 0, not stack garbage */
+	memset(&msg, 0, sizeof(msg));
+
+	/* request: QP_r local, QP_t shadowed */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QP_CREATE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+
+	/* bug fix: qp_r.qp_type was set from attr then immediately
+	 * overwritten from the QP; keep the QP's actual type only */
+	msg.qp_r.qp_num = m_qp->qp->qp_num;
+	msg.qp_r.qp_type = m_qp->qp->qp_type;
+	msg.qp_r.state = m_qp->qp->state;
+	msg.qp_r.max_recv_wr = attr->cap.max_recv_wr;
+	msg.qp_r.max_recv_sge = attr->cap.max_recv_sge;
+	/* NOTE(review): verbs cq->handle used as proxy CQ id - confirm */
+	msg.qp_r.rcq_id = attr->recv_cq->handle;
+
+	msg.qp_t.qp_type = attr->qp_type;
+	msg.qp_t.max_inline_data = attr->cap.max_inline_data;
+	msg.qp_t.max_send_wr = attr->cap.max_send_wr;
+	msg.qp_t.max_send_sge = attr->cap.max_send_sge;
+	/* NOTE(review): verbs cq->handle used as proxy CQ id - confirm */
+	msg.qp_t.scq_id = attr->send_cq->handle;
+
+	len = sizeof(dat_mix_qp_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* fail fast; a partial request would desynchronize the stream */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+	/* wait for response */
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_QP_CREATE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+
+	/* save QP_t id and ctx, needed for posting WR */
+	m_qp->sqp_id = msg.qp_t.qp_id;
+	m_qp->sqp_ctx = msg.qp_t.ctx;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " reply on SCIF EP -> sqp_id 0x%x, ctx %p\n",
+		 m_qp->sqp_id, (void*)m_qp->sqp_ctx );
+
+	return 0;
+}
+
+/* MIX_QP_FREE - request and reply both fit in a bare header */
+int dapli_mix_qp_free(ib_qp_handle_t m_qp)
+{
+	scif_epd_t mix_ep = m_qp->tp->scif_ep;
+	dat_mix_hdr_t hdr;
+	const int hlen = sizeof(dat_mix_hdr_t);
+	int rc;
+
+	/* build the free request for the shadow send QP */
+	hdr.ver = DAT_MIX_VER;
+	hdr.op = MIX_QP_FREE;
+	hdr.status = 0;
+	hdr.flags = MIX_OP_REQ;
+	hdr.req_id = m_qp->sqp_id;
+
+	rc = scif_send(mix_ep, &hdr, hlen, SCIF_SEND_BLOCK);
+	if (rc != hlen) {
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, rc, hlen);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", hdr.op);
+
+	/* synchronous response */
+	rc = scif_recv(mix_ep, &hdr, hlen, SCIF_RECV_BLOCK);
+	if (rc != hlen) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, rc, hlen);
+		return -1;
+	}
+	if (hdr.ver != DAT_MIX_VER || hdr.op != MIX_QP_FREE ||
+	    hdr.flags != MIX_OP_RSP || hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 hdr.ver, hdr.op, hdr.flags, hdr.status);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n");
+	return 0;
+}
+
+/* MIX_CQ_CREATE */
+/*
+ * Create the shadow CQ on MPXYD, sized like the local CQ.
+ *
+ * m_cq: local CQ object; cq_id/cq_ctx are filled from the reply and are
+ *       needed later when polling the shadow CQ.
+ * Returns 0 on success, -1 on failure.
+ */
+int dapli_mix_cq_create(ib_cq_handle_t m_cq)
+{
+	dat_mix_cq_t msg;
+	scif_epd_t mix_ep = m_cq->tp->scif_ep;
+	int ret, len;
+
+	/* request: shadow CQ with the local CQ's entry count */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_CQ_CREATE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.cq_len = m_cq->ib_cq->cqe;
+
+	len = sizeof(dat_mix_cq_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* NOTE(review): short send only logged; recv below may stall */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+
+	/* wait for response (full message; carries cq_id and cq_ctx) */
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_CQ_CREATE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+
+	/* save CQ_t id and ctx, needed for polling */
+	m_cq->cq_id = msg.cq_id;
+	m_cq->cq_ctx = msg.cq_ctx;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " reply on SCIF EP -> cq_id 0x%x, ctx %p\n",
+		 m_cq->cq_id, (void*)m_cq->cq_ctx );
+
+	return 0;
+}
+
+/* MIX_CQ_FREE - request and reply both fit in a bare header */
+int dapli_mix_cq_free(ib_cq_handle_t m_cq)
+{
+	scif_epd_t mix_ep = m_cq->tp->scif_ep;
+	dat_mix_hdr_t hdr;
+	const int hlen = sizeof(dat_mix_hdr_t);
+	int rc;
+
+	/* build the free request for the shadow CQ */
+	hdr.ver = DAT_MIX_VER;
+	hdr.op = MIX_CQ_FREE;
+	hdr.status = 0;
+	hdr.flags = MIX_OP_REQ;
+	hdr.req_id = m_cq->cq_id;
+
+	rc = scif_send(mix_ep, &hdr, hlen, SCIF_SEND_BLOCK);
+	if (rc != hlen) {
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, rc, hlen);
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", hdr.op);
+
+	/* synchronous response */
+	rc = scif_recv(mix_ep, &hdr, hlen, SCIF_RECV_BLOCK);
+	if (rc != hlen) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, rc, hlen);
+		return -1;
+	}
+	if (hdr.ver != DAT_MIX_VER || hdr.op != MIX_CQ_FREE ||
+	    hdr.flags != MIX_OP_RSP || hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 hdr.ver, hdr.op, hdr.flags, hdr.status);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n");
+	return 0;
+}
+
+/* MIX_CM_REQ */
+/*
+ * Send a connection request for this CM object to MPXYD.
+ *
+ * NOTE(review): only cm_ctx is marshaled; destination address, sid and
+ * private data are not yet filled in - confirm against the MPXYD protocol.
+ *
+ * m_cm: connection object; scm_id/scm_ctx (shadow CM identifiers) are
+ *       saved from the reply for subsequent CM exchanges.
+ * Returns 0 on success, -1 on failure.
+ */
+int dapli_mix_connect(dp_ib_cm_handle_t m_cm)
+{
+	dat_mix_cm_t msg;
+	scif_epd_t mix_ep = m_cm->tp->scif_ep;
+	int ret, len;
+
+	/* zero first so unset fields go out as 0, not stack garbage */
+	memset(&msg, 0, sizeof(msg));
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_CM_REQ;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.cm_ctx = (uint64_t)m_cm;
+
+	/* bug fix: was sizeof(dat_mix_cq_t), the wrong message type */
+	len = sizeof(dat_mix_cm_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		/* fail fast; a partial request would desynchronize the stream */
+		dapl_log(1, " ERR: send on %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %d request on SCIF EP\n", msg.hdr.op);
+
+	/* wait for response */
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d\n", mix_ep, ret, len);
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_CM_REQ ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+
+	/* save shadow CM id and ctx, needed for later CM operations */
+	m_cm->scm_id = msg.cm_id;
+	m_cm->scm_ctx = msg.cm_ctx;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " reply on SCIF EP -> cm_id 0x%x, ctx %p\n",
+		 m_cm->scm_id, (void*)m_cm->scm_ctx );
+
+	return 0;
+}
+
+/* MIX recv, messages from MPXYD */
+/*
+ * Drain/dispatch unsolicited MIX messages arriving on the SCIF endpoint;
+ * called from cm_thread when scif_ep polls readable.
+ * Currently a stub - nothing is consumed yet, always returns 0.
+ */
+int dapli_mix_recv(DAPL_HCA *hca, int scif_ep)
+{
+
+	return 0;
+}
+
+
+
+
+
+
+
+
+
+
+
+
/* disconnect events for RC's only */
if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
dapl_os_lock(&cm_ptr->ep->header.lock);
- dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0);
+ dapls_modify_qp_state(cm_ptr->ep->qp_handle->qp, IBV_QPS_ERR, 0,0,0);
dapl_os_unlock(&cm_ptr->ep->header.lock);
if (cm_ptr->ep->cr_ptr) {
dapls_cr_callback(cm_ptr,
/* REQ: QP info in msg.saddr, IA address in msg.daddr, and pdata */
cm_ptr->hca = ia_ptr->hca_ptr;
cm_ptr->msg.op = ntohs(DCM_REQ);
- cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num);
- cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type;
+ cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp->qp_num);
+ cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp->qp_type;
cm_ptr->msg.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid;
dapl_os_memcpy(&cm_ptr->msg.saddr.ib.gid[0],
&ia_ptr->hca_ptr->ib_trans.gid, 16);
/* modify QP to RTR and then to RTS with remote info */
dapl_os_lock(&ep_ptr->header.lock);
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_RTR,
cm_ptr->msg.saddr.ib.qpn,
cm_ptr->msg.saddr.ib.lid,
(ib_gid_handle_t)cm_ptr->msg.saddr.ib.gid) != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" CONN_RTU: QPS_RTR ERR %s (%d,%d,%x,%x,%x) -> %s %x\n",
- strerror(errno), ep_ptr->qp_handle->qp_type,
- ep_ptr->qp_state, ep_ptr->qp_handle->qp_num,
+ strerror(errno), ep_ptr->qp_handle->qp->qp_type,
+ ep_ptr->qp_state, ep_ptr->qp_handle->qp->qp_num,
ntohl(cm_ptr->msg.saddr.ib.qpn),
ntohs(cm_ptr->msg.saddr.ib.lid),
inet_ntoa(((struct sockaddr_in *)
dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_RTS,
cm_ptr->msg.saddr.ib.qpn,
cm_ptr->msg.saddr.ib.lid,
NULL) != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" CONN_RTU: QPS_RTS ERR %s (%d,%d,%x,%x,%x) -> %s %x\n",
- strerror(errno), ep_ptr->qp_handle->qp_type,
- ep_ptr->qp_state, ep_ptr->qp_handle->qp_num,
+ strerror(errno), ep_ptr->qp_handle->qp->qp_type,
+ ep_ptr->qp_state, ep_ptr->qp_handle->qp->qp_num,
ntohl(cm_ptr->msg.saddr.ib.qpn),
ntohs(cm_ptr->msg.saddr.ib.lid),
inet_ntoa(((struct sockaddr_in *)
if (event == IB_CME_CONNECTED) {
cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle,
- ep_ptr->qp_handle,
+ ep_ptr->qp_handle->qp,
cm_ptr->msg.saddr.ib.lid,
NULL);
if (cm_ptr->ah) {
#ifdef DAT_EXTENSIONS
if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD &&
- ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
+ ep_ptr->qp_handle->qp->qp_type != IBV_QPT_UD) {
dapl_log(DAPL_DBG_TYPE_ERR,
" ACCEPT_USR: ERR remote QP is UD,"
", but local QP is not\n");
/* modify QP to RTR and then to RTS with remote info already read */
dapl_os_lock(&ep_ptr->header.lock);
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_RTR,
cm_ptr->msg.saddr.ib.qpn,
cm_ptr->msg.saddr.ib.lid,
dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
- if (dapls_modify_qp_state(ep_ptr->qp_handle,
+ if (dapls_modify_qp_state(ep_ptr->qp_handle->qp,
IBV_QPS_RTS,
cm_ptr->msg.saddr.ib.qpn,
cm_ptr->msg.saddr.ib.lid,
local.ver = htons(DCM_VER);
local.op = htons(DCM_REP);
local.rd_in = ep_ptr->param.ep_attr.max_rdma_read_in;
- local.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num);
- local.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type;
+ local.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp->qp_num);
+ local.saddr.ib.qp_type = ep_ptr->qp_handle->qp->qp_type;
local.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid;
dapl_os_memcpy(&local.saddr.ib.gid[0],
&ia_ptr->hca_ptr->ib_trans.gid, 16);
ntohl(local.saddr.ib.qpn), ntohs(local.p_size));
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" ACCEPT_USR: local GID subnet %016llx id %016llx\n",
- (unsigned long long)
- htonll(*(uint64_t*)&local.saddr.ib.gid[0]),
- (unsigned long long)
- htonll(*(uint64_t*)&local.saddr.ib.gid[8]));
+ (unsigned long long)htonll(*(uint64_t*)&local.saddr.ib.gid[0]),
+ (unsigned long long)htonll(*(uint64_t*)&local.saddr.ib.gid[8]));
dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
if (event == IB_CME_CONNECTED) {
cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle,
- cm_ptr->ep->qp_handle,
+ cm_ptr->ep->qp_handle->qp,
cm_ptr->msg.saddr.ib.lid,
NULL);
if (cm_ptr->ah) {
if (hca_ptr->ib_trans.ib_cq_empty) {
struct ibv_comp_channel *channel;
- channel = hca_ptr->ib_trans.ib_cq_empty->channel;
- ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+ channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+ ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
ibv_destroy_comp_channel(channel);
}
/* IB info in network order */
cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */
- cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */
- cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
+ cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp->qp_num); /* ep */
+ cm->msg.saddr.ib.qp_type = ep->qp_handle->qp->qp_type;
cm->msg.saddr.ib.lid = hca->ib_trans.addr.ib.lid;
dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
&hca->ib_trans.addr.ib.gid, 16);
switch (cm->state) {
case DCM_CONNECTED:
/* CONSUMER: move to err state to flush, if not UD */
- if (cm->ep->qp_handle->qp_type != IBV_QPT_UD)
- dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+ if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+ dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
/* send DREQ, event after DREP or DREQ timeout */
cm->state = DCM_DISC_PENDING;
break;
case DCM_DISC_RECV:
/* CM_THREAD: move to err state to flush, if not UD */
- if (cm->ep->qp_handle->qp_type != IBV_QPT_UD)
- dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0);
+ if (cm->ep->qp_handle->qp->qp_type != IBV_QPT_UD)
+ dapls_modify_qp_state(cm->ep->qp_handle->qp, IBV_QPS_ERR,0,0,0);
/* DREQ received, send DREP and schedule event, finalize */
cm->msg.op = htons(DCM_DREP);
}
dapl_os_unlock(&cm->lock);
DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)),
- ep->qp_handle->qp_type == IBV_QPT_UD ? DCNT_IA_CM_AH_REQ_TX : DCNT_IA_CM_REQ_TX);
+ ep->qp_handle->qp->qp_type == IBV_QPT_UD ? DCNT_IA_CM_AH_REQ_TX : DCNT_IA_CM_REQ_TX);
return DAT_SUCCESS;
/* modify QP to RTR and then to RTS with remote info */
dapl_os_lock(&cm->ep->header.lock);
- if (dapls_modify_qp_state(cm->ep->qp_handle,
+ if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
IBV_QPS_RTR,
cm->msg.daddr.ib.qpn,
cm->msg.daddr.ib.lid,
event = IB_CME_LOCAL_FAILURE;
goto bail;
}
- if (dapls_modify_qp_state(cm->ep->qp_handle,
+ if (dapls_modify_qp_state(cm->ep->qp_handle->qp,
IBV_QPS_RTS,
cm->msg.daddr.ib.qpn,
cm->msg.daddr.ib.lid,
xevent.type = DAT_IB_UD_REMOTE_AH;
xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
xevent.remote_ah.ah = dapls_create_ah(cm->hca,
- cm->ep->qp_handle->pd,
- cm->ep->qp_handle,
+ cm->ep->qp_handle->qp->pd,
+ cm->ep->qp_handle->qp,
htons(lid),
NULL);
if (xevent.remote_ah.ah == NULL) {
xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH;
xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
xevent.remote_ah.ah = dapls_create_ah(cm->hca,
- cm->ep->qp_handle->pd,
- cm->ep->qp_handle,
+ cm->ep->qp_handle->qp->pd,
+ cm->ep->qp_handle->qp,
htons(lid),
NULL);
if (xevent.remote_ah.ah == NULL) {
sizeof(union dcm_addr));
/* remote ia_addr reference includes ucm qpn, not IB qpn */
- ((union dcm_addr*)
- &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+ ((union dcm_addr*)&xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" PASSIVE: UD xevent ah %p qpn %x lid %x\n",
cm->ref_count,
htons(cm->hca->ib_trans.addr.ib.lid),
htons(cm->msg.sport),
- htonl(ep->qp_handle->qp_num),
+ htonl(ep->qp_handle->qp->qp_num),
htons(cm->msg.daddr.ib.lid),
htons(cm->msg.dport),
htonl(cm->msg.daddr.ib.qpn),
#ifdef DAT_EXTENSIONS
if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD &&
- ep->qp_handle->qp_type != IBV_QPT_UD) {
+ ep->qp_handle->qp->qp_type != IBV_QPT_UD) {
dapl_log(DAPL_DBG_TYPE_ERR,
" ACCEPT_USR: ERR remote QP is UD,"
", but local QP is not\n");
/* modify QP to RTR and then to RTS with remote info already read */
dapl_os_lock(&ep->header.lock);
- if (dapls_modify_qp_state(ep->qp_handle,
+ if (dapls_modify_qp_state(ep->qp_handle->qp,
IBV_QPS_RTR,
cm->msg.daddr.ib.qpn,
cm->msg.daddr.ib.lid,
dapl_os_unlock(&ep->header.lock);
goto bail;
}
- if (dapls_modify_qp_state(ep->qp_handle,
+ if (dapls_modify_qp_state(ep->qp_handle->qp,
IBV_QPS_RTS,
cm->msg.daddr.ib.qpn,
cm->msg.daddr.ib.lid,
/* setup local QP info and type from EP, copy pdata, for reply */
cm->msg.op = htons(DCM_REP);
cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
- cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num);
- cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
+ cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp->qp_num);
+ cm->msg.saddr.ib.qp_type = ep->qp_handle->qp->qp_type;
cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid;
dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
&cm->hca->ib_trans.addr.ib.gid, 16);
#include "dapl_osd.h"
#include <stdlib.h>
+#include <arpa/inet.h>
#ifdef DAT_IB_COLLECTIVES
#include <collectives/ib_collectives.h>
struct ibv_port_attr port_attr;
int i;
DAT_RETURN dat_status;
+ char gid_str[INET6_ADDRSTRLEN];
/* Get list of all IB devices, find match, open */
dev_list = ibv_get_device_list(NULL);
hca_ptr->port_num,
inet_ntoa(((struct sockaddr_in *)
&hca_ptr->hca_address)->sin_addr));
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x ""
- " ID 0x" F64x "\n",
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " open_hca: QPN 0x%x LID 0x%x GID %s\n",
ntohl(hca_ptr->ib_trans.addr.ib.qpn),
ntohs(hca_ptr->ib_trans.addr.ib.lid),
- (unsigned long long)
- ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]),
- (unsigned long long)
- ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8]));
+ inet_ntop(AF_INET6, hca_ptr->ib_trans.addr.ib.gid,
+ gid_str, sizeof(gid_str)));
/* save LID, GID, QPN, PORT address information, for ia_queries */
/* Set AF_INET6 to insure callee address storage of 28 bytes */
if (hca_ptr->ib_trans.ib_cq_empty) {
struct ibv_comp_channel *channel;
- channel = hca_ptr->ib_trans.ib_cq_empty->channel;
- ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+ channel = hca_ptr->ib_trans.ib_cq_empty->ib_cq->channel;
+ ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty->ib_cq);
ibv_destroy_comp_channel(channel);
}
switch (event.event_type) {
case IBV_EVENT_CQ_ERR:
{
- struct dapl_ep *evd_ptr =
+ struct dapl_evd *evd_ptr =
event.element.cq->cq_context;
dapl_log(DAPL_DBG_TYPE_ERR,
/* report up if async callback still setup */
if (tp->async_cq_error)
tp->async_cq_error(hca->ib_hca_handle,
- event.element.cq,
+ evd_ptr->ib_cq_handle,
&event, (void *)evd_ptr);
break;
}
#include <getopt.h>
#include <fcntl.h>
#include <scif.h>
+#include <byteswap.h>
+#include <infiniband/verbs.h>
#include "dat2/udat.h"
#include "dat2/dat_mic_extensions.h"
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define htonll(x) (x)
+#define ntohll(x) (x)
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x) bswap_64(x)
+#define ntohll(x) bswap_64(x)
+#endif
+
/*
* Service options - set through mpxyd.conf file.
*/
static char log_file[128] = "stdout";
static int log_level = 0;
static char lock_file[128] = "/var/run/mpxyd.pid";
-static char scif_dev[32] = "scif";
-static short scif_sport = SCIF_OFED_PORT_7;
-static scif_epd_t scif_ep;
+static short scif_sport = SCIF_OFED_PORT_8;
+static scif_epd_t scif_listen_ep;
static struct scif_portID scif_id;
/* scif-rdma cmd and data channel parameters */
-static int mix_buffer_mb = 64;
+static int mix_buffer_mb = 4;
static int mix_buffer_sg = 128 * 1024;
static int mix_cmd_depth = 50;
static int mix_cmd_size = 256;
static int mcm_depth = 500;
static int mcm_size = 256;
static int mcm_signal = 100;
-static int mcm_retry_cnt = 10;
+static int mcm_retry = 10;
static int mcm_rep_ms = 800;
static int mcm_rtu_ms = 400;
static FILE *logfile;
-static pthread_t mpxy_thread;
static pthread_mutex_t flock;
-/* lists, fds, etc */
-static struct llist_entry
+/* lists, fds, etc., include tid for lists */
+typedef struct _llist_entry
{
- struct llist_entry *next;
- struct llist_entry *prev;
+ struct _llist_entry *next;
+ struct _llist_entry *prev;
+ struct _llist_entry *head;
void *data;
-};
+ uint32_t tid;
+
+} LLIST_ENTRY;
+#define MCM_PORT_SPACE 0xffff
#define MCM_FD_SETSIZE 1024
struct mcm_fd_set {
int index;
};
/* IB verbs device lists */
-static struct ibv_device **iblist;
-static struct llist_entry mcm_llist;
+static LLIST_ENTRY mcm_list;
static pthread_mutex_t mcm_llock;
+typedef enum mcm_state
+{
+ MCM_INIT,
+ MCM_LISTEN,
+ MCM_CONN_PENDING,
+ MCM_REP_PENDING,
+ MCM_ACCEPTING,
+ MCM_ACCEPTING_DATA,
+ MCM_ACCEPTED,
+ MCM_REJECTING,
+ MCM_REJECTED,
+ MCM_CONNECTED,
+ MCM_RELEASE,
+ MCM_DISC_PENDING,
+ MCM_DISCONNECTED,
+ MCM_DESTROY,
+ MCM_RTU_PENDING,
+ MCM_DISC_RECV,
+ MCM_FREE,
+
+} MCM_STATE;
+
/* Support for IB devices - One service per device: UD QP for fabric CM services */
-static struct mcm_ib_dev {
- DLIST_ENTRY entry;
- DLIST_ENTRY mix_list; /* MIC client open instances */
- pthread_mutex_t mix_lock;
+typedef struct mcm_ib_dev {
+ LLIST_ENTRY entry;
+ LLIST_ENTRY smd_list; /* MIC client open instances */
+ pthread_mutex_t slock; /* SCIF client device lock */
+ pthread_mutex_t plock; /* port space lock */
/* MCM - IB Device Resources */
- ibv_context *ib_dev;
- uint16_t port;
+ struct ibv_device *ibdev;
+ struct ibv_context *ibctx;
+ int ref_count;
+ char name[IBV_SYSFS_NAME_MAX];
+ uint16_t port; /* IB device port */
struct ibv_pd *pd;
struct ibv_cq *scq;
struct ibv_cq *rcq;
struct ibv_qp *qp;
struct ibv_mr *mr_rbuf;
struct ibv_mr *mr_sbuf;
- ib_cm_msg_t *sbuf;
- ib_cm_msg_t *rbuf;
struct ibv_comp_channel *rch;
struct ibv_ah **ah;
- union dat_mcm_addr addr;
+ dat_mcm_msg_t *sbuf;
+ dat_mcm_msg_t *rbuf;
+ uint64_t *ports; /* SCIF device open clients, cm_id*/
+ dat_mcm_addr_t addr;
uint16_t lid;
- uint8_t sl;
- uint16_t pkey;
- int pkey_idx;
-};
-
-/* per MIC MCM client open, SCIF device: TODO share message resources across clients? */
-static struct mcm_scif_dev {
- struct list_entry entry;
+ dat_mix_dev_attr_t dev_attr; /* provided with mix_open */
+ int s_hd;
+ int s_tl;
+ int cqe;
+ int qpe;
+ int signal;
+ int retries;
+ int cm_timer;
+ int rep_time;
+ int rtu_time;
+
+} mcm_ib_dev_t;
+
+/* DAPL MCM QP object, id in entry */
+typedef struct mcm_qp {
+ LLIST_ENTRY entry;
+ struct mcm_scif_dev *smd;
+ struct mcm_cm *cm;
+ struct ibv_qp *ib_qp;
+ dat_mix_qp_attr_t qp_t;
+ dat_mix_qp_attr_t qp_r;
+
+} mcm_qp_t;
+
+/* DAPL MCM CQ object, id in entry */
+typedef struct mcm_cq {
+ LLIST_ENTRY entry;
+ struct mcm_scif_dev *smd;
+ struct ibv_cq *ib_cq;
+ struct ibv_comp_channel *ib_ch;
+ uint32_t cq_len;
+
+} mcm_cq_t;
+
+/* DAPL MCM MR object, id in entry */
+typedef struct mcm_mr {
+ LLIST_ENTRY entry;
+ struct mcm_scif_dev *smd;
+ uint32_t len;
+ uint32_t ib_lkey;
+ uint32_t ib_rkey;
+ off_t scif_off;
+
+} mcm_mr_t;
+
+/* DAPL MCM Connection/Listen object */
+typedef struct mcm_cm {
+ LLIST_ENTRY entry;
pthread_mutex_t lock;
- struct mcm_ib_dev *mcm_dev;
- scif_epd_t ep;
- struct scif_portID peer;
- off_t r_address;
- off_t r_offset;
- int r_len;
- dat_mix_msg_t *sbuf;
- dat_mix_msg_t *rbuf;
-};
+ struct mcm_ib_dev *md; /* mcm_ib_dev parent reference */
+ struct mcm_scif_dev *smd; /* mcm_scif_dev parent reference */
+ struct mcm_cm *l_ep; /* listen reference, passive */
+ uint16_t sid; /* service ID for endpoint */
+ uint64_t timer;
+ int ref_count;
+ int state;
+ int retries;
+ struct ibv_comp_channel *ib_ch;
+ struct ibv_pd *pd;
+ struct ibv_cq *scq;
+ struct ibv_cq *rcq;
+ struct mcm_qp *m_qp; /* pair of QP's, qp_t and qp_r */
+ uint16_t p_size; /* accept p_data, for retries */
+ uint8_t p_data[DAT_MCM_PDATA_SIZE];
+ struct dat_mcm_msg msg;
+
+} mcm_cm_t;
+
+/* per MIC MCM client open, SCIF device object:
+ *
+ * TODO share message resources across clients?
+ * or maybe NOT share IB device, create new thread with each SCIF client?
+ */
+typedef struct mcm_scif_dev {
+ LLIST_ENTRY entry;
+ LLIST_ENTRY clist; /* LISTS: cm list */
+ LLIST_ENTRY llist; /* listen list */
+ LLIST_ENTRY qplist; /* qp list */
+ LLIST_ENTRY cqlist; /* cq list */
+ LLIST_ENTRY mrlist; /* mr list */
+ pthread_mutex_t clock; /* LOCKS: cm lock */
+ pthread_mutex_t llock; /* listen lock */
+ pthread_mutex_t plock; /* port space lock */
+ pthread_mutex_t qplock; /* qp lock */
+ pthread_mutex_t cqlock; /* cq lock */
+ pthread_mutex_t mrlock; /* mr lock */
+ int ref_count; /* references */
+ struct mcm_ib_dev *md; /* mcm_ib_dev, parent */
+ uint16_t cm_id; /* port ID MIC client, md->ports */
+ uint64_t *ports; /* EP port space MIC client */
+ scif_epd_t scif_ep; /* SCIF EP, MIX device operations */
+ scif_epd_t scif_cm_ep; /* SCIF CM EP, MIX device CM messages */
+ struct scif_portID peer; /* SCIF EP peer, MIC adapter */
+ struct scif_portID peer_cm; /* SCIF CM EP peer, MIC adapter */
+ char *m_buf; /* MIC proxy buffer, SCIF and IB */
+ struct ibv_mr *m_mr; /* ib registration */
+ off_t m_offset; /* SCIF registration */
+ int m_len; /* buffer size */
+ int m_seg; /* segment size */
+
+} mcm_scif_dev_t;
#define mlog(level, format, ...) \
mpxy_write(level, "%s: "format, __func__, ## __VA_ARGS__)
pthread_mutex_unlock(&flock);
va_end(args);
}
-/* link list helper resources */
-static void init_list(struct llist_entry *head)
+
+/* link list helper resources */
+static void init_list(LLIST_ENTRY *head)
{
head->next = head;
head->prev = head;
head->data = NULL;
+ head->tid = 0;
}
-static int list_empty(struct llist_entry *head)
+static int list_empty(LLIST_ENTRY *head)
{
return head->next == head;
}
-static void *get_head_entry(struct llist_entry *head)
+static void *get_head_entry(LLIST_ENTRY *head)
{
if (list_empty(head))
return NULL;
else
- return head->data;
+ return head->next->data;
}
-static void *get_next_entry(struct llist_entry *entry, struct lllist_entry *head)
+static void *get_next_entry(LLIST_ENTRY *entry, LLIST_ENTRY *head)
{
if (entry->next == head)
return NULL;
else
- return entry->data;
+ return entry->next->data;
}
-static void insert_head(struct llist_entry *entry, struct llist_entry *head, void *data)
+static void insert_head(LLIST_ENTRY *entry, LLIST_ENTRY *head, void *data)
{
+ head->tid++; /* each insertion gets unique ID */
+ entry->tid = head->tid;
entry->next = head->next;
entry->prev = head;
entry->data = data;
- head->next->Prev = entry;
- head->next = entry;
+ head->next->prev = entry;
+ head->next = entry;
}
-static void insert_tail(struct llist_entry *entry, struct llist_entry *head, void *data)
+static void insert_tail(LLIST_ENTRY *entry, LLIST_ENTRY *head, void *data)
{
- insert_head(entry, head->prev, data);
+ head->tid++; /* each insertion gets unique ID */
+ entry->tid = head->tid;
+ entry->data = data;
+ entry->next = head->prev->next;
+ entry->prev = head->prev;
+ head->prev->next = entry;
+ head->prev = entry;
+
}
-static void remove_entry(struct llist_entry *entry)
+static void remove_entry(LLIST_ENTRY *entry)
{
- entry->prev->next = entry->Next;
- entry->next->prev = entry->Prev;
+ entry->prev->next = entry->next;
+ entry->next->prev = entry->prev;
entry->data = NULL;
+ entry->tid = 0;
}
/* FD helper resources */
set->index = 0;
}
-static int mcm_fd_set(int fd, struct dapl_fd_set *set,
- enum DAPL_FD_EVENTS event)
+static int mcm_fd_set(int fd, struct mcm_fd_set *set, int event)
{
if (set->index == MCM_FD_SETSIZE - 1) {
mlog(0," mcm exceeded FD_SETSIZE %d\n", set->index + 1);
struct pollfd fds;
int ret;
- fds.fd = s;
+ fds.fd = fd;
fds.events = event;
fds.revents = 0;
ret = poll(&fds, 1, 0);
- mlog(0, " poll: fd=%d ret=%d, event=0x%x\n", s, ret, fds.revents);
+ mlog(0, " poll: fd=%d ret=%d, event=0x%x\n", fd, ret, fds.revents);
if (ret == 0)
return 0;
else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL))
mlog(1, " select: sleep, fds=%d\n", set->index);
ret = poll(set->set, set->index, time_ms);
mlog(1, " select: wakeup, ret=0x%x\n", ret);
+
return ret;
}
+/* MCM 16-bit port space */
+static uint16_t mcm_get_port(uint64_t *p_port, uint16_t port, uint64_t ctx)
+{
+ int i = 0;
+
+ /* get specific port */
+ if (port) {
+ if (p_port[port] == 0) {
+ p_port[port] = ctx;
+ i = port;
+ }
+ goto done;
+ }
+
+ /* get first free port */
+ for (i = MCM_PORT_SPACE; i > 0; i--) {
+ if (p_port[i] == 0) {
+ p_port[i] = ctx;
+ break;
+ }
+ }
+done:
+ return i;
+}
+
+static void mcm_free_port(uint64_t *p_port, uint16_t port)
+{
+ p_port[port] = 0;
+}
+
+static uint64_t mcm_get_port_ctx(uint64_t *p_port, uint16_t port)
+{
+ return p_port[port];
+}
+
+/* operation, state strings */
+static char * mcm_op_str(IN int op)
+{
+ static char *ops[] = {
+ "INVALID",
+ "REQ",
+ "REP",
+ "REJ_USER",
+ "REJ_CM",
+ "RTU",
+ "DREQ",
+ "DREP",
+ };
+ return ((op < 1 || op > 7) ? "Invalid OP?" : ops[op]);
+}
+
+static char * mcm_state_str(IN int st)
+{
+ static char *state[] = {
+ "CM_INIT",
+ "CM_LISTEN",
+ "CM_CONN_PENDING",
+ "CM_REP_PENDING",
+ "CM_ACCEPTING",
+ "CM_ACCEPTING_DATA",
+ "CM_ACCEPTED",
+ "CM_REJECTING",
+ "CM_REJECTED",
+ "CM_CONNECTED",
+ "CM_RELEASE",
+ "CM_DISC_PENDING",
+ "CM_DISCONNECTED",
+ "CM_DESTROY",
+ "CM_RTU_PENDING",
+ "CM_DISC_RECV",
+ "CM_FREE"
+ };
+ return ((st < 0 || st > 16) ? "Invalid CM state?" : state[st]);
+}
+
+
static FILE *mpxy_open_log(void)
{
FILE *f;
log_level = atoi(value);
else if (!strcasecmp("lock_file", opt))
strcpy(lock_file, value);
- else if (!strcasecmp("rdma_buffer_kb", opt))
- rdma_buffer_size = atoi(value);
- else if (!strcasecmp("cm_msg_depth", opt))
- cm_msg_depth = atoi(value);
+ else if (!strcasecmp("buffer_pool_mb", opt))
+ mix_buffer_mb = atoi(value);
+ else if (!strcasecmp("mcm_depth", opt))
+ mcm_depth = atoi(value);
else if (!strcasecmp("scif_port_id", opt))
scif_sport = (short) atoi(value);
- else if (!strcasecmp("tx_depth", opt))
- tx_depth = atoi(value);
- else if (!strcasecmp("tx_signal_rate", opt))
- tx_signal = atoi(value);
+ else if (!strcasecmp("mcm_signal_rate", opt))
+ mcm_signal = atoi(value);
}
fclose(f);
mlog(0, "log level %d\n", log_level);
mlog(0, "lock file %s\n", lock_file);
mlog(0, "SCIF server_port %d\n", scif_sport);
- mlog(0, "rdma buffer pool size %d\n", buffer_pool_mb);
- mlog(0, "transmit queue depth %d\n", tx_depth);
- mlog(0, "transmit completion signal rate %d\n", tx_signal);
- mlog(0, "uDAPL provider/device - %s\n", dapl_dev);
+ mlog(0, "rdma buffer pool size %d\n", mix_buffer_mb);
+ mlog(0, "mcm msg queue depth %d\n", mcm_depth);
+ mlog(0, "mcm msg completion signal rate %d\n", mcm_signal);
}
static int mpxy_open_lock_file(void)
return -1;
}
- scif_ep = scif_open();
- if (scif_ep < 0) {
+ scif_listen_ep = scif_open();
+ if (scif_listen_ep < 0) {
mlog(0, "scif_open() failed with error %d\n", errno);
return -1;
}
- mlog(1,"Opened SCIF endpoint for listening\n");
+ mlog(1,"Opened SCIF endpoint for OPERATIONS listening, ep = %d\n", scif_listen_ep);
- ret = scif_bind(scif_ep, scif_sport);
+ ret = scif_bind(scif_listen_ep, scif_sport);
if (ret < 0) {
- fprintf(stderr, "scif_bind() failed with error %d\n", errno);
- scif_close(scif_ep);
+ mlog(0, "scif_bind() to %d failed with error %s\n", scif_sport, strerror(errno));
+ scif_close(scif_listen_ep);
return -1;
}
- scif_id.port = ret;
- mlog(1,"Bind to reserved SCIF OFED port %d\n", (uint16_t)scif_id.port);
- ret = scif_listen(scif_ep, 5);
+ scif_id.port = ret;
+ mlog(1,"Bound to reserved SCIF OFED port %d\n", (uint16_t)scif_id.port);
+ ret = scif_listen(scif_listen_ep, 5);
if (ret < 0) {
mlog(0, "scif_listen() failed with error %d\n", errno);
- scif_close(scif_ep);
+ scif_close(scif_listen_ep);
return -1;
}
static void close_scif()
{
- scif_close(scif_ep);
+ scif_close(scif_listen_ep);
+}
+
+static void close_ib()
+{
+ /* any cleanup ??, server thread should do the work */
+ return;
}
static int config_fd(int fd)
return 0;
}
+/* Create address handle for remote QP, info in network order */
+static struct ibv_ah *mcm_create_ah(mcm_ib_dev_t *md,
+ struct ibv_pd *pd,
+ struct ibv_qp *qp,
+ uint16_t lid,
+ union ibv_gid *gid)
+{
+ struct ibv_qp_attr qp_attr;
+ struct ibv_ah *ah;
+
+ memset((void *)&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.qp_state = IBV_QP_STATE;
+ qp_attr.ah_attr.dlid = lid;
+ if (gid != NULL) {
+ mlog(2, "create_ah: with GID\n");
+ qp_attr.ah_attr.is_global = 1;
+ qp_attr.ah_attr.grh.dgid.global.subnet_prefix =
+ ntohll(gid->global.subnet_prefix);
+ qp_attr.ah_attr.grh.dgid.global.interface_id =
+ ntohll(gid->global.interface_id);
+ qp_attr.ah_attr.grh.hop_limit = md->dev_attr.hop_limit;
+ qp_attr.ah_attr.grh.traffic_class = md->dev_attr.tclass;
+ }
+ qp_attr.ah_attr.sl = md->dev_attr.sl;
+ qp_attr.ah_attr.src_path_bits = 0;
+ qp_attr.ah_attr.port_num = md->port;
+
+ mlog(2, "create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n",
+ md->port, qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);
+
+ /* UD: create AH for remote side */
+ ah = ibv_create_ah(pd, &qp_attr.ah_attr);
+ if (!ah) {
+ mlog(0, " create_ah: ERR %s\n", strerror(errno));
+ return NULL;
+ }
+
+ mlog(2, "create_ah: AH %p for lid %x\n", ah, qp_attr.ah_attr.dlid);
+ return ah;
+}
+
/* Modify UD-QP from init, rtr, rts, info network order */
-static int modify_ud_qp(struct mcm_dev md, struct ibv_qp qp)
+static int modify_ud_qp(mcm_ib_dev_t *md, struct ibv_qp *qp)
{
struct ibv_qp_attr qp_attr;
/* modify QP, setup and prepost buffers */
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+ memset((void *)&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_INIT;
- qp_attr.pkey_index = md->pkey_idx;
+ qp_attr.pkey_index = md->dev_attr.pkey_idx;
qp_attr.port_num = md->port;
- qp_attr.qkey = DAT_UD_QKEY;
+ qp_attr.qkey = DAT_MCM_UD_QKEY;
if (ibv_modify_qp(qp, &qp_attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
mlog(0, " modify_ud_qp INIT: ERR %s\n", strerror(errno));
return 1;
}
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+ memset((void *)&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_RTR;
- if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) {
+ if (ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE)) {
mlog(0, " modify_ud_qp RTR: ERR %s\n", strerror(errno));
return 1;
}
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
+ memset((void *)&qp_attr, 0, sizeof(qp_attr));
qp_attr.qp_state = IBV_QPS_RTS;
qp_attr.sq_psn = 1;
if (ibv_modify_qp(qp, &qp_attr,
return 0;
}
-static int init_ib()
+static int modify_qp(struct ibv_qp *qp_handle,
+ enum ibv_qp_state qp_state,
+ uint32_t qpn,
+ uint16_t lid,
+ union ibv_gid *gid)
+{
+ struct ibv_qp_attr qp_attr;
+ enum ibv_qp_attr_mask mask = IBV_QP_STATE;
+ mcm_qp_t *m_qp = (mcm_qp_t *)qp_handle->qp_context;
+ int ret;
+
+ memset((void *)&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.qp_state = qp_state;
+
+ switch (qp_state) {
+ case IBV_QPS_RTR:
+ mlog(1, " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x"
+ " port %d ep %p qp_state %d \n",
+ qp_handle->qp_type, ntohl(qpn), gid,
+ m_qp->smd->md->dev_attr.global,
+ ntohs(lid), m_qp->smd->md->port,
+ m_qp, m_qp->qp_t.cur_state);
+
+ mask |= IBV_QP_AV |
+ IBV_QP_PATH_MTU |
+ IBV_QP_DEST_QPN |
+ IBV_QP_RQ_PSN |
+ IBV_QP_MIN_RNR_TIMER;
+
+ qp_attr.dest_qp_num = ntohl(qpn);
+ qp_attr.rq_psn = 1;
+ qp_attr.path_mtu = m_qp->smd->md->dev_attr.mtu;
+ qp_attr.max_dest_rd_atomic = 0;
+ qp_attr.min_rnr_timer = m_qp->smd->md->dev_attr.rnr_timer;
+
+ /* address handle. RC and UD */
+ qp_attr.ah_attr.dlid = ntohs(lid);
+ qp_attr.ah_attr.sl = m_qp->smd->md->dev_attr.sl;
+ qp_attr.ah_attr.src_path_bits = 0;
+ qp_attr.ah_attr.port_num = m_qp->smd->md->port;
+ break;
+
+ case IBV_QPS_RTS:
+ mask |= IBV_QP_SQ_PSN |
+ IBV_QP_TIMEOUT |
+ IBV_QP_RETRY_CNT |
+ IBV_QP_RNR_RETRY;
+ qp_attr.sq_psn = 1;
+ qp_attr.timeout = m_qp->smd->md->dev_attr.ack_timer;
+ qp_attr.retry_cnt = m_qp->smd->md->dev_attr.ack_retry;
+ qp_attr.rnr_retry = m_qp->smd->md->dev_attr.rnr_retry;
+
+ mlog(1, " QPS_RTS: psn %x rd_atomic %d ack %d "
+ " retry %d rnr_retry %d m_qp %p qp_state %d\n",
+ qp_attr.sq_psn, qp_attr.max_rd_atomic,
+ qp_attr.timeout, qp_attr.retry_cnt,
+ qp_attr.rnr_retry, m_qp, m_qp->qp_t.cur_state);
+ break;
+
+ case IBV_QPS_INIT:
+ mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
+ qp_attr.qp_access_flags =
+ IBV_ACCESS_LOCAL_WRITE |
+ IBV_ACCESS_REMOTE_WRITE |
+ IBV_ACCESS_REMOTE_READ;
+ qp_attr.pkey_index = m_qp->smd->md->dev_attr.pkey_idx;
+ qp_attr.port_num = m_qp->smd->md->port;
+
+ mlog(1, " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
+ qp_attr.pkey_index, qp_attr.port_num,
+ qp_attr.qp_access_flags, qp_attr.qkey);
+ break;
+
+ default:
+ break;
+ }
+
+ ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
+ if (ret == 0) {
+ m_qp->qp_t.cur_state = m_qp->qp_t.state = qp_state;
+ return 0;
+ } else {
+ mlog(0, " modify_qp ERR (%s): type %d qpn 0x%x lid 0x%x"
+ " port %d state %d mtu %d rd %d rnr %d sl %d\n",
+ strerror(ret), qp_handle->qp_type, ntohl(qpn),
+ ntohs(lid), m_qp->smd->md->port,
+ m_qp->qp_t.cur_state,
+ qp_attr.path_mtu, qp_attr.max_dest_rd_atomic,
+ qp_attr.min_rnr_timer, qp_attr.ah_attr.sl);
+ }
+ return ret;
+}
+
+/* MCM Endpoint CM objects */
+void mcm_cm_free(mcm_cm_t *cm)
+{
+ /* client, release local conn id port */
+ if (!cm->l_ep && cm->msg.sport)
+ mcm_free_port(cm->smd->ports, ntohs(cm->msg.sport));
+
+ pthread_mutex_destroy(&cm->lock);
+ free(cm);
+}
+
+mcm_cm_t *mcm_cm_create(mcm_scif_dev_t *smd, mcm_qp_t *m_qp)
{
+ mcm_cm_t *cm;
+
+ /* Allocate CM, init lock, and initialize */
+ if ((cm = malloc(sizeof(*cm))) == NULL)
+ return NULL;
+ memset(cm, 0, sizeof(*cm));
+
+ init_list(&cm->entry);
+ if (pthread_mutex_init(&cm->lock, NULL))
+ goto bail;
+
+ cm->smd = smd;
+ cm->msg.ver = htons(DAT_MCM_VER);
+ cm->msg.sqpn = htonl(smd->md->qp->qp_num); /* ucm */
+
+ /* ACTIVE: init source address QP info from MPXYD and MIC client */
+ if (m_qp) {
+ cm->msg.sport = htons(mcm_get_port(smd->ports, 0, (uint64_t)smd));
+ if (!cm->msg.sport) {
+ pthread_mutex_destroy(&cm->lock);
+ goto bail;
+ }
+ cm->m_qp = m_qp;
+
+ /* MPXYD src IB info in network order, QP snd */
+ cm->msg.saddr.qpn = htonl(cm->m_qp->qp_t.qp_num); /* ep */
+ cm->msg.saddr.qp_type = cm->m_qp->qp_t.qp_type;
+ cm->msg.saddr.lid = smd->md->addr.lid;
+ memcpy(&cm->msg.saddr.gid[0], &smd->md->addr.gid, 16);
+
+ /* MIC src IB info in network order, QP rcv */
+ cm->msg.saddr2.qpn = htonl(cm->m_qp->qp_r.qp_num); /* ep */
+ cm->msg.saddr2.qp_type = cm->m_qp->qp_r.qp_type;
+ cm->msg.saddr2.lid = smd->md->addr.lid;
+ memcpy(&cm->msg.saddr2.gid[0], &smd->md->addr.gid, 16);
+
+ }
+ return cm;
+bail:
+ free(cm);
+ return NULL;
+}
+
+/* queue up connection object on CM list */
+static void mcm_qconn(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+ /* add to CONN work queue, list, for mcm fabric CM */
+ pthread_mutex_lock(&smd->clock);
+ insert_tail(&cm->entry, &smd->clist, (void *)cm);
+ pthread_mutex_unlock(&smd->clock);
+}
+/* dequeue connection object from CM list */
+static void mcm_dqconn(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+ /* Remove from work queue, cr thread processing */
+ pthread_mutex_lock(&smd->clock);
+ remove_entry(&cm->entry);
+ pthread_mutex_unlock(&smd->clock);
+
+}
+/* queue listen object on listen list */
+static void mcm_qlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+ /* add to LISTEN work queue, list, for mcm fabric CM */
+ pthread_mutex_lock(&smd->llock);
+ insert_tail(&cm->entry, &smd->llist, (void *)cm);
+ pthread_mutex_unlock(&smd->llock);
+}
+/* dequeue listen object from listen list */
+static void mcm_dqlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm)
+{
+ pthread_mutex_lock(&smd->llock);
+ remove_entry(&cm->entry);
+ pthread_mutex_unlock(&smd->llock);
+}
+
+/*
+ * Open IB device
+ */
+static struct ibv_context *open_ib_device(char *name, int port)
+{
+ int i, ibcnt;
+ struct ibv_device **iblist;
+ struct ibv_context *ibctx = NULL;
struct ibv_port_attr port_attr;
- int i, num_devices;
/* get list of all IB devices, open 1st IB type by default */
- iblist = ibv_get_device_list(&num_devices);
+ iblist = ibv_get_device_list(&ibcnt);
if (!iblist) {
- mlog(0, " ibv_get_dev_list() failed - %d\n", errno);
- return 1;
+ mlog(0,"ERR ibv_get_dev_list, %s\n", strerror(errno));
+ return NULL;
}
- for (i=0; i < num_devices; ++i) {
- if (iblist[i].transport_type != IBV_TRANSPORT_IB)
- continue;
+ for (i=0; i < ibcnt; ++i) {
+ if (!strcmp(iblist[i]->name, name)) {
+ ibctx = ibv_open_device(iblist[i]);
+ if (!ibctx) {
+ mlog(0,"ERR ibv_open, %s\n", strerror(errno));
+ goto bail;
+ }
+ if (ibv_query_port(ibctx, port, &port_attr)) {
+ mlog(0,"ERR ibv_query, %s\n", strerror(errno));
+ goto bail;
+ }
+ else
+ break;
+ }
else {
- mlog(1, " opening 1st IB device found - %s\n",
- ibv_get_device_name(iblist[i]));
- break;
+ continue;
}
}
- if (i == num_devices) {
- mlog(1, " no IB devices found, exit\n");
- ibv_free_device_list(iblist);
- return 1;
- }
-
- return 0;
+bail:
+ ibv_free_device_list(iblist);
+ return ibctx;
}
-static void close_ib()
+static void mcm_destroy(struct mcm_ib_dev *md)
{
- ibv_free_device_list(iblist);
+ if (md->mr_sbuf)
+ ibv_dereg_mr(md->mr_sbuf);
+
+ if (md->mr_rbuf)
+ ibv_dereg_mr(md->mr_rbuf);
+
+ if (md->qp)
+ ibv_destroy_qp(md->qp);
+
+ if (md->scq)
+ ibv_destroy_cq(md->scq);
+
+ if (md->rcq)
+ ibv_destroy_cq(md->rcq);
+
+ if (md->rch)
+ ibv_destroy_comp_channel(md->rch);
+
+ if (md->ah) {
+ int i;
+
+ for (i = 0;i < 0xffff; i++) {
+ if (md->ah[i])
+ ibv_destroy_ah(md->ah[i]);
+ }
+ free(md->ah);
+ }
+
+ if (md->pd)
+ ibv_dealloc_pd(md->pd);
+
+ if (md->ports)
+ free(md->ports);
+
+ if (md->rbuf)
+ free(md->rbuf);
+
+ if (md->sbuf)
+ free(md->sbuf);
+
return;
}
-static int init_mcm_service(struct mcm_ib_dev *md)
+static int init_mcm_service(mcm_ib_dev_t *md)
{
struct ibv_qp_init_attr qp_create;
struct ibv_recv_wr recv_wr, *recv_err;
struct ibv_sge sge;
int i, mlen = 256; /* overhead for mcm_msg & ibv_grh */
- mlog(1, " create MCM services.. \n");
+ mlog(1, " create MCM services.. %p\n", md);
+
+ /* setup CM msg attributes and timers */
+ md->retries = mcm_retry;
+ md->rep_time = mcm_rep_ms;
+ md->rtu_time = mcm_rtu_ms;
+ md->cm_timer = min(md->rep_time, md->rtu_time);
+ md->qpe = mcm_depth;
+ md->cqe = mcm_depth;
+ md->signal = mcm_signal;
/* setup CM timers and queue sizes */
- md->pd = ibv_alloc_pd(md->ibdev);
+ md->pd = ibv_alloc_pd(md->ibctx);
if (!md->pd)
goto bail;
mlog(1, " allocated PD\n");
- md->rch = ibv_create_comp_channel(md->ibdev);
+ md->rch = ibv_create_comp_channel(md->ibctx);
if (!md->rch)
goto bail;
config_fd(md->rch->fd);
mlog(1, " allocated rx completion channel\n");
- md->scq = ibv_create_cq(md->ibdev, md->cqe, md, NULL, 0);
+ md->scq = ibv_create_cq(md->ibctx, md->cqe, md, NULL, 0);
if (!md->scq)
goto bail;
- md->rcq = ibv_create_cq(md->ibdev, md->cqe, md, md->rch, 0);
+ md->rcq = ibv_create_cq(md->ibctx, md->cqe, md, md->rch, 0);
if (!md->rcq)
goto bail;
qp_create.recv_cq = md->rcq;
qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = md->qpe;
qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1;
- qp_create.cap.max_inline_data = md->max_inline_send;
+ qp_create.cap.max_inline_data = 256; /* best latency for CM messages */
qp_create.qp_context = (void *)md;
md->qp = ibv_create_qp(md->pd, &qp_create);
mlog(1, " created QP\n");
- md->ah = (ib_ah_handle_t*) malloc(sizeof(ib_ah_handle_t) * 0xffff);
- md->sid = (uint8_t*) malloc(sizeof(uint8_t) * 0xffff);
- md->rbuf = (void*) malloc(mlen * md->qpe);
- md->sbuf = (void*) malloc(mlen * md->qpe);
+ md->ah = (struct ibv_ah **) malloc(sizeof(struct ibv_ah *) * 0xffff);
+ md->ports = (uint64_t*) malloc(sizeof(uint64_t) * 0xffff);
+ md->rbuf = malloc(mlen * md->qpe);
+ md->sbuf = malloc(mlen * md->qpe);
md->s_hd = md->s_tl = 0;
- if (!md->ah || !md->rbuf || !md->sbuf || !md->sid)
+ if (!md->ah || !md->rbuf || !md->sbuf || !md->ports)
goto bail;
- (void)memset(md->ah, 0, (sizeof(ib_ah_handle_t) * 0xffff));
- (void)memset(md->sid, 0, (sizeof(uint8_t) * 0xffff));
- md->sid[0] = 1; /* resv slot 0, 0 == no ports available */
+ (void)memset(md->ah, 0, (sizeof(struct ibv_ah *) * 0xffff));
+ (void)memset(md->ports, 0, (sizeof(uint64_t) * 0xffff));
+ md->ports[0] = 1; /* resv slot 0, 0 == no ports available */
(void)memset(md->rbuf, 0, (mlen * md->qpe));
(void)memset(md->sbuf, 0, (mlen * md->qpe));
goto bail;
md->mr_rbuf = ibv_reg_mr(md->pd, md->rbuf,
- ((mlen + hlen) * md->qpe),
+ (mlen * md->qpe),
IBV_ACCESS_LOCAL_WRITE);
if (!md->mr_rbuf)
goto bail;
recv_wr.next = NULL;
recv_wr.sg_list = &sge;
recv_wr.num_sge = 1;
- sge.length = mlen + hlen;
+ sge.length = mlen;
sge.lkey = md->mr_rbuf->lkey;
for (i = 0; i < md->qpe; i++) {
}
/* save qp_num as part of ia_address, network order */
- md->addr.ib.qpn = htonl(md->qp->qp_num);
+ md->addr.qpn = htonl(md->qp->qp_num);
return 0;
bail:
return -1;
}
-/**************** MIX operations ********************************/
+/* Destroy a SCIF client device object (SMD); caller holds md->slock.
+ * Frees the client port space, unlinks from the parent md's smd_list,
+ * destroys the per-SMD mutexes, and frees the object itself. */
+static void mcm_destroy_smd(mcm_scif_dev_t *smd)
+{
+	/* free port space, under lock */
+	pthread_mutex_lock(&smd->plock);
+	if (smd->ports) {
+		free(smd->ports);
+		smd->ports = NULL;
+	}
+	pthread_mutex_unlock(&smd->plock);
-/* open MCM device, MIC clients via SCIF well known port - SCIF_OFED_PORT_7 */
-static struct scif_mic_dev *open_mcm_device(char *name, int port, scif_epd_t listen_ep)
+	/* TODO: walk all lists and cleanup resources, right now assume they are gone */
+	if (smd->ref_count) {
+		mlog(1, " WARNING: ref_count not 0, = %d \n", smd->ref_count);
+	}
+	remove_entry(&smd->entry);
+
+	/* destroy all mutex resources; all are unlocked at this point */
+	pthread_mutex_destroy(&smd->plock);
+	pthread_mutex_destroy(&smd->clock);
+	pthread_mutex_destroy(&smd->llock);
+	pthread_mutex_destroy(&smd->qplock);
+	pthread_mutex_destroy(&smd->cqlock);
+	pthread_mutex_destroy(&smd->mrlock);
+
+	smd->md = NULL;
+	free(smd);
+}
+
+/* Create a SCIF client device object (SMD) on IB device md, one per MIC
+ * client.  Allocates and registers the RDMA proxy buffer with both SCIF
+ * and IB, allocates the client port space, and reserves a cm_id slot in
+ * the parent device's port space.  Returns NULL on failure with all
+ * partially acquired resources released. */
+static mcm_scif_dev_t *mcm_create_smd(mcm_ib_dev_t *md, scif_epd_t op_ep, scif_epd_t cm_ep)
 {
-	int i;
-	struct mcm_ib_dev *md;
-	struct mcm_scif_dev *smd = NULL;
+	mcm_scif_dev_t *smd = NULL;
+	int ret;
+
+	/* SCIF device object, allocate and init resources, one per MIC client */
+	smd = malloc(sizeof(*smd));
+	if (!smd)
+		goto err;
+	memset(smd, 0, sizeof(*smd));
+	smd->md = md;
+	smd->scif_ep = op_ep;	/* set now so the error path unwinds on the right ep */
+	smd->scif_cm_ep = cm_ep;
+
+	/* RDMA buffers, register with SCIF and IB */
+	smd->m_len = mix_buffer_mb * (1024 * 1024);
+	ret = posix_memalign((void **)&smd->m_buf, 4096, smd->m_len);
+	if (ret)
+		goto err;
+	mlog(1, " Allocate/Register RDMA Proxy buffer %p, ln=%d\n", smd->m_buf, smd->m_len);
+
+	smd->m_offset = scif_register(op_ep, smd->m_buf, smd->m_len,
+				      (off_t)0, SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
+	if (smd->m_offset == (off_t)(-1)) {
+		mlog(1, " scif_register addr=%p,%d failed %s\n", smd->m_buf, smd->m_len, strerror(errno));
+		smd->m_offset = 0;	/* -1 is truthy; clear so err path skips scif_unregister */
+		goto err;
+	}
+	mlog(1, " SCIF addr=%p, offset=0x%llx, len %d\n", smd->m_buf, smd->m_offset, smd->m_len);
+
+	smd->m_mr = ibv_reg_mr(smd->md->pd, smd->m_buf, smd->m_len,
+			       IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
+	if (smd->m_mr == NULL) {
+		mlog(1, " IB addr=%p,%d failed %s\n", smd->m_buf, smd->m_len, strerror(errno));
+		goto err;
+	}
+	mlog(1, " IB registered addr=%p,%d, mr_addr=%p handle=0x%x, lkey=0x%x rkey=0x%x \n",
+	     smd->m_buf, smd->m_len, smd->m_mr->addr, smd->m_mr->handle, smd->m_mr->lkey, smd->m_mr->rkey);
+
+	/* SCIF device client port space */
+	smd->ports = (uint64_t*) malloc(sizeof(uint64_t) * 0xffff);
+	if (!smd->ports)
+		goto err;
+	memset(smd->ports, 0, sizeof(uint64_t) * 0xffff);
+
+	/* reserve a cm_id slot in the parent device's port space */
+	pthread_mutex_lock(&md->plock);
+	smd->cm_id = mcm_get_port(md->ports, 0, (uint64_t)smd);
+	pthread_mutex_unlock(&md->plock);
+	if (!smd->cm_id)
+		goto err;
+
+	pthread_mutex_init(&smd->plock, NULL); /* port space for EP's */
+	pthread_mutex_init(&smd->clock, NULL); /* connect list */
+	pthread_mutex_init(&smd->llock, NULL); /* listen list */
+	pthread_mutex_init(&smd->qplock, NULL); /* qp list */
+	pthread_mutex_init(&smd->cqlock, NULL); /* cq list */
+	pthread_mutex_init(&smd->mrlock, NULL); /* mr list */
+
+	init_list(&smd->entry);
+	init_list(&smd->clist);
+	init_list(&smd->llist);
+	init_list(&smd->qplist);
+	init_list(&smd->cqlist);
+	init_list(&smd->mrlist);
+
+	return smd;
+err:
+	if (smd) {
+		if (smd->m_mr)
+			ibv_dereg_mr(smd->m_mr);	/* don't leak IB registration */
+		if (smd->m_buf) {
+			if (smd->m_offset)
+				scif_unregister(op_ep, smd->m_offset, smd->m_len);
+			free(smd->m_buf);
+		}
+		if (smd->ports)
+			free(smd->ports);
+		free(smd);
+	}
+	return NULL;
+}
+
+/*
+ *
+ * Platform side - MIC Indirect eXchange (MIX) operations, SCIF
+ *
+ */
+
+/* open MCM device, New MIC clients via SCIF listen on well known port, new ep from accept */
+static mcm_scif_dev_t *mix_open_device(char *name, int port, scif_epd_t op_ep, scif_epd_t cm_ep)
+{
+ mcm_ib_dev_t *md;
+ mcm_scif_dev_t *smd = NULL;
+
+ mlog(1, " name - %s, port %d\n", name, port);
pthread_mutex_lock(&mcm_llock);
- md = get_list_head(&mcm_llist);
+ md = get_head_entry(&mcm_list);
while (md) {
- if ((!strcmp(ibv_get_device_name(md->ib_dev, name))
- && md->port == port))
+ mlog(1, " md %p -> %s port %d\n", md, md->name, md->port);
+ if (!strcmp(md->name, name) && md->port == port)
goto found;
else
md = get_next_entry(&md->entry, &mcm_list);
/* no IB device object, allocate and init, one per IB device */
md = malloc(sizeof(*md));
if (md == NULL)
- goto done;
+ goto err;
memset(md, 0, sizeof(*md));
- init_list(&md->list);
- if (init_mcm_service(md)) {
+ init_list(&md->entry);
+ init_list(&md->smd_list);
+ pthread_mutex_init(&md->slock, NULL);
+ pthread_mutex_init(&md->plock, NULL);
+ strcpy(md->name, name);
+ md->port = port;
+ md->ibctx = open_ib_device(name, port);
+
+ if ((!md->ibctx) || init_mcm_service(md)) {
free(md);
- md = NULL;
- goto done;
+ goto err;
}
+
/* queue on active device list */
- insert_tail(&md->list, &mcm_llist, md);
+ insert_tail(&md->entry, &mcm_list, md);
found:
- /* SCIF MIX device object, allocate and init, one per MIC client */
- smd = malloc(sizeof(*smd));
+ /* create SCIF client device on this IB device */
+ smd = mcm_create_smd(md, op_ep, cm_ep);
if (!smd)
- goto done;
- memset(smd, 0, sizeof(*smd));
-
- /* Accept new MIX message connection */
- scif_accept(listen_ep, &smd->peer, &smd->ep, SCIF_ACCEPT_SYNC);
-
-
- smd->mcm_dev = md;
- pthread_mutex_init(&smd->lock, NULL);
- init_list(&smd->entry);
+ goto err;
/* insert on active MIX device list */
- pthread_mutex_lock(&md->mix_lock);
- insert_tail(&smd->entry, &md->mix_list, (void *)smd);
- pthread_mutex_unlock(&md->mix_lock);
-
-done:
+ pthread_mutex_lock(&md->slock);
+ insert_tail(&smd->entry, &md->smd_list, (void *)smd);
+ pthread_mutex_unlock(&md->slock);
+err:
pthread_mutex_unlock(&mcm_llock);
return smd;
}
-/* close MCM device, MIC clients via MIX */
-static void close_mcm_device(struct mcm_dev *mdev)
+/* close MCM device, MIC client, md->slock held */
+static void mix_close_device(mcm_ib_dev_t *md, mcm_scif_dev_t *smd)
 {
+	mlog(1, " md %p smd %p\n", md, smd);
+
+	/* close and remove scif MIX client, leave parent mcm_ib_dev open */
+	if (smd->scif_ep) {
+		scif_close(smd->scif_ep);
+		smd->scif_ep = 0;
+	}
+	if (smd->scif_cm_ep) {
+		scif_close(smd->scif_cm_ep);
+		smd->scif_cm_ep = 0;
+	}
+
+	mcm_destroy_smd(smd);
+	/* one conversion per argument: was " freed smd %p\n" with two args */
+	mlog(1, " md %p freed smd %p\n", md, smd);
+
 	return;
 }
-/* DAPL MCM message */
-static void mcm_rcv_evd(struct mcm_ib_dev *md)
+/* Accept a SCIF endpoint connect request from a new MIC client.
+ * Two channels are accepted (OP then CM), the mandatory MIX_IA_OPEN
+ * message is read from the OP channel, the device is opened, and the
+ * response (with local address info) is sent back.  On any failure the
+ * accepted endpoints are closed so they do not leak. */
+static void mix_scif_accept(scif_epd_t listen_ep)
 {
-	return;
+	struct scif_portID peer, peer_cm;
+	scif_epd_t op_ep, cm_ep;
+	int ret, len;
+	dat_mix_open_t msg;
+	mcm_scif_dev_t *smd;
+
+	/* 2 channels created with clients, OP and CM processing */
+	ret = scif_accept(listen_ep, &peer, &op_ep, SCIF_ACCEPT_SYNC);
+	if (ret) {
+		mlog(0, " ERR: scif_accept on OP ep %d, ret = %s\n", listen_ep, strerror(ret));
+		return;
+	}
+	ret = scif_accept(listen_ep, &peer_cm, &cm_ep, SCIF_ACCEPT_SYNC);
+	if (ret) {
+		mlog(0, " ERR: scif_accept on CM ep %d, ret = %s\n", listen_ep, strerror(ret));
+		scif_close(op_ep);	/* don't leak the OP channel */
+		return;
+	}
+
+	/* connect is followed immediately by MIX open command on OP channel */
+	len = sizeof(msg);
+	ret = scif_recv(op_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", op_ep, ret, len);
+		goto bail;
+	}
+
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_IA_OPEN) {
+		mlog(0, " ERR: mix msg ver (%d) or op (%d) wrong\n", msg.hdr.ver, msg.hdr.op);
+		goto bail;
+	}
+
+	/* open new device with hca name and port info, send response with addr info */
+	smd = mix_open_device(msg.name, msg.port, op_ep, cm_ep);
+	msg.hdr.flags = MIX_OP_RSP;
+	if (smd) {
+		msg.hdr.status = MIX_SUCCESS;
+		/* NOTE(review): client-supplied dev_attr is stored on the md;
+		 * confirm the intended direction of this copy */
+		memcpy(&smd->md->dev_attr, &msg.dev_attr, sizeof(dat_mix_dev_attr_t));
+		memcpy(&msg.dev_addr, &smd->md->addr, sizeof(dat_mcm_addr_t));
+	} else {
+		msg.hdr.status = MIX_ENODEV;
+	}
+
+	/* send back response; smd (if created) now owns both endpoints */
+	len = sizeof(msg);
+	ret = scif_send(op_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: snd on new_ep %d, ret %d, exp %d\n", op_ep, ret, len);
+		/* remove this created SMD from MCM device ??? */
+	}
+	return;
+bail:
+	/* failed before an smd took ownership; close both channels */
+	scif_close(op_ep);
+	scif_close(cm_ep);
+}
+
+/* MIX_LISTEN_FREE: tear down the listen endpoint bound to the SID carried
+ * in pmsg->req_id, free its port slot, and send the status response back
+ * on the client's OP channel.  Message fits entirely in the header. */
+static int mix_listen_free(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg)
+{
+	int len, ret;
+	mcm_cm_t *cm;
+
+	mlog(1, " MIX_LISTEN_FREE: sid 0x%x \n", pmsg->req_id);
+
+	pthread_mutex_lock(&smd->llock);
+	cm = get_head_entry(&smd->llist);
+	while (cm) {
+		if (cm->sid == (uint16_t)pmsg->req_id) {
+			remove_entry(&cm->entry);
+			mcm_free_port(smd->ports, (uint16_t)pmsg->req_id);
+			mcm_cm_free(cm);
+			break;
+		}
+		cm = get_next_entry(&cm->entry, &smd->llist);
+	}
+	pthread_mutex_unlock(&smd->llock);
+
+	/* cm non-NULL means the loop broke on a match */
+	if (cm)
+		pmsg->status = MIX_SUCCESS;
+	else
+		pmsg->status = MIX_EINVAL;
+
+	/* send back response */
+	pmsg->flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+		return ret;
+	}
+	return 0;
+}
+
+/* MIX_LISTEN: reserve the client-requested SID in the SMD port space,
+ * create a listening CM object in MCM_LISTEN state, queue it, and send
+ * the status response.  The header was already consumed by the caller;
+ * the remainder of dat_mix_listen_t is read here. */
+static int mix_listen(mcm_scif_dev_t *smd, dat_mix_listen_t *pmsg)
+{
+	int len, ret;
+	uint16_t lport;
+	mcm_cm_t *cm;
+
+	/* hdr already read, get operation data */
+	len = sizeof(dat_mix_listen_t) - sizeof(dat_mix_hdr_t);
+	ret = scif_recv(smd->scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+		return ret;
+	}
+	mlog(1, " MIX_LISTEN: sid 0x%x, backlog %d\n", pmsg->sid, pmsg->backlog);
+
+	/* create listen EP for provided SID; mcm_get_port returns the sid on success */
+	lport = mcm_get_port(smd->ports, pmsg->sid, (uint64_t)smd);
+	if (lport == pmsg->sid) {
+		cm = mcm_cm_create(smd, NULL);
+		if (cm == NULL) {
+			pmsg->hdr.status = MIX_ENOMEM;
+			mcm_free_port(smd->ports, lport);
+		} else {
+			cm->state = MCM_LISTEN;
+			cm->sid = lport;
+			mcm_qlisten(smd, cm);
+			pmsg->hdr.status = MIX_SUCCESS;
+		}
+	} else
+		pmsg->hdr.status = MIX_EADDRINUSE;
+
+	/* send back response */
+	pmsg->hdr.flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_listen_t);
+	ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on new_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+		return ret;
+	}
+
+	return 0;
+
+}
+
+/* Locate a proxy CQ object by its list tid under smd->cqlock;
+ * returns NULL if no CQ with that id exists. */
+mcm_cq_t *mix_get_cq(mcm_scif_dev_t *smd, uint32_t tid)
+{
+	mcm_cq_t *cq = NULL;
+
+	pthread_mutex_lock(&smd->cqlock);
+	cq = get_head_entry(&smd->cqlist);
+
+	while (cq) {
+		if (cq->entry.tid == tid)
+			break;
+		cq = get_next_entry(&cq->entry, &smd->cqlist);
+	}
+	pthread_mutex_unlock(&smd->cqlock);
+	return cq;
+}
+
+/* Locate a proxy QP object by its list tid under smd->qplock;
+ * returns NULL if no QP with that id exists. */
+mcm_qp_t *mix_get_qp(mcm_scif_dev_t *smd, uint32_t tid)
+{
+	mcm_qp_t *qp = NULL;
+
+	pthread_mutex_lock(&smd->qplock);
+	qp = get_head_entry(&smd->qplist);
+	while (qp) {
+		if (qp->entry.tid == tid)
+			break;
+		qp = get_next_entry(&qp->entry, &smd->qplist);
+	}
+	pthread_mutex_unlock(&smd->qplock);
+	return qp;
+}
+
+/* MIX_CQ_FREE: destroy the proxy CQ identified by pmsg->req_id, destroy
+ * its completion channel, unlink and free it, then send the status
+ * response.  Request fits entirely in the header. */
+static int mix_cq_destroy(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg)
+{
+	int len, ret;
+	struct mcm_cq *m_cq;
+
+	mlog(1, " MIX_CQ_DESTROY: cq_id 0x%x\n", pmsg->req_id);
+
+	/* Find the CQ */
+	m_cq = mix_get_cq(smd, pmsg->req_id);
+	if (!m_cq) {
+		mlog(0, " ERR: mix_get_cq, id %d, not found\n", pmsg->req_id);
+		goto err;
+	}
+
+	ibv_destroy_cq(m_cq->ib_cq);
+	ibv_destroy_comp_channel(m_cq->ib_ch);
+	pthread_mutex_lock(&smd->cqlock);
+	remove_entry(&m_cq->entry);
+	pthread_mutex_unlock(&smd->cqlock);
+	free(m_cq);
+
+	pmsg->status = MIX_SUCCESS;
+	goto resp;
+err:
+	mlog(0, " ERR: %s\n", strerror(errno));
+	/* m_cq is always NULL on this path (only goto err is the lookup
+	 * failure), so this free is a no-op kept for safety */
+	if (m_cq)
+		free(m_cq);
+
+	pmsg->status = MIX_EINVAL;
+resp:
+	/* send back response */
+	pmsg->flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+		return ret;
+	}
+	return 0;
+}
+
+/* MIX_CQ_CREATE: read the remainder of the request, create a proxy CQ
+ * plus its completion channel, arm it, queue it on the SMD cq list, and
+ * send the status response with the new cq_id.  On failure any verbs
+ * resources created so far are unwound before reporting MIX_EINVAL. */
+static int mix_cq_create(mcm_scif_dev_t *smd, dat_mix_cq_t *pmsg)
+{
+	int len, ret;
+	struct mcm_cq *m_cq = NULL;
+
+	/* hdr already read, get operation data */
+	len = sizeof(dat_mix_cq_t) - sizeof(dat_mix_hdr_t);
+	ret = scif_recv(smd->scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+		return ret;
+	}
+	mlog(1, " MIX_CQ_CREATE: cq_len = %d\n", pmsg->cq_len);
+
+	/* Create CQ object */
+	m_cq = malloc(sizeof(mcm_cq_t));
+	if (!m_cq)
+		goto err;
+	memset(m_cq, 0, sizeof(mcm_cq_t));
+	init_list(&m_cq->entry);
+	m_cq->smd = smd;
+
+	m_cq->ib_ch = ibv_create_comp_channel(smd->md->ibctx);
+	if (!m_cq->ib_ch)
+		goto err;
+	mlog(1, " created comp channel\n");
+	m_cq->ib_cq = ibv_create_cq(smd->md->ibctx, pmsg->cq_len, m_cq, m_cq->ib_ch, 0);
+	if (!m_cq->ib_cq)
+		goto err;
+	mlog(1, " created cq\n");
+	ret = ibv_req_notify_cq(m_cq->ib_cq, 0);
+	if (ret)
+		goto err;
+	mlog(1, " notify cq\n");
+
+	/* insert on cq list, update object tid */
+	pthread_mutex_lock(&smd->cqlock);
+	insert_tail(&m_cq->entry, &smd->cqlist, m_cq);
+	pmsg->cq_id = m_cq->entry.tid;
+	pthread_mutex_unlock(&smd->cqlock);
+
+	mlog(1, " new cq_id %d\n", pmsg->cq_id);
+
+	pmsg->hdr.status = MIX_SUCCESS;
+	goto resp;
+
+err:
+	mlog(0, " ERR: %s\n", strerror(errno));
+	if (m_cq) {
+		/* unwind partially created verbs resources (was leaked) */
+		if (m_cq->ib_cq)
+			ibv_destroy_cq(m_cq->ib_cq);
+		if (m_cq->ib_ch)
+			ibv_destroy_comp_channel(m_cq->ib_ch);
+		free(m_cq);
+	}
+
+	pmsg->hdr.status = MIX_EINVAL;
+resp:
+	/* send back response */
+	pmsg->hdr.flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_cq_t);
+	ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+		return ret;
+	}
+	return 0;
+}
+
+/* MIX_QP_FREE: destroy the proxy QP identified by pmsg->req_id, unlink
+ * and free it, then send the status response.  Request fits in header. */
+static int mix_qp_destroy(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg)
+{
+	int len, ret;
+	struct mcm_qp *m_qp;
+
+	mlog(1, " MIX_QP_DESTROY: QP_t - id 0x%x\n", pmsg->req_id );
+
+	/* Find the QP */
+	m_qp = mix_get_qp(smd, pmsg->req_id);
+	if (!m_qp) {
+		mlog(0, " ERR: mix_get_qp, id %d, not found\n", pmsg->req_id);
+		goto err;
+	}
+
+	ibv_destroy_qp(m_qp->ib_qp);
+	m_qp->ib_qp = NULL;
+	pthread_mutex_lock(&smd->qplock);
+	remove_entry(&m_qp->entry);
+	pthread_mutex_unlock(&smd->qplock);
+	free(m_qp);
+
+	pmsg->status = MIX_SUCCESS;
+	goto resp;
+err:
+	mlog(0, " ERR: %s\n", strerror(errno));
+	/* m_qp is always NULL here (lookup failure is the only goto err) */
+	if (m_qp)
+		free(m_qp);
+
+	pmsg->status = MIX_EINVAL;
+resp:
+	/* send back response */
+	pmsg->flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+		return ret;
+	}
+	return 0;
+}
+
+/* MIX_QP_MODIFY: placeholder - currently a no-op that reports success.
+ * NOTE(review): no response is sent back to the client; confirm the
+ * client side does not block waiting for an RSP. */
+static int mix_qp_modify(mcm_scif_dev_t *smd, dat_mix_qp_t *pmsg)
+{
+	/* TODO */
+	return 0;
+}
+
+/* MIX_QP_CREATE: read the remainder of the request, create a TX-only
+ * proxy RC QP bound to the client's send CQ, move it to INIT, queue it
+ * on the SMD qp list, and send the response carrying the new qp_id and
+ * the proxy object context. */
+static int mix_qp_create(mcm_scif_dev_t *smd, dat_mix_qp_t *pmsg)
+{
+	int len, ret;
+	struct mcm_qp *m_qp;
+	struct mcm_cq *m_cq;
+	struct ibv_qp_init_attr qp_create;
+
+	/* hdr already read, get operation data */
+	len = sizeof(dat_mix_qp_t) - sizeof(dat_mix_hdr_t);
+	ret = scif_recv(smd->scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+		return ret;
+	}
+	mlog(1, " MIX_QP_CREATE: QP_r - qpn 0x%x, id 0x%x, s_q %d,%d r_q %d,%d inline=%d cq_id %d\n",
+	     pmsg->qp_r.qp_num, pmsg->qp_r.qp_id, pmsg->qp_t.max_send_wr,
+	     pmsg->qp_t.max_send_sge, pmsg->qp_r.max_recv_wr, pmsg->qp_r.max_recv_sge,
+	     pmsg->qp_r.max_inline_data, pmsg->qp_t.scq_id);
+
+	/* Create QP object */
+	m_qp = malloc(sizeof(mcm_qp_t));
+	if (!m_qp)
+		goto err;
+	memset(m_qp, 0, sizeof(mcm_qp_t));
+	init_list(&m_qp->entry);
+	m_qp->smd = smd;
+	memcpy(&m_qp->qp_r, &pmsg->qp_r, sizeof(dat_mix_qp_attr_t));
+	memcpy(&m_qp->qp_t, &pmsg->qp_t, sizeof(dat_mix_qp_attr_t));
+
+	/* Find the CQ's for this QP for transmitting */
+	m_cq = mix_get_cq(smd, pmsg->qp_t.scq_id);
+	if (!m_cq) {
+		mlog(0, " ERR: mcm_get_cq, id %d, not found\n", pmsg->qp_t.scq_id);
+		goto err;
+	}
+
+	/* Setup attributes and create qp, for TX services;
+	 * recv side is a 1-WR/0-SGE stub: this proxy QP only transmits */
+	memset((void *)&qp_create, 0, sizeof(qp_create));
+	qp_create.recv_cq = m_cq->ib_cq;
+	qp_create.cap.max_recv_wr = 1;
+	qp_create.cap.max_recv_sge = 0;
+	qp_create.send_cq = m_cq->ib_cq;
+	qp_create.cap.max_send_wr = pmsg->qp_t.max_send_wr;
+	qp_create.cap.max_send_sge = pmsg->qp_t.max_send_sge;
+	qp_create.cap.max_inline_data = pmsg->qp_t.max_inline_data;
+	qp_create.qp_type = IBV_QPT_RC;
+	qp_create.qp_context = (void *)m_qp;
+
+	m_qp->ib_qp = ibv_create_qp(smd->md->pd, &qp_create);
+	if (!m_qp->ib_qp)
+		goto err;
+
+	/* set to INIT state */
+	ret = modify_qp(m_qp->ib_qp, IBV_QPS_INIT, 0, 0, NULL);
+	if (ret) {
+		ibv_destroy_qp(m_qp->ib_qp);
+		m_qp->ib_qp = NULL;
+		goto err;
+	}
+
+	/* insert on qp list, update proxy qp object tid */
+	pthread_mutex_lock(&smd->qplock);
+	insert_tail(&m_qp->entry, &smd->qplist, m_qp);
+	pmsg->qp_t.qp_id = m_qp->entry.tid;
+	pmsg->qp_t.ctx = (uint64_t)m_qp;
+	pthread_mutex_unlock(&smd->qplock);
+
+	pmsg->hdr.status = MIX_SUCCESS;
+	goto resp;
+err:
+	mlog(0, " ERR: %s\n", strerror(errno));
+	if (m_qp)
+		free(m_qp);
+
+	pmsg->hdr.status = MIX_EINVAL;
+resp:
+	/* send back response */
+	pmsg->hdr.flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_qp_t);
+	ret = scif_send(smd->scif_ep, pmsg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d\n", smd->scif_ep, ret, len);
+		return ret;
+	}
+	mlog(0, " MIX_QP_CREATE: QP_t - qpn 0x%x id 0x%x, ctx %p \n", m_qp->ib_qp->qp_num, pmsg->qp_t.qp_id, m_qp);
+	return 0;
+}
+
+/* Receive and dispatch one MIX operation from the client's OP channel.
+ * Reads the common header, validates the protocol version, then routes
+ * to the per-op handler (which reads any remaining payload itself).
+ * Returns the handler's result, or -1 on protocol error / unsupported
+ * op (caller is expected to drop the client). */
+static int mix_scif_recv(mcm_scif_dev_t *smd)
+{
+	char cmd[DAT_MIX_MSG_MAX];
+	dat_mix_hdr_t *phdr = (dat_mix_hdr_t *)cmd;
+	int ret, len;
+
+	len = sizeof(*phdr);
+	ret = scif_recv(smd->scif_ep, phdr, len, SCIF_RECV_BLOCK);
+	if ((ret != len) || (phdr->ver != DAT_MIX_VER)) {
+		mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d, VER=%d\n",
+		     smd->scif_ep, ret, len, phdr->ver);
+		return -1;
+	}
+
+	mlog(0, " ver %d, op %d, flags %d\n", phdr->ver, phdr->op, phdr->flags);
+
+	switch (phdr->op) {
+	case MIX_QP_CREATE:
+		ret = mix_qp_create(smd, (dat_mix_qp_t *)phdr);
+		break;
+	case MIX_QP_MODIFY:
+		ret = mix_qp_modify(smd, (dat_mix_qp_t *)phdr);
+		break;
+	case MIX_QP_FREE:
+		ret = mix_qp_destroy(smd, phdr);
+		break;
+	case MIX_CQ_CREATE:
+		ret = mix_cq_create(smd, (dat_mix_cq_t *)phdr);
+		break;
+	case MIX_CQ_FREE:
+		ret = mix_cq_destroy(smd, phdr);
+		break;
+	case MIX_LISTEN:
+		ret = mix_listen(smd, (dat_mix_listen_t *)phdr);
+		break;
+	case MIX_LISTEN_FREE:
+		ret = mix_listen_free(smd, phdr);
+		break;
+	case MIX_MR_CREATE:	/* not implemented; previously fell into QP_CREATE */
+	case MIX_MR_FREE:
+	case MIX_WRITE:		/* not implemented; previously fell into LISTEN */
+	case MIX_SEND:
+	case MIX_CM_REQ:	/* CM ops belong on the CM channel */
+	case MIX_CM_REP:
+	case MIX_CM_ACCEPT:
+	case MIX_CM_REJECT:
+	case MIX_CM_RTU:
+	case MIX_CM_EST:
+	case MIX_CM_DISC:
+	case MIX_CM_REPLY:
+	default:
+		mlog(0, " ERROR!!! unknown MIX operation: %d\n", phdr->op);
+		return -1;
+	}
+
+	return ret;
 }
+
+/* Receive one MIX CM message from the client's dedicated CM channel.
+ * Currently a skeleton: the header is read and validated, but no CM op
+ * is handled yet (MIX_CM_REQ intentionally falls through to the error
+ * default until the handler lands). */
+static int mix_scif_recv_cm(mcm_scif_dev_t *smd)
+{
+	char cmd[DAT_MIX_MSG_MAX];
+	dat_mix_hdr_t *phdr = (dat_mix_hdr_t *)cmd;
+	int ret, len;
+
+	len = sizeof(*phdr);
+	ret = scif_recv(smd->scif_ep, phdr, len, SCIF_RECV_BLOCK);
+	if ((ret != len) || (phdr->ver != DAT_MIX_VER)) {
+		mlog(0, " ERR: rcv on scif_ep %d, ret %d, exp %d, VER=%d\n",
+		     smd->scif_ep, ret, len, phdr->ver);
+		return -1;
+	}
+
+	mlog(0, " ver %d, op %d, flags %d\n", phdr->ver, phdr->op, phdr->flags);
+
+	switch (phdr->op) {
+	case MIX_CM_REQ:
+		/* fallthrough: not implemented yet */
+	default:
+		mlog(0, " ERROR!!! unknown MIX CM message: %d\n", phdr->op);
+		return -1;
+	}
+
+	return ret;
+}
+
+
+/*
+ *
+ * Fabric side MCM messages, IB UD QP
+ *
+ */
+
 /* IB async device event */
-static void mcm_async_evd(struct mcm_ib_dev *md)
+/* Drain one IB async event from md->ibctx (called when async_fd polls
+ * readable), log it by category, and acknowledge it. */
+static void mcm_ib_async_event(struct mcm_ib_dev *md)
 {
-	return;
+	struct ibv_async_event event;
+
+	if (!ibv_get_async_event(md->ibctx, &event)) {
+		switch (event.event_type) {
+		case IBV_EVENT_CQ_ERR:
+			mlog(0, "CQ ERR ctx(%p) = %d\n",
+			     event.element.cq->cq_context, event.event_type);
+			break;
+		case IBV_EVENT_COMM_EST:
+			mlog(0, "COMM_EST(QP=%p) rdata beat RTU\n", event.element.qp);
+			break;
+		case IBV_EVENT_QP_FATAL:
+		case IBV_EVENT_QP_REQ_ERR:
+		case IBV_EVENT_QP_ACCESS_ERR:
+		case IBV_EVENT_QP_LAST_WQE_REACHED:
+		case IBV_EVENT_SRQ_ERR:
+		case IBV_EVENT_SRQ_LIMIT_REACHED:
+		case IBV_EVENT_SQ_DRAINED:
+			mlog(0, "QP (%p) ERR = %d\n",
+			     event.element.qp->qp_context, event.event_type);
+			break;
+		case IBV_EVENT_PATH_MIG:
+		case IBV_EVENT_PATH_MIG_ERR:
+		case IBV_EVENT_DEVICE_FATAL:
+		case IBV_EVENT_PORT_ACTIVE:
+		case IBV_EVENT_PORT_ERR:
+		case IBV_EVENT_LID_CHANGE:
+		case IBV_EVENT_PKEY_CHANGE:
+		case IBV_EVENT_SM_CHANGE:
+			mlog(0, "Device Error = %d\n", event.event_type);
+			break;
+		case IBV_EVENT_CLIENT_REREGISTER:
+			mlog(0, "IBV_CLIENT_REREGISTER\n");
+			break;
+		default:
+			mlog(0, "%d UNKNOWN\n", event.event_type);
+			break;
+		}
+		/* ack is mandatory or destroy of the element will block */
+		ibv_ack_async_event(&event);
+	}
 }
-/* SCIF MIX message */
-static void mix_rcv_evd(struct mcm_scif_dev *md)
+
+/* Get CM UD message from send queue, called with s_lock held.
+ * sbuf is a ring indexed by s_hd/s_tl; when full, poll the send CQ to
+ * reclaim completed slots (wr_id carries the slot index) and retry.
+ * Returns NULL only on a CQ poll error. */
+static dat_mcm_msg_t *mcm_get_smsg(mcm_ib_dev_t *md)
 {
-	return;
+	dat_mcm_msg_t *msg = NULL;
+	int ret, polled = 1, hd = md->s_hd;
+
+	hd++;
+	if (hd == md->qpe)
+		hd = 0;
+retry:
+	if (hd == md->s_tl) {
+		msg = NULL;
+		/* NOTE(review): log tag says "ucm_get_smsg" - stale name
+		 * carried over from the ucm provider */
+		if (polled % 1000000 == 0)
+			mlog(1, " ucm_get_smsg: FULLq hd %d == tl %d,"
+			     " completions stalled, polls=%d\n",
+			     hd, md->s_tl, polled);
+	}
+	else {
+		msg = &md->sbuf[hd];
+		md->s_hd = hd; /* new hd */
+	}
+
+	/* if empty, process some completions */
+	if (msg == NULL) {
+		struct ibv_wc wc;
+
+		/* process completions, based on UCM_TX_BURST */
+		ret = ibv_poll_cq(md->scq, 1, &wc);
+		if (ret < 0) {
+			mlog(1, " get_smsg: cq %p %s\n", md->scq, strerror(errno));
+			return NULL;
+		}
+		/* free up completed sends, update tail */
+		if (ret > 0)
+			md->s_tl = (int)wc.wr_id;
+
+		polled++;
+		goto retry;
+	}
+	return msg;
+}
+
+/* ACTIVE/PASSIVE: build and send CM message out of CM object.
+ * Copies msg (minus private-data tail) plus optional p_data into a
+ * send-ring slot under md->slock, lazily creates/caches an AH per
+ * destination LID, and posts an inline UD send.  Returns 0 on success,
+ * non-zero on failure. */
+static int mcm_send(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size)
+{
+	dat_mcm_msg_t *smsg = NULL;
+	struct ibv_send_wr wr, *bad_wr;
+	struct ibv_sge sge;
+	int len, ret = -1;
+	uint16_t dlid = ntohs(msg->daddr.lid);
+
+	/* bound private data to the fixed tail of the wire message */
+	if (p_size > DAT_MCM_PDATA_SIZE) {
+		mlog(0, " mcm_send ERR: p_size %d > max %d\n", p_size, DAT_MCM_PDATA_SIZE);
+		return ret;
+	}
+
+	/* Get message from send queue, copy data, and send */
+	pthread_mutex_lock(&md->slock);
+	if ((smsg = mcm_get_smsg(md)) == NULL) {
+		mlog(0, " mcm_send ERR: get_smsg(hd=%d,tl=%d) \n", md->s_hd, md->s_tl);
+		goto bail;
+	}
+
+	len = (sizeof(*msg) - DAT_MCM_PDATA_SIZE);
+	memcpy(smsg, msg, len);
+	if (p_size) {
+		smsg->p_size = htons(p_size);	/* host -> wire order (was ntohs) */
+		memcpy(&smsg->p_data, p_data, p_size);
+	}
+
+	wr.next = NULL;
+	wr.sg_list = &sge;
+	wr.num_sge = 1;
+	wr.opcode = IBV_WR_SEND;
+	wr.wr_id = (unsigned long)md->s_hd;
+	/* signal every md->signal'th send so the ring tail can advance */
+	wr.send_flags = (wr.wr_id % md->signal) ? 0 : IBV_SEND_SIGNALED;
+	wr.send_flags |= IBV_SEND_INLINE;
+
+	sge.length = len + p_size;
+	sge.lkey = md->mr_sbuf->lkey;
+	sge.addr = (uintptr_t)smsg;
+
+	mlog(2," mcm_send: op %s ln %d lid %x c_qpn %x rport %x\n",
+	     mcm_op_str(ntohs(smsg->op)),
+	     sge.length, ntohs(smsg->daddr.lid),
+	     ntohl(smsg->dqpn), ntohs(smsg->dport));	/* wire -> host for logging */
+
+	/* empty slot, then create AH */
+	if (!md->ah[dlid]) {
+		md->ah[dlid] =
+			mcm_create_ah(md, md->pd, md->qp, dlid, NULL);
+		if (!md->ah[dlid])
+			goto bail;
+	}
+
+	wr.wr.ud.ah = md->ah[dlid];
+	wr.wr.ud.remote_qpn = ntohl(smsg->dqpn);
+	wr.wr.ud.remote_qkey = DAT_MCM_UD_QKEY;
+
+	ret = ibv_post_send(md->qp, &wr, &bad_wr);
+	if (ret)
+		mlog(0, " mcm_send ERR: post_send() %s\n", strerror(errno));
+bail:
+	pthread_mutex_unlock(&md->slock);
+	return ret;
+}
+
+/* Repost a receive buffer for one CM UD message.  The GRH occupies the
+ * 40 bytes immediately before msg in rbuf, so the SGE starts at
+ * msg - sizeof(struct ibv_grh) and covers grh + message. */
+static int mcm_post_rmsg(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+	struct ibv_recv_wr recv_wr, *recv_err;
+	struct ibv_sge sge;
+
+	recv_wr.next = NULL;
+	recv_wr.sg_list = &sge;
+	recv_wr.num_sge = 1;
+	recv_wr.wr_id = (uint64_t)(uintptr_t) msg;
+	sge.length = sizeof(dat_mcm_msg_t) + sizeof(struct ibv_grh);
+	sge.lkey = md->mr_rbuf->lkey;
+	sge.addr = (uintptr_t)((char *)msg - sizeof(struct ibv_grh));
+
+	return (ibv_post_recv(md->qp, &recv_wr, &recv_err));
+}
+
+/* Send a CM reject for an unmatched REQ: swap the src/dst port, qpn and
+ * address info from the incoming message, then fill in the local IB
+ * info (lid/gid) the requester did not have. */
+static int mcm_reject(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+	dat_mcm_msg_t	smsg;
+
+	/* setup op, rearrange the src, dst cm and addr info */
+	memset(&smsg, 0, sizeof(smsg));
+	smsg.ver = htons(DAT_MCM_VER);
+	smsg.op = htons(MCM_REJ_CM);
+	smsg.dport = msg->sport;
+	smsg.dqpn = msg->sqpn;
+	smsg.sport = msg->dport;
+	smsg.sqpn = msg->dqpn;
+	memcpy(&smsg.daddr, &msg->saddr, sizeof(dat_mcm_addr_t));
+	memcpy(&smsg.saddr, &msg->daddr, sizeof(dat_mcm_addr_t));
+
+	/* no dst_addr IB info in REQ, init lid, gid, get type from saddr.
+	 * These fixups must FOLLOW the saddr copy above; previously the
+	 * copy came last and clobbered them (dead stores). */
+	smsg.saddr.lid = md->addr.lid;
+	smsg.saddr.qp_type = msg->saddr.qp_type;
+	memcpy(&smsg.saddr.gid[0], &md->addr.gid, 16);
+
+	mlog(2," CM reject -> LID %x, QPN %x PORT %x\n",
+	     ntohs(smsg.daddr.lid),
+	     ntohl(smsg.dqpn), ntohs(smsg.dport));
+
+	return (mcm_send(md, &smsg, NULL, 0));
+}
+
+/* Dispatch one received CM message to the matched CM object's state
+ * machine.  cm->lock is taken here and released on every branch before
+ * calling out (the commented calls are handlers not yet wired up). */
+static void mcm_process_recv(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, mcm_cm_t *cm)
+{
+	pthread_mutex_lock(&cm->lock);
+	switch (cm->state) {
+	case MCM_LISTEN: /* passive */
+		pthread_mutex_unlock(&cm->lock);
+		//mcm_accept(cm, msg);
+		break;
+	case MCM_RTU_PENDING: /* passive */
+		pthread_mutex_unlock(&cm->lock);
+		//mcm_accept_rtu(cm, msg);
+		break;
+	case MCM_REP_PENDING: /* active */
+		pthread_mutex_unlock(&cm->lock);
+		//mcm_connect_rtu(cm, msg);
+		break;
+	case MCM_CONNECTED: /* active and passive */
+		/* DREQ, change state and process */
+		cm->retries = 2;
+		if (ntohs(msg->op) == MCM_DREQ) {
+			cm->state = MCM_DISC_RECV;
+			pthread_mutex_unlock(&cm->lock);
+			//mcm_disconnect(cm);
+			break;
+		}
+		/* active: RTU was dropped, resend */
+		if (ntohs(msg->op) == MCM_REP) {
+			mlog(1, " RESEND RTU: op %s st %s [lid, port, cqp, iqp]:"
+			     " %x %x %x %x -> %x %x %x %x r_pid %x\n",
+			     mcm_op_str(ntohs(cm->msg.op)),
+			     mcm_state_str(cm->state),
+			     ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+			     ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn),
+			     ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+			     ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
+			     ntohl(cm->msg.d_id));
+
+			cm->msg.op = htons(MCM_RTU);
+			mcm_send(cm->smd->md, &cm->msg, NULL, 0);
+		}
+		pthread_mutex_unlock(&cm->lock);
+		break;
+	case MCM_DISC_PENDING: /* active and passive */
+		/* DREQ or DREP, finalize */
+		pthread_mutex_unlock(&cm->lock);
+		//mcm_disconnect_final(cm);
+		break;
+	case MCM_DISCONNECTED:
+	case MCM_FREE:
+		/* DREQ dropped, resend */
+		if (ntohs(msg->op) == MCM_DREQ) {
+			mlog(1, " RESEND DREP: op %s st %s [lid, port, qpn]:"
+			     " %x %x %x -> %x %x %x\n",
+			     mcm_op_str(ntohs(msg->op)),
+			     mcm_state_str(cm->state),
+			     ntohs(msg->saddr.lid),
+			     ntohs(msg->sport),
+			     ntohl(msg->saddr.qpn),
+			     ntohs(msg->daddr.lid),
+			     ntohs(msg->dport),
+			     ntohl(msg->daddr.qpn));
+			cm->msg.op = htons(MCM_DREP);
+			mcm_send(cm->smd->md, &cm->msg, NULL, 0);
+
+		} else if (ntohs(msg->op) != MCM_DREP){
+			/* DREP ok to ignore, any other print warning */
+			mlog(1, " mcm_recv: UNEXPECTED MSG on cm %p"
+			     " <- op %s, st %s spsp %x sqpn %x\n",
+			     cm, mcm_op_str(ntohs(msg->op)),
+			     mcm_state_str(cm->state),
+			     ntohs(msg->sport), ntohl(msg->sqpn));
+		}
+		pthread_mutex_unlock(&cm->lock);
+		break;
+	case MCM_REJECTED:
+		/* only REJ_USER is silently consumed; anything else falls
+		 * through to the default warning */
+		if (ntohs(msg->op) == MCM_REJ_USER) {
+			pthread_mutex_unlock(&cm->lock);
+			break;
+		}
+	default:
+		mlog(0, " mcm_recv: Warning, UNKNOWN state"
+		     " <- op %s, %s spsp %x sqpn %x slid %x\n",
+		     mcm_op_str(ntohs(msg->op)), mcm_state_str(cm->state),
+		     ntohs(msg->sport), ntohl(msg->sqpn), ntohs(msg->saddr.lid));
+
+		pthread_mutex_unlock(&cm->lock);
+		break;
+	}
+}
+
+/* Find matching CM object for this receive message, return CM reference, timer.
+ * Searches the SMD connect list first (full src/dst match, duplicate REQ
+ * detection), then falls back to the listen list for new REQs (sport+sqpn
+ * match).  An unmatched REQ gets a reject sent on the caller's behalf.
+ * NOTE(review): the returned cm is used after the list lock is dropped -
+ * confirm lifetime is protected by ref_count elsewhere. */
+mcm_cm_t *mcm_get_smd_cm(mcm_scif_dev_t *smd, dat_mcm_msg_t *msg)
+{
+	mcm_cm_t *cm = NULL, *next, *found = NULL;
+	LLIST_ENTRY *list;
+	pthread_mutex_t *lock;
+	int listenq = 0;
+
+	/* conn list first, duplicate requests for MCM_REQ */
+	list = &smd->clist;
+	lock = &smd->clock;
+
+retry_listenq:
+	pthread_mutex_lock(lock);
+	next = get_head_entry(list);
+
+	while (next) {
+		cm = next;
+		next = get_next_entry(&cm->entry, list);
+		if (cm->state == MCM_DESTROY || cm->state == MCM_FREE)
+			continue;
+
+		/* CM sPORT + QPN, match is good enough for listenq */
+		if (listenq &&
+		    cm->msg.sport == msg->dport &&
+		    cm->msg.sqpn == msg->dqpn) {
+			found = cm;
+			break;
+		}
+		/* connectq, check src and dst plus id's, check duplicate conn_reqs */
+		if (!listenq &&
+		    cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn &&
+		    cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn &&
+		    cm->msg.daddr.lid == msg->saddr.lid) {
+			if (ntohs(msg->op) != MCM_REQ) {
+				found = cm;
+				break;
+			} else {
+				/* duplicate; bail and throw away */
+				pthread_mutex_unlock(lock);
+				mlog(1, " DUPLICATE: cm %p op %s (%s) st %s"
+				     " [lid, port, cqp, iqp]:"
+				     " %x %x %x %x <- (%x %x %x %x :"
+				     " %x %x %x %x) -> %x %x %x %x\n",
+				     cm, mcm_op_str(ntohs(msg->op)),
+				     mcm_op_str(ntohs(cm->msg.op)),
+				     mcm_state_str(cm->state),
+				     ntohs(cm->msg.daddr.lid), ntohs(cm->msg.dport),
+				     ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.qpn),
+				     ntohs(msg->saddr.lid), ntohs(msg->sport),
+				     ntohl(msg->sqpn), ntohl(msg->saddr.qpn),
+				     ntohs(msg->daddr.lid), ntohs(msg->dport),
+				     ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+				     ntohs(cm->msg.saddr.lid), ntohs(cm->msg.sport),
+				     ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.qpn));
+
+				return NULL;
+			}
+		}
+	}
+	pthread_mutex_unlock(lock);
+
+	/* no duplicate request on connq, check listenq for new request */
+	if (ntohs(msg->op) == MCM_REQ && !listenq && !found) {
+		listenq = 1;
+		list = &smd->llist;
+		lock = &smd->llock;
+		goto retry_listenq;
+	}
+
+	/* not match on listenq for valid request, send reject */
+	if (ntohs(msg->op) == MCM_REQ && !found) {
+		mlog(1, " mcm_recv: NO LISTENER for %s %x %x i%x c%x"
+		     " < %x %x %x, sending reject\n",
+		     mcm_op_str(ntohs(msg->op)),
+		     ntohs(msg->daddr.lid), ntohs(msg->dport),
+		     ntohl(msg->daddr.qpn), ntohl(msg->sqpn),
+		     ntohs(msg->saddr.lid), ntohs(msg->sport),
+		     ntohl(msg->saddr.qpn));
+
+		mcm_reject(smd->md, msg);
+	}
+
+	if (!found) {
+		mlog(1, " NO MATCH: op %s [lid, port, cqp, iqp, pid]:"
+		     " %x %x %x %x %x <- %x %x %x %x l_pid %x r_pid %x\n",
+		     mcm_op_str(ntohs(msg->op)),
+		     ntohs(msg->daddr.lid), ntohs(msg->dport),
+		     ntohl(msg->dqpn), ntohl(msg->daddr.qpn),
+		     ntohl(msg->d_id), ntohs(msg->saddr.lid),
+		     ntohs(msg->sport), ntohl(msg->sqpn),
+		     ntohl(msg->saddr.qpn), ntohl(msg->s_id),
+		     ntohl(msg->d_id));
+
+		if (ntohs(msg->op) == MCM_DREP) {
+			/* DREP_DUP */
+		}
+	}
+
+	return found;
+}
+
+/* Locate the CM object for msg by walking every SCIF client (SMD) on
+ * this IB device, under md->slock.  Returns the first match or NULL. */
+mcm_cm_t *mcm_get_cm(mcm_ib_dev_t *md, dat_mcm_msg_t *msg)
+{
+	mcm_cm_t *cm = NULL;
+	mcm_scif_dev_t *smd;
+
+	/* Walk scif device client list */
+	pthread_mutex_lock(&md->slock);
+	smd = get_head_entry(&md->smd_list);
+	while (smd) {
+		cm = mcm_get_smd_cm(smd, msg);
+		if (cm)
+			break;
+		smd = get_next_entry(&smd->entry, &md->smd_list);
+	}
+	pthread_mutex_unlock(&md->slock);
+	return cm;
+}
+
+/* Get rmsgs from CM completion queue, 10 at a time.
+ * Called when md->rch->fd polls readable: ack the CQ event, then drain
+ * the recv CQ in batches; when empty, rearm notification and re-poll
+ * once to close the race, then return.  Each consumed buffer is
+ * reposted via mcm_post_rmsg(). */
+static void mcm_ib_recv(mcm_ib_dev_t *md)
+{
+	struct ibv_wc wc[10];
+	dat_mcm_msg_t *msg;
+	mcm_cm_t *cm;
+	int i, ret, notify = 0;
+	struct ibv_cq *ibv_cq = NULL;
+
+
+	/* POLLIN on channel FD */
+	ret = ibv_get_cq_event(md->rch, &ibv_cq, (void *)&md);
+	if (ret == 0) {
+		ibv_ack_cq_events(ibv_cq, 1);
+	}
+retry:
+	ret = ibv_poll_cq(md->rcq, 10, wc);
+	if (ret <= 0) {
+		if (!ret && !notify) {
+			ibv_req_notify_cq(md->rcq, 0);
+			notify = 1;
+			goto retry;
+		}
+		return;
+	} else
+		notify = 0;
+
+	for (i = 0; i < ret; i++) {
+		msg = (dat_mcm_msg_t*) (uintptr_t) wc[i].wr_id;
+
+		mlog(2, " mcm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n",
+		     wc[i].status, mcm_op_str(ntohs(msg->op)),
+		     wc[i].byte_len,
+		     (void*)wc[i].wr_id, wc[i].src_qp);
+
+		/* validate CM message, version */
+		if (ntohs(msg->ver) != DAT_MCM_VER) {
+			mlog(1, " mcm_recv: UNKNOWN msg %p, ver %d\n", msg, msg->ver);
+			mcm_post_rmsg(md, msg);
+			continue;
+		}
+		if (!(cm = mcm_get_cm(md, msg))) {
+			mcm_post_rmsg(md, msg);
+			continue;
+		}
+
+		/* match, process it */
+		mcm_process_recv(md, msg, cm);
+		mcm_post_rmsg(md, msg);
+	}
+
+	/* finished this batch of WC's, poll and rearm */
+	goto retry;
 }
/*
{
struct mcm_fd_set *set;
struct mcm_ib_dev *md;
- struct mcm_scif_dev *smd;
- int time_ms;
- int i, n, ret;
+ struct mcm_scif_dev *smd, *next;
+ int time_ms, ret;
/* FD array */
set = mcm_alloc_fd_set();
if (!set)
- goto out;
+ return;
mlog(0, "server started\n");
time_ms = -1; /* blocking */
mcm_fd_zero(set);
+ /* SCIF listen EP, MIC client open requests */
+ mcm_fd_set(scif_listen_ep, set, POLLIN);
+
/* trigger on all active IB devices */
pthread_mutex_lock(&mcm_llock);
- md = get_list_head(&mcm_llist);
+ md = get_head_entry(&mcm_list);
while (md) {
- mcm_fd_set(md->ib_dev->async_fd, set, POLLIN);
+ mcm_fd_set(md->ibctx->async_fd, set, POLLIN);
mcm_fd_set(md->rch->fd, set, POLLIN);
/* trigger on all active SCIF ep's */
- pthread_mutex_lock(&md->mix_lock);
- smd = get_list_head(&md->mix_list);
+ pthread_mutex_lock(&md->slock);
+ smd = get_head_entry(&md->smd_list);
while (smd) {
- mcm_fd_set(smd->ep, set, POLLIN);
- smd = get_next_entry(&smd->entry, &md->mix_list);
+ mcm_fd_set(smd->scif_ep, set, POLLIN);
+ mcm_fd_set(smd->scif_cm_ep, set, POLLIN);
+ smd = get_next_entry(&smd->entry, &md->smd_list);
}
- pthread_mutex_unlock(&md->mix_lock);
+ pthread_mutex_unlock(&md->slock);
md = get_next_entry(&md->entry, &mcm_list);
}
pthread_mutex_unlock(&mcm_llock);
mcm_select(set, time_ms); /* wait, DAPL MCM or SCIF MIX msgs */
+ /* process listens */
+ if (mcm_poll(scif_listen_ep, POLLIN) == POLLIN)
+ mix_scif_accept(scif_listen_ep);
+
pthread_mutex_lock(&mcm_llock);
- md = get_list_head(&mcm_llist);
+ md = get_head_entry(&mcm_list);
while (md) {
/* process MCM events: async device and CM msgs */
if (mcm_poll(md->rch->fd, POLLIN) == POLLIN)
- mcm_rcv_evd(md);
+ mcm_ib_recv(md);
- if (mcm_poll(md->ib_dev->async_fd, POLLIN) == POLLIN)
- mcm_async_evd(md);
+ if (mcm_poll(md->ibctx->async_fd, POLLIN) == POLLIN)
+ mcm_ib_async_event(md);
- /* process SCIF cmd channels */
- pthread_mutex_lock(&md->mix_lock);
- smd = get_list_head(&md->mix_list);
+ /* process SCIF operation and CM channels */
+ pthread_mutex_lock(&md->slock);
+ smd = get_head_entry(&md->smd_list);
while (smd) {
- if (mcm_poll(smd->ep, POLLIN) == POLLIN)
- mix_rcv_evd(smd);
+ ret = mcm_poll(smd->scif_ep, POLLIN); /* OP */
+ if (ret == POLLIN)
+ ret = mix_scif_recv(smd);
+
+ ret = mcm_poll(smd->scif_cm_ep, POLLIN); /* CM */
+ if (ret == POLLIN)
+ ret = mix_scif_recv_cm(smd);
- smd = get_next_entry(&smd->entry, &md->mix_list);
+ next = get_next_entry(&smd->entry, &md->smd_list);
+ if (ret)
+ mix_close_device(md, smd);
+
+ smd = next;
}
- pthread_mutex_unlock(&md->mix_lock);
+ pthread_mutex_unlock(&md->slock);
md = get_next_entry(&md->entry, &mcm_list);
}
pthread_mutex_unlock(&mcm_llock);
}
-
-
}
static void show_usage(char *program)
pthread_mutex_init(&mcm_llock, NULL);
/* init MCM device list */
- init_list(&mcm_llist);
+ init_list(&mcm_list);
logfile = mpxy_open_log();
return -1;
}
- if (init_ib()) {
- mlog(0, "ERROR - unable to open/init IB device\n");
- return -1;
- }
-
mlog(1, "starting server\n");
mpxy_server();
mlog(0, "shutting down\n");
* PURPOSE: extensions to the DAT API for MIC Proxy RDMA services
*
*
- **********************************************************************/
+ * This extension/service enables MIC-based DAPL providers to use a
+ * proxy service for sends and RDMA write operations. RDMA reads and
+ * receives are NOT supported. This service communicates within a
+ * server platform over the PCI-E bus using SCIF and a new MIX
+ * messaging protocol. The MCM provider uses DAPL CM messaging
+ * protocols on the wire. The MIX protocol is defined as part of
+ * the new MIC extensions.
+ *
+ ***********************************************************************/
#ifndef _DAT_MIC_EXTENSIONS_H_
#define _DAT_MIC_EXTENSIONS_H_
+#include <sys/socket.h>
+#include <netinet/in.h>
+
#define DAT_MIC_EXTENSION_VERSION 1
#define DAT_MIC_ATTR_MIC "DAT_MIC_SUPPORT"
-/* Wire protocol version for MIC Indirect Exchange (MIX) protocol over SCIF */
-#define DAT_MIX_VER 1
+/***** MIC Indirect CM (MCM) protocol over IB fabrics *****/
+#define DAT_MCM_VER 1
+#define DAT_MCM_UD_QKEY 0x78655322
+#define DAT_MCM_PDATA_SIZE 64
-typedef enum _dat_mix_ops
+typedef enum dat_mcm_op
{
- DAT_MIX_IA_OPEN = 1,
- DAT_MIX_IA_CLOSE,
- DAT_MIX_IA_QUERY,
- DAT_MIX_IA_MR,
- DAT_MIX_EP_CREATE,
- DAT_MIX_EP_QUERY,
- DAT_MIX_WRITE,
- DAT_MIX_SEND,
- DAT_MIX_READ,
- DAT_MIX_LISTEN,
- DAT_MIX_CM_REQ,
- DAT_MIX_CM_REP,
- DAT_MIX_CM_ACCEPT,
- DAT_MIX_CM_REJECT,
- DAT_MIX_CM_RTU,
- DAT_MIX_CM_EST,
- DAT_MIX_CM_DISC,
- DAT_MIX_CM_REPLY,
-} dat_mix_ops_t;
+ MCM_REQ = 1,
+ MCM_REP,
+ MCM_REJ_USER, /* user reject */
+ MCM_REJ_CM, /* cm reject */
+ MCM_RTU,
+ MCM_DREQ,
+ MCM_DREP
-/* MIC Indirect CM (MCM) protocol over IB fabric */
-#define DAT_MCM_PDATA_SIZE 64
-union dat_mcm_addr {
- DAT_SOCK_ADDR6 so;
- struct {
- uint16_t family; /* sin6_family */
- uint16_t lid; /* sin6_port */
- uint32_t qpn; /* sin6_flowinfo */
- uint8_t gid[16]; /* sin6_addr */
- uint16_t port; /* sin6_scope_id */
- uint8_t sl;
- uint8_t qp_type;
- } ib;
-};
+} DAT_MCM_OP;
+
+/* MCM address, 28 bytes */
+typedef struct dat_mcm_addr
+{
+ uint16_t family;
+ uint16_t lid;
+ uint32_t qpn;
+ uint8_t gid[16];
+ uint16_t port;
+ uint8_t sl;
+ uint8_t qp_type;
+} dat_mcm_addr_t;
/* MCM message, 208 bytes */
-typedef struct _dat_mcm_msg
+typedef struct dat_mcm_msg
{
uint16_t ver;
uint16_t op;
uint32_t s_id; /* src pid */
uint32_t d_id; /* dst pid */
uint8_t rd_in; /* atomic_rd_in */
- uint8_t resv[5];
- union dat_mcm_addr saddr;
- union dat_mcm_addr daddr;
- union dat_mcm_addr saddr_alt;
- union dat_mcm_addr daddr_alt;
+ uint8_t resv[5]; /* 2 connections for MCM endpoints */
+ dat_mcm_addr_t saddr; /* 1st RC - local MPXY QP -> */
+ dat_mcm_addr_t daddr; /* <- remote MIC QP */
+ dat_mcm_addr_t saddr2; /* 2nd RC - local MIC QP -> */
+ dat_mcm_addr_t daddr2; /* <- remote MPXY QP */
uint8_t p_data[DAT_MCM_PDATA_SIZE];
} dat_mcm_msg_t;
-typedef struct _dat_mix_open_op {
+/***** MIC Indirect Exchange (MIX) protocol over SCIF ****/
+#define DAT_MIX_VER 1
+#define DAT_MIX_MSG_MAX 256
+
+/* MIX operation codes carried in dat_mix_hdr_t.op */
+typedef enum dat_mix_ops
+{
+ /* device open/close */
+ MIX_IA_OPEN = 1,
+ MIX_IA_CLOSE,
+ /* resource create/free: listens, MRs, QPs, CQs */
+ MIX_LISTEN,
+ MIX_LISTEN_FREE,
+ MIX_MR_CREATE,
+ MIX_MR_FREE,
+ MIX_QP_CREATE,
+ MIX_QP_MODIFY,
+ MIX_QP_FREE,
+ MIX_CQ_CREATE,
+ MIX_CQ_FREE,
+ /* connection management, mirrors the MCM wire protocol ops */
+ MIX_CM_REQ,
+ MIX_CM_REP,
+ MIX_CM_ACCEPT,
+ MIX_CM_REJECT,
+ MIX_CM_RTU,
+ MIX_CM_EST,
+ MIX_CM_DISC,
+ MIX_CM_REPLY,
+ /* data transfer, proxied sends and RDMA writes */
+ MIX_WRITE,
+ MIX_SEND,
+
+} dat_mix_ops_t;
+
+/* Flags for dat_mix_hdr_t.flags.
+ * NOTE(review): MIX_OP_REQ == 0 cannot be tested with a bitwise AND;
+ * request is the implied default when MIX_OP_RSP is not set. */
+typedef enum dat_mix_op_flags
+{
+ MIX_OP_REQ = 0x00,
+ MIX_OP_RSP = 0x01,
+ MIX_OP_SYNC = 0x02,
+ MIX_OP_ASYNC = 0x04,
+
+} dat_mix_op_flags_t;
+
+typedef enum dat_mix_op_status
+{
+ MIX_SUCCESS = 0,
+ MIX_EFAULT, /* internal error */
+ MIX_ENOMEM, /* no space */
+ MIX_EINVAL, /* invalid parameter */
+ MIX_ENOTCONN, /* no active RDMA channels */
+ MIX_ENODEV, /* no device available */
+ MIX_ECONNRESET, /* RDMA channel reset */
+ MIX_EBADF, /* RDMA channel or CM id invalid */
+ MIX_EAGAIN, /* busy */
+ MIX_EADDRINUSE, /* port or address in use */
+ MIX_ENETUNREACH, /* remote address unreachable */
+ MIX_ETIMEDOUT, /* connection time out */
+ MIX_EAFNOSUPPORT, /* invalid address */
+ MIX_EPERM, /* invalid permission */
+ MIX_EALREADY, /* invalid state */
+ MIX_ECONNREFUSED, /* connection rejected */
+ MIX_EISCONN, /* already connected */
+ MIX_EOVERFLOW, /* length error */
+
+} dat_mix_op_status_t;
+
+/* MIX message header, 8 bytes */
+typedef struct dat_mix_hdr
+{
+ uint8_t ver; /* version */
+ uint8_t op; /* operation type */
+ uint8_t flags; /* operation flags */
+ uint8_t status; /* operation status */
+ uint32_t req_id; /* operation id, multiple operations */
+
+} dat_mix_hdr_t;
+
+/**** MIX device open *****/
+typedef struct dat_mix_dev_attr
+{
+ uint8_t ack_timer;
+ uint8_t ack_retry;
+ uint8_t rnr_timer;
+ uint8_t rnr_retry;
+ uint8_t global;
+ uint8_t hop_limit;
+ uint8_t tclass;
+ uint8_t sl;
+ uint8_t mtu;
+ uint8_t rd_atom_in;
+ uint8_t rd_atom_out;
+ uint8_t pkey_idx;
+ uint16_t pkey;
+ uint16_t max_inline;
-};
+} dat_mix_dev_attr_t;
-/* MIX message, 256 bytes */
-typedef struct _dat_mix_msg
+/***** MIX open, device address info returned */
+typedef struct dat_mix_open
{
- uint16_t ver; /* version */
- uint16_t op; /* operation type */
- uint32_t len; /* operation data length */
- uint64_t hdl; /* handle */
- uint64_t ctx; /* context */
- uint8_t data[232]; /* operation data */
-
-} dat_mix_msg_t;
+ dat_mix_hdr_t hdr;
+ char name[64];
+ uint16_t port; /* ib physical port number */
+ dat_mix_dev_attr_t dev_attr;
+ dat_mcm_addr_t dev_addr;
+
+} dat_mix_open_t;
+
+/***** MIX memory registration *****/
+typedef struct dat_mix_mr
+{
+ dat_mix_hdr_t hdr;
+ uint32_t mr_id; /* registration id, used by later ops */
+ uint32_t len; /* region length in bytes */
+ uint64_t off; /* presumably SCIF registered offset - TODO confirm */
+ uint64_t ctx; /* opaque client context returned in replies */
+
+} dat_mix_mr_t;
+
+/***** MIX listen, status returned, no data *****/
+typedef struct dat_mix_listen
+{
+ dat_mix_hdr_t hdr;
+ uint16_t sid; /* service id to listen on */
+ uint16_t backlog; /* pending connection request limit */
+
+} dat_mix_listen_t;
+
+/***** MIX create QP *****/
+typedef struct dat_mix_qp_attr
+{
+ uint8_t qp_type;
+ uint8_t state;
+ uint8_t cur_state;
+ uint8_t sq_sig_all;
+ uint32_t qp_num;
+ uint32_t qkey;
+ uint32_t max_send_wr;
+ uint32_t max_recv_wr;
+ uint32_t max_send_sge;
+ uint32_t max_recv_sge;
+ uint32_t max_inline_data;
+ uint32_t qp_id;
+ uint32_t scq_id;
+ uint32_t rcq_id;
+ uint64_t ctx;
+
+} dat_mix_qp_attr_t;
+
+/*
+ * TODO: move posting of WR's to aperture windows ??
+ * OK for now since we can post asynchronously and queue them up.
+ * For the initial write-stream prototype we don't have many
+ * completions. SCIF should run at 2x IB speed, so once we
+ * pipeline it will keep up with IB speeds.
+ */
+typedef struct dat_mix_qp
+{
+ dat_mix_hdr_t hdr;
+ dat_mix_qp_attr_t qp_t; /* on Proxy */
+ dat_mix_qp_attr_t qp_r; /* on MIC */
+
+} dat_mix_qp_t;
+
+/***** MIX CQ operations, create, free, poll, event *****/
+/*
+ * TODO: move polling of WC's and notifications to aperture windows.
+ * Might not be needed unless signaling lots of TX WR's.
+ *
+ */
+/* One message serves both CQ create/free (cq_len, cq_id) and
+ * completion-event delivery; the wr_id..wc_flags fields appear to
+ * mirror struct ibv_wc - NOTE(review): confirm against the MPXYD
+ * sender. */
+typedef struct dat_mix_cq
+{
+ dat_mix_hdr_t hdr;
+ uint64_t cq_ctx; /* client CQ context, returned in events */
+ uint32_t cq_len; /* requested CQ depth on create */
+ uint32_t cq_id; /* proxy-assigned CQ id */
+ uint64_t wr_id; /* work completion fields follow */
+ uint32_t status;
+ uint32_t opcode;
+ uint32_t vendor_err;
+ uint32_t byte_len;
+ uint32_t qp_num;
+ uint32_t src_qp;
+ uint32_t wc_flags;
+
+} dat_mix_cq_t;
+
+/* CM exchange over SCIF: wraps a full MCM wire message plus the
+ * local CM object's context and id for correlation. */
+typedef struct dat_mix_cm
+{
+ dat_mix_hdr_t hdr;
+ uint64_t cm_ctx; /* client CM context */
+ uint32_t cm_id; /* proxy-assigned CM id */
+ dat_mcm_msg_t msg; /* MCM message as sent/received on the wire */
+
+} dat_mix_cm_t;
+
#endif /* _DAT_MIC_EXTENSIONS_H_ */
#define CNO_TIMEOUT (1000*1000*1)
#define DTO_FLUSH_TIMEOUT (1000*1000*2)
#define CONN_TIMEOUT (1000*1000*100)
-#define SERVER_TIMEOUT DAT_TIMEOUT_INFINITE
+#define SERVER_TIMEOUT 10000000
#define RDMA_BUFFER_SIZE (64)
/* Global DAT vars */