CM will support Proxy-in services in both MFO and MXS modes.
CM thread will not process ibv channels when in MFO mode.
Device open/close will export all verbs calls in MFO mode.
Add MIX (MIC to Proxy) functions for pz, device query, port query.
Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
Signed-off-by: Amir Hanania <amir.hanania@intel.com>
}
/* QPr is on proxy when xsocket from device */
- if (!MXS_EP(&hca->ib_trans.addr)) {
+ if (!MXF_EP(&hca->ib_trans.addr)) {
cm->msg.saddr1.qpn = htonl(ep->qp_handle->qp->qp_num); /* QPr local*/
cm->msg.saddr1.qp_type = ep->qp_handle->qp->qp_type;
cm->msg.saddr1.lid = hca->ib_trans.addr.lid;
/* QP to RTR-RTS with remote QPt (daddr2) info */
dapl_os_lock(&cm->ep->header.lock);
- if (!MXS_EP(&cm->hca->ib_trans.addr)) {
+ if (!MXF_EP(&cm->hca->ib_trans.addr)) {
ret = dapls_modify_qp_rtu(cm->ep->qp_handle->qp,
cm->msg.daddr2.qpn,
cm->msg.daddr2.lid,
goto bail;
}
/* MXS peer: setup PI WC and save peer WR queue info */
- if (MXS_EP(&cm->msg.daddr1)) {
+ if (MXF_EP(&cm->msg.daddr1)) {
/* save PI WR info, create local WC_q, send back WC info */
mcm_ntoh_wrc(&ep->qp_handle->wrc_rem, (mcm_wrc_info_t*)cm->msg.p_proxy);
mcm_create_wc_q(ep->qp_handle, ep->qp_handle->wrc_rem.wr_end + 1);
/* modify QPr to RTR and then to RTS, QPr (qp) to remote QPt (daddr2), !xsocket */
dapl_os_lock(&ep->header.lock);
- if (!MXS_EP(&cm->hca->ib_trans.addr)) {
+ if (!MXF_EP(&cm->hca->ib_trans.addr)) {
ret = dapls_modify_qp_rtu(ep->qp_handle->qp,
cm->msg.daddr2.qpn,
cm->msg.daddr2.lid,
&cm->hca->ib_trans.addr.gid, 16);
/* MXS peer: setup PI WC and save peer WR queue info */
- if (MXS_EP(&cm->msg.daddr1)) {
+ if (MXF_EP(&cm->msg.daddr1)) {
/* save PI WR info, create local WC_q, send back WC info */
mcm_ntoh_wrc(&ep->qp_handle->wrc_rem, (mcm_wrc_info_t*)cm->msg.p_proxy);
mcm_create_wc_q(ep->qp_handle, ep->qp_handle->wrc_rem.wr_end + 1);
cm->msg.op = htons(MCM_REP);
cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
- if (!MXS_EP(&cm->hca->ib_trans.addr)) {
+ if (!MXF_EP(&cm->hca->ib_trans.addr)) {
cm->msg.saddr1.qpn = htonl(ep->qp_handle->qp->qp_num);
cm->msg.saddr1.qp_type = ep->qp_handle->qp->qp_type;
cm->msg.saddr1.lid = cm->hca->ib_trans.addr.lid;
/* HST dev scif0, remote LID is host proxy, not MIC */
/* validate port and ep_map range */
- if ((mcm_ia->port > 2) || (mcm_ia->ep_map > 3))
+ if ((mcm_ia->port > 2) || (mcm_ia->ep_map > 4))
cm->msg.daddr1.ep_map = 0;
/* remote uCM information, comes from consumer provider r_addr */
time_ms = -1; /* reset to blocking */
dapl_fd_zero(set);
dapl_fd_set(hca->ib_trans.signal.scm[0], set, DAPL_FD_READ);
- dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ);
- dapl_fd_set(hca->ib_trans.rch_fd, set, DAPL_FD_READ);
dapl_fd_set(hca->ib_trans.scif_ev_ep, set, DAPL_FD_READ);
- dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ);
-
+ if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+ {
+ dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ);
+ dapl_fd_set(hca->ib_trans.rch_fd, set, DAPL_FD_READ);
+ dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ);
+ }
+
dapl_os_lock(&hca->ib_trans.cqlock); /* CQt for HST->MXS */
if (!dapl_llist_is_empty(&hca->ib_trans.cqlist))
m_cq = dapl_llist_peek_head(&hca->ib_trans.cqlist);
dapl_os_unlock(&hca->ib_trans.lock);
dapl_select(set, time_ms);
- if (dapl_poll(hca->ib_trans.rch_fd,
- DAPL_FD_READ) == DAPL_FD_READ) {
- mcm_recv(&hca->ib_trans);
+ if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+ {
+ if (dapl_poll(hca->ib_trans.rch_fd,
+ DAPL_FD_READ) == DAPL_FD_READ) {
+ mcm_recv(&hca->ib_trans);
+ }
}
ret = dapl_poll(hca->ib_trans.scif_ev_ep, DAPL_FD_READ);
if (ret == DAPL_FD_READ)
hca->ib_trans.dev_id, hca->ib_trans.scif_ev_ep);
event.event_type = IBV_EVENT_DEVICE_FATAL;
- dapl_evd_un_async_error_callback(hca->ib_hca_handle,
- &event,
- hca->ib_trans.async_un_ctx);
+ if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+ {
+ dapl_evd_un_async_error_callback(hca->ib_hca_handle,
+ &event,
+ hca->ib_trans.async_un_ctx);
+ }
dapl_os_lock(&hca->ib_trans.lock);
hca->ib_trans.cm_state = IB_THREAD_CANCEL;
continue;
}
- if (dapl_poll(hca->ib_hca_handle->async_fd,
- DAPL_FD_READ) == DAPL_FD_READ) {
- dapli_async_event_cb(&hca->ib_trans);
- }
- if (dapl_poll(hca->ib_trans.ib_cq->fd,
- DAPL_FD_READ) == DAPL_FD_READ) {
- dapli_cq_event_cb(&hca->ib_trans);
+ if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+ {
+ if (dapl_poll(hca->ib_hca_handle->async_fd,
+ DAPL_FD_READ) == DAPL_FD_READ) {
+ dapli_async_event_cb(&hca->ib_trans);
+ }
+ if (dapl_poll(hca->ib_trans.ib_cq->fd,
+ DAPL_FD_READ) == DAPL_FD_READ) {
+ dapli_cq_event_cb(&hca->ib_trans);
+ }
}
while (dapl_poll(hca->ib_trans.signal.scm[0],
DAPL_FD_READ) == DAPL_FD_READ) {
static void mcm_log_addrs(int lvl, struct dat_mcm_msg *msg, int state, int in)
{
if (in) {
- if (MXS_EP(&msg->daddr1) && MXS_EP(&msg->saddr1)) {
+ if (MXF_EP(&msg->daddr1) && MXF_EP(&msg->saddr1)) {
dapl_log(lvl, " QPr_t addr2: %s 0x%x %x 0x%x %s <- QPt_r addr2: 0x%x %x 0x%x %s\n",
mcm_state_str(state), htons(msg->daddr2.lid),
htonl(msg->daddr2.qpn), htons(msg->dport),
htons(msg->sport), mcm_map_str(msg->saddr1.ep_map));
}
} else {
- if (MXS_EP(&msg->saddr1) && MXS_EP(&msg->daddr1)) {
+ if (MXF_EP(&msg->saddr1) && MXF_EP(&msg->daddr1)) {
dapl_log(lvl, " QPr_t addr2: %s 0x%x %x 0x%x %s -> QPt_r addr2: 0x%x %x 0x%x %s\n",
mcm_state_str(state), htons(msg->saddr2.lid),
htonl(msg->saddr2.qpn), htons(msg->sport),
IN DAPL_HCA * hca_ptr,
IN DAPL_OPEN_FLAGS flags)
{
- struct ibv_device **dev_list;
+ struct ibv_device **dev_list = NULL;
struct dat_mcm_addr *mcm_ia = (struct dat_mcm_addr *) &hca_ptr->hca_address;
struct ibv_port_attr port_attr;
int i, nd = 0;
PROVIDER_NAME, hca_name, hca_ptr,
flags & DAPL_OPEN_QUERY ? "QUERY MODE":"STD MODE");
+	/* set RC tunables via environment or default */
+ hca_ptr->ib_trans.ib_cm.max_inline =
+ dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT);
+ hca_ptr->ib_trans.ib_cm.ack_retry =
+ dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY);
+ hca_ptr->ib_trans.ib_cm.ack_timer =
+ dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER);
+ hca_ptr->ib_trans.ib_cm.rnr_retry =
+ dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY);
+ hca_ptr->ib_trans.ib_cm.rnr_timer =
+ dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER);
+ hca_ptr->ib_trans.ib_cm.global =
+ dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL);
+ hca_ptr->ib_trans.ib_cm.hop_limit =
+ dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT);
+ hca_ptr->ib_trans.ib_cm.tclass =
+ dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS);
+ hca_ptr->ib_trans.ib_cm.mtu =
+ dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU));
+
+ /* initialize CM list, LISTEN, SND queue, PSP array, locks */
+ if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS)
+ goto err;
+
+ if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS)
+ goto err;
+
+ if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS)
+ goto err;
+
+ if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS)
+ goto err;
+
+ if ((dapl_os_lock_init(&hca_ptr->ib_trans.cqlock)) != DAT_SUCCESS)
+ goto err;
+
+ /* initialize CM and listen lists on this HCA uCM QP */
+ dapl_llist_init_head(&hca_ptr->ib_trans.list);
+ dapl_llist_init_head(&hca_ptr->ib_trans.llist);
+ dapl_llist_init_head(&hca_ptr->ib_trans.cqlist);
+
+ /* Get MIC mode and EP mappings */
+ if (dapli_mix_mode(&hca_ptr->ib_trans, hca_name))
+ goto err;
+
+ /* MIC EP with Full offload, no local verbs device */
+ if (MFO_EP(&hca_ptr->ib_trans.addr)) {
+ if (dapli_mix_open(&hca_ptr->ib_trans, hca_name,
+ hca_ptr->port_num,
+ flags & DAPL_OPEN_QUERY)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: SCIF init ERR on %s\n", hca_name);
+ goto err;
+ }
+ hca_ptr->ib_hca_handle = hca_ptr->ib_trans.ib_ctx;
+ goto cm_init;
+ }
+
/* Get list of all IB devices, find match, open */
dev_list = ibv_get_device_list(&nd);
if (!dev_list) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" open_hca: ibv_get_device_list() failed\n",
hca_name);
- return DAT_INTERNAL_ERROR;
+ goto err;
}
dapl_log(DAPL_DBG_TYPE_UTIL, " open_hca %p: %d devices found\n", hca_ptr, nd);
hca_ptr->ib_trans.ib_dev = NULL;
if (hca_ptr->ib_trans.ib_dev == NULL) {
dapl_log(DAPL_DBG_TYPE_ERR, " open_hca: device %s not found\n", hca_name);
dat_status = DAT_PROVIDER_NOT_FOUND;
- goto err;
+ goto err2;
}
hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
dapl_log(DAPL_DBG_TYPE_ERR,
" open_hca: dev open failed for %s\n",
ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
- goto err;
+ goto err2;
}
hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;
dapls_config_verbs(hca_ptr->ib_hca_handle);
ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
strerror(errno));
dat_status = DAT_INVALID_ADDRESS;
- goto bail;
+ goto err2;
} else {
if (port_attr.state != IBV_PORT_ACTIVE) {
dat_status = DAT_INVALID_ADDRESS;
- goto bail;
+ goto err2;
}
hca_ptr->ib_trans.addr.lid = htons(port_attr.lid);
hca_ptr->ib_trans.lid = htons(port_attr.lid);
ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
strerror(errno));
dat_status = DAT_INVALID_ADDRESS;
- goto bail;
+ goto err2;
}
- /* set RC tunables via enviroment or default */
+ /* max inline value when running on MIC is returned from host */
if (dapl_ib_inline_data(hca_ptr->ib_hca_handle)) {
hca_ptr->ib_trans.ib_cm.max_inline =
dapl_os_get_env_val("DAPL_MAX_INLINE",
INLINE_SEND_IB_DEFAULT);
}
- hca_ptr->ib_trans.ib_cm.ack_retry =
- dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY);
- hca_ptr->ib_trans.ib_cm.ack_timer =
- dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER);
- hca_ptr->ib_trans.ib_cm.rnr_retry =
- dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY);
- hca_ptr->ib_trans.ib_cm.rnr_timer =
- dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER);
- hca_ptr->ib_trans.ib_cm.global =
- dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL);
- hca_ptr->ib_trans.ib_cm.hop_limit =
- dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT);
- hca_ptr->ib_trans.ib_cm.tclass =
- dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS);
- hca_ptr->ib_trans.ib_cm.mtu =
- dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU));
-
- if (dapli_mix_open(&hca_ptr->ib_trans, hca_name,
- hca_ptr->port_num, flags & DAPL_OPEN_QUERY)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: SCIF init ERR for %s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
- goto bail;
- }
+ else
+ hca_ptr->ib_trans.ib_cm.max_inline = 0;
- if (flags & DAPL_OPEN_QUERY)
- goto done;
-
- /* initialize CM list, LISTEN, SND queue, PSP array, locks */
- if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS)
- goto bail;
-
- if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS)
- goto bail;
-
- if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS)
- goto bail;
-
- if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS)
- goto bail;
-
- if ((dapl_os_lock_init(&hca_ptr->ib_trans.cqlock)) != DAT_SUCCESS)
- goto bail;
+ dapl_log(DAPL_DBG_TYPE_UTIL, " open_hca: max inline data set to %d\n",
+ hca_ptr->ib_trans.ib_cm.max_inline);
/* EVD events without direct CQ channels, CNO support */
hca_ptr->ib_trans.ib_cq =
dapl_log(DAPL_DBG_TYPE_ERR,
" open_hca: ibv_create_comp_channel ERR %s\n",
strerror(errno));
- goto bail;
+ goto err2;
}
dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq);
dapl_log(DAPL_DBG_TYPE_ERR,
" open_hca: ERR: create_empty_cq = %s\n",
strerror(errno));
- goto bail;
+ goto err2;
}
- /* initialize CM and listen lists on this HCA uCM QP */
- dapl_llist_init_head(&hca_ptr->ib_trans.list);
- dapl_llist_init_head(&hca_ptr->ib_trans.llist);
- dapl_llist_init_head(&hca_ptr->ib_trans.cqlist);
+ if (dapli_mix_open(&hca_ptr->ib_trans, hca_name,
+ hca_ptr->port_num, flags & DAPL_OPEN_QUERY)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " open_hca: SCIF init ERR for %s\n", hca_name);
+ goto err2;
+ }
+
+cm_init:
+ if (flags & DAPL_OPEN_QUERY)
+ goto done;
- /* create uCM qp services */
+ /* create MCM qp services */
if (mcm_service_create(hca_ptr))
- goto bail;
+ goto err2;
if (create_os_signal(hca_ptr)) {
dapl_log(DAPL_DBG_TYPE_ERR,
" open_hca: failed to init cr pipe - %s\n",
strerror(errno));
- goto bail;
+ goto err2;
}
/* create thread to process inbound connect request */
if (dat_status != DAT_SUCCESS) {
dapl_log(DAPL_DBG_TYPE_ERR,
" open_hca: failed to create thread\n");
- goto bail;
+ goto err2;
}
dapl_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: MCM devname %s port %d, dev_IP %s ep_map %s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- hca_ptr->port_num,
+ " open_hca: devname %s port %d, dev_IP %s ep_map %s\n",
+ hca_name, hca_ptr->port_num,
inet_ntoa(((struct sockaddr_in *)
&hca_ptr->hca_address)->sin_addr),
mcm_map_str(hca_ptr->ib_trans.addr.ep_map));
dapl_os_sleep_usec(1000);
done:
+ if (dev_list)
+ ibv_free_device_list(dev_list);
+
/* save LID, GID, QPN, PORT address information, for ia_queries */
/* Set AF_INET6 to insure callee address storage of 28 bytes */
hca_ptr->ib_trans.hca = hca_ptr;
hca_ptr->ib_trans.addr.family = AF_INET6;
hca_ptr->ib_trans.addr.qp_type = IBV_QPT_UD;
- memcpy(&hca_ptr->hca_address,
- &hca_ptr->ib_trans.addr,
+ memcpy(&hca_ptr->hca_address,
+ &hca_ptr->ib_trans.addr,
sizeof(struct dat_mcm_addr));
dapl_log(DAPL_DBG_TYPE_UTIL,
ntohs(mcm_ia->lid), ntohl(mcm_ia->qpn),
mcm_ia->sl, mcm_map_str(mcm_ia->ep_map));
- ibv_free_device_list(dev_list);
return DAT_SUCCESS;
-bail:
+
+err2:
mcm_service_destroy(hca_ptr);
- ibv_close_device(hca_ptr->ib_hca_handle);
+ if (dev_list) {
+ if (hca_ptr->ib_hca_handle)
+ ibv_close_device(hca_ptr->ib_hca_handle);
+ ibv_free_device_list(dev_list);
+ }
+err:
hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
hca_ptr->ib_trans.ib_ctx = NULL;
hca_ptr->ib_trans.ib_dev = NULL;
-
-err:
- ibv_free_device_list(dev_list);
+ hca_ptr->ib_trans.hca = NULL;
return dat_status;
}
}
if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
- if (ibv_close_device(hca_ptr->ib_hca_handle))
- return (dapl_convert_errno(errno, "ib_close_device"));
+ if (!MFO_EP(&hca_ptr->ib_trans.addr) || hca_ptr->ib_trans.self.node == 0) {
+ if (ibv_close_device(hca_ptr->ib_hca_handle))
+ return (dapl_convert_errno(errno, "ib_close_device"));
+ }
hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
}
static int mcm_service_create(IN DAPL_HCA *hca)
{
- struct ibv_qp_init_attr qp_create;
+ struct ibv_qp_init_attr qp_create;
ib_hca_transport_t *tp = &hca->ib_trans;
struct ibv_recv_wr recv_wr, *recv_err;
- struct ibv_sge sge;
+ struct ibv_sge sge;
int i, mlen = sizeof(dat_mcm_msg_t);
int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
char *rbuf;
return 0;
tp->pd = ibv_alloc_pd(hca->ib_hca_handle);
- if (!tp->pd)
- goto bail;
-
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " create_service: pd %p ctx %p handle 0x%x\n",
- tp->pd, tp->pd->context, tp->pd->handle);
+ if (!tp->pd)
+ goto bail;
+
+ dapl_log(DAPL_DBG_TYPE_UTIL,
+ " create_service: pd %p ctx %p handle 0x%x\n",
+ tp->pd, tp->pd->context, tp->pd->handle);
tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);
if (!tp->rch)
tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
if (!tp->scq)
goto bail;
-
+
tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0);
if (!tp->rcq)
goto bail;
tp->qp = ibv_create_qp(tp->pd, &qp_create);
if (!tp->qp)
- goto bail;
+ goto bail;
tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff);
tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff);
/* save qp_num as part of ia_address, network order */
tp->addr.qpn = htonl(tp->qp->qp_num);
- return 0;
+ return 0;
bail:
dapl_log(DAPL_DBG_TYPE_ERR,
" ucm_create_services: ERR %s\n", strerror(errno));
#include "dapl_osd.h"
/*
- * CM proxy services, MCM on MIC to MPXYD via SCIF
- *
+ * MCM Provider Proxy services, MIC to MPXYD via SCIF or HST with SCIF
+ */
+
+/*
+ * MIX_IA_MODE
+ */
+
+/*
+ * Determine the IA endpoint mapping (tp->addr.ep_map) from SCIF topology
+ * before any verbs device is opened.  Host nodes (SCIF node 0) map to
+ * HOST_SOCK_DEV; MIC nodes map to MIC_FULL_DEV when full offload is
+ * forced via DAPL_MCM_MFO or required by the device name ("qib").
+ * Returns 0 on success, -1 if the SCIF node ID query fails.
+ */
+int dapli_mix_mode(ib_hca_transport_t *tp, char *name)
+{
+	int ret, mfo_dev, mfo_mode;
+
+	mfo_mode = dapl_os_get_env_val("DAPL_MCM_MFO", 0); /* Force MIC Full Offload */
+
+	/* self.node == 0 means we are running on the host, not on a MIC card */
+	ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
+	if (ret < 0) {
+		dapl_log(1, " scif_get_nodeIDs() failed with error %s\n", strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " SCIF node_id: %d client req_id 0x%x, %s\n",
+		 (uint16_t)tp->self.node, dapl_os_getpid(), name);
+
+	if (tp->self.node == 0) {
+		tp->addr.ep_map = HOST_SOCK_DEV; /* non-MIC mapping */
+		return 0;
+	}
+
+	/* MIC node: "qib" device requires full offload */
+	mfo_dev = !dapl_os_pstrcmp("qib", name);
+	if (mfo_mode || mfo_dev) {
+		tp->addr.ep_map = MIC_FULL_DEV; /* MIC with full proxy offload, no direct verbs */
+	}
+	return 0;
+}
+
+/*
* MIX_IA_OPEN
*/
int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only)
always_proxy = dapl_os_get_env_val("DAPL_MCM_ALWAYS_PROXY", 0);
scif_port_id = dapl_os_get_env_val("DAPL_MCM_PORT_ID", SCIF_OFED_PORT_8);
- ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
- if (ret < 0) {
- dapl_log(1, " scif_get_nodeIDs() failed with error %s\n", strerror(errno));
- return -1;
- }
- dapl_log(DAPL_DBG_TYPE_EXTENSION,
- " SCIF node_id: %d client req_id 0x%x\n",
- (uint16_t)tp->self.node, dapl_os_getpid());
-
- if (tp->self.node == 0)
- tp->addr.ep_map = HOST_SOCK_DEV; /* non-MIC mapping */
-
- if (query_only || (tp->self.node == 0 && !always_proxy)){
+ if (query_only || (tp->self.node == 0 && !always_proxy)) {
dapl_log(DAPL_DBG_TYPE_EXTENSION," Not running on MIC, no MPXY connect required\n");
tp->scif_ep = 0;
return 0;
}
- dapl_log(DAPL_DBG_TYPE_EXTENSION," Running on MIC, MPXY connect required\n");
+ dapl_log(DAPL_DBG_TYPE_EXTENSION,
+ " Running on MIC at %s ep_map, MPXY connect required\n",
+ mcm_map_str(tp->addr.ep_map));
/* Create an endpoint for MPXYD to connect back */
listen_ep = scif_open();
msg.dev_attr.pkey_idx = tp->ib_cm.pkey_idx;
msg.dev_attr.pkey = tp->ib_cm.pkey;
msg.dev_attr.max_inline = tp->ib_cm.max_inline;
+ msg.dev_addr.ep_map = tp->addr.ep_map;
memcpy(&msg.dev_addr, &tp->addr, sizeof(dat_mcm_addr_t));
tp->ib_cm.hop_limit = msg.dev_attr.hop_limit;
tp->ib_cm.tclass = msg.dev_attr.tclass;
tp->ib_cm.sl = msg.dev_attr.sl;
- tp->ib_cm.mtu = msg.dev_attr.mtu;
tp->ib_cm.rd_atom_in = msg.dev_attr.rd_atom_in;
tp->ib_cm.rd_atom_out = msg.dev_attr.rd_atom_out;
tp->ib_cm.pkey_idx = msg.dev_attr.pkey_idx;
tp->ib_cm.pkey = msg.dev_attr.pkey;
tp->ib_cm.max_inline = msg.dev_attr.max_inline;
-
tp->dev_id = msg.hdr.req_id;
+ if (MFO_EP(&tp->addr))
+ /* We do not use this var in MFO, but use it as a flag to signal success */
+ tp->ib_ctx = (struct ibv_context *)0xdeadbeef;
+
dapl_log(DAPL_DBG_TYPE_EXTENSION,
" mix_open reply (msg %p, ln %d) EPs %d %d %d - dev_id %d lid 0x%x\n",
&msg, len, tp->scif_ep, tp->scif_ev_ep,
tp->scif_tx_ep, tp->dev_id, ntohs(tp->addr.lid));
+
return 0;
}
}
}
+/* MIX device ATTR */
+/*
+ * Proxy ibv_query_device() over SCIF: send a MIX_QUERY_DEVICE request to
+ * MPXYD (header-only) and unpack the full attribute reply into *dev_attr.
+ * Returns 0 on success, the proxy MIX status on a remote failure, or -1
+ * on any transport/protocol error.
+ */
+int dapli_mix_query_device(ib_hca_transport_t *tp, struct ibv_device_attr *dev_attr)
+{
+	dat_mix_device_attr_t msg;
+	scif_epd_t mix_ep = tp->scif_ep;
+	int ret, len;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION, " MIX_QUERY_DEVICE_ATTR tp = %p\n", tp);
+
+	/* get attr request: only the header is sent */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QUERY_DEVICE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s msg %p send on %d, ret %d, exp %d, error %s\n",
+			 mix_op_str(msg.hdr.op), &msg, mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP %d\n", mix_op_str(msg.hdr.op), mix_ep);
+
+	/* get device attr response: full attribute message expected back */
+	len = sizeof(dat_mix_device_attr_t);
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d, error %s\n", mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Recv'd %s reply on SCIF EP %d for dev_id %d\n",
+		 mix_op_str(msg.hdr.op), mix_ep, msg.hdr.req_id);
+
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_QUERY_DEVICE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " ERR: MIX_QUERY_DEVICE ver %d, op %s, flgs %d, st %d dev_id %d\n",
+			 msg.hdr.ver, mix_op_str(msg.hdr.op),
+			 msg.hdr.flags, msg.hdr.status, msg.hdr.req_id);
+		if (msg.hdr.status != MIX_SUCCESS)
+			return msg.hdr.status;
+		else
+			return -1;
+	}
+
+	/* strncpy does not guarantee termination; force a NUL at the end */
+	strncpy(dev_attr->fw_ver, msg.fw_ver, sizeof(dev_attr->fw_ver));
+	dev_attr->fw_ver[sizeof(dev_attr->fw_ver) - 1] = '\0';
+	dev_attr->node_guid = msg.node_guid;
+	dev_attr->sys_image_guid = msg.sys_image_guid;
+	dev_attr->max_mr_size = msg.max_mr_size;
+	dev_attr->page_size_cap = msg.page_size_cap;
+	dev_attr->vendor_id = msg.vendor_id;
+	dev_attr->vendor_part_id = msg.vendor_part_id;
+	dev_attr->hw_ver = msg.hw_ver;
+	dev_attr->max_qp = msg.max_qp;
+	dev_attr->max_qp_wr = msg.max_qp_wr;
+	dev_attr->device_cap_flags = msg.device_cap_flags;
+	dev_attr->max_sge = msg.max_sge;
+	dev_attr->max_sge_rd = msg.max_sge_rd;
+	dev_attr->max_cq = msg.max_cq;
+	dev_attr->max_cqe = msg.max_cqe;
+	dev_attr->max_mr = msg.max_mr;
+	dev_attr->max_pd = msg.max_pd;
+	dev_attr->max_qp_rd_atom = msg.max_qp_rd_atom;
+	dev_attr->max_ee_rd_atom = msg.max_ee_rd_atom;
+	dev_attr->max_res_rd_atom = msg.max_res_rd_atom; /* was msg.max_ee_rd_atom, copy/paste */
+	dev_attr->max_qp_init_rd_atom = msg.max_qp_init_rd_atom;
+	dev_attr->max_ee_init_rd_atom = msg.max_ee_init_rd_atom;
+	dev_attr->atomic_cap = msg.atomic_cap;
+	dev_attr->max_ee = msg.max_ee;
+	dev_attr->max_rdd = msg.max_rdd;
+	dev_attr->max_mw = msg.max_mw;
+	dev_attr->max_raw_ipv6_qp = msg.max_raw_ipv6_qp;
+	dev_attr->max_raw_ethy_qp = msg.max_raw_ethy_qp;
+	dev_attr->max_mcast_grp = msg.max_mcast_grp;
+	dev_attr->max_mcast_qp_attach = msg.max_mcast_qp_attach;
+	dev_attr->max_total_mcast_qp_attach = msg.max_total_mcast_qp_attach;
+	dev_attr->max_ah = msg.max_ah;
+	dev_attr->max_fmr = msg.max_fmr;
+	dev_attr->max_map_per_fmr = msg.max_map_per_fmr;
+	dev_attr->max_srq = msg.max_srq;
+	dev_attr->max_srq_wr = msg.max_srq_wr;
+	dev_attr->max_srq_sge = msg.max_srq_sge;
+	dev_attr->max_pkeys = msg.max_pkeys;
+	dev_attr->local_ca_ack_delay = msg.local_ca_ack_delay;
+	dev_attr->phys_port_cnt = msg.phys_port_cnt;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," MIX_QUERY_DEVICE successful on SCIF EP %d\n", mix_ep);
+	return 0;
+}
+
/* MIX_PROV_ATTR */
int dapli_mix_get_attr(ib_hca_transport_t *tp, dat_mix_prov_attr_t *pr_attr)
{
memcpy(pr_attr, &msg.attr, sizeof(dat_mix_prov_attr_t));
+ /* update local TP CM attributes */
+ tp->retries = pr_attr->cm_retry;
+ tp->rep_time = pr_attr->cm_rep_time_ms;
+ tp->rtu_time = pr_attr->cm_rtu_time_ms;
+ tp->cm_timer = DAPL_MIN(tp->rep_time, tp->rtu_time);
+
dapl_log(DAPL_DBG_TYPE_EXTENSION," MIX_PROV_ATTR successful on SCIF EP %d\n", mix_ep);
return 0;
}
scif_epd_t mix_ep = tp->scif_ep;
int ret, len;
- dapl_log(DAPL_DBG_TYPE_EXTENSION," lmr create %p, addr %p %p rmr_context %x mr->rkey %x\n",
- lmr, lmr->mr_handle->addr, lmr->param.registered_address,
- lmr->param.rmr_context, lmr->mr_handle->rkey );
+ dapl_log(DAPL_DBG_TYPE_EXTENSION," lmr create %p, addr %p rmr_context %x\n",
+ lmr, lmr->param.registered_address, lmr->param.rmr_context);
+
+ if (MFO_EP(&tp->addr)) {
+ lmr->mr_handle = (ib_mr_handle_t) dapl_os_alloc (sizeof(struct ibv_mr));
+ if (NULL == lmr->mr_handle) {
+ dapl_log(1, " ERR: Could not allocat mr_hadle\n");
+ return -1;
+ }
+ }
/* request: */
msg.hdr.ver = DAT_MIX_VER;
msg.mr_len = lmr->param.registered_size;
msg.sci_addr = lmr->sci_addr;
msg.sci_off = lmr->sci_off;
- msg.ib_addr = (uint64_t) lmr->mr_handle->addr;
+ if (MFO_EP(&tp->addr))
+ msg.ib_addr = (uint64_t) lmr->param.registered_address;
+ else
+ msg.ib_addr = (uint64_t) lmr->mr_handle->addr;
msg.ib_rkey = lmr->param.rmr_context;
msg.ctx = (uint64_t)lmr;
msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
return -1;
}
+ if (MFO_EP(&tp->addr) && lmr->mr_handle) {
+ dapl_os_free(lmr->mr_handle, sizeof(struct ibv_mr));
+ lmr->mr_handle = IB_INVALID_HANDLE;
+ }
dapl_log(DAPL_DBG_TYPE_EXTENSION," removed lmr %p, id %d\n", lmr, lmr->mr_id);
return 0;
}
return 0;
}
+/* MIX_PZ_CREATE */
+/*
+ * Create a shadowed protection domain on MPXYD for this PZ.
+ * Sends a MIX_PZ_CREATE request over the SCIF control endpoint, blocks
+ * for the reply, and caches the proxy PD handle in m_pz->pd_handle.
+ * Returns 0 on success, -1 on any transport or protocol failure.
+ */
+int dapli_mix_pz_create(DAPL_IA * ia_ptr, DAPL_PZ *m_pz)
+{
+	dat_mix_pz_t msg;
+	scif_epd_t mix_ep = ia_ptr->hca_ptr->ib_trans.scif_ep;
+	int ret, len;
+
+	m_pz->pd_handle = IB_INVALID_HANDLE;
+
+	/* request: PD is allocated on the proxy, ib_pd returned in reply */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_PZ_CREATE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.ctx = (uint64_t)m_pz;
+	msg.ib_pd = 0;
+
+	len = sizeof(dat_mix_pz_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s snd on %d, ret %d, exp %d, err %s\n",
+			 mix_op_str(msg.hdr.op), mix_ep, ret, len,
+			 strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n",
+		 mix_op_str(msg.hdr.op));
+
+	/* wait for response; ctx echoed back identifies our PZ */
+	msg.ctx = 0;
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on ep %d, ret %d, exp %d, err %s\n",
+			 mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_PZ_CREATE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " ERR: %s %p ver %d, op %d, flags %d, stat %d\n",
+			 mix_op_str(msg.hdr.op), m_pz, msg.hdr.ver,
+			 msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+
+	/* ctx is 64 bits; use %Lx as elsewhere in this provider */
+	if (msg.ctx != (uint64_t)m_pz) {
+		dapl_log(1, " ERR: response ctx (0x%Lx) != sent one (0x%Lx)\n",
+			 msg.ctx, (uint64_t)m_pz);
+		return -1;
+	}
+
+	/* save id from proxy PZ create */
+	m_pz->pd_handle = (ib_pd_handle_t)msg.ib_pd;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " MIX_PZ_CREATE: pz %p, reply, proxy IB_PD %p\n",
+		 m_pz, m_pz->pd_handle);
+	return 0;
+}
+
+/* MIX_PZ_FREE */
+/*
+ * Free the shadowed protection domain on MPXYD for this PZ.
+ * Sends a MIX_PZ_FREE request and blocks for the reply; clears the
+ * cached proxy pd_handle on success.  Returns 0 on success, -1 on any
+ * transport or protocol failure.
+ */
+int dapli_mix_pz_free(DAPL_PZ *m_pz)
+{
+	dat_mix_pz_t msg;
+	DAPL_IA * ia_ptr = m_pz->header.owner_ia;
+	scif_epd_t mix_ep = ia_ptr->hca_ptr->ib_trans.scif_ep;
+	int ret, len;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " MIX_PZ_FREE: pz %p, send, proxy IB_PD %p\n",
+		 m_pz, m_pz->pd_handle);
+
+	/* request */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_PZ_FREE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.ctx = (uint64_t)m_pz;
+	msg.ib_pd = (uint64_t)m_pz->pd_handle;
+
+	len = sizeof(dat_mix_pz_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s send on %d, ret %d, exp %d, error %s\n",
+			 mix_op_str(msg.hdr.op), mix_ep, ret, len, strerror(errno));
+		return -1;	/* don't block in recv after a failed/short send */
+	}
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n",
+		 mix_op_str(msg.hdr.op));
+
+	/* response */
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d, error %s\n",
+			 mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_PZ_FREE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX_PZ_FREE ERR: ver %d, op %d, flags %d, or stat %d ln %d\n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status, ret);
+		return -1;
+	}
+
+	/* ib_pd is 64 bits; use %Lx as elsewhere in this provider */
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " MIX_PZ_FREE: reply, proxy IB_PD 0x%Lx\n", msg.ib_pd);
+
+	m_pz->pd_handle = 0;
+
+	return 0;
+}
+
+
/* SCIF DMA outbound writes and inbound msg receives; translate to scif_off via LMR */
/* TODO: faster translation for post_send? */
static inline int mix_proxy_data(ib_qp_handle_t m_qp, dat_mix_sr_t *msg, struct ibv_sge *sglist, int txlen, int mix_ep)
return ret;
}
+/*
+ * Proxy ibv_query_port() over SCIF: send a MIX_QUERY_PORT request for
+ * port_num to MPXYD (header-only, port carried in hdr.req_id) and
+ * unpack the attribute reply into *port_attr.
+ * Returns 0 on success, -1 on any transport or protocol failure.
+ */
+int dapli_mix_query_port(ib_hca_transport_t *tp, unsigned long port_num, struct ibv_port_attr *port_attr)
+{
+	dat_mix_port_attr_t msg;
+	scif_epd_t mix_ep = tp->scif_ep;
+	int ret, len;
+	/* NOTE(review): re-querying the SCIF node ID here looks redundant;
+	 * dapli_mix_mode() already caches tp->self.node - confirm intent */
+	ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
+	if (ret < 0) {
+		dapl_log(1, " scif_get_nodeIDs() failed with error %s\n", strerror(errno));
+		return -1;
+	}
+	/* request: header-only, port number rides in req_id */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QUERY_PORT;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.hdr.req_id = port_num;
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s snd on %d, ret %d, exp %d, err %s\n",
+			 mix_op_str(msg.hdr.op), mix_ep, ret, len,
+			 strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n",
+		 mix_op_str(msg.hdr.op));
+	/* wait for response: full port-attribute message expected back */
+	len = sizeof(dat_mix_port_attr_t);
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on ep %d, ret %d, exp %d, err %s\n",
+			 mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_QUERY_PORT ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " ERR: %s ver %d, op %d, flags %d, stat %d\n",
+			 mix_op_str(msg.hdr.op), msg.hdr.ver,
+			 msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
-
-
-
-
-
-
+	/* copy wire attributes into the caller's ibv_port_attr */
+	port_attr->gid_tbl_len = msg.gid_tbl_len;
+	port_attr->port_cap_flags = msg.port_cap_flags;
+	port_attr->max_msg_sz = msg.max_msg_sz;
+	port_attr->bad_pkey_cntr = msg.bad_pkey_cntr;
+	port_attr->qkey_viol_cntr = msg.qkey_viol_cntr;
+	port_attr->pkey_tbl_len = msg.pkey_tbl_len;
+	port_attr->lid = msg.lid;
+	port_attr->sm_lid = msg.sm_lid;
+	port_attr->lmc = msg.lmc;
+	port_attr->max_vl_num = msg.max_vl_num;
+	port_attr->sm_sl = msg.sm_sl;
+	port_attr->subnet_timeout = msg.subnet_timeout;
+	port_attr->init_type_reply = msg.init_type_reply;
+	port_attr->active_width = msg.active_width;
+	port_attr->active_speed = msg.active_speed;
+	port_attr->phys_state = msg.phys_state;
+	port_attr->link_layer = msg.link_layer;
+	port_attr->state = msg.state;
+	port_attr->max_mtu = msg.max_mtu;
+	port_attr->active_mtu = msg.active_mtu;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," MIX_QUERY_PORT successful on SCIF EP %d\n", mix_ep);
+	return 0;
+}
cookie = (DAPL_COOKIE *)(uintptr_t)wc->wr_id;
m_qp = cookie->ep->qp_handle;
- if (!m_qp->tp->scif_ep && MXS_EP(m_qp) &&
+ if (!m_qp->tp->scif_ep && MXF_EP(m_qp) &&
(wc->opcode == (uint32_t)IBV_WR_RDMA_WRITE_WITH_IMM)) {
dapl_log(DAPL_DBG_TYPE_EP,
" mcm_dto_req: RW_imm -> WR, wr_id %Lx\n", wc->wr_id);