From: Arlin Davis Date: Tue, 4 Feb 2014 02:37:34 +0000 (-0800) Subject: open_ib common: qp, cq, and post_recv changes for proxy-in X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=dd82920ae215455fa79459d011069957b17ade0c;p=~ardavis%2Fdapl.git open_ib common: qp, cq, and post_recv changes for proxy-in Modify common QP, CQ, and DTO services to support proxy-in service that eliminates the need for local QP and CQ resources on the MIC adapter. Change WR UD type check to support no QP mode. Add dapli_mix_post_recv functionality for PI, QPr on mpxyd. Store platform unique guid for EP locality - inside/outside. Signed-off-by: Arlin Davis --- diff --git a/dapl/common/dapl_evd_connection_callb.c b/dapl/common/dapl_evd_connection_callb.c index a28d8d6..3c79103 100644 --- a/dapl/common/dapl_evd_connection_callb.c +++ b/dapl/common/dapl_evd_connection_callb.c @@ -153,8 +153,8 @@ dapl_evd_connection_callback(IN dp_ib_cm_handle_t ib_cm_handle, DAPL_MAX_PRIVATE_DATA_SIZE)); dapl_dbg_log(DAPL_DBG_TYPE_CM | DAPL_DBG_TYPE_CALLBACK, - "dapl_evd_connection_callback PEER REJ pd=%p sz=%d\n", - prd_ptr, private_data_size); + "dapl_evd_connection_callback PEER REJ ep=%p pd=%p sz=%d\n", + ep_ptr, prd_ptr, private_data_size); } case DAT_CONNECTION_EVENT_DISCONNECTED: case DAT_CONNECTION_EVENT_UNREACHABLE: diff --git a/dapl/openib_common/cq.c b/dapl/openib_common/cq.c index 5870991..bcfb643 100644 --- a/dapl/openib_common/cq.c +++ b/dapl/openib_common/cq.c @@ -199,6 +199,18 @@ dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr, evd_ptr->ib_cq_handle->tp = &ia_ptr->hca_ptr->ib_trans; evd_ptr->ib_cq_handle->evd = evd_ptr; +#ifdef _OPENIB_MCM_ + /* shadow support for TX, MPXYD */ + if (ia_ptr->hca_ptr->ib_trans.scif_ep) { + ret = dapli_mix_cq_create(evd_ptr->ib_cq_handle, *cqlen); + if (ret) + goto err; + + /* cross-socket: shadow both RX and TX, no IB CQ on MIC */ + if (MXS_EP(&ia_ptr->hca_ptr->ib_trans.addr)) + return DAT_SUCCESS; + } +#endif if (!evd_ptr->cno_ptr) channel = 
ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle); else @@ -220,15 +232,6 @@ dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr, /* update with returned cq entry size */ *cqlen = evd_ptr->ib_cq_handle->ib_cq->cqe; -#ifdef _OPENIB_MCM_ - /* shadow support, MPXYD */ - if (ia_ptr->hca_ptr->ib_trans.scif_ep) { - ret = dapli_mix_cq_create(evd_ptr->ib_cq_handle); - if (ret) - goto err; - } -#endif - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n", evd_ptr->ib_cq_handle, *cqlen); @@ -236,6 +239,10 @@ dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr, return DAT_SUCCESS; err: + dapl_log(DAPL_DBG_TYPE_ERR, + "ib_cq_alloc ERR (%d): new_cq %p cqlen=%d ret %d %s\n", + evd_ptr->ib_cq_handle, *cqlen, ret, strerror(errno)); + if (evd_ptr->ib_cq_handle) dapl_os_free(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq)); @@ -269,6 +276,19 @@ DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr) struct ibv_comp_channel *channel; if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) { + +#ifdef _OPENIB_MCM_ + /* shadow support, MPXYD */ + if (ia_ptr->hca_ptr->ib_trans.scif_ep) { + dapli_mix_cq_free(evd_ptr->ib_cq_handle); + if (!evd_ptr->ib_cq_handle->ib_cq) { + dapl_os_free(evd_ptr->ib_cq_handle, + sizeof(struct dcm_ib_cq)); + evd_ptr->ib_cq_handle = IB_INVALID_HANDLE; + return DAT_SUCCESS; + } + } +#endif /* pull off CQ and EVD entries and toss */ while (ibv_poll_cq(evd_ptr->ib_cq_handle->ib_cq, 1, &wc) == 1) ; while (dapl_evd_dequeue(evd_ptr, &event) == DAT_SUCCESS) ; @@ -278,11 +298,7 @@ DAT_RETURN dapls_ib_cq_free(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr) return (dapl_convert_errno(errno, "ibv_destroy_cq")); if (!evd_ptr->cno_ptr) ibv_destroy_comp_channel(channel); -#ifdef _OPENIB_MCM_ - /* shadow support, MPXYD */ - if (ia_ptr->hca_ptr->ib_trans.scif_ep) - dapli_mix_cq_free(evd_ptr->ib_cq_handle); -#endif + dapl_os_free(evd_ptr->ib_cq_handle, sizeof(struct dcm_ib_cq)); evd_ptr->ib_cq_handle = IB_INVALID_HANDLE; } @@ -300,11 +316,12 @@ dapls_evd_dto_wakeup(IN 
DAPL_EVD * evd_ptr) dapl_os_wait_object_wakeup(&evd_ptr->wait_object); #ifdef _OPENIB_MCM_ - if ((evd_ptr->ib_cq_handle->tp->scif_ep) && - (evd_ptr->ib_cq_handle->type & DCM_CQ_SND)) + if (evd_ptr->ib_cq_handle->tp->scif_ep && + ((evd_ptr->ib_cq_handle->type & DCM_CQ_SND) || + (!evd_ptr->ib_cq_handle->ib_cq))) { dapl_os_wait_object_wakeup(&evd_ptr->wait_object); + } #endif - /* otherwise, no wake up mechanism */ return DAT_SUCCESS; } @@ -345,7 +362,7 @@ dapls_wait_comp_channel(IN struct ibv_comp_channel *channel, IN uint32_t timeout DAT_RETURN dapls_evd_dto_wait(IN DAPL_EVD * evd_ptr, IN uint32_t timeout) { - struct ibv_comp_channel *channel = evd_ptr->ib_cq_handle->ib_cq->channel; + struct ibv_comp_channel *channel; struct ibv_cq *ibv_cq = NULL; void *context; int status; @@ -355,11 +372,13 @@ dapls_evd_dto_wait(IN DAPL_EVD * evd_ptr, IN uint32_t timeout) evd_ptr, timeout); #ifdef _OPENIB_MCM_ - if ((evd_ptr->ib_cq_handle->tp->scif_ep) && - (evd_ptr->ib_cq_handle->type & DCM_CQ_SND)) { + if (evd_ptr->ib_cq_handle->tp->scif_ep && + ((evd_ptr->ib_cq_handle->type & DCM_CQ_SND) || + (!evd_ptr->ib_cq_handle->ib_cq))) { return (dapl_os_wait_object_wait(&evd_ptr->wait_object, timeout)); } #endif + channel = evd_ptr->ib_cq_handle->ib_cq->channel; status = dapls_wait_comp_channel(channel, timeout); if (!status) { if (!ibv_get_cq_event(channel, &ibv_cq, &context)) { @@ -389,7 +408,6 @@ void dapli_cq_event_cb(struct _ib_hca_transport *tp) dapl_evd_dto_callback(tp->ib_ctx, evd->ib_cq_handle, (void*)evd); } - ibv_ack_cq_events(ibv_cq, 1); } } @@ -453,7 +471,8 @@ err: */ DAT_RETURN dapls_set_cq_notify(IN DAPL_IA * ia_ptr, IN DAPL_EVD * evd_ptr) { - if (ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, 0)) + if (evd_ptr->ib_cq_handle->ib_cq && + ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, 0)) return (dapl_convert_errno(errno, "notify_cq")); else return DAT_SUCCESS; @@ -480,7 +499,8 @@ DAT_RETURN dapls_ib_completion_notify(IN ib_hca_handle_t hca_handle, IN DAPL_EVD * evd_ptr, 
IN ib_notification_type_t type) { - if (ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, type)) + if (evd_ptr->ib_cq_handle->ib_cq && + ibv_req_notify_cq(evd_ptr->ib_cq_handle->ib_cq, type)) return (dapl_convert_errno(errno, "notify_cq_type")); else return DAT_SUCCESS; @@ -511,8 +531,9 @@ DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr, int ret; #ifdef _OPENIB_MCM_ - if ((evd_ptr->ib_cq_handle->tp->scif_ep) && - (evd_ptr->ib_cq_handle->type & DCM_CQ_SND)) { + if (evd_ptr->ib_cq_handle->tp->scif_ep && + ((evd_ptr->ib_cq_handle->type & DCM_CQ_SND) || + (!evd_ptr->ib_cq_handle->ib_cq))) { ret = dapli_mix_cq_poll(evd_ptr->ib_cq_handle, wc_ptr); if (ret == 1) return DAT_SUCCESS; @@ -520,7 +541,6 @@ DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr, return DAT_QUEUE_EMPTY; } #endif - ret = ibv_poll_cq(evd_ptr->ib_cq_handle->ib_cq, 1, wc_ptr); if (ret == 1) return DAT_SUCCESS; diff --git a/dapl/openib_common/dapl_ib_dto.h b/dapl/openib_common/dapl_ib_dto.h index 2893e01..5e06f8e 100644 --- a/dapl/openib_common/dapl_ib_dto.h +++ b/dapl/openib_common/dapl_ib_dto.h @@ -31,13 +31,15 @@ #ifdef DAT_EXTENSIONS #include + +#define CQE_WR_TYPE_UD(id) \ + ((int)((DAPL_COOKIE *)(uintptr_t)id)->ep->param.ep_attr.service_type == (int)DAT_IB_SERVICE_TYPE_UD) +#else +#define CQE_WR_TYPE_UD(id) (0) #endif STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p); -#define CQE_WR_TYPE_UD(id) \ - (((DAPL_COOKIE *)(uintptr_t)id)->ep->qp_handle->qp->qp_type == IBV_QPT_UD) - #if defined(_OPENIB_MCM_) #define PROVIDER_NAME "MCM" #elif defined(_OPENIB_CMA_) @@ -91,8 +93,14 @@ dapls_ib_post_recv ( cookie->val.dto.size = total_len; } +#ifdef _OPENIB_MCM_ + if (ep_ptr->qp_handle->tp->scif_ep && !ep_ptr->qp_handle->qp) /* QPr shadowed on proxy */ + ret = dapli_mix_post_recv(ep_ptr->qp_handle, total_len, &wr, &bad_wr); + else + ret = ibv_post_recv(ep_ptr->qp_handle->qp, &wr, &bad_wr); +#else ret = ibv_post_recv(ep_ptr->qp_handle->qp, &wr, &bad_wr); - +#endif if (ret) 
return(dapl_convert_errno(errno,"ibv_recv")); @@ -130,7 +138,7 @@ dapls_ib_post_send ( remote_iov, completion_flags); #ifdef DAT_EXTENSIONS - if (ep_ptr->qp_handle->qp->qp_type != IBV_QPT_RC) + if (ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC) return(DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP)); #endif /* setup the work request */ diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c index 72a286b..046ac0b 100644 --- a/dapl/openib_common/qp.c +++ b/dapl/openib_common/qp.c @@ -66,15 +66,18 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, #ifdef _OPENIB_CMA_ dp_ib_cm_handle_t conn; #endif - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n", - ia_ptr, ep_ptr, ep_ctx_ptr); attr = &ep_ptr->param.ep_attr; ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle; rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle; + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " qp_alloc: ia %p ep %p ctx %p: SQ %d,%d evd %p - RQ %d,%d evd %p\n", + ia_ptr, ep_ptr, ep_ctx_ptr, + attr->max_request_dtos, attr->max_request_iov, req_evd, + attr->max_recv_dtos, attr->max_recv_iov, rcv_evd); + /* * DAT allows usage model of EP's with no EVD's but IB does not. 
* Create a CQ with zero entries under the covers to support and @@ -85,12 +88,12 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle); if (!channel) - return (dapl_convert_errno(ENOMEM, "create_cq_chan")); + return (dapl_convert_errno(ENOMEM, "QP create_cq_chan")); /* Call IB verbs to create CQ */ rcv_cq = dapl_os_alloc(sizeof(struct dcm_ib_cq)); if (!rcv_cq) - return (dapl_convert_errno(ENOMEM, " alloc cq")); + return (dapl_convert_errno(ENOMEM, "QP alloc cq")); dapl_os_memzero(rcv_cq, sizeof(struct dcm_ib_cq)); @@ -99,7 +102,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, if (!rcv_cq->ib_cq) { ibv_destroy_comp_channel(channel); - return (dapl_convert_errno(ENOMEM, "create_cq")); + return (dapl_convert_errno(ENOMEM, "QP create_cq")); } ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq; } @@ -117,7 +120,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, /* create QP object */ ep_ptr->qp_handle = dapl_os_alloc(sizeof(struct dcm_ib_qp)); if (!ep_ptr->qp_handle) - return (dapl_convert_errno(ENOMEM, "create_qp")); + return (dapl_convert_errno(errno, "create_qp")); dapl_os_memzero(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp)); ep_ptr->qp_handle->tp = &ia_ptr->hca_ptr->ib_trans; @@ -163,7 +166,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, qp_create.qp_context = (void *)ep_ptr; #ifdef DAT_EXTENSIONS - if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) { + if ((int)attr->service_type == (int)DAT_IB_SERVICE_TYPE_UD) { #ifdef _OPENIB_CMA_ goto err; #endif @@ -198,40 +201,48 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, qp_create.cap.max_send_wr = 1; qp_create.cap.max_send_sge = 1; } + + /* Don't create any QP if MIC xsocket, QPt and QPr both on MPXYD */ + if (!ia_ptr->hca_ptr->ib_trans.scif_ep || + (ia_ptr->hca_ptr->ib_trans.scif_ep && + !MXS_EP(&ia_ptr->hca_ptr->ib_trans.addr))) #endif - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " 1 - QP_ALLOC: QPr sq %d,%d rq %d,%d\n", - qp_create.cap.max_send_wr, - qp_create.cap.max_send_sge, - 
qp_create.cap.max_recv_wr, - qp_create.cap.max_recv_sge); - - ep_ptr->qp_handle->qp = ibv_create_qp(ib_pd_handle, &qp_create); - if (!ep_ptr->qp_handle->qp) { - ret = errno; - goto err; + { + ep_ptr->qp_handle->qp = ibv_create_qp(ib_pd_handle, &qp_create); + if (!ep_ptr->qp_handle->qp) { + dapl_log(1," qp_alloc ERR %d %s line %d on device %s\n", + errno, strerror(errno), __LINE__ , + ibv_get_device_name(ia_ptr->hca_ptr->ib_trans.ib_dev)); + ret = errno; + goto err; + } + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QP_ALLOC: QPr 0x%x sq %d,%d rq %d,%d\n", + ep_ptr->qp_handle->qp->qp_num, + qp_create.cap.max_send_wr, + qp_create.cap.max_send_sge, + qp_create.cap.max_recv_wr, + qp_create.cap.max_recv_sge); } - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " 2 - QP_ALLOC: QPr 0x%x sq %d,%d rq %d,%d\n", - ep_ptr->qp_handle->qp->qp_num, - qp_create.cap.max_send_wr, - qp_create.cap.max_send_sge, - qp_create.cap.max_recv_wr, - qp_create.cap.max_recv_sge); - #ifdef _OPENIB_MCM_ /* shadow support, MPXYD */ ep_ptr->qp_handle->qp_ctx = (uint64_t)ep_ptr; - ep_ptr->qp_handle->qp_id = 0; /* ??? */ - if (ia_ptr->hca_ptr->ib_trans.scif_ep) { /* MIC: shadow on proxy node */ + ep_ptr->qp_handle->qp_id = 0; + if (ia_ptr->hca_ptr->ib_trans.scif_ep) { /* MIC: shadow QPt on proxy */ qp_create.cap.max_inline_data = 32; /* setup for bw not latency */ qp_create.cap.max_send_wr = attr->max_request_dtos; qp_create.cap.max_send_sge = attr->max_request_iov; - qp_create.cap.max_recv_wr = 1; - qp_create.cap.max_recv_sge = 2; + if (ep_ptr->qp_handle->qp) { + qp_create.cap.max_recv_wr = 1; /* MIC: unused shadow QPr on proxy */ + qp_create.cap.max_recv_sge = 1; + } else { + qp_create.cap.max_recv_wr = attr->max_recv_dtos; /* MIC: shadow QPr on proxy */ + qp_create.cap.max_recv_sge = attr->max_recv_iov; + } dapl_dbg_log(DAPL_DBG_TYPE_EP, - " 3 - QP_ALLOC: QPt (MPXYD) sq %d,%d rq %d,%d\n", + " QP_ALLOC: QPt -> (MPXYD) sq %d,%d %s rq %d,%d\n", qp_create.cap.max_send_wr, qp_create.cap.max_send_sge, + ep_ptr->qp_handle->qp ? 
"":"QPr", qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge); ret = dapli_mix_qp_create(ep_ptr->qp_handle, &qp_create, req_cq, rcv_cq); @@ -240,7 +251,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, } else { /* NON-MIC: need QPt, in case of shadowed QP's from MIC's */ qp_create.cap.max_recv_wr = 1; - qp_create.cap.max_recv_sge = 2; + qp_create.cap.max_recv_sge = 1; ep_ptr->qp_handle->sqp = ibv_create_qp(ib_pd_handle, &qp_create); if (!ep_ptr->qp_handle->sqp) { ret = errno; @@ -253,11 +264,15 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, goto err; } dapl_dbg_log(DAPL_DBG_TYPE_EP, - " 3 - QP_ALLOC: QPt 0x%x sq %d,%d rq %d,%d\n", + " 3 - QP_ALLOC: QP (LOCAL) QPt 0x%x sq %d,%d QPr rq %d,%d\n", ep_ptr->qp_handle->sqp->qp_num, qp_create.cap.max_send_wr, qp_create.cap.max_send_sge, qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge); } + if (!ep_ptr->qp_handle->qp) { /* QPr and QPs both shadowed */ + ep_ptr->qp_state = IBV_QPS_INIT; + return DAT_SUCCESS; + } #endif /* Setup QP attributes for INIT state on the way out */ if (dapls_modify_qp_state(ep_ptr->qp_handle->qp, @@ -280,7 +295,6 @@ err: dapl_os_free(ep_ptr->qp_handle, sizeof(struct dcm_ib_qp)); ep_ptr->qp_handle = IB_INVALID_HANDLE; - return (dapl_convert_errno(ret, "create_qp")); } @@ -313,19 +327,20 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) #endif dapl_os_lock(&ep_ptr->header.lock); - if (ep_ptr->qp_handle != NULL) { + if (ep_ptr->qp_handle) { qp = ep_ptr->qp_handle->qp; dapl_os_unlock(&ep_ptr->header.lock); qp_attr.qp_state = IBV_QPS_ERR; - ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); + if (qp) + ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); dapls_ep_flush_cqs(ep_ptr); #ifdef _OPENIB_CMA_ rdma_destroy_qp(cm_ptr->cm_id); cm_ptr->cm_id->qp = NULL; #else - if (ibv_destroy_qp(qp)) { + if (qp && ibv_destroy_qp(qp)) { dapl_log(DAPL_DBG_TYPE_ERR, " qp_free: ibv_destroy_qp error - %s\n", strerror(errno)); @@ -339,8 +354,6 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) 
else /* NON MIC: local shadow queue */ ibv_destroy_qp(ep_ptr->qp_handle->sqp); - - /* TODO: flush shadow CQ on MPXYD */ #endif } else { dapl_os_unlock(&ep_ptr->header.lock); diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c index bfd4081..9bea66e 100644 --- a/dapl/openib_common/util.c +++ b/dapl/openib_common/util.c @@ -332,8 +332,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, DAT_NAME_MAX_LENGTH - 1); ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0'; ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0'; - ia_attr->ia_address_ptr = - (DAT_IA_ADDRESS_PTR) & hca_ptr->hca_address; + ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR) &hca_ptr->hca_address; dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " query_hca: %s %s \n", @@ -398,6 +397,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, (unsigned) (hca_ptr->ib_trans.guid >> 16) & 0xffff, (unsigned) (hca_ptr->ib_trans.guid >> 0) & 0xffff); #ifdef _OPENIB_MCM_ + hca_ptr->ib_trans.sys_guid = dev_attr.sys_image_guid; /* network order */ if (hca_ptr->ib_trans.scif_ep) hca_ptr->ib_trans.mode_str = "PROXY"; else