From a35089aad64a967cd6d973456aadb974d71c4f03 Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Thu, 18 Jul 2013 10:17:11 -0700 Subject: [PATCH] mcm: support incompatible verbs definitions inter-node within the platform OFA verbs 3.5 and 1.5.4 are incompatible so there can be no direct mappings to verbs within any MIC to Host communications. Remove all direct verbs mappings in MIX and create inline construct functions to convert verbs to new dat_mix_wr and dat_mix_wc types for both work requests and work completions. Signed-off-by: Arlin Davis --- dapl/openib_mcm/mix.c | 26 +++++++++- dapl/svc/mpxyd.c | 73 ++++++++++++++++++--------- dat/include/dat2/dat_mic_extensions.h | 56 ++++++++++++++++++-- 3 files changed, 127 insertions(+), 28 deletions(-) diff --git a/dapl/openib_mcm/mix.c b/dapl/openib_mcm/mix.c index 6fae0b2..f80a79a 100644 --- a/dapl/openib_mcm/mix.c +++ b/dapl/openib_mcm/mix.c @@ -34,7 +34,27 @@ #include "dapl_ep_util.h" #include "dapl_osd.h" - +static inline void const_ib_wc(struct ibv_wc *iwc, struct dat_mix_wc *mwc, int entries) +{ + int i; + + for (i=0;icq_ctx; for (i=0; iwc_cnt; i++) { + struct ibv_wc ib_wc; /* possible segmentation on mpxyd side, update length if success */ if (pmsg->wc[i].status == 0) { cookie = (DAPL_COOKIE *) (uintptr_t) pmsg->wc[i].wr_id; pmsg->wc[i].byte_len = cookie->val.dto.size; } - dapls_evd_cqe_to_event(m_cq->evd, &pmsg->wc[i]); + const_ib_wc(&ib_wc, &pmsg->wc[i], 1); + dapls_evd_cqe_to_event(m_cq->evd, &ib_wc); } return 0; diff --git a/dapl/svc/mpxyd.c b/dapl/svc/mpxyd.c index 1c1b1cd..3709a19 100644 --- a/dapl/svc/mpxyd.c +++ b/dapl/svc/mpxyd.c @@ -976,6 +976,38 @@ static inline int scif_send_msg(scif_epd_t ep, void *msg, int len) return 0; } +static inline void const_mix_wc(struct dat_mix_wc *mwc, struct ibv_wc *iwc, int entries) +{ + int i; + + for (i=0;isg_list = sg_ptr; + iwr->opcode = mwr->opcode; + iwr->send_flags = mwr->send_flags; + iwr->imm_data = mwr->imm_data; + iwr->wr.rdma.remote_addr = 
mwr->wr.rdma.remote_addr; + iwr->wr.rdma.rkey = mwr->wr.rdma.rkey; +} static int init_scif() { @@ -2411,6 +2443,7 @@ static int mix_cq_poll(mcm_scif_dev_t *smd, dat_mix_dto_comp_t *pmsg) { int len, ret, pcnt; struct mcm_cq *m_cq; + struct ibv_wc local_wc[DAT_MIX_WC_MAX]; /* hdr already read, get operation data */ len = sizeof(dat_mix_dto_comp_t) - sizeof(dat_mix_hdr_t); @@ -2428,7 +2461,7 @@ static int mix_cq_poll(mcm_scif_dev_t *smd, dat_mix_dto_comp_t *pmsg) else pcnt = pmsg->wc_cnt; - pmsg->wc_cnt = ibv_poll_cq(m_cq->ib_cq, pcnt, pmsg->wc); + pmsg->wc_cnt = ibv_poll_cq(m_cq->ib_cq, pcnt, local_wc); mlog(1," completions = %d \n", pmsg->wc_cnt); if (pmsg->wc_cnt < 0) { mlog(0, " ibv_poll_cq ERR %s on cq_id 0x%x, cq_ctx %p\n", strerror(errno), pmsg->cq_id, pmsg->cq_ctx); @@ -2436,11 +2469,14 @@ static int mix_cq_poll(mcm_scif_dev_t *smd, dat_mix_dto_comp_t *pmsg) } else pmsg->hdr.status = 0; /* success, check pmsg->wc_cnt WC's */ - if (pmsg->wc_cnt) + const_mix_wc(pmsg->wc, local_wc, pmsg->wc_cnt); + + if (pmsg->wc_cnt) { mlog(1, " (ep=%d) cq %p id %d ctx %p stat %d op %d ln %d wr_id %p wc's %d verr 0x%x\n", m_cq->smd->scif_op_ep, m_cq, m_cq->cq_id, m_cq->cq_ctx, pmsg->wc[0].status, pmsg->wc[0].opcode, pmsg->wc[0].byte_len, pmsg->wc[0].wr_id, pmsg->wc_cnt, pmsg->wc[0].vendor_err); + } /* send back response, with client context */ pmsg->cq_id = m_cq->cq_id; @@ -2661,7 +2697,7 @@ resp: return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len)); } -static void mix_dto_event(struct mcm_cq *m_cq, struct ibv_wc *wc, int nc) +static void mix_dto_event(struct mcm_cq *m_cq, struct dat_mix_wc *wc, int nc) { dat_mix_dto_comp_t msg; int i; @@ -3219,7 +3255,7 @@ static void m_post_pending_wr(mcm_scif_dev_t *smd, int *data, int *events) m_wr->wr.wr_id = (uint64_t)m_wr; ret = ibv_post_send(m_qp->ib_qp, &m_wr->wr, &bad_wr); if (ret || (cn_signal && mix_eager_completion)) { - struct ibv_wc wc; + struct dat_mix_wc wc; mlog(1, " dto_event: sig %d ret %d, %s - m_wr %p, wr_id %p\n", 
cn_signal, ret, strerror(errno), m_wr->wr.wr_id, m_wr->org_id); @@ -3314,16 +3350,11 @@ static int m_proxy_data(mcm_scif_dev_t *smd, dat_mix_send_t *pmsg, struct mcm_qp mlog(1, " m_wr %p m_sge %p \n", m_wr, m_sge); - memcpy(&m_wr->wr.num_sge, &pmsg->wr.num_sge, 40); m_wr->org_id = pmsg->wr.wr_id; - m_wr->wr.sg_list = m_sge; - m_wr->wr.next = 0; - m_wr->wr.wr.rdma.remote_addr += total_offset; - m_wr->wr.num_sge = 0; - m_wr->wr.wr_id = 0; m_wr->w_idx = m_qp->wr_hd; m_wr->flags = 0; m_wr->context = (uint64_t)m_qp; + const_ib_wr(&m_wr->wr, &pmsg->wr, m_sge); for (i=0;iwr.num_sge;i++) { @@ -3471,16 +3502,12 @@ retry_mr: mlog(1, " next m_wr %p m_sge %p \n", m_wr, m_wr->sg); - memcpy(&m_wr->wr.num_sge, &pmsg->wr.num_sge, 40); m_wr->org_id = pmsg->wr.wr_id; - m_wr->wr.sg_list = m_wr->sg; - m_wr->wr.next = 0; - m_wr->wr.wr.rdma.remote_addr += total_offset; - m_wr->wr.num_sge = 0; - m_wr->wr.wr_id = 0; m_wr->w_idx = m_qp->wr_hd; m_wr->flags = 0; m_wr->context = (uint64_t)m_qp; + const_ib_wr(&m_wr->wr, &pmsg->wr, m_sge); + m_wr->wr.wr.rdma.remote_addr += total_offset; } } } @@ -3517,7 +3544,7 @@ retry_mr: ret = 0; bail: if (ret) { - struct ibv_wc wc; + struct dat_mix_wc wc; wc.wr_id = pmsg->wr.wr_id; wc.byte_len = 0; @@ -3550,8 +3577,8 @@ static int mix_post_send(mcm_scif_dev_t *smd, dat_mix_send_t *pmsg) } m_qp = (struct mcm_qp*)pmsg->qp_ctx; - mlog(1, " q_id %d, q_num %x ln %d, wr_id %p, sge %d, op %x flgs %x pst %d,%d cmp %d, inl %d, %s\n", - pmsg->qp_id, m_qp->ib_qp->qp_num, pmsg->len, pmsg->wr.wr_id, + mlog(1, " q_id %d, q_num %x data %d pkt %d wr_id %p, sge %d, op %x flgs %x pst %d,%d cmp %d, inl %d, %s\n", + pmsg->qp_id, m_qp->ib_qp->qp_num, pmsg->len, sizeof(dat_mix_send_t) + pmsg->len, pmsg->wr.wr_id, pmsg->wr.num_sge, pmsg->wr.opcode, pmsg->wr.send_flags, m_qp->post_cnt, m_qp->post_sig_cnt, m_qp->comp_cnt, pmsg->hdr.flags & MIX_OP_INLINE ? 1:0, pmsg->wr.opcode == IBV_WR_SEND ? 
"SND":"WR"); @@ -3577,9 +3604,8 @@ static int mix_post_send(mcm_scif_dev_t *smd, dat_mix_send_t *pmsg) m_wr, m_wr_prev, m_wr->sg, len, m_qp->wr_hd, m_qp->wr_tl); /* IB_WR */ - memcpy(&m_wr->wr.num_sge, &pmsg->wr.num_sge, 40); + const_ib_wr(&m_wr->wr, &pmsg->wr, m_wr->sg); m_wr->wr.sg_list = m_wr->sg; - m_wr->wr.next = 0; m_wr->wr.num_sge = len ? 1:0; /* M_WR */ @@ -3694,7 +3720,7 @@ bail: if (ret || ((m_wr->flags & M_SEND_POSTED) && (m_wr->flags & M_SEND_CN_EAGER_SIG))) { - struct ibv_wc wc; + struct dat_mix_wc wc; wc.wr_id = pmsg->wr.wr_id; wc.byte_len = len; @@ -4649,7 +4675,7 @@ void mcm_cq_event(struct mcm_cq *m_cq) void *cq_ctx; int i, ret, num, notify = 0; struct ibv_wc wc[DAT_MIX_WC_MAX]; - struct ibv_wc wc_ev[DAT_MIX_WC_MAX]; + struct dat_mix_wc wc_ev[DAT_MIX_WC_MAX]; mlog(2," m_cq(%p) \n", m_cq); @@ -4707,6 +4733,7 @@ retry: m_cq->prev_id = m_wr->org_id; wc_ev[num].wr_id = m_wr->org_id; wc_ev[num].status = wc[i].status; + wc_ev[num].vendor_err = wc[i].vendor_err; num++; } } diff --git a/dat/include/dat2/dat_mic_extensions.h b/dat/include/dat2/dat_mic_extensions.h index fa6e2d7..59c5766 100755 --- a/dat/include/dat2/dat_mic_extensions.h +++ b/dat/include/dat2/dat_mic_extensions.h @@ -447,6 +447,56 @@ typedef struct dat_mix_cm_event } dat_mix_cm_event_t; +typedef struct dat_mix_wc +{ + uint64_t wr_id; + uint32_t status; + uint32_t opcode; + uint32_t vendor_err; + uint32_t byte_len; + uint32_t imm_data; /* in network byte order */ + uint32_t qp_num; + uint32_t src_qp; + int wc_flags; + uint16_t pkey_index; + uint16_t slid; + uint8_t sl; + uint8_t dlid_path_bits; +} dat_mix_wc_t; + +typedef struct dat_mix_sge { + uint64_t addr; + uint32_t length; + uint32_t lkey; +} dat_mix_sge_t; + +typedef struct dat_mix_wr { + uint64_t wr_id; + struct dat_mix_send_wr *next; + struct dat_mix_sge *sg_list; + int num_sge; + uint32_t opcode; + int send_flags; + uint32_t imm_data; /* in network byte order */ + union { + struct { + uint64_t remote_addr; + uint32_t rkey; + } rdma; 
+ struct { + uint64_t remote_addr; + uint64_t compare_add; + uint64_t swap; + uint32_t rkey; + } atomic; + struct { + struct ibv_ah *ah; + uint32_t remote_qpn; + uint32_t remote_qkey; + } ud; + } wr; +} dat_mix_wr_t; + #define DAT_MIX_WC_MAX 4 typedef struct dat_mix_dto_comp { @@ -454,7 +504,7 @@ typedef struct dat_mix_dto_comp uint64_t cq_ctx; uint32_t cq_id; uint32_t wc_cnt; - struct ibv_wc wc[DAT_MIX_WC_MAX]; + struct dat_mix_wc wc[DAT_MIX_WC_MAX]; } dat_mix_dto_comp_t; @@ -465,8 +515,8 @@ typedef struct dat_mix_send uint64_t qp_ctx; uint32_t qp_id; uint32_t len; - struct ibv_send_wr wr; - struct ibv_sge sge[DAT_MIX_SGE_MAX]; + struct dat_mix_wr wr; + struct dat_mix_sge sge[DAT_MIX_SGE_MAX]; } dat_mix_send_t; -- 2.46.0