From 5d1f8bd9f72f47ad071586ec9caf59fe3cdfb13b Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Wed, 30 Apr 2014 11:13:35 -0700 Subject: [PATCH] mpxyd,mcm: changes for backward compatibility with older v4 MIC clients Allow mpxyd service to run with older MIC clients that support only proxy-out and not proxy-in capabilities. Define minimal and compatible versions and sync to MIC client during device open. Create and use dat_mcm_msg_compat, dat_mix_mr_compat, and dat_mix_cm_compat messages and operations with older v4 clients. Move current MIX command version to v5. Signed-off-by: Arlin Davis --- dapl/openib_mcm/cm.c | 18 +-- dapl/openib_mcm/mix.c | 16 ++- dapl/svc/mcm.c | 15 ++- dapl/svc/mix.c | 185 +++++++++++++++++++------- dapl/svc/mpxyd.c | 1 + dapl/svc/mpxyd.h | 4 + dat/include/dat2/dat_mic_extensions.h | 52 +++++++- 7 files changed, 222 insertions(+), 69 deletions(-) diff --git a/dapl/openib_mcm/cm.c b/dapl/openib_mcm/cm.c index 59e9012..35c82c4 100644 --- a/dapl/openib_mcm/cm.c +++ b/dapl/openib_mcm/cm.c @@ -630,12 +630,13 @@ static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data goto bail; } - len = (sizeof(*msg) - DAT_MCM_PDATA_SIZE); + len = sizeof(dat_mcm_msg_t); dapl_os_memcpy(smsg, msg, len); if (p_size) { smsg->p_size = ntohs(p_size); dapl_os_memcpy(&smsg->p_data, p_data, p_size); - } + } else + smsg->p_size = 0; wr.next = NULL; wr.sg_list = &sge; @@ -646,7 +647,7 @@ static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data if (len <= tp->max_inline_send) wr.send_flags |= IBV_SEND_INLINE; - sge.length = len + p_size; + sge.length = len; sge.lkey = tp->mr_sbuf->lkey; sge.addr = (uintptr_t)smsg; @@ -1178,7 +1179,7 @@ void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg) if (MXS_EP(&cm->msg.daddr1)) { /* save PI WR info, create local WC_q, send back WC info */ mcm_ntoh_wrc(&ep->qp_handle->wrc_rem, (mcm_wrc_info_t*)cm->msg.p_proxy); - mcm_create_wc_q(ep->qp_handle, MCM_WRC_QLEN); + mcm_create_wc_q(ep->qp_handle, ep->qp_handle->wrc_rem.wr_end + 1); mcm_hton_wrc((mcm_wrc_info_t*)cm->msg.p_proxy, &ep->qp_handle->wrc); ep->qp_handle->ep_map = cm->msg.daddr1.ep_map; @@ -1186,8 +1187,7 @@ void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg) if (mcm_post_rcv_wc(ep->qp_handle, MCM_WRC_QLEN)) goto bail; - dapl_log(DAPL_DBG_TYPE_CM, - "CONN_RTU: WR_rem %p sz %d, WC %p sz %d\n", + dapl_log(DAPL_DBG_TYPE_CM, "CONN_RTU: WR_rem %p sz %d, WC %p sz %d\n", ep->qp_handle->wrc_rem.wr_addr, ep->qp_handle->wrc_rem.wr_end+1, ep->qp_handle->wrc.wc_addr, @@ -1498,7 +1498,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data) if (MXS_EP(&cm->msg.daddr1)) { /* save PI WR info, create local WC_q, send back WC info */ mcm_ntoh_wrc(&ep->qp_handle->wrc_rem, (mcm_wrc_info_t*)cm->msg.p_proxy); - mcm_create_wc_q(ep->qp_handle, MCM_WRC_QLEN); + mcm_create_wc_q(ep->qp_handle, ep->qp_handle->wrc_rem.wr_end + 1); mcm_hton_wrc((mcm_wrc_info_t*)cm->msg.p_proxy, &ep->qp_handle->wrc); ep->qp_handle->ep_map = cm->msg.daddr1.ep_map; @@ -1507,10 +1507,12 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data) goto bail; dapl_log(DAPL_DBG_TYPE_CM, - "ACCEPT_USR: WR_rem %p sz %d, WC %p sz %d\n", + "ACCEPT_USR: WR_rem %p rkey %x sz %d, WC %p rkey %x sz %d\n", ep->qp_handle->wrc_rem.wr_addr, + ep->qp_handle->wrc_rem.wr_rkey, ep->qp_handle->wrc_rem.wr_end+1, ep->qp_handle->wrc.wc_addr, + ep->qp_handle->wrc.wc_rkey, ep->qp_handle->wrc.wc_end+1); } } diff --git a/dapl/openib_mcm/mix.c b/dapl/openib_mcm/mix.c index 4d27b53..06656dc 100644 --- a/dapl/openib_mcm/mix.c +++ b/dapl/openib_mcm/mix.c @@ -518,8 +518,8 @@ int dapli_mix_qp_free(ib_qp_handle_t m_qp) } if (msg.ver != DAT_MIX_VER || msg.op != MIX_QP_FREE || msg.flags != MIX_OP_RSP || msg.status != MIX_SUCCESS) { - dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n", - msg.ver, msg.op, msg.flags, msg.status); + dapl_log(1, " MIX_QP_FREE ERR: ver %d, op %d, flags %d, or stat %d len %d\n", + msg.ver, msg.op, msg.flags, msg.status, ret); return -1; } dapl_log(DAPL_DBG_TYPE_EXTENSION," received reply on SCIF EP\n"); @@ -551,6 +551,8 @@ int dapli_mix_cq_create(ib_cq_handle_t m_cq, int cq_len) strerror(errno)); return -1; } + dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n", + mix_op_str(msg.hdr.op)); /* wait for response */ ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK); @@ -595,6 +597,8 @@ int dapli_mix_cq_free(ib_cq_handle_t m_cq) dapl_log(1, " ERR: %s send on %d, ret %d, exp %d, error %s\n", mix_op_str(msg.op), mix_ep, ret, len, strerror(errno)); } + dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n", + mix_op_str(msg.op)); /* response */ ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK); @@ -605,10 +609,12 @@ int dapli_mix_cq_free(ib_cq_handle_t m_cq) } if (msg.ver != DAT_MIX_VER || msg.op != MIX_CQ_FREE || msg.flags != MIX_OP_RSP || msg.status != MIX_SUCCESS) { - dapl_log(1, " MIX msg ver %d, op %d, flags %d, or stat %d ERR \n", - msg.ver, msg.op, msg.flags, msg.status); + dapl_log(1, " MIX_CQ_FREE ERR: ver %d, op %d, flags %d, or stat %d ln %d\n", + msg.ver, msg.op, msg.flags, msg.status, ret); return -1; } + dapl_log(DAPL_DBG_TYPE_EXTENSION, + " MIX_CQ_FREE: reply, proxy cq_id 0x%x\n", m_cq->cq_id); return 0; } @@ -940,7 +946,7 @@ int dapli_mix_cm_rtu_out(dp_ib_cm_handle_t m_cm) msg.cm_ctx = (uint64_t)m_cm; dapl_log(DAPL_DBG_TYPE_EXTENSION," RTU -> id 0x%x dport 0x%x, dqpn 0x%x dlid 0x%x\n", - msg.cm_id, ntohs(msg.msg.dport), ntohl(msg.msg.dqpn), ntohs(msg.msg.daddr1.lid) ); + msg.cm_id, ntohs(msg.msg.dport), ntohl(msg.msg.dqpn), ntohs(msg.msg.daddr1.lid)); len = sizeof(dat_mix_cm_t); dapl_os_lock(&m_cm->tp->lock); diff --git a/dapl/svc/mcm.c b/dapl/svc/mcm.c index e6207e7..3377e47 100644 --- a/dapl/svc/mcm.c +++ b/dapl/svc/mcm.c @@ -174,8 +174,8 @@ int mcm_init_cm_service(mcm_ib_dev_t *md) strerror(errno)); return -1; } - /* endpoint mapping hint for MIC to HCA communication */ - if (md->numa_node == md->mc->numa_node) + /* endpoint mapping hint for MIC to HCA communication, backward compatible */ + if ((md->numa_node == md->mc->numa_node) || (md->mc->ver == MIX_COMP)) md->addr.ep_map = MIC_SSOCK_DEV; else md->addr.ep_map = MIC_XSOCK_DEV; @@ -270,11 +270,11 @@ int mcm_init_cm_service(mcm_ib_dev_t *md) /* save qp_num as part of ia_address, network order */ md->addr.qpn = htonl(md->qp->qp_num); - mlog(0, " IB LID 0x%x PORT %d GID %s QPN 0x%x: mic%d -> %s - %s socket\n", + mlog(0, " IB LID 0x%x PORT %d GID %s QPN 0x%x: mic%d -> %s - %s, mic_ver %d\n", ntohs(md->addr.lid), md->port, inet_ntop(AF_INET6, md->addr.gid, gid_str, sizeof(gid_str)), md->qp->qp_num, md->mc->scif_id - 1, md->ibdev->name, - md->addr.ep_map == MIC_SSOCK_DEV ? "local":"cross"); + md->addr.ep_map == MIC_SSOCK_DEV ? "MSS":"MXS", md->mc->ver); return 0; bail: @@ -758,12 +758,13 @@ static int mcm_send(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_ goto bail; } - len = (sizeof(dat_mcm_msg_t) - DAT_MCM_PDATA_SIZE); + len = sizeof(dat_mcm_msg_t); memcpy(smsg, msg, len); if (p_size) { smsg->p_size = ntohs(p_size); memcpy(&smsg->p_data, p_data, p_size); - } + } else + smsg->p_size = 0; wr.next = NULL; wr.sg_list = &sge; @@ -773,7 +774,7 @@ static int mcm_send(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_ wr.send_flags = (wr.wr_id % md->signal) ? 0 : IBV_SEND_SIGNALED; wr.send_flags |= IBV_SEND_INLINE; - sge.length = len + p_size; + sge.length = len; sge.lkey = md->mr_sbuf->lkey; sge.addr = (uintptr_t)smsg; diff --git a/dapl/svc/mix.c b/dapl/svc/mix.c index db59f9b..8d19bfc 100644 --- a/dapl/svc/mix.c +++ b/dapl/svc/mix.c @@ -166,7 +166,7 @@ void mix_scif_accept(scif_epd_t listen_ep) msg.hdr.flags = MIX_OP_RSP; - if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_IA_OPEN) { + if (msg.hdr.ver < MIX_MIN || msg.hdr.ver > MIX_MAX || msg.hdr.op != MIX_IA_OPEN) { mlog(0, " ERR: MIC client incompatible with MPXYD (exp %d,rcvd %d) or OP (exp %d,rcvd %d)\n", DAT_MIX_VER, msg.hdr.ver, msg.hdr.op, MIX_IA_OPEN); msg.hdr.ver = DAT_MIX_VER; @@ -857,6 +857,16 @@ resp: return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len)); } +typedef struct dat_mix_mr_v4 +{ + dat_mix_hdr_t hdr; + uint32_t mr_id; + uint32_t len; + uint64_t off; + uint64_t ctx; + +} dat_mix_mr_v4_t; + /* MIX_MR_CREATE: new proxy mr, insert on mr_list */ static int mix_mr_create(mcm_scif_dev_t *smd, dat_mix_mr_t *pmsg) { @@ -864,7 +874,12 @@ static int mix_mr_create(mcm_scif_dev_t *smd, dat_mix_mr_t *pmsg) struct mcm_mr *m_mr = NULL; /* hdr already read, get operation data */ - len = sizeof(dat_mix_mr_t) - sizeof(dat_mix_hdr_t); + + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_mr_compat_t) - sizeof(dat_mix_hdr_t); + else + len = sizeof(dat_mix_mr_t) - sizeof(dat_mix_hdr_t); + ret = scif_recv(smd->scif_op_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK); if (ret != len) { mlog(0, " ERR: ret %d, exp %d\n", ret, len); @@ -896,7 +911,11 @@ static int mix_mr_create(mcm_scif_dev_t *smd, dat_mix_mr_t *pmsg) resp: /* send back response */ pmsg->hdr.flags = MIX_OP_RSP; - len = sizeof(dat_mix_mr_t); + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_mr_compat_t); + else + len = sizeof(dat_mix_mr_t); + return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len)); } @@ -957,7 +976,7 @@ void mix_dto_event(struct mcm_cq *m_cq, struct dat_mix_wc *wc, int nc) int i; /* send DTO events to MIC client */ - msg.hdr.ver = DAT_MIX_VER; + msg.hdr.ver = m_cq->smd->md->mc->ver; msg.hdr.op = MIX_DTO_EVENT; msg.hdr.flags = MIX_OP_REQ; msg.cq_id = m_cq->cq_id; @@ -999,7 +1018,7 @@ void mix_cm_event(mcm_cm_t *m_cm, uint32_t event) int len; /* send event to MIC client */ - msg.hdr.ver = DAT_MIX_VER; + msg.hdr.ver = m_cm->md->mc->ver; msg.hdr.op = MIX_CM_EVENT; msg.hdr.flags = MIX_OP_REQ; msg.cm_id = m_cm->cm_id; @@ -1031,11 +1050,15 @@ static int mix_cm_req_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc struct mcm_qp *m_qp = NULL; struct mcm_cm *m_cm = NULL; - /* hdr already read, get operation data */ - len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + /* hdr already read, get operation data, support compat mode */ + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t) - sizeof(dat_mix_hdr_t); + else + len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + ret = scif_recv(scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK); if (ret != len) { - mlog(0, " ERR: ret %d, exp %d\n", ret, len); + mlog(0, " ERR: ret %d, exp %d ver %d\n", ret, len, smd->md->mc->ver); return ret; } @@ -1111,7 +1134,12 @@ resp: /* send back response */ pmsg->hdr.flags = MIX_OP_RSP; - len = sizeof(dat_mix_cm_t); + /* support compat mode */ + if (m_cm->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t); + else + len = sizeof(dat_mix_cm_t); + return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len)); } @@ -1121,8 +1149,12 @@ static int mix_cm_disc_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t s int len, ret; struct mcm_cm *m_cm; - /* hdr already read, get operation data */ - len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + /* hdr already read, get operation data, support compat mode */ + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t) - sizeof(dat_mix_hdr_t); + else + len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + ret = scif_recv(scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK); if (ret != len) { mlog(0, " ERR: ret %d, exp %d\n", ret, len); @@ -1149,8 +1181,12 @@ static int mix_cm_rtu_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc int len, ret; struct mcm_cm *m_cm; - /* hdr already read, get operation data */ - len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + /* hdr already read, get operation data, support compat mode */ + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t) - sizeof(dat_mix_hdr_t); + else + len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + ret = scif_recv(scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK); if (ret != len) { mlog(0, " ERR: ret %d, exp %d\n", ret, len); @@ -1166,12 +1202,11 @@ static int mix_cm_rtu_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc return -1; } - mlog(2," CONNECTED: QPt 0x%x QPr 0x%x -> dport 0x%x, dqpn 0x%x dlid 0x%x psize %d %Lx\n", + mlog(2," CONNECTED: QPt 0x%x QPr 0x%x -> dport 0x%x, dqpn 0x%x dlid 0x%x %Lx\n", m_cm->m_qp?m_cm->m_qp->qp_attr2.qp_num:0, m_cm->m_qp?m_cm->m_qp->qp_attr1.qp_num:0, ntohs(m_cm->msg.dport), ntohl(m_cm->msg.dqpn), - ntohs(m_cm->msg.daddr1.lid), ntohs(m_cm->msg.p_size), - ntohll(m_cm->msg.sys_guid)); + ntohs(m_cm->msg.daddr1.lid), ntohll(m_cm->msg.sys_guid)); /* send RTU on wire */ mcm_cm_rtu_out(m_cm); @@ -1197,7 +1232,7 @@ int mix_cm_rej_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) MCNTR(m_cm->md, MCM_CM_REJ_IN); msg.hdr.op = MIX_CM_REJECT; } - msg.hdr.ver = DAT_MIX_VER; + msg.hdr.ver = m_cm->md->mc->ver; msg.hdr.flags = MIX_OP_REQ; msg.cm_id = m_cm->cm_id; msg.cm_ctx = m_cm->cm_ctx; @@ -1219,7 +1254,12 @@ int mix_cm_rej_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) /* clean up proxy QP, CQ resources here ??? */ - len = sizeof(dat_mix_cm_t); + /* support compat mode */ + if (m_cm->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t); + else + len = sizeof(dat_mix_cm_t); + mpxy_lock(&m_cm->smd->evlock); if (scif_send_msg(m_cm->smd->scif_ev_ep, (void*)&msg, len)) { mpxy_unlock(&m_cm->smd->evlock); @@ -1250,7 +1290,7 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) pkt_len, ntohs(pkt->p_size), ntohll(pkt->sys_guid)); /* Forward, as is, conn_reply message to MIC client, with remote QP info */ - msg.hdr.ver = DAT_MIX_VER; + msg.hdr.ver = m_cm->md->mc->ver; msg.hdr.flags = MIX_OP_REQ; msg.hdr.op = MIX_CM_REP; msg.cm_id = m_cm->cm_id; @@ -1268,10 +1308,21 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) memcpy(&m_cm->msg.daddr2, &pkt->saddr2, sizeof(dat_mcm_addr_t)); mcm_ntoh_wrc(&m_cm->m_qp->wrc_rem, (mcm_wrc_info_t *)m_cm->msg.p_proxy); /* peer RI WRC info */ - mlog(2, " WRC_rem: m_qp %p - addr 0x%Lx rkey 0x%x len %d, sz %d end %d\n", - m_cm->m_qp, m_cm->m_qp->wrc_rem.wc_addr, m_cm->m_qp->wrc_rem.wc_rkey, - m_cm->m_qp->wrc_rem.wc_len, m_cm->m_qp->wrc_rem.wc_sz, - m_cm->m_qp->wrc_rem.wc_end); + mlog(2, " WRC: m_qp %p - WR 0x%Lx rkey 0x%x ln %d, sz %d end %d" + " WC 0x%Lx rkey 0x%x ln %d, sz %d end %d\n", + m_cm->m_qp, m_cm->m_qp->wrc.wr_addr, m_cm->m_qp->wrc.wr_rkey, + m_cm->m_qp->wrc.wr_len, m_cm->m_qp->wrc.wr_sz, + m_cm->m_qp->wrc.wr_end, m_cm->m_qp->wrc.wc_addr, + m_cm->m_qp->wrc.wc_rkey, m_cm->m_qp->wrc.wc_len, + m_cm->m_qp->wrc.wc_sz, m_cm->m_qp->wrc.wc_end); + + mlog(2, " WRC_rem: m_qp %p - WR 0x%Lx rkey 0x%x ln %d, sz %d end %d" + " WC 0x%Lx rkey 0x%x ln %d, sz %d end %d\n", + m_cm->m_qp, m_cm->m_qp->wrc_rem.wr_addr, m_cm->m_qp->wrc_rem.wr_rkey, + m_cm->m_qp->wrc_rem.wr_len, m_cm->m_qp->wrc_rem.wr_sz, + m_cm->m_qp->wrc_rem.wr_end, m_cm->m_qp->wrc_rem.wc_addr, + m_cm->m_qp->wrc_rem.wc_rkey, m_cm->m_qp->wrc_rem.wc_len, + m_cm->m_qp->wrc_rem.wc_sz, m_cm->m_qp->wrc_rem.wc_end); /* MXS <- MSS or HOST, fabric: TX: QP2->QP1 direct, RX: QP1<-QP2 proxy */ if ((MXS_EP(&m_cm->md->addr) && !MXS_EP(&m_cm->msg.daddr1)) && @@ -1355,7 +1406,12 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) goto err; } - len = sizeof(dat_mix_cm_t); + /* support compat mode */ + if (m_cm->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t); + else + len = sizeof(dat_mix_cm_t); + mpxy_lock(&m_cm->smd->evlock); if (scif_send_msg(m_cm->smd->scif_ev_ep, (void*)&msg, len)) { mpxy_unlock(&m_cm->smd->evlock); @@ -1390,7 +1446,7 @@ int mix_cm_req_in(mcm_cm_t *cm, dat_mcm_msg_t *pkt, int pkt_len) mcm_pr_addrs(2, pkt, acm->state, 1); - msg.hdr.ver = DAT_MIX_VER; + msg.hdr.ver = cm->md->mc->ver; msg.hdr.flags = MIX_OP_REQ; msg.hdr.op = MIX_CM_REQ; msg.hdr.status = MIX_SUCCESS; @@ -1427,8 +1483,14 @@ int mix_cm_req_in(mcm_cm_t *cm, dat_mcm_msg_t *pkt, int pkt_len) if (pkt->p_size) memcpy(acm->msg.p_data, pkt->p_data, ntohs(pkt->p_size)); - /* forward reformated CM message info to MIX client */ - memcpy(&msg.msg, &acm->msg, sizeof(dat_mcm_msg_t)); + /* forward reformated CM message info to MIX client, support compat mode */ + if (cm->md->mc->ver == MIX_COMP) { + len = sizeof(dat_mix_cm_compat_t); + memcpy(&msg.msg, &acm->msg, sizeof(dat_mcm_msg_compat_t)); + } else { + len = sizeof(dat_mix_cm_t); + memcpy(&msg.msg, &acm->msg, sizeof(dat_mcm_msg_t)); + } acm->state = MCM_ACCEPTING; mcm_qconn(acm->smd, acm); @@ -1436,7 +1498,6 @@ int mix_cm_req_in(mcm_cm_t *cm, dat_mcm_msg_t *pkt, int pkt_len) msg.cm_ctx = (uint64_t)acm; msg.sp_ctx = cm->sp_ctx; - len = sizeof(dat_mix_cm_t); mpxy_lock(&acm->smd->evlock); if (scif_send_msg(acm->smd->scif_ev_ep, (void*)&msg, len)) { mpxy_unlock(&acm->smd->evlock); @@ -1472,14 +1533,19 @@ int mix_cm_rtu_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) } /* Forward, as is, conn_reply message to MIC client, with remote QP info */ - msg.hdr.ver = DAT_MIX_VER; + msg.hdr.ver = m_cm->md->mc->ver; msg.hdr.flags = MIX_OP_REQ; msg.hdr.op = MIX_CM_RTU; msg.cm_id = m_cm->cm_id; msg.cm_ctx = m_cm->cm_ctx; m_cm->msg.sys_guid = pkt->sys_guid; /* save remote quid */ - len = sizeof(dat_mix_cm_t); + /* support compat mode */ + if (m_cm->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t); + else + len = sizeof(dat_mix_cm_t); + mpxy_lock(&m_cm->smd->evlock); if (scif_send_msg(m_cm->smd->scif_ev_ep, (void*)&msg, len)) { mpxy_unlock(&m_cm->smd->evlock); @@ -1500,8 +1566,12 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc uint32_t dqpn = 0, dqpn2 = 0; uint16_t dlid = 0, dlid2 = 0; - /* hdr already read, get operation data */ - len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + /* hdr already read, get operation data, support compat mode */ + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t) - sizeof(dat_mix_hdr_t); + else + len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + ret = scif_recv(scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK); if (ret != len) { mlog(0, " ERR: ret %d, exp %d\n", ret, len); @@ -1523,7 +1593,11 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc m_cm->cm_ctx = pmsg->sp_ctx; m_cm->p_size = ntohs(pmsg->msg.p_size); memcpy(&m_cm->p_data, &pmsg->msg.p_data, m_cm->p_size); - memcpy(&m_cm->msg, &pmsg->msg, sizeof(dat_mcm_msg_t)); + + if (smd->md->mc->ver == MIX_COMP) + memcpy(&m_cm->msg, &pmsg->msg, sizeof(dat_mcm_msg_compat_t)); + else + memcpy(&m_cm->msg, &pmsg->msg, sizeof(dat_mcm_msg_t)); /* Attach the QP for this CR */ m_cm->m_qp = mix_get_qp(smd, pmsg->qp_id); @@ -1535,13 +1609,22 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc m_cm->ref_cnt++; /* Passive: QP ref */ m_cm->m_qp->cm = m_cm; mcm_ntoh_wrc(&m_cm->m_qp->wrc_rem, (mcm_wrc_info_t *)m_cm->msg.p_proxy); /* save peer PI WRC info */ - mlog(2, " WRC_rem: m_qp %p - addr 0x%Lx rkey 0x%x len %d, sz %d end %d\n", - m_cm->m_qp, m_cm->m_qp->wrc_rem.wc_addr, m_cm->m_qp->wrc_rem.wc_rkey, - m_cm->m_qp->wrc_rem.wc_len, m_cm->m_qp->wrc_rem.wc_sz, - m_cm->m_qp->wrc_rem.wc_end); - mlog(8, " loc_guid %Lx, rem_guid %Lx\n", - ntohll(system_guid), ntohll(m_cm->msg.sys_guid)); + mlog(2, " WRC: m_qp %p - WR 0x%Lx rkey 0x%x ln %d, sz %d end %d" + " WC 0x%Lx rkey 0x%x ln %d, sz %d end %d\n", + m_cm->m_qp, m_cm->m_qp->wrc.wr_addr, m_cm->m_qp->wrc.wr_rkey, + m_cm->m_qp->wrc.wr_len, m_cm->m_qp->wrc.wr_sz, + m_cm->m_qp->wrc.wr_end, m_cm->m_qp->wrc.wc_addr, + m_cm->m_qp->wrc.wc_rkey, m_cm->m_qp->wrc.wc_len, + m_cm->m_qp->wrc.wc_sz, m_cm->m_qp->wrc.wc_end); + + mlog(2, " WRC_rem: m_qp %p - WR 0x%Lx rkey 0x%x ln %d, sz %d end %d" + " WC 0x%Lx rkey 0x%x ln %d, sz %d end %d\n", + m_cm->m_qp, m_cm->m_qp->wrc_rem.wr_addr, m_cm->m_qp->wrc_rem.wr_rkey, + m_cm->m_qp->wrc_rem.wr_len, m_cm->m_qp->wrc_rem.wr_sz, + m_cm->m_qp->wrc_rem.wr_end, m_cm->m_qp->wrc_rem.wc_addr, + m_cm->m_qp->wrc_rem.wc_rkey, m_cm->m_qp->wrc_rem.wc_len, + m_cm->m_qp->wrc_rem.wc_sz, m_cm->m_qp->wrc_rem.wc_end); /* MXS -> MSS or HOST, remote: need QPr1, saddr1 on mpxyd */ if ((MXS_EP(&m_cm->md->addr) && !MXS_EP(&m_cm->msg.daddr1)) && @@ -1704,8 +1787,12 @@ static int mix_cm_rej_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc int len, ret; struct mcm_cm *m_cm; - /* hdr already read, get operation data */ - len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + /* hdr already read, get operation data, support compat mode */ + if (smd->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t) - sizeof(dat_mix_hdr_t); + else + len = sizeof(dat_mix_cm_t) - sizeof(dat_mix_hdr_t); + ret = scif_recv(scif_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK); if (ret != len) { mlog(0, " ERR: ret %d, exp %d\n", ret, len); @@ -1722,7 +1809,10 @@ static int mix_cm_rej_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc smd->entry.tid, pmsg->cm_id, (void*)pmsg->cm_ctx, pmsg->qp_id, m_cm); /* update CM message from MIX client, send back guid */ - memcpy(&m_cm->msg, &pmsg->msg, sizeof(dat_mcm_msg_t)); + if (smd->md->mc->ver == MIX_COMP) + memcpy(&m_cm->msg, &pmsg->msg, sizeof(dat_mcm_msg_compat_t)); + else + memcpy(&m_cm->msg, &pmsg->msg, sizeof(dat_mcm_msg_t)); #ifdef MPXYD_LOCAL_SUPPORT m_cm->msg.sys_guid = system_guid; @@ -1743,12 +1833,17 @@ int mix_cm_disc_in(mcm_cm_t *m_cm) int len; /* send disconnect to MIC client */ - msg.ver = DAT_MIX_VER; + msg.ver = m_cm->md->mc->ver; msg.flags = MIX_OP_REQ; msg.op = MIX_CM_DISC; msg.req_id = m_cm->cm_id; - len = sizeof(dat_mix_hdr_t); + /* support compat mode */ + if (m_cm->md->mc->ver == MIX_COMP) + len = sizeof(dat_mix_cm_compat_t); + else + len = sizeof(dat_mix_cm_t); + mpxy_lock(&m_cm->smd->evlock); if (scif_send_msg(m_cm->smd->scif_ev_ep, (void*)&msg, len)) { return -1; @@ -1981,7 +2076,7 @@ int mix_scif_recv(mcm_scif_dev_t *smd, scif_epd_t scif_ep) len = sizeof(*phdr); phdr->ver = 0; phdr->op = 0; ret = scif_recv(scif_ep, phdr, len, SCIF_RECV_BLOCK); - if ((ret != len) || (phdr->ver != DAT_MIX_VER)) { + if ((ret != len) || (phdr->ver < MIX_MIN) || (phdr->ver > MIX_MAX)) { mlog(0, " ERR: smd %p ep %d ret %d exp %d ver %d op %s flgs %d\n", smd, scif_ep, ret, len, phdr->ver, mix_op_str(phdr->op), diff --git a/dapl/svc/mpxyd.c b/dapl/svc/mpxyd.c index 8385ccc..2b83509 100644 --- a/dapl/svc/mpxyd.c +++ b/dapl/svc/mpxyd.c @@ -634,6 +634,7 @@ mcm_scif_dev_t *mix_open_device(dat_mix_open_t *msg, scif_epd_t op_ep, scif_epd_ char value[64]; char path[64]; + mc->ver = msg->hdr.ver; mc->scif_id = node; sprintf(path, "/sys/class/mic/mic%d/device", mc->scif_id - 1); diff --git a/dapl/svc/mpxyd.h b/dapl/svc/mpxyd.h index 0095e5d..49a1f18 100644 --- a/dapl/svc/mpxyd.h +++ b/dapl/svc/mpxyd.h @@ -62,6 +62,9 @@ #define MCM_IB_INLINE 160 #define MIX_MAX_MSG_SIZE (8*1024*1024) +#define MIX_MIN 4 /* oldest version supported */ +#define MIX_COMP 4 /* compatibility version */ +#define MIX_MAX DAT_MIX_VER #define MCM_PROFILE 1 #define MCM_PROFILE_DBG 0 @@ -355,6 +358,7 @@ typedef struct mcm_scif_dev { #define MCM_IB_MAX 8 #define MCM_CLIENT_MAX 8 typedef struct mcm_client { + uint16_t ver; uint16_t scif_id; int numa_node; int cpu_mask; diff --git a/dat/include/dat2/dat_mic_extensions.h b/dat/include/dat2/dat_mic_extensions.h index 15bfe2a..a23b115 100755 --- a/dat/include/dat2/dat_mic_extensions.h +++ b/dat/include/dat2/dat_mic_extensions.h @@ -217,7 +217,7 @@ typedef struct dat_mcm_addr uint8_t qp_type; } __attribute__((packed)) dat_mcm_addr_t; -/* MCM message extended, 256 bytes */ +/* MCM message extended after existing fields, 256 bytes */ typedef struct dat_mcm_msg { uint16_t ver; @@ -236,12 +236,34 @@ typedef struct dat_mcm_msg dat_mcm_addr_t saddr2; /* QPr local, MIC or MCM on non-MIC node or MPXY */ dat_mcm_addr_t daddr1; /* QPt remote, MPXY or MCM on non-MIC node */ dat_mcm_addr_t daddr2; /* QPr remote, MIC or MCM on non-MIC node or MPXY */ - uint64_t sys_guid; /* system image guid */ - uint8_t p_proxy[DAT_MCM_PROXY_DATA]; uint8_t p_data[DAT_MCM_PDATA_SIZE]; + uint8_t p_proxy[DAT_MCM_PROXY_DATA]; + uint64_t sys_guid; /* system image guid */ } __attribute__((packed)) dat_mcm_msg_t; +/* MCM message, 208 bytes */ +typedef struct dat_mcm_msg_compat +{ + uint16_t ver; + uint16_t op; + uint16_t sport; /* src cm port */ + uint16_t dport; /* dst cm port */ + uint32_t sqpn; /* src cm qpn */ + uint32_t dqpn; /* dst cm qpn */ + uint16_t p_size; + uint32_t s_id; /* src pid */ + uint32_t d_id; /* dst pid */ + uint8_t rd_in; /* atomic_rd_in */ + uint8_t resv[5];/* Shadow QP's, 2 connections */ + dat_mcm_addr_t saddr; /* QPt local, MPXY or MCM on non-MIC node */ + dat_mcm_addr_t saddr2; /* QPr local, MIC or MCM on non-MIC node */ + dat_mcm_addr_t daddr; /* QPt remote, MPXY or MCM on non-MIC node */ + dat_mcm_addr_t daddr2; /* QPr remote, MIC or MCM on non-MIC node */ + uint8_t p_data[DAT_MCM_PDATA_SIZE]; + +} __attribute__((packed)) dat_mcm_msg_compat_t; + /***** MIC Indirect Exchange (MIX) protocol over SCIF ****/ /* Revisions: @@ -249,8 +271,9 @@ typedef struct dat_mcm_msg * v2 - Support 3 separate EP's per device (Operations/CM, unsolicited events, transmit) * v3 - reduce SGE from 7 to 4, add post_send inline support * v4 - pack all command structures, replace verbs wr/wc types with defined MIX types + * v5 - CM services with proxy_in, private data */ -#define DAT_MIX_VER 4 +#define DAT_MIX_VER 5 #define DAT_MIX_MSG_MAX 256 #define DAT_MIX_INLINE_MAX 256 #define DAT_MIX_RDMA_MAX (8*1024*1024) @@ -449,6 +472,16 @@ typedef struct dat_mix_mr } __attribute__((packed)) dat_mix_mr_t; +typedef struct dat_mix_mr_compat +{ + dat_mix_hdr_t hdr; + uint32_t mr_id; + uint32_t len; + uint64_t off; + uint64_t ctx; + +} __attribute__((packed)) dat_mix_mr_compat_t; + /***** MIX listen, status returned, no data *****/ typedef struct dat_mix_listen { @@ -533,6 +566,17 @@ typedef struct dat_mix_cm } dat_mix_cm_t; +typedef struct dat_mix_cm_compat +{ + dat_mix_hdr_t hdr; + uint64_t sp_ctx; + uint64_t cm_ctx; + uint32_t cm_id; + uint32_t qp_id; + dat_mcm_msg_compat_t msg; + +} dat_mix_cm_compat_t; + typedef struct dat_mix_cm_event { dat_mix_hdr_t hdr; -- 2.41.0