" mr_register: ia=%p, lmr=%p va=%p ln=%d pv=0x%x\n",
ia_ptr, lmr, virt_addr, length, privileges);
- /* TODO: shared memory */
if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR,
" mr_register_shared: NOT IMPLEMENTED\n");
/*
* CM proxy services, MCM on MIC to MPXYD via SCIF
*
- * NOTE: all sync MIX operations for now, TODO async?
- *
* MIX_IA_OPEN
*/
int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only)
return 0;
}
-/* TODO: change for aperture/mapped memory ?? optimize */
int dapli_mix_cq_poll(ib_cq_handle_t m_cq, struct ibv_wc *wc)
{
/* MPXYD will send event and update EVD, return empty to avoid unnecessary SCIF traffic */
* Separate EP's per thread to avoid locking overhead on SCIF streams
*/
-/* locate CM object by context, address of object for now--- TODO change to ID */
dp_ib_cm_handle_t dapli_mix_get_cm(ib_hca_transport_t *tp, uint64_t cm_ctx)
{
dp_ib_cm_handle_t cm = NULL;
}
/* create 2nd qp for proxy-in service, remote is not proxy-in so we need 2 QPs */
-/* TODO: MXS - MSS, CQt will process RR and RW for bi-directional, CQ overflow? create separate CQs? */
int m_qp_create_pi(mcm_scif_dev_t *smd, struct mcm_qp *m_qp)
{
struct ibv_qp_init_attr qp_create;
qp_create.cap.max_send_sge = 1;
qp_create.cap.max_inline_data = mcm_ib_inline;
qp_create.qp_type = IBV_QPT_RC;
- qp_create.send_cq = m_qp->m_cq_tx->ib_cq; /* resize CQ ? */
+ qp_create.send_cq = m_qp->m_cq_tx->ib_cq; /* signal rate adjusted to avoid CQ overrun */
qp_create.recv_cq = m_qp->m_cq_rx->ib_cq;
qp_create.qp_context = (void *)m_qp;
return 0;
}
-#ifdef MCM_CQ_TO_RX
-/* move m_cq from cqlist to cqrlist for PI service */
-static void m_cq_to_rx(mcm_scif_dev_t *smd, struct mcm_cq *m_cq)
-{
- mpxy_lock(&smd->cqlock);
- remove_entry(&m_cq->entry);
- insert_tail(&m_cq->entry, &smd->cqrlist, m_cq);
- m_cq->cq_id = m_cq->entry.tid;
- write(smd->md->mc->rx_pipe[1], "w", sizeof("w"));
- mlog(8, " cq %p id %d moved to RX cqlist\n", m_cq, m_cq->cq_id);
- mpxy_unlock(&smd->cqlock);
-}
-#endif
-
static int m_qp_create(mcm_scif_dev_t *smd,
struct ibv_qp_init_attr *attr,
uint32_t scq_id,
(system_guid == m_cm->msg.sys_guid)) {
mlog(2, " MXS -> MXS local - MODE NOT SUPPORTED, running MXS -> MXS remote mode\n");
- /* TODO: change this to work inside box without IB QP's */
qp = m_cm->m_qp->ib_qp2;
dgid = (union ibv_gid *)m_cm->msg.daddr2.gid;
dqpn = m_cm->msg.daddr2.qpn;
if (++m_qp->sr_hd == m_qp->sr_end)
m_qp->sr_hd = 0;
- /* check, mark full ??? TODO, start hd at 1?? */
-
mpxy_unlock(&m_qp->rxlock);
return 0;
err:
struct mcm_sr *m_sr = NULL;
off_t l_off, l_off_wr, r_off;
int ret, i, l_start, l_end, l_len, sg_len, w_len, num_sge, wr_idx, wr_cnt = 0;
-#if MCM_PROFILE_DBG
- struct mcm_wr_rx *wr_last = NULL;
- int lwr_cnt = 0;
- int wr_last_idx = 0;
-
- if (!(wr_sig->flags & M_READ_POSTED)) {
- mlog(0, " RR_sig !POSTED[%d]: flgs=0x%x WR[%d,%d-%d,%d] op 0x%x prev %d\n",
- wr_sig->w_idx, wr_sig->flags, m_qp->wr_tl_r, m_qp->wr_tl_r_wt,
- wr_sig->w_idx, m_qp->wr_hd_r, wc->opcode, m_qp->wt_last_sig);
- }
-#endif
+
wr_idx = m_qp->wr_tl_r_wt; /* from WT tail, process RR's posted until reaching wr_last */
while (m_qp->pi_rr_cnt) { /* RR's pending */
wr_rx = (struct mcm_wr_rx *)(m_qp->wrc.wr_addr + (m_qp->wrc.wr_sz * wr_idx));
if (!(wr_rx->flags & M_READ_POSTED)) {
-
-#if MCM_PROFILE_DBG
- if ((wr_rx == wr_sig) || (wr_rx->flags & M_READ_PAUSED)) {
- mlog(0, " !POSTED or PAUSE[%d]: flg=0x%x WR[%d,%d-%d,%d] %p LWR[%p->%d,%d]"
- " m_qp %p op 0x%x cnt %d,%d prev %d\n",
- wr_idx, wr_sig->flags, m_qp->wr_tl_r, m_qp->wr_tl_r_wt,
- wr_sig->w_idx, m_qp->wr_hd_r,
- wr_rx, wr_last, wr_last ? wr_last->w_idx:0, wr_last_idx, m_qp,
- wc->opcode, wr_cnt, lwr_cnt, m_qp->wt_last_sig);
- if (!m_qp->wt_err) { /* first error dump list */
- wr_idx = m_qp->wr_tl_r;
- while (m_qp->pi_rr_cnt) {
- wr_rx = (struct mcm_wr_rx *)(m_qp->wrc.wr_addr + (m_qp->wrc.wr_sz * wr_idx));
- mlog(0, " wr[%d] %p RR(%d,%d,%d) flg %x tl %d tl_wt %d hd %d\n",
- wr_rx->w_idx, wr_rx, m_qp->post_cnt_rr, m_qp->stall_cnt_rr,
- m_qp->pi_rr_cnt, wr_rx->flags, m_qp->wr_tl_r, m_qp->wr_tl_r_wt,
- m_qp->wr_hd_r);
-
- if (wr_idx == m_qp->wr_hd_r)
- break;
-
- wr_idx = (wr_idx + 1 ) & m_qp->wrc.wr_end; /* next */
- }
- m_qp->wt_err = 1;
- }
- break;
- }
- lwr_cnt++;
- wr_last = wr_rx;
- m_qp->wt_last_sig = wr_sig->w_idx;
-#endif
/* reached RR signaled marker, or head pointer */
if (wr_idx == wr_sig->w_idx || wr_idx == m_qp->wr_hd_r)
break;
wr_rx->flags |= M_READ_WRITE_TO;
m_qp->post_cnt_wt++;
-#if MCM_PROFILE_DBG
- wr_last = wr_rx;
- wr_last_idx = wr_idx;
-#endif
/* reached RR signaled marker, or head */
if (wr_idx == wr_sig->w_idx || wr_idx == m_qp->wr_hd_r)
break;
wr_idx = (wr_idx + 1) & m_qp->wrc.wr_end; /* next WR */
}
-#if MCM_PROFILE_DBG
- m_qp->wt_last_sig = wr_sig->w_idx;
-#endif
-
write(smd->md->mc->rx_pipe[1], "w", sizeof "w"); /* signal rx_thread */
return;
bail:
/* RR has completed, forward segment to final dst address via SCIF_sendto */
void m_pi_req_event(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx, struct ibv_wc *wc, int type)
{
-#if MCM_PROFILE_DBG
- uint32_t delay, pi_ts = mcm_ts_us();
-#endif
mlog(4, " WR_rx[%d] %p %s complete po-addr=%p ln=%d, key=%x ctx=%Lx\n",
wr_rx->w_idx, wr_rx,
wc->opcode == IBV_WC_RDMA_READ ? "RR":"RW_IMM WC",
wr_rx->sg[0].lkey, wr_rx->context);
mpxy_lock(&m_qp->rxlock);
-
-#if MCM_PROFILE_DBG
- delay = mcm_ts_us() - pi_ts;
- if (delay > 200)
- mlog(0, " DELAYED (%d): WR_rx[%d] %p %s complete po-addr=%p ln=%d, key=%x ctx=%Lx\n",
- delay, wr_rx->w_idx, wr_rx,
- wc->opcode == IBV_WC_RDMA_READ ? "RR":"RW_IMM WC",
- wr_rx->sg[0].addr, wr_rx->sg[0].length,
- wr_rx->sg[0].lkey, wr_rx->context);
-#endif
if (wc->status && (wc->status != IBV_WC_WR_FLUSH_ERR)) {
char *sbuf = (char*)wr_rx->sg[1].addr;
ib_wr.wr.rdma.rkey, ib_wr.sg_list->addr, ib_wr.sg_list->lkey,
m_qp->wr_tl_r, m_qp->wr_hd_r, wr_rx->m_idx);
-#if MCM_PROFILE_DBG
- if (m_qp->pi_rr_cnt == 1) {
- m_qp->rr_last = wr_rx;
- m_qp->rr_last_idx = wr_rx->w_idx;
- } else if ((m_qp->rr_last == wr_rx) || (((m_qp->rr_last_idx + 1) != wr_rx->w_idx) && wr_rx->w_idx)) {
- mlog(0, " ERR: WR Order? wr[%d] %p RR(%d,%d,%d):"
- " flgs %x tl %d tl_wt %d hd %d last idx (flg %x op %x) %d %d != %d-1\n",
- wr_rx->w_idx, wr_rx, m_qp->post_cnt_rr, m_qp->stall_cnt_rr, m_qp->pi_rr_cnt,
- wr_rx->flags, m_qp->wr_tl_r, m_qp->wr_tl_r_wt, m_qp->wr_hd_r,
- m_qp->rr_last->flags, m_qp->rr_last->wr.opcode,
- m_qp->rr_last_idx_sav, m_qp->rr_last_idx, wr_rx->w_idx);
- mlog(0, " ERR: WR[%d] %p RR (%d,%d,%d,%d):"
- " flgs 0x%x ln %d r_addr,key %Lx %x to l_addr,key %Lx %x"
- " tl %d w_tl %d hd %d\n",
- wr_rx->w_idx, wr_rx, m_qp->pi_rr_cnt,
- m_qp->pi_rw_cnt, m_qp->post_sig_cnt, m_qp->stall_cnt_rr,
- ib_wr.send_flags, l_len, ib_wr.wr.rdma.remote_addr,
- ib_wr.wr.rdma.rkey, ib_wr.sg_list->addr, ib_wr.sg_list->lkey,
- m_qp->wr_tl_r, m_qp->wr_tl_r_wt, m_qp->wr_hd_r);
-
- m_qp->rr_last = wr_rx;
- m_qp->rr_last_idx_sav = m_qp->rr_last_idx;
- m_qp->rr_last_idx = wr_rx->w_idx;
- } else {
- m_qp->rr_last = wr_rx;
- m_qp->rr_last_idx_sav = m_qp->rr_last_idx;
- m_qp->rr_last_idx = wr_rx->w_idx;
- }
-#endif
write(smd->md->mc->tx_pipe[1], "w", sizeof "w");
return;
bail:
void m_pi_rcv_event(struct mcm_qp *m_qp, wrc_idata_t *wrc)
{
mlog(8," WRC id %x, type %x, flags %x\n", wrc->id, wrc->type, wrc->flags);
-
-#if MCM_PROFILE_DBG
- if (wrc->type == M_WR_TYPE) {
- if (!m_qp->wrc_last_idx)
- m_qp->wrc_last_idx = wrc->id;
- else if ((m_qp->wrc_last_idx == wrc->id) ||
- (((m_qp->wrc_last_idx + 1) != wrc->id) && (wrc->id != 0))) {
- mlog(0," ERR: WR_id %d type %x flgs %x id %d != %d + 1\n",
- wrc->id, wrc->type, wrc->flags, wrc->id, m_qp->wrc_last_idx);
- m_qp->wrc_last_idx = wrc->id;
- } else
- m_qp->wrc_last_idx = wrc->id;
- } else {
- if (!m_qp->wrcc_last_idx)
- m_qp->wrcc_last_idx = wrc->id;
- else if ((m_qp->wrcc_last_idx == wrc->id) ||
- (((m_qp->wrcc_last_idx + 1) != wrc->id) && (wrc->id != 0))) {
- mlog(0," ERR: WC_id %d type %x flgs %x id %d != %d + 1\n",
- wrc->id, wrc->type, wrc->flags, wrc->id, m_qp->wrcc_last_idx);
- m_qp->wrcc_last_idx = wrc->id;
- } else
- m_qp->wrcc_last_idx = wrc->id;
- }
-#endif
if (wrc->type == M_WR_TYPE) {
struct mcm_wr_rx *wr_rx;
ntohll(m_cm->msg.sys_guid), ntohll(system_guid),
mcm_map_str(m_cm->msg.daddr1.ep_map));
- return 0; /* TODO */
+ return 0;
}
/*
struct mcm_wr *m_wr;
struct ibv_send_wr *bad_wr;
int ret, wr_idx, wr_max, poll_cnt, cn_signal;
-#if MCM_PROFILE_DBG
- int first = 0, last = 0;
-#endif
mpxy_lock(&m_qp->txlock);
if ((m_qp->wr_tl == m_qp->wr_hd) ||
mpxy_unlock(&m_qp->txlock);
return;
}
-
-#if MCM_PROFILE_DBG
- wr_max = 40;
- wr_idx = m_qp->wr_tl_rf;
-
- /* wait for all FS to LS before posting, defer IB RW-RR traffic on first IO */
- while (wr_max) {
- m_wr = (struct mcm_wr *)(m_qp->wr_buf + (m_qp->wr_sz * wr_idx));
- if ((m_wr->flags & M_READ_FROM_DONE) && !(m_wr->flags & M_SEND_POSTED)) {
- if ((m_wr->flags & M_SEND_FS)) {
- mlog(0x4, " FS: qp %p hd %d tl %d idx %d wr %p wr_id %p,"
- " addr %p sz %d flgs 0x%x\n",
- m_qp, m_qp->wr_hd,
- m_qp->wr_tl, wr_idx, m_wr, m_wr->org_id,
- m_wr->wr.sg_list ? m_wr->wr.sg_list->addr:0,
- m_wr->wr.sg_list ? m_wr->sg->length:0,
- m_wr->flags);
- first = 1;
- }
- if ((m_wr->flags & M_SEND_LS)) {
- mlog(0x10, " LS: qp %p hd %d tl %d idx %d wr %p wr_id %p,"
- " addr %p sz %d flgs 0x%x\n",
- m_qp, m_qp->wr_hd,
- m_qp->wr_tl, wr_idx, m_wr, m_wr->org_id,
- m_wr->wr.sg_list ? m_wr->wr.sg_list->addr:0,
- m_wr->wr.sg_list ? m_wr->sg->length:0,
- m_wr->flags);
- last = 1;
- break;
- }
- }
- if (wr_idx == m_qp->wr_hd)
- break;
-
- wr_idx = (wr_idx + 1) & m_qp->wr_end;
- wr_max--;
- }
-#endif
wr_max = 40;
wr_idx = m_qp->wr_tl_rf;
}
#endif
}
-#if MCM_PROFILE_DBG
- if (!first || !last) {
- if (wr_idx == m_qp->wr_hd)
- goto done;
- wr_idx = (wr_idx + 1) & m_qp->wr_end;
- continue;
- }
-#endif
if (!(m_wr->flags & M_SEND_INLINE))
MCNTR(smd->md, MCM_SCIF_READ_FROM_DONE);
m_qp->comp_cnt,
m_wr->flags & M_SEND_FS ? "FS":
(m_wr->flags & M_SEND_LS) ? "LS":"");
-#if MCM_PROFILE_DBG
- if (m_wr->flags & M_SEND_LS)
- goto done;
-#endif
}
if (!(m_wr->flags & M_SEND_POSTED)) {
struct mcm_cq *m_cq;
struct mcm_qp *m_qp;
struct mcm_fd_set *set;
- int i, time_ms, data, events, cpu_id, wr_cnt, rf_cnt, rd_cnt, smd_cnt;
+ int i, time_ms, data, events, cpu_id, smd_cnt;
char rbuf[2];
if (mcm_affinity) {
mpxy_lock(&mc->txlock);
mcm_fd_zero(set);
mcm_fd_set(mc->tx_pipe[0], set, POLLIN);
- data = 0, events = 0, wr_cnt=0, rf_cnt=0, rd_cnt=0, smd_cnt=0;
+ data = 0, events = 0, smd_cnt=0;
for (i=0;i<MCM_IB_MAX;i++) {
md = &mc->mdev[i];
if (md->ibctx == NULL)
smd = get_next_entry(&smd->entry, &md->smd_list);
}
mpxy_unlock(&md->slock);
- wr_cnt += ((uint64_t *)md->cntrs)[MCM_QP_WRITE];
- rf_cnt += ((uint64_t *)md->cntrs)[MCM_SCIF_READ_FROM];
- rd_cnt += ((uint64_t *)md->cntrs)[MCM_QP_READ_DONE];
}
time_ms = smd_cnt ? 0:-1;
mpxy_unlock(&mc->txlock);
- if (time_ms) mlog(0x10, "TX sleep WR %d RF %d RD %d\n", wr_cnt,rf_cnt,rd_cnt);
mcm_select(set, time_ms);
- if (time_ms) mlog(0x10, "TX wake WR %d RF %d RD %d\n",wr_cnt,rf_cnt,rd_cnt);
if (mcm_poll(mc->tx_pipe[0], POLLIN) == POLLIN) {
int cnt = 0;
while (read(mc->tx_pipe[0], rbuf, 1) > 0)
struct mcm_cq *m_cq;
struct mcm_fd_set *set;
char rbuf[2];
- int i, data = 0, cpu_id, time_ms, rr_cnt, wt_cnt;
+ int i, data = 0, cpu_id, time_ms;
if (mcm_affinity) {
mpxy_lock(&mc->cplock);
mpxy_lock(&mc->rxlock);
mcm_fd_zero(set);
mcm_fd_set(mc->rx_pipe[0], set, POLLIN);
- data = 0, rr_cnt = 0, wt_cnt = 0;
+ data = 0;
for (i=0;i<MCM_IB_MAX;i++) {
md = &mc->mdev[i];
if (md->ibctx == NULL)
smd = get_next_entry(&smd->entry, &md->smd_list);
}
mpxy_unlock(&md->slock);
- rr_cnt += ((uint64_t *)md->cntrs)[MCM_QP_READ];
- wt_cnt += ((uint64_t *)md->cntrs)[MCM_SCIF_WRITE_TO];
}
time_ms = data ? 0:-1;
mpxy_unlock(&mc->rxlock);
- if (time_ms) mlog(0x20, "RX sleep RR %d WT %d\n",rr_cnt, wt_cnt);
mcm_select(set, time_ms);
- if (time_ms) mlog(0x20, "RX wake RR %d WT %d\n",rr_cnt, wt_cnt);
if (mcm_poll(mc->rx_pipe[0], POLLIN) == POLLIN)
read(mc->rx_pipe[0], rbuf, 2);
}
uint32_t last_wr_sig;
uint32_t last_wr_pst;
#endif
-#if MCM_PROFILE_DBG
- int wt_err;
- int wt_last_sig;
- struct mcm_wr_rx *rr_last;
- int rr_last_idx;
- int rr_last_idx_sav;
- int wrc_last_idx;
- int wrcc_last_idx;
-#endif
+
} mcm_qp_t;
/* DAPL MCM CQ object, id in entry */
} __attribute__((packed)) dat_mix_qp_t;
/***** MIX CQ operations, create, free, poll, event *****/
-/*
- * todo, move polling WC's and notifications to aperture windows
- * Might not be needed unless signaling lot's of TX WR's
- *
- */
typedef struct dat_mix_cq
{
dat_mix_hdr_t hdr;