]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
mpxyd: proxy out doesn't release proxy buffer as quickly as necessary
authorArlin Davis <arlin.r.davis@intel.com>
Tue, 18 Feb 2014 23:23:37 +0000 (15:23 -0800)
committerArlin Davis <arlin.r.davis@intel.com>
Tue, 18 Feb 2014 23:23:37 +0000 (15:23 -0800)
Change proxy-out WC processing to release the proxy-out buffer
during every event, not just consumer signaled events.
The remote proxy-in will only send WC if this WR segments has been
completely moved and is ready for reuse.

Change WR and proxy memory stall logic to limit retries to 5 seconds.
Print warning messages every 100 retries with appropriate queue info.

dapl/svc/mpxy_out.c

index b9f1966020866dd401e6dbed1e9acebf7a7d7c14..9b22327714fd87a5825ee353717b2c41371d6559 100644 (file)
@@ -89,8 +89,8 @@ err:
 /* called with smd->sblock */
 static void m_po_buf_hd(mcm_scif_dev_t *smd, int m_idx)
 {
-       mlog(8," m_buf_wc %p: hd %d tl %d buf_wc_r[%d].m_idx=0x%x\n",
-               smd->m_buf_wc, smd->m_buf_hd, smd->m_buf_tl,
+       mlog(8," m_hd 0x%Lx - m_buf_wc %p: hd %d tl %d buf_wc_r[%d].m_idx=0x%x\n",
+               smd->m_hd, smd->m_buf_wc, smd->m_buf_hd, smd->m_buf_tl,
                smd->m_buf_hd, m_idx);
 
        if (smd->m_buf_hd == smd->m_buf_tl) {
@@ -115,8 +115,8 @@ static void m_po_buf_tl(mcm_scif_dev_t *smd, int m_idx)
        if (idx == smd->m_buf_end)
                idx = 0;
 
-       mlog(8," m_idx %d m_tl %d buf_wc[%d].m_idx=%d\n",
-               m_idx, idx, idx, smd->m_buf_wc[idx].m_idx);
+       mlog(8," m_tl 0x%Lx -> m_idx 0x%x buf_wc[%d].m_idx=0x%x\n",
+               smd->m_tl, m_idx, idx, smd->m_buf_wc[idx].m_idx);
 
        /* 1st one, completed in order, move proxy buffer tl and buf wc tl */
        if (smd->m_buf_wc[idx].m_idx == m_idx) {
@@ -427,7 +427,8 @@ done:
        mpxy_unlock(&m_qp->txlock);
 
        if (smd->destroy) {
-               mlog(0, "  SMD destroy - QP %p hd %d tl %d pst %d,%d cmp %d, pending data %d, events %d\n",
+               mlog(0, " SMD destroy - QP %p hd %d tl %d pst %d,%d cmp %d,"
+                       " pending data %d, events %d\n",
                        m_qp, m_qp->wr_hd, m_qp->wr_tl,m_qp->post_cnt,
                        m_qp->post_sig_cnt, m_qp->comp_cnt, data, events);
        }
@@ -440,7 +441,7 @@ static uint32_t last_rf = 0;
 /* initiate proxy data transfer, operation channel */
 int m_po_proxy_data(mcm_scif_dev_t *smd, dat_mix_sr_t *pmsg, struct mcm_qp *m_qp)
 {
-       int len, ret, i, retries;
+       int len, ret, i, retries, wc_err = IBV_WC_GENERAL_ERR;
        off_t l_off, r_off;
        uint64_t total_offset;
        int  l_start, l_end, l_len, cacheln_off, seg_len;
@@ -504,24 +505,34 @@ retry_mr:
                                        MCNTR(smd->md, MCM_MX_MR_STALL);
                                        write(smd->md->mc->tx_pipe[1], "w", sizeof("w"));
                                }
-                               if (++retries == 100) {
-                                       mlog(0, " ERR: retries exhuasted, no proxy memory, %x hd 0x%x tl 0x%x %x,"
-                                               " need 0x%x-0x%x ln %d, retries = %d\n",
-                                               smd->m_buf, smd->m_hd, smd->m_tl, smd->m_buf + smd->m_len,
-                                               l_start, l_end, seg_len, retries);
+                               if (!(++retries % 100)) {
+                                       mlog(1, " WARN: DTO delay, no PO memory,"
+                                               " %x hd 0x%x tl 0x%x %x,"
+                                               " need 0x%x-0x%x ln %d,"
+                                               " retries = %d -> %s\n",
+                                               smd->m_buf, smd->m_hd, smd->m_tl,
+                                               smd->m_buf + smd->m_len,
+                                               l_start, l_end, seg_len, retries,
+                                               mcm_map_str(m_qp->cm->msg.daddr1.ep_map));
+                                       mcm_pr_addrs(1, &m_qp->cm->msg, m_qp->cm->state, 0);
+                               }
+                               if (retries == 500) {
                                        ret = ENOMEM;
+                                       wc_err = IBV_WC_RETRY_EXC_ERR;
                                        goto bail;
                                }
                                mpxy_unlock(&m_qp->txlock);
                                mpxy_unlock(&smd->tblock);
-                               sleep_usec(1000);
+                               sleep_usec(10000);
                                mpxy_lock(&m_qp->txlock);
                                mpxy_lock(&smd->tblock);
                                goto retry_mr;
-
-                       } else if (retries) {
-                               mlog(1, " stalled: %x hd 0x%x tl 0x%x %x got 0x%x-0x%x %d, retries %d\n",
-                                       smd->m_buf, smd->m_hd, smd->m_tl, smd->m_buf + smd->m_len,
+                       }
+                       if (retries) {
+                               mlog(1, " MEM stalled: %x hd 0x%x tl 0x%x %x"
+                                       " got 0x%x-0x%x %d, retried %d\n",
+                                       smd->m_buf, smd->m_hd, smd->m_tl,
+                                       smd->m_buf + smd->m_len,
                                        l_start, l_end, seg_len, retries);
                        }
 
@@ -529,18 +540,24 @@ retry_mr:
                        smd->m_hd = l_end;
                        mpxy_unlock(&smd->tblock);
 
-                       mlog(4, " SCIF_readfrom[%d] l_off %p, r_off %p, l_start 0x%x l_end 0x%x seg_len %d, len %d cacheln_off %d \n",
-                                i, l_off, r_off, l_start, l_end, seg_len, len, cacheln_off);
+                       mlog(4, " SCIF_readfrom[%d] l_off %p, r_off %p,"
+                               " l_start 0x%x l_end 0x%x seg_len %d,"
+                               " len %d cacheln_off %d \n",
+                                i, l_off, r_off, l_start, l_end,
+                                seg_len, len, cacheln_off);
 #if MCM_PROFILE
                        m_wr->wr.wr.atomic.swap = mcm_ts_us();
                        m_wr->wr.wr.atomic.rkey = m_qp->wr_pp;
                        if (last_rf) {
                                uint32_t now = mcm_ts_us();
                                if ((now - last_rf) > 500) {
-                                       mlog(0x10, " WARN: delayed post (%d us): WR[%d] hd %d tl %d io %d"
-                                               " pst %d,%d cmp %d wr_pp %d\n",
-                                               now - last_rf, m_wr->w_idx, m_qp->wr_hd, m_qp->wr_tl,
-                                               seg_len, m_qp->post_cnt, m_qp->post_sig_cnt,
+                                       mlog(0x10, " WARN: delayed post (%d us):"
+                                                  " WR[%d] hd %d tl %d io %d"
+                                                  " pst %d,%d cmp %d wr_pp %d\n",
+                                               now - last_rf, m_wr->w_idx,
+                                               m_qp->wr_hd, m_qp->wr_tl,
+                                               seg_len, m_qp->post_cnt,
+                                               m_qp->post_sig_cnt,
                                                m_qp->comp_cnt, m_qp->wr_pp);
                                }
                        }
@@ -575,21 +592,27 @@ retry_mr:
                        if (seg_len == smd->m_seg || i == (pmsg->wr.num_sge - 1)) {
                                l_off = m_qp->wr_off + (m_qp->wr_hd * m_qp->wr_sz);
 
-                               mlog(4, " SCIF_fence_signal[%d] l_off %p, wr_id %p, new wr_hd 0x%x wr_len %d\n",
-                                        i, l_off, pmsg->wr.wr_id, m_qp->wr_hd, m_qp->wr_len);
+                               mlog(4, " SCIF_fence_signal[%d] l_off %p, wr_id"
+                                       " %p, new wr_hd 0x%x wr_len %d\n",
+                                        i, l_off, pmsg->wr.wr_id, m_qp->wr_hd,
+                                        m_qp->wr_len);
 
-                               ret = scif_fence_signal(smd->scif_tx_ep, l_off, pmsg->wr.wr_id, 0, 0,
-                                                       SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_LOCAL);
+                               ret = scif_fence_signal(smd->scif_tx_ep, l_off,
+                                                       pmsg->wr.wr_id, 0, 0,
+                                                       SCIF_FENCE_INIT_SELF |
+                                                       SCIF_SIGNAL_LOCAL);
                                if (ret) {
-                                       mlog(0," ERR: scif_fence_signal, ret %d \n", ret);
+                                       mlog(0," ERR: scif_fence_sig, ret %d \n", ret);
                                        goto bail;
                                }
                                m_qp->wr_pp++;
                                MCNTR(smd->md, MCM_SCIF_SIGNAL);
                                MCNTR(smd->md, MCM_MX_WRITE_SEG);
 
-                               /* remove IMM unless it's the last segment */
-                               /* NON-COMPLIANT: IMM segmented causes receiver RDMA length will be wrong */
+                               /* Remove IMM unless it's the last segment
+                                * NON-COMPLIANT: IMM segmented causes receiver
+                                * RDMA length will be wrong
+                                */
                                if (l_len || i != (pmsg->wr.num_sge - 1)) {
                                        if (m_wr->wr.opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
                                                m_wr->wr.opcode = IBV_WR_RDMA_WRITE;
@@ -610,23 +633,27 @@ retry_mr:
                                                MCNTR(smd->md, MCM_MX_WR_STALL);
                                                write(smd->md->mc->tx_pipe[1], "w", sizeof("w"));
                                        }
-                                       if (++retries == 100) {
-                                               mlog(0, " ERR: WR retries exhausted: sz %d, hd %d tl %d io %d"
-                                                       " retried %d pst %d,%d cmp %d wr_pp %d\n",
+                                       if (!(++retries % 100)) {
+                                               mlog(1, " WARN: DTO delay: no PO WR: sz %d, hd %d tl %d io %d"
+                                                       " retried %d pst %d,%d cmp %d wr_pp %d -> %s\n",
                                                        m_qp->wr_end, m_qp->wr_hd, m_qp->wr_tl,
                                                        seg_len, retries, m_qp->post_cnt,
-                                                       m_qp->post_sig_cnt,
-                                                       m_qp->comp_cnt, m_qp->wr_pp);
+                                                       m_qp->post_sig_cnt, m_qp->comp_cnt, m_qp->wr_pp,
+                                                       mcm_map_str(m_qp->cm->msg.daddr1.ep_map));
+                                               mcm_pr_addrs(1, &m_qp->cm->msg, m_qp->cm->state, 0);
+                                       }
+                                       if (retries == 500) {
                                                ret = ENOMEM;
+                                               wc_err = IBV_WC_RETRY_EXC_ERR;
                                                goto bail;
                                        }
                                        mpxy_unlock(&m_qp->txlock);
-                                       sleep_usec(1000);
+                                       sleep_usec(10000);
                                        mpxy_lock(&m_qp->txlock);
                                }
                                if (retries) {
                                        mlog(1, " WR stalled: sz %d, hd %d tl %d io %d"
-                                               " retries %d pst %d,%d cmp %d wr_pp %d\n",
+                                               " retried %d pst %d,%d cmp %d wr_pp %d\n",
                                                m_qp->wr_end, m_qp->wr_hd, m_qp->wr_tl,
                                                seg_len, retries, m_qp->post_cnt,
                                                m_qp->post_sig_cnt, m_qp->comp_cnt, m_qp->wr_pp);
@@ -655,7 +682,7 @@ bail:
 
                wc.wr_id = pmsg->wr.wr_id;
                wc.byte_len = 0;
-               wc.status = IBV_WC_GENERAL_ERR;
+               wc.status = wc_err;
                wc.opcode = pmsg->wr.opcode == IBV_WR_SEND ? IBV_WC_SEND:IBV_WC_RDMA_WRITE;
                wc.vendor_err = ret;
                mix_dto_event(m_qp->ib_qp2->send_cq->cq_context, &wc, 1);
@@ -722,13 +749,16 @@ void m_po_wc_event(struct mcm_qp *m_qp, struct mcm_wc_rx *wc_rx, int wc_idx)
                }
        }
 
+       mlog(0x10," mb_tl %Lx->%x, m_hd %Lx wr_tl %d->%d wr_id %d wr_hd %d wc_tl %d->%d - pst %d,%d cmp %d\n",
+               m_qp->smd->m_tl, m_wr->m_idx, m_qp->smd->m_hd, m_qp->wr_tl, wc_rx->wr_tl,
+               m_wr->w_idx, m_qp->wr_hd, m_qp->wc_tl, wc_idx,
+               m_qp->post_cnt, m_qp->post_sig_cnt, m_qp->comp_cnt);
+
        /* PI peer has copy of data, free buffer slot */
-       if (m_wr->flags & M_SEND_CN_SIG) {
-               mpxy_lock(&m_qp->smd->tblock);
-               m_po_buf_tl(m_qp->smd, m_wr->m_idx); /* move proxy buffer tail */
-               m_wr->m_idx = 0;
-               mpxy_unlock(&m_qp->smd->tblock);
-       }
+       mpxy_lock(&m_qp->smd->tblock);
+       m_po_buf_tl(m_qp->smd, m_wr->m_idx); /* move proxy buffer tail */
+       m_wr->m_idx = 0;
+       mpxy_unlock(&m_qp->smd->tblock);
 
        if (m_qp->wr_addr_rem)  /* remote MXS: sync PO WR tail with remote PI WR tail */
                m_qp->wr_tl = wc_rx->wr_tl;
@@ -736,11 +766,6 @@ void m_po_wc_event(struct mcm_qp *m_qp, struct mcm_wc_rx *wc_rx, int wc_idx)
        m_qp->wc_tl = wc_idx; /* move local wc_tl, for wc_tl_rem on peer PI service */
        mpxy_unlock(&m_qp->txlock);
 
-       mlog(0x4," mb_tl %Lx to %x, m_hd %Lx wr_tl %d wr_id %d wr_hd %d - pst %d,%d cmp %d\n",
-               m_qp->smd->m_tl, m_wr->m_idx, m_qp->smd->m_hd, m_qp->wr_tl,
-               m_wr->w_idx, m_qp->wr_hd, m_qp->post_cnt, m_qp->post_sig_cnt,
-               m_qp->comp_cnt);
-
        if (event)
                mix_dto_event(m_cq, &wc_ev, 1);
 }