]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
mcm: HST->MXS IO streams can overrun MPXYD proxy-in WR queue
author: Arlin Davis <arlin.r.davis@intel.com>
Wed, 6 Apr 2016 21:02:59 +0000 (14:02 -0700)
committer: Arlin Davis <arlin.r.davis@intel.com>
Wed, 6 Apr 2016 21:02:59 +0000 (14:02 -0700)
The MPXYD proxy-in service cannot consume HST->MIC WRs fast
enough on 100Gb/s fabrics and from server-based clients. This
results in post_send failing with DAT_INSUFFICIENT_RESOURCES.
Add a retry mechanism, with a limited number of retries, for the
host-side mcm provider via dat_ep_post_send.

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
dapl/openib_mcm/proxy.c

index 5abb8b143b5b493c0d4d5a67474a89038ff969ed..4e05eccbb0a8f8bf1b52a07e9bded9c44dc056e3 100644 (file)
@@ -39,6 +39,7 @@
  * be direct so there is no need for PI service on this MCM providers host side.
  */
 #define MCM_MP_SIG_RATE 5
+#define MCM_MP_RETRY   100000
 
 int mcm_send_pi(struct dcm_ib_qp *m_qp,
                int len,
@@ -48,7 +49,7 @@ int mcm_send_pi(struct dcm_ib_qp *m_qp,
        struct ibv_send_wr wr_imm;
        struct ibv_sge sge;
        struct mcm_wr_rx m_wr_rx;
-       int i, l_len, seg_len, ret = 0, wr_idx;
+       int i, l_len, seg_len, retry_cnt, ret = 0, wr_idx=0;
        struct wrc_idata wrc;
        uint32_t wr_flags, l_off, r_off = 0;
        uint64_t l_addr;
@@ -84,9 +85,22 @@ int mcm_send_pi(struct dcm_ib_qp *m_qp,
 
                        dapl_os_lock(&m_qp->lock);
                        if (((m_qp->wr_hd + 1) & m_qp->wrc_rem.wr_end) == m_qp->wr_tl) { /* full */
-                               ret = ENOMEM;
                                dapl_os_unlock(&m_qp->lock);
-                               goto bail;
+                               retry_cnt = MCM_MP_RETRY;
+retry_wr:
+                               sched_yield();
+                               dapl_os_lock(&m_qp->tp->cqlock);
+                               mcm_dto_event(m_qp->rcv_cq); /* process CQ, free WR slots */
+                               dapl_os_unlock(&m_qp->tp->cqlock);
+
+                               dapl_os_lock(&m_qp->lock); /* retry */
+                               if (((m_qp->wr_hd + 1) & m_qp->wrc_rem.wr_end) == m_qp->wr_tl) {
+                                       dapl_os_unlock(&m_qp->lock);
+                                       if (--retry_cnt)
+                                               goto retry_wr;
+                                       ret = ENOMEM;
+                                       goto bail;
+                               }
                        }
                        m_qp->wr_hd = (m_qp->wr_hd + 1) & m_qp->wrc_rem.wr_end; /* move hd */
                        wr_idx = m_qp->wr_hd;