From: Arlin Davis Date: Wed, 6 Apr 2016 21:02:59 +0000 (-0700) Subject: mcm: HST->MXS IO streams can overrun MPXYD proxy-in WR queue X-Git-Tag: dapl-2.1.9-1~4 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=79b885fac295f6dbe9d2dc772b87755ba5b90734;p=~ardavis%2Fdapl.git mcm: HST->MXS IO streams can overrun MPXYD proxy-in WR queue The MPXYD proxy-in service cannot consume HST->MIC WRs fast enough on 100Gb/s fabrics and from server-based clients. This results in post_send failing with DAT_INSUFFICIENT_RESOURCES. Add a retry mechanism, with limited retries, for the host-side mcm provider via dat_ep_post_send. Signed-off-by: Arlin Davis --- diff --git a/dapl/openib_mcm/proxy.c b/dapl/openib_mcm/proxy.c index 5abb8b1..4e05ecc 100644 --- a/dapl/openib_mcm/proxy.c +++ b/dapl/openib_mcm/proxy.c @@ -39,6 +39,7 @@ * be direct so there is no need for PI service on this MCM providers host side. */ #define MCM_MP_SIG_RATE 5 +#define MCM_MP_RETRY 100000 int mcm_send_pi(struct dcm_ib_qp *m_qp, int len, @@ -48,7 +49,7 @@ int mcm_send_pi(struct dcm_ib_qp *m_qp, struct ibv_send_wr wr_imm; struct ibv_sge sge; struct mcm_wr_rx m_wr_rx; - int i, l_len, seg_len, ret = 0, wr_idx; + int i, l_len, seg_len, retry_cnt, ret = 0, wr_idx=0; struct wrc_idata wrc; uint32_t wr_flags, l_off, r_off = 0; uint64_t l_addr; @@ -84,9 +85,22 @@ int mcm_send_pi(struct dcm_ib_qp *m_qp, dapl_os_lock(&m_qp->lock); if (((m_qp->wr_hd + 1) & m_qp->wrc_rem.wr_end) == m_qp->wr_tl) { /* full */ - ret = ENOMEM; dapl_os_unlock(&m_qp->lock); - goto bail; + retry_cnt = MCM_MP_RETRY; +retry_wr: + sched_yield(); + dapl_os_lock(&m_qp->tp->cqlock); + mcm_dto_event(m_qp->rcv_cq); /* process CQ, free WR slots */ + dapl_os_unlock(&m_qp->tp->cqlock); + + dapl_os_lock(&m_qp->lock); /* retry */ + if (((m_qp->wr_hd + 1) & m_qp->wrc_rem.wr_end) == m_qp->wr_tl) { + dapl_os_unlock(&m_qp->lock); + if (--retry_cnt) + goto retry_wr; + ret = ENOMEM; + goto bail; + } } m_qp->wr_hd = (m_qp->wr_hd + 1) & m_qp->wrc_rem.wr_end; /* 
move hd */ wr_idx = m_qp->wr_hd;