From: Arlin Davis Date: Wed, 12 Aug 2015 16:46:30 +0000 (-0700) Subject: mpxyd: proxy_in data transfers can improperly start before RTU received X-Git-Tag: dapl-2.1.6-1~2 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=b140211771b3fb212784c514c58198de22fa3dfc;p=~ardavis%2Fdapl.git mpxyd: proxy_in data transfers can improperly start before RTU received Proxy-in data transfers must be deferred until RTU is received and QP is in CONN state. Otherwise, the remote PI WC address/rkey information is still uninitialized. Check for initial CONN state before processing RR or WT data phase and set RR to pause state until RTU and remote PI WRC information is processed. Update pi_req_event error logging. Signed-off-by: Arlin Davis --- diff --git a/dapl/svc/mcm.c b/dapl/svc/mcm.c index f6ea700..bd3149f 100644 --- a/dapl/svc/mcm.c +++ b/dapl/svc/mcm.c @@ -1104,7 +1104,6 @@ static void mcm_process_recv(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, mcm_cm_t *cm, case MCM_RTU_PENDING: /* passive */ mlog(2, "RTU_PENDING: cm %p, my_id %d, cm_id %d\n", cm, cm->entry.tid, cm->cm_id); - cm->state = MCM_CONNECTED; mpxy_unlock(&cm->lock); MCNTR(md, MCM_CM_RTU_IN); mix_cm_rtu_in(cm, msg, len); diff --git a/dapl/svc/mix.c b/dapl/svc/mix.c index b13edcb..ec715f3 100644 --- a/dapl/svc/mix.c +++ b/dapl/svc/mix.c @@ -1821,6 +1821,10 @@ int mix_cm_rtu_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len) m_cm->m_qp->wrc_rem.wc_end); } + mpxy_lock(&m_cm->lock); + m_cm->state = MCM_CONNECTED; + mpxy_unlock(&m_cm->lock); + /* Forward, as is, conn_reply message to MIC client, with remote QP info */ msg.hdr.ver = m_cm->md->mc->ver; msg.hdr.flags = MIX_OP_REQ; diff --git a/dapl/svc/mpxy_in.c b/dapl/svc/mpxy_in.c index 248ff20..adf5021 100644 --- a/dapl/svc/mpxy_in.c +++ b/dapl/svc/mpxy_in.c @@ -727,21 +727,18 @@ void m_pi_req_event(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx, struct ibv_wc wr_rx->sg[0].lkey, wr_rx->context); if (wc->status && (wc->status != IBV_WC_WR_FLUSH_ERR)) { - char *sbuf = 
(char*)wr_rx->sg[1].addr; - - mlog(0," WR ERR: %s st %d, vn %x pst %d cmp %d qstate 0x%x\n", + mlog(0," WR ERR: %s st %d, vn %x rr %d wt %d WC[%d] %p %x %d\n", type == WRID_RX_RR ? "RR":"RW_IMM WC", - wc->status, wc->vendor_err, m_qp->post_cnt, - m_qp->comp_cnt, m_qp->ib_qp2->state); - mlog(0, " WR ERR: wr_rx[%d] %p laddr %p=0x%x - %p=0x%x, len=%d, lkey=%x\n", - wr_rx->w_idx, wr_rx, sbuf, sbuf[0], &sbuf[wr_rx->sg[1].length], - sbuf[wr_rx->sg[1].length], wr_rx->sg[1].length, wr_rx->sg[1].lkey); - mlog(0, " WR ERR: wr_id %Lx sge %d op %d flgs" - " %d imm 0x%x raddr %p rkey %x ln %d, laddr %p key %x ln %d\n", - wr_rx->org_id, wr_rx->wr.num_sge, - wr_rx->wr.opcode, wr_rx->wr.send_flags, wr_rx->wr.imm_data, - wr_rx->sg[0].addr, wr_rx->sg[0].lkey, wr_rx->sg[0].length, - wr_rx->sg[1].addr, wr_rx->sg[1].lkey, wr_rx->sg[1].length); + wc->status, wc->vendor_err, m_qp->post_cnt_rr, + m_qp->post_cnt_wt, m_qp->wc_hd_rem, + m_qp->wrc_rem.wc_addr, m_qp->wrc_rem.wc_rkey, + m_qp->wrc_rem.wc_sz); + mlog(0, " WR ERR: wr_rx[%d] oid %Lx sge %d op %d flgs %d" + " imm 0x%x SRC %p %x ln %d, DST %p %x ln %d\n", + wr_rx->w_idx, wr_rx->org_id, wr_rx->wr.num_sge, + wr_rx->wr.opcode, wr_rx->wr.send_flags, wr_rx->wr.imm_data, + wr_rx->sg[0].addr, wr_rx->sg[0].lkey, wr_rx->sg[0].length, + wr_rx->sg[1].addr, wr_rx->sg[1].lkey, wr_rx->sg[1].length); /* send WC with ERR to RW initiator, hold rxlock */ mpxy_lock(&m_qp->rxlock); @@ -788,6 +785,20 @@ static void m_pi_post_read(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx) /* shared proxy-in buffer, device level serialization */ mpxy_lock(&smd->rblock); + /* Stall if data request arrives before RTU */ + if (m_qp->cm && m_qp->cm->state != MCM_CONNECTED && !m_qp->post_cnt_rr) { + mlog(1, " [%d:%d:%d] WARN: WR[%d] %p PAUSED !CONN: po-addr=%p" + " ln=%d, key=%x ctx=%Lx\n", + m_qp->smd->md->mc->scif_id, m_qp->smd->entry.tid, + m_qp->r_entry.tid, wr_rx->w_idx, wr_rx, + wr_rx->sg[0].addr, wr_rx->sg[0].length, + wr_rx->sg[0].lkey, wr_rx->context); + 
wr_rx->flags |= M_READ_PAUSED; + m_qp->stall_cnt_rr++; + mpxy_unlock(&smd->rblock); + return; + } + /* slice out proxy buffer for this segment */ l_start = ALIGN_64(smd->m_hd_r); if ((l_start + l_len) > smd->m_len_r) @@ -1131,18 +1142,10 @@ void m_pi_pending_wr(struct mcm_qp *m_qp, int *data) struct mcm_wr_rx *wr_rx; int wr_idx, wr_max, wr_cnt; - if (m_qp->cm && m_qp->cm->state != MCM_CONNECTED) { - if (m_qp->post_cnt_wt) { - mlog(8," !CONN: qp %p cm %p %s tl_r %d wt_tl_r %d hd_r %d pp %d st %d data %d\n", - m_qp, m_qp->cm, m_qp->cm ? mcm_state_str(m_qp->cm->state):"", - m_qp->wr_tl_r, m_qp->wr_tl_r_wt, - m_qp->wr_hd_r, m_qp->post_cnt_wt, - m_qp->stall_cnt_rr, *data); - } - return; - } - mpxy_lock(&m_qp->rxlock); + if (m_qp->cm && m_qp->cm->state != MCM_CONNECTED && !m_qp->post_cnt_rr) + goto done; + wr_max = mcm_wr_max; wr_idx = m_qp->wr_tl_r_wt; /* last write_to marker */ wr_cnt = 0;