From: Arlin Davis Date: Mon, 29 Apr 2013 19:00:39 +0000 (-0700) Subject: mpxyd: cleanup port space, qp and cm objects X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=c35bb89ba0ecc424a331784c28db032d48641c68;p=~ardavis%2Fdapl.git mpxyd: cleanup port space, qp and cm objects Port space leak during close, and CM disconnect. Changes to link and unlink CM and QP during QP create/destruction and CM disconnect states. Signed-off-by: Arlin Davis --- diff --git a/dapl/svc/mpxyd.c b/dapl/svc/mpxyd.c index 54d9e35..1a1ed91 100644 --- a/dapl/svc/mpxyd.c +++ b/dapl/svc/mpxyd.c @@ -1215,13 +1215,13 @@ static int modify_qp(struct ibv_qp *qp_handle, ret = ibv_modify_qp(qp_handle, &qp_attr, mask); if (ret == 0) { m_qp->qp_t.cur_state = m_qp->qp_t.state = qp_state; - return 0; } else { mlog(0, " ERR (%s): l_qpn %x type %d qpn 0x%x lid 0x%x" - " port %d state %s mtu %d rd %d rnr %d sl %d\n", + " port %d state %s->%s mtu %d rd %d rnr %d sl %d\n", strerror(ret), qp_handle->qp_num, qp_handle->qp_type, ntohl(qpn), ntohs(lid), m_qp->smd->md->port, - mcm_qp_state_str(m_qp->qp_t.cur_state), qp_attr.path_mtu, + mcm_qp_state_str(m_qp->qp_t.cur_state), + mcm_qp_state_str(qp_state), qp_attr.path_mtu, qp_attr.max_dest_rd_atomic, qp_attr.min_rnr_timer, qp_attr.ah_attr.sl); } @@ -1232,8 +1232,10 @@ static int modify_qp(struct ibv_qp *qp_handle, void m_cm_free(mcm_cm_t *cm) { /* client, release local conn id port */ - if (!cm->l_ep && cm->msg.sport) - mcm_free_port(cm->md->ports, ntohs(cm->msg.sport)); + if (!cm->l_ep && cm->sid) { + mcm_free_port(cm->md->ports, cm->sid); + cm->sid = 0; + } pthread_mutex_destroy(&cm->lock); cm->smd->ref_cnt--; @@ -1264,12 +1266,14 @@ mcm_cm_t *m_cm_create(mcm_scif_dev_t *smd, mcm_qp_t *m_qp) /* ACTIVE: init source address QP info from MPXYD and MIC client */ if (m_qp) { - cm->msg.sport = htons(mcm_get_port(smd->md->ports, 0, (uint64_t)smd)); + cm->sid = mcm_get_port(smd->md->ports, 0, (uint64_t)cm); + cm->msg.sport = htons(cm->sid); if (!cm->msg.sport) { pthread_mutex_destroy(&cm->lock); goto bail; } cm->m_qp = m_qp; + m_qp->cm = cm; /* MPXYD src IB info in network order, QPt = saddr2 */ cm->msg.saddr2.qpn = htonl(m_qp->ib_qp->qp_num); /* ep */ @@ -1322,11 +1326,12 @@ static void mcm_qlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm) pthread_mutex_unlock(&smd->llock); } /* dequeue listen object from listen list */ -static void mcm_dqlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm, uint16_t port) +static void mcm_dqlisten(mcm_scif_dev_t *smd, mcm_cm_t *cm) { pthread_mutex_lock(&smd->llock); remove_entry(&cm->entry); - mcm_free_port(smd->md->ports, port); + mcm_free_port(smd->md->ports, cm->sid); + cm->sid = 0; m_cm_free(cm); pthread_mutex_unlock(&smd->llock); } @@ -1576,12 +1581,19 @@ static void mcm_destroy_smd(mcm_scif_dev_t *smd) mcm_qp_t *m_qp, *next_qp; mcm_cq_t *m_cq, *next_cq; + /* free cm_id port */ + if (smd->cm_id) { + mcm_free_port(smd->md->ports, smd->cm_id); + smd->cm_id = 0; + } + /* free all listen objects */ pthread_mutex_lock(&smd->llock); m_cm = get_head_entry(&smd->llist); while (m_cm) { next_cm = get_next_entry(&m_cm->entry, &smd->llist); - mcm_free_port(smd->md->ports, (uint16_t)m_cm->sid); + if (m_cm->sid) + mcm_free_port(smd->md->ports, m_cm->sid); m_cm_free(m_cm); m_cm = next_cm; } @@ -2007,10 +2019,12 @@ static int mix_listen_free(mcm_scif_dev_t *smd, dat_mix_hdr_t *pmsg) pthread_mutex_unlock(&smd->llock); if (cm) { - mcm_dqlisten(smd, cm, (uint16_t)pmsg->req_id); + mcm_dqlisten(smd, cm); pmsg->status = MIX_SUCCESS; - } else + } else { + mlog(0, " MIX_LISTEN_FREE: ERR: sid 0x%x not found\n", pmsg->req_id); pmsg->status = MIX_EINVAL; + } /* send back response */ pmsg->flags = MIX_OP_RSP; @@ -2031,8 +2045,6 @@ static int mix_listen(mcm_scif_dev_t *smd, dat_mix_listen_t *pmsg) mlog(0, " ERR: ret %d, exp %d\n", ret, len); return ret; } - mlog(1, " MIX_LISTEN: sid 0x%x, backlog %d, qpn 0x%x lid 0x%x\n", - pmsg->sid, pmsg->backlog, smd->md->qp->qp_num, ntohs(smd->md->lid)); /* create listen EP for provided SID */ lport = mcm_get_port(smd->md->ports, pmsg->sid, (uint64_t)smd); @@ -2055,8 +2067,12 @@ static int mix_listen(mcm_scif_dev_t *smd, dat_mix_listen_t *pmsg) mcm_qlisten(smd, cm); pmsg->hdr.status = MIX_SUCCESS; } - } else + } else { + mlog(1, " MIX_LISTEN: WARN smd %p sid 0x%x port->ctx %p, backlog %d, qpn 0x%x lid 0x%x EADDRINUSE\n", + smd, pmsg->sid, mcm_get_port_ctx(smd->md->ports, pmsg->sid), + pmsg->backlog, smd->md->qp->qp_num, ntohs(smd->md->lid)); pmsg->hdr.status = MIX_EADDRINUSE; + } /* send back response */ pmsg->hdr.flags = MIX_OP_RSP; @@ -2381,8 +2397,15 @@ static int mix_cq_poll(mcm_scif_dev_t *smd, dat_mix_dto_comp_t *pmsg) /* called with smd->qplist lock held */ static void m_qp_free(struct mcm_qp *m_qp) { - ibv_destroy_qp(m_qp->ib_qp); + struct ibv_qp *ib_qp = m_qp->ib_qp; + m_qp->ib_qp = NULL; + if (m_qp->cm) { /* unlink CM */ + m_qp->cm->m_qp = NULL; + m_qp->cm = NULL; + } + modify_qp(ib_qp, IBV_QPS_ERR, 0, 0, NULL); + ibv_destroy_qp(ib_qp); remove_entry(&m_qp->entry); #ifdef MCM_PROFILE @@ -2947,6 +2970,8 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg) mlog(0, " ERR: mix_get_qp, id %d, not found\n", pmsg->qp_id); return -1; } + m_cm->m_qp->cm = m_cm; + mlog(1, " REP: found cm_id %d = %p qp_id %d = %p \n", pmsg->cm_id, m_cm, pmsg->qp_id, m_cm->m_qp); @@ -2978,8 +3003,19 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg) ret = modify_qp(m_cm->m_qp->ib_qp, IBV_QPS_RTS, m_cm->msg.daddr.qpn, m_cm->msg.daddr.lid, NULL); - if (ret) + if (ret) { + mlog(0," ERR: QPt 0x%x -> d_port 0x%x, cqpn %x QPr %x lid 0x%x psize %d\n", + m_cm->m_qp->qp_t.qp_num, + ntohs(m_cm->msg.dport), ntohl(m_cm->msg.dqpn), + ntohl(m_cm->msg.daddr.qpn), ntohs(m_cm->msg.daddr.lid), + ntohs(m_cm->msg.p_size)); + + mlog(0," ERR: QPr 0x%x -> d_port 0x%x, cqpn %x QPt %x lid 0x%x\n", + m_cm->m_qp->qp_r.qp_num, + ntohs(m_cm->msg.dport), ntohl(m_cm->msg.dqpn), + ntohl(m_cm->msg.daddr2.qpn), ntohs(m_cm->msg.daddr2.lid)); return -1; + } /* send RTU on wire, monitor for retries */ m_cm->state = MCM_RTU_PENDING; @@ -3897,7 +3933,8 @@ static void mcm_cm_disc(mcm_cm_t *cm) switch (cm->state) { case MCM_CONNECTED: /* CONSUMER: move to err state to flush */ - modify_qp(cm->m_qp->ib_qp, IBV_QPS_ERR,0,0,0); + if (cm->m_qp) + modify_qp(cm->m_qp->ib_qp, IBV_QPS_ERR, 0, 0, 0); /* send DREQ, event after DREP or DREQ timeout */ cm->state = MCM_DISC_PENDING; @@ -3937,7 +3974,8 @@ static void mcm_cm_disc(mcm_cm_t *cm) case MCM_DISC_RECV: MCNTR(cm->md, MCM_CM_DREQ_IN); /* CM_THREAD: move to err state to flush */ - modify_qp(cm->m_qp->ib_qp, IBV_QPS_ERR,0,0,0); + if (cm->m_qp) + modify_qp(cm->m_qp->ib_qp, IBV_QPS_ERR, 0, 0, 0); /* DREQ received, send DREP and schedule event, finalize */ cm->msg.op = htons(MCM_DREP);