From 6312c4a8d80f086491be4b7f65120f1431b7a6cb Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Mon, 16 Jun 2014 09:31:27 -0700 Subject: [PATCH] mcm: improve logging for scale-up debug Signed-off-by: Arlin Davis --- dapl/openib_common/cq.c | 11 +++++++++-- dapl/openib_common/dapl_ib_dto.h | 5 +++-- dapl/openib_mcm/mix.c | 8 +++++--- dapl/svc/mcm.c | 18 +++++++++++------- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/dapl/openib_common/cq.c b/dapl/openib_common/cq.c index f3e1ae9..c5a43e1 100644 --- a/dapl/openib_common/cq.c +++ b/dapl/openib_common/cq.c @@ -575,12 +575,19 @@ DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr, (ep_ptr->qp_handle->ep_map == MIC_XSOCK_DEV) && (DAPL_GET_CQE_OPTYPE(wc_ptr) == OP_RDMA_WRITE_IMM)) { dapl_log(DAPL_DBG_TYPE_EP, - " MCM_ib_completion_poll: RW_imm: evd %p ep %p st %d op %s ln %d\n", + " cq_dto_event: MCM RW_pi: evd %p ep %p st %d op %s ln %d wr_id %p\n", evd_ptr, ep_ptr, DAPL_GET_CQE_STATUS(wc_ptr), DAPL_GET_CQE_OP_STR(wc_ptr), - DAPL_GET_CQE_BYTESNUM(wc_ptr)); + cookie->val.dto.size, cookie); ret = 0; /* WR RW_imm to PI, WC pending from PI */ + } else { + dapl_log(DAPL_DBG_TYPE_EP, + " cq_dto_event: MCM RW_direct: evd %p ep %p st %d op %s ln %d wr_id %p\n", + evd_ptr, ep_ptr, + DAPL_GET_CQE_STATUS(wc_ptr), + DAPL_GET_CQE_OP_STR(wc_ptr), + cookie->val.dto.size, cookie); } } #endif diff --git a/dapl/openib_common/dapl_ib_dto.h b/dapl/openib_common/dapl_ib_dto.h index 076ddea..b9e4bc5 100644 --- a/dapl/openib_common/dapl_ib_dto.h +++ b/dapl/openib_common/dapl_ib_dto.h @@ -186,8 +186,9 @@ dapls_ib_post_send ( completion_flags) ? IBV_SEND_SOLICITED : 0; dapl_log(DAPL_DBG_TYPE_EP, - " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", - wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge); + " post_snd: %s ep %p op %x flgs %x sgl %p,%d ln %d wr_id %Lx\n", + PROVIDER_NAME, ep_ptr, wr.opcode, wr.send_flags, wr.sg_list, + wr.num_sge, total_len, wr.wr_id); #ifdef _OPENIB_MCM_ if (ep_ptr->qp_handle->tp->scif_ep) diff --git a/dapl/openib_mcm/mix.c b/dapl/openib_mcm/mix.c index 06656dc..c14492b 100644 --- a/dapl/openib_mcm/mix.c +++ b/dapl/openib_mcm/mix.c @@ -1056,9 +1056,6 @@ int dapli_mix_dto_event_in(ib_hca_transport_t *tp, scif_epd_t scif_ep, dat_mix_d dapl_log(DAPL_DBG_TYPE_ERR, " ERR: ret %d, exp %d, error %s\n", ret, len, strerror(errno)); return ret; } - dapl_log(DAPL_DBG_TYPE_EXTENSION, - " MIX_DTO_EVENT_IN: id %d ctx %p wc's %d, wc[0].wr_id=%Lx\n", - pmsg->cq_id, pmsg->cq_ctx, pmsg->wc_cnt, pmsg->wc[0].wr_id); /* Get cq and post DTO event with this WC entry */ m_cq = (void*)pmsg->cq_ctx; @@ -1075,9 +1072,14 @@ int dapli_mix_dto_event_in(ib_hca_transport_t *tp, scif_epd_t scif_ep, dat_mix_d return 0; } pmsg->wc[i].byte_len = cookie->val.dto.size; + dapl_log(DAPL_DBG_TYPE_EP, + " mix_dto_event: MCM evd %p ep %p wr_id=%Lx ln=%d\n", + m_cq->evd, cookie->ep, pmsg->wc[i].wr_id, + cookie->val.dto.size); } mcm_const_ib_wc(&ib_wc, &pmsg->wc[i], 1); dapls_evd_cqe_to_event(m_cq->evd, &ib_wc); + } return 0; diff --git a/dapl/svc/mcm.c b/dapl/svc/mcm.c index 5b477a6..14cb969 100644 --- a/dapl/svc/mcm.c +++ b/dapl/svc/mcm.c @@ -1287,13 +1287,13 @@ mcm_cm_t *mcm_get_cm(mcm_ib_dev_t *md, dat_mcm_msg_t *msg) mpxy_unlock(&md->slock); if (!cm && !dup) { - mlog(2, " %s - op %s [lid, port, cqp, iqp, pid]:" - " %x %x %x %x %x <- %x %x %x %x lpid %x rpid %x\n", + mlog(2, " %s - op %s [lid, port, cqp, iqp]:" + " %x %x %x %x <- %x %x %x %x lpid %x rpid %x\n", ntohs(msg->op) == MCM_REQ ? "NO LISTENER":"NO MATCH", mcm_op_str(ntohs(msg->op)), ntohs(msg->daddr1.lid), ntohs(msg->dport), ntohl(msg->dqpn), ntohl(msg->daddr1.qpn), - ntohl(msg->d_id), ntohs(msg->saddr1.lid), + ntohs(msg->saddr1.lid), ntohs(msg->sport), ntohl(msg->sqpn), ntohl(msg->saddr1.qpn), ntohl(msg->s_id), ntohl(msg->d_id)); @@ -1421,7 +1421,7 @@ int mcm_cm_rtu_out(mcm_cm_t *m_cm) MCNTR(m_cm->md, MCM_CM_RTU_OUT); - mlog(1, "[%d:%d] CONNECTED[%d]: %p 0x%x %x 0x%x %Lx %s -> 0x%x %x 0x%x %Lx %s\n", + mlog(1, "[%d:%d] CONN_EST[%d]: %p 0x%x %x 0x%x %Lx %s -> 0x%x %x 0x%x %Lx %s\n", m_cm->md->mc->scif_id, m_cm->smd->entry.tid, m_cm->md->cntrs ? (uint32_t)((uint64_t *)m_cm->md->cntrs)[MCM_CM_RTU_OUT]:0, m_cm, htons(m_cm->msg.saddr2.lid), htonl(m_cm->msg.saddr2.qpn), @@ -1477,10 +1477,12 @@ void mcm_check_timers(mcm_scif_dev_t *smd, int *timer) /* wait longer each retry */ if ((time - cm->timer)/1000 > (cm->md->rep_time << cm->retries)) { mlog(1, " CM_REQ retry %p %d [lid, port, cqp, iqp]:" - " %x %x %x %x -> %x %x %x %x Time(ms) %d > %d\n", + " %x %x %x %x %s -> %s %x %x %x %x Time %d > %d\n", cm, cm->retries+1, ntohs(cm->msg.saddr1.lid), ntohs(cm->msg.sport), ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr1.qpn), + mcm_map_str(cm->msg.saddr1.ep_map), + mcm_map_str(cm->msg.daddr1.ep_map), ntohs(cm->msg.daddr1.lid), ntohs(cm->msg.dport), ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr1.qpn), (time - cm->timer)/1000, @@ -1497,11 +1499,13 @@ void mcm_check_timers(mcm_scif_dev_t *smd, int *timer) *timer = cm->md->cm_timer; if ((time - cm->timer)/1000 > (cm->md->rtu_time << cm->retries)) { mlog(1, " CM_REPLY retry %d %s [lid, port, cqp, iqp]:" - " %x %x %x %x -> %x %x %x %x r_pid %x Time(ms) %d > %d\n", + " %x %x %x %x %s -> %s %x %x %x %x r_pid %x Time %d > %d\n", cm->retries+1, mcm_op_str(ntohs(cm->msg.op)), ntohs(cm->msg.saddr1.lid), ntohs(cm->msg.sport), ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr1.qpn), + mcm_map_str(cm->msg.saddr1.ep_map), + mcm_map_str(cm->msg.daddr1.ep_map), ntohs(cm->msg.daddr1.lid), ntohs(cm->msg.dport), ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr1.qpn), ntohl(cm->msg.d_id), @@ -1519,7 +1523,7 @@ void mcm_check_timers(mcm_scif_dev_t *smd, int *timer) *timer = cm->md->cm_timer; if ((time - cm->timer)/1000 > (mcm_dreq_ms << cm->retries)) { mlog(1, " CM_DREQ retry %d [lid, port, cqp, iqp]:" - " %x %x %x %x -> %x %x %x %x r_pid %x Time(ms) %d > %d\n", + " %x %x %x %x -> %x %x %x %x r_pid %x Time %d > %d\n", cm->retries+1, ntohs(cm->msg.saddr1.lid), ntohs(cm->msg.sport), ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr1.qpn), -- 2.41.0