From 59489448a7918ab2a1a4b9bcac7e4661cdd97a23 Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Mon, 9 Aug 2010 14:19:50 -0700 Subject: [PATCH] common: add more debug levels for cm logging DAPL_DBG_TYPE_CM_EST = 0x8000, DAPL_DBG_TYPE_CM_WARN = 0x10000 Add level for connection establishment and events and for retries/timer events. Signed-off-by: Arlin Davis --- dapl/common/dapl_ep_connect.c | 2 +- dapl/include/dapl_debug.h | 4 +++- dapl/openib_cma/cm.c | 40 ++++++++++++++++------------------- dapl/openib_scm/cm.c | 16 +++++++++++--- dapl/openib_ucm/cm.c | 34 +++++++++++++++++++++-------- 5 files changed, 60 insertions(+), 36 deletions(-) diff --git a/dapl/common/dapl_ep_connect.c b/dapl/common/dapl_ep_connect.c index 9b5829e..80afead 100755 --- a/dapl/common/dapl_ep_connect.c +++ b/dapl/common/dapl_ep_connect.c @@ -336,7 +336,7 @@ dapl_ep_connect(IN DAT_EP_HANDLE ep_handle, dapls_timer_set(ep_ptr->cxn_timer, dapls_ep_timeout, ep_ptr, timeout); - dapl_log(DAPL_DBG_TYPE_WARN, " dapl_ep_connect timeout = %d us\n", timeout); + dapl_log(DAPL_DBG_TYPE_EP, " dapl_ep_connect timeout = %d us\n", timeout); } dapl_os_unlock(&ep_ptr->header.lock); } diff --git a/dapl/include/dapl_debug.h b/dapl/include/dapl_debug.h index 37edf90..6e332e6 100644 --- a/dapl/include/dapl_debug.h +++ b/dapl/include/dapl_debug.h @@ -67,7 +67,9 @@ typedef enum DAPL_DBG_TYPE_SRQ = 0x0800, DAPL_DBG_TYPE_CNTR = 0x1000, DAPL_DBG_TYPE_CM_LIST = 0x2000, - DAPL_DBG_TYPE_THREAD = 0x4000 + DAPL_DBG_TYPE_THREAD = 0x4000, + DAPL_DBG_TYPE_CM_EST = 0x8000, + DAPL_DBG_TYPE_CM_WARN = 0x10000 } DAPL_DBG_TYPE; diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c index 503df96..9338d55 100644 --- a/dapl/openib_cma/cm.c +++ b/dapl/openib_cma/cm.c @@ -406,14 +406,14 @@ static void dapli_cm_active_cb(struct dapl_cm_id *conn, } break; case RDMA_CM_EVENT_ESTABLISHED: - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " active_cb: cm_id %d PORT %d CONNECTED to %s!\n", - conn->cm_id, ntohs(((struct sockaddr_in *) - &conn->cm_id->route.addr. - dst_addr)->sin_port), - inet_ntoa(((struct sockaddr_in *) - &conn->cm_id->route.addr.dst_addr)-> - sin_addr)); + dapl_log(DAPL_DBG_TYPE_CM_EST, + " CMA ACTIVE CONN: %x -> %s %x\n", + ntohs(((struct sockaddr_in *) + &conn->cm_id->route.addr.src_addr)->sin_port), + inet_ntoa(((struct sockaddr_in *) + &conn->cm_id->route.addr.dst_addr)->sin_addr), + ntohs(((struct sockaddr_in *) + &conn->cm_id->route.addr.dst_addr)->sin_port)); /* setup local and remote ports for ep query */ conn->ep->param.remote_port_qual = @@ -488,23 +488,19 @@ static void dapli_cm_passive_cb(struct dapl_cm_id *conn, "dapl_cm_passive: non-consumer REJ, reason=%d," " DST %s, %d\n", event->status, - inet_ntoa(((struct sockaddr_in *) - &conn->cm_id->route.addr.dst_addr)-> - sin_addr), - ntohs(((struct sockaddr_in *) - &conn->cm_id->route.addr.dst_addr)-> - sin_port)); + inet_ntoa(((struct sockaddr_in *)&conn->cm_id->route.addr.dst_addr)->sin_addr), + ntohs(((struct sockaddr_in *)&conn->cm_id->route.addr.dst_addr)->sin_port)); ib_cm_event = IB_CME_DESTINATION_REJECT; break; case RDMA_CM_EVENT_ESTABLISHED: - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n", - conn->cm_id, ntohs(((struct sockaddr_in *) - &conn->cm_id->route.addr. - src_addr)->sin_port), - ntohl(((struct sockaddr_in *) - &conn->cm_id->route.addr.dst_addr)-> - sin_addr.s_addr)); + dapl_log(DAPL_DBG_TYPE_CM_EST, + " CMA PASSIVE CONN: %x <- %s %x \n", + ntohs(((struct sockaddr_in *) + &conn->cm_id->route.addr.dst_addr)->sin_port), + inet_ntoa(((struct sockaddr_in *) + &conn->cm_id->route.addr.src_addr)->sin_addr), + ntohs(((struct sockaddr_in *) + &conn->cm_id->route.addr.src_addr)->sin_port)); ib_cm_event = IB_CME_CONNECTED; break; case RDMA_CM_EVENT_DISCONNECTED: diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c index ce0d961..56d4c73 100644 --- a/dapl/openib_scm/cm.c +++ b/dapl/openib_scm/cm.c @@ -732,7 +732,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) /* keep the QP, address info in network order */ /* save remote address information, in msg.daddr */ - dapl_os_memcpy(&ep_ptr->remote_ia_address, + dapl_os_memcpy(&cm_ptr->addr, &cm_ptr->msg.daddr.so, sizeof(union dcm_addr)); @@ -912,6 +912,11 @@ ud_bail: dapl_evd_connection_callback(cm_ptr, event, cm_ptr->msg.p_data, DCM_MAX_PDATA_SIZE, ep_ptr); } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " SCM ACTIVE CONN: %x -> %s %x\n", + ntohs(((struct sockaddr_in *) &cm_ptr->addr)->sin_port), + inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_port)-1000); return; bail: @@ -1282,7 +1287,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr, ntohs(local.saddr.ib.lid), ntohl(local.saddr.ib.qpn), ntohs(local.p_size)); dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT_USR: SRC GID subnet %016llx id %016llx\n", + " ACCEPT_USR: local GID subnet %016llx id %016llx\n", (unsigned long long) htonll(*(uint64_t*)&local.saddr.ib.gid[0]), (unsigned long long) @@ -1309,7 +1314,7 @@ static void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr) len = recv(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0); if (len != 4 || ntohs(cm_ptr->msg.op) != DCM_RTU) { dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_RTU: rcv ERR, rcnt=%d op=%x\n", + " ACCEPT_RTU: rcv ERR, rcnt=%d op=%x <- %s\n", len, ntohs(cm_ptr->msg.op), inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr)); @@ -1376,6 +1381,11 @@ ud_bail: dapli_ep_check(cm_ptr->ep); dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp); } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " SCM PASSIVE CONN: %x <- %s %x\n", + cm_ptr->sp->conn_qual, + inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_port)); return; bail: diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index 05b5af5..be15c0f 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -164,7 +164,7 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) /* wait longer each retry */ if ((time - cm->timer)/1000 > (cm->hca->ib_trans.rep_time << cm->retries)) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM_WARN, " CM_REQ retry %p %d [lid, port, qpn]:" " %x %x %x -> %x %x %x Time(ms) %llu > %d\n", cm, cm->retries+1, ntohs(cm->msg.saddr.ib.lid), @@ -182,7 +182,7 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) *timer = cm->hca->ib_trans.cm_timer; if ((time - cm->timer)/1000 > (cm->hca->ib_trans.rtu_time << cm->retries)) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM_WARN, " CM_REPLY retry %d [lid, port, qpn]:" " %x %x %x -> %x %x %x r_pid %x (%x) Time(ms) %llu > %d\n", cm->retries+1, @@ -207,7 +207,7 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) /* wait longer each retry */ if ((time - cm->timer)/1000 > (cm->hca->ib_trans.rtu_time << cm->retries)) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM_WARN, " CM_DREQ retry %d [lid, port, qpn]:" " %x %x %x -> %x %x %x r_pid %x (%x) Time(ms) %llu > %d\n", cm->retries+1, @@ -354,7 +354,7 @@ static void ucm_process_recv(ib_hca_transport_t *tp, } /* active: RTU was dropped, resend */ if (ntohs(msg->op) == DCM_REP) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM_WARN, " RESEND RTU: op %s st %s [lid, port, qpn]:" " %x %x %x -> %x %x %x\n", dapl_cm_op_str(ntohs(msg->op)), @@ -380,7 +380,7 @@ static void ucm_process_recv(ib_hca_transport_t *tp, case DCM_FREE: /* DREQ dropped, resend */ if (ntohs(msg->op) == DCM_DREQ) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM_WARN, " RESEND DREP: op %s st %s [lid, port, qpn]:" " %x %x %x -> %x %x %x\n", dapl_cm_op_str(ntohs(msg->op)), @@ -461,7 +461,7 @@ retry_listenq: } else { /* duplicate; bail and throw away */ dapl_os_unlock(lock); - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM_WARN, " DUPLICATE: cm %p op %s st %s [lid, port, qpn]:" " %x %x %x <- %x %x %x\n", cm, dapl_cm_op_str(ntohs(msg->op)), @@ -502,7 +502,7 @@ retry_listenq: } if (!found) { - dapl_log(DAPL_DBG_TYPE_WARN, + dapl_log(DAPL_DBG_TYPE_CM, " ucm_recv: NO MATCH op %s %x %x i%x c%x" " < %x %x %x\n", dapl_cm_op_str(ntohs(msg->op)), @@ -1123,6 +1123,10 @@ ud_bail: htons(lid), NULL); if (xevent.remote_ah.ah == NULL) { + dapl_log(DAPL_DBG_TYPE_ERR, + " active UD RTU: ERR create_ah" + " for qpn 0x%x lid 0x%x\n", + xevent.remote_ah.qpn, lid); event = IB_CME_LOCAL_FAILURE; goto bail; } @@ -1172,6 +1176,12 @@ ud_bail: IB_CME_CONNECTED, cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep); } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " UCM_ACTIVE_CONN %d [lid port qpn] %x %x %x -> %x %x %x\n", + cm->retries, ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), + ntohl(cm->msg.dqpn)); return; bail: dapl_evd_connection_callback(NULL, event, cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep); @@ -1294,7 +1304,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) NULL); if (xevent.remote_ah.ah == NULL) { dapl_log(DAPL_DBG_TYPE_ERR, - " accept_rtu: ERR create_ah" + " passive UD RTU: ERR create_ah" " for qpn 0x%x lid 0x%x\n", xevent.remote_ah.qpn, lid); goto bail; @@ -1334,6 +1344,12 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) #endif dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp); } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " UCM_PASSIVE_CONN %d [lid port qpn] %x %x %x <- %x %x %x\n", + cm->retries, ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), + ntohl(cm->msg.dqpn)); return; bail: dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, NULL, 0, cm->sp); @@ -1851,7 +1867,7 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm, if (ucm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) { dapl_log(DAPL_DBG_TYPE_WARN, - " cm_reject: ERR: %s\n", strerror(errno)); + " cm_reject: send ERR: %s\n", strerror(errno)); dapl_os_unlock(&cm->lock); return DAT_INTERNAL_ERROR; } -- 2.46.0