From 4b9c105649cc24ca57e6026f1973a6357ceaccd0 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Fri, 12 Mar 2010 14:45:13 -0800 Subject: [PATCH] Refresh of dapl-ep --- trunk/ulp/dapl2/dapl/openib_scm/SOURCES | 2 +- trunk/ulp/dapl2/dapl/openib_scm/cm.c | 130 ++++++++++++++++++------ 2 files changed, 98 insertions(+), 34 deletions(-) diff --git a/trunk/ulp/dapl2/dapl/openib_scm/SOURCES b/trunk/ulp/dapl2/dapl/openib_scm/SOURCES index 6e4ad30c..6372f2d5 100644 --- a/trunk/ulp/dapl2/dapl/openib_scm/SOURCES +++ b/trunk/ulp/dapl2/dapl/openib_scm/SOURCES @@ -28,7 +28,7 @@ INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\ ..\..\dat\udat\windows;..\udapl\windows;\ ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include -DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DSOCK_CM -DOPENIB -DCQ_WAIT_OBJECT +DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DDAPL_COUNTERS -DOPENIB -DCQ_WAIT_OBJECT USER_C_FLAGS = $(USER_C_FLAGS) $(DAPL_OPTS) diff --git a/trunk/ulp/dapl2/dapl/openib_scm/cm.c b/trunk/ulp/dapl2/dapl/openib_scm/cm.c index 29694835..31b5c94b 100644 --- a/trunk/ulp/dapl2/dapl/openib_scm/cm.c +++ b/trunk/ulp/dapl2/dapl/openib_scm/cm.c @@ -60,6 +60,44 @@ #include "dapl_ep_util.h" #include "dapl_osd.h" +/* Check for EP linking to IA and proper connect state */ +void dapli_ep_check(DAPL_EP *ep) +{ + DAPL_IA *ia_ptr = ep->header.owner_ia; + DAPL_EP *ep_ptr, *next_ep_ptr; + int found = 0; + + dapl_os_lock(&ia_ptr->header.lock); + ep_ptr = (dapl_llist_is_empty (&ia_ptr->ep_list_head) + ? NULL : dapl_llist_peek_head (&ia_ptr->ep_list_head)); + + while (ep_ptr != NULL) { + next_ep_ptr = + dapl_llist_next_entry(&ia_ptr->ep_list_head, + &ep_ptr->header.ia_list_entry); + if (ep == ep_ptr) { + found++; + if ((ep->cr_ptr && ep->param.ep_state + != DAT_EP_STATE_COMPLETION_PENDING) || + (!ep->cr_ptr && ep->param.ep_state + != DAT_EP_STATE_ACTIVE_CONNECTION_PENDING)) + goto err; + else + goto match; + } + ep_ptr = next_ep_ptr; + } +err: + dapl_log(DAPL_DBG_TYPE_ERR, + " dapli_ep_check ERR: %s %s ep=%p state=%d magic=0x%x\n", + ep->cr_ptr ? "PASSIVE":"ACTIVE", + found ? "WRONG_STATE":"NOT_FOUND" , + ep, ep->param.ep_state, ep->header.magic); +match: + dapl_os_unlock(&ia_ptr->header.lock); + return; +} + #if defined(_WIN32) || defined(_WIN64) enum DAPL_FD_EVENTS { DAPL_FD_READ = 0x1, @@ -452,7 +490,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) dapl_log(DAPL_DBG_TYPE_ERR, " CONN_PENDING: %s ERR %s -> %s %d\n", err == -1 ? "POLL" : "SOCKOPT", - err == -1 ? strerror(errno) : strerror(err), + err == -1 ? strerror(dapl_socket_errno()) : strerror(err), inet_ntoa(((struct sockaddr_in *) &cm_ptr->addr)->sin_addr), ntohs(((struct sockaddr_in *) @@ -475,9 +513,10 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) } if (len != (exp + ntohs(cm_ptr->msg.p_size))) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_PENDING len ERR %s, wcnt=%d(%d) -> %s\n", - strerror(errno), len, + " CONN_PENDING len ERR 0x%x %s, wcnt=%d(%d) -> %s\n", + err, strerror(err), len, exp + ntohs(cm_ptr->msg.p_size), inet_ntoa(((struct sockaddr_in *) ep_ptr->param. @@ -530,16 +569,19 @@ dapli_socket_connect(DAPL_EP * ep_ptr, /* create, connect, sockopt, and exchange QP information */ if ((cm_ptr->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " connect: socket create ERR %s\n", strerror(errno)); + " connect: socket create ERR 0x%x %s\n", + err, strerror(err)); goto bail; } ret = dapl_config_socket(cm_ptr->socket); if (ret < 0) { dapl_log(DAPL_DBG_TYPE_ERR, - " connect: config socket %d ERR %d %s\n", - cm_ptr->socket, ret, strerror(dapl_socket_errno())); + " connect: config socket %d RET %d ERR 0x%x %s\n", + cm_ptr->socket, ret, + dapl_socket_errno(), strerror(dapl_socket_errno())); dat_ret = DAT_INTERNAL_ERROR; goto bail; } @@ -556,6 +598,10 @@ dapli_socket_connect(DAPL_EP * ep_ptr, ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&cm_ptr->addr, sizeof(cm_ptr->addr)); if (ret && ret != EAGAIN) { + dapl_log(DAPL_DBG_TYPE_ERR, + " connect: dapl_connect_socket RET %d ERR 0x%x %s\n", + ret, dapl_socket_errno(), + strerror(dapl_socket_errno())); dat_ret = DAT_INVALID_ADDRESS; goto bail; } @@ -572,9 +618,10 @@ dapli_socket_connect(DAPL_EP * ep_ptr, /* get local address information from socket */ sl = sizeof(cm_ptr->msg.daddr.so); if (getsockname(cm_ptr->socket, (struct sockaddr *)&cm_ptr->msg.daddr.so, &sl)) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " connect getsockname ERROR: %s -> %s r_qual %d\n", - strerror(errno), + " connect getsockname ERROR: 0x%x %s -> %s r_qual %d\n", + err, strerror(err), inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr), (unsigned int)r_qual);; } @@ -604,8 +651,7 @@ dapli_socket_connect(DAPL_EP * ep_ptr, return DAT_SUCCESS; bail: dapl_log(DAPL_DBG_TYPE_ERR, - " connect ERROR: %s -> %s r_qual %d\n", - strerror(errno), + " connect ERROR: -> %s r_qual %d\n", inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr), (unsigned int)r_qual); @@ -629,9 +675,10 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) len = recv(cm_ptr->socket, (char *)&cm_ptr->msg, exp, 0); if (len != exp || ntohs(cm_ptr->msg.ver) != DCM_VER) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_WARN, - " CONN_RTU read: sk %d ERR %s, rcnt=%d, v=%d -> %s PORT L-%x R-%x PID L-%x R-%x\n", - cm_ptr->socket, strerror(errno), len, ntohs(cm_ptr->msg.ver), + " CONN_RTU read: sk %d ERR 0x%x, rcnt=%d, v=%d -> %s PORT L-%x R-%x PID L-%x R-%x\n", + cm_ptr->socket, err, len, ntohs(cm_ptr->msg.ver), inet_ntoa(((struct sockaddr_in *)&cm_ptr->addr)->sin_addr), ntohs(((struct sockaddr_in *)&cm_ptr->msg.daddr.so)->sin_port), ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port), @@ -639,7 +686,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) ntohs(*(uint16_t*)&cm_ptr->msg.resv[2])); /* Retry; corner case where server tcp stack resets under load */ - if (dapl_socket_errno() == ECONNRESET) { + if (err == ECONNRESET) { closesocket(cm_ptr->socket); cm_ptr->socket = DAPL_INVALID_SOCKET; dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, @@ -692,9 +739,10 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) if (exp) { len = recv(cm_ptr->socket, cm_ptr->msg.p_data, exp, 0); if (len != exp) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU read pdata: ERR %s, rcnt=%d -> %s\n", - strerror(errno), len, + " CONN_RTU read pdata: ERR 0x%x %s, rcnt=%d -> %s\n", + err, strerror(err), len, inet_ntoa(((struct sockaddr_in *) ep_ptr->param. remote_ia_address_ptr)->sin_addr)); @@ -761,8 +809,10 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) cm_ptr->state = DCM_CONNECTED; cm_ptr->msg.op = ntohs(DCM_RTU); if (send(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0) == -1) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU: write error = %s\n", strerror(errno)); + " CONN_RTU: write ERR = 0x%x %s\n", + err, strerror(err)); goto bail; } /* post the event with private data */ @@ -821,6 +871,7 @@ ud_bail: } else #endif { + dapli_ep_check(cm_ptr->ep); dapl_evd_connection_callback(cm_ptr, event, cm_ptr->msg.p_data, DCM_MAX_PDATA_SIZE, ep_ptr); } @@ -864,8 +915,10 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr) /* bind, listen, set sockopt, accept, exchange data */ if ((cm_ptr->socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { - dapl_log(DAPL_DBG_TYPE_ERR, " ERR: listen socket create: %s\n", - strerror(errno)); + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " listen: socket create: ERR 0x%x %s\n", + err, strerror(err)); dat_status = DAT_INSUFFICIENT_RESOURCES; goto bail; } @@ -877,10 +930,11 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr) if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) || (listen(cm_ptr->socket, 128) < 0)) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_CM, - " listen: ERROR %s on port %d\n", - strerror(errno), serviceID + 1000); - if (dapl_socket_errno() == EADDRINUSE) + " listen: ERROR 0x%x %s on port %d\n", + err, strerror(err), serviceID + 1000); + if (err == EADDRINUSE) dat_status = DAT_CONN_QUAL_IN_USE; else dat_status = DAT_CONN_QUAL_UNAVAILABLE; @@ -933,9 +987,10 @@ static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr) &acm_ptr->msg.daddr.so, (socklen_t *) &len); if (acm_ptr->socket == DAPL_INVALID_SOCKET) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT: ERR %s on FD %d l_cr %p\n", - strerror(errno), cm_ptr->socket, cm_ptr); + " ACCEPT: ERR 0x%x %s on FD %d l_cr %p\n", + err, strerror(err), cm_ptr->socket, cm_ptr); dapls_cm_release(acm_ptr); return; } @@ -948,11 +1003,14 @@ static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr) /* no delay for small packets */ ret = setsockopt(acm_ptr->socket, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt)); - if (ret) + if (ret) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT: NODELAY setsockopt: 0x%x 0x%x %s\n", - ret, dapl_socket_errno(), strerror(dapl_socket_errno())); - + " ACCEPT: NODELAY setsockopt:" + " RET %d ERR 0x%x %s\n", + ret, err, strerror(err)); + } + /* get local address information from socket */ sl = sizeof(acm_ptr->addr); getsockname(acm_ptr->socket, (struct sockaddr *)&acm_ptr->addr, &sl); @@ -975,9 +1033,10 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr) /* read in DST QP info, IA address. check for private data */ len = recv(acm_ptr->socket, (char *)&acm_ptr->msg, exp, 0); if (len != exp || ntohs(acm_ptr->msg.ver) != DCM_VER) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT read: ERR %s, rcnt=%d, ver=%d\n", - strerror(errno), len, ntohs(acm_ptr->msg.ver)); + " ACCEPT read: ERR 0x%x %s, rcnt=%d, ver=%d\n", + err, strerror(err), len, ntohs(acm_ptr->msg.ver)); goto bail; } @@ -996,9 +1055,10 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr) if (exp) { len = recv(acm_ptr->socket, acm_ptr->msg.p_data, exp, 0); if (len != exp) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " accept read pdata: ERR %s, rcnt=%d\n", - strerror(errno), len); + " accept read pdata: ERR 0x%x %s, rcnt=%d\n", + err, strerror(err), len); goto bail; } p_data = acm_ptr->msg.p_data; @@ -1143,6 +1203,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr, cm_ptr->hca = ia_ptr->hca_ptr; cm_ptr->state = DCM_ACCEPTED; + /* Link CM to EP, already queued on work thread */ dapl_ep_link_cm(ep_ptr, cm_ptr); cm_ptr->ep = ep_ptr; @@ -1158,9 +1219,10 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr, len = writev(cm_ptr->socket, iov, 1); if (len != (p_size + exp)) { + int err = dapl_socket_errno(); dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: ERR %s, wcnt=%d -> %s\n", - strerror(errno), len, + " ACCEPT_USR: ERR 0x%x %s, wcnt=%d -> %s\n", + err, strerror(err), len, inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr)); dapl_ep_unlink_cm(ep_ptr, cm_ptr); @@ -1180,6 +1242,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr, htonll(*(uint64_t*)&local.saddr.ib.gid[8])); dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n"); + return DAT_SUCCESS; bail: /* schedule cleanup from workq */ @@ -1261,6 +1324,7 @@ ud_bail: } else #endif { + dapli_ep_check(cm_ptr->ep); dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp); } return; -- 2.46.0