]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
scm: socket connect request count is reset improperly on retry
author Arlin Davis <arlin.r.davis@intel.com>
Wed, 26 Oct 2011 16:12:10 +0000 (09:12 -0700)
committer Arlin Davis <arlin.r.davis@intel.com>
Wed, 26 Oct 2011 16:12:10 +0000 (09:12 -0700)
Include the current retry count with the new connect request call
and set it accordingly after creating the new cm object.

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
dapl/openib_scm/cm.c

index 305f85b605f6902928375ca98c9aeeeeb3335d5d..968d9b9f329294cb515cf7e0aff9a7cf60536a70 100644 (file)
@@ -64,7 +64,7 @@
 static DAT_RETURN
 dapli_socket_connect(DAPL_EP * ep_ptr,
                     DAT_IA_ADDRESS_PTR r_addr,
-                    DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data);
+                    DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries);
 
 #ifdef DAPL_DBG
 /* Check for EP linking to IA and proper connect state */
@@ -505,8 +505,8 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
        struct dapl_ep *ep_ptr = cm_ptr->ep;
 
        if (err) {
-               dapl_log(DAPL_DBG_TYPE_ERR,
-                        " CONN_PENDING: %s ERR %s -> %s %d - %s\n",
+               dapl_log(DAPL_DBG_TYPE_WARN,
+                        " CONN_REQUEST: %s ERR %s -> %s %d - %s %d\n",
                         err == -1 ? "POLL" : "SOCKOPT",
                         err == -1 ? strerror(dapl_socket_errno()) : strerror(err), 
                         inet_ntoa(((struct sockaddr_in *)
@@ -514,7 +514,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
                         ntohs(((struct sockaddr_in *)
                                &cm_ptr->addr)->sin_port),
                         (err == ETIMEDOUT || err == ECONNREFUSED) ? 
-                        "RETRYING...":"ABORTING");
+                        "RETRYING...":"ABORTING", cm_ptr->retry);
 
                /* retry a timeout */
                if ((err == ETIMEDOUT) || (err == ECONNREFUSED && --cm_ptr->retry)) {
@@ -522,12 +522,11 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
                        cm_ptr->socket = DAPL_INVALID_SOCKET;
                        dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, 
                                             ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
-                                            ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
+                                            ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry);
                        dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
                        dapli_cm_free(cm_ptr);
                        return;
                }
-
                goto bail;
        }
 
@@ -579,7 +578,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
 bail:
        /* mark CM object for cleanup */
        dapli_cm_free(cm_ptr);
-       dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr);
+       dapl_evd_connection_callback(NULL, IB_CME_TIMEOUT, NULL, 0, ep_ptr);
 }
 
 /*
@@ -589,7 +588,7 @@ bail:
 static DAT_RETURN
 dapli_socket_connect(DAPL_EP * ep_ptr,
                     DAT_IA_ADDRESS_PTR r_addr,
-                    DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
+                    DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data, int retries)
 {
        dp_ib_cm_handle_t cm_ptr;
        int ret;
@@ -604,6 +603,8 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
        if (cm_ptr == NULL)
                return dat_ret;
 
+       cm_ptr->retry = retries;
+
        /* create, connect, sockopt, and exchange QP information */
        if ((cm_ptr->socket =
             socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) {
@@ -724,12 +725,12 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
                         ntohs(*(uint16_t*)&cm_ptr->msg.resv[2]));
 
                /* Retry; corner case where server tcp stack resets under load */
-               if (err == ECONNRESET) {
+               if (err == ECONNRESET && --cm_ptr->retry) {
                        closesocket(cm_ptr->socket);
                        cm_ptr->socket = DAPL_INVALID_SOCKET;
                        dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, 
                                             ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
-                                            ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
+                                            ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data, cm_ptr->retry);
                        dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
                        dapli_cm_free(cm_ptr);
                        return;
@@ -1455,7 +1456,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
 
        return (dapli_socket_connect(ep_ptr, remote_ia_address,
                                     remote_conn_qual,
-                                    private_data_size, private_data));
+                                    private_data_size, private_data, SCM_CR_RETRY));
 }
 
 /*