git.openfabrics.org - ~shefty/rdma-win.git/commitdiff
Refresh of retry-ibat
author Sean Hefty <sean.hefty@intel.com>
Wed, 17 Feb 2010 19:19:01 +0000 (11:19 -0800)
committer Sean Hefty <sean.hefty@intel.com>
Wed, 17 Feb 2010 19:19:01 +0000 (11:19 -0800)
trunk/core/ibat/user/ibat.cpp
trunk/inc/user/iba/ibat.h
trunk/ulp/dapl2/dapl/include/dapl.h
trunk/ulp/dapl2/dapl/openib_common/qp.c
trunk/ulp/dapl2/dapl/openib_scm/cm.c
trunk/ulp/librdmacm/src/cma.cpp
trunk/ulp/netdirect/user/nd_connect.cpp
trunk/ulp/wsd/user/ibsp_ip.c

index 69d918615789e871b287b5a82bbf978f252a7f0e..2dfcff1ab77cc013348ff6a8e4b002ebccfcd68a 100644 (file)
@@ -358,6 +358,27 @@ Resolve(
     return S_OK;\r
 }\r
 \r
+HRESULT /* result of the last Resolve() attempt */\r
+ResolvePath( /* calls Resolve() repeatedly while it reports E_PENDING */\r
+    __in const struct sockaddr* pSrcAddr,\r
+    __in const struct sockaddr* pDestAddr,\r
+    __out IBAT_PATH_BLOB* pPath,\r
+       __in int Timeout) /* retry budget in ms; <= 0 means a single attempt */\r
+{\r
+       HRESULT hr;\r
+\r
+       do {\r
+               hr = Resolve(pSrcAddr, pDestAddr, pPath);\r
+               if( hr != E_PENDING || Timeout <= 0 ) /* finished, or no budget left to wait */\r
+                       break;\r
+\r
+               Timeout -= 10; /* only the 10 ms sleep is charged; time inside Resolve() is not */\r
+               Sleep(10);\r
+       } while( Timeout > 0 );\r
+\r
+       return hr;\r
+}\r
+\r
 #endif\r
 }\r
 \r
@@ -374,4 +395,14 @@ IbatResolve(
     return IBAT::Resolve( pSrcAddr, pDestAddr, pPath );\r
 }\r
 \r
+HRESULT\r
+IbatResolvePath( /* extern "C" entry point that forwards to IBAT::ResolvePath */\r
+    __in const struct sockaddr* pSrcAddr,\r
+    __in const struct sockaddr* pDestAddr,\r
+    __out IBAT_PATH_BLOB* pPath,\r
+       __in const int Timeout) /* passed through unchanged */\r
+{\r
+       return IBAT::ResolvePath(pSrcAddr, pDestAddr, pPath, Timeout);\r
+}\r
+\r
 } /* extern "C" */\r
index c9a174059481380202dbc84a342c80bf705f6fcb..8dccd60229acfae2838718de3aad995e4fe04b3b 100644 (file)
@@ -41,6 +41,8 @@ typedef struct _IBAT_PATH_BLOB
 \r
 } IBAT_PATH_BLOB;\r
 \r
+#define IBAT_MAX_TIMEOUT 0x0FFFFFFF /* largest Timeout value (ms) callers pass to the ResolvePath functions */\r
+\r
 #ifdef __cplusplus\r
 namespace IBAT\r
 {\r
@@ -52,6 +54,14 @@ Resolve(
     __out IBAT_PATH_BLOB* pPath\r
     );\r
 \r
+HRESULT\r
+ResolvePath(\r
+    __in const struct sockaddr* pSrcAddr,\r
+    __in const struct sockaddr* pDestAddr,\r
+    __out IBAT_PATH_BLOB* pPath,\r
+       __in int Timeout        /* retry budget in ms while Resolve is E_PENDING; <= 0 tries once */\r
+    );\r
+\r
 }\r
 #else /* __cplusplus */\r
 \r
@@ -62,6 +72,14 @@ IbatResolve(
     __out IBAT_PATH_BLOB* pPath\r
     );\r
 \r
+HRESULT\r
+IbatResolvePath(\r
+    __in const struct sockaddr* pSrcAddr,\r
+    __in const struct sockaddr* pDestAddr,\r
+    __out IBAT_PATH_BLOB* pPath,\r
+       __in int Timeout        /* retry budget in ms, forwarded to IBAT::ResolvePath */\r
+    );\r
+\r
 #endif /* __cplusplus */\r
 \r
 #endif // _IBAT_H_
\ No newline at end of file
index a36b110733d561d97fb0d5255ed3e598e10b46bf..91e041c158eb4d39b38330ed8a54ddfde8592610 100644 (file)
 typedef enum dapl_magic
 {
     /* magic number values for verification & debug */
-    DAPL_MAGIC_IA      = 0xCafeF00d,
-    DAPL_MAGIC_EVD     = 0xFeedFace,
-    DAPL_MAGIC_EP      = 0xDeadBabe,
-    DAPL_MAGIC_LMR     = 0xBeefCafe,
-    DAPL_MAGIC_RMR      = 0xABadCafe,
-    DAPL_MAGIC_PZ      = 0xDeafBeef,
-    DAPL_MAGIC_PSP     = 0xBeadeD0c,
-    DAPL_MAGIC_RSP     = 0xFab4Feed,
-    DAPL_MAGIC_SRQ     = 0xC001Babe,
-    DAPL_MAGIC_CR      = 0xBe12Cee1,
-    DAPL_MAGIC_CR_DESTROYED = 0xB12bDead,
-    DAPL_MAGIC_CNO     = 0xDeadF00d,
+    DAPL_MAGIC_IA      = 0x12345678,
+    DAPL_MAGIC_EVD     = 0x02468ace,
+    DAPL_MAGIC_EP      = 0x13579bdf,
+    DAPL_MAGIC_LMR     = 0x2123ab54,
+    DAPL_MAGIC_RMR      = 0x1358bc47,
+    DAPL_MAGIC_PZ      = 0x389d9075,
+    DAPL_MAGIC_PSP     = 0x238e9080,
+    DAPL_MAGIC_RSP     = 0x12390754,
+    DAPL_MAGIC_SRQ     = 0x0ee98434,
+    DAPL_MAGIC_CR      = 0x889f3398,
+    DAPL_MAGIC_CR_DESTROYED = 0x74749009,
+    DAPL_MAGIC_CNO     = 0x78899984,
     DAPL_MAGIC_INVALID  = 0xFFFFFFFF
 } DAPL_MAGIC;
 
index c2b5c69f1caa43fe51550a8fd724382a7783d87c..b0de59800015babdd4a89609b96d2b00d5ced9e8 100644 (file)
@@ -211,6 +211,7 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
                     ep_ptr, ep_ptr->qp_handle);\r
 \r
        if (ep_ptr->cm_handle != NULL) {\r
+dapl_log(DAPL_DBG_TYPE_ERR, "dapls_ib_qp_free - calling dapls_ib_cm_free\n");\r
                dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);\r
        }\r
        \r
@@ -481,8 +482,13 @@ dapls_modify_qp_state(IN ib_qp_handle_t            qp_handle,
                                qp_attr.pkey_index, qp_attr.port_num,\r
                                qp_attr.qp_access_flags, qp_attr.qkey);\r
                break;\r
-       default:\r
+       case IBV_QPS_RESET:\r
+               break;\r
+       case IBV_QPS_ERR:\r
                break;\r
+       default:\r
+               dapl_log(DAPL_DBG_TYPE_ERR, "invalid QP state 0x%x!\n", qp_state);\r
+               return DAT_SUCCESS;\r
        }\r
 \r
        ret = ibv_modify_qp(qp_handle, &qp_attr, mask);\r
index 1d7a8dc4d28f37a11e63aec7f707d3e39c871575..f95b356035234d152beca402e391f76494e46432 100644 (file)
@@ -311,6 +311,8 @@ void dapls_ib_cm_free(dp_ib_cm_handle_t cm_ptr, DAPL_EP *ep)
 \r
        /* cleanup, never made it to work queue */\r
        dapl_os_lock(&cm_ptr->lock);\r
+if (cm_ptr->state == DCM_DESTROY)\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "dapls_ib_cm_free - destroying twice!\n");\r
        if (cm_ptr->state == DCM_INIT) {\r
                if (cm_ptr->socket != DAPL_INVALID_SOCKET) {\r
                        shutdown(cm_ptr->socket, SHUT_RDWR);\r
@@ -391,7 +393,7 @@ notify_thread:
 /* queue socket for processing CM work */\r
 static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)\r
 {\r
-       DAPL_HCA *hca_ptr = cm_ptr->hca;\r
+       DAPL_HCA *hca = cm_ptr->hca;\r
 \r
        /* add to work queue for cr thread processing */\r
        dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & cm_ptr->entry);\r
@@ -411,12 +413,43 @@ static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
 DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)\r
 {\r
        DAPL_EP *ep_ptr = cm_ptr->ep;\r
-       DAT_UINT32 disc_data = htonl(0xdead);\r
+       DAT_UINT32 disc_data = htonl(0xbad);\r
 \r
        if (ep_ptr == NULL)\r
                return DAT_SUCCESS;\r
+dapl_os_lock(&cm_ptr->lock);\r
+if (cm_ptr->ep->header.magic != DAPL_MAGIC_EP) {\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "bad ep magic!!!\n");\r
+  dapl_os_unlock(&cm_ptr->lock);\r
+  return DAT_SUCCESS;\r
+}\r
+if (cm_ptr->ep->qp_handle->qp_context != cm_ptr->ep) {\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "bad qp_handle->qp_context!!!\n");\r
+  dapl_os_unlock(&cm_ptr->lock);\r
+  return DAT_SUCCESS;\r
+}\r
+if (ep_ptr->qp_handle->srq) {\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "qp handle has srq??? likely bad handle\n");\r
+  dapl_os_unlock(&cm_ptr->lock);\r
+  return DAT_SUCCESS;\r
+}\r
+if (ep_ptr->qp_handle->send_cq != ep_ptr->qp_handle->recv_cq) {\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "qp send/recv cqs do not match\n");\r
+  dapl_os_unlock(&cm_ptr->lock);\r
+  return DAT_SUCCESS;\r
+}\r
+if (ep_ptr->qp_handle->context != ep_ptr->qp_handle->pd->context) {\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "qp verbs != pd verbs\n");\r
+  dapl_os_unlock(&cm_ptr->lock);\r
+  return DAT_SUCCESS;\r
+}\r
+if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC) {\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "qp type is invalid\n");\r
+  dapl_os_unlock(&cm_ptr->lock);\r
+  return DAT_SUCCESS;\r
+}\r
 \r
-       dapl_os_lock(&cm_ptr->lock);\r
+//     dapl_os_lock(&cm_ptr->lock);\r
        if (cm_ptr->state != DCM_CONNECTED) {\r
                dapl_os_unlock(&cm_ptr->lock);\r
                return DAT_SUCCESS;\r
@@ -655,6 +688,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
                        dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, \r
                                             ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,\r
                                             ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);\r
+dapl_log(DAPL_DBG_TYPE_ERR, "dapli_socket_connect_rtu\n");\r
                        dapls_ib_cm_free(cm_ptr, NULL);\r
                        return;\r
                }\r
@@ -1460,6 +1494,8 @@ dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
        if (cm_ptr != NULL) {\r
                /* cr_thread will free */\r
                dapl_os_lock(&cm_ptr->lock);\r
+if (cm_ptr->state == DCM_DESTROY)\r
+  dapl_log(DAPL_DBG_TYPE_ERR, "dapls_ib_remove_conn_listener - destroying twice!\n");\r
                cm_ptr->state = DCM_DESTROY;\r
                sp_ptr->cm_srvc_handle = NULL;\r
                send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);\r
index cde309b881321206bbbb73204bd544d8d9989991..9205c56f44e29dc3d3297665a1d84f121ac74fdf 100644 (file)
@@ -506,25 +506,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
        return 0;\r
 }\r
 \r
-static int\r
-ucma_resolve_ibat_path(struct rdma_cm_id *id, int timeout_ms,\r
-                                          IBAT_PATH_BLOB *path)\r
-{\r
-       HRESULT hr;\r
-\r
-       do {\r
-               hr = IBAT::Resolve(&id->route.addr.src_addr, &id->route.addr.dst_addr,\r
-                                                  path);\r
-               if (hr != E_PENDING || timeout_ms <= 0) {\r
-                       break;\r
-               }\r
-               timeout_ms -= 10;\r
-               Sleep(10);\r
-       } while (timeout_ms > 0);\r
-\r
-       return hr;\r
-}\r
-\r
 __declspec(dllexport)\r
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)\r
 {\r
@@ -532,7 +513,8 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
        IBAT_PATH_BLOB path;\r
        HRESULT hr;\r
 \r
-       hr = ucma_resolve_ibat_path(id, timeout_ms, &path);\r
+       hr = IBAT::ResolvePath(&id->route.addr.src_addr, &id->route.addr.dst_addr,\r
+                                                  &path, timeout_ms); /* pPath is IBAT_PATH_BLOB*, so pass the address */\r
        if (FAILED(hr)) {\r
                return hr;\r
        }\r
index aa46adac41ce6b8187affee23c5d50b5b9066300..6ee6fa0f90d5aa1e61bb96839edcd5f1dde014d7 100644 (file)
@@ -138,12 +138,13 @@ Connect(INDEndpoint* pEndpoint,
        } else {\r
                addr.Sin6.sin6_port = LocalPort;\r
        }\r
-       hr = m_pWvConnEp->BindAddress(&addr.Sa);\r
+\r
+       hr = IBAT::ResolvePath(&addr.Sa, pAddress, &path, IBAT_MAX_TIMEOUT);\r
        if (FAILED(hr)) {\r
                goto out;\r
        }\r
 \r
-       hr = IBAT::Resolve(&addr.Sa, pAddress, &path);\r
+       hr = m_pWvConnEp->BindAddress(&addr.Sa);\r
        if (FAILED(hr)) {\r
                goto out;\r
        }\r
index a0afe89416cd5c287f143a12a118af65c72d8909..0da0c0e89b0759c758c011ace74ab0ae78f136b0 100644 (file)
@@ -270,20 +270,9 @@ query_guid_address(
        HRESULT hr;\r
 \r
        IBSP_ENTER( IBSP_DBG_HW );\r
+       hr = IbatResolvePath(p_src_addr, p_dest_addr, (IBAT_PATH_BLOB*)&path,\r
+               IBAT_MAX_TIMEOUT);\r
 \r
-       for(;;)\r
-       {\r
-               hr = IbatResolve(\r
-                       p_src_addr,\r
-                       p_dest_addr,\r
-                       (IBAT_PATH_BLOB*)&path\r
-                       );\r
-\r
-               if( hr != E_PENDING )\r
-                       break;\r
-\r
-               Sleep( 100 );\r
-       }\r
        if( hr == S_OK )\r
        {\r
                *port_guid = path.dgid.unicast.interface_id;\r