From: Sean Hefty Date: Wed, 17 Feb 2010 19:19:01 +0000 (-0800) Subject: Refresh of retry-ibat X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=f21a449384c874eb2764a7c2311904772846287b;p=~shefty%2Frdma-win.git Refresh of retry-ibat --- diff --git a/trunk/core/ibat/user/ibat.cpp b/trunk/core/ibat/user/ibat.cpp index 69d91861..2dfcff1a 100644 --- a/trunk/core/ibat/user/ibat.cpp +++ b/trunk/core/ibat/user/ibat.cpp @@ -358,6 +358,27 @@ Resolve( return S_OK; } +HRESULT +ResolvePath( + __in const struct sockaddr* pSrcAddr, + __in const struct sockaddr* pDestAddr, + __out IBAT_PATH_BLOB* pPath, + __in int Timeout) +{ + HRESULT hr; + + do { + hr = Resolve(pSrcAddr, pDestAddr, pPath); + if( hr != E_PENDING || Timeout <= 0 ) + break; + + Timeout -= 10; + Sleep(10); + } while( Timeout > 0 ); + + return hr; +} + #endif } @@ -374,4 +395,14 @@ IbatResolve( return IBAT::Resolve( pSrcAddr, pDestAddr, pPath ); } +HRESULT +IbatResolvePath( + __in const struct sockaddr* pSrcAddr, + __in const struct sockaddr* pDestAddr, + __out IBAT_PATH_BLOB* pPath, + __in const int Timeout) +{ + return IBAT::ResolvePath(pSrcAddr, pDestAddr, pPath, Timeout); +} + } /* extern "C" */ diff --git a/trunk/inc/user/iba/ibat.h b/trunk/inc/user/iba/ibat.h index c9a17405..8dccd602 100644 --- a/trunk/inc/user/iba/ibat.h +++ b/trunk/inc/user/iba/ibat.h @@ -41,6 +41,8 @@ typedef struct _IBAT_PATH_BLOB } IBAT_PATH_BLOB; +typedef IBAT_MAX_TIMEOUT 0x0FFFFFFF + #ifdef __cplusplus namespace IBAT { @@ -52,6 +54,14 @@ Resolve( __out IBAT_PATH_BLOB* pPath ); +HRESULT +ResolvePath( + __in const struct sockaddr* pSrcAddr, + __in const struct sockaddr* pDestAddr, + __out IBAT_PATH_BLOB* pPath, + __in int Timeout /* ms */ + ); + } #else /* __cplusplus */ @@ -62,6 +72,14 @@ IbatResolve( __out IBAT_PATH_BLOB* pPath ); +HRESULT +IbatResolvePath( + __in const struct sockaddr* pSrcAddr, + __in const struct sockaddr* pDestAddr, + __out IBAT_PATH_BLOB* pPath, + __in int Timeout /* ms */ + ); + #endif /* __cplusplus */ #endif // _IBAT_H_ \ No newline at end of file diff --git a/trunk/ulp/dapl2/dapl/include/dapl.h b/trunk/ulp/dapl2/dapl/include/dapl.h index a36b1107..91e041c1 100644 --- a/trunk/ulp/dapl2/dapl/include/dapl.h +++ b/trunk/ulp/dapl2/dapl/include/dapl.h @@ -64,18 +64,18 @@ typedef enum dapl_magic { /* magic number values for verification & debug */ - DAPL_MAGIC_IA = 0xCafeF00d, - DAPL_MAGIC_EVD = 0xFeedFace, - DAPL_MAGIC_EP = 0xDeadBabe, - DAPL_MAGIC_LMR = 0xBeefCafe, - DAPL_MAGIC_RMR = 0xABadCafe, - DAPL_MAGIC_PZ = 0xDeafBeef, - DAPL_MAGIC_PSP = 0xBeadeD0c, - DAPL_MAGIC_RSP = 0xFab4Feed, - DAPL_MAGIC_SRQ = 0xC001Babe, - DAPL_MAGIC_CR = 0xBe12Cee1, - DAPL_MAGIC_CR_DESTROYED = 0xB12bDead, - DAPL_MAGIC_CNO = 0xDeadF00d, + DAPL_MAGIC_IA = 0x12345678, + DAPL_MAGIC_EVD = 0x02468ace, + DAPL_MAGIC_EP = 0x13579bdf, + DAPL_MAGIC_LMR = 0x2123ab54, + DAPL_MAGIC_RMR = 0x1358bc47, + DAPL_MAGIC_PZ = 0x389d9075, + DAPL_MAGIC_PSP = 0x238e9080, + DAPL_MAGIC_RSP = 0x12390754, + DAPL_MAGIC_SRQ = 0x0ee98434, + DAPL_MAGIC_CR = 0x889f3398, + DAPL_MAGIC_CR_DESTROYED = 0x74749009, + DAPL_MAGIC_CNO = 0x78899984, DAPL_MAGIC_INVALID = 0xFFFFFFFF } DAPL_MAGIC; diff --git a/trunk/ulp/dapl2/dapl/openib_common/qp.c b/trunk/ulp/dapl2/dapl/openib_common/qp.c index c2b5c69f..b0de5980 100644 --- a/trunk/ulp/dapl2/dapl/openib_common/qp.c +++ b/trunk/ulp/dapl2/dapl/openib_common/qp.c @@ -211,6 +211,7 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) ep_ptr, ep_ptr->qp_handle); if (ep_ptr->cm_handle != NULL) { +dapl_log(DAPL_DBG_TYPE_ERR, "dapls_ib_qp_free - calling dapls_ib_cm_free\n"); dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr); } @@ -481,8 +482,13 @@ dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, qp_attr.pkey_index, qp_attr.port_num, qp_attr.qp_access_flags, qp_attr.qkey); break; - default: + case IBV_QPS_RESET: + break; + case IBV_QPS_ERR: break; + default: + dapl_log(DAPL_DBG_TYPE_ERR, "invalid QP state 0x%x!\n", qp_state); + return DAT_SUCCESS; } ret = ibv_modify_qp(qp_handle, &qp_attr, mask); diff --git a/trunk/ulp/dapl2/dapl/openib_scm/cm.c b/trunk/ulp/dapl2/dapl/openib_scm/cm.c index 1d7a8dc4..f95b3560 100644 --- a/trunk/ulp/dapl2/dapl/openib_scm/cm.c +++ b/trunk/ulp/dapl2/dapl/openib_scm/cm.c @@ -311,6 +311,8 @@ void dapls_ib_cm_free(dp_ib_cm_handle_t cm_ptr, DAPL_EP *ep) /* cleanup, never made it to work queue */ dapl_os_lock(&cm_ptr->lock); +if (cm_ptr->state == DCM_DESTROY) + dapl_log(DAPL_DBG_TYPE_ERR, "dapls_ib_cm_free - destroying twice!\n"); if (cm_ptr->state == DCM_INIT) { if (cm_ptr->socket != DAPL_INVALID_SOCKET) { shutdown(cm_ptr->socket, SHUT_RDWR); @@ -391,7 +393,7 @@ notify_thread: /* queue socket for processing CM work */ static void dapli_cm_queue(struct ib_cm_handle *cm_ptr) { - DAPL_HCA *hca_ptr = cm_ptr->hca; + DAPL_HCA *hca = cm_ptr->hca; /* add to work queue for cr thread processing */ dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & cm_ptr->entry); @@ -411,12 +413,43 @@ static void dapli_cm_queue(struct ib_cm_handle *cm_ptr) DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr) { DAPL_EP *ep_ptr = cm_ptr->ep; - DAT_UINT32 disc_data = htonl(0xdead); + DAT_UINT32 disc_data = htonl(0xbad); if (ep_ptr == NULL) return DAT_SUCCESS; +dapl_os_lock(&cm_ptr->lock); +if (cm_ptr->ep->header.magic != DAPL_MAGIC_EP) { + dapl_log(DAPL_DBG_TYPE_ERR, "bad ep magic!!!\n"); + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; +} +if (cm_ptr->ep->qp_handle->qp_context != cm_ptr->ep) { + dapl_log(DAPL_DBG_TYPE_ERR, "bad qp_handle->qp_context!!!\n"); + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; +} +if (ep_ptr->qp_handle->srq) { + dapl_log(DAPL_DBG_TYPE_ERR, "qp handle has srq??? likely bad handle\n"); + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; +} +if (ep_ptr->qp_handle->send_cq != ep_ptr->qp_handle->recv_cq) { + dapl_log(DAPL_DBG_TYPE_ERR, "qp send/recv cqs do not match\n"); + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; +} +if (ep_ptr->qp_handle->context != ep_ptr->qp_handle->pd->context) { + dapl_log(DAPL_DBG_TYPE_ERR, "qp verbs != pd verbs\n"); + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; +} +if (ep_ptr->qp_handle->qp_type != IBV_QPT_RC) { + dapl_log(DAPL_DBG_TYPE_ERR, "qp type is invalid\n"); + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; +} - dapl_os_lock(&cm_ptr->lock); +// dapl_os_lock(&cm_ptr->lock); if (cm_ptr->state != DCM_CONNECTED) { dapl_os_unlock(&cm_ptr->lock); return DAT_SUCCESS; @@ -655,6 +688,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data); +dapl_log(DAPL_DBG_TYPE_ERR, "dapli_socket_connect_rtu\n"); dapls_ib_cm_free(cm_ptr, NULL); return; } @@ -1460,6 +1494,8 @@ dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr) if (cm_ptr != NULL) { /* cr_thread will free */ dapl_os_lock(&cm_ptr->lock); +if (cm_ptr->state == DCM_DESTROY) + dapl_log(DAPL_DBG_TYPE_ERR, "dapls_ib_remove_conn_listener - destroying twice!\n"); cm_ptr->state = DCM_DESTROY; sp_ptr->cm_srvc_handle = NULL; send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0); diff --git a/trunk/ulp/librdmacm/src/cma.cpp b/trunk/ulp/librdmacm/src/cma.cpp index cde309b8..9205c56f 100644 --- a/trunk/ulp/librdmacm/src/cma.cpp +++ b/trunk/ulp/librdmacm/src/cma.cpp @@ -506,25 +506,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return 0; } -static int -ucma_resolve_ibat_path(struct rdma_cm_id *id, int timeout_ms, - IBAT_PATH_BLOB *path) -{ - HRESULT hr; - - do { - hr = IBAT::Resolve(&id->route.addr.src_addr, &id->route.addr.dst_addr, - path); - if (hr != E_PENDING || timeout_ms <= 0) { - break; - } - timeout_ms -= 10; - Sleep(10); - } while (timeout_ms > 0); - - return hr; -} - __declspec(dllexport) int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { @@ -532,7 +513,8 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) IBAT_PATH_BLOB path; HRESULT hr; - hr = ucma_resolve_ibat_path(id, timeout_ms, &path); + hr = IBAT::ResolvePath(&id->route.addr.src_addr, &id->route.addr.dst_addr, + path, timeout_ms); if (FAILED(hr)) { return hr; } diff --git a/trunk/ulp/netdirect/user/nd_connect.cpp b/trunk/ulp/netdirect/user/nd_connect.cpp index aa46adac..6ee6fa0f 100644 --- a/trunk/ulp/netdirect/user/nd_connect.cpp +++ b/trunk/ulp/netdirect/user/nd_connect.cpp @@ -138,12 +138,13 @@ Connect(INDEndpoint* pEndpoint, } else { addr.Sin6.sin6_port = LocalPort; } - hr = m_pWvConnEp->BindAddress(&addr.Sa); + + hr = IBAT::ResolvePath(&addr.Sa, pAddress, &path, IBAT_MAX_TIMEOUT); if (FAILED(hr)) { goto out; } - hr = IBAT::Resolve(&addr.Sa, pAddress, &path); + hr = m_pWvConnEp->BindAddress(&addr.Sa); if (FAILED(hr)) { goto out; } diff --git a/trunk/ulp/wsd/user/ibsp_ip.c b/trunk/ulp/wsd/user/ibsp_ip.c index a0afe894..0da0c0e8 100644 --- a/trunk/ulp/wsd/user/ibsp_ip.c +++ b/trunk/ulp/wsd/user/ibsp_ip.c @@ -270,20 +270,9 @@ query_guid_address( HRESULT hr; IBSP_ENTER( IBSP_DBG_HW ); + hr = IbatResolvePath(p_src_addr, p_dest_addr, (IBAT_PATH_BLOB*)&path, + IBAT_MAX_TIMEOUT); - for(;;) - { - hr = IbatResolve( - p_src_addr, - p_dest_addr, - (IBAT_PATH_BLOB*)&path - ); - - if( hr != E_PENDING ) - break; - - Sleep( 100 ); - } if( hr == S_OK ) { *port_guid = path.dgid.unicast.interface_id;