From 3315b8148ba52da67e422cf9afe6fa35d2161885 Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Tue, 1 Oct 2013 14:03:51 -0700 Subject: [PATCH] ucm, scm: UD mode triggers list_head assert with large scale alltoall test 1024+ ranks, IMB alltoall may hit assert when running Intel MPI in UD mode. CR clean up was implemented with EP to CR references still linked. During cr_accept, the CR remote_ia_address is linked to EP object by mistake with UD mode. UD mode my have multiple CRs per EP so no direct mappings to CR memory can exist unless RC mode which always has one EP to CR mapping. In scm, ucm: for CM object free with CR references the search and unlinking from SP must be under SP lock to serialize. Also, cleanup thread wakeup logic to only trigger the thread if reference count indicates the need for more processing. Signed-off-by: Arlin Davis --- dapl/common/dapl_cr_accept.c | 10 ++++++---- dapl/openib_scm/cm.c | 14 +++++++------- dapl/openib_ucm/cm.c | 26 +++++++++++++------------- dapl/openib_ucm/dapl_ib_util.h | 1 + 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/dapl/common/dapl_cr_accept.c b/dapl/common/dapl_cr_accept.c index 5df9458..4e48fea 100644 --- a/dapl/common/dapl_cr_accept.c +++ b/dapl/common/dapl_cr_accept.c @@ -180,11 +180,13 @@ dapl_cr_accept(IN DAT_CR_HANDLE cr_handle, entry_ep_state = ep_ptr->param.ep_state; entry_ep_handle = cr_ptr->param.local_ep_handle; ep_ptr->param.ep_state = DAT_EP_STATE_COMPLETION_PENDING; - ep_ptr->cr_ptr = cr_ptr; - ep_ptr->param.remote_ia_address_ptr = - cr_ptr->param.remote_ia_address_ptr; - cr_ptr->param.local_ep_handle = ep_handle; + /* UD supports multiple CR's per EP, provider will manage CR's */ + if (ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) { + ep_ptr->cr_ptr = cr_ptr; + ep_ptr->param.remote_ia_address_ptr = cr_ptr->param.remote_ia_address_ptr; + } + cr_ptr->param.local_ep_handle = ep_handle; dapl_os_unlock(&ep_ptr->header.lock); dat_status = dapls_ib_accept_connection(cr_handle, diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c index d4964bd..f7838f2 100644 --- a/dapl/openib_scm/cm.c +++ b/dapl/openib_scm/cm.c @@ -439,25 +439,25 @@ void dapls_cm_free(dp_ib_cm_handle_t cm_ptr) cm_ptr, dapl_cm_state_str(cm_ptr->state), cm_ptr->ep, sp_ptr, cm_ptr->ref_count); + dapl_os_lock(&cm_ptr->lock); if (sp_ptr && cm_ptr->state == DCM_CONNECTED && cm_ptr->msg.daddr.ib.qp_type == IBV_QPT_UD) { - DAPL_CR *cr_ptr = dapl_sp_search_cr(sp_ptr, cm_ptr); + DAPL_CR *cr_ptr; + + dapl_os_lock(&sp_ptr->header.lock); + cr_ptr = dapl_sp_search_cr(sp_ptr, cm_ptr); if (cr_ptr != NULL) { - dapl_os_lock(&sp_ptr->header.lock); dapl_sp_remove_cr(sp_ptr, cr_ptr); - dapl_os_unlock(&sp_ptr->header.lock); dapls_cr_free(cr_ptr); } + dapl_os_unlock(&sp_ptr->header.lock); } /* free from internal workq, wait until EP is last ref */ - dapl_os_lock(&cm_ptr->lock); cm_ptr->state = DCM_FREE; - dapl_os_unlock(&cm_ptr->lock); - dapli_cm_thread_signal(cm_ptr); - dapl_os_lock(&cm_ptr->lock); if (cm_ptr->ref_count != 1) { + dapli_cm_thread_signal(cm_ptr); dapl_os_unlock(&cm_ptr->lock); dapl_os_wait_object_wait(&cm_ptr->event, DAT_TIMEOUT_INFINITE); dapl_os_lock(&cm_ptr->lock); diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index 05cff10..d6f923e 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -779,19 +779,19 @@ void dapli_cm_free(dp_ib_cm_handle_t cm) cm, dapl_cm_state_str(cm->state), cm->ep, sp_ptr, sp_ptr ? sp_ptr->cr_list_count:0, cm->ref_count); + dapl_os_lock(&cm->lock); if (sp_ptr && cm->state == DCM_CONNECTED && cm->msg.daddr.ib.qp_type == IBV_QPT_UD) { - DAPL_CR *cr_ptr = dapl_sp_search_cr(sp_ptr, cm); - dapl_log(DAPL_DBG_TYPE_CM, " dapli_cm_free: UD CR %p\n", cr_ptr); - if (cr_ptr != NULL) { - dapl_os_lock(&sp_ptr->header.lock); - dapl_sp_remove_cr(sp_ptr, cr_ptr); - dapl_os_unlock(&sp_ptr->header.lock); - dapls_cr_free(cr_ptr); + dapl_os_lock(&sp_ptr->header.lock); + cm->cr = dapl_sp_search_cr(sp_ptr, cm); + dapl_log(DAPL_DBG_TYPE_CM, " dapli_cm_free: UD CR %p\n", cm->cr); + + if (cm->cr != NULL) { + dapl_sp_remove_cr(sp_ptr, cm->cr); + /* free CR at EP destroy */ } + dapl_os_unlock(&sp_ptr->header.lock); } - - dapl_os_lock(&cm->lock); cm->state = DCM_FREE; dapls_thread_signal(&cm->hca->ib_trans.signal); dapl_os_unlock(&cm->lock); @@ -809,12 +809,12 @@ void dapls_cm_free(dp_ib_cm_handle_t cm) dapl_os_lock(&cm->lock); if (cm->state != DCM_FREE) cm->state = DCM_FREE; - - dapl_os_unlock(&cm->lock); - dapls_thread_signal(&cm->hca->ib_trans.signal); - dapl_os_lock(&cm->lock); + if (cm->cr) + dapls_cr_free(cm->cr); + if (cm->ref_count != 1) { + dapls_thread_signal(&cm->hca->ib_trans.signal); dapl_os_unlock(&cm->lock); dapl_os_wait_object_wait(&cm->f_event, DAT_TIMEOUT_INFINITE); dapl_os_lock(&cm->lock); diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h index b5bd082..469560e 100644 --- a/dapl/openib_ucm/dapl_ib_util.h +++ b/dapl/openib_ucm/dapl_ib_util.h @@ -48,6 +48,7 @@ struct ib_cm_handle struct dapl_hca *hca; struct dapl_sp *sp; struct dapl_ep *ep; + struct dapl_cr *cr; struct ibv_ah *ah; uint16_t p_size; /* accept p_data, for retries */ uint8_t p_data[DCM_MAX_PDATA_SIZE]; -- 2.46.0