From 48c594ba32507e49e4c28be0fc7094df96b52340 Mon Sep 17 00:00:00 2001
From: Arlin Davis
Date: Fri, 3 Dec 2010 14:24:40 -0800
Subject: [PATCH] ucm: hold lock when sending cm_msgs to sync timer start with packet send

Releasing the lock after setting the start timer and before ucm_send
could result in an incorrect timeout on CM operations if the thread is
scheduled out when releasing the lock.

Signed-off-by: Arlin Davis
---
 dapl/openib_ucm/cm.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 25b3a39..fd3106a 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -969,13 +969,15 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
 		return DAT_ERROR(DAT_INVALID_ADDRESS,
 				 DAT_INVALID_ADDRESS_UNREACHABLE);
 	}
-	dapl_os_unlock(&cm->lock);
 
 	cm->msg.op = htons(DCM_REQ);
 	dapl_os_get_time(&cm->timer); /* reply expected */
 	if (ucm_send(&cm->hca->ib_trans, &cm->msg,
-		     &cm->msg.p_data, ntohs(cm->msg.p_size)))
+		     &cm->msg.p_data, ntohs(cm->msg.p_size))) {
+		dapl_os_unlock(&cm->lock);
 		goto bail;
+	}
+	dapl_os_unlock(&cm->lock);
 
 	/* first time through, link EP and CM, put on work queue */
 	if (!cm->retries) {
@@ -1183,7 +1185,6 @@ ud_bail:
 				(DAT_COUNT)ntohs(cm->msg.p_size),
 				(DAT_PVOID *)cm->msg.p_data,
 				(DAT_PVOID *)&xevent);
-		dapli_cm_free(cm); /* still attached to EP */
 	} else
 #endif
 	{
@@ -1409,7 +1410,6 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
 
 		dapl_os_unlock(&cm->lock);
 #ifdef DAPL_COUNTERS
-		/* called from check_timers in cm_thread, cm lock held */
 		if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) {
 			dapl_os_unlock(&cm->hca->ib_trans.lock);
 			dapls_print_cm_list(dapl_llist_peek_head(&cm->hca->ia_list_head));
@@ -1437,12 +1437,14 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
 				  NULL, 0, cm->sp);
 		return -1;
 	}
+	dapl_os_get_time(&cm->timer); /* RTU expected */
 
-	dapl_os_unlock(&cm->lock);
 	if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
 		dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n");
+		dapl_os_unlock(&cm->lock);
 		return -1;
 	}
+	dapl_os_unlock(&cm->lock);
 	return 0;
 }
 
@@ -1545,9 +1547,9 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 	dapl_os_memcpy(&cm->msg.saddr.ib.gid[0],
 		       &cm->hca->ib_trans.addr.ib.gid, 16);
 
-	/* 
-	 * UD: deliver p_data with REQ and EST event, keep REQ p_data in 
-	 * cm->msg.p_data and save REPLY accept data in cm->p_data for retries 
+	/*
+	 * UD: deliver p_data with REQ and EST event, keep REQ p_data in
+	 * cm->msg.p_data and save REPLY accept data in cm->p_data for retries
 	 */
 	cm->p_size = p_size;
 	dapl_os_memcpy(&cm->p_data, p_data, p_size);
@@ -1556,16 +1558,19 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 	dapl_ep_link_cm(ep, cm);
 	cm->ep = ep;
 	cm->hca = ia->hca_ptr;
-	
+
+	/* Send RTU and change state under CM lock */
 	dapl_os_lock(&cm->lock);
-	dapl_os_get_time(&cm->timer); /* RTU expected */
 	cm->state = DCM_RTU_PENDING;
-	dapl_os_unlock(&cm->lock);
-	
-	if (ucm_reply(cm)) {
+	dapl_os_get_time(&cm->timer); /* RTU expected */
+	if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) {
+		dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n");
+		dapl_os_unlock(&cm->lock);
 		dapl_ep_unlink_cm(ep, cm);
 		goto bail;
 	}
+	dapl_os_unlock(&cm->lock);
+
 	dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: accepted!\n");
 	dapls_thread_signal(&cm->hca->ib_trans.signal);
 	return DAT_SUCCESS;
-- 
2.41.0