From ad43b8d3ca9f67d3231525b2808776719686deba Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Wed, 12 Aug 2015 17:30:23 -0700 Subject: [PATCH] ucm: add cluster size environments to adjust CM timers Signed-off-by: Arlin Davis --- dapl/openib_ucm/dapl_ib_util.h | 7 +++++++ dapl/openib_ucm/device.c | 25 +++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h index ece9c88..ac74bab 100644 --- a/dapl/openib_ucm/dapl_ib_util.h +++ b/dapl/openib_ucm/dapl_ib_util.h @@ -57,6 +57,10 @@ #define UCM_ARRAY_IDX(idx, abits) (idx >> abits) #define UCM_ENTRY_IDX(idx, abits) (idx & (abits - 1)) +/* Default UCM timers set for small cluster */ +#define UCM_DEF_NODES 32 +#define UCM_DEF_PPN 32 +#define UCM_DEF_THRESHOLD (UCM_DEF_NODES * UCM_DEF_PPN) /* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */ struct ib_cm_handle @@ -125,6 +129,9 @@ typedef struct _ib_hca_transport int wait_time; int dreq_cnt; int drep_time; + int nodes; + int ppn; + int threshold; DAPL_OS_LOCK slock; int s_hd; int s_tl; diff --git a/dapl/openib_ucm/device.c b/dapl/openib_ucm/device.c index 94ce812..98693b2 100644 --- a/dapl/openib_ucm/device.c +++ b/dapl/openib_ucm/device.c @@ -517,17 +517,30 @@ static int ucm_service_create(IN DAPL_HCA *hca) int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */ char *rbuf; - /* setup CM timers and queue sizes */ + /* setup CM timers and queue sizes based on cluster size */ + tp->nodes = dapl_os_get_env_val("DAPL_NETWORK_NODES", UCM_DEF_NODES); + tp->ppn = dapl_os_get_env_val("DAPL_NETWORK_PPN", UCM_DEF_PPN); + tp->threshold = dapl_os_get_env_val("DAPL_NETWORK_TRESHOLD", UCM_DEF_THRESHOLD); tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT); - tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME); - tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME); tp->wait_time = dapl_os_get_env_val("DAPL_UCM_WAIT_TIME", DCM_WAIT_TIME); - tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE); - tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE); - tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST); tp->dreq_cnt = dapl_os_get_env_val("DAPL_UCM_DREQ_RETRY", DCM_DREQ_CNT); tp->drep_time = dapl_os_get_env_val("DAPL_UCM_DREP_TIME", DCM_DREP_TIME); tp->cm_timer = dapl_os_get_env_val("DAPL_UCM_TIMER", DCM_CM_TIMER); + + if ((tp->nodes * tp->ppn) <= tp->threshold) { + tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME); + tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME); + tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE); + tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE); + tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST); + } else { + tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME*10); + tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME*10); + tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE*10); + tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE*10); + tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST*2); + } + /* default = 11-bit, 2KB entries; 18 bit, 256KB total */ tp->cm_entry_bits = dapl_os_get_env_val("DAPL_UCM_ENTRY_BITS", UCM_ENTRY_BITS); tp->cm_array_bits = DAPL_MAX(dapl_os_get_env_val("DAPL_UCM_ARRAY_BITS", UCM_ARRAY_BITS), tp->cm_entry_bits); -- 2.46.0