git.openfabrics.org - ~ardavis/dapl.git/commitdiff
ucm: add time wait override capability for CM services
author: Arlin Davis <arlin.r.davis@intel.com>
Tue, 9 Dec 2014 23:35:59 +0000 (15:35 -0800)
committer: Arlin Davis <arlin.r.davis@intel.com>
Tue, 9 Dec 2014 23:35:59 +0000 (15:35 -0800)
Add a new environment variable, DAPL_UCM_WAIT_TIME (in milliseconds),
to override the default wait_time for CM services.
The default setting is 60000 ms (60 seconds).

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
dapl/openib_common/dapl_ib_common.h
dapl/openib_ucm/cm.c
dapl/openib_ucm/dapl_ib_util.h
dapl/openib_ucm/device.c

index c1b9267b59609d4eaa64528171053459eb5ff31e..d5b26ec5d2daa683e2fcd5c4410ed1e3723f4c39 100644 (file)
@@ -225,6 +225,7 @@ typedef uint16_t            ib_hca_port_t;
 #define DCM_RETRY_CNT   10
 #define DCM_REP_TIME    800    /* reply timeout in m_secs */
 #define DCM_RTU_TIME    800    /* rtu timeout in m_secs */
+#define DCM_WAIT_TIME   60000  /* wait timeout in m_secs */
 #define DCM_QP_SIZE     500     /* uCM tx, rx qp size */
 #define DCM_CQ_SIZE     500     /* uCM cq size */
 #define DCM_TX_BURST   50      /* uCM signal, every TX burst msgs posted */
index 141086d7b1bcc7f61f0ce14c3baf51f0a231ef62..04d5eacd7d21fc930b9420e93daa6e2394c81e55 100644 (file)
@@ -231,38 +231,26 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                *timer = cm->hca->ib_trans.cm_timer;
                if ((time - cm->timer)/1000 >
                     (cm->hca->ib_trans.rtu_time << cm->retries)) {
-                       dapl_log(DAPL_DBG_TYPE_CM,
-                                " CM_TIMEWAIT %d %p [lid, port, cqp, iqp]:"
-                                " %x %x %x %x -> %x %x %x %x r_pid %x"
-                                " Time(ms) %d > %d\n",
-                                cm->retries+1, cm,
-                                ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
-                                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
-                                ntohl(cm->msg.d_id),
-                                (time - cm->timer)/1000,
-                                cm->hca->ib_trans.rtu_time << cm->retries);
                        cm->retries++;
-               }
-               if (cm->retries > 2) {
-                       dapl_log(DAPL_DBG_TYPE_CM_WARN,
-                                " CM_TIMEWAIT EXPIRED %d %p [lid, port, cqp, iqp]:"
-                                " %x %x %x %x -> %x %x %x %x r_pid %x"
-                                " Time(ms) %d > %d\n",
-                                cm->retries+1, cm,
-                                ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                                ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
-                                ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
-                                ntohl(cm->msg.d_id),
-                                (time - cm->timer)/1000,
-                                cm->hca->ib_trans.rtu_time << cm->retries);
-                       cm->ah = NULL;  /* consumer will free AH */
-                       cm->state = DCM_FREE;
-                       dapl_os_unlock(&cm->lock);
-                       dapl_ep_unlink_cm(cm->ep, cm);  /* last CM ref */
-                       return;
+                       if ((time - cm->timer)/1000 > cm->hca->ib_trans.wait_time) {
+                               dapl_log(DAPL_DBG_TYPE_CM_WARN,
+                                        " CM_TIMEWAIT EXPIRED %d %p [lid, port, cqp, iqp]:"
+                                        " %x %x %x %x -> %x %x %x %x r_pid %x"
+                                        " Time(ms) %d > %d\n",
+                                        cm->retries+1, cm,
+                                        ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
+                                        ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr.ib.qpn),
+                                        ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport),
+                                        ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr.ib.qpn),
+                                        ntohl(cm->msg.d_id),
+                                        (time - cm->timer)/1000,
+                                        cm->hca->ib_trans.wait_time);
+                               cm->ah = NULL;  /* consumer will free AH */
+                               cm->state = DCM_FREE;
+                               dapl_os_unlock(&cm->lock);
+                               dapl_ep_unlink_cm(cm->ep, cm);  /* last CM ref */
+                               return;
+                       }
                }
                break;
 
@@ -737,7 +725,7 @@ void dapls_cm_release(dp_ib_cm_handle_t cm)
        dapl_os_lock(&cm->lock);
        cm->ref_count--;
        if (cm->ref_count) {
-               if (cm->ref_count == 1)
+               if ((cm->ref_count == 1) && (cm->list_entry.list_head))
                        dapl_os_wait_object_wakeup(&cm->f_event);
                 dapl_os_unlock(&cm->lock);
                return;
index 69d61a4daa993310b73b62a7e2d2e9b396bb682f..a5b9c5200a3e7b8983435490c609b54acb64d654 100644 (file)
@@ -101,6 +101,7 @@ typedef struct _ib_hca_transport
        int                     cm_timer;
        int                     rep_time;
        int                     rtu_time;
+       int                     wait_time;
        DAPL_OS_LOCK            slock;  
        int                     s_hd;
        int                     s_tl;
index 75d730618675bc71082a9e5908da154af5c98676..79796ccb75965429e104626e9bf2e3d13a36e80f 100644 (file)
@@ -504,12 +504,11 @@ static int ucm_service_create(IN DAPL_HCA *hca)
        int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
        char *rbuf;
 
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n");
-
        /* setup CM timers and queue sizes */
        tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT);
        tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME);
        tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME);
+       tp->wait_time = dapl_os_get_env_val("DAPL_UCM_WAIT_TIME", DCM_WAIT_TIME);
        tp->cm_timer = DAPL_MIN(tp->rep_time,tp->rtu_time);
        tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE);
        tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE);
@@ -519,8 +518,10 @@ static int ucm_service_create(IN DAPL_HCA *hca)
                 goto bail;
         
         dapl_log(DAPL_DBG_TYPE_UTIL,
-                        " create_service: pd %p ctx %p handle 0x%x\n",
-                         tp->pd, tp->pd->context, tp->pd->handle);
+                 " UCM: CM service - pd %p ctx %p "
+                " Timers(ms): req %d rtu %d wait %d\n",
+                 tp->pd, tp->pd->context, tp->rep_time,
+                 tp->rtu_time, tp->wait_time);
 
        tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);
        if (!tp->rch)