]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
ucm: fix lock init bug in ucm_cm_find
authorArlin Davis <arlin.r.davis@intel.com>
Thu, 15 Oct 2009 16:19:45 +0000 (09:19 -0700)
committerArlin Davis <arlin.r.davis@intel.com>
Thu, 15 Oct 2009 16:19:45 +0000 (09:19 -0700)
the lock should be setup as pointer to lock
not lock structure. Cleanup lock and list
in cm_find function and cm_print function.

Add debug aid by passing process id in
msg resv area. cleanup cr references
and change to cm for consistency.

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
dapl/openib_ucm/cm.c

index 9d702a8f86f9956a0e1d0c31fed30d880aed1d96..29f87b53e2a5a004153ed6db316680cd907566e0 100644 (file)
@@ -166,14 +166,14 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                    (cm->hca->ib_trans.rep_time * cm->retries)) {
                        dapl_log(DAPL_DBG_TYPE_WARN,
                                 " CM_REQ retry %d [lid, port, qpn]:"
-                                " 0x%x %d 0x%x -> 0x%x %d 0x%x\n", 
+                                " %x %x %x -> %x %x %x \n", 
                                 cm->retries,
                                 ntohs(cm->msg.saddr.ib.lid), 
                                 ntohs(cm->msg.sport),
                                 ntohl(cm->msg.saddr.ib.qpn), 
                                 ntohs(cm->msg.daddr.ib.lid), 
                                 ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn)); 
+                                ntohl(cm->msg.dqpn));
                        dapl_os_unlock(&cm->lock);
                        dapli_cm_connect(cm->ep, cm);
                        return;
@@ -181,17 +181,20 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                break;
        case DCM_RTU_PENDING: 
                *timer = cm->hca->ib_trans.cm_timer;  
-               if ((time - cm->timer)/1000 > cm->hca->ib_trans.rtu_time) {
+               if ((time - cm->timer)/1000 > 
+                   (cm->hca->ib_trans.rtu_time * cm->retries)) {
                        dapl_log(DAPL_DBG_TYPE_WARN,
                                 " CM_REPLY retry %d [lid, port, qpn]:"
-                                " 0x%x %d 0x%x -> 0x%x %d 0x%x\n", 
+                                " %x %x %x -> %x %x %x r_pid %x,%d\n", 
                                 cm->retries,
                                 ntohs(cm->msg.saddr.ib.lid), 
                                 ntohs(cm->msg.sport),
                                 ntohl(cm->msg.saddr.ib.qpn), 
                                 ntohs(cm->msg.daddr.ib.lid), 
                                 ntohs(cm->msg.dport),
-                                ntohl(cm->msg.daddr.ib.qpn));  
+                                ntohl(cm->msg.daddr.ib.qpn),  
+                                ntohl(*(DAT_UINT32*)cm->msg.resv),
+                                ntohl(*(DAT_UINT32*)cm->msg.resv)); 
                        dapl_os_unlock(&cm->lock);
                        ucm_reply(cm);
                        return;
@@ -204,14 +207,16 @@ static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer)
                    (cm->hca->ib_trans.rep_time)) {
                        dapl_log(DAPL_DBG_TYPE_WARN,
                                 " CM_DREQ retry %d [lid, port, qpn]:"
-                                " 0x%x %d 0x%x -> 0x%x %d 0x%x\n", 
+                                " %x %x %x -> %x %x %x r_pid %x,%d\n", 
                                 cm->retries,
                                 ntohs(cm->msg.saddr.ib.lid), 
                                 ntohs(cm->msg.sport),
                                 ntohl(cm->msg.saddr.ib.qpn), 
                                 ntohs(cm->msg.daddr.ib.lid), 
                                 ntohs(cm->msg.dport),
-                                ntohl(cm->msg.dqpn)); 
+                                ntohl(cm->msg.dqpn), 
+                                ntohl(*(DAT_UINT32*)cm->msg.resv),
+                                ntohl(*(DAT_UINT32*)cm->msg.resv)); 
                        dapl_os_unlock(&cm->lock);
                        dapli_cm_disconnect(cm);
                         return;
@@ -396,24 +401,24 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
 dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
 {
        dp_ib_cm_handle_t cm, next, found = NULL;
-       struct dapl_llist_entry *list;
-       DAPL_OS_LOCK lock;
+       struct dapl_llist_entry **list;
+       DAPL_OS_LOCK *lock;
        int listenq = 0;
 
        /* conn list first, duplicate requests for DCM_REQ */
-       list = tp->list;
-       lock = tp->lock;
+       list = &tp->list;
+       lock = &tp->lock;
 
 retry_listenq:
-       dapl_os_lock(&lock);
-        if (!dapl_llist_is_empty(&list))
-               next = dapl_llist_peek_head(&list);
+       dapl_os_lock(lock);
+        if (!dapl_llist_is_empty(list))
+               next = dapl_llist_peek_head(list);
        else
                next = NULL;
 
        while (next) {
                cm = next;
-               next = dapl_llist_next_entry(&list,
+               next = dapl_llist_next_entry(list,
                                             (DAPL_LLIST_ENTRY *)&cm->entry);
                if (cm->state == DCM_DESTROY)
                        continue;
@@ -435,7 +440,7 @@ retry_listenq:
                                break; 
                        } else {
                                /* duplicate; bail and throw away */
-                               dapl_os_unlock(&lock);
+                               dapl_os_unlock(lock);
                                dapl_log(DAPL_DBG_TYPE_CM,
                                         " duplicate: op %s st %s [lid, port, qpn]:"
                                         " 0x%x %d 0x%x <- 0x%x %d 0x%x\n", 
@@ -451,13 +456,13 @@ retry_listenq:
                        }
                }
        }
-       dapl_os_unlock(&lock);
+       dapl_os_unlock(lock);
 
        /* no duplicate request on connq, check listenq for new request */
        if (ntohs(msg->op) == DCM_REQ && !listenq && !found) {
                listenq = 1;
-               list = tp->llist;
-               lock = tp->llock;
+               list = &tp->llist;
+               lock = &tp->llock;
                goto retry_listenq;
        }
 
@@ -608,6 +613,7 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
                goto bail;
 
        cm->msg.ver = htons(DCM_VER);
+       *(DAT_UINT32*)cm->msg.resv = htonl(dapl_os_getpid()); /* exchange PID for debugging */
        
        /* ACTIVE: init source address QP info from local EP */
        if (ep) {
@@ -648,7 +654,7 @@ static void ucm_ud_free(DAPL_EP *ep)
        DAPL_IA *ia = ep->header.owner_ia;
        DAPL_HCA *hca = NULL;
        ib_hca_transport_t *tp = &ia->hca_ptr->ib_trans;
-       dp_ib_cm_handle_t cr, next;
+       dp_ib_cm_handle_t cm, next;
 
        dapl_os_lock(&tp->lock);
        if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)&tp->list))
@@ -657,17 +663,17 @@ static void ucm_ud_free(DAPL_EP *ep)
            next = NULL;
 
        while (next) {
-               cr = next;
+               cm = next;
                next = dapl_llist_next_entry((DAPL_LLIST_HEAD*)&tp->list,
-                                            (DAPL_LLIST_ENTRY*)&cr->entry);
-               if (cr->ep == ep)  {
+                                            (DAPL_LLIST_ENTRY*)&cm->entry);
+               if (cm->ep == ep)  {
                        dapl_dbg_log(DAPL_DBG_TYPE_EP,
-                                    " qp_free CR: ep %p cr %p\n", ep, cr);
-                       dapl_os_lock(&cr->lock);
-                       hca = cr->hca;
-                       cr->ep = NULL;
-                       cr->state = DCM_DESTROY;
-                       dapl_os_unlock(&cr->lock);
+                                    " qp_free CM: ep %p cm %p\n", ep, cm);
+                       dapl_os_lock(&cm->lock);
+                       hca = cm->hca;
+                       cm->ep = NULL;
+                       cm->state = DCM_DESTROY;
+                       dapl_os_unlock(&cm->lock);
                }
        }
        dapl_os_unlock(&tp->lock);
@@ -855,8 +861,12 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
                dapl_os_unlock(&cm->lock);
 
 #ifdef DAPL_COUNTERS
-               if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST)
+               /* called from check_timers in cm_thread, cm lock held */
+               if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) {
+                       dapl_os_unlock(&cm->hca->ib_trans.lock);
                        dapls_print_cm_list(ep->header.owner_ia);
+                       dapl_os_lock(&cm->hca->ib_trans.lock);
+               }
 #endif
                dapl_evd_connection_callback(cm, 
                                             IB_CME_DESTINATION_UNREACHABLE,
@@ -928,6 +938,7 @@ static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                                 ntohs(msg->saddr.ib.lid), 
                                 ntohl(msg->saddr.ib.qpn), 
                                 ntohs(msg->sport));
+                       dapl_os_unlock(&cm->lock);
                        goto bail;
                }
                dapl_os_memcpy(cm->msg.p_data, 
@@ -1272,8 +1283,12 @@ static int ucm_reply(dp_ib_cm_handle_t cm)
                        
                dapl_os_unlock(&cm->lock);
 #ifdef DAPL_COUNTERS
-               if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST)
+               /* called from check_timers in cm_thread, cm lock held */
+               if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) {
+                       dapl_os_unlock(&cm->hca->ib_trans.lock);
                        dapls_print_cm_list(dapl_llist_peek_head(&cm->hca->ia_list_head));
+                       dapl_os_lock(&cm->hca->ib_trans.lock);
+               }
 #endif
 #ifdef DAT_EXTENSIONS
                if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
@@ -1577,7 +1592,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia,
 
        /* reserve local port, then allocate CM object */
        if (!ucm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) {
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
+               dapl_dbg_log(DAPL_DBG_TYPE_WARN,
                             " listen: ERROR %s on conn_qual 0x%x\n",
                             strerror(errno), sid);
                return DAT_CONN_QUAL_IN_USE;
@@ -2074,16 +2089,16 @@ void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
        /* Print in process CM's for this IA, if debug type set */
        int i = 0;
        dp_ib_cm_handle_t cm, next_cm;
-       struct dapl_llist_entry *list;
-       DAPL_OS_LOCK lock;
+       struct dapl_llist_entry **list;
+       DAPL_OS_LOCK *lock;
        
        /* LISTEN LIST */
-       list = ia_ptr->hca_ptr->ib_trans.llist;
-       lock = ia_ptr->hca_ptr->ib_trans.llock;
+       list = &ia_ptr->hca_ptr->ib_trans.llist;
+       lock = &ia_ptr->hca_ptr->ib_trans.llock;
 
-       dapl_os_lock(&lock);
-       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)&list))
-               next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)&list);
+       dapl_os_lock(lock);
+       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list))
+               next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list);
        else
                next_cm = NULL;
 
@@ -2093,31 +2108,32 @@ void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
                next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
                                                (DAPL_LLIST_ENTRY*)&cm->entry);
 
-               printf( "  LISTEN[%d]: sp %p %s uCM_QP: 0x%x %d 0x%x\n",
+               printf( "  LISTEN[%d]: sp %p %s uCM_QP: 0x%x %d 0x%x l_pid %x,%d\n",
                        i, cm->sp, dapl_cm_state_str(cm->state),
                        ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport),
-                       ntohl(cm->msg.sqpn));
+                       ntohl(cm->msg.sqpn), ntohl(*(DAT_UINT32*)cm->msg.resv), 
+                       ntohl(*(DAT_UINT32*)cm->msg.resv)); 
                i++;
        }
-       dapl_os_unlock(&lock);
+       dapl_os_unlock(lock);
 
        /* CONNECTION LIST */
-       list = ia_ptr->hca_ptr->ib_trans.list;
-       lock = ia_ptr->hca_ptr->ib_trans.lock;
+       list = &ia_ptr->hca_ptr->ib_trans.list;
+       lock = &ia_ptr->hca_ptr->ib_trans.lock;
 
-       dapl_os_lock(&lock);
-       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)&list))
-               next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)&list);
+       dapl_os_lock(lock);
+       if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list))
+               next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list);
        else
                next_cm = NULL;
 
         while (next_cm) {
                cm = next_cm;
-               next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)&list,
+               next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
                                                (DAPL_LLIST_ENTRY*)&cm->entry);
 
                printf( "  CONN[%d]: ep %p cm %p %s %s"
-                       "  0x%x %d 0x%x %s 0x%x %d 0x%x\n",
+                       "  %x %x %x %s %x %x %x r_pid %x,%d\n",
                        i, cm->ep, cm,
                        cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD",
                        dapl_cm_state_str(cm->state),
@@ -2127,10 +2143,12 @@ void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
                        cm->sp ? "<-" : "->",
                        ntohs(cm->msg.daddr.ib.lid),
                        ntohs(cm->msg.dport),
-                       ntohl(cm->msg.daddr.ib.qpn));
+                       ntohl(cm->msg.daddr.ib.qpn),
+                       ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv), 
+                       ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv)); 
                i++;
        }
        printf("\n");
-       dapl_os_unlock(&lock);
+       dapl_os_unlock(lock);
 }
 #endif