git.openfabrics.org - ~ardavis/dapl.git/commitdiff
dapl scm: fix corner case that delivers duplicate disconnect events
author: Arlin Davis <arlin.r.davis@intel.com>
Thu, 21 Aug 2008 01:51:00 +0000 (18:51 -0700)
committer: Arlin Davis <arlin.r.davis@intel.com>
Thu, 21 Aug 2008 01:51:00 +0000 (18:51 -0700)
Signed-off-by: Arlin Davis <ardavis@ichips.intel.com>

dapl/openib_scm/dapl_ib_cm.c

index 5ba5ddcf98055c921c6e4f0a1f7f01742122a9aa..d2982c7bd2ba4a803bb58edae6bf37549380ee6b 100644 (file)
@@ -160,7 +160,8 @@ dapli_socket_disconnect(dp_ib_cm_handle_t   cm_ptr)
        
        dapl_os_lock(&cm_ptr->lock);
        if ((cm_ptr->state == SCM_INIT) ||
-           (cm_ptr->state == SCM_DISCONNECTED)) {
+           (cm_ptr->state == SCM_DISCONNECTED) ||
+           (cm_ptr->state == SCM_DESTROY)) {
                dapl_os_unlock(&cm_ptr->lock);
                return DAT_SUCCESS;
        } else {
@@ -171,7 +172,6 @@ dapli_socket_disconnect(dp_ib_cm_handle_t   cm_ptr)
                        cm_ptr->socket = -1;
                }
                cm_ptr->state = SCM_DISCONNECTED;
-               write(g_scm_pipe[1], "w", sizeof "w");
        }
        dapl_os_unlock(&cm_ptr->lock);
 
@@ -188,12 +188,7 @@ dapli_socket_disconnect(dp_ib_cm_handle_t  cm_ptr)
                                             ep_ptr);
        }       
 
-       /* remove reference from endpoint */
-       ep_ptr->cm_handle = NULL;
-       
-       /* schedule destroy */
-
-
+       /* scheduled destroy via disconnect clean in callback */
        return DAT_SUCCESS;
 }
 
@@ -208,8 +203,11 @@ dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
        struct dapl_ep  *ep_ptr = cm_ptr->ep;
 
        if (err) {
-               dapl_log(DAPL_DBG_TYPE_ERR, " connect: socket ERR %s\n", 
-                        strerror(err)); 
+               dapl_log(DAPL_DBG_TYPE_ERR,
+                        " CONN_PENDING: socket ERR %s -> %s\n",
+                        strerror(err),
+                        inet_ntoa(((struct sockaddr_in *)
+                        ep_ptr->param.remote_ia_address_ptr)->sin_addr));
                goto bail;
        }
        dapl_dbg_log(DAPL_DBG_TYPE_EP,
@@ -988,8 +986,6 @@ dapls_ib_disconnect(
        IN      DAPL_EP                 *ep_ptr,
        IN      DAT_CLOSE_FLAGS         close_flags)
 {
-       dp_ib_cm_handle_t       cm_ptr = ep_ptr->cm_handle;
-
        dapl_dbg_log (DAPL_DBG_TYPE_EP,
                        "dapls_ib_disconnect(ep_handle %p ....)\n",
                        ep_ptr);
@@ -997,10 +993,11 @@ dapls_ib_disconnect(
        /* reinit to modify QP state */
        dapls_ib_reinit_ep(ep_ptr);
 
-       if (cm_ptr == NULL) 
+       if (ep_ptr->cm_handle == NULL ||
+           ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED)
                return DAT_SUCCESS;
        else
-               return(dapli_socket_disconnect(cm_ptr));
+               return(dapli_socket_disconnect(ep_ptr->cm_handle));
 }
 
 /*
@@ -1027,7 +1024,10 @@ dapls_ib_disconnect_clean (
        IN  DAT_BOOLEAN                 active,
        IN  const ib_cm_events_t        ib_cm_event )
 {
-    return;
+       if (ep_ptr->cm_handle)
+               dapli_cm_destroy(ep_ptr->cm_handle);
+
+       return;
 }
 
 /*
@@ -1095,6 +1095,7 @@ dapls_ib_remove_conn_listener (
                        cm_ptr->socket = -1;
                }
                /* cr_thread will free */
+               cm_ptr->state = SCM_DESTROY;
                sp_ptr->cm_srvc_handle = NULL;
                write(g_scm_pipe[1], "w", sizeof "w");
        }
@@ -1437,6 +1438,7 @@ void cr_thread(void *arg)
        idx=0;
        ufds[idx].fd = g_scm_pipe[0]; /* wakeup and process work */
         ufds[idx].events = POLLIN;
+       ufds[idx].revents = 0;
 
        if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
             next_cr = dapl_llist_peek_head (&hca_ptr->ib_trans.list);
@@ -1445,7 +1447,7 @@ void cr_thread(void *arg)
 
        while (next_cr) {
            cr = next_cr;
-           if ((cr->socket == -1) || 
+           if ((cr->socket == -1 && cr->state == SCM_DESTROY) ||
                hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
 
                dapl_dbg_log(DAPL_DBG_TYPE_CM," cr_thread: Free %p\n", cr);
@@ -1465,6 +1467,7 @@ void cr_thread(void *arg)
                
            /* Add to ufds for poll, check for immediate work */
            ufds[++idx].fd = cr->socket; /* add listen or cr */
+           ufds[idx].revents = 0;
            if (cr->state == SCM_CONN_PENDING)
                ufds[idx].events = POLLOUT;
            else
@@ -1474,7 +1477,7 @@ void cr_thread(void *arg)
            dapl_dbg_log(DAPL_DBG_TYPE_CM," poll cr=%p, fd=%d,%d\n", 
                                cr, cr->socket, ufds[idx].fd);
            dapl_os_unlock(&hca_ptr->ib_trans.lock);
-           ret = poll(&ufds[idx],1,1);
+           ret = poll(&ufds[idx],1,0);
            dapl_dbg_log(DAPL_DBG_TYPE_CM,
                         " poll wakeup ret=%d cr->st=%d"
                         " ev=0x%x fd=%d\n",
@@ -1505,13 +1508,22 @@ void cr_thread(void *arg)
                        if (!ret)
                                dapli_socket_connected(cr,opt);
                        else
-                               dapli_socket_connected(cr,EFAULT);
+                               dapli_socket_connected(cr,errno);
+               } else {
+                       dapl_log(DAPL_DBG_TYPE_WARN,
+                                " CM poll ERR, wrong state(%d) -> %s SKIP\n",
+                                cr->state,
+                                inet_ntoa(((struct sockaddr_in*)
+                                       &cr->dst.ia_address)->sin_addr));
                }
            } else if (ret != 0) {
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                            " cr_thread(cr=%p) st=%d poll ERR= %s\n",
-                            cr,cr->state,strerror(errno));
-               /* POLLUP or poll error case, issue event if connected */
+               dapl_log(DAPL_DBG_TYPE_CM,
+                        " CM poll warning %s, ret=%d revnt=%x st=%d -> %s\n",
+                        strerror(errno), ret, ufds[idx].revents, cr->state,
+                        inet_ntoa(((struct sockaddr_in*)
+                               &cr->dst.ia_address)->sin_addr));
+
+               /* POLLHUP, POLLNVAL, or poll error, issue event if connected */
                if (cr->state == SCM_CONNECTED)
                        dapli_socket_disconnect(cr);
            }