]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
r6392: Fix for RDMA cm id destruction and debug builds.
authorJames Lentini <jlentini@netapp.com>
Mon, 10 Apr 2006 20:16:44 +0000 (20:16 +0000)
committerJames Lentini <jlentini@netapp.com>
Mon, 10 Apr 2006 20:16:44 +0000 (20:16 +0000)
Signed-off by: Arlin Davis <ardavis@ichips.intel.com>
Signed-off-by: James Lentini <jlentini@netapp.com>
dapl/openib_cma/dapl_ib_cm.c

index 6de3897fb6e8431ae50bb8499723b79360df6bfe..c1630b1ed58dd2822240274cf1a5463d47f822c6 100644 (file)
@@ -62,9 +62,9 @@
 /* local prototypes */
 static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, 
                                          struct rdma_cm_event *event);
-static int dapli_cm_active_cb(struct dapl_cm_id *conn, 
+static void dapli_cm_active_cb(struct dapl_cm_id *conn, 
                              struct rdma_cm_event *event);
-static int dapli_cm_passive_cb(struct dapl_cm_id *conn, 
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn, 
                               struct rdma_cm_event *event);
 static void dapli_addr_resolve(struct dapl_cm_id *conn);
 static void dapli_route_resolve(struct dapl_cm_id *conn);
@@ -87,7 +87,9 @@ static inline uint64_t cpu_to_be64(uint64_t x) { return x; }
 static void dapli_addr_resolve(struct dapl_cm_id *conn)
 {
        int ret;
-       
+#ifdef DAPL_DBG
+       struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
+#endif
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
                " addr_resolve: cm_id %p SRC %x DST %x\n", 
                conn->cm_id, 
@@ -110,8 +112,10 @@ static void dapli_addr_resolve(struct dapl_cm_id *conn)
 static void dapli_route_resolve(struct dapl_cm_id *conn)
 {
        int ret;
-       struct rdma_cm_id *cm_id = conn->cm_id;
-
+#ifdef DAPL_DBG
+       struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
+       struct ib_addr   *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
+#endif
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
                " route_resolve: cm_id %p SRC %x DST %x PORT %d\n", 
                conn->cm_id, 
@@ -158,37 +162,51 @@ bail:
                                     NULL, conn->ep);
 }
 
+/* 
+ * Called from consumer thread via dat_ep_free().
+ * CANNOT be called from the async event processing thread
+ * dapli_cma_event_cb() since a cm_id reference is held and
+ * a deadlock will occur.
+ */
 void dapli_destroy_conn(struct dapl_cm_id *conn)
 {
-       int in_callback;
+       struct rdma_cm_id *cm_id;
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
                     " destroy_conn: conn %p id %d\n",
                     conn,conn->cm_id);
-
+       
        dapl_os_lock(&conn->lock);
        conn->destroy = 1;
-       in_callback = conn->in_callback;
+       
+       if (conn->ep)
+               conn->ep->cm_handle = IB_INVALID_HANDLE;
+
+       cm_id = conn->cm_id;
+       conn->cm_id = NULL;
        dapl_os_unlock(&conn->lock);
 
-       if (!in_callback) {
-               if (conn->ep)
-                       conn->ep->cm_handle = IB_INVALID_HANDLE;
-               if (conn->cm_id) {
-                       if (conn->cm_id->qp)
-                               rdma_destroy_qp(conn->cm_id);
-                       rdma_destroy_id(conn->cm_id);
-               }
-                       
-               conn->cm_id = NULL;
-               dapl_os_free(conn, sizeof(*conn));
+       /* 
+        * rdma_destroy_id will force synchronization with async CM event 
+        * thread since it blocks until the in-process event reference
+        * is cleared during our event processing call exit.
+        */
+       if (cm_id) {
+               if (cm_id->qp)
+                       rdma_destroy_qp(cm_id);
+
+               rdma_destroy_id(cm_id);
        }
+       dapl_os_free(conn, sizeof(*conn));
 }
 
 static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn,
                                          struct rdma_cm_event *event)
 {
        struct dapl_cm_id *new_conn;
+#ifdef DAPL_DBG
+       struct rdma_addr *ipaddr = &event->id->route.addr;
+#endif
        
        if (conn->sp == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
@@ -239,11 +257,9 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn,
        return new_conn;
 }
 
-static int dapli_cm_active_cb(struct dapl_cm_id *conn,
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,
                              struct rdma_cm_event *event)
 {
-       int destroy;
-
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
                     " active_cb: conn %p id %d event %d\n",
                     conn, conn->cm_id, event->event );
@@ -251,9 +267,8 @@ static int dapli_cm_active_cb(struct dapl_cm_id *conn,
        dapl_os_lock(&conn->lock);
        if (conn->destroy) {
                dapl_os_unlock(&conn->lock);
-               return 0;
+               return;
        }
-       conn->in_callback = 1;
        dapl_os_unlock(&conn->lock);
 
        switch (event->event) {
@@ -298,17 +313,12 @@ static int dapli_cm_active_cb(struct dapl_cm_id *conn,
                break;
        }
 
-       dapl_os_lock(&conn->lock);
-       destroy = conn->destroy;
-       conn->in_callback = conn->destroy;
-       dapl_os_unlock(&conn->lock);
-       return(destroy);
+       return;
 }
 
-static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
                               struct rdma_cm_event *event)
 {
-       int destroy;
        struct dapl_cm_id *new_conn;
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
@@ -318,9 +328,8 @@ static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
        dapl_os_lock(&conn->lock);
        if (conn->destroy) {
                dapl_os_unlock(&conn->lock);
-               return 0;
+               return;
        }
-       conn->in_callback = 1;
        dapl_os_unlock(&conn->lock);
 
        switch (event->event) {
@@ -372,11 +381,7 @@ static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
                break;
        }
 
-       dapl_os_lock(&conn->lock);
-       destroy = conn->destroy;
-       conn->in_callback = conn->destroy;
-       dapl_os_unlock(&conn->lock);
-       return(destroy);
+       return;
 }
 
 
@@ -1008,16 +1013,12 @@ dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
 void dapli_cma_event_cb(void)
 {
        struct rdma_cm_event *event;
-       int ret;
-               
-       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
 
-       ret = rdma_get_cm_event(&event);
+       dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_event()\n");
 
        /* process one CM event, fairness */
-       if(!ret) {
+       if(!rdma_get_cm_event(&event)) {
                struct dapl_cm_id *conn;
-               int ret;
                                
                /* set proper conn from cm_id context*/
                if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
@@ -1055,26 +1056,9 @@ void dapli_cma_event_cb(void)
                case RDMA_CM_EVENT_DISCONNECTED:
                        /* passive or active */
                        if (conn->sp) 
-                               ret = dapli_cm_passive_cb(conn,event);
+                               dapli_cm_passive_cb(conn,event);
                        else 
-                               ret = dapli_cm_active_cb(conn,event);
-
-                       /* destroy both qp and cm_id */
-                       if (ret) {
-                               dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                                            " cma_cb: DESTROY conn %p" 
-                                            " cm_id %p qp %p\n",
-                                            conn, conn->cm_id, 
-                                            conn->cm_id->qp);
-       
-                               if (conn->cm_id->qp)
-                                       rdma_destroy_qp(conn->cm_id);
-
-                               rdma_ack_cm_event(event);
-                               rdma_destroy_id(conn->cm_id);
-                               dapl_os_free(conn, sizeof(*conn));
-                               return;
-                       }
+                               dapli_cm_active_cb(conn,event);
                        break;
                case RDMA_CM_EVENT_CONNECT_RESPONSE:
                default:
@@ -1084,12 +1068,9 @@ void dapli_cma_event_cb(void)
                             event->id->context);
                        break;
                }
+               /* ack event, unblocks destroy_cm_id in consumer threads */
                rdma_ack_cm_event(event);
-       } else {
-               dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                       " cm_event: ERROR: rdma_get_cm_event() %d %d %s\n",
-                       ret, errno, strerror(errno));
-       }
+       } 
 }
 
 /*