git.openfabrics.org - ~ardavis/dapl.git/commitdiff
r3493: Support ib_cm_init_qp_attr(), add cm event processing on a per
author James Lentini <jlentini@netapp.com>
Tue, 20 Sep 2005 16:17:59 +0000 (16:17 +0000)
committer James Lentini <jlentini@netapp.com>
Tue, 20 Sep 2005 16:17:59 +0000 (16:17 +0000)
device basis, and add copyrights for kDAPL cm work that was
used in uDAPL.
Signed-off by: Arlin Davis <ardavis@ichips.intel.com>
Signed-off by: James Lentini <jlentini@netapp.com>

dapl/openib/README
dapl/openib/TODO
dapl/openib/dapl_ib_cm.c
dapl/openib/dapl_ib_cq.c
dapl/openib/dapl_ib_qp.c
dapl/openib/dapl_ib_util.c
dapl/openib/dapl_ib_util.h

index 90d9752615a33cf3668cf23f0d14ae21523d167a..77aa150e6736e07e2720fb48c19d25d6a2f9277e 100644 (file)
@@ -47,5 +47,4 @@ Setup:
        
 Known issues:
        no memory windows support in ibverbs, dat_create_rmr fails.
-       hard coded modify QP RTR to port 1, waiting for ib_cm_init_qp_attr call.
        
index 82f38188db2de3f897cf23beee63db4b7847777f..ef775e38ae099b931258e9c1f74891a480d78798 100644 (file)
@@ -7,8 +7,6 @@ IB Verbs:
 DAPL:
 - reinit EP needs a QP timewait completion notification
 - direct cq_wait_object when multi-CQ verbs event support arrives
-- async event support
-- add support for ib_cm_init_qp_attr 
 - shared receive queue support
 
 Under discussion:
index 4e62d34c446bd2c93481367ecb87bcbfbb1eab32..de468013b914325ecaba4c82dbf26ee2b1aa52c7 100644 (file)
  *
  *    $Id: $
  *
- *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. 
+ * Copyright (c) 2003 Topspin Corporation.  All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  **************************************************************************/
 
@@ -78,11 +82,11 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num)
        int                     status;
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " ip_comp_handler: at_rec %p ->id %lld id %lld rec_num %d %x\n",
+                    " ip_comp_handler: rec %p ->id %lld id %lld num %d %x\n",
                     context, at_rec->req_id, req_id, rec_num,
                     ipv4_addr->sin_addr.s_addr);
 
-        if (rec_num <= 0) {
+        if (rec_num <= 0)  {
                struct ib_at_completion at_comp;
 
                 dapl_dbg_log(DAPL_DBG_TYPE_CM,
@@ -91,7 +95,7 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num)
 
                ipv4_addr->sin_addr.s_addr = 0;
 
-               if (++at_rec->retries > IB_MAX_AT_RETRY) 
+               if ((++at_rec->retries > IB_MAX_AT_RETRY) || (rec_num == -EINTR))
                         goto bail;
 
                at_comp.fn = dapli_ip_comp_handler;
@@ -103,9 +107,9 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num)
                if (status < 0) 
                        goto bail;
 
-               dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
-                             " ip_comp_handler: NEW ips_by_gid ret %d at_rec %p -> id %lld\n",
-                             status, at_rec, at_rec->req_id );
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                       " ip_comp_handler: ips_by_gid %d rec %p->id %lld\n",
+                       status, at_rec, at_rec->req_id );
         } 
 
        if (ipv4_addr->sin_addr.s_addr)
@@ -114,7 +118,7 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num)
        return;
 bail:
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " ip_comp_handler: ERR: at_rec  %p, req_id %lld rec_num %d\n",
+                    " ip_comp_handler: ERR: at_rec %p, id %lld num %d\n",
                     at_rec, req_id, rec_num);
 
        dapl_os_wait_object_wakeup(at_rec->wait_object);
@@ -130,23 +134,35 @@ static void dapli_path_comp_handler(uint64_t req_id, void *context, int rec_num)
                     " path_comp_handler: ctxt %p, req_id %lld rec_num %d\n",
                     context, req_id, rec_num);
 
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+               " path_comp_handler: SRC GID subnet %016llx id %016llx\n",
+               (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.subnet_prefix),
+               (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.interface_id) );
+
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+               " path_comp_handler: DST GID subnet %016llx id %016llx\n",
+               (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.subnet_prefix),
+               (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.interface_id) );
+
        if (rec_num <= 0) {
                dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                            " path_comp_handler: resolution err %d retry %d\n",
+                            " path_comp_handler: ERR %d retry %d\n",
                             rec_num, conn->retries + 1);
                if (++conn->retries > IB_MAX_AT_RETRY) {
                        dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                                    " path_comp_handler: ep_ptr 0x%p\n",conn->ep);
+                               " path_comp_handler: ERR no PATH (ep=%p)\n",
+                               conn->ep);
                        event = IB_CME_DESTINATION_UNREACHABLE;
                        goto bail;
                }
 
                status = ib_at_paths_by_route(&conn->dapl_rt, 0,
                                              &conn->dapl_path, 1,
-                                             &conn->dapl_comp, &conn->dapl_comp.req_id);
+                                             &conn->dapl_comp, 
+                                             &conn->dapl_comp.req_id);
                if (status) {
-                       dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                                    " path_by_route: err %d id %lld\n",
+                       dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                                    " path_by_route: retry ERR %d id %lld\n",
                                     status, conn->dapl_comp.req_id);
                        event = IB_CME_LOCAL_FAILURE;
                        goto bail;
@@ -185,20 +201,9 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num)
                     " rt_comp_handler: conn %p, req_id %lld rec_num %d\n",
                     conn, req_id, rec_num);
 
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                    " rt_comp_handler: SRC GID subnet %016llx id %016llx\n",
-                    (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.subnet_prefix),
-                    (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.interface_id) );
-
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                    " rt_comp_handler: DST GID subnet %016llx id %016llx\n",
-                    (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.subnet_prefix),
-                    (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.interface_id) );
-
-
        if (rec_num <= 0) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " dapl_rt_comp_handler: rec %d retry %d\n",
+                            " dapl_rt_comp_handler: ERROR rec %d retry %d\n",
                             rec_num, conn->retries+1 ); 
 
                if (++conn->retries > IB_MAX_AT_RETRY) {
@@ -206,10 +211,11 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num)
                        goto bail;
                }
 
-               status = ib_at_route_by_ip(((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr,
-                                          0, 0, IB_AT_ROUTE_FORCE_ATS, 
-                                          &conn->dapl_rt, 
-                                          &conn->dapl_comp,&conn->dapl_comp.req_id);
+               status = ib_at_route_by_ip(
+                       ((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr,
+                       0, 0, IB_AT_ROUTE_FORCE_ATS, 
+                       &conn->dapl_rt, 
+                       &conn->dapl_comp,&conn->dapl_comp.req_id);
                if (status < 0) {
                        dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapl_rt_comp_handler: "
                                    "ib_at_route_by_ip failed with status %d\n",
@@ -223,9 +229,10 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num)
                return;
        }
 
-       if (!conn->dapl_rt.dgid.global.subnet_prefix || req_id != conn->dapl_comp.req_id) {
+       if (!conn->dapl_rt.dgid.global.subnet_prefix || 
+               req_id != conn->dapl_comp.req_id) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
-                            " dapl_rt_comp_handler: ERROR: unexpected callback req_id=%d(%d)\n",
+                            " dapl_rt_comp_handler: ERROR: cb id=%d(%d)\n",
                             req_id, conn->dapl_comp.req_id ); 
                return;
        }
@@ -234,11 +241,13 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num)
        conn->dapl_comp.context = conn;
        conn->retries = 0;
        status = ib_at_paths_by_route(&conn->dapl_rt, 0, &conn->dapl_path, 1,
-                                     &conn->dapl_comp, &conn->dapl_comp.req_id);
+                                     &conn->dapl_comp, 
+                                     &conn->dapl_comp.req_id);
        if (status) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                             "dapl_rt_comp_handler: ib_at_paths_by_route "
-                            "returned %d id %lld\n", status, conn->dapl_comp.req_id);
+                            "returned %d id %lld\n", status, 
+                            conn->dapl_comp.req_id);
                event = IB_CME_LOCAL_FAILURE;
                goto bail;
        }
@@ -284,19 +293,16 @@ static void dapli_rep_recv(struct dapl_cm_id  *conn,
        /* move QP state to RTR and RTS */
        /* TODO: could use a ib_cm_init_qp_attr() call here */
        dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                    " rep_recv: RTR_RTS: cm_id %d r_qp 0x%x r_lid 0x%x r_SID %d\n",
+                   " rep_recv: RTR_RTS: id %d rqp %x rlid %x rSID %d\n",
                     conn->cm_id,event->param.rep_rcvd.remote_qpn,
-                    ntohs(conn->req.primary_path->dlid),conn->service_id );
+                    ntohs(conn->req.primary_path->dlid),conn->service_id);
 
        if ( dapls_modify_qp_state( conn->ep->qp_handle, 
-                                   IBV_QPS_RTR, 
-                                   event->param.rep_rcvd.remote_qpn,
-                                   ntohs(conn->req.primary_path->dlid),
-                                   1 ) != DAT_SUCCESS )
+                                   IBV_QPS_RTR, conn ) != DAT_SUCCESS )
                goto disc;
 
        if ( dapls_modify_qp_state( conn->ep->qp_handle, 
-                                   IBV_QPS_RTS,0,0,0 ) != DAT_SUCCESS)
+                                   IBV_QPS_RTS, conn ) != DAT_SUCCESS)
                goto disc; 
 
 
@@ -356,10 +362,10 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id  *conn,
                                sizeof(struct ib_sa_path_rec));
                                
                dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
-                               "REQ on HCA %p SP %p SID %d L_ID %d new_id %d p_data %p\n",
-                               new_conn->hca, new_conn->sp, 
-                               conn->service_id, conn->cm_id, new_conn->cm_id, 
-                               event->private_data );
+                       "REQ on HCA %p SP %p SID %d LID %d new_id %d pd %p\n",
+                       new_conn->hca, new_conn->sp, 
+                       conn->service_id, conn->cm_id, new_conn->cm_id, 
+                       event->private_data );
 
        }
        return new_conn;
@@ -454,7 +460,8 @@ static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
                new_conn = dapli_req_recv(conn,event);
 
                if (new_conn)   
-                       dapls_cr_callback(new_conn, IB_CME_CONNECTION_REQUEST_PENDING, 
+                       dapls_cr_callback(new_conn, 
+                                         IB_CME_CONNECTION_REQUEST_PENDING, 
                                          event->private_data, new_conn->sp);
                break;
        case IB_CM_REP_ERROR:
@@ -468,7 +475,7 @@ static int dapli_cm_passive_cb(struct dapl_cm_id *conn,
        case IB_CM_RTU_RECEIVED:
                /* move QP to RTS state */
                if ( dapls_modify_qp_state(conn->ep->qp_handle, 
-                                          IBV_QPS_RTS,0,0,0 ) != DAT_SUCCESS) {
+                                          IBV_QPS_RTS, conn ) != DAT_SUCCESS) {
                        dapls_cr_callback(conn, IB_CME_LOCAL_FAILURE, 
                                          NULL, conn->sp);
                } else {
@@ -556,7 +563,7 @@ dapls_ib_connect (
        ep_ptr = (DAPL_EP*)ep_handle;
        qp_ptr = ep_ptr->qp_handle;
 
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " connect: r_SID %d, pdata %p, plen %d\n", 
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, " connect: rSID %d, pdata %p, ln %d\n", 
                     r_qual,p_data,p_size);
                        
        /* Allocate CM and initialize lock */
@@ -573,20 +580,21 @@ dapls_ib_connect (
 
        conn->ep = ep_ptr;
        conn->hca = ep_ptr->header.owner_ia->hca_ptr;
-       status = ib_cm_create_id(conn->hca->ib_hca_handle, &conn->cm_id, conn);
+       status = ib_cm_create_id(conn->hca->ib_hca_handle,
+                                &conn->cm_id, conn);
        if (status < 0)  {
                dat_status = dapl_convert_errno(errno,"create_cm_id");
                dapl_os_free(conn, sizeof(*conn));
                return dat_status;
        }
-       conn->ep->cm_handle = conn;
+       ep_ptr->cm_handle = conn;
 
        /* Setup QP/CM parameters */
        (void)dapl_os_memzero(&conn->req,sizeof(conn->req));
        conn->service_id = r_qual;
        conn->req.qp_num = ep_ptr->qp_handle->qp_num;
        conn->req.qp_type = IBV_QPT_RC;
-       conn->req.starting_psn = 1;
+       conn->req.starting_psn = ep_ptr->qp_handle->qp_num;
        conn->req.private_data = p_data;
        conn->req.private_data_len = p_size;
        conn->req.peer_to_peer = 0;
@@ -607,14 +615,14 @@ dapls_ib_connect (
        
        status = ib_at_route_by_ip(
                ((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr, 
-               ((struct sockaddr_in *)&conn->hca->hca_address)->sin_addr.s_addr, 
-               0, IB_AT_ROUTE_FORCE_ATS, &conn->dapl_rt, &conn->dapl_comp, 
-               &conn->dapl_comp.req_id);
+               ((struct sockaddr_in *)&conn->hca->hca_address)->sin_addr.s_addr,
+               0, 0, &conn->dapl_rt, &conn->dapl_comp, &conn->dapl_comp.req_id);
 
-       dapl_dbg_log(DAPL_DBG_TYPE_CM, " connect: at_route ret=%d,%s req_id %d GID %016llx %016llx\n", 
-                    status, strerror(errno), conn->dapl_comp.req_id,
-                    (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.subnet_prefix),
-                    (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.interface_id) );
+       dapl_dbg_log(DAPL_DBG_TYPE_CM, 
+               " connect: at_route requested(ret=%d,id=%d): SRC %x DST %x\n", 
+            status, conn->dapl_comp.req_id,
+            ((struct sockaddr_in *)&conn->hca->hca_address)->sin_addr.s_addr,
+            ((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr);
 
        if (status < 0) {
                dat_status = dapl_convert_errno(errno,"ib_at_route_by_ip");
@@ -653,7 +661,7 @@ dapls_ib_disconnect (
        int status;
 
        dapl_dbg_log (DAPL_DBG_TYPE_CM,
-                       " disconnect(ep_handle %p, conn %p, cm_id %d flags %x)\n",
+                       " disconnect(ep %p, conn %p, id %d flags %x)\n",
                        ep_ptr,conn, (conn?conn->cm_id:0),close_flags);
 
        if (conn == IB_INVALID_HANDLE)
@@ -703,7 +711,7 @@ dapls_ib_disconnect_clean (
        dapls_ib_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG);
          
        if (ep_ptr->qp_handle != IB_INVALID_HANDLE) 
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0 );
 }
 
 /*
@@ -749,8 +757,8 @@ dapls_ib_setup_conn_listener (
                return DAT_INTERNAL_ERROR;
        }
         
-       status = ib_cm_create_id(ia_ptr->hca_ptr->ib_hca_handle, &conn->cm_id,
-                                conn);
+       status = ib_cm_create_id(ia_ptr->hca_ptr->ib_hca_handle,
+                                &conn->cm_id, conn);
        if (status < 0)  {
                dat_status = dapl_convert_errno(errno,"create_cm_id");
                dapl_os_free(conn, sizeof(*conn));
@@ -758,8 +766,8 @@ dapls_ib_setup_conn_listener (
        }
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
-                    " setup_listener(ia_ptr %p SID %d sp_ptr %p conn %p cm_id %d)\n",
-                    ia_ptr, ServiceID, sp_ptr, conn, conn->cm_id );
+               " setup_listener(ia_ptr %p SID %d sp %p conn %p id %d)\n",
+               ia_ptr, ServiceID, sp_ptr, conn, conn->cm_id );
 
        sp_ptr->cm_srvc_handle = conn;
        conn->sp = sp_ptr;
@@ -864,7 +872,7 @@ dapls_ib_accept_connection (
        conn = cr_ptr->ib_cm_handle;
 
        dapl_dbg_log (DAPL_DBG_TYPE_CM,
-                     " accept_connection(cr %p conn %p, cm_id %d, p_data %p, p_sz=%d)\n",
+                     " accept(cr %p conn %p, id %d, p_data %p, p_sz=%d)\n",
                      cr_ptr, conn, conn->cm_id, p_data, p_size );
 
        /* Obtain size of private data structure & contents */
@@ -888,13 +896,9 @@ dapls_ib_accept_connection (
                }
        }
 
-       /* move QP to RTR state, TODO fix port setting */
-       /* TODO: could use a ib_cm_init_qp_attr() call here */
+        /* move QP to RTR state */
        dat_status = dapls_modify_qp_state(ep_ptr->qp_handle, 
-                                          IBV_QPS_RTR, 
-                                          conn->req_rcvd.remote_qpn, 
-                                          ntohs(conn->req_rcvd.primary_path->dlid),
-                                          1 ); 
+                                          IBV_QPS_RTR, conn); 
        if (dat_status != DAT_SUCCESS ) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                                     " accept: modify_qp_state failed: %d\n",
@@ -910,6 +914,7 @@ dapls_ib_accept_connection (
        passive_params.private_data = p_data;
        passive_params.private_data_len = p_size;
        passive_params.qp_num = ep_ptr->qp_handle->qp_num;
+       passive_params.starting_psn = ep_ptr->qp_handle->qp_num;
        passive_params.responder_resources = IB_TARGET_MAX;
        passive_params.initiator_depth = IB_INITIATOR_DEPTH;
        passive_params.rnr_retry_count = IB_RNR_RETRY_COUNT;
@@ -1156,8 +1161,8 @@ dapls_ib_get_dat_event (
                }
        }
        dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK,
-               "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
-               active ? "active" : "passive",  ib_cm_event, dat_event_num);
+               "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
+               active ? "active" : "passive", ib_cm_event, dat_event_num);
 
        return dat_event_num;
 }
@@ -1195,14 +1200,15 @@ dapls_ib_get_cm_event (
     return ib_cm_event;
 }
 
-void dapli_cm_event_cb()
+
+void dapli_cm_event_cb(struct _ib_hca_transport *hca)
 {
        struct ib_cm_event *event;
                
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapli_cm_event()\n");
 
        /* process one CM event, fairness */
-       if(!ib_cm_get_event_timed(0,&event)) {
+       if(!ib_cm_get_event(hca->ib_cm,&event)) {
                struct dapl_cm_id       *conn;
                int                     ret;
                dapl_dbg_log(DAPL_DBG_TYPE_CM,
index 4b0e1f341e73337ce4b021d641268d2318efd338..8fe2c6eddb6f32217a0bffedbd3d58185ae08b2d 100644 (file)
@@ -71,8 +71,10 @@ void dapli_cq_event_cb(struct _ib_hca_transport *hca)
                        (!ibv_get_cq_event(hca->ib_ctx, i, 
                                           &ibv_cq, (void*)&evd_ptr))) {
 
-                       if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD))
+                       if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
+                               ibv_ack_cq_events(ibv_cq, 1);
                                continue;
+                       }
 
                        /* process DTO event via callback */
                        dapl_evd_dto_callback ( hca->ib_ctx,
index d3f213940355480288d5c1058175990ca0676051..f39ad45468fc76b6cfe71419f06d24a801a83e31 100644 (file)
@@ -112,7 +112,7 @@ dapls_ib_qp_alloc (
        qp_create.qp_type = IBV_QPT_RC;
        qp_create.qp_context = (void*)ep_ptr;
 
-       ep_ptr->qp_handle = ibv_create_qp( ib_pd_handle, &qp_create);
+       ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
        if (!ep_ptr->qp_handle) 
                return(dapl_convert_errno(ENOMEM, "create_qp"));
        
@@ -121,10 +121,10 @@ dapls_ib_qp_alloc (
                        ep_ptr->qp_handle->qp_num,
                        qp_create.cap.max_send_wr,qp_create.cap.max_send_sge,
                        qp_create.cap.max_recv_wr,qp_create.cap.max_recv_sge );
-
+       
        /* Setup QP attributes for INIT state on the way out */ 
        if (dapls_modify_qp_state(ep_ptr->qp_handle,
-                                 IBV_QPS_INIT,0,0,0 ) != DAT_SUCCESS ) {
+                                 IBV_QPS_INIT, 0) != DAT_SUCCESS ) {
                ibv_destroy_qp(ep_ptr->qp_handle);              
                ep_ptr->qp_handle = IB_INVALID_HANDLE;
                return DAT_INTERNAL_ERROR;
@@ -161,7 +161,7 @@ dapls_ib_qp_free (
 
        if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
                /* force error state to flush queue, then destroy */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0);
                
                if (ibv_destroy_qp(ep_ptr->qp_handle)) 
                        return(dapl_convert_errno(errno,"destroy_qp"));
@@ -217,7 +217,7 @@ dapls_ib_qp_modify (
            (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
                ep_ptr->qp_state = IB_QP_STATE_ERROR;
                return (dapls_modify_qp_state(ep_ptr->qp_handle, 
-                                             IBV_QPS_ERR,0,0,0));
+                                             IBV_QPS_ERR, 0));
        }
 
        /*
@@ -272,8 +272,8 @@ dapls_ib_reinit_ep (
        
        if ( ep_ptr->qp_handle != IB_INVALID_HANDLE ) {
                /* move to RESET state and then to INIT */
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0,0,0);
-               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0,0,0);
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0);
+               dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0);
                ep_ptr->qp_state = IB_QP_STATE_INIT;
        }
 
@@ -283,104 +283,101 @@ dapls_ib_reinit_ep (
 }
 
 /* 
- * Generic QP modify for init, reset, error, RTS, RTR
+ * Generic QP modify for reset, error, INIT, RTS, RTR
  */
 DAT_RETURN
 dapls_modify_qp_state ( IN ib_qp_handle_t      qp_handle,
                        IN ib_qp_state_t        qp_state,
-                       IN uint32_t             qpn,
-                       IN uint16_t             lid,
-                       IN uint8_t              port )
+                       IN struct dapl_cm_id    *conn )
 {
-       struct ibv_qp_attr      qp_attr;
-       enum ibv_qp_attr_mask   mask = IBV_QP_STATE;
-               
-       dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
-       qp_attr.qp_state = qp_state;
+       struct ibv_qp_attr      attr;
+       int                     mask = 0;
+                       
+       dapl_dbg_log (DAPL_DBG_TYPE_EP, 
+                     " modify_qp: qp %p, state %d qp_num 0x%x\n",      
+                     qp_handle, qp_state, qp_handle->qp_num);
+
+       dapl_os_memzero((void*)&attr, sizeof(attr));
+       attr.qp_state = qp_state;
        
        switch (qp_state) {
-               /* additional attributes with RTR and RTS */
-               case IBV_QPS_RTR:
-               {
-                       mask |= IBV_QP_AV                 |
-                               IBV_QP_PATH_MTU           |
-                               IBV_QP_DEST_QPN           |
-                               IBV_QP_RQ_PSN             |
-                               IBV_QP_MAX_DEST_RD_ATOMIC |
-                               IBV_QP_MIN_RNR_TIMER;
-                       qp_attr.qp_state                = IBV_QPS_RTR;
-                       qp_attr.path_mtu                = IBV_MTU_1024;
-                       qp_attr.dest_qp_num             = qpn;
-                       qp_attr.rq_psn                  = 1;
-                       qp_attr.max_dest_rd_atomic      = IB_TARGET_MAX;
-                       qp_attr.min_rnr_timer           = 12;
-                       qp_attr.ah_attr.is_global       = 0;
-                       qp_attr.ah_attr.dlid            = lid;
-                       qp_attr.ah_attr.sl              = 0;
-                       qp_attr.ah_attr.src_path_bits   = 0;
-                       qp_attr.ah_attr.port_num        = port;
-
-                       dapl_dbg_log (DAPL_DBG_TYPE_EP,
-                             " modify_qp_RTR: qpn %x lid %x port %x, rq_psn %x\n",
-                             qpn,lid,port,ntohl(qp_attr.rq_psn) );
-                       break;
-
-               }               
-               case IBV_QPS_RTS: 
-               {
-                       mask |= IBV_QP_TIMEOUT            |
-                               IBV_QP_RETRY_CNT          |
-                               IBV_QP_RNR_RETRY          |
-                               IBV_QP_SQ_PSN             |
-                               IBV_QP_MAX_QP_RD_ATOMIC;
-                       qp_attr.qp_state        = IBV_QPS_RTS;
-                       qp_attr.timeout         = 14;
-                       qp_attr.retry_cnt       = 7;
-                       qp_attr.rnr_retry       = 7;
-                       qp_attr.sq_psn          = 1;
-                       qp_attr.max_rd_atomic   = IB_TARGET_MAX;
-                       dapl_dbg_log (DAPL_DBG_TYPE_EP,
-                             " modify_qp_RTS: psn %x or %x\n",
-                             ntohl(qp_attr.sq_psn), qp_attr.max_rd_atomic );
-                       break;
-               }
-               case IBV_QPS_INIT: 
+               case IBV_QPS_INIT:
                {
                        DAPL_IA *ia_ptr;
                        DAPL_EP *ep_ptr; 
+                       
                        /* need to find way back to port num */
                        ep_ptr = (DAPL_EP*)qp_handle->qp_context;
                        if (ep_ptr)
                                ia_ptr = ep_ptr->header.owner_ia;
                        else
-                               break;
+                               return(dapl_convert_errno(EINVAL," qp_CTX"));
 
-                       mask |= IBV_QP_PKEY_INDEX       |
-                               IBV_QP_PORT             |
-                               IBV_QP_ACCESS_FLAGS;
+                       /* 
+                        * Set qp attributes by hand for INIT state. Allows
+                        * consumers to pre-post receives, per uDAPL
+                        * specification, before IB has path record info
+                        * with connect request processing
+                        */ 
+                       mask =  IBV_QP_STATE | IBV_QP_PKEY_INDEX |
+                               IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
 
-                       qp_attr.pkey_index  = 0;
-                       qp_attr.port_num = ia_ptr->hca_ptr->port_num;
-                       qp_attr.qp_access_flags = 
+                       attr.pkey_index = 0;
+                       attr.port_num = ia_ptr->hca_ptr->port_num;
+                       attr.qp_access_flags = 
                                        IBV_ACCESS_LOCAL_WRITE |
                                        IBV_ACCESS_REMOTE_WRITE |
                                        IBV_ACCESS_REMOTE_READ |
                                        IBV_ACCESS_REMOTE_ATOMIC;
                        
-                       dapl_dbg_log (DAPL_DBG_TYPE_EP,
-                               " modify_qp_INIT: pi %x port %x acc %x\n",
-                               qp_attr.pkey_index, qp_attr.port_num,
-                               qp_attr.qp_access_flags );
+                       ep_ptr->qp_state = IB_QP_STATE_INIT;
                        break;
                }
+               case IBV_QPS_RTR:
+                       if (!conn)
+                               return(dapl_convert_errno(EINVAL," qp_RTR"));
+                       /* 
+                        * Get pkey_index from CM, move from INIT to INIT
+                        * to update index. The initial value was set by hand  
+                        * to allow consumers to pre-post receives.
+                        */
+                       attr.qp_state = IBV_QPS_INIT;
+                         
+                       /* get pkey_index from CM, move from INIT to INIT */ 
+                       if (ib_cm_init_qp_attr(conn->cm_id, &attr, &mask)) 
+                               return(dapl_convert_errno(errno," qp_cINIT"));
+                       
+                       mask = IBV_QP_PKEY_INDEX; 
+                       if (ibv_modify_qp(qp_handle, &attr, mask))
+                               return(dapl_convert_errno(errno," reINIT"));
+
+                       /* get qp attributes from CM, move to RTR */ 
+                       attr.qp_state = IBV_QPS_RTR;
+                       if (ib_cm_init_qp_attr(conn->cm_id, &attr, &mask)) 
+                               return(dapl_convert_errno(errno," qp_cRTR"));
+                       
+                       attr.path_mtu           = IBV_MTU_1024;
+                       attr.rq_psn = qp_handle->qp_num;
+                       break;
+
+               case IBV_QPS_RTS: 
+                       if (!conn)
+                               return(dapl_convert_errno(EINVAL," qp_RTS"));
+
+                       /* get qp attributes from CM, move to RTS */ 
+                       if (ib_cm_init_qp_attr(conn->cm_id, &attr, &mask)) 
+                               return(dapl_convert_errno(errno," qp_cRTS"));
+       
+                       break;
+
                default:
+                       mask = IBV_QP_STATE;
                        break;
-               
        }
-
-       if (ibv_modify_qp(qp_handle, &qp_attr, mask))
-               return(dapl_convert_errno(errno,"modify_qp_state"));
        
+       if (ibv_modify_qp(qp_handle, &attr, mask))
+               return(dapl_convert_errno(errno," modify_qp"));
+
        return DAT_SUCCESS;
 }
 
index 2bcccf3c6de14b39aa4fb17232eba0a79e6bc316..a1cbddf22c010744d5128157a575490cf322c730 100644 (file)
@@ -56,6 +56,7 @@ static const char rcsid[] = "$Id:  $";
 #include <stdlib.h>
 #include <netinet/tcp.h>
 #include <sys/poll.h>
+#include <fcntl.h>
 
 int                    g_dapl_loopback_connection = 0;
 int                    g_ib_destroy = 0;
@@ -85,26 +86,49 @@ int dapli_get_hca_addr( struct dapl_hca *hca_ptr )
        at_rec.retries = 0;
 
        /*  call with async_comp until the sync version works */
-       status = ib_at_ips_by_gid(&hca_ptr->ib_trans.gid, &ipv4_addr->sin_addr.s_addr, 1, 
+       status = ib_at_ips_by_gid(&hca_ptr->ib_trans.gid, 
+                                 &ipv4_addr->sin_addr.s_addr, 1, 
                                  &at_comp, &at_rec.req_id);
        
        if (status < 0) {
                dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-                             " get_hca_addr: ERR ips_by_gid %d %s \n",
+                             " ips_by_gid: ERR ips_by_gid %d %s \n",
                                status, strerror(errno));
                return 1;
        }
  
        dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
-                     " get_hca_addr: ips_by_gid ret %d at_rec %p -> id %lld\n",
+                     " ips_by_gid: RET %d at_rec %p -> id %lld\n",
                        status, &at_rec, at_rec.req_id );
 
         if (status > 0) { 
                 dapli_ip_comp_handler(at_rec.req_id, (void*)&at_rec, status);
        } else {
-               dat_status = dapl_os_wait_object_wait(&hca_ptr->ib_trans.wait_object,500000);
-               if (dat_status != DAT_SUCCESS)
-                       ib_at_cancel(at_rec.req_id);
+               /* limit the resolution and cancel times */
+               dat_status = dapl_os_wait_object_wait(
+                                       &hca_ptr->ib_trans.wait_object,
+                                       500000);
+               if (dat_status != DAT_SUCCESS) {
+                       dapl_dbg_log(
+                               DAPL_DBG_TYPE_UTIL, 
+                               " ips_by_gid: REQ TIMEOUT, cancel %lld\n",
+                               at_rec.req_id);
+                       
+                       /* 
+                        * FIX: indeterministic
+                        * AT may or may not provide -EINTR event 
+                        */
+                       ib_at_cancel(at_rec.req_id); 
+                       
+                       if (dapl_os_wait_object_wait(
+                                       &hca_ptr->ib_trans.wait_object,
+                                       500000) != DAT_SUCCESS)
+                       {
+                               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                                       " ips_by_gid: cancel %lld failed\n",
+                                       at_rec.req_id);
+                       }
+               }
        }
 
        if (!ipv4_addr->sin_addr.s_addr) 
@@ -130,6 +154,7 @@ int dapli_get_hca_addr( struct dapl_hca *hca_ptr )
  */
 int32_t dapls_ib_init (void)
 {      
+       long    opts;
        dapl_dbg_log (DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n" );
 
        /* initialize hca_list lock */
@@ -142,6 +167,11 @@ int32_t dapls_ib_init (void)
        if (pipe(g_ib_pipe))
                return 1;
 
+       /* set AT fd to non-blocking, bail if flags can't be read */
+       opts = fcntl(ib_at_get_fd(), F_GETFL);
+       if (opts < 0 || fcntl(ib_at_get_fd(), F_SETFL, opts | O_NONBLOCK) < 0)
+               return 1;
+
        if (dapli_ib_thread_init()) 
                return 1;
 
@@ -177,6 +207,8 @@ DAT_RETURN dapls_ib_open_hca (
         IN   DAPL_HCA          *hca_ptr)
 {
        struct dlist    *dev_list;
+       long            opts;
+       int             i;
 
        dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
                      " open_hca: %s - %p\n", hca_name, hca_ptr );
@@ -227,10 +259,10 @@ DAT_RETURN dapls_ib_open_hca (
                     (unsigned long long)bswap_64(hca_ptr->ib_trans.gid.global.subnet_prefix),
                     (unsigned long long)bswap_64(hca_ptr->ib_trans.gid.global.interface_id) );
 
-       /* get the IP address of the device */
+       /* get the IP address of the device using GID */
        if (dapli_get_hca_addr(hca_ptr)) {
                dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
-                             " open_hca: IB get ADDR failed for %s\n", 
+                             " open_hca: ERR ib_at_ips_by_gid for %s\n", 
                              ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
                goto bail;
        }
@@ -238,6 +270,34 @@ DAT_RETURN dapls_ib_open_hca (
        /* initialize hca wait object for uAT event */
        dapl_os_wait_object_init(&hca_ptr->ib_trans.wait_object);
 
+       /* set event FD's to non-blocking */
+       opts = fcntl(hca_ptr->ib_hca_handle->async_fd, F_GETFL); /* uASYNC */
+       if (opts < 0 || fcntl(hca_ptr->ib_hca_handle->async_fd, 
+                             F_SETFL, opts | O_NONBLOCK) < 0) {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                             " open_hca: ERR with async FD\n" );
+               goto bail;
+       }
+       for (i=0;i<hca_ptr->ib_hca_handle->num_comp;i++) { /* uCQ fds */
+               opts = fcntl(hca_ptr->ib_hca_handle->cq_fd[i], F_GETFL);
+               if (opts < 0 || fcntl(hca_ptr->ib_hca_handle->cq_fd[i], 
+                                     F_SETFL, opts | O_NONBLOCK) < 0) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                                    " open_hca: ERR with CQ FD\n");
+                       goto bail;
+               }
+       }       
+       
+       /* Get CM device handle for events, and set to non-blocking */  
+       hca_ptr->ib_trans.ib_cm = ib_cm_get_device(hca_ptr->ib_hca_handle);
+       opts = !hca_ptr->ib_trans.ib_cm ? -1 : /* guard: handle may be NULL */
+               fcntl(hca_ptr->ib_trans.ib_cm->fd, F_GETFL); /* uCM */
+       if (opts < 0 || fcntl(hca_ptr->ib_trans.ib_cm->fd, 
+                             F_SETFL, opts | O_NONBLOCK) < 0) {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, " open_hca: ERR with CM FD\n" );
+               goto bail;
+       }
+       
        /* 
         * Put new hca_transport on list for async and CQ event processing 
         * Wakeup work thread to add to polling list
@@ -509,20 +569,34 @@ int dapli_ib_thread_init(void)
 
 void dapli_ib_thread_destroy(void)
 {
+       int retries = 10;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                     " ib_thread_destroy(%d)\n", getpid());
 
-       /* destroy ib_thread, wait for termination */
+       /* 
+        * Signal the async thread to exit and wait for it, bounded
+        * by 'retries' polls. pthread_join would be the correct method
+        * but some applications have some issues with it.
+        */
+        
+       /* set destroy flag and wake thread via pipe; hold g_hca_lock */
+       dapl_os_lock( &g_hca_lock );
        g_ib_destroy = 1;
        write(g_ib_pipe[1], "w", sizeof "w");
-       while (g_ib_destroy != 2) {
+               
+       while ((g_ib_destroy != 2) && (retries--)) {
                struct timespec sleep, remain;
                sleep.tv_sec = 0;
-               sleep.tv_nsec = 10000000; /* 10 ms */
+               sleep.tv_nsec = 20000000; /* 20 ms per retry */
                dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
-                            " ib_thread_destroy: waiting for ib_thread\n");
+                       " ib_thread_destroy: waiting for ib_thread\n");
+               write(g_ib_pipe[1], "w", sizeof "w");
+               dapl_os_unlock( &g_hca_lock );
                nanosleep(&sleep, &remain);
+               dapl_os_lock( &g_hca_lock );
        }
+       dapl_os_unlock( &g_hca_lock );
+       
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                     " ib_thread_destroy(%d) exit\n",getpid());
 }
@@ -639,30 +713,26 @@ void dapli_thread(void *arg)
        struct _ib_hca_transport *hca;
        int                      ret,idx,fds;
        char                     rbuf[2];
-
+       
        dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
-                     " ib_thread(%d,0x%x): ENTER: pipe %d cm %d at %d\n",
+                     " ib_thread(%d,0x%x): ENTER: pipe %d at %d\n",
                      getpid(), g_ib_thread, 
-                     g_ib_pipe[0], ib_cm_get_fd(), 
-                     ib_at_get_fd());
+                     g_ib_pipe[0], ib_at_get_fd());
 
        /* Poll across pipe, CM, AT never changes */
        dapl_os_lock( &g_hca_lock );
        
        ufds[0].fd = g_ib_pipe[0];      /* pipe */
        ufds[0].events = POLLIN;
-       ufds[1].fd = ib_cm_get_fd();    /* uCM */
+       ufds[1].fd = ib_at_get_fd();    /* uAT */
        ufds[1].events = POLLIN;
-       ufds[2].fd = ib_at_get_fd();    /* uAT */
-       ufds[2].events = POLLIN;
-               
+       
        while (!g_ib_destroy) {
                
-               /* build ufds after pipe, cm, at events */
+               /* build ufds after pipe, at events */
                ufds[0].revents = 0;
                ufds[1].revents = 0;
-               ufds[2].revents = 0;
-               idx=2;
+               idx=1;
 
                /*  Walk HCA list and setup async and CQ events */
                if (!dapl_llist_is_empty(&g_hca_list))
@@ -672,6 +742,10 @@ void dapli_thread(void *arg)
                
                while(hca) {
                        int i;
+                       ufds[++idx].fd = hca->ib_cm->fd; /* uCM */
+                       ufds[idx].events = POLLIN;
+                       ufds[idx].revents = 0;
+                       uhca[idx] = hca;
                        ufds[++idx].fd = hca->ib_ctx->async_fd; /* uASYNC */
                        ufds[idx].events = POLLIN;
                        ufds[idx].revents = 0;
@@ -699,10 +773,11 @@ void dapli_thread(void *arg)
                        continue;
                }
 
-               /* check and process CQ and ASYNC events, each open device */
-               for(idx=3;idx<fds;idx++) {
+               /* check and process CM, CQ and ASYNC events, per device */
+               for(idx=2;idx<fds;idx++) {
                        if (ufds[idx].revents == POLLIN) {
                                dapli_cq_event_cb(uhca[idx]);
+                               dapli_cm_event_cb(uhca[idx]);
                                dapli_async_event_cb(uhca[idx]);
                        }
                }
@@ -726,11 +801,8 @@ void dapli_thread(void *arg)
                        }
                }
 
-               /* CM and AT events */
+               /* AT events */
                if (ufds[1].revents == POLLIN)
-                       dapli_cm_event_cb();
-
-               if (ufds[2].revents == POLLIN)
                        dapli_at_event_cb();
 
                dapl_os_lock(&g_hca_lock);
@@ -738,5 +810,6 @@ void dapli_thread(void *arg)
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," ib_thread(%d) EXIT\n",getpid());
        g_ib_destroy = 2;
        dapl_os_unlock(&g_hca_lock);    
+       pthread_exit(NULL);
 }
 
index 867a2751ceb6d454aa4db01dd4e020ee65ef74a9..9c7833cb4b921f0ba2577bfd270b9be7ed0d3619 100644 (file)
@@ -235,6 +235,7 @@ typedef struct _ib_hca_transport
        int                     destroy;
        struct ibv_device       *ib_dev;
        struct ibv_context      *ib_ctx;
+       struct ib_cm_device     *ib_cm;
        ib_cq_handle_t          ib_cq_empty;
        DAPL_OS_WAIT_OBJECT     wait_object;
        int                     max_inline_send;
@@ -261,7 +262,7 @@ int  dapli_ib_thread_init(void);
 void dapli_ib_thread_destroy(void);
 int  dapli_get_hca_addr(struct dapl_hca *hca_ptr);
 void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num);
-void dapli_cm_event_cb(void);
+void dapli_cm_event_cb(struct _ib_hca_transport *hca);
 void dapli_at_event_cb(void);
 void dapli_cq_event_cb(struct _ib_hca_transport *hca);
 void dapli_async_event_cb(struct _ib_hca_transport *hca);
@@ -269,9 +270,7 @@ void dapli_async_event_cb(struct _ib_hca_transport *hca);
 DAT_RETURN
 dapls_modify_qp_state ( IN ib_qp_handle_t      qp_handle,
                        IN ib_qp_state_t        qp_state,
-                       IN uint32_t             qpn,
-                       IN uint16_t             lid,
-                       IN uint8_t              port );
+                       IN struct dapl_cm_id    *conn );
 
 /* inline functions */
 STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)