From: James Lentini Date: Tue, 20 Sep 2005 16:17:59 +0000 (+0000) Subject: r3493: Support ib_cm_init_qp_attr(), add cm event processing on a per X-Git-Tag: libdapl-1.2.1~81 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=4509fb64fdbf99db7bdcaad4d8e3884718184d86;p=~ardavis%2Fdapl.git r3493: Support ib_cm_init_qp_attr(), add cm event processing on a per device basis, and add copyrights for kDAPL cm work that was used in uDAPL. Signed-off by: Arlin Davis Signed-off by: James Lentini --- diff --git a/dapl/openib/README b/dapl/openib/README index 90d9752..77aa150 100644 --- a/dapl/openib/README +++ b/dapl/openib/README @@ -47,5 +47,4 @@ Setup: Known issues: no memory windows support in ibverbs, dat_create_rmr fails. - hard coded modify QP RTR to port 1, waiting for ib_cm_init_qp_attr call. diff --git a/dapl/openib/TODO b/dapl/openib/TODO index 82f3818..ef775e3 100644 --- a/dapl/openib/TODO +++ b/dapl/openib/TODO @@ -7,8 +7,6 @@ IB Verbs: DAPL: - reinit EP needs a QP timewait completion notification - direct cq_wait_object when multi-CQ verbs event support arrives -- async event support -- add support for ib_cm_init_qp_attr - shared receive queue support Under discussion: diff --git a/dapl/openib/dapl_ib_cm.c b/dapl/openib/dapl_ib_cm.c index 4e62d34..de46801 100644 --- a/dapl/openib/dapl_ib_cm.c +++ b/dapl/openib/dapl_ib_cm.c @@ -42,7 +42,11 @@ * * $Id: $ * - * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Voltaire Inc. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2003 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 
* **************************************************************************/ @@ -78,11 +82,11 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num) int status; dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ip_comp_handler: at_rec %p ->id %lld id %lld rec_num %d %x\n", + " ip_comp_handler: rec %p ->id %lld id %lld num %d %x\n", context, at_rec->req_id, req_id, rec_num, ipv4_addr->sin_addr.s_addr); - if (rec_num <= 0) { + if (rec_num <= 0) { struct ib_at_completion at_comp; dapl_dbg_log(DAPL_DBG_TYPE_CM, @@ -91,7 +95,7 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num) ipv4_addr->sin_addr.s_addr = 0; - if (++at_rec->retries > IB_MAX_AT_RETRY) + if ((++at_rec->retries > IB_MAX_AT_RETRY) || (rec_num == -EINTR)) goto bail; at_comp.fn = dapli_ip_comp_handler; @@ -103,9 +107,9 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num) if (status < 0) goto bail; - dapl_dbg_log (DAPL_DBG_TYPE_UTIL, - " ip_comp_handler: NEW ips_by_gid ret %d at_rec %p -> id %lld\n", - status, at_rec, at_rec->req_id ); + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " ip_comp_handler: ips_by_gid %d rec %p->id %lld\n", + status, at_rec, at_rec->req_id ); } if (ipv4_addr->sin_addr.s_addr) @@ -114,7 +118,7 @@ void dapli_ip_comp_handler(uint64_t req_id, void *context, int rec_num) return; bail: dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ip_comp_handler: ERR: at_rec %p, req_id %lld rec_num %d\n", + " ip_comp_handler: ERR: at_rec %p, id %lld num %d\n", at_rec, req_id, rec_num); dapl_os_wait_object_wakeup(at_rec->wait_object); @@ -130,23 +134,35 @@ static void dapli_path_comp_handler(uint64_t req_id, void *context, int rec_num) " path_comp_handler: ctxt %p, req_id %lld rec_num %d\n", context, req_id, rec_num); + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " path_comp_handler: SRC GID subnet %016llx id %016llx\n", + (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.subnet_prefix), + (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.interface_id) ); + + 
dapl_dbg_log(DAPL_DBG_TYPE_CM, + " path_comp_handler: DST GID subnet %016llx id %016llx\n", + (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.subnet_prefix), + (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.interface_id) ); + if (rec_num <= 0) { dapl_dbg_log(DAPL_DBG_TYPE_CM, - " path_comp_handler: resolution err %d retry %d\n", + " path_comp_handler: ERR %d retry %d\n", rec_num, conn->retries + 1); if (++conn->retries > IB_MAX_AT_RETRY) { dapl_dbg_log(DAPL_DBG_TYPE_CM, - " path_comp_handler: ep_ptr 0x%p\n",conn->ep); + " path_comp_handler: ERR no PATH (ep=%p)\n", + conn->ep); event = IB_CME_DESTINATION_UNREACHABLE; goto bail; } status = ib_at_paths_by_route(&conn->dapl_rt, 0, &conn->dapl_path, 1, - &conn->dapl_comp, &conn->dapl_comp.req_id); + &conn->dapl_comp, + &conn->dapl_comp.req_id); if (status) { - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " path_by_route: err %d id %lld\n", + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + " path_by_route: retry ERR %d id %lld\n", status, conn->dapl_comp.req_id); event = IB_CME_LOCAL_FAILURE; goto bail; @@ -185,20 +201,9 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num) " rt_comp_handler: conn %p, req_id %lld rec_num %d\n", conn, req_id, rec_num); - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " rt_comp_handler: SRC GID subnet %016llx id %016llx\n", - (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.subnet_prefix), - (unsigned long long)cpu_to_be64(conn->dapl_rt.sgid.global.interface_id) ); - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " rt_comp_handler: DST GID subnet %016llx id %016llx\n", - (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.subnet_prefix), - (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.interface_id) ); - - if (rec_num <= 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, - " dapl_rt_comp_handler: rec %d retry %d\n", + " dapl_rt_comp_handler: ERROR rec %d retry %d\n", rec_num, conn->retries+1 ); if (++conn->retries > IB_MAX_AT_RETRY) { @@ -206,10 +211,11 @@ static void 
dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num) goto bail; } - status = ib_at_route_by_ip(((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr, - 0, 0, IB_AT_ROUTE_FORCE_ATS, - &conn->dapl_rt, - &conn->dapl_comp,&conn->dapl_comp.req_id); + status = ib_at_route_by_ip( + ((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr, + 0, 0, IB_AT_ROUTE_FORCE_ATS, + &conn->dapl_rt, + &conn->dapl_comp,&conn->dapl_comp.req_id); if (status < 0) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapl_rt_comp_handler: " "ib_at_route_by_ip failed with status %d\n", @@ -223,9 +229,10 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num) return; } - if (!conn->dapl_rt.dgid.global.subnet_prefix || req_id != conn->dapl_comp.req_id) { + if (!conn->dapl_rt.dgid.global.subnet_prefix || + req_id != conn->dapl_comp.req_id) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, - " dapl_rt_comp_handler: ERROR: unexpected callback req_id=%d(%d)\n", + " dapl_rt_comp_handler: ERROR: cb id=%d(%d)\n", req_id, conn->dapl_comp.req_id ); return; } @@ -234,11 +241,13 @@ static void dapli_rt_comp_handler(uint64_t req_id, void *context, int rec_num) conn->dapl_comp.context = conn; conn->retries = 0; status = ib_at_paths_by_route(&conn->dapl_rt, 0, &conn->dapl_path, 1, - &conn->dapl_comp, &conn->dapl_comp.req_id); + &conn->dapl_comp, + &conn->dapl_comp.req_id); if (status) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapl_rt_comp_handler: ib_at_paths_by_route " - "returned %d id %lld\n", status, conn->dapl_comp.req_id); + "returned %d id %lld\n", status, + conn->dapl_comp.req_id); event = IB_CME_LOCAL_FAILURE; goto bail; } @@ -284,19 +293,16 @@ static void dapli_rep_recv(struct dapl_cm_id *conn, /* move QP state to RTR and RTS */ /* TODO: could use a ib_cm_init_qp_attr() call here */ dapl_dbg_log(DAPL_DBG_TYPE_CM, - " rep_recv: RTR_RTS: cm_id %d r_qp 0x%x r_lid 0x%x r_SID %d\n", + " rep_recv: RTR_RTS: id %d rqp %x rlid %x rSID %d\n", conn->cm_id,event->param.rep_rcvd.remote_qpn, - 
ntohs(conn->req.primary_path->dlid),conn->service_id ); + ntohs(conn->req.primary_path->dlid),conn->service_id); if ( dapls_modify_qp_state( conn->ep->qp_handle, - IBV_QPS_RTR, - event->param.rep_rcvd.remote_qpn, - ntohs(conn->req.primary_path->dlid), - 1 ) != DAT_SUCCESS ) + IBV_QPS_RTR, conn ) != DAT_SUCCESS ) goto disc; if ( dapls_modify_qp_state( conn->ep->qp_handle, - IBV_QPS_RTS,0,0,0 ) != DAT_SUCCESS) + IBV_QPS_RTS, conn ) != DAT_SUCCESS) goto disc; @@ -356,10 +362,10 @@ static struct dapl_cm_id * dapli_req_recv(struct dapl_cm_id *conn, sizeof(struct ib_sa_path_rec)); dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: " - "REQ on HCA %p SP %p SID %d L_ID %d new_id %d p_data %p\n", - new_conn->hca, new_conn->sp, - conn->service_id, conn->cm_id, new_conn->cm_id, - event->private_data ); + "REQ on HCA %p SP %p SID %d LID %d new_id %d pd %p\n", + new_conn->hca, new_conn->sp, + conn->service_id, conn->cm_id, new_conn->cm_id, + event->private_data ); } return new_conn; @@ -454,7 +460,8 @@ static int dapli_cm_passive_cb(struct dapl_cm_id *conn, new_conn = dapli_req_recv(conn,event); if (new_conn) - dapls_cr_callback(new_conn, IB_CME_CONNECTION_REQUEST_PENDING, + dapls_cr_callback(new_conn, + IB_CME_CONNECTION_REQUEST_PENDING, event->private_data, new_conn->sp); break; case IB_CM_REP_ERROR: @@ -468,7 +475,7 @@ static int dapli_cm_passive_cb(struct dapl_cm_id *conn, case IB_CM_RTU_RECEIVED: /* move QP to RTS state */ if ( dapls_modify_qp_state(conn->ep->qp_handle, - IBV_QPS_RTS,0,0,0 ) != DAT_SUCCESS) { + IBV_QPS_RTS, conn ) != DAT_SUCCESS) { dapls_cr_callback(conn, IB_CME_LOCAL_FAILURE, NULL, conn->sp); } else { @@ -556,7 +563,7 @@ dapls_ib_connect ( ep_ptr = (DAPL_EP*)ep_handle; qp_ptr = ep_ptr->qp_handle; - dapl_dbg_log(DAPL_DBG_TYPE_CM, " connect: r_SID %d, pdata %p, plen %d\n", + dapl_dbg_log(DAPL_DBG_TYPE_CM, " connect: rSID %d, pdata %p, ln %d\n", r_qual,p_data,p_size); /* Allocate CM and initialize lock */ @@ -573,20 +580,21 @@ dapls_ib_connect ( conn->ep = ep_ptr; 
conn->hca = ep_ptr->header.owner_ia->hca_ptr; - status = ib_cm_create_id(conn->hca->ib_hca_handle, &conn->cm_id, conn); + status = ib_cm_create_id(conn->hca->ib_hca_handle, + &conn->cm_id, conn); if (status < 0) { dat_status = dapl_convert_errno(errno,"create_cm_id"); dapl_os_free(conn, sizeof(*conn)); return dat_status; } - conn->ep->cm_handle = conn; + ep_ptr->cm_handle = conn; /* Setup QP/CM parameters */ (void)dapl_os_memzero(&conn->req,sizeof(conn->req)); conn->service_id = r_qual; conn->req.qp_num = ep_ptr->qp_handle->qp_num; conn->req.qp_type = IBV_QPT_RC; - conn->req.starting_psn = 1; + conn->req.starting_psn = ep_ptr->qp_handle->qp_num; conn->req.private_data = p_data; conn->req.private_data_len = p_size; conn->req.peer_to_peer = 0; @@ -607,14 +615,14 @@ dapls_ib_connect ( status = ib_at_route_by_ip( ((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr, - ((struct sockaddr_in *)&conn->hca->hca_address)->sin_addr.s_addr, - 0, IB_AT_ROUTE_FORCE_ATS, &conn->dapl_rt, &conn->dapl_comp, - &conn->dapl_comp.req_id); + ((struct sockaddr_in *)&conn->hca->hca_address)->sin_addr.s_addr, + 0, 0, &conn->dapl_rt, &conn->dapl_comp, &conn->dapl_comp.req_id); - dapl_dbg_log(DAPL_DBG_TYPE_CM, " connect: at_route ret=%d,%s req_id %d GID %016llx %016llx\n", - status, strerror(errno), conn->dapl_comp.req_id, - (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.subnet_prefix), - (unsigned long long)cpu_to_be64(conn->dapl_rt.dgid.global.interface_id) ); + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " connect: at_route requested(ret=%d,id=%d): SRC %x DST %x\n", + status, conn->dapl_comp.req_id, + ((struct sockaddr_in *)&conn->hca->hca_address)->sin_addr.s_addr, + ((struct sockaddr_in *)&conn->r_addr)->sin_addr.s_addr); if (status < 0) { dat_status = dapl_convert_errno(errno,"ib_at_route_by_ip"); @@ -653,7 +661,7 @@ dapls_ib_disconnect ( int status; dapl_dbg_log (DAPL_DBG_TYPE_CM, - " disconnect(ep_handle %p, conn %p, cm_id %d flags %x)\n", + " disconnect(ep %p, conn %p, id %d flags 
%x)\n", ep_ptr,conn, (conn?conn->cm_id:0),close_flags); if (conn == IB_INVALID_HANDLE) @@ -703,7 +711,7 @@ dapls_ib_disconnect_clean ( dapls_ib_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG); if (ep_ptr->qp_handle != IB_INVALID_HANDLE) - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0); + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0 ); } /* @@ -749,8 +757,8 @@ dapls_ib_setup_conn_listener ( return DAT_INTERNAL_ERROR; } - status = ib_cm_create_id(ia_ptr->hca_ptr->ib_hca_handle, &conn->cm_id, - conn); + status = ib_cm_create_id(ia_ptr->hca_ptr->ib_hca_handle, + &conn->cm_id, conn); if (status < 0) { dat_status = dapl_convert_errno(errno,"create_cm_id"); dapl_os_free(conn, sizeof(*conn)); @@ -758,8 +766,8 @@ dapls_ib_setup_conn_listener ( } dapl_dbg_log(DAPL_DBG_TYPE_CM, - " setup_listener(ia_ptr %p SID %d sp_ptr %p conn %p cm_id %d)\n", - ia_ptr, ServiceID, sp_ptr, conn, conn->cm_id ); + " setup_listener(ia_ptr %p SID %d sp %p conn %p id %d)\n", + ia_ptr, ServiceID, sp_ptr, conn, conn->cm_id ); sp_ptr->cm_srvc_handle = conn; conn->sp = sp_ptr; @@ -864,7 +872,7 @@ dapls_ib_accept_connection ( conn = cr_ptr->ib_cm_handle; dapl_dbg_log (DAPL_DBG_TYPE_CM, - " accept_connection(cr %p conn %p, cm_id %d, p_data %p, p_sz=%d)\n", + " accept(cr %p conn %p, id %d, p_data %p, p_sz=%d)\n", cr_ptr, conn, conn->cm_id, p_data, p_size ); /* Obtain size of private data structure & contents */ @@ -888,13 +896,9 @@ dapls_ib_accept_connection ( } } - /* move QP to RTR state, TODO fix port setting */ - /* TODO: could use a ib_cm_init_qp_attr() call here */ + /* move QP to RTR state */ dat_status = dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_RTR, - conn->req_rcvd.remote_qpn, - ntohs(conn->req_rcvd.primary_path->dlid), - 1 ); + IBV_QPS_RTR, conn); if (dat_status != DAT_SUCCESS ) { dapl_dbg_log(DAPL_DBG_TYPE_ERR, " accept: modify_qp_state failed: %d\n", @@ -910,6 +914,7 @@ dapls_ib_accept_connection ( passive_params.private_data = p_data; 
passive_params.private_data_len = p_size; passive_params.qp_num = ep_ptr->qp_handle->qp_num; + passive_params.starting_psn = ep_ptr->qp_handle->qp_num; passive_params.responder_resources = IB_TARGET_MAX; passive_params.initiator_depth = IB_INITIATOR_DEPTH; passive_params.rnr_retry_count = IB_RNR_RETRY_COUNT; @@ -1156,8 +1161,8 @@ dapls_ib_get_dat_event ( } } dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK, - "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n", - active ? "active" : "passive", ib_cm_event, dat_event_num); + "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n", + active ? "active" : "passive", ib_cm_event, dat_event_num); return dat_event_num; } @@ -1195,14 +1200,15 @@ dapls_ib_get_cm_event ( return ib_cm_event; } -void dapli_cm_event_cb() + +void dapli_cm_event_cb(struct _ib_hca_transport *hca) { struct ib_cm_event *event; dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapli_cm_event()\n"); /* process one CM event, fairness */ - if(!ib_cm_get_event_timed(0,&event)) { + if(!ib_cm_get_event(hca->ib_cm,&event)) { struct dapl_cm_id *conn; int ret; dapl_dbg_log(DAPL_DBG_TYPE_CM, diff --git a/dapl/openib/dapl_ib_cq.c b/dapl/openib/dapl_ib_cq.c index 4b0e1f3..8fe2c6e 100644 --- a/dapl/openib/dapl_ib_cq.c +++ b/dapl/openib/dapl_ib_cq.c @@ -71,8 +71,10 @@ void dapli_cq_event_cb(struct _ib_hca_transport *hca) (!ibv_get_cq_event(hca->ib_ctx, i, &ibv_cq, (void*)&evd_ptr))) { - if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) + if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) { + ibv_ack_cq_events(ibv_cq, 1); continue; + } /* process DTO event via callback */ dapl_evd_dto_callback ( hca->ib_ctx, diff --git a/dapl/openib/dapl_ib_qp.c b/dapl/openib/dapl_ib_qp.c index d3f2139..f39ad45 100644 --- a/dapl/openib/dapl_ib_qp.c +++ b/dapl/openib/dapl_ib_qp.c @@ -112,7 +112,7 @@ dapls_ib_qp_alloc ( qp_create.qp_type = IBV_QPT_RC; qp_create.qp_context = (void*)ep_ptr; - ep_ptr->qp_handle = ibv_create_qp( ib_pd_handle, &qp_create); + ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, 
&qp_create); if (!ep_ptr->qp_handle) return(dapl_convert_errno(ENOMEM, "create_qp")); @@ -121,10 +121,10 @@ dapls_ib_qp_alloc ( ep_ptr->qp_handle->qp_num, qp_create.cap.max_send_wr,qp_create.cap.max_send_sge, qp_create.cap.max_recv_wr,qp_create.cap.max_recv_sge ); - + /* Setup QP attributes for INIT state on the way out */ if (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_INIT,0,0,0 ) != DAT_SUCCESS ) { + IBV_QPS_INIT, 0) != DAT_SUCCESS ) { ibv_destroy_qp(ep_ptr->qp_handle); ep_ptr->qp_handle = IB_INVALID_HANDLE; return DAT_INTERNAL_ERROR; @@ -161,7 +161,7 @@ dapls_ib_qp_free ( if (ep_ptr->qp_handle != IB_INVALID_HANDLE) { /* force error state to flush queue, then destroy */ - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0); + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0); if (ibv_destroy_qp(ep_ptr->qp_handle)) return(dapl_convert_errno(errno,"destroy_qp")); @@ -217,7 +217,7 @@ dapls_ib_qp_modify ( (ep_ptr->qp_handle->state != IBV_QPS_ERR)) { ep_ptr->qp_state = IB_QP_STATE_ERROR; return (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_ERR,0,0,0)); + IBV_QPS_ERR, 0)); } /* @@ -272,8 +272,8 @@ dapls_ib_reinit_ep ( if ( ep_ptr->qp_handle != IB_INVALID_HANDLE ) { /* move to RESET state and then to INIT */ - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0,0,0); - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0,0,0); + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, 0); + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, 0); ep_ptr->qp_state = IB_QP_STATE_INIT; } @@ -283,104 +283,101 @@ dapls_ib_reinit_ep ( } /* - * Generic QP modify for init, reset, error, RTS, RTR + * Generic QP modify for reset, error, INIT, RTS, RTR */ DAT_RETURN dapls_modify_qp_state ( IN ib_qp_handle_t qp_handle, IN ib_qp_state_t qp_state, - IN uint32_t qpn, - IN uint16_t lid, - IN uint8_t port ) + IN struct dapl_cm_id *conn ) { - struct ibv_qp_attr qp_attr; - enum ibv_qp_attr_mask mask = IBV_QP_STATE; - - 
dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = qp_state; + struct ibv_qp_attr attr; + int mask = 0; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, + " modify_qp: qp %p, state %d qp_num 0x%x\n", + qp_handle, qp_state, qp_handle->qp_num); + + dapl_os_memzero((void*)&attr, sizeof(attr)); + attr.qp_state = qp_state; switch (qp_state) { - /* additional attributes with RTR and RTS */ - case IBV_QPS_RTR: - { - mask |= IBV_QP_AV | - IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | - IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER; - qp_attr.qp_state = IBV_QPS_RTR; - qp_attr.path_mtu = IBV_MTU_1024; - qp_attr.dest_qp_num = qpn; - qp_attr.rq_psn = 1; - qp_attr.max_dest_rd_atomic = IB_TARGET_MAX; - qp_attr.min_rnr_timer = 12; - qp_attr.ah_attr.is_global = 0; - qp_attr.ah_attr.dlid = lid; - qp_attr.ah_attr.sl = 0; - qp_attr.ah_attr.src_path_bits = 0; - qp_attr.ah_attr.port_num = port; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, - " modify_qp_RTR: qpn %x lid %x port %x, rq_psn %x\n", - qpn,lid,port,ntohl(qp_attr.rq_psn) ); - break; - - } - case IBV_QPS_RTS: - { - mask |= IBV_QP_TIMEOUT | - IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | - IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC; - qp_attr.qp_state = IBV_QPS_RTS; - qp_attr.timeout = 14; - qp_attr.retry_cnt = 7; - qp_attr.rnr_retry = 7; - qp_attr.sq_psn = 1; - qp_attr.max_rd_atomic = IB_TARGET_MAX; - dapl_dbg_log (DAPL_DBG_TYPE_EP, - " modify_qp_RTS: psn %x or %x\n", - ntohl(qp_attr.sq_psn), qp_attr.max_rd_atomic ); - break; - } - case IBV_QPS_INIT: + case IBV_QPS_INIT: { DAPL_IA *ia_ptr; DAPL_EP *ep_ptr; + /* need to find way back to port num */ ep_ptr = (DAPL_EP*)qp_handle->qp_context; if (ep_ptr) ia_ptr = ep_ptr->header.owner_ia; else - break; + return(dapl_convert_errno(EINVAL," qp_CTX")); - mask |= IBV_QP_PKEY_INDEX | - IBV_QP_PORT | - IBV_QP_ACCESS_FLAGS; + /* + * Set qp attributes by hand for INIT state. 
Allows + * consumers to pre-post receives, per uDAPL + * specification, before IB has path record info + * with connect request processing + */ + mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | + IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; - qp_attr.pkey_index = 0; - qp_attr.port_num = ia_ptr->hca_ptr->port_num; - qp_attr.qp_access_flags = + attr.pkey_index = 0; + attr.port_num = ia_ptr->hca_ptr->port_num; + attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_ATOMIC; - dapl_dbg_log (DAPL_DBG_TYPE_EP, - " modify_qp_INIT: pi %x port %x acc %x\n", - qp_attr.pkey_index, qp_attr.port_num, - qp_attr.qp_access_flags ); + ep_ptr->qp_state = IB_QP_STATE_INIT; break; } + case IBV_QPS_RTR: + if (!conn) + return(dapl_convert_errno(EINVAL," qp_RTR")); + /* + * Get pkey_index from CM, move from INIT to INIT + * to update index. The initial value was set by hand + * to allow consumers to pre-post receives. + */ + attr.qp_state = IBV_QPS_INIT; + + /* get pkey_index from CM, move from INIT to INIT */ + if (ib_cm_init_qp_attr(conn->cm_id, &attr, &mask)) + return(dapl_convert_errno(errno," qp_cINIT")); + + mask = IBV_QP_PKEY_INDEX; + if (ibv_modify_qp(qp_handle, &attr, mask)) + return(dapl_convert_errno(errno," reINIT")); + + /* get qp attributes from CM, move to RTR */ + attr.qp_state = IBV_QPS_RTR; + if (ib_cm_init_qp_attr(conn->cm_id, &attr, &mask)) + return(dapl_convert_errno(errno," qp_cRTR")); + + attr.path_mtu = IBV_MTU_1024; + attr.rq_psn = qp_handle->qp_num; + break; + + case IBV_QPS_RTS: + if (!conn) + return(dapl_convert_errno(EINVAL," qp_RTS")); + + /* get qp attributes from CM, move to RTS */ + if (ib_cm_init_qp_attr(conn->cm_id, &attr, &mask)) + return(dapl_convert_errno(errno," qp_cRTS")); + + break; + default: + mask = IBV_QP_STATE; break; - } - - if (ibv_modify_qp(qp_handle, &qp_attr, mask)) - return(dapl_convert_errno(errno,"modify_qp_state")); + if (ibv_modify_qp(qp_handle, &attr, mask)) + 
return(dapl_convert_errno(errno," modify_qp")); + return DAT_SUCCESS; } diff --git a/dapl/openib/dapl_ib_util.c b/dapl/openib/dapl_ib_util.c index 2bcccf3..a1cbddf 100644 --- a/dapl/openib/dapl_ib_util.c +++ b/dapl/openib/dapl_ib_util.c @@ -56,6 +56,7 @@ static const char rcsid[] = "$Id: $"; #include #include #include +#include int g_dapl_loopback_connection = 0; int g_ib_destroy = 0; @@ -85,26 +86,49 @@ int dapli_get_hca_addr( struct dapl_hca *hca_ptr ) at_rec.retries = 0; /* call with async_comp until the sync version works */ - status = ib_at_ips_by_gid(&hca_ptr->ib_trans.gid, &ipv4_addr->sin_addr.s_addr, 1, + status = ib_at_ips_by_gid(&hca_ptr->ib_trans.gid, + &ipv4_addr->sin_addr.s_addr, 1, &at_comp, &at_rec.req_id); if (status < 0) { dapl_dbg_log (DAPL_DBG_TYPE_ERR, - " get_hca_addr: ERR ips_by_gid %d %s \n", + " ips_by_gid: ERR ips_by_gid %d %s \n", status, strerror(errno)); return 1; } dapl_dbg_log (DAPL_DBG_TYPE_UTIL, - " get_hca_addr: ips_by_gid ret %d at_rec %p -> id %lld\n", + " ips_by_gid: RET %d at_rec %p -> id %lld\n", status, &at_rec, at_rec.req_id ); if (status > 0) { dapli_ip_comp_handler(at_rec.req_id, (void*)&at_rec, status); } else { - dat_status = dapl_os_wait_object_wait(&hca_ptr->ib_trans.wait_object,500000); - if (dat_status != DAT_SUCCESS) - ib_at_cancel(at_rec.req_id); + /* limit the resolution and cancel times */ + dat_status = dapl_os_wait_object_wait( + &hca_ptr->ib_trans.wait_object, + 500000); + if (dat_status != DAT_SUCCESS) { + dapl_dbg_log( + DAPL_DBG_TYPE_UTIL, + " ips_by_gid: REQ TIMEOUT, cancel %lld\n", + at_rec.req_id); + + /* + * FIX: indeterministic + * AT may or may not provide -EINTR event + */ + ib_at_cancel(at_rec.req_id); + + if (dapl_os_wait_object_wait( + &hca_ptr->ib_trans.wait_object, + 500000) != DAT_SUCCESS) + { + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + " ips_by_gid: cancel %lld failed\n", + at_rec.req_id); + } + } } if (!ipv4_addr->sin_addr.s_addr) @@ -130,6 +154,7 @@ int dapli_get_hca_addr( struct dapl_hca *hca_ptr ) 
*/ int32_t dapls_ib_init (void) { + long opts; dapl_dbg_log (DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n" ); /* initialize hca_list lock */ @@ -142,6 +167,11 @@ int32_t dapls_ib_init (void) if (pipe(g_ib_pipe)) return 1; + /* set AT fd to non-blocking */ + opts = fcntl(ib_at_get_fd(), F_GETFL); + if (fcntl(ib_at_get_fd(), F_SETFL, opts | O_NONBLOCK) < 0) + return 1; + if (dapli_ib_thread_init()) return 1; @@ -177,6 +207,8 @@ DAT_RETURN dapls_ib_open_hca ( IN DAPL_HCA *hca_ptr) { struct dlist *dev_list; + long opts; + int i; dapl_dbg_log (DAPL_DBG_TYPE_UTIL, " open_hca: %s - %p\n", hca_name, hca_ptr ); @@ -227,10 +259,10 @@ DAT_RETURN dapls_ib_open_hca ( (unsigned long long)bswap_64(hca_ptr->ib_trans.gid.global.subnet_prefix), (unsigned long long)bswap_64(hca_ptr->ib_trans.gid.global.interface_id) ); - /* get the IP address of the device */ + /* get the IP address of the device using GID */ if (dapli_get_hca_addr(hca_ptr)) { dapl_dbg_log (DAPL_DBG_TYPE_ERR, - " open_hca: IB get ADDR failed for %s\n", + " open_hca: ERR ib_at_ips_by_gid for %s\n", ibv_get_device_name(hca_ptr->ib_trans.ib_dev) ); goto bail; } @@ -238,6 +270,34 @@ DAT_RETURN dapls_ib_open_hca ( /* initialize hca wait object for uAT event */ dapl_os_wait_object_init(&hca_ptr->ib_trans.wait_object); + /* set event FD's to non-blocking */ + opts = fcntl(hca_ptr->ib_hca_handle->async_fd, F_GETFL); /* uASYNC */ + if (opts < 0 || fcntl(hca_ptr->ib_hca_handle->async_fd, + F_SETFL, opts | O_NONBLOCK) < 0) { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + " open_hca: ERR with async FD\n" ); + goto bail; + } + for (i=0;i<hca_ptr->ib_hca_handle->num_comp;i++) { /* uCQ */ + opts = fcntl(hca_ptr->ib_hca_handle->cq_fd[i], F_GETFL); + if (opts < 0 || fcntl(hca_ptr->ib_hca_handle->async_fd, + F_SETFL, opts | O_NONBLOCK) < 0) { + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + " open_hca: ERR with CQ FD\n"); + goto bail; + } + } + + /* Get CM device handle for events, and set to non-blocking */ + hca_ptr->ib_trans.ib_cm = 
ib_cm_get_device(hca_ptr->ib_hca_handle); + opts = fcntl(hca_ptr->ib_trans.ib_cm->fd, F_GETFL); /* uCM */ + if (opts < 0 || fcntl(hca_ptr->ib_trans.ib_cm->fd, + F_SETFL, opts | O_NONBLOCK) < 0) { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + " open_hca: ERR with CM FD\n" ); + goto bail; + } + /* * Put new hca_transport on list for async and CQ event processing * Wakeup work thread to add to polling list @@ -509,20 +569,34 @@ int dapli_ib_thread_init(void) void dapli_ib_thread_destroy(void) { + int retries = 10; dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread_destroy(%d)\n", getpid()); - /* destroy ib_thread, wait for termination */ + /* + * wait for async thread to terminate. + * pthread_join would be the correct method + * but some applications have some issues + */ + + /* destroy ib_thread, wait for termination, if not already */ + dapl_os_lock( &g_hca_lock ); g_ib_destroy = 1; write(g_ib_pipe[1], "w", sizeof "w"); - while (g_ib_destroy != 2) { + + while ((g_ib_destroy != 2) && (retries--)) { struct timespec sleep, remain; sleep.tv_sec = 0; - sleep.tv_nsec = 10000000; /* 10 ms */ + sleep.tv_nsec = 20000000; /* 20 ms */ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " ib_thread_destroy: waiting for ib_thread\n"); + " ib_thread_destroy: waiting for ib_thread\n"); + write(g_ib_pipe[1], "w", sizeof "w"); + dapl_os_unlock( &g_hca_lock ); nanosleep(&sleep, &remain); + dapl_os_lock( &g_hca_lock ); } + dapl_os_unlock( &g_hca_lock ); + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread_destroy(%d) exit\n",getpid()); } @@ -639,30 +713,26 @@ void dapli_thread(void *arg) struct _ib_hca_transport *hca; int ret,idx,fds; char rbuf[2]; - + dapl_dbg_log (DAPL_DBG_TYPE_UTIL, - " ib_thread(%d,0x%x): ENTER: pipe %d cm %d at %d\n", + " ib_thread(%d,0x%x): ENTER: pipe %d at %d\n", getpid(), g_ib_thread, - g_ib_pipe[0], ib_cm_get_fd(), - ib_at_get_fd()); + g_ib_pipe[0], ib_at_get_fd()); /* Poll across pipe, CM, AT never changes */ dapl_os_lock( &g_hca_lock ); ufds[0].fd = g_ib_pipe[0]; /* pipe */ ufds[0].events = 
POLLIN; - ufds[1].fd = ib_cm_get_fd(); /* uCM */ + ufds[1].fd = ib_at_get_fd(); /* uAT */ ufds[1].events = POLLIN; - ufds[2].fd = ib_at_get_fd(); /* uAT */ - ufds[2].events = POLLIN; - + while (!g_ib_destroy) { - /* build ufds after pipe, cm, at events */ + /* build ufds after pipe, at events */ ufds[0].revents = 0; ufds[1].revents = 0; - ufds[2].revents = 0; - idx=2; + idx=1; /* Walk HCA list and setup async and CQ events */ if (!dapl_llist_is_empty(&g_hca_list)) @@ -672,6 +742,10 @@ void dapli_thread(void *arg) while(hca) { int i; + ufds[++idx].fd = hca->ib_cm->fd; /* uCM */ + ufds[idx].events = POLLIN; + ufds[idx].revents = 0; + uhca[idx] = hca; ufds[++idx].fd = hca->ib_ctx->async_fd; /* uASYNC */ ufds[idx].events = POLLIN; ufds[idx].revents = 0; @@ -699,10 +773,11 @@ void dapli_thread(void *arg) continue; } - /* check and process CQ and ASYNC events, each open device */ - for(idx=3;idx