From 3788335383b02325e20f35f455d355b061b468bc Mon Sep 17 00:00:00 2001 From: Arlin Davis Date: Tue, 6 Jan 2015 14:01:39 -0800 Subject: [PATCH] cleanup ib/cm attribute management across openib providers Signed-off-by: Arlin Davis --- dapl/openib_cma/dapl_ib_util.h | 13 +-- dapl/openib_cma/device.c | 2 +- dapl/openib_common/dapl_ib_common.h | 19 +++++ dapl/openib_common/dapl_ib_dto.h | 2 +- dapl/openib_common/dapl_mic_common.h | 8 +- dapl/openib_common/qp.c | 40 ++++----- dapl/openib_common/util.c | 118 +++++++++++++-------------- dapl/openib_mcm/cm.c | 2 +- dapl/openib_mcm/dapl_ib_util.h | 17 +--- dapl/openib_mcm/device.c | 20 ++--- dapl/openib_mcm/mix.c | 35 +++++++- dapl/openib_scm/dapl_ib_util.h | 14 +--- dapl/openib_scm/device.c | 18 ++-- dapl/openib_ucm/cm.c | 2 +- dapl/openib_ucm/dapl_ib_util.h | 15 +--- dapl/openib_ucm/device.c | 20 ++--- 16 files changed, 174 insertions(+), 171 deletions(-) diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h index 1f3b63c..e0bcfdf 100755 --- a/dapl/openib_cma/dapl_ib_util.h +++ b/dapl/openib_cma/dapl_ib_util.h @@ -111,18 +111,7 @@ typedef struct _ib_hca_transport struct ibv_context *ib_ctx; struct ibv_device *ib_dev; /* dapls_modify_qp_state */ - uint16_t lid; - uint8_t ack_timer; - uint8_t ack_retry; - uint8_t rnr_timer; - uint8_t rnr_retry; - uint8_t global; - uint8_t hop_limit; - uint8_t tclass; - uint8_t mtu; - uint8_t sl; - uint16_t pkey; - int pkey_idx; + ib_cm_attr_t ib_cm; /* dev attr for QP and CM */ uint64_t guid; char guid_str[32]; ib_named_attr_t na; diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c index affa704..8cab9de 100644 --- a/dapl/openib_cma/device.c +++ b/dapl/openib_cma/device.c @@ -393,7 +393,7 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, done: /* set default IB MTU */ - hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048); + hca_ptr->ib_trans.ib_cm.mtu = dapl_ib_mtu(2048); return DAT_SUCCESS; } diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h index d5b26ec..e3525f2 100644 --- a/dapl/openib_common/dapl_ib_common.h +++ b/dapl/openib_common/dapl_ib_common.h @@ -161,6 +161,25 @@ typedef struct _ib_named_attr } ib_named_attr_t; +typedef struct _ib_cm_attr +{ + uint8_t ack_timer; + uint8_t ack_retry; + uint8_t rnr_timer; + uint8_t rnr_retry; + uint8_t global; + uint8_t hop_limit; + uint8_t tclass; + uint8_t sl; + uint8_t mtu; + uint8_t rd_atom_in; + uint8_t rd_atom_out; + uint8_t pkey_idx; + uint16_t pkey; + uint16_t max_inline; + +} ib_cm_attr_t; + /* CM events */ typedef enum { IB_CME_CONNECTED, diff --git a/dapl/openib_common/dapl_ib_dto.h b/dapl/openib_common/dapl_ib_dto.h index 8801db4..de8fdc9 100644 --- a/dapl/openib_common/dapl_ib_dto.h +++ b/dapl/openib_common/dapl_ib_dto.h @@ -220,7 +220,7 @@ dapls_ib_post_send ( #ifndef _OPENIB_MCM_ /* don't use inline for MCM, optimized for large messages */ /* inline data for send or write ops */ - if ((total_len <= ep_ptr->qp_handle->tp->max_inline_send) && + if ((total_len <= ep_ptr->qp_handle->tp->ib_cm.max_inline) && ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) wr.send_flags |= IBV_SEND_INLINE; #endif diff --git a/dapl/openib_common/dapl_mic_common.h b/dapl/openib_common/dapl_mic_common.h index 4ce09a3..e5c7990 100755 --- a/dapl/openib_common/dapl_mic_common.h +++ b/dapl/openib_common/dapl_mic_common.h @@ -186,11 +186,14 @@ static inline char * mcm_ib_async_str(IN int st) #define HOST_SOCK_DEV 1 /* host to HCA, any socket */ #define MIC_SSOCK_DEV 2 /* MIC to HCA, same socket */ #define MIC_XSOCK_DEV 3 /* MIC to HCA, cross socket */ +#define MIC_FULL_DEV 4 /* MIC to HCA, full proxy offload, no direct verbs */ #define UND_EP(x) ((x)->ep_map < 1 || (x)->ep_map > 4) #define HST_EP(x) ((x)->ep_map == HOST_SOCK_DEV) -#define MXS_EP(x) ((x)->ep_map == MIC_XSOCK_DEV) #define MSS_EP(x) ((x)->ep_map == MIC_SSOCK_DEV) +#define MXS_EP(x) ((x)->ep_map == MIC_XSOCK_DEV) +#define MFO_EP(x) ((x)->ep_map == MIC_FULL_DEV) +#define MXF_EP(x) ((x)->ep_map == MIC_XSOCK_DEV || (x)->ep_map == MIC_FULL_DEV) static inline char * mcm_map_str(IN uint8_t ep_map) { @@ -199,8 +202,9 @@ static inline char * mcm_map_str(IN uint8_t ep_map) "HST", "MSS", "MXS", + "MFO" }; - return ((ep_map < 1 || ep_map > 3) ? "???" : map[ep_map]); + return ((ep_map < 1 || ep_map > 4) ? "???" : map[ep_map]); } /* MCM address, 28 bytes */ diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c index 16dd5f0..10abfbb 100644 --- a/dapl/openib_common/qp.c +++ b/dapl/openib_common/qp.c @@ -66,7 +66,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, ib_pd_handle_t ib_pd_handle; ib_srq_handle_t ib_srq_handle = NULL; int ret = EINVAL; - int max_inline = ia_ptr->hca_ptr->ib_trans.max_inline_send; + int max_inline = ia_ptr->hca_ptr->ib_trans.ib_cm.max_inline; struct ibv_qp_init_attr qp_create; #ifdef _OPENIB_CMA_ dp_ib_cm_handle_t conn; @@ -180,7 +180,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, #endif qp_create.qp_type = IBV_QPT_UD; if (attr->max_message_size > - (128 << ia_ptr->hca_ptr->ib_trans.mtu)) { + (128 << ia_ptr->hca_ptr->ib_trans.ib_cm.mtu)) { goto err; } } @@ -268,7 +268,7 @@ skip_qp: } else { /* NON-MIC: need QPt, in case of shadowed QP's on remote MIC's */ /* Prep for HST -> MXS: xfers via remote PI instead of direct */ - ia_ptr->hca_ptr->ib_trans.max_inline_send = + ia_ptr->hca_ptr->ib_trans.ib_cm.max_inline = DAPL_MAX(sizeof(struct mcm_wr_rx), max_inline); /* create CQ for peer PI, HST->MXS case */ if (mcm_create_pi_cq(ep_ptr->qp_handle, MCM_WRC_QLEN)) @@ -279,7 +279,7 @@ skip_qp: qp_create.cap.max_recv_sge = 1; qp_create.cap.max_send_wr = DAPL_MAX(MCM_WRC_QLEN, attr->max_request_dtos); qp_create.cap.max_send_sge = attr->max_request_iov; - qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.max_inline_send; + qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.ib_cm.max_inline; ep_ptr->qp_handle->qp2 = ibv_create_qp(ib_pd_handle, &qp_create); if (!ep_ptr->qp_handle->qp2) { @@ -579,7 +579,7 @@ out: #else uint8_t dapls_get_sl(DAPL_HCA *hca_ptr, uint16_t dlid) { - return hca_ptr->ib_trans.sl; + return hca_ptr->ib_trans.ib_cm.sl; } #endif @@ -614,10 +614,10 @@ dapls_modify_qp_state(IN struct ibv_qp *qp_handle, qp_attr.dest_qp_num = ntohl(qpn); qp_attr.rq_psn = 1; - qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu; - qp_attr.min_rnr_timer = ia_ptr->hca_ptr->ib_trans.rnr_timer; + qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.ib_cm.mtu; + qp_attr.min_rnr_timer = ia_ptr->hca_ptr->ib_trans.ib_cm.rnr_timer; #ifdef _OPENIB_MCM_ - qp_attr.max_dest_rd_atomic = ia_ptr->hca_ptr->ib_trans.rd_atom_in; + qp_attr.max_dest_rd_atomic = ia_ptr->hca_ptr->ib_trans.ib_cm.rd_atom_in; #else qp_attr.max_dest_rd_atomic = ep_ptr->param.ep_attr.max_rdma_read_in; #endif @@ -630,7 +630,7 @@ dapls_modify_qp_state(IN struct ibv_qp *qp_handle, /* address handle. RC and UD */ qp_attr.ah_attr.dlid = ntohs(lid); - if (gid && ia_ptr->hca_ptr->ib_trans.global) { + if (gid && ia_ptr->hca_ptr->ib_trans.ib_cm.global) { dapl_dbg_log(DAPL_DBG_TYPE_EP, " QPS_RTR: GID Subnet 0x" F64x " ID 0x" F64x "\n", (unsigned long long)htonll(gid->global.subnet_prefix), @@ -642,9 +642,9 @@ dapls_modify_qp_state(IN struct ibv_qp *qp_handle, qp_attr.ah_attr.grh.dgid.global.interface_id = gid->global.interface_id; qp_attr.ah_attr.grh.hop_limit = - ia_ptr->hca_ptr->ib_trans.hop_limit; + ia_ptr->hca_ptr->ib_trans.ib_cm.hop_limit; qp_attr.ah_attr.grh.traffic_class = - ia_ptr->hca_ptr->ib_trans.tclass; + ia_ptr->hca_ptr->ib_trans.ib_cm.tclass; } qp_attr.ah_attr.sl = dapls_get_sl(ia_ptr->hca_ptr, lid); qp_attr.ah_attr.src_path_bits = 0; @@ -665,13 +665,13 @@ dapls_modify_qp_state(IN struct ibv_qp *qp_handle, IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; qp_attr.timeout = - ia_ptr->hca_ptr->ib_trans.ack_timer; + ia_ptr->hca_ptr->ib_trans.ib_cm.ack_timer; qp_attr.retry_cnt = - ia_ptr->hca_ptr->ib_trans.ack_retry; + ia_ptr->hca_ptr->ib_trans.ib_cm.ack_retry; qp_attr.rnr_retry = - ia_ptr->hca_ptr->ib_trans.rnr_retry; + ia_ptr->hca_ptr->ib_trans.ib_cm.rnr_retry; #ifdef _OPENIB_MCM_ - qp_attr.max_rd_atomic = ia_ptr->hca_ptr->ib_trans.rd_atom_out; + qp_attr.max_rd_atomic = ia_ptr->hca_ptr->ib_trans.ib_cm.rd_atom_out; #else qp_attr.max_rd_atomic = ep_ptr->param.ep_attr.max_rdma_read_out; #endif @@ -716,7 +716,7 @@ dapls_modify_qp_state(IN struct ibv_qp *qp_handle, qp_attr.qkey = DAT_UD_QKEY; } - qp_attr.pkey_index = ia_ptr->hca_ptr->ib_trans.pkey_idx; + qp_attr.pkey_index = ia_ptr->hca_ptr->ib_trans.ib_cm.pkey_idx; qp_attr.port_num = ia_ptr->hca_ptr->port_num; dapl_dbg_log(DAPL_DBG_TYPE_EP, @@ -737,7 +737,7 @@ dapls_modify_qp_state(IN struct ibv_qp *qp_handle, " modify_qp_state: ERR type %d qpn 0x%x gid %p (%d) lid 0x%x" " port %d state %d mtu %d rd %d rnr %d sl %d\n", qp_handle->qp_type, ntohl(qpn), gid, - ia_ptr->hca_ptr->ib_trans.global, + ia_ptr->hca_ptr->ib_trans.ib_cm.global, ntohs(lid), ia_ptr->hca_ptr->port_num, ep_ptr->qp_state, qp_attr.path_mtu, qp_attr.max_dest_rd_atomic, @@ -756,7 +756,7 @@ dapls_modify_qp_ud(IN DAPL_HCA *hca, IN struct ibv_qp *qp) /* modify QP, setup and prepost buffers */ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); qp_attr.qp_state = IBV_QPS_INIT; - qp_attr.pkey_index = hca->ib_trans.pkey_idx; + qp_attr.pkey_index = hca->ib_trans.ib_cm.pkey_idx; qp_attr.port_num = hca->port_num; #ifdef _OPENIB_MCM_ qp_attr.qkey = DAT_MCM_UD_QKEY; /* MCM gets different key */ @@ -820,8 +820,8 @@ dapls_create_ah(IN DAPL_HCA *hca, ntohll(gid->global.subnet_prefix); qp_attr.ah_attr.grh.dgid.global.interface_id = ntohll(gid->global.interface_id); - qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit; - qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass; + qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.ib_cm.hop_limit; + qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.ib_cm.tclass; } qp_attr.ah_attr.sl = dapls_get_sl(hca, lid); qp_attr.ah_attr.src_path_bits = 0; diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c index d65c172..22b63c8 100644 --- a/dapl/openib_common/util.c +++ b/dapl/openib_common/util.c @@ -367,6 +367,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, OUT DAT_EP_ATTR * ep_attr, OUT DAT_SOCK_ADDR6 * ip_addr) { + ib_hca_transport_t *tp = &hca_ptr->ib_trans; struct ibv_device_attr dev_attr; struct ibv_port_attr port_attr; @@ -383,6 +384,30 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, if (hca_ptr->ib_hca_handle == NULL) /* no open device, query mode */ return DAT_SUCCESS; +#ifdef _OPENIB_MCM_ + if (tp->self.node) + tp->na.mode = "PROXY"; + else + tp->na.mode = "DIRECT"; + + tp->na.read = "FALSE"; + sprintf(tp->ver_str, "%d", DAT_MIX_VER); + + if (!tp->pr_attr.cpu_family) { + if (tp->self.node) { + dapli_mix_get_attr(tp, &tp->pr_attr); + } else { + tp->pr_attr.cpu_family = cpuinfo_atoi("cpu family"); + tp->pr_attr.cpu_model = cpuinfo_atoi("model"); + } + sprintf(tp->fam_str, "%d", tp->pr_attr.cpu_family); + sprintf(tp->mod_str, "%d", tp->pr_attr.cpu_model); + } +#else + tp->na.mode = "DIRECT"; + tp->na.read = "TRUE"; +#endif + /* query verbs for this device and port attributes */ if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) || ibv_query_port(hca_ptr->ib_hca_handle, @@ -404,7 +429,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, if (ia_attr != NULL) { (void)dapl_os_memzero(ia_attr, sizeof(*ia_attr)); strncpy(ia_attr->adapter_name, - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + ibv_get_device_name(tp->ib_dev), DAT_NAME_MAX_LENGTH - 1); ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0'; ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0'; @@ -454,60 +479,35 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, ia_attr->extension_version = DAT_IB_EXTENSION_VERSION; #endif /* save key device attributes for CM exchange */ - hca_ptr->ib_trans.rd_atom_in = dev_attr.max_qp_rd_atom; - hca_ptr->ib_trans.rd_atom_out = dev_attr.max_qp_init_rd_atom; - - hca_ptr->ib_trans.mtu = DAPL_MIN(port_attr.active_mtu, - hca_ptr->ib_trans.mtu); - hca_ptr->ib_trans.ack_timer = - DAPL_MAX(dev_attr.local_ca_ack_delay, - hca_ptr->ib_trans.ack_timer); + tp->ib_cm.rd_atom_in = dev_attr.max_qp_rd_atom; + tp->ib_cm.rd_atom_out = dev_attr.max_qp_init_rd_atom; + tp->ib_cm.mtu = DAPL_MIN(port_attr.active_mtu, tp->ib_cm.mtu); + tp->ib_cm.ack_timer = DAPL_MAX(dev_attr.local_ca_ack_delay, tp->ib_cm.ack_timer); /* set provider/transport specific named attributes */ - hca_ptr->ib_trans.na.dev = ia_attr->adapter_name; - hca_ptr->ib_trans.na.mtu = dapl_ib_mtu_str(hca_ptr->ib_trans.mtu); - hca_ptr->ib_trans.na.port = dapl_ib_port_str(port_attr.state); - hca_ptr->ib_trans.na.port_num = dapl_ib_port_num_str(hca_ptr->port_num); - if (!hca_ptr->ib_trans.guid) - hca_ptr->ib_trans.guid = ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)); - sprintf(hca_ptr->ib_trans.guid_str, "%04x:%04x:%04x:%04x", - (unsigned) (hca_ptr->ib_trans.guid >> 48) & 0xffff, - (unsigned) (hca_ptr->ib_trans.guid >> 32) & 0xffff, - (unsigned) (hca_ptr->ib_trans.guid >> 16) & 0xffff, - (unsigned) (hca_ptr->ib_trans.guid >> 0) & 0xffff); -#ifdef _OPENIB_MCM_ - if (hca_ptr->ib_trans.self.node) - hca_ptr->ib_trans.na.mode = "PROXY"; - else - hca_ptr->ib_trans.na.mode = "DIRECT"; + tp->na.dev = ia_attr->adapter_name; + tp->na.mtu = dapl_ib_mtu_str(tp->ib_cm.mtu); + tp->na.port = dapl_ib_port_str(port_attr.state); + tp->na.port_num = dapl_ib_port_num_str(hca_ptr->port_num); + if (!tp->guid) + tp->guid = ntohll(ibv_get_device_guid(tp->ib_dev)); + + sprintf(tp->guid_str, "%04x:%04x:%04x:%04x", + (unsigned) (tp->guid >> 48) & 0xffff, + (unsigned) (tp->guid >> 32) & 0xffff, + (unsigned) (tp->guid >> 16) & 0xffff, + (unsigned) (tp->guid >> 0) & 0xffff); - hca_ptr->ib_trans.na.read = "FALSE"; - sprintf(hca_ptr->ib_trans.ver_str, "%d", DAT_MIX_VER); - - if (!hca_ptr->ib_trans.pr_attr.cpu_family) { - if (hca_ptr->ib_trans.self.node) { - dapli_mix_get_attr(&hca_ptr->ib_trans, &hca_ptr->ib_trans.pr_attr); - } else { - hca_ptr->ib_trans.pr_attr.cpu_family = cpuinfo_atoi("cpu family"); - hca_ptr->ib_trans.pr_attr.cpu_model = cpuinfo_atoi("model"); - } - sprintf(hca_ptr->ib_trans.fam_str, "%d", hca_ptr->ib_trans.pr_attr.cpu_family); - sprintf(hca_ptr->ib_trans.mod_str, "%d", hca_ptr->ib_trans.pr_attr.cpu_model); - } -#else - hca_ptr->ib_trans.na.mode = "DIRECT"; - hca_ptr->ib_trans.na.read = "TRUE"; -#endif if (hca_ptr->ib_hca_handle->device->transport_type != IBV_TRANSPORT_IB) goto skip_ib; - /* set SL, PKEY values, defaults = 0 */ - hca_ptr->ib_trans.pkey_idx = 0; - hca_ptr->ib_trans.pkey = htons(dapl_os_get_env_val("DAPL_IB_PKEY", 0)); - hca_ptr->ib_trans.sl = dapl_os_get_env_val("DAPL_IB_SL", 0); + /* set SL, PKEY values, defaults = 0 */ + tp->ib_cm.pkey_idx = 0; + tp->ib_cm.pkey = htons(dapl_os_get_env_val("DAPL_IB_PKEY", 0)); + tp->ib_cm.sl = dapl_os_get_env_val("DAPL_IB_SL", 0); /* index provided, get pkey; pkey provided, get index */ - if (hca_ptr->ib_trans.pkey) { + if (tp->ib_cm.pkey) { int i; uint16_t pkey = 0; for (i=0; i < dev_attr.max_pkeys; i++) { if (ibv_query_pkey(hca_ptr->ib_hca_handle, @@ -516,8 +516,8 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, i = dev_attr.max_pkeys; break; } - if (pkey == hca_ptr->ib_trans.pkey) { - hca_ptr->ib_trans.pkey_idx = i; + if (pkey == tp->ib_cm.pkey) { + tp->ib_cm.pkey_idx = i; break; } } @@ -525,7 +525,7 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr, dapl_log(DAPL_DBG_TYPE_ERR, " ERR: new pkey(0x%x), query (%s)" " err or key !found, using default pkey_idx=0\n", - ntohs(hca_ptr->ib_trans.pkey), strerror(errno)); + ntohs(tp->ib_cm.pkey), strerror(errno)); } } skip_ib: @@ -534,22 +534,22 @@ skip_ib: #ifndef _OPENIB_CMA_ if (port_attr.link_layer != IBV_LINK_LAYER_INFINIBAND && port_attr.link_layer != IBV_LINK_LAYER_UNSPECIFIED) - hca_ptr->ib_trans.global = 1; + tp->ib_cm.global = 1; dapl_log(DAPL_DBG_TYPE_UTIL, " query_hca: port.link_layer = 0x%x, global = %d\n", - port_attr.link_layer, hca_ptr->ib_trans.global); + port_attr.link_layer, tp->ib_cm.global); #endif #endif #ifdef _WIN32 #ifndef _OPENIB_CMA_ if (port_attr.transport != IBV_TRANSPORT_IB) - hca_ptr->ib_trans.global = 1; + tp->ib_cm.global = 1; dapl_log(DAPL_DBG_TYPE_UTIL, " query_hca: port.transport %d ib_trans.global %d\n", - port_attr.transport, hca_ptr->ib_trans.global); + port_attr.transport, tp->ib_cm.global); #endif #endif @@ -560,10 +560,8 @@ skip_ib: ia_attr->hardware_version_minor, ia_attr->max_eps, ia_attr->max_dto_per_ep, ia_attr->max_evds, ia_attr->max_evd_qlen, - 128 << hca_ptr->ib_trans.mtu, - ntohs(hca_ptr->ib_trans.pkey), - hca_ptr->ib_trans.pkey_idx, - hca_ptr->ib_trans.sl, hca_ptr->ib_trans.global); + 128 << tp->ib_cm.mtu, ntohs(tp->ib_cm.pkey), + tp->ib_cm.pkey_idx, tp->ib_cm.sl, tp->ib_cm.global); dapl_log(DAPL_DBG_TYPE_UTIL, " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d" @@ -571,7 +569,7 @@ skip_ib: ia_attr->max_message_size, ia_attr->max_rdma_size, ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs, ia_attr->max_rmrs, - hca_ptr->ib_trans.ack_timer, + tp->ib_cm.ack_timer, ia_attr->max_lmr_block_size, ia_attr->ia_address_ptr); } @@ -592,7 +590,7 @@ skip_ib: " query_hca: MAX msg %llu mtu %d qsz %d iov %d" " rdma i%d,o%d\n", ep_attr->max_message_size, - 128 << hca_ptr->ib_trans.mtu, + 128 << tp->ib_cm.mtu, ep_attr->max_recv_dtos, ep_attr->max_recv_iov, ep_attr->max_rdma_read_in, diff --git a/dapl/openib_mcm/cm.c b/dapl/openib_mcm/cm.c index 204954b..fc623cc 100644 --- a/dapl/openib_mcm/cm.c +++ b/dapl/openib_mcm/cm.c @@ -642,7 +642,7 @@ static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data wr.opcode = IBV_WR_SEND; wr.wr_id = (unsigned long)tp->s_hd; wr.send_flags = (wr.wr_id % tp->burst) ? 0 : IBV_SEND_SIGNALED; - if (len <= tp->max_inline_send) + if (len <= tp->ib_cm.max_inline) wr.send_flags |= IBV_SEND_INLINE; sge.length = len; diff --git a/dapl/openib_mcm/dapl_ib_util.h b/dapl/openib_mcm/dapl_ib_util.h index ab4d658..2d3e1bb 100644 --- a/dapl/openib_mcm/dapl_ib_util.h +++ b/dapl/openib_mcm/dapl_ib_util.h @@ -95,22 +95,7 @@ typedef struct _ib_hca_transport ib_async_qp_handler_t async_qp_error; struct dat_mcm_addr addr; /* lid, port, qp_num, gid */ struct dapl_thread_signal signal; - /* dat_mix_dev_attr_t */ - uint8_t ack_timer; - uint8_t ack_retry; - uint8_t rnr_timer; - uint8_t rnr_retry; - uint8_t global; - uint8_t hop_limit; - uint8_t tclass; - uint8_t sl; - uint8_t mtu; - uint8_t rd_atom_in; - uint8_t rd_atom_out; - uint8_t pkey_idx; - uint16_t pkey; - uint16_t max_inline_send; - /* dat_mix_dev_attr_t */ + ib_cm_attr_t ib_cm; /* dev attr for QP and CM */ int cqe; int qpe; int burst; diff --git a/dapl/openib_mcm/device.c b/dapl/openib_mcm/device.c index 9fdbe0b..9d529a6 100644 --- a/dapl/openib_mcm/device.c +++ b/dapl/openib_mcm/device.c @@ -255,23 +255,23 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, } /* set RC tunables via enviroment or default */ - hca_ptr->ib_trans.max_inline_send = + hca_ptr->ib_trans.ib_cm.max_inline = dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT); - hca_ptr->ib_trans.ack_retry = + hca_ptr->ib_trans.ib_cm.ack_retry = dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY); - hca_ptr->ib_trans.ack_timer = + hca_ptr->ib_trans.ib_cm.ack_timer = dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER); - hca_ptr->ib_trans.rnr_retry = + hca_ptr->ib_trans.ib_cm.rnr_retry = dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY); - hca_ptr->ib_trans.rnr_timer = + hca_ptr->ib_trans.ib_cm.rnr_timer = dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER); - hca_ptr->ib_trans.global = + hca_ptr->ib_trans.ib_cm.global = dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL); - hca_ptr->ib_trans.hop_limit = + hca_ptr->ib_trans.ib_cm.hop_limit = dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT); - hca_ptr->ib_trans.tclass = + hca_ptr->ib_trans.ib_cm.tclass = dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS); - hca_ptr->ib_trans.mtu = + hca_ptr->ib_trans.ib_cm.mtu = dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU)); if (dapli_mix_open(&hca_ptr->ib_trans, hca_name, @@ -557,7 +557,7 @@ static int mcm_service_create(IN DAPL_HCA *hca) qp_create.recv_cq = tp->rcq; qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = tp->qpe; qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1; - qp_create.cap.max_inline_data = tp->max_inline_send; + qp_create.cap.max_inline_data = tp->ib_cm.max_inline; qp_create.qp_context = (void *)hca; tp->qp = ibv_create_qp(tp->pd, &qp_create); diff --git a/dapl/openib_mcm/mix.c b/dapl/openib_mcm/mix.c index d518b0c..a97dfe5 100644 --- a/dapl/openib_mcm/mix.c +++ b/dapl/openib_mcm/mix.c @@ -143,7 +143,22 @@ int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only) msg.hdr.req_id = dapl_os_getpid(); msg.port = port; strcpy((char*)&msg.name, name); - memcpy(&msg.dev_attr, (void*)&tp->ack_timer, sizeof(dat_mix_dev_attr_t)); + + /* send any overridden attributes to proxy */ + msg.dev_attr.ack_timer = tp->ib_cm.ack_timer; + msg.dev_attr.ack_retry = tp->ib_cm.ack_retry; + msg.dev_attr.rnr_timer = tp->ib_cm.rnr_timer; + msg.dev_attr.rnr_retry = tp->ib_cm.rnr_retry ; + msg.dev_attr.global = tp->ib_cm.global; + msg.dev_attr.hop_limit = tp->ib_cm.hop_limit; + msg.dev_attr.tclass = tp->ib_cm.tclass; + msg.dev_attr.sl = tp->ib_cm.sl; + msg.dev_attr.mtu = tp->ib_cm.mtu; + msg.dev_attr.rd_atom_in = tp->ib_cm.rd_atom_in; + msg.dev_attr.rd_atom_out = tp->ib_cm.rd_atom_out; + msg.dev_attr.pkey_idx = tp->ib_cm.pkey_idx; + msg.dev_attr.pkey = tp->ib_cm.pkey; + msg.dev_attr.max_inline = tp->ib_cm.max_inline; len = sizeof(dat_mix_open_t); ret = scif_send(tp->scif_ep, &msg, len, SCIF_SEND_BLOCK); @@ -174,7 +189,25 @@ int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only) } /* save address to transport object, keeps IA queries local */ memcpy((void*)&tp->addr, (void*)&msg.dev_addr, sizeof(dat_mcm_addr_t)); + + /* save actual attributes and device ID */ + tp->ib_cm.ack_timer = msg.dev_attr.ack_timer; + tp->ib_cm.ack_retry = msg.dev_attr.ack_retry; + tp->ib_cm.rnr_timer = msg.dev_attr.rnr_timer; + tp->ib_cm.rnr_retry = msg.dev_attr.rnr_retry; + tp->ib_cm.global = msg.dev_attr.global; + tp->ib_cm.hop_limit = msg.dev_attr.hop_limit; + tp->ib_cm.tclass = msg.dev_attr.tclass; + tp->ib_cm.sl = msg.dev_attr.sl; + tp->ib_cm.mtu = msg.dev_attr.mtu; + tp->ib_cm.rd_atom_in = msg.dev_attr.rd_atom_in; + tp->ib_cm.rd_atom_out = msg.dev_attr.rd_atom_out; + tp->ib_cm.pkey_idx = msg.dev_attr.pkey_idx; + tp->ib_cm.pkey = msg.dev_attr.pkey; + tp->ib_cm.max_inline = msg.dev_attr.max_inline; + tp->dev_id = msg.hdr.req_id; + dapl_log(DAPL_DBG_TYPE_EXTENSION, " mix_open reply (msg %p, ln %d) EPs %d %d %d - dev_id %d\n", &msg, len, tp->scif_ep, tp->scif_ev_ep, diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h index 0f48c44..b03018b 100644 --- a/dapl/openib_scm/dapl_ib_util.h +++ b/dapl/openib_scm/dapl_ib_util.h @@ -95,21 +95,9 @@ typedef struct _ib_hca_transport ib_async_cq_handler_t async_cq_error; ib_async_dto_handler_t async_cq; ib_async_qp_handler_t async_qp_error; - int rd_atom_in; - int rd_atom_out; uint16_t lid; - uint8_t ack_timer; - uint8_t ack_retry; - uint8_t rnr_timer; - uint8_t rnr_retry; - uint8_t global; - uint8_t hop_limit; - uint8_t tclass; - uint8_t mtu; + ib_cm_attr_t ib_cm; /* dev attr for QP and CM */ DAPL_SOCKET scm[2]; - uint8_t sl; - uint16_t pkey; - int pkey_idx; uint64_t guid; char guid_str[32]; ib_named_attr_t na; diff --git a/dapl/openib_scm/device.c b/dapl/openib_scm/device.c index aaea686..8d6cec1 100644 --- a/dapl/openib_scm/device.c +++ b/dapl/openib_scm/device.c @@ -352,23 +352,23 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, } /* set RC tunables via enviroment or default */ - hca_ptr->ib_trans.max_inline_send = + hca_ptr->ib_trans.ib_cm.max_inline = dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT); - hca_ptr->ib_trans.ack_retry = + hca_ptr->ib_trans.ib_cm.ack_retry = dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY); - hca_ptr->ib_trans.ack_timer = + hca_ptr->ib_trans.ib_cm.ack_timer = dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER); - hca_ptr->ib_trans.rnr_retry = + hca_ptr->ib_trans.ib_cm.rnr_retry = dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY); - hca_ptr->ib_trans.rnr_timer = + hca_ptr->ib_trans.ib_cm.rnr_timer = dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER); - hca_ptr->ib_trans.global = + hca_ptr->ib_trans.ib_cm.global = dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL); - hca_ptr->ib_trans.hop_limit = + hca_ptr->ib_trans.ib_cm.hop_limit = dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT); - hca_ptr->ib_trans.tclass = + hca_ptr->ib_trans.ib_cm.tclass = dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS); - hca_ptr->ib_trans.mtu = + hca_ptr->ib_trans.ib_cm.mtu = dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU)); if (flags & DAPL_OPEN_QUERY) diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index 04d5eac..f107b02 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -665,7 +665,7 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, wr.opcode = IBV_WR_SEND; wr.wr_id = (unsigned long)tp->s_hd; wr.send_flags = (wr.wr_id % tp->burst) ? 0 : IBV_SEND_SIGNALED; - if (len <= tp->max_inline_send) + if (len <= tp->ib_cm.max_inline) wr.send_flags |= IBV_SEND_INLINE; sge.length = len + p_size; diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h index a5b9c52..c3a74de 100644 --- a/dapl/openib_ucm/dapl_ib_util.h +++ b/dapl/openib_ucm/dapl_ib_util.h @@ -82,17 +82,7 @@ typedef struct _ib_hca_transport ib_async_dto_handler_t async_cq; ib_async_qp_handler_t async_qp_error; union dcm_addr addr; /* lid, port, qp_num, gid */ - int max_inline_send; - int rd_atom_in; - int rd_atom_out; - uint8_t ack_timer; - uint8_t ack_retry; - uint8_t rnr_timer; - uint8_t rnr_retry; - uint8_t global; - uint8_t hop_limit; - uint8_t tclass; - uint8_t mtu; + ib_cm_attr_t ib_cm; /* dev attr for QP and CM */ struct dapl_thread_signal signal; int cqe; int qpe; @@ -118,9 +108,6 @@ typedef struct _ib_hca_transport DAPL_OS_LOCK plock; uint16_t lid; uint8_t *sid; /* Sevice IDs, port space, bitarray? */ - uint8_t sl; - uint16_t pkey; - int pkey_idx; uint64_t guid; char guid_str[32]; ib_named_attr_t na; diff --git a/dapl/openib_ucm/device.c b/dapl/openib_ucm/device.c index 79796cc..8c27f4d 100644 --- a/dapl/openib_ucm/device.c +++ b/dapl/openib_ucm/device.c @@ -273,23 +273,23 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, } /* set RC tunables via enviroment or default */ - hca_ptr->ib_trans.max_inline_send = + hca_ptr->ib_trans.ib_cm.max_inline = dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT); - hca_ptr->ib_trans.ack_retry = + hca_ptr->ib_trans.ib_cm.ack_retry = dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY); - hca_ptr->ib_trans.ack_timer = + hca_ptr->ib_trans.ib_cm.ack_timer = dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER); - hca_ptr->ib_trans.rnr_retry = + hca_ptr->ib_trans.ib_cm.rnr_retry = dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY); - hca_ptr->ib_trans.rnr_timer = + hca_ptr->ib_trans.ib_cm.rnr_timer = dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER); - hca_ptr->ib_trans.global = + hca_ptr->ib_trans.ib_cm.global = dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL); - hca_ptr->ib_trans.hop_limit = + hca_ptr->ib_trans.ib_cm.hop_limit = dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT); - hca_ptr->ib_trans.tclass = + hca_ptr->ib_trans.ib_cm.tclass = dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS); - hca_ptr->ib_trans.mtu = + hca_ptr->ib_trans.ib_cm.mtu = dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU)); if (flags & DAPL_OPEN_QUERY) @@ -545,7 +545,7 @@ static int ucm_service_create(IN DAPL_HCA *hca) qp_create.recv_cq = tp->rcq; qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = tp->qpe; qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1; - qp_create.cap.max_inline_data = tp->max_inline_send; + qp_create.cap.max_inline_data = tp->ib_cm.max_inline; qp_create.qp_context = (void *)hca; tp->qp = ibv_create_qp(tp->pd, &qp_create); -- 2.46.0