--- /dev/null
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <limits.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <errno.h>
+
+#include "multicast_resources.h"
+#include "l2w.h"
+#include <infiniband/umad.h>
+
+
+
+/******************************************************************************
+ * prepare_mcast_mad
+ *
+ * Fills samad_packet with a Subnet Administration MCMemberRecord request.
+ *
+ * method       - value written into the Method field of the MAD header.
+ * params       - supplies mgid, port_gid and pkey for the record.
+ * samad_packet - output packet; it is zeroed before being filled.
+ *
+ * Multi-byte fields are stored in network byte order. They are copied into
+ * the byte buffers with memcpy instead of through casted pointers, so the
+ * stores do not depend on the alignment of those buffers.
+ ******************************************************************************/
+static void prepare_mcast_mad(uint8_t method,
+                              struct mcast_parameters *params,
+                              struct sa_mad_packet_t *samad_packet) {
+
+    uint8_t  *ptr;
+    uint64_t trans_id = htonll((uint64_t)DEF_TRANS_ID);
+    uint16_t attr_id  = htons(SUBN_ADM_ATTR_MC_MEMBER_RECORD);
+    uint32_t qkey     = htonl(DEF_QKEY);
+    uint16_t pkey     = htons(params->pkey);
+    uint64_t comp_mask;
+
+    memset(samad_packet,0,sizeof(*samad_packet));
+
+    /* prepare the MAD header. according to Table 145 in IB spec 1.2.1 */
+    ptr = samad_packet->mad_header_buf;
+    ptr[0] = 0x01;                                  /* BaseVersion */
+    ptr[1] = MANAGMENT_CLASS_SUBN_ADM;              /* MgmtClass */
+    ptr[2] = 0x02;                                  /* ClassVersion */
+    ptr[3] = INSERTF(ptr[3], 0, method, 0, 7);      /* Method */
+    memcpy(ptr + 8, &trans_id, sizeof(trans_id));   /* TransactionID */
+    memcpy(ptr + 16, &attr_id, sizeof(attr_id));    /* AttributeID */
+
+    /* fill the record itself, byte offsets are relative to SubnetAdminData */
+    ptr = samad_packet->SubnetAdminData;
+
+    memcpy(&ptr[0],params->mgid.raw, 16);
+    memcpy(&ptr[16],params->port_gid.raw, 16);
+
+    memcpy(ptr + 32, &qkey, sizeof(qkey));
+    memcpy(ptr + 40, &pkey, sizeof(pkey));
+    ptr[39] = DEF_TCLASS;
+    /* byte 44 holds the SL in its upper nibble, the flow label starts below it */
+    ptr[44] = INSERTF(ptr[44], 4, DEF_SL, 0, 4);
+    ptr[44] = INSERTF(ptr[44], 0, DEF_FLOW_LABLE, 16, 4);
+    ptr[45] = INSERTF(ptr[45], 0, DEF_FLOW_LABLE, 8, 8);
+    ptr[46] = INSERTF(ptr[46], 0, DEF_FLOW_LABLE, 0, 8);
+    ptr[48] = INSERTF(ptr[48], 0, MCMEMBER_JOINSTATE_FULL_MEMBER, 0, 4);
+
+    /* one bit per record component that was filled in above */
+    comp_mask = SUBN_ADM_COMPMASK_MGID | SUBN_ADM_COMPMASK_PORT_GID | SUBN_ADM_COMPMASK_Q_KEY |
+                SUBN_ADM_COMPMASK_P_KEY | SUBN_ADM_COMPMASK_TCLASS | SUBN_ADM_COMPMASK_SL |
+                SUBN_ADM_COMPMASK_FLOW_LABEL | SUBN_ADM_COMPMASK_JOIN_STATE;
+
+    samad_packet->ComponentMask = htonll(comp_mask);
+}
+
+/******************************************************************************
+ * check_mad_status
+ *
+ * Validates a received MAD against the request that was posted.
+ *
+ * Returns 0 when the lower 32 bits of the TransactionID equal DEF_TRANS_ID
+ * and the status bits taken from header bytes 4 and 5 are all zero.
+ * Returns 1 otherwise, after printing the reason to stderr.
+ ******************************************************************************/
+static int check_mad_status(struct sa_mad_packet_t *samad_packet) {
+
+    const uint8_t *hdr = samad_packet->mad_header_buf;
+    uint32_t user_trans_id;
+    uint16_t mad_header_status;
+
+    // the upper 32 bits of TransactionID were set by the kernel, so only
+    // header bytes 12..15 are compared. memcpy keeps the read independent of
+    // the buffer's alignment.
+    memcpy(&user_trans_id, hdr + 12, sizeof(user_trans_id));
+    user_trans_id = ntohl(user_trans_id);
+
+    // check the TransactionID to make sure this is the response
+    // for the join/leave multicast group request we posted
+    if (user_trans_id != DEF_TRANS_ID) {
+        fprintf(stderr, "received a mad with TransactionID 0x%x, when expecting 0x%x\n",
+                (unsigned int)user_trans_id, (unsigned int)DEF_TRANS_ID);
+        return 1;
+    }
+
+    // low 7 bits of byte 4 become bits 8..14, byte 5 becomes bits 0..7
+    mad_header_status = 0x0;
+    mad_header_status = INSERTF(mad_header_status, 8, hdr[4], 0, 7);
+    mad_header_status = INSERTF(mad_header_status, 0, hdr[5], 0, 8);
+
+    if (mad_header_status) {
+        fprintf(stderr,"received UMAD with an error: 0x%x\n", mad_header_status);
+        return 1;
+    }
+
+    return 0;
+}
+
+
+/******************************************************************************
+ * get_mlid_from_mad
+ *
+ * Reads the 16-bit value stored in network byte order at offset 36 of
+ * SubnetAdminData and writes it, in host byte order, to *mlid.
+ * The bytes are copied out with memcpy so the read does not depend on the
+ * alignment of the byte buffer.
+ ******************************************************************************/
+static void get_mlid_from_mad(struct sa_mad_packet_t *samad_packet, uint16_t *mlid) {
+
+    uint16_t raw_mlid;
+
+    memcpy(&raw_mlid, samad_packet->SubnetAdminData + 36, sizeof(raw_mlid));
+    *mlid = ntohs(raw_mlid);
+}
+
+/******************************************************************************
+ * set_multicast_gid
+ *
+ * Stores the multicast GID to use in params->mgid.
+ *
+ * When params->is_user_mgid is set, params->user_mgid is parsed as 16 numeric
+ * fields separated by ':'. Each field is converted with strtol using base 0
+ * and truncated to one byte. If the string is NULL or has fewer than 16
+ * fields, a message is printed to stderr and the default MCG_GID is used.
+ * When params->is_user_mgid is not set, the default MCG_GID is used.
+ ******************************************************************************/
+void set_multicast_gid(struct mcast_parameters *params) {
+
+    uint8_t mcg_gid[16] = MCG_GID;
+
+    if (params->is_user_mgid) {
+        const char *field = params->user_mgid;
+        uint8_t user_gid[16];
+        int parsed = 0;
+
+        // strtol stops at the ':' that ends a field, so each field can be
+        // converted in place without copying it into a temporary buffer.
+        while (field != NULL && parsed < 16) {
+            user_gid[parsed++] = (unsigned char)strtol(field, NULL, 0);
+            if (parsed < 16) {
+                field = strchr(field, ':');
+                if (field != NULL)
+                    ++field;
+            }
+        }
+
+        if (parsed == 16) {
+            memcpy(mcg_gid, user_gid, sizeof(mcg_gid));
+        } else {
+            fprintf(stderr, "Invalid user MGID (16 ':'-separated bytes expected), using the default MGID\n");
+        }
+    }
+
+    memcpy(params->mgid.raw,mcg_gid,16);
+}
+
+/******************************************************************************
+ * join_multicast_group
+ *
+ * Sends one Subnet Administration MCMemberRecord MAD through the UMAD
+ * interface and waits for the reply.
+ *
+ * method - SUBN_ADM_METHOD_SET is treated as a join, any other value as a
+ *          leave.
+ * params - device name, port, SM address and the record contents.
+ *          After a successful SET, params->mlid holds the MLID taken from the
+ *          reply and MCAST_IS_JOINED is set in params->mcast_state. After a
+ *          successful leave the flag is cleared.
+ *
+ * Returns 0 on success and 1 on any failure, including a failure to release
+ * the UMAD agent or port during cleanup.
+ ******************************************************************************/
+int join_multicast_group(subn_adm_method method,struct mcast_parameters *params) {
+
+    int portid = -1;
+    int agentid = -1;
+    void *umad_buff = NULL;
+    void *mad = NULL;
+    int length = MAD_SIZE;
+    // assume failure; cleared only once the whole exchange has succeeded
+    int test_result = 1;
+
+    if (umad_init() < 0) {
+        fprintf(stderr, "failed to init the UMAD library\n");
+        goto cleanup;
+    }
+    /* use casting to lose the "const" of "const char *" */
+    portid = umad_open_port((char*)params->ib_devname,params->ib_port);
+    if (portid < 0) {
+        fprintf(stderr,"failed to open UMAD port %d\n",params->ib_port);
+        goto cleanup;
+    }
+
+    agentid = umad_register(portid,MANAGMENT_CLASS_SUBN_ADM, 2, 0, 0);
+    if (agentid < 0) {
+        fprintf(stderr,"failed to register UMAD agent for MADs\n");
+        goto cleanup;
+    }
+
+    umad_buff = umad_alloc(1, umad_size() + MAD_SIZE);
+    if (!umad_buff) {
+        fprintf(stderr, "failed to allocate MAD buffer\n");
+        goto cleanup;
+    }
+
+    mad = umad_get_mad(umad_buff);
+    prepare_mcast_mad(method,params,(struct sa_mad_packet_t *)mad);
+
+    if (umad_set_addr(umad_buff,params->sm_lid,1,params->sm_sl,QP1_WELL_KNOWN_Q_KEY) < 0) {
+        fprintf(stderr, "failed to set the destination address of the SMP\n");
+        goto cleanup;
+    }
+
+    if (umad_send(portid,agentid,umad_buff,MAD_SIZE,100,5) < 0) {
+        fprintf(stderr, "failed to send MAD\n");
+        goto cleanup;
+    }
+
+    if (umad_recv(portid,umad_buff,&length,5000) < 0) {
+        fprintf(stderr, "failed to receive MAD response\n");
+        goto cleanup;
+    }
+
+    if (check_mad_status((struct sa_mad_packet_t*)mad)) {
+        fprintf(stderr, "failed to get mlid from MAD\n");
+        goto cleanup;
+    }
+
+    // "Join multicast group" message was sent:
+    // mlid is assigned the new LID carried in the reply
+    if (method == SUBN_ADM_METHOD_SET) {
+        get_mlid_from_mad((struct sa_mad_packet_t*)mad,&params->mlid);
+        params->mcast_state |= MCAST_IS_JOINED;
+
+    // "Leave multicast group" message was sent
+    } else {
+        params->mcast_state &= ~MCAST_IS_JOINED;
+    }
+
+    test_result = 0;
+
+cleanup:
+    if (umad_buff)
+        umad_free(umad_buff);
+
+    if (portid >= 0) {
+        if (agentid >= 0) {
+            if (umad_unregister(portid, agentid)) {
+                fprintf(stderr, "failed to deregister UMAD agent for MADs\n");
+                test_result = 1;
+            }
+        }
+
+        if (umad_close_port(portid)) {
+            fprintf(stderr, "failed to close UMAD portid\n");
+            test_result = 1;
+        }
+    }
+
+    return test_result;
+}
+
+/******************************************************************************
+ * End
+ ******************************************************************************/
+
--- /dev/null
+/*
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Ido Shamay <idos@dev.mellanox.co.il>
+ */
+
+
+#ifndef MULTICAST_RESOURCES_H
+#define MULTICAST_RESOURCES_H
+
+ /* Multicast Module for perftest.
+ *
+ * Description :
+ *
+ * This file contains the structures and methods for implementing multiple
+ * multicast groups in a user space environment.
+ * The module is in use in "send_bw" and "send_lat", but can be used by other
+ * applications and can be extended with more methods to serve more benchmarks.
+ * The module uses only the structures defined here, enabling generic use of it.
+ *
+ * Defined Types :
+ *
+ * mcast_parameters - Contains all the parameters needed for this module.
+ * mcast_group - The multicast group entity itself.
+ * mcg_qp - Is a QP structure that is attached to the group.
+ *
+ */
+
+
+/************************************************************************
+ * Macros , Defines and Files included for work. *
+ ************************************************************************/
+
+#include <infiniband/verbs.h>
+#include <infiniband/umad.h>
+#include "..\..\tools\perftests\user\get_clock.h"
+
+
+// Default values used by the multicast module.
+#define MCG_LID 0xc00f
+#define QPNUM_MCAST 0xffffff
+// Q_Key written into the MCMemberRecord request.
+#define DEF_QKEY 0x11111111
+#define DEF_PKEY_IDX 0
+// SL written into the MCMemberRecord request.
+#define DEF_SL 0
+#define MAX_POLL_ITERATION_TIMEOUT 1000000
+// Initializer for the 16-byte default MGID, used when no user MGID is requested.
+#define MCG_GID {255,1,0,0,0,2,201,133,0,0,0,0,0,0,0,0}
+
+// Definitions section for MADs
+// AttributeID written into the MAD header of the request.
+#define SUBN_ADM_ATTR_MC_MEMBER_RECORD 0x38
+#define MANAGMENT_CLASS_SUBN_ADM 0x03 /* Subnet Administration class */
+// JoinState written into the low 4 bits of record byte 48.
+#define MCMEMBER_JOINSTATE_FULL_MEMBER 0x1
+#define MAD_SIZE 256 /* The size of a MAD is 256 bytes */
+#define QP1_WELL_KNOWN_Q_KEY 0x80010000 /* Q_Key value of QP1 */
+// Only the lower 32 bits of the TransactionID are compared in the reply.
+#define DEF_TRANS_ID 0x12345678 /* TransactionID */
+#define DEF_TCLASS 0
+#define DEF_FLOW_LABLE 0
+
+// generate a bit mask S bits wide (low S bits set).
+// S must be in 1..32: S == 0 would shift a 32-bit value by 32 bits.
+#define MASK32(S) ( ((uint32_t) ~0L) >> (32-(S)) )
+
+// generate a bit mask with bits O+S-1..O set (assumes 32 bit integer).
+#define BITS32(O,S) ( MASK32(S) << (O) )
+
+// extract S bits from (u_int32_t)W with offset O and shift them O places to the right
+#define EXTRACT32(W,O,S) ( ((W)>>(O)) & MASK32(S) )
+
+// insert S bits with offset O from field F into word W (u_int32_t).
+// The macro only yields the new value; the caller assigns it back to W.
+#define INSERT32(W,F,O,S) (/*(W)=*/ ( ((W) & (~BITS32(O,S)) ) | (((F) & MASK32(S))<<(O)) ))
+
+// take S bits of F starting at offset O2 and place them in W at offset O1.
+// Like INSERT32, this yields the new value of W without assigning it.
+#ifndef INSERTF
+ #define INSERTF(W,O1,F,O2,S) (INSERT32(W, EXTRACT32(F, O2, S), O1, S) )
+#endif
+
+
+// Method values placed in the MAD header, according to Table 187 in the
+// IB spec 1.2.1.
+typedef enum {
+    SUBN_ADM_METHOD_SET    = 0x02,  // handled as "join" by join_multicast_group
+    SUBN_ADM_METHOD_DELETE = 0x15   // handled as "leave" by join_multicast_group
+} subn_adm_method;
+
+// ComponentMask bits for the MCMemberRecord request, listed in ascending
+// bit order. Each bit marks one record component as filled in.
+typedef enum {
+    SUBN_ADM_COMPMASK_MGID       = (1ULL << 0),
+    SUBN_ADM_COMPMASK_PORT_GID   = (1ULL << 1),
+    SUBN_ADM_COMPMASK_Q_KEY      = (1ULL << 2),
+    SUBN_ADM_COMPMASK_TCLASS     = (1ULL << 6),
+    SUBN_ADM_COMPMASK_P_KEY      = (1ULL << 7),
+    SUBN_ADM_COMPMASK_SL         = (1ULL << 12),
+    SUBN_ADM_COMPMASK_FLOW_LABEL = (1ULL << 13),
+    SUBN_ADM_COMPMASK_JOIN_STATE = (1ULL << 16),
+} subn_adm_component_mask;
+
+// Bit flags kept in mcast_parameters.mcast_state.
+typedef enum {
+    MCAST_IS_JOINED   = (1 << 0),   // set after a successful join, cleared after a leave
+    MCAST_IS_ATTACHED = (1 << 1)
+} mcast_state;
+
+
+/************************************************************************
+ * Multicast data structures. *
+ ************************************************************************/
+
+// Needed parameters for creating a multiple multicast group entity.
+struct mcast_parameters {
+ int num_qps_on_group; // presumably the number of QPs attached to the group - TODO confirm against callers
+ int is_user_mgid; // non-zero: take the MGID from user_mgid instead of the default
+ int mcast_state; // bit flags from the mcast_state enum
+ int ib_port; // port number passed to umad_open_port
+ uint16_t mlid; // filled from the reply after a successful join
+ const char *user_mgid; // 16 ':'-separated numeric fields, read when is_user_mgid is set
+ const char *ib_devname; // device name passed to umad_open_port
+ uint16_t pkey; // written into the request in network byte order
+ uint16_t sm_lid; // destination LID passed to umad_set_addr
+ uint8_t sm_sl; // SL passed to umad_set_addr
+ union ibv_gid port_gid; // copied into bytes 16..31 of the record
+ union ibv_gid mgid; // copied into bytes 0..15 of the record; written by set_multicast_gid
+
+};
+
+// SA MAD layout, according to Table 195 in the IB spec 1.2.1.
+// The members add up to 24 + 12 + 8 + 2 + 2 + 8 + 200 = 256 bytes, which is
+// MAD_SIZE, provided the structure is packed (no padding before SM_Key).
+// NOTE(review): the attribute needs double parentheses to be valid GNU
+// attribute syntax. If a compatibility header maps __attribute__ to nothing
+// on this platform, confirm that packing is enforced some other way.
+struct sa_mad_packet_t {
+    uint8_t  mad_header_buf[24];    // common MAD header
+    uint8_t  rmpp_header_buf[12];   // left zeroed by prepare_mcast_mad
+    uint64_t SM_Key;
+    uint16_t AttributeOffset;
+    uint16_t Reserved1;
+    uint64_t ComponentMask;         // stored in network byte order
+    uint8_t  SubnetAdminData[200];  // the MCMemberRecord itself
+}__attribute__((packed));
+
+/************************************************************************
+ * Multicast resources methods. *
+ ************************************************************************/
+
+/* set_multicast_gid .
+ *
+ * Description :
+ *
+ * Sets the Multicast GID and stores it in params->mgid.
+ * If the user requested a specific MGID (params->is_user_mgid is non-zero),
+ * it is taken from the string params->user_mgid, which holds 16 numeric
+ * fields separated by ':'. Otherwise the default MCG_GID is used.
+ *
+ * Parameters :
+ *
+ * params - The multicast parameters; is_user_mgid and user_mgid are read,
+ * mgid is written.
+ *
+ * Return Value : none.
+ */
+void set_multicast_gid(struct mcast_parameters *params);
+
+
+/* join_multicast_group .
+ *
+ * Description :
+ *
+ * Sends a Subnet Administration MCMemberRecord MAD for the group described
+ * by params and waits for the reply.
+ * With SUBN_ADM_METHOD_SET the request is a join: on success params->mlid is
+ * filled from the reply and MCAST_IS_JOINED is set in params->mcast_state.
+ * With any other method the request is a leave: on success MCAST_IS_JOINED
+ * is cleared.
+ *
+ * Parameters :
+ *
+ * method - SUBN_ADM_METHOD_SET to join, SUBN_ADM_METHOD_DELETE to leave.
+ * params - The multicast parameters (device, port, SM address, GIDs, P_Key).
+ *
+ * Return Value : an int status, 0 being the success value.
+ * NOTE(review): confirm in the implementation which failure paths yield a
+ * non-zero value.
+ */
+int join_multicast_group(subn_adm_method method,struct mcast_parameters *params);
+
+
+#endif /* MULTICAST_RESOURCES_H */
--- /dev/null
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <time.h>
+#include <sys/types.h>
+#include <winsock2.h>
+#include <Winsock2.h>
+#include "perftest_resources.h"
+
+
+/******************************************************************************
+ *
+ ******************************************************************************/
+
+// Label printed for an address, indexed by params->side.
+static const char *sideArray[] = {
+    "local",
+    "remote"
+};
+
+// Label printed in front of a GID, indexed by params->use_mcg.
+static const char *gidArray[] = {
+    "GID",
+    "MGID"
+};
+
+// Link layer names, indexed by a LinkType value.
+static const char *linkArray[] = {
+    "No link",
+    "IB",
+    "ETH"
+};
+
+/******************************************************************************
+ * ctx_write_keys
+ *
+ * Formats my_dest as text and sends it on params->sockfd.
+ * The short layout (KEY_PRINT_FMT, KEY_MSG_SIZE bytes) is used when
+ * gid_index is -1 and use_mcg is not set. Otherwise the layout that also
+ * carries the 16 GID bytes (KEY_PRINT_FMT_GID, KEY_MSG_SIZE_GID bytes) is
+ * used.
+ *
+ * Returns 0 when the whole message was sent, -1 otherwise.
+ ******************************************************************************/
+static int ctx_write_keys(const struct pingpong_dest *my_dest,
+                          struct perftest_parameters *params) {
+
+    // one buffer, sized for the larger layout, serves both message kinds
+    char msg[KEY_MSG_SIZE_GID];
+    int  msg_len;
+    const int without_gid = (params->gid_index == -1 && !params->use_mcg);
+
+    if (without_gid) {
+        msg_len = KEY_MSG_SIZE;
+        sprintf(msg,KEY_PRINT_FMT,my_dest->lid,my_dest->out_reads,
+                my_dest->qpn,my_dest->psn, my_dest->rkey, my_dest->vaddr);
+    } else {
+        const uint8_t *gid = my_dest->gid.raw;
+
+        msg_len = KEY_MSG_SIZE_GID;
+        sprintf(msg,KEY_PRINT_FMT_GID, my_dest->lid,my_dest->out_reads,
+                my_dest->qpn,my_dest->psn, my_dest->rkey, my_dest->vaddr,
+                gid[0], gid[1], gid[2], gid[3],
+                gid[4], gid[5], gid[6], gid[7],
+                gid[8], gid[9], gid[10],gid[11],
+                gid[12],gid[13],gid[14],gid[15]);
+    }
+
+    if (send(params->sockfd,msg,msg_len,0) != msg_len) {
+        perror("client write");
+        fprintf(stderr, "Couldn't send local address\n");
+        return -1;
+    }
+    return 0;
+}
+
+/******************************************************************************
+ * ctx_next_key_field
+ *
+ * Returns a pointer to the character that follows the next ':' in field,
+ * or NULL when field contains no further ':'.
+ ******************************************************************************/
+static const char *ctx_next_key_field(const char *field) {
+
+    const char *sep = strchr(field, ':');
+
+    return sep ? sep + 1 : NULL;
+}
+
+/******************************************************************************
+ * ctx_read_keys
+ *
+ * Receives the peer's key message from params->sockfd and decodes it into
+ * rem_dest. The short layout is expected when gid_index is -1 and use_mcg is
+ * not set, otherwise the layout that also carries the 16 GID bytes.
+ *
+ * The message comes from the network, so it is never trusted to be well
+ * formed: the buffer is forcibly NUL-terminated and every ':' separator is
+ * checked before it is used.
+ *
+ * Returns 0 on success, -1 when the message could not be received or parsed.
+ ******************************************************************************/
+static int ctx_read_keys(struct pingpong_dest *rem_dest,
+                         struct perftest_parameters *params) {
+
+    if (params->gid_index == -1 && !params->use_mcg) {
+
+        int parsed;
+        char msg[KEY_MSG_SIZE];
+
+        if (recv(params->sockfd, msg, sizeof msg,0) != sizeof msg) {
+            perror("pp_read_keys");
+            fprintf(stderr, "Couldn't read remote address\n");
+            return -1;
+        }
+        // a well formed message already ends with NUL in the last byte
+        msg[sizeof msg - 1] = '\0';
+
+        parsed = sscanf(msg,KEY_PRINT_FMT,&rem_dest->lid,
+                        &rem_dest->out_reads,&rem_dest->qpn,
+                        &rem_dest->psn, &rem_dest->rkey,&rem_dest->vaddr);
+
+        if (parsed != 6) {
+            fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg, msg);
+            return -1;
+        }
+
+    } else {
+
+        char msg[KEY_MSG_SIZE_GID];
+        const char *field = msg;
+        int i;
+
+        if (recv(params->sockfd, msg, sizeof msg,0) != sizeof msg) {
+            perror("pp_read_keys");
+            fprintf(stderr, "Couldn't read remote address\n");
+            return -1;
+        }
+        // a well formed message already ends with NUL in the last byte
+        msg[sizeof msg - 1] = '\0';
+
+        // the conversions stop at the ':' that ends each field, so the
+        // fields are converted in place without a temporary copy
+        rem_dest->lid = (int)strtol(field, NULL, 16);           // LID
+
+        if ((field = ctx_next_key_field(field)) == NULL)
+            goto bad_msg;
+        rem_dest->out_reads = (int)strtol(field, NULL, 16);     // OUT_READS
+
+        if ((field = ctx_next_key_field(field)) == NULL)
+            goto bad_msg;
+        rem_dest->qpn = (int)strtol(field, NULL, 16);           // QPN
+
+        if ((field = ctx_next_key_field(field)) == NULL)
+            goto bad_msg;
+        rem_dest->psn = (int)strtol(field, NULL, 16);           // PSN
+
+        if ((field = ctx_next_key_field(field)) == NULL)
+            goto bad_msg;
+        rem_dest->rkey = (unsigned)strtol(field, NULL, 16);     // RKEY
+
+        if ((field = ctx_next_key_field(field)) == NULL)
+            goto bad_msg;
+        rem_dest->vaddr = _strtoui64(field, NULL, 16);          // VA
+
+        for (i = 0; i < 16; ++i) {                              // GID bytes
+            if ((field = ctx_next_key_field(field)) == NULL)
+                goto bad_msg;
+            rem_dest->gid.raw[i] = (unsigned char)strtol(field, NULL, 16);
+        }
+    }
+    return 0;
+
+bad_msg:
+    fprintf(stderr, "Couldn't parse remote address message\n");
+    return -1;
+}
+
+/******************************************************************************
+ * ctx_set_link_layer
+ *
+ * Queries the port, reports the link type and picks a default GID index.
+ * The link type is currently fixed to IB; the queried port attributes are
+ * only used to verify that the port can be queried.
+ *
+ * params->gid_index is set to 0 when it was -1 and either the link is ETH or
+ * multicast is in use.
+ *
+ * Returns 0 on success, -1 when the port cannot be queried or the link type
+ * is UNDETECTED.
+ ******************************************************************************/
+int ctx_set_link_layer(struct ibv_context *context,
+                       struct perftest_parameters *params) {
+
+    struct ibv_port_attr port_attr;
+    LinkType             link;
+
+    if (ibv_query_port(context,params->ib_port,&port_attr) != 0) {
+        fprintf(stderr,"Unable to query port\n");
+        return -1;
+    }
+
+    // port_attr.link_layer is not consulted here; the type is forced to IB
+    link = IB;
+
+    if (link == UNDETECTED) {
+        fprintf(stderr," Unable to determine link layer \n");
+        return -1;
+    }
+    printf(" Link type is %s \n",linkArray[link]);
+
+    if (params->gid_index == -1 && (link == ETH || params->use_mcg))
+        params->gid_index = 0;
+
+    if (params->gid_index > -1) {
+        if (params->machine == CLIENT || params->duplex)
+            fprintf(stdout," Using gid index %d as source GID\n",params->gid_index);
+    }
+
+    return 0;
+}
+
+/******************************************************************************
+ * ctx_cq_create
+ *
+ * Creates a completion queue whose depth depends on the test setup:
+ *   WRITE / READ verbs : tx_depth * num_of_qps
+ *   duplex             : tx_depth + rx_depth * num_of_qps
+ *   client             : tx_depth
+ *   otherwise          : rx_depth * num_of_qps
+ *
+ * Returns the result of ibv_create_cq, which the caller must check.
+ ******************************************************************************/
+struct ibv_cq* ctx_cq_create(struct ibv_context *context,
+                             struct ibv_comp_channel *channel,
+                             struct perftest_parameters *param) {
+
+    int depth;
+
+    if (param->verb == WRITE || param->verb == READ) {
+        depth = param->tx_depth*param->num_of_qps;
+    } else if (param->duplex) {
+        depth = param->tx_depth + param->rx_depth*(param->num_of_qps);
+    } else if (param->machine == CLIENT) {
+        depth = param->tx_depth;
+    } else {
+        depth = param->rx_depth*param->num_of_qps;
+    }
+
+    return ibv_create_cq(context,depth,NULL,channel,0);
+}
+
+/******************************************************************************
+ * ctx_qp_create
+ *
+ * Creates a QP on pd that uses send_cq and recv_cq. Queue depths and the
+ * inline size come from param; the QP type follows param->connection_type
+ * (RC, UC or UD).
+ *
+ * Returns the new QP, or NULL when the connection type is unknown or
+ * ibv_create_qp fails (a message is printed in both cases).
+ ******************************************************************************/
+struct ibv_qp* ctx_qp_create(struct ibv_pd *pd,
+                             struct ibv_cq *send_cq,
+                             struct ibv_cq *recv_cq,
+                             struct perftest_parameters *param) {
+
+    struct ibv_qp_init_attr init_attr;
+    struct ibv_qp          *new_qp;
+
+    memset(&init_attr, 0, sizeof(struct ibv_qp_init_attr));
+
+    switch (param->connection_type) {
+        case RC :
+            init_attr.qp_type = IBV_QPT_RC;
+            break;
+        case UC :
+            init_attr.qp_type = IBV_QPT_UC;
+            break;
+        case UD :
+            init_attr.qp_type = IBV_QPT_UD;
+            break;
+        default:
+            fprintf(stderr, "Unknown connection type \n");
+            return NULL;
+    }
+
+    init_attr.send_cq             = send_cq;
+    init_attr.recv_cq             = recv_cq;
+    init_attr.cap.max_send_wr     = param->tx_depth;
+    init_attr.cap.max_recv_wr     = param->rx_depth;
+    init_attr.cap.max_send_sge    = MAX_SEND_SGE;
+    init_attr.cap.max_recv_sge    = MAX_RECV_SGE;
+    init_attr.cap.max_inline_data = param->inline_size;
+
+    new_qp = ibv_create_qp(pd,&init_attr);
+    if (new_qp == NULL)
+        fprintf(stderr, "Couldn't create QP\n");
+
+    return new_qp;
+}
+
+/******************************************************************************
+ * ctx_modify_qp_to_init
+ *
+ * Moves qp to the INIT state on port param->ib_port with P_Key index 0.
+ * UD QPs additionally get DEF_QKEY. Other connection types get access flags
+ * chosen by the verb; a verb other than READ, WRITE or SEND leaves the
+ * access flags at 0.
+ *
+ * Returns 0 on success, 1 when ibv_modify_qp fails.
+ ******************************************************************************/
+int ctx_modify_qp_to_init(struct ibv_qp *qp,struct perftest_parameters *param) {
+
+    struct ibv_qp_attr init_attr;
+    int attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT;
+
+    memset(&init_attr, 0, sizeof(struct ibv_qp_attr));
+    init_attr.qp_state   = IBV_QPS_INIT;
+    init_attr.pkey_index = 0;
+    init_attr.port_num   = param->ib_port;
+
+    if (param->connection_type == UD) {
+        // assert(param->verb == SEND);
+        init_attr.qkey = DEF_QKEY;
+        attr_mask |= IBV_QP_QKEY;
+    } else {
+        if (param->verb == READ) {
+            init_attr.qp_access_flags = IBV_ACCESS_REMOTE_READ;
+        } else if (param->verb == WRITE) {
+            init_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;
+        } else if (param->verb == SEND) {
+            init_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE |
+                                        IBV_ACCESS_LOCAL_WRITE;
+        }
+        attr_mask |= IBV_QP_ACCESS_FLAGS;
+    }
+
+    if (ibv_modify_qp(qp,&init_attr,attr_mask) != 0) {
+        fprintf(stderr, "Failed to modify QP to INIT\n");
+        return 1;
+    }
+    return 0;
+}
+
+/******************************************************************************
+ * ctx_get_local_lid
+ *
+ * Returns the LID reported by ibv_query_port for the given port, or 0 when
+ * the query fails.
+ ******************************************************************************/
+uint16_t ctx_get_local_lid(struct ibv_context *context, uint8_t port) {
+
+    struct ibv_port_attr port_attr;
+    const int query_failed = ibv_query_port(context,port,&port_attr);
+
+    return query_failed ? 0 : port_attr.lid;
+}
+
+/******************************************************************************
+ * ctx_set_out_reads
+ *
+ * Picks the number of outstanding reads to use.
+ * The upper limit is MAX_OUT_READ_HERMON for the vendor part ids listed
+ * below and MAX_OUT_READ for every other device. A request above the limit
+ * is reduced to it (with a message); a request of 0 or less selects the
+ * limit as well.
+ *
+ * Returns the chosen value, or -1 when the device cannot be queried.
+ ******************************************************************************/
+int ctx_set_out_reads(struct ibv_context *context,int num_user_reads) {
+
+    // vendor part ids that get the larger limit
+    static const unsigned int hermon_part_ids[] = {
+        25408, 25418, 25448, 26418, 26428,
+        26438, 26448, 26458, 26468, 26478
+    };
+    struct ibv_device_attr dev_attr;
+    int limit = MAX_OUT_READ;
+    unsigned int idx;
+
+    if (ibv_query_device(context,&dev_attr))
+        return -1;
+
+    // Checks the device type for setting the max outstanding reads.
+    for (idx = 0; idx < sizeof hermon_part_ids / sizeof hermon_part_ids[0]; ++idx) {
+        if (dev_attr.vendor_part_id == hermon_part_ids[idx]) {
+            limit = MAX_OUT_READ_HERMON;
+            break;
+        }
+    }
+
+    if (num_user_reads <= 0) {
+        num_user_reads = limit;
+    } else if (num_user_reads > limit) {
+        fprintf(stderr," Number of outstanding reads is above max = %d\n",limit);
+        fprintf(stderr," Changing to that max value\n");
+        num_user_reads = limit;
+    }
+
+    printf(" Number of outstanding reads is %d\n",num_user_reads);
+
+    return num_user_reads;
+}
+
+/******************************************************************************
+ * ctx_client_connect
+ *
+ * Resolves servername:port and connects a TCP socket to the first address
+ * that accepts the connection.
+ *
+ * Returns the connected socket, or INVALID_SOCKET on every failure
+ * (formatting the port, name resolution, or no address could be connected).
+ ******************************************************************************/
+SOCKET ctx_client_connect(const char *servername,int port) {
+
+    struct addrinfo *res, *t;
+    struct addrinfo hints;
+    char service[12];   /* large enough for any int, sign and NUL included */
+    int n;
+    SOCKET sockfd = INVALID_SOCKET;
+
+    memset(&hints, 0, sizeof hints);
+    hints.ai_family = AF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;
+
+    if (sprintf(service, "%d", port) < 0)
+        return INVALID_SOCKET;
+
+    n = getaddrinfo(servername, service, &hints, &res);
+
+    if (n != 0) {
+        fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+        // the error code must not be returned: it would read as a valid socket
+        return INVALID_SOCKET;
+    }
+
+    // try each resolved address until one connects
+    for (t = res; t; t = t->ai_next) {
+        sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+        if (sockfd != INVALID_SOCKET) {
+            if (!connect(sockfd, t->ai_addr, (int)t->ai_addrlen))
+                break;
+            closesocket(sockfd);
+            sockfd = INVALID_SOCKET;
+        }
+    }
+
+    freeaddrinfo(res);
+
+    if (sockfd == INVALID_SOCKET)
+        fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+
+    return sockfd;
+}
+
+/******************************************************************************
+ * ctx_server_connect
+ *
+ * Binds a TCP socket to the given port, listens on it and accepts a single
+ * connection. The listening socket is closed before returning.
+ *
+ * Returns the accepted socket, or INVALID_SOCKET on every failure
+ * (formatting the port, name resolution, bind, listen or accept).
+ ******************************************************************************/
+SOCKET ctx_server_connect(int port)
+{
+    struct addrinfo *res, *t;
+    struct addrinfo hints;
+    char service[12];   /* large enough for any int, sign and NUL included */
+    SOCKET sockfd = INVALID_SOCKET, connfd;
+    int n;
+
+    memset(&hints, 0, sizeof hints);
+    hints.ai_flags = AI_PASSIVE;
+    hints.ai_family = AF_UNSPEC;
+    hints.ai_socktype = SOCK_STREAM;
+
+    if (sprintf(service, "%d", port) < 0)
+        return INVALID_SOCKET;
+
+    n = getaddrinfo(NULL, service, &hints, &res);
+
+    if (n != 0) {
+        fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
+        // the error code must not be returned: it would read as a valid socket
+        return INVALID_SOCKET;
+    }
+
+    // try each resolved address until one can be bound
+    for (t = res; t; t = t->ai_next) {
+        sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+        if (sockfd != INVALID_SOCKET) {
+            n = 1;
+
+            // best effort: a failure to set SO_REUSEADDR is not fatal
+            setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char*)&n, sizeof n);
+
+            if (!bind(sockfd, t->ai_addr, (int)t->ai_addrlen))
+                break;
+            closesocket(sockfd);
+            sockfd = INVALID_SOCKET;
+        }
+    }
+
+    freeaddrinfo(res);
+
+    if (sockfd == INVALID_SOCKET) {
+        fprintf(stderr, "Couldn't listen to port %d\n", port);
+        return sockfd;
+    }
+
+    if (listen(sockfd, 1) != 0) {
+        fprintf(stderr, "Couldn't listen to port %d\n", port);
+        closesocket(sockfd);
+        return INVALID_SOCKET;
+    }
+
+    connfd = accept(sockfd, NULL, NULL);
+    if (connfd == INVALID_SOCKET) {
+        perror("server accept");
+        fprintf(stderr, "accept() failed\n");
+    }
+
+    // the listening socket is no longer needed, whether accept worked or not
+    closesocket(sockfd);
+    return connfd;
+}
+
+
+/******************************************************************************
+ * ctx_hand_shake
+ *
+ * Exchanges the key messages with the peer over the socket.
+ * The client writes its own data first and then reads the peer's data; the
+ * server reads first and then writes.
+ *
+ * Returns 0 on success, -1 when either direction fails.
+ ******************************************************************************/
+int ctx_hand_shake(struct perftest_parameters *params,
+                   struct pingpong_dest *my_dest,
+                   struct pingpong_dest *rem_dest) {
+
+    const int is_client = (params->machine == CLIENT);
+
+    // Client: write before reading.
+    if (is_client && ctx_write_keys(my_dest,params)) {
+        fprintf(stderr,"Unable to write on the socket\n");
+        return -1;
+    }
+
+    // Both sides read the peer's data.
+    if (ctx_read_keys(rem_dest,params)) {
+        fprintf(stderr,"Unable to Read from the socket\n");
+        return -1;
+    }
+
+    // Server: write after reading.
+    if (!is_client && ctx_write_keys(my_dest,params)) {
+        fprintf(stderr,"Unable to write on the socket\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/******************************************************************************
+ * ctx_print_pingpong_data
+ *
+ * Prints one address line for element:
+ *   every verb     : side label, LID, QPN and PSN
+ *   READ           : additionally the outstanding reads
+ *   READ and WRITE : additionally the RKey and virtual address
+ * followed by a newline. When a GID index is set or multicast is in use,
+ * a second line with the 16 GID bytes is printed.
+ ******************************************************************************/
+void ctx_print_pingpong_data(struct pingpong_dest *element,
+                             struct perftest_parameters *params) {
+
+    const int is_read = (params->verb == READ);
+    const int is_rdma = (is_read || params->verb == WRITE);
+
+    // First of all we print the basic format.
+    printf(BASIC_ADDR_FMT,sideArray[params->side],element->lid,element->qpn,element->psn);
+
+    if (is_read)
+        printf(READ_FMT,element->out_reads);
+
+    if (is_rdma)
+        printf(RDMA_FMT,element->rkey,element->vaddr);
+
+    putchar('\n');
+
+    if (params->gid_index > -1 || params->use_mcg) {
+        const uint8_t *gid = element->gid.raw;
+
+        printf(GID_FMT,gidArray[params->use_mcg],
+               gid[0], gid[1], gid[2], gid[3],
+               gid[4], gid[5], gid[6], gid[7],
+               gid[8], gid[9], gid[10],gid[11],
+               gid[12],gid[13],gid[14],gid[15]);
+    }
+}
+
+/******************************************************************************
+ * ctx_notify_events
+ *
+ * Waits for the next completion event on channel, acknowledges it, checks
+ * that it belongs to cq and requests the next notification on cq.
+ *
+ * Returns 0 on success, 1 on any failure (a message is printed).
+ ******************************************************************************/
+int ctx_notify_events(struct ibv_cq *cq,struct ibv_comp_channel *channel) {
+
+    struct ibv_cq *ev_cq;
+    void *ev_ctx;
+
+    if (ibv_get_cq_event(channel,&ev_cq,&ev_ctx)) {
+        fprintf(stderr, "Failed to get cq_event\n");
+        return 1;
+    }
+
+    // Every event returned by ibv_get_cq_event has to be acknowledged,
+    // otherwise destroying the CQ later waits for the missing ack.
+    ibv_ack_cq_events(ev_cq, 1);
+
+    if (ev_cq != cq) {
+        fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+        return 1;
+    }
+
+    if (ibv_req_notify_cq(cq, 0)) {
+        fprintf(stderr, "Couldn't request CQ notification\n");
+        return 1;
+    }
+    return 0;
+}
+
+/******************************************************************************
+ * increase_rem_addr
+ *
+ * Advances the remote RDMA address of wr by INC(size). Once
+ * CYCLE_BUFFER / INC(size) steps have been taken (counted through scnt),
+ * the address wraps back to prim_addr.
+ ******************************************************************************/
+void increase_rem_addr(struct ibv_send_wr *wr,int size,int scnt,uint64_t prim_addr) {
+
+    const int wrap_now = (((scnt+1) % (CYCLE_BUFFER/ INC(size))) == 0);
+
+    if (wrap_now) {
+        wr->wr.rdma.remote_addr = prim_addr;
+    } else {
+        wr->wr.rdma.remote_addr += INC(size);
+    }
+}
+
+/******************************************************************************
+ * increase_loc_addr
+ *
+ * Advances the local address of sg by INC(size). Once
+ * CYCLE_BUFFER / INC(size) steps have been taken (counted through rcnt),
+ * the address wraps back to prim_addr.
+ ******************************************************************************/
+void increase_loc_addr(struct ibv_sge *sg,int size,int rcnt,uint64_t prim_addr) {
+
+    const int wrap_now = (((rcnt+1) % (CYCLE_BUFFER/ INC(size))) == 0);
+
+    if (wrap_now) {
+        sg->addr = prim_addr;
+    } else {
+        sg->addr += INC(size);
+    }
+}
+
+/******************************************************************************
+ * ctx_close_connection
+ *
+ * Ends the socket session with the peer: performs one more hand shake,
+ * sends the string "done" (terminating NUL included) and closes the socket.
+ *
+ * Returns 0 on success. Returns -1 when the hand shake or the send fails;
+ * in that case the socket is left open.
+ ******************************************************************************/
+int ctx_close_connection(struct perftest_parameters *params,
+                         struct pingpong_dest *my_dest,
+                         struct pingpong_dest *rem_dest) {
+
+    static const char done_msg[] = "done";
+
+    // Signal client is finished.
+    if (ctx_hand_shake(params,my_dest,rem_dest))
+        return -1;
+
+    // Tell the peer we are done, then close the socket file descriptor.
+    if (send(params->sockfd,done_msg,sizeof done_msg,0) != sizeof done_msg) {
+        perror(" Client write");
+        fprintf(stderr,"Couldn't write to socket\n");
+        return -1;
+    }
+
+    closesocket(params->sockfd);
+    return 0;
+}
+/******************************************************************************
+ * End
+ ******************************************************************************/
--- /dev/null
+/*
+ * Copyright (c) 2009 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Ido Shamay <idos@dev.mellanox.co.il>
+ *
+ * Description :
+ *
+ * This API gathers the socket interface methods for all perftest benchmarks
+ * and can be used for any benchmark for IB.
+ * It passes messages between 2 end points through sockets interface methods,
+ * while passing the relevant information for the IB entities.
+ *
+ * Methods :
+ *
+ * ctx_get_local_lid - Receives the Local id from the subnet manager.
+ * ctx_client_connect - Connects the client through sockets interface.
+ * ctx_server_connect - Connects the Server to client through sockets.
+ * ctx_hand_shake - Passes the data between 2 end points machines.
+ * ctx_print_pingpong_data - Prints the data that was passed.
+ * ctx_close_connection - Closing the sockets interface.
+ */
+
+#ifndef PERFTEST_RESOURCES_H
+#define PERFTEST_RESOURCES_H
+
+
+// Files included for work.
+#include <infiniband/verbs.h>
+
+// Connection types available.
+#define RC 0
+#define UC 1
+#define UD 2
+// #define XRC 3
+
+#define CYCLE_BUFFER 4096
+#define CACHE_LINE_SIZE 64
+
+// Outstanding reads for "read" verb only.
+#define MAX_OUT_READ_HERMON 16
+#define MAX_OUT_READ 4
+#define MAX_SEND_SGE 1
+#define MAX_RECV_SGE 1
+#define DEF_WC_SIZE 1
+
+// Space for GRH when we scatter the packet in UD.
+#define UD_ADDITION 40
+#define PINGPONG_SEND_WRID 60
+#define PINGPONG_RDMA_WRID 3
+#define PINGPONG_READ_WRID 1
+#define DEF_QKEY 0x11111111
+#define ALL 1
+
+#define KEY_MSG_SIZE 50 // Message size without gid.
+#define KEY_MSG_SIZE_GID 98 // Message size with gid (MGID as well).
+
+// The Format of the message we pass through sockets , without passing Gid.
+#define KEY_PRINT_FMT "%04x:%04x:%06x:%06x:%08x:%016Lx"
+
+
+// The Format of the message we pass through sockets (With Gid).
+#define KEY_PRINT_FMT_GID "%04x:%04x:%06x:%06x:%08x:%016Lx:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x"
+
+
+// The Basic print format for all verbs.
+#define BASIC_ADDR_FMT " %s address: LID %#04x QPN %#06x PSN %#06x"
+
+// Addition format string for READ - the outstanding reads.
+#define READ_FMT " OUT %#04x"
+
+// The print format of the pingpong_dest element for RDMA verbs.
+#define RDMA_FMT " RKey %#08x VAddr %#016Lx"
+
+// The print format of a global address or a multicast address.
+#define GID_FMT " %s: %02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d:%02d\n"
+
+// End of Test
+#define RESULT_LINE "------------------------------------------------------------------\n"
+
+// The format of the results
+#define RESULT_FMT " #bytes #iterations BW peak[MB/sec] BW average[MB/sec]\n"
+
+// Result print format
+#define REPORT_FMT " %7d %d %7.2f %7.2f\n"
+
+// Allocates (size) elements of (type) into var; on failure prints an error and calls exit(1).
+#define ALLOCATE(var,type,size) \
+ { if((var = (type*)malloc(sizeof(type)*(size))) == NULL) \
+ { fprintf(stderr," Cannot Allocate\n"); exit(1);}}
+// Reports a failed send completion (wc) on stderr, then executes `return 1` in the calling function.
+#define NOTIFY_COMP_ERROR_SEND(wc,scnt,ccnt) \
+ { fprintf(stderr," Completion with error at client\n"); \
+ fprintf(stderr," Failed status %d: wr_id %d syndrom 0x%x\n",wc.status,(int) wc.wr_id,wc.vendor_err); \
+ fprintf(stderr, "scnt=%d, ccnt=%d\n",scnt, ccnt); return 1;}
+// Reports a failed receive completion (wc) on stderr, then executes `return 1` in the calling function.
+#define NOTIFY_COMP_ERROR_RECV(wc,rcnt) \
+ { fprintf(stderr," Completion with error at server\n"); \
+ fprintf(stderr," Failed status %d: wr_id %d syndrom 0x%x\n",wc.status,(int) wc.wr_id,wc.vendor_err); \
+ fprintf(stderr, "rcnt=%d\n",rcnt); return 1;}
+
+// Macro to determine packet size in case of UD.
+// The UD addition is for the GRH. Arguments are parenthesized so that expressions expand safely.
+#define SIZE(type,size) (((type) == UD) ? ((size) + UD_ADDITION) : (size))
+
+// Macro to define the buffer size (according to "Nehalem" chip set).
+// For small message sizes (under 4K) we allocate a 4K buffer , and the RDMA write
+// verb will write in a cycle on the buffer. This improves the BW on "Nehalem" systems.
+#define BUFF_SIZE(size) (((size) < CYCLE_BUFFER) ? (CYCLE_BUFFER) : (size))
+
+// Macro that defines the address step used when we write in RDMA.
+// If the message size is not larger than CACHE_LINE_SIZE then we advance in CACHE_LINE_SIZE jumps.
+#define INC(size) (((size) > CACHE_LINE_SIZE) ? (size) : (CACHE_LINE_SIZE))
+
+// The Verb of the benchmark.
+typedef enum { SEND , WRITE , READ } VerbType;
+
+// The type of the machine ( server or client actually).
+typedef enum { SERVER , CLIENT } MachineType;
+
+// The side whose data is being printed ( local or remote ).
+typedef enum { LOCAL , REMOTE } PrintDataSide;
+
+// The link layer of the current port.
+typedef enum { UNDETECTED = 0 , IB = 1 , ETH = 2 } LinkType;
+
+/******************************************************************************
+ * Perftest resources Structures and data types.
+ ******************************************************************************/
+
+struct perftest_parameters {
+ int connection_type;
+ int mtu;
+ int tx_depth;
+ int rx_depth;
+ int inline_size;
+ uint8_t qp_timeout;
+ int gid_index;
+ int port;
+ uint8_t ib_port;
+ int use_event;
+ int use_mcg;
+ SOCKET sockfd;
+ int signal_comp;
+ int num_of_qps;
+ int iters;
+ int out_reads;
+ int duplex;
+ int sl;
+ MachineType machine;
+ PrintDataSide side;
+ VerbType verb;
+};
+// Connection data that ctx_hand_shake exchanges between the two sides.
+struct pingpong_dest {
+ uint16_t lid;
+ int out_reads;
+ int qpn;
+ int psn;
+ unsigned rkey;
+ unsigned long long vaddr;
+ union ibv_gid gid;
+};
+
+/******************************************************************************
+ * Perftest resources Methods and interface utilitizes.
+ ******************************************************************************/
+
+/* ctx_set_link_layer.
+ *
+ * Description : Determines the link layer type (IB or ETH).
+ *
+ * Parameters :
+ *
+ * context - The context of the HCA device.
+ * params - The perftest parameters of the device.
+ *
+ * Return Value : 0 upon success. -1 if it fails.
+ */
+int ctx_set_link_layer(struct ibv_context *context,struct perftest_parameters *params);
+
+/* ctx_cq_create.
+ *
+ * Description :
+ *
+ * Creates a CQ on the given device context.
+ * NOTE(review): which attributes of param are used is not visible here - confirm.
+ *
+ * Parameters :
+ *
+ * context - The context of the HCA device.
+ * channel - The completion channel for the CQ
+ * (presumably used for CQ events - confirm against the implementation).
+ * param - The perftest parameters.
+ *
+ * Return Value : Address of the new CQ.
+ */
+struct ibv_cq* ctx_cq_create(struct ibv_context *context,
+ struct ibv_comp_channel *channel,
+ struct perftest_parameters *param);
+
+/* ctx_qp_create.
+ *
+ * Description :
+ *
+ * Creates a QP , according to the attributes given in param.
+ * The relevant attributes are tx_depth,rx_depth,inline_size and connection_type.
+ *
+ * Parameters :
+ *
+ * pd - The Protection domain , to which the QP will be assigned.
+ * send_cq - The CQ that will produce send CQE.
+ * recv_cq - The CQ that will produce recv CQE.
+ * param - The parameters for the QP.
+ *
+ * Return Value : Address of the new QP.
+ */
+struct ibv_qp* ctx_qp_create(struct ibv_pd *pd,
+ struct ibv_cq *send_cq,
+ struct ibv_cq *recv_cq,
+ struct perftest_parameters *param);
+
+/* ctx_modify_qp_to_init.
+ *
+ * Description :
+ *
+ * Modifies the given QP to INIT state , according to attributes in param.
+ * The relevant attributes are ib_port, connection_type and verb.
+ *
+ * Parameters :
+ *
+ * qp - The QP that will be moved to INIT.
+ * param - The parameters for the QP.
+ *
+ * Return Value : 0 if success , 1 otherwise.
+ *
+ */
+int ctx_modify_qp_to_init(struct ibv_qp *qp,struct perftest_parameters *param);
+
+/* ctx_get_local_lid .
+ *
+ * Description :
+ *
+ * This method finds and returns the local Id in IB subnet manager of
+ * the selected port and HCA given. The lid identifies the port.
+ *
+ * Parameters :
+ *
+ * context - the context of the HCA device.
+ * ib_port - The port of the HCA (1 or 2).
+ *
+ * Return Value : The Lid itself. (No error values).
+ */
+uint16_t ctx_get_local_lid(struct ibv_context *context,uint8_t ib_port);
+
+/* ctx_set_out_reads.
+ *
+ * Description :
+ *
+ * This Method is used in READ verb.
+ * It sets the outstanding reads number according to the HCA gen.
+ *
+ * Parameters :
+ *
+ * context - the context of the HCA device.
+ * num_user_reads - The num of outstanding reads the user requested
+ * ( 0 if the user made no request , in which case MAX is selected ).
+ *
+ * Return Value : The number of outstanding reads , -1 if query device failed.
+ */
+int ctx_set_out_reads(struct ibv_context *context,int num_user_reads);
+
+/* ctx_client_connect .
+ *
+ * Description :
+ *
+ * Connects the client to a well known server on a requested port.
+ * It assumes the Server is waiting for request on the port.
+ *
+ * Parameters :
+ *
+ * servername - The server name (according to DNS) or IP.
+ * port - The port that the server is listening to.
+ *
+ * Return Value : The file descriptor selected in the PDT for the socket.
+ */
+SOCKET ctx_client_connect(const char *servername, int port);
+
+/* ctx_server_connect .
+ *
+ * Description :
+ *
+ * Instructs a machine to listen on a requested port.
+ * When running this command the machine will wait for 1 client to
+ * contact it , on the selected port , with the ctx_client_connect method.
+ *
+ * Parameters :
+ *
+ * port - The port which the machine will listen.
+ *
+ * Return Value : The new file descriptor selected in the PDT for the socket.
+ */
+SOCKET ctx_server_connect(int port);
+
+/* ctx_hand_shake .
+ *
+ * Description :
+ *
+ * Exchanging the data , represented in struct pingpong_dest , between
+ * a server and client that performed the ctx_server/client_connect.
+ * The method fills in rem_dest the remote machine data , and passes the data
+ * in my_dest to other machine.
+ *
+ * Parameters :
+ *
+ * params - The parameters needed for this method. Are mentioned above ,and
+ * contains standard IB info. (exists on perftest).
+ * my_dest - Contains the data you want to pass to the other side.
+ * rem_dest - The other side data.
+ *
+ * Return Value : 0 upon success. -1 if it fails.
+ */
+int ctx_hand_shake(struct perftest_parameters *params,
+ struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest);
+
+
+/* ctx_print_pingpong_data.
+ *
+ * Description :
+ *
+ * Prints the data stored in the struct pingpong_dest.
+ *
+ * Parameters :
+ *
+ * element - The element to print.
+ * params - The parameters of the machine.
+ */
+void ctx_print_pingpong_data(struct pingpong_dest *element,
+ struct perftest_parameters *params);
+
+/* ctx_notify_events.
+ * Description :
+ *
+ * NOTE(review): undocumented in the original. Judging by the name and arguments it
+ * presumably handles a CQ event from channel for cq - confirm against the implementation.
+ * Parameters :
+ * cq - The CQ.
+ * channel - The completion channel.
+ */
+int ctx_notify_events(struct ibv_cq *cq,struct ibv_comp_channel *channel);
+
+/* increase_rem_addr.
+ *
+ * Description :
+ * Increases the remote address in RDMA verbs by INC
+ * (at least CACHE_LINE_SIZE , which is 64).
+ * NOTE(review): the roles of scnt and prim_addr are not documented - confirm in the implementation.
+ */
+void increase_rem_addr(struct ibv_send_wr *wr,int size,int scnt,uint64_t prim_addr);
+
+/* increase_loc_addr.
+ *
+ * Description :
+ * NOTE(review): undocumented in the original. Presumably the local ( sg ) counterpart of
+ * increase_rem_addr - confirm against the implementation.
+ */
+void increase_loc_addr(struct ibv_sge *sg,int size,int rcnt,uint64_t prim_addr);
+
+/* ctx_close_connection .
+ *
+ * Description :
+ *
+ * Close the connection between the 2 machines.
+ * It performs a handshake to ensure the 2 sides are there.
+ *
+ * Parameters :
+ *
+ * params - The parameters of the machine
+ * my_dest ,rem_dest - The 2 sides that end the connection.
+ *
+ * Return Value : 0 upon success. -1 if it fails.
+ */
+int ctx_close_connection(struct perftest_parameters *params,
+ struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest);
+
+
+#endif /* PERFTEST_RESOURCES_H */
USE_NATIVE_EH = 1\r
USE_IOSTREAM = 1\r
\r
-SOURCES = read_bw.rc read_bw.c ..\perftest.c\r
+SOURCES = read_bw.rc read_bw.c ..\perftest_resources.c ..\perftest.c\r
\r
-INCLUDES = ..;..\..\..\ulp\libibverbs\include;\\r
- ..\..\..\inc;..\..\..\inc\user;\\r
- ..\..\..\inc\user\linux;\r
+INCLUDES =..;..\..\..\ulp\libibverbs\include;\\r
+ ..\..\..\inc;..\..\..\inc\user;..\..\..\inc\user\linux;..\..\..\inc\complib;\\r
+ ..\..\..\hw\mlx4\user\hca;..\..\..\tools\perftests\user\\r
\r
TARGETLIBS = \\r
$(SDK_LIB_PATH)\kernel32.lib \\r
$(SDK_LIB_PATH)\user32.lib \\r
$(SDK_LIB_PATH)\ole32.lib \\r
$(SDK_LIB_PATH)\ws2_32.lib \\r
+ $(SDK_LIB_PATH)\uuid.lib \\r
!if $(FREEBUILD)\r
- $(TARGETPATH)\*\libibverbs.lib\r
+ $(TARGETPATH)\*\libibverbs.lib \\r
+ $(TARGETPATH)\*\complib.lib \\r
!else\r
- $(TARGETPATH)\*\libibverbsd.lib\r
-!endif\r
-\r
+ $(TARGETPATH)\*\libibverbsd.lib \\r
+ $(TARGETPATH)\*\complibd.lib \\r
+!endif
\ No newline at end of file
-/*\r
- * Copyright (c) 2005 Topspin Communications. All rights reserved.\r
- * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.\r
- * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.\r
- *\r
- * This software is available to you under the OpenIB.org BSD license\r
- * below:\r
- *\r
- * Redistribution and use in source and binary forms, with or\r
- * without modification, are permitted provided that the following\r
- * conditions are met:\r
- *\r
- * - Redistributions of source code must retain the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer.\r
- *\r
- * - Redistributions in binary form must reproduce the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer in the documentation and/or other materials\r
- * provided with the distribution.\r
- *\r
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV\r
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
- * SOFTWARE.\r
- */\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-#include <ws2tcpip.h>\r
-#include <winsock2.h>\r
-#include <time.h>\r
-\r
-#include "..\..\..\etc\user\getopt.c"\r
-#include "perftest.h"\r
-#include <infiniband/verbs.h>\r
-\r
-#define PINGPONG_READ_WRID 1\r
-#define VERSION 1.1\r
-#define ALL 1\r
-#define RC 0\r
-\r
-struct user_parameters {\r
- const char *servername;\r
- int connection_type;\r
- int mtu;\r
- int all; /* run all msg size */\r
- int iters;\r
- int tx_depth;\r
- int max_out_read;\r
- int use_event;\r
-};\r
-\r
-typedef UINT64 cycles_t;\r
-cycles_t *tposted;\r
-cycles_t *tcompleted;\r
-\r
-struct pingpong_context {\r
- struct ibv_context *context;\r
- struct ibv_comp_channel *channel;\r
- struct ibv_pd *pd;\r
- struct ibv_mr *mr;\r
- struct ibv_cq *cq;\r
- struct ibv_qp *qp;\r
- void *buf;\r
- unsigned size;\r
- int tx_depth;\r
- struct ibv_sge list;\r
- struct ibv_send_wr wr;\r
-};\r
-\r
-struct pingpong_dest {\r
- int lid;\r
- int qpn;\r
- int psn;\r
- unsigned rkey;\r
- unsigned long long vaddr;\r
-};\r
-\r
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
-{\r
- struct ibv_port_attr attr;\r
-\r
- if (ibv_query_port(ctx->context, (uint8_t) port, &attr))\r
- return 0;\r
-\r
- return attr.lid;\r
-}\r
-\r
-static struct pingpong_dest * pp_client_exch_dest(SOCKET sockfd,\r
- const struct pingpong_dest *my_dest)\r
-{\r
- struct pingpong_dest *rem_dest = NULL;\r
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];\r
- int parsed;\r
-\r
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,\r
- my_dest->psn,my_dest->rkey,my_dest->vaddr);\r
- if (send(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client send");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- goto out;\r
- }\r
-\r
- if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client recv");\r
- fprintf(stderr, "Couldn't recv remote address\n");\r
- goto out;\r
- }\r
-\r
- rem_dest = malloc(sizeof *rem_dest);\r
- if (!rem_dest)\r
- goto out;\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);\r
-\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,\r
- msg);\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
- }\r
-out:\r
- return rem_dest;\r
-}\r
-\r
-static struct pingpong_dest *pp_server_exch_dest(SOCKET connfd, const struct pingpong_dest *my_dest)\r
-{\r
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];\r
- struct pingpong_dest *rem_dest = NULL;\r
- int parsed;\r
- int n;\r
-\r
- n = recv(connfd, msg, sizeof msg, 0);\r
- if (n != sizeof msg) {\r
- perror("server recv");\r
- fprintf(stderr, "%d/%d: Couldn't recv remote address\n", n, (int) sizeof msg);\r
- goto out;\r
- }\r
-\r
- rem_dest = malloc(sizeof *rem_dest);\r
- if (!rem_dest)\r
- goto out;\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,\r
- msg);\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
- }\r
-\r
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,\r
- my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
- if (send(connfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("server send");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
- }\r
-out:\r
- return rem_dest;\r
-}\r
-\r
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,\r
- unsigned size,\r
- int tx_depth, int port,\r
- struct user_parameters *user_parm)\r
-{\r
- struct pingpong_context *ctx;\r
- struct ibv_device_attr device_attr;\r
- ctx = malloc(sizeof *ctx);\r
- if (!ctx)\r
- return NULL;\r
-\r
- ctx->size = size;\r
- ctx->tx_depth = tx_depth;\r
-\r
- ctx->buf = malloc(size * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
-\r
- memset(ctx->buf, 0, size * 2);\r
-\r
- ctx->context = ibv_open_device(ib_dev);\r
- if (!ctx->context) {\r
- fprintf(stderr, "Couldn't get context for %s\n",\r
- ibv_get_device_name(ib_dev));\r
- return NULL;\r
- }\r
- if (user_parm->mtu == 0) {/*user did not ask for specific mtu */\r
- if (ibv_query_device(ctx->context, &device_attr)) {\r
- fprintf(stderr, "Failed to query device props");\r
- return NULL;\r
- }\r
- if (device_attr.vendor_part_id == 23108)\r
- user_parm->mtu = 1024;\r
- else\r
- user_parm->mtu = 2048;\r
- }\r
- if (user_parm->use_event) {\r
- ctx->channel = ibv_create_comp_channel(ctx->context);\r
- if (!ctx->channel) {\r
- fprintf(stderr, "Couldn't create completion channel\n");\r
- return NULL;\r
- }\r
- } else\r
- ctx->channel = NULL;\r
- ctx->pd = ibv_alloc_pd(ctx->context);\r
- if (!ctx->pd) {\r
- fprintf(stderr, "Couldn't allocate PD\n");\r
- return NULL;\r
- }\r
-\r
- /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:\r
- * The Consumer is not allowed to assign Remote Write or Remote Atomic to\r
- * a Memory Region that has not been assigned Local Write. */\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
-\r
- ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, ctx->channel, 0);\r
- if (!ctx->cq) {\r
- fprintf(stderr, "Couldn't create CQ\n");\r
- return NULL;\r
- }\r
-\r
- {\r
- struct ibv_qp_init_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_init_attr));\r
- attr.send_cq = ctx->cq;\r
- attr.recv_cq = ctx->cq;\r
- attr.cap.max_send_wr = tx_depth;\r
- /* Work around: driver doesnt support\r
- * recv_wr = 0 */\r
- attr.cap.max_recv_wr = 1;\r
- attr.cap.max_send_sge = 1;\r
- attr.cap.max_recv_sge = 1;\r
- attr.qp_type = IBV_QPT_RC;\r
- ctx->qp = ibv_create_qp(ctx->pd, &attr);\r
- if (!ctx->qp) {\r
- fprintf(stderr, "Couldn't create QP\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- {\r
- struct ibv_qp_attr attr;\r
-\r
- attr.qp_state = IBV_QPS_INIT;\r
- attr.pkey_index = 0;\r
- attr.port_num = (uint8_t) port;\r
- attr.qp_access_flags = IBV_ACCESS_REMOTE_READ;\r
-\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_ACCESS_FLAGS)) {\r
- fprintf(stderr, "Failed to modify QP to INIT\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- return ctx;\r
-}\r
-\r
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,\r
- struct pingpong_dest *dest, struct user_parameters *user_parm)\r
-{\r
- struct ibv_qp_attr attr;\r
- memset(&attr, 0, sizeof attr);\r
-\r
- attr.qp_state = IBV_QPS_RTR;\r
- switch (user_parm->mtu) {\r
- case 256 : \r
- attr.path_mtu = IBV_MTU_256;\r
- break;\r
- case 512 :\r
- attr.path_mtu = IBV_MTU_512;\r
- break;\r
- case 1024 :\r
- attr.path_mtu = IBV_MTU_1024;\r
- break;\r
- case 2048 :\r
- attr.path_mtu = IBV_MTU_2048;\r
- break;\r
- case 4096 :\r
- attr.path_mtu = IBV_MTU_4096;\r
- break;\r
- }\r
- printf("Mtu : %d\n", user_parm->mtu);\r
- attr.dest_qp_num = dest->qpn;\r
- attr.rq_psn = dest->psn;\r
- attr.max_dest_rd_atomic = (uint8_t) user_parm->max_out_read;\r
- attr.min_rnr_timer = 12;\r
- attr.ah_attr.is_global = 0;\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.sl = 0;\r
- attr.ah_attr.src_path_bits = 0;\r
- attr.ah_attr.port_num = (uint8_t) port;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN |\r
- IBV_QP_MIN_RNR_TIMER |\r
- IBV_QP_MAX_DEST_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTR\n");\r
- return 1;\r
- }\r
- attr.timeout = 14;\r
- attr.retry_cnt = 7;\r
- attr.rnr_retry = 7;\r
- attr.qp_state = IBV_QPS_RTS;\r
- attr.sq_psn = my_psn;\r
- attr.max_rd_atomic = (uint8_t) user_parm->max_out_read;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN |\r
- IBV_QP_TIMEOUT |\r
- IBV_QP_RETRY_CNT |\r
- IBV_QP_RNR_RETRY |\r
- IBV_QP_MAX_QP_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
- return 1;\r
- }\r
- return 0;\r
-}\r
-\r
-static void usage(const char *argv0)\r
-{\r
- printf("Usage:\n");\r
- printf(" %s start a server and wait for connection\n", argv0);\r
- printf(" %s -h <host> connect to server at <host>\n", argv0);\r
- printf("\n");\r
- printf("Options:\n");\r
- printf(" -p <port> listen on/connect to port <port> (default 18515)\n");\r
- printf(" -d <dev> use IB device <dev> (default first device found)\n");\r
- printf(" -i <port> use port <port> of IB device (default 1)\n");\r
- printf(" -m <mtu> mtu size (256 - 4096. default for hermon is 2048)\n");\r
- printf(" -o =<num> num of outstanding read/atom(default 4)\n");\r
- printf(" -s <size> size of message to exchange (default 65536)\n");\r
- printf(" -a Run sizes from 2 till 2^23\n");\r
- printf(" -t <dep> size of tx queue (default 100)\n");\r
- printf(" -n <iters> number of exchanges (at least 2, default 1000)\n");\r
- printf(" -b measure bidirectional bandwidth (default unidirectional)\n");\r
- printf(" -V display version number\n");\r
- printf(" -e sleep on CQ events (default poll)\n");\r
-}\r
-\r
-static void print_report(unsigned int iters, unsigned size, int duplex,\r
- cycles_t *tposted, cycles_t *tcompleted)\r
-{\r
- cycles_t cycles_to_units;\r
- unsigned long tsize; /* Transferred size */\r
- int i, j;\r
- int opt_posted = 0, opt_completed = 0;\r
- cycles_t opt_delta;\r
- cycles_t t;\r
-\r
- opt_delta = tcompleted[opt_posted] - tposted[opt_completed];\r
-\r
- /* Find the peak bandwidth */\r
- for (i = 0; i < (int) iters; ++i)\r
- for (j = i; j < (int) iters; ++j) {\r
- t = (tcompleted[j] - tposted[i]) / (j - i + 1);\r
- if (t < opt_delta) {\r
- opt_delta = t;\r
- opt_posted = i;\r
- opt_completed = j;\r
- }\r
- }\r
-\r
- cycles_to_units = get_freq();\r
-\r
- tsize = duplex ? 2 : 1;\r
- tsize = tsize * size;\r
- printf("%7d %d ", size, iters);\r
-\r
- {\r
- double sec = (double) opt_delta / (double) cycles_to_units;\r
- double mbytes = (double) tsize / (double) 0x100000;\r
- printf("%7.2f ", mbytes / sec);\r
-\r
- sec = (double) (tcompleted[iters - 1] - tposted[0]) / (double) cycles_to_units;\r
- mbytes = (double) tsize * (double) iters / (double) 0x100000;\r
- printf("%7.2f\n", mbytes / sec);\r
- }\r
-}\r
-\r
-static int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest *rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- int scnt, ccnt ;\r
-\r
- ctx->list.addr = (uintptr_t) ctx->buf;\r
- ctx->list.length = size;\r
- ctx->list.lkey = ctx->mr->lkey;\r
- ctx->wr.wr.rdma.remote_addr = rem_dest->vaddr;\r
- ctx->wr.wr.rdma.rkey = rem_dest->rkey;\r
- ctx->wr.wr_id = PINGPONG_READ_WRID;\r
- ctx->wr.sg_list = &ctx->list;\r
- ctx->wr.num_sge = 1;\r
- ctx->wr.opcode = IBV_WR_RDMA_READ;\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- ctx->wr.next = NULL;\r
-\r
- scnt = 0;\r
- ccnt = 0;\r
-\r
- qp = ctx->qp;\r
-\r
- /* Done with setup. Start the test. */\r
- while (scnt < user_param->iters || ccnt < user_param->iters) {\r
- while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {\r
- struct ibv_send_wr *bad_wr;\r
- if (user_param->servername)\r
- tposted[scnt] = get_cycles();\r
- if (ibv_post_send(qp, &ctx->wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
- scnt);\r
- return 1;\r
- }\r
- ++scnt;\r
- }\r
- if (ccnt < user_param->iters) {\r
- struct ibv_wc wc;\r
- int ne;\r
- if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get cq_event\n");\r
- return 1;\r
- } \r
- if (ev_cq != ctx->cq) {\r
- fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);\r
- return 1;\r
- }\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request CQ notification\n");\r
- return 1;\r
- }\r
- }\r
- do {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- if (ne) {\r
- if (user_param->servername)\r
- tcompleted[ccnt] = get_cycles();\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",\r
- wc.status, (int) wc.wr_id, wc.vendor_err);\r
- fprintf(stderr, "scnt=%d, ccnt=%d\n",\r
- scnt, ccnt);\r
- return 1;\r
- }\r
- ccnt = ccnt + ne;\r
- }\r
- } while (ne > 0 );\r
-\r
- if (ne < 0) {\r
- fprintf(stderr, "poll CQ failed %d\n", ne);\r
- return 1;\r
- }\r
- }\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-int __cdecl main(int argc, char *argv[])\r
-{\r
- struct ibv_device **dev_list;\r
- struct ibv_device *ib_dev;\r
- struct pingpong_context *ctx;\r
- struct pingpong_dest my_dest;\r
- struct pingpong_dest *rem_dest;\r
- struct user_parameters user_param;\r
- char *ib_devname = NULL;\r
- int port = 18515;\r
- int ib_port = 1;\r
- int size = 65536;\r
- SOCKET sockfd;\r
- int duplex = 0;\r
- int i = 0;\r
- WORD version;\r
- WSADATA data;\r
- int err;\r
-\r
- srand((unsigned int) time(NULL));\r
- version = MAKEWORD(2, 2);\r
- err = WSAStartup(version, &data);\r
- if (err)\r
- return -1;\r
-\r
- /* init default values to user's parameters */\r
- memset(&user_param, 0, sizeof(struct user_parameters));\r
- user_param.mtu = 0;\r
- user_param.iters = 1000;\r
- user_param.tx_depth = 100;\r
- user_param.servername = NULL;\r
- user_param.use_event = 0;\r
- user_param.max_out_read = 4; /* the device capability on gen2 */\r
- /* Parameter parsing. */\r
- while (1) {\r
- int c;\r
-\r
- c = getopt(argc, argv, "h:p:d:i:m:o:s:n:t:abVe");\r
- if (c == -1)\r
- break;\r
-\r
- switch (c) {\r
- case 'p':\r
- port = strtol(optarg, NULL, 0);\r
- if (port < 0 || port > 65535) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
-\r
- case 'd':\r
- ib_devname = _strdup(optarg);\r
- break;\r
- case 'e':\r
- ++user_param.use_event;\r
- break;\r
- case 'm':\r
- user_param.mtu = strtol(optarg, NULL, 0);\r
- break;\r
- case 'o':\r
- user_param.max_out_read = strtol(optarg, NULL, 0);\r
- break;\r
- case 'a':\r
- user_param.all = ALL;\r
- break;\r
- case 'V':\r
- printf("read_bw version : %.2f\n",VERSION);\r
- return 0;\r
- case 'i':\r
- ib_port = strtol(optarg, NULL, 0);\r
- if (ib_port < 0) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
-\r
- case 's':\r
- size = strtol(optarg, NULL, 0);\r
- break;\r
-\r
- case 't':\r
- user_param.tx_depth = strtol(optarg, NULL, 0);\r
- if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }\r
- break;\r
-\r
- case 'n':\r
- user_param.iters = strtol(optarg, NULL, 0);\r
- if (user_param.iters < 2) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
-\r
- break;\r
-\r
- case 'b':\r
- duplex = 1;\r
- break;\r
-\r
- case 'h':\r
- if (optarg) {\r
- user_param.servername = _strdup(optarg);\r
- break;\r
- }\r
-\r
- default:\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- }\r
-\r
- printf("------------------------------------------------------------------\n");\r
- if (duplex == 1)\r
- printf(" RDMA_Read Bidirectional BW Test\n");\r
- else\r
- printf(" RDMA_Read BW Test\n");\r
-\r
- printf("Connection type : RC\n");\r
- /* Done with parameter parsing. Perform setup. */\r
- if (user_param.all == ALL)\r
- /*since we run all sizes */\r
- size = 8388608; /*2^23 */\r
-\r
- dev_list = ibv_get_device_list(NULL);\r
-\r
- if (!ib_devname) {\r
- ib_dev = dev_list[0];\r
- if (!ib_dev) {\r
- fprintf(stderr, "No IB devices found\n");\r
- return 1;\r
- }\r
- } else {\r
- for (; (ib_dev = *dev_list); ++dev_list)\r
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))\r
- break;\r
- if (!ib_dev) {\r
- fprintf(stderr, "IB device %s not found\n", ib_devname);\r
- return 1;\r
- }\r
- }\r
-\r
- ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port, &user_param);\r
- if (!ctx)\r
- return 1;\r
-\r
- /* Create connection between client and server.\r
- * We do it by exchanging data over a TCP socket connection. */\r
-\r
- my_dest.lid = pp_get_local_lid(ctx, ib_port);\r
- my_dest.qpn = ctx->qp->qp_num;\r
- my_dest.psn = rand() & 0xffffff;\r
- if (!my_dest.lid) {\r
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
- return 1;\r
- }\r
- my_dest.rkey = ctx->mr->rkey;\r
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;\r
-\r
- printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "\r
- "RKey %#08x VAddr %#016Lx\n",\r
- my_dest.lid, my_dest.qpn, my_dest.psn,\r
- my_dest.rkey, my_dest.vaddr);\r
-\r
- if (user_param.servername) {\r
- sockfd = pp_client_connect(user_param.servername, port);\r
- if (sockfd == INVALID_SOCKET)\r
- return 1;\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- } else {\r
- sockfd = pp_server_connect(port);\r
- if (sockfd == INVALID_SOCKET)\r
- return 1;\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
- }\r
-\r
- if (!rem_dest)\r
- return 1;\r
-\r
- printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "\r
- "RKey %#08x VAddr %#016Lx\n",\r
- rem_dest->lid, rem_dest->qpn, rem_dest->psn,\r
- rem_dest->rkey, rem_dest->vaddr);\r
-\r
- if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest, &user_param))\r
- return 1;\r
-\r
- /* An additional handshake is required *after* moving qp to RTR.\r
- Arbitrarily reuse exch_dest for this purpose. */\r
- if (user_param.servername)\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- else\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
-\r
- if (!rem_dest)\r
- return 1;\r
- \r
- /* For half duplex tests, server just waits for client to exit */\r
-\r
- if (!user_param.servername && !duplex) {\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("server write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(sockfd);\r
- return 0;\r
- } else if (user_param.use_event) {\r
- printf("Test with events.\n");\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request CQ notification\n");\r
- return 1;\r
- } \r
- }\r
- \r
- printf("------------------------------------------------------------------\n");\r
- printf(" #bytes #iterations BW peak[MB/sec] BW average[MB/sec] \n");\r
-\r
- tposted = malloc(user_param.iters * sizeof *tposted);\r
-\r
- if (!tposted) {\r
- perror("malloc");\r
- return 1;\r
- }\r
-\r
- tcompleted = malloc(user_param.iters * sizeof *tcompleted);\r
-\r
- if (!tcompleted) {\r
- perror("malloc");\r
- return 1;\r
- }\r
-\r
- if (user_param.all == ALL) {\r
- for (i = 1; i < 24 ; ++i) {\r
- size = 1 << i;\r
- if(run_iter(ctx, &user_param, rem_dest, size))\r
- return 17;\r
- print_report(user_param.iters, size, duplex, tposted, tcompleted);\r
- }\r
- } else {\r
- if(run_iter(ctx, &user_param, rem_dest, size))\r
- return 18;\r
- print_report(user_param.iters, size, duplex, tposted, tcompleted);\r
- }\r
-\r
- if (user_param.servername)\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- else\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
-\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("server write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(sockfd);\r
-\r
- free(tposted);\r
- free(tcompleted);\r
-\r
- printf("------------------------------------------------------------------\n");\r
- return 0;\r
-}\r
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string.h>
+#include <limits.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <infiniband/verbs.h>
+#include <windows.h>
+
+#include <ws2tcpip.h>
+#include <winsock2.h>
+
+
+#include "get_clock.h"
+#include "perftest_resources.h"
+#include "l2w.h"
+#include "..\..\etc\user\getopt.c"
+
+#define VERSION 1.3
+
+static uint8_t sl = 0; // service level used when no GID index is given; set by -S in main, read in pp_connect_ctx
+static int page_size; // set from SYSTEM_INFO.dwPageSize in main; alignment passed to posix_memalign in pp_init_ctx
+cycles_t *tposted; // one get_cycles() stamp per iteration, taken just before each ibv_post_send in run_iter
+cycles_t *tcompleted; // one get_cycles() stamp per iteration, taken as each completion is polled in run_iter
+
+struct pingpong_context { // all verbs resources of one test run; built by pp_init_ctx, released by destroy_ctx_resources
+ struct ibv_context *context; // device handle returned by ibv_open_device
+ struct ibv_comp_channel *channel; // completion channel; NULL unless user_parm->use_event is set
+ struct ibv_pd *pd; // protection domain from ibv_alloc_pd
+ struct ibv_mr *mr; // memory region registered over buf
+ struct ibv_cq *cq; // completion queue, passed as both CQ arguments of ctx_qp_create
+ struct ibv_qp *qp; // the single queue pair used by the test
+ void* __ptr64 buf; // work buffer of BUFF_SIZE(size) * 2 bytes, allocated with page_size alignment
+ unsigned size; // message size given to pp_init_ctx
+ int tx_depth; // copy of user_parm->tx_depth
+ struct ibv_sge list; // scatter/gather entry, filled in by run_iter
+ struct ibv_send_wr wr; // send work request, filled in and posted repeatedly by run_iter
+};
+
+/******************************************************************************
+ * set_up_connection - fill my_dest with the local GID/LID, out_reads, QPN, PSN, rkey and buffer address.
+ ******************************************************************************/
+static int set_up_connection(struct pingpong_context *ctx, // initialised context (qp and mr must exist)
+ struct perftest_parameters *user_parm, // reads ib_port, gid_index and out_reads
+ struct pingpong_dest *my_dest) { // output; returns 0 on success, -1 on failure
+
+ if (user_parm->gid_index != -1) { // -1 is the "no GID" default set in main
+ if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&my_dest->gid)) {
+ return -1;
+ }
+ }
+
+ my_dest->lid = ctx_get_local_lid(ctx->context,user_parm->ib_port);
+ my_dest->out_reads = ctx_set_out_reads(ctx->context,user_parm->out_reads);
+ my_dest->qpn = ctx->qp->qp_num;
+ my_dest->psn = rand() & 0xffffff; // keep the low 24 bits; NOTE(review): no srand() call is visible in this file
+ my_dest->rkey = ctx->mr->rkey;
+ my_dest->vaddr = (uintptr_t)ctx->buf + BUFF_SIZE(ctx->size); // start of the second half of the 2 * BUFF_SIZE buffer
+
+ // A zero LID is only treated as fatal when no GID index was requested.
+ if (user_parm->gid_index == -1) {
+ if (!my_dest->lid) {
+ fprintf(stderr,"Local lid 0x0 detected,without any use of gid. Is SM running?\n");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * init_connection - print the local connection data and open the socket to the peer into params->sockfd.
+ ******************************************************************************/
+static int init_connection(struct perftest_parameters *params, // reads machine and port; side and sockfd are written
+ struct pingpong_dest *my_dest, // local data, only printed here
+ const char *servername) { // peer host, used only when machine == CLIENT; returns 0 or 1
+
+ params->side = LOCAL; // set before printing; presumably selects the "local" label — confirm in ctx_print_pingpong_data
+
+ ctx_print_pingpong_data(my_dest,params);
+
+ if (params->machine == CLIENT)
+ params->sockfd = ctx_client_connect(servername,params->port);
+ else
+ params->sockfd = ctx_server_connect(params->port);
+
+ if (params->sockfd == INVALID_SOCKET) {
+ fprintf(stderr,"Unable to open file descriptor for socket connection");
+ return 1;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * destroy_ctx_resources - release every verbs object in ctx, then free ctx and the global timestamp arrays.
+ ******************************************************************************/
+static int destroy_ctx_resources(struct pingpong_context *ctx) { // returns 0 if every destroy call succeeded, else 1
+
+ int test_result = 0; // becomes 1 on the first failure; teardown still continues
+
+ if (ibv_destroy_qp(ctx->qp)) {
+ fprintf(stderr, "failed to destroy QP\n");
+ test_result = 1;
+ }
+
+ if (ibv_destroy_cq(ctx->cq)) {
+ fprintf(stderr, "failed to destroy CQ\n");
+ test_result = 1;
+ }
+
+ if (ibv_dereg_mr(ctx->mr)) {
+ fprintf(stderr, "failed to deregister MR\n");
+ test_result = 1;
+ }
+
+ if (ctx->channel) { // channel is NULL when the test runs without events
+ if (ibv_destroy_comp_channel(ctx->channel)) {
+ fprintf(stderr, "failed to destroy channel \n");
+ test_result = 1;
+ }
+ }
+
+ if (ibv_dealloc_pd(ctx->pd)) {
+ fprintf(stderr, "failed to deallocate PD\n");
+ test_result = 1;
+ }
+
+ if (ibv_close_device(ctx->context)) {
+ fprintf(stderr, "failed to close device context\n");
+ test_result = 1;
+ }
+
+ posix_memfree(ctx->buf); // buffer was obtained with posix_memalign in pp_init_ctx
+ free(ctx); // ctx must not be used by the caller after this point
+ free(tposted); // global timestamp arrays allocated in main
+ free(tcompleted);
+
+ return test_result;
+}
+
+/******************************************************************************
+ * pp_init_ctx - allocate the work buffer and create the device context, PD, MR, CQ and QP (QP left in INIT).
+ ******************************************************************************/
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,unsigned size, // device to open, message size
+ struct perftest_parameters *user_parm) { // mtu may be filled in here; returns ctx or NULL
+
+ struct pingpong_context *ctx;
+ struct ibv_device_attr device_attr;
+
+ ALLOCATE(ctx,struct pingpong_context,1);
+
+ ctx->size = size;
+ ctx->tx_depth = user_parm->tx_depth;
+ ctx->buf = NULL; // posix_memalign's result is not checked; start from NULL so the test below is reliable
+ posix_memalign(&(ctx->buf),page_size, BUFF_SIZE(size) * 2);
+ if (!ctx->buf) {
+ fprintf(stderr, " Couldn't allocate work buf.\n");
+ return NULL;
+ }
+
+ memset(ctx->buf, 0, BUFF_SIZE(size) * 2);
+
+ ctx->context = ibv_open_device(ib_dev);
+ if (!ctx->context) {
+ fprintf(stderr, "Couldn't get context for %s\n",
+ ibv_get_device_name(ib_dev));
+ return NULL;
+ }
+
+ if (ibv_query_device(ctx->context, &device_attr)) {
+ fprintf(stderr, "Failed to query device props");
+ return NULL;
+ }
+
+ // Finds the link type and configure the HCA accordingly.
+ if (ctx_set_link_layer(ctx->context,user_parm)) {
+ fprintf(stderr, "Couldn't set the link layer\n");
+ return NULL;
+ }
+
+ if (user_parm->mtu == 0) { // 0 means the user did not ask for a specific MTU
+ if (device_attr.vendor_part_id == 23108 || user_parm->gid_index != -1)
+ user_parm->mtu = 1024;
+ else
+ user_parm->mtu = 2048;
+ }
+
+ if (user_parm->use_event) {
+ ctx->channel = ibv_create_comp_channel(ctx->context);
+ if (!ctx->channel) {
+ fprintf(stderr, "Couldn't create completion channel\n");
+ return NULL;
+ }
+ } else
+ ctx->channel = NULL;
+
+ ctx->pd = ibv_alloc_pd(ctx->context);
+ if (!ctx->pd) {
+ fprintf(stderr, "Couldn't allocate PD\n");
+ return NULL;
+ }
+
+ // We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+ // The Consumer is not allowed to assign Remote Write or Remote Atomic to
+ // a Memory Region that has not been assigned Local Write.
+ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, BUFF_SIZE(size) * 2, IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
+ if (!ctx->mr) {
+ fprintf(stderr, "Couldn't allocate MR\n");
+ return NULL;
+ }
+
+ // Creates the CQ according to ctx_cq_create in perftest_resources.
+ ctx->cq = ctx_cq_create(ctx->context,ctx->channel,user_parm);
+ if (!ctx->cq) {
+ fprintf(stderr, "Couldn't create CQ\n");
+ return NULL;
+ }
+
+ ctx->qp = ctx_qp_create(ctx->pd,ctx->cq,ctx->cq,user_parm); // same CQ for send and receive
+ if (!ctx->qp) {
+ fprintf(stderr, "Couldn't create QP\n");
+ return NULL;
+ }
+
+ if (ctx_modify_qp_to_init(ctx->qp,user_parm)) {
+ fprintf(stderr, "Failed to modify QP to INIT\n");
+ return NULL;
+ }
+ return ctx; // NOTE(review): the NULL returns above do not release what was already acquired
+}
+
+/******************************************************************************
+ * pp_connect_ctx - move ctx->qp to RTR using the peer's data in dest, then to RTS; returns 0 or 1.
+ ******************************************************************************/
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,int my_out_reads, // local PSN and outstanding-read limit
+ struct pingpong_dest *dest,struct perftest_parameters *user_parm) // peer data; mtu, gid_index, ib_port, qp_timeout
+{
+ struct ibv_qp_attr attr;
+ memset(&attr, 0, sizeof attr);
+
+ attr.qp_state = IBV_QPS_RTR;
+ switch (user_parm->mtu) { // NOTE(review): no default case; any other value leaves path_mtu at the memset value 0
+ case 256 :
+ attr.path_mtu = IBV_MTU_256;
+ break;
+ case 512 :
+ attr.path_mtu = IBV_MTU_512;
+ break;
+ case 1024 :
+ attr.path_mtu = IBV_MTU_1024;
+ break;
+ case 2048 :
+ attr.path_mtu = IBV_MTU_2048;
+ break;
+ case 4096 :
+ attr.path_mtu = IBV_MTU_4096;
+ break;
+ }
+ printf(" Mtu : %d\n", user_parm->mtu);
+ attr.dest_qp_num = dest->qpn;
+ attr.rq_psn = dest->psn;
+ attr.ah_attr.dlid = dest->lid;
+ attr.max_dest_rd_atomic = (uint8_t)(dest->out_reads);
+ attr.min_rnr_timer = 12;
+ if (user_parm->gid_index<0) { // no GID requested: LID-routed path using the file-scope sl
+ attr.ah_attr.is_global = 0;
+ attr.ah_attr.sl = sl;
+ } else { // GID requested: global route to the peer's GID; sl is forced to 0 here
+ attr.ah_attr.is_global = 1;
+ attr.ah_attr.grh.dgid = dest->gid;
+ attr.ah_attr.grh.sgid_index = (uint8_t)user_parm->gid_index;
+ attr.ah_attr.grh.hop_limit = 1;
+ attr.ah_attr.sl = 0;
+ }
+ attr.ah_attr.src_path_bits = 0;
+ attr.ah_attr.port_num = user_parm->ib_port;
+ if (ibv_modify_qp(ctx->qp, &attr,
+ IBV_QP_STATE |
+ IBV_QP_AV |
+ IBV_QP_PATH_MTU |
+ IBV_QP_DEST_QPN |
+ IBV_QP_RQ_PSN |
+ IBV_QP_MIN_RNR_TIMER |
+ IBV_QP_MAX_DEST_RD_ATOMIC)) {
+ fprintf(stderr, "Failed to modify RC QP to RTR\n");
+ return 1;
+ }
+ attr.timeout = user_parm->qp_timeout; // second transition reuses attr; only the masked fields below apply
+ attr.retry_cnt = 7;
+ attr.rnr_retry = 7;
+ attr.qp_state = IBV_QPS_RTS;
+ attr.sq_psn = my_psn;
+ attr.max_rd_atomic = (uint8_t)my_out_reads;
+ if (ibv_modify_qp(ctx->qp, &attr,
+ IBV_QP_STATE |
+ IBV_QP_SQ_PSN |
+ IBV_QP_TIMEOUT |
+ IBV_QP_RETRY_CNT |
+ IBV_QP_RNR_RETRY |
+ IBV_QP_MAX_QP_RD_ATOMIC)) {
+ fprintf(stderr, "Failed to modify RC QP to RTS\n");
+ return 1;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * usage - print the command-line synopsis and option list to stdout; argv0 is the program name shown.
+ ******************************************************************************/
+static void usage(const char *argv0)
+{
+ printf("Usage:\n");
+ printf(" %s start a server and wait for connection\n", argv0);
+ printf(" %s <host> connect to server at <host>\n", argv0);
+ printf("\n");
+ printf("Options:\n");
+ printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
+ printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
+ printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
+ printf(" -m, --mtu=<mtu> mtu size (256 - 4096. default for hermon is 2048)\n");
+ printf(" -o, --outs=<num> num of outstanding read/atom(default for hermon 16 (others 4)\n");
+ printf(" -s, --size=<size> size of message to exchange (default 65536)\n");
+ printf(" -a, --all Run sizes from 2 till 2^23\n");
+ printf(" -t, --tx-depth=<dep> size of tx queue (default 100)\n");
+ printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
+ printf(" -u, --qp-timeout=<timeout> QP timeout, timeout value is 4 usec * 2 ^(timeout), default 14\n");
+ printf(" -S, --sl=<sl> SL (default 0)\n");
+ printf(" -x, --gid-index=<index> test uses GID with GID index taken from command line (for RDMAoE index should be 0)\n");
+ printf(" -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n");
+ printf(" -V, --version display version number\n");
+ printf(" -e, --events sleep on CQ events (default poll)\n");
+ printf(" -F, --CPU-freq do not fail even if cpufreq_ondemand module is loaded\n");
+}
+
+/******************************************************************************
+ * print_report - print one REPORT_FMT line with peak and average bandwidth from tposted/tcompleted.
+ ******************************************************************************/
+static void print_report(unsigned int iters, unsigned size, int duplex, // iterations run, message size, non-zero if bidirectional
+ int no_cpu_freq_fail) // not used anywhere in this body
+{
+ double cycles_to_units; // value returned by get_cpu_mhz(); scales bytes per cycle to the reported rate
+ unsigned long tsize; /* Transferred size per iteration, in bytes (doubled when duplex) */
+ unsigned int i, j;
+ int opt_posted = 0, opt_completed = 0;
+ cycles_t opt_delta; // smallest per-message cycle count found so far
+ cycles_t t;
+
+
+ opt_delta = tcompleted[opt_posted] - tposted[opt_completed]; // both indices are 0: start from iteration 0
+
+ /* Find the peak bandwidth */
+ for (i = 0; i < iters; ++i) // every window [i, j] is tried, so this is quadratic in iters
+ for (j = i; j < iters; ++j) {
+ t = (tcompleted[j] - tposted[i]) / (j - i + 1); // average cycles per message over the window
+ if (t < opt_delta) {
+ opt_delta = t;
+ opt_posted = i;
+ opt_completed = j;
+ }
+ }
+
+ cycles_to_units = get_cpu_mhz();
+
+ tsize = duplex ? 2 : 1;
+ tsize = tsize * size;
+
+ printf(REPORT_FMT,size,iters, tsize * cycles_to_units / opt_delta / 0x100000, // peak; 0x100000 converts bytes to MB
+ (uint64_t)tsize*iters*cycles_to_units/(tcompleted[iters - 1] - tposted[0]) / 0x100000); // average over the whole run
+
+}
+
+/******************************************************************************
+ * run_iter - post user_param->iters RDMA reads of `size` bytes, stamping tposted/tcompleted per iteration.
+ ******************************************************************************/
+int run_iter(struct pingpong_context *ctx, struct perftest_parameters *user_param, // reads iters, tx_depth, use_event
+ struct pingpong_dest *rem_dest, int size) // peer vaddr/rkey and message size; returns 0 on success, 1 on failure
+{
+
+ int scnt = 0; // work requests posted so far
+ int ccnt = 0; // completions polled so far
+ int i,ne;
+ uint64_t rem_addr, my_addr;
+ struct ibv_wc *wc = NULL;
+ struct ibv_send_wr *bad_wr = NULL;
+
+ ALLOCATE(wc , struct ibv_wc , DEF_WC_SIZE);
+
+ ctx->list.addr = (uintptr_t)ctx->buf;
+ ctx->list.length = size;
+ ctx->list.lkey = ctx->mr->lkey;
+
+ ctx->wr.sg_list = &ctx->list;
+ ctx->wr.wr.rdma.remote_addr = rem_dest->vaddr;
+ ctx->wr.wr.rdma.rkey = rem_dest->rkey;
+ ctx->wr.wr_id = PINGPONG_READ_WRID;
+ ctx->wr.num_sge = MAX_RECV_SGE;
+ ctx->wr.opcode = IBV_WR_RDMA_READ;
+ ctx->wr.send_flags = IBV_SEND_SIGNALED;
+ ctx->wr.next = NULL;
+
+ my_addr = (uint64_t)(ctx->buf);
+ rem_addr = rem_dest->vaddr;
+
+ while (scnt < user_param->iters || ccnt < user_param->iters) {
+
+ while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) { // keep at most tx_depth reads outstanding
+
+ tposted[scnt] = get_cycles();
+ if (ibv_post_send(ctx->qp, &ctx->wr, &bad_wr)) {
+ fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);
+ free(wc); return 1;
+ }
+
+ if (size <= (CYCLE_BUFFER / 2)) { // presumably steps both addresses through the buffer — confirm in the helpers
+ increase_rem_addr(&ctx->wr,size,scnt,rem_addr);
+ increase_loc_addr(&ctx->list,size,scnt,my_addr);
+ }
+ ++scnt;
+ }
+
+ if (ccnt < user_param->iters) {
+
+ if (user_param->use_event) {
+ if (ctx_notify_events(ctx->cq,ctx->channel)) {
+ fprintf(stderr, "Couldn't request CQ notification\n");
+ free(wc); return 1;
+ }
+ }
+
+ do { // drain the CQ: poll until it is empty (ne == 0) or polling fails (ne < 0)
+ ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);
+ if (ne > 0) {
+ for (i = 0; i < ne; i++) {
+
+ if (wc[i].status != IBV_WC_SUCCESS) // NOTE(review): if this macro returns, wc is not freed on that path
+ NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);
+
+ tcompleted[ccnt++] = get_cycles();
+ }
+ }
+ } while (ne > 0 );
+
+ if (ne < 0) {
+ fprintf(stderr, "poll CQ failed %d\n", ne);
+ free(wc); return 1;
+ }
+ }
+ }
+ free(wc);
+ return 0;
+}
+
+/******************************************************************************
+ * main - parse options, connect to the peer, run the RDMA read bandwidth test and print the report.
+ ******************************************************************************/
+int __cdecl main(int argc, char *argv[])
+{
+ struct ibv_device **dev_list;
+ struct ibv_device *ib_dev;
+ struct pingpong_context *ctx;
+ struct pingpong_dest my_dest,rem_dest;
+ struct perftest_parameters user_param;
+ char *ib_devname = NULL;
+ unsigned size = 65536;
+ int i = 0;
+ int no_cpu_freq_fail = 0;
+
+ int all = 0;
+ const char *servername = NULL; // stays NULL when no host argument is given, which selects the server role
+
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+
+ /* init default values to user's parameters */
+ memset(&user_param , 0 , sizeof(struct perftest_parameters));
+ memset(&my_dest , 0 , sizeof(struct pingpong_dest));
+ memset(&rem_dest , 0 , sizeof(struct pingpong_dest));
+
+ user_param.mtu = 0;
+ user_param.ib_port = 1;
+ user_param.port = 18515;
+ user_param.tx_depth = 100;
+ user_param.rx_depth = 1;
+ user_param.iters = 1000;
+ user_param.use_event = 0;
+ user_param.num_of_qps = 1;
+ user_param.qp_timeout = 14;
+ user_param.gid_index = -1;
+ user_param.verb = READ;
+ /* Parameter parsing. */
+ while (1) {
+ int c;
+
+ static struct option long_options[] = {
+ { "port", 1, NULL, 'p' },
+ { "ib-dev", 1, NULL, 'd' },
+ { "ib-port", 1, NULL,'i' },
+ { "mtu", 1, NULL,'m' },
+ { "outs", 1, NULL,'o' },
+ { "size", 1, NULL,'s' },
+ { "iters", 1, NULL,'n' },
+ { "tx-depth", 1, NULL,'t' },
+ { "qp-timeout", 1, NULL,'u' },
+ { "sl", 1, NULL,'S' },
+ { "gid-index", 1, NULL,'x' },
+ { "all", 0, NULL, 'a' },
+ { "bidirectional", 0, NULL, 'b' },
+ { "version", 0, NULL, 'V' },
+ { "events", 0, NULL, 'e' },
+ { "CPU-freq", 0, NULL, 'F' },
+ { 0 }
+ };
+
+ c = getopt_long(argc, argv, "p:d:i:m:o:s:n:t:u:S:x:abVeF", long_options, NULL);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'p':
+ user_param.port = strtol(optarg, NULL, 0);
+ if (user_param.port < 0 || user_param.port > 65535) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 'd':
+ ib_devname = _strdup(optarg);
+ break;
+ case 'e':
+ ++user_param.use_event;
+ break;
+ case 'm':
+ user_param.mtu = strtol(optarg, NULL, 0);
+ break;
+ case 'o':
+ user_param.out_reads = strtol(optarg, NULL, 0);
+ break;
+ case 'a':
+ all = ALL;
+ break;
+ case 'V':
+ printf("read_bw version : %.2f\n",VERSION);
+ return 0;
+ break;
+ case 'i':
+ user_param.ib_port = (uint8_t)(strtol(optarg, NULL, 0));
+ if (user_param.ib_port < 0) { // NOTE(review): the value was narrowed to uint8_t above, so this likely never fires
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 's':
+ size = strtol(optarg, NULL, 0);
+ if (size < 1 || size > UINT_MAX / 2) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 't':
+ user_param.tx_depth = strtol(optarg, NULL, 0);
+ if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }
+ break;
+
+ case 'n':
+ user_param.iters = strtol(optarg, NULL, 0);
+ if (user_param.iters < 2) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ break;
+
+ case 'b':
+ user_param.duplex = 1;
+ break;
+
+ case 'F':
+ no_cpu_freq_fail = 1;
+ break;
+
+ case 'u':
+ user_param.qp_timeout = (uint8_t)(strtol(optarg, NULL, 0));
+ break;
+
+ case 'S':
+ sl = (uint8_t)(strtol(optarg, NULL, 0));
+ if (sl > 15) { usage(argv[0]); return 1; }
+ break;
+
+ case 'x':
+ user_param.gid_index = strtol(optarg, NULL, 0);
+ if (user_param.gid_index > 63) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ default:
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (optind == argc - 1) { // exactly one positional argument: the server to connect to
+ servername = _strdup(argv[optind]);
+ } else if (optind < argc) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ printf(RESULT_LINE);
+
+ if (user_param.duplex == 1)
+ printf(" RDMA_Read Bidirectional BW Test\n");
+ else
+ printf(" RDMA_Read BW Test\n");
+
+ printf(" Connection type : RC\n");
+
+ // Set the machine role in the benchmark.
+ user_param.machine = servername ? CLIENT : SERVER;
+
+ // Done with parameter parsing. Perform setup.
+ if (all == ALL)
+ size = 8388608; // 2^23, the largest size the -a sweep below reaches
+
+ page_size = si.dwPageSize;
+
+ dev_list = ibv_get_device_list(NULL);
+ if (!dev_list) { fprintf(stderr, "Failed to get IB devices list\n"); return 1; }
+ if (!ib_devname) {
+ ib_dev = dev_list[0];
+ if (!ib_dev) {
+ fprintf(stderr, "No IB devices found\n");
+ return 1;
+ }
+ } else {
+ for (; (ib_dev = *dev_list); ++dev_list) // the list is NULL-terminated
+ if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+ break;
+ if (!ib_dev) {
+ fprintf(stderr, "IB device %s not found\n", ib_devname);
+ return 1;
+ }
+ }
+
+ ctx = pp_init_ctx(ib_dev,size,&user_param);
+ if (!ctx)
+ return 1;
+
+ // Set up the Connection.
+ if (set_up_connection(ctx,&user_param,&my_dest)) {
+ fprintf(stderr," Unable to set up socket connection\n");
+ return 1;
+ }
+
+ // Init the connection and print the local data.
+ if (init_connection(&user_param,&my_dest,servername)) {
+ fprintf(stderr," Unable to init the socket connection\n");
+ return 1;
+ }
+
+ // shaking hands and gather the other side info.
+ if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+ fprintf(stderr,"Failed to exchange date between server and clients\n");
+ return 1;
+
+ }
+ user_param.side = REMOTE;
+ ctx_print_pingpong_data(&rem_dest,&user_param);
+
+ if (pp_connect_ctx(ctx,my_dest.psn,my_dest.out_reads,&rem_dest,&user_param)) {
+ fprintf(stderr," Unable to Connect the HCA's through the link\n");
+ return 1;
+ }
+
+ // An additional handshake is required after moving qp to RTR.
+ if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+ fprintf(stderr,"Failed to exchange date between server and clients\n");
+ return 1;
+
+ }
+
+ // For half duplex tests, server just waits for client to exit
+ if (user_param.machine == SERVER && !user_param.duplex) {
+ if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
+ fprintf(stderr,"Failed to close connection between server and client\n");
+ return 1;
+ }
+ printf(RESULT_LINE);
+ return 0; // NOTE(review): this path returns without calling destroy_ctx_resources
+
+ } else if (user_param.use_event) {
+ printf("Test with events.\n");
+ if (ibv_req_notify_cq(ctx->cq, 0)) {
+ fprintf(stderr, "Couldn't request CQ notification\n");
+ return 1;
+ }
+ }
+
+ printf(RESULT_LINE);
+ printf(RESULT_FMT);
+
+ ALLOCATE(tposted , cycles_t , user_param.iters);
+ ALLOCATE(tcompleted , cycles_t , user_param.iters);
+
+ if (all == ALL) {
+
+ for (i = 1; i < 24 ; ++i) { // message sizes 2^1 .. 2^23
+ size = 1 << i;
+ if(run_iter(ctx,&user_param,&rem_dest,size))
+ return 17;
+ print_report(user_param.iters,size,user_param.duplex,no_cpu_freq_fail);
+ }
+
+ } else {
+
+ if(run_iter(ctx,&user_param,&rem_dest,size))
+ return 18;
+ print_report(user_param.iters,size,user_param.duplex,no_cpu_freq_fail);
+ }
+
+ if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
+ fprintf(stderr,"Failed to close connection between server and client\n");
+ return 1;
+ }
+
+ printf(RESULT_LINE);
+
+ if(!user_param.use_event) // NOTE(review): teardown is skipped in event mode; the reason is not visible here
+ destroy_ctx_resources(ctx);
+
+ return 0;
+
+}
USE_NATIVE_EH = 1\r
USE_IOSTREAM = 1\r
\r
-SOURCES = read_lat.rc read_lat.c ..\perftest.c\r
+SOURCES = read_lat.rc read_lat.c ..\perftest_resources.c\r
\r
INCLUDES = ..;..\..\..\ulp\libibverbs\include;\\r
- ..\..\..\inc;..\..\..\inc\user;\\r
- ..\..\..\inc\user\linux;\r
-\r
+ ..\..\..\inc;..\..\..\inc\user;..\..\..\inc\user\linux;..\..\..\inc\complib;\\r
+ ..\..\..\hw\mlx4\user\hca;\r
TARGETLIBS = \\r
$(SDK_LIB_PATH)\kernel32.lib \\r
$(SDK_LIB_PATH)\advapi32.lib \\r
$(SDK_LIB_PATH)\user32.lib \\r
$(SDK_LIB_PATH)\ole32.lib \\r
$(SDK_LIB_PATH)\ws2_32.lib \\r
+ $(SDK_LIB_PATH)\uuid.lib \\r
!if $(FREEBUILD)\r
- $(TARGETPATH)\*\libibverbs.lib\r
+ $(TARGETPATH)\*\libibverbs.lib \\r
+ $(TARGETPATH)\*\complib.lib \\r
!else\r
- $(TARGETPATH)\*\libibverbsd.lib\r
-!endif\r
-\r
+ $(TARGETPATH)\*\libibverbsd.lib \\r
+ $(TARGETPATH)\*\complibd.lib \\r
+!endif
\ No newline at end of file
-/*\r
- * Copyright (c) 2005 Topspin Communications. All rights reserved.\r
- * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.\r
- * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)\r
- * Copyright (c) 2008 Intel Corporation. All rights reserved.\r
- *\r
- * This software is available to you under the OpenIB.org BSD license\r
- * below:\r
- *\r
- * Redistribution and use in source and binary forms, with or\r
- * without modification, are permitted provided that the following\r
- * conditions are met:\r
- *\r
- * - Redistributions of source code must retain the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer.\r
- *\r
- * - Redistributions in binary form must reproduce the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer in the documentation and/or other materials\r
- * provided with the distribution.\r
- *\r
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV\r
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
- * SOFTWARE.\r
- */\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-#include <ws2tcpip.h>\r
-#include <winsock2.h>\r
-#include <time.h>\r
-\r
-#include "..\..\..\etc\user\getopt.c"\r
-#include "perftest.h"\r
-#include <infiniband/verbs.h>\r
-\r
-#define PINGPONG_READ_WRID 1\r
-#define VERSION 1.1\r
-#define ALL 1\r
-\r
-typedef UINT64 cycles_t;\r
-cycles_t *tstamp;\r
-\r
-struct user_parameters {\r
- const char *servername;\r
- int connection_type;\r
- int mtu;\r
- int all; /* run all msg size */\r
- int iters;\r
- int tx_depth;\r
- SOCKET sockfd;\r
- int max_out_read;\r
- int use_event;\r
-\r
-};\r
-struct report_options {\r
- int unsorted;\r
- int histogram;\r
- int cycles; /* report delta's in cycles, not microsec's */\r
-};\r
-\r
-struct pingpong_context {\r
- struct ibv_context *context;\r
- struct ibv_comp_channel *channel;\r
- struct ibv_pd *pd;\r
- struct ibv_mr *mr;\r
- struct ibv_cq *cq;\r
- struct ibv_qp *qp;\r
- void *buf;\r
- volatile char *post_buf;\r
- volatile char *poll_buf;\r
- int size;\r
- int tx_depth;\r
- struct ibv_sge list;\r
- struct ibv_send_wr wr;\r
-};\r
-\r
-struct pingpong_dest {\r
- int lid;\r
- int qpn;\r
- int psn;\r
- unsigned rkey;\r
- unsigned long long vaddr;\r
-};\r
-struct pingpong_dest my_dest;\r
-\r
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
-{\r
- struct ibv_port_attr attr;\r
-\r
- if (ibv_query_port(ctx->context, (uint8_t) port, &attr))\r
- return 0;\r
-\r
- return attr.lid;\r
-}\r
-\r
-static struct ibv_device *pp_find_dev(const char *ib_devname) {\r
- struct ibv_device **dev_list;\r
- struct ibv_device *ib_dev = NULL;\r
-\r
- dev_list = ibv_get_device_list(NULL);\r
- if (!dev_list)\r
- return NULL;\r
-\r
- if (!ib_devname) {\r
- ib_dev = dev_list[0];\r
- if (!ib_dev)\r
- fprintf(stderr, "No IB devices found\n");\r
- } else {\r
- for (; (ib_dev = *dev_list); ++dev_list)\r
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))\r
- break;\r
- if (!ib_dev)\r
- fprintf(stderr, "IB device %s not found\n", ib_devname);\r
- }\r
- return ib_dev;\r
-}\r
-\r
-#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")\r
-#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"\r
-\r
-static int pp_write_keys(SOCKET sockfd, const struct pingpong_dest *my_dest)\r
-{\r
- char msg[KEY_MSG_SIZE];\r
-\r
- sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,\r
- my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
-\r
- if (send(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client write");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- return -1;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int pp_read_keys(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest *rem_dest)\r
-{\r
- int parsed;\r
- char msg[KEY_MSG_SIZE];\r
-\r
- if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("pp_read_keys");\r
- fprintf(stderr, "Couldn't read remote address\n");\r
- return -1;\r
- }\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
-\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",\r
- (int)sizeof msg, msg);\r
- return -1;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int pp_client_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest *rem_dest)\r
-{\r
- if (pp_write_keys(sockfd, my_dest))\r
- return -1;\r
-\r
- return pp_read_keys(sockfd, my_dest, rem_dest);\r
-}\r
-\r
-static int pp_server_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest* rem_dest)\r
-{\r
-\r
- if (pp_read_keys(sockfd, my_dest, rem_dest))\r
- return -1;\r
-\r
- return pp_write_keys(sockfd, my_dest);\r
-}\r
-\r
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,\r
- int tx_depth, int port, struct user_parameters *user_parm)\r
-{\r
- struct pingpong_context *ctx;\r
- struct ibv_device_attr device_attr;\r
-\r
- ctx = malloc(sizeof *ctx);\r
- if (!ctx)\r
- return NULL;\r
-\r
- ctx->size = size;\r
- ctx->tx_depth = tx_depth;\r
-\r
- ctx->buf = malloc(size * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
-\r
- memset(ctx->buf, 0, size * 2);\r
-\r
- ctx->post_buf = (char*)ctx->buf + (size - 1);\r
- ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);\r
-\r
- ctx->context = ibv_open_device(ib_dev);\r
- if (!ctx->context) {\r
- fprintf(stderr, "Couldn't get context for %s\n",\r
- ibv_get_device_name(ib_dev));\r
- return NULL;\r
- }\r
- if (user_parm->mtu == 0) {/*user did not ask for specific mtu */\r
- if (ibv_query_device(ctx->context, &device_attr)) {\r
- fprintf(stderr, "Failed to query device props");\r
- return NULL;\r
- }\r
- if (device_attr.vendor_part_id == 23108) {\r
- user_parm->mtu = 1024;\r
- } else {\r
- user_parm->mtu = 2048;\r
- }\r
- }\r
- if (user_parm->use_event) {\r
- ctx->channel = ibv_create_comp_channel(ctx->context);\r
- if (!ctx->channel) {\r
- fprintf(stderr, "Couldn't create completion channel\n");\r
- return NULL;\r
- }\r
- } else\r
- ctx->channel = NULL;\r
- ctx->pd = ibv_alloc_pd(ctx->context);\r
- if (!ctx->pd) {\r
- fprintf(stderr, "Couldn't allocate PD\n");\r
- return NULL;\r
- }\r
-\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
-\r
- ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, ctx->channel, 0);\r
- if (!ctx->cq) {\r
- fprintf(stderr, "Couldn't create CQ\n");\r
- return NULL;\r
- }\r
-\r
- {\r
- struct ibv_qp_init_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_init_attr));\r
- attr.send_cq = ctx->cq;\r
- attr.recv_cq = ctx->cq;\r
- attr.cap.max_send_wr = tx_depth;\r
- /* Work around: driver doesnt support\r
- * recv_wr = 0 */\r
- attr.cap.max_recv_wr = 1;\r
- attr.cap.max_send_sge = 1;\r
- attr.cap.max_recv_sge = 1;\r
- if (user_parm->connection_type==1) {\r
- attr.qp_type = IBV_QPT_UC;\r
- } else {\r
- attr.qp_type = IBV_QPT_RC;\r
- }\r
- ctx->qp = ibv_create_qp(ctx->pd, &attr);\r
- if (!ctx->qp) {\r
- fprintf(stderr, "Couldn't create QP\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- {\r
- struct ibv_qp_attr attr;\r
- \r
- attr.qp_state = IBV_QPS_INIT;\r
- attr.pkey_index = 0;\r
- attr.port_num = (uint8_t) port;\r
- attr.qp_access_flags = IBV_ACCESS_REMOTE_READ;\r
-\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_ACCESS_FLAGS)) {\r
- fprintf(stderr, "Failed to modify QP to INIT\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- ctx->wr.wr_id = PINGPONG_READ_WRID;\r
- ctx->wr.sg_list = &ctx->list;\r
- ctx->wr.num_sge = 1;\r
- ctx->wr.opcode = IBV_WR_RDMA_READ;\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- ctx->wr.next = NULL;\r
-\r
- return ctx;\r
-}\r
-\r
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,\r
- struct pingpong_dest *dest,struct user_parameters *user_parm)\r
-{\r
- struct ibv_qp_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_attr));\r
- attr.qp_state = IBV_QPS_RTR;\r
- switch (user_parm->mtu) {\r
- case 256 : \r
- attr.path_mtu = IBV_MTU_256;\r
- break;\r
- case 512 :\r
- attr.path_mtu = IBV_MTU_512;\r
- break;\r
- case 1024 :\r
- attr.path_mtu = IBV_MTU_1024;\r
- break;\r
- case 2048 :\r
- attr.path_mtu = IBV_MTU_2048;\r
- break;\r
- case 4096 :\r
- attr.path_mtu = IBV_MTU_4096;\r
- break;\r
- }\r
- printf("Mtu : %d\n", user_parm->mtu);\r
- attr.dest_qp_num = dest->qpn;\r
- attr.rq_psn = dest->psn;\r
- attr.max_dest_rd_atomic = (uint8_t) user_parm->max_out_read;\r
- attr.min_rnr_timer = 12;\r
- attr.ah_attr.is_global = 0;\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.sl = 0;\r
- attr.ah_attr.src_path_bits = 0;\r
- attr.ah_attr.port_num = (uint8_t) port;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN |\r
- IBV_QP_MIN_RNR_TIMER |\r
- IBV_QP_MAX_DEST_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTR\n");\r
- return 1;\r
- }\r
- attr.timeout = 14;\r
- attr.retry_cnt = 7;\r
- attr.rnr_retry = 7;\r
- attr.qp_state = IBV_QPS_RTS;\r
- attr.sq_psn = my_psn;\r
-\r
- if (user_parm->connection_type==0) {\r
- attr.max_rd_atomic = (uint8_t) user_parm->max_out_read;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN |\r
- IBV_QP_TIMEOUT |\r
- IBV_QP_RETRY_CNT |\r
- IBV_QP_RNR_RETRY |\r
- IBV_QP_MAX_QP_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
- return 1;\r
- }\r
- } else {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTS\n");\r
- return 1;\r
- }\r
-\r
- }\r
- return 0;\r
-}\r
-\r
-static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,\r
- int ib_port, int port, struct pingpong_dest *rem_dest,\r
- struct user_parameters *user_parm)\r
-{\r
- char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";\r
- SOCKET sockfd;\r
- int rc;\r
-\r
- /* Create connection between client and server.\r
- * We do it by exchanging data over a TCP socket connection. */\r
-\r
- my_dest.lid = pp_get_local_lid(ctx, ib_port);\r
- my_dest.qpn = ctx->qp->qp_num;\r
- my_dest.psn = rand() & 0xffffff;\r
- if (!my_dest.lid) {\r
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
- return INVALID_SOCKET;\r
- }\r
- my_dest.rkey = ctx->mr->rkey;\r
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;\r
-\r
- printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,\r
- my_dest.rkey, my_dest.vaddr);\r
-\r
- sockfd = servername ? pp_client_connect(servername, port) :\r
- pp_server_connect(port);\r
-\r
- if (sockfd == INVALID_SOCKET) {\r
- printf("pp_connect_sock(%s,%d) failed (%d)!\n",\r
- servername, port, sockfd);\r
- return sockfd;\r
- }\r
-\r
- rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest) :\r
- pp_server_exch_dest(sockfd, &my_dest, rem_dest);\r
- if (rc)\r
- return INVALID_SOCKET;\r
-\r
- printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,\r
- rem_dest->rkey, rem_dest->vaddr);\r
-\r
- if ((rc = pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest,user_parm)))\r
- return INVALID_SOCKET;\r
-\r
- /* An additional handshake is required *after* moving qp to RTR.\r
- * Arbitrarily reuse exch_dest for this purpose.\r
- */\r
-\r
- rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest) :\r
- pp_server_exch_dest(sockfd, &my_dest, rem_dest);\r
-\r
- if (rc)\r
- return INVALID_SOCKET;\r
-\r
- return sockfd;\r
-}\r
-\r
-static void usage(const char *argv0)\r
-{\r
- printf("Usage:\n");\r
- printf(" %s start a server and wait for connection\n", argv0);\r
- printf(" %s -h <host> connect to server at <host>\n", argv0);\r
- printf("\n");\r
- printf("Options:\n");\r
- printf(" -p <port> listen on/connect to port <port> (default 18515)\n");\r
- printf(" -c <RC/UC> connection type RC/UC (default RC)\n");\r
- printf(" -m <mtu> mtu size (256 - 4096. default for hermon is 2048)\n");\r
- printf(" -d <dev> use IB device <dev> (default first device found)\n");\r
- printf(" -i <port> use port <port> of IB device (default 1)\n");\r
- printf(" -s <size> size of message to exchange (default 1)\n");\r
- printf(" -t <dep> size of tx queue (default 50)\n");\r
- printf(" -n <iters> number of exchanges (at least 2, default 1000)\n");\r
- printf(" -o <num> num of outstanding read/atom(default 4)\n");\r
- printf(" -a Run sizes from 2 till 2^23\n");\r
- printf(" -C report times in cpu cycle units (default microseconds)\n");\r
- printf(" -H print out all results (default print summary only)\n");\r
- printf(" -U (implies -H) print out unsorted results (default sorted)\n");\r
- printf(" -V display version number\n");\r
- printf(" -e sleep on CQ events (default poll)\n");\r
-}\r
-\r
-static void print_report(struct report_options * options,\r
- unsigned int iters, cycles_t *tstamp,int size)\r
-{\r
- cycles_t cycles_to_units;\r
- cycles_t median;\r
- unsigned int i;\r
- const char* units;\r
- cycles_t *delta = malloc((iters - 1) * sizeof *delta);\r
-\r
- if (!delta) {\r
- perror("malloc");\r
- return;\r
- }\r
-\r
- for (i = 0; i < iters - 1; ++i)\r
- delta[i] = tstamp[i + 1] - tstamp[i];\r
-\r
- if (options->cycles) {\r
- cycles_to_units = 1;\r
- units = "cycles";\r
- } else {\r
- cycles_to_units = get_freq();\r
- units = "sec";\r
- }\r
-\r
- if (options->unsorted) {\r
- printf("#, %s, frequency=%I64d\n", units, get_freq());\r
- for (i = 0; i < iters - 1; ++i)\r
- printf("%d, %g\n", i + 1, delta[i] / cycles_to_units);\r
- }\r
-\r
- qsort(delta, iters - 1, sizeof *delta, cycles_compare);\r
-\r
- if (options->histogram) {\r
- printf("#, %s, frequency=%I64d\n", units, get_freq());\r
- for (i = 0; i < iters - 1; ++i)\r
- printf("%d, %7.2f\n", i + 1, (double) delta[i] / (double) cycles_to_units);\r
- }\r
-\r
- median = get_median(iters - 1, delta);\r
- printf("%7d %d %7.2f %7.2f %7.2f\n",\r
- size, iters, (double) delta[0] / (double) cycles_to_units * 1000000.,\r
- (double) delta[iters - 2] / (double) cycles_to_units * 1000000.,\r
- (double) median / (double) cycles_to_units * 1000000.);\r
- free(delta);\r
-}\r
-\r
-static int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest *rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- struct ibv_send_wr *wr;\r
- volatile char *poll_buf; \r
- volatile char *post_buf;\r
-\r
- int scnt, ccnt;\r
- int iters;\r
- int tx_depth;\r
-\r
- struct ibv_wc wc;\r
- int ne;\r
-\r
- if (!user_param->servername)\r
- return 0;\r
-\r
- iters = user_param->iters;\r
- tx_depth = user_param->tx_depth;\r
- wr = &ctx->wr;\r
- ctx->list.addr = (uintptr_t) ctx->buf;\r
- ctx->list.length = size;\r
- ctx->list.lkey = ctx->mr->lkey;\r
- wr->wr.rdma.remote_addr = rem_dest->vaddr;\r
- wr->wr.rdma.rkey = rem_dest->rkey;\r
- scnt = 0;\r
- ccnt = 0;\r
- poll_buf = ctx->poll_buf;\r
- post_buf = ctx->post_buf;\r
- qp = ctx->qp;\r
-\r
- /* Done with setup. Start the test. */\r
-\r
- while (scnt < user_param->iters ) {\r
- struct ibv_send_wr *bad_wr;\r
- *post_buf = (char)++scnt;\r
- tstamp[scnt - 1] = get_cycles();\r
- if (ibv_post_send(qp, wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
- scnt);\r
- return 11;\r
- }\r
- if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
-\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get cq_event\n");\r
- return 1;\r
- }\r
-\r
- if (ev_cq != ctx->cq) {\r
- fprintf(stderr, "CQ event for unknown RCQ %p\n", ev_cq);\r
- return 1;\r
- }\r
-\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request CQ notification\n");\r
- return 1;\r
- }\r
- }\r
- do {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- } while (!user_param->use_event && ne < 1);\r
-\r
- if (ne < 0) {\r
- fprintf(stderr, "poll CQ failed %d\n", ne);\r
- return 12;\r
- }\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d\n",\r
- wc.status, (int) wc.wr_id);\r
- fprintf(stderr, "scnt=%d, ccnt=%d\n",\r
- scnt, ccnt);\r
- return 13;\r
- }\r
- }\r
- return 0;\r
-}\r
-\r
-int __cdecl main(int argc, char *argv[])\r
-{\r
- const char *ib_devname = NULL;\r
- int port = 18515;\r
- int ib_port = 1;\r
- int size = 2;\r
- int tmp_size;\r
- int i = 0;\r
- struct report_options report;\r
- struct pingpong_context *ctx;\r
- struct pingpong_dest rem_dest;\r
- struct ibv_device *ib_dev;\r
- struct user_parameters user_param;\r
- WORD version;\r
- WSADATA data;\r
- int err;\r
-\r
- srand((unsigned int) time(NULL));\r
- version = MAKEWORD(2, 2);\r
- err = WSAStartup(version, &data);\r
- if (err)\r
- return -1;\r
-\r
- /* init default values to user's parameters */\r
- memset(&report, 0, sizeof report);\r
- memset(&user_param, 0, sizeof(struct user_parameters));\r
- user_param.mtu = 0;\r
- user_param.iters = 1000;\r
- user_param.tx_depth = 50;\r
- user_param.servername = NULL;\r
- user_param.use_event = 0;\r
- user_param.max_out_read = 4; /* the device capability on gen2 */\r
-\r
- /* Parameter parsing. */\r
- while (1) {\r
- int c;\r
-\r
- c = getopt(argc, argv, "h:p:c:m:d:i:s:o:n:t:aeHUV");\r
- if (c == -1)\r
- break;\r
-\r
- switch (c) {\r
- case 'p':\r
- port = strtol(optarg, NULL, 0);\r
- if (port < 0 || port > 65535) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
- case 'c':\r
- if (strcmp("UC",optarg)==0)\r
- user_param.connection_type=1;\r
- /* default is 0 for any other option RC*/\r
- break;\r
- case 'e':\r
- ++user_param.use_event;\r
- break;\r
-\r
- case 'm':\r
- user_param.mtu = strtol(optarg, NULL, 0);\r
- break;\r
- case 'o':\r
- user_param.max_out_read = strtol(optarg, NULL, 0);\r
- break;\r
- case 'a':\r
- user_param.all = ALL;\r
- break;\r
- case 'V':\r
- printf("perftest version : %.2f\n",VERSION);\r
- return 0;\r
- case 'd':\r
- ib_devname = _strdup(optarg);\r
- break;\r
-\r
- case 'i':\r
- ib_port = strtol(optarg, NULL, 0);\r
- if (ib_port < 0) {\r
- usage(argv[0]);\r
- return 2;\r
- }\r
- break;\r
-\r
- case 's':\r
- size = strtol(optarg, NULL, 0);\r
- if (size < 1) {\r
- usage(argv[0]); return 3;\r
- }\r
- break;\r
-\r
- case 't':\r
- user_param.tx_depth = strtol(optarg, NULL, 0);\r
- if (user_param.tx_depth < 1) {\r
- usage(argv[0]); return 4;\r
- }\r
- break;\r
-\r
- case 'n':\r
- user_param.iters = strtol(optarg, NULL, 0);\r
- if (user_param.iters < 2) {\r
- usage(argv[0]);\r
- return 5;\r
- }\r
- break;\r
-\r
- case 'C':\r
- report.cycles = 1;\r
- break;\r
-\r
- case 'H':\r
- report.histogram = 1;\r
- break;\r
-\r
- case 'U':\r
- report.unsorted = 1;\r
- break;\r
-\r
- case 'h':\r
- if (optarg) {\r
- user_param.servername = _strdup(optarg);\r
- break;\r
- }\r
-\r
- default:\r
- usage(argv[0]);\r
- return 5;\r
- }\r
- }\r
-\r
- /*\r
- * Done with parameter parsing. Perform setup.\r
- */\r
- tstamp = malloc(user_param.iters * sizeof *tstamp);\r
- if (!tstamp) {\r
- perror("malloc");\r
- return 10;\r
- }\r
- printf("------------------------------------------------------------------\n");\r
- printf(" RDMA_Read Latency Test\n");\r
- printf("Connection type : RC\n");\r
- /* anyway make sure the connection is RC */\r
- tmp_size = size;\r
- if (user_param.all == ALL) {\r
- /*since we run all sizes */\r
- size = 8388608; /*2^23 */\r
- } else if (size < 128) {\r
- /* can cut up to 70 nsec probably related to cache line size */ \r
- size = 128;\r
- }\r
- user_param.connection_type = 0;\r
-\r
- ib_dev = pp_find_dev(ib_devname);\r
- if (!ib_dev)\r
- return 7;\r
-\r
- ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port,&user_param);\r
- if (!ctx)\r
- return 8;\r
-\r
- user_param.sockfd=pp_open_port(ctx, user_param.servername, ib_port, port, &rem_dest,&user_param);\r
- if (user_param.sockfd == INVALID_SOCKET) {\r
- return 9;\r
- }\r
- /* fix for true size in small msg size */\r
- if (tmp_size < 128) {\r
- size = tmp_size ;\r
- }\r
- if (user_param.use_event) {\r
- printf("Test with events.\n");\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request RCQ notification\n");\r
- return 1;\r
- } \r
- }\r
- printf("------------------------------------------------------------------\n");\r
- printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");\r
- if (user_param.all == ALL) {\r
- for (i = 1; i < 24 ; ++i) {\r
- size = 1 << i;\r
- if(run_iter(ctx, &user_param, &rem_dest, size))\r
- return 17;\r
- if(user_param.servername) {\r
- print_report(&report, user_param.iters, tstamp, size);\r
- }\r
- }\r
- } else {\r
- if(run_iter(ctx, &user_param, &rem_dest, size))\r
- return 18;\r
- if(user_param.servername) {\r
- print_report(&report, user_param.iters, tstamp, size);\r
- }\r
- }\r
-\r
- /* done close sockets */\r
- if(user_param.servername) {\r
- /*Signal client is finished */ \r
- pp_client_exch_dest(user_param.sockfd, &my_dest, &rem_dest);\r
- if (send(user_param.sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("client write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(user_param.sockfd);\r
- } else {\r
- /*Server is finished wait for client */\r
- pp_server_exch_dest(user_param.sockfd, &my_dest, &rem_dest);\r
- if (send(user_param.sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("server write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(user_param.sockfd);\r
- }\r
- printf("------------------------------------------------------------------\n");\r
- free(tstamp);\r
- return 0;\r
-}\r
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <infiniband/verbs.h>
+#include <windows.h>
+
+#include <ws2tcpip.h>
+#include <winsock2.h>
+
+
+#include "..\..\tools\perftests\user\get_clock.h"
+#include "..\..\etc\user\getopt.c"
+#include "perftest_resources.h"
+#include "l2w.h"
+
+#define PINGPONG_READ_WRID 1
+#define VERSION 1.3
+
+static uint8_t sl = 0;
+static int page_size;
+cycles_t *tstamp;
+
+/* Output switches consumed by print_report(); main() sets each to 1 from a command-line flag. */
+struct report_options {
+ int unsorted; /* -U: dump the per-iteration deltas before they are sorted */
+ int histogram; /* -H: dump the per-iteration deltas after they are sorted */
+ int cycles; /* report delta's in cycles, not microsec's */
+};
+
+/* All verbs resources and the pre-built work request used by one test run; filled in by pp_init_ctx(). */
+struct pingpong_context {
+ struct ibv_context *context; /* device context returned by ibv_open_device() */
+ struct ibv_comp_channel *channel; /* completion channel; NULL unless use_event is set */
+ struct ibv_pd *pd; /* protection domain */
+ struct ibv_mr *mr; /* memory region covering the whole of buf */
+ struct ibv_cq *cq; /* completion queue, used for both send and receive */
+ struct ibv_qp *qp; /* the single queue pair of the test */
+ void *buf; /* work buffer of 2 * size bytes */
+ volatile char *post_buf; /* last byte of the first half of buf */
+ volatile char *poll_buf; /* last byte of the second half of buf */
+ int size; /* size of one half of buf, in bytes */
+ int tx_depth; /* send queue depth, also used as the CQ size */
+ struct ibv_sge list; /* the one scatter/gather entry referenced by wr */
+ struct ibv_send_wr wr; /* reusable RDMA read work request */
+};
+
+/*
+ * Fill in the local endpoint description (my_dest) that is later exchanged
+ * with the peer.
+ *
+ * ctx       - context whose device context, QP, MR and buffer are read.
+ * user_parm - gid_index, ib_port and out_reads are read.
+ * my_dest   - receives gid (only when a GID index is in use), lid,
+ *             out_reads, qpn, psn, rkey and vaddr.
+ *
+ * Returns 0 on success; -1 if the GID query fails, or if no GID is in use
+ * and the local LID is 0.
+ */
+static int set_up_connection(struct pingpong_context *ctx,
+			     struct perftest_parameters *user_parm,
+			     struct pingpong_dest *my_dest) {
+
+	int use_i = user_parm->gid_index;
+	uint8_t port = user_parm->ib_port;
+
+	/* Any negative index means "no GID", the same rule pp_connect_ctx()
+	 * applies; a value such as -2 must not reach ibv_query_gid(). */
+	if (use_i >= 0) {
+		if (ibv_query_gid(ctx->context,port,use_i,&my_dest->gid)) {
+			return -1;
+		}
+	}
+
+	my_dest->lid       = ctx_get_local_lid(ctx->context,user_parm->ib_port);
+	my_dest->out_reads = ctx_set_out_reads(ctx->context,user_parm->out_reads);
+	my_dest->qpn       = ctx->qp->qp_num;
+	my_dest->psn       = rand() & 0xffffff;
+	my_dest->rkey      = ctx->mr->rkey;
+	/* Advertise the second half of the work buffer (it starts at buf + size). */
+	my_dest->vaddr     = (uintptr_t)ctx->buf + ctx->size;
+
+	// We do not fail test upon lid in RDMAoE/Eth conf.
+	if (use_i < 0) {
+		if (!my_dest->lid) {
+			fprintf(stderr,"Local lid 0x0 detected. Is an SM running? \n");
+			fprintf(stderr,"If you're running RDMAoE you must use GIDs\n");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Record which role this process plays, print the local endpoint data and
+ * open the socket used for the out-of-band exchange with the peer.
+ *
+ * params     - machine, side and sockfd are written; port is read.
+ * my_dest    - local endpoint data, passed to ctx_print_pingpong_data().
+ * servername - peer host name; NULL selects the server role.
+ *
+ * Returns 0 on success, 1 if the socket could not be opened.
+ */
+static int init_connection(struct perftest_parameters *params,
+			   struct pingpong_dest *my_dest,
+			   const char *servername) {
+
+	params->machine = servername ? CLIENT : SERVER;
+	params->side    = LOCAL;
+	ctx_print_pingpong_data(my_dest,params);
+
+	if (servername)
+		params->sockfd = ctx_client_connect(servername,params->port);
+	else
+		params->sockfd = ctx_server_connect(params->port);
+
+	if (params->sockfd == INVALID_SOCKET) {
+		/* Terminate the line so later output does not run into this message. */
+		fprintf(stderr,"Unable to open file descriptor for socket connection\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Look up the IB device to use.
+ *
+ * ib_devname - device name to search for, or NULL to take the first device
+ *              in the list.
+ *
+ * Returns the matching device, or NULL (after printing a message to stderr)
+ * when the list cannot be obtained, is empty, or holds no device of that name.
+ *
+ * NOTE(review): the device list is never released with
+ * ibv_free_device_list(); presumably because the returned pointer refers to
+ * an entry of that list - confirm before adding a free.
+ */
+static struct ibv_device *pp_find_dev(const char *ib_devname) {
+	struct ibv_device **dev_list;
+	struct ibv_device *ib_dev = NULL;
+
+	dev_list = ibv_get_device_list(NULL);
+	if (!dev_list) {
+		/* ibv_get_device_list() reports failure with NULL; do not index it. */
+		fprintf(stderr, "Failed to get IB devices list\n");
+		return NULL;
+	}
+
+	if (!ib_devname) {
+		ib_dev = dev_list[0];
+		if (!ib_dev)
+			fprintf(stderr, "No IB devices found\n");
+	} else {
+		/* The list is NULL-terminated, so ib_dev ends up NULL when nothing matches. */
+		for (; (ib_dev = *dev_list); ++dev_list)
+			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+				break;
+		if (!ib_dev)
+			fprintf(stderr, "IB device %s not found\n", ib_devname);
+	}
+	return ib_dev;
+}
+
+
+/*
+ * Allocate a pingpong_context and create every verbs resource the test
+ * needs: work buffer, device context, optional completion channel, PD, MR,
+ * CQ and a QP moved to the INIT state. The reusable RDMA read work request
+ * is pre-filled as well.
+ *
+ * ib_dev    - device to open.
+ * size      - message size; the work buffer holds 2 * size bytes.
+ * user_parm - tx_depth, mtu, gid_index, use_event, connection_type and
+ *             ib_port are read; mtu is written when it is 0 on entry.
+ *
+ * Returns the new context, or NULL on failure.
+ *
+ * NOTE(review): resources acquired before a failure are not released here;
+ * main() leaves the process when NULL comes back.
+ */
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,int size,
+					    struct perftest_parameters *user_parm) {
+
+	struct pingpong_context *ctx;
+	struct ibv_device_attr device_attr;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx)
+		return NULL;
+
+	ctx->size     = size;
+	ctx->tx_depth = user_parm->tx_depth;
+
+	/* ctx comes from malloc(), so buf is indeterminate until it is set.
+	 * Clear it first so that the NULL test below is meaningful when the
+	 * aligned allocation fails without storing a pointer. */
+	ctx->buf = NULL;
+	posix_memalign(&(ctx->buf),page_size, size * 2);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		return NULL;
+	}
+
+	memset(ctx->buf, 0, size * 2);
+
+	/* Last byte of the first half and last byte of the second half. */
+	ctx->post_buf = (char*)ctx->buf + (size - 1);
+	ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);
+
+	ctx->context = ibv_open_device(ib_dev);
+	if (!ctx->context) {
+		fprintf(stderr, "Couldn't get context for %s\n",
+			ibv_get_device_name(ib_dev));
+		return NULL;
+	}
+
+	// Finds the link type and configure the HCA accordingly.
+	if (ctx_set_link_layer(ctx->context,user_parm)) {
+		fprintf(stderr, "Couldn't set the link layer\n");
+		return NULL;
+	}
+
+	if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
+		if (ibv_query_device(ctx->context, &device_attr)) {
+			fprintf(stderr, "Failed to query device props\n");
+			return NULL;
+		}
+		/* NOTE(review): 23108 is presumably the part id of an HCA that
+		 * should run at 1024 - confirm. GID-based runs also use 1024. */
+		if (device_attr.vendor_part_id == 23108 || user_parm->gid_index > -1) {
+			user_parm->mtu = 1024;
+		} else {
+			user_parm->mtu = 2048;
+		}
+	}
+
+	if (user_parm->use_event) {
+		ctx->channel = ibv_create_comp_channel(ctx->context);
+		if (!ctx->channel) {
+			fprintf(stderr, "Couldn't create completion channel\n");
+			return NULL;
+		}
+	} else
+		ctx->channel = NULL;
+
+	ctx->pd = ibv_alloc_pd(ctx->context);
+	if (!ctx->pd) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
+			     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
+	if (!ctx->mr) {
+		fprintf(stderr, "Couldn't allocate MR\n");
+		return NULL;
+	}
+
+	ctx->cq = ibv_create_cq(ctx->context,ctx->tx_depth, NULL,ctx->channel,0);
+	if (!ctx->cq) {
+		fprintf(stderr, "Couldn't create CQ\n");
+		return NULL;
+	}
+
+	{
+		struct ibv_qp_init_attr attr;
+		memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
+		attr.send_cq = ctx->cq;
+		attr.recv_cq = ctx->cq;
+		attr.cap.max_send_wr  = ctx->tx_depth;
+		/* Work around: driver doesnt support
+		 * recv_wr = 0 */
+		attr.cap.max_recv_wr  = 1;
+		attr.cap.max_send_sge = 1;
+		attr.cap.max_recv_sge = 1;
+		if (user_parm->connection_type==1) {
+			attr.qp_type = IBV_QPT_UC;
+		} else {
+			attr.qp_type = IBV_QPT_RC;
+		}
+		ctx->qp = ibv_create_qp(ctx->pd, &attr);
+		if (!ctx->qp) {
+			fprintf(stderr, "Couldn't create QP\n");
+			return NULL;
+		}
+	}
+
+	{
+		struct ibv_qp_attr attr;
+		/* Zero the attribute block so no field is passed uninitialised. */
+		memset(&attr, 0, sizeof(struct ibv_qp_attr));
+		attr.qp_state        = IBV_QPS_INIT;
+		attr.pkey_index      = 0;
+		attr.port_num        = user_parm->ib_port;
+		attr.qp_access_flags = IBV_ACCESS_REMOTE_READ;
+
+		if (ibv_modify_qp(ctx->qp, &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_PKEY_INDEX         |
+				  IBV_QP_PORT               |
+				  IBV_QP_ACCESS_FLAGS)) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			return NULL;
+		}
+	}
+
+	/* One signaled RDMA read with a single SGE; run_iter() fills in the
+	 * SGE and the remote address/rkey before posting. */
+	ctx->wr.wr_id      = PINGPONG_READ_WRID;
+	ctx->wr.sg_list    = &ctx->list;
+	ctx->wr.num_sge    = 1;
+	ctx->wr.opcode     = IBV_WR_RDMA_READ;
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+	ctx->wr.next       = NULL;
+
+	return ctx;
+}
+
+/*
+ * Move the QP from INIT through RTR to RTS using the peer's endpoint data.
+ *
+ * ctx       - context holding the QP to connect.
+ * my_psn    - local PSN, used as the send queue PSN.
+ * dest      - peer endpoint: qpn, psn, lid, out_reads and (with GIDs) gid.
+ * my_reads  - local outstanding-read limit, used as max_rd_atomic.
+ * user_parm - mtu, gid_index, ib_port and qp_timeout are read.
+ *
+ * Returns 0 on success, 1 when the MTU is not one of the supported values or
+ * when either ibv_modify_qp() call fails.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,
+			  struct pingpong_dest *dest,int my_reads,
+			  struct perftest_parameters *user_parm)
+{
+	struct ibv_qp_attr attr;
+	memset(&attr, 0, sizeof(struct ibv_qp_attr));
+	attr.qp_state = IBV_QPS_RTR;
+	switch (user_parm->mtu) {
+	case 256 :
+		attr.path_mtu = IBV_MTU_256;
+		break;
+	case 512 :
+		attr.path_mtu = IBV_MTU_512;
+		break;
+	case 1024 :
+		attr.path_mtu = IBV_MTU_1024;
+		break;
+	case 2048 :
+		attr.path_mtu = IBV_MTU_2048;
+		break;
+	case 4096 :
+		attr.path_mtu = IBV_MTU_4096;
+		break;
+	default:
+		/* Without this, path_mtu would stay 0 from the memset above and
+		 * the failure would only surface as a generic RTR error. */
+		fprintf(stderr, "Unsupported MTU %d (expected 256, 512, 1024, 2048 or 4096)\n",
+			user_parm->mtu);
+		return 1;
+	}
+	printf("Mtu : %d\n", user_parm->mtu);
+	attr.dest_qp_num        = dest->qpn;
+	attr.rq_psn             = dest->psn;
+	attr.ah_attr.dlid       = dest->lid;
+	attr.max_dest_rd_atomic = (uint8_t)(dest->out_reads);
+	attr.min_rnr_timer      = 12;
+	if (user_parm->gid_index < 0) {
+		/* LID-routed path: the service level comes from the -S option. */
+		attr.ah_attr.is_global = 0;
+		attr.ah_attr.sl        = sl;
+	} else {
+		/* GID-routed path: address the peer through a GRH. */
+		attr.ah_attr.is_global      = 1;
+		attr.ah_attr.grh.dgid       = dest->gid;
+		attr.ah_attr.grh.sgid_index = (uint8_t)(user_parm->gid_index);
+		attr.ah_attr.grh.hop_limit  = 1;
+		attr.ah_attr.sl             = 0;
+	}
+	attr.ah_attr.src_path_bits = 0;
+	attr.ah_attr.port_num      = user_parm->ib_port;
+	if (ibv_modify_qp(ctx->qp, &attr,
+			  IBV_QP_STATE              |
+			  IBV_QP_AV                 |
+			  IBV_QP_PATH_MTU           |
+			  IBV_QP_DEST_QPN           |
+			  IBV_QP_RQ_PSN             |
+			  IBV_QP_MIN_RNR_TIMER      |
+			  IBV_QP_MAX_DEST_RD_ATOMIC)) {
+		fprintf(stderr, "Failed to modify RC QP to RTR\n");
+		return 1;
+	}
+
+	attr.timeout       = user_parm->qp_timeout;
+	attr.retry_cnt     = 7;
+	attr.rnr_retry     = 7;
+	attr.qp_state      = IBV_QPS_RTS;
+	attr.sq_psn        = my_psn;
+	attr.max_rd_atomic = (uint8_t)my_reads;
+
+	if (ibv_modify_qp(ctx->qp, &attr,
+			  IBV_QP_STATE              |
+			  IBV_QP_SQ_PSN             |
+			  IBV_QP_TIMEOUT            |
+			  IBV_QP_RETRY_CNT          |
+			  IBV_QP_RNR_RETRY          |
+			  IBV_QP_MAX_QP_RD_ATOMIC)) {
+		fprintf(stderr, "Failed to modify RC QP to RTS\n");
+		return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * Print the command-line help to stdout.
+ *
+ * argv0 - program name as invoked, shown in the two usage lines.
+ */
+static void usage(const char *argv0)
+{
+	printf("Usage:\n");
+	printf(" %s start a server and wait for connection\n", argv0);
+	printf(" %s <host> connect to server at <host>\n", argv0);
+	printf("\n");
+	printf("Options:\n");
+	printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
+	printf(" -m, --mtu=<mtu> mtu size (256 - 4096. default for hermon is 2048)\n");
+	printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
+	printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
+	/* main() starts with size = 2, so that is the default advertised here. */
+	printf(" -s, --size=<size> size of message to exchange (default 2)\n");
+	printf(" -t, --tx-depth=<dep> size of tx queue (default 50)\n");
+	printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
+	printf(" -o, --outs=<num> num of outstanding read/atom(default 4)\n");
+	printf(" -u, --qp-timeout=<timeout> QP timeout, timeout value is 4 usec * 2 ^(timeout), default 14\n");
+	printf(" -S, --sl=<sl> SL (default 0)\n");
+	printf(" -x, --gid-index=<index> test uses GID with GID index taken from command line (for RDMAoE index should be 0)\n");
+	printf(" -a, --all Run sizes from 2 till 2^23\n");
+	printf(" -C, --report-cycles report times in cpu cycle units (default microseconds)\n");
+	printf(" -H, --report-histogram print out all results (default print summary only)\n");
+	printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
+	printf(" -V, --version display version number\n");
+	printf(" -e, --events sleep on CQ events (default poll)\n");
+	printf(" -F, --CPU-freq do not fail test on different cpu frequencies\n");
+}
+
+
+/*
+ * Turn the per-iteration timestamps into latencies and print them.
+ *
+ * options          - selects cycles vs. usec and the optional per-sample dumps.
+ * iters            - number of timestamps in tstamp; must be at least 2.
+ * tstamp           - timestamps taken just before each post.
+ * size             - message size, printed in the summary line.
+ * no_cpu_freq_fail - accepted for interface compatibility; not used here.
+ *
+ * Prints the optional unsorted and sorted sample lists, then one summary
+ * line with size, iters, minimum, maximum and median latency.
+ */
+static void print_report(struct report_options * options,
+			 unsigned int iters, cycles_t *tstamp,int size, int no_cpu_freq_fail)
+{
+	double cycles_to_units;
+	cycles_t median;
+	unsigned int i;
+	const char* units;
+	cycles_t *delta;
+
+	(void)no_cpu_freq_fail;
+
+	/* iters - 1 deltas are computed and delta[iters - 2] is read below, so
+	 * fewer than two timestamps would underflow the unsigned arithmetic. */
+	if (iters < 2) {
+		fprintf(stderr, "print_report: at least 2 iterations are required\n");
+		return;
+	}
+
+	delta = malloc((iters - 1) * sizeof *delta);
+	if (!delta) {
+		perror("malloc");
+		return;
+	}
+
+	for (i = 0; i < iters - 1; ++i)
+		delta[i] = tstamp[i + 1] - tstamp[i];
+
+	if (options->cycles) {
+		cycles_to_units = 1;
+		units = "cycles";
+	} else {
+		/* Divide in floating point: should get_cpu_mhz() return an
+		 * integer type, an integer division would drop the fraction
+		 * (or give 0) - NOTE(review): confirm its return type/unit. */
+		cycles_to_units = get_cpu_mhz() / 1000000.0;
+		units = "usec";
+	}
+
+	if (options->unsorted) {
+		printf("#, %s\n", units);
+		for (i = 0; i < iters - 1; ++i)
+			printf("%u, %g\n", i + 1, delta[i] / cycles_to_units );
+	}
+
+	qsort(delta, iters - 1, sizeof *delta, cycles_compare);
+
+	if (options->histogram) {
+		printf("#, %s\n", units);
+		for (i = 0; i < iters - 1; ++i)
+			printf("%u, %g\n", i + 1, delta[i] / cycles_to_units );
+	}
+
+	/* After the sort, delta[0] is the minimum and delta[iters - 2] the maximum. */
+	median = get_median(iters - 1, delta);
+	printf("%7d %u %7.2f %7.2f %7.2f\n",
+	       size,iters,delta[0] / cycles_to_units ,
+	       delta[iters - 2] / cycles_to_units ,median / cycles_to_units );
+
+	free(delta);
+}
+
+/*
+ * Run one latency measurement: post user_param->iters RDMA reads one at a
+ * time, waiting for each completion before posting the next, and store the
+ * time of every post in the global tstamp array.
+ *
+ * ctx        - initialized and connected context.
+ * user_param - machine, iters and use_event are read.
+ * rem_dest   - peer endpoint; vaddr and rkey are the read target.
+ * size       - number of bytes to read per request.
+ *
+ * Returns 0 on success, and immediately when this side is the SERVER.
+ * Non-zero codes: 11 post failed, 12 CQ poll failed, 13 completion with
+ * error status, 1 completion-event handling failed.
+ *
+ * tstamp must have room for user_param->iters entries.
+ */
+int run_iter(struct pingpong_context *ctx, struct perftest_parameters *user_param,
+	     struct pingpong_dest *rem_dest, int size)
+{
+	struct ibv_qp *qp;
+	struct ibv_send_wr *wr;
+	volatile char *post_buf;
+
+	int scnt, ccnt;
+
+	struct ibv_wc wc;
+	int ne;
+
+	if (user_param->machine == SERVER)
+		return 0;
+
+	wr = &ctx->wr;
+	ctx->list.addr   = (uintptr_t) ctx->buf;
+	ctx->list.length = size;
+	ctx->list.lkey   = ctx->mr->lkey;
+	wr->wr.rdma.remote_addr = rem_dest->vaddr;
+	wr->wr.rdma.rkey        = rem_dest->rkey;
+	scnt = 0;
+	ccnt = 0;
+	post_buf = ctx->post_buf;
+	qp = ctx->qp;
+
+	/* Done with setup. Start the test. */
+
+	while (scnt < user_param->iters ) {
+		struct ibv_send_wr *bad_wr;
+		*post_buf = (char)++scnt;
+		tstamp[scnt - 1] = get_cycles();
+		if (ibv_post_send(qp, wr, &bad_wr)) {
+			fprintf(stderr, "Couldn't post send: scnt=%d\n",
+				scnt);
+			return 11;
+		}
+		if (user_param->use_event) {
+			struct ibv_cq *ev_cq;
+			void *ev_ctx;
+
+			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+				fprintf(stderr, "Failed to get cq_event\n");
+				return 1;
+			}
+
+			if (ev_cq != ctx->cq) {
+				fprintf(stderr, "CQ event for unknown RCQ %p\n", ev_cq);
+				return 1;
+			}
+
+			/* Re-arm before polling so the next completion raises an event. */
+			if (ibv_req_notify_cq(ctx->cq, 0)) {
+				fprintf(stderr, "Couldn't request CQ notification\n");
+				return 1;
+			}
+		}
+
+		/* Spin only while the CQ is empty. A negative result must leave
+		 * the loop so the error is reported instead of polling forever,
+		 * and wc is never examined unless a completion was returned. */
+		do {
+			ne = ibv_poll_cq(ctx->cq, 1, &wc);
+		} while (ne == 0);
+
+		if (ne < 0) {
+			fprintf(stderr, "poll CQ failed %d\n", ne);
+			return 12;
+		}
+		if (wc.status != IBV_WC_SUCCESS) {
+			fprintf(stderr, "Completion with error at %s:\n",
+				user_param->machine == CLIENT ? "client" : "server");
+			fprintf(stderr, "Failed status %d: wr_id %d\n",
+				wc.status, (int) wc.wr_id);
+			fprintf(stderr, "scnt=%d, ccnt=%d\n",
+				scnt, ccnt);
+			return 13;
+		}
+		/* Keep the completion count meaningful for the diagnostic above. */
+		++ccnt;
+	}
+	return 0;
+}
+
+
+/*
+ * RDMA read latency test entry point.
+ *
+ * Parses the command line, creates the verbs resources, exchanges endpoint
+ * data with the peer, connects the QP, runs the measurement (one size, or
+ * every power of two from 2 to 2^23 with -a) and prints the report on the
+ * client side.
+ *
+ * A single non-option argument is the server host name and selects the
+ * client role; without it the process acts as the server.
+ *
+ * Returns 0 on success and a non-zero code on any failure.
+ */
+int __cdecl main(int argc, char *argv[]) {
+
+	const char *ib_devname = NULL;
+	int size = 2;
+	int tmp_size;
+	int i = 0;
+	struct report_options report;
+	struct pingpong_context *ctx;
+	struct ibv_device *ib_dev;
+	struct perftest_parameters user_param;
+	int no_cpu_freq_fail = 0;
+	struct pingpong_dest my_dest,rem_dest;
+
+	int all = 0;
+	const char *servername = NULL;
+
+	SYSTEM_INFO si;
+	GetSystemInfo(&si);
+
+	/* init default values to user's parameters */
+	memset(&user_param,0,sizeof(struct perftest_parameters));
+	user_param.mtu        = 0;
+	user_param.ib_port    = 1;
+	user_param.port       = 18515;
+	user_param.iters      = 1000;
+	user_param.tx_depth   = 50;
+	user_param.use_event  = 0;
+	user_param.qp_timeout = 14;
+	user_param.gid_index  = -1; /* gid will not be used*/
+
+	report.cycles    = 0;
+	report.histogram = 0;
+	report.unsorted  = 0;
+
+	/* Parameter parsing. */
+	while (1) {
+		int c;
+
+		static struct option long_options[] = {
+			{ "port", 1, NULL, 'p' },
+			{ "mtu", 1, NULL, 'm' },
+			{ "ib-dev", 1, NULL, 'd' },
+			{ "ib-port", 1, NULL, 'i' },
+			{ "size", 1, NULL, 's' },
+			{ "iters", 1, NULL, 'n' },
+			{ "outs", 1, NULL, 'o' },
+			{ "tx-depth", 1, NULL, 't' },
+			{ "qp-timeout", 1, NULL, 'u' },
+			{ "sl", 1, NULL, 'S' },
+			{ "gid-index", 1, NULL, 'x' },
+			{ "all", 0, NULL, 'a' },
+			{ "report-cycles", 0, NULL, 'C' },
+			{ "report-histogram", 0, NULL, 'H' },
+			{ "report-unsorted", 0, NULL, 'U' },
+			{ "version", 0, NULL, 'V' },
+			{ "events", 0, NULL, 'e' },
+			{ "CPU-freq", 0, NULL, 'F' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:c:m:d:i:s:o:n:t:u:S:x:aeHUVF", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			user_param.port = strtol(optarg, NULL, 0);
+			if (user_param.port < 0 || user_param.port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+		case 'e':
+			++user_param.use_event;
+			break;
+
+		case 'm':
+			user_param.mtu = strtol(optarg, NULL, 0);
+			break;
+		case 'o':
+			user_param.out_reads = strtol(optarg, NULL, 0);
+			break;
+		case 'a':
+			all = ALL;
+			break;
+		case 'V':
+			printf("perftest version : %.2f\n",VERSION);
+			return 0;
+		case 'd':
+			ib_devname = _strdup(optarg);
+			break;
+
+		case 'i': {
+			/* Range-check before narrowing: once cast to uint8_t the
+			 * value can never be negative, so a later "< 0" test
+			 * would be dead code. */
+			long ib_port_arg = strtol(optarg, NULL, 0);
+			if (ib_port_arg < 0 || ib_port_arg > 255) {
+				usage(argv[0]);
+				return 2;
+			}
+			user_param.ib_port = (uint8_t)ib_port_arg;
+			break;
+		}
+
+		case 's':
+			size = strtol(optarg, NULL, 0);
+			if (size < 1) {
+				usage(argv[0]); return 3;
+			}
+			break;
+
+		case 't':
+			user_param.tx_depth = strtol(optarg, NULL, 0);
+			if (user_param.tx_depth < 1) {
+				usage(argv[0]); return 4;
+			}
+			break;
+
+		case 'n':
+			user_param.iters = strtol(optarg, NULL, 0);
+			if (user_param.iters < 2) {
+				usage(argv[0]);
+				return 5;
+			}
+
+			break;
+
+		case 'C':
+			report.cycles = 1;
+			break;
+
+		case 'H':
+			report.histogram = 1;
+			break;
+
+		case 'U':
+			report.unsorted = 1;
+			break;
+
+		case 'F':
+			no_cpu_freq_fail = 1;
+			break;
+
+		case 'u':
+			user_param.qp_timeout = (uint8_t)(strtol(optarg, NULL, 0));
+			break;
+
+		case 'S': {
+			/* Range-check before narrowing so that e.g. 256 is
+			 * rejected instead of wrapping to 0. */
+			long sl_arg = strtol(optarg, NULL, 0);
+			if (sl_arg < 0 || sl_arg > 15) { usage(argv[0]); return 5; }
+			sl = (uint8_t)sl_arg;
+			break;
+		}
+
+		case 'x':
+			user_param.gid_index = strtol(optarg, NULL, 0);
+			if (user_param.gid_index > 63) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		default:
+			usage(argv[0]);
+			return 6;
+		}
+	}
+
+	/* At most one positional argument is accepted: the server host name. */
+	if (optind == argc - 1)
+		servername = _strdup(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 6;
+	}
+
+	/*
+	 * Done with parameter parsing. Perform setup.
+	 */
+	tstamp = malloc(user_param.iters * sizeof *tstamp);
+	if (!tstamp) {
+		perror("malloc");
+		return 10;
+	}
+	printf("------------------------------------------------------------------\n");
+	printf(" RDMA_Read Latency Test\n");
+	printf(" Connection type : RC\n");
+
+	/* Remember the requested size; the buffer itself may be made larger. */
+	tmp_size = size;
+	if (all == ALL) {
+		/*since we run all sizes */
+		size = 8388608; /*2^23 */
+	} else if (size < 128) {
+		/* can cut up to 70 nsec probably related to cache line size */
+		size = 128;
+	}
+
+
+	page_size = si.dwPageSize;
+
+	ib_dev = pp_find_dev(ib_devname);
+	if (!ib_dev)
+		return 7;
+
+	ctx = pp_init_ctx(ib_dev,size,&user_param);
+	if (!ctx)
+		return 8;
+
+	// Set up the Connection.
+	if (set_up_connection(ctx,&user_param,&my_dest)) {
+		fprintf(stderr," Unable to set up socket connection\n");
+		return 1;
+	}
+
+	// Init the connection and print the local data.
+	if (init_connection(&user_param,&my_dest,servername)) {
+		fprintf(stderr," Unable to init the socket connection\n");
+		return 1;
+	}
+
+	// shaking hands and gather the other side info.
+	if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+		fprintf(stderr,"Failed to exchange data between server and clients\n");
+		return 1;
+
+	}
+	user_param.side = REMOTE;
+	ctx_print_pingpong_data(&rem_dest,&user_param);
+
+	if (pp_connect_ctx(ctx,my_dest.psn,&rem_dest,my_dest.out_reads,&user_param)) {
+		fprintf(stderr," Unable to Connect the HCA's through the link\n");
+		return 1;
+	}
+
+	// A second hand shake after the QP has been connected.
+	if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+		fprintf(stderr,"Failed to exchange data between server and clients\n");
+		return 1;
+
+	}
+
+	/* fix for true size in small msg size */
+	if (tmp_size < 128) {
+		size = tmp_size;
+	}
+	if (user_param.use_event) {
+		printf("Test with events.\n");
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, "Couldn't request RCQ notification\n");
+			return 1;
+		}
+	}
+	printf("------------------------------------------------------------------\n");
+	printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");
+	if (all == ALL) {
+		/* Sizes 2^1 .. 2^23. */
+		for (i = 1; i < 24 ; ++i) {
+			size = 1 << i;
+			if(run_iter(ctx, &user_param, &rem_dest, size))
+				return 17;
+			if(user_param.machine == CLIENT) {
+				print_report(&report,user_param.iters, tstamp, size, no_cpu_freq_fail);
+			}
+		}
+	} else {
+		if(run_iter(ctx, &user_param, &rem_dest, size))
+			return 18;
+		if(user_param.machine == CLIENT) {
+			print_report(&report, user_param.iters, tstamp, size, no_cpu_freq_fail);
+		}
+	}
+
+	if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
+		fprintf(stderr,"Failed to close connection between server and client\n");
+		return 1;
+	}
+
+	printf("------------------------------------------------------------------\n");
+	free(tstamp);
+	return 0;
+}
USE_NATIVE_EH = 1\r
USE_IOSTREAM = 1\r
\r
-SOURCES = send_bw.rc send_bw.c ..\perftest.c\r
+SOURCES = send_bw.rc send_bw.c ..\perftest_resources.c ..\perftest.c ..\multicast_resources.c\r
\r
-INCLUDES = ..;..\..\..\ulp\libibverbs\include;\\r
- ..\..\..\inc;..\..\..\inc\user;\\r
- ..\..\..\inc\user\linux;\r
-\r
+INCLUDES = ..;..\..\..\ulp\libibverbs\include;..\..\..\ulp\libibumad\include;\\r
+ ..\..\..\inc;..\..\..\inc\user;..\..\..\inc\user\linux;..\..\..\inc\complib;\\r
+ ..\..\..\hw\mlx4\user\hca;\r
TARGETLIBS = \\r
$(SDK_LIB_PATH)\kernel32.lib \\r
$(SDK_LIB_PATH)\advapi32.lib \\r
$(SDK_LIB_PATH)\user32.lib \\r
$(SDK_LIB_PATH)\ole32.lib \\r
$(SDK_LIB_PATH)\ws2_32.lib \\r
+ $(SDK_LIB_PATH)\uuid.lib \\r
!if $(FREEBUILD)\r
- $(TARGETPATH)\*\libibverbs.lib\r
+ $(TARGETPATH)\*\libibverbs.lib \\r
+ $(TARGETPATH)\*\libibumad.lib \\r
+ $(TARGETPATH)\*\complib.lib \\r
!else\r
- $(TARGETPATH)\*\libibverbsd.lib\r
-!endif\r
-\r
+ $(TARGETPATH)\*\libibverbsd.lib \\r
+ $(TARGETPATH)\*\libibumadd.lib \\r
+ $(TARGETPATH)\*\complibd.lib \\r
+!endif
\ No newline at end of file
/*\r
* Copyright (c) 2005 Topspin Communications. All rights reserved.\r
- * Copyright (c) 2008 Intel Corporation. All rights reserved.\r
+ * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.\r
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.\r
*\r
* This software is available to you under the OpenIB.org BSD license\r
* below:\r
#include <stdio.h>\r
#include <stdlib.h>\r
#include <string.h>\r
+#include <limits.h>\r
+#include <malloc.h>\r
+#include <getopt.h>\r
+#include <time.h>\r
+#include <errno.h>\r
+#include <infiniband/verbs.h>\r
+#include <windows.h>\r
+\r
#include <ws2tcpip.h>\r
#include <winsock2.h>\r
-#include <time.h>\r
\r
-#include "..\..\..\etc\user\getopt.c"\r
-#include "perftest.h"\r
-#include <infiniband/verbs.h>\r
\r
-#define PINGPONG_SEND_WRID 1\r
-#define PINGPONG_RECV_WRID 2\r
-#define RC 0\r
-#define UC 1\r
-#define UD 3\r
-#define VERSION 1.1\r
+#include "..\..\tools\perftests\user\get_clock.h"\r
+#include "..\..\etc\user\getopt.c"\r
+#include "multicast_resources.h"\r
+#include "perftest_resources.h"\r
+#include "l2w.h"\r
+\r
+#define VERSION 1.3\r
#define SIGNAL 1\r
#define MAX_INLINE 400\r
-#define ALL 1\r
-#define MCG_LID 0xc001\r
-#define MCG_GID {255,1,0,0,0,2,201,133,0,0,0,0,0,0,0,0}\r
-\r
-struct user_parameters {\r
- const char *servername;\r
- int connection_type;\r
- int mtu;\r
- int all; /* run all msg size */\r
- int signal_comp;\r
- int iters;\r
- int tx_depth;\r
- int rx_depth;\r
- int duplex;\r
- int use_event;\r
- int use_mcg;\r
- int inline_size;\r
-};\r
\r
-typedef UINT64 cycles_t;\r
-cycles_t *tposted;\r
-cycles_t *tcompleted;\r
-int post_recv;\r
+static uint8_t sl;\r
+static int page_size;\r
+cycles_t *tposted;\r
+cycles_t *tcompleted;\r
\r
+/*\r
+ * All verbs resources used by one run of the test.\r
+ *\r
+ *  - context/channel/pd/mr/cq : device context, optional completion channel,\r
+ *    protection domain, memory region registered over buf, and the CQ.\r
+ *  - qp        : array of QP pointers, indexed 0 .. num_of_qps-1.\r
+ *  - list/wr   : the single send SGE and send work request (see set_send_wqe).\r
+ *  - sge_list/rwr/my_addr : per-QP receive SGE, receive work request and the\r
+ *    initial receive address (see set_recv_wqes).\r
+ *  - ah        : address handle, NULL until pp_connect_ctx creates one.\r
+ *  - buf/size  : work buffer and the message size it was created for.\r
+ */\r
struct pingpong_context {\r
- struct ibv_context *context;\r
+ struct ibv_context *context;\r
struct ibv_comp_channel *channel;\r
- struct ibv_pd *pd;\r
- struct ibv_mr *mr;\r
- struct ibv_cq *cq;\r
- struct ibv_qp *qp;\r
- void *buf;\r
- unsigned size;\r
- int tx_depth;\r
- int rx_depth;\r
- struct ibv_sge list;\r
- struct ibv_sge recv_list;\r
- struct ibv_send_wr wr;\r
- struct ibv_recv_wr rwr;\r
- struct ibv_ah *ah;\r
+ struct ibv_pd *pd;\r
+ struct ibv_mr *mr;\r
+ struct ibv_cq *cq;\r
+ struct ibv_qp **qp;\r
+ struct ibv_sge list;\r
+ struct ibv_send_wr wr;\r
+ struct ibv_sge *sge_list;\r
+ struct ibv_recv_wr *rwr;\r
+ struct ibv_ah *ah;\r
+ void *buf;\r
+ unsigned size;\r
+ uint64_t *my_addr;\r
};\r
\r
-struct pingpong_dest {\r
- int lid;\r
- int qpn;\r
- int psn;\r
- unsigned rkey;\r
- unsigned long long vaddr;\r
-};\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Register the multicast group with the SM and attach the receive QPs to it.\r
+ *\r
+ * Fills mcg_params with the port GID, P_Key, SM LID/SL and port number, builds\r
+ * the MGID via set_multicast_gid(), sends a SET join request, then attaches\r
+ * ctx->qp[i] for every i up to num_of_qps. In duplex mode attachment starts\r
+ * at index 1, so qp[0] is left out of the group.\r
+ *\r
+ * Returns 0 on success, 1 on the first failure. QPs attached before a failure\r
+ * are not detached here.\r
+ * NOTE(review): gid_index is passed to ibv_query_gid() unchecked; elsewhere in\r
+ * this file -1 means "no GID" - confirm callers never reach here with -1.\r
+ */\r
+static int set_mcast_group(struct pingpong_context *ctx,\r
+ struct perftest_parameters *user_parm,\r
+ struct mcast_parameters *mcg_params) {\r
\r
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
-{\r
- struct ibv_port_attr attr;\r
+ int i = (user_parm->duplex) ? 1 : 0;\r
+ struct ibv_port_attr port_attr;\r
+\r
+ if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&mcg_params->port_gid)) {\r
+ return 1;\r
+ }\r
+ \r
+ if (ibv_query_pkey(ctx->context,user_parm->ib_port,DEF_PKEY_IDX,&mcg_params->pkey)) {\r
+ return 1;\r
+ }\r
\r
- if (ibv_query_port(ctx->context, (uint8_t) port, &attr))\r
- return 0;\r
+ if (ibv_query_port(ctx->context,user_parm->ib_port,&port_attr)) {\r
+ return 1;\r
+ }\r
+ mcg_params->sm_lid = port_attr.sm_lid;\r
+ mcg_params->sm_sl = port_attr.sm_sl;\r
+ mcg_params->ib_port = user_parm->ib_port;\r
+ set_multicast_gid(mcg_params);\r
+\r
+ // Ask the SM to create/register the multicast group.\r
+ if (join_multicast_group(SUBN_ADM_METHOD_SET,mcg_params)) {\r
+ fprintf(stderr,"Couldn't Register the Mcast group on the SM\n");\r
+ return 1;\r
+ }\r
+\r
+ while (i < user_parm->num_of_qps) {\r
+\r
+ if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {\r
+ fprintf(stderr, "Couldn't attach QP to MultiCast group\n");\r
+ return 1;\r
+ }\r
+ i++;\r
+ }\r
+ mcg_params->mcast_state |= MCAST_IS_ATTACHED;\r
\r
- return attr.lid;\r
+ return 0;\r
}\r
\r
-static struct pingpong_dest * pp_client_exch_dest(SOCKET sockfd,\r
- const struct pingpong_dest *my_dest)\r
-{\r
- struct pingpong_dest *rem_dest = NULL;\r
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];\r
- int parsed;\r
-\r
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,\r
- my_dest->psn,my_dest->rkey,my_dest->vaddr);\r
- if (send(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client send");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- goto out;\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Undo set_mcast_group(): detach the QPs from the multicast group and send a\r
+ * DELETE request for the group to the SM.\r
+ *\r
+ * Detaches ctx->qp[i] for every i up to num_of_qps, starting at index 1 in\r
+ * duplex mode (the same range set_mcast_group() attaches). Clears\r
+ * MCAST_IS_ATTACHED in mcg_params->mcast_state only when every step succeeds.\r
+ *\r
+ * Returns 0 on success, 1 on the first failure (remaining steps are skipped).\r
+ */\r
+static int destroy_mcast_group(struct pingpong_context *ctx,\r
+ struct perftest_parameters *user_parm,\r
+ struct mcast_parameters *mcg_params) {\r
+\r
+ int i = (user_parm->duplex) ? 1 : 0;\r
+\r
+ while (i < user_parm->num_of_qps) {\r
+\r
+ if (ibv_detach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {\r
+ fprintf(stderr, "Couldn't deattach QP from MultiCast group\n");\r
+ return 1;\r
+ }\r
+ i++;\r
}\r
\r
- if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client recv");\r
- fprintf(stderr, "Couldn't recv remote address\n");\r
- goto out;\r
+ // Ask the SM to remove this port from the multicast group.\r
+ if (join_multicast_group(SUBN_ADM_METHOD_DELETE,mcg_params)) {\r
+ fprintf(stderr,"Couldn't Unregister the Mcast group on the SM\n");\r
+ return 1;\r
}\r
\r
- rem_dest = malloc(sizeof *rem_dest);\r
- if (!rem_dest)\r
- goto out;\r
+ mcg_params->mcast_state &= ~MCAST_IS_ATTACHED;\r
\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);\r
+ return 0;\r
+}\r
+\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Fill my_dest with the local addressing data (gid, lid, qpn, psn).\r
+ *\r
+ * When multicast is in use and this side receives (server, or either side in\r
+ * duplex), the multicast group is set up first and my_dest carries the group's\r
+ * MGID/MLID with qpn = QPNUM_MCAST. Otherwise my_dest carries the port's LID,\r
+ * qp[0]'s QP number and, if a GID index was given, the port GID.\r
+ *\r
+ * Returns 0 on success and non-zero on failure (1 from the multicast path,\r
+ * -1 from the other checks).\r
+ */\r
+static int set_up_connection(struct pingpong_context *ctx,\r
+ struct perftest_parameters *user_parm,\r
+ struct pingpong_dest *my_dest,\r
+ struct mcast_parameters *mcg_params) {\r
+\r
+ \r
+\r
+ if (user_parm->use_mcg && (user_parm->duplex || user_parm->machine == SERVER)) {\r
\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,\r
- msg);\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
+ if (set_mcast_group(ctx,user_parm,mcg_params)) {\r
+ return 1;\r
+ }\r
+\r
+ my_dest->gid = mcg_params->mgid;\r
+ my_dest->lid = mcg_params->mlid;\r
+ my_dest->qpn = QPNUM_MCAST;\r
+\r
+ } else {\r
+ if (user_parm->gid_index != -1) {\r
+ if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&my_dest->gid)) {\r
+ return -1;\r
+ }\r
+ }\r
+ my_dest->lid = ctx_get_local_lid(ctx->context,user_parm->ib_port);\r
+ my_dest->qpn = ctx->qp[0]->qp_num;\r
+ }\r
+ // Random initial PSN, masked to 24 bits.\r
+ my_dest->psn = rand() & 0xffffff;\r
+\r
+ // A zero LID is treated as an error only when no GID index is in use.\r
+\r
+ if (user_parm->gid_index < 0) {\r
+ if (!my_dest->lid) {\r
+ fprintf(stderr,"Local lid 0x0 detected,without any use of gid. Is SM running?\n");\r
+ return -1;\r
+ }\r
}\r
-out:\r
- return rem_dest;\r
+ return 0;\r
}\r
\r
-static struct pingpong_dest *pp_server_exch_dest(SOCKET connfd, const struct pingpong_dest *my_dest)\r
-{\r
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];\r
- struct pingpong_dest *rem_dest = NULL;\r
- int parsed;\r
- int n;\r
-\r
- n = recv(connfd, msg, sizeof msg, 0);\r
- if (n != sizeof msg) {\r
- perror("server recv");\r
- fprintf(stderr, "%d/%d: Couldn't recv remote address\n", n, (int) sizeof msg);\r
- goto out;\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Print the local connection data and open the out-of-band socket.\r
+ *\r
+ * Marks params->side as LOCAL, prints my_dest, then connects to servername on\r
+ * params->port when running as CLIENT, or waits for a connection on that port\r
+ * otherwise. The resulting socket is stored in params->sockfd.\r
+ *\r
+ * Returns 0 on success, 1 if no valid socket could be obtained.\r
+ */\r
+static int init_connection(struct perftest_parameters *params,\r
+	struct pingpong_dest *my_dest,\r
+	const char *servername) {\r
+\r
+	params->side = LOCAL;\r
+	ctx_print_pingpong_data(my_dest,params);\r
+\r
+	if (params->machine == CLIENT) {\r
+		params->sockfd = ctx_client_connect(servername,params->port);\r
+	} else {\r
+		params->sockfd = ctx_server_connect(params->port);\r
+	}\r
+\r
+	if (params->sockfd == INVALID_SOCKET) {\r
+		// Terminate the message with a newline like every other diagnostic.\r
+		fprintf(stderr,"Unable to open file descriptor for socket connection\n");\r
+		return 1;\r
+	}\r
+	return 0;\r
+}\r
+\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Release everything held by ctx, plus the global tposted/tcompleted arrays.\r
+ *\r
+ * Teardown is best-effort: a failing step is reported on stderr and recorded,\r
+ * but the remaining steps still run. ctx itself is freed, so it must not be\r
+ * used after this call.\r
+ *\r
+ * Returns 0 if every step succeeded, 1 if at least one failed.\r
+ */\r
+static int destroy_ctx_resources(struct pingpong_context *ctx, \r
+ struct perftest_parameters *user_parm,\r
+ struct mcast_parameters *mcg_params) {\r
+\r
+ int i,test_result = 0;\r
+\r
+ // Same condition under which set_up_connection() joined the group.\r
+ if (user_parm->use_mcg && (user_parm->machine == SERVER || user_parm->duplex)) {\r
+ if (destroy_mcast_group(ctx,user_parm,mcg_params)) {\r
+ fprintf(stderr, "failed to destroy MultiCast resources\n");\r
+ test_result = 1;\r
+ }\r
}\r
\r
- rem_dest = malloc(sizeof *rem_dest);\r
- if (!rem_dest)\r
- goto out;\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,\r
- msg);\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
+ // The AH exists only if pp_connect_ctx() created one.\r
+ if (ctx->ah) {\r
+ if (ibv_destroy_ah(ctx->ah)) {\r
+ fprintf(stderr, "failed to destroy AH\n");\r
+ test_result = 1;\r
+ }\r
+ }\r
+\r
+ for(i = 0; i < user_parm->num_of_qps; i++) {\r
+ if (ibv_destroy_qp(ctx->qp[i])) {\r
+ fprintf(stderr, "failed to destroy QP\n");\r
+ test_result = 1;\r
+ }\r
}\r
+ free(ctx->qp);\r
\r
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,\r
- my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
- if (send(connfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("server send");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
+ if (ibv_destroy_cq(ctx->cq)) {\r
+ fprintf(stderr, "failed to destroy CQ\n");\r
+ test_result = 1;\r
}\r
-out:\r
- return rem_dest;\r
+ \r
+ if (ibv_dereg_mr(ctx->mr)) {\r
+ fprintf(stderr, "failed to deregister MR\n");\r
+ test_result = 1;\r
+ }\r
+ \r
+ if (ibv_dealloc_pd(ctx->pd)) {\r
+ fprintf(stderr, "failed to deallocate PD\n");\r
+ test_result = 1;\r
+ }\r
+\r
+ if (ctx->channel) {\r
+ if (ibv_destroy_comp_channel(ctx->channel)) {\r
+ fprintf(stderr, "failed to destroy channel \n");\r
+ test_result = 1;\r
+ }\r
+ }\r
+ \r
+ if (ibv_close_device(ctx->context)) {\r
+ fprintf(stderr, "failed to close device context\n");\r
+ test_result = 1;\r
+ }\r
+\r
+ // Receive-side arrays are released only on the receiving side (server or duplex).\r
+ if (user_parm->machine == SERVER || user_parm->duplex) {\r
+ free(ctx->rwr);\r
+ free(ctx->sge_list);\r
+ free(ctx->my_addr);\r
+ }\r
+\r
+ // buf came from posix_memalign() in pp_init_ctx(), hence the matching free call.\r
+ posix_memfree(ctx->buf);\r
+ free(ctx);\r
+ free(tposted);\r
+ free(tcompleted);\r
+ return test_result;\r
}\r
\r
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,\r
- unsigned size,\r
- int tx_depth, int rx_depth, int port,\r
- struct user_parameters *user_parm)\r
-{\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,unsigned size,\r
+ struct perftest_parameters *user_parm) {\r
+\r
+ int i,m_size;\r
+ int duplex_uni_ind;\r
struct pingpong_context *ctx;\r
struct ibv_device_attr device_attr;\r
\r
- ctx = malloc(sizeof *ctx);\r
- if (!ctx)\r
- return NULL;\r
+ ALLOCATE(ctx,struct pingpong_context,1);\r
\r
+ ctx->ah = NULL;\r
+ ctx->channel = NULL;\r
ctx->size = size;\r
- ctx->tx_depth = tx_depth;\r
- ctx->rx_depth = rx_depth + tx_depth;\r
- /* in case of UD need space for the GRH */\r
- if (user_parm->connection_type==UD) {\r
- ctx->buf = malloc((size + 40) * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
- memset(ctx->buf, 0, ( size + 40 ) * 2);\r
- } else {\r
- ctx->buf = malloc(size * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
- memset(ctx->buf, 0, size * 2);\r
- }\r
\r
+ duplex_uni_ind = (user_parm->duplex && !user_parm->use_mcg) ? 2 : 1;\r
+ m_size = BUFF_SIZE(SIZE(user_parm->connection_type,size))*user_parm->num_of_qps*duplex_uni_ind;\r
+ \r
+ // Allocating the Buff size according to connection type and size.\r
+ posix_memalign(&(ctx->buf),page_size, m_size);\r
+ if (!ctx->buf) {\r
+ fprintf(stderr, "Couldn't allocate work buf.\n");\r
+ return NULL;\r
+ }\r
+ memset(ctx->buf, 0, m_size);\r
\r
ctx->context = ibv_open_device(ib_dev);\r
if (!ctx->context) {\r
ibv_get_device_name(ib_dev));\r
return NULL;\r
}\r
+\r
+ // Finds the link type and configure the HCA accordingly.\r
+ if (ctx_set_link_layer(ctx->context,user_parm)) {\r
+ fprintf(stderr, "Couldn't set the link layer\n");\r
+ return NULL;\r
+ }\r
+ \r
if (user_parm->mtu == 0) {/*user did not ask for specific mtu */\r
if (ibv_query_device(ctx->context, &device_attr)) {\r
fprintf(stderr, "Failed to query device props");\r
return NULL;\r
}\r
- if (device_attr.vendor_part_id == 23108) {\r
+ if (device_attr.vendor_part_id == 23108 || user_parm->gid_index != -1) {\r
user_parm->mtu = 1024;\r
} else {\r
user_parm->mtu = 2048;\r
}\r
}\r
+\r
if (user_parm->use_event) {\r
ctx->channel = ibv_create_comp_channel(ctx->context);\r
if (!ctx->channel) {\r
}\r
} else\r
ctx->channel = NULL; \r
+\r
ctx->pd = ibv_alloc_pd(ctx->context);\r
if (!ctx->pd) {\r
fprintf(stderr, "Couldn't allocate PD\n");\r
return NULL;\r
}\r
\r
- /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:\r
- * The Consumer is not allowed to assign Remote Write or Remote Atomic to\r
- * a Memory Region that has not been assigned Local Write. */\r
- if (user_parm->connection_type==UD) {\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, (size + 40 ) * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
- } else {\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
+ // We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says :\r
+ // The Consumer is not allowed to assign Remote Write or Remote Atomic to\r
+ // a Memory Region that has not been assigned Local Write. \r
+ ctx->mr = ibv_reg_mr(ctx->pd,ctx->buf,m_size,IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
+ if (!ctx->mr) {\r
+ fprintf(stderr, "Couldn't allocate MR\n");\r
+ return NULL;\r
}\r
\r
- ctx->cq = ibv_create_cq(ctx->context, ctx->rx_depth, NULL, ctx->channel, 0);\r
- if (!ctx->cq) {\r
- fprintf(stderr, "Couldn't create CQ\n");\r
+ // Create the CQ according to Client/Server or Duplex setting.\r
+ ctx->cq = ctx_cq_create(ctx->context,ctx->channel,user_parm);\r
+ if (ctx->cq == NULL) {\r
+ fprintf(stderr, "Couldn't create CQ \n");\r
return NULL;\r
}\r
- {\r
- struct ibv_qp_init_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_init_attr));\r
- attr.send_cq = ctx->cq;\r
- attr.recv_cq = ctx->cq; \r
- attr.cap.max_send_wr = tx_depth;\r
- /* Work around: driver doesnt support\r
- * recv_wr = 0 */\r
- attr.cap.max_recv_wr = ctx->rx_depth;\r
- attr.cap.max_send_sge = 1;\r
- attr.cap.max_recv_sge = 1;\r
- attr.cap.max_inline_data = user_parm->inline_size;\r
- switch (user_parm->connection_type) {\r
- case RC :\r
- attr.qp_type = IBV_QPT_RC;\r
- break;\r
- case UC :\r
- attr.qp_type = IBV_QPT_UC;\r
- break;\r
- case UD :\r
- attr.qp_type = IBV_QPT_UD;\r
- break;\r
- default:\r
- fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);\r
- return NULL;\r
- }\r
- /*attr.sq_sig_all = 0;*/\r
\r
- ctx->qp = ibv_create_qp(ctx->pd, &attr);\r
- if (!ctx->qp) {\r
- fprintf(stderr, "Couldn't create QP\n");\r
+ ALLOCATE(ctx->qp,struct ibv_qp*,user_parm->num_of_qps);\r
+ \r
+ for(i=0; i < user_parm->num_of_qps; i++) {\r
+ ctx->qp[i] = ctx_qp_create(ctx->pd,ctx->cq,ctx->cq,user_parm);\r
+ if (ctx->qp[i] == NULL) {\r
return NULL;\r
}\r
\r
- if ((user_parm->connection_type==UD) && (user_parm->use_mcg)) {\r
- union ibv_gid gid;\r
- uint8_t mcg_gid[16] = MCG_GID;\r
-\r
- /* use the local QP number as part of the mcg */\r
- mcg_gid[11] = (user_parm->servername) ? 0 : 1;\r
- *(uint32_t *)(&mcg_gid[12]) = ctx->qp->qp_num;\r
- memcpy(gid.raw, mcg_gid, 16);\r
-\r
- if (ibv_attach_mcast(ctx->qp, &gid, MCG_LID)) {\r
- fprintf(stderr, "Couldn't attach QP to mcg\n");\r
- return NULL;\r
- }\r
- }\r
- }\r
-\r
- {\r
- struct ibv_qp_attr attr;\r
-\r
- attr.qp_state = IBV_QPS_INIT;\r
- attr.pkey_index = 0;\r
- attr.port_num = (uint8_t) port;\r
- if (user_parm->connection_type==UD)\r
- attr.qkey = 0x11111111;\r
- else\r
- attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE;\r
-\r
- if (user_parm->connection_type==UD) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_QKEY)) {\r
- fprintf(stderr, "Failed to modify UD QP to INIT\n");\r
- return NULL;\r
- }\r
- } else if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_ACCESS_FLAGS)) {\r
- fprintf(stderr, "Failed to modify QP to INIT\n");\r
+ if(ctx_modify_qp_to_init(ctx->qp[i],user_parm)) {\r
return NULL;\r
}\r
}\r
+\r
return ctx;\r
}\r
\r
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,\r
- struct pingpong_dest *dest, struct user_parameters *user_parm)\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,\r
+ struct pingpong_dest *dest, \r
+ struct perftest_parameters *user_parm)\r
{\r
struct ibv_qp_attr attr;\r
+ int i;\r
memset(&attr, 0, sizeof attr);\r
\r
attr.qp_state = IBV_QPS_RTR;\r
attr.path_mtu = IBV_MTU_4096;\r
break;\r
}\r
- printf("Mtu : %d\n", user_parm->mtu);\r
- attr.dest_qp_num = dest->qpn;\r
- attr.rq_psn = dest->psn;\r
+ printf(" Mtu : %d\n", user_parm->mtu);\r
+ attr.dest_qp_num = dest->qpn;\r
+ attr.rq_psn = dest->psn;\r
+ attr.ah_attr.dlid = dest->lid;\r
if (user_parm->connection_type == RC) {\r
attr.max_dest_rd_atomic = 1;\r
attr.min_rnr_timer = 12;\r
}\r
- attr.ah_attr.is_global = 0;\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.sl = 0;\r
- attr.ah_attr.src_path_bits = 0;\r
- attr.ah_attr.port_num = (uint8_t) port;\r
- if ((user_parm->connection_type==UD) && (user_parm->use_mcg)) {\r
- uint8_t mcg_gid[16] = MCG_GID;\r
- /* send the message to the mcg of the other side */\r
- mcg_gid[11] = (user_parm->servername) ? 1 : 0;\r
- *(uint32_t *)(&mcg_gid[12]) = dest->qpn;\r
- attr.ah_attr.dlid = MCG_LID;\r
- attr.ah_attr.is_global = 1;\r
- attr.ah_attr.grh.sgid_index = 0;\r
- memcpy(attr.ah_attr.grh.dgid.raw, mcg_gid, 16);\r
- } else {\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
+ if (user_parm->gid_index < 0) {\r
attr.ah_attr.is_global = 0;\r
+ attr.ah_attr.sl = sl;\r
+ } else {\r
+ attr.ah_attr.is_global = 1;\r
+ attr.ah_attr.grh.dgid = dest->gid;\r
+ attr.ah_attr.grh.sgid_index = (uint8_t)(user_parm->gid_index);\r
+ attr.ah_attr.grh.hop_limit = 1;\r
+ attr.ah_attr.sl = 0;\r
}\r
+ attr.ah_attr.src_path_bits = 0;\r
+ attr.ah_attr.port_num = user_parm->ib_port;\r
+ \r
if (user_parm->connection_type == RC) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
+ if (ibv_modify_qp(ctx->qp[0], &attr,\r
IBV_QP_STATE |\r
IBV_QP_AV |\r
IBV_QP_PATH_MTU |\r
fprintf(stderr, "Failed to modify RC QP to RTR\n");\r
return 1;\r
}\r
- attr.timeout = 14;\r
+ attr.timeout = user_parm->qp_timeout;\r
attr.retry_cnt = 7;\r
attr.rnr_retry = 7;\r
} else if (user_parm->connection_type == UC) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
+ if (ibv_modify_qp(ctx->qp[0], &attr,\r
IBV_QP_STATE |\r
IBV_QP_AV |\r
IBV_QP_PATH_MTU |\r
fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
return 1;\r
}\r
- } else {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE )) {\r
- fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
- return 1;\r
- }\r
- }\r
- attr.qp_state = IBV_QPS_RTS;\r
- attr.sq_psn = my_psn;\r
- attr.max_rd_atomic = 1;\r
- if (user_parm->connection_type == RC) {\r
- attr.max_rd_atomic = 1;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN |\r
- IBV_QP_TIMEOUT |\r
- IBV_QP_RETRY_CNT |\r
- IBV_QP_RNR_RETRY |\r
- IBV_QP_MAX_QP_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
- return 1;\r
- }\r
- } else { /*both UC and UD */\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTS\n");\r
- return 1;\r
+ } \r
+ \r
+ else {\r
+ for (i = 0; i < user_parm->num_of_qps; i++) {\r
+ if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE )) {\r
+ fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
+ return 1;\r
+ }\r
}\r
-\r
- }\r
- if (user_parm->connection_type==UD) {\r
- ctx->ah = ibv_create_ah(ctx->pd, &attr.ah_attr);\r
- if (!ctx->ah) {\r
- fprintf(stderr, "Failed to create AH for UD\n");\r
- return 1;\r
+ if (user_parm->machine == CLIENT || user_parm->duplex) {\r
+ ctx->ah = ibv_create_ah(ctx->pd,&attr.ah_attr);\r
+ if (!ctx->ah) {\r
+ fprintf(stderr, "Failed to create AH for UD\n");\r
+ return 1;\r
+ }\r
}\r
}\r
- /* post receive max msg size*/\r
- {\r
- int i;\r
- struct ibv_recv_wr *bad_wr_recv;\r
- //receive\r
- ctx->rwr.wr_id = PINGPONG_RECV_WRID;\r
- ctx->rwr.sg_list = &ctx->recv_list;\r
- ctx->rwr.num_sge = 1;\r
- ctx->rwr.next = NULL;\r
- ctx->recv_list.addr = (uintptr_t) ctx->buf;\r
- if (user_parm->connection_type==UD) {\r
- ctx->recv_list.length = ctx->size + 40;\r
+\r
+ if (user_parm->machine == CLIENT || user_parm->duplex) {\r
+\r
+ attr.qp_state = IBV_QPS_RTS;\r
+ attr.sq_psn = my_psn;\r
+ if (user_parm->connection_type == RC) {\r
+ attr.max_rd_atomic = 1;\r
+ if (ibv_modify_qp(ctx->qp[0], &attr,\r
+ IBV_QP_STATE |\r
+ IBV_QP_SQ_PSN |\r
+ IBV_QP_TIMEOUT |\r
+ IBV_QP_RETRY_CNT |\r
+ IBV_QP_RNR_RETRY |\r
+ IBV_QP_MAX_QP_RD_ATOMIC)) {\r
+ fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
+ return 1;\r
+ }\r
+\r
} else {\r
- ctx->recv_list.length = ctx->size;\r
+ if(ibv_modify_qp(ctx->qp[0],&attr,IBV_QP_STATE |IBV_QP_SQ_PSN)) {\r
+ fprintf(stderr, "Failed to modify UC QP to RTS\n");\r
+ return 1;\r
+ }\r
}\r
- ctx->recv_list.lkey = ctx->mr->lkey;\r
- for (i = 0; i < ctx->rx_depth; ++i)\r
- if (ibv_post_recv(ctx->qp, &ctx->rwr, &bad_wr_recv)) {\r
- fprintf(stderr, "Couldn't post recv: counter=%d\n", i);\r
- return 14;\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Build the per-QP receive work requests and post rx_depth receives on each.\r
+ *\r
+ * For every receiving QP i this fills ctx->sge_list[i] / ctx->rwr[i], records\r
+ * the starting buffer address in ctx->my_addr[i], and posts the same work\r
+ * request user_param->rx_depth times. wr_id is set to the QP index.\r
+ *\r
+ * Returns 0 on success, 1 as soon as a post fails.\r
+ * NOTE(review): the buffer stride uses ctx->size while the SGE length uses the\r
+ * size argument - confirm callers always pass values for which that is intended.\r
+ */\r
+static int set_recv_wqes(struct pingpong_context *ctx,int size,\r
+ struct perftest_parameters *user_param) {\r
+ \r
+ int i,j,buff_size;\r
+ int duplex_uni_ind;\r
+ struct ibv_recv_wr *bad_wr_recv;\r
+\r
+ // Duplex without multicast: shift receive buffers by one slot past the start of buf.\r
+ duplex_uni_ind = (user_param->duplex && !user_param->use_mcg) ? 1 : 0 ;\r
+ // Duplex with multicast: skip qp[0], matching the range set_mcast_group() attaches.\r
+ i = (user_param->duplex && user_param->use_mcg) ? 1 : 0;\r
+\r
+ buff_size = BUFF_SIZE(SIZE(user_param->connection_type,ctx->size));\r
+\r
+ while (i < user_param->num_of_qps) {\r
+\r
+ ctx->sge_list[i].addr = (uintptr_t)ctx->buf + (i + duplex_uni_ind)*buff_size;\r
+ ctx->sge_list[i].length = SIZE(user_param->connection_type,size);\r
+ ctx->sge_list[i].lkey = ctx->mr->lkey;\r
+ ctx->rwr[i].sg_list = &ctx->sge_list[i];\r
+ ctx->rwr[i].wr_id = i;\r
+ ctx->rwr[i].next = NULL;\r
+ ctx->rwr[i].num_sge = MAX_RECV_SGE;\r
+ ctx->my_addr[i] = ctx->sge_list[i].addr;\r
+ \r
+ for (j = 0; j < user_param->rx_depth; ++j) {\r
+\r
+ if (ibv_post_recv(ctx->qp[i],&ctx->rwr[i],&bad_wr_recv)) {\r
+ fprintf(stderr, "Couldn't post recv Qp = %d: counter=%d\n",i,j);\r
+ return 1;\r
}\r
+\r
+ // For small messages, let increase_loc_addr() adjust the SGE address\r
+ // between posts (presumably to cycle through the buffer - TODO confirm).\r
+ if (SIZE(user_param->connection_type,size) <= (CYCLE_BUFFER / 2))\r
+ increase_loc_addr(&ctx->sge_list[i],SIZE(user_param->connection_type,size),j,ctx->my_addr[i]);\r
+ }\r
+ i++;\r
}\r
- post_recv = ctx->rx_depth;\r
return 0;\r
}\r
\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+/*\r
+ * Initialise the single send work request (ctx->wr) and its SGE (ctx->list).\r
+ *\r
+ * The SGE points at the start of ctx->buf; its length is not set here. The\r
+ * request is a signaled IBV_WR_SEND, sent inline when size does not exceed\r
+ * user_param->inline_size. For UD connections the address handle, Q_Key and\r
+ * remote QP number (rem_qpn) are filled in as well.\r
+ */\r
+static void set_send_wqe(struct pingpong_context *ctx,\r
+	int size, int rem_qpn,\r
+	struct perftest_parameters *user_param) {\r
+\r
+	struct ibv_sge *send_sge = &ctx->list;\r
+	struct ibv_send_wr *send_wr = &ctx->wr;\r
+\r
+	send_sge->addr = (uintptr_t)ctx->buf;\r
+	send_sge->lkey = ctx->mr->lkey;\r
+\r
+	send_wr->wr_id = PINGPONG_SEND_WRID;\r
+	send_wr->next = NULL;\r
+	send_wr->sg_list = send_sge;\r
+	send_wr->num_sge = 1;\r
+	send_wr->opcode = IBV_WR_SEND;\r
+	send_wr->send_flags = IBV_SEND_SIGNALED;\r
+\r
+	if (size <= user_param->inline_size) {\r
+		send_wr->send_flags |= IBV_SEND_INLINE;\r
+	}\r
+\r
+	// Only UD needs per-request addressing.\r
+	if (user_param->connection_type != UD) {\r
+		return;\r
+	}\r
+\r
+	send_wr->wr.ud.ah = ctx->ah;\r
+	send_wr->wr.ud.remote_qkey = DEF_QKEY;\r
+	send_wr->wr.ud.remote_qpn = rem_qpn;\r
+}\r
+\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
+\r
+/*\r
+ * Print the command-line synopsis and the list of options to stdout.\r
+ * argv0 is the program name shown in the two synopsis lines.\r
+ */\r
static void usage(const char *argv0)\r
{\r
printf("Usage:\n");\r
- printf(" %s start a server and wait for connection\n", argv0);\r
- printf(" %s -h <host> connect to server at <host>\n", argv0);\r
+ printf(" %s start a server and wait for connection\n", argv0);\r
+ printf(" %s <host> connect to server at <host>\n", argv0);\r
printf("\n");\r
printf("Options:\n");\r
- printf(" -p <port> listen on/connect to port <port> (default 18515)\n");\r
- printf(" -d, <dev> use IB device <dev> (default first device found)\n");\r
- printf(" -i <port> use port <port> of IB device (default 1)\n");\r
- printf(" -c <RC/UC/UD> connection type RC/UC/UD (default RC)\n");\r
- printf(" -m <mtu> mtu size (256 - 4096. default for hermon is 2048)\n");\r
- printf(" -s <size> size of message to exchange (default 65536)\n");\r
- printf(" -a Run sizes from 2 till 2^23\n");\r
- printf(" -t <dep> size of tx queue (default 300)\n");\r
- printf(" -g send messages to multicast group(only available in UD connection\n");\r
- printf(" -r <dep> make rx queue bigger than tx (default 600)\n");\r
- printf(" -n <iters> number of exchanges (at least 2, default 1000)\n");\r
- printf(" -I <size> max size of message to be sent in inline mode (default 400)\n");\r
- printf(" -b measure bidirectional bandwidth (default unidirectional)\n");\r
- printf(" -V display version number\n");\r
- printf(" -e sleep on CQ events (default poll)\n");\r
- printf(" -N cancel peak-bw calculation (default with peak-bw)\n");\r
+ printf(" -p, --port=<port> Listen on/connect to port <port> (default 18515)\n");\r
+ printf(" -d, --ib-dev=<dev> Use IB device <dev> (default first device found)\n");\r
+ printf(" -i, --ib-port=<port> Use port <port> of IB device (default 1)\n");\r
+ printf(" -c, --connection=<RC/UC/UD> Connection type RC/UC/UD (default RC)\n");\r
+ printf(" -m, --mtu=<mtu> Mtu size (256 - 4096. default for hermon is 2048)\n");\r
+ printf(" -s, --size=<size> Size of message to exchange (default 65536)\n");\r
+ printf(" -a, --all Run sizes from 2 till 2^23\n");\r
+ printf(" -t, --tx-depth=<dep> Size of tx queue (default 300)\n");\r
+ printf(" -r, --rx-depth=<dep> Make rx queue bigger than tx (default 600)\n");\r
+ printf(" -n, --iters=<iters> Number of exchanges (at least 2, default 1000)\n");\r
+ printf(" -I, --inline_size=<size> Max size of message to be sent in inline mode (default 0)\n");\r
+ printf(" -u, --qp-timeout=<timeout> QP timeout, timeout value is 4 usec * 2 ^(timeout), default 14\n");\r
+ printf(" -S, --sl=<sl> SL (default 0)\n");\r
+ printf(" -x, --gid-index=<index> Test uses GID with GID index taken from command line (for RDMAoE index should be 0)\n");\r
+ printf(" -b, --bidirectional Measure bidirectional bandwidth (default unidirectional)\n");\r
+ printf(" -V, --version Display version number\n");\r
+ printf(" -e, --events Sleep on CQ events (default poll)\n");\r
+ printf(" -N, --no peak-bw Cancel peak-bw calculation (default with peak-bw)\n");\r
+ printf(" -F, --CPU-freq Do not fail even if cpufreq_ondemand module is loaded\n");\r
+ printf(" -g, --mcg=<num_of_qps> Send messages to multicast group with <num_of_qps> qps attached to it.\n");\r
+ printf(" -M, --MGID=<multicast_gid> In case of multicast, uses <multicast_gid> as the group MGID.\n");\r
+ printf(" The format must be '255:1:X:X:X:X:X:X:X:X:X:X:X:X:X:X', where X is a value within [0,255].\n");\r
}\r
\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
static void print_report(unsigned int iters, unsigned size, int duplex,\r
- cycles_t *tposted, cycles_t *tcompleted, int noPeak)\r
+ cycles_t *tposted, cycles_t *tcompleted, int noPeak, int no_cpu_freq_fail, MachineType machine)\r
{\r
- cycles_t cycles_to_units;\r
- unsigned long tsize; /* Transferred size */\r
- int i, j;\r
+ double cycles_to_units;\r
+ unsigned long tsize; /* Transferred size, in megabytes */\r
+ unsigned int i, j;\r
int opt_posted = 0, opt_completed = 0;\r
cycles_t opt_delta;\r
cycles_t t;\r
\r
if (!noPeak) {\r
/* Find the peak bandwidth, unless asked not to in command line */\r
- for (i = 0; i < (int) iters; ++i)\r
- for (j = i; j < (int) iters; ++j) {\r
+ for (i = 0; i < iters; ++i)\r
+ for (j = i; j < iters; ++j) {\r
t = (tcompleted[j] - tposted[i]) / (j - i + 1);\r
+ \r
if (t < opt_delta) {\r
opt_delta = t;\r
opt_posted = i;\r
}\r
}\r
\r
- cycles_to_units = get_freq();\r
+ cycles_to_units = get_cpu_mhz();\r
\r
tsize = duplex ? 2 : 1;\r
tsize = tsize * size;\r
- printf("%7d %d ", size, iters);\r
\r
- {\r
- double sec = (double) opt_delta / (double) cycles_to_units;\r
- double mbytes = (double) !(noPeak) * (double) tsize / (double) 0x100000;\r
- printf("%7.2f ", mbytes / sec);\r
+ if (machine == SERVER)\r
+ printf(" %7d %d N\\A %7.2f\n",size,iters,\r
+ (uint64_t)tsize * iters * cycles_to_units /(tcompleted[iters - 1] - tposted[0]) / 0x100000);\r
+ else\r
+ printf(REPORT_FMT,size,iters,!(noPeak) * tsize * cycles_to_units / opt_delta / 0x100000,\r
+ (uint64_t)tsize * iters * cycles_to_units /(tcompleted[iters - 1] - tposted[0]) / 0x100000);\r
\r
- sec = (double) (tcompleted[iters - 1] - tposted[0]) / (double) cycles_to_units;\r
- mbytes = (double) tsize * (double) iters / (double) 0x100000;\r
- printf("%7.2f\n", mbytes / sec);\r
- }\r
}\r
\r
-static int run_iter_bi(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest *rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- int scnt, ccnt, rcnt;\r
- struct ibv_recv_wr *bad_wr_recv;\r
- if (user_param->connection_type == UD) {\r
- if (size > 2048)\r
- size = 2048;\r
+/******************************************************************************\r
+ * Important note:\r
+ * In case of UD/UC this is NOT the way to measure BW, since we are running\r
+ * the loop on the send side, while we should run it on the receive side or\r
+ * enable retry in SW, since the sender may be faster than the receiver.\r
+ * Although we have posted receives, they may not be enough and may run out;\r
+ * this will result in a deadlock of the test, since both sides are stuck on\r
+ * poll cq.\r
+ * In this test I do not solve this for the general case; a separate test\r
+ * needs to be written for UC/UD. But in case the tx_depth is ~1/3 of the\r
+ * number of iterations this should be ok.\r
+ * Also note that the sender is limited in the number of sends, and\r
+ * I try to make the receiver full.\r
+ ******************************************************************************/\r
+int run_iter_bi(struct pingpong_context *ctx, \r
+ struct perftest_parameters *user_param,int size) {\r
+\r
+ // Runs one bidirectional measurement for messages of 'size' bytes: posts\r
+ // sends on ctx->qp[0] and polls ctx->cq until user_param->iters send\r
+ // completions and iters * num_of_qps receive completions were counted.\r
+ // Post times are stored in tposted[] and send-completion times in\r
+ // tcompleted[]. Returns 0 on success, 1 or 15 on failure.\r
+ // NOTE(review): the early-return error paths below do not free 'wc' and\r
+ // 'rcnt_for_qp' - confirm that callers always exit right after a failure.\r
+ int scnt = 0;\r
+ int ccnt = 0;\r
+ int rcnt = 0;\r
+ int i = 0;\r
+ int num_of_qps = user_param->num_of_qps;\r
+ int ne;\r
+ struct ibv_wc *wc = NULL;\r
+ int *rcnt_for_qp = NULL;\r
+ struct ibv_recv_wr *bad_wr_recv = NULL;\r
+ struct ibv_send_wr *bad_wr = NULL;\r
+\r
+ ALLOCATE(rcnt_for_qp,int,user_param->num_of_qps);\r
+ ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);\r
+ memset(rcnt_for_qp,0,sizeof(int)*user_param->num_of_qps);\r
+\r
+ // With multicast, receive completions are expected from one QP less\r
+ // (presumably the extra QP is the sending one - TODO confirm).\r
+ if (user_param->use_mcg)\r
+ num_of_qps--; \r
+\r
+ // Post receive recv_wqe's.\r
+ if (set_recv_wqes(ctx,size,user_param)) {\r
+ fprintf(stderr," Failed to post receive recv_wqes\n");\r
+ return 1;\r
}\r
- /*********************************************\r
- * Important note :\r
- * In case of UD/UC this is NOT the way to measure\r
- * BW sicen we are running with loop on the send side\r
- * while we should run on the receive side or enable retry in SW\r
- * Since the sender may be faster than the reciver than although\r
- * we had posted receive it is not enough and might end this will\r
- * result in deadlock of test since both sides are stuck on poll cq\r
- * In this test i do not solve this for the general test ,need to send\r
- * seperate test for UC/UD but in case the tx_depth is ~1/3 from the\r
- * number of iterations this should be ok .\r
- * Also note that the sender is limited in the number of send, and\r
- * i try to make the receiver full \r
- *********************************************/\r
-\r
- if (user_param->connection_type == UD)\r
- ctx->recv_list.length = ctx->size + 40;\r
- else\r
- ctx->recv_list.length = ctx->size;\r
- if (size > user_param->inline_size) /*compliance to perf_main */\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- else\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;\r
\r
+ // Set the length of the scatter in case of ALL option.\r
ctx->list.length = size;\r
- scnt = 0;\r
- ccnt = 0;\r
- rcnt = 0;\r
- qp = ctx->qp;\r
-\r
- while (ccnt < user_param->iters || rcnt < user_param->iters ) {\r
- struct ibv_wc wc;\r
- int ne;\r
- while (scnt < user_param->iters &&\r
- (scnt - ccnt) < user_param->tx_depth / 2) {\r
- struct ibv_send_wr *bad_wr;\r
- if (user_param->servername)\r
- tposted[scnt] = get_cycles();\r
- if (ibv_post_send(qp, &ctx->wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
- scnt);\r
+\r
+ // Loop until all sends have completed and all expected receives arrived.\r
+ while (ccnt < user_param->iters || rcnt < user_param->iters*num_of_qps) {\r
+\r
+ // Keep at most tx_depth sends outstanding (posted but not completed).\r
+ while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth) {\r
+\r
+ tposted[scnt] = get_cycles();\r
+ if (ibv_post_send(ctx->qp[0],&ctx->wr, &bad_wr)) {\r
+ fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);\r
return 1;\r
}\r
+\r
+ if (size <= (CYCLE_BUFFER / 2))\r
+ increase_loc_addr(&ctx->list,size,scnt,(uintptr_t)ctx->buf);\r
+\r
++scnt;\r
}\r
+\r
if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get cq_event\n");\r
- return 1;\r
- } \r
- if (ev_cq != ctx->cq) {\r
- fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);\r
+\r
+ if (ctx_notify_events(ctx->cq,ctx->channel)) {\r
+ fprintf(stderr,"Failed to notify events to CQ");\r
return 1;\r
}\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request CQ notification\n");\r
- return 1;\r
+ }\r
+\r
+ // Drain the CQ in batches of up to DEF_WC_SIZE completions.\r
+ do {\r
+ ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);\r
+ if (ne > 0) {\r
+ for (i = 0; i < ne; i++) {\r
+ if (wc[i].status != IBV_WC_SUCCESS)\r
+ NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);\r
+\r
+ if ((int) wc[i].wr_id == PINGPONG_SEND_WRID) {\r
+ tcompleted[ccnt++] = get_cycles();\r
+ }\r
+\r
+ // Any other wr_id is used directly as the index of the receiving QP.\r
+ // NOTE(review): wr_id is not range-checked against num_of_qps here.\r
+ else {\r
+ rcnt_for_qp[wc[i].wr_id]++;\r
+ rcnt++;\r
+ // Repost only while this QP still needs more receives than the\r
+ // rx_depth presumably pre-posted by set_recv_wqes - TODO confirm.\r
+ if (rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth <= user_param->iters) {\r
+\r
+ if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {\r
+ fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%d\n",(int)wc[i].wr_id , rcnt_for_qp[wc[i].wr_id]);\r
+ return 15;\r
+ }\r
+\r
+ if (SIZE(user_param->connection_type,size) <= (CYCLE_BUFFER / 2))\r
+ increase_loc_addr(&ctx->sge_list[wc[i].wr_id],\r
+ SIZE(user_param->connection_type,size),\r
+ rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth - 1,\r
+ ctx->my_addr[wc[i].wr_id]);\r
+\r
+ } \r
+ }\r
+ }\r
}\r
+ } while (ne > 0);\r
+\r
+ if (ne < 0) {\r
+ fprintf(stderr, "poll CQ failed %d\n", ne);\r
+ return 1;\r
}\r
- for (;;) {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- if (ne <= 0)\r
- break;\r
+ }\r
+ free(rcnt_for_qp);\r
+ free(wc);\r
+ return 0;\r
+}\r
+\r
+/****************************************************************************** \r
+ * run_iter_uni_server\r
+ *\r
+ * Receive side of the unidirectional test for messages of 'size' bytes.\r
+ * Posts the receive WQEs through set_recv_wqes(), then polls ctx->cq until\r
+ * user_param->iters receive completions were counted. The time of every\r
+ * completion is stored in tcompleted[], and the wr_id of a completion is used\r
+ * as the index of the QP on which a new receive is reposted.\r
+ * Before returning, tposted[0] is set to tcompleted[0].\r
+ *\r
+ * Returns 0 on success, 1 or 15 on failure.\r
+ * NOTE(review): the early-return error paths do not free 'wc' and\r
+ * 'rcnt_for_qp' - confirm that callers always exit right after a failure.\r
+ ******************************************************************************/\r
+int run_iter_uni_server(struct pingpong_context *ctx, \r
+ struct perftest_parameters *user_param,int size) {\r
+\r
+ int rcnt = 0;\r
+ int ne,i;\r
+ int *rcnt_for_qp = NULL;\r
+ struct ibv_wc *wc = NULL;\r
+ struct ibv_recv_wr *bad_wr_recv = NULL;\r
+\r
+ ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);\r
+ ALLOCATE(rcnt_for_qp,int,user_param->num_of_qps);\r
\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",\r
- wc.status, (int) wc.wr_id, wc.vendor_err);\r
- fprintf(stderr, "scnt=%d, ccnt=%d\n",\r
- scnt, ccnt);\r
+ memset(rcnt_for_qp,0,sizeof(int)*user_param->num_of_qps);\r
+\r
+ if (set_recv_wqes(ctx,size,user_param)) {\r
+ fprintf(stderr ," Failed to post receive recv_wqes\n");\r
+ return 1;\r
+ }\r
+\r
+ while (rcnt < user_param->iters) {\r
+\r
+ if (user_param->use_event) {\r
+ if (ctx_notify_events(ctx->cq,ctx->channel)) {\r
+ fprintf(stderr ," Failed to notify events to CQ");\r
return 1;\r
}\r
- switch ((int) wc.wr_id) {\r
- case PINGPONG_SEND_WRID:\r
- if (user_param->servername)\r
- tcompleted[ccnt] = get_cycles();\r
- ccnt += 1;\r
- break;\r
- case PINGPONG_RECV_WRID:\r
- if (--post_recv <= ctx->rx_depth - 2) {\r
- while (rcnt < user_param->iters &&\r
- (ctx->rx_depth - post_recv) > 0 ) {\r
- ++post_recv;\r
- if (ibv_post_recv(qp, &ctx->rwr, &bad_wr_recv)) {\r
- fprintf(stderr, "Couldn't post recv: rcnt=%d\n",\r
- rcnt);\r
+ }\r
+ \r
+ // Drain the CQ in batches of up to DEF_WC_SIZE completions.\r
+ do {\r
+ ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);\r
+ if (ne > 0) {\r
+ for (i = 0; i < ne; i++) {\r
+\r
+ if (wc[i].status != IBV_WC_SUCCESS) \r
+ NOTIFY_COMP_ERROR_RECV(wc[i],rcnt_for_qp[wc[i].wr_id]);\r
+ \r
+ // wr_id is used directly as the QP index (not range-checked here).\r
+ rcnt_for_qp[wc[i].wr_id]++;\r
+ tcompleted[rcnt++] = get_cycles();\r
+ \r
+ // Repost only while this QP still needs more receives than the\r
+ // rx_depth presumably pre-posted by set_recv_wqes - TODO confirm.\r
+ if (rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth <= user_param->iters) {\r
+\r
+ if (ibv_post_recv(ctx->qp[wc[i].wr_id],&ctx->rwr[wc[i].wr_id],&bad_wr_recv)) {\r
+ fprintf(stderr, "Couldn't post recv Qp=%d rcnt=%d\n",(int)wc[i].wr_id,rcnt_for_qp[wc[i].wr_id]);\r
return 15;\r
}\r
- }\r
+\r
+ if (SIZE(user_param->connection_type,size) <= (CYCLE_BUFFER / 2))\r
+ increase_loc_addr(&ctx->sge_list[wc[i].wr_id],\r
+ SIZE(user_param->connection_type,size),\r
+ rcnt_for_qp[wc[i].wr_id] + user_param->rx_depth,\r
+ ctx->my_addr[wc[i].wr_id]);\r
+ } \r
}\r
- rcnt += 1;\r
- break;\r
- default:\r
- fprintf(stderr, "Completion for unknown wr_id %d\n",\r
- (int) wc.wr_id);\r
- break;\r
}\r
- }\r
+ } while (ne > 0);\r
\r
if (ne < 0) {\r
- fprintf(stderr, "poll CQ failed %d\n", ne);\r
+ fprintf(stderr, "Poll Recieve CQ failed %d\n", ne);\r
return 1;\r
}\r
}\r
-\r
- return(0);\r
+ // The server posts no sends, so the first completion time stands in as the\r
+ // start time (print_report measures from tposted[0]).\r
+ tposted[0] = tcompleted[0];\r
+ free(wc);\r
+ free(rcnt_for_qp);\r
+ return 0;\r
}\r
\r
-static int run_iter_uni(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest *rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- int scnt, ccnt, rcnt;\r
- struct ibv_recv_wr *bad_wr_recv;\r
+/****************************************************************************** \r
+ * run_iter_uni_client\r
+ *\r
+ * Send side of the unidirectional test for messages of 'size' bytes.\r
+ * Posts user_param->iters sends on ctx->qp[0], keeping at most tx_depth of\r
+ * them outstanding, and polls ctx->cq until the same number of completions\r
+ * was counted. Post times are stored in tposted[] and completion times in\r
+ * tcompleted[].\r
+ *\r
+ * Returns 0 on success and 1 on failure.\r
+ * NOTE(review): the early-return error paths do not free 'wc' - confirm that\r
+ * callers always exit right after a failure.\r
+ ******************************************************************************/\r
+int run_iter_uni_client(struct pingpong_context *ctx, \r
+ struct perftest_parameters *user_param,int size) {\r
\r
- if (user_param->connection_type == UD) {\r
- if (size > 2048)\r
- size = 2048;\r
- }\r
+ int ne;\r
+ int i = 0;\r
+ int scnt = 0;\r
+ int ccnt = 0;\r
+ struct ibv_wc *wc = NULL;\r
+ struct ibv_send_wr *bad_wr = NULL;\r
\r
- if (user_param->connection_type == UD)\r
- ctx->recv_list.length = ctx->size + 40;\r
- else\r
- ctx->recv_list.length = ctx->size;\r
+ ALLOCATE(wc,struct ibv_wc,DEF_WC_SIZE);\r
\r
- if (size > user_param->inline_size) { /*complaince to perf_main */\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- } else {\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;\r
- }\r
+ // Set the length of the scatter in case of ALL option.\r
ctx->list.length = size;\r
- scnt = 0;\r
- ccnt = 0;\r
- rcnt = 0;\r
- qp = ctx->qp;\r
- if (!user_param->servername) {\r
- while (rcnt < user_param->iters) {\r
- int ne;\r
- struct ibv_wc wc;\r
- /*Server is polling on receive first */\r
- if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get cq_event\n");\r
- return 1;\r
- } \r
- if (ev_cq != ctx->cq) {\r
- fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);\r
- return 1;\r
- }\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request CQ notification\n");\r
- return 1;\r
- }\r
- }\r
- do {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- if (ne) {\r
- if (user_param->servername)\r
- tcompleted[ccnt] = get_cycles();\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",\r
- wc.status, (int) wc.wr_id, wc.vendor_err);\r
- fprintf(stderr, "scnt=%d, ccnt=%d\n",\r
- scnt, ccnt);\r
- return 1;\r
- }\r
- ++rcnt;\r
- if (ibv_post_recv(qp, &ctx->rwr, &bad_wr_recv)) {\r
- fprintf(stderr, "Couldn't post recv: rcnt=%d\n",\r
- rcnt);\r
- return 15;\r
- }\r
\r
- }\r
- } while (ne > 0 );\r
+ while (scnt < user_param->iters || ccnt < user_param->iters) {\r
+ // Keep at most tx_depth sends outstanding (posted but not completed).\r
+ while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {\r
\r
- if (ne < 0) {\r
- fprintf(stderr, "Poll Receive CQ failed %d\n", ne);\r
- return 12;\r
+ tposted[scnt] = get_cycles();\r
+ if (ibv_post_send(ctx->qp[0], &ctx->wr, &bad_wr)) {\r
+ fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);\r
+ return 1;\r
}\r
+\r
+ if (size <= (CYCLE_BUFFER / 2))\r
+ increase_loc_addr(&ctx->list,size,scnt,(uintptr_t)ctx->buf);\r
+\r
+ scnt++;\r
}\r
- } else {\r
- /* client is posting and not receiving. */\r
- while (scnt < user_param->iters || ccnt < user_param->iters) {\r
- while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {\r
- struct ibv_send_wr *bad_wr;\r
- if (user_param->servername)\r
- tposted[scnt] = get_cycles();\r
- if (ibv_post_send(qp, &ctx->wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
- scnt);\r
+\r
+ if (ccnt < user_param->iters) { \r
+ \r
+ if (user_param->use_event) {\r
+ if (ctx_notify_events(ctx->cq,ctx->channel)) {\r
+ fprintf(stderr , " Failed to notify events to CQ");\r
return 1;\r
}\r
- ++scnt;\r
- }\r
- if (ccnt < user_param->iters) {\r
- struct ibv_wc wc;\r
- int ne;\r
- if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get cq_event\n");\r
- return 1;\r
- } \r
- if (ev_cq != ctx->cq) {\r
- fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);\r
- return 1;\r
- }\r
- if (ibv_req_notify_cq(ctx->cq, 0)) {\r
- fprintf(stderr, "Couldn't request CQ notification\n");\r
- return 1;\r
- }\r
- } \r
- for (;;) {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- if (ne <= 0)\r
- break;\r
-\r
- if (user_param->servername)\r
- tcompleted[ccnt] = get_cycles();\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",\r
- wc.status, (int) wc.wr_id, wc.vendor_err);\r
- fprintf(stderr, "scnt=%d, ccnt=%d\n",\r
- scnt, ccnt);\r
- return 1;\r
- }\r
- ccnt += ne;\r
- }\r
+ } \r
+ // Drain the CQ in batches of up to DEF_WC_SIZE completions.\r
+ do {\r
+ ne = ibv_poll_cq(ctx->cq,DEF_WC_SIZE,wc);\r
+ if (ne > 0) {\r
+ // Only the first 'ne' entries of wc[] were filled by this poll;\r
+ // walking all DEF_WC_SIZE entries would read unfilled entries,\r
+ // over-count ccnt and overrun tcompleted[].\r
+ for (i = 0; i < ne; i++) {\r
\r
- if (ne < 0) {\r
- fprintf(stderr, "poll CQ failed %d\n", ne);\r
- return 1;\r
+ if (wc[i].status != IBV_WC_SUCCESS) \r
+ NOTIFY_COMP_ERROR_SEND(wc[i],scnt,ccnt);\r
+\r
+ tcompleted[ccnt++] = get_cycles();\r
+ } \r
}\r
+ } while (ne > 0);\r
+\r
+ if (ne < 0) {\r
+ fprintf(stderr, "poll CQ failed\n");\r
+ return 1;\r
}\r
}\r
}\r
+ free(wc);\r
return 0;\r
}\r
\r
+/****************************************************************************** \r
+ *\r
+ ******************************************************************************/\r
int __cdecl main(int argc, char *argv[])\r
{\r
- struct ibv_device **dev_list;\r
- struct ibv_device *ib_dev;\r
- struct pingpong_context *ctx;\r
- struct pingpong_dest my_dest;\r
- struct pingpong_dest *rem_dest;\r
- struct user_parameters user_param;\r
- struct ibv_device_attr device_attribute;\r
- char *ib_devname = NULL;\r
- int port = 18515;\r
- int ib_port = 1;\r
- int size = 65536;\r
- SOCKET sockfd;\r
- int i = 0;\r
- int size_max_pow = 24;\r
- int noPeak = 0;/*noPeak == 0: regular peak-bw calculation done*/\r
- int inline_given_in_cmd = 0;\r
- struct ibv_context *context;\r
- WORD version;\r
- WSADATA data;\r
- int err;\r
-\r
- srand((unsigned int) time(NULL));\r
- version = MAKEWORD(2, 2);\r
- err = WSAStartup(version, &data);\r
- if (err)\r
- return -1;\r
+ struct ibv_device **dev_list;\r
+ struct ibv_device *ib_dev;\r
+ struct pingpong_context *ctx;\r
+ struct pingpong_dest my_dest,rem_dest;\r
+ struct perftest_parameters user_param;\r
+ struct mcast_parameters mcg_params;\r
+ struct ibv_device_attr device_attribute;\r
+ char *ib_devname = NULL;\r
+ unsigned int size = 65536;\r
+ int i = 0;\r
+ int size_max_pow = 24;\r
+ int noPeak = 0;\r
+ int inline_given_in_cmd = 0;\r
+ struct ibv_context *context;\r
+ int no_cpu_freq_fail = 0;\r
+ int all = 0;\r
+ int size_of_arr;\r
+ const char *servername = NULL;\r
+\r
+ // Pointer to The relevent function of run_iter according to machine type.\r
+ int (*ptr_to_run_iter_uni)(struct pingpong_context*,struct perftest_parameters*,int);\r
+\r
+ SYSTEM_INFO si;\r
+ GetSystemInfo(&si);\r
\r
/* init default values to user's parameters */\r
- memset(&user_param, 0, sizeof(struct user_parameters));\r
- user_param.mtu = 0;\r
+ memset(&user_param, 0 , sizeof(struct perftest_parameters));\r
+ memset(&mcg_params, 0 , sizeof(struct mcast_parameters));\r
+ memset(&my_dest , 0 , sizeof(struct pingpong_dest));\r
+ memset(&rem_dest , 0 , sizeof(struct pingpong_dest));\r
+\r
+ user_param.ib_port = 1;\r
+ user_param.port = 18515;\r
user_param.iters = 1000;\r
user_param.tx_depth = 300;\r
- user_param.servername = NULL;\r
- user_param.use_event = 0;\r
- user_param.duplex = 0;\r
- user_param.inline_size = MAX_INLINE;\r
+ user_param.rx_depth = 600;\r
+ user_param.qp_timeout = 14;\r
+ user_param.gid_index = -1; \r
+ user_param.verb = SEND;\r
+ user_param.num_of_qps = 1;\r
+ // mcg_params.num_qps_on_group = 1;\r
\r
/* Parameter parsing. */\r
while (1) {\r
int c;\r
\r
- c = getopt(argc, argv, "h:p:d:i:m:c:s:n:t:I:r:ebaVgN");\r
+ static struct option long_options[] = {\r
+ { "port", 1, NULL, 'p' },\r
+ { "ib-dev", 1, NULL, 'd' },\r
+ { "ib-port", 1, NULL, 'i' },\r
+ { "mtu", 1, NULL, 'm' },\r
+ { "connection", 1, NULL, 'c' },\r
+ { "size", 1, NULL, 's' },\r
+ { "iters", 1, NULL, 'n' },\r
+ { "tx-depth", 1, NULL, 't' },\r
+ { "inline_size", 1, NULL, 'I' },\r
+ { "rx-depth", 1, NULL, 'r' },\r
+ { "qp-timeout", 1, NULL, 'u' },\r
+ { "sl", 1, NULL, 'S' },\r
+ { "gid-index", 1, NULL, 'x' },\r
+ { "MGID", 1, NULL, 'M' },\r
+ { "all", 0, NULL, 'a' },\r
+ { "bidirectional", 0, NULL, 'b' },\r
+ { "version", 0, NULL, 'V' },\r
+ { "events", 0, NULL, 'e' },\r
+ { "noPeak", 0, NULL, 'N' },\r
+ { "CPU-freq", 0, NULL, 'F' },\r
+ { "mcg", 1, NULL, 'g' },\r
+ { 0 }\r
+ };\r
+\r
+ c = getopt_long(argc, argv, "p:d:i:m:c:s:n:t:I:r:u:S:x:g:q:M:ebaVNF", long_options, NULL);\r
if (c == -1)\r
break;\r
\r
switch (c) {\r
case 'p':\r
- port = strtol(optarg, NULL, 0);\r
- if (port < 0 || port > 65535) {\r
+ user_param.port = strtol(optarg, NULL, 0);\r
+ if (user_param.port < 0 || user_param.port > 65535) {\r
usage(argv[0]);\r
return 1;\r
}\r
break;\r
case 'e':\r
- ++user_param.use_event;\r
+ user_param.use_event++;\r
break;\r
- case 'g':\r
- ++user_param.use_mcg;\r
+ case 'g':\r
+ user_param.use_mcg++;\r
+ user_param.num_of_qps = strtol(optarg, NULL, 0);\r
+ if (user_param.num_of_qps < 1 || user_param.num_of_qps > 56) {\r
+ usage(argv[0]);\r
+ return 1;\r
+ }\r
+ break;\r
+ case 'M' :\r
+ mcg_params.is_user_mgid = 1;\r
+ mcg_params.user_mgid = _strdup(optarg);\r
break;\r
case 'd':\r
ib_devname = _strdup(optarg);\r
user_param.mtu = strtol(optarg, NULL, 0);\r
break;\r
case 'a':\r
- user_param.all = ALL;\r
+ all = ALL;\r
break;\r
case 'V':\r
printf("send_bw version : %.2f\n",VERSION);\r
return 0;\r
+ break;\r
case 'i':\r
- ib_port = strtol(optarg, NULL, 0);\r
- if (ib_port < 0) {\r
+ user_param.ib_port = (uint8_t)(strtol(optarg, NULL, 0));\r
+ if (user_param.ib_port < 0) {\r
usage(argv[0]);\r
return 1;\r
}\r
\r
case 's':\r
size = strtol(optarg, NULL, 0);\r
+ if (size < 1 || size > UINT_MAX / 2) {\r
+ usage(argv[0]);\r
+ return 1;\r
+ }\r
+\r
break;\r
\r
- case 't':\r
- user_param.tx_depth = strtol(optarg, NULL, 0);\r
- if (user_param.tx_depth < 1) {\r
+ case 'x':\r
+ user_param.gid_index = strtol(optarg, NULL, 0);\r
+ if (user_param.gid_index > 63) {\r
usage(argv[0]);\r
return 1;\r
}\r
break;\r
\r
+ case 't':\r
+ user_param.tx_depth = strtol(optarg, NULL, 0);\r
+ if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }\r
+ break;\r
+\r
case 'I':\r
user_param.inline_size = strtol(optarg, NULL, 0);\r
inline_given_in_cmd =1;\r
case 'r':\r
errno = 0;\r
user_param.rx_depth = strtol(optarg, NULL, 0);\r
- if (errno) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
+ if (errno) { usage(argv[0]); return 1; }\r
+ break;\r
\r
case 'n':\r
user_param.iters = strtol(optarg, NULL, 0);\r
usage(argv[0]);\r
return 1;\r
}\r
+\r
break;\r
\r
case 'b':\r
break;\r
\r
case 'N':\r
- noPeak = 1;\r
+ noPeak = 2;\r
break;\r
\r
- case 'h':\r
- if (optarg) {\r
- user_param.servername = _strdup(optarg);\r
- break;\r
- }\r
+ case 'F':\r
+ no_cpu_freq_fail = 1;\r
+ break;\r
+\r
+ case 'u':\r
+ user_param.qp_timeout = (uint8_t)(strtol(optarg, NULL, 0));\r
+ break;\r
+\r
+ case 'S':\r
+ sl = (uint8_t)(strtol(optarg, NULL, 0));\r
+ if (sl > 15) { usage(argv[0]); return 1; }\r
+ break;\r
\r
default:\r
usage(argv[0]);\r
}\r
}\r
\r
- printf("------------------------------------------------------------------\n");\r
- if (user_param.duplex == 1 && (!user_param.use_mcg || !(user_param.connection_type == UD)))\r
+ if (optind == argc - 1) {\r
+ servername = _strdup(argv[optind]);\r
+ }\r
+ \r
+ else if (optind < argc) {\r
+ usage(argv[0]);\r
+ return 1;\r
+ }\r
+\r
+ printf(RESULT_LINE);\r
+ user_param.rx_depth = user_param.iters < user_param.rx_depth ? user_param.iters : user_param.rx_depth ;\r
+\r
+ user_param.machine = servername ? CLIENT : SERVER;\r
+\r
+ if (user_param.use_mcg) {\r
+\r
+ user_param.connection_type = UD;\r
+ if (user_param.duplex) {\r
+ user_param.num_of_qps++;\r
+ printf(" Send Bidirectional BW - Multicast Test\n");\r
+ }\r
+ else {\r
+ printf(" Send BW - Multicast Test\n");\r
+ if (user_param.machine == CLIENT)\r
+ user_param.num_of_qps = 1;\r
+ }\r
+ }\r
+\r
+ else if (user_param.duplex) {\r
printf(" Send Bidirectional BW Test\n");\r
- else if (user_param.duplex == 1 && user_param.use_mcg && (user_param.connection_type == UD))\r
- printf(" Send Bidirectional BW Multicast Test\n");\r
- else if (!user_param.duplex == 1 && user_param.use_mcg && (user_param.connection_type == UD))\r
- printf(" Send BW Multicast Test\n");\r
- else\r
+ } else \r
printf(" Send BW Test\n");\r
\r
if (user_param.connection_type == RC)\r
- printf("Connection type : RC\n");\r
+ printf(" Connection type : RC\n");\r
else if (user_param.connection_type == UC)\r
- printf("Connection type : UC\n");\r
+ printf(" Connection type : UC\n");\r
else{\r
- printf("Connection type : UD\n");\r
+ printf(" Connection type : UD\n");\r
+ }\r
+ \r
+ // Done with parameter parsing. Perform setup.\r
+ if (all == ALL) {\r
+ // since we run all sizes \r
+ size = 8388608;\r
}\r
\r
- /* Done with parameter parsing. Perform setup. */\r
- if (user_param.all == ALL)\r
- /*since we run all sizes */\r
- size = 8388608; /*2^23 */\r
else if (user_param.connection_type == UD && size > 2048) {\r
- printf("Max msg size in UD is 2048 changing to 2048\n");\r
+ printf(" Max msg size in UD is 2048 changing to 2048\n");\r
size = 2048;\r
}\r
\r
+ page_size = si.dwPageSize;\r
+\r
dev_list = ibv_get_device_list(NULL);\r
\r
if (!ib_devname) {\r
}\r
}\r
\r
+ if (user_param.use_mcg) \r
+ mcg_params.ib_devname = ibv_get_device_name(ib_dev);\r
+\r
+ // Should be a function over here that computes the inline.\r
context = ibv_open_device(ib_dev);\r
if (ibv_query_device(context, &device_attribute)) {\r
fprintf(stderr, "Failed to query device props");\r
return 1;\r
}\r
- if ((device_attribute.vendor_part_id == 25418) && (!inline_given_in_cmd)) {\r
- user_param.inline_size = 1;\r
+ if ((device_attribute.vendor_part_id == 25408 ||\r
+ device_attribute.vendor_part_id == 25418 ||\r
+ device_attribute.vendor_part_id == 26408 ||\r
+ device_attribute.vendor_part_id == 26468 || // Mountain Top.\r
+ device_attribute.vendor_part_id == 26418 ||\r
+ device_attribute.vendor_part_id == 26428) && (!inline_given_in_cmd)) {\r
+\r
+ user_param.inline_size = 0;\r
}\r
- printf("Inline data is used up to %d bytes message\n", user_param.inline_size);\r
+ printf(" Inline data is used up to %d bytes message\n", user_param.inline_size);\r
\r
- ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, user_param.rx_depth,\r
- ib_port, &user_param);\r
+ ctx = pp_init_ctx(ib_dev,size,&user_param);\r
if (!ctx)\r
return 1;\r
\r
- /* Create connection between client and server.\r
- * We do it by exchanging data over a TCP socket connection. */\r
+ // Set up the Connection.\r
+ if (set_up_connection(ctx,&user_param,&my_dest,&mcg_params)) {\r
+ fprintf(stderr," Unable to set up socket connection\n");\r
+ return 1;\r
+ } \r
\r
- my_dest.lid = pp_get_local_lid(ctx, ib_port);\r
- my_dest.qpn = ctx->qp->qp_num;\r
- my_dest.psn = rand() & 0xffffff;\r
- if (!my_dest.lid) {\r
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
+ // Init the connection and print the local data.\r
+ if (init_connection(&user_param,&my_dest,servername)) {\r
+ fprintf(stderr," Unable to init the socket connection\n");\r
return 1;\r
}\r
- my_dest.rkey = ctx->mr->rkey;\r
- my_dest.vaddr = (uintptr_t)ctx->buf + size;\r
- printf(" local address: LID %#04x, QPN %#06x, PSN %#06x\n",\r
- my_dest.lid, my_dest.qpn, my_dest.psn);\r
-\r
- if (user_param.servername) {\r
- sockfd = pp_client_connect(user_param.servername, port);\r
- if (sockfd == INVALID_SOCKET)\r
- return 1;\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- } else {\r
- sockfd = pp_server_connect(port);\r
- if (sockfd == INVALID_SOCKET)\r
- return 1;\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
- }\r
\r
- if (!rem_dest)\r
+ // shaking hands and gather the other side info.\r
+ if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {\r
+ fprintf(stderr,"Failed to exchange date between server and clients\n");\r
+ return 1;\r
+ \r
+ }\r
+ // For printing only MGID in the remote side.\r
+ user_param.side = REMOTE;\r
+ ctx_print_pingpong_data(&rem_dest,&user_param);\r
+\r
+ // Prepare IB resources for rtr/rts.\r
+ if (pp_connect_ctx(ctx,my_dest.psn,&rem_dest,&user_param)) {\r
+ fprintf(stderr," Unable to Connect the HCA's through the link\n");\r
return 1;\r
+ }\r
\r
- printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x\n",\r
- rem_dest->lid, rem_dest->qpn, rem_dest->psn);\r
-\r
- if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest, &user_param))\r
- return 1;\r
+ // shaking hands and gather the other side info.\r
+ if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {\r
+ fprintf(stderr,"Failed to exchange date between server and clients\n");\r
+ return 1;\r
+ \r
+ }\r
\r
- /* An additional handshake is required *after* moving qp to RTR.\r
- Arbitrarily reuse exch_dest for this purpose. */\r
- if (user_param.servername) {\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- } else {\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
- }\r
if (user_param.use_event) {\r
- printf("Test with events.\n");\r
+ printf(" Test with events.\n");\r
+\r
if (ibv_req_notify_cq(ctx->cq, 0)) {\r
fprintf(stderr, "Couldn't request CQ notification\n");\r
return 1;\r
} \r
}\r
- printf("------------------------------------------------------------------\n");\r
- printf(" #bytes #iterations BW peak[MB/sec] BW average[MB/sec] \n");\r
\r
- tposted = malloc(user_param.iters * sizeof *tposted);\r
+ printf(RESULT_LINE);\r
+ printf(RESULT_FMT);\r
\r
- if (!tposted) {\r
- perror("malloc");\r
- return 1;\r
- }\r
+ size_of_arr = (user_param.duplex) ? 1 : user_param.num_of_qps;\r
\r
- tcompleted = malloc(user_param.iters * sizeof *tcompleted);\r
+ ALLOCATE(tposted,cycles_t,user_param.iters*size_of_arr);\r
+ ALLOCATE(tcompleted,cycles_t,user_param.iters*size_of_arr);\r
\r
- if (!tcompleted) {\r
- perror("malloc");\r
- return 1;\r
+ if (user_param.machine == SERVER || user_param.duplex) {\r
+ ALLOCATE(ctx->rwr,struct ibv_recv_wr,user_param.num_of_qps);\r
+ ALLOCATE(ctx->sge_list,struct ibv_sge,user_param.num_of_qps);\r
+ ALLOCATE(ctx->my_addr ,uint64_t ,user_param.num_of_qps);\r
}\r
- /* send */\r
- if (user_param.connection_type == UD) {\r
- ctx->list.addr = (uintptr_t) ctx->buf + 40;\r
- ctx->wr.wr.ud.ah = ctx->ah;\r
- ctx->wr.wr.ud.remote_qpn = rem_dest->qpn;\r
- ctx->wr.wr.ud.remote_qkey = 0x11111111;\r
- if (user_param.use_mcg) {\r
- ctx->wr.wr.ud.remote_qpn = 0xffffff;\r
- } else {\r
- ctx->wr.wr.ud.remote_qpn = rem_dest->qpn;\r
- }\r
- } else\r
- ctx->list.addr = (uintptr_t) ctx->buf;\r
- ctx->list.lkey = ctx->mr->lkey;\r
- ctx->wr.wr_id = PINGPONG_SEND_WRID;\r
- ctx->wr.sg_list = &ctx->list;\r
- ctx->wr.num_sge = 1;\r
- ctx->wr.opcode = IBV_WR_SEND;\r
- ctx->wr.next = NULL;\r
\r
- /* receive */\r
- ctx->rwr.wr_id = PINGPONG_RECV_WRID;\r
- ctx->rwr.sg_list = &ctx->recv_list;\r
- ctx->rwr.num_sge = 1;\r
- ctx->rwr.next = NULL;\r
- ctx->recv_list.addr = (uintptr_t) ctx->buf;\r
- ctx->recv_list.lkey = ctx->mr->lkey;\r
+ ptr_to_run_iter_uni = (user_param.machine == CLIENT) ? &run_iter_uni_client : &run_iter_uni_server;\r
\r
- if (user_param.all == ALL) {\r
- if (user_param.connection_type == UD)\r
- size_max_pow = 12;\r
+ if (user_param.machine == CLIENT || user_param.duplex) {\r
+ set_send_wqe(ctx,size,rem_dest.qpn,&user_param);\r
+ if (noPeak != 2) noPeak = 0;\r
+ }\r
+ \r
+ if (all == ALL) {\r
+ if (user_param.connection_type == UD) {\r
+ if (user_param.gid_index < 0 || user_param.use_mcg) {\r
+ size_max_pow = 12;\r
+ } else {\r
+ size_max_pow = 11;\r
+ }\r
+ }\r
\r
for (i = 1; i < size_max_pow ; ++i) {\r
size = 1 << i;\r
if (user_param.duplex) {\r
- if(run_iter_bi(ctx, &user_param, rem_dest, size))\r
+ if(run_iter_bi(ctx,&user_param,size))\r
return 17;\r
} else {\r
- if(run_iter_uni(ctx, &user_param, rem_dest, size))\r
+ if((*ptr_to_run_iter_uni)(ctx,&user_param,size))\r
return 17;\r
}\r
- if (user_param.servername) {\r
- print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted, noPeak);\r
- /* sync again for the sake of UC/UC */\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- } else\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
+ print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted, noPeak, no_cpu_freq_fail, user_param.machine); \r
+\r
+ if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {\r
+ fprintf(stderr,"Failed to exchange date between server and clients\n");\r
+ return 1;\r
+ }\r
+ \r
}\r
+\r
} else {\r
if (user_param.duplex) {\r
- if (run_iter_bi(ctx, &user_param, rem_dest, size))\r
+ if(run_iter_bi(ctx,&user_param,size))\r
return 18;\r
- }\r
- else {\r
- if(run_iter_uni(ctx, &user_param, rem_dest, size))\r
+ } else {\r
+ if((*ptr_to_run_iter_uni)(ctx,&user_param,size))\r
return 18;\r
}\r
-\r
- if (user_param.servername)\r
- print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted, noPeak);\r
+ printf("machine %d noPeak %d\n",user_param.machine, noPeak);\r
+ print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted, noPeak, no_cpu_freq_fail, user_param.machine); \r
}\r
-\r
- /* close sockets */\r
- if (user_param.servername)\r
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);\r
- else\r
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);\r
-\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("send");\r
- fprintf(stderr, "Couldn't send to socket\n");\r
+ \r
+ if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {\r
+ fprintf(stderr," Failed to close connection between server and client\n");\r
return 1;\r
}\r
- closesocket(sockfd);\r
-\r
- free(tposted);\r
- free(tcompleted);\r
\r
- printf("------------------------------------------------------------------\n");\r
+ if (!user_param.use_event)\r
+ destroy_ctx_resources(ctx,&user_param,&mcg_params); \r
+ \r
+ printf(RESULT_LINE);\r
return 0;\r
}\r
USE_NATIVE_EH = 1\r
USE_IOSTREAM = 1\r
\r
-SOURCES = send_lat.rc send_lat.c ..\perftest.c\r
+SOURCES = send_lat.rc send_lat.c ..\perftest_resources.c ..\multicast_resources.c\r
\r
-INCLUDES = ..;..\..\..\ulp\libibverbs\include;\\r
- ..\..\..\inc;..\..\..\inc\user;\\r
- ..\..\..\inc\user\linux;\r
-\r
+INCLUDES = ..;..\..\..\ulp\libibverbs\include;..\..\..\ulp\libibumad\include;\\r
+ ..\..\..\inc;..\..\..\inc\user;..\..\..\inc\user\linux;..\..\..\inc\complib;\\r
+ ..\..\..\hw\mlx4\user\hca;\r
TARGETLIBS = \\r
$(SDK_LIB_PATH)\kernel32.lib \\r
$(SDK_LIB_PATH)\advapi32.lib \\r
$(SDK_LIB_PATH)\user32.lib \\r
$(SDK_LIB_PATH)\ole32.lib \\r
$(SDK_LIB_PATH)\ws2_32.lib \\r
+ $(SDK_LIB_PATH)\uuid.lib \\r
!if $(FREEBUILD)\r
- $(TARGETPATH)\*\libibverbs.lib\r
+ $(TARGETPATH)\*\libibverbs.lib \\r
+ $(TARGETPATH)\*\libibumad.lib \\r
+ $(TARGETPATH)\*\complib.lib \\r
!else\r
- $(TARGETPATH)\*\libibverbsd.lib\r
-!endif\r
-\r
+ $(TARGETPATH)\*\libibverbsd.lib \\r
+ $(TARGETPATH)\*\libibumadd.lib \\r
+ $(TARGETPATH)\*\complibd.lib \\r
+!endif
\ No newline at end of file
-/*\r
- * Copyright (c) 2005 Topspin Communications. All rights reserved.\r
- * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.\r
- * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)\r
- * Copyright (c) 2008 Intel Corporation. All rights reserved.\r
- *\r
- * This software is available to you under the OpenIB.org BSD license\r
- * below:\r
- *\r
- * Redistribution and use in source and binary forms, with or\r
- * without modification, are permitted provided that the following\r
- * conditions are met:\r
- *\r
- * - Redistributions of source code must retain the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer.\r
- *\r
- * - Redistributions in binary form must reproduce the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer in the documentation and/or other materials\r
- * provided with the distribution.\r
- *\r
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV\r
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
- * SOFTWARE.\r
- */\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-#include <ws2tcpip.h>\r
-#include <winsock2.h>\r
-#include <time.h>\r
-\r
-#include "..\..\..\etc\user\getopt.c"\r
-#include "perftest.h"\r
-#include <infiniband/verbs.h>\r
-\r
-#define PINGPONG_SEND_WRID 1\r
-#define PINGPONG_RECV_WRID 2\r
-#define RC 0\r
-#define UC 1\r
-#define UD 3\r
-#define VERSION 1.1\r
-#define SIGNAL 1\r
-#define MAX_INLINE 400\r
-#define MCG_LID 0xc001\r
-#define MCG_GID {255,1,0,0,0,2,201,133,0,0,0,0,0,0,0,0}\r
-\r
-typedef UINT64 cycles_t;\r
-cycles_t *tstamp;\r
-\r
-struct user_parameters {\r
- const char *servername;\r
- int connection_type;\r
- int mtu;\r
- int signal_comp;\r
- int all; /* run all msg size */\r
- int iters;\r
- int tx_depth;\r
- int use_event;\r
- int inline_size;\r
- int use_mcg;\r
-};\r
-\r
-struct report_options {\r
- int unsorted;\r
- int histogram;\r
- int cycles; /* report delta's in cycles, not microsec's */\r
-};\r
-\r
-struct pingpong_context {\r
- struct ibv_sge list;\r
- struct ibv_sge recv_list;\r
- struct ibv_send_wr wr;\r
- struct ibv_recv_wr rwr;\r
- struct ibv_context *context;\r
- struct ibv_comp_channel *channel;\r
- struct ibv_pd *pd;\r
- struct ibv_mr *mr;\r
- struct ibv_cq *scq;\r
- struct ibv_cq *rcq;\r
- struct ibv_qp *qp;\r
- struct ibv_ah *ah;\r
- void *buf;\r
- volatile char *post_buf;\r
- volatile char *poll_buf;\r
- int size;\r
- int tx_depth;\r
-};\r
-\r
-struct pingpong_dest {\r
- unsigned long long vaddr;\r
- int lid;\r
- int qpn;\r
- int psn;\r
- unsigned int rkey;\r
-};\r
-\r
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
-{\r
- struct ibv_port_attr attr;\r
-\r
- if (ibv_query_port(ctx->context, (uint8_t) port, &attr))\r
- return 0;\r
-\r
- return attr.lid;\r
-}\r
-\r
-static struct ibv_device *pp_find_dev(const char *ib_devname) {\r
- struct ibv_device **dev_list;\r
- struct ibv_device *ib_dev = NULL;\r
-\r
- dev_list = ibv_get_device_list(NULL);\r
- if (!dev_list)\r
- return NULL;\r
-\r
- if (!ib_devname) {\r
- ib_dev = dev_list[0];\r
- if (!ib_dev)\r
- fprintf(stderr, "No IB devices found\n");\r
- } else {\r
- for (; (ib_dev = *dev_list); ++dev_list)\r
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))\r
- break;\r
- if (!ib_dev)\r
- fprintf(stderr, "IB device %s not found\n", ib_devname);\r
- }\r
- return ib_dev;\r
-}\r
-\r
-#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")\r
-#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"\r
-\r
-static int pp_write_keys(SOCKET sockfd, const struct pingpong_dest *my_dest)\r
-{\r
- char msg[KEY_MSG_SIZE];\r
-\r
- sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,\r
- my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
-\r
- if (send(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client write");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- return -1;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int pp_read_keys(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest *rem_dest)\r
-{\r
- int parsed;\r
- char msg[KEY_MSG_SIZE];\r
-\r
- if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("pp_read_keys");\r
- fprintf(stderr, "Couldn't read remote address\n");\r
- return -1;\r
- }\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
-\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",\r
- (int)sizeof msg, msg);\r
- return -1;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int pp_client_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest *rem_dest)\r
-{\r
- if (pp_write_keys(sockfd, my_dest))\r
- return -1;\r
-\r
- return pp_read_keys(sockfd, my_dest, rem_dest);\r
-}\r
-\r
-static int pp_server_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest* rem_dest)\r
-{\r
-\r
- if (pp_read_keys(sockfd, my_dest, rem_dest))\r
- return -1;\r
-\r
- return pp_write_keys(sockfd, my_dest);\r
-}\r
-\r
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,\r
- int tx_depth, int port,struct user_parameters *user_parm) {\r
- struct pingpong_context *ctx;\r
- struct ibv_device_attr device_attr;\r
-\r
- ctx = malloc(sizeof *ctx);\r
- if (!ctx)\r
- return NULL;\r
-\r
- ctx->size = size;\r
- ctx->tx_depth = tx_depth;\r
- /* in case of UD need space for the GRH */\r
- if (user_parm->connection_type==UD) {\r
- ctx->buf = malloc(( size + 40 ) * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
- memset(ctx->buf, 0, ( size + 40 ) * 2);\r
- } else {\r
- ctx->buf = malloc(size * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
- memset(ctx->buf, 0, size * 2);\r
- }\r
-\r
- ctx->post_buf = (char*)ctx->buf + (size - 1);\r
- ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);\r
-\r
- ctx->context = ibv_open_device(ib_dev);\r
- if (!ctx->context) {\r
- fprintf(stderr, "Couldn't get context for %s\n",\r
- ibv_get_device_name(ib_dev));\r
- return NULL;\r
- }\r
- if (user_parm->mtu == 0) {/*user did not ask for specific mtu */\r
- if (ibv_query_device(ctx->context, &device_attr)) {\r
- fprintf(stderr, "Failed to query device props");\r
- return NULL;\r
- }\r
- if (device_attr.vendor_part_id == 23108) {\r
- user_parm->mtu = 1024;\r
- } else {\r
- user_parm->mtu = 2048;\r
- }\r
- }\r
- if (user_parm->use_event) {\r
- ctx->channel = ibv_create_comp_channel(ctx->context);\r
- if (!ctx->channel) {\r
- fprintf(stderr, "Couldn't create completion channel\n");\r
- return NULL;\r
- }\r
- } else\r
- ctx->channel = NULL;\r
- ctx->pd = ibv_alloc_pd(ctx->context);\r
- if (!ctx->pd) {\r
- fprintf(stderr, "Couldn't allocate PD\n");\r
- return NULL;\r
- }\r
- if (user_parm->connection_type==UD) {\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, (size + 40 ) * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
- } else {\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- ctx->scq = ibv_create_cq(ctx->context, tx_depth, NULL, ctx->channel, 0);\r
- if (!ctx->scq) {\r
- fprintf(stderr, "Couldn't create CQ\n");\r
- return NULL;\r
- }\r
- ctx->rcq = ibv_create_cq(ctx->context, tx_depth, NULL, ctx->channel, 0);\r
- if (!ctx->rcq) {\r
- fprintf(stderr, "Couldn't create Receive CQ\n");\r
- return NULL;\r
- }\r
- {\r
- struct ibv_qp_init_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_init_attr));\r
- attr.send_cq = ctx->scq;\r
- attr.recv_cq = ctx->rcq;\r
- attr.cap.max_send_wr = tx_depth;\r
- /* Work around: driver doesnt support\r
- * recv_wr = 0 */\r
- attr.cap.max_recv_wr = tx_depth;\r
- attr.cap.max_send_sge = 1;\r
- attr.cap.max_recv_sge = 1;\r
- attr.cap.max_inline_data = user_parm->inline_size;\r
- switch (user_parm->connection_type) {\r
- case RC :\r
- attr.qp_type = IBV_QPT_RC;\r
- break;\r
- case UC :\r
- attr.qp_type = IBV_QPT_UC;\r
- break;\r
- case UD :\r
- attr.qp_type = IBV_QPT_UD;\r
- break;\r
- default:\r
- fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);\r
- return NULL;\r
- }\r
- attr.sq_sig_all = 0;\r
- ctx->qp = ibv_create_qp(ctx->pd, &attr);\r
- if (!ctx->qp) {\r
- fprintf(stderr, "Couldn't create QP\n");\r
- return NULL;\r
- }\r
-\r
- if ((user_parm->connection_type==UD) && (user_parm->use_mcg)) {\r
- union ibv_gid gid;\r
- uint8_t mcg_gid[16] = MCG_GID;\r
-\r
- /* use the local QP number as part of the mcg */\r
- mcg_gid[11] = (user_parm->servername) ? 0 : 1;\r
- *(uint32_t *)(&mcg_gid[12]) = ctx->qp->qp_num;\r
- memcpy(gid.raw, mcg_gid, 16);\r
-\r
- if (ibv_attach_mcast(ctx->qp, &gid, MCG_LID)) {\r
- fprintf(stderr, "Couldn't attach QP to mcg\n");\r
- return NULL;\r
- }\r
- }\r
- }\r
-\r
- {\r
- struct ibv_qp_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_init_attr));\r
- attr.qp_state = IBV_QPS_INIT;\r
- attr.pkey_index = 0;\r
- attr.port_num = (uint8_t) port;\r
- if (user_parm->connection_type==UD) {\r
- attr.qkey = 0x11111111;\r
- } else {\r
- attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE;\r
- }\r
-\r
- if (user_parm->connection_type==UD) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_QKEY)) {\r
- fprintf(stderr, "Failed to modify UD QP to INIT\n");\r
- return NULL;\r
- }\r
- } else if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_ACCESS_FLAGS)) {\r
- fprintf(stderr, "Failed to modify QP to INIT\n");\r
- return NULL;\r
- }\r
- }\r
- //send \r
- ctx->wr.wr_id = PINGPONG_SEND_WRID;\r
- ctx->wr.sg_list = &ctx->list;\r
- ctx->wr.num_sge = 1;\r
- ctx->wr.opcode = IBV_WR_SEND;\r
- ctx->wr.next = NULL;\r
- //receive\r
- ctx->rwr.wr_id = PINGPONG_RECV_WRID;\r
- ctx->rwr.sg_list = &ctx->recv_list;\r
- ctx->rwr.num_sge = 1;\r
- ctx->rwr.next = NULL;\r
- return ctx;\r
-}\r
-\r
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,\r
- struct pingpong_dest *dest,struct user_parameters *user_parm)\r
-{\r
- struct ibv_qp_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_attr));\r
- attr.qp_state = IBV_QPS_RTR;\r
- if (user_parm->connection_type != UD) {\r
- switch (user_parm->mtu) {\r
- case 256 : \r
- attr.path_mtu = IBV_MTU_256;\r
- break;\r
- case 512 :\r
- attr.path_mtu = IBV_MTU_512;\r
- break;\r
- case 1024 :\r
- attr.path_mtu = IBV_MTU_1024;\r
- break;\r
- case 2048 :\r
- attr.path_mtu = IBV_MTU_2048;\r
- break;\r
- case 4096 :\r
- attr.path_mtu = IBV_MTU_4096;\r
- break;\r
- }\r
- printf("Mtu : %d\n", user_parm->mtu);\r
- attr.dest_qp_num = dest->qpn;\r
- attr.rq_psn = dest->psn;\r
- }\r
- if (user_parm->connection_type==RC) {\r
- attr.max_dest_rd_atomic = 1;\r
- attr.min_rnr_timer = 12;\r
- }\r
-\r
- attr.ah_attr.is_global = 0;\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.sl = 0;\r
- attr.ah_attr.src_path_bits = 0;\r
- attr.ah_attr.port_num = (uint8_t) port;\r
- if ((user_parm->connection_type==UD) && (user_parm->use_mcg)) {\r
- uint8_t mcg_gid[16] = MCG_GID;\r
-\r
- /* send the message to the mcg of the other side */\r
- mcg_gid[11] = (user_parm->servername) ? 1 : 0;\r
- *(uint32_t *)(&mcg_gid[12]) = dest->qpn;\r
-\r
- attr.ah_attr.dlid = MCG_LID;\r
- attr.ah_attr.is_global = 1;\r
- attr.ah_attr.grh.sgid_index = 0;\r
- memcpy(attr.ah_attr.grh.dgid.raw, mcg_gid, 16);\r
- } else {\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.is_global = 0;\r
- }\r
-\r
- if (user_parm->connection_type==RC) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN |\r
- IBV_QP_MIN_RNR_TIMER |\r
- IBV_QP_MAX_DEST_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTR\n");\r
- return 1;\r
- }\r
- attr.timeout = 14;\r
- attr.retry_cnt = 7;\r
- attr.rnr_retry = 7;\r
- } else if (user_parm->connection_type==UC) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
- return 1;\r
- }\r
-\r
- } else {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE )) {\r
- fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
- return 1;\r
- }\r
-\r
- }\r
- attr.qp_state = IBV_QPS_RTS;\r
- attr.sq_psn = my_psn;\r
- if (user_parm->connection_type==RC) {\r
- attr.max_rd_atomic = 1;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN |\r
- IBV_QP_TIMEOUT |\r
- IBV_QP_RETRY_CNT |\r
- IBV_QP_RNR_RETRY |\r
- IBV_QP_MAX_QP_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
- return 1;\r
- }\r
- } else { /*both UC and UD */\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC/UD QP to RTS\n");\r
- return 1;\r
- }\r
-\r
- }\r
- if (user_parm->connection_type==UD) {\r
- ctx->ah = ibv_create_ah(ctx->pd, &attr.ah_attr);\r
- if (!ctx->ah) {\r
- fprintf(stderr, "Failed to create AH for UD\n");\r
- return 1;\r
- }\r
- }\r
- /* post receive max msg size*/\r
- {\r
- int i;\r
- struct ibv_recv_wr *bad_wr_recv;\r
-\r
- ctx->recv_list.addr = (uintptr_t) ctx->buf;\r
- if (user_parm->connection_type==UD) {\r
- ctx->recv_list.length = ctx->size + 40;\r
- } else {\r
- ctx->recv_list.length = ctx->size;\r
- }\r
- ctx->recv_list.lkey = ctx->mr->lkey;\r
- for (i = 0; i < user_parm->tx_depth / 2; ++i) {\r
- if (ibv_post_recv(ctx->qp, &ctx->rwr, &bad_wr_recv)) {\r
- fprintf(stderr, "Couldn't post recv: counter=%d\n",\r
- i);\r
- return 14;\r
- } \r
- }\r
- }\r
- return 0;\r
-}\r
-\r
-static int pp_open_port(struct pingpong_context *ctx, const char * servername,\r
- int ib_port, int port, struct pingpong_dest *rem_dest,struct user_parameters *user_parm)\r
-{\r
- char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x\n";\r
- struct pingpong_dest my_dest;\r
- SOCKET sockfd;\r
- int rc;\r
-\r
- /* Create connection between client and server.\r
- * We do it by exchanging data over a TCP socket connection. */\r
-\r
- my_dest.lid = pp_get_local_lid(ctx, ib_port);\r
- my_dest.qpn = ctx->qp->qp_num;\r
- my_dest.psn = rand() & 0xffffff;\r
- if (!my_dest.lid) {\r
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
- return -1;\r
- }\r
- my_dest.rkey = ctx->mr->rkey;\r
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;\r
-\r
- printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn);\r
-\r
- sockfd = servername ? pp_client_connect(servername, port) :\r
- pp_server_connect(port);\r
-\r
- if (sockfd == INVALID_SOCKET) {\r
- printf("pp_connect_sock(%s,%d) failed (%d)!\n",\r
- servername, port, sockfd);\r
- return (int) sockfd;\r
- }\r
-\r
- rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest) :\r
- pp_server_exch_dest(sockfd, &my_dest, rem_dest);\r
- if (rc)\r
- return rc;\r
-\r
- printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,\r
- rem_dest->rkey, rem_dest->vaddr);\r
-\r
- if ((rc = pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest,user_parm)))\r
- return rc;\r
-\r
- /* An additional handshake is required *after* moving qp to RTR.\r
- * Arbitrarily reuse exch_dest for this purpose.\r
- */\r
-\r
- rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest) :\r
- pp_server_exch_dest(sockfd, &my_dest, rem_dest);\r
-\r
- if (rc)\r
- return rc;\r
-\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
-\r
- closesocket(sockfd);\r
- return 0;\r
-}\r
-\r
-static void usage(const char *argv0)\r
-{\r
- printf("Usage:\n");\r
- printf(" %s start a server and wait for connection\n", argv0);\r
- printf(" %s -h <host> connect to server at <host>\n", argv0);\r
- printf("\n");\r
- printf("Options:\n");\r
- printf(" -p <port> listen on/connect to port <port> (default 18515)\n");\r
- printf(" -c <RC/UC/UD> connection type RC/UC/UD (default RC)\n");\r
- printf(" -m <mtu> mtu size (256 - 4096. default for hermon is 2048)\n");\r
- printf(" -d <dev> use IB device <dev> (default first device found)\n");\r
- printf(" -i <port> use port <port> of IB device (default 1)\n");\r
- printf(" -s <size> size of message to exchange (default 1)\n");\r
- printf(" -t <dep> size of tx queue (default 50)\n");\r
- printf(" -l signal completion on each msg\n");\r
- printf(" -a Run sizes from 2 till 2^23\n");\r
- printf(" -n <iters> number of exchanges (at least 2, default 1000)\n");\r
- printf(" -I <size> max size of message to be sent in inline mode (default 400)\n");\r
- printf(" -C report times in cpu cycle units (default microseconds)\n");\r
- printf(" -H print out all results (default print summary only)\n");\r
- printf(" -U (implies -H) print out unsorted results (default sorted)\n");\r
- printf(" -V display version number\n");\r
- printf(" -e sleep on CQ events (default poll)\n");\r
- printf(" -g send messages to multicast group(only available in UD connection\n");\r
-}\r
-\r
-static void print_report(struct report_options * options,\r
- unsigned int iters, cycles_t *tstamp,int size)\r
-{\r
- cycles_t cycles_to_units;\r
- cycles_t median;\r
- unsigned int i;\r
- const char* units;\r
- cycles_t *delta = malloc((iters - 1) * sizeof *delta);\r
-\r
- if (!delta) {\r
- perror("malloc");\r
- return;\r
- }\r
-\r
- for (i = 0; i < iters - 1; ++i)\r
- delta[i] = tstamp[i + 1] - tstamp[i];\r
-\r
- if (options->cycles) {\r
- cycles_to_units = 1;\r
- units = "cycles";\r
- } else {\r
- cycles_to_units = get_freq();\r
- units = "sec";\r
- }\r
-\r
- if (options->unsorted) {\r
- printf("#, %s, frequency=%I64d\n", units, get_freq());\r
- for (i = 0; i < iters - 1; ++i)\r
- printf("%d, %g\n", i + 1, delta[i] / cycles_to_units / 2. * 1000000.);\r
- }\r
-\r
- qsort(delta, iters - 1, sizeof *delta, cycles_compare);\r
-\r
- if (options->histogram) {\r
- printf("#, %s\n", units);\r
- for (i = 0; i < iters - 1; ++i)\r
- printf("%d, %7.2f\n", i + 1, (double) delta[i] / (double) cycles_to_units / 2. * 1000000.);\r
- }\r
-\r
- median = get_median(iters - 1, delta);\r
- printf("%7d %d %7.2f %7.2f %7.2f\n",\r
- size, iters, (double) delta[0] / (double) cycles_to_units / 2. * 1000000.,\r
- (double) delta[iters - 2] / (double) cycles_to_units / 2. * 1000000.,\r
- (double) median / (double) cycles_to_units / 2. * 1000000.);\r
- free(delta);\r
-}\r
-\r
-static int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest *rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- struct ibv_send_wr *wr, *bad_wr;\r
- struct ibv_recv_wr rwr;\r
- struct ibv_recv_wr *bad_wr_recv;\r
- volatile char *poll_buf; \r
- volatile char *post_buf;\r
- int scnt, rcnt, ccnt, poll;\r
- int iters;\r
- int tx_depth;\r
- iters = user_param->iters;\r
- tx_depth = user_param->tx_depth;\r
-\r
- if (user_param->connection_type==UD) {\r
- if (size > 2048) {\r
- size = 2048;\r
- }\r
- }\r
-\r
- ///send //\r
- wr = &ctx->wr;\r
- if (user_param->connection_type==UD) {\r
- ctx->list.addr = (uintptr_t) ctx->buf + 40;\r
- } else {\r
- ctx->list.addr = (uintptr_t) ctx->buf;\r
- }\r
- ctx->list.length = size;\r
- ctx->list.lkey = ctx->mr->lkey;\r
- if (user_param->connection_type==UD) {\r
- ctx->wr.wr.ud.ah = ctx->ah;\r
- ctx->wr.wr.ud.remote_qpn = rem_dest->qpn;\r
- ctx->wr.wr.ud.remote_qkey = 0x11111111;\r
- if (user_param->use_mcg) {\r
- ctx->wr.wr.ud.remote_qpn = 0xffffff;\r
- } else {\r
- ctx->wr.wr.ud.remote_qpn = rem_dest->qpn;\r
- }\r
- }\r
- /// receive //\r
- rwr = ctx->rwr;\r
- ctx->recv_list.addr = (uintptr_t) ctx->buf;\r
- if (user_param->connection_type==UD) {\r
- ctx->recv_list.length = ctx->size + 40;\r
- } else {\r
- ctx->recv_list.length = ctx->size;\r
- }\r
-\r
- ctx->recv_list.lkey = ctx->mr->lkey;\r
-\r
- scnt = 0;\r
- rcnt = 0;\r
- ccnt = 0;\r
- poll = 0;\r
- poll_buf = ctx->poll_buf;\r
- post_buf = ctx->post_buf;\r
- qp = ctx->qp;\r
- if (size > user_param->inline_size || size == 0) {/* complaince to perf_main don't signal*/\r
- ctx->wr.send_flags = 0;\r
- } else {\r
- ctx->wr.send_flags = IBV_SEND_INLINE;\r
- }\r
-\r
- while (scnt < iters || rcnt < iters) {\r
- if (rcnt < iters && !(scnt < 1 && user_param->servername)) {\r
- int ne;\r
- struct ibv_wc wc;\r
- /*Server is polling on receive first */\r
- ++rcnt;\r
- if (ibv_post_recv(qp, &rwr, &bad_wr_recv)) {\r
- fprintf(stderr, "Couldn't post recv: rcnt=%d\n",\r
- rcnt);\r
- return 15;\r
- }\r
- if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
-\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get receive cq_event\n");\r
- return 1;\r
- }\r
-\r
- if (ev_cq != ctx->rcq) {\r
- fprintf(stderr, "CQ event for unknown RCQ %p\n", ev_cq);\r
- return 1;\r
- }\r
-\r
- if (ibv_req_notify_cq(ctx->rcq, 0)) {\r
- fprintf(stderr, "Couldn't request RCQ notification\n");\r
- return 1;\r
- }\r
- }\r
- do {\r
- ne = ibv_poll_cq(ctx->rcq, 1, &wc);\r
- } while (!user_param->use_event && ne < 1);\r
-\r
- if (ne < 0) {\r
- fprintf(stderr, "Poll Receive CQ failed %d\n", ne);\r
- return 12;\r
- }\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Receive Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d\n",\r
- wc.status, (int) wc.wr_id);\r
- fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",\r
- scnt, rcnt, ccnt);\r
- return 13;\r
- }\r
- }\r
- if (scnt < iters ) {\r
- if (ccnt == (tx_depth - 2) || (user_param->signal_comp == SIGNAL)\r
- || (scnt == (iters - 1)) ) {\r
- ccnt = 0;\r
- poll=1;\r
- if (size > user_param->inline_size || size == 0) {/* complaince to perf_main */\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- } else {\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;\r
- }\r
-\r
- }\r
- /* client post first */\r
- if (user_param->servername)\r
- tstamp[scnt] = get_cycles();\r
- *post_buf = (char)++scnt;\r
- if (ibv_post_send(qp, wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
- scnt);\r
- return 11;\r
- }\r
- }\r
- if (poll == 1) {\r
- struct ibv_wc wc;\r
- int ne;\r
- if (user_param->use_event) {\r
- struct ibv_cq *ev_cq;\r
- void *ev_ctx;\r
-\r
- if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {\r
- fprintf(stderr, "Failed to get send cq_event\n");\r
- return 1;\r
- }\r
-\r
- if (ev_cq != ctx->scq) {\r
- fprintf(stderr, "CQ event for unknown SCQ %p\n", ev_cq);\r
- return 1;\r
- }\r
-\r
- if (ibv_req_notify_cq(ctx->scq, 0)) {\r
- fprintf(stderr, "Couldn't request SCQ notification\n");\r
- return 1;\r
- }\r
- }\r
- /* poll on scq */\r
- do {\r
- ne = ibv_poll_cq(ctx->scq, 1, &wc);\r
- } while (!user_param->use_event && ne < 1);\r
-\r
- if (ne < 0) {\r
- fprintf(stderr, "poll SCQ failed %d\n", ne);\r
- return 12;\r
- }\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d\n",\r
- wc.status, (int) wc.wr_id);\r
- fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",\r
- scnt, rcnt, ccnt);\r
- return 13;\r
- }\r
- poll = 0;\r
- if (size > user_param->inline_size || size == 0) {/* complaince to perf_main don't signal*/\r
- ctx->wr.send_flags = 0;\r
- } else {\r
- ctx->wr.send_flags = IBV_SEND_INLINE;\r
- }\r
-\r
- }\r
- ++ccnt;\r
- }\r
-\r
- return(0);\r
-}\r
-\r
-int __cdecl main(int argc, char *argv[])\r
-{\r
- const char *ib_devname = NULL;\r
- int port = 18515;\r
- int ib_port = 1;\r
- int size = 2;\r
- int i = 0;\r
- int size_max_pow = 24;\r
- struct report_options report;\r
- struct pingpong_context *ctx;\r
- struct pingpong_dest rem_dest;\r
- struct ibv_device *ib_dev;\r
- struct user_parameters user_param;\r
- WORD version;\r
- WSADATA data;\r
- int err;\r
-\r
- srand((unsigned int) time(NULL));\r
- version = MAKEWORD(2, 2);\r
- err = WSAStartup(version, &data);\r
- if (err)\r
- return -1;\r
-\r
- /* init default values to user's parameters */\r
- memset(&report, 0, sizeof report); \r
- memset(&user_param, 0, sizeof(struct user_parameters));\r
- user_param.mtu = 0;\r
- user_param.iters = 1000;\r
- user_param.tx_depth = 50;\r
- user_param.servername = NULL;\r
- user_param.use_event = 0;\r
- user_param.use_mcg = 0;\r
- user_param.inline_size = MAX_INLINE;\r
- user_param.signal_comp = 0;\r
-\r
- /* Parameter parsing. */\r
- while (1) {\r
- int c;\r
-\r
- c = getopt(argc, argv, "h:p:c:m:d:i:s:n:t:I:laeCHUVg");\r
- if (c == -1)\r
- break;\r
-\r
- switch (c) {\r
- case 'p':\r
- port = strtol(optarg, NULL, 0);\r
- if (port < 0 || port > 65535) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
- case 'c':\r
- if (strcmp("UC",optarg)==0)\r
- user_param.connection_type=UC;\r
- if (strcmp("UD",optarg)==0)\r
- user_param.connection_type=UD;\r
- /* default is 0 for any other option RC*/\r
- break;\r
- case 'e':\r
- ++user_param.use_event;\r
- break;\r
- case 'g':\r
- ++user_param.use_mcg;\r
- break;\r
- case 'm':\r
- user_param.mtu = strtol(optarg, NULL, 0);\r
- break;\r
- case 'l':\r
- user_param.signal_comp = SIGNAL;\r
- break;\r
- case 'a':\r
- user_param.all = SIGNAL;\r
- break;\r
- case 'V':\r
- printf("perftest version : %.2f\n",VERSION);\r
- return 0;\r
- case 'd':\r
- ib_devname = _strdup(optarg);\r
- break;\r
-\r
- case 'i':\r
- ib_port = strtol(optarg, NULL, 0);\r
- if (ib_port < 0) {\r
- usage(argv[0]);\r
- return 2;\r
- }\r
- break;\r
-\r
- case 's':\r
- size = strtol(optarg, NULL, 0);\r
- if (size < 1) {\r
- usage(argv[0]); return 3;\r
- }\r
- break;\r
-\r
- case 't':\r
- user_param.tx_depth = strtol(optarg, NULL, 0);\r
- if (user_param.tx_depth < 1) {\r
- usage(argv[0]); return 4;\r
- }\r
- break;\r
-\r
- case 'I':\r
- user_param.inline_size = strtol(optarg, NULL, 0);\r
- if (user_param.inline_size > MAX_INLINE) {\r
- usage(argv[0]);\r
- return 19;\r
- }\r
- break;\r
-\r
- case 'n':\r
- user_param.iters = strtol(optarg, NULL, 0);\r
- if (user_param.iters < 2) {\r
- usage(argv[0]);\r
- return 5;\r
- }\r
-\r
- break;\r
-\r
- case 'C':\r
- report.cycles = 1;\r
- break;\r
-\r
- case 'H':\r
- report.histogram = 1;\r
- break;\r
-\r
- case 'U':\r
- report.unsorted = 1;\r
- break;\r
-\r
- case 'h':\r
- if (optarg) {\r
- user_param.servername = _strdup(optarg);\r
- break;\r
- }\r
-\r
- default:\r
- usage(argv[0]);\r
- return 5;\r
- }\r
- }\r
-\r
- /*\r
- * Done with parameter parsing. Perform setup.\r
- */\r
- tstamp = malloc(user_param.iters * sizeof *tstamp);\r
- if (!tstamp) {\r
- perror("malloc");\r
- return 10;\r
- }\r
- /* Print header data */\r
- printf("------------------------------------------------------------------\n");\r
- if (user_param.use_mcg && (user_param.connection_type == UD))\r
- printf(" Send Latency Multicast Test\n");\r
- else\r
- printf(" Send Latency Test\n");\r
- printf("Inline data is used up to %d bytes message\n", user_param.inline_size);\r
- if (user_param.connection_type==RC) {\r
- printf("Connection type : RC\n");\r
- } else if (user_param.connection_type==UC) { \r
- printf("Connection type : UC\n");\r
- } else {\r
- printf("Connection type : UD\n");\r
- }\r
- if (user_param.all == 1) {\r
- /*since we run all sizes lets allocate big enough buffer */\r
- size = 8388608; /*2^23 */\r
- }\r
-\r
- ib_dev = pp_find_dev(ib_devname);\r
- if (!ib_dev)\r
- return 7;\r
-\r
- ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port,&user_param);\r
- if (!ctx)\r
- return 8;\r
-\r
- if (pp_open_port(ctx, user_param.servername, ib_port, port, &rem_dest,&user_param))\r
- return 9;\r
- if (user_param.use_event) {\r
- printf("Test with events.\n");\r
- if (ibv_req_notify_cq(ctx->rcq, 0)) {\r
- fprintf(stderr, "Couldn't request RCQ notification\n");\r
- return 1;\r
- } \r
- if (ibv_req_notify_cq(ctx->scq, 0)) {\r
- fprintf(stderr, "Couldn't request SCQ notification\n");\r
- return 1;\r
- }\r
-\r
- }\r
- printf("------------------------------------------------------------------\n");\r
- printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");\r
- \r
- if (user_param.all == 1) {\r
- if (user_param.connection_type==UD) {\r
- size_max_pow = 12;\r
- }\r
- for (i = 1; i < size_max_pow ; ++i) {\r
- size = 1 << i;\r
- if(run_iter(ctx, &user_param, &rem_dest, size))\r
- return 17;\r
-\r
- print_report(&report, user_param.iters, tstamp, size);\r
- }\r
- } else {\r
- if(run_iter(ctx, &user_param, &rem_dest, size))\r
- return 18; \r
- print_report(&report, user_param.iters, tstamp, size);\r
- }\r
- printf("------------------------------------------------------------------\n");\r
- free(tstamp);\r
- return 0;\r
-}\r
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <infiniband/verbs.h>
+#include <windows.h>
+
+#include <ws2tcpip.h>
+#include <winsock2.h>
+
+
+#include "..\..\tools\perftests\user\get_clock.h"
+#include "..\..\etc\user\getopt.c"
+#include "perftest_resources.h"
+#include "multicast_resources.h"
+#include "l2w.h"
+
+#define VERSION 1.3
+#define SIGNAL 1
+#define MAX_INLINE 400
+
+static int page_size;
+cycles_t *tstamp;
+
+/* Output-format switches read by print_report(). */
+struct report_options {
+ int unsorted; /* non-zero: dump every delta in measurement order (before sorting) */
+ int histogram; /* non-zero: dump every delta after sorting */
+ int cycles; /* report delta's in cycles, not microsec's */
+};
+
+/*
+ * All verbs resources and work-request templates used by one side of the test.
+ * Filled in by pp_init_ctx() and pp_connect_ctx().
+ */
+struct pingpong_context {
+ struct ibv_sge list; /* scatter/gather entry referenced by the send WR (wr) */
+ struct ibv_sge recv_list; /* scatter/gather entry referenced by the receive WR (rwr) */
+ struct ibv_send_wr wr; /* send work-request template, re-posted every iteration */
+ struct ibv_recv_wr rwr; /* receive work-request template; wr_id holds the QP index */
+ struct ibv_context *context; /* device context returned by ibv_open_device() */
+ struct ibv_comp_channel *channel; /* completion channel, NULL unless events are requested */
+ struct ibv_pd *pd; /* protection domain owning mr, the QPs and ah */
+ struct ibv_mr *mr; /* memory region registered over buf */
+ struct ibv_cq *rcq; /* receive completion queue */
+ struct ibv_cq *scq; /* send completion queue */
+ struct ibv_qp **qp; /* array of user_parm->num_of_qps queue pairs */
+ struct ibv_ah *ah; /* address handle, created only on the UD path; NULL otherwise */
+ void *buf; /* work buffer: receive area first, send area in the second half */
+ int size; /* message size handed to pp_init_ctx() */
+};
+
+/******************************************************************************
+ * set_mcast_group
+ *
+ * Collects the local port data (port GID, P_Key, SM LID and SM SL) into
+ * mcg_params, joins the multicast group and attaches every QP in ctx->qp
+ * to it.
+ *
+ * Returns 0 on success and 1 on any failure.
+ ******************************************************************************/
+static int set_mcast_group(struct pingpong_context *ctx,
+                           struct perftest_parameters *user_parm,
+                           struct mcast_parameters *mcg_params) {
+
+    int i;
+    struct ibv_port_attr port_attr;
+
+    if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&mcg_params->port_gid)) {
+        return 1;
+    }
+
+    if (ibv_query_pkey(ctx->context,user_parm->ib_port,DEF_PKEY_IDX,&mcg_params->pkey)) {
+        return 1;
+    }
+
+    if (ibv_query_port(ctx->context,user_parm->ib_port,&port_attr)) {
+        return 1;
+    }
+    mcg_params->sm_lid  = port_attr.sm_lid;
+    mcg_params->sm_sl   = port_attr.sm_sl;
+    mcg_params->ib_port = user_parm->ib_port;
+    set_multicast_gid(mcg_params);
+
+    // Ask the SM to create/register the multicast group.
+    if (join_multicast_group(SUBN_ADM_METHOD_SET,mcg_params)) {
+        fprintf(stderr,"Couldn't join the multicast group\n");
+        return 1;
+    }
+
+    for (i=0; i < user_parm->num_of_qps; i++) {
+
+        if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {
+            fprintf(stderr, "Couldn't attach QP to MultiCast group\n");
+            return 1;
+        }
+    }
+    // Flag is raised only once every QP has been attached.
+    mcg_params->mcast_state |= MCAST_IS_ATTACHED;
+
+    return 0;
+}
+
+/******************************************************************************
+ * set_up_connection
+ *
+ * Fills my_dest with the local addressing data (gid, lid, qpn, psn, rkey,
+ * vaddr) that is later exchanged with the peer. In multicast mode the group
+ * is set up first and its MGID/MLID are advertised instead of the port's own.
+ *
+ * Returns 0 on success, 1 if the multicast set-up failed and -1 on any other
+ * failure.
+ ******************************************************************************/
+static int set_up_connection(struct pingpong_context *ctx,
+                             struct perftest_parameters *user_parm,
+                             struct pingpong_dest *my_dest,
+                             struct mcast_parameters *mcg_params) {
+
+    if (!user_parm->use_mcg) {
+
+        // Unicast: the GID is fetched only when a GID index was requested.
+        if (user_parm->gid_index != -1 &&
+            ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&my_dest->gid)) {
+            return -1;
+        }
+        my_dest->lid = ctx_get_local_lid(ctx->context,user_parm->ib_port);
+        my_dest->qpn = ctx->qp[0]->qp_num;
+
+    } else {
+
+        // Multicast: advertise the group address instead of the local port.
+        if (set_mcast_group(ctx,user_parm,mcg_params)) {
+            return 1;
+        }
+        my_dest->gid = mcg_params->mgid;
+        my_dest->lid = mcg_params->mlid;
+        my_dest->qpn = QPNUM_MCAST;
+    }
+
+    my_dest->psn   = rand() & 0xffffff;
+    my_dest->rkey  = ctx->mr->rkey;
+    my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+    // A zero lid is fatal only when no GID is in use (not RoCE).
+    if (user_parm->gid_index < 0 && !my_dest->lid) {
+        fprintf(stderr,"Local lid 0x0 detected,without any use of gid. Is SM running?\n");
+        return -1;
+    }
+    return 0;
+}
+
+/******************************************************************************
+ * init_connection
+ *
+ * Prints the local destination data and opens the side-channel socket:
+ * a client connects to servername, a server waits on params->port.
+ * The socket is stored in params->sockfd.
+ *
+ * Returns 0 on success, 1 if no socket could be obtained.
+ ******************************************************************************/
+static int init_connection(struct perftest_parameters *params,
+                           struct pingpong_dest *my_dest,
+                           const char *servername) {
+
+    params->side = LOCAL;
+    ctx_print_pingpong_data(my_dest,params);
+
+    params->sockfd = (params->machine == CLIENT)
+                         ? ctx_client_connect(servername,params->port)
+                         : ctx_server_connect(params->port);
+
+    if (params->sockfd != INVALID_SOCKET)
+        return 0;
+
+    fprintf(stderr,"Unable to open file descriptor for socket connection");
+    return 1;
+}
+
+/******************************************************************************
+ * pp_find_dev
+ *
+ * Returns the IB device called ib_devname, or the first device found when
+ * ib_devname is NULL. Returns NULL (after printing a diagnostic) when the
+ * device list cannot be obtained or no matching device exists.
+ *
+ * NOTE(review): the device list is not released here; the returned pointer
+ * is an element of that list.
+ ******************************************************************************/
+static struct ibv_device *pp_find_dev(const char *ib_devname) {
+    struct ibv_device **dev_list;
+    struct ibv_device *ib_dev = NULL;
+
+    dev_list = ibv_get_device_list(NULL);
+    if (!dev_list) {
+        // Without this check the code below would dereference NULL.
+        fprintf(stderr, "Failed to get IB devices list\n");
+        return NULL;
+    }
+
+    if (!ib_devname) {
+        ib_dev = dev_list[0];
+        if (!ib_dev)
+            fprintf(stderr, "No IB devices found\n");
+    } else {
+        // The list is NULL-terminated; ib_dev ends up NULL when nothing matches.
+        for (; (ib_dev = *dev_list); ++dev_list)
+            if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+                break;
+        if (!ib_dev)
+            fprintf(stderr, "IB device %s not found\n", ib_devname);
+    }
+    return ib_dev;
+}
+
+/******************************************************************************
+ * pp_init_ctx
+ *
+ * Allocates the test context and every verbs resource it needs: the work
+ * buffer, device context, optional completion channel, PD, MR, send and
+ * receive CQs and user_parm->num_of_qps QPs (each moved to INIT).
+ * When user_parm->mtu is 0 a default MTU is chosen and stored there.
+ *
+ * Returns the new context, or NULL on failure. Resources acquired before a
+ * failure are not released.
+ ******************************************************************************/
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
+                                            struct perftest_parameters *user_parm) {
+
+    struct pingpong_context *ctx;
+    struct ibv_device_attr device_attr;
+    int i;
+
+    ALLOCATE(ctx,struct pingpong_context,1);
+
+    ctx->ah   = NULL;
+    ctx->size = size;
+    // Cleared up front so the NULL test below never reads an indeterminate
+    // pointer if the allocator leaves its output untouched on failure.
+    ctx->buf  = NULL;
+
+    // Allocating the Buff size according to connection type and size.
+    // Twice the message area: one half for receive, one half for send.
+    posix_memalign(&(ctx->buf),page_size, 2*SIZE(user_parm->connection_type,size));
+    if (!ctx->buf) {
+        fprintf(stderr, "Couldn't allocate work buf.\n");
+        return NULL;
+    }
+    memset(ctx->buf, 0, 2*SIZE(user_parm->connection_type,size));
+
+    ctx->context = ibv_open_device(ib_dev);
+    if (!ctx->context) {
+        fprintf(stderr, "Couldn't get context for %s\n",
+                ibv_get_device_name(ib_dev));
+        return NULL;
+    }
+
+    // Finds the link type and configure the HCA accordingly.
+    if (ctx_set_link_layer(ctx->context,user_parm)) {
+        fprintf(stderr, "Couldn't set the link layer\n");
+        return NULL;
+    }
+
+    if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
+        if (ibv_query_device(ctx->context, &device_attr)) {
+            fprintf(stderr, "Failed to query device props\n");
+            return NULL;
+        }
+        // Part id 23108, or any run that uses a GID index, defaults to 1024.
+        if (device_attr.vendor_part_id == 23108 || user_parm->gid_index > -1) {
+            user_parm->mtu = 1024;
+        } else {
+            user_parm->mtu = 2048;
+        }
+    }
+
+    if (user_parm->use_event) {
+        ctx->channel = ibv_create_comp_channel(ctx->context);
+        if (!ctx->channel) {
+            fprintf(stderr, "Couldn't create completion channel\n");
+            return NULL;
+        }
+    } else
+        ctx->channel = NULL;
+
+    ctx->pd = ibv_alloc_pd(ctx->context);
+    if (!ctx->pd) {
+        fprintf(stderr, "Couldn't allocate PD\n");
+        return NULL;
+    }
+
+    ctx->mr = ibv_reg_mr(ctx->pd,ctx->buf,2*SIZE(user_parm->connection_type,size),
+                         IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+    if (!ctx->mr) {
+        fprintf(stderr, "Couldn't allocate MR\n");
+        return NULL;
+    }
+
+    // Separate CQs for send and receive; both share the (optional) channel.
+    ctx->scq = ibv_create_cq(ctx->context,user_parm->tx_depth,NULL,ctx->channel,0);
+    if (!ctx->scq) {
+        fprintf(stderr, "Couldn't create CQ\n");
+        return NULL;
+    }
+
+    ctx->rcq = ibv_create_cq(ctx->context,user_parm->rx_depth,NULL,ctx->channel,0);
+    if (!ctx->rcq) {
+        fprintf(stderr, "Couldn't create CQ\n");
+        return NULL;
+    }
+
+    ALLOCATE(ctx->qp,struct ibv_qp*,user_parm->num_of_qps);
+
+    for (i = 0; i < user_parm->num_of_qps; i++) {
+
+        ctx->qp[i] = ctx_qp_create(ctx->pd,ctx->scq,ctx->rcq,user_parm);
+        if (ctx->qp[i] == NULL) {
+            return NULL;
+        }
+
+        if(ctx_modify_qp_to_init(ctx->qp[i],user_parm)) {
+            return NULL;
+        }
+    }
+    return ctx;
+}
+
+/******************************************************************************
+ * pp_connect_ctx
+ *
+ * Moves the QPs to RTR and then RTS towards the remote destination dest,
+ * creates the address handle on the UD path, prepares the send and receive
+ * work-request templates in ctx and pre-posts rx_depth/2 receives on every QP.
+ *
+ * my_psn is used as the send-queue PSN.
+ *
+ * Returns 0 on success, 1 if a QP transition or the AH creation failed and
+ * 14 if posting a receive failed.
+ ******************************************************************************/
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,
+                          struct pingpong_dest *dest,
+                          struct perftest_parameters *user_parm)
+{
+    struct ibv_qp_attr attr;
+    struct ibv_recv_wr *bad_wr_recv;
+    int i,j;
+
+    memset(&attr, 0, sizeof(struct ibv_qp_attr));
+    attr.qp_state = IBV_QPS_RTR;
+    // An MTU value not listed here leaves path_mtu at 0 from the memset above.
+    switch (user_parm->mtu) {
+    case 256 :
+        attr.path_mtu = IBV_MTU_256;
+        break;
+    case 512 :
+        attr.path_mtu = IBV_MTU_512;
+        break;
+    case 1024 :
+        attr.path_mtu = IBV_MTU_1024;
+        break;
+    case 2048 :
+        attr.path_mtu = IBV_MTU_2048;
+        break;
+    case 4096 :
+        attr.path_mtu = IBV_MTU_4096;
+        break;
+    }
+    attr.dest_qp_num  = dest->qpn;
+    attr.rq_psn       = dest->psn;
+    attr.ah_attr.dlid = dest->lid;
+    printf("Mtu : %d\n", user_parm->mtu);
+    if (user_parm->connection_type==RC) {
+        attr.max_dest_rd_atomic = 1;
+        attr.min_rnr_timer      = 12;
+    }
+
+    if (user_parm->gid_index < 0) {
+        attr.ah_attr.is_global = 0;
+        attr.ah_attr.sl        = (uint8_t)(user_parm->sl);
+    } else {
+        // A GID index was given: address the peer through the GRH.
+        attr.ah_attr.is_global      = 1;
+        attr.ah_attr.grh.dgid       = dest->gid;
+        attr.ah_attr.grh.sgid_index = (uint8_t)(user_parm->gid_index);
+        attr.ah_attr.grh.hop_limit  = 1;
+        attr.ah_attr.sl             = 0;
+    }
+    attr.ah_attr.src_path_bits = 0;
+    attr.ah_attr.port_num      = user_parm->ib_port;
+
+    // INIT -> RTR; the attribute mask depends on the transport.
+    if (user_parm->connection_type==RC) {
+        if (ibv_modify_qp(ctx->qp[0], &attr,
+                          IBV_QP_STATE              |
+                          IBV_QP_AV                 |
+                          IBV_QP_PATH_MTU           |
+                          IBV_QP_DEST_QPN           |
+                          IBV_QP_RQ_PSN             |
+                          IBV_QP_MIN_RNR_TIMER      |
+                          IBV_QP_MAX_DEST_RD_ATOMIC)) {
+            fprintf(stderr, "Failed to modify RC QP to RTR\n");
+            return 1;
+        }
+        // Consumed by the RTS transition below.
+        attr.timeout   = user_parm->qp_timeout;
+        attr.retry_cnt = 7;
+        attr.rnr_retry = 7;
+    } else if (user_parm->connection_type==UC) {
+        if (ibv_modify_qp(ctx->qp[0], &attr,
+                          IBV_QP_STATE    |
+                          IBV_QP_AV       |
+                          IBV_QP_PATH_MTU |
+                          IBV_QP_DEST_QPN |
+                          IBV_QP_RQ_PSN)) {
+            fprintf(stderr, "Failed to modify UC QP to RTR\n");
+            return 1;
+        }
+
+    } else {
+        // UD: every QP goes to RTR; the destination lives in the AH instead.
+        for (i = 0; i < user_parm->num_of_qps; i++) {
+            if (ibv_modify_qp(ctx->qp[i],&attr,IBV_QP_STATE )) {
+                fprintf(stderr, "Failed to modify UD QP to RTR\n");
+                return 1;
+            }
+        }
+
+        ctx->ah = ibv_create_ah(ctx->pd,&attr.ah_attr);
+        if (!ctx->ah) {
+            fprintf(stderr, "Failed to create AH for UD\n");
+            return 1;
+        }
+    }
+
+    // RTR -> RTS; only qp[0] is moved, it is the only QP used for sending.
+    attr.qp_state = IBV_QPS_RTS;
+    attr.sq_psn   = my_psn;
+    if (user_parm->connection_type==RC) {
+        attr.max_rd_atomic = 1;
+        if (ibv_modify_qp(ctx->qp[0], &attr,
+                          IBV_QP_STATE            |
+                          IBV_QP_SQ_PSN           |
+                          IBV_QP_TIMEOUT          |
+                          IBV_QP_RETRY_CNT        |
+                          IBV_QP_RNR_RETRY        |
+                          IBV_QP_MAX_QP_RD_ATOMIC)) {
+            fprintf(stderr, "Failed to modify RC QP to RTS\n");
+            return 1;
+        }
+    } else {
+
+        if(ibv_modify_qp(ctx->qp[0],&attr,IBV_QP_STATE |IBV_QP_SQ_PSN)) {
+            fprintf(stderr, "Failed to modify UC QP to RTS\n");
+            return 1;
+        }
+    }
+
+    // Receive: lands in the first half of the work buffer.
+    ctx->rwr.sg_list      = &ctx->recv_list;
+    ctx->rwr.num_sge      = MAX_RECV_SGE;
+    ctx->rwr.next         = NULL;
+    ctx->recv_list.lkey   = ctx->mr->lkey;
+    ctx->recv_list.addr   = (uintptr_t) ctx->buf;
+    ctx->recv_list.length = SIZE(user_parm->connection_type,ctx->size);
+
+    // Send: sourced from the second half of the work buffer.
+    ctx->wr.sg_list = &ctx->list;
+    ctx->wr.num_sge = MAX_SEND_SGE;
+    ctx->wr.next    = NULL;
+    ctx->wr.opcode  = IBV_WR_SEND;
+    ctx->wr.wr_id   = PINGPONG_SEND_WRID;
+    ctx->list.lkey  = ctx->mr->lkey;
+    ctx->list.addr  = (uintptr_t)ctx->buf + SIZE(user_parm->connection_type,ctx->size);
+
+    if (user_parm->connection_type == UD) {
+        ctx->wr.wr.ud.ah          = ctx->ah;
+        ctx->wr.wr.ud.remote_qpn  = dest->qpn;
+        ctx->wr.wr.ud.remote_qkey = DEF_QKEY;
+    }
+
+    // Pre-post rx_depth/2 receives per QP; wr_id carries the QP index so
+    // run_iter() knows which QP to re-post on.
+    for (i = 0; i < user_parm->num_of_qps; i++) {
+        ctx->rwr.wr_id = i;
+        for (j = 0; j < (user_parm->rx_depth / 2); j++ ) {
+            if (ibv_post_recv(ctx->qp[i],&ctx->rwr, &bad_wr_recv)) {
+                fprintf(stderr, "Couldn't post recv: counter=%d\n", j);
+                return 14;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Prints the command-line help to stdout. argv0 is the program name shown in
+ * the synopsis lines. The option list mirrors the getopt table in main().
+ */
+static void usage(const char *argv0)
+{
+ printf("Usage:\n");
+ printf(" %s start a server and wait for connection\n", argv0);
+ printf(" %s <host> connect to server at <host>\n", argv0);
+ printf("\n");
+ printf("Options:\n");
+ printf(" -p, --port=<port> Listen on/connect to port <port> (default 18515)\n");
+ printf(" -c, --connection=<RC/UC/UD> Connection type RC/UC/UD (default RC)\n");
+ printf(" -m, --mtu=<mtu> Mtu size (256 - 4096. default for hermon is 2048)\n");
+ printf(" -d, --ib-dev=<dev> Use IB device <dev> (default first device found)\n");
+ printf(" -i, --ib-port=<port> Use port <port> of IB device (default 1)\n");
+ printf(" -s, --size=<size> Size of message to exchange (default 1)\n");
+ printf(" -t, --tx-depth=<dep> Size of tx queue (default 50)\n");
+ printf(" -l, --signal Signal completion on each msg\n");
+ printf(" -a, --all Run sizes from 2 till 2^23\n");
+ printf(" -n, --iters=<iters> Number of exchanges (at least 2, default 1000)\n");
+ printf(" -I, --inline_size=<size> Max size of message to be sent in inline mode (default 400)\n");
+ printf(" -u, --qp-timeout=<timeout> QP timeout, timeout value is 4 usec * 2 ^(timeout), default 14\n");
+ printf(" -S, --sl=<sl> SL (default 0)\n");
+ printf(" -x, --gid-index=<index> Test uses GID with GID index taken from command line (for RDMAoE index should be 0)\n");
+ printf(" -C, --report-cycles Report times in cpu cycle units (default microseconds)\n");
+ printf(" -H, --report-histogram Print out all results (default print summary only)\n");
+ printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
+ printf(" -V, --version Display version number\n");
+ printf(" -e, --events Sleep on CQ events (default poll)\n");
+ printf(" -g, --mcg=<num_of_qps> Send messages to multicast group with <num_of_qps> qps attached to it.\n");
+ printf(" -M, --MGID=<multicast_gid> In case of multicast, uses <multicast_gid> as the group MGID.\n");
+ printf(" The format must be '255:1:X:X:X:X:X:X:X:X:X:X:X:X:X:X', where X is a value within [0,255].\n");
+ printf(" You must specify a different MGID on both sides (to avoid loopback).\n");
+ printf(" -F, --CPU-freq Do not fail even if cpufreq_ondemand module is loaded\n");
+}
+
+/*
+ * Prints the latency summary line for one message size.
+ *
+ * tstamp holds iters time stamps taken right before each send; the iters-1
+ * differences between consecutive stamps are halved (round trip -> one way)
+ * and reported as minimum, maximum and median. Depending on options the
+ * individual deltas are also dumped unsorted and/or sorted, and values are
+ * shown either in raw cycles or in microseconds.
+ *
+ * no_cpu_freq_fail is accepted for interface compatibility and is not used.
+ * Nothing is reported when iters < 2, since no delta can be formed.
+ */
+static void print_report(struct report_options *options,
+                         unsigned int iters, cycles_t *tstamp,int size, int no_cpu_freq_fail)
+{
+    double cycles_to_units;
+    cycles_t median;
+    unsigned int i;
+    unsigned int n_deltas;
+    const char* units;
+    cycles_t *delta;
+
+    (void)no_cpu_freq_fail;
+
+    // iters - 1 is unsigned: 0 or 1 iterations would wrap around or index
+    // delta[-1] below.
+    if (iters < 2) {
+        fprintf(stderr, "Need at least 2 iterations to report latency\n");
+        return;
+    }
+    n_deltas = iters - 1;
+
+    delta = malloc(n_deltas * sizeof *delta);
+    if (!delta) {
+        perror("malloc");
+        return;
+    }
+
+    for (i = 0; i < n_deltas; ++i)
+        delta[i] = tstamp[i + 1] - tstamp[i];
+
+
+    if (options->cycles) {
+        cycles_to_units = 1;
+        units = "cycles";
+    } else {
+        cycles_to_units = get_cpu_mhz()/1000000;
+        units = "usec";
+    }
+
+    if (options->unsorted) {
+        printf("#, %s\n", units);
+        for (i = 0; i < n_deltas; ++i)
+            printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
+    }
+
+    qsort(delta, n_deltas, sizeof *delta, cycles_compare);
+
+    if (options->histogram) {
+        printf("#, %s\n", units);
+        for (i = 0; i < n_deltas; ++i)
+            printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
+    }
+
+    // After sorting, delta[0] is the minimum and delta[n_deltas - 1] the maximum.
+    median = get_median(n_deltas, delta);
+    printf("%7d %u %7.2f %7.2f %7.2f\n",
+           size,iters,delta[0] / cycles_to_units / 2,
+           delta[n_deltas - 1] / cycles_to_units / 2,median / cycles_to_units / 2);
+    free(delta);
+}
+
+/*
+ * run_iter
+ *
+ * Runs user_param->iters ping-pong exchanges of size bytes. The client posts
+ * first; the server waits for a receive completion before each send. The
+ * cycle counter is stored in tstamp[] right before every send. A send is
+ * signaled (and its completion reaped) on the last iteration and whenever
+ * the send count is a multiple of rx_depth.
+ *
+ * rem_dest is not used. The completion array is released on every exit path.
+ *
+ * Returns 0 on success, or a non-zero code: 1 event/receive-completion
+ * failure, 11 post-send failure, 12 CQ poll failure, 13 send completion
+ * error, 15 post-receive failure.
+ */
+int run_iter(struct pingpong_context *ctx, struct perftest_parameters *user_param,
+             struct pingpong_dest *rem_dest, int size)
+{
+
+    struct ibv_wc *wc;
+    struct ibv_recv_wr *bad_wr_recv;
+    struct ibv_send_wr *bad_wr;
+    int i,scnt,rcnt,poll,qp_counter,ne;
+    int rc = 0;
+
+    (void)rem_dest;
+
+    ALLOCATE(wc,struct ibv_wc,user_param->num_of_qps);
+
+
+    ctx->list.length = size;
+    ctx->wr.send_flags = size > user_param->inline_size ? 0 : IBV_SEND_INLINE;
+
+    scnt = 0;
+    rcnt = 0;
+    poll = 0;
+    qp_counter = 0;
+
+    while (scnt < user_param->iters || rcnt < user_param->iters) {
+
+        // The client skips the receive phase until it has sent its first message.
+        if (rcnt < user_param->iters && !(scnt < 1 && user_param->machine == CLIENT)) {
+
+            // Server is polling on receive first .
+            if (user_param->use_event) {
+                struct ibv_cq *ev_cq;
+                void *ev_ctx;
+
+                if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+                    fprintf(stderr, "Failed to get receive cq_event\n");
+                    rc = 1;
+                    goto out;
+                }
+
+                if (ev_cq != ctx->rcq) {
+                    fprintf(stderr, "CQ event for unknown RCQ %p\n", ev_cq);
+                    rc = 1;
+                    goto out;
+                }
+
+                if (ibv_req_notify_cq(ctx->rcq, 0)) {
+                    fprintf(stderr, "Couldn't request RCQ notification\n");
+                    rc = 1;
+                    goto out;
+                }
+            }
+
+            // In polling mode, wait until every QP has delivered one message.
+            do {
+                ne = ibv_poll_cq(ctx->rcq, user_param->num_of_qps, wc);
+                if (ne < 0) {
+                    // Without this check a poll error would spin forever.
+                    fprintf(stderr, "poll RCQ failed %d\n", ne);
+                    rc = 12;
+                    goto out;
+                }
+
+                for (i = 0; i < ne; i++) {
+
+                    if (wc[i].status != IBV_WC_SUCCESS) {
+                        fprintf(stderr, "Completion wth error at %s:\n",user_param->machine == CLIENT ? "client" : "server");
+                        fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",wc[i].status, (int)wc[i].wr_id, wc[i].vendor_err);
+                        fprintf(stderr, "rcnt=%d\n",rcnt);
+                        rc = 1;
+                        goto out;
+                    }
+
+                    // wr_id is the index of the QP the receive was posted on;
+                    // replenish that QP's receive queue.
+                    qp_counter++;
+                    ctx->rwr.wr_id = wc[i].wr_id;
+                    if (ibv_post_recv(ctx->qp[wc[i].wr_id], &ctx->rwr, &bad_wr_recv)) {
+                        fprintf(stderr, "Couldn't post recv: rcnt=%d\n",rcnt);
+                        rc = 15;
+                        goto out;
+                    }
+                }
+            } while (!user_param->use_event && qp_counter < user_param->num_of_qps);
+            rcnt++;
+            qp_counter = 0;
+        }
+
+        // client post first.
+        if (scnt < user_param->iters) {
+            tstamp[scnt++] = get_cycles();
+            if (scnt == user_param->iters || scnt%user_param->rx_depth == 0) {
+                poll = 1;
+                ctx->wr.send_flags |= IBV_SEND_SIGNALED;
+            }
+
+            if (ibv_post_send(ctx->qp[0],&ctx->wr,&bad_wr)) {
+                fprintf(stderr, "Couldn't post send: scnt=%d\n",scnt);
+                rc = 11;
+                goto out;
+            }
+        }
+
+        // Reap the completion of the signaled send just posted.
+        if (poll == 1) {
+
+            struct ibv_wc s_wc;
+            int s_ne;
+
+            if (user_param->use_event) {
+                struct ibv_cq *ev_cq;
+                void *ev_ctx;
+
+                if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+                    fprintf(stderr, "Failed to get send cq_event\n");
+                    rc = 1;
+                    goto out;
+                }
+
+                if (ev_cq != ctx->scq) {
+                    fprintf(stderr, "CQ event for unknown SCQ %p\n", ev_cq);
+                    rc = 1;
+                    goto out;
+                }
+
+                if (ibv_req_notify_cq(ctx->scq, 0)) {
+                    fprintf(stderr, "Couldn't request SCQ notification\n");
+                    rc = 1;
+                    goto out;
+                }
+            }
+
+            do {
+                s_ne = ibv_poll_cq(ctx->scq, 1, &s_wc);
+            } while (!user_param->use_event && s_ne == 0);
+
+            if (s_ne < 0) {
+                fprintf(stderr, "poll SCQ failed %d\n", s_ne);
+                rc = 12;
+                goto out;
+            }
+
+            // NOTE(review): in event mode the poll above runs once; if it
+            // returns 0, s_wc is examined without having been filled in.
+            if (s_wc.status != IBV_WC_SUCCESS) {
+                fprintf(stderr, "Completion wth error at %s:\n",user_param->machine == CLIENT ? "client" : "server");
+                fprintf(stderr, "Failed status %d: wr_id %d\n",s_wc.status, (int) s_wc.wr_id);
+                fprintf(stderr, "scnt=%d, rcnt=%d,\n" ,scnt, rcnt);
+                rc = 13;
+                goto out;
+            }
+
+            poll = 0;
+            ctx->wr.send_flags &= ~IBV_SEND_SIGNALED;
+        }
+    }
+
+out:
+    free(wc);
+    return rc;
+}
+
+/*
+ * Entry point of the send-latency test.
+ *
+ * Parses the command line, allocates the time-stamp array, opens the device
+ * and creates the verbs resources, exchanges addressing data with the peer
+ * over a socket, connects the QPs and then runs the measurement either for a
+ * single message size or (with -a) for every power of two up to the transport
+ * limit, printing one report line per size.
+ *
+ * Returns 0 on success and a non-zero code on any failure.
+ */
+int __cdecl main(int argc, char *argv[])
+{
+    const char *ib_devname = NULL;
+    int size = 2;
+    int i = 0;
+    int size_max_pow = 24;
+    struct report_options report;
+    struct pingpong_context *ctx = NULL;
+    struct pingpong_dest my_dest,rem_dest;
+    struct mcast_parameters mcg_params;
+    struct ibv_device *ib_dev = NULL;
+    struct perftest_parameters user_param;
+    int no_cpu_freq_fail = 0;
+
+    int all = 0;
+    const char *servername = NULL;
+
+    SYSTEM_INFO si;
+    GetSystemInfo(&si);
+
+    /* init default values to user's parameters */
+    memset(&user_param, 0, sizeof(struct perftest_parameters));
+    memset(&mcg_params, 0, sizeof(struct mcast_parameters));
+    memset(&my_dest , 0 , sizeof(struct pingpong_dest));
+    memset(&rem_dest , 0 , sizeof(struct pingpong_dest));
+    /* The report flags are only ever set by -C/-H/-U, so they must start
+     * cleared; print_report() reads all three unconditionally. */
+    memset(&report, 0, sizeof(struct report_options));
+
+    user_param.iters = 1000;
+    user_param.port = 18515;
+    user_param.ib_port = 1;
+    user_param.tx_depth = 50;
+    user_param.rx_depth = 50;
+    user_param.inline_size = MAX_INLINE;
+    user_param.verb = SEND;
+    user_param.qp_timeout = 14;
+    user_param.gid_index = -1; /*gid will not be used*/
+    user_param.num_of_qps = 1;
+
+    /* Parameter parsing. */
+    while (1) {
+        int c;
+
+        static struct option long_options[] = {
+            { "port", 1, NULL, 'p' },
+            { "connection", 1, NULL, 'c' },
+            { "mtu", 1, NULL, 'm' },
+            { "ib-dev", 1, NULL, 'd' },
+            { "ib-port", 1, NULL, 'i' },
+            { "size", 1, NULL, 's' },
+            { "iters", 1, NULL, 'n' },
+            { "tx-depth", 1, NULL, 't' },
+            { "inline_size", 1, NULL, 'I' },
+            { "qp-timeout", 1, NULL, 'u' },
+            { "sl", 1, NULL, 'S' },
+            { "gid-index", 1, NULL, 'x' },
+            { "signal", 0, NULL, 'l' },
+            { "all", 0, NULL, 'a' },
+            { "report-cycles", 0, NULL, 'C' },
+            { "report-histogram", 0, NULL, 'H'},
+            { "report-unsorted", 0, NULL, 'U' },
+            { "version", 0, NULL, 'V' },
+            { "events", 0, NULL, 'e' },
+            { "mcg", 1, NULL, 'g' },
+            { "MGID", 1, NULL, 'M' },
+            { "CPU-freq", 0, NULL, 'F' },
+            { 0 }
+        };
+        c = getopt_long(argc, argv, "p:c:m:d:i:s:n:t:I:u:S:x:g:M:laeCHUVF", long_options, NULL);
+        if (c == -1)
+            break;
+
+        switch (c) {
+        case 'p':
+            user_param.port = strtol(optarg, NULL, 0);
+            if (user_param.port < 0 || user_param.port > 65535) {
+                usage(argv[0]);
+                return 1;
+            }
+            break;
+        case 'c':
+            if (strcmp("UC",optarg)==0)
+                user_param.connection_type=UC;
+            if (strcmp("UD",optarg)==0)
+                user_param.connection_type=UD;
+            /* default is 0 for any other option RC*/
+            break;
+        case 'e':
+            ++user_param.use_event;
+            break;
+        case 'g':
+            ++user_param.use_mcg;
+            user_param.num_of_qps = strtol(optarg, NULL, 0);
+            if (user_param.num_of_qps < 1 || user_param.num_of_qps > 57) {
+                usage(argv[0]);
+                return 1;
+            }
+            break;
+        case 'M' :
+            mcg_params.is_user_mgid = 1;
+            mcg_params.user_mgid = _strdup(optarg);
+            break;
+        case 'm':
+            user_param.mtu = strtol(optarg, NULL, 0);
+            break;
+        case 'l':
+            user_param.signal_comp = SIGNAL;
+            break;
+        case 'a':
+            all = ALL;
+            break;
+        case 'V':
+            printf("perftest version : %.2f\n",VERSION);
+            return 0;
+            break;
+        case 'd':
+            ib_devname = _strdup(optarg);
+            break;
+
+        case 'i':
+            user_param.ib_port = (uint8_t)(strtol(optarg, NULL, 0));
+            if (user_param.ib_port < 0) {
+                usage(argv[0]);
+                return 2;
+            }
+            break;
+
+        case 's':
+            size = strtol(optarg, NULL, 0);
+            if (size < 1) {
+                usage(argv[0]); return 3;
+            }
+            break;
+
+        case 'x':
+            user_param.gid_index = strtol(optarg, NULL, 0);
+            if (user_param.gid_index > 63) {
+                usage(argv[0]);
+                return 1;
+            }
+            break;
+
+        case 't':
+            /* The receive depth always follows the transmit depth. */
+            user_param.tx_depth = strtol(optarg, NULL, 0);
+            user_param.rx_depth = user_param.tx_depth;
+            if (user_param.tx_depth < 1) {
+                usage(argv[0]); return 4;
+            }
+            break;
+
+        case 'I':
+            user_param.inline_size = strtol(optarg, NULL, 0);
+            if (user_param.inline_size > MAX_INLINE) {
+                usage(argv[0]);
+                return 19;
+            }
+            break;
+
+        case 'n':
+            user_param.iters = strtol(optarg, NULL, 0);
+            if (user_param.iters < 2) {
+                usage(argv[0]);
+                return 5;
+            }
+
+            break;
+
+        case 'C':
+            report.cycles = 1;
+            break;
+
+        case 'H':
+            report.histogram = 1;
+            break;
+
+        case 'U':
+            report.unsorted = 1;
+            break;
+
+        case 'F':
+            no_cpu_freq_fail = 1;
+            break;
+
+        case 'u':
+            user_param.qp_timeout = (uint8_t)(strtol(optarg, NULL, 0));
+            break;
+
+        case 'S':
+            user_param.sl = strtol(optarg, NULL, 0);
+            if (user_param.sl > 15) { usage(argv[0]); return 6; }
+            break;
+
+        default:
+            usage(argv[0]);
+            return 7;
+        }
+    }
+
+    /* A single trailing argument is the server name and makes us the client. */
+    if (optind == argc - 1)
+        servername = _strdup(argv[optind]);
+    else if (optind < argc) {
+        usage(argv[0]);
+        return 6;
+    }
+
+    user_param.machine = servername ? CLIENT : SERVER;
+
+    /*
+     * Done with parameter parsing. Perform setup.
+     */
+    tstamp = malloc(user_param.iters * sizeof *tstamp);
+    if (!tstamp) {
+        perror("malloc");
+        return 10;
+    }
+    /* Print header data */
+    printf("------------------------------------------------------------------\n");
+    if (user_param.use_mcg) {
+        /* Multicast forces the UD transport. */
+        user_param.connection_type = UD;
+        printf(" Send Latency Multicast Test\n");
+    } else {
+        printf(" Send Latency Test\n");
+    }
+
+    printf("Inline data is used up to %d bytes message\n", user_param.inline_size);
+    if (user_param.connection_type==RC) {
+        printf("Connection type : RC\n");
+    } else if (user_param.connection_type==UC) {
+        printf("Connection type : UC\n");
+    } else {
+        printf("Connection type : UD\n");
+    }
+
+    if (all == ALL) {
+        /*since we run all sizes lets allocate big enough buffer */
+        size = 8388608; /*2^23 */
+    }
+    if (user_param.connection_type == UD && size > 2048) {
+        printf("Max msg size in UD is 2048 changing to 2048\n");
+        size = 2048;
+    }
+    if (user_param.connection_type == UD && user_param.gid_index > -1 && size > 1024) {
+        printf("Max msg size in UD RDMAoE is 1024. changing to 1024\n");
+        size = 1024;
+    }
+
+    page_size = si.dwPageSize;
+
+    ib_dev = pp_find_dev(ib_devname);
+    if (!ib_dev)
+        return 7;
+
+    mcg_params.ib_devname = ibv_get_device_name(ib_dev);
+
+    ctx = pp_init_ctx(ib_dev,size,&user_param);
+    if (!ctx)
+        return 8;
+
+    // Set up the Connection.
+    if (set_up_connection(ctx,&user_param,&my_dest,&mcg_params)) {
+        fprintf(stderr," Unable to set up socket connection\n");
+        return 1;
+    }
+
+    // Init the connection and print the local data.
+    if (init_connection(&user_param,&my_dest,servername)) {
+        fprintf(stderr," Unable to init the socket connection\n");
+        return 1;
+    }
+
+    // shaking hands and gather the other side info.
+    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to exchange date between server and clients\n");
+        return 1;
+
+    }
+    // For printing only MGID in the remote side.
+    user_param.side = REMOTE;
+    ctx_print_pingpong_data(&rem_dest,&user_param);
+
+    // Connects......
+    if (pp_connect_ctx(ctx,my_dest.psn,&rem_dest,&user_param)) {
+        fprintf(stderr," Unable to Connect the HCA's through the link\n");
+        return 1;
+    }
+
+    // An additional handshake is required after moving qp to RTR.
+    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to exchange date between server and clients\n");
+        return 1;
+    }
+
+    if (user_param.use_event) {
+        printf("Test with events.\n");
+        if (ibv_req_notify_cq(ctx->rcq, 0)) {
+            fprintf(stderr, "Couldn't request RCQ notification\n");
+            return 1;
+        }
+        if (ibv_req_notify_cq(ctx->scq, 0)) {
+            fprintf(stderr, "Couldn't request SCQ notification\n");
+            return 1;
+        }
+
+    }
+    printf("------------------------------------------------------------------\n");
+    printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");
+
+    if (all == ALL) {
+        /* UD caps the largest size at 2^11 (2^10 when a GID index is used). */
+        if (user_param.connection_type==UD) {
+            if (user_param.gid_index < 0) {
+                size_max_pow = 12;
+            } else {
+                size_max_pow = 11;
+            }
+        }
+        for (i = 1; i < size_max_pow ; ++i) {
+            size = 1 << i;
+            if(run_iter(ctx, &user_param, &rem_dest, size))
+                return 17;
+
+            print_report(&report,user_param.iters,tstamp,size,no_cpu_freq_fail);
+
+            /* Re-synchronise both sides before the next message size. */
+            if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+                fprintf(stderr,"Failed to exchange date between server and clients\n");
+                return 1;
+            }
+        }
+    } else {
+        if(run_iter(ctx,&user_param,&rem_dest,size))
+            return 18;
+        print_report(&report, user_param.iters, tstamp, size, no_cpu_freq_fail);
+    }
+
+    if (ctx_close_connection(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to close connection between server and client\n");
+        return 1;
+    }
+
+    printf("------------------------------------------------------------------\n");
+    free(tstamp);
+    return 0;
+}
USE_NATIVE_EH = 1\r
USE_IOSTREAM = 1\r
\r
-SOURCES = write_bw.rc write_bw.c ..\perftest.c\r
+SOURCES = write_bw.rc write_bw.c ..\perftest_resources.c ..\perftest.c\r
\r
INCLUDES = ..;..\..\..\ulp\libibverbs\include;\\r
- ..\..\..\inc;..\..\..\inc\user;\\r
- ..\..\..\inc\user\linux;\r
+ ..\..\..\inc;..\..\..\inc\user;..\..\..\inc\user\linux;..\..\..\inc\complib;\\r
+ ..\..\..\hw\mlx4\user\hca;..\..\..\tools\perftests\user\\r
+\r
TARGETLIBS = \\r
$(SDK_LIB_PATH)\kernel32.lib \\r
$(SDK_LIB_PATH)\advapi32.lib \\r
$(SDK_LIB_PATH)\user32.lib \\r
$(SDK_LIB_PATH)\ole32.lib \\r
$(SDK_LIB_PATH)\ws2_32.lib \\r
+ $(SDK_LIB_PATH)\uuid.lib \\r
!if $(FREEBUILD)\r
- $(TARGETPATH)\*\libibverbs.lib\r
+ $(TARGETPATH)\*\libibverbs.lib \\r
+ $(TARGETPATH)\*\complib.lib \\r
!else\r
- $(TARGETPATH)\*\libibverbsd.lib\r
+ $(TARGETPATH)\*\libibverbsd.lib \\r
+ $(TARGETPATH)\*\complibd.lib \\r
!endif\r
\r
-/*\r
- * Copyright (c) 2005 Topspin Communications. All rights reserved.\r
- * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.\r
- * Copyright (c) 2008 Intel Corporation. All rights reserved.\r
- *\r
- * This software is available to you under the OpenIB.org BSD license\r
- * below:\r
- *\r
- * Redistribution and use in source and binary forms, with or\r
- * without modification, are permitted provided that the following\r
- * conditions are met:\r
- *\r
- * - Redistributions of source code must retain the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer.\r
- *\r
- * - Redistributions in binary form must reproduce the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer in the documentation and/or other materials\r
- * provided with the distribution.\r
- *\r
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV\r
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
- * SOFTWARE.\r
- */\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-#include <ws2tcpip.h>\r
-#include <winsock2.h>\r
-#include <time.h>\r
-\r
-#include "..\..\..\etc\user\getopt.c"\r
-#include "perftest.h"\r
-#include <infiniband/verbs.h>\r
-\r
-#define PINGPONG_RDMA_WRID 3\r
-#define VERSION 2.0\r
-#define ALL 1\r
-#define MAX_INLINE 400\r
-#define RC 0\r
-#define UC 1\r
-\r
-struct user_parameters {\r
- const char *servername;\r
- int connection_type;\r
- int mtu;\r
- int all; /* run all msg size */\r
- int iters;\r
- int tx_depth;\r
- int numofqps;\r
- int maxpostsofqpiniteration;\r
- int inline_size;\r
-};\r
-\r
-struct extended_qp {\r
- struct ibv_qp *qp;\r
- int scnt, ccnt ;\r
-};\r
-\r
-typedef UINT64 cycles_t;\r
-cycles_t *tposted;\r
-cycles_t *tcompleted;\r
-\r
-struct pingpong_context {\r
- struct ibv_context *context;\r
- struct ibv_pd *pd;\r
- struct ibv_mr *mr;\r
- struct ibv_cq *cq;\r
- struct ibv_qp **qp;\r
- void *buf;\r
- unsigned size;\r
- int tx_depth;\r
- struct ibv_sge list;\r
- struct ibv_send_wr wr;\r
- int *scnt;\r
- int *ccnt;\r
-};\r
-\r
-struct pingpong_dest {\r
- int lid;\r
- int qpn;\r
- int psn;\r
- unsigned rkey;\r
- unsigned long long vaddr;\r
-};\r
-\r
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
-{\r
- struct ibv_port_attr attr;\r
-\r
- if (ibv_query_port(ctx->context, (uint8_t) port, &attr))\r
- return 0;\r
-\r
- return attr.lid;\r
-}\r
-\r
-static struct pingpong_dest * pp_client_exch_dest(SOCKET sockfd,\r
- const struct pingpong_dest *my_dest)\r
-{\r
- struct pingpong_dest *rem_dest = NULL;\r
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];\r
- int parsed;\r
-\r
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,\r
- my_dest->psn,my_dest->rkey,my_dest->vaddr);\r
- if (send(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client send");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- goto out;\r
- }\r
-\r
- if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client recv");\r
- fprintf(stderr, "Couldn't recv remote address\n");\r
- goto out;\r
- }\r
-\r
- rem_dest = malloc(sizeof *rem_dest);\r
- if (!rem_dest)\r
- goto out;\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn,&rem_dest->rkey,&rem_dest->vaddr);\r
-\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,\r
- msg);\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
- }\r
-out:\r
- return rem_dest;\r
-}\r
-\r
-static struct pingpong_dest *pp_server_exch_dest(SOCKET connfd, const struct pingpong_dest *my_dest)\r
-{\r
- char msg[sizeof "0000:000000:000000:00000000:0000000000000000"];\r
- struct pingpong_dest *rem_dest = NULL;\r
- int parsed;\r
- int n;\r
-\r
- n = recv(connfd, msg, sizeof msg, 0);\r
- if (n != sizeof msg) {\r
- perror("server recv");\r
- fprintf(stderr, "%d/%d: Couldn't recv remote address\n", n, (int) sizeof msg);\r
- goto out;\r
- }\r
-\r
- rem_dest = malloc(sizeof *rem_dest);\r
- if (!rem_dest)\r
- goto out;\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, "%x:%x:%x:%x:%Lx", &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",(int)sizeof msg,\r
- msg);\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
- }\r
-\r
- sprintf(msg, "%04x:%06x:%06x:%08x:%016Lx", my_dest->lid, my_dest->qpn,\r
- my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
- if (send(connfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("server send");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- free(rem_dest);\r
- rem_dest = NULL;\r
- goto out;\r
- }\r
-out:\r
- return rem_dest;\r
-}\r
-\r
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,\r
- unsigned size,\r
- int tx_depth, int port, struct user_parameters *user_parm)\r
-{\r
- struct pingpong_context *ctx;\r
- struct ibv_device_attr device_attr;\r
- int counter;\r
-\r
- ctx = malloc(sizeof *ctx);\r
- if (!ctx)\r
- return NULL;\r
- ctx->qp = malloc(sizeof (struct ibv_qp*) * user_parm->numofqps);\r
- ctx->size = size;\r
- ctx->tx_depth = tx_depth;\r
- ctx->scnt = malloc(user_parm->numofqps * sizeof (int));\r
- if (!ctx->scnt) {\r
- perror("malloc");\r
- return NULL;\r
- }\r
- ctx->ccnt = malloc(user_parm->numofqps * sizeof (int));\r
- if (!ctx->ccnt) {\r
- perror("malloc");\r
- return NULL;\r
- }\r
- memset(ctx->scnt, 0, user_parm->numofqps * sizeof (int));\r
- memset(ctx->ccnt, 0, user_parm->numofqps * sizeof (int));\r
- \r
- ctx->buf = malloc(size * 2 * user_parm->numofqps);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
-\r
- memset(ctx->buf, 0, size * 2 * user_parm->numofqps);\r
-\r
- ctx->context = ibv_open_device(ib_dev);\r
- if (!ctx->context) {\r
- fprintf(stderr, "Couldn't get context for %s\n",\r
- ibv_get_device_name(ib_dev));\r
- return NULL;\r
- }\r
- if (user_parm->mtu == 0) {/*user did not ask for specific mtu */\r
- if (ibv_query_device(ctx->context, &device_attr)) {\r
- fprintf(stderr, "Failed to query device props");\r
- return NULL;\r
- }\r
- if (device_attr.vendor_part_id == 23108) {\r
- user_parm->mtu = 1024;\r
- } else {\r
- user_parm->mtu = 2048;\r
- }\r
- }\r
-\r
- ctx->pd = ibv_alloc_pd(ctx->context);\r
- if (!ctx->pd) {\r
- fprintf(stderr, "Couldn't allocate PD\n");\r
- return NULL;\r
- }\r
-\r
- /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:\r
- * The Consumer is not allowed to assign Remote Write or Remote Atomic to\r
- * a Memory Region that has not been assigned Local Write. */\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2 * user_parm->numofqps,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
-\r
- ctx->cq = ibv_create_cq(ctx->context, tx_depth * user_parm->numofqps , NULL, NULL, 0);\r
- if (!ctx->cq) {\r
- fprintf(stderr, "Couldn't create CQ\n");\r
- return NULL;\r
- }\r
- for (counter =0 ; counter < user_parm->numofqps ; counter++)\r
- {\r
- struct ibv_qp_init_attr initattr;\r
- struct ibv_qp_attr attr;\r
- memset(&initattr, 0, sizeof(struct ibv_qp_init_attr));\r
- initattr.send_cq = ctx->cq;\r
- initattr.recv_cq = ctx->cq;\r
- initattr.cap.max_send_wr = tx_depth;\r
- /* Work around: driver doesnt support\r
- * recv_wr = 0 */\r
- initattr.cap.max_recv_wr = 1;\r
- initattr.cap.max_send_sge = 1;\r
- initattr.cap.max_recv_sge = 1;\r
- initattr.cap.max_inline_data = user_parm->inline_size;\r
-\r
- if (user_parm->connection_type == 1) {\r
- initattr.qp_type = IBV_QPT_UC;\r
- } else {\r
- initattr.qp_type = IBV_QPT_RC;\r
- }\r
- ctx->qp[counter] = ibv_create_qp(ctx->pd, &initattr);\r
- if (!ctx->qp[counter]) {\r
- fprintf(stderr, "Couldn't create QP\n");\r
- return NULL;\r
- }\r
- \r
- attr.qp_state = IBV_QPS_INIT;\r
- attr.pkey_index = 0;\r
- attr.port_num = (uint8_t) port;\r
- attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;\r
-\r
- if (ibv_modify_qp(ctx->qp[counter], &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_ACCESS_FLAGS)) {\r
- fprintf(stderr, "Failed to modify QP to INIT\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- return ctx;\r
-}\r
-\r
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,\r
- struct pingpong_dest *dest, struct user_parameters *user_parm, int qpindex)\r
-{\r
- struct ibv_qp_attr attr;\r
- memset(&attr, 0, sizeof attr);\r
-\r
- attr.qp_state = IBV_QPS_RTR;\r
- switch (user_parm->mtu) {\r
- case 256 : \r
- attr.path_mtu = IBV_MTU_256;\r
- break;\r
- case 512 :\r
- attr.path_mtu = IBV_MTU_512;\r
- break;\r
- case 1024 :\r
- attr.path_mtu = IBV_MTU_1024;\r
- break;\r
- case 2048 :\r
- attr.path_mtu = IBV_MTU_2048;\r
- break;\r
- case 4096 :\r
- attr.path_mtu = IBV_MTU_4096;\r
- break;\r
- }\r
- printf("Mtu : %d\n", user_parm->mtu);\r
- attr.dest_qp_num = dest->qpn;\r
- attr.rq_psn = dest->psn;\r
- if (user_parm->connection_type==RC) {\r
- attr.max_dest_rd_atomic = 1;\r
- attr.min_rnr_timer = 12;\r
- }\r
- attr.ah_attr.is_global = 0;\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.sl = 0;\r
- attr.ah_attr.src_path_bits = 0;\r
- attr.ah_attr.port_num = (uint8_t) port;\r
- if (user_parm->connection_type == RC) {\r
- if (ibv_modify_qp(ctx->qp[qpindex], &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN |\r
- IBV_QP_MIN_RNR_TIMER |\r
- IBV_QP_MAX_DEST_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTR\n");\r
- return 1;\r
- }\r
- attr.timeout = 14;\r
- attr.retry_cnt = 7;\r
- attr.rnr_retry = 7;\r
- } else {\r
- if (ibv_modify_qp(ctx->qp[qpindex], &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
- return 1;\r
- }\r
-\r
- }\r
- attr.qp_state = IBV_QPS_RTS;\r
- attr.sq_psn = my_psn;\r
- attr.max_rd_atomic = 1;\r
- if (user_parm->connection_type == 0) {\r
- attr.max_rd_atomic = 1;\r
- if (ibv_modify_qp(ctx->qp[qpindex], &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN |\r
- IBV_QP_TIMEOUT |\r
- IBV_QP_RETRY_CNT |\r
- IBV_QP_RNR_RETRY |\r
- IBV_QP_MAX_QP_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
- return 1;\r
- }\r
- } else {\r
- if (ibv_modify_qp(ctx->qp[qpindex], &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTS\n");\r
- return 1;\r
- }\r
-\r
- }\r
- return 0;\r
-}\r
-\r
-static void usage(const char *argv0)\r
-{\r
- printf("Usage:\n");\r
- printf(" %s start a server and wait for connection\n", argv0);\r
- printf(" %s -h <host> connect to server at <host>\n", argv0);\r
- printf("\n");\r
- printf("Options:\n");\r
- printf(" -p <port> listen on/connect to port <port> (default 18515)\n");\r
- printf(" -d <dev> use IB device <dev> (default first device found)\n");\r
- printf(" -i <port> use port <port> of IB device (default 1)\n");\r
- printf(" -c <RC/UC> connection type RC/UC (default RC)\n");\r
- printf(" -m <mtu> mtu size (256 - 4096. default for hermon is 2048)\n");\r
- printf(" -g <num of posts> number of posts for each qp in the chain (default tx_depth)\n");\r
- printf(" -q <num of qp's> Num of qp's(default 1)\n");\r
- printf(" -s <size> size of message to exchange (default 65536)\n");\r
- printf(" -a Run sizes from 2 till 2^23\n");\r
- printf(" -t <dep> size of tx queue (default 100)\n");\r
- printf(" -n <iters> number of exchanges (at least 2, default 5000)\n");\r
- printf(" -I <size> max size of message to be sent in inline mode (default 400)\n");\r
- printf(" -b measure bidirectional bandwidth (default unidirectional)\n");\r
- printf(" -V display version number\n");\r
- printf(" -N cancel peak-bw calculation (default with peak-bw)\n");\r
-}\r
-\r
-static void print_report(unsigned int iters, unsigned size, int duplex,\r
- cycles_t *tposted, cycles_t *tcompleted, struct user_parameters *user_param,\r
- int noPeak)\r
-{\r
- cycles_t cycles_to_units;\r
- unsigned long tsize; /* Transferred size */\r
- int i, j;\r
- int opt_posted = 0, opt_completed = 0;\r
- cycles_t opt_delta;\r
- cycles_t t;\r
-\r
- opt_delta = tcompleted[opt_posted] - tposted[opt_completed];\r
-\r
- if (!noPeak) {\r
- /* Find the peak bandwidth, unless asked not to in command line */\r
- for (i = 0; i < (int) iters; ++i)\r
- for (j = i; j < (int) iters; ++j) {\r
- t = (tcompleted[j] - tposted[i]) / (j - i + 1);\r
- if (t < opt_delta) {\r
- opt_delta = t;\r
- opt_posted = i;\r
- opt_completed = j;\r
- }\r
- }\r
- }\r
-\r
- cycles_to_units = get_freq();\r
-\r
- tsize = duplex ? 2 : 1;\r
- tsize = tsize * size;\r
- printf("%7d %d ", size, iters);\r
-\r
- {\r
- double sec = (double) opt_delta / (double) cycles_to_units;\r
- double mbytes = (double) !(noPeak) * (double) tsize / (double) 0x100000;\r
- printf("%7.2f ", mbytes / sec);\r
-\r
- sec = (double) (tcompleted[iters - 1] - tposted[0]) / (double) cycles_to_units;\r
- mbytes = (double) tsize * (double) iters / (double) 0x100000;\r
- printf("%7.2f\n", mbytes / sec);\r
- }\r
-}\r
-\r
-static int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest **rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- int totscnt, totccnt ;\r
- int index ,warmindex;\r
- int inline_size;\r
- struct ibv_send_wr *bad_wr;\r
- struct ibv_wc wc;\r
- ctx->list.addr = (uintptr_t) ctx->buf;\r
- ctx->list.length = size;\r
- ctx->list.lkey = ctx->mr->lkey;\r
-\r
- ctx->wr.sg_list = &ctx->list;\r
- ctx->wr.num_sge = 1;\r
- ctx->wr.opcode = IBV_WR_RDMA_WRITE;\r
- inline_size = user_param->inline_size;\r
- if (size > inline_size) {/* complaince to perf_main */\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- } else {\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;\r
- }\r
- ctx->wr.next = NULL;\r
-\r
- totscnt = 0;\r
- totccnt = 0;\r
- /*clear the scnt ccnt counters for each iteration*/\r
- for (index =0 ; index < user_param->numofqps ; index++) {\r
- ctx->scnt[index] = 0;\r
- ctx->ccnt[index] = 0;\r
- }\r
- index = 0;\r
- \r
- /* Done with setup. Start the test. \r
- warm up posting of total 100 wq's per qp \r
- 1 for each qp till all qps have 100 */\r
- for (warmindex =0 ;warmindex < user_param->maxpostsofqpiniteration ;warmindex ++ ) {\r
- for (index =0 ; index < user_param->numofqps ; index++) {\r
- ctx->wr.wr.rdma.remote_addr = rem_dest[index]->vaddr;\r
- ctx->wr.wr.rdma.rkey = rem_dest[index]->rkey;\r
- qp = ctx->qp[index];\r
- ctx->wr.wr_id = index;\r
- if (user_param->servername)\r
- tposted[totscnt] = get_cycles();\r
- if (ibv_post_send(qp, &ctx->wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post warmup send: qp index = %d qp scnt=%d total scnt %d\n",\r
- index,ctx->scnt[index],totscnt);\r
- return 1;\r
- }\r
- ctx->scnt[index]= ctx->scnt[index]+1;\r
- ++totscnt;\r
- }\r
- } \r
-\r
- /* main loop for posting */\r
- while (totscnt < (user_param->iters * user_param->numofqps) ||\r
- totccnt < (user_param->iters * user_param->numofqps) ) {\r
- /* main loop to run over all the qps and post each time n messages */\r
- for (index =0 ; index < user_param->numofqps ; index++) {\r
- ctx->wr.wr.rdma.remote_addr = rem_dest[index]->vaddr;\r
- ctx->wr.wr.rdma.rkey = rem_dest[index]->rkey;\r
- qp = ctx->qp[index];\r
- ctx->wr.wr_id = index;\r
-\r
- while (ctx->scnt[index] < user_param->iters &&\r
- (ctx->scnt[index] - ctx->ccnt[index]) < user_param->maxpostsofqpiniteration) {\r
- if (user_param->servername)\r
- tposted[totscnt] = get_cycles();\r
- if (ibv_post_send(qp, &ctx->wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: qp index = %d qp scnt=%d total scnt %d\n",\r
- index,ctx->scnt[index],totscnt);\r
- return 1;\r
- } \r
- ctx->scnt[index]= ctx->scnt[index]+1;\r
- ++totscnt;\r
- }\r
- }\r
-\r
- /* finished posting now polling */\r
- if (totccnt < (user_param->iters * user_param->numofqps) ) {\r
- int ne;\r
- do {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- } while (ne == 0);\r
-\r
- if (user_param->servername)\r
- tcompleted[totccnt] = get_cycles();\r
-\r
- if (ne < 0) {\r
- fprintf(stderr, "poll CQ failed %d\n", ne);\r
- return 1;\r
- }\r
-\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d\n",\r
- wc.status, (int) wc.wr_id);\r
- fprintf(stderr, "qp index %d ,qp scnt=%d, qp ccnt=%d total scnt %d total ccnt %d\n",\r
- (int)wc.wr_id, ctx->scnt[(int)wc.wr_id], ctx->ccnt[(int)wc.wr_id], totscnt, totccnt);\r
- return 1;\r
- }\r
- /*here the id is the index to the qp num */\r
- ctx->ccnt[(int)wc.wr_id] = ctx->ccnt[(int)wc.wr_id]+1;\r
- totccnt += 1;\r
- }\r
- }\r
- return(0);\r
-}\r
-\r
-int __cdecl main(int argc, char *argv[])\r
-{\r
- struct ibv_device **dev_list;\r
- struct ibv_device *ib_dev;\r
- struct pingpong_context *ctx;\r
- struct pingpong_dest *my_dest;\r
- struct pingpong_dest **rem_dest;\r
- struct user_parameters user_param;\r
- struct ibv_device_attr device_attribute;\r
- char *ib_devname = NULL;\r
- int port = 18515;\r
- int ib_port = 1;\r
- int size = 65536;\r
- SOCKET sockfd;\r
- int duplex = 0;\r
- int i = 0;\r
- int noPeak = 0;/*noPeak == 0: regular peak-bw calculation done*/\r
- int inline_given_in_cmd = 0;\r
- struct ibv_context *context;\r
- WORD version;\r
- WSADATA data;\r
- int err;\r
-\r
- srand((unsigned int) time(NULL));\r
- version = MAKEWORD(2, 2);\r
- err = WSAStartup(version, &data);\r
- if (err)\r
- return -1;\r
-\r
- /* init default values to user's parameters */\r
- memset(&user_param, 0, sizeof(struct user_parameters));\r
- user_param.mtu = 0;\r
- user_param.iters = 5000;\r
- user_param.tx_depth = 100;\r
- user_param.servername = NULL;\r
- user_param.numofqps = 1;\r
- user_param.maxpostsofqpiniteration = 100;\r
- user_param.inline_size = MAX_INLINE;\r
-\r
- /* Parameter parsing. */\r
- while (1) {\r
- int c;\r
-\r
- c = getopt(argc, argv, "h:p:d:i:m:q:g:c:s:n:t:I:baVN");\r
- if (c == -1)\r
- break;\r
-\r
- switch (c) {\r
- case 'p':\r
- port = strtol(optarg, NULL, 0);\r
- if (port < 0 || port > 65535) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
-\r
- case 'd':\r
- ib_devname = _strdup(optarg);\r
- break;\r
- case 'c':\r
- if (strcmp("UC",optarg)==0)\r
- user_param.connection_type=UC;\r
- break;\r
-\r
- case 'm':\r
- user_param.mtu = strtol(optarg, NULL, 0);\r
- break;\r
- case 'q':\r
- user_param.numofqps = strtol(optarg, NULL, 0);\r
- break;\r
- case 'g':\r
- user_param.maxpostsofqpiniteration = strtol(optarg, NULL, 0);\r
- break;\r
- case 'a':\r
- user_param.all = ALL;\r
- break;\r
- case 'V':\r
- printf("rdma_bw version : %.2f\n",VERSION);\r
- return 0;\r
- case 'i':\r
- ib_port = strtol(optarg, NULL, 0);\r
- if (ib_port < 0) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
-\r
- case 's':\r
- size = strtol(optarg, NULL, 0);\r
- break;\r
-\r
- case 't':\r
- user_param.tx_depth = strtol(optarg, NULL, 0);\r
- if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }\r
- break;\r
-\r
- case 'I':\r
- user_param.inline_size = strtol(optarg, NULL, 0);\r
- inline_given_in_cmd =1;\r
- if (user_param.inline_size > MAX_INLINE) {\r
- usage(argv[0]);\r
- return 7;\r
- }\r
- break;\r
-\r
- case 'n':\r
- user_param.iters = strtol(optarg, NULL, 0);\r
- if (user_param.iters < 2) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
-\r
- case 'b':\r
- duplex = 1;\r
- break;\r
-\r
- case 'N':\r
- noPeak = 1;\r
- break;\r
-\r
- case 'h':\r
- if (optarg) {\r
- user_param.servername = _strdup(optarg);\r
- break;\r
- }\r
-\r
- default:\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- }\r
- \r
- printf("------------------------------------------------------------------\n");\r
- if (duplex == 1) {\r
- printf(" RDMA_Write Bidirectional BW Test\n");\r
- } else {\r
- printf(" RDMA_Write BW Test\n");\r
- }\r
- \r
- printf("Number of qp's running %d\n",user_param.numofqps);\r
- if (user_param.connection_type==RC) {\r
- printf("Connection type : RC\n");\r
- } else {\r
- printf("Connection type : UC\n");\r
- }\r
- if (user_param.maxpostsofqpiniteration > user_param.tx_depth ) {\r
- printf("Can not post more than tx_depth , adjusting number of post to tx_depth\n");\r
- user_param.maxpostsofqpiniteration = user_param.tx_depth;\r
- }\r
- if (user_param.maxpostsofqpiniteration > user_param.iters ) {\r
- printf("Can not post more than iterations per qp , adjusting max number of post to num of iteration\n");\r
- user_param.maxpostsofqpiniteration = user_param.iters;\r
- } \r
- printf("Each Qp will post up to %d messages each time\n",user_param.maxpostsofqpiniteration);\r
- /* Done with parameter parsing. Perform setup. */\r
- if (user_param.all == ALL) {\r
- /*since we run all sizes */\r
- size = 8388608; /*2^23 */\r
- }\r
-\r
- dev_list = ibv_get_device_list(NULL);\r
-\r
- if (!ib_devname) {\r
- ib_dev = dev_list[0];\r
- if (!ib_dev) {\r
- fprintf(stderr, "No IB devices found\n");\r
- return 1;\r
- }\r
- } else {\r
- for (; (ib_dev = *dev_list); ++dev_list)\r
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))\r
- break;\r
- if (!ib_dev) {\r
- fprintf(stderr, "IB device %s not found\n", ib_devname);\r
- return 1;\r
- }\r
- }\r
-\r
- context = ibv_open_device(ib_dev);\r
- if (ibv_query_device(context, &device_attribute)) {\r
- fprintf(stderr, "Failed to query device props");\r
- return 1;\r
- }\r
- if ((device_attribute.vendor_part_id == 25418) && (!inline_given_in_cmd)) {\r
- user_param.inline_size = 1;\r
- }\r
- printf("Inline data is used up to %d bytes message\n", user_param.inline_size);\r
-\r
- ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port, &user_param);\r
- if (!ctx)\r
- return 1;\r
-\r
- \r
- if (user_param.servername) {\r
- sockfd = pp_client_connect(user_param.servername, port);\r
- if (sockfd == INVALID_SOCKET)\r
- return 1;\r
- } else {\r
- sockfd = pp_server_connect(port);\r
- if (sockfd == INVALID_SOCKET)\r
- return 1;\r
- }\r
- \r
- my_dest = malloc(user_param.numofqps * sizeof *my_dest);\r
- if (!my_dest) {\r
- perror("malloc my_dest");\r
- return 1;\r
- }\r
- rem_dest = malloc(sizeof (struct pingpong_dest*) * user_param.numofqps );\r
- if (!rem_dest ) {\r
- perror("malloc rem_dest");\r
- return 1;\r
- }\r
- \r
- for (i =0 ;i<user_param.numofqps;i ++) {\r
- /* Create connection between client and server.\r
- * We do it by exchanging data over a TCP socket connection. */\r
- my_dest[i].lid = pp_get_local_lid(ctx, ib_port);\r
- my_dest[i].psn = rand() & 0xffffff;\r
- if (!my_dest[i].lid) {\r
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
- return 1;\r
- }\r
- my_dest[i].qpn = ctx->qp[i]->qp_num;\r
- /* TBD this should be changed inot VA and different key to each qp */\r
- my_dest[i].rkey = ctx->mr->rkey;\r
- my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;\r
- \r
- printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "\r
- "RKey %#08x VAddr %#016Lx\n",\r
- my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,\r
- my_dest[i].rkey, my_dest[i].vaddr);\r
- if (user_param.servername) {\r
- rem_dest[i] = pp_client_exch_dest(sockfd, &my_dest[i]);\r
- } else {\r
- rem_dest[i] = pp_server_exch_dest(sockfd, &my_dest[i]);\r
- }\r
- if (!rem_dest[i])\r
- return 1;\r
- printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "\r
- "RKey %#08x VAddr %#016Lx\n",\r
- rem_dest[i]->lid, rem_dest[i]->qpn, rem_dest[i]->psn,\r
- rem_dest[i]->rkey, rem_dest[i]->vaddr);\r
- if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, rem_dest[i], &user_param, i))\r
- return 1;\r
- \r
- /* An additional handshake is required *after* moving qp to RTR.\r
- Arbitrarily reuse exch_dest for this purpose. */\r
- if (user_param.servername) {\r
- rem_dest[i] = pp_client_exch_dest(sockfd, &my_dest[i]);\r
- } else {\r
- rem_dest[i] = pp_server_exch_dest(sockfd, &my_dest[i]);\r
- } \r
- }\r
- \r
- printf("------------------------------------------------------------------\n");\r
- printf(" #bytes #iterations BW peak[MB/sec] BW average[MB/sec] \n");\r
- /* For half duplex tests, server just waits for client to exit */\r
- /* the 0th place is arbitrary to signal finish ... */\r
- if (!user_param.servername && !duplex) {\r
- rem_dest[0] = pp_server_exch_dest(sockfd, &my_dest[0]);\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("server write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(sockfd);\r
- return 0;\r
- }\r
-\r
- tposted = malloc(user_param.iters * user_param.numofqps * sizeof *tposted);\r
-\r
- if (!tposted) {\r
- perror("malloc");\r
- return 1;\r
- }\r
-\r
- tcompleted = malloc(user_param.iters * user_param.numofqps * sizeof *tcompleted);\r
-\r
- if (!tcompleted) {\r
- perror("malloc");\r
- return 1;\r
- }\r
-\r
- if (user_param.all == ALL) {\r
- for (i = 1; i < 24 ; ++i) {\r
- size = 1 << i;\r
- if(run_iter(ctx, &user_param, rem_dest, size))\r
- return 17;\r
- print_report(user_param.iters, size, duplex, tposted, tcompleted, &user_param, noPeak);\r
- }\r
- } else {\r
- if(run_iter(ctx, &user_param, rem_dest, size))\r
- return 18;\r
- print_report(user_param.iters, size, duplex, tposted, tcompleted, &user_param, noPeak);\r
- }\r
- /* the 0th place is arbitrary to signal finish ... */\r
- if (user_param.servername) {\r
- rem_dest[0] = pp_client_exch_dest(sockfd, &my_dest[0]);\r
- } else {\r
- rem_dest[0] = pp_server_exch_dest(sockfd, &my_dest[0]);\r
- }\r
-\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(sockfd);\r
-\r
- free(tposted);\r
- free(tcompleted);\r
-\r
- printf("------------------------------------------------------------------\n");\r
- return 0;\r
-}\r
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <infiniband/verbs.h>
+#include <windows.h>
+
+#include <ws2tcpip.h>
+#include <winsock2.h>
+
+#include "get_clock.h"
+#include "perftest_resources.h"
+#include "l2w.h"
+#include "..\..\etc\user\getopt.c"
+
+
+#define VERSION 2.1
+#define MAX_INLINE 400
+
+static uint8_t sl = 0; /* service level copied into ah_attr.sl by pp_connect_ctx() when no GID index is used */
+static int page_size; /* alignment handed to posix_memalign() in pp_init_ctx(); NOTE(review): no assignment is visible here - confirm it is set before use */
+
+cycles_t *tposted; /* presumably one cycle stamp per posted send, consumed by print_report() - TODO confirm */
+cycles_t *tcompleted; /* presumably one cycle stamp per polled completion, consumed by print_report() - TODO confirm */
+
+struct pingpong_context { /* everything pp_init_ctx() creates and destroy_ctx_resources() releases */
+ struct ibv_context *context; /* device context returned by ibv_open_device() */
+ struct ibv_pd *pd; /* protection domain allocated on context */
+ struct ibv_mr *mr; /* memory region registered over buf (local + remote write access) */
+ struct ibv_cq *cq; /* single CQ, passed to ctx_qp_create() for both CQ arguments of every QP */
+ struct ibv_qp **qp; /* array of num_of_qps queue pairs */
+ void *buf; /* work buffer of BUFF_SIZE(size) * 2 * num_of_qps bytes */
+ unsigned size; /* size argument given to pp_init_ctx() */
+ int tx_depth; /* copy of user_parm->tx_depth */
+ int *scnt; /* num_of_qps ints, zeroed in pp_init_ctx(); presumably per-QP posted count - TODO confirm */
+ int *ccnt; /* num_of_qps ints, zeroed in pp_init_ctx(); presumably per-QP completed count - TODO confirm */
+};
+
+/******************************************************************************
+ * set_up_connection - fill my_dest[] with the local address of every QP.
+ ******************************************************************************/
+static int set_up_connection(struct pingpong_context *ctx,	// in: device context, MR, QPs and buffer
+			     struct perftest_parameters *user_parm,	// in: ib_port, gid_index, num_of_qps
+			     struct pingpong_dest *my_dest) {	// out: num_of_qps entries; returns 0, or -1 on failure
+
+	int i;
+	union ibv_gid temp_gid;
+	memset(&temp_gid, 0, sizeof temp_gid);	// copied into every entry below, so it must be defined even when no GID is queried
+	if (user_parm->gid_index != -1) {
+		if (ibv_query_gid(ctx->context,user_parm->ib_port,user_parm->gid_index,&temp_gid)) {
+			return -1;
+		}
+	}
+
+	for (i=0; i < user_parm->num_of_qps; i++) {
+		my_dest[i].lid = ctx_get_local_lid(ctx->context,user_parm->ib_port);
+		my_dest[i].qpn = ctx->qp[i]->qp_num;
+		my_dest[i].psn = rand() & 0xffffff;
+		my_dest[i].rkey = ctx->mr->rkey;
+		// Each QP advertises its own slot in the second half of buf (slots num_of_qps .. 2*num_of_qps-1).
+		my_dest[i].vaddr = (uintptr_t)ctx->buf + (user_parm->num_of_qps + i)*BUFF_SIZE(ctx->size);
+		memcpy(my_dest[i].gid.raw,temp_gid.raw ,16);
+
+		// A zero LID is fatal only when no GID index was given; with a GID index it is tolerated.
+		if (user_parm->gid_index < 0) {
+			if (!my_dest[i].lid) {
+				fprintf(stderr,"Local lid 0x0 detected. Is an SM running? \n");
+				return -1;
+			}
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************
+ * init_connection - print the local endpoints, then open the TCP side channel.
+ ******************************************************************************/
+static int init_connection(struct perftest_parameters *params,
+			   struct pingpong_dest *my_dest,
+			   const char *servername) {
+
+	int qp_idx = 0;
+
+	params->side = LOCAL;	// flag params as describing the local side before printing
+	while (qp_idx < params->num_of_qps) {
+		ctx_print_pingpong_data(my_dest + qp_idx,params);
+		qp_idx++;
+	}
+
+	if (!servername) {	// no server name: act as the server, otherwise connect as the client
+		params->sockfd = ctx_server_connect(params->port);
+	} else {
+		params->sockfd = ctx_client_connect(servername,params->port);
+	}
+
+	if (params->sockfd != INVALID_SOCKET)
+		return 0;
+	fprintf(stderr,"Unable to open file descriptor for socket connection");
+	return 1;
+}
+
+/******************************************************************************
+ * destroy_ctx_resources - tear down QPs, CQ, MR, PD, device, then free ctx.
+ ******************************************************************************/
+static int destroy_ctx_resources(struct pingpong_context *ctx,int num_qps) {
+
+	int rc = 0;	// becomes 1 if any teardown step reports failure; teardown still continues
+	int qp_idx = 0;
+
+	while (qp_idx < num_qps) {
+		if (ibv_destroy_qp(ctx->qp[qp_idx]) != 0) {
+			rc = 1;
+			fprintf(stderr, "failed to destroy QP\n");
+		}
+		qp_idx++;
+	}
+
+	if (ibv_destroy_cq(ctx->cq) != 0) {
+		rc = 1;
+		fprintf(stderr, "failed to destroy CQ\n");
+	}
+
+	if (ibv_dereg_mr(ctx->mr) != 0) {
+		rc = 1;
+		fprintf(stderr, "failed to deregister MR\n");
+	}
+
+	if (ibv_dealloc_pd(ctx->pd) != 0) {
+		rc = 1;
+		fprintf(stderr, "failed to deallocate PD\n");
+	}
+
+	if (ibv_close_device(ctx->context) != 0) {
+		rc = 1;
+		fprintf(stderr, "failed to close device context\n");
+	}
+
+	free(ctx->ccnt);	// plain allocations go last, ending with ctx itself
+	free(ctx->scnt);
+	free(ctx->qp);
+	posix_memfree(ctx->buf);
+	free(ctx);
+	return rc;
+}
+
+/******************************************************************************
+ * pp_init_ctx - build the test context: buffer, device, PD, MR, CQ and QPs.
+ ******************************************************************************/
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,unsigned size,	// size: dimensions the work buffer
+					    struct perftest_parameters *user_parm)	// in/out: mtu is filled in when left at 0
+{	// Returns the context after every QP went through ctx_modify_qp_to_init(), or NULL on failure.
+	struct pingpong_context *ctx;
+	struct ibv_device_attr device_attr;
+	int counter;
+	// NOTE(review): each failure below returns NULL without releasing what was acquired before it.
+	ALLOCATE(ctx,struct pingpong_context,1);
+	ALLOCATE(ctx->qp,struct ibv_qp*,user_parm->num_of_qps);
+	ALLOCATE(ctx->scnt,int,user_parm->num_of_qps);
+	ALLOCATE(ctx->ccnt,int,user_parm->num_of_qps);
+
+	memset(ctx->scnt, 0, user_parm->num_of_qps * sizeof (int));
+	memset(ctx->ccnt, 0, user_parm->num_of_qps * sizeof (int));
+
+	ctx->size = size;
+	ctx->tx_depth = user_parm->tx_depth;
+	ctx->buf = NULL;	// so the !ctx->buf test below is meaningful if the allocator leaves it untouched
+	// The buffer is sized with BUFF_SIZE() for best performance on "Nehalem"
+	// systems, as described next to the BUFF_SIZE macro in perftest_resources.h
+	posix_memalign(&(ctx->buf),page_size, BUFF_SIZE(size) * 2 * user_parm->num_of_qps);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		return NULL;
+	}
+
+	memset(ctx->buf, 0, BUFF_SIZE(size) * 2 * user_parm->num_of_qps);
+
+	ctx->context = ibv_open_device(ib_dev);
+	if (!ctx->context) {
+		fprintf(stderr, "Couldn't get context for %s\n",
+			ibv_get_device_name(ib_dev));
+		return NULL;
+	}
+
+	// Finds the link type and configures the HCA accordingly.
+	if (ctx_set_link_layer(ctx->context,user_parm)) {
+		fprintf(stderr, "Couldn't set the link layer\n");
+		return NULL;
+	}
+
+	if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
+		if (ibv_query_device(ctx->context, &device_attr)) {
+			fprintf(stderr, "Failed to query device props");
+			return NULL;
+		}
+		if (device_attr.vendor_part_id == 23108 || user_parm->gid_index != -1) {
+			user_parm->mtu = 1024;
+		} else {
+			user_parm->mtu = 2048;
+		}
+	}
+
+	ctx->pd = ibv_alloc_pd(ctx->context);
+	if (!ctx->pd) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	// We don't really want IBV_ACCESS_LOCAL_WRITE, but the IB spec says:
+	// The Consumer is not allowed to assign Remote Write or Remote Atomic to
+	// a Memory Region that has not been assigned Local Write.
+	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, BUFF_SIZE(size) * 2 * user_parm->num_of_qps,IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+	if (!ctx->mr) {
+		fprintf(stderr, "Couldn't allocate MR\n");
+		return NULL;
+	}
+
+	// Creates the CQ according to ctx_cq_create in perftest_resources.
+	ctx->cq = ctx_cq_create(ctx->context,NULL,user_parm);
+	if (!ctx->cq) {
+		fprintf(stderr, "Couldn't create CQ\n");
+		return NULL;
+	}
+
+
+	for (counter = 0 ; counter < user_parm->num_of_qps ; counter++) {
+
+		ctx->qp[counter] = ctx_qp_create(ctx->pd,ctx->cq,ctx->cq,user_parm);
+		if (!ctx->qp[counter]) {
+			fprintf(stderr, "Couldn't create QP\n");
+			return NULL;
+		}
+
+		if (ctx_modify_qp_to_init(ctx->qp[counter],user_parm)) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			return NULL;
+		}
+	}
+	return ctx;
+}
+
+/******************************************************************************
+ * pp_connect_ctx - move QP number qpindex through RTR to RTS towards dest.
+ ******************************************************************************/
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,	// my_psn: becomes the send-queue PSN
+			  struct pingpong_dest *dest,	// remote qpn, psn, lid and gid
+			  struct perftest_parameters *user_parm, int qpindex)	// returns 0, or 1 on any failure
+{
+	struct ibv_qp_attr attr;
+	memset(&attr, 0, sizeof attr);
+
+	attr.qp_state = IBV_QPS_RTR;
+	switch (user_parm->mtu) {
+	case 256 :
+		attr.path_mtu = IBV_MTU_256;
+		break;
+	case 512 :
+		attr.path_mtu = IBV_MTU_512;
+		break;
+	case 1024 :
+		attr.path_mtu = IBV_MTU_1024;
+		break;
+	case 2048 :
+		attr.path_mtu = IBV_MTU_2048;
+		break;
+	case 4096 :
+		attr.path_mtu = IBV_MTU_4096;
+		break;
+	default :	// without this, path_mtu stayed 0 and the RTR transition failed with no hint why
+		fprintf(stderr, "Unsupported MTU %d\n", (int)user_parm->mtu);
+		return 1;
+	}
+	attr.dest_qp_num = dest->qpn;
+	attr.rq_psn = dest->psn;
+	attr.ah_attr.dlid = dest->lid;
+	if (user_parm->connection_type==RC) {
+		attr.max_dest_rd_atomic = 1;
+		attr.min_rnr_timer = 12;
+	}
+	if (user_parm->gid_index<0) {
+		attr.ah_attr.is_global = 0;
+		attr.ah_attr.sl = sl;
+	} else {
+		attr.ah_attr.is_global = 1;
+		attr.ah_attr.grh.dgid = dest->gid;
+		attr.ah_attr.grh.sgid_index = (uint8_t)user_parm->gid_index;
+		attr.ah_attr.grh.hop_limit = 1;
+		attr.ah_attr.sl = 0;
+	}
+	attr.ah_attr.src_path_bits = 0;
+	attr.ah_attr.port_num = user_parm->ib_port;
+	if (user_parm->connection_type == RC) {
+		if (ibv_modify_qp(ctx->qp[qpindex], &attr,
+				  IBV_QP_STATE |
+				  IBV_QP_AV |
+				  IBV_QP_PATH_MTU |
+				  IBV_QP_DEST_QPN |
+				  IBV_QP_RQ_PSN |
+				  IBV_QP_MIN_RNR_TIMER |
+				  IBV_QP_MAX_DEST_RD_ATOMIC)) {
+			fprintf(stderr, "Failed to modify RC QP to RTR\n");
+			return 1;
+		}
+		attr.timeout = user_parm->qp_timeout;	// these three are applied by the RTS transition below
+		attr.retry_cnt = 7;
+		attr.rnr_retry = 7;
+	} else {
+		if (ibv_modify_qp(ctx->qp[qpindex], &attr,
+				  IBV_QP_STATE |
+				  IBV_QP_AV |
+				  IBV_QP_PATH_MTU |
+				  IBV_QP_DEST_QPN |
+				  IBV_QP_RQ_PSN)) {
+			fprintf(stderr, "Failed to modify UC QP to RTR\n");
+			return 1;
+		}
+	}
+	attr.qp_state = IBV_QPS_RTS;	// second transition: RTR -> RTS
+	attr.sq_psn = my_psn;
+	attr.max_rd_atomic = 1;
+	if (user_parm->connection_type == RC) {	// same symbolic test as the RTR step above
+		if (ibv_modify_qp(ctx->qp[qpindex], &attr,
+				  IBV_QP_STATE |
+				  IBV_QP_SQ_PSN |
+				  IBV_QP_TIMEOUT |
+				  IBV_QP_RETRY_CNT |
+				  IBV_QP_RNR_RETRY |
+				  IBV_QP_MAX_QP_RD_ATOMIC)) {
+			fprintf(stderr, "Failed to modify RC QP to RTS\n");
+			return 1;
+		}
+	} else {
+		if (ibv_modify_qp(ctx->qp[qpindex], &attr,
+				  IBV_QP_STATE |
+				  IBV_QP_SQ_PSN)) {
+			fprintf(stderr, "Failed to modify UC QP to RTS\n");
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************
+ * usage - print the command-line synopsis and option list to stdout.
+ ******************************************************************************/
+static void usage(const char *argv0)
+{
+	printf("Usage:\n"
+	       " %s start a server and wait for connection\n"
+	       " %s <host> connect to server at <host>\n"
+	       "\n", argv0, argv0);
+	fputs("Options:\n"	// no conversions below, so the text is written verbatim
+	      " -p, --port=<port> listen on/connect to port <port> (default 18515)\n"
+	      " -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n"
+	      " -i, --ib-port=<port> use port <port> of IB device (default 1)\n"
+	      " -c, --connection=<RC/UC> connection type RC/UC (default RC)\n"
+	      " -m, --mtu=<mtu> mtu size (256 - 4096. default for hermon is 2048)\n"
+	      " -g, --post=<num of posts> number of posts for each qp in the chain (default tx_depth)\n"
+	      " -q, --qp=<num of qp's> Num of qp's(default 1)\n"
+	      " -s, --size=<size> size of message to exchange (default 65536)\n"
+	      " -a, --all Run sizes from 2 till 2^23\n"
+	      " -t, --tx-depth=<dep> size of tx queue (default 100)\n"
+	      " -n, --iters=<iters> number of exchanges (at least 2, default 5000)\n"
+	      " -I, --inline_size=<size> max size of message to be sent in inline mode (default 400)\n"
+	      " -u, --qp-timeout=<timeout> QP timeout, timeout value is 4 usec * 2 ^(timeout), default 14\n"
+	      " -S, --sl=<sl> SL (default 0)\n"
+	      " -x, --gid-index=<index> test uses GID with GID index taken from command line (for RDMAoE index should be 0)\n"
+	      " -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n"
+	      " -V, --version display version number\n"
+	      " -N, --no peak-bw cancel peak-bw calculation (default with peak-bw)\n"
+	      " -F, --CPU-freq do not fail even if cpufreq_ondemand module is loaded\n", stdout);
+}
+
+/******************************************************************************
+ * print_report - print the peak and the average bandwidth for one message size.
+ ******************************************************************************/
+static void print_report(unsigned size, int duplex,cycles_t *tposted, cycles_t *tcompleted,
+ struct perftest_parameters *user_param,int noPeak, int no_cpu_freq_fail) {
+
+ const int iters = user_param->iters;
+ const int total = iters * user_param->num_of_qps; /* timestamps held by each array */
+ int first, last;
+ int best_first = 0, best_last = 0;
+ unsigned long xfer_size; /* Transferred size per iteration (doubled when duplex) */
+ double mhz, peak_bw, avg_bw;
+ cycles_t best, cur;
+
+ /* Baseline: cycles between posting and completing the very first message. */
+ best = tcompleted[best_first] - tposted[best_last];
+
+ /* Peak search (skipped when noPeak is set): lowest cycles-per-message over
+ * every window of consecutive messages first..last. */
+ for (first = 0; !noPeak && first < total; ++first) {
+ for (last = first; last < total; ++last) {
+ cur = (tcompleted[last] - tposted[first]) / (last - first + 1);
+ if (cur >= best)
+ continue;
+ best = cur;
+ best_first = first;
+ best_last = last;
+ }
+ }
+
+ mhz = get_cpu_mhz();
+ xfer_size = (unsigned long)(duplex ? 2 : 1) * size;
+ peak_bw = !(noPeak) * xfer_size * mhz / best / 0x100000;
+ avg_bw = (uint64_t)xfer_size*iters*user_param->num_of_qps*mhz/(tcompleted[total - 1] - tposted[0]) / 0x100000;
+ printf(REPORT_FMT,size,iters,peak_bw,avg_bw);
+
+}
+
+/******************************************************************************
+ * run_iter - post user_param->iters RDMA writes of length 'size' on every QP
+ * and poll the CQ until all of them complete, stamping tposted[]/tcompleted[].
+ * A QP never has more than maxpostsofqpiniteration writes outstanding.
+ * Returns 0 on success, 1 when a post or a CQ poll fails.
+ ******************************************************************************/
+int run_iter(struct pingpong_context *ctx, struct perftest_parameters *user_param,
+ struct pingpong_dest *rem_dest, int size,int maxpostsofqpiniteration)
+{
+ int rc = 1; /* set to 0 only after every write has completed */
+ int totscnt = 0;
+ int totccnt = 0;
+ int i = 0;
+ int index,ne;
+ int warmindex;
+ struct ibv_send_wr *bad_wr;
+ struct ibv_wc *wc = NULL;
+ struct ibv_sge *sge_list = NULL;
+ struct ibv_send_wr *wr = NULL;
+ uint64_t *my_addr = NULL;
+ uint64_t *rem_addr = NULL;
+
+ ALLOCATE(wr ,struct ibv_send_wr , user_param->num_of_qps);
+ ALLOCATE(sge_list ,struct ibv_sge , user_param->num_of_qps);
+ ALLOCATE(my_addr ,uint64_t ,user_param->num_of_qps);
+ ALLOCATE(rem_addr ,uint64_t ,user_param->num_of_qps);
+ ALLOCATE(wc ,struct ibv_wc , DEF_WC_SIZE);
+
+ // Each QP has its own wr and sge , that holds the qp addresses and attr.
+ // We write in cycles on the buffer to exploit the "Nehalem" system.
+ for (index = 0 ; index < user_param->num_of_qps ; index++) {
+ sge_list[index].addr = (uintptr_t)ctx->buf + (index*BUFF_SIZE(ctx->size));
+ sge_list[index].length = size;
+ sge_list[index].lkey = ctx->mr->lkey;
+
+ wr[index].sg_list = &sge_list[index];
+ wr[index].num_sge = MAX_SEND_SGE;
+ wr[index].opcode = IBV_WR_RDMA_WRITE;
+ wr[index].next = NULL;
+ wr[index].wr.rdma.remote_addr = rem_dest[index].vaddr;
+ wr[index].wr.rdma.rkey = rem_dest[index].rkey;
+ wr[index].wr_id = index;
+ wr[index].send_flags = IBV_SEND_SIGNALED;
+ if (size <= user_param->inline_size)
+ wr[index].send_flags |= IBV_SEND_INLINE;
+
+ ctx->scnt[index] = 0;
+ ctx->ccnt[index] = 0;
+ my_addr[index] = sge_list[index].addr;
+ rem_addr[index] = wr[index].wr.rdma.remote_addr;
+ }
+
+ // Done with setup. Start the test. Warm up: post one wr per qp in turn
+ // until every qp has maxpostsofqpiniteration writes outstanding.
+ for (warmindex = 0 ;warmindex < maxpostsofqpiniteration ;warmindex ++ ) {
+ for (index =0 ; index < user_param->num_of_qps ; index++) {
+ tposted[totscnt] = get_cycles();
+ if (ibv_post_send(ctx->qp[index],&wr[index],&bad_wr)) {
+ fprintf(stderr,"Couldn't post send: qp %d scnt=%d \n",index,ctx->scnt[index]);
+ goto cleanup;
+ }
+ // If we can increase the remote address , so the next write will be to other address ,
+ // We do it.
+ if (size <= (CYCLE_BUFFER / 2)) {
+ increase_rem_addr(&wr[index],size,ctx->scnt[index],rem_addr[index]);
+ increase_loc_addr(wr[index].sg_list,size,ctx->scnt[index],my_addr[index]);
+ }
+
+ ctx->scnt[index]++;
+ totscnt++;
+
+ }
+ }
+
+ // main loop for posting
+ while (totscnt < (user_param->iters * user_param->num_of_qps) || totccnt < (user_param->iters * user_param->num_of_qps) ) {
+
+ // main loop to run over all the qps and post each time n messages
+ for (index =0 ; index < user_param->num_of_qps ; index++) {
+
+ while (ctx->scnt[index] < user_param->iters && (ctx->scnt[index] - ctx->ccnt[index]) < maxpostsofqpiniteration) {
+
+ tposted[totscnt] = get_cycles();
+ if (ibv_post_send(ctx->qp[index],&wr[index],&bad_wr)) {
+ fprintf(stderr,"Couldn't post send: qp %d scnt=%d \n",index,ctx->scnt[index]);
+ goto cleanup;
+ }
+
+ if (size <= (CYCLE_BUFFER / 2)) {
+ increase_rem_addr(&wr[index],size,ctx->scnt[index],rem_addr[index]);
+ increase_loc_addr(wr[index].sg_list,size,ctx->scnt[index],my_addr[index]);
+ }
+
+ ctx->scnt[index] = ctx->scnt[index]+1;
+ totscnt++;
+ }
+ }
+
+ // finished posting now polling
+ if (totccnt < (user_param->iters * user_param->num_of_qps) ) {
+
+ do {
+ ne = ibv_poll_cq(ctx->cq, DEF_WC_SIZE, wc);
+ if (ne > 0) {
+ for (i = 0; i < ne; i++) {
+ // NOTE(review): if the macro below returns, the scratch arrays still leak - confirm.
+ if (wc[i].status != IBV_WC_SUCCESS)
+ NOTIFY_COMP_ERROR_SEND(wc[i],totscnt,totccnt);
+
+ ctx->ccnt[(int)wc[i].wr_id]++;
+ tcompleted[totccnt++] = get_cycles();
+ }
+ }
+ } while (ne > 0);
+
+ if (ne < 0) {
+ fprintf(stderr, "poll CQ failed %d\n", ne);
+ goto cleanup;
+ }
+ }
+ }
+ rc = 0;
+cleanup: // single exit so the scratch arrays are released on failures too
+ free(wr);
+ free(sge_list);
+ free(my_addr);
+ free(rem_addr);
+ free(wc);
+ return rc;
+}
+
+/******************************************************************************
+ * main - parse the options, connect every QP to the peer and run the write BW test.
+ ******************************************************************************/
+int __cdecl main(int argc, char *argv[])
+{
+ struct ibv_device **dev_list;
+ struct ibv_device *ib_dev;
+ struct pingpong_context *ctx;
+ struct pingpong_dest *my_dest,*rem_dest;
+ struct perftest_parameters user_param;
+ struct ibv_device_attr device_attribute;
+ unsigned size = 65536;
+ int i = 0;
+ int noPeak = 0;
+ int inline_given_in_cmd = 0;
+ struct ibv_context *context;
+ int no_cpu_freq_fail = 0;
+ int all = 0;
+ char *ib_devname = NULL;
+ const char *servername = NULL;
+ int maxpostsofqpiniteration = 100;
+
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+
+ /* init default values to user's parameters */
+ memset(&user_param,0,sizeof(struct perftest_parameters));
+
+ user_param.mtu = 0;
+ user_param.port = 18515;
+ user_param.ib_port = 1;
+ user_param.tx_depth = 100;
+ user_param.rx_depth = 1;
+ user_param.num_of_qps = 1;
+ user_param.inline_size = 0;
+ user_param.qp_timeout = 14;
+ user_param.gid_index = -1;
+ user_param.iters = 5000;
+ user_param.verb = WRITE;
+ /* Parameter parsing. */
+ while (1) {
+ int c;
+
+ static struct option long_options[] = {
+ { "port", 1, NULL, 'p' },
+ { "ib-dev", 1, NULL, 'd' },
+ { "ib-port", 1, NULL, 'i' },
+ { "mtu", 1, NULL, 'm' },
+ { "qp", 1, NULL, 'q' },
+ { "post", 1, NULL, 'g' },
+ { "connection", 1, NULL, 'c' },
+ { "size", 1, NULL, 's' },
+ { "iters", 1, NULL, 'n' },
+ { "tx-depth", 1, NULL, 't' },
+ { "inline_size", 1, NULL, 'I' },
+ { "qp-timeout", 1, NULL, 'u' },
+ { "sl", 1, NULL, 'S' },
+ { "gid-index", 1, NULL, 'x' },
+ { "all", 0, NULL, 'a' },
+ { "bidirectional", 0, NULL, 'b' },
+ { "version", 0, NULL, 'V' },
+ { "noPeak", 0, NULL, 'N' },
+ { "CPU-freq", 0, NULL, 'F' },
+ { 0 }
+ };
+
+ c = getopt_long(argc, argv, "p:d:i:m:q:g:c:s:n:t:I:u:S:x:baVNF", long_options, NULL);
+ if (c == -1)
+ break;
+
+ if (c == EINVAL)
+ {
+ usage(argv[0]);
+ return 7;
+ }
+
+ switch (c) {
+ case 'p':
+ user_param.port = strtol(optarg, NULL, 0);
+ if (user_param.port < 0 || user_param.port > 65535) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 'd':
+ ib_devname = _strdup(optarg);
+ break;
+ case 'c':
+ if (strcmp("UC",optarg)==0)
+ user_param.connection_type=UC;
+ break;
+ case 'm':
+ user_param.mtu = strtol(optarg, NULL, 0);
+ break;
+ case 'q':
+ user_param.num_of_qps = strtol(optarg, NULL, 0);
+ if (user_param.num_of_qps < 1) { usage(argv[0]); return 1; } /* QP 0 is used unconditionally below */
+ break;
+ case 'g':
+ maxpostsofqpiniteration = strtol(optarg, NULL, 0);
+ if (maxpostsofqpiniteration < 1) { usage(argv[0]); return 1; } /* run_iter could never post a write */
+ break;
+ case 'a':
+ all = ALL;
+ break;
+ case 'V':
+ printf("rdma_bw version : %.2f\n",VERSION);
+ return 0;
+ break;
+ case 'i':
+ user_param.ib_port = (uint8_t)(strtol(optarg, NULL, 0));
+ if (user_param.ib_port < 0) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 's':
+ size = strtol(optarg, NULL, 0);
+ if (size < 1 || size > UINT_MAX / 2) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 't':
+ user_param.tx_depth = strtol(optarg, NULL, 0);
+ if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }
+ break;
+
+ case 'I':
+ user_param.inline_size = strtol(optarg, NULL, 0);
+ inline_given_in_cmd =1;
+ if (user_param.inline_size > MAX_INLINE) {
+ usage(argv[0]);
+ return 7;
+ }
+ break;
+
+ case 'n':
+ user_param.iters = strtol(optarg, NULL, 0);
+ if (user_param.iters < 2) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ case 'b':
+ user_param.duplex = 1;
+ break;
+
+ case 'N':
+ noPeak = 1;
+ break;
+
+ case 'F':
+ no_cpu_freq_fail = 1;
+ break;
+
+ case 'u':
+ user_param.qp_timeout = (uint8_t)(strtol(optarg, NULL, 0));
+ break;
+
+ case 'S':
+ sl = (uint8_t)(strtol(optarg, NULL, 0));
+ if (sl > 15) { usage(argv[0]); return 1; }
+ break;
+
+ case 'x':
+ user_param.gid_index = (uint8_t)(strtol(optarg, NULL, 0));
+ if (user_param.gid_index > 63) {
+ usage(argv[0]);
+ return 1;
+ }
+ break;
+
+ default:
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (optind == argc - 1)
+ servername = _strdup(argv[optind]);
+ else if (optind < argc) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ printf(RESULT_LINE);
+ user_param.machine = servername ? CLIENT : SERVER;
+
+ if (user_param.duplex == 1) {
+ printf(" RDMA_Write Bidirectional BW Test\n");
+ } else {
+ printf(" RDMA_Write BW Test\n");
+ }
+
+ printf(" Number of qp's running %d\n",user_param.num_of_qps);
+ if (user_param.connection_type==RC) {
+ printf(" Connection type : RC\n");
+ } else {
+ printf(" Connection type : UC\n");
+ }
+ if (maxpostsofqpiniteration > user_param.tx_depth ) {
+ printf(" Can not post more than tx_depth , adjusting number of post to tx_depth\n");
+ maxpostsofqpiniteration = user_param.tx_depth;
+ }
+ if (maxpostsofqpiniteration > user_param.iters ) {
+ printf(" Can not post more than iterations per qp , adjusting max number of post to num of iteration\n");
+ maxpostsofqpiniteration = user_param.iters;
+ }
+ printf(" Each Qp will post up to %d messages each time\n",maxpostsofqpiniteration);
+
+ /* Done with parameter parsing. Perform setup. */
+ if (all == ALL) {
+ /*since we run all sizes */
+ size = 8388608; /*2^23 */
+ }
+
+ page_size = si.dwPageSize;
+
+ dev_list = ibv_get_device_list(NULL);
+ if (!dev_list) { fprintf(stderr, " Failed to get IB devices list\n"); return 1; } /* NULL on failure */
+ if (!ib_devname) {
+ ib_dev = dev_list[0];
+ if (!ib_dev) {
+ fprintf(stderr, " No IB devices found\n");
+ return 1;
+ }
+ } else {
+ for (; (ib_dev = *dev_list); ++dev_list)
+ if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+ break;
+ if (!ib_dev) {
+ fprintf(stderr, " IB device %s not found\n", ib_devname);
+ return 1;
+ }
+ }
+
+ context = ibv_open_device(ib_dev);
+ if (!context) { fprintf(stderr, " Couldn't get context for the device\n"); return 1; }
+ if (ibv_query_device(context, &device_attribute)) {
+ fprintf(stderr, " Failed to query device props\n");
+ return 1;
+ }
+ if ((device_attribute.vendor_part_id == 25408 ||
+ device_attribute.vendor_part_id == 25418 ||
+ device_attribute.vendor_part_id == 26408 ||
+ device_attribute.vendor_part_id == 26418 ||
+ device_attribute.vendor_part_id == 26428) && (!inline_given_in_cmd)) {
+ user_param.inline_size = 0;
+ }
+ printf(" Inline data is used up to %d bytes message\n", user_param.inline_size);
+
+ ctx = pp_init_ctx(ib_dev,size,&user_param);
+ if (!ctx)
+ return 1;
+
+ ALLOCATE(my_dest,struct pingpong_dest,user_param.num_of_qps);
+ ALLOCATE(rem_dest,struct pingpong_dest,user_param.num_of_qps);
+
+ // Set up the Connection.
+ if (set_up_connection(ctx,&user_param,my_dest)) {
+ fprintf(stderr," Unable to set up socket connection\n");
+ return 1;
+ }
+
+ // Init the connection and print the local data.
+ if (init_connection(&user_param,my_dest,servername)) {
+ fprintf(stderr," Unable to init the socket connection\n");
+ return 1;
+ }
+
+ // shaking hands and gather the other side info.
+ user_param.side = REMOTE;
+ for (i=0; i < user_param.num_of_qps; i++) {
+ if (ctx_hand_shake(&user_param,&my_dest[i],&rem_dest[i])) {
+ fprintf(stderr," Failed to exchange date between server and clients\n");
+ return 1;
+ }
+ ctx_print_pingpong_data(&rem_dest[i],&user_param);
+
+ if (pp_connect_ctx(ctx,my_dest[i].psn,&rem_dest[i],&user_param,i)) {
+ fprintf(stderr," Unable to Connect the HCA's through the link\n");
+ return 1;
+ }
+
+ // An additional handshake is required after moving qp to RTR.
+ if (ctx_hand_shake(&user_param,&my_dest[i],&rem_dest[i])) {
+ fprintf(stderr," Failed to exchange date between server and clients\n");
+ return 1;
+ }
+ }
+ printf(" Mtu : %d\n", user_param.mtu);
+ printf(RESULT_LINE);
+ printf(RESULT_FMT);
+
+ // For half duplex tests, server just waits for client to exit
+ if (user_param.machine == SERVER && !user_param.duplex) {
+ if (ctx_close_connection(&user_param,&my_dest[0],&rem_dest[0])) {
+ fprintf(stderr,"Failed to close connection between server and client\n");
+ return 1;
+ }
+ printf(RESULT_LINE);
+ return destroy_ctx_resources(ctx,user_param.num_of_qps);
+ }
+
+ ALLOCATE(tposted,cycles_t,user_param.iters*user_param.num_of_qps);
+ ALLOCATE(tcompleted,cycles_t,user_param.iters*user_param.num_of_qps);
+
+ if (all == ALL) {
+
+ for (i = 1; i < 24 ; ++i) {
+ size = 1 << i;
+ if(run_iter(ctx,&user_param,rem_dest,size,maxpostsofqpiniteration))
+ return 17;
+ print_report(size,user_param.duplex,tposted,tcompleted,&user_param,noPeak,no_cpu_freq_fail);
+ }
+
+ } else {
+
+ if(run_iter(ctx, &user_param,rem_dest,size,maxpostsofqpiniteration))
+ return 18;
+ print_report(size,user_param.duplex,tposted,tcompleted, &user_param, noPeak, no_cpu_freq_fail);
+ }
+
+ free(tposted);
+ free(tcompleted);
+
+ // Closing connection.
+ if (ctx_close_connection(&user_param,&my_dest[0],&rem_dest[0])) {
+ fprintf(stderr,"Failed to close connection between server and client\n");
+ return 1;
+ }
+
+ free(my_dest);
+ free(rem_dest);
+ printf(RESULT_LINE);
+
+ return destroy_ctx_resources(ctx,user_param.num_of_qps);
+}
USE_NATIVE_EH = 1\r
USE_IOSTREAM = 1\r
\r
-SOURCES = write_lat.rc write_lat.c ..\perftest.c\r
+SOURCES = write_lat.rc write_lat.c ..\perftest_resources.c\r
\r
INCLUDES = ..;..\..\..\ulp\libibverbs\include;\\r
- ..\..\..\inc;..\..\..\inc\user;\\r
- ..\..\..\inc\user\linux;\r
+ ..\..\..\inc;..\..\..\inc\user;..\..\..\inc\user\linux;..\..\..\inc\complib;\\r
+ ..\..\..\hw\mlx4\user\hca\\r
\r
TARGETLIBS = \\r
$(SDK_LIB_PATH)\kernel32.lib \\r
$(SDK_LIB_PATH)\user32.lib \\r
$(SDK_LIB_PATH)\ole32.lib \\r
$(SDK_LIB_PATH)\ws2_32.lib \\r
+ $(SDK_LIB_PATH)\uuid.lib \\r
!if $(FREEBUILD)\r
- $(TARGETPATH)\*\libibverbs.lib\r
+ $(TARGETPATH)\*\libibverbs.lib \\r
+ $(TARGETPATH)\*\complib.lib \\r
!else\r
- $(TARGETPATH)\*\libibverbsd.lib\r
+ $(TARGETPATH)\*\libibverbsd.lib \\r
+ $(TARGETPATH)\*\complibd.lib \\r
!endif\r
-\r
-/*\r
- * Copyright (c) 2005 Topspin Communications. All rights reserved.\r
- * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.\r
- * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)\r
- * Copyright (c) 2008 Intel Corporation. All rights reserved.\r
- *\r
- * This software is available to you under the OpenIB.org BSD license\r
- * below:\r
- *\r
- * Redistribution and use in source and binary forms, with or\r
- * without modification, are permitted provided that the following\r
- * conditions are met:\r
- *\r
- * - Redistributions of source code must retain the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer.\r
- *\r
- * - Redistributions in binary form must reproduce the above\r
- * copyright notice, this list of conditions and the following\r
- * disclaimer in the documentation and/or other materials\r
- * provided with the distribution.\r
- *\r
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AWV\r
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
- * SOFTWARE.\r
- */\r
-\r
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <string.h>\r
-#include <ws2tcpip.h>\r
-#include <winsock2.h>\r
-#include <time.h>\r
-\r
-#include "..\..\..\etc\user\getopt.c"\r
-#include "perftest.h"\r
-#include <infiniband/verbs.h>\r
-\r
-#define PINGPONG_RDMA_WRID 3\r
-#define VERSION 1.0\r
-#define ALL 1\r
-#define MAX_INLINE 400\r
-\r
-typedef UINT64 cycles_t;\r
-cycles_t *tstamp;\r
-\r
-struct user_parameters {\r
- const char *servername;\r
- int connection_type;\r
- int mtu;\r
- int all; /* run all msg size */\r
- int iters;\r
- int tx_depth;\r
- int inline_size;\r
-};\r
-\r
-struct report_options {\r
- int unsorted;\r
- int histogram;\r
- int cycles; /* report delta's in cycles, not microsec's */\r
-};\r
-\r
-struct pingpong_context {\r
- struct ibv_context *context;\r
- struct ibv_pd *pd;\r
- struct ibv_mr *mr;\r
- struct ibv_cq *cq;\r
- struct ibv_qp *qp;\r
- void *buf;\r
- volatile char *post_buf;\r
- volatile char *poll_buf;\r
- int size;\r
- int tx_depth;\r
- struct ibv_sge list;\r
- struct ibv_send_wr wr;\r
-};\r
-\r
-struct pingpong_dest {\r
- int lid;\r
- int qpn;\r
- int psn;\r
- unsigned rkey;\r
- unsigned long long vaddr;\r
-};\r
-\r
-static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)\r
-{\r
- struct ibv_port_attr attr;\r
-\r
- if (ibv_query_port(ctx->context, (uint8_t) port, &attr))\r
- return 0;\r
-\r
- return attr.lid;\r
-}\r
-\r
-static struct ibv_device *pp_find_dev(const char *ib_devname) {\r
- struct ibv_device **dev_list;\r
- struct ibv_device *ib_dev = NULL;\r
-\r
- dev_list = ibv_get_device_list(NULL);\r
- if (!dev_list)\r
- return NULL;\r
-\r
- if (!ib_devname) {\r
- ib_dev = dev_list[0];\r
- if (!ib_dev)\r
- fprintf(stderr, "No IB devices found\n");\r
- } else {\r
- for (; (ib_dev = *dev_list); ++dev_list)\r
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))\r
- break;\r
- if (!ib_dev)\r
- fprintf(stderr, "IB device %s not found\n", ib_devname);\r
- }\r
- return ib_dev;\r
-}\r
-\r
-#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")\r
-#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016Lx"\r
-\r
-static int pp_write_keys(SOCKET sockfd, const struct pingpong_dest *my_dest)\r
-{\r
- char msg[KEY_MSG_SIZE];\r
-\r
- sprintf(msg, KEY_PRINT_FMT, my_dest->lid, my_dest->qpn,\r
- my_dest->psn, my_dest->rkey, my_dest->vaddr);\r
-\r
- if (send(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("client write");\r
- fprintf(stderr, "Couldn't send local address\n");\r
- return -1;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int pp_read_keys(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest *rem_dest)\r
-{\r
- int parsed;\r
- char msg[KEY_MSG_SIZE];\r
-\r
- if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {\r
- perror("pp_read_keys");\r
- fprintf(stderr, "Couldn't read remote address\n");\r
- return -1;\r
- }\r
-\r
- memset(rem_dest, 0, sizeof *rem_dest);\r
- parsed = sscanf(msg, KEY_PRINT_FMT, &rem_dest->lid, &rem_dest->qpn,\r
- &rem_dest->psn, &rem_dest->rkey, &rem_dest->vaddr);\r
-\r
- if (parsed != 5) {\r
- fprintf(stderr, "Couldn't parse line <%.*s>\n",\r
- (int)sizeof msg, msg);\r
- return -1;\r
- }\r
-\r
- return 0;\r
-}\r
-\r
-static int pp_client_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest *rem_dest)\r
-{\r
- if (pp_write_keys(sockfd, my_dest))\r
- return -1;\r
-\r
- return pp_read_keys(sockfd, my_dest, rem_dest);\r
-}\r
-\r
-static int pp_server_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,\r
- struct pingpong_dest* rem_dest)\r
-{\r
-\r
- if (pp_read_keys(sockfd, my_dest, rem_dest))\r
- return -1;\r
-\r
- return pp_write_keys(sockfd, my_dest);\r
-}\r
-\r
-static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,\r
- int tx_depth, int port, struct user_parameters *user_parm)\r
-{\r
- struct pingpong_context *ctx;\r
- struct ibv_device_attr device_attr;\r
-\r
- ctx = malloc(sizeof *ctx);\r
- if (!ctx)\r
- return NULL;\r
-\r
- ctx->size = size;\r
- ctx->tx_depth = tx_depth;\r
-\r
- ctx->buf = malloc(size * 2);\r
- if (!ctx->buf) {\r
- fprintf(stderr, "Couldn't allocate work buf.\n");\r
- return NULL;\r
- }\r
-\r
- memset(ctx->buf, 0, size * 2);\r
-\r
- ctx->post_buf = (char*)ctx->buf + (size - 1);\r
- ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);\r
-\r
- ctx->context = ibv_open_device(ib_dev);\r
- if (!ctx->context) {\r
- fprintf(stderr, "Couldn't get context for %s\n",\r
- ibv_get_device_name(ib_dev));\r
- return NULL;\r
- }\r
- if (user_parm->mtu == 0) {/*user did not ask for specific mtu */\r
- if (ibv_query_device(ctx->context, &device_attr)) {\r
- fprintf(stderr, "Failed to query device props");\r
- return NULL;\r
- }\r
- if (device_attr.vendor_part_id == 23108) {\r
- user_parm->mtu = 1024;\r
- } else {\r
- user_parm->mtu = 2048;\r
- }\r
- }\r
- ctx->pd = ibv_alloc_pd(ctx->context);\r
- if (!ctx->pd) {\r
- fprintf(stderr, "Couldn't allocate PD\n");\r
- return NULL;\r
- }\r
-\r
- ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,\r
- IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);\r
- if (!ctx->mr) {\r
- fprintf(stderr, "Couldn't allocate MR\n");\r
- return NULL;\r
- }\r
-\r
- ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);\r
- if (!ctx->cq) {\r
- fprintf(stderr, "Couldn't create CQ\n");\r
- return NULL;\r
- }\r
-\r
- {\r
- struct ibv_qp_init_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_init_attr));\r
- attr.send_cq = ctx->cq;\r
- attr.recv_cq = ctx->cq;\r
- attr.cap.max_send_wr = tx_depth;\r
- /* Work around: driver doesnt support\r
- * recv_wr = 0 */\r
- attr.cap.max_recv_wr = 1;\r
- attr.cap.max_send_sge = 1;\r
- attr.cap.max_recv_sge = 1;\r
- attr.cap.max_inline_data = user_parm->inline_size;\r
-\r
- if (user_parm->connection_type==1) {\r
- attr.qp_type = IBV_QPT_UC;\r
- } else {\r
- attr.qp_type = IBV_QPT_RC;\r
- }\r
- ctx->qp = ibv_create_qp(ctx->pd, &attr);\r
- if (!ctx->qp) {\r
- fprintf(stderr, "Couldn't create QP\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- {\r
- struct ibv_qp_attr attr;\r
-\r
- memset(&attr, 0, sizeof attr);\r
- attr.qp_state = IBV_QPS_INIT;\r
- attr.pkey_index = 0;\r
- attr.port_num = (uint8_t) port;\r
- attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;\r
-\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_PKEY_INDEX |\r
- IBV_QP_PORT |\r
- IBV_QP_ACCESS_FLAGS)) {\r
- fprintf(stderr, "Failed to modify QP to INIT\n");\r
- return NULL;\r
- }\r
- }\r
-\r
- memset(&ctx->wr, 0, sizeof(ctx->wr));\r
-\r
- ctx->wr.wr_id = PINGPONG_RDMA_WRID;\r
- ctx->wr.sg_list = &ctx->list;\r
- ctx->wr.num_sge = 1;\r
- ctx->wr.opcode = IBV_WR_RDMA_WRITE;\r
- ctx->wr.next = NULL;\r
-\r
- return ctx;\r
-}\r
-\r
-static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,\r
- struct pingpong_dest *dest,struct user_parameters *user_parm)\r
-{\r
- struct ibv_qp_attr attr;\r
- memset(&attr, 0, sizeof(struct ibv_qp_attr));\r
- attr.qp_state = IBV_QPS_RTR;\r
- switch (user_parm->mtu) {\r
- case 256 : \r
- attr.path_mtu = IBV_MTU_256;\r
- break;\r
- case 512 :\r
- attr.path_mtu = IBV_MTU_512;\r
- break;\r
- case 1024 :\r
- attr.path_mtu = IBV_MTU_1024;\r
- break;\r
- case 2048 :\r
- attr.path_mtu = IBV_MTU_2048;\r
- break;\r
- case 4096 :\r
- attr.path_mtu = IBV_MTU_4096;\r
- break;\r
- }\r
- printf("Mtu : %d\n", user_parm->mtu);\r
- attr.dest_qp_num = dest->qpn;\r
- attr.rq_psn = dest->psn;\r
-\r
- if (user_parm->connection_type==0) {\r
- attr.max_dest_rd_atomic = 1;\r
- attr.min_rnr_timer = 12;\r
- }\r
-\r
- attr.ah_attr.is_global = 0;\r
- attr.ah_attr.dlid = (uint16_t) dest->lid;\r
- attr.ah_attr.sl = 0;\r
- attr.ah_attr.src_path_bits = 0;\r
- attr.ah_attr.port_num = (uint8_t) port;\r
-\r
- if (user_parm->connection_type == 0) {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN |\r
- IBV_QP_MIN_RNR_TIMER |\r
- IBV_QP_MAX_DEST_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTR\n");\r
- return 1;\r
- }\r
- attr.timeout = 14;\r
- attr.retry_cnt = 7;\r
- attr.rnr_retry = 7;\r
- } else {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_AV |\r
- IBV_QP_PATH_MTU |\r
- IBV_QP_DEST_QPN |\r
- IBV_QP_RQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTR\n");\r
- return 1;\r
- }\r
-\r
- }\r
- attr.qp_state = IBV_QPS_RTS;\r
- attr.sq_psn = my_psn;\r
-\r
- if (user_parm->connection_type == 0) {\r
- attr.max_rd_atomic = 1;\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN |\r
- IBV_QP_TIMEOUT |\r
- IBV_QP_RETRY_CNT |\r
- IBV_QP_RNR_RETRY |\r
- IBV_QP_MAX_QP_RD_ATOMIC)) {\r
- fprintf(stderr, "Failed to modify RC QP to RTS\n");\r
- return 1;\r
- }\r
- } else {\r
- if (ibv_modify_qp(ctx->qp, &attr,\r
- IBV_QP_STATE |\r
- IBV_QP_SQ_PSN)) {\r
- fprintf(stderr, "Failed to modify UC QP to RTS\n");\r
- return 1;\r
- }\r
-\r
- }\r
- return 0;\r
-}\r
-\r
-static int pp_open_port(struct pingpong_context *ctx, const char * servername,\r
- int ib_port, int port, struct pingpong_dest *rem_dest,struct user_parameters *user_parm)\r
-{\r
- char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x RKey %#08x VAddr %#016Lx\n";\r
- struct pingpong_dest my_dest;\r
- SOCKET sockfd;\r
- int rc;\r
-\r
- /* Create connection between client and server.\r
- * We do it by exchanging data over a TCP socket connection. */\r
-\r
- my_dest.lid = pp_get_local_lid(ctx, ib_port);\r
- my_dest.qpn = ctx->qp->qp_num;\r
- my_dest.psn = rand() & 0xffffff;\r
- if (!my_dest.lid) {\r
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");\r
- return -1;\r
- }\r
- my_dest.rkey = ctx->mr->rkey;\r
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;\r
-\r
- printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn,\r
- my_dest.rkey, my_dest.vaddr);\r
-\r
- sockfd = servername ? pp_client_connect(servername, port) :\r
- pp_server_connect(port);\r
-\r
- if (sockfd == INVALID_SOCKET) {\r
- printf("pp_connect_sock(%s,%d) failed (%d)!\n",\r
- servername, port, sockfd);\r
- return (int) sockfd;\r
- }\r
-\r
- rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest) :\r
- pp_server_exch_dest(sockfd, &my_dest, rem_dest);\r
- if (rc)\r
- return rc;\r
-\r
- printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,\r
- rem_dest->rkey, rem_dest->vaddr);\r
-\r
- if ((rc = pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest,user_parm)))\r
- return rc;\r
-\r
- /* An additional handshake is required *after* moving qp to RTR.\r
- * Arbitrarily reuse exch_dest for this purpose.\r
- */\r
-\r
- rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest) :\r
- pp_server_exch_dest(sockfd, &my_dest, rem_dest);\r
-\r
- if (rc)\r
- return rc;\r
-\r
- if (send(sockfd, "done", sizeof "done", 0) != sizeof "done"){\r
- perror("write");\r
- fprintf(stderr, "Couldn't write to socket\n");\r
- return 1;\r
- }\r
- closesocket(sockfd);\r
- return 0;\r
-}\r
-\r
-static void usage(const char *argv0)\r
-{\r
- printf("Usage:\n");\r
- printf(" %s start a server and wait for connection\n", argv0);\r
- printf(" %s -h <host> connect to server at <host>\n", argv0);\r
- printf("\n");\r
- printf("Options:\n");\r
- printf(" -p <port> listen on/connect to port <port> (default 18515)\n");\r
- printf(" -c <RC/UC> connection type RC/UC (default RC)\n");\r
- printf(" -m <mtu> mtu size (256 - 4096. default for hermon is 2048)\n");\r
- printf(" -d <dev> use IB device <dev> (default first device found)\n");\r
- printf(" -i <port> use port <port> of IB device (default 1)\n");\r
- printf(" -s <size> size of message to exchange (default 1)\n");\r
- printf(" -a Run sizes from 2 till 2^23\n");\r
- printf(" -t <dep> size of tx queue (default 50)\n");\r
- printf(" -n <iters> number of exchanges (at least 2, default 1000)\n");\r
- printf(" -I <size> max size of message to be sent in inline mode (default 400)\n");\r
- printf(" -C report times in cpu cycle units (default microseconds)\n");\r
- printf(" -H print out all results (default print summary only)\n");\r
- printf(" -U (implies -H) print out unsorted results (default sorted)\n");\r
- printf(" -V display version number\n");\r
-}\r
-\r
-static void print_report(struct report_options * options,\r
- unsigned int iters, cycles_t *tstamp,int size)\r
-{\r
- cycles_t cycles_to_units;\r
- cycles_t median;\r
- unsigned int i;\r
- const char* units;\r
- cycles_t *delta = malloc((iters - 1) * sizeof *delta);\r
-\r
- if (!delta) {\r
- perror("malloc");\r
- return;\r
- }\r
-\r
- for (i = 0; i < iters - 1; ++i)\r
- delta[i] = tstamp[i + 1] - tstamp[i];\r
-\r
- if (options->cycles) {\r
- cycles_to_units = 1;\r
- units = "cycles";\r
- } else {\r
- cycles_to_units = get_freq();\r
- units = "sec";\r
- }\r
-\r
- if (options->unsorted) {\r
- printf("#, %s, frequency=%I64d\n", units, get_freq());\r
- for (i = 0; i < iters - 1; ++i)\r
- printf("%d, %g\n", i + 1, delta[i] / cycles_to_units / 2. * 1000000.);\r
- }\r
-\r
- qsort(delta, iters - 1, sizeof *delta, cycles_compare);\r
-\r
- if (options->histogram) {\r
- printf("#, %s\n", units);\r
- for (i = 0; i < iters - 1; ++i)\r
- printf("%d, %7.2f\n", i + 1, (double) delta[i] / (double) cycles_to_units / 2. * 1000000.);\r
- }\r
-\r
- median = get_median(iters - 1, delta);\r
- printf("%7d %d %7.2f %7.2f %7.2f\n",\r
- size, iters, (double) delta[0] / (double) cycles_to_units / 2. * 1000000.,\r
- (double) delta[iters - 2] / (double) cycles_to_units / 2. * 1000000.,\r
- (double) median / (double) cycles_to_units / 2. * 1000000.);\r
- free(delta);\r
-}\r
-\r
-static int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,\r
- struct pingpong_dest *rem_dest, int size)\r
-{\r
- struct ibv_qp *qp;\r
- struct ibv_send_wr *wr;\r
- volatile char *poll_buf; \r
- volatile char *post_buf;\r
-\r
- int scnt, ccnt, rcnt;\r
- int iters;\r
- int tx_depth;\r
- int inline_size;\r
-\r
- iters = user_param->iters;\r
- tx_depth = user_param->tx_depth;\r
- inline_size = user_param->inline_size;\r
-\r
- wr = &ctx->wr;\r
- ctx->list.addr = (uintptr_t) ctx->buf;\r
- ctx->list.length = size;\r
- ctx->list.lkey = ctx->mr->lkey;\r
- wr->wr.rdma.remote_addr = rem_dest->vaddr;\r
- wr->wr.rdma.rkey = rem_dest->rkey;\r
-\r
- if (size > inline_size) {/* complaince to perf_main */\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED;\r
- } else {\r
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;\r
- }\r
- scnt = 0;\r
- rcnt = 0;\r
- ccnt = 0;\r
-\r
- if(user_param->all == ALL) {\r
- post_buf = (char*)ctx->buf + size - 1;\r
- poll_buf = (char*)ctx->buf + 8388608 + size - 1;\r
- } else {\r
- poll_buf = ctx->poll_buf;\r
- post_buf = ctx->post_buf;\r
- } \r
- qp = ctx->qp;\r
-\r
- /* Done with setup. Start the test. */\r
- while (scnt < iters || ccnt < iters || rcnt < iters) {\r
-\r
- /* Wait till buffer changes. */\r
- if (rcnt < user_param->iters && !(scnt < 1 && user_param->servername)) {\r
- ++rcnt;\r
- while (*poll_buf != (char)rcnt)\r
- ;\r
- /* Here the data is already in the physical memory.\r
- If we wanted to actually use it, we may need\r
- a read memory barrier here. */\r
- }\r
-\r
- if (scnt < user_param->iters) {\r
- struct ibv_send_wr *bad_wr;\r
- if (user_param->servername)\r
- tstamp[scnt] = get_cycles();\r
-\r
- *post_buf = (char)++scnt;\r
-\r
- if (ibv_post_send(qp, wr, &bad_wr)) {\r
- fprintf(stderr, "Couldn't post send: scnt=%d\n",\r
- scnt);\r
- return 11;\r
- }\r
- }\r
-\r
- if (ccnt < user_param->iters) {\r
- struct ibv_wc wc;\r
- int ne;\r
- ++ccnt;\r
- do {\r
- ne = ibv_poll_cq(ctx->cq, 1, &wc);\r
- } while (ne == 0);\r
- if (ne < 0) {\r
- fprintf(stderr, "poll CQ failed %d\n", ne);\r
- return 12;\r
- }\r
- if (wc.status != IBV_WC_SUCCESS) {\r
- fprintf(stderr, "Completion wth error at %s:\n",\r
- user_param->servername ? "client" : "server");\r
- fprintf(stderr, "Failed status %d: wr_id %d\n",\r
- wc.status, (int) wc.wr_id);\r
- fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",\r
- scnt, rcnt, ccnt);\r
- return 13;\r
- }\r
- }\r
- }\r
- return(0);\r
-}\r
-\r
-int __cdecl main(int argc, char *argv[])\r
-{\r
- const char *ib_devname = NULL;\r
- int port = 18515;\r
- int ib_port = 1;\r
- int size = 2;\r
- int i = 0;\r
- struct report_options report;\r
- struct pingpong_context *ctx;\r
- struct pingpong_dest rem_dest;\r
- struct ibv_device *ib_dev;\r
- struct user_parameters user_param;\r
- WORD version;\r
- WSADATA data;\r
- int err;\r
-\r
- srand((unsigned int) time(NULL));\r
- version = MAKEWORD(2, 2);\r
- err = WSAStartup(version, &data);\r
- if (err)\r
- return -1;\r
-\r
- /* init default values to user's parameters */\r
- memset(&report, 0, sizeof report);\r
- memset(&user_param, 0, sizeof(struct user_parameters));\r
- user_param.mtu = 0; /* signal choose default by device */\r
- user_param.iters = 1000;\r
- user_param.tx_depth = 50;\r
- user_param.servername = NULL;\r
- user_param.inline_size = MAX_INLINE;\r
- \r
- /* Parameter parsing. */\r
- while (1) {\r
- int c;\r
-\r
- c = getopt(argc, argv, "h:p:c:m:d:i:s:n:t:I:aCHUV");\r
- if (c == -1)\r
- break;\r
-\r
- switch (c) {\r
- case 'p':\r
- port = strtol(optarg, NULL, 0);\r
- if (port < 0 || port > 65535) {\r
- usage(argv[0]);\r
- return 1;\r
- }\r
- break;\r
- case 'c':\r
- if (strcmp("UC",optarg)==0)\r
- user_param.connection_type=1;\r
- /* default is 0 for any other option RC*/\r
- break;\r
-\r
- case 'm':\r
- user_param.mtu = strtol(optarg, NULL, 0);\r
- break;\r
- case 'a':\r
- user_param.all = ALL;\r
- break;\r
- case 'V':\r
- printf("perftest version : %.2f\n",VERSION);\r
- return 0;\r
- case 'd':\r
- ib_devname = _strdup(optarg);\r
- break;\r
-\r
- case 'i':\r
- ib_port = strtol(optarg, NULL, 0);\r
- if (ib_port < 0) {\r
- usage(argv[0]);\r
- return 2;\r
- }\r
- break;\r
-\r
- case 's':\r
- size = strtol(optarg, NULL, 0);\r
- if (size < 1) {\r
- usage(argv[0]); return 3;\r
- }\r
- break;\r
-\r
- case 't':\r
- user_param.tx_depth = strtol(optarg, NULL, 0);\r
- if (user_param.tx_depth < 1) {\r
- usage(argv[0]); return 4;\r
- }\r
- break;\r
-\r
- case 'I':\r
- user_param.inline_size = strtol(optarg, NULL, 0);\r
- if (user_param.inline_size > MAX_INLINE) {\r
- usage(argv[0]); return 7;\r
- }\r
- break;\r
-\r
- case 'n':\r
- user_param.iters = strtol(optarg, NULL, 0);\r
- if (user_param.iters < 2) {\r
- usage(argv[0]);\r
- return 5;\r
- }\r
- break;\r
-\r
- case 'C':\r
- report.cycles = 1;\r
- break;\r
-\r
- case 'H':\r
- report.histogram = 1;\r
- break;\r
-\r
- case 'U':\r
- report.unsorted = 1;\r
- break;\r
-\r
- case 'h':\r
- if (optarg) {\r
- user_param.servername = _strdup(optarg);\r
- break;\r
- }\r
-\r
- default:\r
- usage(argv[0]);\r
- return 5;\r
- }\r
- }\r
-\r
- /*\r
- * Done with parameter parsing. Perform setup.\r
- */\r
-\r
- tstamp = malloc(user_param.iters * sizeof *tstamp);\r
- if (!tstamp) {\r
- perror("malloc");\r
- return 10;\r
- }\r
- printf("------------------------------------------------------------------\n");\r
- printf(" RDMA_Write Latency Test\n");\r
- printf("Inline data is used up to %d bytes message\n", user_param.inline_size);\r
- if (user_param.connection_type==0) {\r
- printf("Connection type : RC\n");\r
- } else {\r
- printf("Connection type : UC\n");\r
- }\r
- if (user_param.all == ALL) {\r
- /*since we run all sizes */\r
- size = 8388608; /*2^23 */\r
- }\r
-\r
- ib_dev = pp_find_dev(ib_devname);\r
- if (!ib_dev)\r
- return 7;\r
-\r
- ctx = pp_init_ctx(ib_dev, size, user_param.tx_depth, ib_port,&user_param);\r
- if (!ctx)\r
- return 8;\r
-\r
- if (pp_open_port(ctx, user_param.servername, ib_port, port, &rem_dest,&user_param))\r
- return 9;\r
- printf("------------------------------------------------------------------\n");\r
- printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");\r
-\r
- if (user_param.all == ALL) {\r
- for (i = 1; i < 24 ; ++i) {\r
- size = 1 << i;\r
- if(run_iter(ctx, &user_param, &rem_dest, size))\r
- return 17;\r
- print_report(&report, user_param.iters, tstamp, size);\r
- }\r
- } else {\r
- if(run_iter(ctx, &user_param, &rem_dest, size))\r
- return 18;\r
- print_report(&report, user_param.iters, tstamp, size);\r
- }\r
-\r
- printf("------------------------------------------------------------------\n");\r
- free(tstamp);\r
- return 0;\r
-}\r
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2008-2009 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <windows.h>
+
+#include <ws2tcpip.h>
+#include <winsock2.h>
+
+#include <string.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <time.h>
+#include <infiniband/verbs.h>
+#include <errno.h>
+
+#include "..\..\tools\perftests\user\get_clock.h"
+#include "..\..\etc\user\getopt.c"
+#include "perftest_resources.h"
+#include "l2w.h"
+
+#define PINGPONG_RDMA_WRID 3 /* wr_id stamped on the RDMA write work request */
+#define VERSION 1.1 /* printed by the -V option */
+#define MAX_INLINE 400 /* default and upper bound of the -I inline size */
+
+static uint8_t sl = 0; /* service level (-S); used when no GID index is given */
+static int page_size; /* set in main(); alignment passed to posix_memalign() */
+cycles_t *tstamp; /* one timestamp per posted send; allocated in main() */
+
+struct report_options {
+ int unsorted; /* print every per-iteration delta before sorting */
+ int histogram; /* print every per-iteration delta after sorting */
+ int cycles; /* report delta's in cycles, not microsec's */
+};
+
+struct pingpong_context {
+ struct ibv_context *context; /* device context from ibv_open_device() */
+ struct ibv_pd *pd; /* protection domain owning mr and qp */
+ struct ibv_mr *mr; /* registration covering all of buf */
+ struct ibv_cq *cq; /* single CQ used as both send and recv CQ */
+ struct ibv_qp *qp; /* RC or UC queue pair under test */
+ void *buf; /* work buffer, 2 * size bytes */
+ volatile char *post_buf; /* last byte of the first half: buf + size - 1 */
+ volatile char *poll_buf; /* last byte of the second half: buf + 2 * size - 1 */
+ int size; /* size in bytes of each half of buf */
+ int tx_depth; /* copied from the user parameters */
+ struct ibv_sge list; /* the one scatter/gather entry referenced by wr */
+ struct ibv_send_wr wr; /* RDMA write work request reused for every post */
+};
+
+/*
+ * set_up_connection - fill my_dest with the local side's connection data.
+ *
+ * When user_parm->gid_index is not -1 the GID at that index is queried
+ * first.  The local LID, the QP number, a random 24-bit PSN, the MR rkey
+ * and the address of the second half of the work buffer are then stored
+ * in my_dest.
+ *
+ * Returns 0 on success, -1 when the GID query fails or when no GID index
+ * is in use and the local LID is 0.
+ */
+static int set_up_connection(struct pingpong_context *ctx,
+                             struct perftest_parameters *user_parm,
+                             struct pingpong_dest *my_dest) {
+
+    const int gid_idx = user_parm->gid_index;
+    const uint8_t port_num = user_parm->ib_port;
+
+    if (gid_idx != -1 &&
+        ibv_query_gid(ctx->context, port_num, gid_idx, &my_dest->gid))
+        return -1;
+
+    my_dest->lid   = ctx_get_local_lid(ctx->context, user_parm->ib_port);
+    my_dest->qpn   = ctx->qp->qp_num;
+    my_dest->psn   = rand() & 0xffffff;
+    my_dest->rkey  = ctx->mr->rkey;
+    /* The peer writes into the second half of the buffer. */
+    my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+    /* A zero LID is not an error in an RDMAoE/Eth configuration (GIDs in use). */
+    if (gid_idx >= 0 || my_dest->lid)
+        return 0;
+
+    fprintf(stderr,"Local lid 0x0 detected. Is an SM running? \n");
+    fprintf(stderr,"If you're running RMDAoE you must use GIDs\n");
+    return -1;
+}
+
+/*
+ * init_connection - print the local data and open the side-channel socket.
+ *
+ * A non-NULL servername makes this process the CLIENT, which connects to
+ * that host; otherwise it is the SERVER and waits on params->port.  The
+ * resulting socket is stored in params->sockfd.
+ *
+ * Returns 0 on success, 1 when no socket could be opened.
+ */
+static int init_connection(struct perftest_parameters *params,
+                           struct pingpong_dest *my_dest,
+                           const char *servername) {
+
+    if (servername)
+        params->machine = CLIENT;
+    else
+        params->machine = SERVER;
+
+    params->side = LOCAL;
+    ctx_print_pingpong_data(my_dest,params);
+
+    params->sockfd = servername ? ctx_client_connect(servername,params->port)
+                                : ctx_server_connect(params->port);
+
+    if (params->sockfd != INVALID_SOCKET)
+        return 0;
+
+    fprintf(stderr,"Unable to open file descriptor for socket connection");
+    return 1;
+}
+
+/*
+ * pp_find_dev - choose the IB device to run the test on.
+ *
+ * ib_devname: device name to look for, or NULL to take the first device
+ *             in the list.
+ *
+ * Returns the matching device, or NULL (after printing a message) when the
+ * device list cannot be obtained, is empty, or holds no device with the
+ * requested name.
+ *
+ * The device list is not freed here because the returned pointer is an
+ * entry of that list.
+ */
+static struct ibv_device *pp_find_dev(const char *ib_devname) {
+    struct ibv_device **dev_list;
+    struct ibv_device *ib_dev = NULL;
+
+    dev_list = ibv_get_device_list(NULL);
+    if (!dev_list) {
+        /* Without this check the list would be dereferenced below. */
+        fprintf(stderr, "Failed to get IB devices list\n");
+        return NULL;
+    }
+
+    if (!ib_devname) {
+        ib_dev = dev_list[0];
+        if (!ib_dev)
+            fprintf(stderr, "No IB devices found\n");
+    } else {
+        /* The list is NULL-terminated; stop at the first name match. */
+        for (; (ib_dev = *dev_list); ++dev_list)
+            if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+                break;
+        if (!ib_dev)
+            fprintf(stderr, "IB device %s not found\n", ib_devname);
+    }
+    return ib_dev;
+}
+
+
+/*
+ * pp_init_ctx - allocate and initialise all resources used by the test.
+ *
+ * ib_dev:    device to open.
+ * size:      message size; the work buffer is 2 * size bytes (first half
+ *            is posted from, second half is written by the peer).
+ * user_parm: test parameters; user_parm->mtu is filled in with a default
+ *            when it is 0.
+ *
+ * Opens the device, sets the link layer, allocates a PD, registers the
+ * buffer, creates the CQ and the RC/UC QP, moves the QP to INIT and
+ * prepares the reusable RDMA write work request.
+ *
+ * Returns the new context, or NULL on failure after printing a message.
+ * Apart from the early buffer allocation failure, resources acquired
+ * before a failure are not released here; main() exits on NULL.
+ */
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,int size,
+                                            struct perftest_parameters *user_parm) {
+    struct pingpong_context *ctx;
+    struct ibv_device_attr device_attr;
+
+    ctx = malloc(sizeof *ctx);
+    if (!ctx)
+        return NULL;
+
+    ctx->size = size;
+    ctx->tx_depth = user_parm->tx_depth;
+
+    /*
+     * ctx comes from malloc(), so buf holds garbage until assigned.  Clear
+     * it first so the NULL test below is meaningful even if the allocator
+     * fails without storing a result.
+     */
+    ctx->buf = NULL;
+    posix_memalign(&(ctx->buf),page_size,size * 2);
+    if (!ctx->buf) {
+        fprintf(stderr, "Couldn't allocate work buf.\n");
+        free(ctx);
+        return NULL;
+    }
+
+    memset(ctx->buf, 0, size * 2);
+
+    /* The last byte of each half is the flag byte of the ping-pong. */
+    ctx->post_buf = (char*)ctx->buf + (size - 1);
+    ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);
+
+    ctx->context = ibv_open_device(ib_dev);
+    if (!ctx->context) {
+        fprintf(stderr, "Couldn't get context for %s\n",
+                ibv_get_device_name(ib_dev));
+        return NULL;
+    }
+
+    // Finds the link type and configure the HCA accordingly.
+    if (ctx_set_link_layer(ctx->context,user_parm)) {
+        fprintf(stderr, "Couldn't set the link layer\n");
+        return NULL;
+    }
+
+    if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
+        if (ibv_query_device(ctx->context, &device_attr)) {
+            fprintf(stderr, "Failed to query device props\n");
+            return NULL;
+        }
+        /* NOTE(review): 23108 is presumably an older HCA part id - confirm. */
+        if (device_attr.vendor_part_id == 23108 || user_parm->gid_index > -1) {
+            user_parm->mtu = 1024;
+        } else {
+            user_parm->mtu = 2048;
+        }
+    }
+    ctx->pd = ibv_alloc_pd(ctx->context);
+    if (!ctx->pd) {
+        fprintf(stderr, "Couldn't allocate PD\n");
+        return NULL;
+    }
+
+    ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
+                         IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+    if (!ctx->mr) {
+        fprintf(stderr, "Couldn't allocate MR\n");
+        return NULL;
+    }
+
+    ctx->cq = ibv_create_cq(ctx->context, user_parm->tx_depth, NULL, NULL, 0);
+    if (!ctx->cq) {
+        fprintf(stderr, "Couldn't create CQ\n");
+        return NULL;
+    }
+
+    {
+        struct ibv_qp_init_attr attr;
+        memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
+        attr.send_cq = ctx->cq;
+        attr.recv_cq = ctx->cq;
+        attr.cap.max_send_wr = user_parm->tx_depth;
+        /* Work around: driver doesnt support
+         * recv_wr = 0 */
+        attr.cap.max_recv_wr = 1;
+        attr.cap.max_send_sge = 1;
+        attr.cap.max_recv_sge = 1;
+        attr.cap.max_inline_data = user_parm->inline_size;
+
+        if (user_parm->connection_type==1) {
+            attr.qp_type = IBV_QPT_UC;
+        } else {
+            attr.qp_type = IBV_QPT_RC;
+        }
+        ctx->qp = ibv_create_qp(ctx->pd, &attr);
+        if (!ctx->qp) {
+            fprintf(stderr, "Couldn't create QP\n");
+            return NULL;
+        }
+    }
+
+    {
+        struct ibv_qp_attr attr;
+        /* Zero the attributes so no indeterminate field is handed over. */
+        memset(&attr, 0, sizeof(struct ibv_qp_attr));
+        attr.qp_state = IBV_QPS_INIT;
+        attr.pkey_index = 0;
+        attr.port_num = user_parm->ib_port;
+        attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE;
+
+        if (ibv_modify_qp(ctx->qp, &attr,
+                          IBV_QP_STATE |
+                          IBV_QP_PKEY_INDEX |
+                          IBV_QP_PORT |
+                          IBV_QP_ACCESS_FLAGS)) {
+            fprintf(stderr, "Failed to modify QP to INIT\n");
+            return NULL;
+        }
+    }
+
+    /* One work request is prepared here and reposted for every iteration. */
+    memset(&ctx->wr, 0, sizeof(ctx->wr));
+
+    ctx->wr.wr_id = PINGPONG_RDMA_WRID;
+    ctx->wr.sg_list = &ctx->list;
+    ctx->wr.num_sge = 1;
+    ctx->wr.opcode = IBV_WR_RDMA_WRITE;
+    ctx->wr.next = NULL;
+
+    return ctx;
+}
+
+/*
+ * pp_connect_ctx - move the QP from INIT through RTR to RTS.
+ *
+ * ctx:       context whose QP is connected.
+ * my_psn:    local send PSN (used as sq_psn).
+ * dest:      remote side's QP number, PSN, LID and GID.
+ * user_parm: MTU, connection type (0 = RC, otherwise UC), GID index,
+ *            port number and QP timeout.
+ *
+ * Returns 0 on success and 1 on failure, after printing a message.  An MTU
+ * other than 256/512/1024/2048/4096 is rejected up front instead of being
+ * passed on as an unset path_mtu.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn,
+                          struct pingpong_dest *dest,
+                          struct perftest_parameters *user_parm)
+{
+    struct ibv_qp_attr attr;
+    memset(&attr, 0, sizeof(struct ibv_qp_attr));
+    attr.qp_state = IBV_QPS_RTR;
+    switch (user_parm->mtu) {
+    case 256 :
+        attr.path_mtu = IBV_MTU_256;
+        break;
+    case 512 :
+        attr.path_mtu = IBV_MTU_512;
+        break;
+    case 1024 :
+        attr.path_mtu = IBV_MTU_1024;
+        break;
+    case 2048 :
+        attr.path_mtu = IBV_MTU_2048;
+        break;
+    case 4096 :
+        attr.path_mtu = IBV_MTU_4096;
+        break;
+    default :
+        fprintf(stderr, "Invalid MTU %d (use 256, 512, 1024, 2048 or 4096)\n",
+                user_parm->mtu);
+        return 1;
+    }
+    printf("Mtu : %d\n", user_parm->mtu);
+    attr.dest_qp_num = dest->qpn;
+    attr.rq_psn = dest->psn;
+    attr.ah_attr.dlid = dest->lid;
+
+    if (user_parm->connection_type==0) {
+        attr.max_dest_rd_atomic = 1;
+        attr.min_rnr_timer = 12;
+    }
+
+    /* Without a GID index the path is LID routed; otherwise a GRH is used. */
+    if (user_parm->gid_index < 0) {
+        attr.ah_attr.is_global = 0;
+        attr.ah_attr.sl = sl;
+    } else {
+        attr.ah_attr.is_global = 1;
+        attr.ah_attr.grh.dgid = dest->gid;
+        attr.ah_attr.grh.sgid_index = (uint8_t)user_parm->gid_index;
+        attr.ah_attr.grh.hop_limit = 1;
+        attr.ah_attr.sl = 0;
+    }
+    attr.ah_attr.src_path_bits = 0;
+    attr.ah_attr.port_num = user_parm->ib_port;
+
+    /* INIT -> RTR */
+    if (user_parm->connection_type == 0) {
+        if (ibv_modify_qp(ctx->qp, &attr,
+                          IBV_QP_STATE |
+                          IBV_QP_AV |
+                          IBV_QP_PATH_MTU |
+                          IBV_QP_DEST_QPN |
+                          IBV_QP_RQ_PSN |
+                          IBV_QP_MIN_RNR_TIMER |
+                          IBV_QP_MAX_DEST_RD_ATOMIC)) {
+            fprintf(stderr, "Failed to modify RC QP to RTR\n");
+            return 1;
+        }
+        /* These are only consumed by the RC transition to RTS below. */
+        attr.timeout = user_parm->qp_timeout;
+        attr.retry_cnt = 7;
+        attr.rnr_retry = 7;
+    } else {
+        if (ibv_modify_qp(ctx->qp, &attr,
+                          IBV_QP_STATE |
+                          IBV_QP_AV |
+                          IBV_QP_PATH_MTU |
+                          IBV_QP_DEST_QPN |
+                          IBV_QP_RQ_PSN)) {
+            fprintf(stderr, "Failed to modify UC QP to RTR\n");
+            return 1;
+        }
+
+    }
+
+    /* RTR -> RTS */
+    attr.qp_state = IBV_QPS_RTS;
+    attr.sq_psn = my_psn;
+
+    if (user_parm->connection_type == 0) {
+        attr.max_rd_atomic = 1;
+        if (ibv_modify_qp(ctx->qp, &attr,
+                          IBV_QP_STATE |
+                          IBV_QP_SQ_PSN |
+                          IBV_QP_TIMEOUT |
+                          IBV_QP_RETRY_CNT |
+                          IBV_QP_RNR_RETRY |
+                          IBV_QP_MAX_QP_RD_ATOMIC)) {
+            fprintf(stderr, "Failed to modify RC QP to RTS\n");
+            return 1;
+        }
+    } else {
+        if (ibv_modify_qp(ctx->qp, &attr,
+                          IBV_QP_STATE |
+                          IBV_QP_SQ_PSN)) {
+            fprintf(stderr, "Failed to modify UC QP to RTS\n");
+            return 1;
+        }
+
+    }
+    return 0;
+}
+
+/*
+ * usage - print the command line help to stdout.
+ *
+ * argv0: program name shown in the usage lines.
+ *
+ * The default shown for -s matches main(), which starts with size = 2.
+ */
+static void usage(const char *argv0)
+{
+    printf("Usage:\n");
+    printf(" %s start a server and wait for connection\n", argv0);
+    printf(" %s <host> connect to server at <host>\n", argv0);
+    printf("\n");
+    printf("Options:\n");
+    printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
+    printf(" -c, --connection=<RC/UC> connection type RC/UC (default RC)\n");
+    printf(" -m, --mtu=<mtu> mtu size (256 - 4096. default for hermon is 2048)\n");
+    printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
+    printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
+    printf(" -s, --size=<size> size of message to exchange (default 2)\n");
+    printf(" -a, --all Run sizes from 2 till 2^23\n");
+    printf(" -t, --tx-depth=<dep> size of tx queue (default 50)\n");
+    printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
+    printf(" -I, --inline_size=<size> max size of message to be sent in inline mode (default 400)\n");
+    printf(" -u, --qp-timeout=<timeout> QP timeout, timeout value is 4 usec * 2 ^(timeout), default 14\n");
+    printf(" -S, --sl=<sl> SL (default 0)\n");
+    printf(" -x, --gid-index=<index> test uses GID with GID index taken from command line (for RDMAoE index should be 0)\n");
+    printf(" -C, --report-cycles report times in cpu cycle units (default microseconds)\n");
+    printf(" -H, --report-histogram print out all results (default print summary only)\n");
+    printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
+    printf(" -V, --version display version number\n");
+    printf(" -F, --CPU-freq do not fail even if cpufreq_ondemand module is loaded\n");
+}
+
+/*
+ * print_report - turn the recorded timestamps into a latency report.
+ *
+ * options:          selects cycles vs. usec units and the optional
+ *                   unsorted / sorted per-iteration listings.
+ * iters:            number of entries in tstamp; at least 2 are needed.
+ * tstamp:           timestamps taken just before each post.
+ * size:             message size, echoed in the summary line.
+ * no_cpu_freq_fail: accepted for interface compatibility; not used here.
+ *
+ * Each delta between consecutive timestamps is a round trip, so it is
+ * halved to report one-way latency.  The summary line prints the minimum,
+ * maximum and median of the sorted deltas.
+ */
+static void print_report(struct report_options * options,
+                         unsigned int iters, cycles_t *tstamp, int size, int no_cpu_freq_fail)
+{
+    double cycles_to_units;
+    cycles_t median;
+    unsigned int i;
+    const char* units;
+    cycles_t *delta;
+
+    (void) no_cpu_freq_fail;
+
+    /* iters - 1 would wrap for 0 and leave nothing to report for 1. */
+    if (iters < 2) {
+        fprintf(stderr, "At least 2 iterations are needed for a report\n");
+        return;
+    }
+
+    delta = malloc((iters - 1) * sizeof *delta);
+    if (!delta) {
+        perror("malloc");
+        return;
+    }
+
+    for (i = 0; i < iters - 1; ++i)
+        delta[i] = tstamp[i + 1] - tstamp[i];
+
+
+    if (options->cycles) {
+        cycles_to_units = 1;
+        units = "cycles";
+    } else {
+        /*
+         * NOTE(review): get_cpu_mhz() presumably returns ticks per second
+         * here - confirm against get_clock.h.  Dividing by a double keeps
+         * the fractional part even if it returns an integer type.
+         */
+        cycles_to_units = get_cpu_mhz() / 1000000.0;
+        units = "usec";
+    }
+
+    if (options->unsorted) {
+        printf("#, %s\n", units);
+        for (i = 0; i < iters - 1; ++i)
+            printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
+    }
+
+    qsort(delta, iters - 1, sizeof *delta, cycles_compare);
+
+    if (options->histogram) {
+        printf("#, %s\n", units);
+        for (i = 0; i < iters - 1; ++i)
+            printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
+    }
+
+    median = get_median(iters - 1, delta);
+    printf("%7d %u %7.2f %7.2f %7.2f\n",
+           size,iters,delta[0] / cycles_to_units / 2,
+           delta[iters - 2] / cycles_to_units / 2,median / cycles_to_units / 2);
+
+    free(delta);
+}
+/*
+ * run_iter - run one ping-pong latency measurement with `size` byte messages.
+ *
+ * ctx:        test context; its work request and SGE are updated in place.
+ * user_param: iteration count, inline threshold and machine role.
+ * rem_dest:   remote buffer address and rkey for the RDMA write.
+ * size:       number of bytes written per iteration.
+ * all:        ALL when cycling through every size, which changes where the
+ *             flag bytes are located inside the shared buffer.
+ *
+ * Each pass waits for the peer's flag byte, timestamps and posts one RDMA
+ * write, then reaps its completion.  The server does not wait before its
+ * first post.
+ *
+ * Returns 0 on success, 11 if posting fails, 12 if polling the CQ fails
+ * and 13 on a completion with error status.
+ */
+int run_iter(struct pingpong_context *ctx, struct perftest_parameters *user_param,
+             struct pingpong_dest *rem_dest,int size,int all)
+{
+    struct ibv_send_wr *send_wr = &ctx->wr;
+    struct ibv_qp *send_qp;
+    volatile char *poll_buf;
+    volatile char *post_buf;
+    int posted = 0;
+    int arrived = 0;
+    int completed = 0;
+
+    /* Describe the local source buffer and the remote target. */
+    ctx->list.addr = (uintptr_t) ctx->buf;
+    ctx->list.length = size;
+    ctx->list.lkey = ctx->mr->lkey;
+    send_wr->wr.rdma.remote_addr = rem_dest->vaddr;
+    send_wr->wr.rdma.rkey = rem_dest->rkey;
+
+    /* Messages no larger than the inline threshold are sent inline. */
+    ctx->wr.send_flags = IBV_SEND_SIGNALED;
+    if (size <= user_param->inline_size)
+        ctx->wr.send_flags |= IBV_SEND_INLINE;
+
+    if (all != ALL) {
+        poll_buf = ctx->poll_buf;
+        post_buf = ctx->post_buf;
+    } else {
+        /* The buffer halves are 2^23 bytes each when all sizes are run. */
+        post_buf = (char*)ctx->buf + size - 1;
+        poll_buf = (char*)ctx->buf + 8388608 + size - 1;
+    }
+    send_qp = ctx->qp;
+
+    /* Setup is complete; run until every counter reaches the target. */
+    while (posted < user_param->iters || completed < user_param->iters || arrived < user_param->iters) {
+
+        /* Spin until the peer's write for this round lands in our buffer. */
+        if (arrived < user_param->iters && (posted >= 1 || user_param->machine != SERVER)) {
+            ++arrived;
+            while (*poll_buf != (char)arrived)
+                ;
+            /* The data is in memory at this point; actually consuming it
+               might call for a read memory barrier here. */
+        }
+
+        if (posted < user_param->iters) {
+            struct ibv_send_wr *bad_wr;
+
+            tstamp[posted++] = get_cycles();
+            *post_buf = (char)posted;
+
+            if (ibv_post_send(send_qp, send_wr, &bad_wr)) {
+                fprintf(stderr, "Couldn't post send: scnt=%d\n",
+                        posted);
+                return 11;
+            }
+        }
+
+        if (completed < user_param->iters) {
+            struct ibv_wc wc;
+            int ne;
+
+            ++completed;
+            while ((ne = ibv_poll_cq(ctx->cq, 1, &wc)) == 0)
+                ;
+            if (ne < 0) {
+                fprintf(stderr, "poll CQ failed %d\n", ne);
+                return 12;
+            }
+            if (wc.status != IBV_WC_SUCCESS) {
+                fprintf(stderr, "Completion wth error at %s:\n",
+                        user_param->machine == CLIENT ? "client" : "server");
+                fprintf(stderr, "Failed status %d: wr_id %d\n",
+                        wc.status, (int) wc.wr_id);
+                fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
+                        posted, arrived, completed);
+                return 13;
+            }
+        }
+    }
+    return(0);
+}
+/*
+ * main - RDMA write latency test entry point.
+ *
+ * Parses the command line, allocates the timestamp array, opens the device
+ * and builds the test context, exchanges connection data with the peer
+ * over a socket, connects the QP and then runs the measurement either for
+ * one message size or, with -a, for every power of two from 2 to 2^23.
+ *
+ * A single non-option argument is the server name and makes this process
+ * the client; with no such argument it acts as the server.
+ *
+ * Returns 0 on success and a non-zero code on any error.
+ */
+int __cdecl main(int argc, char *argv[])
+{
+    const char *ib_devname = NULL;
+    int size = 2;
+    int i = 0;
+    struct report_options report;
+    struct pingpong_context *ctx;
+    struct pingpong_dest my_dest,rem_dest;
+    struct ibv_device *ib_dev;
+    struct perftest_parameters user_param;
+    int no_cpu_freq_fail = 0;
+
+    int all = 0;
+    const char *servername = NULL;
+
+    SYSTEM_INFO si;
+    GetSystemInfo(&si);
+
+
+    /* init default values to user's parameters */
+    memset(&user_param, 0, sizeof(struct perftest_parameters));
+    user_param.mtu = 0; /* signal choose default by device */
+    user_param.tx_depth = 50;
+    user_param.ib_port = 1;
+    user_param.port = 18515;
+    user_param.iters = 1000;
+    user_param.inline_size = MAX_INLINE;
+    user_param.qp_timeout = 14;
+    user_param.gid_index = -1; /*gid will not be used*/
+
+    report.cycles = 0;
+    report.histogram = 0;
+    report.unsorted = 0;
+
+    /* Parameter parsing. */
+    while (1) {
+        int c;
+
+        static struct option long_options[] = {
+            { "port", 1, NULL, 'p' },
+            { "connection", 1, NULL, 'c' },
+            { "mtu", 1, NULL, 'm' },
+            { "ib-dev", 1, NULL, 'd' },
+            { "ib-port", 1, NULL, 'i' },
+            { "size", 1, NULL, 's' },
+            { "iters", 1, NULL, 'n' },
+            { "tx-depth", 1, NULL, 't' },
+            { "inline_size", 1, NULL, 'I' },
+            { "qp-timeout", 1, NULL, 'u' },
+            { "sl", 1, NULL, 'S' },
+            { "gid-index", 1, NULL, 'x' },
+            { "all", 0, NULL, 'a' },
+            { "report-cycles", 0, NULL, 'C' },
+            { "report-histogram", 0, NULL, 'H' },
+            { "report-unsorted", 0, NULL, 'U' },
+            { "version", 0, NULL, 'V' },
+            { "CPU-freq", 0, NULL, 'F' },
+            { 0 }
+        };
+
+        c = getopt_long(argc, argv, "p:c:m:d:i:s:n:t:I:u:S:x:aCHUVF", long_options, NULL);
+        if (c == -1)
+            break;
+
+        if (c == EINVAL)
+        {
+            usage(argv[0]);
+            return 7;
+        }
+
+        switch (c) {
+        case 'p':
+            user_param.port = strtol(optarg, NULL, 0);
+            if (user_param.port < 0 || user_param.port > 65535) {
+                usage(argv[0]);
+                return 1;
+            }
+            break;
+        case 'c':
+            if (strcmp("UC",optarg)==0)
+                user_param.connection_type=1;
+            /* default is 0 for any other option RC*/
+            break;
+
+        case 'm':
+            user_param.mtu = strtol(optarg, NULL, 0);
+            break;
+        case 'a':
+            all = ALL;
+            break;
+        case 'V':
+            printf("perftest version : %.2f\n",VERSION);
+            return 0;
+        case 'd':
+            ib_devname = _strdup(optarg);
+            break;
+
+        case 'i': {
+            /*
+             * Validate before narrowing: once cast to uint8_t the value
+             * can never be negative and out-of-range input would wrap.
+             */
+            long port_arg = strtol(optarg, NULL, 0);
+            if (port_arg < 0 || port_arg > 255) {
+                usage(argv[0]);
+                return 2;
+            }
+            user_param.ib_port = (uint8_t)port_arg;
+            break;
+        }
+
+        case 's':
+            size = strtol(optarg, NULL, 0);
+            if (size < 1) {
+                usage(argv[0]); return 3;
+            }
+            break;
+
+        case 't':
+            user_param.tx_depth = strtol(optarg, NULL, 0);
+            if (user_param.tx_depth < 1) {
+                usage(argv[0]); return 4;
+            }
+            break;
+
+        case 'I': {
+            /* A negative inline size is as invalid as one above the cap. */
+            long inline_arg = strtol(optarg, NULL, 0);
+            if (inline_arg < 0 || inline_arg > MAX_INLINE) {
+                usage(argv[0]); return 7;
+            }
+            user_param.inline_size = inline_arg;
+            break;
+        }
+
+        case 'n':
+            user_param.iters = strtol(optarg, NULL, 0);
+            if (user_param.iters < 2) {
+                usage(argv[0]);
+                return 5;
+            }
+
+            break;
+
+        case 'C':
+            report.cycles = 1;
+            break;
+
+        case 'H':
+            report.histogram = 1;
+            break;
+
+        case 'U':
+            report.unsorted = 1;
+            break;
+
+        case 'F':
+            no_cpu_freq_fail = 1;
+            break;
+
+        case 'u':
+            user_param.qp_timeout = (uint8_t)(strtol(optarg, NULL, 0));
+            break;
+        case 'S': {
+            /* Check the full value so that e.g. 256 does not wrap to 0. */
+            long sl_arg = strtol(optarg, NULL, 0);
+            if (sl_arg < 0 || sl_arg > 15) { usage(argv[0]); return 6; }
+            sl = (uint8_t)sl_arg;
+            break;
+        }
+
+        case 'x':
+            user_param.gid_index = strtol(optarg, NULL, 0);
+            if (user_param.gid_index > 63) {
+                usage(argv[0]);
+                return 1;
+            }
+            break;
+
+        default:
+            usage(argv[0]);
+            return 7;
+        }
+    }
+
+    /* At most one positional argument is accepted: the server name. */
+    if (optind == argc - 1)
+        servername = _strdup(argv[optind]);
+    else if (optind < argc) {
+        usage(argv[0]);
+        return 6;
+    }
+
+    /*
+     * Done with parameter parsing. Perform setup.
+     */
+
+    tstamp = malloc(user_param.iters * sizeof *tstamp);
+    if (!tstamp) {
+        perror("malloc");
+        return 10;
+    }
+    printf("------------------------------------------------------------------\n");
+    printf(" RDMA_Write Latency Test\n");
+    printf("Inline data is used up to %d bytes message\n", user_param.inline_size);
+    if (user_param.connection_type==0) {
+        printf("Connection type : RC\n");
+    } else {
+        printf("Connection type : UC\n");
+    }
+
+    if (all == ALL) {
+        /*since we run all sizes */
+        size = 8388608; /*2^23 */
+    }
+
+    page_size = si.dwPageSize;
+
+    ib_dev = pp_find_dev(ib_devname);
+    if (!ib_dev)
+        return 7;
+
+    ctx = pp_init_ctx(ib_dev,size,&user_param);
+    if (!ctx)
+        return 8;
+
+    // Set up the Connection.
+    if (set_up_connection(ctx,&user_param,&my_dest)) {
+        fprintf(stderr," Unable to set up socket connection\n");
+        return 1;
+    }
+
+    // Init the connection and print the local data.
+    if (init_connection(&user_param,&my_dest,servername)) {
+        fprintf(stderr," Unable to init the socket connection\n");
+        return 1;
+    }
+
+    // shaking hands and gather the other side info.
+    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to exchange date between server and clients\n");
+        return 1;
+
+    }
+    user_param.side = REMOTE;
+    ctx_print_pingpong_data(&rem_dest,&user_param);
+
+    if (pp_connect_ctx(ctx,my_dest.psn,&rem_dest,&user_param)) {
+        fprintf(stderr," Unable to Connect the HCA's through the link\n");
+        return 1;
+    }
+
+    // An additional handshake is required after moving qp to RTR.
+    if (ctx_hand_shake(&user_param,&my_dest,&rem_dest)) {
+        fprintf(stderr,"Failed to exchange date between server and clients\n");
+        return 1;
+    }
+
+    printf("------------------------------------------------------------------\n");
+    printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");
+
+    if (all == ALL) {
+        /* Sizes 2^1 .. 2^23; the buffer was sized for the largest one. */
+        for (i = 1; i < 24 ; ++i) {
+            size = 1 << i;
+            if(run_iter(ctx,&user_param,&rem_dest,size,all))
+                return 17;
+            print_report(&report,user_param.iters, tstamp, size, no_cpu_freq_fail);
+        }
+    } else {
+        if(run_iter(ctx,&user_param, &rem_dest,size,all))
+            return 18;
+        print_report(&report,user_param.iters, tstamp, size, no_cpu_freq_fail);
+    }
+
+    // Done close sockets
+    closesocket(user_param.sockfd);
+
+    printf("------------------------------------------------------------------\n");
+    free(tstamp);
+    return 0;
+}