--- /dev/null
+DIRS=\\r
+ user\r
--- /dev/null
+This directory includes gen2 uverbs microbenchmarks.
+
+The tests are intended as:
+ 1) simple, efficient usage examples.
+ Please see the COPYING file if you intend to copy it literally.
+
+ 2) a useful benchmark
+ e.g. for HW or SW tuning and/or functional testing.
+ Please post results/observations to the openib-general mailing
+ list. See http://openib.org/mailman/listinfo/openib-general
+ and http://www.openib.org "Contact Us" link for contact details.
+
+
+Testing methodology
+-------------------
+
+- uses CPU cycle counter to get time stamps without context switch.
+ Some CPU architectures do NOT have such capability. e.g. Intel 80486
+ or older PPC.
+
+- measures round-trip time but reports half of that as one-way latency.
+ i.e. the result may not be sufficiently accurate for asymmetrical configurations.
+
+- Min/Median/Max result is reported.
+ The median (vs average) is less sensitive to extreme scores.
+ Typically the "Max" value is the first value measured.
+
+- larger samples only marginally help. The default (1000) is pretty good.
+ Note that an array of cycles_t (typically unsigned long) is allocated
+ once to collect samples and again to store the difference between them.
+ Really big sample sizes (e.g. 1 million) might expose other problems
+ with the program.
+
+- "-H" option will dump the histogram for additional statistical analysis.
+ See xgraph, ygraph, r-base (http://www.r-project.org/), pspp, or other
+ statistical math programs.
+
+Architectures tested: i686, x86_64, ia64
+
+
+
+Test Descriptions
+-----------------
+
+rdma_lat.c - latency test with RDMA write transactions
+rdma_bw.c - streaming BW test with RDMA write transactions
+
+
+The following tests are mainly useful for HW/SW benchmarking.
+They are not intended as actual usage examples.
+-----------------
+
+send_lat.c - latency test with send transactions
+send_bw.c - BW test with send transactions
+write_lat.c - latency test with RDMA write transactions
+write_bw.c - BW test with RDMA write transactions
+read_lat.c - latency test with RDMA read transactions
+read_bw.c - BW test with RDMA read transactions
+
+
+Build Tests
+-----------
+
+"make" to build all tests
+
+ Debian: build-dep on linux-kernel-headers (for asm/timex.h file)
+ build-dep on libibverbs-dev
+ depends on libibverbs1
+
+
+Run Tests
+---------
+
+Prerequisites:
+ kernel 2.6
+ ib_uverbs (kernel module) matches libibverbs
+ ("match" means binary compatible, but ideally same SVN rev)
+ Debian: dpkg -i libibverbs1_0.1.0-1_ia64.deb
+
+Server: ./<test name> <options>
+Client: ./<test name> <options> <server IP address>
+
+ o IMPORTANT: The SAME OPTIONS must be passed to both server and client.
+ o "--help" will list the available <options>.
+ o <server address> is IPv4 or IPv6 address.
+ You can use the IPoIB address if you have IPoIB configured.
+
+You need to be running a Subnet Manager on the switch or one of the nodes
+in your fabric. To use the opensm tool for this purpose, run
+ modprobe ib_umad
+ opensm &
+on one of the nodes
+
+First load ib_uverbs on both client and server with something like:
+ modprobe ib_uverbs
+
+Then (e.g.) "rdma_lat -C" on the server side.
+Lastly "rdma_lat -C 10.0.1.31" on the client.
+
+rdma_lat will exit on both server and client after printing results.
+
--- /dev/null
+- support -- option ( like --port ...)\r
--- /dev/null
+#include <unistd.h>
+#include <stdio.h>
+#include "get_clock.h"
+
+/*
+ * Sanity check for the cycle-counter calibration: time a one-second
+ * sleep() with get_cycles() over and over and print the elapsed cycles
+ * divided by the value reported by get_cpu_mhz().
+ *
+ * Loops until interrupted; returns 2 when get_cpu_mhz() reports 0.
+ */
+int main()
+{
+    cycles_t before, after;
+    double cycles_per_usec = get_cpu_mhz();
+
+    if (!cycles_per_usec) {
+        printf("Unable to calibrate cycles. Exiting.\n");
+        return 2;
+    }
+
+    printf("Type CTRL-C to cancel.\n");
+    for (;;) {
+        before = get_cycles();
+        sleep(1);
+        after = get_cycles();
+        printf("1 sec = %g usec\n", (after - before) / cycles_per_usec);
+    }
+}
--- /dev/null
+DIRS=\\r
+ send_bw \ \r
+ send_lat \\r
+ write_lat \\r
+ write_bw \r
+\r
+# write_bw_postlist rdma_lat rdma_bw send_lat write_lat write_bw read_lat read_bwr\r
--- /dev/null
+/*
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ *
+ * Author: Michael S. Tsirkin <mst@mellanox.co.il>
+ */
+
+/* #define DEBUG 1 */
+/* #define DEBUG_DATA 1 */
+/* #define GET_CPU_MHZ_FROM_PROC 1 */
+
+/* For gettimeofday */
+#define _BSD_SOURCE
+#include <sys/time.h>
+
+#include <unistd.h>
+#include <stdio.h>
+#include "get_clock.h"
+
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+#ifndef DEBUG_DATA
+#define DEBUG_DATA 0
+#endif
+
+#define MEASUREMENTS 200
+#define USECSTEP 10
+#define USECSTART 100
+
+/*
+ * Measure cycles per microsecond with a linear regression
+ * (http://en.wikipedia.org/wiki/Linear_regression#Parameter_estimation).
+ *
+ * MEASUREMENTS busy-wait intervals of increasing length (USECSTART +
+ * i * USECSTEP microseconds, timed with gettimeofday()) are sampled
+ * together with the number of cycles get_cycles() counted across each
+ * one.  Fitting y = a + b x to (microseconds, cycles) gives b as the
+ * cycle rate and a as the fixed per-sample overhead.
+ *
+ * Returns the slope b, or 0 if gettimeofday() fails or the fit is poor
+ * (r^2 < 0.9).
+ */
+static double sample_get_cpu_mhz(void)
+{
+    struct timeval tv1, tv2;
+    cycles_t start;
+    double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
+    double tx, ty;
+    double cov, var_x, var_y;
+    long elapsed;
+    int i;
+
+    /* Regression: y = a + b x */
+    long x[MEASUREMENTS];
+    cycles_t y[MEASUREMENTS];
+    double a; /* system call overhead in cycles */
+    double b; /* cycles per microsecond */
+    double r_2;
+
+    for (i = 0; i < MEASUREMENTS; ++i) {
+        start = get_cycles();
+
+        if (gettimeofday(&tv1, NULL)) {
+            fprintf(stderr, "gettimeofday failed.\n");
+            return 0;
+        }
+
+        /* Spin until at least the target interval has elapsed. */
+        do {
+            if (gettimeofday(&tv2, NULL)) {
+                fprintf(stderr, "gettimeofday failed.\n");
+                return 0;
+            }
+            elapsed = (tv2.tv_sec - tv1.tv_sec) * 1000000 +
+                      (tv2.tv_usec - tv1.tv_usec);
+        } while (elapsed < USECSTART + i * USECSTEP);
+
+        x[i] = elapsed;
+        y[i] = get_cycles() - start;
+        if (DEBUG_DATA)
+            /* %lld is the standard conversion for long long (was %Ld). */
+            fprintf(stderr, "x=%ld y=%lld\n", x[i], (long long)y[i]);
+    }
+
+    for (i = 0; i < MEASUREMENTS; ++i) {
+        tx = x[i];
+        ty = y[i];
+        sx += tx;
+        sy += ty;
+        sxx += tx * tx;
+        syy += ty * ty;
+        sxy += tx * ty;
+    }
+
+    /* Scaled covariance and variances shared by the slope and r^2. */
+    cov = MEASUREMENTS * sxy - sx * sy;
+    var_x = MEASUREMENTS * sxx - sx * sx;
+    var_y = MEASUREMENTS * syy - sy * sy;
+
+    b = cov / var_x;
+    a = (sy - b * sx) / MEASUREMENTS;
+
+    if (DEBUG)
+        fprintf(stderr, "a = %g\n", a);
+    if (DEBUG)
+        fprintf(stderr, "b = %g\n", b);
+    if (DEBUG)
+        fprintf(stderr, "a / b = %g\n", a / b);
+    r_2 = cov * cov / var_x / var_y;
+
+    if (DEBUG)
+        fprintf(stderr, "r^2 = %g\n", r_2);
+    if (r_2 < 0.9) {
+        fprintf(stderr,"Correlation coefficient r^2: %g < 0.9\n", r_2);
+        return 0;
+    }
+
+    return b;
+}
+
+/*
+ * Read the nominal CPU frequency from /proc/cpuinfo.
+ *
+ * Each line is matched against "cpu MHz : <value>" and, failing that,
+ * against the PPC-style "clock : <value>".  All matching lines must
+ * report the same value.
+ *
+ * Returns the frequency found, or 0.0 when the file cannot be opened,
+ * no line matches, or two lines disagree (a message is printed in the
+ * last case).  The file is closed on every path.
+ */
+static double proc_get_cpu_mhz(void)
+{
+    FILE *f;
+    char buf[256];
+    double mhz = 0.0;
+
+    f = fopen("/proc/cpuinfo","r");
+    if (!f)
+        return 0.0;
+
+    while (fgets(buf, sizeof(buf), f)) {
+        double m;
+
+        if (sscanf(buf, "cpu MHz : %lf", &m) != 1 &&
+            sscanf(buf, "clock : %lf", &m) != 1) /* PPC has a different format */
+            continue;
+
+        if (mhz == 0.0) {
+            mhz = m;
+            continue;
+        }
+        if (mhz != m) {
+            fprintf(stderr, "Conflicting CPU frequency values"
+                    " detected: %lf != %lf\n", mhz, m);
+            /* Leave through the common exit so the stream is closed. */
+            mhz = 0.0;
+            break;
+        }
+    }
+
+    fclose(f);
+    return mhz;
+}
+
+
+/*
+ * Return the cycle-counter frequency used to convert cycles to time.
+ *
+ * The measured value (sample_get_cpu_mhz) is compared with the nominal
+ * one (proc_get_cpu_mhz).  If either is 0 the result is 0.  If they
+ * differ by more than 1% of the nominal value a warning is printed and
+ * the measured value is returned; otherwise the nominal value is used.
+ */
+double get_cpu_mhz(void)
+{
+    const double measured = sample_get_cpu_mhz();
+    const double nominal = proc_get_cpu_mhz();
+    double gap;
+
+    if (!nominal || !measured)
+        return 0;
+
+    gap = (nominal > measured) ? nominal - measured : measured - nominal;
+    if (!(gap / nominal > 0.01))
+        return nominal;
+
+    fprintf(stderr, "Warning: measured timestamp frequency "
+            "%g differs from nominal %g MHz\n",
+            measured, nominal);
+    return measured;
+}
--- /dev/null
+/*
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ *
+ * Author: Michael S. Tsirkin <mst@mellanox.co.il>
+ */
+
+#ifndef GET_CLOCK_H
+#define GET_CLOCK_H
+
+#include "complib/cl_timer.h"
+
+typedef uint64_t cycles_t;
+
+/*
+ * Three-way comparison of two cycles_t values passed by address, with
+ * the (const void *, const void *) signature used by qsort()-style
+ * sorters.  Returns -1, 0 or 1 for less-than, equal, greater-than.
+ */
+static int __cdecl cycles_compare(const void * aptr, const void * bptr)
+{
+    const cycles_t lhs = *(const cycles_t *)aptr;
+    const cycles_t rhs = *(const cycles_t *)bptr;
+
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Current timestamp, taken from complib's cl_get_tick_count(). */
+static inline cycles_t get_cycles()
+{
+    cycles_t now = cl_get_tick_count();
+
+    return now;
+}
+
+/*
+ * Frequency of the counter read by get_cycles(), as a double.
+ * NOTE(review): the value is whatever cl_get_tick_freq() reports; despite
+ * the name it is presumably not scaled to MHz -- confirm against complib.
+ */
+static double get_cpu_mhz(void)
+{
+    const double tick_freq = (double)cl_get_tick_freq();
+
+    return tick_freq;
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: vstat_main.c 1310 2006-05-01 15:28:12Z sleybo $
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <ctype.h>
+
+#include "getopt.h"
+
+/* for preventing C4996-warning on deprecated functions like strtok in VS 8.0. */
+#pragma warning(disable : 4996)
+
+/* Global variables for getopt_long */
+/* Argument of the option most recently returned, as set by getopt()/getopt_long(). */
+char *optarg;
+/* Index in argv of the next element to scan; starts after the program name. */
+int optind = 1;
+/* Non-zero lets getopt_long() print its "requires argument" diagnostic. */
+int opterr = 1;
+/* Written by get_char_option() when an option letter is not found in optstring. */
+int optopt = '?';
+
+/*
+ * Parses the short option at argv[iArg]; stores the option character in
+ * *opt_p, advances *opt_ind past what was consumed and returns the
+ * option's argument (or NULL).
+ */
+static char* get_char_option(const char* optstring,char*const* argv,int argc, int iArg, int* opt_ind,char* opt_p);
+
+/* * * * * * * * * * */
+
+/*
+ * Scan position shared by getopt() and getopt_long(); both re-sync it
+ * from optind after a short option and bump the two together for long
+ * options.  Note it has external linkage.
+ */
+int iArg = 1;
+
+/*
+ * Minimal getopt(): parse the next short option from argv.
+ *
+ * Returns the option character produced by get_char_option(), or EOF
+ * when argv is exhausted, the next element does not start with '-', or
+ * it is exactly "--".  optarg and optind are updated as a side effect.
+ */
+int getopt(int argc, char *const*argv, const char *optstring)
+{
+    char opt_char;
+    const char *current;
+
+    if (iArg == argc)
+        return (EOF);
+
+    current = argv[iArg];
+
+    /* A non-option word or the "--" marker ends scanning. */
+    if (current[0] != '-' || strcmp("--", current) == 0)
+        return (EOF);
+
+    optarg = get_char_option(optstring, argv, argc, iArg, &optind, &opt_char);
+    iArg = optind;
+    return opt_char;
+}
+
+/* * * * * * * * * * */
+
+/*
+ * Minimal getopt_long(): parse the next short or long option from argv.
+ *
+ * Short options ("-x") are delegated to get_char_option().  Long options
+ * ("--name", "--name=value", "--name value") are matched exactly against
+ * longopts[].name.
+ *
+ * Returns:
+ *   EOF  when argv is exhausted, the next element does not start with
+ *        '-', or it is exactly "--";
+ *   '?'  for an unknown long option, or a long option whose required
+ *        argument is missing (a message is printed when opterr is set);
+ *   0    when the matched entry has a non-NULL flag (*flag is set to val);
+ *   val  of the matched entry otherwise.
+ *
+ * optarg is set to the option's argument, or NULL when there is none.
+ * For "--name=value" it points into argv itself, so it stays valid
+ * after this function returns.  *longindex, if non-NULL, receives the
+ * index of the matched entry.  Every examined element is consumed, also
+ * on error, so callers that keep looping always make progress.
+ */
+int getopt_long(int argc, char *const*argv,
+        const char *optstring,
+        const struct option *longopts, int *longindex)
+{
+    char chOpt;
+    char *name;
+    char *eq;
+    char *next;
+    size_t name_len;
+    int i;
+
+    if (iArg == argc)
+        return (EOF);
+
+    /* Does not start with a '-': we are done scanning */
+    if (argv[iArg][0] != '-')
+        return (EOF);
+
+    /* "--" terminates scanning */
+    if (!strcmp("--", argv[iArg]))
+        return EOF;
+
+    /* char option : -d 5 */
+    if (argv[iArg][1] != '-') {
+        optarg = get_char_option(optstring, argv, argc, iArg, &optind, &chOpt);
+        iArg = optind;
+        return chOpt;
+    }
+
+    /* Split "--name[=value]" in place, without copying or modifying argv. */
+    name = &(argv[iArg][2]);
+    eq = strchr(name, '=');
+    name_len = (eq != NULL) ? (size_t)(eq - name) : strlen(name);
+
+    for (i = 0; longopts[i].name; i++) {
+        if (strlen(longopts[i].name) != name_len ||
+            strncmp(name, longopts[i].name, name_len) != 0)
+            continue;
+
+        /* We have a match */
+        optarg = NULL;
+
+        if (longopts[i].has_arg != no_argument) {
+            if (eq != NULL) {
+                /* value attached with '=' */
+                optarg = eq + 1;
+            } else {
+                /* the next arg in cmd line, if any, is the param */
+                next = (iArg + 1 < argc) ? argv[iArg + 1] : NULL;
+                if (next != NULL && *next != '-') {
+                    optarg = next;
+                    iArg++;
+                    optind++;
+                } else if (longopts[i].has_arg == required_argument) {
+                    /* no param is found */
+                    if (opterr)
+                        fprintf (stderr, "Option %.*s requires argument\n",
+                                 (int)name_len, name);
+                    iArg++;
+                    optind++;
+                    return ('?');
+                }
+                /* optional argument absent: optarg stays NULL */
+            }
+        }
+
+        if (longindex != NULL)
+            *longindex = i;
+
+        iArg++;
+        optind++;
+
+        if (longopts[i].flag != NULL) {
+            *(longopts[i].flag) = longopts[i].val;
+            return 0;
+        }
+        return (longopts[i].val);
+    }
+
+    /* Unknown long option: consume it so it is not rescanned forever. */
+    iArg++;
+    optind++;
+    return ('?');
+}
+
+/* * * * * * * * * * * */
+
+/*
+ * Parse the short option found at argv[iArg] ("-x", "-xVALUE" or
+ * "-x VALUE").
+ *
+ *   optstring  accepted letters; "x:" takes an argument, "x::" an
+ *              optional one
+ *   opt_ind    advanced by one for the option itself, and by one more
+ *              when the following argv element is consumed as its value
+ *   opt_p      receives the option letter, or EOF when the character is
+ *              not alphabetic, not in optstring, or a required argument
+ *              is missing
+ *
+ * Returns the option's argument, or NULL when it has none.  optopt is
+ * set to the offending letter for unknown options and for options
+ * missing a required argument.
+ */
+static char* get_char_option(const char* optstring,char*const* argv,int argc, int iArg, int* opt_ind,char* opt_p)
+{
+    char chOpt = argv[iArg][1];
+    const char *spec;
+    char *prm = NULL;
+
+    /* non valid argument; cast keeps isalpha() defined for negative chars */
+    if (!isalpha((unsigned char)chOpt)) {
+        chOpt = EOF;
+        goto end;
+    }
+
+    spec = strchr(optstring, chOpt);
+
+    /* the argument wasn't found in optstring */
+    if (spec == NULL) {
+        optopt = chOpt; /* record the letter before it is overwritten */
+        chOpt = EOF;
+        goto end;
+    }
+
+    /* don't need argument */
+    if (spec[1] != ':')
+        goto end;
+
+    if (argv[iArg][2] != '\0') {
+        /* param is attached to option: -po8889 */
+        prm = &(argv[iArg][2]);
+        goto end;
+    }
+
+    /*
+     * Must look at the next argv for the param.  It is absent when we
+     * are at the end of the list or the next element is a new option;
+     * prm stays NULL so the next option is never handed out as a value.
+     */
+    if (iArg + 1 == argc || argv[iArg + 1][0] == '-') {
+        if (spec[2] != ':') {
+            /* argument was required, not optional (::) */
+            optopt = chOpt;
+            chOpt = EOF;
+        }
+        goto end;
+    }
+
+    /* next argv is the param */
+    prm = argv[iArg + 1];
+    (*opt_ind)++;
+
+end:
+    (*opt_ind)++;
+    *opt_p = chOpt;
+    return prm;
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: vstat_main.c 1310 2006-05-01 15:28:12Z sleybo $
+ */
+
+#ifndef GETOPT_H
+#define GETOPT_H
+
+/* the string argument that came with the option */
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+
+ When `getopt' returns -1, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized. */
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+/* One entry of the long-option table passed to getopt_long(); the table
+   ends with an entry whose name is NULL. */
+struct option
+{
+ /* Option name as it appears after "--". */
+ const char *name;
+ /* One of no_argument, required_argument, optional_argument. */
+ int has_arg;
+ /* If non-NULL, *flag is set to val and getopt_long() returns 0. */
+ int *flag;
+ /* Value returned (flag == NULL) or stored through flag. */
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
+
+/* Return the option character from OPTS just read. Return -1 when
+ there are no more options. For unrecognized options, or options
+ missing arguments, `optopt' is set to the option letter, and '?' is
+ returned.
+
+ The OPTS string is a list of characters which are recognized option
+ letters, optionally followed by colons, specifying that that letter
+ takes an argument, to be placed in `optarg'.
+
+ If a letter in OPTS is followed by two colons, its argument is
+ optional. This behavior is specific to the GNU `getopt'.
+
+ The argument `--' causes premature termination of argument
+ scanning, explicitly telling `getopt' that there are no more
+ options.
+
+ If OPTS begins with `--', then non-option arguments are treated as
+ arguments to the option '\0'. This behavior is specific to the GNU
+ `getopt'. */
+
+extern int getopt(int argc, char *const *argv, const char *shortopts);
+extern int getopt_long(int argc, char *const*argv,
+ const char *optstring,
+ const struct option *longopts, int *longindex);
+
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ *
+ * Author: Yossi Leybovich <sleybo@mellanox.co.il>
+ */
+
+#ifndef H_PERF_SOCK_H
+#define H_PERF_SOCK_H
+
+
+#include <WINSOCK2.h>
+#include <Ws2tcpip.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <time.h>
+#include <limits.h>
+
+
+#include <iba/ib_types.h>
+#include <iba/ib_al.h>
+
+/*
+ * Text format used to exchange a struct pingpong_dest over the TCP
+ * side channel: lid:qpn:psn:rkey:vaddr, all in hex.
+ *
+ * KEY_MSG_SIZE is the size of the widest message including its NUL.
+ * vaddr is a uint64_t, so it needs the 64-bit (I64) size prefix of the
+ * Microsoft CRT; a plain %x would mismatch the argument when printing
+ * and fill only part of the field when scanning.
+ */
+#define KEY_MSG_SIZE (sizeof "0000:000000:000000:00000000:0000000000000000")
+#define KEY_PRINT_FMT "%04x:%06x:%06x:%08x:%016I64x"
+#define KEY_SCAN_FMT "%x:%x:%x:%x:%I64x"
+
+/* Version of the test suite. NOTE(review): where it is printed is not visible here. */
+#define VERSION 2.0
+/* Presumably the value of user_parameters.all meaning "run all message sizes" -- confirm. */
+#define ALL 1
+
+/* Values for user_parameters.connection_type (pp_init_ctx tests for UD). */
+#define RC 0
+#define UC 1
+#define UD 3
+
+
+/* Work-request identifiers; presumably used to tell completions apart -- confirm in the tests. */
+#define PINGPONG_SEND_WRID 1
+#define PINGPONG_RECV_WRID 2
+#define PINGPONG_RDMA_WRID 3
+
+
+/* Function entry/exit tracing; change "#if 0" to "#if 1" to enable.
+   The enabled form already ends in ';', so "PERF_ENTER;" yields an
+   extra empty statement. */
+#if 0
+#define PERF_ENTER printf("%s: ===>\n",__FUNCTION__);
+#define PERF_EXIT printf("%s: <===\n",__FUNCTION__);
+#else
+#define PERF_ENTER
+#define PERF_EXIT
+#endif
+
+/*
+ * Per-test state shared by the benchmark programs: IBAL handles, the
+ * registered work buffer and pre-built work requests.
+ * NOTE(review): field roles below are inferred from their types and from
+ * the part of pp_init_ctx visible in send_bw.c; confirm against the tests.
+ */
+struct pingpong_context {
+ ib_ca_handle_t context;
+ ib_ca_handle_t ca;
+ ib_ca_attr_t *ca_attr;
+ ib_al_handle_t al;
+//PORTED struct ibv_comp_channel *channel;
+ void* channel; //PORTED REMOVE
+ ib_pd_handle_t pd;
+ ib_mr_handle_t mr;
+ /* presumably the keys of the memory region mr -- confirm */
+ uint32_t rkey;
+ uint32_t lkey;
+ /* presumably send and receive completion queues -- confirm */
+ ib_cq_handle_t scq;
+ ib_cq_handle_t rcq;
+ /* heap-allocated by pp_init_ctx (one handle / one attribute struct) */
+ ib_qp_handle_t *qp;
+ ib_qp_attr_t *qp_attr;
+ /* work buffer; pp_init_ctx allocates 2 * size bytes (plus 2 * 40 for UD) */
+ void *buf;
+ /* message size passed to pp_init_ctx */
+ unsigned size;
+ /* copied from user_parameters.tx_depth */
+ int tx_depth;
+
+ ib_local_ds_t list;
+ ib_local_ds_t recv_list;
+ ib_send_wr_t wr;
+ ib_recv_wr_t rwr;
+
+ ib_av_handle_t av;
+
+ volatile char *post_buf;
+ volatile char *poll_buf;
+
+ int *scnt,*ccnt;
+};
+
+
+/*
+ * Addressing information the two sides exchange over the TCP socket
+ * (see pp_write_keys / pp_read_keys): the five fields are sent as
+ * colon-separated hex text in this order.
+ */
+struct pingpong_dest {
+ ib_net16_t lid;
+ ib_net32_t qpn;
+ ib_net32_t psn;
+ uint32_t rkey;
+ uint64_t vaddr;
+};
+
+
+static int pp_write_keys(SOCKET sockfd, const struct pingpong_dest *my_dest);
+static int pp_read_keys(SOCKET sockfd, struct pingpong_dest *rem_dest);
+ SOCKET pp_client_connect(const char *servername, int port);
+
+ int pp_client_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest);
+
+SOCKET pp_server_connect(int port);
+int pp_server_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,
+ struct pingpong_dest* rem_dest);
+
+
+
+#endif
--- /dev/null
+
+#include "perf_defs.h"
+
+/*
+ * Return the symbolic name of the calling thread's last Winsock error
+ * (WSAGetLastError()), or "Unknown error" for codes not listed here.
+ * The returned string is a literal and must not be freed.
+ */
+const char *sock_get_error_str(void)
+{
+    static const struct {
+        int code;
+        const char *name;
+    } known[] = {
+        { WSANOTINITIALISED, "WSANOTINITIALISED" }, /* WSAStartup has not been called successfully */
+        { WSAENETDOWN,       "WSAENETDOWN" },       /* the network subsystem has failed */
+        { WSAEFAULT,         "WSAEFAULT" },         /* buffer not fully inside valid user address space */
+        { WSAENOTCONN,       "WSAENOTCONN" },       /* the socket is not connected */
+        { WSAEINTR,          "WSAEINTR" },          /* blocking call canceled through WSACancelBlockingCall */
+        { WSAEINPROGRESS,    "WSAEINPROGRESS" },    /* a blocking Winsock 1.1 call or provider callback is in progress */
+        { WSAENETRESET,      "WSAENETRESET" },      /* connection broken by keep-alive failure detection */
+        { WSAENOTSOCK,       "WSAENOTSOCK" },       /* the descriptor is not a socket */
+        { WSAEOPNOTSUPP,     "WSAEOPNOTSUPP" },     /* operation (e.g. MSG_OOB) not supported on this socket */
+        { WSAESHUTDOWN,      "WSAESHUTDOWN" },      /* the socket has been shut down */
+        { WSAEWOULDBLOCK,    "WSAEWOULDBLOCK" },    /* nonblocking socket and the operation would block */
+        { WSAEMSGSIZE,       "WSAEMSGSIZE" },       /* message too large for the buffer and was truncated */
+        { WSAEINVAL,         "WSAEINVAL" },         /* socket not bound, or invalid flag/length */
+        { WSAECONNABORTED,   "WSAECONNABORTED" },   /* virtual circuit terminated (time-out or other failure) */
+        { WSAETIMEDOUT,      "WSAETIMEDOUT" },      /* connection dropped: network failure or peer not responding */
+        { WSAECONNRESET,     "WSAECONNRESET" }      /* virtual circuit reset by the remote side */
+    };
+    const int last_error = WSAGetLastError();
+    size_t idx;
+
+    for (idx = 0; idx < sizeof known / sizeof known[0]; idx++) {
+        if (known[idx].code == last_error)
+            return known[idx].name;
+    }
+    return "Unknown error";
+}
+
+/*
+ * Format my_dest with KEY_PRINT_FMT and send it to the peer as one
+ * fixed-size message of KEY_MSG_SIZE bytes.
+ *
+ * Returns 0 on success, -1 if send() does not transmit the whole
+ * message.
+ */
+static int pp_write_keys(SOCKET sockfd, const struct pingpong_dest *my_dest)
+{
+    /*
+     * The whole buffer goes on the wire, not just the formatted text,
+     * so zero it first: bytes after the terminator must not carry
+     * uninitialised stack contents.
+     */
+    char msg[KEY_MSG_SIZE] = { 0 };
+    PERF_ENTER;
+    sprintf(msg, KEY_PRINT_FMT,my_dest->lid, my_dest->qpn,
+            my_dest->psn, my_dest->rkey, my_dest->vaddr);
+
+    if (send(sockfd, msg, sizeof msg,0) != sizeof msg) {
+        perror("pp_write_keys");
+        fprintf(stderr, "Couldn't send local address %s\n",sock_get_error_str());
+        return -1;
+    }
+    PERF_EXIT;
+    return 0;
+}
+
+/*
+ * Receive one KEY_MSG_SIZE-byte message from the peer and parse it with
+ * KEY_SCAN_FMT into rem_dest.
+ *
+ * Returns 0 on success, -1 if recv() does not deliver the whole message
+ * or fewer than five fields can be parsed.
+ */
+static int pp_read_keys(SOCKET sockfd,
+        struct pingpong_dest *rem_dest)
+{
+    int parsed;
+    /* lid is only 16 bits wide; scan into an int-sized temporary. */
+    unsigned int lid = 0;
+    char msg[KEY_MSG_SIZE];
+    PERF_ENTER;
+    if (recv(sockfd, msg, sizeof msg, 0) != sizeof msg) {
+        perror("pp_read_keys");
+        fprintf(stderr, "Couldn't read remote address %s\n",sock_get_error_str());
+        return -1;
+    }
+
+    /* Never let sscanf() run past the buffer on a malformed message. */
+    msg[sizeof msg - 1] = '\0';
+
+    /* Clear first so no stale upper bits survive a narrow conversion. */
+    rem_dest->vaddr = 0;
+
+    parsed = sscanf(msg, KEY_SCAN_FMT, &lid, &rem_dest->qpn,
+            &rem_dest->psn,&rem_dest->rkey, &rem_dest->vaddr);
+
+    if (parsed != 5) {
+        fprintf(stderr, "Couldn't parse line <%.*s > parsed = %d %s\n",
+                (int)sizeof msg, msg,parsed,sock_get_error_str());
+        return -1;
+    }
+    rem_dest->lid = (ib_net16_t)lid;
+    rem_dest->vaddr = (uintptr_t) rem_dest->vaddr;
+    PERF_EXIT;
+    return 0;
+}
+
+/*
+ * Resolve servername:port and connect a TCP socket to the first address
+ * that accepts the connection.
+ *
+ * Returns the connected socket, or INVALID_SOCKET (after printing a
+ * message) when resolution or every connection attempt fails.
+ */
+SOCKET pp_client_connect(const char *servername, int port)
+{
+    struct addrinfo *res, *t;
+    struct addrinfo hints = {
+        0, //ai_flags
+        AF_UNSPEC, // ai_family
+        SOCK_STREAM //ai_socktype
+    };
+    /* Large enough for any int, so a bogus port cannot overflow it. */
+    char service[16];
+    int n;
+    SOCKET sockfd = INVALID_SOCKET;
+    PERF_ENTER;
+    sprintf(service, "%d", port);
+    n = getaddrinfo(servername, service, &hints, &res);
+
+    if (n) {
+        fprintf(stderr, "%s for %s:%d\n", sock_get_error_str(), servername, port);
+        return sockfd;
+    }
+
+    /* Try each resolved address until one connects. */
+    for (t = res; t; t = t->ai_next) {
+        sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+        if (sockfd != INVALID_SOCKET) {
+            if (!connect(sockfd, t->ai_addr, (int)t->ai_addrlen))
+                break;
+            closesocket(sockfd);
+            sockfd = INVALID_SOCKET;
+        }
+    }
+
+    freeaddrinfo(res);
+
+    if (sockfd == INVALID_SOCKET) {
+        fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+        return sockfd;
+    }
+    PERF_EXIT;
+    return sockfd;
+}
+
+/*
+ * Client half of the address exchange: send my_dest first, then read
+ * the peer's reply into rem_dest.
+ * Returns -1 if the send fails, otherwise the result of pp_read_keys().
+ */
+int pp_client_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,
+        struct pingpong_dest *rem_dest)
+{
+    int status;
+    PERF_ENTER;
+    status = pp_write_keys(sockfd, my_dest);
+    if (status)
+        return -1;
+    PERF_EXIT;
+    status = pp_read_keys(sockfd, rem_dest);
+    return status;
+}
+
+/*
+ * Listen on the given TCP port and accept a single client connection.
+ *
+ * The listening socket is closed before returning.  Returns the
+ * accepted socket, or INVALID_SOCKET (after printing a message) on any
+ * failure, so callers can test one sentinel for every error.
+ */
+SOCKET pp_server_connect(int port)
+{
+    struct addrinfo *res, *t;
+    struct addrinfo hints = {
+        AI_PASSIVE, //ai_flags
+        AF_UNSPEC, // ai_family
+        SOCK_STREAM //ai_socktype
+    };
+    /* Large enough for any int, so a bogus port cannot overflow it. */
+    char service[16];
+    SOCKET sockfd = INVALID_SOCKET, connfd;
+    int n;
+    PERF_ENTER;
+    sprintf(service, "%d", port);
+    n = getaddrinfo(NULL, service, &hints, &res);
+
+    if (n) {
+        fprintf(stderr, "%s for port %d\n", sock_get_error_str(), port);
+        /* The getaddrinfo() error code is not a socket value. */
+        return INVALID_SOCKET;
+    }
+
+    /* Bind to the first resolved address that works. */
+    for (t = res; t; t = t->ai_next) {
+        sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+        if (sockfd != INVALID_SOCKET) {
+            n = 1;
+
+            setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (const char*)&n, sizeof n);
+
+            if (!bind(sockfd, t->ai_addr, (int)t->ai_addrlen))
+                break;
+            closesocket(sockfd);
+            sockfd = INVALID_SOCKET;
+        }
+    }
+
+    freeaddrinfo(res);
+
+    if (sockfd == INVALID_SOCKET) {
+        fprintf(stderr, "Couldn't listen to port %d\n", port);
+        return sockfd;
+    }
+
+    if (listen(sockfd, 1)) {
+        fprintf(stderr, "listen() failed %s\n", sock_get_error_str());
+        closesocket(sockfd);
+        return INVALID_SOCKET;
+    }
+
+    connfd = accept(sockfd, NULL, 0);
+    if (connfd == INVALID_SOCKET) {
+        perror("server accept");
+        fprintf(stderr, "accept() failed\n");
+        closesocket(sockfd);
+        return connfd;
+    }
+
+    closesocket(sockfd);
+    PERF_EXIT;
+    return connfd;
+}
+
+/*
+ * Server half of the address exchange: read the client's keys into
+ * rem_dest first, then answer with my_dest.
+ * Returns -1 if the read fails, otherwise the result of pp_write_keys().
+ */
+int pp_server_exch_dest(SOCKET sockfd, const struct pingpong_dest *my_dest,
+        struct pingpong_dest* rem_dest)
+{
+    int status;
+    PERF_ENTER;
+    status = pp_read_keys(sockfd, rem_dest);
+    if (status)
+        return -1;
+
+    PERF_EXIT;
+    status = pp_write_keys(sockfd, my_dest);
+    return status;
+}
+
+
+
+
+
+
+
--- /dev/null
+TARGETNAME=ib_send_bw\r
+TARGETPATH=..\..\..\..\bin\user\obj$(BUILD_ALT_DIR)\r
+TARGETTYPE=PROGRAM\r
+UMTYPE=console\r
+USE_CRTDLL=1\r
+\r
+C_DEFINES=$(C_DEFINES) /D__WIN__ \r
+\r
+SOURCES=send_bw.rc \\r
+ ..\getopt.c \\r
+ ..\perf_utils.c \\r
+ send_bw.c \r
+\r
+INCLUDES=..;..\..\..\..\inc;..\..\..\..\inc\user\r
+\r
+RCOPTIONS=/I..\..\win\include\r
+\r
+TARGETLIBS= \\r
+ $(DDK_LIB_PATH)\Ws2_32.lib \\r
+!if $(FREEBUILD)\r
+ $(TARGETPATH)\*\complib.lib \\r
+ $(TARGETPATH)\*\ibal.lib\r
+!else\r
+ $(TARGETPATH)\*\complibd.lib \\r
+ $(TARGETPATH)\*\ibald.lib\r
+!endif\r
+\r
+MSC_WARNING_LEVEL= /W3\r
--- /dev/null
+#\r
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source\r
+# file to this component. This file merely indirects to the real make file\r
+# that is shared by all the driver components of the OpenIB Windows project.\r
+#\r
+\r
+!INCLUDE ..\..\..\..\inc\openib.def\r
--- /dev/null
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "getopt.h"
+#include "get_clock.h"
+
+#include "perf_defs.h"
+
+#define SIGNAL 1 /* not referenced anywhere in send_bw.c */
+#define MAX_INLINE 400 /* not referenced in send_bw.c; the inline cutoff actually applied is the QP's sq_max_inline */
+
+struct user_parameters { /* Command-line settings for one send_bw run; zeroed and filled in by main(). */
+ const char *servername; /* host to connect to; NULL when no host argument was given (this side is the server) */
+ int connection_type; /* RC, UC or UD; -c switches it to UC or UD */
+ int mtu; /* -m value; 0 means "not given" and pp_init_ctx() then picks 1024 or 2048 from the device id */
+ int all; /* set to ALL by -a: main() sweeps power-of-two message sizes instead of one size */
+ int signal_comp; /* not referenced anywhere in send_bw.c */
+ int iters; /* -n: number of messages per measurement (main() rejects values below 2) */
+ int tx_depth; /* -t: send and receive queue depth; the CQ is created with twice this many entries */
+ int duplex; /* set to 1 by -b: run the bidirectional test */
+ int use_event; /* incremented by -e; event mode is not ported, so pp_init_ctx() fails when it is set */
+};
+
+static int page_size; /* set to 4096 in main(); not read anywhere else in send_bw.c */
+cycles_t *tposted; /* iters entries allocated in main(); cycle stamp taken just before each send is posted */
+cycles_t *tcompleted; /* iters entries allocated in main(); cycle stamp taken when a completion is polled */
+int post_recv; /* receives currently posted: set to tx_depth by pp_connect_ctx(), adjusted in run_iter_bi() */
+
+
+/*
+ * Completion callback registered with the CQ.  The test polls the CQ
+ * directly, so the callback deliberately does nothing.
+ */
+void
+pp_cq_comp_cb(
+	IN const ib_cq_handle_t h_cq,
+	IN void *cq_context )
+{
+	/* Both arguments are intentionally ignored. */
+	UNUSED_PARAM( cq_context );
+	UNUSED_PARAM( h_cq );
+}
+
+
+
+/*
+ * pp_init_ctx - allocate the test context and create the IB resources.
+ *
+ * Allocates the context and a zeroed work buffer of twice the message size
+ * (with 40 extra bytes per half for UD, to leave room for the GRH), opens the
+ * access layer and the first channel adapter reported, then creates a PD, a
+ * memory region over the buffer, one CQ shared by the send and receive
+ * queues, and one QP which is moved to the INIT state.
+ *
+ * size       message size in bytes
+ * port       IB port number programmed into the QP
+ * user_parm  test options; mtu is filled in here when it was left at 0
+ *
+ * Returns the new context, or NULL on failure.  Resources acquired before a
+ * failure are not released here; the caller is expected to exit.
+ */
+static struct pingpong_context *pp_init_ctx(unsigned size,int port, struct user_parameters *user_parm)
+{
+	struct pingpong_context *ctx;
+	ib_api_status_t ib_status = IB_SUCCESS;
+	size_t guid_count;
+	ib_net64_t *ca_guid_array;
+	unsigned buf_len;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx)
+		return NULL;
+
+	ctx->qp = malloc(sizeof (ib_qp_handle_t));
+	if (!ctx->qp) {
+		perror("malloc");
+		return NULL;
+	}
+	ctx->qp_attr = malloc(sizeof (ib_qp_attr_t));
+	if (!ctx->qp_attr) {
+		perror("malloc");
+		return NULL;
+	}
+
+	ctx->size = size;
+	ctx->tx_depth = user_parm->tx_depth;
+
+	/* in case of UD need space for the GRH */
+	if (user_parm->connection_type == UD)
+		buf_len = (size + 40) * 2;
+	else
+		buf_len = size * 2;
+
+	ctx->buf = malloc(buf_len);	//PORTED ALINGED
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		return NULL;
+	}
+	memset(ctx->buf, 0, buf_len);
+
+	/*
+	 * Open the AL instance
+	 */
+	ib_status = ib_open_al(&ctx->al);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_open_al failed status = %d\n", ib_status);
+		return NULL;
+	}
+
+	/*
+	 * Get the Local CA Guids.  The first call passes no array and is only
+	 * used to learn the count, so IB_INSUFFICIENT_MEMORY is the status
+	 * this code expects from it.
+	 */
+	ib_status = ib_get_ca_guids(ctx->al, NULL, &guid_count);
+	if (ib_status != IB_INSUFFICIENT_MEMORY) {
+		fprintf(stderr,"ib_get_ca_guids1 failed status = %d\n", (uint32_t)ib_status);
+		return NULL;
+	}
+
+	/*
+	 * If no CA's Present then return
+	 */
+	if (guid_count == 0) {
+		fprintf(stderr, "No channel adapters found\n");
+		return NULL;
+	}
+
+	ca_guid_array = malloc(sizeof *ca_guid_array * guid_count);
+	if (!ca_guid_array) {
+		perror("malloc");
+		return NULL;
+	}
+
+	ib_status = ib_get_ca_guids(ctx->al, ca_guid_array, &guid_count);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_get_ca_guids2 failed with status = %d\n", ib_status);
+		free(ca_guid_array);
+		return NULL;
+	}
+
+	/*
+	 * Open only the first HCA
+	 */
+	ib_status = ib_open_ca(ctx->al, ca_guid_array[0], NULL,
+		NULL,	//ca_context
+		&ctx->ca);
+
+	/* Only the first GUID was needed; the list is not used again. */
+	free(ca_guid_array);
+
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_open_ca failed with status = %d\n", ib_status);
+		return NULL;
+	}
+
+	{
+		/* Query the CA: first call sizes the attribute block, second fills it. */
+		uint32_t bsize = 0;
+
+		ib_status = ib_query_ca(ctx->ca, NULL, &bsize);
+		if (ib_status != IB_INSUFFICIENT_MEMORY) {
+			fprintf(stderr, "Failed to query device props\n");
+			return NULL;
+		}
+
+		ctx->ca_attr = malloc(bsize);
+		if (!ctx->ca_attr) {
+			perror("malloc");
+			return NULL;
+		}
+
+		ib_status = ib_query_ca(ctx->ca, ctx->ca_attr, &bsize);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "ib_query_ca failed with status = %d\n", ib_status);
+			return NULL;
+		}
+		if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
+			/* NOTE(review): 23108 is presumably the Mellanox MT23108 device id - confirm */
+			if (ctx->ca_attr->dev_id == 23108) {
+				user_parm->mtu = 1024;
+			} else {
+				user_parm->mtu = 2048;
+			}
+		}
+	}
+
+	/*
+	 * Completion events are not ported yet, so no channel can be created
+	 * and asking for event mode is reported as a failure.
+	 */
+//PORTED	ctx->channel = ibv_create_comp_channel(ctx->context);
+	ctx->channel = NULL;
+	if (user_parm->use_event) {
+		fprintf(stderr, "Couldn't create completion channel\n");
+		return NULL;
+	}
+
+	ib_status = ib_alloc_pd(ctx->ca,
+		IB_PDT_NORMAL,
+		ctx,	//pd_context
+		&ctx->pd);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	{
+		ib_mr_create_t mr_create;
+		ib_cq_create_t cq_create;
+
+		/* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+		 * The Consumer is not allowed to assign Remote Write or Remote Atomic to
+		 * a Memory Region that has not been assigned Local Write. */
+		mr_create.length = buf_len;
+		mr_create.vaddr = ctx->buf;
+		mr_create.access_ctrl = IB_AC_RDMA_WRITE| IB_AC_LOCAL_WRITE;
+
+		ib_status = ib_reg_mem(ctx->pd, &mr_create, &ctx->lkey, &ctx->rkey, &ctx->mr);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't allocate MR\n");
+			return NULL;
+		}
+
+		/* One CQ serves both queues, hence twice the queue depth. */
+		cq_create.size = user_parm->tx_depth*2;
+		cq_create.h_wait_obj = NULL;
+		cq_create.pfn_comp_cb = pp_cq_comp_cb;
+		ib_status = ib_create_cq(ctx->ca, &cq_create, ctx, NULL, &ctx->scq);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create CQ\n");
+			return NULL;
+		}
+	}
+
+	{
+		ib_qp_create_t qp_create;
+
+		memset(&qp_create, 0, sizeof(ib_qp_create_t));
+		qp_create.h_sq_cq = ctx->scq;
+		qp_create.h_rq_cq = ctx->scq;
+		qp_create.sq_depth = user_parm->tx_depth;
+		qp_create.rq_depth = user_parm->tx_depth;
+		qp_create.sq_sge = 1;
+		qp_create.rq_sge = 1;
+		//TODO MAX_INLINE
+
+		switch (user_parm->connection_type) {
+		case RC :
+			qp_create.qp_type = IB_QPT_RELIABLE_CONN;
+			break;
+		case UC :
+			qp_create.qp_type = IB_QPT_UNRELIABLE_CONN;
+			break;
+		case UD :
+			qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM;
+			break;
+		default:
+			fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
+			return NULL;
+		}
+
+		qp_create.sq_signaled = FALSE;
+		/*attr.sq_sig_all = 0;*/
+
+		ib_status = ib_create_qp(ctx->pd, &qp_create, NULL, NULL, &ctx->qp[0]);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create QP\n");
+			return NULL;
+		}
+	}
+
+	{
+		ib_qp_mod_t qp_modify;
+
+		memset(&qp_modify, 0, sizeof(ib_qp_mod_t));
+		qp_modify.req_state = IB_QPS_INIT;
+		qp_modify.state.init.pkey_index = 0 ;
+		qp_modify.state.init.primary_port = (uint8_t)port;
+		if (user_parm->connection_type==UD) {
+			qp_modify.state.init.qkey = 0x11111111;
+		} else {
+			qp_modify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
+		}
+
+		ib_status = ib_modify_qp(ctx->qp[0], &qp_modify);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			return NULL;
+		}
+
+		/* Cache the QP attributes; the QP number and sq_max_inline are read later. */
+		ib_status = ib_query_qp(ctx->qp[0], &ctx->qp_attr[0]);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to query QP\n");
+			return NULL;
+		}
+		fprintf(stderr, "max inline size %d\n",ctx->qp_attr[0].sq_max_inline);
+	}
+
+	return ctx;
+}
+
+/*
+ * pp_connect_ctx - connect one QP to the remote side and pre-post receives.
+ *
+ * Moves ctx->qp[index] to RTR using the remote LID/QPN/PSN in dest, creates
+ * an address vector when the transport is UD, moves the QP to RTS with
+ * my_psn as the send PSN, and finally posts tx_depth receive work requests
+ * that all point at the start of the work buffer.
+ *
+ * ctx        test context from pp_init_ctx()
+ * port       local IB port number
+ * my_psn     initial send PSN of the local QP
+ * dest       remote endpoint description
+ * user_parm  test options (mtu, connection_type, tx_depth)
+ * index      which QP of ctx->qp[] to connect
+ *
+ * Returns 0 on success, 1 when a QP transition or the AV creation fails,
+ * and 14 when a receive cannot be posted.  Sets the global post_recv.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+	struct pingpong_dest *dest, struct user_parameters *user_parm,int index)
+{
+	ib_api_status_t ib_status;
+	ib_qp_mod_t attr;
+
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+
+	attr.req_state = IB_QPS_RTR;
+	/* Any other MTU value leaves path_mtu at the zero set by memset. */
+	switch (user_parm->mtu) {
+	case 256 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_256;
+		break;
+	case 512 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_512;
+		break;
+	case 1024 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_1024;
+		break;
+	case 2048 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_2048;
+		break;
+	}
+	printf("Mtu : %d\n", user_parm->mtu);
+	attr.state.rtr.dest_qp = (dest->qpn);
+	attr.state.rtr.rq_psn = (dest->psn);
+	if (user_parm->connection_type==RC) {
+		attr.state.rtr.resp_res = 1;
+		attr.state.rtr.rnr_nak_timeout = 12;
+	}
+	attr.state.rtr.primary_av.grh_valid = 0;
+	attr.state.rtr.primary_av.dlid = dest->lid;
+	attr.state.rtr.primary_av.sl = 0;
+	attr.state.rtr.primary_av.path_bits = 0;
+	attr.state.rtr.primary_av.port_num = (uint8_t)port;
+	attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS;
+	attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT |
+		IB_MOD_QP_RESP_RES |
+		IB_MOD_QP_PRIMARY_AV;
+
+	/* Use the same QP for RTR as for RTS and the receives below. */
+	ib_status = ib_modify_qp(ctx->qp[index], &attr);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Failed to modify QP to RTR\n");
+		return 1;
+	}
+
+	if (user_parm->connection_type == UD) {
+		ib_av_attr_t av_attr;
+
+		/* Zero first so the fields not set below are not passed as garbage. */
+		memset(&av_attr, 0, sizeof(ib_av_attr_t));
+		av_attr.grh_valid = 0;
+		av_attr.dlid = dest->lid;
+		av_attr.sl = 0;
+		av_attr.path_bits = 0;
+		av_attr.port_num = (uint8_t)port;
+		av_attr.static_rate = IB_PATH_RECORD_RATE_10_GBS;
+
+		ib_status = ib_create_av(ctx->pd, &av_attr, &ctx->av);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to create AH for UD\n");
+			return 1;
+		}
+	}
+
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+	attr.req_state = IB_QPS_RTS;
+	attr.state.rts.sq_psn = my_psn;
+
+	if (user_parm->connection_type == RC) {
+		attr.state.rts.resp_res = 1;
+		attr.state.rts.local_ack_timeout = 14;
+		attr.state.rts.retry_cnt = 7;
+		attr.state.rts.rnr_retry_cnt = 7;
+		attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT |
+			IB_MOD_QP_RETRY_CNT |
+			IB_MOD_QP_LOCAL_ACK_TIMEOUT;
+	}
+	ib_status = ib_modify_qp(ctx->qp[index], &attr);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Failed to modify QP to RTS\n");
+		return 1;
+	}
+
+	/* post receive max msg size */
+	{
+		int i;
+		ib_recv_wr_t *bad_wr_recv;
+
+		ctx->rwr.wr_id = PINGPONG_RECV_WRID;
+		ctx->rwr.ds_array = &ctx->recv_list;
+		ctx->rwr.num_ds = 1;
+		ctx->rwr.p_next = NULL;
+		ctx->recv_list.vaddr = (uintptr_t) ctx->buf;
+		/* UD receives carry 40 extra bytes in front of the payload. */
+		if (user_parm->connection_type==UD) {
+			ctx->recv_list.length = ctx->size + 40;
+		} else {
+			ctx->recv_list.length = ctx->size;
+		}
+		ctx->recv_list.lkey = ctx->lkey;
+		for (i = 0; i < user_parm->tx_depth; ++i) {
+			ib_status = ib_post_recv(ctx->qp[index], &ctx->rwr, &bad_wr_recv);
+			if (ib_status != IB_SUCCESS) {
+				fprintf(stderr, "Couldn't post recv: counter=%d\n", i);
+				return 14;
+			}
+		}
+	}
+	post_recv = user_parm->tx_depth;
+
+	return 0;
+}
+
+/*
+ * pp_open_port - open the TCP side channel and connect the QP to the peer.
+ *
+ * Connects to servername (client) or waits for a connection (server, when
+ * servername is NULL), exchanges LID/QPN/PSN/rkey/address with the peer,
+ * connects the QP with pp_connect_ctx(), and then repeats the exchange as a
+ * handshake once the QP has been connected.
+ *
+ * ctx         test context from pp_init_ctx()
+ * servername  peer host name, or NULL to act as the server
+ * ib_port     1-based IB port number
+ * port        TCP port of the side channel
+ * p_rem_dest  receives the malloc'ed remote endpoint array on success;
+ *             the caller owns it
+ * user_parm   test options, passed on to pp_connect_ctx()
+ *
+ * Returns the connected socket, or INVALID_SOCKET on any failure.  On
+ * failure the socket is closed and nothing is stored in *p_rem_dest.
+ */
+static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,
+	int ib_port, int port, struct pingpong_dest **p_rem_dest,struct user_parameters *user_parm)
+{
+	struct pingpong_dest *my_dest = NULL;
+	struct pingpong_dest *rem_dest = NULL;
+	SOCKET sockfd = INVALID_SOCKET;
+	int rc;
+	int i;
+	int numofqps = 1;
+
+	/* The port number indexes p_port_attr[] as ib_port - 1 below. */
+	if (ib_port < 1) {
+		fprintf(stderr, "Invalid IB port %d\n", ib_port);
+		goto err;
+	}
+
+	my_dest = malloc(sizeof (struct pingpong_dest) * numofqps);
+	if (!my_dest) {
+		perror("malloc");
+		goto err;
+	}
+
+	rem_dest = malloc(sizeof (struct pingpong_dest) * numofqps);
+	if (!rem_dest) {
+		perror("malloc");
+		goto err;
+	}
+
+	/* Create connection between client and server.
+	 * We do it by exchanging data over a TCP socket connection. */
+	sockfd = servername ? pp_client_connect(servername, port) :
+		pp_server_connect(port);
+
+	if (sockfd == INVALID_SOCKET) {
+		printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+			servername ? servername : "(null)", port, (int)sockfd);
+		goto err;
+	}
+
+	for (i = 0; i < numofqps; i++) {
+		my_dest[i].lid = ctx->ca_attr->p_port_attr[ib_port-1].lid;
+		my_dest[i].psn = rand() & 0xffffff;
+		if (!my_dest[i].lid) {
+			fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+			goto err;
+		}
+		my_dest[i].qpn = ctx->qp_attr[i].num;
+		/* TBD this should be changed into VA and different key to each qp */
+		my_dest[i].rkey = ctx->rkey;
+		my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+		printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
+			"RKey %#08x VAddr %#016Lx\n",
+			my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,
+			my_dest[i].rkey, my_dest[i].vaddr);
+
+		rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
+			pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
+		if (rc)
+			goto err;
+
+		printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
+			"RKey %#08x VAddr %#016Lx\n",
+			rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn,
+			rem_dest[i].rkey, rem_dest[i].vaddr);
+
+		if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, &rem_dest[i], user_parm, i))
+			goto err;
+
+		/* An additional handshake is required *after* moving qp to RTR.
+		   Arbitrarily reuse exch_dest for this purpose. */
+		rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
+			pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
+		if (rc)
+			goto err;
+	}
+
+	/* The local description is no longer needed once the exchange is done. */
+	free(my_dest);
+	*p_rem_dest = rem_dest;
+	return sockfd;
+
+err:
+	if (sockfd != INVALID_SOCKET)
+		closesocket(sockfd);
+	free(rem_dest);
+	free(my_dest);
+	return INVALID_SOCKET;
+}
+
+
+/*
+ * usage - print the command-line help to stdout.
+ *
+ * argv0  program name shown in the two invocation examples
+ */
+static void usage(const char *argv0)
+{
+	/* One entry per option line; each is printed exactly as written. */
+	static const char *const option_help[] = {
+		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n",
+		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n",
+		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n",
+		" -c, --connection=<RC/UC> connection type RC/UC/UD (default RC)\n",
+		" -m, --mtu=<mtu> mtu size (default 1024)\n",
+		" -s, --size=<size> size of message to exchange (default 65536)\n",
+		" -a, --all Run sizes from 2 till 2^23\n",
+		" -t, --tx-depth=<dep> size of tx queue (default 300)\n",
+		" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n",
+		" -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n",
+		" -V, --version display version number\n",
+		" -e, --events sleep on CQ events (default poll)\n"
+	};
+	size_t k;
+
+	printf("Usage:\n");
+	printf(" %s start a server and wait for connection\n", argv0);
+	printf(" %s <host> connect to server at <host>\n", argv0);
+	printf("\n");
+	printf("Options:\n");
+
+	for (k = 0; k < sizeof option_help / sizeof option_help[0]; ++k)
+		printf("%s", option_help[k]);
+}
+
+/*
+ * print_report - print one result line: size, iterations, peak and average.
+ *
+ * The peak figure is derived from the smallest value of
+ * (tcompleted[j] - tposted[i]) / (j - i + 1) over all i <= j, i.e. the best
+ * per-message cycle cost of any run of consecutive messages.  The average
+ * figure uses the span from the first post to the last completion.
+ *
+ * iters       number of samples in both arrays
+ * size        message size in bytes
+ * duplex      non-zero doubles the byte count per iteration
+ * tposted     cycle stamps taken when each send was posted
+ * tcompleted  cycle stamps taken when each completion was polled
+ */
+static void print_report(unsigned int iters, unsigned size, int duplex,
+	cycles_t *tposted, cycles_t *tcompleted)
+{
+	const int count = (int)iters;
+	double mhz;
+	double peak_bw;
+	double avg_bw;
+	uint64_t bytes_per_iter;	/* bytes moved per iteration */
+	cycles_t best;			/* smallest per-message cycle cost so far */
+	cycles_t cand;
+	int first, last;
+
+	best = tcompleted[0] - tposted[0];
+
+	/* Find the peak bandwidth */
+	for (first = 0; first < count; ++first) {
+		for (last = first; last < count; ++last) {
+			cand = (tcompleted[last] - tposted[first]) / (last - first + 1);
+			if (cand < best)
+				best = cand;
+		}
+	}
+
+	mhz = get_cpu_mhz();
+
+	bytes_per_iter = duplex ? 2 : 1;
+	bytes_per_iter = bytes_per_iter * size;
+
+	peak_bw = bytes_per_iter * mhz / best / 0x100000;
+	avg_bw = (uint64_t)bytes_per_iter * iters * mhz /
+		(tcompleted[iters - 1] - tposted[0]) / 0x100000;
+
+	printf("%7d %d %7.2f %7.2f \n", size, iters, peak_bw, avg_bw);
+}
+
+
+/*
+ * run_iter_bi - run one bidirectional measurement of user_param->iters sends.
+ *
+ * Both sides post sends and poll the shared CQ for send and receive
+ * completions until iters sends have completed and iters messages have been
+ * received.  tposted[] is stamped before each post and tcompleted[] when a
+ * send completion is polled.  Receives are re-posted as they are consumed,
+ * tracked through the global post_recv.
+ *
+ * ctx         test context
+ * user_param  test options (connection_type, iters, tx_depth)
+ * rem_dest    remote endpoint; only its QPN is used, and only for UD
+ * size        message size in bytes (capped at 2048 for UD)
+ *
+ * Returns 0 on success, 1 on a post-send, poll or completion error, and 15
+ * when a receive cannot be re-posted.
+ */
+int run_iter_bi(struct pingpong_context *ctx, struct user_parameters *user_param,
+	struct pingpong_dest *rem_dest, int size)
+{
+	ib_qp_handle_t qp;
+	int scnt, ccnt, rcnt;
+	int send_window;
+	ib_recv_wr_t *bad_wr_recv;
+	ib_api_status_t ib_status;
+
+	if (user_param->connection_type==UD) {
+		if (size > 2048) {
+			size = 2048;
+		}
+	}
+	/*********************************************
+	 * Important note :
+	 * In case of UD/UC this is NOT the way to measure
+	 * BW, since we are running with the loop on the send side
+	 * while we should run on the receive side or enable retry in SW.
+	 * The sender may be faster than the receiver, so although
+	 * we have posted receives it may not be enough, and this can
+	 * result in a deadlock of the test since both sides are stuck on poll cq.
+	 * This test does not solve that in general; a separate test is
+	 * needed for UC/UD, but when tx_depth is ~1/3 of the
+	 * number of iterations this should be ok.
+	 * Also note that the sender is limited in the number of sends, and
+	 * the code tries to keep the receiver full.
+	 *********************************************/
+	/* send */
+	if (user_param->connection_type==UD) {
+		ctx->list.vaddr = (uintptr_t) ctx->buf + 40;
+		ctx->wr.dgrm.ud.h_av = ctx->av;
+		ctx->wr.dgrm.ud.remote_qp = rem_dest->qpn;
+		ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
+	} else {
+		ctx->list.vaddr = (uintptr_t) ctx->buf;
+	}
+	ctx->list.lkey = ctx->lkey;
+	ctx->wr.wr_id = PINGPONG_SEND_WRID;
+	ctx->wr.ds_array = &ctx->list;
+	ctx->wr.num_ds = 1;
+	ctx->wr.wr_type = WR_SEND;
+	ctx->wr.p_next = NULL;
+
+	/* Messages that fit the QP's inline limit are sent inline. */
+	if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) { /*complaince to perf_main */
+		ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
+	} else {
+		ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
+	}
+
+	ctx->list.length = size;
+	scnt = 0;
+	ccnt = 0;
+	rcnt = 0;
+	qp = ctx->qp[0];
+
+	/*
+	 * At most half the queue depth may be outstanding.  Keep the window at
+	 * least 1: with tx_depth == 1 the integer division yields 0, no send
+	 * would ever be posted and the loop below would never terminate.
+	 */
+	send_window = user_param->tx_depth / 2;
+	if (send_window < 1)
+		send_window = 1;
+
+	while (ccnt < user_param->iters || rcnt < user_param->iters ) {
+		while (scnt < user_param->iters && (scnt - ccnt) < send_window) {
+			ib_send_wr_t *bad_wr;
+
+			tposted[scnt] = get_cycles();
+			ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
+			if (ib_status != IB_SUCCESS) {
+				fprintf(stderr, "Couldn't post send: scnt=%d ib_status %d\n",
+					scnt,ib_status);
+				return 1;
+			}
+			++scnt;
+		}
+
+		{
+			ib_wc_t wc;
+			ib_wc_t *p_wc_done,*p_wc_free;
+
+			/* A one-entry free list: each poll returns at most one completion. */
+			p_wc_free = &wc;
+			p_wc_free->p_next = NULL;
+			p_wc_done = NULL;
+#if PORTED
+			if (user_param->use_event) {
+				struct ibv_cq *ev_cq;
+				void *ev_ctx;
+				if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+					fprintf(stderr, "Failed to get cq_event\n");
+					return 1;
+				}
+				if (ev_cq != ctx->cq) {
+					fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+					return 1;
+				}
+				if (ibv_req_notify_cq(ctx->cq, 0)) {
+					fprintf(stderr, "Couldn't request CQ notification\n");
+					return 1;
+				}
+			}
+#endif
+			/* Drain the CQ; the loop ends when the poll stops returning IB_SUCCESS. */
+			do {
+				ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
+				if (ib_status == IB_SUCCESS ) {
+					if (p_wc_done->status != IB_WCS_SUCCESS) {
+						fprintf(stderr, "Completion wth error at %s:\n",
+							user_param->servername ? "client" : "server");
+						fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
+							p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
+						fprintf(stderr, "scnt=%d, ccnt=%d\n",
+							scnt, ccnt);
+						return 1;
+					}
+					switch ((int) p_wc_done->wr_id) {
+					case PINGPONG_SEND_WRID:
+						tcompleted[ccnt] = get_cycles();
+						++ccnt;
+						break;
+					case PINGPONG_RECV_WRID:
+						/* Top the receive queue back up to tx_depth while more messages are expected. */
+						if (--post_recv <= user_param->tx_depth - 2) {
+							while (rcnt < user_param->iters && (user_param->tx_depth - post_recv) > 0 ) {
+								post_recv++;
+								ib_status = ib_post_recv(ctx->qp[0], &ctx->rwr, &bad_wr_recv);
+								if (ib_status != IB_SUCCESS) {
+									fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
+										rcnt);
+									return 15;
+								}
+							}
+						}
+						++rcnt;
+						break;
+					default:
+						fprintf(stderr, "Completion for unknown wr_id %d\n",
+							(int) p_wc_done->wr_id);
+						break;
+					}
+					/* Hand the completion entry back as the free list for the next poll. */
+					p_wc_free = p_wc_done;
+					p_wc_free->p_next = NULL;
+					p_wc_done = NULL;
+				}
+			} while (ib_status == IB_SUCCESS );
+
+			if (ib_status != IB_NOT_FOUND) {
+				fprintf(stderr, "poll CQ failed %d\n", ib_status);
+				return 1;
+			}
+		}
+	}
+	return(0);
+}
+
+
+/*
+ * run_iter_uni - run one unidirectional measurement.
+ *
+ * The side started without a host name (server) only polls for receive
+ * completions and re-posts a receive for each one until iters messages have
+ * arrived.  The side started with a host name (client) posts iters sends,
+ * keeping at most tx_depth outstanding, and polls for their completions;
+ * tposted[] and tcompleted[] are stamped around each send.
+ *
+ * ctx         test context
+ * user_param  test options (servername, connection_type, iters, tx_depth)
+ * rem_dest    remote endpoint; only its QPN is used, and only for UD
+ * size        message size in bytes (capped at 2048 for UD)
+ *
+ * Returns 0 on success, 1 on a post-send, poll or completion error, 12 when
+ * polling fails on the receiving side, and 15 when a receive cannot be
+ * re-posted.
+ */
+int run_iter_uni(struct pingpong_context *ctx, struct user_parameters *user_param,
+	struct pingpong_dest *rem_dest, int size)
+{
+	const int is_ud = (user_param->connection_type == UD);
+	ib_qp_handle_t qp;
+	int scnt = 0;
+	int ccnt = 0;
+	int rcnt = 0;
+	ib_recv_wr_t *bad_wr_recv;
+	ib_api_status_t ib_status;
+	ib_wc_t wc;
+	ib_wc_t *p_wc_done, *p_wc_free;
+
+	if (is_ud && size > 2048)
+		size = 2048;
+
+	/* Describe the send work request once; it is re-posted unchanged. */
+	ctx->list.vaddr = is_ud ? (uintptr_t) ctx->buf + 40 : (uintptr_t) ctx->buf;
+	ctx->list.lkey = ctx->lkey;
+	ctx->list.length = size;
+	if (is_ud) {
+		ctx->wr.dgrm.ud.h_av = ctx->av;
+		ctx->wr.dgrm.ud.remote_qp = rem_dest->qpn;
+		ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
+	}
+	ctx->wr.wr_id = PINGPONG_SEND_WRID;
+	ctx->wr.ds_array = &ctx->list;
+	ctx->wr.num_ds = 1;
+	ctx->wr.wr_type = WR_SEND;
+	ctx->wr.p_next = NULL;
+
+	/* Send inline whenever the message fits the QP's inline limit. */
+	ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
+	if ((uint32_t)size <= ctx->qp_attr[0].sq_max_inline)
+		ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
+
+	qp = ctx->qp[0];
+
+	if (!user_param->servername) {
+		/* Server is polling on receive first */
+		while (rcnt < user_param->iters) {
+			p_wc_free = &wc;
+			p_wc_done = NULL;
+			p_wc_free->p_next = NULL;
+#if PORTED
+			if (user_param->use_event) {
+				struct ibv_cq *ev_cq;
+				void *ev_ctx;
+				if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+					fprintf(stderr, "Failed to get cq_event\n");
+					return 1;
+				}
+				if (ev_cq != ctx->cq) {
+					fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+					return 1;
+				}
+				if (ibv_req_notify_cq(ctx->cq, 0)) {
+					fprintf(stderr, "Couldn't request CQ notification\n");
+					return 1;
+				}
+			}
+#endif
+			/* Drain the CQ, replacing every consumed receive. */
+			for (;;) {
+				ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
+				if (ib_status != IB_SUCCESS)
+					break;
+
+				tcompleted[ccnt] = get_cycles();
+				if (p_wc_done->status != IB_WCS_SUCCESS) {
+					fprintf(stderr, "Completion wth error at %s:\n",
+						user_param->servername ? "client" : "server");
+					fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
+						p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
+					fprintf(stderr, "scnt=%d, ccnt=%d\n",
+						scnt, ccnt);
+					return 1;
+				}
+				++rcnt;
+				ib_status = ib_post_recv(ctx->qp[0], &ctx->rwr, &bad_wr_recv);
+				if (ib_status != IB_SUCCESS) {
+					fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
+						rcnt);
+					return 15;
+				}
+				/* Recycle the completion entry for the next poll. */
+				p_wc_free = p_wc_done;
+				p_wc_free->p_next = NULL;
+				p_wc_done = NULL;
+			}
+			if (ib_status != IB_NOT_FOUND) {
+				fprintf(stderr, "Poll Recieve CQ failed %d\n", ib_status);
+				return 12;
+			}
+		}
+		return(0);
+	}
+
+	/* client is posting and not receiving. */
+	while (scnt < user_param->iters || ccnt < user_param->iters) {
+		/* Fill the send queue up to tx_depth outstanding requests. */
+		while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {
+			ib_send_wr_t *bad_wr;
+
+			tposted[scnt] = get_cycles();
+			ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
+			if (ib_status != IB_SUCCESS) {
+				fprintf(stderr, "Couldn't post send: scnt=%d ib_status %d\n",
+					scnt,ib_status);
+				return 1;
+			}
+			++scnt;
+		}
+
+		if (ccnt >= user_param->iters)
+			continue;
+
+		p_wc_free = &wc;
+		p_wc_free->p_next = NULL;
+		p_wc_done = NULL;
+#if PORTED
+		if (user_param->use_event) {
+			struct ibv_cq *ev_cq;
+			void *ev_ctx;
+			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+				fprintf(stderr, "Failed to get cq_event\n");
+				return 1;
+			}
+			if (ev_cq != ctx->cq) {
+				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+				return 1;
+			}
+			if (ibv_req_notify_cq(ctx->cq, 0)) {
+				fprintf(stderr, "Couldn't request CQ notification\n");
+				return 1;
+			}
+		}
+#endif
+		/* Collect every send completion currently available. */
+		for (;;) {
+			ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
+			if (ib_status != IB_SUCCESS)
+				break;
+
+			tcompleted[ccnt] = get_cycles();
+			if (p_wc_done->status != IB_WCS_SUCCESS) {
+				fprintf(stderr, "Completion wth error at %s:\n",
+					user_param->servername ? "client" : "server");
+				fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
+					p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
+				fprintf(stderr, "scnt=%d, ccnt=%d\n",
+					scnt, ccnt);
+				return 1;
+			}
+			ccnt ++;
+			p_wc_free = p_wc_done;
+			p_wc_free->p_next = NULL;
+			p_wc_done = NULL;
+		}
+		if (ib_status != IB_NOT_FOUND) {
+			fprintf(stderr, "poll CQ failed %d\n", ib_status);
+			return 1;
+		}
+	}
+	return(0);
+}
+
+
+/*
+ * main - parse the command line, set up the connection and run the test.
+ *
+ * Without a host argument the program acts as the server; with one it acts
+ * as the client and prints the bandwidth report.  With -a the test is
+ * repeated for power-of-two message sizes, with a TCP exchange after each
+ * size to keep both sides in step.
+ *
+ * Returns 0 on success, 1 for bad arguments or a setup/allocation failure,
+ * 9 when the connection cannot be established, 17 when a run fails in -a
+ * mode and 18 when the single-size run fails.
+ */
+int __cdecl main(int argc, char *argv[])
+{
+	struct pingpong_context *ctx;
+	struct pingpong_dest my_dest;
+	struct pingpong_dest *rem_dest = NULL;
+	struct user_parameters user_param;
+	char *ib_devname = NULL;
+	int port = 18515;
+	int ib_port = 1;
+	unsigned size = 65536;
+	SOCKET sockfd = INVALID_SOCKET;
+	int i = 0;
+	int size_max_pow = 24;
+	WSADATA wsaData;
+	int iResult;
+	int rc = 0;
+
+	/*
+	 * my_dest is only a placeholder for the sync exchanges below; zero it
+	 * so that no uninitialized stack bytes are handed to the exchange.
+	 */
+	memset(&my_dest, 0, sizeof my_dest);
+
+	/* init default values to user's parameters */
+	memset(&user_param, 0, sizeof(struct user_parameters));
+	user_param.mtu = 0;
+	user_param.iters = 1000;
+	user_param.tx_depth = 300;
+	user_param.servername = NULL;
+	user_param.use_event = 0;
+	user_param.duplex = 0;
+
+	/* Parameter parsing. */
+	while (1) {
+		int c;
+
+		static struct option long_options[] = {
+			{ "port", 1, NULL, 'p' },
+			{ "ib-dev", 1, NULL, 'd' },
+			{ "ib-port", 1, NULL, 'i' },
+			{ "mtu", 1, NULL, 'm' },
+			{ "connection", 1, NULL, 'c' },
+			{ "size", 1, NULL, 's' },
+			{ "iters", 1, NULL, 'n' },
+			{ "tx-depth", 1, NULL, 't' },
+			{ "all", 0, NULL, 'a' },
+			{ "bidirectional", 0, NULL, 'b' },
+			{ "version", 0, NULL, 'V' },
+			{ "events", 0, NULL, 'e' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:d:i:m:c:s:n:t:ebaV", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+		case 'e':
+			++user_param.use_event;
+			break;
+		case 'd':
+			/* TODO: device selection is not implemented; the name is stored but unused. */
+			ib_devname = _strdup(optarg);
+			break;
+		case 'c':
+			if (strcmp("RC",optarg)==0)
+				user_param.connection_type=RC;
+			else if (strcmp("UC",optarg)==0)
+				user_param.connection_type=UC;
+			else if (strcmp("UD",optarg)==0)
+				user_param.connection_type=UD;
+			else {
+				/* Reject unknown transports instead of silently keeping the previous one. */
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'm':
+			user_param.mtu = strtol(optarg, NULL, 0);
+			break;
+		case 'a':
+			user_param.all = ALL;
+			break;
+		case 'V':
+			printf("send_bw version : %.2f\n",VERSION);
+			return 0;
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			/* IB ports are numbered from 1; the value is used as index ib_port - 1. */
+			if (ib_port < 1) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 's':
+			size = (unsigned)_strtoui64(optarg, NULL, 0);
+			/* The work buffer is twice the message size, so cap at UINT_MAX / 2. */
+			if (size < 1 || size > UINT_MAX / 2) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 't':
+			user_param.tx_depth = strtol(optarg, NULL, 0);
+			if (user_param.tx_depth < 1) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'n':
+			user_param.iters = strtol(optarg, NULL, 0);
+			if (user_param.iters < 2) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'b':
+			user_param.duplex = 1;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (optind == argc - 1)
+		user_param.servername = _strdup(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 1;
+	}
+	printf("------------------------------------------------------------------\n");
+	if (user_param.duplex == 1)
+		printf(" Send Bidirectional BW Test\n");
+	else
+		printf(" Send BW Test\n");
+
+	printf("Inline data is used up to 400 bytes message\n");
+	if (user_param.connection_type==RC) {
+		printf("Connection type : RC\n");
+	} else if (user_param.connection_type==UC) {
+		printf("Connection type : UC\n");
+	} else {
+		printf("Connection type : UD\n");
+	}
+
+	/* Done with parameter parsing. Perform setup. */
+
+	// Initialize Winsock
+	iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
+	if (iResult != NO_ERROR) {
+		printf("Error at WSAStartup()\n");
+		return 1;
+	}
+
+	/* From here on every exit goes through "end" so Winsock is cleaned up. */
+
+	if (user_param.all == ALL) {
+		/*since we run all sizes */
+		size = 8388608; /*2^23 */
+	} else {
+		if (user_param.connection_type==UD) {
+			if (size > 2048) {
+				printf("Max msg size in UD is 2048 changing to 2048\n");
+				size = 2048;
+			}
+		}
+	}
+
+	srand(GetCurrentProcessId() * GetTickCount());
+
+	//TODO: get pagesize from sysinfo
+	page_size = 4096;
+
+	//TODO:get the device names
+
+	// init the context
+	ctx = pp_init_ctx(size, ib_port, &user_param);
+	if (!ctx) {
+		rc = 1;
+		goto end;
+	}
+
+	sockfd = pp_open_port(ctx, user_param.servername, ib_port, port,&rem_dest,&user_param);
+	if (sockfd == INVALID_SOCKET) {
+		rc = 9;
+		goto end;
+	}
+
+#if PORTED
+	if (user_param.use_event) {
+		printf("Test with events.\n");
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return 1;
+		}
+	}
+#endif
+	printf("------------------------------------------------------------------\n");
+	printf(" #bytes #iterations BW peak[MB/sec] BW average[MB/sec] \n");
+
+	tposted = malloc(user_param.iters * sizeof *tposted);
+	if (!tposted) {
+		perror("malloc");
+		rc = 1;
+		goto end;
+	}
+
+	tcompleted = malloc(user_param.iters * sizeof *tcompleted);
+	if (!tcompleted) {
+		perror("malloc");
+		rc = 1;
+		goto end;
+	}
+
+	if (user_param.all == ALL) {
+		/* UD messages are limited to 2048 bytes, so stop at 2^11. */
+		if (user_param.connection_type==UD) {
+			size_max_pow = 12;
+		}
+		for (i = 1; i < size_max_pow ; ++i) {
+			size = 1 << i;
+			if (user_param.duplex) {
+				if (run_iter_bi(ctx, &user_param, rem_dest, size)) {
+					rc = 17;
+					goto end;
+				}
+			} else {
+				if (run_iter_uni(ctx, &user_param, rem_dest, size)) {
+					rc = 17;
+					goto end;
+				}
+			}
+			if (user_param.servername) {
+				print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted);
+				/* sync again for the sake of UC/UC */
+				pp_client_exch_dest(sockfd, &my_dest,rem_dest);
+			} else {
+				pp_server_exch_dest(sockfd, &my_dest,rem_dest);
+			}
+		}
+	} else {
+		if (user_param.duplex) {
+			if (run_iter_bi(ctx, &user_param,rem_dest, size)) {
+				rc = 18;
+				goto end;
+			}
+		} else {
+			if (run_iter_uni(ctx, &user_param,rem_dest, size)) {
+				rc = 18;
+				goto end;
+			}
+		}
+		if (user_param.servername) {
+			print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted);
+		}
+	}
+
+	/* Final sync with the peer before closing the socket. */
+	if (user_param.servername) {
+		pp_client_exch_dest(sockfd, &my_dest,&rem_dest[0]);
+	} else {
+		pp_server_exch_dest(sockfd, &my_dest,&rem_dest[0]);
+	}
+
+	send(sockfd, "done", sizeof "done",0);
+
+	printf("------------------------------------------------------------------\n");
+
+end:
+	if (sockfd != INVALID_SOCKET)
+		closesocket(sockfd);
+
+	free(rem_dest);
+	free(tposted);
+	tposted = NULL;
+	free(tcompleted);
+	tcompleted = NULL;
+
+	WSACleanup();
+	return rc;
+}
--- /dev/null
+/*\r
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ * Redistribution and use in source and binary forms, with or\r
+ * without modification, are permitted provided that the following\r
+ * conditions are met:\r
+ *\r
+ * - Redistributions of source code must retain the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer.\r
+ *\r
+ * - Redistributions in binary form must reproduce the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer in the documentation and/or other materials\r
+ * provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id: vstat.rc 636 2005-10-19 17:46:55Z sleybo $\r
+ */\r
+\r
+\r
+/* Version resource for ib_send_bw.exe: sets the VER_* strings, then pulls in common.ver. */\r
+#include <oib_ver.h>\r
+\r
+#define VER_FILETYPE VFT_APP\r
+#define VER_FILESUBTYPE VFT2_UNKNOWN\r
+\r
+/* NOTE(review): _DEBUG_ is not defined anywhere in this file; presumably the build supplies it for debug builds - confirm. */\r
+#ifdef _DEBUG_\r
+#define VER_FILEDESCRIPTION_STR "Send/Recv Bandwidth Test (Debug)"\r
+#else\r
+#define VER_FILEDESCRIPTION_STR "Send/Recv Bandwidth Test "\r
+#endif\r
+\r
+#define VER_INTERNALNAME_STR "ib_send_bw.exe"\r
+#define VER_ORIGINALFILENAME_STR "ib_send_bw.exe"\r
+\r
+#include <common.ver>\r
--- /dev/null
+#\r
+# Build description for the ib_send_lat console program.\r
+# Compiles send_lat.c together with the shared getopt.c and perf_utils.c from\r
+# the parent directory, and embeds the version resource send_lat.rc.\r
+#\r
+TARGETNAME=ib_send_lat\r
+TARGETPATH=..\..\..\..\bin\user\obj$(BUILD_ALT_DIR)\r
+TARGETTYPE=PROGRAM\r
+UMTYPE=console\r
+USE_CRTDLL=1\r
+\r
+C_DEFINES=$(C_DEFINES) /D__WIN__ \r
+\r
+SOURCES=send_lat.rc \\r
+ ..\getopt.c \\r
+ ..\perf_utils.c \\r
+ send_lat.c \r
+\r
+INCLUDES=..;..\..\..\..\inc;..\..\..\..\inc\user\r
+\r
+RCOPTIONS=/I..\..\win\include\r
+\r
+# Winsock is always linked; complib/ibal come from the release libraries when\r
+# FREEBUILD is set and from the "d"-suffixed libraries otherwise.\r
+TARGETLIBS= \\r
+ $(DDK_LIB_PATH)\Ws2_32.lib \\r
+!if $(FREEBUILD)\r
+ $(TARGETPATH)\*\complib.lib \\r
+ $(TARGETPATH)\*\ibal.lib\r
+!else\r
+ $(TARGETPATH)\*\complibd.lib \\r
+ $(TARGETPATH)\*\ibald.lib\r
+!endif\r
+\r
+MSC_WARNING_LEVEL= /W3\r
--- /dev/null
+#\r
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source\r
+# file to this component. This file merely indirects to the real make file\r
+# that is shared by all the driver components of the OpenIB Windows project.\r
+#\r
+\r
+!INCLUDE ..\..\..\..\inc\openib.def\r
--- /dev/null
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "getopt.h"
+#include "get_clock.h"
+#include "perf_defs.h"
+
+
+/* Value stored in user_parameters.signal_comp when every send must be signaled (-l). */
+#define SIGNAL 1
+/* Inline-data limit in bytes announced in main's banner; not referenced by the code in this file. */
+#define MAX_INLINE 400
+
+
+/* Set to 4096 by main; not read anywhere else in this file. */
+static int page_size;
+/* Cycle-counter samples, one per posted send: allocated in main, written by run_iter, read by print_report. */
+cycles_t *tstamp;
+
+/* Command-line settings of the send latency test; filled in by main. */
+struct user_parameters {
+ const char *servername; /* host to connect to; NULL when running as the server */
+ int connection_type; /* RC, UC or UD (-c) */
+ int mtu; /* MTU in bytes (-m); 0 lets pp_init_ctx choose from the device id */
+ int signal_comp; /* SIGNAL to request a completion for every send (-l) */
+ int all; /* run all msg size */
+ int iters; /* number of exchanges to time (-n) */
+ int tx_depth; /* depth used for the send and receive queues (-t) */
+ int use_event; /* non-zero when -e (CQ events instead of polling) was given */
+};
+
+/* Output switches consumed by print_report. */
+struct report_options {
+ int unsorted; /* print every sample in measurement order (-U) */
+ int histogram; /* print every sample after sorting (-H) */
+ int cycles; /* report delta's in cycles, not microsec's */
+};
+
+
+
+/*
+ * Completion callback handed to ib_create_cq for both CQs of this test.
+ * The test reaps completions with ib_poll_cq, so the callback has nothing
+ * to do and ignores both of its arguments.
+ */
+void
+pp_cq_comp_cb(IN const ib_cq_handle_t h_cq, IN void *cq_context)
+{
+	UNUSED_PARAM(cq_context);
+	UNUSED_PARAM(h_cq);
+}
+
+
+/*
+ * Build the pingpong context used by the send latency test.
+ *
+ * Allocates the context and its work buffer, opens the access layer and the
+ * first HCA it reports, then creates the PD, the memory region, the receive
+ * and send CQs and a single QP, which is moved to the INIT state on `port`.
+ * Finally the send and receive work-request templates are filled in.
+ *
+ * size      - message size in bytes; the buffer holds two messages (plus
+ *             40 bytes each for the GRH when the connection type is UD).
+ * port      - HCA port number the QP is bound to.
+ * user_parm - test settings; mtu is filled in here when the user left it 0.
+ *
+ * Returns the new context, or NULL on failure (a message is printed).
+ * Resources acquired before a failure are not torn down: the only caller
+ * in this file exits as soon as NULL is returned.
+ */
+static struct pingpong_context *pp_init_ctx(unsigned int size,int port,struct user_parameters *user_parm) {
+
+	struct pingpong_context *ctx;
+	ib_api_status_t ib_status = IB_SUCCESS;
+	size_t guid_count;
+	size_t buf_len;
+	ib_net64_t *ca_guid_array;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx)
+		return NULL;
+	/* Start from a known state so that no field is ever read uninitialised. */
+	memset(ctx, 0, sizeof *ctx);
+
+	ctx->qp = malloc(sizeof (ib_qp_handle_t));
+	if (!ctx->qp) {
+		perror("malloc");
+		return NULL;
+	}
+
+	ctx->qp_attr = malloc(sizeof (ib_qp_attr_t));
+	if (!ctx->qp_attr) {
+		perror("malloc");
+		return NULL;
+	}
+
+	ctx->size = size;
+	ctx->tx_depth = user_parm->tx_depth;
+
+	/* in case of UD need space for the GRH */
+	if (user_parm->connection_type==UD)
+		buf_len = ((size_t)size + 40) * 2;
+	else
+		buf_len = (size_t)size * 2;
+
+	ctx->buf = malloc(buf_len); //PORTED ALINGED
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		return NULL;
+	}
+	memset(ctx->buf, 0, buf_len);
+
+	/* Last byte of the first and of the second message slot. */
+	ctx->post_buf = (char*)ctx->buf + (size - 1);
+	ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);
+
+	/*
+	 * Open the AL instance
+	 */
+	ib_status = ib_open_al(&ctx->al);
+	if(ib_status != IB_SUCCESS)
+	{
+		fprintf(stderr,"ib_open_al failed status = %d\n", ib_status);
+		return NULL;
+	}
+
+	/*
+	 * Get the Local CA Guids: the first call only asks for the count and is
+	 * therefore expected to report IB_INSUFFICIENT_MEMORY.
+	 */
+	ib_status = ib_get_ca_guids(ctx->al, NULL, &guid_count);
+	if(ib_status != IB_INSUFFICIENT_MEMORY)
+	{
+		fprintf(stderr,"ib_get_ca_guids1 failed status = %d\n", (uint32_t)ib_status);
+		return NULL;
+	}
+
+	/*
+	 * If no CA's Present then return
+	 */
+	if(guid_count == 0)
+	{
+		fprintf(stderr, "No channel adapter found\n");
+		return NULL;
+	}
+
+	ca_guid_array = (ib_net64_t*)malloc(sizeof(ib_net64_t) * guid_count);
+	if (!ca_guid_array) {
+		perror("malloc");
+		return NULL;
+	}
+
+	ib_status = ib_get_ca_guids(ctx->al, ca_guid_array, &guid_count);
+	if(ib_status != IB_SUCCESS)
+	{
+		fprintf(stderr,"ib_get_ca_guids2 failed with status = %d\n", ib_status);
+		free(ca_guid_array);
+		return NULL;
+	}
+
+	/*
+	 * Open only the first HCA
+	 */
+	ib_status = ib_open_ca(ctx->al ,ca_guid_array[0] ,NULL,
+		NULL, //ca_context
+		&ctx->ca);
+
+	/* The GUID list is only needed to pick the CA to open. */
+	free(ca_guid_array);
+
+	if(ib_status != IB_SUCCESS)
+	{
+		fprintf(stderr,"ib_open_ca failed with status = %d\n", ib_status);
+		return NULL;
+	}
+
+	{
+		/* Query the CA: first call obtains the attribute size, second the data. */
+		uint32_t bsize = 0;
+		ib_status = ib_query_ca(ctx->ca, NULL, &bsize);
+		if(ib_status != IB_INSUFFICIENT_MEMORY)
+		{
+			fprintf(stderr, "Failed to query device props\n");
+			return NULL;
+		}
+
+		ctx->ca_attr = (ib_ca_attr_t *)malloc(bsize);
+		if (!ctx->ca_attr) {
+			perror("malloc");
+			return NULL;
+		}
+
+		ib_status = ib_query_ca(ctx->ca, ctx->ca_attr, &bsize);
+		if(ib_status != IB_SUCCESS)
+		{
+			printf("ib_query_ca failed with status = %d\n", ib_status);
+			return NULL;
+		}
+		if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
+			if (ctx->ca_attr->dev_id == 23108) {
+				user_parm->mtu = 1024;
+			} else {
+				user_parm->mtu = 2048;
+			}
+		}
+	}
+
+	if (user_parm->use_event) {
+//PORTED		ctx->channel = ibv_create_comp_channel(ctx->context);
+		/* Completion channels are not ported yet, so event mode always fails here. */
+		ctx->channel = NULL;//remove when PORTED
+		if (!ctx->channel) {
+			fprintf(stderr, "Couldn't create completion channel\n");
+			return NULL;
+		}
+	} else
+		ctx->channel = NULL;
+
+	ib_status = ib_alloc_pd(ctx->ca ,
+		IB_PDT_NORMAL,
+		ctx, //pd_context
+		&ctx->pd);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	{
+		ib_mr_create_t mr_create;
+		ib_cq_create_t cq_create;
+
+		/* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+		 * The Consumer is not allowed to assign Remote Write or Remote Atomic to
+		 * a Memory Region that has not been assigned Local Write. */
+		mr_create.length = buf_len;
+		mr_create.vaddr = ctx->buf;
+		mr_create.access_ctrl = IB_AC_RDMA_WRITE| IB_AC_LOCAL_WRITE;
+
+		ib_status = ib_reg_mem(ctx->pd ,&mr_create ,&ctx->lkey ,&ctx->rkey ,&ctx->mr);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't allocate MR\n");
+			return NULL;
+		}
+
+		/* Receive CQ */
+		cq_create.size = user_parm->tx_depth*2;
+		cq_create.h_wait_obj = NULL;
+		cq_create.pfn_comp_cb = pp_cq_comp_cb;
+		ib_status = ib_create_cq(ctx->ca,&cq_create ,ctx, NULL, &ctx->rcq);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create CQ\n");
+			return NULL;
+		}
+
+		/* Send CQ */
+		cq_create.size = user_parm->tx_depth*2;
+		cq_create.h_wait_obj = NULL;
+		cq_create.pfn_comp_cb = pp_cq_comp_cb;
+		ib_status = ib_create_cq(ctx->ca,&cq_create ,ctx, NULL, &ctx->scq);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create CQ\n");
+			return NULL;
+		}
+	}
+
+	{
+		ib_qp_create_t qp_create;
+		memset(&qp_create, 0, sizeof(ib_qp_create_t));
+		qp_create.h_sq_cq = ctx->scq;
+		qp_create.h_rq_cq = ctx->rcq;
+		qp_create.sq_depth = user_parm->tx_depth;
+		qp_create.rq_depth = user_parm->tx_depth;
+		qp_create.sq_sge = 1;
+		qp_create.rq_sge = 1;
+		//TODO MAX_INLINE
+
+		switch (user_parm->connection_type) {
+		case RC :
+			qp_create.qp_type= IB_QPT_RELIABLE_CONN;
+			break;
+		case UC :
+			qp_create.qp_type = IB_QPT_UNRELIABLE_CONN;
+			break;
+		case UD :
+			qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM;
+			break;
+		default:
+			fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
+			return NULL;
+		}
+
+		/* Sends are signaled individually by run_iter, not by default. */
+		qp_create.sq_signaled = FALSE;
+
+		ib_status = ib_create_qp(ctx->pd, &qp_create,NULL,NULL,&ctx->qp[0]);
+		if (ib_status != IB_SUCCESS){
+			fprintf(stderr, "Couldn't create QP\n");
+			return NULL;
+		}
+	}
+
+	{
+		ib_qp_mod_t qp_modify;
+		memset(&qp_modify, 0, sizeof(ib_qp_mod_t));
+		qp_modify.req_state = IB_QPS_INIT;
+		qp_modify.state.init.pkey_index = 0 ;
+		qp_modify.state.init.primary_port = (uint8_t)port;
+		if (user_parm->connection_type==UD) {
+			qp_modify.state.init.qkey = 0x11111111;
+		} else {
+			qp_modify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
+		}
+
+		ib_status = ib_modify_qp(ctx->qp[0], &qp_modify);
+		if (ib_status != IB_SUCCESS){
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			return NULL;
+		}
+
+		/* Cache the QP attributes; the QP number and inline limit are read from them later. */
+		ib_status = ib_query_qp(ctx->qp[0], &ctx->qp_attr[0]);
+		if (ib_status != IB_SUCCESS){
+			fprintf(stderr, "Failed to query QP\n");
+			return NULL;
+		}
+	}
+
+	//send
+	ctx->wr.wr_id = PINGPONG_SEND_WRID;
+	ctx->wr.ds_array = &ctx->list;
+	ctx->wr.num_ds = 1;
+	ctx->wr.wr_type = WR_SEND;
+	ctx->wr.p_next = NULL;
+
+	//recieve
+	ctx->rwr.wr_id = PINGPONG_RECV_WRID;
+	ctx->rwr.ds_array = &ctx->recv_list;
+	ctx->rwr.num_ds = 1;
+	ctx->rwr.p_next = NULL;
+	return ctx;
+}
+
+/*
+ * Connect QP number `index` of the context to the remote side.
+ *
+ * Moves the QP to RTR and then RTS using the remote LID/QPN/PSN in `dest`,
+ * creates an address vector when the connection type is UD, and pre-posts
+ * tx_depth / 2 receive requests covering one maximum-size message.
+ *
+ * ctx       - context created by pp_init_ctx.
+ * port      - local HCA port number.
+ * my_psn    - initial send PSN of the local QP.
+ * dest      - addressing information received from the peer.
+ * user_parm - test settings (mtu, connection_type, tx_depth are used).
+ * index     - which entry of ctx->qp to connect.
+ *
+ * Returns 0 on success, 1 when the MTU is unsupported or a QP/AV operation
+ * fails, 14 when a receive request cannot be posted.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+		struct pingpong_dest *dest,struct user_parameters *user_parm,int index)
+{
+	ib_api_status_t ib_status;
+	ib_qp_mod_t attr;
+
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+	attr.req_state = IB_QPS_RTR;
+
+	switch (user_parm->mtu) {
+	case 256 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_256;
+		break;
+	case 512 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_512;
+		break;
+	case 1024 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_1024;
+		break;
+	case 2048 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_2048;
+		break;
+	default:
+		/* Refuse to program a zero path MTU silently. */
+		fprintf(stderr, "Unsupported MTU %d\n", user_parm->mtu);
+		return 1;
+	}
+	printf("Mtu : %d\n", user_parm->mtu);
+
+	attr.state.rtr.dest_qp = (dest->qpn);
+	attr.state.rtr.rq_psn = (dest->psn);
+	if (user_parm->connection_type==RC) {
+		attr.state.rtr.resp_res = 1;
+		attr.state.rtr.rnr_nak_timeout = 12;
+	}
+	attr.state.rtr.primary_av.grh_valid = 0;
+	attr.state.rtr.primary_av.dlid = dest->lid;
+	attr.state.rtr.primary_av.sl = 0;
+	attr.state.rtr.primary_av.path_bits = 0;
+	attr.state.rtr.primary_av.port_num = (uint8_t)port;
+	attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS;
+	attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT |
+		IB_MOD_QP_RESP_RES |
+		IB_MOD_QP_PRIMARY_AV;
+
+	/* Use the same QP for every step; the RTR step used to be hard-wired to qp[0]. */
+	ib_status = ib_modify_qp(ctx->qp[index], &attr);
+	if(ib_status != IB_SUCCESS){
+		fprintf(stderr, "Failed to modify QP to RTR\n");
+		return 1;
+	}
+
+	if (user_parm->connection_type == UD) {
+		ib_av_attr_t av_attr;
+
+		/* Zero the fields that are not set explicitly below. */
+		memset(&av_attr, 0, sizeof(ib_av_attr_t));
+		av_attr.grh_valid = 0;
+		av_attr.dlid = dest->lid;
+		av_attr.sl = 0;
+		av_attr.path_bits = 0;
+		av_attr.port_num = (uint8_t)port;
+		av_attr.static_rate = IB_PATH_RECORD_RATE_10_GBS;
+		ib_status = ib_create_av(ctx->pd,&av_attr, &ctx->av);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to create AH for UD\n");
+			return 1;
+		}
+	}
+
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+	attr.req_state = IB_QPS_RTS;
+	attr.state.rts.sq_psn = my_psn;
+
+	if (user_parm->connection_type == RC) {
+		attr.state.rts.resp_res = 1;
+		attr.state.rts.local_ack_timeout = 14;
+		attr.state.rts.retry_cnt = 7;
+		attr.state.rts.rnr_retry_cnt = 7;
+		attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT |
+			IB_MOD_QP_RETRY_CNT |
+			IB_MOD_QP_LOCAL_ACK_TIMEOUT;
+	}
+	ib_status = ib_modify_qp(ctx->qp[index], &attr);
+	if(ib_status != IB_SUCCESS){
+		fprintf(stderr, "Failed to modify QP to RTS\n");
+		return 1;
+	}
+
+	/* post recieve max msg size*/
+	{
+		int i;
+		ib_recv_wr_t *bad_wr_recv;
+
+		//recieve
+		ctx->rwr.wr_id = PINGPONG_RECV_WRID;
+		ctx->rwr.ds_array = &ctx->recv_list;
+		ctx->rwr.num_ds = 1;
+		ctx->rwr.p_next = NULL;
+		ctx->recv_list.vaddr = (uintptr_t) ctx->buf;
+		if (user_parm->connection_type==UD) {
+			/* UD receives carry a 40-byte GRH in front of the payload. */
+			ctx->recv_list.length = ctx->size + 40;
+		} else {
+			ctx->recv_list.length = ctx->size;
+		}
+		ctx->recv_list.lkey = ctx->lkey;
+		for (i = 0; i < user_parm->tx_depth / 2; ++i) {
+			if (ib_post_recv(ctx->qp[index], &ctx->rwr, &bad_wr_recv)) {
+				fprintf(stderr, "Couldn't post recv: counter=%d\n", i);
+				return 14;
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Establish the out-of-band TCP connection with the peer, exchange QP
+ * addressing information over it and connect the QP.
+ *
+ * ctx        - context created by pp_init_ctx.
+ * servername - host to connect to, or NULL to act as the server.
+ * ib_port    - 1-based HCA port number whose LID is advertised.
+ * port       - TCP port used for the exchange.
+ * p_rem_dest - receives a malloc'ed array with the peer's addressing
+ *              information; ownership passes to the caller on success.
+ * user_parm  - test settings forwarded to pp_connect_ctx.
+ *
+ * Returns the connected socket, or INVALID_SOCKET on any failure. On
+ * failure the socket is closed and the temporary buffers are released.
+ */
+static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,
+		int ib_port, int port, struct pingpong_dest **p_rem_dest,struct user_parameters *user_parm)
+{
+	struct pingpong_dest *my_dest = NULL;
+	struct pingpong_dest *rem_dest = NULL;
+	SOCKET sockfd = INVALID_SOCKET;
+	int rc;
+	int i;
+	int numofqps = 1;
+
+	/* p_port_attr is indexed with ib_port - 1 below. */
+	if (ib_port < 1) {
+		fprintf(stderr, "Invalid IB port %d\n", ib_port);
+		return INVALID_SOCKET;
+	}
+
+	/* Create connection between client and server.
+	 * We do it by exchanging data over a TCP socket connection. */
+	my_dest = malloc( sizeof (struct pingpong_dest) * numofqps);
+	if (!my_dest){
+		perror("malloc");
+		goto fail;
+	}
+
+	rem_dest = malloc(sizeof (struct pingpong_dest) * numofqps );
+	if (!rem_dest){
+		perror("malloc");
+		goto fail;
+	}
+
+	sockfd = servername ? pp_client_connect(servername, port) :
+		pp_server_connect(port);
+
+	if (sockfd == INVALID_SOCKET) {
+		/* servername is NULL on the server side; never hand NULL to %s. */
+		printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+			servername ? servername : "(null)", port, (int)sockfd);
+		goto fail;
+	}
+
+	for (i =0 ;i<numofqps;i ++)
+	{
+		my_dest[i].lid = ctx->ca_attr->p_port_attr[ib_port-1].lid;
+		my_dest[i].psn = rand() & 0xffffff;
+		if (!my_dest[i].lid) {
+			fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+			/* Must be INVALID_SOCKET: the caller tests for nothing else. */
+			goto fail;
+		}
+		my_dest[i].qpn = ctx->qp_attr[i].num;
+		/* TBD this should be changed inot VA and different key to each qp */
+		my_dest[i].rkey = ctx->rkey;
+		my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+		printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
+			"RKey %#08x VAddr %#016Lx\n",
+			my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,
+			my_dest[i].rkey, my_dest[i].vaddr);
+
+		rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
+			pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
+		if (rc)
+			goto fail;
+		printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
+			"RKey %#08x VAddr %#016Lx\n",
+			rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn,
+			rem_dest[i].rkey, rem_dest[i].vaddr);
+
+		if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, &rem_dest[i], user_parm, i))
+			goto fail;
+		/* An additional handshake is required *after* moving qp to RTR.
+		   Arbitrarily reuse exch_dest for this purpose. */
+		rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
+			pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
+		if (rc)
+			goto fail;
+	}
+
+	/* The local description is only needed during the exchange. */
+	free(my_dest);
+	*p_rem_dest = rem_dest;
+	return sockfd;
+
+fail:
+	if (sockfd != INVALID_SOCKET)
+		closesocket(sockfd);
+	free(rem_dest);
+	free(my_dest);
+	return INVALID_SOCKET;
+}
+
+
+
+/*
+ * Print the command-line help to stdout.
+ *
+ * argv0 - program name as invoked, shown in the synopsis lines.
+ *
+ * The text mirrors what main actually accepts: the connection type may be
+ * RC, UC or UD, and the default message size is 2 bytes.
+ */
+static void usage(const char *argv0)
+{
+	printf("Usage:\n");
+	printf(" %s start a server and wait for connection\n", argv0);
+	printf(" %s <host> connect to server at <host>\n", argv0);
+	printf("\n");
+	printf("Options:\n");
+	printf(" -p, --port=<port> listen on/connect to port <port> (default 18515)\n");
+	printf(" -c, --connection=<RC/UC/UD> connection type RC/UC/UD (default RC)\n");
+	printf(" -m, --mtu=<mtu> mtu size (default 2048)\n");
+	printf(" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n");
+	printf(" -i, --ib-port=<port> use port <port> of IB device (default 1)\n");
+	printf(" -s, --size=<size> size of message to exchange (default 2)\n");
+	printf(" -t, --tx-depth=<dep> size of tx queue (default 50)\n");
+	printf(" -l, --signal signal completion on each msg\n");
+	printf(" -a, --all Run sizes from 2 till 2^23\n");
+	printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
+	printf(" -C, --report-cycles report times in cpu cycle units (default microseconds)\n");
+	printf(" -H, --report-histogram print out all results (default print summary only)\n");
+	printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
+	printf(" -V, --version display version number\n");
+	printf(" -e, --events sleep on CQ events (default poll)\n");
+}
+
+/*
+ * Return the median of the n samples in delta[], which must already be
+ * sorted.
+ *
+ * When there is an
+ *  odd number of samples, the median is the middle number.
+ *  even number of samples, the median is the mean of the
+ *  two middle numbers.
+ *
+ * Returns 0 when n is not positive, so no element outside delta[] is read.
+ */
+static inline cycles_t get_median(int n, cycles_t delta[])
+{
+	if (n <= 0)
+		return 0;
+	if (n % 2)
+		return delta[n / 2];
+	else
+		return (delta[n / 2] + delta[n / 2 - 1]) / 2;
+}
+
+
+/* Print a "#, <units>" heading followed by one "index, value" line per sample. */
+static void dump_samples(const char *unit_name, const cycles_t *samples,
+		unsigned int count, double scale)
+{
+	unsigned int k;
+
+	printf("#, %s\n", unit_name);
+	for (k = 0; k < count; ++k)
+		printf("%d, %g\n", k + 1, samples[k] / scale / 2);
+}
+
+/*
+ * Turn the raw timestamps of one run into a report line.
+ *
+ * The iters - 1 differences between consecutive timestamps are computed,
+ * optionally dumped unsorted and/or sorted, and a summary line with the
+ * smallest, largest and median value is printed. Every value is divided
+ * by 2 before printing; it is shown in cycles when options->cycles is set,
+ * otherwise divided by get_cpu_mhz()/1000000 and labelled "usec".
+ *
+ * options - which listings to print and in which unit.
+ * iters   - number of entries in tstamp.
+ * tstamp  - timestamps recorded by run_iter.
+ * size    - message size shown in the summary line.
+ */
+static void print_report(struct report_options * options,
+		unsigned int iters, cycles_t *tstamp,int size)
+{
+	const unsigned int nsamples = iters - 1;
+	const char *unit_name;
+	double scale;
+	cycles_t mid;
+	unsigned int k;
+	cycles_t *diffs;
+
+	diffs = malloc(iters * sizeof *diffs);
+	if (diffs == NULL) {
+		perror("malloc");
+		return;
+	}
+
+	k = 0;
+	while (k < nsamples) {
+		diffs[k] = tstamp[k + 1] - tstamp[k];
+		++k;
+	}
+
+	if (!options->cycles) {
+		scale = get_cpu_mhz()/1000000;
+		unit_name = "usec";
+	} else {
+		scale = 1;
+		unit_name = "cycles";
+	}
+
+	/* Measurement order first, if requested ... */
+	if (options->unsorted)
+		dump_samples(unit_name, diffs, nsamples, scale);
+
+	qsort(diffs, nsamples, sizeof *diffs, cycles_compare);
+
+	/* ... then the sorted listing. */
+	if (options->histogram)
+		dump_samples(unit_name, diffs, nsamples, scale);
+
+	mid = get_median(nsamples, diffs);
+	printf("%7d %d %7.2f %7.2f %7.2f\n",
+		size,iters,diffs[0] / scale / 2,
+		diffs[iters - 2] / scale / 2,mid / scale / 2);
+	free(diffs);
+}
+
+/*
+ * Run one ping-pong measurement with messages of `size` bytes.
+ *
+ * Each side alternates between waiting for a receive completion and posting
+ * a send until user_param->iters sends and receives have been done. The
+ * client (servername set) sends first, the server receives first. A cycle
+ * counter sample is stored in the global tstamp[] right before every send.
+ *
+ * ctx        - connected context.
+ * user_param - test settings (iters, tx_depth, connection_type, signal_comp,
+ *              use_event, servername are read).
+ * rem_dest   - peer addressing; only its qpn is used, and only for UD.
+ * size       - message size in bytes (limited to 2048 for UD).
+ *
+ * Returns 0 on success, 11 when a send cannot be posted, 12 when polling a
+ * CQ fails, 13 on a completion with error status, 15 when a receive cannot
+ * be posted.
+ */
+int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,
+ struct pingpong_dest *rem_dest, int size)
+{
+ ib_api_status_t ib_status;
+ ib_qp_handle_t qp;
+ ib_recv_wr_t rwr;
+ ib_recv_wr_t *bad_wr_recv;
+ volatile char *poll_buf;
+ volatile char *post_buf;
+
+ int scnt, rcnt, ccnt, poll;
+ int iters;
+ int tx_depth;
+ iters = user_param->iters;
+ tx_depth = user_param->tx_depth;
+
+
+ /* UD messages are limited to 2048 bytes here. */
+ if (user_param->connection_type==UD) {
+ if (size > 2048) {
+ size = 2048;
+ }
+ }
+
+ ///send //
+ /* For UD the payload is sent from 40 bytes into the buffer. */
+ if (user_param->connection_type==UD) {
+ ctx->list.vaddr = (uintptr_t) ctx->buf + 40;
+ } else {
+ ctx->list.vaddr = (uintptr_t) ctx->buf;
+ }
+ ctx->list.length = size;
+ ctx->list.lkey = ctx->lkey;
+ if (user_param->connection_type==UD) {
+ ctx->wr.dgrm.ud.h_av = ctx->av;
+ ctx->wr.dgrm.ud.remote_qp = rem_dest->qpn;
+ ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
+ }
+
+ /// receive //
+ /* rwr is a copy of the template; its ds_array still points at ctx->recv_list. */
+ rwr = ctx->rwr;
+ ctx->recv_list.vaddr = (uintptr_t) ctx->buf;
+ if (user_param->connection_type==UD) {
+ ctx->recv_list.length = ctx->size + 40;
+ } else {
+ ctx->recv_list.length = ctx->size;
+ }
+
+ ctx->recv_list.lkey = ctx->lkey;
+
+ /* scnt/rcnt count posted sends/receives; ccnt counts loop passes since the last signaled send. */
+ scnt = 0;
+ rcnt = 0;
+ ccnt = 0;
+ poll = 0;
+ /* poll_buf is assigned here but never read in this function. */
+ poll_buf = ctx->poll_buf;
+ post_buf = ctx->post_buf;
+ qp = ctx->qp[0];
+ /* Default send options: unsignaled, inline when the message is non-empty and fits sq_max_inline. */
+ if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline || size == 0) {/* complaince to perf_main don't signal*/
+ ctx->wr.send_opt = 0;
+ } else {
+ ctx->wr.send_opt = IB_SEND_OPT_INLINE;
+ }
+
+ while (scnt < iters || rcnt < iters) {
+ /* The client skips the receive on its very first pass so that it sends first. */
+ if (rcnt < iters && !(scnt < 1 && user_param->servername)) {
+ ib_wc_t wc;
+ ib_wc_t *p_wc_done,*p_wc_free;
+
+ p_wc_free = &wc;
+ p_wc_done = NULL;
+ p_wc_free->p_next = NULL;
+ //printf("rcnt %d\n",rcnt);
+ //printf("scnt %d\n",scnt);
+ /*Server is polling on recieve first */
+ ++rcnt;
+ /* Post a fresh receive before waiting for the next completion. */
+ if (ib_post_recv(qp, &rwr, &bad_wr_recv)) {
+ fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
+ rcnt);
+ return 15;
+ }
+
+/* Event-driven wait from the original verbs code; compiled only when PORTED is non-zero. */
+#if PORTED
+ if (user_param->use_event) {
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+
+ if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+ fprintf(stderr, "Failed to get receive cq_event\n");
+ return 1;
+ }
+
+ if (ev_cq != ctx->rcq) {
+ fprintf(stderr, "CQ event for unknown RCQ %p\n", ev_cq);
+ return 1;
+ }
+
+ if (ibv_req_notify_cq(ctx->rcq, 0)) {
+ fprintf(stderr, "Couldn't request RCQ notification\n");
+ return 1;
+ }
+ }
+#endif
+
+ /* Busy-poll the receive CQ; with use_event set the CQ is polled exactly once. */
+ do {
+ ib_status = ib_poll_cq(ctx->rcq,&p_wc_free, &p_wc_done);
+ } while (!user_param->use_event && ib_status == IB_NOT_FOUND);
+
+ if (ib_status != IB_SUCCESS) {
+ fprintf(stderr, "Poll Recieve CQ failed %d\n", ib_status);
+ return 12;
+ }
+
+ /* NOTE(review): the check reads p_wc_done but the message prints wc; presumably the same entry, since wc was the only free one offered - confirm. */
+ if (p_wc_done->status != IB_WCS_SUCCESS) {
+ fprintf(stderr, "Recieve Completion wth error at %s:\n",
+ user_param->servername ? "client" : "server");
+ fprintf(stderr, "Failed status %d: wr_id %d\n",
+ wc.status, (int) wc.wr_id);
+ fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
+ scnt, rcnt, ccnt);
+ return 13;
+ }
+ }
+
+ if (scnt < iters ) {
+ ib_send_wr_t *bad_wr;
+
+ //printf("rcnt1 %d\n",rcnt);
+ //printf("scnt1 %d\n",scnt);
+ /* Request a signaled send when ccnt reaches tx_depth - 2, when -l was given, or for the last send. */
+ if (ccnt == (tx_depth - 2) || (user_param->signal_comp == SIGNAL)
+ || (scnt == (iters - 1)) ) {
+ ccnt = 0;
+ poll=1;
+ if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline || size == 0) {/* complaince to perf_main */
+ ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
+ } else {
+ ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
+ }
+
+ }
+
+ /* client post first */
+ /* Timestamp taken immediately before the send; the send counter is written to the last byte of the first message slot. */
+ tstamp[scnt] = get_cycles();
+ *post_buf = (char)++scnt;
+ if (ib_post_send(qp,&ctx->wr, &bad_wr)) {
+ fprintf(stderr, "Couldn't post send: scnt=%d\n",
+ scnt);
+ return 11;
+ }
+ }
+ /* A signaled send was posted above: wait for its completion on the send CQ. */
+ if (poll == 1) {
+ ib_wc_t wc;
+ ib_wc_t *p_wc_done,*p_wc_free;
+
+ //printf("rcnt2 %d\n",rcnt);
+ //printf("scnt2 %d\n",scnt);
+ p_wc_free = &wc;
+ p_wc_done = NULL;
+ p_wc_free->p_next = NULL;
+
+#if PORTED
+ if (user_param->use_event) {
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+
+ if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
+ fprintf(stderr, "Failed to get send cq_event\n");
+ return 1;
+ }
+
+ if (ev_cq != ctx->scq) {
+ fprintf(stderr, "CQ event for unknown SCQ %p\n", ev_cq);
+ return 1;
+ }
+
+ if (ibv_req_notify_cq(ctx->scq, 0)) {
+ fprintf(stderr, "Couldn't request SCQ notification\n");
+ return 1;
+ }
+ }
+
+#endif
+
+ /* poll on scq */
+ do {
+ ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
+ } while (!user_param->use_event && ib_status == IB_NOT_FOUND);
+
+ /* NOTE(review): the two messages below say "Recieve" although this is the send CQ. */
+ if (ib_status != IB_SUCCESS) {
+ fprintf(stderr, "Poll Recieve CQ failed %d\n", ib_status);
+ return 12;
+ }
+
+ if (wc.status != IB_WCS_SUCCESS) {
+ fprintf(stderr, "Recieve Completion wth error at %s:\n",
+ user_param->servername ? "client" : "server");
+ fprintf(stderr, "Failed status %d: wr_id %d\n",
+ wc.status, (int) wc.wr_id);
+ fprintf(stderr, "scnt=%d, rcnt=%d, ccnt=%d\n",
+ scnt, rcnt, ccnt);
+ return 13;
+ }
+
+ /* Back to unsignaled sends until the next trigger. */
+ poll = 0;
+ if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline || size == 0) {/* complaince to perf_main don't signal*/
+ ctx->wr.send_opt = 0;
+ } else {
+ ctx->wr.send_opt = IB_SEND_OPT_INLINE;
+ }
+
+ }
+ ++ccnt;
+ }
+
+ return(0);
+}
+
+
+
+/*
+ * Entry point of the send latency test.
+ *
+ * Parses the command line, initialises Winsock and the IB resources,
+ * connects to the peer and runs the measurement either for one message
+ * size or, with -a, for every power of two from 2 up to 2^23 (2^11 for UD).
+ *
+ * Exit codes: 0 on success; 1-6 for command-line errors (1 is also used
+ * when Winsock cannot be started); 8 when the IB context cannot be set up;
+ * 9 when the connection to the peer fails; 10 when the timestamp array
+ * cannot be allocated; 17/18 when a measurement run fails.
+ */
+int __cdecl main(int argc, char *argv[])
+{
+
+	struct pingpong_context *ctx;
+	struct pingpong_dest *rem_dest;
+	struct user_parameters user_param;
+	char *ib_devname = NULL;
+	int port = 18515;
+	int ib_port = 1;
+	unsigned size = 2;
+	long size_arg;
+	SOCKET sockfd = INVALID_SOCKET;
+	int i = 0;
+	int size_max_pow = 24;
+	int rc = 0;
+	WSADATA wsaData;
+	int iResult;
+
+
+	struct report_options report = {0};
+
+	/* init default values to user's parameters */
+	memset(&user_param, 0, sizeof(struct user_parameters));
+	user_param.mtu = 0;
+	user_param.iters = 1000;
+	user_param.tx_depth = 50;
+	user_param.servername = NULL;
+	user_param.use_event = 0;
+	/* Parameter parsing. */
+	while (1) {
+		int c;
+
+		static struct option long_options[] = {
+			{ "port", 1,NULL, 'p' },
+			{ "connection", 1,NULL, 'c' },
+			{ "mtu", 1,NULL, 'm' },
+			{ "ib-dev", 1,NULL, 'd' },
+			{ "ib-port", 1,NULL, 'i' },
+			{ "size", 1,NULL, 's' },
+			{ "iters", 1,NULL, 'n' },
+			{ "tx-depth", 1,NULL, 't' },
+			{ "signal", 0,NULL, 'l' },
+			{ "all", 0,NULL, 'a' },
+			{ "report-cycles", 0,NULL, 'C' },
+			{ "report-histogram", 0,NULL, 'H' },
+			{ "report-unsorted", 0,NULL, 'U' },
+			{ "version", 0,NULL, 'V' },
+			{ "events", 0,NULL, 'e' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:c:m:d:i:s:n:t:laeCHUV", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+		case 'c':
+			if (strcmp("UC",optarg)==0)
+				user_param.connection_type=UC;
+			if (strcmp("UD",optarg)==0)
+				user_param.connection_type=UD;
+			/* default is 0 for any other option RC*/
+			break;
+		case 'e':
+			++user_param.use_event;
+			break;
+		case 'm':
+			user_param.mtu = strtol(optarg, NULL, 0);
+			break;
+		case 'l':
+			user_param.signal_comp = SIGNAL;
+			break;
+		case 'a':
+			/* Store the same constant the checks below compare against. */
+			user_param.all = ALL;
+			break;
+		case 'V':
+			printf("perftest version : %.2f\n",VERSION);
+			return 0;
+		case 'd':
+			/* Accepted for compatibility; the first HCA found is always used. */
+			free(ib_devname);
+			ib_devname = _strdup(optarg);
+			break;
+
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			/* Ports are numbered from 1; 0 would index before the port table. */
+			if (ib_port < 1) {
+				usage(argv[0]);
+				return 2;
+			}
+			break;
+
+		case 's':
+			/* Parse as signed so negative input is rejected instead of wrapping;
+			 * the upper bound keeps (size + 40) * 2 within an unsigned int. */
+			size_arg = strtol(optarg, NULL, 0);
+			if (size_arg < 1 || size_arg > 0x3FFFFFFF) {
+				usage(argv[0]); return 3;
+			}
+			size = (unsigned)size_arg;
+			break;
+
+		case 't':
+			user_param.tx_depth = strtol(optarg, NULL, 0);
+			if (user_param.tx_depth < 1) {
+				usage(argv[0]); return 4;
+			}
+			break;
+
+		case 'n':
+			user_param.iters = strtol(optarg, NULL, 0);
+			if (user_param.iters < 2) {
+				usage(argv[0]);
+				return 5;
+			}
+
+			break;
+
+		case 'C':
+			report.cycles = 1;
+			break;
+
+		case 'H':
+			report.histogram = 1;
+			break;
+
+		case 'U':
+			report.unsorted = 1;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 5;
+		}
+	}
+
+	if (optind == argc - 1)
+		user_param.servername = _strdup(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 6;
+	}
+
+	/*
+	 * Done with parameter parsing. Perform setup.
+	 */
+	tstamp = malloc(user_param.iters * sizeof *tstamp);
+	if (!tstamp) {
+		perror("malloc");
+		return 10;
+	}
+	/* Print header data */
+	printf("------------------------------------------------------------------\n");
+	printf(" Send Latency Test\n");
+	printf("Inline data is used up to 400 bytes message\n");
+	if (user_param.connection_type==RC) {
+		printf("Connection type : RC\n");
+	} else if (user_param.connection_type==UC) {
+		printf("Connection type : UC\n");
+	} else {
+		printf("Connection type : UD\n");
+	}
+
+	// Initialize Winsock
+	iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
+	if (iResult != NO_ERROR) {
+		printf("Error at WSAStartup()\n");
+		free(ib_devname);
+		free(tstamp);
+		return 1;
+	}
+
+
+	if (user_param.all == ALL) {
+		/*since we run all sizes lets allocate big enough buffer */
+		size = 8388608; /*2^23 */
+	}
+	srand(GetCurrentProcessId() * GetTickCount());
+
+	//TODO: get pagesize from sysinfo
+	page_size = 4096;
+
+	//TODO get the device names
+
+	ctx = pp_init_ctx( size, ib_port,&user_param);
+	if (!ctx) {
+		rc = 8;
+		goto cleanup;
+	}
+
+	sockfd = pp_open_port(ctx, user_param.servername, ib_port, port,&rem_dest,&user_param);
+	if (sockfd == INVALID_SOCKET) {
+		rc = 9;
+		goto cleanup;
+	}
+
+
+#if PORTED
+	if (user_param.use_event) {
+		printf("Test with events.\n");
+		if (ibv_req_notify_cq(ctx->rcq, 0)) {
+			fprintf(stderr, "Couldn't request RCQ notification\n");
+			rc = 1;
+			goto cleanup;
+		}
+		if (ibv_req_notify_cq(ctx->scq, 0)) {
+			fprintf(stderr, "Couldn't request SCQ notification\n");
+			rc = 1;
+			goto cleanup;
+		}
+	}
+#endif
+
+	printf("------------------------------------------------------------------\n");
+	printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");
+
+	if (user_param.all == ALL) {
+		if (user_param.connection_type==UD) {
+			size_max_pow = 12;
+		}
+		for (i = 1; i < size_max_pow ; ++i) {
+			size = 1 << i;
+			if(run_iter(ctx, &user_param, rem_dest, size)) {
+				rc = 17;
+				goto cleanup;
+			}
+
+			print_report(&report, user_param.iters, tstamp, size);
+		}
+	} else {
+		if(run_iter(ctx, &user_param, rem_dest, size)) {
+			rc = 18;
+			goto cleanup;
+		}
+		print_report(&report, user_param.iters, tstamp, size);
+	}
+	printf("------------------------------------------------------------------\n");
+
+	/* Tell the peer the run is over. */
+	send(sockfd, "done", sizeof "done",0);
+
+cleanup:
+	/* Common exit path once Winsock has been started. */
+	if (sockfd != INVALID_SOCKET)
+		closesocket(sockfd);
+	WSACleanup();
+	free(ib_devname);
+	free(tstamp);
+	return rc;
+}
--- /dev/null
+/*\r
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ * Redistribution and use in source and binary forms, with or\r
+ * without modification, are permitted provided that the following\r
+ * conditions are met:\r
+ *\r
+ * - Redistributions of source code must retain the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer.\r
+ *\r
+ * - Redistributions in binary form must reproduce the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer in the documentation and/or other materials\r
+ * provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id: vstat.rc 636 2005-10-19 17:46:55Z sleybo $\r
+ */\r
+\r
+\r
+#include <oib_ver.h>\r
+\r
+#define VER_FILETYPE VFT_APP\r
+#define VER_FILESUBTYPE VFT2_UNKNOWN\r
+\r
+#ifdef _DEBUG_\r
+#define VER_FILEDESCRIPTION_STR "Send/Recv Latency Test (Debug)"\r
+#else\r
+#define VER_FILEDESCRIPTION_STR "Send/Recv Latency Test "\r
+#endif\r
+\r
+#define VER_INTERNALNAME_STR "ib_send_lat.exe"\r
+#define VER_ORIGINALFILENAME_STR "ib_send_lat.exe"\r
+\r
+#include <common.ver>\r
--- /dev/null
+TARGETNAME=ib_write_bw\r
+TARGETPATH=..\..\..\..\bin\user\obj$(BUILD_ALT_DIR)\r
+TARGETTYPE=PROGRAM\r
+UMTYPE=console\r
+USE_CRTDLL=1\r
+\r
+C_DEFINES=$(C_DEFINES) /D__WIN__ \r
+\r
+SOURCES=write_bw.rc \\r
+ ..\getopt.c \\r
+ ..\perf_utils.c \\r
+ write_bw.c \r
+\r
+INCLUDES=..;..\..\..\..\inc;..\..\..\..\inc\user\r
+\r
+RCOPTIONS=/I..\..\win\include\r
+\r
+TARGETLIBS= \\r
+ $(DDK_LIB_PATH)\Ws2_32.lib \\r
+!if $(FREEBUILD)\r
+ $(TARGETPATH)\*\complib.lib \\r
+ $(TARGETPATH)\*\ibal.lib\r
+!else\r
+ $(TARGETPATH)\*\complibd.lib \\r
+ $(TARGETPATH)\*\ibald.lib\r
+!endif\r
+\r
+MSC_WARNING_LEVEL= /W3\r
--- /dev/null
+#\r
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source\r
+# file to this component. This file merely indirects to the real make file\r
+# that is shared by all the driver components of the OpenIB Windows project.\r
+#\r
+\r
+!INCLUDE ..\..\..\..\inc\openib.def\r
--- /dev/null
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "getopt.h"
+#include "perf_defs.h"
+#include "get_clock.h"
+
+/* Values for user_parameters.connection_type. */
+#define RC 0
+#define UC 1
+
+/* Settings of the write bandwidth test. */
+struct user_parameters {
+ const char *servername; /* presumably the host to connect to, NULL for the server - TODO confirm */
+ int connection_type; /* RC or UC */
+ int mtu;
+ int all; /* run all msg size */
+ int iters;
+ int tx_depth; /* copied into the context's tx_depth by pp_init_ctx */
+ int numofqps; /* number of QPs; sizes the qp, qp_attr, scnt and ccnt arrays and the work buffer */
+ int maxpostsofqpiniteration; /* NOTE(review): not used in the visible part of this file */
+};
+
+/* NOTE(review): not referenced in the visible part of this file. */
+static int page_size;
+
+/* Presumably cycle-counter samples taken at post and at completion time - TODO confirm against their users. */
+cycles_t *tposted;
+cycles_t *tcompleted;
+
+
+/*
+ * CQ completion callback that does nothing: both arguments are
+ * deliberately ignored.
+ */
+void
+pp_cq_comp_cb(IN const ib_cq_handle_t h_cq, IN void *cq_context)
+{
+	UNUSED_PARAM(cq_context);
+	UNUSED_PARAM(h_cq);
+}
+/*
+ * Allocate and initialise the benchmark context.
+ *
+ * Steps, in order: allocate the context and its per-QP arrays, allocate and
+ * zero the data buffer, open an AL instance, open the first CA reported by
+ * ib_get_ca_guids(), query its attributes, allocate a PD, register the
+ * buffer, create one CQ shared by all QPs, then create every QP and move it
+ * to INIT on the requested port.
+ *
+ * size      - message size in bytes; the buffer holds 2 * size per QP
+ * port      - IB port number programmed into each QP
+ * user_parm - benchmark settings; user_parm->mtu is filled in with a default
+ *             when it is 0
+ *
+ * Returns the new context, or NULL on failure.  On failure everything
+ * allocated here is released again.
+ */
+static struct pingpong_context *pp_init_ctx(unsigned size, int port, struct user_parameters *user_parm)
+{
+	struct pingpong_context *ctx;
+	ib_api_status_t ib_status = IB_SUCCESS;
+	size_t guid_count;
+	ib_net64_t *ca_guid_array = NULL;
+	int counter;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx) {
+		perror("malloc");
+		return NULL;
+	}
+	memset(ctx, 0, sizeof *ctx);
+	ctx->size = size;
+	ctx->tx_depth = user_parm->tx_depth;
+
+	ctx->qp = malloc(sizeof (ib_qp_handle_t) * user_parm->numofqps);
+	if (!ctx->qp) {
+		perror("malloc");
+		goto fail;
+	}
+	ctx->qp_attr = malloc(sizeof (ib_qp_attr_t) * user_parm->numofqps);
+	if (!ctx->qp_attr) {
+		perror("malloc");
+		goto fail;
+	}
+
+	/* Per-QP posted / completed counters, zeroed by calloc */
+	ctx->scnt = calloc(user_parm->numofqps, sizeof (int));
+	if (!ctx->scnt) {
+		perror("malloc");
+		goto fail;
+	}
+	ctx->ccnt = calloc(user_parm->numofqps, sizeof (int));
+	if (!ctx->ccnt) {
+		perror("malloc");
+		goto fail;
+	}
+
+	ctx->buf = malloc(size * 2 * user_parm->numofqps);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		goto fail;
+	}
+	memset(ctx->buf, 0, size * 2 * user_parm->numofqps);
+
+	/*
+	 * Open the AL instance
+	 */
+	ib_status = ib_open_al(&ctx->al);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_open_al failed status = %d\n", ib_status);
+		goto fail;
+	}
+
+	/*
+	 * Get the Local CA Guids.  The first call passes no array and is
+	 * expected to fail with IB_INSUFFICIENT_MEMORY while reporting the count.
+	 */
+	ib_status = ib_get_ca_guids(ctx->al, NULL, &guid_count);
+	if (ib_status != IB_INSUFFICIENT_MEMORY) {
+		fprintf(stderr,"ib_get_ca_guids1 failed status = %d\n", (uint32_t)ib_status);
+		goto fail;
+	}
+
+	/*
+	 * If no CA's Present then return
+	 */
+	if (guid_count == 0) {
+		fprintf(stderr, "No channel adapters found\n");
+		goto fail;
+	}
+
+	ca_guid_array = malloc(sizeof(ib_net64_t) * guid_count);
+	if (!ca_guid_array) {
+		perror("malloc");
+		goto fail;
+	}
+
+	ib_status = ib_get_ca_guids(ctx->al, ca_guid_array, &guid_count);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_get_ca_guids2 failed with status = %d\n", ib_status);
+		goto fail;
+	}
+
+	/*
+	 * Open only the first HCA
+	 */
+	ib_status = ib_open_ca(ctx->al, ca_guid_array[0], NULL,
+		NULL,	/* ca_context */
+		&ctx->ca);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_open_ca failed with status = %d\n", ib_status);
+		goto fail;
+	}
+
+	/* The GUID list is only needed to open the CA */
+	free(ca_guid_array);
+	ca_guid_array = NULL;
+
+	{
+		/* Query the CA: first call obtains the required size, second the data */
+		uint32_t bsize = 0;
+
+		ib_status = ib_query_ca(ctx->ca, NULL, &bsize);
+		if (ib_status != IB_INSUFFICIENT_MEMORY) {
+			fprintf(stderr, "Failed to query device props\n");
+			goto fail;
+		}
+
+		ctx->ca_attr = malloc(bsize);
+		if (!ctx->ca_attr) {
+			perror("malloc");
+			goto fail;
+		}
+
+		ib_status = ib_query_ca(ctx->ca, ctx->ca_attr, &bsize);
+		if (ib_status != IB_SUCCESS) {
+			printf("ib_query_ca failed with status = %d\n", ib_status);
+			goto fail;
+		}
+
+		/* user did not ask for specific mtu: device id 23108 gets 1024, others 2048 */
+		if (user_parm->mtu == 0) {
+			if (ctx->ca_attr->dev_id == 23108) {
+				user_parm->mtu = 1024;
+			} else {
+				user_parm->mtu = 2048;
+			}
+		}
+	}
+
+	ib_status = ib_alloc_pd(ctx->ca,
+		IB_PDT_NORMAL,
+		ctx,	/* pd_context */
+		&ctx->pd);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		goto fail;
+	}
+
+	{
+		ib_mr_create_t mr_create;
+
+		memset(&mr_create, 0, sizeof mr_create);
+		/* Only the first 2 * size bytes are registered; every QP uses that region */
+		mr_create.length = size * 2;
+		mr_create.vaddr = ctx->buf;
+		mr_create.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
+
+		ib_status = ib_reg_mem(ctx->pd, &mr_create, &ctx->lkey, &ctx->rkey, &ctx->mr);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't allocate MR\n");
+			goto fail;
+		}
+	}
+
+	{
+		ib_cq_create_t cq_create;
+
+		/* One CQ serves all QPs, so size it for every send queue together */
+		cq_create.size = user_parm->tx_depth * user_parm->numofqps;
+		cq_create.h_wait_obj = NULL;
+		cq_create.pfn_comp_cb = pp_cq_comp_cb;
+		ib_status = ib_create_cq(ctx->ca, &cq_create, ctx, NULL, &ctx->scq);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create CQ\n");
+			goto fail;
+		}
+	}
+
+	for (counter = 0; counter < user_parm->numofqps; counter++) {
+		ib_qp_create_t qp_create;
+		ib_qp_mod_t qp_modify;
+
+		memset(&qp_create, 0, sizeof(ib_qp_create_t));
+		qp_create.h_sq_cq = ctx->scq;
+		qp_create.h_rq_cq = ctx->scq;
+		qp_create.sq_depth = user_parm->tx_depth;
+		qp_create.rq_depth = user_parm->tx_depth;
+		qp_create.sq_sge = 1;
+		qp_create.rq_sge = 1;
+		//TODO MAX_INLINE
+
+		switch (user_parm->connection_type) {
+		case RC :
+			qp_create.qp_type = IB_QPT_RELIABLE_CONN;
+			break;
+		case UC :
+			qp_create.qp_type = IB_QPT_UNRELIABLE_CONN;
+			break;
+		default:
+			fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
+			goto fail;
+		}
+
+		qp_create.sq_signaled = FALSE;
+
+		ib_status = ib_create_qp(ctx->pd, &qp_create, NULL, NULL, &ctx->qp[counter]);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create QP\n");
+			goto fail;
+		}
+
+		memset(&qp_modify, 0, sizeof(ib_qp_mod_t));
+		qp_modify.req_state = IB_QPS_INIT;
+		qp_modify.state.init.pkey_index = 0;
+		qp_modify.state.init.primary_port = (uint8_t)port;
+		qp_modify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
+
+		ib_status = ib_modify_qp(ctx->qp[counter], &qp_modify);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			goto fail;
+		}
+
+		/* Cache the attributes; the QP number and inline limit are read later */
+		ib_status = ib_query_qp(ctx->qp[counter], &ctx->qp_attr[counter]);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to query QP\n");
+			goto fail;
+		}
+		fprintf(stderr, "max inline size %d\n",ctx->qp_attr[counter].sq_max_inline);
+	}
+
+	return ctx;
+
+fail:
+	/*
+	 * NOTE(review): relies on ib_close_al() tearing down the CA, PD, MR, CQ
+	 * and QPs opened under this AL instance before the buffer is freed --
+	 * confirm against the IBAL documentation.
+	 */
+	if (ctx->al)
+		ib_close_al(ctx->al);
+	free(ca_guid_array);
+	free(ctx->ca_attr);
+	free(ctx->buf);
+	free(ctx->ccnt);
+	free(ctx->scnt);
+	free(ctx->qp_attr);
+	free(ctx->qp);
+	free(ctx);
+	return NULL;
+}
+
+
+/*
+ * Move one QP from INIT to RTR and then to RTS using the peer's addressing
+ * information.
+ *
+ * ctx       - benchmark context holding the QP handles
+ * port      - local IB port number placed in the address vector
+ * my_psn    - initial send PSN for this side
+ * dest      - remote LID / QPN / PSN received from the peer
+ * user_parm - supplies the MTU (256, 512, 1024 or 2048) and connection type
+ * qpindex   - index of the QP in ctx->qp[] to connect
+ *
+ * Returns 0 on success, 1 on failure (unsupported MTU or a failed
+ * ib_modify_qp call).
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+	struct pingpong_dest *dest, struct user_parameters *user_parm, int qpindex)
+{
+	ib_api_status_t ib_status;
+	ib_qp_mod_t attr;
+
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+
+	attr.req_state = IB_QPS_RTR;
+	switch (user_parm->mtu) {
+	case 256 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_256;
+		break;
+	case 512 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_512;
+		break;
+	case 1024 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_1024;
+		break;
+	case 2048 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_2048;
+		break;
+	default:
+		/* Without this the zeroed path_mtu would be programmed silently */
+		fprintf(stderr, "Unsupported MTU %d (use 256, 512, 1024 or 2048)\n", user_parm->mtu);
+		return 1;
+	}
+	printf("Mtu : %d\n", user_parm->mtu);
+	attr.state.rtr.dest_qp = dest->qpn;
+	attr.state.rtr.rq_psn = dest->psn;
+	if (user_parm->connection_type == RC) {
+		attr.state.rtr.resp_res = 1;
+		attr.state.rtr.rnr_nak_timeout = 12;
+	}
+	attr.state.rtr.primary_av.grh_valid = 0;
+	attr.state.rtr.primary_av.dlid = dest->lid;
+	attr.state.rtr.primary_av.sl = 0;
+	attr.state.rtr.primary_av.path_bits = 0;
+	attr.state.rtr.primary_av.port_num = (uint8_t)port;
+	attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS;
+	attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT |
+		IB_MOD_QP_RESP_RES |
+		IB_MOD_QP_PRIMARY_AV;
+
+	ib_status = ib_modify_qp(ctx->qp[qpindex], &attr);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Failed to modify QP to RTR\n");
+		return 1;
+	}
+
+	/* Second transition: RTR -> RTS */
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+	attr.req_state = IB_QPS_RTS;
+	attr.state.rts.sq_psn = my_psn;
+
+	/* Retry / timeout settings are only supplied for reliable connections */
+	if (user_parm->connection_type == RC) {
+		attr.state.rts.resp_res = 1;
+		attr.state.rts.local_ack_timeout = 14;
+		attr.state.rts.retry_cnt = 7;
+		attr.state.rts.rnr_retry_cnt = 7;
+		attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT |
+			IB_MOD_QP_RETRY_CNT |
+			IB_MOD_QP_LOCAL_ACK_TIMEOUT;
+	}
+	ib_status = ib_modify_qp(ctx->qp[qpindex], &attr);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Failed to modify QP to RTS\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Connect to the peer over TCP and bring every QP to RTS.
+ *
+ * For each QP the local LID/QPN/PSN/rkey/vaddr are exchanged with the peer
+ * over the socket, the QP is connected with pp_connect_ctx(), and a second
+ * exchange acts as a handshake after the state change.
+ *
+ * ctx        - initialised benchmark context
+ * servername - host to connect to, or NULL to act as the server
+ * ib_port    - 1-based IB port number, used to look up the local LID
+ * port       - TCP port
+ * p_rem_dest - receives a malloc'ed array of user_parm->numofqps remote
+ *              descriptors; written on success only, caller owns it
+ * user_parm  - benchmark settings
+ *
+ * Returns the connected socket, or INVALID_SOCKET on any failure (in which
+ * case the socket is closed and nothing is returned through p_rem_dest).
+ */
+static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,
+	int ib_port, int port, struct pingpong_dest **p_rem_dest,struct user_parameters *user_parm)
+{
+	struct pingpong_dest *my_dest = NULL;
+	struct pingpong_dest *rem_dest = NULL;
+	SOCKET sockfd = INVALID_SOCKET;
+	int rc;
+	int i;
+	int numofqps = user_parm->numofqps;
+
+	/* ib_port indexes p_port_attr[ib_port - 1] below, so reject bad values */
+	if (ib_port < 1 || ib_port > ctx->ca_attr->num_ports) {
+		fprintf(stderr, "Invalid IB port %d\n", ib_port);
+		return INVALID_SOCKET;
+	}
+
+	my_dest = malloc(sizeof (struct pingpong_dest) * numofqps);
+	if (!my_dest) {
+		perror("malloc");
+		goto fail;
+	}
+
+	rem_dest = malloc(sizeof (struct pingpong_dest) * numofqps);
+	if (!rem_dest) {
+		perror("malloc");
+		goto fail;
+	}
+
+	/* Create connection between client and server.
+	 * We do it by exchanging data over a TCP socket connection. */
+	sockfd = servername ? pp_client_connect(servername, port) :
+		pp_server_connect(port);
+
+	if (sockfd == INVALID_SOCKET) {
+		printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+			servername ? servername : "(server)", port, (int)sockfd);
+		goto fail;
+	}
+
+	for (i = 0; i < numofqps; i++) {
+		my_dest[i].lid = ctx->ca_attr->p_port_attr[ib_port-1].lid;
+		my_dest[i].psn = rand() & 0xffffff;
+		if (!my_dest[i].lid) {
+			fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+			goto fail;
+		}
+		my_dest[i].qpn = ctx->qp_attr[i].num;
+		/* TBD this should be changed into VA and different key to each qp */
+		my_dest[i].rkey = ctx->rkey;
+		my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+		printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
+			"RKey %#08x VAddr %#016Lx\n",
+			my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,
+			my_dest[i].rkey, my_dest[i].vaddr);
+
+		rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i], &rem_dest[i]) :
+			pp_server_exch_dest(sockfd, &my_dest[i], &rem_dest[i]);
+		if (rc)
+			goto fail;
+
+		printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
+			"RKey %#08x VAddr %#016Lx\n",
+			rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn,
+			rem_dest[i].rkey, rem_dest[i].vaddr);
+
+		if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, &rem_dest[i], user_parm, i))
+			goto fail;
+
+		/* An additional handshake is required *after* moving qp to RTR.
+		   Arbitrarily reuse exch_dest for this purpose. */
+		rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i], &rem_dest[i]) :
+			pp_server_exch_dest(sockfd, &my_dest[i], &rem_dest[i]);
+		if (rc)
+			goto fail;
+	}
+
+	/* The local descriptors are not needed once the exchange is done */
+	free(my_dest);
+	*p_rem_dest = rem_dest;
+	return sockfd;
+
+fail:
+	if (sockfd != INVALID_SOCKET)
+		closesocket(sockfd);
+	free(rem_dest);
+	free(my_dest);
+	return INVALID_SOCKET;
+}
+
+
+/*
+ * Print the command-line synopsis and the list of options to stdout.
+ * argv0 is the program name shown in the two synopsis lines.
+ */
+static void usage(const char *argv0)
+{
+	/* Option descriptions, printed verbatim in this order */
+	static const char *const option_help[] = {
+		" -p, --port=<port> listen on/connect to port <port> (default 18515)\n",
+		" -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n",
+		" -i, --ib-port=<port> use port <port> of IB device (default 1)\n",
+		" -c, --connection=<RC/UC> connection type RC/UC (default RC)\n",
+		" -m, --mtu=<mtu> mtu size (default 1024)\n",
+		" -g, --post=<num of posts> number of posts for each qp in the chain (default tx_depth)\n",
+		" -q, --qp=<num of qp's> Num of qp's(default 1)\n",
+		" -s, --size=<size> size of message to exchange (default 65536)\n",
+		" -a, --all Run sizes from 2 till 2^23\n",
+		" -t, --tx-depth=<dep> size of tx queue (default 100)\n",
+		" -n, --iters=<iters> number of exchanges (at least 2, default 5000)\n",
+		" -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n",
+		" -V, --version display version number\n"
+	};
+	size_t k;
+
+	fputs("Usage:\n", stdout);
+	printf(" %s start a server and wait for connection\n", argv0);
+	printf(" %s <host> connect to server at <host>\n", argv0);
+	fputs("\n", stdout);
+	fputs("Options:\n", stdout);
+
+	for (k = 0; k < sizeof option_help / sizeof option_help[0]; ++k)
+		fputs(option_help[k], stdout);
+}
+
+/*
+ * Print one result line: message size, iteration count, peak bandwidth and
+ * average bandwidth.
+ *
+ * iters      - messages posted per QP
+ * size       - message size in bytes
+ * duplex     - non-zero doubles the byte count per message
+ * tposted    - cycle stamps taken before each post (iters * numofqps entries)
+ * tcompleted - cycle stamps taken after each completion (same length)
+ * user_param - supplies numofqps
+ *
+ * Peak is derived from the smallest per-message cycle cost over every
+ * window of consecutive samples [i, j]; average uses the span from the
+ * first post to the last completion.
+ */
+static void
+print_report(unsigned int iters, unsigned size, int duplex,
+	cycles_t *tposted, cycles_t *tcompleted, struct user_parameters *user_param)
+{
+	const unsigned int total = iters * user_param->numofqps;	/* samples in each array */
+	double cycles_to_units;
+	uint64_t tsize;	/* bytes moved per message (doubled when duplex) */
+	unsigned int i, j;
+	cycles_t opt_delta;
+	cycles_t t;
+
+	/* Start from the cost of the very first message */
+	opt_delta = tcompleted[0] - tposted[0];
+
+	/* Find the peak bandwidth */
+	for (i = 0; i < total; ++i) {
+		for (j = i; j < total; ++j) {
+			t = (tcompleted[j] - tposted[i]) / (j - i + 1);
+			if (t < opt_delta)
+				opt_delta = t;
+		}
+	}
+
+	/* NOTE(review): the unit returned by get_cpu_mhz() decides whether the
+	 * figures below are really MB/sec -- confirm against get_clock */
+	cycles_to_units = get_cpu_mhz();
+
+	tsize = duplex ? 2 : 1;
+	tsize = tsize * size;
+
+	/* size and iters are unsigned, so print them with %u */
+	printf("%7u %u %7.2f %7.2f\n",
+		size, iters, tsize * cycles_to_units / opt_delta / 0x100000,
+		(uint64_t)tsize * iters * user_param->numofqps * cycles_to_units / (tcompleted[total - 1] - tposted[0]) / 0x100000);
+}
+
+
+/*
+ * Run one bandwidth measurement: post user_param->iters RDMA writes of
+ * `size` bytes on every QP and poll the shared CQ until all have completed.
+ *
+ * Each QP is allowed at most user_param->maxpostsofqpiniteration sends that
+ * are posted but not yet completed.  A cycle stamp is stored in the global
+ * tposted[] before every post and in tcompleted[] after every completion.
+ * The work request id carries the QP index so completions can be credited
+ * to the right QP.
+ *
+ * ctx        - benchmark context (QPs, CQ, buffer, work request)
+ * user_param - iteration count, QP count and per-QP posting limit
+ * rem_dest   - remote vaddr / rkey for each QP
+ * size       - message size in bytes
+ *
+ * Returns 0 on success, 1 on a post failure, a failed completion or bad
+ * arguments, 12 when polling the CQ fails.
+ */
+int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,
+	struct pingpong_dest *rem_dest, int size)
+{
+	ib_api_status_t ib_status;
+	ib_send_wr_t *bad_wr;
+	const int numofqps = user_param->numofqps;
+	const int total = user_param->iters * numofqps;	/* posts == completions expected */
+	int scnt = 0;	/* total posted */
+	int ccnt = 0;	/* total completed */
+	int index;
+
+	/* With a limit below 1 nothing would ever be posted and the loop would spin */
+	if (user_param->maxpostsofqpiniteration < 1) {
+		fprintf(stderr, "Number of posts per QP must be at least 1\n");
+		return 1;
+	}
+
+	ctx->list.vaddr = (uintptr_t) ctx->buf;
+	ctx->list.length = size;
+	ctx->list.lkey = ctx->lkey;
+
+	ctx->wr.ds_array = &ctx->list;
+	ctx->wr.num_ds = 1;
+	ctx->wr.wr_type = WR_RDMA_WRITE;
+
+	/* Send inline only when the message fits the QP's inline limit */
+	if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) {
+		ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
+	} else {
+		ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
+	}
+	ctx->wr.p_next = NULL;
+
+	/* clear the scnt ccnt counters for each iteration */
+	for (index = 0; index < numofqps; index++) {
+		ctx->scnt[index] = 0;
+		ctx->ccnt[index] = 0;
+	}
+
+	while (scnt < total || ccnt < total) {
+		/* run over all the qps and post as much as each one is allowed */
+		for (index = 0; index < numofqps; index++) {
+			ctx->wr.remote_ops.vaddr = rem_dest[index].vaddr;
+			ctx->wr.remote_ops.rkey = rem_dest[index].rkey;
+			ctx->wr.wr_id = index;
+
+			while (ctx->scnt[index] < user_param->iters &&
+				(ctx->scnt[index] - ctx->ccnt[index]) < user_param->maxpostsofqpiniteration) {
+				tposted[scnt] = get_cycles();
+				ib_status = ib_post_send(ctx->qp[index], &ctx->wr, &bad_wr);
+				if (ib_status != IB_SUCCESS) {
+					fprintf(stderr, "Couldn't post send: qp index = %d qp scnt=%d total scnt %d qp ccnt=%d total ccnt %d\n",
+						index,ctx->scnt[index],scnt,ctx->ccnt[index],ccnt);
+					return 1;
+				}
+				ctx->scnt[index]++;
+				++scnt;
+			}
+		}
+
+		/* finished posting now polling */
+		if (ccnt < total) {
+			ib_wc_t wc;
+			ib_wc_t *p_wc_done, *p_wc_free;
+
+			/* A one-element free list: each poll returns at most one completion */
+			p_wc_free = &wc;
+			p_wc_done = NULL;
+			p_wc_free->p_next = NULL;
+
+			do {
+				ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
+				if (ib_status == IB_SUCCESS) {
+					int qp_idx;
+
+					tcompleted[ccnt] = get_cycles();
+					if (p_wc_done->status != IB_WCS_SUCCESS) {
+						fprintf(stderr, "Completion wth error at %s:\n",
+							user_param->servername ? "client" : "server");
+						fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
+							p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
+						return 1;
+					}
+
+					/* here the id is the index to the qp num */
+					qp_idx = (int)p_wc_done->wr_id;
+					if (qp_idx < 0 || qp_idx >= numofqps) {
+						fprintf(stderr, "Completion with unexpected wr_id %d\n", qp_idx);
+						return 1;
+					}
+					ctx->ccnt[qp_idx]++;
+					++ccnt;
+
+					/* Hand the entry back to the free list for the next poll */
+					p_wc_free = p_wc_done;
+					p_wc_free->p_next = NULL;
+					p_wc_done = NULL;
+				}
+			} while (ib_status == IB_SUCCESS);
+
+			/* IB_NOT_FOUND just means the CQ is empty for now */
+			if (ib_status != IB_NOT_FOUND) {
+				fprintf(stderr, "Poll Recieve CQ failed %d\n", ib_status);
+				return 12;
+			}
+		}
+	}
+	return(0);
+}
+
+
+/*
+ * Entry point of the RDMA write bandwidth test.
+ *
+ * Parses the command line, initialises Winsock and the IB context, connects
+ * to the peer, runs the measurement (one size, or 2^1..2^23 with -a), prints
+ * the results and performs a final handshake with the peer.
+ *
+ * Exit codes: 0 success, 1 bad arguments / setup or allocation failure,
+ * 9 connection setup failure, 17 / 18 measurement failure.
+ */
+int __cdecl main(int argc, char *argv[])
+{
+	struct pingpong_context *ctx;
+	struct pingpong_dest my_dest;
+	struct pingpong_dest *rem_dest;
+	struct user_parameters user_param;
+	char *ib_devname = NULL;
+	int port = 18515;
+	int ib_port = 1;
+	unsigned size = 65536;
+	SOCKET sockfd = INVALID_SOCKET;
+	WSADATA wsaData;
+	int i;
+	int iResult;
+	int duplex = 0;
+	size_t nsamples;
+
+	/* my_dest is only a placeholder for the final handshakes; never send garbage */
+	memset(&my_dest, 0, sizeof my_dest);
+
+	/* init default values to user's parameters */
+	memset(&user_param, 0, sizeof(struct user_parameters));
+	user_param.mtu = 0;
+	user_param.iters = 5000;
+	user_param.tx_depth = 100;
+	user_param.servername = NULL;
+	user_param.numofqps = 1;
+	user_param.maxpostsofqpiniteration = 100;
+
+	/* Parameter parsing. */
+	while (1) {
+		int c;
+
+		static struct option long_options[] = {
+			{ "port", 1, NULL, 'p' },
+			{ "ib-dev", 1, NULL, 'd' },
+			{ "ib-port", 1, NULL, 'i' },
+			{ "mtu", 1, NULL, 'm' },
+			{ "qp", 1, NULL, 'q' },
+			{ "post", 1, NULL, 'g' },
+			{ "connection", 1, NULL, 'c' },
+			{ "size", 1, NULL, 's' },
+			{ "iters", 1, NULL, 'n' },
+			{ "tx-depth", 1, NULL, 't' },
+			{ "all", 0, NULL, 'a' },
+			{ "bidirectional", 0, NULL, 'b' },
+			{ "version", 0, NULL, 'V' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:d:i:m:q:g:c:s:n:t:baV", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'd':
+			/* Accepted, but the device name is not used yet (first HCA is opened) */
+			ib_devname = _strdup(optarg);
+			break;
+
+		case 'c':
+			/* Anything other than "UC" keeps the default (RC) */
+			if (strcmp("UC",optarg)==0)
+				user_param.connection_type=UC;
+			break;
+
+		case 'm':
+			user_param.mtu = strtol(optarg, NULL, 0);
+			break;
+
+		case 'q':
+			user_param.numofqps = strtol(optarg, NULL, 0);
+			/* Array sizes and loop bounds are derived from this value */
+			if (user_param.numofqps < 1) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'g':
+			user_param.maxpostsofqpiniteration = strtol(optarg, NULL, 0);
+			/* A limit below 1 would keep run_iter() from ever posting */
+			if (user_param.maxpostsofqpiniteration < 1) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'a':
+			user_param.all = ALL;
+			break;
+
+		case 'V':
+			printf("rdma_bw version : %.2f\n",VERSION);
+			return 0;
+
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			/* IB ports are numbered from 1; the value is used as index ib_port-1 */
+			if (ib_port < 1) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 's':
+			size = strtol(optarg, NULL, 0);
+			/* Negative input wraps to a huge unsigned value and is rejected here */
+			if (size < 1 || size > UINT_MAX / 2) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 't':
+			user_param.tx_depth = strtol(optarg, NULL, 0);
+			if (user_param.tx_depth < 1) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'n':
+			user_param.iters = strtol(optarg, NULL, 0);
+			if (user_param.iters < 2) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'b':
+			duplex = 1;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	/* One trailing argument means "client of <host>"; none means server */
+	if (optind == argc - 1)
+		user_param.servername = _strdup(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	printf("------------------------------------------------------------------\n");
+	if (duplex == 1) {
+		printf(" RDMA_Write Bidirectional BW Test\n");
+	} else {
+		printf(" RDMA_Write BW Test\n");
+	}
+
+	printf("Inline data is used up to 400 bytes message\n");
+	printf("Number of qp's running %d\n",user_param.numofqps);
+	printf("Number of iterations %d\n",user_param.iters);
+	printf("Message size %u\n",size);
+	if (user_param.connection_type==RC) {
+		printf("Connection type : RC\n");
+	} else {
+		printf("Connection type : UC\n");
+	}
+	if (user_param.maxpostsofqpiniteration > user_param.tx_depth ) {
+		printf("Can not post more than tx_depth , adjusting number of post to tx_depth\n");
+		user_param.maxpostsofqpiniteration = user_param.tx_depth;
+	} else {
+		printf("Each Qp will post up to %d messages each time\n",user_param.maxpostsofqpiniteration);
+	}
+	/* Done with parameter parsing. Perform setup. */
+
+	// Initialize Winsock
+	iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
+	if (iResult != NO_ERROR) {
+		printf("Error at WSAStartup()\n");
+		return 1;
+	}
+
+	if (user_param.all == ALL) {
+		/* since we run all sizes, allocate for the largest one */
+		size = 8388608; /*2^23 */
+	}
+
+	srand(GetCurrentProcessId() * GetTickCount());
+
+	//TODO: get pagesize from sysinfo
+	page_size = 4096;
+
+	//TODO:get the device names
+
+	ctx = pp_init_ctx(size, ib_port, &user_param);
+	if (!ctx)
+		return 1;
+
+	sockfd = pp_open_port(ctx, user_param.servername, ib_port, port,&rem_dest,&user_param);
+	if (sockfd == INVALID_SOCKET)
+		return 9;
+
+	printf("------------------------------------------------------------------\n");
+	printf(" #bytes #iterations BW peak[MB/sec] BW average[MB/sec] \n");
+	/* For half duplex tests, server just waits for client to exit */
+	/* use dummy my_dest struct*/
+	if (!user_param.servername && !duplex) {
+		pp_server_exch_dest(sockfd, &my_dest,rem_dest);
+		send(sockfd, "done", sizeof "done",0);
+		closesocket(sockfd);
+		return 0;
+	}
+
+	/* One time stamp per posted message; compute the count in size_t */
+	nsamples = (size_t)user_param.iters * (size_t)user_param.numofqps;
+
+	tposted = malloc(nsamples * sizeof *tposted);
+	if (!tposted) {
+		perror("malloc");
+		return 1;
+	}
+
+	tcompleted = malloc(nsamples * sizeof *tcompleted);
+	if (!tcompleted) {
+		perror("malloc");
+		return 1;
+	}
+
+	if (user_param.all == ALL) {
+		for (i = 1; i < 24 ; ++i) {
+			size = 1 << i;
+			if(run_iter(ctx, &user_param, rem_dest, size))
+				return 17;
+			print_report(user_param.iters, size, duplex, tposted, tcompleted, &user_param);
+		}
+	} else {
+		if(run_iter(ctx, &user_param, rem_dest, size))
+			return 18;
+		print_report(user_param.iters, size, duplex, tposted, tcompleted, &user_param);
+	}
+
+	/* Final handshake so neither side exits while the other is still running */
+	/* use dummy my_dest struct*/
+	if (user_param.servername) {
+		pp_client_exch_dest(sockfd, &my_dest,rem_dest);
+	} else {
+		pp_server_exch_dest(sockfd, &my_dest,rem_dest);
+	}
+	send(sockfd, "done", sizeof "done",0);
+	closesocket(sockfd);
+
+	free(tposted);
+	free(tcompleted);
+	printf("------------------------------------------------------------------\n");
+	return 0;
+}
--- /dev/null
+/*\r
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ * Redistribution and use in source and binary forms, with or\r
+ * without modification, are permitted provided that the following\r
+ * conditions are met:\r
+ *\r
+ * - Redistributions of source code must retain the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer.\r
+ *\r
+ * - Redistributions in binary form must reproduce the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer in the documentation and/or other materials\r
+ * provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id: vstat.rc 636 2005-10-19 17:46:55Z sleybo $\r
+ */\r
+\r
+\r
+#include <oib_ver.h>\r
+\r
+#define VER_FILETYPE VFT_APP\r
+#define VER_FILESUBTYPE VFT2_UNKNOWN\r
+\r
+#ifdef _DEBUG_\r
+#define VER_FILEDESCRIPTION_STR "RDMA write Bandwidth Test (Debug)"\r
+#else\r
+#define VER_FILEDESCRIPTION_STR "RDMA write Bandwidth Test "\r
+#endif\r
+\r
+#define VER_INTERNALNAME_STR "ib_write_bw.exe"\r
+#define VER_ORIGINALFILENAME_STR "ib_write_bw.exe"\r
+\r
+#include <common.ver>\r
--- /dev/null
+TARGETNAME=ib_write_lat\r
+TARGETPATH=..\..\..\..\bin\user\obj$(BUILD_ALT_DIR)\r
+TARGETTYPE=PROGRAM\r
+UMTYPE=console\r
+USE_CRTDLL=1\r
+\r
+C_DEFINES=$(C_DEFINES) /D__WIN__ \r
+\r
+SOURCES=write_lat.rc \\r
+ ..\getopt.c \\r
+ ..\perf_utils.c \\r
+ write_lat.c \r
+\r
+INCLUDES=..;..\..\..\..\inc;..\..\..\..\inc\user\r
+\r
+RCOPTIONS=/I..\..\win\include\r
+\r
+TARGETLIBS= \\r
+ $(DDK_LIB_PATH)\Ws2_32.lib \\r
+!if $(FREEBUILD)\r
+ $(TARGETPATH)\*\complib.lib \\r
+ $(TARGETPATH)\*\ibal.lib\r
+!else\r
+ $(TARGETPATH)\*\complibd.lib \\r
+ $(TARGETPATH)\*\ibald.lib\r
+!endif\r
+\r
+MSC_WARNING_LEVEL= /W3\r
--- /dev/null
+#\r
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source\r
+# file to this component. This file merely indirects to the real make file\r
+# that is shared by all the driver components of the OpenIB Windows project.\r
+#\r
+\r
+!INCLUDE ..\..\..\..\inc\openib.def\r
--- /dev/null
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2005 Hewlett Packard, Inc (Grant Grundler)
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "getopt.h"
+#include "perf_defs.h"
+#include "get_clock.h"
+
+
+/* Connection types; stored in user_parameters.connection_type */
+#define RC 0
+#define UC 1
+
+
+/* NOTE(review): not referenced in the part of the file visible here */
+static int page_size;
+/* NOTE(review): presumably the array of cycle-counter samples -- confirm where it is allocated */
+cycles_t *tstamp;
+/* Benchmark settings handed to pp_init_ctx() and pp_connect_ctx() */
+struct user_parameters {
+ const char *servername; /* NOTE(review): presumably NULL on the server side -- confirm in main() */
+ int connection_type; /* RC or UC */
+ int mtu; /* path MTU in bytes; 0 lets pp_init_ctx() pick one from the device id */
+ int all; /* run all msg size */
+ int iters;
+ int tx_depth; /* send queue depth of the QP; also the CQ size */
+};
+
+/* Output formatting switches. NOTE(review): consumers are outside the visible code */
+struct report_options {
+ int unsorted;
+ int histogram;
+ int cycles; /* report delta's in cycles, not microsec's */
+};
+
+
+/*
+ * Completion callback registered through cq_create.pfn_comp_cb in
+ * pp_init_ctx().  It is a deliberate no-op: both arguments are ignored.
+ */
+void
+pp_cq_comp_cb(
+	IN const ib_cq_handle_t h_cq,
+	IN void *cq_context )
+{
+	/* Mention the arguments only so the compiler does not warn about them */
+	UNUSED_PARAM( cq_context );
+	UNUSED_PARAM( h_cq );
+}
+
+
+/*
+ * Allocate and initialise the latency-test context (a single QP).
+ *
+ * Steps, in order: allocate the context, the QP handle / attribute slots and
+ * a zeroed 2 * size byte buffer, open an AL instance, open the first CA
+ * reported by ib_get_ca_guids(), query its attributes, allocate a PD,
+ * register the buffer, create the CQ, create the QP and move it to INIT,
+ * and finally pre-fill the RDMA-write work request.
+ *
+ * size      - message size in bytes
+ * port      - IB port number programmed into the QP
+ * user_parm - benchmark settings; user_parm->mtu is filled in with a default
+ *             when it is 0
+ *
+ * Returns the new context, or NULL on failure.  On failure everything
+ * allocated here is released again.
+ */
+static struct pingpong_context *pp_init_ctx(unsigned size, int port, struct user_parameters *user_parm)
+{
+	struct pingpong_context *ctx;
+	ib_api_status_t ib_status = IB_SUCCESS;
+	size_t guid_count;
+	ib_net64_t *ca_guid_array = NULL;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx) {
+		perror("malloc");
+		return NULL;
+	}
+	memset(ctx, 0, sizeof *ctx);
+	ctx->size = size;
+	ctx->tx_depth = user_parm->tx_depth;
+
+	ctx->qp = malloc(sizeof (ib_qp_handle_t));
+	if (!ctx->qp) {
+		perror("malloc");
+		goto fail;
+	}
+	ctx->qp_attr = malloc(sizeof (ib_qp_attr_t));
+	if (!ctx->qp_attr) {
+		perror("malloc");
+		goto fail;
+	}
+
+	ctx->buf = malloc(size * 2);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		goto fail;
+	}
+	memset(ctx->buf, 0, size * 2);
+
+	/* Last byte of the first half and last byte of the second half */
+	ctx->post_buf = (char*)ctx->buf + (size - 1);
+	ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);
+
+	/*
+	 * Open the AL instance
+	 */
+	ib_status = ib_open_al(&ctx->al);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_open_al failed status = %d\n", ib_status);
+		goto fail;
+	}
+
+	/*
+	 * Get the Local CA Guids.  The first call passes no array and is
+	 * expected to fail with IB_INSUFFICIENT_MEMORY while reporting the count.
+	 */
+	ib_status = ib_get_ca_guids(ctx->al, NULL, &guid_count);
+	if (ib_status != IB_INSUFFICIENT_MEMORY) {
+		fprintf(stderr,"ib_get_ca_guids1 failed status = %d\n", (uint32_t)ib_status);
+		goto fail;
+	}
+
+	/*
+	 * If no CA's Present then return
+	 */
+	if (guid_count == 0) {
+		fprintf(stderr, "No channel adapters found\n");
+		goto fail;
+	}
+
+	ca_guid_array = malloc(sizeof(ib_net64_t) * guid_count);
+	if (!ca_guid_array) {
+		perror("malloc");
+		goto fail;
+	}
+
+	ib_status = ib_get_ca_guids(ctx->al, ca_guid_array, &guid_count);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_get_ca_guids2 failed with status = %d\n", ib_status);
+		goto fail;
+	}
+
+	/*
+	 * Open only the first HCA
+	 */
+	ib_status = ib_open_ca(ctx->al, ca_guid_array[0], NULL,
+		NULL,	/* ca_context */
+		&ctx->ca);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr,"ib_open_ca failed with status = %d\n", ib_status);
+		goto fail;
+	}
+
+	/* The GUID list is only needed to open the CA */
+	free(ca_guid_array);
+	ca_guid_array = NULL;
+
+	{
+		/* Query the CA: first call obtains the required size, second the data */
+		uint32_t bsize = 0;
+
+		ib_status = ib_query_ca(ctx->ca, NULL, &bsize);
+		if (ib_status != IB_INSUFFICIENT_MEMORY) {
+			fprintf(stderr, "Failed to query device props\n");
+			goto fail;
+		}
+
+		ctx->ca_attr = malloc(bsize);
+		if (!ctx->ca_attr) {
+			perror("malloc");
+			goto fail;
+		}
+
+		ib_status = ib_query_ca(ctx->ca, ctx->ca_attr, &bsize);
+		if (ib_status != IB_SUCCESS) {
+			printf("ib_query_ca failed with status = %d\n", ib_status);
+			goto fail;
+		}
+
+		/* user did not ask for specific mtu: device id 23108 gets 1024, others 2048 */
+		if (user_parm->mtu == 0) {
+			if (ctx->ca_attr->dev_id == 23108) {
+				user_parm->mtu = 1024;
+			} else {
+				user_parm->mtu = 2048;
+			}
+		}
+	}
+
+	ib_status = ib_alloc_pd(ctx->ca,
+		IB_PDT_NORMAL,
+		ctx,	/* pd_context */
+		&ctx->pd);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		goto fail;
+	}
+
+	{
+		ib_mr_create_t mr_create;
+
+		memset(&mr_create, 0, sizeof mr_create);
+		mr_create.length = size * 2;
+		mr_create.vaddr = ctx->buf;
+		mr_create.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
+
+		ib_status = ib_reg_mem(ctx->pd, &mr_create, &ctx->lkey, &ctx->rkey, &ctx->mr);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't allocate MR\n");
+			goto fail;
+		}
+	}
+
+	{
+		ib_cq_create_t cq_create;
+
+		cq_create.size = user_parm->tx_depth;
+		cq_create.h_wait_obj = NULL;
+		cq_create.pfn_comp_cb = pp_cq_comp_cb;
+		ib_status = ib_create_cq(ctx->ca, &cq_create, ctx, NULL, &ctx->scq);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create CQ\n");
+			goto fail;
+		}
+	}
+
+	{
+		ib_qp_create_t qp_create;
+		ib_qp_mod_t qp_modify;
+
+		memset(&qp_create, 0, sizeof(ib_qp_create_t));
+		qp_create.h_sq_cq = ctx->scq;
+		qp_create.h_rq_cq = ctx->scq;
+		qp_create.sq_depth = user_parm->tx_depth;
+		qp_create.rq_depth = 1;
+		qp_create.sq_sge = 1;
+		qp_create.rq_sge = 1;
+		//TODO MAX_INLINE
+
+		switch (user_parm->connection_type) {
+		case RC :
+			qp_create.qp_type = IB_QPT_RELIABLE_CONN;
+			break;
+		case UC :
+			qp_create.qp_type = IB_QPT_UNRELIABLE_CONN;
+			break;
+		default:
+			fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
+			goto fail;
+		}
+
+		qp_create.sq_signaled = FALSE;
+
+		ib_status = ib_create_qp(ctx->pd, &qp_create, NULL, NULL, &ctx->qp[0]);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Couldn't create QP\n");
+			goto fail;
+		}
+
+		memset(&qp_modify, 0, sizeof(ib_qp_mod_t));
+		qp_modify.req_state = IB_QPS_INIT;
+		qp_modify.state.init.pkey_index = 0;
+		qp_modify.state.init.primary_port = (uint8_t)port;
+		qp_modify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
+
+		ib_status = ib_modify_qp(ctx->qp[0], &qp_modify);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to modify QP to INIT\n");
+			goto fail;
+		}
+
+		/* Cache the attributes (QP number, inline limit) in the context */
+		ib_status = ib_query_qp(ctx->qp[0], &ctx->qp_attr[0]);
+		if (ib_status != IB_SUCCESS) {
+			fprintf(stderr, "Failed to query QP\n");
+			goto fail;
+		}
+		fprintf(stderr, "max inline size %d\n",ctx->qp_attr[0].sq_max_inline);
+	}
+
+	/* Work request template: a single-segment RDMA write */
+	ctx->wr.wr_id = PINGPONG_RDMA_WRID;
+	ctx->wr.ds_array = &ctx->list;
+	ctx->wr.num_ds = 1;
+	ctx->wr.wr_type = WR_RDMA_WRITE;
+	ctx->wr.p_next = NULL;
+
+	return ctx;
+
+fail:
+	/*
+	 * NOTE(review): relies on ib_close_al() tearing down the CA, PD, MR, CQ
+	 * and QP opened under this AL instance before the buffer is freed --
+	 * confirm against the IBAL documentation.
+	 */
+	if (ctx->al)
+		ib_close_al(ctx->al);
+	free(ca_guid_array);
+	free(ctx->ca_attr);
+	free(ctx->buf);
+	free(ctx->qp_attr);
+	free(ctx->qp);
+	free(ctx);
+	return NULL;
+}
+
+
+
+
+/*
+ * Move one QP from INIT to RTR and then to RTS using the peer's addressing
+ * information.
+ *
+ * ctx       - benchmark context holding the QP handles
+ * port      - local IB port number placed in the address vector
+ * my_psn    - initial send PSN for this side
+ * dest      - remote LID / QPN / PSN received from the peer
+ * user_parm - supplies the MTU (256, 512, 1024 or 2048) and connection type
+ * index     - index of the QP in ctx->qp[] to connect
+ *
+ * Returns 0 on success, 1 on failure (unsupported MTU or a failed
+ * ib_modify_qp call).
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+	struct pingpong_dest *dest, struct user_parameters *user_parm,int index)
+{
+	ib_api_status_t ib_status;
+	ib_qp_mod_t attr;
+
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+
+	attr.req_state = IB_QPS_RTR;
+	switch (user_parm->mtu) {
+	case 256 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_256;
+		break;
+	case 512 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_512;
+		break;
+	case 1024 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_1024;
+		break;
+	case 2048 :
+		attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_2048;
+		break;
+	default:
+		/* Without this the zeroed path_mtu would be programmed silently */
+		fprintf(stderr, "Unsupported MTU %d (use 256, 512, 1024 or 2048)\n", user_parm->mtu);
+		return 1;
+	}
+	printf("Mtu : %d\n", user_parm->mtu);
+	attr.state.rtr.dest_qp = dest->qpn;
+	attr.state.rtr.rq_psn = dest->psn;
+	if (user_parm->connection_type == RC) {
+		attr.state.rtr.resp_res = 1;
+		attr.state.rtr.rnr_nak_timeout = 12;
+	}
+	attr.state.rtr.primary_av.grh_valid = 0;
+	attr.state.rtr.primary_av.dlid = dest->lid;
+	attr.state.rtr.primary_av.sl = 0;
+	attr.state.rtr.primary_av.path_bits = 0;
+	attr.state.rtr.primary_av.port_num = (uint8_t)port;
+	attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS;
+	attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT |
+		IB_MOD_QP_RESP_RES |
+		IB_MOD_QP_PRIMARY_AV;
+
+	ib_status = ib_modify_qp(ctx->qp[index], &attr);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Failed to modify QP to RTR\n");
+		return 1;
+	}
+
+	/* Second transition: RTR -> RTS */
+	memset(&attr, 0, sizeof(ib_qp_mod_t));
+	attr.req_state = IB_QPS_RTS;
+	attr.state.rts.sq_psn = my_psn;
+
+	/* Retry / timeout settings are only supplied for reliable connections */
+	if (user_parm->connection_type == RC) {
+		attr.state.rts.resp_res = 1;
+		attr.state.rts.local_ack_timeout = 14;
+		attr.state.rts.retry_cnt = 7;
+		attr.state.rts.rnr_retry_cnt = 7;
+		attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT |
+			IB_MOD_QP_RETRY_CNT |
+			IB_MOD_QP_LOCAL_ACK_TIMEOUT;
+	}
+	ib_status = ib_modify_qp(ctx->qp[index], &attr);
+	if (ib_status != IB_SUCCESS) {
+		fprintf(stderr, "Failed to modify QP to RTS\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Open the TCP side channel to the peer and use it to connect the QP(s).
+ *
+ * Connects to 'servername' when it is non-NULL, otherwise waits for a
+ * client on 'port'.  For each QP the local LID/QPN/PSN/rkey/vaddr are
+ * exchanged with the peer, the QP is connected with pp_connect_ctx(), and
+ * a second exchange is performed as a post-connect handshake.
+ *
+ * On success the connected socket is returned and *p_rem_dest points to a
+ * malloc()ed array of remote destinations that the caller owns.  On any
+ * failure INVALID_SOCKET is returned, *p_rem_dest is left untouched, and
+ * the socket and both destination arrays are released.
+ */
+static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,
+                           int ib_port, int port, struct pingpong_dest **p_rem_dest,struct user_parameters *user_parm)
+{
+    struct pingpong_dest *my_dest = NULL;
+    struct pingpong_dest *rem_dest = NULL;
+    SOCKET sockfd = INVALID_SOCKET;
+    int rc;
+    int i;
+    int numofqps = 1;
+
+    my_dest = malloc(numofqps * sizeof *my_dest);
+    if (!my_dest) {
+        perror("malloc");
+        goto fail;
+    }
+
+    rem_dest = malloc(numofqps * sizeof *rem_dest);
+    if (!rem_dest) {
+        perror("malloc");
+        goto fail;
+    }
+
+    /* Create connection between client and server.
+     * We do it by exchanging data over a TCP socket connection. */
+    sockfd = servername ? pp_client_connect(servername, port) :
+                          pp_server_connect(port);
+    if (sockfd == INVALID_SOCKET) {
+        /* servername is NULL on the server side; never hand NULL to %s. */
+        printf("pp_connect_sock(%s,%d) failed (%d)!\n",
+               servername ? servername : "(null)", port, (int)sockfd);
+        goto fail;
+    }
+
+    for (i = 0; i < numofqps; i++) {
+        my_dest[i].lid = ctx->ca_attr->p_port_attr[ib_port-1].lid;
+        my_dest[i].psn = rand() & 0xffffff;
+        if (!my_dest[i].lid) {
+            fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+            /* Must be INVALID_SOCKET: the caller only tests for that value. */
+            goto fail;
+        }
+        my_dest[i].qpn = ctx->qp_attr[i].num;
+        /* TBD this should be changed into VA and a different key for each qp */
+        my_dest[i].rkey = ctx->rkey;
+        my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;
+
+        printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
+               "RKey %#08x VAddr %#016Lx\n",
+               my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,
+               my_dest[i].rkey, my_dest[i].vaddr);
+
+        rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
+                          pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
+        if (rc)
+            goto fail;
+
+        printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
+               "RKey %#08x VAddr %#016Lx\n",
+               rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn,
+               rem_dest[i].rkey, rem_dest[i].vaddr);
+
+        if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, &rem_dest[i], user_parm, i))
+            goto fail;
+
+        /* An additional handshake is required *after* moving qp to RTR.
+           Arbitrarily reuse exch_dest for this purpose. */
+        rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
+                          pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
+        if (rc)
+            goto fail;
+    }
+
+    /* Only the remote descriptors are handed back; the local ones are done. */
+    free(my_dest);
+    *p_rem_dest = rem_dest;
+    return sockfd;
+
+fail:
+    if (sockfd != INVALID_SOCKET)
+        closesocket(sockfd);
+    free(rem_dest);
+    free(my_dest);
+    return INVALID_SOCKET;
+}
+
+/*
+ * Print the command-line synopsis and the option summary to stdout.
+ * 'argv0' is the program name shown in the two synopsis lines.
+ */
+static void usage(const char *argv0)
+{
+    /* One entry per option line; printed verbatim, in order. */
+    static const char *const option_help[] = {
+        " -p, --port=<port> listen on/connect to port <port> (default 18515)\n",
+        " -c, --connection=<RC/UC> connection type RC/UC (default RC)\n",
+        " -m, --mtu=<mtu> mtu size (default 1024)\n",
+        " -d, --ib-dev=<dev> use IB device <dev> (default first device found)\n",
+        " -i, --ib-port=<port> use port <port> of IB device (default 1)\n",
+        " -s, --size=<size> size of message to exchange (default 1)\n",
+        " -a, --all Run sizes from 2 till 2^23\n",
+        " -t, --tx-depth=<dep> size of tx queue (default 50)\n",
+        " -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n",
+        " -C, --report-cycles report times in cpu cycle units (default microseconds)\n",
+        " -H, --report-histogram print out all results (default print summary only)\n",
+        " -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n",
+        " -V, --version display version number\n"
+    };
+    unsigned int line;
+
+    printf("Usage:\n");
+    printf(" %s start a server and wait for connection\n", argv0);
+    printf(" %s <host> connect to server at <host>\n", argv0);
+    printf("\n");
+    printf("Options:\n");
+
+    for (line = 0; line < sizeof option_help / sizeof option_help[0]; ++line)
+        printf("%s", option_help[line]);
+}
+
+/*
+ * Return the median of the n samples in delta[], which the caller must
+ * already have sorted.
+ *
+ * When there is an
+ *   odd number of samples, the median is the middle number.
+ *   even number of samples, the median is the mean of the
+ *   two middle numbers.
+ *
+ * Returns 0 when there are no samples (n < 1), so that no element of
+ * delta[] is read in that case.
+ */
+static inline cycles_t get_median(int n, cycles_t delta[])
+{
+    if (n < 1)
+        return 0;
+
+    /* Odd count: a single middle element exists (also covers n == 1). */
+    if (n % 2)
+        return delta[n / 2];
+
+    /* Even count: average the two elements straddling the middle. */
+    return (delta[n / 2 - 1] + delta[n / 2]) / 2;
+}
+
+
+/*
+ * Print the latency summary (and optionally every sample) for one run.
+ *
+ * options - selects the unit (cycles vs. usec) and whether the unsorted
+ *           and/or sorted sample lists are dumped before the summary.
+ * iters   - number of entries in tstamp[]; at least 2 are needed to form
+ *           one interval, otherwise a diagnostic is printed and nothing
+ *           else happens.
+ * tstamp  - time stamps taken at each send.
+ * size    - message size, echoed in the summary line.
+ *
+ * Consecutive time stamps are a round trip apart; every printed value is
+ * halved so that it reads as a one-way latency.  The summary line prints
+ * size, iters, minimum, maximum and median, in that order.
+ */
+static void print_report(struct report_options * options,
+                         unsigned int iters, cycles_t *tstamp, int size)
+{
+    double cycles_to_units;
+    cycles_t median;
+    unsigned int i;
+    unsigned int samples;
+    const char* units;
+    cycles_t *delta;
+
+    /* iters is unsigned: without this guard "iters - 1" and "iters - 2"
+     * would wrap around and index far outside delta[]. */
+    if (iters < 2) {
+        fprintf(stderr, "print_report: need at least 2 time stamps, got %u\n", iters);
+        return;
+    }
+    samples = iters - 1;
+
+    delta = malloc(samples * sizeof *delta);
+    if (!delta) {
+        perror("malloc");
+        return;
+    }
+
+    for (i = 0; i < samples; ++i)
+        delta[i] = tstamp[i + 1] - tstamp[i];
+
+    if (options->cycles) {
+        cycles_to_units = 1;
+        units = "cycles";
+    } else {
+        /* NOTE(review): the division suggests get_cpu_mhz() reports Hz in
+         * this port; if it returns an integer type this truncates - confirm. */
+        cycles_to_units = get_cpu_mhz()/1000000;
+        units = "usec";
+    }
+
+    /* Samples in measurement order, before sorting destroys that order. */
+    if (options->unsorted) {
+        printf("#, %s\n", units);
+        for (i = 0; i < samples; ++i)
+            printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
+    }
+
+    qsort(delta, samples, sizeof *delta, cycles_compare);
+
+    /* Samples again, now in sorted order. */
+    if (options->histogram) {
+        printf("#, %s\n", units);
+        for (i = 0; i < samples; ++i)
+            printf("%u, %g\n", i + 1, delta[i] / cycles_to_units / 2);
+    }
+
+    median = get_median((int)samples, delta);
+    printf("%7d %u %7.2f %7.2f %7.2f\n",
+           size, iters, delta[0] / cycles_to_units / 2,
+           delta[samples - 1] / cycles_to_units / 2, median / cycles_to_units / 2);
+
+    free(delta);
+}
+
+
+/*
+ * Run one ping-pong measurement: user_param->iters RDMA writes of 'size'
+ * bytes each, alternating with the peer.
+ *
+ * The scatter/gather entry and the remote address/rkey of ctx->wr are set
+ * up from ctx->buf and rem_dest, then the loop below repeats three steps
+ * until all counters reach the iteration count:
+ *   1. spin until the polled byte equals the next expected counter value,
+ *   2. record a time stamp in tstamp[], store the new counter value in the
+ *      posted byte and post the write,
+ *   3. spin on the send CQ until one completion is returned.
+ * tstamp is not declared here; it is the array main() allocates.
+ *
+ * Returns 0 on success, 1 if posting fails or a completion reports an
+ * error status, and 12 if polling the CQ itself fails.
+ */
+int run_iter(struct pingpong_context *ctx, struct user_parameters *user_param,
+ struct pingpong_dest *rem_dest, int size)
+{
+ ib_api_status_t ib_status;
+ ib_qp_handle_t qp;
+ ib_send_wr_t *bad_wr;
+ volatile char *poll_buf; /* byte watched for the peer's write */
+ volatile char *post_buf; /* byte stamped with the counter before each send */
+ /* scnt: writes posted, ccnt: send completions reaped, rcnt: peer writes seen */
+ int scnt, ccnt, rcnt;
+ int iters;
+ int tx_depth;
+ /* NOTE: tx_depth is assigned below but never read in this function. */
+ iters = user_param->iters;
+ tx_depth = user_param->tx_depth;
+ /* Source: 'size' bytes at the start of the local buffer. */
+ /* Destination: the address and rkey advertised by the peer. */
+ ctx->list.vaddr = (uintptr_t) ctx->buf ;
+ ctx->list.length = size;
+ ctx->list.lkey = ctx->lkey;
+ ctx->wr.remote_ops.vaddr = rem_dest->vaddr;
+ ctx->wr.remote_ops.rkey = rem_dest->rkey;
+ /* Request inline data only when the message fits the QP's inline limit. */
+ if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) {/* compliance with perf_main */
+ ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
+ } else {
+ ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
+ }
+ scnt = 0;
+ rcnt = 0;
+ ccnt = 0;
+ /* In ALL mode use the last byte of the message; 8388608 is 2^23 (presumably the offset of the receive area - confirm against pp_init_ctx). */
+ if(user_param->all == ALL) {
+ post_buf = (char*)ctx->buf + size - 1;
+ poll_buf = (char*)ctx->buf + 8388608 + size - 1;
+ } else {
+ poll_buf = ctx->poll_buf;
+ post_buf = ctx->post_buf;
+ }
+ qp = ctx->qp[0];
+ /* Only the first QP is used. */
+ /* Done with setup. Start the test. */
+ while (scnt < iters || ccnt < iters || rcnt < iters) {
+ /* The side that was given a server name has not sent yet on its first pass, so it skips the wait and sends first. */
+ /* Wait till buffer changes. */
+ if (rcnt < user_param->iters && !(scnt < 1 && user_param->servername)) {
+ ++rcnt;
+ while (*poll_buf != (char)rcnt)
+ ;
+ /* Here the data is already in the physical memory.
+ If we wanted to actually use it, we may need
+ a read memory barrier here. */
+ }
+ /* Send step: time stamp first, then stamp the buffer and post. */
+ if (scnt < user_param->iters) {
+ /* The counter is truncated to char, matching the (char)rcnt comparison above. */
+ tstamp[scnt] = get_cycles();
+ *post_buf = (char)++scnt;
+ /* Post the prepared RDMA write. */
+ ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
+ if (ib_status != IB_SUCCESS)
+ {
+ fprintf(stderr, "Couldn't post send:scnt %d ccnt=%d \n",scnt,ccnt);
+ return 1;
+ }
+ }
+ /* Completion step: reap exactly one send completion per pass. */
+ if (ccnt < user_param->iters) {
+ ib_wc_t wc;
+ ib_wc_t *p_wc_done,*p_wc_free;
+ /* Offer a one-entry free list; p_wc_done is where the completion is read from afterwards. */
+ p_wc_free = &wc;
+ p_wc_done = NULL;
+ p_wc_free->p_next = NULL;
+ /* Busy-poll while the CQ reports nothing found. */
+ do{
+ ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
+ } while (ib_status == IB_NOT_FOUND);
+ /* Any status other than success here is a failure of the poll call itself. */
+ if (ib_status != IB_SUCCESS) {
+ fprintf(stderr, "Poll Send CQ failed %d\n", ib_status);
+ return 12;
+ }
+ /* The poll succeeded; now check the completion's own status. */
+ if (p_wc_done->status != IB_WCS_SUCCESS) {
+ fprintf(stderr, "Completion wth error at %s:\n",
+ user_param->servername ? "client" : "server");
+ fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
+ p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
+ return 1;
+ }
+ /* One more send completed. */
+ ++ccnt;
+ }
+//printf("ccnt = %d \n",ccnt);
+ }
+ return(0);
+}
+
+
+
+
+
+
+/*
+ * Entry point of the RDMA write latency test.
+ *
+ * Parses the command line, allocates the time-stamp array, starts Winsock,
+ * creates the IB context, connects to the peer over TCP and then runs either
+ * one measurement at the requested size or, with -a, one per power of two
+ * from 2 up to 2^23, printing a report after each run.
+ *
+ * Exit codes: 0 success (or -V); 1 bad TCP port or WSAStartup failure;
+ * 2 bad IB port; 3 bad size; 4 bad tx depth; 5 bad iteration count or
+ * unknown option; 6 too many positional arguments; 8 context setup failed;
+ * 9 connection setup failed; 10 out of memory; 17/18 a measurement failed
+ * (-a mode / single-size mode).
+ */
+int __cdecl main(int argc, char *argv[])
+{
+    struct pingpong_context *ctx;
+    struct pingpong_dest *rem_dest = NULL;
+    struct user_parameters user_param;
+    char *ib_devname = NULL;
+    int port = 18515;
+    int ib_port = 1;
+    unsigned size = 65536;
+    SOCKET sockfd = INVALID_SOCKET;
+    WSADATA wsaData;
+    int i = 0;
+    int iResult;
+    int wsa_started = 0;
+    int exit_code = 0;
+    long parsed_size;
+    struct report_options report = {0};
+
+    /* init default values to user's parameters */
+    memset(&user_param, 0, sizeof(struct user_parameters));
+    user_param.mtu = 0; /* signal choose default by device */
+    user_param.iters = 1000;
+    user_param.tx_depth = 50;
+    user_param.servername = NULL;
+
+    /* Parameter parsing. */
+    while (1) {
+        int c;
+
+        static struct option long_options[] = {
+            { "port", 1, NULL, 'p' },
+            { "connection", 1, NULL, 'c' },
+            { "mtu", 1, NULL, 'm' },
+            { "ib-dev", 1, NULL, 'd' },
+            { "ib-port", 1, NULL, 'i' },
+            { "size", 1, NULL, 's' },
+            { "iters", 1, NULL, 'n' },
+            { "tx-depth", 1, NULL, 't' },
+            { "all", 0, NULL, 'a' },
+            { "report-cycles", 0, NULL, 'C' },
+            { "report-histogram", 0, NULL, 'H' },
+            { "report-unsorted", 0, NULL, 'U' },
+            { "version", 0, NULL, 'V' },
+            { 0 }
+        };
+
+        c = getopt_long(argc, argv, "p:c:m:d:i:s:n:t:aCHUV", long_options, NULL);
+        if (c == -1)
+            break;
+
+        switch (c) {
+        case 'p':
+            port = strtol(optarg, NULL, 0);
+            if (port < 0 || port > 65535) {
+                usage(argv[0]);
+                return 1;
+            }
+            break;
+
+        case 'c':
+            if (strcmp("UC",optarg)==0)
+                user_param.connection_type=1;
+            /* default is 0 for any other option RC*/
+            break;
+
+        case 'm':
+            user_param.mtu = strtol(optarg, NULL, 0);
+            break;
+
+        case 'a':
+            user_param.all = ALL;
+            break;
+
+        case 'V':
+            printf("perftest version : %.2f\n",VERSION);
+            return 0;
+
+        case 'd':
+            /* Keep only the last -d value; drop any earlier copy. */
+            free(ib_devname);
+            ib_devname = _strdup(optarg);
+            break;
+
+        case 'i':
+            ib_port = strtol(optarg, NULL, 0);
+            /* The port number is later used as p_port_attr[ib_port-1],
+             * so 0 must be rejected as well as negative values. */
+            if (ib_port < 1) {
+                usage(argv[0]);
+                return 2;
+            }
+            break;
+
+        case 's':
+            /* Validate as signed before storing into the unsigned size,
+             * otherwise a negative argument wraps to a huge value. */
+            parsed_size = strtol(optarg, NULL, 0);
+            if (parsed_size < 1) {
+                usage(argv[0]);
+                return 3;
+            }
+            size = (unsigned)parsed_size;
+            break;
+
+        case 't':
+            user_param.tx_depth = strtol(optarg, NULL, 0);
+            if (user_param.tx_depth < 1) {
+                usage(argv[0]);
+                return 4;
+            }
+            break;
+
+        case 'n':
+            user_param.iters = strtol(optarg, NULL, 0);
+            if (user_param.iters < 2) {
+                usage(argv[0]);
+                return 5;
+            }
+            break;
+
+        case 'C':
+            report.cycles = 1;
+            break;
+
+        case 'H':
+            report.histogram = 1;
+            break;
+
+        case 'U':
+            report.unsorted = 1;
+            break;
+
+        default:
+            usage(argv[0]);
+            return 5;
+        }
+    }
+
+    /* A single positional argument selects client mode. */
+    if (optind == argc - 1)
+        user_param.servername = _strdup(argv[optind]);
+    else if (optind < argc) {
+        usage(argv[0]);
+        return 6;
+    }
+
+    /*
+     * Done with parameter parsing. Perform setup.
+     * From here on every exit goes through the cleanup label.
+     */
+
+    tstamp = malloc(user_param.iters * sizeof *tstamp);
+    if (!tstamp) {
+        perror("malloc");
+        exit_code = 10;
+        goto cleanup;
+    }
+    printf("------------------------------------------------------------------\n");
+    printf(" RDMA_Write Latency Test\n");
+    printf("Inline data is used up to 400 bytes message\n");
+    if (user_param.connection_type==0) {
+        printf("Connection type : RC\n");
+    } else {
+        printf("Connection type : UC\n");
+    }
+
+    /* Initialize Winsock */
+    iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
+    if (iResult != NO_ERROR) {
+        printf("Error at WSAStartup()\n");
+        exit_code = 1;
+        goto cleanup;
+    }
+    wsa_started = 1;
+
+    if (user_param.all == ALL) {
+        /*since we run all sizes lets allocate big enough buffer */
+        size = 8388608; /*2^23 */
+    }
+    srand(GetCurrentProcessId() * GetTickCount());
+
+    /* TODO: get pagesize from sysinfo */
+    page_size = 4096;
+
+    /* TODO get the device names (ib_devname is parsed but not used yet) */
+
+    ctx = pp_init_ctx( size, ib_port,&user_param);
+    if (!ctx) {
+        exit_code = 8;
+        goto cleanup;
+    }
+
+    sockfd = pp_open_port(ctx, user_param.servername, ib_port, port, &rem_dest,&user_param);
+    if (sockfd == INVALID_SOCKET) {
+        exit_code = 9;
+        goto cleanup;
+    }
+
+    printf("------------------------------------------------------------------\n");
+    printf(" #bytes #iterations t_min[usec] t_max[usec] t_typical[usec]\n");
+
+    if (user_param.all == ALL) {
+        for (i = 1; i < 24 ; ++i) {
+            size = 1u << i;
+            if (run_iter(ctx, &user_param, rem_dest, (int)size)) {
+                exit_code = 17;
+                goto cleanup;
+            }
+            print_report(&report, user_param.iters, tstamp, (int)size);
+        }
+    } else {
+        if (run_iter(ctx, &user_param, rem_dest, (int)size)) {
+            exit_code = 18;
+            goto cleanup;
+        }
+        print_report(&report, user_param.iters, tstamp, (int)size);
+    }
+
+    /* Best-effort notification to the peer; the result is not checked. */
+    send(sockfd, "done", sizeof "done",0);
+    closesocket(sockfd);
+    sockfd = INVALID_SOCKET;
+
+    printf("------------------------------------------------------------------\n");
+
+cleanup:
+    if (sockfd != INVALID_SOCKET)
+        closesocket(sockfd);
+    if (wsa_started)
+        WSACleanup();
+    free(rem_dest);
+    free(tstamp);
+    tstamp = NULL;
+    free(ib_devname);
+    return exit_code;
+}
--- /dev/null
+/*\r
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.\r
+ *\r
+ * This software is available to you under the OpenIB.org BSD license\r
+ * below:\r
+ *\r
+ * Redistribution and use in source and binary forms, with or\r
+ * without modification, are permitted provided that the following\r
+ * conditions are met:\r
+ *\r
+ * - Redistributions of source code must retain the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer.\r
+ *\r
+ * - Redistributions in binary form must reproduce the above\r
+ * copyright notice, this list of conditions and the following\r
+ * disclaimer in the documentation and/or other materials\r
+ * provided with the distribution.\r
+ *\r
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
+ * SOFTWARE.\r
+ *\r
+ * $Id: vstat.rc 636 2005-10-19 17:46:55Z sleybo $\r
+ */\r
+\r
+\r
+#include <oib_ver.h>\r
+\r
+#define VER_FILETYPE VFT_APP\r
+#define VER_FILESUBTYPE VFT2_UNKNOWN\r
+\r
+#ifdef _DEBUG_\r
+#define VER_FILEDESCRIPTION_STR "RDMA write Latency Test (Debug)"\r
+#else\r
+#define VER_FILEDESCRIPTION_STR "RDMA write Latency Test "\r
+#endif\r
+\r
+#define VER_INTERNALNAME_STR "ib_write_lat.exe"\r
+#define VER_ORIGINALFILENAME_STR "ib_write_lat.exe"\r
+\r
+#include <common.ver>\r