git.openfabrics.org - ~shefty/libibverbs.git/commitdiff
Commit libibverbs code from roland-uverbs branch back onto trunk
author Roland Dreier <roland@topspin.com>
Thu, 7 Apr 2005 21:26:32 +0000 (21:26 +0000)
committer Roland Dreier <rolandd@cisco.com>
Thu, 9 Nov 2006 19:35:56 +0000 (11:35 -0800)
15 files changed:
Makefile.am
examples/device_list.c
examples/pingpong.c
examples/ud-pingpong.c [new file with mode: 0644]
include/infiniband/arch.h [new file with mode: 0644]
include/infiniband/driver.h
include/infiniband/kern-abi.h [new file with mode: 0644]
include/infiniband/opcode.h [new file with mode: 0644]
include/infiniband/verbs.h
src/cmd.c [new file with mode: 0644]
src/device.c
src/ibverbs.h
src/init.c
src/libibverbs.map
src/verbs.c [new file with mode: 0644]

index 01a3f6ebfa019d29dbd494f0d5850e60f9497f32..5b3fb8c7e6614cf70055232c79e65cffddea0762 100644 (file)
@@ -4,7 +4,9 @@ INCLUDES = -I$(srcdir)/include
 
 lib_LTLIBRARIES = src/libibverbs.la
 
-src_libibverbs_la_CFLAGS = -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\"
+AM_CFLAGS = -g -Wall -D_GNU_SOURCE
+
+src_libibverbs_la_CFLAGS = -g -Wall -D_GNU_SOURCE -DDRIVER_PATH=\"$(libdir)/infiniband\"
 
 if HAVE_LD_VERSION_SCRIPT
     libibverbs_version_script = -Wl,--version-script=$(srcdir)/src/libibverbs.map
@@ -12,24 +14,31 @@ else
     libibverbs_version_script =
 endif
 
-src_libibverbs_la_SOURCES = src/init.c src/device.c src/memory.c
+src_libibverbs_la_SOURCES = src/cmd.c src/device.c src/init.c src/memory.c src/verbs.c
 src_libibverbs_la_LDFLAGS = -version-info 1 -export-dynamic \
     $(libibverbs_version_script)
 src_libibverbs_la_DEPENDENCIES = $(srcdir)/src/libibverbs.map
 
-bin_PROGRAMS = examples/ib_devices examples/asyncwatch examples/pingpong
-examples_ib_devices_SOURCES = examples/device_list.c
-examples_ib_devices_LDADD = $(top_builddir)/src/libibverbs.la
-examples_pingpong_SOURCES = examples/pingpong.c
-examples_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
-examples_asyncwatch_SOURCES = examples/asyncwatch.c
-examples_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la
+bin_PROGRAMS = examples/ibv_devices examples/ibv_asyncwatch \
+    examples/ibv_pingpong examples/ibv_ud_pingpong
+examples_ibv_devices_SOURCES = examples/device_list.c
+examples_ibv_devices_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_pingpong_SOURCES = examples/pingpong.c
+examples_ibv_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_ud_pingpong_SOURCES = examples/ud-pingpong.c
+examples_ibv_ud_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_asyncwatch_SOURCES = examples/asyncwatch.c
+examples_ibv_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la
 
 libibverbsincludedir = $(includedir)/infiniband
 
-libibverbsinclude_HEADERS = include/infiniband/verbs.h include/infiniband/driver.h
+libibverbsinclude_HEADERS = include/infiniband/arch.h include/infiniband/driver.h \
+    include/infiniband/kern-abi.h include/infiniband/opcode.h include/infiniband/verbs.h
 
-EXTRA_DIST = include/infiniband/verbs.h include/infiniband/driver.h \
+EXTRA_DIST = include/infiniband/driver.h include/infiniband/kern-abi.h \
+    include/infiniband/opcode.h include/infiniband/verbs.h \
+    src/ibverbs.h \
+    src/libibverbs.map \
     libibverbs.spec.in
 
 dist-hook: libibverbs.spec
index a5395d53af02ac087720e73ec9dbe54d4cf7b60d..414612ec91f9ab627d9d1b8bc2b77daec9a62c6d 100644 (file)
@@ -36,6 +36,8 @@
 #  include <config.h>
 #endif /* HAVE_CONFIG_H */
 
+#include <stdio.h>
+
 #include <endian.h>
 #include <byteswap.h>
 
@@ -60,5 +62,7 @@ int main(int argc, char *argv[])
        dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
                printf("    %-16s\t%016llx\n",
                       ibv_get_device_name(ib_dev),
-                      be64_to_cpu(ibv_get_device_guid(ib_dev)));
+                      (unsigned long long) be64_to_cpu(ibv_get_device_guid(ib_dev)));
+
+       return 0;
 }
index c8e65024981c915ba9bfda4f88b390e140ccc1e5..9d3adcb8bdd6287d124b5c864081ff0a340ae199 100644 (file)
@@ -29,7 +29,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: device_list.c 1393 2004-12-28 02:15:24Z roland $
+ * $Id$
  */
 
 #if HAVE_CONFIG_H
 #endif /* HAVE_CONFIG_H */
 
 #include <stdio.h>
-#include <endian.h>
-#include <byteswap.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <arpa/inet.h>
+#include <time.h>
+
+#include <sysfs/libsysfs.h>
 
 #include <infiniband/verbs.h>
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static inline uint64_t be64_to_cpu(uint64_t x) { return bswap_64(x); }
-#elif __BYTE_ORDER == __BIG_ENDIAN
-static inline uint64_t be64_to_cpu(uint64_t x) { return x; }
-#endif
+/*
+ * Work request IDs.  pp_post_recv() and pp_post_send() store these in
+ * wr_id, and main() switches on them to tell receive completions from
+ * send completions.
+ */
+enum {
+       PINGPONG_RECV_WRID = 1,
+       PINGPONG_SEND_WRID = 2,
+};
+
+/* Set by main() from sysconf(_SC_PAGESIZE); pp_init_ctx() aligns the work buffer to it. */
+static int page_size;
+
+/*
+ * State for one side of the ping-pong test.  All members are filled in
+ * by pp_init_ctx().
+ */
+struct pingpong_context {
+       struct ibv_context *context;    /* result of ibv_open_device() */
+       struct ibv_pd      *pd;         /* result of ibv_alloc_pd() on context */
+       struct ibv_mr      *mr;         /* registration of buf (local write access) */
+       struct ibv_cq      *cq;         /* used as both send and receive CQ of qp */
+       struct ibv_qp      *qp;         /* RC queue pair created on pd */
+       void               *buf;        /* page-aligned, zeroed message buffer */
+       int                 size;       /* length of buf in bytes */
+       int                 rx_depth;   /* receive queue depth requested for qp */
+};
+
+/*
+ * Addressing information for one end of the connection.  It is exchanged
+ * over TCP as the hex text "lid:qpn:psn" by pp_client_exch_dest() and
+ * pp_server_exch_dest().
+ */
+struct pingpong_dest {
+       int lid;        /* port LID */
+       int qpn;        /* queue pair number */
+       int psn;        /* packet sequence number */
+};
+
+/*
+ * Return the LID reported by ibv_query_port() for the given port of the
+ * context's device, or 0 if the query fails.
+ */
+static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)
+{
+       struct ibv_port_attr port_info;
+       int                  rc;
+
+       rc = ibv_query_port(ctx->context, port, &port_info);
+
+       return rc ? 0 : port_info.lid;
+}
+
+/*
+ * Client side of the out-of-band address exchange.
+ *
+ * Connects to servername:port over TCP, sends my_dest as the hex text
+ * "lid:qpn:psn", reads the server's reply in the same format and then
+ * sends "done".
+ *
+ * Returns a malloc()ed pingpong_dest describing the remote side (the
+ * caller owns it), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
+                                                const struct pingpong_dest *my_dest)
+{
+       struct addrinfo *res, *t;
+       struct addrinfo hints = {
+               .ai_family   = AF_UNSPEC,
+               .ai_socktype = SOCK_STREAM
+       };
+       char *service;
+       char msg[sizeof "0000:000000:000000"];
+       int n;
+       int sockfd = -1;
+       struct pingpong_dest *rem_dest = NULL;
+
+       /* asprintf() leaves the pointer undefined on failure, so stop here. */
+       if (asprintf(&service, "%d", port) < 0)
+               return NULL;
+
+       n = getaddrinfo(servername, service, &hints, &res);
+
+       /* The service string is only needed for the lookup itself. */
+       free(service);
+
+       /* getaddrinfo() signals failure with any nonzero value. */
+       if (n) {
+               fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+               return NULL;
+       }
+
+       /* Try each returned address until one accepts the connection. */
+       for (t = res; t; t = t->ai_next) {
+               sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+               if (sockfd >= 0) {
+                       if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+                               break;
+                       close(sockfd);
+                       sockfd = -1;
+               }
+       }
+
+       freeaddrinfo(res);
+
+       if (sockfd < 0) {
+               fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+               return NULL;
+       }
+
+       /* Bounded formatting: msg is sized for exactly 4+6+6 hex digits. */
+       snprintf(msg, sizeof msg, "%04x:%06x:%06x", my_dest->lid, my_dest->qpn, my_dest->psn);
+       if (write(sockfd, msg, sizeof msg) != sizeof msg) {
+               fprintf(stderr, "Couldn't send local address\n");
+               goto out;
+       }
+
+       if (read(sockfd, msg, sizeof msg) != sizeof msg) {
+               perror("client read");
+               fprintf(stderr, "Couldn't read remote address\n");
+               goto out;
+       }
+
+       /* Best-effort acknowledgement; the result is deliberately not checked. */
+       write(sockfd, "done", sizeof "done");
+
+       rem_dest = malloc(sizeof *rem_dest);
+       if (!rem_dest)
+               goto out;
+
+       /* The peer's bytes are untrusted: terminate them before parsing. */
+       msg[sizeof msg - 1] = '\0';
+       if (sscanf(msg, "%x:%x:%x", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn) != 3) {
+               fprintf(stderr, "Couldn't parse remote address\n");
+               free(rem_dest);
+               rem_dest = NULL;
+       }
+
+out:
+       close(sockfd);
+       return rem_dest;
+}
+
+/*
+ * Server side of the out-of-band address exchange.
+ *
+ * Binds a TCP socket to the given port, accepts one connection, reads
+ * the client's address as the hex text "lid:qpn:psn", replies with
+ * my_dest in the same format and then waits for the client's final
+ * message.
+ *
+ * Returns a malloc()ed pingpong_dest describing the remote side (the
+ * caller owns it), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_server_exch_dest(int port, const struct pingpong_dest *my_dest)
+{
+       struct addrinfo *res, *t;
+       struct addrinfo hints = {
+               .ai_flags    = AI_PASSIVE,
+               .ai_family   = AF_UNSPEC,
+               .ai_socktype = SOCK_STREAM
+       };
+       char *service;
+       char msg[sizeof "0000:000000:000000"];
+       int n;
+       int sockfd = -1, connfd;
+       struct pingpong_dest *rem_dest = NULL;
+
+       /* asprintf() leaves the pointer undefined on failure, so stop here. */
+       if (asprintf(&service, "%d", port) < 0)
+               return NULL;
+
+       n = getaddrinfo(NULL, service, &hints, &res);
+
+       /* The service string is only needed for the lookup itself. */
+       free(service);
+
+       /* getaddrinfo() signals failure with any nonzero value. */
+       if (n) {
+               fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
+               return NULL;
+       }
+
+       /* Try each returned address until one can be bound. */
+       for (t = res; t; t = t->ai_next) {
+               sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+               if (sockfd >= 0) {
+                       n = 1;
+
+                       setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+
+                       if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+                               break;
+                       close(sockfd);
+                       sockfd = -1;
+               }
+       }
+
+       freeaddrinfo(res);
+
+       if (sockfd < 0) {
+               fprintf(stderr, "Couldn't listen to port %d\n", port);
+               return NULL;
+       }
+
+       /* A listen() failure shows up as an accept() failure just below. */
+       listen(sockfd, 1);
+       connfd = accept(sockfd, NULL, 0);
+       close(sockfd);
+       if (connfd < 0) {
+               fprintf(stderr, "accept() failed\n");
+               return NULL;
+       }
+
+       n = read(connfd, msg, sizeof msg);
+       if (n != sizeof msg) {
+               perror("server read");
+               fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
+               goto out;
+       }
+
+       rem_dest = malloc(sizeof *rem_dest);
+       if (!rem_dest)
+               goto out;
+
+       /* The peer's bytes are untrusted: terminate them before parsing. */
+       msg[sizeof msg - 1] = '\0';
+       if (sscanf(msg, "%x:%x:%x", &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn) != 3) {
+               fprintf(stderr, "Couldn't parse remote address\n");
+               free(rem_dest);
+               rem_dest = NULL;
+               goto out;
+       }
+
+       /* Bounded formatting: msg is sized for exactly 4+6+6 hex digits. */
+       snprintf(msg, sizeof msg, "%04x:%06x:%06x", my_dest->lid, my_dest->qpn, my_dest->psn);
+       if (write(connfd, msg, sizeof msg) != sizeof msg) {
+               fprintf(stderr, "Couldn't send local address\n");
+               free(rem_dest);
+               rem_dest = NULL;
+               goto out;
+       }
+
+       /* Wait for the client's final message; its content is not inspected. */
+       read(connfd, msg, sizeof msg);
+
+out:
+       close(connfd);
+       return rem_dest;
+}
+
+/*
+ * Build a pingpong_context on ib_dev: a zeroed, page-aligned buffer of
+ * `size` bytes, an opened device context, a PD, an MR covering the
+ * buffer, a CQ with rx_depth + 1 entries and an RC QP that is moved to
+ * the INIT state on `port`.
+ *
+ * Returns the new context, or NULL if any step fails.  Nothing acquired
+ * before the failing step is released on that path.
+ */
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
+                                           int rx_depth, int port)
+{
+       struct pingpong_context *pp;
+       struct ibv_qp_attr       init_state;
+       struct ibv_qp_init_attr  create_attr = {
+               .qp_type = IBV_QPT_RC,
+               .cap     = {
+                       .max_send_wr  = 1,
+                       .max_recv_wr  = rx_depth,
+                       .max_send_sge = 1,
+                       .max_recv_sge = 1
+               }
+       };
+
+       pp = malloc(sizeof *pp);
+       if (!pp)
+               return NULL;
+
+       pp->rx_depth = rx_depth;
+       pp->size     = size;
+
+       pp->buf = memalign(page_size, size);
+       if (!pp->buf) {
+               fprintf(stderr, "Couldn't allocate work buf.\n");
+               return NULL;
+       }
+       memset(pp->buf, 0, size);
+
+       pp->context = ibv_open_device(ib_dev);
+       if (!pp->context) {
+               fprintf(stderr, "Couldn't get context for %s\n",
+                       ibv_get_device_name(ib_dev));
+               return NULL;
+       }
+
+       pp->pd = ibv_alloc_pd(pp->context);
+       if (!pp->pd) {
+               fprintf(stderr, "Couldn't allocate PD\n");
+               return NULL;
+       }
+
+       pp->mr = ibv_reg_mr(pp->pd, pp->buf, size, IBV_ACCESS_LOCAL_WRITE);
+       if (!pp->mr) {
+               fprintf(stderr, "Couldn't allocate MR\n");
+               return NULL;
+       }
+
+       pp->cq = ibv_create_cq(pp->context, rx_depth + 1, NULL);
+       if (!pp->cq) {
+               fprintf(stderr, "Couldn't create CQ\n");
+               return NULL;
+       }
+
+       /* Sends and receives complete on the same CQ. */
+       create_attr.send_cq = pp->cq;
+       create_attr.recv_cq = pp->cq;
+
+       pp->qp = ibv_create_qp(pp->pd, &create_attr);
+       if (!pp->qp)  {
+               fprintf(stderr, "Couldn't create QP\n");
+               return NULL;
+       }
+
+       /* Only the members named in the mask below are filled in. */
+       init_state.qp_access_flags = 0;
+       init_state.port_num        = port;
+       init_state.pkey_index      = 0;
+       init_state.qp_state        = IBV_QPS_INIT;
+
+       if (ibv_modify_qp(pp->qp, &init_state,
+                         IBV_QP_STATE              |
+                         IBV_QP_PKEY_INDEX         |
+                         IBV_QP_PORT               |
+                         IBV_QP_ACCESS_FLAGS)) {
+               fprintf(stderr, "Failed to modify QP to INIT\n");
+               return NULL;
+       }
+
+       return pp;
+}
+
+/*
+ * Post up to n receive work requests, each describing the whole context
+ * buffer and tagged PINGPONG_RECV_WRID.  Stops at the first request that
+ * ibv_post_recv() rejects.
+ *
+ * Returns the number of requests successfully posted.
+ */
+static int pp_post_recv(struct pingpong_context *ctx, int n)
+{
+       struct ibv_sge sge = {
+               .addr   = (uintptr_t) ctx->buf,
+               .length = ctx->size,
+               .lkey   = ctx->mr->lkey
+       };
+       struct ibv_recv_wr recv_wr = {
+               .wr_id      = PINGPONG_RECV_WRID,
+               .sg_list    = &sge,
+               .num_sge    = 1,
+       };
+       struct ibv_recv_wr *failed_wr;
+       int posted = 0;
+
+       while (posted < n && !ibv_post_recv(ctx->qp, &recv_wr, &failed_wr))
+               ++posted;
+
+       return posted;
+}
+
+/*
+ * Post one signaled send of the whole context buffer, tagged
+ * PINGPONG_SEND_WRID.
+ *
+ * Returns whatever ibv_post_send() returns.
+ */
+static int pp_post_send(struct pingpong_context *ctx)
+{
+       struct ibv_sge sge = {
+               .addr   = (uintptr_t) ctx->buf,
+               .length = ctx->size,
+               .lkey   = ctx->mr->lkey
+       };
+       struct ibv_send_wr send_wr = {
+               .wr_id      = PINGPONG_SEND_WRID,
+               .sg_list    = &sge,
+               .num_sge    = 1,
+               .opcode     = IBV_WR_SEND,
+               .send_flags = IBV_SEND_SIGNALED,
+       };
+       struct ibv_send_wr *failed_wr;
+       int rc;
+
+       rc = ibv_post_send(ctx->qp, &send_wr, &failed_wr);
+
+       return rc;
+}
+
+/*
+ * Connect the context's QP to the remote side described by dest: first
+ * move it to RTR using the remote LID/QPN/PSN, then to RTS with my_psn
+ * as the send PSN.
+ *
+ * Returns 0 on success, 1 if either ibv_modify_qp() call fails.
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+                         struct pingpong_dest *dest)
+{
+       struct ibv_qp_attr qp_attr;
+       const int rtr_mask = IBV_QP_STATE              |
+                            IBV_QP_AV                 |
+                            IBV_QP_PATH_MTU           |
+                            IBV_QP_DEST_QPN           |
+                            IBV_QP_RQ_PSN             |
+                            IBV_QP_MAX_DEST_RD_ATOMIC |
+                            IBV_QP_MIN_RNR_TIMER;
+       const int rts_mask = IBV_QP_STATE              |
+                            IBV_QP_TIMEOUT            |
+                            IBV_QP_RETRY_CNT          |
+                            IBV_QP_RNR_RETRY          |
+                            IBV_QP_SQ_PSN             |
+                            IBV_QP_MAX_QP_RD_ATOMIC;
+
+       /* Receive side: path to the peer and what we expect from it. */
+       qp_attr.qp_state              = IBV_QPS_RTR;
+       qp_attr.path_mtu              = IBV_MTU_1024;
+       qp_attr.dest_qp_num           = dest->qpn;
+       qp_attr.rq_psn                = dest->psn;
+       qp_attr.max_dest_rd_atomic    = 1;
+       qp_attr.min_rnr_timer         = 12;
+       qp_attr.ah_attr.is_global     = 0;
+       qp_attr.ah_attr.dlid          = dest->lid;
+       qp_attr.ah_attr.sl            = 0;
+       qp_attr.ah_attr.src_path_bits = 0;
+       qp_attr.ah_attr.port_num      = port;
+
+       if (ibv_modify_qp(ctx->qp, &qp_attr, rtr_mask)) {
+               fprintf(stderr, "Failed to modify QP to RTR\n");
+               return 1;
+       }
+
+       /* Send side: the same structure is reused for the RTS transition. */
+       qp_attr.qp_state      = IBV_QPS_RTS;
+       qp_attr.timeout       = 14;
+       qp_attr.retry_cnt     = 7;
+       qp_attr.rnr_retry     = 7;
+       qp_attr.sq_psn        = my_psn;
+       qp_attr.max_rd_atomic = 1;
+
+       if (ibv_modify_qp(ctx->qp, &qp_attr, rts_mask)) {
+               fprintf(stderr, "Failed to modify QP to RTS\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Print the command-line synopsis and option list to stdout. */
+static void usage(const char *argv0)
+{
+       printf("Usage:\n"
+              "  %s            start a server and wait for connection\n"
+              "  %s <host>     connect to server at <host>\n"
+              "\n",
+              argv0, argv0);
+
+       fputs("Options:\n"
+             "  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
+             "  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
+             "  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
+             "  -s, --size=<size>      size of message to exchange (default 4096)\n"
+             "  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
+             "  -n, --iters=<iters>    number of exchanges (default 1000)\n"
+             "  -e, --events           sleep on CQ events (default poll)\n",
+             stdout);
+}
 
 int main(int argc, char *argv[])
 {
-       struct dlist *dev_list;
-       struct ibv_device *ib_dev;
-       struct ibv_context *context;
+       struct dlist            *dev_list;
+       struct ibv_device       *ib_dev;
+       struct pingpong_context *ctx;
+       struct pingpong_dest     my_dest;
+       struct pingpong_dest    *rem_dest;
+       struct timeval           start, end;
+       char                    *ib_devname = NULL;
+       char                    *servername = NULL;
+       int                      port = 18515;
+       int                      ib_port = 1;
+       int                      size = 4096;
+       int                      rx_depth = 500;
+       int                      iters = 1000;
+       int                      use_event = 0;
+       int                      routs;
+       int                      rcnt, scnt;
+
+       srand48(getpid() * time(NULL));
+
+       while (1) {
+               int c;
+
+               static struct option long_options[] = {
+                       { .name = "port",    .has_arg = 1, .val = 'p' },
+                       { .name = "ib-dev",  .has_arg = 1, .val = 'd' },
+                       { .name = "ib-port", .has_arg = 1, .val = 'i' },
+                       { .name = "size",    .has_arg = 1, .val = 's' },
+                       { .name = "iters",   .has_arg = 1, .val = 'n' },
+                       { .name = "events",  .has_arg = 0, .val = 'e' },
+                       { 0 }
+               };
+
+               c = getopt_long(argc, argv, "p:d:i:s:r:n:e", long_options, NULL);
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 'p':
+                       port = strtol(optarg, NULL, 0);
+                       if (port < 0 || port > 65535) {
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+
+               case 'd':
+                       ib_devname = strdupa(optarg);
+                       break;
+
+               case 'i':
+                       ib_port = strtol(optarg, NULL, 0);
+                       if (port < 0) {
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+
+               case 's':
+                       size = strtol(optarg, NULL, 0);
+                       break;
+
+               case 'r':
+                       rx_depth = strtol(optarg, NULL, 0);
+                       break;
+
+               case 'n':
+                       iters = strtol(optarg, NULL, 0);
+                       break;
+
+               case 'e':
+                       ++use_event;
+                       break;
+
+               default:
+                       usage(argv[0]);
+                       return 1;
+               }
+       }
+
+       if (optind == argc - 1)
+               servername = strdupa(argv[optind]);
+       else if (optind < argc) {
+               usage(argv[0]);
+               return 1;
+       }
+
+       page_size = sysconf(_SC_PAGESIZE);
 
        dev_list = ibv_get_devices();
 
        dlist_start(dev_list);
-       ib_dev = dlist_next(dev_list);
+       if (!ib_devname) {
+               ib_dev = dlist_next(dev_list);
+               if (!ib_dev) {
+                       fprintf(stderr, "No IB devices found\n");
+                       return 1;
+               }
+       } else {
+               dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+                       if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+                               break;
+               if (!ib_dev) {
+                       fprintf(stderr, "IB device %s not found\n", ib_devname);
+                       return 1;
+               }
+       }
+
+       ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port);
+       if (!ctx)
+               return 1;
 
-       if (!ib_dev) {
-               fprintf(stderr, "No IB devices found\n");
+       routs = pp_post_recv(ctx, ctx->rx_depth);
+       if (routs < ctx->rx_depth) {
+               fprintf(stderr, "Couldn't post receive (%d)\n", routs);
                return 1;
        }
 
-       context = ibv_open_device(ib_dev);
-       if (!context) {
-               fprintf(stderr, "Couldn't get context for %s\n",
-                       ibv_get_device_name(ib_dev));
+       my_dest.lid = pp_get_local_lid(ctx, ib_port);
+       my_dest.qpn = ctx->qp->qp_num;
+       my_dest.psn = lrand48() & 0xffffff;
+       if (!my_dest.lid) {
+               fprintf(stderr, "Couldn't get local LID\n");
+               return 1;
+       }
+
+       printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+              my_dest.lid, my_dest.qpn, my_dest.psn);
+
+       if (servername)
+               rem_dest = pp_client_exch_dest(servername, port, &my_dest);
+       else
+               rem_dest = pp_server_exch_dest(port, &my_dest);
+
+       if (!rem_dest)
+               return 1;
+
+       printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+              rem_dest->lid, rem_dest->qpn, rem_dest->psn);
+
+       if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
                return 1;
+
+       if (use_event)
+               if (ibv_req_notify_cq(ctx->cq, 0)) {
+                       fprintf(stderr, "Couldn't request CQ notification\n");
+                       return 1;
+               }
+
+       if (servername)
+               if (pp_post_send(ctx)) {
+                       fprintf(stderr, "Couldn't post send\n");
+                       return 1;
+               }
+
+       if (gettimeofday(&start, NULL)) {
+               perror("gettimeofday");
+               return 1;
+       }
+
+       rcnt = scnt = 0;
+       while (rcnt < iters || scnt < iters) {
+               if (use_event) {
+                       struct ibv_cq *ev_cq;
+                       void          *ev_ctx;
+
+                       if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+                               fprintf(stderr, "Failed to get cq_event\n");
+                               return 1;
+                       }
+
+                       if (ev_cq != ctx->cq) {
+                               fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+                               return 1;
+                       }
+
+                       if (ibv_req_notify_cq(ctx->cq, 0)) {
+                               fprintf(stderr, "Couldn't request CQ notification\n");
+                               return 1;
+                       }
+               }
+
+               {
+                       struct ibv_wc wc[2];
+                       int ne, i;
+
+                       do {
+                               ne = ibv_poll_cq(ctx->cq, 2, wc);
+                       } while (!use_event && ne < 1);
+
+                       if (ne < 0) {
+                               fprintf(stderr, "poll CQ failed %d\n", ne);
+                               return 1;
+                       }
+
+                       for (i = 0; i < ne; ++i) {
+                               if (wc[i].status != IBV_WC_SUCCESS) {
+                                       fprintf(stderr, "Failed status %d for wr_id %d\n",
+                                               wc[i].status, (int) wc[i].wr_id);
+                                       return 1;
+                               }
+
+                               switch ((int) wc[i].wr_id) {
+                               case PINGPONG_SEND_WRID:
+                                       ++scnt;
+                                       break;
+
+                               case PINGPONG_RECV_WRID:
+                                       if (--routs <= 1) {
+                                               routs += pp_post_recv(ctx, ctx->rx_depth - routs);
+                                               if (routs < ctx->rx_depth) {
+                                                       fprintf(stderr,
+                                                               "Couldn't post receive (%d)\n",
+                                                               routs);
+                                                       return 1;
+                                               }
+                                       }
+
+                                       if (scnt < iters)
+                                               if (pp_post_send(ctx)) {
+                                                       fprintf(stderr, "Couldn't post send\n");
+                                                       return 1;
+                                               }
+
+                                       ++rcnt;
+                                       break;
+
+                               default:
+                                       fprintf(stderr, "Completion for unknown wr_id %d\n",
+                                               (int) wc[i].wr_id);
+                                       return 1;
+                               }
+                       }
+               }
+       }
+
+       if (gettimeofday(&end, NULL)) {
+               perror("gettimeofday");
+               return 1;
+       }
+
+       {
+               float usec = (end.tv_sec - start.tv_sec) * 1000000 +
+                       (end.tv_usec - start.tv_usec);
+               long long bytes = (long long) size * iters * 2;
+
+               printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
+                      bytes, usec / 1000000., bytes * 8. / usec);
+               printf("%d iters in %.2f seconds = %.2f usec/iter\n",
+                      iters, usec / 1000000., usec / iters);
        }
 
        return 0;
diff --git a/examples/ud-pingpong.c b/examples/ud-pingpong.c
new file mode 100644 (file)
index 0000000..8aa6075
--- /dev/null
@@ -0,0 +1,677 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <arpa/inet.h>
+#include <time.h>
+
+#include <sysfs/libsysfs.h>
+
+#include <infiniband/verbs.h>
+
+/*
+ * Work request IDs.  pp_post_recv() tags every receive with
+ * PINGPONG_RECV_WRID and pp_post_send() tags every send with
+ * PINGPONG_SEND_WRID, so the completion loop in main() can tell the
+ * two kinds of completion apart by looking at wc.wr_id.
+ */
+enum {
+       PINGPONG_RECV_WRID = 1,
+       PINGPONG_SEND_WRID = 2,
+};
+
+/* Set by main() from sysconf(_SC_PAGESIZE); used as the alignment of the work buffer. */
+static int page_size;
+
+/*
+ * Everything one side of the ping-pong needs.  All members except ah
+ * are filled in by pp_init_ctx(); ah is created by pp_connect_ctx().
+ */
+struct pingpong_context {
+       struct ibv_context *context;    /* from ibv_open_device() */
+       struct ibv_pd      *pd;         /* protection domain for mr, qp and ah */
+       struct ibv_mr      *mr;         /* registration covering buf */
+       struct ibv_cq      *cq;         /* used as both send and receive CQ of qp */
+       struct ibv_qp      *qp;         /* UD queue pair */
+       struct ibv_ah      *ah;         /* address handle for the remote LID */
+       void               *buf;        /* work buffer, size + 40 bytes long */
+       int                 size;       /* message payload size in bytes */
+       int                 rx_depth;   /* number of receives kept posted */
+};
+
+/*
+ * Address of one endpoint.  The two sides swap these over a TCP
+ * connection, formatted as the text "%04x:%06x:%06x" (lid:qpn:psn).
+ */
+struct pingpong_dest {
+       int lid;        /* port LID */
+       int qpn;        /* queue pair number */
+       int psn;        /* initial packet sequence number */
+};
+
+
+/*
+ * Return the LID of port `port' on the device opened in ctx, or 0 if
+ * the port attributes cannot be queried.
+ */
+static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)
+{
+       struct ibv_port_attr port_info;
+       int rc;
+
+       rc = ibv_query_port(ctx->context, port, &port_info);
+       if (rc != 0)
+               return 0;
+
+       return port_info.lid;
+}
+
+/*
+ * Client side of the out-of-band address exchange.
+ *
+ * Connects to servername:port over TCP, sends my_dest formatted as
+ * "lid:qpn:psn" in hex, reads the server's address in the same format
+ * and then sends "done" so the server knows the reply arrived.
+ *
+ * Returns a malloc()ed pingpong_dest describing the server (the caller
+ * owns it), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
+                                                const struct pingpong_dest *my_dest)
+{
+       struct addrinfo *res, *t;
+       struct addrinfo hints = {
+               .ai_family   = AF_UNSPEC,
+               .ai_socktype = SOCK_STREAM
+       };
+       char *service;
+       char msg[sizeof "0000:000000:000000"];
+       int n;
+       int sockfd = -1;
+       struct pingpong_dest *rem_dest = NULL;
+
+       /* On failure asprintf() leaves service undefined, so bail out early. */
+       if (asprintf(&service, "%d", port) < 0) {
+               fprintf(stderr, "Couldn't format port %d\n", port);
+               return NULL;
+       }
+
+       n = getaddrinfo(servername, service, &hints, &res);
+       free(service);
+
+       /* getaddrinfo() reports failure with any nonzero value, not only negative ones. */
+       if (n) {
+               fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+               return NULL;
+       }
+
+       /* Try each returned address until one accepts the connection. */
+       for (t = res; t; t = t->ai_next) {
+               sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+               if (sockfd >= 0) {
+                       if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+                               break;
+                       close(sockfd);
+                       sockfd = -1;
+               }
+       }
+
+       freeaddrinfo(res);
+
+       if (sockfd < 0) {
+               fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+               return NULL;
+       }
+
+       /* Bounded formatting: an out-of-range field must not overrun msg. */
+       snprintf(msg, sizeof msg, "%04x:%06x:%06x",
+                my_dest->lid, my_dest->qpn, my_dest->psn);
+       if (write(sockfd, msg, sizeof msg) != sizeof msg) {
+               fprintf(stderr, "Couldn't send local address\n");
+               goto out;
+       }
+
+       if (read(sockfd, msg, sizeof msg) != sizeof msg) {
+               perror("client read");
+               fprintf(stderr, "Couldn't read remote address\n");
+               goto out;
+       }
+
+       /* The peer is not trusted to have sent the terminating NUL. */
+       msg[sizeof msg - 1] = '\0';
+
+       /* Best-effort acknowledgement; the exchange itself is already complete. */
+       write(sockfd, "done", sizeof "done");
+
+       rem_dest = malloc(sizeof *rem_dest);
+       if (!rem_dest)
+               goto out;
+
+       if (sscanf(msg, "%x:%x:%x",
+                  &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn) != 3) {
+               fprintf(stderr, "Couldn't parse remote address\n");
+               free(rem_dest);
+               rem_dest = NULL;
+       }
+
+out:
+       close(sockfd);
+       return rem_dest;
+}
+
+/*
+ * Server side of the out-of-band address exchange.
+ *
+ * Listens on TCP port `port', accepts a single connection, reads the
+ * client's address ("lid:qpn:psn" in hex), replies with my_dest in the
+ * same format and then waits for the client's final message.
+ *
+ * Returns a malloc()ed pingpong_dest describing the client (the caller
+ * owns it), or NULL on any failure.
+ */
+static struct pingpong_dest *pp_server_exch_dest(int port, const struct pingpong_dest *my_dest)
+{
+       struct addrinfo *res, *t;
+       struct addrinfo hints = {
+               .ai_flags    = AI_PASSIVE,
+               .ai_family   = AF_UNSPEC,
+               .ai_socktype = SOCK_STREAM
+       };
+       char *service;
+       char msg[sizeof "0000:000000:000000"];
+       int n;
+       int sockfd = -1, connfd;
+       struct pingpong_dest *rem_dest = NULL;
+
+       /* On failure asprintf() leaves service undefined, so bail out early. */
+       if (asprintf(&service, "%d", port) < 0) {
+               fprintf(stderr, "Couldn't format port %d\n", port);
+               return NULL;
+       }
+
+       n = getaddrinfo(NULL, service, &hints, &res);
+       free(service);
+
+       /* getaddrinfo() reports failure with any nonzero value, not only negative ones. */
+       if (n) {
+               fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
+               return NULL;
+       }
+
+       /* Bind to the first address that works. */
+       for (t = res; t; t = t->ai_next) {
+               sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+               if (sockfd >= 0) {
+                       n = 1;
+
+                       setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+
+                       if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+                               break;
+                       close(sockfd);
+                       sockfd = -1;
+               }
+       }
+
+       freeaddrinfo(res);
+
+       if (sockfd < 0) {
+               fprintf(stderr, "Couldn't listen to port %d\n", port);
+               return NULL;
+       }
+
+       if (listen(sockfd, 1)) {
+               perror("listen");
+               close(sockfd);
+               return NULL;
+       }
+
+       /* Only one client is served, so the listening socket can go right away. */
+       connfd = accept(sockfd, NULL, 0);
+       close(sockfd);
+       if (connfd < 0) {
+               fprintf(stderr, "accept() failed\n");
+               return NULL;
+       }
+
+       n = read(connfd, msg, sizeof msg);
+       if (n != sizeof msg) {
+               perror("server read");
+               fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
+               goto out;
+       }
+
+       /* The peer is not trusted to have sent the terminating NUL. */
+       msg[sizeof msg - 1] = '\0';
+
+       rem_dest = malloc(sizeof *rem_dest);
+       if (!rem_dest)
+               goto out;
+
+       if (sscanf(msg, "%x:%x:%x",
+                  &rem_dest->lid, &rem_dest->qpn, &rem_dest->psn) != 3) {
+               fprintf(stderr, "Couldn't parse remote address\n");
+               free(rem_dest);
+               rem_dest = NULL;
+               goto out;
+       }
+
+       /* Bounded formatting: an out-of-range field must not overrun msg. */
+       snprintf(msg, sizeof msg, "%04x:%06x:%06x",
+                my_dest->lid, my_dest->qpn, my_dest->psn);
+       if (write(connfd, msg, sizeof msg) != sizeof msg) {
+               fprintf(stderr, "Couldn't send local address\n");
+               free(rem_dest);
+               rem_dest = NULL;
+               goto out;
+       }
+
+       /* Best-effort wait for the client's "done" before closing the connection. */
+       read(connfd, msg, sizeof msg);
+
+out:
+       close(connfd);
+       return rem_dest;
+}
+
+/*
+ * Build the per-process ping-pong state on device ib_dev.
+ *
+ * Allocates a page-aligned, zeroed work buffer of size + 40 bytes,
+ * opens the device, allocates a PD, registers the buffer with local
+ * write access, creates one CQ of rx_depth + 1 entries shared by send
+ * and receive, creates a UD QP (1 send WR, rx_depth receive WRs) and
+ * moves it to INIT on port `port' with pkey index 0 and qkey
+ * 0x11111111.
+ *
+ * NOTE(review): the extra 40 bytes are presumably room for the header
+ * that precedes the payload of a UD receive -- confirm.
+ *
+ * Returns the new context, or NULL on failure.  ctx->ah is left NULL;
+ * pp_connect_ctx() creates it.
+ */
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
+                                           int rx_depth, int port)
+{
+       struct pingpong_context *ctx;
+
+       ctx = malloc(sizeof *ctx);
+       if (!ctx)
+               return NULL;
+
+       ctx->size     = size;
+       ctx->rx_depth = rx_depth;
+       ctx->ah       = NULL;
+
+       ctx->buf = memalign(page_size, size + 40);
+       if (!ctx->buf) {
+               fprintf(stderr, "Couldn't allocate work buf.\n");
+               goto err_ctx;
+       }
+
+       memset(ctx->buf, 0, size + 40);
+
+       ctx->context = ibv_open_device(ib_dev);
+       if (!ctx->context) {
+               fprintf(stderr, "Couldn't get context for %s\n",
+                       ibv_get_device_name(ib_dev));
+               goto err_buf;
+       }
+
+       ctx->pd = ibv_alloc_pd(ctx->context);
+       if (!ctx->pd) {
+               fprintf(stderr, "Couldn't allocate PD\n");
+               goto err_buf;
+       }
+
+       ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE);
+       if (!ctx->mr) {
+               fprintf(stderr, "Couldn't allocate MR\n");
+               goto err_buf;
+       }
+
+       /*
+        * From here on the buffer is registered, so the failure paths
+        * below no longer free it; main() exits as soon as it sees NULL.
+        */
+
+       ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL);
+       if (!ctx->cq) {
+               fprintf(stderr, "Couldn't create CQ\n");
+               return NULL;
+       }
+
+       {
+               struct ibv_qp_init_attr attr = {
+                       .send_cq = ctx->cq,
+                       .recv_cq = ctx->cq,
+                       .cap     = {
+                               .max_send_wr  = 1,
+                               .max_recv_wr  = rx_depth,
+                               .max_send_sge = 1,
+                               .max_recv_sge = 1
+                       },
+                       .qp_type = IBV_QPT_UD,
+               };
+
+               ctx->qp = ibv_create_qp(ctx->pd, &attr);
+               if (!ctx->qp)  {
+                       fprintf(stderr, "Couldn't create QP\n");
+                       return NULL;
+               }
+       }
+
+       {
+               /* Designated initializer so members outside the mask are zero, not stack garbage. */
+               struct ibv_qp_attr attr = {
+                       .qp_state   = IBV_QPS_INIT,
+                       .pkey_index = 0,
+                       .port_num   = port,
+                       .qkey       = 0x11111111
+               };
+
+               if (ibv_modify_qp(ctx->qp, &attr,
+                                 IBV_QP_STATE              |
+                                 IBV_QP_PKEY_INDEX         |
+                                 IBV_QP_PORT               |
+                                 IBV_QP_QKEY)) {
+                       fprintf(stderr, "Failed to modify QP to INIT\n");
+                       return NULL;
+               }
+       }
+
+       return ctx;
+
+err_buf:
+       free(ctx->buf);
+err_ctx:
+       free(ctx);
+       return NULL;
+}
+
+/*
+ * Post up to n receive work requests on ctx->qp.  Every request uses
+ * the whole work buffer (size + 40 bytes) and carries
+ * PINGPONG_RECV_WRID.
+ *
+ * Returns how many requests were posted before the first failure; the
+ * result equals n when all of them succeeded.
+ */
+static int pp_post_recv(struct pingpong_context *ctx, int n)
+{
+       struct ibv_sge sge = {
+               .addr   = (uintptr_t) ctx->buf,
+               .length = ctx->size + 40,
+               .lkey   = ctx->mr->lkey
+       };
+       struct ibv_recv_wr recv_wr = {
+               .wr_id      = PINGPONG_RECV_WRID,
+               .sg_list    = &sge,
+               .num_sge    = 1,
+       };
+       struct ibv_recv_wr *bad_wr;
+       int posted = 0;
+
+       while (posted < n) {
+               if (ibv_post_recv(ctx->qp, &recv_wr, &bad_wr))
+                       break;
+               ++posted;
+       }
+
+       return posted;
+}
+
+/*
+ * Post one signaled send of ctx->size bytes, taken from offset 40 of
+ * the work buffer, to remote queue pair qpn through ctx->ah with qkey
+ * 0x11111111.  The request carries PINGPONG_SEND_WRID.
+ *
+ * Returns whatever ibv_post_send() returns.
+ */
+static int pp_post_send(struct pingpong_context *ctx, uint32_t qpn)
+{
+       struct ibv_send_wr *bad_wr;
+       struct ibv_sge payload = {
+               .lkey   = ctx->mr->lkey,
+               .length = ctx->size,
+               .addr   = (uintptr_t) ctx->buf + 40
+       };
+       struct ibv_send_wr send_wr = {
+               .wr_id      = PINGPONG_SEND_WRID,
+               .opcode     = IBV_WR_SEND,
+               .send_flags = IBV_SEND_SIGNALED,
+               .sg_list    = &payload,
+               .num_sge    = 1,
+               .wr.ud      = {
+                       .ah          = ctx->ah,
+                       .remote_qpn  = qpn,
+                       .remote_qkey = 0x11111111
+               }
+       };
+       int rc;
+
+       rc = ibv_post_send(ctx->qp, &send_wr, &bad_wr);
+
+       return rc;
+}
+
+/*
+ * Finish bringing up the UD queue pair and create the address handle
+ * used to reach the peer.
+ *
+ * Moves ctx->qp to RTR, then to RTS with send PSN my_psn, and stores
+ * in ctx->ah an address handle for dest->lid on local port `port'.
+ *
+ * Returns 0 on success, 1 on failure (after printing a message).
+ */
+static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
+                         struct pingpong_dest *dest)
+{
+       /*
+        * Both attribute structs are built with designated initializers
+        * so that every member not named here is zero.  ah_attr is
+        * handed to ibv_create_ah() whole, so it must not carry stack
+        * garbage in the members this function never sets.
+        */
+       struct ibv_qp_attr attr = {
+               .qp_state = IBV_QPS_RTR
+       };
+       struct ibv_ah_attr ah_attr = {
+               .is_global     = 0,
+               .dlid          = dest->lid,
+               .sl            = 0,
+               .src_path_bits = 0,
+               .port_num      = port
+       };
+
+       if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) {
+               fprintf(stderr, "Failed to modify QP to RTR\n");
+               return 1;
+       }
+
+       attr.qp_state       = IBV_QPS_RTS;
+       attr.sq_psn         = my_psn;
+
+       if (ibv_modify_qp(ctx->qp, &attr,
+                         IBV_QP_STATE              |
+                         IBV_QP_SQ_PSN)) {
+               fprintf(stderr, "Failed to modify QP to RTS\n");
+               return 1;
+       }
+
+       ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
+       if (!ctx->ah) {
+               fprintf(stderr, "Failed to create AH\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Print the command-line synopsis and the option list to stdout. */
+static void usage(const char *argv0)
+{
+       printf("Usage:\n"
+              "  %s            start a server and wait for connection\n"
+              "  %s <host>     connect to server at <host>\n"
+              "\n"
+              "Options:\n",
+              argv0, argv0);
+
+       fputs("  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n"
+             "  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n"
+             "  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n"
+             "  -s, --size=<size>      size of message to exchange (default 2048)\n"
+             "  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n"
+             "  -n, --iters=<iters>    number of exchanges (default 1000)\n"
+             "  -e, --events           sleep on CQ events (default poll)\n",
+             stdout);
+}
+
+/*
+ * UD ping-pong example.
+ *
+ * Parses the command line, picks an IB device (the first one, or the
+ * one named with -d), sets up the verbs objects, posts rx_depth
+ * receives and exchanges addresses with the peer over TCP.  Run
+ * without a host argument it is the server; with one it is the client,
+ * which posts the first send.  After that each receive completion
+ * triggers the next send until both sides have completed `iters'
+ * sends and receives, and the elapsed time and throughput are printed.
+ *
+ * Returns 0 on success and 1 on any error.
+ */
+int main(int argc, char *argv[])
+{
+       struct dlist            *dev_list;
+       struct ibv_device       *ib_dev;
+       struct pingpong_context *ctx;
+       struct pingpong_dest     my_dest;
+       struct pingpong_dest    *rem_dest;
+       struct timeval           start, end;
+       char                    *ib_devname = NULL;
+       char                    *servername = NULL;
+       int                      port = 18515;
+       int                      ib_port = 1;
+       int                      size = 2048;
+       int                      rx_depth = 500;
+       int                      iters = 1000;
+       int                      use_event = 0;
+       int                      routs;
+       int                      rcnt, scnt;
+
+       srand48(getpid() * time(NULL));
+
+       while (1) {
+               int c;
+
+               /* Every short option in the getopt string has a long form listed in usage(). */
+               static struct option long_options[] = {
+                       { .name = "port",     .has_arg = 1, .val = 'p' },
+                       { .name = "ib-dev",   .has_arg = 1, .val = 'd' },
+                       { .name = "ib-port",  .has_arg = 1, .val = 'i' },
+                       { .name = "size",     .has_arg = 1, .val = 's' },
+                       { .name = "rx-depth", .has_arg = 1, .val = 'r' },
+                       { .name = "iters",    .has_arg = 1, .val = 'n' },
+                       { .name = "events",   .has_arg = 0, .val = 'e' },
+                       { 0 }
+               };
+
+               c = getopt_long(argc, argv, "p:d:i:s:r:n:e", long_options, NULL);
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 'p':
+                       port = strtol(optarg, NULL, 0);
+                       if (port < 0 || port > 65535) {
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+
+               case 'd':
+                       ib_devname = strdupa(optarg);
+                       break;
+
+               case 'i':
+                       ib_port = strtol(optarg, NULL, 0);
+                       /* Validate the value just parsed, not the TCP port. */
+                       if (ib_port < 0) {
+                               usage(argv[0]);
+                               return 1;
+                       }
+                       break;
+
+               case 's':
+                       size = strtol(optarg, NULL, 0);
+                       break;
+
+               case 'r':
+                       rx_depth = strtol(optarg, NULL, 0);
+                       break;
+
+               case 'n':
+                       iters = strtol(optarg, NULL, 0);
+                       break;
+
+               case 'e':
+                       ++use_event;
+                       break;
+
+               default:
+                       usage(argv[0]);
+                       return 1;
+               }
+       }
+
+       /* At most one positional argument: the server to connect to. */
+       if (optind == argc - 1)
+               servername = strdupa(argv[optind]);
+       else if (optind < argc) {
+               usage(argv[0]);
+               return 1;
+       }
+
+       page_size = sysconf(_SC_PAGESIZE);
+
+       dev_list = ibv_get_devices();
+
+       dlist_start(dev_list);
+       if (!ib_devname) {
+               ib_dev = dlist_next(dev_list);
+               if (!ib_dev) {
+                       fprintf(stderr, "No IB devices found\n");
+                       return 1;
+               }
+       } else {
+               dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+                       if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+                               break;
+               if (!ib_dev) {
+                       fprintf(stderr, "IB device %s not found\n", ib_devname);
+                       return 1;
+               }
+       }
+
+       ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port);
+       if (!ctx)
+               return 1;
+
+       /* Receives are posted before the address exchange, i.e. before the peer can send. */
+       routs = pp_post_recv(ctx, ctx->rx_depth);
+       if (routs < ctx->rx_depth) {
+               fprintf(stderr, "Couldn't post receive (%d)\n", routs);
+               return 1;
+       }
+
+       my_dest.lid = pp_get_local_lid(ctx, ib_port);
+       my_dest.qpn = ctx->qp->qp_num;
+       my_dest.psn = lrand48() & 0xffffff;
+       if (!my_dest.lid) {
+               fprintf(stderr, "Couldn't get local LID\n");
+               return 1;
+       }
+
+       printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+              my_dest.lid, my_dest.qpn, my_dest.psn);
+
+       if (servername)
+               rem_dest = pp_client_exch_dest(servername, port, &my_dest);
+       else
+               rem_dest = pp_server_exch_dest(port, &my_dest);
+
+       if (!rem_dest)
+               return 1;
+
+       printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+              rem_dest->lid, rem_dest->qpn, rem_dest->psn);
+
+       if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+               return 1;
+
+       if (use_event)
+               if (ibv_req_notify_cq(ctx->cq, 0)) {
+                       fprintf(stderr, "Couldn't request CQ notification\n");
+                       return 1;
+               }
+
+       /* The client serves first; the server only answers receives. */
+       if (servername)
+               if (pp_post_send(ctx, rem_dest->qpn)) {
+                       fprintf(stderr, "Couldn't post send\n");
+                       return 1;
+               }
+
+       if (gettimeofday(&start, NULL)) {
+               perror("gettimeofday");
+               return 1;
+       }
+
+       rcnt = scnt = 0;
+       while (rcnt < iters || scnt < iters) {
+               if (use_event) {
+                       struct ibv_cq *ev_cq;
+                       void          *ev_ctx;
+
+                       if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+                               fprintf(stderr, "Failed to get cq_event\n");
+                               return 1;
+                       }
+
+                       if (ev_cq != ctx->cq) {
+                               fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+                               return 1;
+                       }
+
+                       /* Re-arm notification before polling. */
+                       if (ibv_req_notify_cq(ctx->cq, 0)) {
+                               fprintf(stderr, "Couldn't request CQ notification\n");
+                               return 1;
+                       }
+               }
+
+               {
+                       struct ibv_wc wc[2];
+                       int ne, i;
+
+                       /* In polling mode spin until at least one completion (or an error) shows up. */
+                       do {
+                               ne = ibv_poll_cq(ctx->cq, 2, wc);
+                       } while (!use_event && ne < 1);
+
+                       if (ne < 0) {
+                               fprintf(stderr, "poll CQ failed %d\n", ne);
+                               return 1;
+                       }
+
+                       for (i = 0; i < ne; ++i) {
+                               if (wc[i].status != IBV_WC_SUCCESS) {
+                                       fprintf(stderr, "Failed status %d for wr_id %d\n",
+                                               wc[i].status, (int) wc[i].wr_id);
+                                       return 1;
+                               }
+
+                               switch ((int) wc[i].wr_id) {
+                               case PINGPONG_SEND_WRID:
+                                       ++scnt;
+                                       break;
+
+                               case PINGPONG_RECV_WRID:
+                                       /* Top the receive queue back up to rx_depth when it is nearly empty. */
+                                       if (--routs <= 1) {
+                                               routs += pp_post_recv(ctx, ctx->rx_depth - routs);
+                                               if (routs < ctx->rx_depth) {
+                                                       fprintf(stderr,
+                                                               "Couldn't post receive (%d)\n",
+                                                               routs);
+                                                       return 1;
+                                               }
+                                       }
+
+                                       /* Answer the message just received unless all sends are done. */
+                                       if (scnt < iters)
+                                               if (pp_post_send(ctx, rem_dest->qpn)) {
+                                                       fprintf(stderr, "Couldn't post send\n");
+                                                       return 1;
+                                               }
+
+                                       ++rcnt;
+                                       break;
+
+                               default:
+                                       fprintf(stderr, "Completion for unknown wr_id %d\n",
+                                               (int) wc[i].wr_id);
+                                       return 1;
+                               }
+                       }
+               }
+       }
+
+       if (gettimeofday(&end, NULL)) {
+               perror("gettimeofday");
+               return 1;
+       }
+
+       {
+               float usec = (end.tv_sec - start.tv_sec) * 1000000 +
+                       (end.tv_usec - start.tv_usec);
+               /* Each iteration moves `size' bytes in each direction. */
+               long long bytes = (long long) size * iters * 2;
+
+               printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
+                      bytes, usec / 1000000., bytes * 8. / usec);
+               printf("%d iters in %.2f seconds = %.2f usec/iter\n",
+                      iters, usec / 1000000., usec / iters);
+       }
+
+       return 0;
+}
diff --git a/include/infiniband/arch.h b/include/infiniband/arch.h
new file mode 100644 (file)
index 0000000..b582d18
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef INFINIBAND_ARCH_H
+#define INFINIBAND_ARCH_H
+
+/*
+ * Architecture-specific defines.  Currently, an architecture is
+ * required to implement the following operations:
+ *
+ * mb() - memory barrier.  No loads or stores may be reordered across
+ *     this macro by either the compiler or the CPU.
+ */
+
+#if defined(__i386__)
+
+/*
+ * x86 CPUs may let a load pass an earlier store, so an empty asm (a
+ * compiler-only barrier) does not meet the contract above.  A locked
+ * read-modify-write on the stack is a full fence and, unlike mfence,
+ * does not require SSE2.
+ */
+#define mb()   asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+
+#elif defined(__x86_64__)
+
+/* Same store/load reordering as i386; mfence is always available here. */
+#define mb()   asm volatile("mfence" ::: "memory")
+
+#elif defined(__PPC64__)
+
+#define mb()   asm volatile("sync" ::: "memory")
+
+#elif defined(__ia64__)
+
+#define mb()   asm volatile("mf" ::: "memory")
+
+#elif defined(__PPC__)
+
+#define mb()   asm volatile("sync" ::: "memory")
+
+#elif defined(__sparc__)
+
+#define mb()   asm volatile("membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad" ::: "memory")
+
+#else
+
+#warning No architecture specific defines found.  Using generic implementation.
+
+/* Compiler barrier only: this does NOT order accesses at the CPU level. */
+#define mb()   asm volatile("" ::: "memory")
+
+#endif
+
+#endif /* INFINIBAND_ARCH_H */
index a873621dd528fc4e7b28fdac731fd0d737f0d5ab..dde2650c082a62f12c8ab3e6a7ecca73ff8bafba 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -38,6 +38,7 @@
 #include <sysfs/libsysfs.h>
 
 #include <infiniband/verbs.h>
+#include <infiniband/kern-abi.h>
 
 #ifdef __cplusplus
 #  define BEGIN_C_DECLS extern "C" {
  
 typedef struct ibv_device *(*ibv_driver_init_func)(struct sysfs_class_device *);
 
+/*
+ * Command helpers.  The helpers that create, register, query or modify
+ * an object take a caller-supplied command structure (cmd) together
+ * with its size in bytes (cmd_size); the dealloc/dereg/destroy helpers
+ * take only the object itself.
+ *
+ * NOTE(review): presumably implemented in src/cmd.c, which this commit
+ * adds, and meant to be called by device drivers -- confirm there, as
+ * well as the return-value convention.
+ */
+extern int ibv_cmd_get_context(int num_comp, struct ibv_context *context,
+                              struct ibv_get_context *cmd, size_t cmd_size);
+extern int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
+                             struct ibv_port_attr *port_attr,
+                             struct ibv_query_port *cmd, size_t cmd_size);
+extern int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
+                           struct ibv_alloc_pd *cmd, size_t cmd_size);
+extern int ibv_cmd_dealloc_pd(struct ibv_pd *pd);
+extern int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+                         uint64_t hca_va, enum ibv_access_flags access,
+                         struct ibv_mr *mr, struct ibv_reg_mr *cmd,
+                         size_t cmd_size);
+extern int ibv_cmd_dereg_mr(struct ibv_mr *mr);
+extern int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
+                            struct ibv_cq *cq,
+                            struct ibv_create_cq *cmd, size_t cmd_size);
+extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);
+
+extern int ibv_cmd_create_qp(struct ibv_pd *pd,
+                            struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
+                            struct ibv_create_qp *cmd, size_t cmd_size);
+extern int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+                            enum ibv_qp_attr_mask attr_mask,
+                            struct ibv_modify_qp *cmd, size_t cmd_size);
+extern int ibv_cmd_destroy_qp(struct ibv_qp *qp);
+
 #endif /* INFINIBAND_DRIVER_H */
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
new file mode 100644 (file)
index 0000000..13a9c68
--- /dev/null
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef KERN_ABI_H
+#define KERN_ABI_H
+
+#include <linux/types.h>
+
+/*
+ * This file must be kept in sync with the kernel's version of
+ * drivers/infiniband/include/ib_user_verbs.h
+ */
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define IB_USER_VERBS_ABI_VERSION      1
+
+enum {  /* NOTE(review): presumably the codes stored in each command struct's "command" field -- confirm */
+       IBV_USER_VERBS_CMD_QUERY_PARAMS,        /* = 0; no explicit values, numbering follows declaration order */
+       IBV_USER_VERBS_CMD_GET_CONTEXT,
+       IBV_USER_VERBS_CMD_QUERY_PORT,
+       IBV_USER_VERBS_CMD_ALLOC_PD,
+       IBV_USER_VERBS_CMD_DEALLOC_PD,
+       IBV_USER_VERBS_CMD_REG_MR,
+       IBV_USER_VERBS_CMD_DEREG_MR,
+       IBV_USER_VERBS_CMD_CREATE_CQ,
+       IBV_USER_VERBS_CMD_DESTROY_CQ,
+       IBV_USER_VERBS_CMD_CREATE_QP,
+       IBV_USER_VERBS_CMD_MODIFY_QP,
+       IBV_USER_VERBS_CMD_DESTROY_QP,          /* = 11; inserting or reordering entries renumbers everything after the change */
+};
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct ibv_kern_async_event {   /* 8 + 4 + 4 = 16 bytes */
+       __u64 element;
+       __u32 event_type;
+       __u32 reserved;         /* explicit filler: keeps the struct size a multiple of 8 */
+};
+
+struct ibv_comp_event {
+       __u64 cq_handle;
+};
+
+/*
+ * All commands from userspace should start with a __u32 command field
+ * followed by __u16 in_words and out_words fields (which give the
+ * length of the command block and response buffer if any in 32-bit
+ * words).  The kernel driver will read these fields first and read
+ * the rest of the command struct based on these values.
+ */
+
+struct ibv_query_params {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u64 response;
+};
+
+struct ibv_query_params_resp {
+       __u32 num_cq_events;
+};
+
+struct ibv_get_context {
+       __u32 command;          /* first field of the common command header */
+       __u16 in_words;         /* length of the command block, in 32-bit words */
+       __u16 out_words;        /* length of the response buffer, in 32-bit words */
+       __u64 response;         /* NOTE(review): presumably the response buffer address carried as __u64 -- confirm */
+       __u64 driver_data[0];   /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ibv_get_context_resp {
+       __u32 async_fd;
+       __u32 cq_fd[1];         /* NOTE(review): ibv_cmd_get_context() allocates sizeof *resp + num_comp * sizeof (int), so presumably num_comp entries -- confirm */
+};
+
+struct ibv_query_port {
+       __u32 command;          /* first field of the common command header */
+       __u16 in_words;         /* length of the command block, in 32-bit words */
+       __u16 out_words;        /* length of the response buffer, in 32-bit words */
+       __u64 response;         /* NOTE(review): presumably the response buffer address carried as __u64 -- confirm */
+       __u8  port_num;
+       __u8  reserved[7];      /* explicit filler: port_num + reserved = 8 bytes, so driver_data starts at offset 24 */
+       __u64 driver_data[0];   /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ibv_query_port_resp {
+       __u32 port_cap_flags;
+       __u32 max_msg_sz;
+       __u32 bad_pkey_cntr;
+       __u32 qkey_viol_cntr;
+       __u32 gid_tbl_len;
+       __u16 pkey_tbl_len;
+       __u16 lid;
+       __u16 sm_lid;
+       __u8  state;
+       __u8  max_mtu;
+       __u8  active_mtu;
+       __u8  lmc;
+       __u8  max_vl_num;
+       __u8  sm_sl;
+       __u8  subnet_timeout;
+       __u8  init_type_reply;
+       __u8  active_width;
+       __u8  active_speed;
+       __u8  phys_state;
+       __u8  reserved[3];
+};
+
+struct ibv_alloc_pd {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u64 response;
+       __u64 driver_data[0];
+};
+
+struct ibv_alloc_pd_resp {
+       __u32 pd_handle;
+};
+
+struct ibv_dealloc_pd {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u32 pd_handle;
+};
+
+struct ibv_reg_mr {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u64 response;
+       __u64 start;
+       __u64 length;
+       __u64 hca_va;
+       __u32 pd_handle;
+       __u32 access_flags;
+       __u64 driver_data[0];
+};
+
+struct ibv_reg_mr_resp {
+       __u32 mr_handle;
+       __u32 lkey;
+       __u32 rkey;
+};
+
+struct ibv_dereg_mr {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u32 mr_handle;
+};
+
+struct ibv_create_cq {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u64 response;
+       __u64 user_handle;
+       __u32 cqe;
+       __u32 reserved;
+       __u64 driver_data[0];
+};
+
+struct ibv_create_cq_resp {
+       __u32 cq_handle;
+       __u32 cqe;
+};
+
+struct ibv_destroy_cq {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u32 cq_handle;
+};
+
+struct ibv_create_qp {
+       __u32 command;          /* first field of the common command header */
+       __u16 in_words;         /* length of the command block, in 32-bit words */
+       __u16 out_words;        /* length of the response buffer, in 32-bit words */
+       __u64 response;         /* NOTE(review): presumably the response buffer address carried as __u64 -- confirm */
+       __u64 user_handle;
+       __u32 pd_handle;
+       __u32 send_cq_handle;
+       __u32 recv_cq_handle;
+       __u32 srq_handle;
+       __u32 max_send_wr;
+       __u32 max_recv_wr;
+       __u32 max_send_sge;
+       __u32 max_recv_sge;
+       __u32 max_inline_data;
+       __u8  sq_sig_all;
+       __u8  qp_type;
+       __u8  is_srq;
+       __u8  reserved;         /* explicit filler: completes the last 32-bit word, so driver_data starts at offset 64 */
+       __u64 driver_data[0];   /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ibv_create_qp_resp {
+       __u32 qp_handle;
+       __u32 qpn;
+};
+
+struct ibv_qp_dest {            /* 16 + 4 + 2 + 2 + 8 = 32 bytes */
+       __u8  dgid[16];
+       __u32 flow_label;
+       __u16 dlid;
+       __u16 reserved;         /* explicit filler after dlid: the byte-sized fields below start at offset 24 */
+       __u8  sgid_index;
+       __u8  hop_limit;
+       __u8  traffic_class;
+       __u8  sl;
+       __u8  src_path_bits;
+       __u8  static_rate;
+       __u8  is_global;
+       __u8  port_num;
+};
+
+struct ibv_modify_qp {          /* has no "response" member, unlike struct ibv_create_qp */
+       __u32 command;          /* first field of the common command header */
+       __u16 in_words;         /* length of the command block, in 32-bit words */
+       __u16 out_words;        /* length of the response buffer, in 32-bit words */
+       struct ibv_qp_dest dest;        /* 32 bytes (see struct ibv_qp_dest) */
+       struct ibv_qp_dest alt_dest;    /* 32 bytes */
+       __u32 qp_handle;
+       __u32 attr_mask;
+       __u32 qkey;
+       __u32 rq_psn;
+       __u32 sq_psn;
+       __u32 dest_qp_num;
+       __u32 qp_access_flags;
+       __u16 pkey_index;
+       __u16 alt_pkey_index;
+       __u8  qp_state;
+       __u8  cur_qp_state;
+       __u8  path_mtu;
+       __u8  path_mig_state;
+       __u8  en_sqd_async_notify;
+       __u8  max_rd_atomic;
+       __u8  max_dest_rd_atomic;
+       __u8  min_rnr_timer;
+       __u8  port_num;
+       __u8  timeout;
+       __u8  retry_cnt;
+       __u8  rnr_retry;
+       __u8  alt_port_num;
+       __u8  alt_timeout;
+       __u8  reserved[2];      /* explicit filler: 14 byte-sized fields + 2 = 16 bytes, so driver_data starts at offset 120 */
+       __u64 driver_data[0];   /* zero-length trailing array: contributes nothing to sizeof */
+};
+
+struct ibv_destroy_qp {
+       __u32 command;
+       __u16 in_words;
+       __u16 out_words;
+       __u32 qp_handle;
+};
+
+#endif /* KERN_ABI_H */
diff --git a/include/infiniband/opcode.h b/include/infiniband/opcode.h
new file mode 100644 (file)
index 0000000..cf2598b
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#ifndef INFINIBAND_OPCODE_H
+#define INFINIBAND_OPCODE_H
+
+/*
+ * This macro cleans up the definitions of constants for BTH opcodes.
+ * It is used to define constants such as IBV_OPCODE_UD_SEND_ONLY,
+ * which becomes IBV_OPCODE_UD + IBV_OPCODE_SEND_ONLY, and this gives
+ * the correct value.
+ *
+ * The expansion is an enumerator definition of the form
+ * "IBV_OPCODE_<transport>_<op> = IBV_OPCODE_<transport> + IBV_OPCODE_<op>",
+ * so the macro is only meaningful inside an enum body, after both
+ * IBV_OPCODE_<transport> and IBV_OPCODE_<op> have been defined.
+ *
+ * In short, user code should use the constants defined using the
+ * macro rather than worrying about adding together other constants.
+ */
+#define IBV_OPCODE(transport, op) \
+       IBV_OPCODE_ ## transport ## _ ## op = \
+               IBV_OPCODE_ ## transport + IBV_OPCODE_ ## op
+
+enum {
+       /* transport types -- just used to define real constants */
+       IBV_OPCODE_RC                                = 0x00,
+       IBV_OPCODE_UC                                = 0x20,
+       IBV_OPCODE_RD                                = 0x40,
+       IBV_OPCODE_UD                                = 0x60,
+
+       /* operations -- just used to define real constants */
+       IBV_OPCODE_SEND_FIRST                        = 0x00,
+       IBV_OPCODE_SEND_MIDDLE                       = 0x01,
+       IBV_OPCODE_SEND_LAST                         = 0x02,
+       IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE          = 0x03,
+       IBV_OPCODE_SEND_ONLY                         = 0x04,
+       IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE          = 0x05,
+       IBV_OPCODE_RDMA_WRITE_FIRST                  = 0x06,
+       IBV_OPCODE_RDMA_WRITE_MIDDLE                 = 0x07,
+       IBV_OPCODE_RDMA_WRITE_LAST                   = 0x08,
+       IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE    = 0x09,
+       IBV_OPCODE_RDMA_WRITE_ONLY                   = 0x0a,
+       IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE    = 0x0b,
+       IBV_OPCODE_RDMA_READ_REQUEST                 = 0x0c,
+       IBV_OPCODE_RDMA_READ_RESPONSE_FIRST          = 0x0d,
+       IBV_OPCODE_RDMA_READ_RESPONSE_MIDDLE         = 0x0e,
+       IBV_OPCODE_RDMA_READ_RESPONSE_LAST           = 0x0f,
+       IBV_OPCODE_RDMA_READ_RESPONSE_ONLY           = 0x10,
+       IBV_OPCODE_ACKNOWLEDGE                       = 0x11,
+       IBV_OPCODE_ATOMIC_ACKNOWLEDGE                = 0x12,
+       IBV_OPCODE_COMPARE_SWAP                      = 0x13,
+       IBV_OPCODE_FETCH_ADD                         = 0x14,
+
+       /* real constants follow -- see the comment above the IBV_OPCODE()
+          macro for more details */
+
+       /* RC */
+       IBV_OPCODE(RC, SEND_FIRST),
+       IBV_OPCODE(RC, SEND_MIDDLE),
+       IBV_OPCODE(RC, SEND_LAST),
+       IBV_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
+       IBV_OPCODE(RC, SEND_ONLY),
+       IBV_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
+       IBV_OPCODE(RC, RDMA_WRITE_FIRST),
+       IBV_OPCODE(RC, RDMA_WRITE_MIDDLE),
+       IBV_OPCODE(RC, RDMA_WRITE_LAST),
+       IBV_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+       IBV_OPCODE(RC, RDMA_WRITE_ONLY),
+       IBV_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+       IBV_OPCODE(RC, RDMA_READ_REQUEST),
+       IBV_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
+       IBV_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
+       IBV_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
+       IBV_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
+       IBV_OPCODE(RC, ACKNOWLEDGE),
+       IBV_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
+       IBV_OPCODE(RC, COMPARE_SWAP),
+       IBV_OPCODE(RC, FETCH_ADD),
+
+       /* UC */
+       IBV_OPCODE(UC, SEND_FIRST),
+       IBV_OPCODE(UC, SEND_MIDDLE),
+       IBV_OPCODE(UC, SEND_LAST),
+       IBV_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
+       IBV_OPCODE(UC, SEND_ONLY),
+       IBV_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
+       IBV_OPCODE(UC, RDMA_WRITE_FIRST),
+       IBV_OPCODE(UC, RDMA_WRITE_MIDDLE),
+       IBV_OPCODE(UC, RDMA_WRITE_LAST),
+       IBV_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+       IBV_OPCODE(UC, RDMA_WRITE_ONLY),
+       IBV_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+
+       /* RD */
+       IBV_OPCODE(RD, SEND_FIRST),
+       IBV_OPCODE(RD, SEND_MIDDLE),
+       IBV_OPCODE(RD, SEND_LAST),
+       IBV_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
+       IBV_OPCODE(RD, SEND_ONLY),
+       IBV_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
+       IBV_OPCODE(RD, RDMA_WRITE_FIRST),
+       IBV_OPCODE(RD, RDMA_WRITE_MIDDLE),
+       IBV_OPCODE(RD, RDMA_WRITE_LAST),
+       IBV_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+       IBV_OPCODE(RD, RDMA_WRITE_ONLY),
+       IBV_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+       IBV_OPCODE(RD, RDMA_READ_REQUEST),
+       IBV_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
+       IBV_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
+       IBV_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
+       IBV_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
+       IBV_OPCODE(RD, ACKNOWLEDGE),
+       IBV_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
+       IBV_OPCODE(RD, COMPARE_SWAP),
+       IBV_OPCODE(RD, FETCH_ADD),
+
+       /* UD */
+       IBV_OPCODE(UD, SEND_ONLY),
+       IBV_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
+};
+
+#endif /* INFINIBAND_OPCODE_H */
index f5fa340128a8b16310fd4e63759ea98af442dd2b..5128c410af14370caab447614e2b02984c0f2427 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2004 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
 
 BEGIN_C_DECLS
 
-enum ib_event_type {
+union ibv_gid {
+       uint8_t                 raw[16];
+       struct {
+               uint64_t        subnet_prefix;
+               uint64_t        interface_id;
+       } global;
+};
+
+enum ibv_mtu {
+       IBV_MTU_256  = 1,
+       IBV_MTU_512  = 2,
+       IBV_MTU_1024 = 3,
+       IBV_MTU_2048 = 4,
+       IBV_MTU_4096 = 5
+};
+
+enum ibv_port_state {
+       IBV_PORT_NOP            = 0,
+       IBV_PORT_DOWN           = 1,
+       IBV_PORT_INIT           = 2,
+       IBV_PORT_ARMED          = 3,
+       IBV_PORT_ACTIVE         = 4,
+       IBV_PORT_ACTIVE_DEFER   = 5
+};
+
+struct ibv_port_attr {
+       enum ibv_port_state     state;
+       enum ibv_mtu            max_mtu;
+       enum ibv_mtu            active_mtu;
+       int                     gid_tbl_len;
+       uint32_t                port_cap_flags;
+       uint32_t                max_msg_sz;
+       uint32_t                bad_pkey_cntr;
+       uint32_t                qkey_viol_cntr;
+       uint16_t                pkey_tbl_len;
+       uint16_t                lid;
+       uint16_t                sm_lid;
+       uint8_t                 lmc;
+       uint8_t                 max_vl_num;
+       uint8_t                 sm_sl;
+       uint8_t                 subnet_timeout;
+       uint8_t                 init_type_reply;
+       uint8_t                 active_width;
+       uint8_t                 active_speed;
+       uint8_t                 phys_state;
+};
+
+enum ibv_event_type {
        IBV_EVENT_CQ_ERR,
        IBV_EVENT_QP_FATAL,
        IBV_EVENT_QP_REQ_ERR,
@@ -72,7 +120,68 @@ struct ibv_async_event {
                struct ibv_qp *qp;
                int            port_num;
        }                  element;
-       enum ib_event_type event_type;
+       enum ibv_event_type event_type;
+};
+
+enum ibv_wc_status {
+       IBV_WC_SUCCESS,
+       IBV_WC_LOC_LEN_ERR,
+       IBV_WC_LOC_QP_OP_ERR,
+       IBV_WC_LOC_EEC_OP_ERR,
+       IBV_WC_LOC_PROT_ERR,
+       IBV_WC_WR_FLUSH_ERR,
+       IBV_WC_MW_BIND_ERR,
+       IBV_WC_BAD_RESP_ERR,
+       IBV_WC_LOC_ACCESS_ERR,
+       IBV_WC_REM_INV_REQ_ERR,
+       IBV_WC_REM_ACCESS_ERR,
+       IBV_WC_REM_OP_ERR,
+       IBV_WC_RETRY_EXC_ERR,
+       IBV_WC_RNR_RETRY_EXC_ERR,
+       IBV_WC_LOC_RDD_VIOL_ERR,
+       IBV_WC_REM_INV_RD_REQ_ERR,
+       IBV_WC_REM_ABORT_ERR,
+       IBV_WC_INV_EECN_ERR,
+       IBV_WC_INV_EEC_STATE_ERR,
+       IBV_WC_FATAL_ERR,
+       IBV_WC_RESP_TIMEOUT_ERR,
+       IBV_WC_GENERAL_ERR
+};
+
+enum ibv_wc_opcode {
+       IBV_WC_SEND,
+       IBV_WC_RDMA_WRITE,
+       IBV_WC_RDMA_READ,
+       IBV_WC_COMP_SWAP,
+       IBV_WC_FETCH_ADD,
+       IBV_WC_BIND_MW,
+/*
+ * Set value of IBV_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IBV_WC_RECV).
+ */
+       IBV_WC_RECV                     = 1 << 7,
+       IBV_WC_RECV_RDMA_WITH_IMM       /* implicitly (1 << 7) + 1, so it also passes the (opcode & IBV_WC_RECV) test */
+};
+
+enum ibv_wc_flags {
+       IBV_WC_GRH              = 1 << 0,
+       IBV_WC_WITH_IMM         = 1 << 1
+};
+
+struct ibv_wc {
+       uint64_t                wr_id;
+       enum ibv_wc_status      status;
+       enum ibv_wc_opcode      opcode;
+       uint32_t                vendor_err;
+       uint32_t                byte_len;
+       uint32_t                imm_data;       /* in network byte order */
+       uint32_t                qp_num;
+       uint32_t                src_qp;
+       enum ibv_wc_flags       wc_flags;
+       uint16_t                pkey_index;
+       uint16_t                slid;
+       uint8_t                 sl;
+       uint8_t                 dlid_path_bits;
 };
 
 enum ibv_access_flags {
@@ -84,23 +193,215 @@ enum ibv_access_flags {
 };
 
 struct ibv_pd {
-
+       struct ibv_context     *context;
+       uint32_t                handle;
 };
 
 struct ibv_mr {
+       struct ibv_context     *context;
+       struct ibv_pd          *pd;
+       uint32_t                handle;
+       uint32_t                lkey;
+       uint32_t                rkey;
+};
 
+struct ibv_global_route {
+       union ibv_gid           dgid;
+       uint32_t                flow_label;
+       uint8_t                 sgid_index;
+       uint8_t                 hop_limit;
+       uint8_t                 traffic_class;
 };
 
-struct ibv_qp {
+struct ibv_ah_attr {
+       struct ibv_global_route grh;
+       uint16_t                dlid;
+       uint8_t                 sl;
+       uint8_t                 src_path_bits;
+       uint8_t                 static_rate;
+       uint8_t                 is_global;
+       uint8_t                 port_num;
+};
+
+enum ibv_qp_type {
+       IBV_QPT_RC = 2,         /* NOTE(review): values 0 and 1 are unused here; the reason is not visible in this header */
+       IBV_QPT_UC,             /* implicitly 3 */
+       IBV_QPT_UD              /* implicitly 4 */
+};
+
+struct ibv_qp_cap {
+       uint32_t                max_send_wr;
+       uint32_t                max_recv_wr;
+       uint32_t                max_send_sge;
+       uint32_t                max_recv_sge;
+       uint32_t                max_inline_data;
+};
+
+struct ibv_qp_init_attr {
+       void                   *qp_context;
+       struct ibv_cq          *send_cq;
+       struct ibv_cq          *recv_cq;
+       struct ibv_srq         *srq;
+       struct ibv_qp_cap       cap;
+       enum ibv_qp_type        qp_type;
+       int                     sq_sig_all;
+};
+
+enum ibv_qp_attr_mask {
+       IBV_QP_STATE                    = 1 <<  0,
+       IBV_QP_CUR_STATE                = 1 <<  1,
+       IBV_QP_EN_SQD_ASYNC_NOTIFY      = 1 <<  2,
+       IBV_QP_ACCESS_FLAGS             = 1 <<  3,
+       IBV_QP_PKEY_INDEX               = 1 <<  4,
+       IBV_QP_PORT                     = 1 <<  5,
+       IBV_QP_QKEY                     = 1 <<  6,
+       IBV_QP_AV                       = 1 <<  7,
+       IBV_QP_PATH_MTU                 = 1 <<  8,
+       IBV_QP_TIMEOUT                  = 1 <<  9,
+       IBV_QP_RETRY_CNT                = 1 << 10,
+       IBV_QP_RNR_RETRY                = 1 << 11,
+       IBV_QP_RQ_PSN                   = 1 << 12,
+       IBV_QP_MAX_QP_RD_ATOMIC         = 1 << 13,
+       IBV_QP_ALT_PATH                 = 1 << 14,
+       IBV_QP_MIN_RNR_TIMER            = 1 << 15,
+       IBV_QP_SQ_PSN                   = 1 << 16,
+       IBV_QP_MAX_DEST_RD_ATOMIC       = 1 << 17,
+       IBV_QP_PATH_MIG_STATE           = 1 << 18,
+       IBV_QP_CAP                      = 1 << 19,
+       IBV_QP_DEST_QPN                 = 1 << 20
+};
+
+enum ibv_qp_state {
+       IBV_QPS_RESET,
+       IBV_QPS_INIT,
+       IBV_QPS_RTR,
+       IBV_QPS_RTS,
+       IBV_QPS_SQD,
+       IBV_QPS_SQE,
+       IBV_QPS_ERR
+};
+
+enum ibv_mig_state {
+       IBV_MIG_MIGRATED,
+       IBV_MIG_REARM,
+       IBV_MIG_ARMED
+};
+
+struct ibv_qp_attr {
+       enum ibv_qp_state       qp_state;
+       enum ibv_qp_state       cur_qp_state;
+       enum ibv_mtu            path_mtu;
+       enum ibv_mig_state      path_mig_state;
+       uint32_t                qkey;
+       uint32_t                rq_psn;
+       uint32_t                sq_psn;
+       uint32_t                dest_qp_num;
+       int                     qp_access_flags;
+       struct ibv_qp_cap       cap;
+       struct ibv_ah_attr      ah_attr;
+       struct ibv_ah_attr      alt_ah_attr;
+       uint16_t                pkey_index;
+       uint16_t                alt_pkey_index;
+       uint8_t                 en_sqd_async_notify;
+       uint8_t                 sq_draining;
+       uint8_t                 max_rd_atomic;
+       uint8_t                 max_dest_rd_atomic;
+       uint8_t                 min_rnr_timer;
+       uint8_t                 port_num;
+       uint8_t                 timeout;
+       uint8_t                 retry_cnt;
+       uint8_t                 rnr_retry;
+       uint8_t                 alt_port_num;
+       uint8_t                 alt_timeout;
+};
+
+enum ibv_wr_opcode {
+       IBV_WR_RDMA_WRITE,
+       IBV_WR_RDMA_WRITE_WITH_IMM,
+       IBV_WR_SEND,
+       IBV_WR_SEND_WITH_IMM,
+       IBV_WR_RDMA_READ,
+       IBV_WR_ATOMIC_CMP_AND_SWP,
+       IBV_WR_ATOMIC_FETCH_AND_ADD
+};
+
+enum ibv_send_flags {
+       IBV_SEND_FENCE          = 1 << 0,
+       IBV_SEND_SIGNALED       = 1 << 1,
+       IBV_SEND_SOLICITED      = 1 << 2,
+       IBV_SEND_INLINE         = 1 << 3
+};
+
+struct ibv_sge {
+       uint64_t                addr;
+       uint32_t                length;
+       uint32_t                lkey;
+};
+
+struct ibv_send_wr {
+       struct ibv_send_wr     *next;
+       uint64_t                wr_id;
+       struct ibv_sge         *sg_list;
+       int                     num_sge;
+       enum ibv_wr_opcode      opcode;
+       enum ibv_send_flags     send_flags;
+       uint32_t                imm_data;
+       union {
+               struct {
+                       uint64_t        remote_addr;
+                       uint32_t        rkey;
+               } rdma;
+               struct {
+                       uint64_t        remote_addr;
+                       uint64_t        compare_add;
+                       uint64_t        swap;
+                       uint32_t        rkey;
+               } atomic;
+               struct {
+                       struct ibv_ah  *ah;
+                       uint32_t        remote_qpn;
+                       uint32_t        remote_qkey;
+               } ud;
+       } wr;
+};
+
+struct ibv_recv_wr {
+       struct ibv_recv_wr     *next;
+       uint64_t                wr_id;
+       struct ibv_sge         *sg_list;
+       int                     num_sge;
+};
 
+struct ibv_qp {
+       struct ibv_context     *context;
+       void                   *qp_context;
+       struct ibv_pd          *pd; 
+       struct ibv_cq          *send_cq;
+       struct ibv_cq          *recv_cq;
+       uint32_t                handle;
+       uint32_t                qp_num;
+       enum ibv_qp_state       state;
 };
 
 struct ibv_cq {
+       struct ibv_context     *context;
+       void                   *cq_context;
+       uint32_t                handle;
+       int                     cqe;
+};
 
+struct ibv_ah {
+       struct ibv_context     *context;
+       struct ibv_pd          *pd;
 };
 
-struct ibv_device_ops {
+struct ibv_device;
+struct ibv_context;
 
+struct ibv_device_ops {
+       struct ibv_context *    (*alloc_context)(struct ibv_device *device,
+                                                int num_comp, int cmd_fd);
+       void                    (*free_context)(struct ibv_context *context);
 };
 
 struct ibv_device {
@@ -110,8 +411,34 @@ struct ibv_device {
        struct ibv_device_ops      ops;
 };
 
+struct ibv_context_ops {        /* embedded by value in struct ibv_context as "ops" */
+       int                     (*query_port)(struct ibv_context *context, uint8_t port_num,
+                                             struct ibv_port_attr *port_attr);
+       struct ibv_pd *         (*alloc_pd)(struct ibv_context *context);
+       int                     (*dealloc_pd)(struct ibv_pd *pd);
+       struct ibv_mr *         (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
+                                         enum ibv_access_flags access);
+       int                     (*dereg_mr)(struct ibv_mr *mr);
+       struct ibv_cq *         (*create_cq)(struct ibv_context *context, int cqe);    /* takes no cq_context argument, unlike ibv_create_cq() */
+       int                     (*poll_cq)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc);     /* called directly by the inline ibv_poll_cq() */
+       int                     (*req_notify_cq)(struct ibv_cq *cq, int solicited);    /* called directly by the inline ibv_req_notify_cq() */
+       void                    (*cq_event)(struct ibv_cq *cq);
+       int                     (*destroy_cq)(struct ibv_cq *cq);
+       struct ibv_qp *         (*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+       int                     (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+                                            enum ibv_qp_attr_mask attr_mask);
+       int                     (*destroy_qp)(struct ibv_qp *qp);
+       int                     (*post_send)(struct ibv_qp *qp, struct ibv_send_wr *wr,
+                                            struct ibv_send_wr **bad_wr);     /* called directly by the inline ibv_post_send() */
+       int                     (*post_recv)(struct ibv_qp *qp, struct ibv_recv_wr *wr,
+                                            struct ibv_recv_wr **bad_wr);     /* called directly by the inline ibv_post_recv() */
+       struct ibv_ah *         (*create_ah)(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+       int                     (*destroy_ah)(struct ibv_ah *ah);
+};
+
 struct ibv_context {
        struct ibv_device         *device;
+       struct ibv_context_ops     ops;
        int                        cmd_fd;
        int                        async_fd;
        int                        num_comp;
@@ -149,6 +476,12 @@ extern int ibv_close_device(struct ibv_context *context);
 extern int ibv_get_async_event(struct ibv_context *context,
                               struct ibv_async_event *event);
 
+/**
+ * ibv_query_port - Get port properties
+ */
+extern int ibv_query_port(struct ibv_context *context, uint8_t port_num,
+                         struct ibv_port_attr *port_attr);
+
 /**
  * ibv_alloc_pd - Allocate a protection domain
  */
@@ -170,6 +503,85 @@ extern struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
  */
 extern int ibv_dereg_mr(struct ibv_mr *mr);
 
+/**
+ * ibv_create_cq - Create a completion queue
+ */
+extern struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
+                                   void *cq_context);
+
+/**
+ * ibv_destroy_cq - Destroy a completion queue
+ */
+extern int ibv_destroy_cq(struct ibv_cq *cq);
+
+/**
+ * ibv_get_cq_event - Read next CQ event
+ */
+extern int ibv_get_cq_event(struct ibv_context *context, int comp_num,
+                           struct ibv_cq **cq, void **cq_context);
+
+/**
+ * ibv_poll_cq - Poll a CQ for work completions
+ * @cq: completion queue; cq->context selects the ops table that is used
+ * @num_entries: passed through unchanged to the poll_cq op
+ * @wc: passed through unchanged to the poll_cq op
+ *
+ * Thin inline wrapper: returns whatever cq->context->ops.poll_cq returns.
+ * No NULL checks are made on @cq, its context, or the op pointer.
+ */
+static inline int ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc)
+{
+       return cq->context->ops.poll_cq(cq, num_entries, wc);
+}
+
+/**
+ * ibv_req_notify_cq - Request completion notification on a CQ.
+ * @cq: completion queue; cq->context selects the ops table that is used
+ * @solicited: passed through unchanged to the req_notify_cq op
+ *
+ * Thin inline wrapper: returns whatever cq->context->ops.req_notify_cq
+ * returns.  No NULL checks are made on @cq, its context, or the op pointer.
+ */
+static inline int ibv_req_notify_cq(struct ibv_cq *cq, int solicited)
+{
+       return cq->context->ops.req_notify_cq(cq, solicited);
+}
+
+/**
+ * ibv_create_qp - Create a queue pair.
+ */
+extern struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
+                                   struct ibv_qp_init_attr *qp_init_attr);
+
+/**
+ * ibv_modify_qp - Modify a queue pair.
+ */
+extern int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+                        enum ibv_qp_attr_mask attr_mask);
+
+/**
+ * ibv_destroy_qp - Destroy a queue pair.
+ */
+extern int ibv_destroy_qp(struct ibv_qp *qp);
+
+/**
+ * ibv_post_send - Post a list of work requests to a send queue.
+ * @qp: queue pair; qp->context selects the ops table that is used
+ * @wr: passed through unchanged to the post_send op
+ * @bad_wr: passed through unchanged to the post_send op
+ *
+ * Thin inline wrapper: returns whatever qp->context->ops.post_send returns.
+ * No NULL checks are made on @qp, its context, or the op pointer.
+ */
+static inline int ibv_post_send(struct ibv_qp *qp, struct ibv_send_wr *wr,
+                               struct ibv_send_wr **bad_wr)
+{
+       return qp->context->ops.post_send(qp, wr, bad_wr);
+}
+
+/**
+ * ibv_post_recv - Post a list of work requests to a receive queue.
+ * @qp: queue pair; qp->context selects the ops table that is used
+ * @wr: passed through unchanged to the post_recv op
+ * @bad_wr: passed through unchanged to the post_recv op
+ *
+ * Thin inline wrapper: returns whatever qp->context->ops.post_recv returns.
+ * No NULL checks are made on @qp, its context, or the op pointer.
+ */
+static inline int ibv_post_recv(struct ibv_qp *qp, struct ibv_recv_wr *wr,
+                               struct ibv_recv_wr **bad_wr)
+{
+       return qp->context->ops.post_recv(qp, wr, bad_wr);
+}
+
+/**
+ * ibv_create_ah - Create an address handle.
+ */
+extern struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr);
+
+/**
+ * ibv_destroy_ah - Destroy an address handle.
+ */
+extern int ibv_destroy_ah(struct ibv_ah *ah);
+
 END_C_DECLS
 
 #endif /* INFINIBAND_VERBS_H */
diff --git a/src/cmd.c b/src/cmd.c
new file mode 100644 (file)
index 0000000..44e149f
--- /dev/null
+++ b/src/cmd.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <alloca.h>
+#include <string.h>
+
+#include "ibverbs.h"
+
+/*
+ * ibv_cmd_get_context - issue the GET_CONTEXT command on
+ * context->cmd_fd and store the returned event file descriptors.
+ *
+ * @num_comp:  number of cq_fd[] entries to copy back into @context
+ * @context:   supplies cmd_fd; receives async_fd and cq_fd[0..num_comp-1]
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, EINVAL if @num_comp is negative, errno if
+ * write() fails, or EIO if write() accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_get_context(int num_comp, struct ibv_context *context,
+                       struct ibv_get_context *cmd, size_t cmd_size)
+{
+       struct ibv_get_context_resp *resp;
+       size_t resp_size;
+       ssize_t written;
+       int i;
+
+       /* A negative count would turn into a huge alloca() size. */
+       if (num_comp < 0)
+               return EINVAL;
+
+       resp_size = sizeof *resp + num_comp * sizeof (int);
+       resp      = alloca(resp_size);
+       IBV_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp);
+
+       /*
+        * The macro derives out_words from sizeof *resp only, which
+        * leaves out the num_comp extra ints allocated above.
+        * Advertise the whole response buffer instead.
+        */
+       cmd->out_words = resp_size / 4;
+
+       written = write(context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; never report success. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       context->async_fd = resp->async_fd;
+       for (i = 0; i < num_comp; ++i)
+               context->cq_fd[i] = resp->cq_fd[i];
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_query_port - issue the QUERY_PORT command on context->cmd_fd
+ * and copy the response fields into @port_attr.
+ *
+ * @context:   supplies cmd_fd
+ * @port_num:  stored in cmd->port_num
+ * @port_attr: filled in from the response on success; untouched on error
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
+                      struct ibv_port_attr *port_attr,
+                      struct ibv_query_port *cmd, size_t cmd_size)
+{
+       struct ibv_query_port_resp resp;
+       ssize_t written;
+
+       IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp);
+       cmd->port_num = port_num;
+
+       written = write(context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; resp would be garbage. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       port_attr->state           = resp.state;
+       port_attr->max_mtu         = resp.max_mtu;
+       port_attr->active_mtu      = resp.active_mtu;
+       port_attr->gid_tbl_len     = resp.gid_tbl_len;
+       port_attr->port_cap_flags  = resp.port_cap_flags;
+       port_attr->max_msg_sz      = resp.max_msg_sz;
+       port_attr->bad_pkey_cntr   = resp.bad_pkey_cntr;
+       port_attr->qkey_viol_cntr  = resp.qkey_viol_cntr;
+       port_attr->pkey_tbl_len    = resp.pkey_tbl_len;
+       port_attr->lid             = resp.lid;
+       port_attr->sm_lid          = resp.sm_lid;
+       port_attr->lmc             = resp.lmc;
+       port_attr->max_vl_num      = resp.max_vl_num;
+       port_attr->sm_sl           = resp.sm_sl;
+       port_attr->subnet_timeout  = resp.subnet_timeout;
+       port_attr->init_type_reply = resp.init_type_reply;
+       port_attr->active_width    = resp.active_width;
+       port_attr->active_speed    = resp.active_speed;
+       port_attr->phys_state      = resp.phys_state;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_alloc_pd - issue the ALLOC_PD command on context->cmd_fd and
+ * record the returned handle in pd->handle.
+ *
+ * @context:   supplies cmd_fd
+ * @pd:        receives the handle on success; untouched on error
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
+                    struct ibv_alloc_pd *cmd, size_t cmd_size)
+{
+       struct ibv_alloc_pd_resp resp;
+       ssize_t written;
+
+       IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, &resp);
+
+       written = write(context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; resp would be garbage. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       pd->handle = resp.pd_handle;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_dealloc_pd - issue the DEALLOC_PD command for pd->handle on
+ * the command fd of the context that owns @pd.
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer bytes than the command.
+ */
+int ibv_cmd_dealloc_pd(struct ibv_pd *pd)
+{
+       struct ibv_dealloc_pd cmd;
+       ssize_t written;
+
+       IBV_INIT_CMD(&cmd, sizeof cmd, DEALLOC_PD);
+       cmd.pd_handle = pd->handle;
+
+       written = write(pd->context->cmd_fd, &cmd, sizeof cmd);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; never report success. */
+       if ((size_t) written != sizeof cmd)
+               return EIO;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_reg_mr - issue the REG_MR command on the command fd of the
+ * context that owns @pd and record the returned handle and keys.
+ *
+ * @pd:        supplies the command fd (via pd->context) and pd_handle
+ * @addr:      stored in cmd->start
+ * @length:    stored in cmd->length
+ * @hca_va:    stored in cmd->hca_va
+ * @access:    stored in cmd->access_flags
+ * @mr:        receives handle, lkey and rkey on success
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+                  uint64_t hca_va, enum ibv_access_flags access,
+                  struct ibv_mr *mr, struct ibv_reg_mr *cmd,
+                  size_t cmd_size)
+{
+       struct ibv_reg_mr_resp resp;
+       ssize_t written;
+
+       IBV_INIT_CMD_RESP(cmd, cmd_size, REG_MR, &resp);
+
+       cmd->start        = (uintptr_t) addr;
+       cmd->length       = length;
+       cmd->hca_va       = hca_va;
+       cmd->pd_handle    = pd->handle;
+       cmd->access_flags = access;
+
+       written = write(pd->context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; resp would be garbage. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       mr->handle = resp.mr_handle;
+       mr->lkey   = resp.lkey;
+       mr->rkey   = resp.rkey;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_dereg_mr - issue the DEREG_MR command for mr->handle on the
+ * command fd of the context that owns @mr.
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer bytes than the command.
+ */
+int ibv_cmd_dereg_mr(struct ibv_mr *mr)
+{
+       struct ibv_dereg_mr cmd;
+       ssize_t written;
+
+       IBV_INIT_CMD(&cmd, sizeof cmd, DEREG_MR);
+       cmd.mr_handle = mr->handle;
+
+       written = write(mr->context->cmd_fd, &cmd, sizeof cmd);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; never report success. */
+       if ((size_t) written != sizeof cmd)
+               return EIO;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_create_cq - issue the CREATE_CQ command on context->cmd_fd
+ * and record the returned handle and CQE count in @cq.
+ *
+ * @context:   supplies cmd_fd
+ * @cqe:       stored in cmd->cqe
+ * @cq:        its address is sent as user_handle; receives handle and
+ *             cqe from the response on success
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
+                     struct ibv_cq *cq,
+                     struct ibv_create_cq *cmd, size_t cmd_size)
+{
+       struct ibv_create_cq_resp resp;
+       ssize_t written;
+
+       IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, &resp);
+       cmd->user_handle = (uintptr_t) cq;
+       cmd->cqe         = cqe;
+
+       written = write(context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; resp would be garbage. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       cq->handle = resp.cq_handle;
+       cq->cqe    = resp.cqe;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_destroy_cq - issue the DESTROY_CQ command for cq->handle on
+ * the command fd of the context that owns @cq.
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer bytes than the command.
+ */
+int ibv_cmd_destroy_cq(struct ibv_cq *cq)
+{
+       struct ibv_destroy_cq cmd;
+       ssize_t written;
+
+       IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_CQ);
+       cmd.cq_handle = cq->handle;
+
+       written = write(cq->context->cmd_fd, &cmd, sizeof cmd);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; never report success. */
+       if ((size_t) written != sizeof cmd)
+               return EIO;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_create_qp - issue the CREATE_QP command on the command fd of
+ * the context that owns @pd and record the returned handle and QP
+ * number in @qp.
+ *
+ * @pd:        supplies the command fd (via pd->context) and pd_handle
+ * @qp:        its address is sent as user_handle; receives handle and
+ *             qp_num from the response on success
+ * @attr:      supplies the CQ handles, capabilities, sq_sig_all and
+ *             qp_type copied into @cmd
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_create_qp(struct ibv_pd *pd,
+                     struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
+                     struct ibv_create_qp *cmd, size_t cmd_size)
+{
+       struct ibv_create_qp_resp resp;
+       ssize_t written;
+
+       IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, &resp);
+       cmd->user_handle     = (uintptr_t) qp;
+       cmd->pd_handle       = pd->handle;
+       cmd->send_cq_handle  = attr->send_cq->handle;
+       cmd->recv_cq_handle  = attr->recv_cq->handle;
+       cmd->max_send_wr     = attr->cap.max_send_wr;
+       cmd->max_recv_wr     = attr->cap.max_recv_wr;
+       cmd->max_send_sge    = attr->cap.max_send_sge;
+       cmd->max_recv_sge    = attr->cap.max_recv_sge;
+       cmd->max_inline_data = attr->cap.max_inline_data;
+       cmd->sq_sig_all      = attr->sq_sig_all;
+       cmd->qp_type         = attr->qp_type;
+       cmd->is_srq          = 0;
+
+       written = write(pd->context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; resp would be garbage. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       qp->handle = resp.qp_handle;
+       qp->qp_num = resp.qpn;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_modify_qp - issue the MODIFY_QP command on the command fd of
+ * the context that owns @qp.
+ *
+ * @qp:        supplies the command fd (via qp->context) and qp_handle
+ * @attr:      every field listed below is copied into @cmd, regardless
+ *             of @attr_mask
+ * @attr_mask: stored in cmd->attr_mask
+ * @cmd:       command buffer; its header fields are filled in here
+ * @cmd_size:  number of bytes of @cmd to write
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer than @cmd_size bytes.
+ */
+int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+                     enum ibv_qp_attr_mask attr_mask,
+                     struct ibv_modify_qp *cmd, size_t cmd_size)
+{
+       ssize_t written;
+
+       IBV_INIT_CMD(cmd, cmd_size, MODIFY_QP);
+
+       cmd->qp_handle           = qp->handle;
+       cmd->attr_mask           = attr_mask;
+       cmd->qkey                = attr->qkey;
+       cmd->rq_psn              = attr->rq_psn;
+       cmd->sq_psn              = attr->sq_psn;
+       cmd->dest_qp_num         = attr->dest_qp_num;
+       cmd->qp_access_flags     = attr->qp_access_flags;
+       cmd->pkey_index          = attr->pkey_index;
+       cmd->alt_pkey_index      = attr->alt_pkey_index;
+       cmd->qp_state            = attr->qp_state;
+       cmd->cur_qp_state        = attr->cur_qp_state;
+       cmd->path_mtu            = attr->path_mtu;
+       cmd->path_mig_state      = attr->path_mig_state;
+       cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
+       cmd->max_rd_atomic       = attr->max_rd_atomic;
+       cmd->max_dest_rd_atomic  = attr->max_dest_rd_atomic;
+       cmd->min_rnr_timer       = attr->min_rnr_timer;
+       cmd->port_num            = attr->port_num;
+       cmd->timeout             = attr->timeout;
+       cmd->retry_cnt           = attr->retry_cnt;
+       cmd->rnr_retry           = attr->rnr_retry;
+       cmd->alt_port_num        = attr->alt_port_num;
+       cmd->alt_timeout         = attr->alt_timeout;
+
+       /* Primary path, taken from attr->ah_attr. */
+       memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+       cmd->dest.flow_label        = attr->ah_attr.grh.flow_label;
+       cmd->dest.dlid              = attr->ah_attr.dlid;
+       cmd->dest.sgid_index        = attr->ah_attr.grh.sgid_index;
+       cmd->dest.hop_limit         = attr->ah_attr.grh.hop_limit;
+       cmd->dest.traffic_class     = attr->ah_attr.grh.traffic_class;
+       cmd->dest.sl                = attr->ah_attr.sl;
+       cmd->dest.src_path_bits     = attr->ah_attr.src_path_bits;
+       cmd->dest.static_rate       = attr->ah_attr.static_rate;
+       cmd->dest.is_global         = attr->ah_attr.is_global;
+       cmd->dest.port_num          = attr->ah_attr.port_num;
+
+       /* Alternate path, taken from attr->alt_ah_attr. */
+       memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+       cmd->alt_dest.flow_label    = attr->alt_ah_attr.grh.flow_label;
+       cmd->alt_dest.dlid          = attr->alt_ah_attr.dlid;
+       cmd->alt_dest.sgid_index    = attr->alt_ah_attr.grh.sgid_index;
+       cmd->alt_dest.hop_limit     = attr->alt_ah_attr.grh.hop_limit;
+       cmd->alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+       cmd->alt_dest.sl            = attr->alt_ah_attr.sl;
+       cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+       cmd->alt_dest.static_rate   = attr->alt_ah_attr.static_rate;
+       cmd->alt_dest.is_global     = attr->alt_ah_attr.is_global;
+       cmd->alt_dest.port_num      = attr->alt_ah_attr.port_num;
+
+       written = write(qp->context->cmd_fd, cmd, cmd_size);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; never report success. */
+       if ((size_t) written != cmd_size)
+               return EIO;
+
+       return 0;
+}
+
+/*
+ * ibv_cmd_destroy_qp - issue the DESTROY_QP command for qp->handle on
+ * the command fd of the context that owns @qp.
+ *
+ * Returns 0 on success, errno if write() fails, or EIO if write()
+ * accepts fewer bytes than the command.
+ */
+int ibv_cmd_destroy_qp(struct ibv_qp *qp)
+{
+       struct ibv_destroy_qp cmd;
+       ssize_t written;
+
+       IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_QP);
+       cmd.qp_handle = qp->handle;
+
+       written = write(qp->context->cmd_fd, &cmd, sizeof cmd);
+       if (written < 0)
+               return errno;
+       /* errno is not set on a short write; never report success. */
+       if ((size_t) written != sizeof cmd)
+               return EIO;
+
+       return 0;
+}
index cf32602408d75c01ab2c7f47a63809734e1c1b5d..3b38b856141f5e70663e0160d9e3ba4ff3643560 100644 (file)
@@ -78,65 +78,39 @@ uint64_t ibv_get_device_guid(struct ibv_device *device)
 
 struct ibv_context *ibv_open_device(struct ibv_device *device)
 {
-       struct ibv_context *context, *tmp;
        char *devpath;
-       struct ibv_get_context context_cmd;
-       struct ibv_get_context_resp context_resp;
-       struct ibv_get_event_fds event_fds_cmd;
-       struct ibv_get_event_fds_resp *event_fds_resp;
-       int i;
-
-       context = malloc(sizeof *context);
-       if (!context)
-               return NULL;
-
-       context->device = device;
+       int cmd_fd;
+       struct ibv_context *context;
+       struct ibv_query_params      cmd;
+       struct ibv_query_params_resp resp;
 
        asprintf(&devpath, "/dev/infiniband/%s", device->dev->name);
-       context->cmd_fd = open(devpath, O_WRONLY);
-
-       if (context->cmd_fd < 0)
-               goto err;
-
-       context_cmd.command   = IB_USER_VERBS_CMD_GET_CONTEXT;
-       context_cmd.in_words  = sizeof context_cmd / 4;
-       context_cmd.out_words = sizeof context_resp / 4;
-       context_cmd.response  = (unsigned long) &context_resp;
-
-       if (write(context->cmd_fd, &context_cmd, sizeof context_cmd) != sizeof context_cmd)
-               goto err_close;
-
-       context->num_comp = context_resp.num_cq_events;
 
-       if (context->num_comp > 1) {
-               tmp = realloc(context, sizeof *context + context->num_comp * sizeof (int));
-               if (!tmp)
-                       goto err_close;
-               context = tmp;
-       }
-
-       event_fds_resp = alloca(sizeof *event_fds_resp + context->num_comp * 4);
+       /*
+        * We'll only be doing writes, but we need O_RDWR in case the
+        * provider needs to mmap() the file.
+        */
+       cmd_fd = open(devpath, O_RDWR);
+       if (cmd_fd < 0)
+               return NULL;
 
-       event_fds_cmd.command   = IB_USER_VERBS_CMD_GET_EVENT_FDS;
-       event_fds_cmd.in_words  = sizeof event_fds_cmd / 4;
-       event_fds_cmd.out_words = sizeof *event_fds_resp / 4 + context->num_comp;
-       event_fds_cmd.response  = (unsigned long) event_fds_resp;
+       IBV_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY_PARAMS, &resp);
+       if (write(cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+               goto err;
 
-       if (write(context->cmd_fd, &event_fds_cmd, sizeof event_fds_cmd) !=
-           sizeof event_fds_cmd)
-               goto err_close;
+       context = device->ops.alloc_context(device, resp.num_cq_events, cmd_fd);
+       if (!context)
+               goto err;
 
-       context->async_fd = event_fds_resp->async_fd;
-       for (i = 0; i < context->num_comp; ++i)
-               context->cq_fd[i] = event_fds_resp->cq_fd[i];
+       context->device   = device;
+       context->cmd_fd   = cmd_fd;
+       context->num_comp = resp.num_cq_events;
 
        return context;
 
-err_close:
-       close(context->cmd_fd);
-
 err:
-       free(context);
+       close(cmd_fd);
+
        return NULL;
 }
 
@@ -149,7 +123,7 @@ int ibv_close_device(struct ibv_context *context)
                close(context->cq_fd[i]);
        close(context->cmd_fd);
 
-       free(context);
+       context->device->ops.free_context(context);
 
        return 0;
 }
@@ -159,9 +133,7 @@ int ibv_get_async_event(struct ibv_context *context,
 {
        struct ibv_kern_async_event ev;
 
-       int ret = read(context->async_fd, &ev, sizeof ev);
-
-       if (ret != sizeof ev)
+       if (read(context->async_fd, &ev, sizeof ev) != sizeof ev)
                return -1;
 
        /* XXX convert CQ/QP handles back to pointers */
index 678dec980c38931af8aea565407f619a95c4c7e4..0d0fec146069311ef6f268475b4b6b44640440f5 100644 (file)
 
 #include <pthread.h>
 
-#include <infiniband/verbs.h>
 #include <infiniband/driver.h>
 
-#include "kern_abi.h"
-
 #define HIDDEN         __attribute__((visibility ("hidden")))
 
 #define INIT           __attribute__((constructor))
@@ -59,4 +56,19 @@ extern int ibv_init_mem_map(void);
 extern int ibv_lock_range(void *base, size_t size);
 extern int ibv_unlock_range(void *base, size_t size);
 
+/*
+ * IBV_INIT_CMD - fill in the header of a command that has no response.
+ * Sets command to IB_USER_VERBS_CMD_<opcode>, in_words to @size
+ * expressed in 4-byte words (@size / 4) and out_words to 0.
+ * @cmd is a pointer to the command structure.
+ */
+#define IBV_INIT_CMD(cmd, size, opcode)                                \
+       do {                                                    \
+               (cmd)->command   = IB_USER_VERBS_CMD_##opcode;  \
+               (cmd)->in_words  = (size) / 4;                  \
+               (cmd)->out_words = 0;                           \
+       } while (0)
+
+/*
+ * IBV_INIT_CMD_RESP - fill in the header of a command that has a
+ * response.  Like IBV_INIT_CMD, but out_words is sizeof (*out) in
+ * 4-byte words and response is the address of @out.
+ *
+ * Note that out_words comes from the static type of *@out only: any
+ * variable-length data allocated past the end of *@out is not counted
+ * and must be added to out_words by the caller.
+ */
+#define IBV_INIT_CMD_RESP(cmd, size, opcode, out)              \
+       do {                                                    \
+               (cmd)->command   = IB_USER_VERBS_CMD_##opcode;  \
+               (cmd)->in_words  = (size) / 4;                  \
+               (cmd)->out_words = sizeof (*(out)) / 4;         \
+               (cmd)->response  = (uintptr_t) (out);           \
+       } while (0)
+
 #endif /* IB_VERBS_H */
index 1bcd570dc2a6c20c21687682ebb830f862ce5598..87f9557fa1fd023314373db3d16a92202d086f8f 100644 (file)
@@ -74,7 +74,8 @@ static void load_driver(char *so_path)
        driver = malloc(sizeof *driver);
        if (!driver) {
                fprintf(stderr, PFX "Fatal: couldn't allocate driver for %s\n", so_path);
-               abort();
+               dlclose(dlhandle);
+               return;
        }
 
        driver->init_func = init_func;
@@ -148,7 +149,7 @@ static void init_drivers(struct sysfs_class_device *verbs_dev)
        fprintf(stderr, PFX "Warning: no driver for %s\n", verbs_dev->name);
 }
 
-static void check_abi_version(void)
+static int check_abi_version(void)
 {
        char path[256];
        char val[16];
@@ -156,14 +157,14 @@ static void check_abi_version(void)
 
        if (sysfs_get_mnt_path(path, sizeof path)) {
                fprintf(stderr, PFX "Fatal: couldn't find sysfs mount.\n");
-               abort();
+               return -1;
        }
 
        strncat(path, "/class/infiniband_verbs/abi_version", sizeof path);
 
        if (sysfs_read_attribute_value(path, val, sizeof val)) {
                fprintf(stderr, PFX "Fatal: couldn't read uverbs ABI version.\n");
-               abort();
+               return -1;
        }
 
        ver = strtol(val, NULL, 10);
@@ -172,8 +173,10 @@ static void check_abi_version(void)
                fprintf(stderr, PFX "Fatal: kernel ABI version %d "
                        "doesn't match library version %d.\n",
                        ver, IB_USER_VERBS_ABI_VERSION);
-               abort();
+               return -1;
        }
+
+       return 0;
 }
 
 
@@ -185,15 +188,15 @@ static void INIT ibverbs_init(void)
        Dlist *verbs_dev_list;
        struct sysfs_class_device *verbs_dev;
 
-       check_abi_version();
-
-       if (ibv_init_mem_map())
-               abort();
-
        driver_list = dlist_new(sizeof (struct ibv_driver));
        device_list = dlist_new(sizeof (struct ibv_device));
-       if (!driver_list || !device_list)
+       if (!driver_list || !device_list) {
+               fprintf(stderr, PFX "Fatal: couldn't allocate device/driver list.\n");
                abort();
+       }
+
+       if (ibv_init_mem_map())
+               return;
 
        user_path = getenv(OPENIB_DRIVER_PATH_ENV);
        if (user_path) {
@@ -207,13 +210,16 @@ static void INIT ibverbs_init(void)
        cls = sysfs_open_class("infiniband_verbs");
        if (!cls) {
                fprintf(stderr, PFX "Fatal: couldn't open infiniband sysfs class.\n");
-               abort();
+               return;
        }
 
+       if (check_abi_version())
+               return;
+
        verbs_dev_list = sysfs_get_class_devices(cls);
        if (!verbs_dev_list) {
                fprintf(stderr, PFX "Fatal: no infiniband class devices found.\n");
-               abort();
+               return;
        }
 
        dlist_for_each_data(verbs_dev_list, verbs_dev, struct sysfs_class_device)
index 941b2bf34e5f33e42765890e119b3c5e5b575ea2..b0690eb81c82b4bb368e9011cd363cecee6de66d 100644 (file)
@@ -6,9 +6,29 @@ IBVERBS_1.0 {
                ibv_open_device;
                ibv_close_device;
                ibv_get_async_event;
+               ibv_query_port;
                ibv_alloc_pd;
                ibv_dealloc_pd;
                ibv_reg_mr;
                ibv_dereg_mr;
+               ibv_create_cq;
+               ibv_destroy_cq;
+               ibv_get_cq_event;
+               ibv_create_qp;
+               ibv_modify_qp;
+               ibv_destroy_qp;
+               ibv_create_ah;
+               ibv_destroy_ah;
+               ibv_cmd_get_context;
+               ibv_cmd_query_port;
+               ibv_cmd_alloc_pd;
+               ibv_cmd_dealloc_pd;
+               ibv_cmd_reg_mr;
+               ibv_cmd_dereg_mr;
+               ibv_cmd_create_cq;
+               ibv_cmd_destroy_cq;
+               ibv_cmd_create_qp;
+               ibv_cmd_modify_qp;
+               ibv_cmd_destroy_qp;
        local: *;
 };
diff --git a/src/verbs.c b/src/verbs.c
new file mode 100644 (file)
index 0000000..087de30
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "ibverbs.h"
+
+/*
+ * ibv_query_port - forward a port query to the provider's query_port
+ * op and return its result unchanged.
+ */
+int ibv_query_port(struct ibv_context *context, uint8_t port_num,
+                  struct ibv_port_attr *port_attr)
+{
+       int ret = context->ops.query_port(context, port_num, port_attr);
+
+       return ret;
+}
+
+/*
+ * ibv_alloc_pd - have the provider allocate a protection domain and,
+ * if it returned one, tie it to @context.  Returns the provider's
+ * result (NULL when the provider returned NULL).
+ */
+struct ibv_pd *ibv_alloc_pd(struct ibv_context *context)
+{
+       struct ibv_pd *pd = context->ops.alloc_pd(context);
+
+       if (!pd)
+               return NULL;
+
+       pd->context = context;
+       return pd;
+}
+
+/*
+ * ibv_dealloc_pd - forward to the dealloc_pd op of the context that
+ * owns @pd and return its result unchanged.
+ */
+int ibv_dealloc_pd(struct ibv_pd *pd)
+{
+       struct ibv_context *ctx = pd->context;
+
+       return ctx->ops.dealloc_pd(pd);
+}
+
+/*
+ * ibv_reg_mr - have the provider register a memory region and, if it
+ * returned one, record the owning context and PD in it.  Returns the
+ * provider's result (NULL when the provider returned NULL).
+ */
+struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
+                         size_t length, enum ibv_access_flags access)
+{
+       struct ibv_mr *mr = pd->context->ops.reg_mr(pd, addr, length, access);
+
+       if (!mr)
+               return NULL;
+
+       mr->pd      = pd;
+       mr->context = pd->context;
+       return mr;
+}
+
+/*
+ * ibv_dereg_mr - forward to the dereg_mr op of the context that owns
+ * @mr and return its result unchanged.
+ */
+int ibv_dereg_mr(struct ibv_mr *mr)
+{
+       struct ibv_context *ctx = mr->context;
+
+       return ctx->ops.dereg_mr(mr);
+}
+
+/*
+ * ibv_create_cq - have the provider create a completion queue and, if
+ * it returned one, record the owning context and the caller's
+ * @cq_context in it.  Returns the provider's result (NULL when the
+ * provider returned NULL).
+ */
+struct ibv_cq *ibv_create_cq(struct ibv_context *context, int cqe,
+                            void *cq_context)
+{
+       struct ibv_cq *cq;
+
+       cq = context->ops.create_cq(context, cqe);
+       if (!cq)
+               return NULL;
+
+       cq->cq_context = cq_context;
+       cq->context    = context;
+       return cq;
+}
+
+/*
+ * ibv_destroy_cq - forward to the destroy_cq op of the context that
+ * owns @cq and return its result unchanged.
+ */
+int ibv_destroy_cq(struct ibv_cq *cq)
+{
+       struct ibv_context *ctx = cq->context;
+
+       return ctx->ops.destroy_cq(cq);
+}
+
+
+/*
+ * ibv_get_cq_event - read one completion event from the fd for
+ * completion channel @comp_num.
+ *
+ * On success the CQ pointer recovered from the event's cq_handle is
+ * stored in *@cq, that CQ's cq_context in *@cq_context, and the
+ * provider's optional cq_event op is invoked on the CQ.
+ *
+ * Returns 0 on success, or -1 if @comp_num is out of range or the
+ * read() does not return a complete event.
+ */
+int ibv_get_cq_event(struct ibv_context *context, int comp_num,
+                    struct ibv_cq **cq, void **cq_context)
+{
+       struct ibv_comp_event ev;
+       struct ibv_cq *event_cq;
+
+       if (!(comp_num >= 0 && comp_num < context->num_comp))
+               return -1;
+
+       if (read(context->cq_fd[comp_num], &ev, sizeof ev) != sizeof ev)
+               return -1;
+
+       /* The event carries the CQ pointer as an integer handle. */
+       event_cq    = (struct ibv_cq *) (uintptr_t) ev.cq_handle;
+       *cq         = event_cq;
+       *cq_context = event_cq->cq_context;
+
+       if (event_cq->context->ops.cq_event)
+               event_cq->context->ops.cq_event(event_cq);
+
+       return 0;
+}
+
+/*
+ * ibv_create_qp - have the provider create a queue pair and, if it
+ * returned one, fill in the fields the library owns: context,
+ * qp_context, pd, send_cq and recv_cq.  Returns the provider's result
+ * (NULL when the provider returned NULL).
+ */
+struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
+                            struct ibv_qp_init_attr *qp_init_attr)
+{
+       struct ibv_qp *qp;
+
+       qp = pd->context->ops.create_qp(pd, qp_init_attr);
+       if (!qp)
+               return NULL;
+
+       qp->context    = pd->context;
+       qp->qp_context = qp_init_attr->qp_context;
+       qp->pd         = pd;
+       qp->send_cq    = qp_init_attr->send_cq;
+       qp->recv_cq    = qp_init_attr->recv_cq;
+
+       return qp;
+}
+/*
+ * ibv_modify_qp - forward to the provider's modify_qp op.  If it
+ * succeeds (returns 0) and @attr_mask includes IBV_QP_STATE, mirror
+ * the new state into qp->state.  Returns the provider's result.
+ */
+int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+                 enum ibv_qp_attr_mask attr_mask)
+{
+       int ret = qp->context->ops.modify_qp(qp, attr, attr_mask);
+
+       if (!ret && (attr_mask & IBV_QP_STATE))
+               qp->state = attr->qp_state;
+
+       return ret;
+}
+
+/*
+ * ibv_destroy_qp - forward to the destroy_qp op of the context that
+ * owns @qp and return its result unchanged.
+ */
+int ibv_destroy_qp(struct ibv_qp *qp)
+{
+       struct ibv_context *ctx = qp->context;
+
+       return ctx->ops.destroy_qp(qp);
+}
+
+/*
+ * ibv_create_ah - have the provider create an address handle and, if
+ * it returned one, record the owning context and PD in it.  Returns
+ * the provider's result (NULL when the provider returned NULL).
+ */
+struct ibv_ah *ibv_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr)
+{
+       struct ibv_ah *ah;
+
+       ah = pd->context->ops.create_ah(pd, attr);
+       if (!ah)
+               return NULL;
+
+       ah->context = pd->context;
+       ah->pd      = pd;
+
+       return ah;
+}
+
+/*
+ * ibv_destroy_ah - forward to the destroy_ah op of the context that
+ * owns @ah and return its result unchanged.
+ */
+int ibv_destroy_ah(struct ibv_ah *ah)
+{
+       struct ibv_context *ctx = ah->context;
+
+       return ctx->ops.destroy_ah(ah);
+}