]> git.openfabrics.org - ~shefty/librdmacm.git/commitdiff
rsocket: Spin before blocking on an rsocket
authorSean Hefty <sean.hefty@intel.com>
Mon, 4 Jun 2012 21:51:41 +0000 (14:51 -0700)
committerSean Hefty <sean.hefty@intel.com>
Fri, 8 Jun 2012 21:45:09 +0000 (14:45 -0700)
The latency cost of blocking is significant compared to round
trip ping-pong time.  Spin briefly on rsockets before calling
into the kernel and blocking.

The time to spin before blocking is read from an rsocket
configuration file %sysconfig%/rdma/rsocket/polling_time.  This
is user adjustable.

As a completely unintentional side effect, this just happens to
improve application performance in benchmarks, like netpipe,
significantly. ;)

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Makefile.am
configure.in
src/cma.h
src/rsocket.c

index cbd874dcbd76a92ffb4bb57add06550ac67dfc11..1dc61e152f190d46e004039bc7c50c05a4a46035 100644 (file)
@@ -3,7 +3,7 @@ INCLUDES = -I$(srcdir)/include
 lib_LTLIBRARIES = src/librdmacm.la
 
 ACLOCAL_AMFLAGS = -I config
-AM_CFLAGS = -g -Wall -D_GNU_SOURCE
+AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DSYSCONFDIR=\"$(sysconfdir)\" -DRDMADIR=\"@rdmadir@\"
 
 src_librdmacm_la_CFLAGS = $(AM_CFLAGS)
 
index dec6064cc3e1351080b802638fc8bd15daf709d9..3ee7f9bed60767c65a75bc4bd77be7592126e6c1 100644 (file)
@@ -87,5 +87,10 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
 
 AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes")
 
+AC_ARG_VAR(rdmadir, [Directory for configuration files])
+if test "x$rdmadir" = "x"; then
+   AC_SUBST(rdmadir, rdma)
+fi
+
 AC_CONFIG_FILES([Makefile librdmacm.spec])
 AC_OUTPUT
index 2ee47675d83cb5e1a00531c5170c3a1cf24f30bc..cedc0c362f5fa68c425ac49add1e911d806a55d8 100644 (file)
--- a/src/cma.h
+++ b/src/cma.h
@@ -166,4 +166,13 @@ struct ibv_path_data
 };
 #endif
 
+#ifndef SYSCONFDIR
+#define SYSCONFDIR "/etc"
+#endif
+#ifndef RDMADIR
+#define RDMADIR "rdma"
+#endif
+#define RDMA_CONF_DIR  SYSCONFDIR "/" RDMADIR
+#define RS_CONF_DIR RDMA_CONF_DIR "/rsocket"
+
 #endif /* CMA_H */
index e906fd492f24d037f6a1a5d49efdf4936af3f9cf..7692498611a6db0433c5ac076a05e9b295f09030 100644 (file)
 
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/time.h>
 #include <stdarg.h>
 #include <netdb.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <stdio.h>
 #include <string.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
@@ -64,6 +66,8 @@
 static struct index_map idm;
 static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
 
+static uint32_t polling_time = 10;
+
 /*
  * Immediate data format is determined by the upper bits
  * bit 31: message type, 0 - data, 1 - control
@@ -196,6 +200,27 @@ struct rsocket {
        uint8_t           *sbuf;
 };
 
+void rs_configure(void)
+{
+       FILE *f;
+       static int init;
+
+       if (init)
+               return;
+
+       pthread_mutex_lock(&mut);
+       if (init)
+               goto out;
+
+       if ((f = fopen(RS_CONF_DIR "/polling_time", "r"))) {
+               fscanf(f, "%u", &polling_time);
+               fclose(f);
+       }
+       init = 1;
+out:
+       pthread_mutex_unlock(&mut);
+}
+
 /*
  * We currently generate a completion per send.  sqe_count = 1
  */
@@ -470,6 +495,7 @@ int rsocket(int domain, int type, int protocol)
            (type != SOCK_STREAM) || (protocol && protocol != IPPROTO_TCP))
                return ERR(ENOTSUP);
 
+       rs_configure();
        rs = rs_alloc(NULL);
        if (!rs)
                return ERR(ENOMEM);
@@ -923,6 +949,29 @@ static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rs
        return ret;
 }
 
+static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs))
+{
+       struct timeval s, e;
+       uint32_t poll_time = 0;
+       int ret;
+
+       do {
+               ret = rs_process_cq(rs, 1, test);
+               if (!ret || nonblock || errno != EWOULDBLOCK)
+                       return ret;
+
+               if (!poll_time)
+                       gettimeofday(&s, NULL);
+
+               gettimeofday(&e, NULL);
+               poll_time = (e.tv_sec - s.tv_sec) * 1000000 +
+                           (e.tv_usec - s.tv_usec) + 1;
+       } while (poll_time <= polling_time);
+
+       ret = rs_process_cq(rs, 0, test);
+       return ret;
+}
+
 static int rs_nonblocking(struct rsocket *rs, int flags)
 {
        return (rs->fd_flags & O_NONBLOCK) || (flags & MSG_DONTWAIT);
@@ -1034,7 +1083,7 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
        }
        fastlock_acquire(&rs->rlock);
        if (!rs_have_rdata(rs)) {
-               ret = rs_process_cq(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata);
+               ret = rs_get_comp(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata);
                if (ret && errno != ECONNRESET)
                        goto out;
        }
@@ -1138,8 +1187,8 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags)
        fastlock_acquire(&rs->slock);
        for (left = len; left; left -= xfer_size, buf += xfer_size) {
                if (!rs_can_send(rs)) {
-                       ret = rs_process_cq(rs, rs_nonblocking(rs, flags),
-                                           rs_conn_can_send);
+                       ret = rs_get_comp(rs, rs_nonblocking(rs, flags),
+                                         rs_conn_can_send);
                        if (ret)
                                break;
                        if (rs->state != rs_connected) {
@@ -1252,8 +1301,8 @@ static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags
        fastlock_acquire(&rs->slock);
        for (left = len; left; left -= xfer_size) {
                if (!rs_can_send(rs)) {
-                       ret = rs_process_cq(rs, rs_nonblocking(rs, flags),
-                                           rs_conn_can_send);
+                       ret = rs_get_comp(rs, rs_nonblocking(rs, flags),
+                                         rs_conn_can_send);
                        if (ret)
                                break;
                        if (rs->state != rs_connected) {
@@ -1468,12 +1517,23 @@ static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
  */
 int rpoll(struct pollfd *fds, nfds_t nfds, int timeout)
 {
+       struct timeval s, e;
        struct pollfd *rfds;
+       uint32_t poll_time = 0;
        int ret;
 
-       ret = rs_poll_check(fds, nfds);
-       if (ret || !timeout)
-               return ret;
+       do {
+               ret = rs_poll_check(fds, nfds);
+               if (ret || !timeout)
+                       return ret;
+
+               if (!poll_time)
+                       gettimeofday(&s, NULL);
+
+               gettimeofday(&e, NULL);
+               poll_time = (e.tv_sec - s.tv_sec) * 1000000 +
+                           (e.tv_usec - s.tv_usec) + 1;
+       } while (poll_time <= polling_time);
 
        rfds = rs_fds_alloc(nfds);
        if (!rfds)