From 928c12186176b0c894e9a2a9eb830627a485cf19 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 4 Jun 2012 14:51:41 -0700 Subject: [PATCH] rsocket: Spin before blocking on an rsocket The latency cost of blocking is significant compared to round trip ping-pong time. Spin briefly on rsockets before calling into the kernel and blocking. The time to spin before blocking is read from an rsocket configuration file rdma/rsocket/polling_time. This is user adjustable or may be set automatically by ibacm. As a completely unintentional side effect, this just happens to improve application performance in benchmarks, like netpipe, significantly. ;) Signed-off-by: Sean Hefty --- Makefile.am | 2 +- configure.in | 5 ++++ src/cma.h | 9 ++++++ src/rsocket.c | 76 +++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 83 insertions(+), 9 deletions(-) diff --git a/Makefile.am b/Makefile.am index cbd874dc..1dc61e15 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,7 +3,7 @@ INCLUDES = -I$(srcdir)/include lib_LTLIBRARIES = src/librdmacm.la ACLOCAL_AMFLAGS = -I config -AM_CFLAGS = -g -Wall -D_GNU_SOURCE +AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DSYSCONFDIR=\"$(sysconfdir)\" -DRDMADIR=\"@rdmadir@\" src_librdmacm_la_CFLAGS = $(AM_CFLAGS) diff --git a/configure.in b/configure.in index dec6064c..3ee7f9be 100644 --- a/configure.in +++ b/configure.in @@ -87,5 +87,10 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") +AC_ARG_VAR(rdmadir, [Directory for configuration files]) +if test "x$rdmadir" = "x"; then + AC_SUBST(rdmadir, rdma) +fi + AC_CONFIG_FILES([Makefile librdmacm.spec]) AC_OUTPUT diff --git a/src/cma.h b/src/cma.h index 2ee47675..cedc0c36 100644 --- a/src/cma.h +++ b/src/cma.h @@ -166,4 +166,13 @@ struct ibv_path_data }; #endif +#ifndef SYSCONFDIR +#define SYSCONFDIR "/etc" +#endif +#ifndef RDMADIR +#define RDMADIR "rdma" +#endif +#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR +#define RS_CONF_DIR RDMA_CONF_DIR "/rsocket" + #endif /* CMA_H */ diff --git a/src/rsocket.c b/src/rsocket.c index e899e8ae..f53f1c8c 100644 --- a/src/rsocket.c +++ b/src/rsocket.c @@ -37,10 +37,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -64,6 +66,8 @@ static struct index_map idm; static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; +static uint32_t polling_time = 10; + /* * Immediate data format is determined by the upper bits * bit 31: message type, 0 - data, 1 - control @@ -197,6 +201,27 @@ struct rsocket { uint8_t *sbuf; }; +void rs_configure(void) +{ + FILE *f; + static int init; + + if (init) + return; + + pthread_mutex_lock(&mut); + if (init) + goto out; + + if ((f = fopen(RS_CONF_DIR "/polling_time", "r"))) { + fscanf(f, "%u", &polling_time); + fclose(f); + } + init = 1; +out: + pthread_mutex_unlock(&mut); +} + /* * We currently generate a completion per send. sqe_count = 1 */ @@ -471,6 +496,7 @@ int rsocket(int domain, int type, int protocol) (type != SOCK_STREAM) || (protocol && protocol != IPPROTO_TCP)) return ERR(ENOTSUP); + rs_configure(); rs = rs_alloc(NULL); if (!rs) return ERR(ENOMEM); @@ -924,6 +950,29 @@ static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rs return ret; } +static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs)) +{ + struct timeval s, e; + uint32_t poll_time = 0; + int ret; + + do { + ret = rs_process_cq(rs, 1, test); + if (!ret || nonblock || errno != EWOULDBLOCK) + return ret; + + if (!poll_time) + gettimeofday(&s, NULL); + + gettimeofday(&e, NULL); + poll_time = (e.tv_sec - s.tv_sec) * 1000000 + + (e.tv_usec - s.tv_usec) + 1; + } while (poll_time <= polling_time); + + ret = rs_process_cq(rs, 0, test); + return ret; +} + static int rs_nonblocking(struct rsocket *rs, int flags) { return (rs->fd_flags & O_NONBLOCK) || (flags & MSG_DONTWAIT); @@ -1035,7 +1084,7 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags) } fastlock_acquire(&rs->rlock); if (!rs_have_rdata(rs)) { - ret = rs_process_cq(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata); + ret = rs_get_comp(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata); if (ret && errno != ECONNRESET) goto out; } @@ -1139,8 +1188,8 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags) fastlock_acquire(&rs->slock); for (left = len; left; left -= xfer_size, buf += xfer_size) { if (!rs_can_send(rs)) { - ret = rs_process_cq(rs, rs_nonblocking(rs, flags), - rs_conn_can_send); + ret = rs_get_comp(rs, rs_nonblocking(rs, flags), + rs_conn_can_send); if (ret) break; if (rs->state != rs_connected) { @@ -1253,8 +1302,8 @@ static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags fastlock_acquire(&rs->slock); for (left = len; left; left -= xfer_size) { if (!rs_can_send(rs)) { - ret = rs_process_cq(rs, rs_nonblocking(rs, flags), - rs_conn_can_send); + ret = rs_get_comp(rs, rs_nonblocking(rs, flags), + rs_conn_can_send); if (ret) break; if (rs->state != rs_connected) { @@ -1469,12 +1518,23 @@ static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds) */ int rpoll(struct pollfd *fds, nfds_t nfds, int timeout) { + struct timeval s, e; struct pollfd *rfds; + uint32_t poll_time = 0; int ret; - ret = rs_poll_check(fds, nfds); - if (ret || !timeout) - return ret; + do { + ret = rs_poll_check(fds, nfds); + if (ret || !timeout) + return ret; + + if (!poll_time) + gettimeofday(&s, NULL); + + gettimeofday(&e, NULL); + poll_time = (e.tv_sec - s.tv_sec) * 1000000 + + (e.tv_usec - s.tv_usec) + 1; + } while (poll_time <= polling_time); rfds = rs_fds_alloc(nfds); if (!rfds) -- 2.45.2