From: Sean Hefty Date: Wed, 6 Jun 2012 21:41:35 +0000 (-0700) Subject: commit X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=1e8c6d40178047218190280d07e55cdedde56fc6;p=~shefty%2Flibrdmacm.git commit --- diff --git a/meta b/meta index 03dcd39c..b15b30f1 100644 --- a/meta +++ b/meta @@ -1,11 +1,7 @@ Version: 1 -Previous: e335d1d5989b4b030974f44d24201e07f3ad00e6 +Previous: 7e9ef43398a578b8db94c9e01653dd3fcb2ee5bb Head: d1067d98830663fc9a95f6c9a502ac62d8e995a0 Applied: - rs-shutdown: 110fcad2ecce39d6888448c19b08791ea193e7f9 - rs-maxseg: b99bd0638e7d22b1b0976a34a58efd352db420b3 - rs-spin: 9fb978ede0b7b548b7954f01097af337b938020c - rs-defaults: d1067d98830663fc9a95f6c9a502ac62d8e995a0 Unapplied: ip6-opt: 7e43a759255e9890d1e41f1edf71792836f53941 rs-1sge: 85e4c4a0da0b501b60a1035d7a003ee20a749511 diff --git a/patches/rs-defaults b/patches/rs-defaults deleted file mode 100644 index 6300c800..00000000 --- a/patches/rs-defaults +++ /dev/null @@ -1,109 +0,0 @@ -Bottom: 9a355c4fd6adc489cc0c3c07b3d436d25ff2cb0c -Top: 12cda031f6e4027c6df650d4c84ed637ba93cdf8 -Author: Sean Hefty -Date: 2012-06-05 15:28:18 -0700 - -rsocket: Use configuration files to specify default settings - -Give an administrator control over the default settings -used by rsockets. Use files under %sysconfig%/rdma/rsocket as shown: - -mem_default - default size of receive buffer(s) -wmem_default - default size of send buffer(s) -sqsize_default - default size of send queue -rqsize_default - default size of receive queue -inline_default - default size of inline data - -If configuration files are not available, rsockets will continue to -use internal defaults. - -Signed-off-by: Sean Hefty - - ---- - -diff --git a/src/rsocket.c b/src/rsocket.c -index d7344c7..5e0e413 100644 ---- a/src/rsocket.c -+++ b/src/rsocket.c -@@ -53,19 +53,20 @@ - #include "cma.h" - #include "indexer.h" - --#define RS_INLINE 64 - #define RS_OLAP_START_SIZE 2048 - #define RS_MAX_TRANSFER 65536 --#define RS_QP_SIZE 384 - #define RS_QP_MAX_SIZE 0xFFFE --#define RS_QP_MIN_SIZE 8 - #define RS_QP_CTRL_SIZE 4 - #define RS_CONN_RETRIES 6 - #define RS_SGL_SIZE 2 --#define RS_BUF_SIZE (1 << 17) - static struct index_map idm; - static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; - -+static uint16_t def_inline = 64; -+static uint16_t def_sqsize = 384; -+static uint16_t def_rqsize = 384; -+static uint32_t def_mem = (1 << 17); -+static uint32_t def_wmem = (1 << 17); - static uint32_t polling_time = 10; - - /* -@@ -216,6 +217,40 @@ void rs_configure(void) - fscanf(f, "%u", &polling_time); - fclose(f); - } -+ -+ if ((f = fopen(RS_CONF_DIR "/inline_default", "r"))) { -+ fscanf(f, "%hu", &def_inline); -+ fclose(f); -+ -+ if (def_inline < RS_MIN_INLINE) -+ def_inline = RS_MIN_INLINE; -+ } -+ -+ if ((f = fopen(RS_CONF_DIR "/sqsize_default", "r"))) { -+ fscanf(f, "%hu", &def_sqsize); -+ fclose(f); -+ } -+ -+ if ((f = fopen(RS_CONF_DIR "/rqsize_default", "r"))) { -+ fscanf(f, "%hu", &def_rqsize); -+ fclose(f); -+ } -+ -+ if ((f = fopen(RS_CONF_DIR "/mem_default", "r"))) { -+ fscanf(f, "%u", &def_mem); -+ fclose(f); -+ -+ if (def_mem < 1) -+ def_mem = 1; -+ } -+ -+ if ((f = fopen(RS_CONF_DIR "/wmem_default", "r"))) { -+ fscanf(f, "%u", &def_wmem); -+ fclose(f); -+ -+ if (def_wmem < 1) -+ def_wmem = 1; -+ } - init = 1; - out: - pthread_mutex_unlock(&mut); -@@ -264,9 +299,11 @@ static struct rsocket *rs_alloc(struct rsocket *inherited_rs) - rs->rq_size = inherited_rs->rq_size; - rs->ctrl_avail = inherited_rs->ctrl_avail; - } else { -- rs->sbuf_size = rs->rbuf_size = RS_BUF_SIZE; -- rs->sq_inline = RS_INLINE; -- rs->sq_size = rs->rq_size = RS_QP_SIZE; -+ rs->sbuf_size = def_wmem; -+ rs->rbuf_size = def_mem; -+ rs->sq_inline = def_inline; -+ rs->sq_size = def_sqsize; -+ rs->rq_size = def_rqsize; - rs->ctrl_avail = RS_QP_CTRL_SIZE; - } - fastlock_init(&rs->slock); diff --git a/patches/rs-maxseg b/patches/rs-maxseg deleted file mode 100644 index be2a33b5..00000000 --- a/patches/rs-maxseg +++ /dev/null @@ -1,42 +0,0 @@ -Bottom: 5e0be3cec4b1092b15ffff22ce847641d572b180 -Top: e8942df497cda9d853e5bbaa69226ad2ede0b3b9 -Author: Sean Hefty -Date: 2012-06-04 13:22:10 -0700 - -rsocket: Handle TCP_MAXSEG socket option - -netperf uses the TCP_MAXSEG socket option. Add support for it. -Problem reported by Sridhar Samudrala - -Signed-off-by: Sean Hefty - - ---- - -diff --git a/src/rsocket.c b/src/rsocket.c -index 8f20b4a..2fd106a 100644 ---- a/src/rsocket.c -+++ b/src/rsocket.c -@@ -1716,6 +1716,9 @@ int rsetsockopt(int socket, int level, int optname, - opt_on = *(int *) optval; - ret = 0; - break; -+ case TCP_MAXSEG: -+ ret = 0; -+ break; - default: - break; - } -@@ -1802,6 +1805,12 @@ int rgetsockopt(int socket, int level, int optname, - *((int *) optval) = !!(rs->tcp_opts & (1 << optname)); - *optlen = sizeof(int); - break; -+ case TCP_MAXSEG: -+ *((int *) optval) = (rs->cm_id && rs->cm_id->route.num_paths) ? -+ 1 << (7 + rs->cm_id->route.path_rec->mtu) : -+ 2048; -+ *optlen = sizeof(int); -+ break; - default: - ret = ENOTSUP; - break; diff --git a/patches/rs-shutdown b/patches/rs-shutdown deleted file mode 100644 index 4ecc4001..00000000 --- a/patches/rs-shutdown +++ /dev/null @@ -1,38 +0,0 @@ -Bottom: d79d5464622c07a1ee7c42051fee5fca02c52207 -Top: 5e0be3cec4b1092b15ffff22ce847641d572b180 -Author: Sean Hefty -Date: 2012-06-04 13:14:42 -0700 - -rsocket: Handle SHUT_RD/WR shutdown flags - -Sridhar Samudrala reported an error (EOPNOTSUPP) -after calling select(). - -The issue is that rshutdown(SHUT_WR) was called before select(). -As part of shutdown, rsockets switches the underlying fd from -nonblocking to blocking to ensure that previously sent data has -completed. shutdown(SHUT_WR) indicates that the socket should be -kept open for receiving data. - -Delay handling the actual shutdown unless SHUT_RDWR is specified, -or the socket is closed. - -Signed-off-by: Sean Hefty - - ---- - -diff --git a/src/rsocket.c b/src/rsocket.c -index c111797..8f20b4a 100644 ---- a/src/rsocket.c -+++ b/src/rsocket.c -@@ -1592,6 +1592,9 @@ int rshutdown(int socket, int how) - struct rsocket *rs; - int ret = 0; - -+ if (how != SHUT_RDWR) -+ return 0; -+ - rs = idm_at(&idm, socket); - if (rs->fd_flags & O_NONBLOCK) - rs_set_nonblocking(rs, 0); diff --git a/patches/rs-spin b/patches/rs-spin deleted file mode 100644 index 7b4a4029..00000000 --- a/patches/rs-spin +++ /dev/null @@ -1,220 +0,0 @@ -Bottom: e8942df497cda9d853e5bbaa69226ad2ede0b3b9 -Top: 9a355c4fd6adc489cc0c3c07b3d436d25ff2cb0c -Author: Sean Hefty -Date: 2012-06-04 14:51:41 -0700 - -rsocket: Spin before blocking on an rsocket - -The latency cost of blocking is significant compared to round -trip ping-pong time. Spin briefly on rsockets before calling -into the kernel and blocking. - -The time to spin before blocking is read from an rsocket -configuration file %sysconfig%/rdma/rsocket/polling_time. This -is user adjustable. - -As a completely unintentional side effect, this just happens to -improve application performance in benchmarks, like netpipe, -significantly. ;) - -Signed-off-by: Sean Hefty - - ---- - -diff --git a/Makefile.am b/Makefile.am -index cbd874d..1dc61e1 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -3,7 +3,7 @@ INCLUDES = -I$(srcdir)/include - lib_LTLIBRARIES = src/librdmacm.la - - ACLOCAL_AMFLAGS = -I config --AM_CFLAGS = -g -Wall -D_GNU_SOURCE -+AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DSYSCONFDIR=\"$(sysconfdir)\" -DRDMADIR=\"@rdmadir@\" - - src_librdmacm_la_CFLAGS = $(AM_CFLAGS) - -diff --git a/configure.in b/configure.in -index dec6064..3ee7f9b 100644 ---- a/configure.in -+++ b/configure.in -@@ -87,5 +87,10 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, - - AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") - -+AC_ARG_VAR(rdmadir, [Directory for configuration files]) -+if test "x$rdmadir" = "x"; then -+ AC_SUBST(rdmadir, rdma) -+fi -+ - AC_CONFIG_FILES([Makefile librdmacm.spec]) - AC_OUTPUT -diff --git a/src/cma.h b/src/cma.h -index 2ee4767..cedc0c3 100644 ---- a/src/cma.h -+++ b/src/cma.h -@@ -166,4 +166,13 @@ struct ibv_path_data - }; - #endif - -+#ifndef SYSCONFDIR -+#define SYSCONFDIR "/etc" -+#endif -+#ifndef RDMADIR -+#define RDMADIR "rdma" -+#endif -+#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR -+#define RS_CONF_DIR RDMA_CONF_DIR "/rsocket" -+ - #endif /* CMA_H */ -diff --git a/src/rsocket.c b/src/rsocket.c -index 2fd106a..d7344c7 100644 ---- a/src/rsocket.c -+++ b/src/rsocket.c -@@ -37,10 +37,12 @@ - - #include - #include -+#include - #include - #include - #include - #include -+#include - #include - #include - #include -@@ -64,6 +66,8 @@ - static struct index_map idm; - static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; - -+static uint32_t polling_time = 10; -+ - /* - * Immediate data format is determined by the upper bits - * bit 31: message type, 0 - data, 1 - control -@@ -196,6 +200,27 @@ struct rsocket { - uint8_t *sbuf; - }; - -+void rs_configure(void) -+{ -+ FILE *f; -+ static int init; -+ -+ if (init) -+ return; -+ -+ pthread_mutex_lock(&mut); -+ if (init) -+ goto out; -+ -+ if ((f = fopen(RS_CONF_DIR "/polling_time", "r"))) { -+ fscanf(f, "%u", &polling_time); -+ fclose(f); -+ } -+ init = 1; -+out: -+ pthread_mutex_unlock(&mut); -+} -+ - /* - * We currently generate a completion per send. sqe_count = 1 - */ -@@ -470,6 +495,7 @@ int rsocket(int domain, int type, int protocol) - (type != SOCK_STREAM) || (protocol && protocol != IPPROTO_TCP)) - return ERR(ENOTSUP); - -+ rs_configure(); - rs = rs_alloc(NULL); - if (!rs) - return ERR(ENOMEM); -@@ -923,6 +949,29 @@ static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rs - return ret; - } - -+static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs)) -+{ -+ struct timeval s, e; -+ uint32_t poll_time = 0; -+ int ret; -+ -+ do { -+ ret = rs_process_cq(rs, 1, test); -+ if (!ret || nonblock || errno != EWOULDBLOCK) -+ return ret; -+ -+ if (!poll_time) -+ gettimeofday(&s, NULL); -+ -+ gettimeofday(&e, NULL); -+ poll_time = (e.tv_sec - s.tv_sec) * 1000000 + -+ (e.tv_usec - s.tv_usec) + 1; -+ } while (poll_time <= polling_time); -+ -+ ret = rs_process_cq(rs, 0, test); -+ return ret; -+} -+ - static int rs_nonblocking(struct rsocket *rs, int flags) - { - return (rs->fd_flags & O_NONBLOCK) || (flags & MSG_DONTWAIT); -@@ -1034,7 +1083,7 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags) - } - fastlock_acquire(&rs->rlock); - if (!rs_have_rdata(rs)) { -- ret = rs_process_cq(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata); -+ ret = rs_get_comp(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata); - if (ret && errno != ECONNRESET) - goto out; - } -@@ -1138,8 +1187,8 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags) - fastlock_acquire(&rs->slock); - for (left = len; left; left -= xfer_size, buf += xfer_size) { - if (!rs_can_send(rs)) { -- ret = rs_process_cq(rs, rs_nonblocking(rs, flags), -- rs_conn_can_send); -+ ret = rs_get_comp(rs, rs_nonblocking(rs, flags), -+ rs_conn_can_send); - if (ret) - break; - if (rs->state != rs_connected) { -@@ -1252,8 +1301,8 @@ static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags - fastlock_acquire(&rs->slock); - for (left = len; left; left -= xfer_size) { - if (!rs_can_send(rs)) { -- ret = rs_process_cq(rs, rs_nonblocking(rs, flags), -- rs_conn_can_send); -+ ret = rs_get_comp(rs, rs_nonblocking(rs, flags), -+ rs_conn_can_send); - if (ret) - break; - if (rs->state != rs_connected) { -@@ -1468,12 +1517,23 @@ static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds) - */ - int rpoll(struct pollfd *fds, nfds_t nfds, int timeout) - { -+ struct timeval s, e; - struct pollfd *rfds; -+ uint32_t poll_time = 0; - int ret; - -- ret = rs_poll_check(fds, nfds); -- if (ret || !timeout) -- return ret; -+ do { -+ ret = rs_poll_check(fds, nfds); -+ if (ret || !timeout) -+ return ret; -+ -+ if (!poll_time) -+ gettimeofday(&s, NULL); -+ -+ gettimeofday(&e, NULL); -+ poll_time = (e.tv_sec - s.tv_sec) * 1000000 + -+ (e.tv_usec - s.tv_usec) + 1; -+ } while (poll_time <= polling_time); - - rfds = rs_fds_alloc(nfds); - if (!rfds)