From: Sean Hefty Date: Fri, 8 Jun 2012 21:44:45 +0000 (-0700) Subject: uncommit X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=1d9aaa23667fef2eae56d0cf916fd49803332d01;p=~shefty%2Flibrdmacm.git uncommit --- diff --git a/meta b/meta index 6bdaf7eb..cf4c72fe 100644 --- a/meta +++ b/meta @@ -1,7 +1,11 @@ Version: 1 -Previous: 5d0edc0a75a660e501ae157be8ecee9b06c751ac +Previous: f455e5bfb3dd7286eaeb3f53db8694822c75f225 Head: d1067d98830663fc9a95f6c9a502ac62d8e995a0 Applied: + rs1: 110fcad2ecce39d6888448c19b08791ea193e7f9 + rs2: b99bd0638e7d22b1b0976a34a58efd352db420b3 + rs3: 9fb978ede0b7b548b7954f01097af337b938020c + rs4: d1067d98830663fc9a95f6c9a502ac62d8e995a0 Unapplied: ip6-opt: 7e43a759255e9890d1e41f1edf71792836f53941 rs-1sge: 85e4c4a0da0b501b60a1035d7a003ee20a749511 diff --git a/patches/rs1 b/patches/rs1 new file mode 100644 index 00000000..4ecc4001 --- /dev/null +++ b/patches/rs1 @@ -0,0 +1,38 @@ +Bottom: d79d5464622c07a1ee7c42051fee5fca02c52207 +Top: 5e0be3cec4b1092b15ffff22ce847641d572b180 +Author: Sean Hefty +Date: 2012-06-04 13:14:42 -0700 + +rsocket: Handle SHUT_RD/WR shutdown flags + +Sridhar Samudrala reported an error (EOPNOTSUPP) +after calling select(). + +The issue is that rshutdown(SHUT_WR) was called before select(). +As part of shutdown, rsockets switches the underlying fd from +nonblocking to blocking to ensure that previously sent data has +completed. shutdown(SHUT_WR) indicates that the socket should be +kept open for receiving data. + +Delay handling the actual shutdown unless SHUT_RDWR is specified, +or the socket is closed. + +Signed-off-by: Sean Hefty + + +--- + +diff --git a/src/rsocket.c b/src/rsocket.c +index c111797..8f20b4a 100644 +--- a/src/rsocket.c ++++ b/src/rsocket.c +@@ -1592,6 +1592,9 @@ int rshutdown(int socket, int how) + struct rsocket *rs; + int ret = 0; + ++ if (how != SHUT_RDWR) ++ return 0; ++ + rs = idm_at(&idm, socket); + if (rs->fd_flags & O_NONBLOCK) + rs_set_nonblocking(rs, 0); diff --git a/patches/rs2 b/patches/rs2 new file mode 100644 index 00000000..be2a33b5 --- /dev/null +++ b/patches/rs2 @@ -0,0 +1,42 @@ +Bottom: 5e0be3cec4b1092b15ffff22ce847641d572b180 +Top: e8942df497cda9d853e5bbaa69226ad2ede0b3b9 +Author: Sean Hefty +Date: 2012-06-04 13:22:10 -0700 + +rsocket: Handle TCP_MAXSEG socket option + +netperf uses the TCP_MAXSEG socket option. Add support for it. +Problem reported by Sridhar Samudrala + +Signed-off-by: Sean Hefty + + +--- + +diff --git a/src/rsocket.c b/src/rsocket.c +index 8f20b4a..2fd106a 100644 +--- a/src/rsocket.c ++++ b/src/rsocket.c +@@ -1716,6 +1716,9 @@ int rsetsockopt(int socket, int level, int optname, + opt_on = *(int *) optval; + ret = 0; + break; ++ case TCP_MAXSEG: ++ ret = 0; ++ break; + default: + break; + } +@@ -1802,6 +1805,12 @@ int rgetsockopt(int socket, int level, int optname, + *((int *) optval) = !!(rs->tcp_opts & (1 << optname)); + *optlen = sizeof(int); + break; ++ case TCP_MAXSEG: ++ *((int *) optval) = (rs->cm_id && rs->cm_id->route.num_paths) ? ++ 1 << (7 + rs->cm_id->route.path_rec->mtu) : ++ 2048; ++ *optlen = sizeof(int); ++ break; + default: + ret = ENOTSUP; + break; diff --git a/patches/rs3 b/patches/rs3 new file mode 100644 index 00000000..7b4a4029 --- /dev/null +++ b/patches/rs3 @@ -0,0 +1,220 @@ +Bottom: e8942df497cda9d853e5bbaa69226ad2ede0b3b9 +Top: 9a355c4fd6adc489cc0c3c07b3d436d25ff2cb0c +Author: Sean Hefty +Date: 2012-06-04 14:51:41 -0700 + +rsocket: Spin before blocking on an rsocket + +The latency cost of blocking is significant compared to round +trip ping-pong time. Spin briefly on rsockets before calling +into the kernel and blocking. + +The time to spin before blocking is read from an rsocket +configuration file %sysconfig%/rdma/rsocket/polling_time. This +is user adjustable. + +As a completely unintentional side effect, this just happens to +improve application performance in benchmarks, like netpipe, +significantly. ;) + +Signed-off-by: Sean Hefty + + +--- + +diff --git a/Makefile.am b/Makefile.am +index cbd874d..1dc61e1 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -3,7 +3,7 @@ INCLUDES = -I$(srcdir)/include + lib_LTLIBRARIES = src/librdmacm.la + + ACLOCAL_AMFLAGS = -I config +-AM_CFLAGS = -g -Wall -D_GNU_SOURCE ++AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DSYSCONFDIR=\"$(sysconfdir)\" -DRDMADIR=\"@rdmadir@\" + + src_librdmacm_la_CFLAGS = $(AM_CFLAGS) + +diff --git a/configure.in b/configure.in +index dec6064..3ee7f9b 100644 +--- a/configure.in ++++ b/configure.in +@@ -87,5 +87,10 @@ AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, + + AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") + ++AC_ARG_VAR(rdmadir, [Directory for configuration files]) ++if test "x$rdmadir" = "x"; then ++ AC_SUBST(rdmadir, rdma) ++fi ++ + AC_CONFIG_FILES([Makefile librdmacm.spec]) + AC_OUTPUT +diff --git a/src/cma.h b/src/cma.h +index 2ee4767..cedc0c3 100644 +--- a/src/cma.h ++++ b/src/cma.h +@@ -166,4 +166,13 @@ struct ibv_path_data + }; + #endif + ++#ifndef SYSCONFDIR ++#define SYSCONFDIR "/etc" ++#endif ++#ifndef RDMADIR ++#define RDMADIR "rdma" ++#endif ++#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR ++#define RS_CONF_DIR RDMA_CONF_DIR "/rsocket" ++ + #endif /* CMA_H */ +diff --git a/src/rsocket.c b/src/rsocket.c +index 2fd106a..d7344c7 100644 +--- a/src/rsocket.c ++++ b/src/rsocket.c +@@ -37,10 +37,12 @@ + + #include + #include ++#include + #include + #include + #include + #include ++#include + #include + #include + #include +@@ -64,6 +66,8 @@ + static struct index_map idm; + static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; + ++static uint32_t polling_time = 10; ++ + /* + * Immediate data format is determined by the upper bits + * bit 31: message type, 0 - data, 1 - control +@@ -196,6 +200,27 @@ struct rsocket { + uint8_t *sbuf; + }; + ++void rs_configure(void) ++{ ++ FILE *f; ++ static int init; ++ ++ if (init) ++ return; ++ ++ pthread_mutex_lock(&mut); ++ if (init) ++ goto out; ++ ++ if ((f = fopen(RS_CONF_DIR "/polling_time", "r"))) { ++ fscanf(f, "%u", &polling_time); ++ fclose(f); ++ } ++ init = 1; ++out: ++ pthread_mutex_unlock(&mut); ++} ++ + /* + * We currently generate a completion per send. sqe_count = 1 + */ +@@ -470,6 +495,7 @@ int rsocket(int domain, int type, int protocol) + (type != SOCK_STREAM) || (protocol && protocol != IPPROTO_TCP)) + return ERR(ENOTSUP); + ++ rs_configure(); + rs = rs_alloc(NULL); + if (!rs) + return ERR(ENOMEM); +@@ -923,6 +949,29 @@ static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rs + return ret; + } + ++static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs)) ++{ ++ struct timeval s, e; ++ uint32_t poll_time = 0; ++ int ret; ++ ++ do { ++ ret = rs_process_cq(rs, 1, test); ++ if (!ret || nonblock || errno != EWOULDBLOCK) ++ return ret; ++ ++ if (!poll_time) ++ gettimeofday(&s, NULL); ++ ++ gettimeofday(&e, NULL); ++ poll_time = (e.tv_sec - s.tv_sec) * 1000000 + ++ (e.tv_usec - s.tv_usec) + 1; ++ } while (poll_time <= polling_time); ++ ++ ret = rs_process_cq(rs, 0, test); ++ return ret; ++} ++ + static int rs_nonblocking(struct rsocket *rs, int flags) + { + return (rs->fd_flags & O_NONBLOCK) || (flags & MSG_DONTWAIT); +@@ -1034,7 +1083,7 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags) + } + fastlock_acquire(&rs->rlock); + if (!rs_have_rdata(rs)) { +- ret = rs_process_cq(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata); ++ ret = rs_get_comp(rs, rs_nonblocking(rs, flags), rs_conn_have_rdata); + if (ret && errno != ECONNRESET) + goto out; + } +@@ -1138,8 +1187,8 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags) + fastlock_acquire(&rs->slock); + for (left = len; left; left -= xfer_size, buf += xfer_size) { + if (!rs_can_send(rs)) { +- ret = rs_process_cq(rs, rs_nonblocking(rs, flags), +- rs_conn_can_send); ++ ret = rs_get_comp(rs, rs_nonblocking(rs, flags), ++ rs_conn_can_send); + if (ret) + break; + if (rs->state != rs_connected) { +@@ -1252,8 +1301,8 @@ static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags + fastlock_acquire(&rs->slock); + for (left = len; left; left -= xfer_size) { + if (!rs_can_send(rs)) { +- ret = rs_process_cq(rs, rs_nonblocking(rs, flags), +- rs_conn_can_send); ++ ret = rs_get_comp(rs, rs_nonblocking(rs, flags), ++ rs_conn_can_send); + if (ret) + break; + if (rs->state != rs_connected) { +@@ -1468,12 +1517,23 @@ static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds) + */ + int rpoll(struct pollfd *fds, nfds_t nfds, int timeout) + { ++ struct timeval s, e; + struct pollfd *rfds; ++ uint32_t poll_time = 0; + int ret; + +- ret = rs_poll_check(fds, nfds); +- if (ret || !timeout) +- return ret; ++ do { ++ ret = rs_poll_check(fds, nfds); ++ if (ret || !timeout) ++ return ret; ++ ++ if (!poll_time) ++ gettimeofday(&s, NULL); ++ ++ gettimeofday(&e, NULL); ++ poll_time = (e.tv_sec - s.tv_sec) * 1000000 + ++ (e.tv_usec - s.tv_usec) + 1; ++ } while (poll_time <= polling_time); + + rfds = rs_fds_alloc(nfds); + if (!rfds) diff --git a/patches/rs4 b/patches/rs4 new file mode 100644 index 00000000..6300c800 --- /dev/null +++ b/patches/rs4 @@ -0,0 +1,109 @@ +Bottom: 9a355c4fd6adc489cc0c3c07b3d436d25ff2cb0c +Top: 12cda031f6e4027c6df650d4c84ed637ba93cdf8 +Author: Sean Hefty +Date: 2012-06-05 15:28:18 -0700 + +rsocket: Use configuration files to specify default settings + +Give an administrator control over the default settings +used by rsockets. Use files under %sysconfig%/rdma/rsocket as shown: + +mem_default - default size of receive buffer(s) +wmem_default - default size of send buffer(s) +sqsize_default - default size of send queue +rqsize_default - default size of receive queue +inline_default - default size of inline data + +If configuration files are not available, rsockets will continue to +use internal defaults. + +Signed-off-by: Sean Hefty + + +--- + +diff --git a/src/rsocket.c b/src/rsocket.c +index d7344c7..5e0e413 100644 +--- a/src/rsocket.c ++++ b/src/rsocket.c +@@ -53,19 +53,20 @@ + #include "cma.h" + #include "indexer.h" + +-#define RS_INLINE 64 + #define RS_OLAP_START_SIZE 2048 + #define RS_MAX_TRANSFER 65536 +-#define RS_QP_SIZE 384 + #define RS_QP_MAX_SIZE 0xFFFE +-#define RS_QP_MIN_SIZE 8 + #define RS_QP_CTRL_SIZE 4 + #define RS_CONN_RETRIES 6 + #define RS_SGL_SIZE 2 +-#define RS_BUF_SIZE (1 << 17) + static struct index_map idm; + static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; + ++static uint16_t def_inline = 64; ++static uint16_t def_sqsize = 384; ++static uint16_t def_rqsize = 384; ++static uint32_t def_mem = (1 << 17); ++static uint32_t def_wmem = (1 << 17); + static uint32_t polling_time = 10; + + /* +@@ -216,6 +217,40 @@ void rs_configure(void) + fscanf(f, "%u", &polling_time); + fclose(f); + } ++ ++ if ((f = fopen(RS_CONF_DIR "/inline_default", "r"))) { ++ fscanf(f, "%hu", &def_inline); ++ fclose(f); ++ ++ if (def_inline < RS_MIN_INLINE) ++ def_inline = RS_MIN_INLINE; ++ } ++ ++ if ((f = fopen(RS_CONF_DIR "/sqsize_default", "r"))) { ++ fscanf(f, "%hu", &def_sqsize); ++ fclose(f); ++ } ++ ++ if ((f = fopen(RS_CONF_DIR "/rqsize_default", "r"))) { ++ fscanf(f, "%hu", &def_rqsize); ++ fclose(f); ++ } ++ ++ if ((f = fopen(RS_CONF_DIR "/mem_default", "r"))) { ++ fscanf(f, "%u", &def_mem); ++ fclose(f); ++ ++ if (def_mem < 1) ++ def_mem = 1; ++ } ++ ++ if ((f = fopen(RS_CONF_DIR "/wmem_default", "r"))) { ++ fscanf(f, "%u", &def_wmem); ++ fclose(f); ++ ++ if (def_wmem < 1) ++ def_wmem = 1; ++ } + init = 1; + out: + pthread_mutex_unlock(&mut); +@@ -264,9 +299,11 @@ static struct rsocket *rs_alloc(struct rsocket *inherited_rs) + rs->rq_size = inherited_rs->rq_size; + rs->ctrl_avail = inherited_rs->ctrl_avail; + } else { +- rs->sbuf_size = rs->rbuf_size = RS_BUF_SIZE; +- rs->sq_inline = RS_INLINE; +- rs->sq_size = rs->rq_size = RS_QP_SIZE; ++ rs->sbuf_size = def_wmem; ++ rs->rbuf_size = def_mem; ++ rs->sq_inline = def_inline; ++ rs->sq_size = def_sqsize; ++ rs->rq_size = def_rqsize; + rs->ctrl_avail = RS_QP_CTRL_SIZE; + } + fastlock_init(&rs->slock);