Bottom: 1fa07c62817ac4b6cb8d9c5e327ea2cdc75dbd21
-Top: 719db85186f7ed857546278db44ac37a1e6e8dee
+Top: 2a1d1a24a767c8fd0bc9fda9532541008404ecc6
Author: Sean Hefty <sean.hefty@intel.com>
Date: 2012-11-09 10:26:38 -0800
{
errno = err;
diff --git a/src/rsocket.c b/src/rsocket.c
-index a060f66..0111836 100644
+index a060f66..7be42ca 100644
--- a/src/rsocket.c
+++ b/src/rsocket.c
@@ -47,6 +47,8 @@
rs->rmr = rdma_reg_write(rs->cm_id, rs->rbuf, rs->rbuf_size);
if (!rs->rmr)
-@@ -440,37 +681,59 @@ static int rs_init_bufs(struct rsocket *rs)
+@@ -440,37 +681,61 @@ static int rs_init_bufs(struct rsocket *rs)
return 0;
}
+ return 0;
+}
+
++/*
++ * If a user is waiting on a datagram rsocket through poll or select, then
++ * we need the first completion to generate an event on the related epoll fd
++ * in order to signal the user. We arm the CQ on creation for this purpose.
++ */
+static int rs_create_cq(struct rsocket *rs, struct rdma_cm_id *cm_id)
{
- rs->cm_id->recv_cq_channel = ibv_create_comp_channel(rs->cm_id->verbs);
+ if (fcntl(cm_id->recv_cq_channel->fd, F_SETFL, O_NONBLOCK))
goto err2;
}
-+ //***
-+ //else {
-+ ibv_req_notify_cq(cm_id->recv_cq, 0);
-+ //}
- rs->cm_id->send_cq_channel = rs->cm_id->recv_cq_channel;
- rs->cm_id->send_cq = rs->cm_id->recv_cq;
++ ibv_req_notify_cq(cm_id->recv_cq, 0);
+ cm_id->send_cq_channel = cm_id->recv_cq_channel;
+ cm_id->send_cq = cm_id->recv_cq;
return 0;
{
struct ibv_recv_wr wr, *bad;
-@@ -482,6 +745,26 @@ rs_post_recv(struct rsocket *rs)
+@@ -482,6 +747,26 @@ rs_post_recv(struct rsocket *rs)
return rdma_seterrno(ibv_post_recv(rs->cm_id->qp, &wr, &bad));
}
static int rs_create_ep(struct rsocket *rs)
{
struct ibv_qp_init_attr qp_attr;
-@@ -492,7 +775,7 @@ static int rs_create_ep(struct rsocket *rs)
+@@ -492,7 +777,7 @@ static int rs_create_ep(struct rsocket *rs)
if (ret)
return ret;
if (ret)
return ret;
-@@ -549,8 +832,70 @@ static void rs_free_iomappings(struct rsocket *rs)
+@@ -549,8 +834,70 @@ static void rs_free_iomappings(struct rsocket *rs)
}
}
if (rs->index >= 0)
rs_remove(rs);
-@@ -582,7 +927,7 @@ static void rs_free(struct rsocket *rs)
+@@ -582,7 +929,7 @@ static void rs_free(struct rsocket *rs)
rdma_destroy_id(rs->cm_id);
}
fastlock_destroy(&rs->cq_wait_lock);
fastlock_destroy(&rs->cq_lock);
fastlock_destroy(&rs->rlock);
-@@ -636,29 +981,88 @@ static void rs_save_conn_data(struct rsocket *rs, struct rs_conn_data *conn)
+@@ -636,29 +983,88 @@ static void rs_save_conn_data(struct rsocket *rs, struct rs_conn_data *conn)
rs->sseq_comp = ntohs(conn->credits);
}
+ ret = rdma_create_id(NULL, &rs->cm_id, rs, RDMA_PS_TCP);
+ if (ret)
+ goto err;
-+
+
+- ret = rs_insert(rs);
+ rs->cm_id->route.addr.src_addr.sa_family = domain;
+ index = rs->cm_id->channel->fd;
+ } else {
+ ret = ds_init(rs, domain);
+ if (ret)
+ goto err;
-
-- ret = rs_insert(rs);
++
+ index = rs->udp_sock;
+ }
+
return rs->index;
err:
-@@ -672,9 +1076,18 @@ int rbind(int socket, const struct sockaddr *addr, socklen_t addrlen)
+@@ -672,9 +1078,18 @@ int rbind(int socket, const struct sockaddr *addr, socklen_t addrlen)
int ret;
rs = idm_at(&idm, socket);
return ret;
}
-@@ -710,7 +1123,7 @@ int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
+@@ -710,7 +1125,7 @@ int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
int ret;
rs = idm_at(&idm, socket);
if (!new_rs)
return ERR(ENOMEM);
-@@ -718,7 +1131,7 @@ int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
+@@ -718,7 +1133,7 @@ int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
if (ret)
goto err;
if (ret < 0)
goto err;
-@@ -729,7 +1142,7 @@ int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
+@@ -729,7 +1144,7 @@ int raccept(int socket, struct sockaddr *addr, socklen_t *addrlen)
}
if (rs->fd_flags & O_NONBLOCK)
ret = rs_create_ep(new_rs);
if (ret)
-@@ -831,7 +1244,7 @@ connected:
+@@ -831,7 +1246,7 @@ connected:
break;
case rs_accepting:
if (!(rs->fd_flags & O_NONBLOCK))
ret = ucma_complete(rs->cm_id);
if (ret)
-@@ -855,13 +1268,240 @@ connected:
+@@ -855,13 +1270,240 @@ connected:
return ret;
}
}
static int rs_post_write_msg(struct rsocket *rs,
-@@ -903,6 +1543,24 @@ static int rs_post_write(struct rsocket *rs,
+@@ -903,6 +1545,24 @@ static int rs_post_write(struct rsocket *rs,
return rdma_seterrno(ibv_post_send(rs->cm_id->qp, &wr, &bad));
}
/*
* Update target SGE before sending data. Otherwise the remote side may
* update the entry before we do.
-@@ -1046,7 +1704,7 @@ static int rs_poll_cq(struct rsocket *rs)
+@@ -1046,7 +1706,7 @@ static int rs_poll_cq(struct rsocket *rs)
rs->state = rs_disconnected;
return 0;
} else if (rs_msg_data(imm_data) == RS_CTRL_SHUTDOWN) {
}
break;
case RS_OP_WRITE:
-@@ -1133,46 +1791,208 @@ static int rs_get_cq_event(struct rsocket *rs)
+@@ -1133,46 +1793,208 @@ static int rs_get_cq_event(struct rsocket *rs)
*/
static int rs_process_cq(struct rsocket *rs, int nonblock, int (*test)(struct rsocket *rs))
{
if (!ret || nonblock || errno != EWOULDBLOCK)
return ret;
-@@ -1184,7 +2004,7 @@ static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsoc
+@@ -1184,7 +2006,7 @@ static int rs_get_comp(struct rsocket *rs, int nonblock, int (*test)(struct rsoc
(e.tv_usec - s.tv_usec) + 1;
} while (poll_time <= polling_time);
return ret;
}
-@@ -1219,9 +2039,19 @@ static int rs_can_send(struct rsocket *rs)
+@@ -1219,9 +2041,19 @@ static int rs_can_send(struct rsocket *rs)
(rs->target_sgl[rs->target_sge].length != 0);
}
}
static int rs_conn_can_send_ctrl(struct rsocket *rs)
-@@ -1236,7 +2066,7 @@ static int rs_have_rdata(struct rsocket *rs)
+@@ -1236,7 +2068,7 @@ static int rs_have_rdata(struct rsocket *rs)
static int rs_conn_have_rdata(struct rsocket *rs)
{
}
static int rs_conn_all_sends_done(struct rsocket *rs)
-@@ -1245,6 +2075,67 @@ static int rs_conn_all_sends_done(struct rsocket *rs)
+@@ -1245,6 +2077,67 @@ static int rs_conn_all_sends_done(struct rsocket *rs)
!(rs->state & rs_connected);
}
static ssize_t rs_peek(struct rsocket *rs, void *buf, size_t len)
{
size_t left = len;
-@@ -1290,6 +2181,13 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
+@@ -1290,6 +2183,13 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
int ret;
rs = idm_at(&idm, socket);
if (rs->state & rs_opening) {
ret = rs_do_connect(rs);
if (ret) {
-@@ -1339,7 +2237,7 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
+@@ -1339,7 +2239,7 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
rs->rbuf_bytes_avail += rsize;
}
fastlock_release(&rs->rlock);
return ret ? ret : len - left;
-@@ -1348,8 +2246,17 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
+@@ -1348,8 +2248,17 @@ ssize_t rrecv(int socket, void *buf, size_t len, int flags)
ssize_t rrecvfrom(int socket, void *buf, size_t len, int flags,
struct sockaddr *src_addr, socklen_t *addrlen)
{
ret = rrecv(socket, buf, len, flags);
if (ret > 0 && src_addr)
rgetpeername(socket, src_addr, addrlen);
-@@ -1391,14 +2298,14 @@ static int rs_send_iomaps(struct rsocket *rs, int flags)
+@@ -1391,14 +2300,14 @@ static int rs_send_iomaps(struct rsocket *rs, int flags)
struct rs_iomap iom;
int ret;
ret = ERR(ECONNRESET);
break;
}
-@@ -1447,10 +2354,92 @@ static int rs_send_iomaps(struct rsocket *rs, int flags)
+@@ -1447,10 +2356,92 @@ static int rs_send_iomaps(struct rsocket *rs, int flags)
}
rs->iomap_pending = !dlist_empty(&rs->iomap_queue);
/*
* We overlap sending the data, by posting a small work request immediately,
* then increasing the size of the send on each iteration.
-@@ -1464,6 +2453,13 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags)
+@@ -1464,6 +2455,13 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags)
int ret = 0;
rs = idm_at(&idm, socket);
if (rs->state & rs_opening) {
ret = rs_do_connect(rs);
if (ret) {
-@@ -1485,7 +2481,7 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags)
+@@ -1485,7 +2483,7 @@ ssize_t rsend(int socket, const void *buf, size_t len, int flags)
rs_conn_can_send);
if (ret)
break;
ret = ERR(ECONNRESET);
break;
}
-@@ -1538,10 +2534,34 @@ out:
+@@ -1538,10 +2536,34 @@ out:
ssize_t rsendto(int socket, const void *buf, size_t len, int flags,
const struct sockaddr *dest_addr, socklen_t addrlen)
{
}
static void rs_copy_iov(void *dst, const struct iovec **iov, size_t *offset, size_t len)
-@@ -1600,7 +2620,7 @@ static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags
+@@ -1600,7 +2622,7 @@ static ssize_t rsendv(int socket, const struct iovec *iov, int iovcnt, int flags
rs_conn_can_send);
if (ret)
break;
ret = ERR(ECONNRESET);
break;
}
-@@ -1653,7 +2673,7 @@ ssize_t rsendmsg(int socket, const struct msghdr *msg, int flags)
+@@ -1653,7 +2675,7 @@ ssize_t rsendmsg(int socket, const struct msghdr *msg, int flags)
if (msg->msg_control && msg->msg_controllen)
return ERR(ENOTSUP);
}
ssize_t rwrite(int socket, const void *buf, size_t count)
-@@ -1690,8 +2710,8 @@ static int rs_poll_rs(struct rsocket *rs, int events,
+@@ -1690,8 +2712,8 @@ static int rs_poll_rs(struct rsocket *rs, int events,
int ret;
check_cq:
rs_process_cq(rs, nonblock, test);
revents = 0;
-@@ -1707,6 +2727,16 @@ check_cq:
+@@ -1707,6 +2729,16 @@ check_cq:
}
return revents;
}
if (rs->state == rs_listening) {
-@@ -1766,18 +2796,20 @@ static int rs_poll_arm(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
+@@ -1766,18 +2798,20 @@ static int rs_poll_arm(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
if (fds[i].revents)
return 1;
}
return 0;
}
-@@ -1793,7 +2825,10 @@ static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
+@@ -1793,7 +2827,10 @@ static int rs_poll_events(struct pollfd *rfds, struct pollfd *fds, nfds_t nfds)
rs = idm_lookup(&idm, fds[i].fd);
if (rs) {
fds[i].revents = rs_poll_rs(rs, fds[i].events, 1, rs_poll_all);
} else {
fds[i].revents = rfds[i].revents;
-@@ -1949,7 +2984,7 @@ int rshutdown(int socket, int how)
+@@ -1949,7 +2986,7 @@ int rshutdown(int socket, int how)
rs = idm_at(&idm, socket);
if (how == SHUT_RD) {
return 0;
}
-@@ -1959,10 +2994,10 @@ int rshutdown(int socket, int how)
+@@ -1959,10 +2996,10 @@ int rshutdown(int socket, int how)
if (rs->state & rs_connected) {
if (how == SHUT_RDWR) {
ctrl = RS_CTRL_DISCONNECT;
RS_CTRL_SHUTDOWN : RS_CTRL_DISCONNECT;
}
if (!rs->ctrl_avail) {
-@@ -1987,13 +3022,32 @@ int rshutdown(int socket, int how)
+@@ -1987,13 +3024,32 @@ int rshutdown(int socket, int how)
return 0;
}
rs_free(rs);
return 0;
-@@ -2018,8 +3072,12 @@ int rgetpeername(int socket, struct sockaddr *addr, socklen_t *addrlen)
+@@ -2018,8 +3074,12 @@ int rgetpeername(int socket, struct sockaddr *addr, socklen_t *addrlen)
struct rsocket *rs;
rs = idm_at(&idm, socket);
}
int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen)
-@@ -2027,8 +3085,12 @@ int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen)
+@@ -2027,8 +3087,12 @@ int rgetsockname(int socket, struct sockaddr *addr, socklen_t *addrlen)
struct rsocket *rs;
rs = idm_at(&idm, socket);
}
int rsetsockopt(int socket, int level, int optname,
-@@ -2040,22 +3102,31 @@ int rsetsockopt(int socket, int level, int optname,
+@@ -2040,22 +3104,31 @@ int rsetsockopt(int socket, int level, int optname,
ret = ERR(ENOTSUP);
rs = idm_at(&idm, socket);
rs->rbuf_size = (*(uint32_t *) optval) << 1;
ret = 0;
break;
-@@ -2101,9 +3172,11 @@ int rsetsockopt(int socket, int level, int optname,
+@@ -2101,9 +3174,11 @@ int rsetsockopt(int socket, int level, int optname,
opts = &rs->ipv6_opts;
switch (optname) {
case IPV6_V6ONLY:
opt_on = *(int *) optval;
break;
default:
-@@ -2315,7 +3388,7 @@ off_t riomap(int socket, void *buf, size_t len, int prot, int flags, off_t offse
+@@ -2315,7 +3390,7 @@ off_t riomap(int socket, void *buf, size_t len, int prot, int flags, off_t offse
if (!rs->cm_id->pd || (prot & ~(PROT_WRITE | PROT_NONE)))
return ERR(EINVAL);
if (prot & PROT_WRITE) {
iomr = rs_get_iomap_mr(rs);
access |= IBV_ACCESS_REMOTE_WRITE;
-@@ -2349,7 +3422,7 @@ off_t riomap(int socket, void *buf, size_t len, int prot, int flags, off_t offse
+@@ -2349,7 +3424,7 @@ off_t riomap(int socket, void *buf, size_t len, int prot, int flags, off_t offse
dlist_insert_tail(&iomr->entry, &rs->iomap_list);
}
out:
return offset;
}
-@@ -2361,7 +3434,7 @@ int riounmap(int socket, void *buf, size_t len)
+@@ -2361,7 +3436,7 @@ int riounmap(int socket, void *buf, size_t len)
int ret = 0;
rs = idm_at(&idm, socket);
for (entry = rs->iomap_list.next; entry != &rs->iomap_list;
entry = entry->next) {
-@@ -2382,7 +3455,7 @@ int riounmap(int socket, void *buf, size_t len)
+@@ -2382,7 +3457,7 @@ int riounmap(int socket, void *buf, size_t len)
}
ret = ERR(EINVAL);
out:
return ret;
}
-@@ -2426,7 +3499,7 @@ size_t riowrite(int socket, const void *buf, size_t count, off_t offset, int fla
+@@ -2426,7 +3501,7 @@ size_t riowrite(int socket, const void *buf, size_t count, off_t offset, int fla
rs_conn_can_send);
if (ret)
break;
ret = ERR(ECONNRESET);
break;
}
-@@ -2476,3 +3549,272 @@ out:
+@@ -2476,3 +3551,272 @@ out:
return (ret && left == count) ? ret : count - left;
}