git.openfabrics.org - ~shefty/librdmacm.git/commitdiff
rsocket: Fix sbuf_bytes_avail counter 'overrun' with iwarp
author Sean Hefty <sean.hefty@intel.com>
Thu, 6 Mar 2014 21:42:31 +0000 (13:42 -0800)
committer Sean Hefty <sean.hefty@intel.com>
Thu, 6 Mar 2014 21:42:31 +0000 (13:42 -0800)
Reported-by: Jonas Pfefferle1 <JPF@zurich.ibm.com>
"The problem is that on the client side sbuf_bytes_avail overflows
in rs_poll_cq.  And from what I debugged so far there are 2
completions for every send and this is because I use iWarp hardware
which does not support write with immediate so there is one completion
for the write and one for the send (both go into the default case
and add the length to sbuf_bytes_avail)."

To avoid the issue, we flag send message operations that are used
in place of immediate data.  Other send message operations are
not affected.  The completion code can then check whether the
completion is for a send message which was paired with an RDMA
write transaction and adjust the behavior accordingly.  Additionally,
such send messages only carry the opcode in their WR_ID, with the
data portion zeroed.  This avoids adding the length value twice.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
src/rsocket.c

index 5111a7c49e7b4d0e22b40da2ba77bd7ac944b0b9..30ea55ddcd874fd1d2a4047c28f3fa9e6b0a9a4c 100644 (file)
@@ -146,9 +146,11 @@ enum {
 #define RS_MSG_SIZE          sizeof(uint32_t)
 
 #define RS_WR_ID_FLAG_RECV (((uint64_t) 1) << 63)
+#define RS_WR_ID_FLAG_MSG_SEND (((uint64_t) 1) << 62) /* See RS_OPT_MSG_SEND */
 #define rs_send_wr_id(data) ((uint64_t) data)
 #define rs_recv_wr_id(data) (RS_WR_ID_FLAG_RECV | (uint64_t) data)
 #define rs_wr_is_recv(wr_id) (wr_id & RS_WR_ID_FLAG_RECV)
+#define rs_wr_is_msg_send(wr_id) (wr_id & RS_WR_ID_FLAG_MSG_SEND)
 #define rs_wr_data(wr_id) ((uint32_t) wr_id)
 
 enum {
@@ -1651,11 +1653,12 @@ static int rs_post_write_msg(struct rsocket *rs,
                         uint64_t addr, uint32_t rkey)
 {
        struct ibv_send_wr wr, *bad;
+       struct ibv_sge sge;
        int ret;
 
+       wr.next = NULL;
        if (!(rs->opts & RS_OPT_MSG_SEND)) {
                wr.wr_id = rs_send_wr_id(msg);
-               wr.next = NULL;
                wr.sg_list = sgl;
                wr.num_sge = nsge;
                wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -1667,8 +1670,19 @@ static int rs_post_write_msg(struct rsocket *rs,
                return rdma_seterrno(ibv_post_send(rs->cm_id->qp, &wr, &bad));
        } else {
                ret = rs_post_write(rs, sgl, nsge, msg, flags, addr, rkey);
-               if (!ret)
-                       ret = rs_post_msg(rs, msg);
+               if (!ret) {
+                       wr.wr_id = rs_send_wr_id(rs_msg_set(rs_msg_op(msg), 0)) |
+                                  RS_WR_ID_FLAG_MSG_SEND;
+                       sge.addr = (uintptr_t) &msg;
+                       sge.lkey = 0;
+                       sge.length = sizeof msg;
+                       wr.sg_list = &sge;
+                       wr.num_sge = 1;
+                       wr.opcode = IBV_WR_SEND;
+                       wr.send_flags = IBV_SEND_INLINE;
+
+                       ret = rdma_seterrno(ibv_post_send(rs->cm_id->qp, &wr, &bad));
+               }
                return ret;
        }
 }
@@ -1881,7 +1895,8 @@ static int rs_poll_cq(struct rsocket *rs)
                                break;
                        case RS_OP_IOMAP_SGL:
                                rs->sqe_avail++;
-                               rs->sbuf_bytes_avail += sizeof(struct rs_iomap);
+                               if (!rs_wr_is_msg_send(wc.wr_id))
+                                       rs->sbuf_bytes_avail += sizeof(struct rs_iomap);
                                break;
                        default:
                                rs->sqe_avail++;