git.openfabrics.org - ~shefty/libmlx4.git / commitdiff
Make sure inline segments in send WQEs don't cross 64 byte boundaries
author		Roland Dreier <rolandd@cisco.com>	Thu, 14 Jun 2007 20:23:33 +0000 (13:23 -0700)
committer	Roland Dreier <rolandd@cisco.com>	Thu, 14 Jun 2007 21:10:41 +0000 (14:10 -0700)
Hardware requires that inline data segments do not cross a 64 byte
boundary.  Make sure that send work requests satisfy this by using
multiple inline data segments when needed.

Based on a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
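
The chunking rule is easiest to see in isolation. Below is a minimal
standalone sketch, not driver code: show_chunks() is a hypothetical
helper that mirrors the splitting loop this patch adds to
mlx4_post_send(), assuming the 64-byte boundary and the 4-byte inline
segment header used in the patch.

#include <stdio.h>

#define MLX4_INLINE_ALIGN	64
#define INL_SEG_HDR		4	/* sizeof (struct mlx4_wqe_inline_seg) */

/*
 * Hypothetical demo: show how 'len' bytes of inline payload whose
 * first byte lands 'off' bytes past a 64-byte boundary are carved
 * into segments that never cross MLX4_INLINE_ALIGN.
 */
static void show_chunks(int off, int len)
{
	int num_seg = 0;

	/* Fill the current segment only up to the next boundary, then
	 * start a fresh segment with its own 4-byte header. */
	while (len >= MLX4_INLINE_ALIGN - off) {
		int to_copy = MLX4_INLINE_ALIGN - off;

		printf("segment %d: %d data bytes\n", ++num_seg, to_copy);
		len -= to_copy;
		off = INL_SEG_HDR;
	}
	if (len)
		printf("segment %d: %d data bytes\n", num_seg + 1, len);
}

int main(void)
{
	/* RC QP: 16B ctrl + 16B raddr + 4B inline header = offset 36,
	 * so 128 bytes split as 28 + 60 + 40. */
	show_chunks(36, 128);
	return 0;
}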
src/qp.c
src/wqe.h

index 301f7cbe20bacbc3621a2233d28e0da31b565050..0d536233148323b96b3c694202b945eab29405f7 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -240,33 +240,59 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                        break;
                }
 
-               if (wr->send_flags & IBV_SEND_INLINE) {
-                       if (wr->num_sge) {
-                               struct mlx4_wqe_inline_seg *seg = wqe;
-
-                               inl = 0;
-                               wqe += sizeof *seg;
-                               for (i = 0; i < wr->num_sge; ++i) {
-                                       uint32_t len = wr->sg_list[i].length;
-
-                                       inl += len;
-
-                                       if (inl > qp->max_inline_data) {
-                                               inl = 0;
-                                               ret = -1;
-                                               *bad_wr = wr;
-                                               goto out;
-                                       }
-
-                                       memcpy(wqe,
-                                              (void *) (intptr_t) wr->sg_list[i].addr,
-                                              len);
-                                       wqe += len;
+               if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) {
+                       struct mlx4_wqe_inline_seg *seg;
+                       void *addr;
+                       int len, seg_len;
+                       int num_seg;
+                       int off, to_copy;
+
+                       inl = 0;
+
+                       seg = wqe;
+                       wqe += sizeof *seg;
+                       off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1);
+                       num_seg = 0;
+                       seg_len = 0;
+
+                       for (i = 0; i < wr->num_sge; ++i) {
+                               addr = (void *) (uintptr_t) wr->sg_list[i].addr;
+                               len  = wr->sg_list[i].length;
+                               inl += len;
+
+                               if (inl > qp->max_inline_data) {
+                                       inl = 0;
+                                       ret = -1;
+                                       *bad_wr = wr;
+                                       goto out;
                                }
 
-                               seg->byte_count = htonl(MLX4_INLINE_SEG | inl);
-                               size += (inl + sizeof *seg + 15) / 16;
+                               while (len >= MLX4_INLINE_ALIGN - off) {
+                                       to_copy = MLX4_INLINE_ALIGN - off;
+                                       memcpy(wqe, addr, to_copy);
+                                       len -= to_copy;
+                                       wqe += to_copy;
+                                       addr += to_copy;
+                                       seg_len += to_copy;
+                                       seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
+                                       seg_len = 0;
+                                       seg = wqe;
+                                       wqe += sizeof *seg;
+                                       off = sizeof *seg;
+                                       ++num_seg;
+                               }
+
+                               memcpy(wqe, addr, len);
+                               wqe += len;
+                               seg_len += len;
+                       }
+
+                       if (seg_len) {
+                               ++num_seg;
+                               seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len);
                        }
+
+                       size += (inl + num_seg * sizeof * seg + 15) / 16;
                } else {
                        struct mlx4_wqe_data_seg *seg = wqe;
 
@@ -413,14 +439,41 @@ out:
        return ret;
 }
 
+static int num_inline_segs(int data, enum ibv_qp_type type)
+{
+       /*
+        * Inline data segments are not allowed to cross 64 byte
+        * boundaries.  For UD QPs, the data segments always start
+        * aligned to 64 bytes (16 byte control segment + 48 byte
+        * datagram segment); for other QPs, there will be a 16 byte
+        * control segment and possibly a 16 byte remote address
+        * segment, so in the worst case there will be only 32 bytes
+        * available for the first data segment.
+        */
+       if (type == IBV_QPT_UD)
+               data += (sizeof (struct mlx4_wqe_ctrl_seg) +
+                        sizeof (struct mlx4_wqe_datagram_seg)) %
+                       MLX4_INLINE_ALIGN;
+       else
+               data += (sizeof (struct mlx4_wqe_ctrl_seg) +
+                        sizeof (struct mlx4_wqe_raddr_seg)) %
+                       MLX4_INLINE_ALIGN;
+
+       return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) /
+               (MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg));
+}
+
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
                           struct mlx4_qp *qp)
 {
        int size;
        int max_sq_sge;
 
-       max_sq_sge       = align(cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg),
-                                sizeof (struct mlx4_wqe_data_seg)) / sizeof (struct mlx4_wqe_data_seg);
+       max_sq_sge       = align(cap->max_inline_data +
+                                num_inline_segs(cap->max_inline_data, type) *
+                                sizeof (struct mlx4_wqe_inline_seg),
+                                sizeof (struct mlx4_wqe_data_seg)) /
+               sizeof (struct mlx4_wqe_data_seg);
        if (max_sq_sge < cap->max_send_sge)
                max_sq_sge = cap->max_send_sge;
 
@@ -530,10 +583,19 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
 
        qp->sq.max_gs        = wqe_size / sizeof (struct mlx4_wqe_data_seg);
        cap->max_send_sge    = qp->sq.max_gs;
-       qp->max_inline_data  = wqe_size - sizeof (struct mlx4_wqe_inline_seg);
        cap->max_inline_data = qp->max_inline_data;
        qp->sq.max_post      = qp->sq.wqe_cnt - qp->sq_spare_wqes;
        cap->max_send_wr     = qp->sq.max_post;
+
+       /*
+        * Inline data segments can't cross a 64 byte boundary.  So
+        * subtract off one segment header for each 64-byte chunk,
+        * taking into account the fact that wqe_size will be 32 mod
+        * 64 for non-UD QPs.
+        */
+       qp->max_inline_data  = wqe_size -
+               sizeof (struct mlx4_wqe_inline_seg) *
+               (align(wqe_size, MLX4_INLINE_ALIGN) / MLX4_INLINE_ALIGN);
 }
 
 struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn)
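
Taken together, num_inline_segs() bounds the per-WQE segment count and
mlx4_set_sq_sizes() discounts one segment header per 64-byte chunk. A
small self-contained sketch of the same arithmetic, with the segment
sizes hard-coded from the comment in the patch (16-byte control,
48-byte datagram, 16-byte remote-address, 4-byte inline header);
demo_num_inline_segs() is a hypothetical stand-in, not the driver
function:

#include <assert.h>

enum { CTRL = 16, DGRAM = 48, RADDR = 16, INL_HDR = 4, ALIGN = 64 };

static int demo_num_inline_segs(int data, int is_ud)
{
	/* Worst-case start offset of the first inline segment. */
	data += is_ud ? (CTRL + DGRAM) % ALIGN	/* 0: UD data starts 64-byte aligned */
		      : (CTRL + RADDR) % ALIGN;	/* 32: only half a chunk remains */

	/* Each 64-byte chunk carries at most 64 - 4 = 60 payload bytes. */
	return (data + ALIGN - INL_HDR - 1) / (ALIGN - INL_HDR);
}

int main(void)
{
	assert(demo_num_inline_segs(60, 1) == 1);   /* UD: fits one chunk */
	assert(demo_num_inline_segs(61, 1) == 2);   /* UD: spills into a second */
	assert(demo_num_inline_segs(60, 0) == 2);   /* RC/UC: 32-byte offset costs a chunk */
	return 0;
}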
index 877ebfd912a349fcf62e87a1ee6d819ca58caebb..6f7f3091457e9ace4b6cacd63f1d8fc406ef726c 100644
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -44,11 +44,12 @@ enum {
 };
 
 enum {
-       MLX4_INLINE_SEG                 = 1 << 31
+       MLX4_INLINE_SEG         = 1 << 31,
+       MLX4_INLINE_ALIGN       = 64,
 };
 
 enum {
-       MLX4_INVALID_LKEY               = 0x100,
+       MLX4_INVALID_LKEY       = 0x100,
 };
 
 struct mlx4_wqe_ctrl_seg {
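
MLX4_INLINE_ALIGN, added above, is the boundary enforced in
mlx4_post_send(), and MLX4_INLINE_SEG is the flag ORed into each
segment's byte_count. A minimal sketch of that encoding, using the
constants as defined in this header (the surrounding program is
illustrative only):

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

enum {
	MLX4_INLINE_SEG		= 1 << 31,
	MLX4_INLINE_ALIGN	= 64,
};

int main(void)
{
	/* A 28-byte inline segment: the length goes in the low bits,
	 * MLX4_INLINE_SEG marks it as inline data rather than a
	 * scatter/gather pointer, and the word is stored big-endian,
	 * as in seg->byte_count = htonl(MLX4_INLINE_SEG | seg_len). */
	uint32_t byte_count = htonl(MLX4_INLINE_SEG | 28);

	assert(ntohl(byte_count) == (0x80000000u | 28));
	return 0;
}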