git.openfabrics.org - ~shefty/libmlx4.git/commitdiff
Use BlueFlame for inline sends
author: Roland Dreier <rolandd@cisco.com>
Mon, 23 Apr 2007 22:07:49 +0000 (15:07 -0700)
committer: Roland Dreier <rolandd@cisco.com>
Mon, 23 Apr 2007 22:13:59 +0000 (15:13 -0700)
If BlueFlame is available, map the BlueFlame page when creating a
context and use BlueFlame for inline sends.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
src/mlx4-abi.h
src/mlx4.c
src/mlx4.h
src/qp.c
src/wqe.h

index 2a392cb0cd88d24e66b8f5905d502a772dd62750..781091362cc79165cd1ff9430efbea7be553424b 100644 (file)
@@ -40,7 +40,8 @@
 struct mlx4_alloc_ucontext_resp {
        struct ibv_get_context_resp     ibv_resp;
        __u32                           qp_tab_size;
-       __u32                           bf_reg_size;
+       __u16                           bf_reg_size;
+       __u16                           bf_regs_per_page;
 };
 
 struct mlx4_alloc_pd_resp {
index 1d8de8707769e407d8bbace30e21e670869ea51f..23577c19151d77d1f6a45f71edf2e3b12a2b9770 100644 (file)
@@ -149,6 +149,25 @@ static struct ibv_context *mlx4_alloc_context(struct ibv_device *ibdev, int cmd_
        if (context->uar == MAP_FAILED)
                goto err_free;
 
+       if (resp.bf_reg_size) {
+               context->bf_page = mmap(NULL, to_mdev(ibdev)->page_size,
+                                       PROT_WRITE, MAP_SHARED, cmd_fd,
+                                       to_mdev(ibdev)->page_size);
+               if (context->bf_page == MAP_FAILED) {
+                       fprintf(stderr, PFX "Warning: BlueFlame available, "
+                               "but failed to mmap() BlueFlame page.\n");
+                               context->bf_page     = NULL;
+                               context->bf_buf_size = 0;
+               } else {
+                       context->bf_buf_size = resp.bf_reg_size / 2;
+                       context->bf_offset   = 0;
+                       pthread_spin_init(&context->bf_lock, PTHREAD_PROCESS_PRIVATE);
+               }
+       } else {
+               context->bf_page     = NULL;
+               context->bf_buf_size = 0;
+       }
+
        pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
        context->ibv_ctx.ops = mlx4_ctx_ops;
@@ -165,6 +184,8 @@ static void mlx4_free_context(struct ibv_context *ibctx)
        struct mlx4_context *context = to_mctx(ibctx);
 
        munmap(context->uar, to_mdev(ibctx->device)->page_size);
+       if (context->bf_page)
+               munmap(context->bf_page, to_mdev(ibctx->device)->page_size);
        free(context);
 }
 
index 8b4dc20adbb5916f778d53a02cac1350589305e9..c4d389f506cfc63b95fbb5863604caabc7cf1c83 100644 (file)
@@ -129,6 +129,11 @@ struct mlx4_context {
        void                           *uar;
        pthread_spinlock_t              uar_lock;
 
+       void                           *bf_page;
+       int                             bf_buf_size;
+       int                             bf_offset;
+       pthread_spinlock_t              bf_lock;
+
        struct {
                struct mlx4_qp        **table;
                int                     refcnt;
index b9b73053ad0e4027b3894633401d8883ee738e2b..76abf75f4049c83d0d1dcf384b64b581d857ac2c 100644 (file)
--- a/src/qp.c
+++ b/src/qp.c
@@ -91,11 +91,13 @@ static int wq_overflow(struct mlx4_wq *wq, int nreq, struct mlx4_cq *cq)
 int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                          struct ibv_send_wr **bad_wr)
 {
+       struct mlx4_context *ctx;
        struct mlx4_qp *qp = to_mqp(ibqp);
        void *wqe;
        struct mlx4_wqe_ctrl_seg *ctrl;
        int ind;
        int nreq;
+       int inl = 0;
        int ret = 0;
        int size;
        int i;
@@ -214,15 +216,14 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                if (wr->send_flags & IBV_SEND_INLINE) {
                        if (wr->num_sge) {
                                struct mlx4_wqe_inline_seg *seg = wqe;
-                               int s = 0;
 
                                wqe += sizeof *seg;
                                for (i = 0; i < wr->num_sge; ++i) {
                                        uint32_t len = wr->sg_list[i].length;
 
-                                       s += len;
+                                       inl += len;
 
-                                       if (s > qp->max_inline_data) {
+                                       if (inl > qp->max_inline_data) {
                                                ret = -1;
                                                *bad_wr = wr;
                                                goto out;
@@ -234,8 +235,8 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                                        wqe += len;
                                }
 
-                               seg->byte_count = htonl(MLX4_INLINE_SEG | s);
-                               size += (s + sizeof *seg + 15) / 16;
+                               seg->byte_count = htonl(MLX4_INLINE_SEG | inl);
+                               size += (inl + sizeof *seg + 15) / 16;
                        }
                } else {
                        struct mlx4_wqe_data_seg *seg = wqe;
@@ -266,7 +267,25 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
        }
 
 out:
-       if (nreq) {
+       ctx = to_mctx(ibqp->context);
+
+       if (nreq == 1 && inl && size > 1 && size < ctx->bf_buf_size / 16) {
+               ctrl->owner_opcode |= htonl((qp->sq.head & 0xffff) << 8);
+               *(uint32_t *) ctrl->reserved |= qp->doorbell_qpn;
+               /*
+                * Make sure that descriptor is written to memory
+                * before writing to BlueFlame page.
+                */
+               wmb();
+
+               ++qp->sq.head;
+
+               pthread_spin_lock(&ctx->bf_lock);
+               memcpy(ctx->bf_page + ctx->bf_offset, ctrl, align(size * 16, 64));
+               /* FIXME flush wc buffers */
+               ctx->bf_offset ^= ctx->bf_buf_size;
+               pthread_spin_unlock(&ctx->bf_lock);
+       } else if (nreq) {
                qp->sq.head += nreq;
 
                /*
@@ -275,8 +294,7 @@ out:
                 */
                wmb();
 
-               *(uint32_t *) (to_mctx(ibqp->context)->uar + MLX4_SEND_DOORBELL) =
-                       qp->doorbell_qpn;
+               *(uint32_t *) (ctx->uar + MLX4_SEND_DOORBELL) = qp->doorbell_qpn;
        }
 
        pthread_spin_unlock(&qp->sq.lock);
index 5411fe2632c723844ff03cde265fcfa40c88a0fa..877ebfd912a349fcf62e87a1ee6d819ca58caebb 100644 (file)
--- a/src/wqe.h
+++ b/src/wqe.h
@@ -53,7 +53,7 @@ enum {
 
 struct mlx4_wqe_ctrl_seg {
        uint32_t                owner_opcode;
-       uint8_t                 reserved2[3];
+       uint8_t                 reserved[3];
        uint8_t                 fence_size;
        /*
         * High 24 bits are SRC remote buffer; low 8 bits are flags: