git.openfabrics.org - ~shefty/libmlx4.git/commitdiff
libmlx4: Add support for XRC QPs [master] [xrc]
author Sean Hefty <sean.hefty@intel.com>
Tue, 18 Sep 2012 18:24:19 +0000 (11:24 -0700)
committer Sean Hefty <sean.hefty@intel.com>
Mon, 18 Mar 2013 18:36:00 +0000 (11:36 -0700)
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
src/buf.c
src/cq.c
src/mlx4-abi.h
src/mlx4.c
src/mlx4.h
src/qp.c
src/srq.c
src/verbs.c

index a80bcb107a54683edbb1eda1ceed4486624683a4..50957bbc85da8c20ba7ab3ebb4a7a5e98c4a8f72 100644 (file)
--- a/src/buf.c
+++ b/src/buf.c
@@ -78,6 +78,8 @@ int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size)
 
 void mlx4_free_buf(struct mlx4_buf *buf)
 {
-       ibv_dofork_range(buf->buf, buf->length);
-       munmap(buf->buf, buf->length);
+       if (buf->length) {
+               ibv_dofork_range(buf->buf, buf->length);
+               munmap(buf->buf, buf->length);
+       }
 }
index 8f7a8cce094b70b51b3d912b5489f7a8611f0560..20ce1f1bf7d5cdd9e4234a0aca57d965a0fbd7f0 100644 (file)
--- a/src/cq.c
+++ b/src/cq.c
@@ -220,33 +220,43 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
        rmb();
 
        qpn = ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK;
+       wc->qp_num = qpn;
 
        is_send  = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
        is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
                MLX4_CQE_OPCODE_ERROR;
 
-       if (!*cur_qp ||
-           (qpn != (*cur_qp)->ibv_qp.qp_num)) {
+       if ((qpn & MLX4_XRC_QPN_BIT) && !is_send) {
                /*
-                * We do not have to take the QP table lock here,
-                * because CQs will be locked while QPs are removed
+                * We do not have to take the XSRQ table lock here,
+                * because CQs will be locked while SRQs are removed
                 * from the table.
                 */
-               *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
-               if (!*cur_qp)
+               srq = mlx4_find_xsrq(&to_mctx(cq->ibv_cq.context)->xsrq_table,
+                                    ntohl(cqe->g_mlpath_rqpn) & MLX4_CQE_QPN_MASK);
+               if (!srq)
                        return CQ_POLL_ERR;
+       } else {
+               if (!*cur_qp || (qpn != (*cur_qp)->verbs_qp.qp.qp_num)) {
+                       /*
+                        * We do not have to take the QP table lock here,
+                        * because CQs will be locked while QPs are removed
+                        * from the table.
+                        */
+                       *cur_qp = mlx4_find_qp(to_mctx(cq->ibv_cq.context), qpn);
+                       if (!*cur_qp)
+                               return CQ_POLL_ERR;
+               }
+               srq = ((*cur_qp)->verbs_qp.qp.srq) ? to_msrq((*cur_qp)->verbs_qp.qp.srq) : NULL;
        }
 
-       wc->qp_num = (*cur_qp)->ibv_qp.qp_num;
-
        if (is_send) {
                wq = &(*cur_qp)->sq;
                wqe_index = ntohs(cqe->wqe_index);
                wq->tail += (uint16_t) (wqe_index - (uint16_t) wq->tail);
                wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
                ++wq->tail;
-       } else if ((*cur_qp)->ibv_qp.srq) {
-               srq = to_msrq((*cur_qp)->ibv_qp.srq);
+       } else if (srq) {
                wqe_index = htons(cqe->wqe_index);
                wc->wr_id = srq->wrid[wqe_index];
                mlx4_free_srq_wqe(srq, wqe_index);
@@ -322,7 +332,8 @@ static int mlx4_poll_one(struct mlx4_cq *cq,
                wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
                wc->wc_flags      |= g_mlpath_rqpn & 0x80000000 ? IBV_WC_GRH : 0;
                wc->pkey_index     = ntohl(cqe->immed_rss_invalid) & 0x7f;
-               if ((*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
+               /* HACK: the XRC SRQ receive path above does not look up *cur_qp, so guard against NULL */
+               if ((*cur_qp) && (*cur_qp)->link_layer == IBV_LINK_LAYER_ETHERNET)
                        wc->sl     = ntohs(cqe->sl_vid) >> 13;
                else
                        wc->sl     = ntohs(cqe->sl_vid) >> 12;
@@ -411,7 +422,12 @@ void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
         */
        while ((int) --prod_index - (int) cq->cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
-               if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
+               if (srq && srq->ext_srq &&
+                   ntohl(cqe->g_mlpath_rqpn & MLX4_CQE_QPN_MASK) == srq->verbs_srq.srq_num &&
+                   !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) {
+                       mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
+                       ++nfreed;
+               } else if ((ntohl(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
                        if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
                                mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
                        ++nfreed;
index 7cf68b439381740dadd35e0851cc95cbbf052ac8..53202f02a79864fc3313956aa31715f9b9684bee 100644 (file)
--- a/src/mlx4-abi.h
+++ b/src/mlx4-abi.h
@@ -74,6 +74,12 @@ struct mlx4_create_srq {
        __u64                           db_addr;
 };
 
+struct mlx4_create_xsrq {
+       struct ibv_create_xsrq          ibv_cmd;
+       __u64                           buf_addr;
+       __u64                           db_addr;
+};
+
 struct mlx4_create_srq_resp {
        struct ibv_create_srq_resp      ibv_resp;
        __u32                           srqn;
index dcea02633c12fdf40edc2cb64aa107ac01e72852..801f4f09ddfad9f1b526152d2a0b519304691019 100644 (file)
--- a/src/mlx4.c
+++ b/src/mlx4.c
@@ -127,13 +127,14 @@ static int mlx4_init_context(struct verbs_device *v_device,
        struct ibv_get_context          cmd;
        struct mlx4_alloc_ucontext_resp resp;
        int                             i;
-       /* verbs_context should be used for new verbs
-         *struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
-        */
+       struct verbs_context *verbs_ctx = verbs_get_ctx(ibv_ctx);
 
        /* memory footprint of mlx4_context and verbs_context share
-         * struct ibv_context.
-       */
+        * struct ibv_context.
+        */
+       if (sizeof(*verbs_ctx) > *(((size_t *) ibv_ctx) - 1))
+               return ENOSYS;
+
        context = to_mctx(ibv_ctx);
        ibv_ctx->cmd_fd = cmd_fd;
 
@@ -152,6 +153,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
        for (i = 0; i < MLX4_NUM_DB_TYPE; ++i)
                context->db_list[i] = NULL;
 
+       mlx4_init_xsrq_table(&context->xsrq_table, resp.qp_tab_size);
        pthread_mutex_init(&context->db_list_mutex, NULL);
 
        context->uar = mmap(NULL, to_mdev(&v_device->device)->page_size,
@@ -182,15 +184,20 @@ static int mlx4_init_context(struct verbs_device *v_device,
        pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE);
 
        ibv_ctx->ops = mlx4_ctx_ops;
-       /* New verbs should be added as below
-         * verbs_ctx->drv_new_func1 = mlx4_new_func1;
-         */
-       return 0;
+       verbs_ctx->has_comp_mask = VERBS_CONTEXT_XRCD | VERBS_CONTEXT_SRQ |
+                                  VERBS_CONTEXT_QP;
+       verbs_ctx->close_xrcd = mlx4_close_xrcd;
+       verbs_ctx->open_xrcd = mlx4_open_xrcd;
+       verbs_ctx->create_srq_ex = mlx4_create_srq_ex;
+       verbs_ctx->get_srq_num = verbs_get_srq_num;
+       verbs_ctx->create_qp_ex = mlx4_create_qp_ex;
+       verbs_ctx->open_qp = mlx4_open_qp;
 
+       return 0;
 }
 
 static void mlx4_uninit_context(struct verbs_device *v_device,
-                                       struct ibv_context *ibv_ctx)
+                               struct ibv_context *ibv_ctx)
 {
        struct mlx4_context *context = to_mctx(ibv_ctx);
 
index 5028fea96673f5d8dea37c363419c73b65bc6587..6c627e7c2a48e5d721d675d225c872fc29dcf115 100644 (file)
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -38,6 +38,7 @@
 
 #include <infiniband/driver.h>
 #include <infiniband/arch.h>
+#include <infiniband/verbs.h>
 
 #ifdef HAVE_VALGRIND_MEMCHECK_H
 
@@ -97,6 +98,37 @@ enum {
        MLX4_QP_TABLE_MASK              = MLX4_QP_TABLE_SIZE - 1
 };
 
+#define MLX4_REMOTE_SRQN_FLAGS(wr) htonl((wr)->wr.xrc.remote_srqn << 8)
+#define MLX4_GET_SRQN(srq) ((srq)->verbs_srq.srq_num) /* srq_num lives in the embedded struct verbs_srq */
+
+enum {
+       MLX4_XSRQ_TABLE_BITS = 8,
+       MLX4_XSRQ_TABLE_SIZE = 1 << MLX4_XSRQ_TABLE_BITS,
+       MLX4_XSRQ_TABLE_MASK = MLX4_XSRQ_TABLE_SIZE - 1
+};
+
+struct mlx4_xsrq_table {
+       struct {
+               struct mlx4_srq **table;
+               int               refcnt;
+       } xsrq_table[MLX4_XSRQ_TABLE_SIZE];
+
+       pthread_mutex_t           mutex;
+       int                       num_xsrq;
+       int                       shift;
+       int                       mask;
+};
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+                   struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+
+enum {
+       MLX4_XRC_QPN_BIT     = (1 << 23)
+};
+
 enum mlx4_db_type {
        MLX4_DB_TYPE_CQ,
        MLX4_DB_TYPE_RQ,
@@ -157,6 +189,8 @@ struct mlx4_context {
        int                             qp_table_shift;
        int                             qp_table_mask;
 
+       struct mlx4_xsrq_table          xsrq_table;
+
        struct mlx4_db_page            *db_list[MLX4_NUM_DB_TYPE];
        pthread_mutex_t                 db_list_mutex;
 };
@@ -184,7 +218,7 @@ struct mlx4_cq {
 };
 
 struct mlx4_srq {
-       struct ibv_srq                  ibv_srq;
+       struct verbs_srq                verbs_srq;
        struct mlx4_buf                 buf;
        pthread_spinlock_t              lock;
        uint64_t                       *wrid;
@@ -196,6 +230,7 @@ struct mlx4_srq {
        int                             tail;
        uint32_t                       *db;
        uint16_t                        counter;
+       uint8_t                         ext_srq;
 };
 
 struct mlx4_wq {
@@ -211,7 +246,7 @@ struct mlx4_wq {
 };
 
 struct mlx4_qp {
-       struct ibv_qp                   ibv_qp;
+       struct verbs_qp                 verbs_qp;
        struct mlx4_buf                 buf;
        int                             max_inline_data;
        int                             buf_size;
@@ -251,6 +286,7 @@ static inline unsigned long align(unsigned long val, unsigned long align)
 {
        return (val + align - 1) & ~(align - 1);
 }
+int align_queue_size(int req);
 
 #define to_mxxx(xxx, type)                                             \
        ((struct mlx4_##type *)                                 \
@@ -282,12 +318,14 @@ static inline struct mlx4_cq *to_mcq(struct ibv_cq *ibcq)
 
 static inline struct mlx4_srq *to_msrq(struct ibv_srq *ibsrq)
 {
-       return to_mxxx(srq, srq);
+       return container_of(container_of(ibsrq, struct verbs_srq, srq),
+                           struct mlx4_srq, verbs_srq);
 }
 
 static inline struct mlx4_qp *to_mqp(struct ibv_qp *ibqp)
 {
-       return to_mxxx(qp, qp);
+       return container_of(container_of(ibqp, struct verbs_qp, qp),
+                           struct mlx4_qp, verbs_qp);
 }
 
 static inline struct mlx4_ah *to_mah(struct ibv_ah *ibah)
@@ -308,6 +346,9 @@ int mlx4_query_port(struct ibv_context *context, uint8_t port,
 
 struct ibv_pd *mlx4_alloc_pd(struct ibv_context *context);
 int mlx4_free_pd(struct ibv_pd *pd);
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+                               struct ibv_xrcd_init_attr *attr);
+int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
                            size_t length, int access);
@@ -329,20 +370,33 @@ void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
                                 struct ibv_srq_init_attr *attr);
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+                                  struct ibv_srq_init_attr_ex *attr_ex);
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+                                   struct ibv_srq_init_attr_ex *attr_ex);
 int mlx4_modify_srq(struct ibv_srq *srq,
                     struct ibv_srq_attr *attr,
                     int mask);
 int mlx4_query_srq(struct ibv_srq *srq,
                           struct ibv_srq_attr *attr);
 int mlx4_destroy_srq(struct ibv_srq *srq);
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq);
 int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
                        struct mlx4_srq *srq);
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size);
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+                   struct mlx4_srq *srq);
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn);
 void mlx4_free_srq_wqe(struct mlx4_srq *srq, int ind);
 int mlx4_post_srq_recv(struct ibv_srq *ibsrq,
                       struct ibv_recv_wr *wr,
                       struct ibv_recv_wr **bad_wr);
 
 struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+                                struct ibv_qp_init_attr_ex *attr);
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr);
 int mlx4_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                   int attr_mask,
                   struct ibv_qp_init_attr *init_attr);
@@ -357,7 +411,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
                          struct ibv_recv_wr **bad_wr);
 void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
                           struct mlx4_qp *qp);
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
                       enum ibv_qp_type type, struct mlx4_qp *qp);
 void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
                       enum ibv_qp_type type);
index 40a66897c50a6cf43ee1f6bc60b3330d406317c9..132660f71bbb86d217fdb65cb9c793ba9622a685 100644 (file)
--- a/src/qp.c
+++ b/src/qp.c
@@ -208,7 +208,7 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
        ind = qp->sq.head;
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
+               if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) {
                        ret = ENOMEM;
                        *bad_wr = wr;
                        goto out;
@@ -246,6 +246,9 @@ int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                size = sizeof *ctrl / 16;
 
                switch (ibqp->qp_type) {
+               case IBV_QPT_XRC_SEND:
+                       ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr);
+                       /* fall through */
                case IBV_QPT_RC:
                case IBV_QPT_UC:
                        switch (wr->opcode) {
@@ -452,7 +455,7 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
        ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
+               if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) {
                        ret = ENOMEM;
                        *bad_wr = wr;
                        goto out;
@@ -546,6 +549,7 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
                size += sizeof (struct mlx4_wqe_raddr_seg);
                break;
 
+       case IBV_QPT_XRC_SEND:
        case IBV_QPT_RC:
                size += sizeof (struct mlx4_wqe_raddr_seg);
                /*
@@ -575,14 +579,16 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
                ; /* nothing */
 }
 
-int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
+int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
                       enum ibv_qp_type type, struct mlx4_qp *qp)
 {
        qp->rq.max_gs    = cap->max_recv_sge;
 
-       qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
-       if (!qp->sq.wrid)
-               return -1;
+       if (qp->sq.wqe_cnt) {
+               qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
+               if (!qp->sq.wrid)
+                       return -1;
+       }
 
        if (qp->rq.wqe_cnt) {
                qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t));
@@ -607,15 +613,19 @@ int mlx4_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
                qp->sq.offset = 0;
        }
 
-       if (mlx4_alloc_buf(&qp->buf,
-                           align(qp->buf_size, to_mdev(pd->context->device)->page_size),
-                           to_mdev(pd->context->device)->page_size)) {
-               free(qp->sq.wrid);
-               free(qp->rq.wrid);
-               return -1;
-       }
+       if (qp->buf_size) {
+               if (mlx4_alloc_buf(&qp->buf,
+                                  align(qp->buf_size, to_mdev(context->device)->page_size),
+                                  to_mdev(context->device)->page_size)) {
+                       free(qp->sq.wrid);
+                       free(qp->rq.wrid);
+                       return -1;
+               }
 
-       memset(qp->buf.buf, 0, qp->buf_size);
+               memset(qp->buf.buf, 0, qp->buf_size);
+       } else {
+               qp->buf.buf = NULL;
+       }
 
        return 0;
 }
@@ -631,6 +641,7 @@ void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
                wqe_size -= sizeof (struct mlx4_wqe_datagram_seg);
                break;
 
+       case IBV_QPT_XRC_SEND:
        case IBV_QPT_UC:
        case IBV_QPT_RC:
                wqe_size -= sizeof (struct mlx4_wqe_raddr_seg);
index f1d124027019a36a49a60050eb142da779e2f213..bc19c5172379f22ad6bc8bd69c7d4138cf11e736 100644 (file)
--- a/src/srq.c
+++ b/src/srq.c
@@ -42,6 +42,7 @@
 #include "mlx4.h"
 #include "doorbell.h"
 #include "wqe.h"
+#include "mlx4-abi.h"
 
 static void *get_wqe(struct mlx4_srq *srq, int n)
 {
@@ -173,3 +174,153 @@ int mlx4_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr,
 
        return 0;
 }
+
+void mlx4_init_xsrq_table(struct mlx4_xsrq_table *xsrq_table, int size)
+{
+       memset(xsrq_table, 0, sizeof *xsrq_table);
+       xsrq_table->num_xsrq = size;
+       xsrq_table->shift = ffs(size) - 1 - MLX4_XSRQ_TABLE_BITS;
+       xsrq_table->mask = (1 << xsrq_table->shift) - 1;
+
+       pthread_mutex_init(&xsrq_table->mutex, NULL);
+}
+
+struct mlx4_srq *mlx4_find_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+       int index;
+
+       index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+       if (xsrq_table->xsrq_table[index].refcnt)
+               return xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask];
+
+       return NULL;
+}
+
+int mlx4_store_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn,
+                   struct mlx4_srq *srq)
+{
+       int index, ret = 0;
+
+       index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+       pthread_mutex_lock(&xsrq_table->mutex);
+       if (!xsrq_table->xsrq_table[index].refcnt) {
+               xsrq_table->xsrq_table[index].table = calloc(xsrq_table->mask + 1,
+                                                            sizeof(struct mlx4_srq *));
+               if (!xsrq_table->xsrq_table[index].table) {
+                       ret = -1;
+                       goto out;
+               }
+       }
+
+       xsrq_table->xsrq_table[index].refcnt++;
+       xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = srq;
+
+out:
+       pthread_mutex_unlock(&xsrq_table->mutex);
+       return ret;
+}
+
+void mlx4_clear_xsrq(struct mlx4_xsrq_table *xsrq_table, uint32_t srqn)
+{
+       int index;
+
+       index = (srqn & (xsrq_table->num_xsrq - 1)) >> xsrq_table->shift;
+       pthread_mutex_lock(&xsrq_table->mutex);
+
+       if (--xsrq_table->xsrq_table[index].refcnt)
+               xsrq_table->xsrq_table[index].table[srqn & xsrq_table->mask] = NULL;
+       else
+               free(xsrq_table->xsrq_table[index].table);
+
+       pthread_mutex_unlock(&xsrq_table->mutex);
+}
+
+struct ibv_srq *mlx4_create_xrc_srq(struct ibv_context *context,
+                                   struct ibv_srq_init_attr_ex *attr_ex)
+{
+       struct mlx4_create_xsrq cmd;
+       struct mlx4_create_srq_resp resp;
+       struct mlx4_srq *srq;
+       int ret;
+
+       /* Sanity check SRQ size before proceeding */
+       if (attr_ex->attr.max_wr > 1 << 16 || attr_ex->attr.max_sge > 64)
+               return NULL;
+
+       srq = calloc(1, sizeof *srq);
+       if (!srq)
+               return NULL;
+
+       if (pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE))
+               goto err;
+
+       srq->max     = align_queue_size(attr_ex->attr.max_wr + 1);
+       srq->max_gs  = attr_ex->attr.max_sge;
+       srq->counter = 0;
+       srq->ext_srq = 1;
+
+       if (mlx4_alloc_srq_buf(attr_ex->pd, &attr_ex->attr, srq))
+               goto err;
+
+       srq->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
+       if (!srq->db)
+               goto err_free;
+
+       *srq->db = 0;
+
+       cmd.buf_addr = (uintptr_t) srq->buf.buf;
+       cmd.db_addr  = (uintptr_t) srq->db;
+
+       ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, attr_ex,
+                                   &cmd.ibv_cmd, sizeof cmd,
+                                   &resp.ibv_resp, sizeof resp);
+       if (ret)
+               goto err_db;
+
+       ret = mlx4_store_xsrq(&to_mctx(context)->xsrq_table,
+                             srq->verbs_srq.srq_num, srq);
+       if (ret)
+               goto err_destroy;
+
+       return &srq->verbs_srq.srq;
+
+err_destroy:
+       ibv_cmd_destroy_srq(&srq->verbs_srq.srq);
+err_db:
+       mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, srq->db);
+err_free:
+       free(srq->wrid);
+       mlx4_free_buf(&srq->buf);
+err:
+       free(srq);
+       return NULL;
+}
+
+int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
+{
+       struct mlx4_context *mctx = to_mctx(srq->context);
+       struct mlx4_srq *msrq = to_msrq(srq);
+       struct mlx4_cq *mcq;
+       int ret;
+
+       mcq = to_mcq(msrq->verbs_srq.cq);
+       mlx4_cq_clean(mcq, 0, msrq);
+       pthread_spin_lock(&mcq->lock);
+       mlx4_clear_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num);
+       pthread_spin_unlock(&mcq->lock);
+
+       ret = ibv_cmd_destroy_srq(srq);
+       if (ret) {
+               pthread_spin_lock(&mcq->lock);
+               mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
+               pthread_spin_unlock(&mcq->lock);
+               return ret;
+       }
+
+       mlx4_free_db(mctx, MLX4_DB_TYPE_RQ, msrq->db);
+       mlx4_free_buf(&msrq->buf);
+       free(msrq->wrid);
+       free(msrq);
+
+       return 0;
+}
index 408fc6d3480f70688498d645812a35d228385b45..1ebf766a9be58c0d97a5f80c6cd6ca857b93ee66 100644 (file)
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -107,6 +107,42 @@ int mlx4_free_pd(struct ibv_pd *pd)
        return 0;
 }
 
+struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
+                               struct ibv_xrcd_init_attr *attr)
+{
+       struct ibv_open_xrcd cmd;
+       struct ibv_open_xrcd_resp resp;
+       struct verbs_xrcd *xrcd;
+       int ret;
+
+       xrcd = calloc(1, sizeof *xrcd);
+       if (!xrcd)
+               return NULL;
+
+       ret = ibv_cmd_open_xrcd(context, xrcd, attr,
+                               &cmd, sizeof cmd, &resp, sizeof resp);
+       if (ret)
+               goto err;
+
+       return &xrcd->xrcd;
+
+err:
+       free(xrcd);
+       return NULL;
+}
+
+int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
+{
+       struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd);
+       int ret;
+
+       ret = ibv_cmd_close_xrcd(xrcd);
+       if (!ret)
+               free(xrcd);
+
+       return ret;
+}
+
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
                           int access)
 {
@@ -150,7 +186,7 @@ int mlx4_dereg_mr(struct ibv_mr *mr)
        return 0;
 }
 
-static int align_queue_size(int req)
+int align_queue_size(int req)
 {
        int nent;
 
@@ -294,7 +330,7 @@ int mlx4_destroy_cq(struct ibv_cq *cq)
 }
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
-                                struct ibv_srq_init_attr *attr)
+                               struct ibv_srq_init_attr *attr)
 {
        struct mlx4_create_srq      cmd;
        struct mlx4_create_srq_resp resp;
@@ -315,6 +351,7 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
        srq->max     = align_queue_size(attr->attr.max_wr + 1);
        srq->max_gs  = attr->attr.max_sge;
        srq->counter = 0;
+       srq->ext_srq = 0;
 
        if (mlx4_alloc_srq_buf(pd, &attr->attr, srq))
                goto err;
@@ -328,15 +365,13 @@ struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
        cmd.buf_addr = (uintptr_t) srq->buf.buf;
        cmd.db_addr  = (uintptr_t) srq->db;
 
-       ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, attr,
+       ret = ibv_cmd_create_srq(pd, &srq->verbs_srq.srq, attr,
                                 &cmd.ibv_cmd, sizeof cmd,
                                 &resp.ibv_resp, sizeof resp);
        if (ret)
                goto err_db;
 
-       srq->srqn = resp.srqn;
-
-       return &srq->ibv_srq;
+       return &srq->verbs_srq.srq;
 
 err_db:
        mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, srq->db);
@@ -351,6 +386,18 @@ err:
        return NULL;
 }
 
+struct ibv_srq *mlx4_create_srq_ex(struct ibv_context *context,
+                                  struct ibv_srq_init_attr_ex *attr_ex)
+{
+       if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ||
+           (attr_ex->srq_type == IBV_SRQT_BASIC))
+               return mlx4_create_srq(attr_ex->pd, (struct ibv_srq_init_attr *) attr_ex);
+       else if (attr_ex->srq_type == IBV_SRQT_XRC)
+               return mlx4_create_xrc_srq(context, attr_ex);
+
+       return NULL;
+}
+
 int mlx4_modify_srq(struct ibv_srq *srq,
                     struct ibv_srq_attr *attr,
                     int attr_mask)
@@ -372,6 +419,9 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
 {
        int ret;
 
+       if (to_msrq(srq)->ext_srq)
+               return mlx4_destroy_xrc_srq(srq);
+
        ret = ibv_cmd_destroy_srq(srq);
        if (ret)
                return ret;
@@ -384,7 +434,8 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
        return 0;
 }
 
-struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+                                struct ibv_qp_init_attr_ex *attr)
 {
        struct mlx4_create_qp     cmd;
        struct ibv_create_qp_resp resp;
@@ -399,30 +450,34 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
            attr->cap.max_inline_data > 1024)
                return NULL;
 
-       qp = malloc(sizeof *qp);
+       qp = calloc(1, sizeof *qp);
        if (!qp)
                return NULL;
 
-       mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
-
-       /*
-        * We need to leave 2 KB + 1 WQE of headroom in the SQ to
-        * allow HW to prefetch.
-        */
-       qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
-       qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
-       qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
+       if (attr->qp_type == IBV_QPT_XRC_RECV) {
+               attr->cap.max_send_wr = qp->sq.wqe_cnt = 0;
+       } else {
+               mlx4_calc_sq_wqe_size(&attr->cap, attr->qp_type, qp);
+               /*
+                * We need to leave 2 KB + 1 WQE of headroom in the SQ to
+                * allow HW to prefetch.
+                */
+               qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+               qp->sq.wqe_cnt = align_queue_size(attr->cap.max_send_wr + qp->sq_spare_wqes);
+       }
 
-       if (attr->srq)
-               attr->cap.max_recv_wr = qp->rq.wqe_cnt = 0;
-       else {
+       if (attr->srq || attr->qp_type == IBV_QPT_XRC_SEND ||
+           attr->qp_type == IBV_QPT_XRC_RECV) {
+               attr->cap.max_recv_wr = qp->rq.wqe_cnt = attr->cap.max_recv_sge = 0;
+       } else {
+               qp->rq.wqe_cnt = align_queue_size(attr->cap.max_recv_wr);
                if (attr->cap.max_recv_sge < 1)
                        attr->cap.max_recv_sge = 1;
                if (attr->cap.max_recv_wr < 1)
                        attr->cap.max_recv_wr = 1;
        }
 
-       if (mlx4_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp))
+       if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
                goto err;
 
        mlx4_init_qp_indices(qp);
@@ -431,19 +486,18 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
            pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
                goto err_free;
 
-       if (!attr->srq) {
-               qp->db = mlx4_alloc_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ);
+       if (attr->cap.max_recv_sge) {
+               qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
                if (!qp->db)
                        goto err_free;
 
                *qp->db = 0;
+               cmd.db_addr = (uintptr_t) qp->db;
+       } else {
+               cmd.db_addr = 0;
        }
 
        cmd.buf_addr        = (uintptr_t) qp->buf.buf;
-       if (attr->srq)
-               cmd.db_addr = 0;
-       else
-               cmd.db_addr = (uintptr_t) qp->db;
        cmd.log_sq_stride   = qp->sq.wqe_shift;
        for (cmd.log_sq_bb_count = 0;
             qp->sq.wqe_cnt > 1 << cmd.log_sq_bb_count;
@@ -452,37 +506,39 @@ struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
        cmd.sq_no_prefetch = 0; /* OK for ABI 2: just a reserved field */
        memset(cmd.reserved, 0, sizeof cmd.reserved);
 
-       pthread_mutex_lock(&to_mctx(pd->context)->qp_table_mutex);
+       pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
 
-       ret = ibv_cmd_create_qp(pd, &qp->ibv_qp, attr, &cmd.ibv_cmd, sizeof cmd,
-                               &resp, sizeof resp);
+       ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, attr,
+                                  &cmd.ibv_cmd, sizeof cmd, &resp, sizeof resp);
        if (ret)
                goto err_rq_db;
 
-       ret = mlx4_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp);
-       if (ret)
-               goto err_destroy;
-       pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
+       if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) {
+               ret = mlx4_store_qp(to_mctx(context), qp->verbs_qp.qp.qp_num, qp);
+               if (ret)
+                       goto err_destroy;
+       }
+       pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
 
        qp->rq.wqe_cnt = qp->rq.max_post = attr->cap.max_recv_wr;
        qp->rq.max_gs  = attr->cap.max_recv_sge;
        mlx4_set_sq_sizes(qp, &attr->cap, attr->qp_type);
 
-       qp->doorbell_qpn    = htonl(qp->ibv_qp.qp_num << 8);
+       qp->doorbell_qpn    = htonl(qp->verbs_qp.qp.qp_num << 8);
        if (attr->sq_sig_all)
                qp->sq_signal_bits = htonl(MLX4_WQE_CTRL_CQ_UPDATE);
        else
                qp->sq_signal_bits = 0;
 
-       return &qp->ibv_qp;
+       return &qp->verbs_qp.qp;
 
 err_destroy:
-       ibv_cmd_destroy_qp(&qp->ibv_qp);
+       ibv_cmd_destroy_qp(&qp->verbs_qp.qp);
 
 err_rq_db:
-       pthread_mutex_unlock(&to_mctx(pd->context)->qp_table_mutex);
-       if (!attr->srq)
-               mlx4_free_db(to_mctx(pd->context), MLX4_DB_TYPE_RQ, qp->db);
+       pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex);
+       if (attr->cap.max_recv_sge)
+               mlx4_free_db(to_mctx(context), MLX4_DB_TYPE_RQ, qp->db);
 
 err_free:
        free(qp->sq.wrid);
@@ -496,6 +552,43 @@ err:
        return NULL;
 }
 
+struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
+{      /* Legacy create path: builds an extended attr and forwards to mlx4_create_qp_ex(). */
+       struct ibv_qp_init_attr_ex attr_ex;
+       struct ibv_qp *qp;
+
+       memcpy(&attr_ex, attr, sizeof *attr);   /* copies only sizeof *attr bytes; presumably the legacy struct is a layout prefix of the _ex one - TODO confirm */
+       attr_ex.comp_mask = IBV_QP_INIT_ATTR_PD;        /* presumably marks attr_ex.pd as the only valid extended field */
+       attr_ex.pd = pd;
+       qp = mlx4_create_qp_ex(pd->context, &attr_ex);
+       if (qp)
+               memcpy(attr, &attr_ex, sizeof *attr);   /* success only: copy the leading fields back into the caller's attr */
+       return qp;      /* whatever mlx4_create_qp_ex() returned; attr is left untouched when that is NULL */
+}
+
+struct ibv_qp *mlx4_open_qp(struct ibv_context *context, struct ibv_qp_open_attr *attr)
+{      /* Allocates a zeroed mlx4_qp and opens it through ibv_cmd_open_qp(); returns NULL on any failure. */
+       struct ibv_open_qp cmd;
+       struct ibv_create_qp_resp resp;
+       struct mlx4_qp *qp;
+       int ret;
+
+       qp = calloc(1, sizeof *qp);     /* zero-filled, so every field of the new mlx4_qp starts out 0/NULL */
+       if (!qp)
+               return NULL;
+
+       ret = ibv_cmd_open_qp(context, &qp->verbs_qp, attr,
+                             &cmd, sizeof cmd, &resp, sizeof resp);
+       if (ret)
+               goto err;
+
+       return &qp->verbs_qp.qp;        /* hand back the ibv_qp embedded in the driver object */
+
+err:
+       free(qp);       /* the allocation is the only resource acquired before this point */
+       return NULL;
+}
+
 int mlx4_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
                   int attr_mask,
                   struct ibv_qp_init_attr *init_attr)
@@ -526,7 +619,7 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
        int ret;
 
        if (attr_mask & IBV_QP_PORT) {
-               if (ibv_query_port(qp->pd->context, attr->port_num, &port_attr))
+               if (ibv_query_port(qp->context, attr->port_num, &port_attr))
                        return -1;
                mqp->link_layer = port_attr.link_layer;
        }
@@ -542,13 +635,14 @@ int mlx4_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
        if (!ret                       &&
            (attr_mask & IBV_QP_STATE) &&
            attr->qp_state == IBV_QPS_RESET) {
-               mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
-                              qp->srq ? to_msrq(qp->srq) : NULL);
-               if (qp->send_cq != qp->recv_cq)
+               if (qp->recv_cq)
+                       mlx4_cq_clean(to_mcq(qp->recv_cq), qp->qp_num,
+                                     qp->srq ? to_msrq(qp->srq) : NULL);
+               if (qp->send_cq && qp->send_cq != qp->recv_cq)
                        mlx4_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL);
 
                mlx4_init_qp_indices(to_mqp(qp));
-               if (!qp->srq)
+               if (to_mqp(qp)->rq.wqe_cnt)
                        *to_mqp(qp)->db = 0;
        }
 
@@ -560,9 +654,14 @@ static void mlx4_lock_cqs(struct ibv_qp *qp)
        struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
        struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-       if (send_cq == recv_cq)
+       if (!qp->send_cq || !qp->recv_cq) {
+               if (qp->send_cq)
+                       pthread_spin_lock(&send_cq->lock);
+               else if (qp->recv_cq)
+                       pthread_spin_lock(&recv_cq->lock);
+       } else if (send_cq == recv_cq) {
                pthread_spin_lock(&send_cq->lock);
-       else if (send_cq->cqn < recv_cq->cqn) {
+       } else if (send_cq->cqn < recv_cq->cqn) {
                pthread_spin_lock(&send_cq->lock);
                pthread_spin_lock(&recv_cq->lock);
        } else {
@@ -576,9 +675,15 @@ static void mlx4_unlock_cqs(struct ibv_qp *qp)
        struct mlx4_cq *send_cq = to_mcq(qp->send_cq);
        struct mlx4_cq *recv_cq = to_mcq(qp->recv_cq);
 
-       if (send_cq == recv_cq)
+
+       if (!qp->send_cq || !qp->recv_cq) {
+               if (qp->send_cq)
+                       pthread_spin_unlock(&send_cq->lock);
+               else if (qp->recv_cq)
+                       pthread_spin_unlock(&recv_cq->lock);
+       } else if (send_cq == recv_cq) {
                pthread_spin_unlock(&send_cq->lock);
-       else if (send_cq->cqn < recv_cq->cqn) {
+       } else if (send_cq->cqn < recv_cq->cqn) {
                pthread_spin_unlock(&recv_cq->lock);
                pthread_spin_unlock(&send_cq->lock);
        } else {
@@ -601,21 +706,24 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
 
        mlx4_lock_cqs(ibqp);
 
-       __mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
-                       ibqp->srq ? to_msrq(ibqp->srq) : NULL);
-       if (ibqp->send_cq != ibqp->recv_cq)
+       if (ibqp->recv_cq)
+               __mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+                               ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+       if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq)
                __mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
 
-       mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+       if (qp->sq.wqe_cnt || qp->rq.wqe_cnt)
+               mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
 
        mlx4_unlock_cqs(ibqp);
        pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
 
-       if (!ibqp->srq)
+       if (qp->rq.wqe_cnt) {
                mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);
-       free(qp->sq.wrid);
-       if (qp->rq.wqe_cnt)
                free(qp->rq.wrid);
+       }
+       if (qp->sq.wqe_cnt)
+               free(qp->sq.wrid);
        mlx4_free_buf(&qp->buf);
        free(qp);