From: Roland Dreier <rolandd@cisco.com>
Date: Thu, 29 Nov 2007 22:52:36 +0000 (-0800)
Subject: Don't add an extra entry to CQs
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=216b90eac10cc8e11b9abaa710385986e26fbf85;p=~shefty%2Flibmlx4.git

Don't add an extra entry to CQs

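With mlx4 hardware, there is no need to add an extra entry when
creating a CQ.  This potentially saves a lot of memory if a consumer
asks for a number of entries that is an exact power of 2: the CQ size
is rounded up to a power of 2, so the extra entry used to double the
size of the CQ buffer in that case.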

This change works without changing the kernel mlx4_ib driver's ABI by
subtracting 1 from the number of CQ entries before passing the value
to the kernel; the kernel will add 1 and end up with the same value
actually used by libmlx4.

Based on work from Jack Morgenstein <jackm@dev.mellanox.co.il>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
---

diff --git a/src/cq.c b/src/cq.c
index 06fd2ca..2d04883 100644
--- a/src/cq.c
+++ b/src/cq.c
@@ -114,10 +114,10 @@ static struct mlx4_cqe *get_cqe(struct mlx4_cq *cq, int entry)
 
 static void *get_sw_cqe(struct mlx4_cq *cq, int n)
 {
-	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
+	struct mlx4_cqe *cqe = get_cqe(cq, n & (cq->ibv_cq.cqe - 1));
 
 	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
-		!!(n & (cq->ibv_cq.cqe + 1))) ? NULL : cqe;
+		!!(n & cq->ibv_cq.cqe)) ? NULL : cqe;
 }
 
 static struct mlx4_cqe *next_cqe_sw(struct mlx4_cq *cq)
@@ -397,7 +397,7 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	 * from our QP and therefore don't need to be checked.
 	 */
 	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
-		if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
+		if (prod_index == cq->cons_index + cq->ibv_cq.cqe - 1)
 			break;
 
 	/*
@@ -405,13 +405,13 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 	 * that match our QP by copying older entries on top of them.
 	 */
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
-		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
+		cqe = get_cqe(cq, prod_index & (cq->ibv_cq.cqe - 1));
 		if ((ntohl(cqe->my_qpn) & 0xffffff) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_free_srq_wqe(srq, ntohs(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
-			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
+			dest = get_cqe(cq, (prod_index + nfreed) & (cq->ibv_cq.cqe - 1));
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |
diff --git a/src/verbs.c b/src/verbs.c
index 50e0947..0bbab57 100644
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -182,7 +182,11 @@ struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
 	if (pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE))
 		goto err;
 
-	cqe = align_queue_size(cqe + 1);
+	cqe = align_queue_size(cqe);
+
+	/* Always allocate at least two CQEs to keep things simple */
+	if (cqe < 2)
+		cqe = 2;
 
 	if (mlx4_alloc_buf(&cq->buf, cqe * MLX4_CQ_ENTRY_SIZE,
 			   to_mdev(context->device)->page_size))
@@ -202,6 +206,8 @@ struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
 	cmd.buf_addr = (uintptr_t) cq->buf.buf;
 	cmd.db_addr  = (uintptr_t) cq->set_ci_db;
 
+	/* Subtract 1 from the number of entries we pass into the
+	 * kernel because the kernel mlx4_ib driver will add 1 again. */
 	ret = ibv_cmd_create_cq(context, cqe - 1, channel, comp_vector,
 				&cq->ibv_cq, &cmd.ibv_cmd, sizeof cmd,
 				&resp.ibv_resp, sizeof resp);
@@ -209,6 +215,8 @@ struct ibv_cq *mlx4_create_cq(struct ibv_context *context, int cqe,
 		goto err_db;
 
 	cq->cqn = resp.cqn;
+	/* Bump the number of entries to make up for subtracting 1 above */
+	++cq->ibv_cq.cqe;
 
 	return &cq->ibv_cq;