git.openfabrics.org - ~shefty/libmlx4.git/commitdiff
Fix CQ cleanup when QP is destroyed
author Roland Dreier <rolandd@cisco.com>
Fri, 4 Apr 2008 19:14:57 +0000 (12:14 -0700)
committer Roland Dreier <rolandd@cisco.com>
Fri, 4 Apr 2008 19:18:48 +0000 (12:18 -0700)
The current code in mlx4_destroy_qp() cleans completions from the QP
being destroyed out of CQs before calling into the kernel to actually
destroy the QP.  This leaves a window where new completions could be
added and left in the CQ, which leads to problems when that completion
is polled.  Fix this by cleaning the CQ and removing the QP from the
QP table after the QP is really gone.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
src/cq.c
src/mlx4.h
src/verbs.c

index 91297e475bbf501335685189cb559d0fd78ff1d1..53c6e06dc4ecb7c260ee5147e99f5ea969a15b55 100644 (file)
--- a/src/cq.c
+++ b/src/cq.c
@@ -381,15 +381,13 @@ void mlx4_cq_event(struct ibv_cq *cq)
        to_mcq(cq)->arm_sn++;
 }
 
-void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
+void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
 {
        struct mlx4_cqe *cqe, *dest;
        uint32_t prod_index;
        uint8_t owner_bit;
        int nfreed = 0;
 
-       pthread_spin_lock(&cq->lock);
-
        /*
         * First we need to find the current producer index, so we
         * know where to start cleaning from.  It doesn't matter if HW
@@ -429,7 +427,12 @@ void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
                wmb();
                update_cons_index(cq);
        }
+}
 
+void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq)
+{
+       pthread_spin_lock(&cq->lock);
+       __mlx4_cq_clean(cq, qpn, srq);
        pthread_spin_unlock(&cq->lock);
 }
 
index 3710a178371039fdaded666c14e71abea176a548..dda34ba3627b1a2c9dac50c8a01aa61cabf4bedd 100644 (file)
--- a/src/mlx4.h
+++ b/src/mlx4.h
@@ -312,8 +312,8 @@ int mlx4_destroy_cq(struct ibv_cq *cq);
 int mlx4_poll_cq(struct ibv_cq *cq, int ne, struct ibv_wc *wc);
 int mlx4_arm_cq(struct ibv_cq *cq, int solicited);
 void mlx4_cq_event(struct ibv_cq *cq);
-void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn,
-                   struct mlx4_srq *srq);
+void __mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq);
+void mlx4_cq_clean(struct mlx4_cq *cq, uint32_t qpn, struct mlx4_srq *srq);
 void mlx4_cq_resize_copy_cqes(struct mlx4_cq *cq, void *buf, int new_cqe);
 
 struct ibv_srq *mlx4_create_srq(struct ibv_pd *pd,
index 50e0947c4ee3935f8c6ea7de3196c7bb482bb153..26174f8648e984e7583aa153aa69ccb86850182f 100644 (file)
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -532,23 +532,20 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
        struct mlx4_qp *qp = to_mqp(ibqp);
        int ret;
 
-       mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
-                      ibqp->srq ? to_msrq(ibqp->srq) : NULL);
-       if (ibqp->send_cq != ibqp->recv_cq)
-               mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
+       ret = ibv_cmd_destroy_qp(ibqp);
+       if (ret)
+               return ret;
 
        mlx4_lock_cqs(ibqp);
-       mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
-       mlx4_unlock_cqs(ibqp);
 
-       ret = ibv_cmd_destroy_qp(ibqp);
-       if (ret) {
-               mlx4_lock_cqs(ibqp);
-               mlx4_store_qp(to_mctx(ibqp->context), ibqp->qp_num, qp);
-               mlx4_unlock_cqs(ibqp);
+       __mlx4_cq_clean(to_mcq(ibqp->recv_cq), ibqp->qp_num,
+                       ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+       if (ibqp->send_cq != ibqp->recv_cq)
+               __mlx4_cq_clean(to_mcq(ibqp->send_cq), ibqp->qp_num, NULL);
 
-               return ret;
-       }
+       mlx4_clear_qp(to_mctx(ibqp->context), ibqp->qp_num);
+
+       mlx4_unlock_cqs(ibqp);
 
        if (!ibqp->srq)
                mlx4_free_db(to_mctx(ibqp->context), MLX4_DB_TYPE_RQ, qp->db);