From 2899b4e7e48358509021522622c50980befe90ef Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 25 Sep 2012 09:01:11 -0400 Subject: [PATCH] IB/qib: linux-3.6 patches backported 6a82649f and 354dff1b from 3.6. Signed-off-by: Mike Marciniszyn --- linux-next-pending/0006-IB-qib-mr-ebusy.patch | 52 + .../0007-IB-qib-uc-refcount-leak.patch | 1035 +++++++++++++++++ 2 files changed, 1087 insertions(+) create mode 100644 linux-next-pending/0006-IB-qib-mr-ebusy.patch create mode 100644 linux-next-pending/0007-IB-qib-uc-refcount-leak.patch diff --git a/linux-next-pending/0006-IB-qib-mr-ebusy.patch b/linux-next-pending/0006-IB-qib-mr-ebusy.patch new file mode 100644 index 0000000..8251564 --- /dev/null +++ b/linux-next-pending/0006-IB-qib-mr-ebusy.patch @@ -0,0 +1,52 @@ +IB/qib: Fix UC MR refs for immediate operations + +From: Mike Marciniszyn + +An MR reference leak exists when handling UC RDMA writes with +immediate data because we manipulate the reference counts as if the +operation had been a send. + +This patch moves the last_imm label so that the RDMA write operations +with immediate data converge at the cq building code. The copy/mr +deref code is now done correctly prior to the branch to last_imm. + +Reviewed-by: Edward Mascarenhas +Signed-off-by: Mike Marciniszyn +Signed-off-by: Roland Dreier +--- + drivers/infiniband/hw/qib/qib_uc.c | 8 +++++++- + 1 files changed, 7 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c +index ce7387f..70b4cb7 100644 +--- a/drivers/infiniband/hw/qib/qib_uc.c ++++ b/drivers/infiniband/hw/qib/qib_uc.c +@@ -403,7 +403,6 @@ send_last: + if (unlikely(wc.byte_len > qp->r_len)) + goto rewind; + wc.opcode = IB_WC_RECV; +-last_imm: + qib_copy_sge(&qp->r_sge, data, tlen, 0); + while (qp->s_rdma_read_sge.num_sge) { + atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); +@@ -411,6 +410,7 @@ last_imm: + qp->s_rdma_read_sge.sge = + *qp->s_rdma_read_sge.sg_list++; + } ++last_imm: + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.qp = &qp->ibqp; +@@ -509,6 +509,12 @@ rdma_last_imm: + } + wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; ++ qib_copy_sge(&qp->r_sge, data, tlen, 1); ++ while (qp->r_sge.num_sge) { ++ atomic_dec(&qp->r_sge.sge.mr->refcount); ++ if (--qp->r_sge.num_sge) ++ qp->r_sge.sge = *qp->r_sge.sg_list++; ++ } + goto last_imm; + + case OP(RDMA_WRITE_LAST): diff --git a/linux-next-pending/0007-IB-qib-uc-refcount-leak.patch b/linux-next-pending/0007-IB-qib-uc-refcount-leak.patch new file mode 100644 index 0000000..a9248e5 --- /dev/null +++ b/linux-next-pending/0007-IB-qib-uc-refcount-leak.patch @@ -0,0 +1,1035 @@ +IB/qib: Avoid returning EBUSY from MR deregister + +From: Mike Marciniszyn + +A timing issue can occur where qib_mr_dereg can return -EBUSY if the +MR use count is not zero. + +This can occur if the MR is de-registered while RDMA read response +packets are being progressed from the SDMA ring. The suspicion is +that the peer sent an RDMA read request, which has already been copied +across to the peer. The peer sees the completion of his request and +then communicates to the responder that the MR is not needed any +longer. The responder tries to de-register the MR, catching some +responses remaining in the SDMA ring holding the MR use count. + +The code now uses a get/put paradigm to track MR use counts and +coordinates with the MR de-registration process using a completion +when the count has reached zero. 
A timeout on the delay is in place +to catch other EBUSY issues. + +The reference count protocol is as follows: +- The return to the user counts as 1 +- A reference from the lk_table or the qib_ibdev counts as 1. +- Transient I/O operations increase/decrease as necessary + +A lot of code duplication has been folded into the new routines +init_qib_mregion() and deinit_qib_mregion(). Additionally, explicit +initialization of fields to zero is now handled by kzalloc(). + +Also, duplicated code 'while.*num_sge' that decrements reference +counts have been consolidated in qib_put_ss(). + +Reviewed-by: Ramkrishna Vepa +Signed-off-by: Mike Marciniszyn +Signed-off-by: Roland Dreier +--- + drivers/infiniband/hw/qib/qib_keys.c | 84 +++++++---- + drivers/infiniband/hw/qib/qib_mr.c | 242 ++++++++++++++++++--------------- + drivers/infiniband/hw/qib/qib_qp.c | 21 +-- + drivers/infiniband/hw/qib/qib_rc.c | 24 +-- + drivers/infiniband/hw/qib/qib_ruc.c | 14 +- + drivers/infiniband/hw/qib/qib_uc.c | 33 +---- + drivers/infiniband/hw/qib/qib_ud.c | 12 -- + drivers/infiniband/hw/qib/qib_verbs.c | 10 + + drivers/infiniband/hw/qib/qib_verbs.h | 28 ++++ + 9 files changed, 244 insertions(+), 224 deletions(-) + +diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c +index 8fd19a4..8b5ee3a 100644 +--- a/drivers/infiniband/hw/qib/qib_keys.c ++++ b/drivers/infiniband/hw/qib/qib_keys.c +@@ -35,21 +35,40 @@ + + /** + * qib_alloc_lkey - allocate an lkey +- * @rkt: lkey table in which to allocate the lkey + * @mr: memory region that this lkey protects ++ * @dma_region: 0->normal key, 1->restricted DMA key ++ * ++ * Returns 0 if successful, otherwise returns -errno. ++ * ++ * Increments mr reference count and sets published ++ * as required. ++ * ++ * Sets the lkey field mr for non-dma regions. + * +- * Returns 1 if successful, otherwise returns 0. + */ + +-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) ++int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) + { + unsigned long flags; + u32 r; + u32 n; +- int ret; ++ int ret = 0; ++ struct qib_ibdev *dev = to_idev(mr->pd->device); ++ struct qib_lkey_table *rkt = &dev->lk_table; + + spin_lock_irqsave(&rkt->lock, flags); + ++ /* special case for dma_mr lkey == 0 */ ++ if (dma_region) { ++ /* should the dma_mr be relative to the pd? 
*/ ++ if (!dev->dma_mr) { ++ qib_get_mr(mr); ++ dev->dma_mr = mr; ++ mr->lkey_published = 1; ++ } ++ goto success; ++ } ++ + /* Find the next available LKEY */ + r = rkt->next; + n = r; +@@ -57,11 +76,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) + if (rkt->table[r] == NULL) + break; + r = (r + 1) & (rkt->max - 1); +- if (r == n) { +- spin_unlock_irqrestore(&rkt->lock, flags); +- ret = 0; ++ if (r == n) + goto bail; +- } + } + rkt->next = (r + 1) & (rkt->max - 1); + /* +@@ -76,46 +92,50 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) + mr->lkey |= 1 << 8; + rkt->gen++; + } ++ qib_get_mr(mr); + rkt->table[r] = mr; ++ mr->lkey_published = 1; ++success: + spin_unlock_irqrestore(&rkt->lock, flags); +- +- ret = 1; +- +-bail: ++out: + return ret; ++bail: ++ spin_unlock_irqrestore(&rkt->lock, flags); ++ ret = -ENOMEM; ++ goto out; + } + + /** + * qib_free_lkey - free an lkey +- * @rkt: table from which to free the lkey +- * @lkey: lkey id to free ++ * @mr: mr to free from tables + */ +-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) ++void qib_free_lkey(struct qib_mregion *mr) + { + unsigned long flags; + u32 lkey = mr->lkey; + u32 r; +- int ret; ++ struct qib_ibdev *dev = to_idev(mr->pd->device); ++ struct qib_lkey_table *rkt = &dev->lk_table; ++ ++ spin_lock_irqsave(&rkt->lock, flags); ++ if (!mr->lkey_published) ++ goto out; ++ mr->lkey_published = 0; ++ + + spin_lock_irqsave(&dev->lk_table.lock, flags); + if (lkey == 0) { + if (dev->dma_mr && dev->dma_mr == mr) { +- ret = atomic_read(&dev->dma_mr->refcount); +- if (!ret) +- dev->dma_mr = NULL; +- } else +- ret = 0; ++ qib_put_mr(dev->dma_mr); ++ dev->dma_mr = NULL; ++ } + } else { + r = lkey >> (32 - ib_qib_lkey_table_size); +- ret = atomic_read(&dev->lk_table.table[r]->refcount); +- if (!ret) +- dev->lk_table.table[r] = NULL; ++ qib_put_mr(dev->dma_mr); ++ rkt->table[r] = NULL; + } ++out: + spin_unlock_irqrestore(&dev->lk_table.lock, flags); +- +- if (ret) +- ret = -EBUSY; +- return ret; + } + + /** +@@ -150,7 +170,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + goto bail; + if (!dev->dma_mr) + goto bail; +- atomic_inc(&dev->dma_mr->refcount); ++ qib_get_mr(dev->dma_mr); + spin_unlock_irqrestore(&rkt->lock, flags); + + isge->mr = dev->dma_mr; +@@ -171,7 +191,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + off + sge->length > mr->length || + (mr->access_flags & acc) != acc)) + goto bail; +- atomic_inc(&mr->refcount); ++ qib_get_mr(mr); + spin_unlock_irqrestore(&rkt->lock, flags); + + off += mr->offset; +@@ -245,7 +265,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, + goto bail; + if (!dev->dma_mr) + goto bail; +- atomic_inc(&dev->dma_mr->refcount); ++ qib_get_mr(dev->dma_mr); + spin_unlock_irqrestore(&rkt->lock, flags); + + sge->mr = dev->dma_mr; +@@ -265,7 +285,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, + if (unlikely(vaddr < mr->iova || off + len > mr->length || + (mr->access_flags & acc) == 0)) + goto bail; +- atomic_inc(&mr->refcount); ++ qib_get_mr(mr); + spin_unlock_irqrestore(&rkt->lock, flags); + + off += mr->offset; +diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c +index 08944e2..6a2028a 100644 +--- a/drivers/infiniband/hw/qib/qib_mr.c ++++ b/drivers/infiniband/hw/qib/qib_mr.c +@@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) + return container_of(ibfmr, struct qib_fmr, ibfmr); + } + ++static int init_qib_mregion(struct 
qib_mregion *mr, struct ib_pd *pd, ++ int count) ++{ ++ int m, i = 0; ++ int rval = 0; ++ ++ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; ++ for (; i < m; i++) { ++ mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); ++ if (!mr->map[i]) ++ goto bail; ++ } ++ mr->mapsz = m; ++ init_completion(&mr->comp); ++ /* count returning the ptr to user */ ++ atomic_set(&mr->refcount, 1); ++ mr->pd = pd; ++ mr->max_segs = count; ++out: ++ return rval; ++bail: ++ while (i) ++ kfree(mr->map[--i]); ++ rval = -ENOMEM; ++ goto out; ++} ++ ++static void deinit_qib_mregion(struct qib_mregion *mr) ++{ ++ int i = mr->mapsz; ++ ++ mr->mapsz = 0; ++ while (i) ++ kfree(mr->map[--i]); ++} ++ ++ + /** + * qib_get_dma_mr - get a DMA memory region + * @pd: protection domain for this memory region +@@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) + */ + struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) + { +- struct qib_ibdev *dev = to_idev(pd->device); +- struct qib_mr *mr; ++ struct qib_mr *mr = NULL; + struct ib_mr *ret; +- unsigned long flags; ++ int rval; + + if (to_ipd(pd)->user) { + ret = ERR_PTR(-EPERM); +@@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) + goto bail; + } + +- mr->mr.access_flags = acc; +- atomic_set(&mr->mr.refcount, 0); ++ rval = init_qib_mregion(&mr->mr, pd, 0); ++ if (rval) { ++ ret = ERR_PTR(rval); ++ goto bail; ++ } + +- spin_lock_irqsave(&dev->lk_table.lock, flags); +- if (!dev->dma_mr) +- dev->dma_mr = &mr->mr; +- spin_unlock_irqrestore(&dev->lk_table.lock, flags); + ++ rval = qib_alloc_lkey(&mr->mr, 1); ++ if (rval) { ++ ret = ERR_PTR(rval); ++ goto bail_mregion; ++ } ++ ++ mr->mr.access_flags = acc; + ret = &mr->ibmr; ++done: ++ return ret; + ++bail_mregion: ++ deinit_qib_mregion(&mr->mr); + bail: +- return ret; ++ kfree(mr); ++ goto done; + } + +-static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) ++static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) + { + struct qib_mr *mr; +- int m, i = 0; ++ int rval = -ENOMEM; ++ int m; + + /* Allocate struct plus pointers to first level page tables. */ + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; +- mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); ++ mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + if (!mr) +- goto done; +- +- /* Allocate first level page tables. */ +- for (; i < m; i++) { +- mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); +- if (!mr->mr.map[i]) +- goto bail; +- } +- mr->mr.mapsz = m; +- mr->mr.page_shift = 0; +- mr->mr.max_segs = count; ++ goto bail; + ++ rval = init_qib_mregion(&mr->mr, pd, count); ++ if (rval) ++ goto bail; + /* + * ib_reg_phys_mr() will initialize mr->ibmr except for + * lkey and rkey. 
+ */ +- if (!qib_alloc_lkey(lk_table, &mr->mr)) +- goto bail; ++ rval = qib_alloc_lkey(&mr->mr, 0); ++ if (rval) ++ goto bail_mregion; + mr->ibmr.lkey = mr->mr.lkey; + mr->ibmr.rkey = mr->mr.lkey; ++done: ++ return mr; + +- atomic_set(&mr->mr.refcount, 0); +- goto done; +- ++bail_mregion: ++ deinit_qib_mregion(&mr->mr); + bail: +- while (i) +- kfree(mr->mr.map[--i]); + kfree(mr); +- mr = NULL; +- +-done: +- return mr; ++ mr = ERR_PTR(rval); ++ goto done; + } + + /** +@@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, + int n, m, i; + struct ib_mr *ret; + +- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); +- if (mr == NULL) { +- ret = ERR_PTR(-ENOMEM); ++ mr = alloc_mr(num_phys_buf, pd); ++ if (IS_ERR(mr)) { ++ ret = (struct ib_mr *)mr; + goto bail; + } + +- mr->mr.pd = pd; + mr->mr.user_base = *iova_start; + mr->mr.iova = *iova_start; +- mr->mr.length = 0; +- mr->mr.offset = 0; + mr->mr.access_flags = acc; +- mr->umem = NULL; + + m = 0; + n = 0; +@@ -186,7 +221,6 @@ bail: + * @pd: protection domain for this memory region + * @start: starting userspace address + * @length: length of region to register +- * @virt_addr: virtual address to use (from HCA's point of view) + * @mr_access_flags: access flags for this memory region + * @udata: unused by the QLogic_IB driver + * +@@ -216,14 +250,13 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + list_for_each_entry(chunk, &umem->chunk_list, list) + n += chunk->nents; + +- mr = alloc_mr(n, &to_idev(pd->device)->lk_table); +- if (!mr) { +- ret = ERR_PTR(-ENOMEM); ++ mr = alloc_mr(n, pd); ++ if (IS_ERR(mr)) { ++ ret = (struct ib_mr *)mr; + ib_umem_release(umem); + goto bail; + } + +- mr->mr.pd = pd; + mr->mr.user_base = start; + mr->mr.iova = virt_addr; + mr->mr.length = length; +@@ -271,21 +304,25 @@ bail: + int qib_dereg_mr(struct ib_mr *ibmr) + { + struct qib_mr *mr = to_imr(ibmr); +- struct qib_ibdev *dev = to_idev(ibmr->device); +- int ret; +- int i; +- +- ret = qib_free_lkey(dev, &mr->mr); +- if (ret) +- return ret; +- +- i = mr->mr.mapsz; +- while (i) +- kfree(mr->mr.map[--i]); ++ int ret = 0; ++ unsigned long timeout; ++ ++ qib_free_lkey(&mr->mr); ++ ++ qib_put_mr(&mr->mr); /* will set completion if last */ ++ timeout = wait_for_completion_timeout(&mr->mr.comp, ++ 5 * HZ); ++ if (!timeout) { ++ qib_get_mr(&mr->mr); ++ ret = -EBUSY; ++ goto out; ++ } ++ deinit_qib_mregion(&mr->mr); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); +- return 0; ++out: ++ return ret; + } + + /* +@@ -298,17 +335,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) + { + struct qib_mr *mr; + +- mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); +- if (mr == NULL) +- return ERR_PTR(-ENOMEM); +- +- mr->mr.pd = pd; +- mr->mr.user_base = 0; +- mr->mr.iova = 0; +- mr->mr.length = 0; +- mr->mr.offset = 0; +- mr->mr.access_flags = 0; +- mr->umem = NULL; ++ mr = alloc_mr(max_page_list_len, pd); ++ if (IS_ERR(mr)) ++ return (struct ib_mr *)mr; + + return &mr->ibmr; + } +@@ -322,11 +351,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + +- pl = kmalloc(sizeof *pl, GFP_KERNEL); ++ pl = kzalloc(sizeof *pl, GFP_KERNEL); + if (!pl) + return ERR_PTR(-ENOMEM); + +- pl->page_list = kmalloc(size, GFP_KERNEL); ++ pl->page_list = kzalloc(size, GFP_KERNEL); + if (!pl->page_list) + goto err_free; + +@@ -355,57 +384,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct 
ib_fmr_attr *fmr_attr) + { + struct qib_fmr *fmr; +- int m, i = 0; ++ int m; + struct ib_fmr *ret; ++ int rval = -ENOMEM; + + /* Allocate struct plus pointers to first level page tables. */ + m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; +- fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); ++ fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + if (!fmr) + goto bail; + +- /* Allocate first level page tables. */ +- for (; i < m; i++) { +- fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], +- GFP_KERNEL); +- if (!fmr->mr.map[i]) +- goto bail; +- } +- fmr->mr.mapsz = m; ++ rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); ++ if (rval) ++ goto bail; + + /* + * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & + * rkey. + */ +- if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) +- goto bail; ++ rval = qib_alloc_lkey(&fmr->mr, 0); ++ if (rval) ++ goto bail_mregion; + fmr->ibfmr.rkey = fmr->mr.lkey; + fmr->ibfmr.lkey = fmr->mr.lkey; + /* + * Resources are allocated but no valid mapping (RKEY can't be + * used). + */ +- fmr->mr.pd = pd; +- fmr->mr.user_base = 0; +- fmr->mr.iova = 0; +- fmr->mr.length = 0; +- fmr->mr.offset = 0; + fmr->mr.access_flags = mr_access_flags; + fmr->mr.max_segs = fmr_attr->max_pages; + fmr->mr.page_shift = fmr_attr->page_shift; + +- atomic_set(&fmr->mr.refcount, 0); + ret = &fmr->ibfmr; +- goto done; ++done: ++ return ret; + ++bail_mregion: ++ deinit_qib_mregion(&fmr->mr); + bail: +- while (i) +- kfree(fmr->mr.map[--i]); + kfree(fmr); +- ret = ERR_PTR(-ENOMEM); +- +-done: +- return ret; ++ ret = ERR_PTR(rval); ++ goto done; + } + + /** +@@ -428,7 +447,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + u32 ps; + int ret; + +- if (atomic_read(&fmr->mr.refcount)) ++ i = atomic_read(&fmr->mr.refcount); ++ if (i > 2) + return -EBUSY; + + if (list_len > fmr->mr.max_segs) { +@@ -490,16 +510,20 @@ int qib_unmap_fmr(struct list_head *fmr_list) + int qib_dealloc_fmr(struct ib_fmr *ibfmr) + { + struct qib_fmr *fmr = to_ifmr(ibfmr); +- int ret; +- int i; +- +- ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); +- if (ret) +- return ret; +- +- i = fmr->mr.mapsz; +- while (i) +- kfree(fmr->mr.map[--i]); ++ int ret = 0; ++ unsigned long timeout; ++ ++ qib_free_lkey(&fmr->mr); ++ qib_put_mr(&fmr->mr); /* will set completion if last */ ++ timeout = wait_for_completion_timeout(&fmr->mr.comp, ++ 5 * HZ); ++ if (!timeout) { ++ qib_get_mr(&fmr->mr); ++ ret = -EBUSY; ++ goto out; ++ } ++ deinit_qib_mregion(&fmr->mr); + kfree(fmr); +- return 0; ++out: ++ return ret; + } +diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c +index 1ce56b5..693041b 100644 +--- a/drivers/infiniband/hw/qib/qib_qp.c ++++ b/drivers/infiniband/hw/qib/qib_qp.c +@@ -406,18 +406,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) + unsigned n; + + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) +- while (qp->s_rdma_read_sge.num_sge) { +- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); +- if (--qp->s_rdma_read_sge.num_sge) +- qp->s_rdma_read_sge.sge = +- *qp->s_rdma_read_sge.sg_list++; +- } ++ qib_put_ss(&qp->s_rdma_read_sge); + +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { +@@ -427,7 +418,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) + for (i = 0; i < wqe->wr.num_sge; 
i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || +@@ -437,7 +428,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { +- atomic_dec(&qp->s_rdma_mr->refcount); ++ qib_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } +@@ -450,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { +- atomic_dec(&e->rdma_sge.mr->refcount); ++ qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +@@ -495,7 +486,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) + if (!(qp->s_flags & QIB_S_BUSY)) { + qp->s_hdrwords = 0; + if (qp->s_rdma_mr) { +- atomic_dec(&qp->s_rdma_mr->refcount); ++ qib_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + if (qp->s_tx) { +diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c +index b641416..3ab3413 100644 +--- a/drivers/infiniband/hw/qib/qib_rc.c ++++ b/drivers/infiniband/hw/qib/qib_rc.c +@@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, + case OP(RDMA_READ_RESPONSE_ONLY): + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->rdma_sge.mr) { +- atomic_dec(&e->rdma_sge.mr->refcount); ++ qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + /* FALLTHROUGH */ +@@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, + /* Copy SGE state in case we need to resend */ + qp->s_rdma_mr = e->rdma_sge.mr; + if (qp->s_rdma_mr) +- atomic_inc(&qp->s_rdma_mr->refcount); ++ qib_get_mr(qp->s_rdma_mr); + qp->s_ack_rdma_sge.sge = e->rdma_sge; + qp->s_ack_rdma_sge.num_sge = 1; + qp->s_cur_sge = &qp->s_ack_rdma_sge; +@@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, + qp->s_cur_sge = &qp->s_ack_rdma_sge; + qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; + if (qp->s_rdma_mr) +- atomic_inc(&qp->s_rdma_mr->refcount); ++ qib_get_mr(qp->s_rdma_mr); + len = qp->s_ack_rdma_sge.sge.sge_length; + if (len > pmtu) + len = pmtu; +@@ -1012,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + } + /* Post a send completion queue entry if requested. */ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || +@@ -1068,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + } + /* Post a send completion queue entry if requested. 
*/ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || +@@ -1730,7 +1730,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, + if (unlikely(offset + len != e->rdma_sge.sge_length)) + goto unlock_done; + if (e->rdma_sge.mr) { +- atomic_dec(&e->rdma_sge.mr->refcount); ++ qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + if (len != 0) { +@@ -2024,11 +2024,7 @@ send_last: + if (unlikely(wc.byte_len > qp->r_len)) + goto nack_inv; + qib_copy_sge(&qp->r_sge, data, tlen, 1); +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + qp->r_msn++; + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + break; +@@ -2116,7 +2112,7 @@ send_last: + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { +- atomic_dec(&e->rdma_sge.mr->refcount); ++ qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + reth = &ohdr->u.rc.reth; +@@ -2188,7 +2184,7 @@ send_last: + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { +- atomic_dec(&e->rdma_sge.mr->refcount); ++ qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + ateth = &ohdr->u.atomic_eth; +@@ -2210,7 +2206,7 @@ send_last: + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); +- atomic_dec(&qp->r_sge.sge.mr->refcount); ++ qib_put_mr(qp->r_sge.sge.mr); + qp->r_sge.num_sge = 0; + e->opcode = opcode; + e->sent = 0; +diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c +index c0ee7e0..357b6cf 100644 +--- a/drivers/infiniband/hw/qib/qib_ruc.c ++++ b/drivers/infiniband/hw/qib/qib_ruc.c +@@ -110,7 +110,7 @@ bad_lkey: + while (j) { + struct qib_sge *sge = --j ? 
&ss->sg_list[j - 1] : &ss->sge; + +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + } + ss->num_sge = 0; + memset(&wc, 0, sizeof(wc)); +@@ -501,7 +501,7 @@ again: + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); +- atomic_dec(&qp->r_sge.sge.mr->refcount); ++ qib_put_mr(qp->r_sge.sge.mr); + qp->r_sge.num_sge = 0; + goto send_comp; + +@@ -525,7 +525,7 @@ again: + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (!release) +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + if (--sqp->s_sge.num_sge) + *sge = *sqp->s_sge.sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { +@@ -542,11 +542,7 @@ again: + sqp->s_len -= len; + } + if (release) +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + goto send_comp; +@@ -782,7 +778,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || +diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c +index 70b4cb7..aa3a803 100644 +--- a/drivers/infiniband/hw/qib/qib_uc.c ++++ b/drivers/infiniband/hw/qib/qib_uc.c +@@ -281,11 +281,7 @@ inv: + set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + qp->r_sge.num_sge = 0; + } else +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + qp->r_state = OP(SEND_LAST); + switch (opcode) { + case OP(SEND_FIRST): +@@ -404,12 +400,7 @@ send_last: + goto rewind; + wc.opcode = IB_WC_RECV; + qib_copy_sge(&qp->r_sge, data, tlen, 0); +- while (qp->s_rdma_read_sge.num_sge) { +- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); +- if (--qp->s_rdma_read_sge.num_sge) +- qp->s_rdma_read_sge.sge = +- *qp->s_rdma_read_sge.sg_list++; +- } ++ qib_put_ss(&qp->s_rdma_read_sge); + last_imm: + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; +@@ -493,13 +484,7 @@ rdma_last_imm: + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) + goto drop; + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) +- while (qp->s_rdma_read_sge.num_sge) { +- atomic_dec(&qp->s_rdma_read_sge.sge.mr-> +- refcount); +- if (--qp->s_rdma_read_sge.num_sge) +- qp->s_rdma_read_sge.sge = +- *qp->s_rdma_read_sge.sg_list++; +- } ++ qib_put_ss(&qp->s_rdma_read_sge); + else { + ret = qib_get_rwqe(qp, 1); + if (ret < 0) +@@ -510,11 +495,7 @@ rdma_last_imm: + wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + qib_copy_sge(&qp->r_sge, data, tlen, 1); +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + goto last_imm; + + case OP(RDMA_WRITE_LAST): +@@ -530,11 +511,7 @@ rdma_last: + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) + goto drop; + qib_copy_sge(&qp->r_sge, data, tlen, 1); +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + break; + + default: +diff --git a/drivers/infiniband/hw/qib/qib_ud.c 
b/drivers/infiniband/hw/qib/qib_ud.c +index a468bf2..d6c7fe7 100644 +--- a/drivers/infiniband/hw/qib/qib_ud.c ++++ b/drivers/infiniband/hw/qib/qib_ud.c +@@ -194,11 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) + } + length -= len; + } +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + goto bail_unlock; + wc.wr_id = qp->r_wr_id; +@@ -556,11 +552,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + } else + qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); +- while (qp->r_sge.num_sge) { +- atomic_dec(&qp->r_sge.sge.mr->refcount); +- if (--qp->r_sge.num_sge) +- qp->r_sge.sge = *qp->r_sge.sg_list++; +- } ++ qib_put_ss(&qp->r_sge); + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + return; + wc.wr_id = qp->r_wr_id; +diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c +index 7b6c3bf..76d7ce8 100644 +--- a/drivers/infiniband/hw/qib/qib_verbs.c ++++ b/drivers/infiniband/hw/qib/qib_verbs.c +@@ -183,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (release) +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { +@@ -224,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (release) +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { +@@ -435,7 +435,7 @@ bail_inval_free: + while (j) { + struct qib_sge *sge = &wqe->sg_list[--j]; + +- atomic_dec(&sge->mr->refcount); ++ qib_put_mr(sge->mr); + } + bail_inval: + ret = -EINVAL; +@@ -978,7 +978,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + if (tx->mr) { +- atomic_dec(&tx->mr->refcount); ++ qib_put_mr(tx->mr); + tx->mr = NULL; + } + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { +@@ -1336,7 +1336,7 @@ done: + } + qib_sendbuf_done(dd, pbufn); + if (qp->s_rdma_mr) { +- atomic_dec(&qp->s_rdma_mr->refcount); ++ qib_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + if (qp->s_wqe) { +diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h +index 4876060..4a2277b 100644 +--- a/drivers/infiniband/hw/qib/qib_verbs.h ++++ b/drivers/infiniband/hw/qib/qib_verbs.h +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -302,6 +303,8 @@ struct qib_mregion { + u32 max_segs; /* number of qib_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non unform/non powerof2 sizes */ ++ u8 lkey_published; /* in global table */ ++ struct completion comp; /* complete when refcount goes to zero */ + atomic_t refcount; + struct qib_segarray *map[0]; /* the segments */ + }; +@@ -944,9 +947,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); + void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion 
*mr); ++int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); + +-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr); ++void qib_free_lkey(struct qib_mregion *mr); + + int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + struct qib_sge *isge, struct ib_sge *sge, int acc); +@@ -1014,6 +1017,27 @@ int qib_unmap_fmr(struct list_head *fmr_list); + + int qib_dealloc_fmr(struct ib_fmr *ibfmr); + ++static inline void qib_get_mr(struct qib_mregion *mr) ++{ ++ atomic_inc(&mr->refcount); ++} ++ ++static inline void qib_put_mr(struct qib_mregion *mr) ++{ ++ if (unlikely(atomic_dec_and_test(&mr->refcount))) ++ complete(&mr->comp); ++} ++ ++static inline void qib_put_ss(struct qib_sge_state *ss) ++{ ++ while (ss->num_sge) { ++ qib_put_mr(ss->sge.mr); ++ if (--ss->num_sge) ++ ss->sge = *ss->sg_list++; ++ } ++} ++ ++ + void qib_release_mmap_info(struct kref *ref); + + struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, -- 2.41.0
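[Illustration appended after the patch; not part of the series.] The reference-count protocol that 0007-IB-qib-uc-refcount-leak.patch describes (one reference handed back to the creator, get/put pairs around transient I/O, and a completion fired by the final put so deregistration can wait for the count to reach zero) can be sketched in userspace. This is a hedged analogue for illustration only: mr_get(), mr_put() and mr_deregister() are hypothetical stand-ins for qib_get_mr(), qib_put_mr() and qib_dereg_mr(), and a pthread condition variable stands in for the kernel struct completion.

/*
 * Userspace analogue (NOT kernel code) of the MR refcount protocol:
 *  - the object starts with one reference owned by its creator,
 *  - transient I/O paths bracket use with get/put,
 *  - the destroyer drops its reference, then waits until the count
 *    reaches zero before freeing.
 * Build with: cc -std=c11 -pthread mr_refcount.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct mregion {
	atomic_int refcount;
	pthread_mutex_t lock;
	pthread_cond_t zero;		/* stands in for struct completion */
};

static void mr_init(struct mregion *mr)
{
	atomic_init(&mr->refcount, 1);	/* the "return to the user" reference */
	pthread_mutex_init(&mr->lock, NULL);
	pthread_cond_init(&mr->zero, NULL);
}

static void mr_get(struct mregion *mr)
{
	atomic_fetch_add(&mr->refcount, 1);
}

static void mr_put(struct mregion *mr)
{
	/* atomic_fetch_sub() returns the previous value: 1 means we hit zero */
	if (atomic_fetch_sub(&mr->refcount, 1) == 1) {
		pthread_mutex_lock(&mr->lock);
		pthread_cond_broadcast(&mr->zero);	/* complete(&mr->comp) */
		pthread_mutex_unlock(&mr->lock);
	}
}

static void mr_deregister(struct mregion *mr)
{
	mr_put(mr);			/* drop the creator's reference */
	pthread_mutex_lock(&mr->lock);
	while (atomic_load(&mr->refcount) != 0)	/* wait_for_completion() */
		pthread_cond_wait(&mr->zero, &mr->lock);
	pthread_mutex_unlock(&mr->lock);
	/* safe to tear down and free the region here */
}

int main(void)
{
	struct mregion mr;

	mr_init(&mr);
	mr_get(&mr);		/* a transient I/O reference */
	mr_put(&mr);		/* that I/O finished */
	mr_deregister(&mr);	/* waits only if references remain */
	printf("deregistered cleanly\n");
	return 0;
}

The in-kernel versions in the patch additionally bound the wait with wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); on timeout they re-take the reference and return -EBUSY, which is the behavior the patch description calls out for catching other EBUSY issues.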