From: Mike Marciniszyn
Date: Mon, 15 Oct 2012 15:38:15 +0000 (-0400)
Subject: IB/qib: remove EBUSY patch
X-Git-Tag: vofed-3.5-x~32
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=52055559d50b1c03fac0313b8bf0d99e0398a478;p=~emulex%2Ffor-vlad%2Fold%2Fcompat-rdma.git

IB/qib: remove EBUSY patch

The 0005-A-timing-issue-can-occur-where-qib_mr_dereg-can-retu.patch upstream
patch causes an issue with MR deregistration.

Signed-off-by: Mike Marciniszyn
---
diff --git a/linux-next-pending/0005-A-timing-issue-can-occur-where-qib_mr_dereg-can-retu.patch b/linux-next-pending/0005-A-timing-issue-can-occur-where-qib_mr_dereg-can-retu.patch
deleted file mode 100644
index 3576c79..0000000
--- a/linux-next-pending/0005-A-timing-issue-can-occur-where-qib_mr_dereg-can-retu.patch
+++ /dev/null
@@ -1,1037 +0,0 @@
-From f96007a6bc75a3644489b159050fce06d1f19655 Mon Sep 17 00:00:00 2001
-From: Mike Marciniszyn
-Date: Tue, 2 Oct 2012 15:53:02 +0200
-Subject: [PATCH 5/5] A timing issue can occur where qib_mr_dereg can return
- -EBUSY if the MR use count is not zero.
-
-This can occur if the MR is de-registered while RDMA read response
-packets are being progressed from the SDMA ring. The suspicion is
-that the peer sent an RDMA read request, which has already been copied
-across to the peer. The peer sees the completion of his request and
-then communicates to the responder that the MR is not needed any
-longer. The responder tries to de-register the MR, catching some
-responses remaining in the SDMA ring holding the MR use count.
-
-The code now uses a get/put paradigm to track MR use counts and
-coordinates with the MR de-registration process using a completion
-when the count has reached zero. A timeout on the delay is in place
-to catch other EBUSY issues.
-
-The reference count protocol is as follows:
-- The return to the user counts as 1
-- A reference from the lk_table or the qib_ibdev counts as 1.
-- Transient I/O operations increase/decrease as necessary
-
-A lot of code duplication has been folded into the new routines
-init_qib_mregion() and deinit_qib_mregion(). Additionally, explicit
-initialization of fields to zero is now handled by kzalloc().
-
-Also, duplicated code 'while.*num_sge' that decrements reference
-counts have been consolidated in qib_put_ss().
-
-Reviewed-by: Ramkrishna Vepa
-Signed-off-by: Mike Marciniszyn
-Signed-off-by: Roland Dreier
----
- drivers/infiniband/hw/qib/qib_keys.c | 84 +++++++-----
- drivers/infiniband/hw/qib/qib_mr.c | 242 ++++++++++++++++++---------------
- drivers/infiniband/hw/qib/qib_qp.c | 21 +--
- drivers/infiniband/hw/qib/qib_rc.c | 24 ++--
- drivers/infiniband/hw/qib/qib_ruc.c | 14 +-
- drivers/infiniband/hw/qib/qib_uc.c | 33 +----
- drivers/infiniband/hw/qib/qib_ud.c | 12 +-
- drivers/infiniband/hw/qib/qib_verbs.c | 10 +-
- drivers/infiniband/hw/qib/qib_verbs.h | 28 +++-
- 9 files changed, 244 insertions(+), 224 deletions(-)
-
-diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
-index 8fd19a4..8b5ee3a 100644
---- a/drivers/infiniband/hw/qib/qib_keys.c
-+++ b/drivers/infiniband/hw/qib/qib_keys.c
-@@ -35,21 +35,40 @@
-
- /**
- * qib_alloc_lkey - allocate an lkey
-- * @rkt: lkey table in which to allocate the lkey
- * @mr: memory region that this lkey protects
-+ * @dma_region: 0->normal key, 1->restricted DMA key
-+ *
-+ * Returns 0 if successful, otherwise returns -errno.
-+ *
-+ * Increments mr reference count and sets published
-+ * as required.
-+ *
-+ * Sets the lkey field mr for non-dma regions.
- * -- * Returns 1 if successful, otherwise returns 0. - */ - --int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) -+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) - { - unsigned long flags; - u32 r; - u32 n; -- int ret; -+ int ret = 0; -+ struct qib_ibdev *dev = to_idev(mr->pd->device); -+ struct qib_lkey_table *rkt = &dev->lk_table; - - spin_lock_irqsave(&rkt->lock, flags); - -+ /* special case for dma_mr lkey == 0 */ -+ if (dma_region) { -+ /* should the dma_mr be relative to the pd? */ -+ if (!dev->dma_mr) { -+ qib_get_mr(mr); -+ dev->dma_mr = mr; -+ mr->lkey_published = 1; -+ } -+ goto success; -+ } -+ - /* Find the next available LKEY */ - r = rkt->next; - n = r; -@@ -57,11 +76,8 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) - if (rkt->table[r] == NULL) - break; - r = (r + 1) & (rkt->max - 1); -- if (r == n) { -- spin_unlock_irqrestore(&rkt->lock, flags); -- ret = 0; -+ if (r == n) - goto bail; -- } - } - rkt->next = (r + 1) & (rkt->max - 1); - /* -@@ -76,46 +92,50 @@ int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr) - mr->lkey |= 1 << 8; - rkt->gen++; - } -+ qib_get_mr(mr); - rkt->table[r] = mr; -+ mr->lkey_published = 1; -+success: - spin_unlock_irqrestore(&rkt->lock, flags); -- -- ret = 1; -- --bail: -+out: - return ret; -+bail: -+ spin_unlock_irqrestore(&rkt->lock, flags); -+ ret = -ENOMEM; -+ goto out; - } - - /** - * qib_free_lkey - free an lkey -- * @rkt: table from which to free the lkey -- * @lkey: lkey id to free -+ * @mr: mr to free from tables - */ --int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr) -+void qib_free_lkey(struct qib_mregion *mr) - { - unsigned long flags; - u32 lkey = mr->lkey; - u32 r; -- int ret; -+ struct qib_ibdev *dev = to_idev(mr->pd->device); -+ struct qib_lkey_table *rkt = &dev->lk_table; -+ -+ spin_lock_irqsave(&rkt->lock, flags); -+ if (!mr->lkey_published) -+ goto out; -+ mr->lkey_published = 0; -+ - - spin_lock_irqsave(&dev->lk_table.lock, flags); - if (lkey == 0) { - if (dev->dma_mr && dev->dma_mr == mr) { -- ret = atomic_read(&dev->dma_mr->refcount); -- if (!ret) -- dev->dma_mr = NULL; -- } else -- ret = 0; -+ qib_put_mr(dev->dma_mr); -+ dev->dma_mr = NULL; -+ } - } else { - r = lkey >> (32 - ib_qib_lkey_table_size); -- ret = atomic_read(&dev->lk_table.table[r]->refcount); -- if (!ret) -- dev->lk_table.table[r] = NULL; -+ qib_put_mr(dev->dma_mr); -+ rkt->table[r] = NULL; - } -+out: - spin_unlock_irqrestore(&dev->lk_table.lock, flags); -- -- if (ret) -- ret = -EBUSY; -- return ret; - } - - /** -@@ -150,7 +170,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, - goto bail; - if (!dev->dma_mr) - goto bail; -- atomic_inc(&dev->dma_mr->refcount); -+ qib_get_mr(dev->dma_mr); - spin_unlock_irqrestore(&rkt->lock, flags); - - isge->mr = dev->dma_mr; -@@ -171,7 +191,7 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, - off + sge->length > mr->length || - (mr->access_flags & acc) != acc)) - goto bail; -- atomic_inc(&mr->refcount); -+ qib_get_mr(mr); - spin_unlock_irqrestore(&rkt->lock, flags); - - off += mr->offset; -@@ -245,7 +265,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, - goto bail; - if (!dev->dma_mr) - goto bail; -- atomic_inc(&dev->dma_mr->refcount); -+ qib_get_mr(dev->dma_mr); - spin_unlock_irqrestore(&rkt->lock, flags); - - sge->mr = dev->dma_mr; -@@ -265,7 +285,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, - if (unlikely(vaddr < mr->iova || off + len > mr->length || - (mr->access_flags 
& acc) == 0)) - goto bail; -- atomic_inc(&mr->refcount); -+ qib_get_mr(mr); - spin_unlock_irqrestore(&rkt->lock, flags); - - off += mr->offset; -diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c -index 08944e2..6a2028a 100644 ---- a/drivers/infiniband/hw/qib/qib_mr.c -+++ b/drivers/infiniband/hw/qib/qib_mr.c -@@ -47,6 +47,43 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) - return container_of(ibfmr, struct qib_fmr, ibfmr); - } - -+static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, -+ int count) -+{ -+ int m, i = 0; -+ int rval = 0; -+ -+ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; -+ for (; i < m; i++) { -+ mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); -+ if (!mr->map[i]) -+ goto bail; -+ } -+ mr->mapsz = m; -+ init_completion(&mr->comp); -+ /* count returning the ptr to user */ -+ atomic_set(&mr->refcount, 1); -+ mr->pd = pd; -+ mr->max_segs = count; -+out: -+ return rval; -+bail: -+ while (i) -+ kfree(mr->map[--i]); -+ rval = -ENOMEM; -+ goto out; -+} -+ -+static void deinit_qib_mregion(struct qib_mregion *mr) -+{ -+ int i = mr->mapsz; -+ -+ mr->mapsz = 0; -+ while (i) -+ kfree(mr->map[--i]); -+} -+ -+ - /** - * qib_get_dma_mr - get a DMA memory region - * @pd: protection domain for this memory region -@@ -58,10 +95,9 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) - */ - struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) - { -- struct qib_ibdev *dev = to_idev(pd->device); -- struct qib_mr *mr; -+ struct qib_mr *mr = NULL; - struct ib_mr *ret; -- unsigned long flags; -+ int rval; - - if (to_ipd(pd)->user) { - ret = ERR_PTR(-EPERM); -@@ -74,61 +110,64 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) - goto bail; - } - -- mr->mr.access_flags = acc; -- atomic_set(&mr->mr.refcount, 0); -+ rval = init_qib_mregion(&mr->mr, pd, 0); -+ if (rval) { -+ ret = ERR_PTR(rval); -+ goto bail; -+ } - -- spin_lock_irqsave(&dev->lk_table.lock, flags); -- if (!dev->dma_mr) -- dev->dma_mr = &mr->mr; -- spin_unlock_irqrestore(&dev->lk_table.lock, flags); - -+ rval = qib_alloc_lkey(&mr->mr, 1); -+ if (rval) { -+ ret = ERR_PTR(rval); -+ goto bail_mregion; -+ } -+ -+ mr->mr.access_flags = acc; - ret = &mr->ibmr; -+done: -+ return ret; - -+bail_mregion: -+ deinit_qib_mregion(&mr->mr); - bail: -- return ret; -+ kfree(mr); -+ goto done; - } - --static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table) -+static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) - { - struct qib_mr *mr; -- int m, i = 0; -+ int rval = -ENOMEM; -+ int m; - - /* Allocate struct plus pointers to first level page tables. */ - m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; -- mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); -+ mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); - if (!mr) -- goto done; -- -- /* Allocate first level page tables. */ -- for (; i < m; i++) { -- mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL); -- if (!mr->mr.map[i]) -- goto bail; -- } -- mr->mr.mapsz = m; -- mr->mr.page_shift = 0; -- mr->mr.max_segs = count; -+ goto bail; - -+ rval = init_qib_mregion(&mr->mr, pd, count); -+ if (rval) -+ goto bail; - /* - * ib_reg_phys_mr() will initialize mr->ibmr except for - * lkey and rkey. 
- */ -- if (!qib_alloc_lkey(lk_table, &mr->mr)) -- goto bail; -+ rval = qib_alloc_lkey(&mr->mr, 0); -+ if (rval) -+ goto bail_mregion; - mr->ibmr.lkey = mr->mr.lkey; - mr->ibmr.rkey = mr->mr.lkey; -+done: -+ return mr; - -- atomic_set(&mr->mr.refcount, 0); -- goto done; -- -+bail_mregion: -+ deinit_qib_mregion(&mr->mr); - bail: -- while (i) -- kfree(mr->mr.map[--i]); - kfree(mr); -- mr = NULL; -- --done: -- return mr; -+ mr = ERR_PTR(rval); -+ goto done; - } - - /** -@@ -148,19 +187,15 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, - int n, m, i; - struct ib_mr *ret; - -- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table); -- if (mr == NULL) { -- ret = ERR_PTR(-ENOMEM); -+ mr = alloc_mr(num_phys_buf, pd); -+ if (IS_ERR(mr)) { -+ ret = (struct ib_mr *)mr; - goto bail; - } - -- mr->mr.pd = pd; - mr->mr.user_base = *iova_start; - mr->mr.iova = *iova_start; -- mr->mr.length = 0; -- mr->mr.offset = 0; - mr->mr.access_flags = acc; -- mr->umem = NULL; - - m = 0; - n = 0; -@@ -186,7 +221,6 @@ bail: - * @pd: protection domain for this memory region - * @start: starting userspace address - * @length: length of region to register -- * @virt_addr: virtual address to use (from HCA's point of view) - * @mr_access_flags: access flags for this memory region - * @udata: unused by the QLogic_IB driver - * -@@ -216,14 +250,13 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; - -- mr = alloc_mr(n, &to_idev(pd->device)->lk_table); -- if (!mr) { -- ret = ERR_PTR(-ENOMEM); -+ mr = alloc_mr(n, pd); -+ if (IS_ERR(mr)) { -+ ret = (struct ib_mr *)mr; - ib_umem_release(umem); - goto bail; - } - -- mr->mr.pd = pd; - mr->mr.user_base = start; - mr->mr.iova = virt_addr; - mr->mr.length = length; -@@ -271,21 +304,25 @@ bail: - int qib_dereg_mr(struct ib_mr *ibmr) - { - struct qib_mr *mr = to_imr(ibmr); -- struct qib_ibdev *dev = to_idev(ibmr->device); -- int ret; -- int i; -- -- ret = qib_free_lkey(dev, &mr->mr); -- if (ret) -- return ret; -- -- i = mr->mr.mapsz; -- while (i) -- kfree(mr->mr.map[--i]); -+ int ret = 0; -+ unsigned long timeout; -+ -+ qib_free_lkey(&mr->mr); -+ -+ qib_put_mr(&mr->mr); /* will set completion if last */ -+ timeout = wait_for_completion_timeout(&mr->mr.comp, -+ 5 * HZ); -+ if (!timeout) { -+ qib_get_mr(&mr->mr); -+ ret = -EBUSY; -+ goto out; -+ } -+ deinit_qib_mregion(&mr->mr); - if (mr->umem) - ib_umem_release(mr->umem); - kfree(mr); -- return 0; -+out: -+ return ret; - } - - /* -@@ -298,17 +335,9 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) - { - struct qib_mr *mr; - -- mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table); -- if (mr == NULL) -- return ERR_PTR(-ENOMEM); -- -- mr->mr.pd = pd; -- mr->mr.user_base = 0; -- mr->mr.iova = 0; -- mr->mr.length = 0; -- mr->mr.offset = 0; -- mr->mr.access_flags = 0; -- mr->umem = NULL; -+ mr = alloc_mr(max_page_list_len, pd); -+ if (IS_ERR(mr)) -+ return (struct ib_mr *)mr; - - return &mr->ibmr; - } -@@ -322,11 +351,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) - if (size > PAGE_SIZE) - return ERR_PTR(-EINVAL); - -- pl = kmalloc(sizeof *pl, GFP_KERNEL); -+ pl = kzalloc(sizeof *pl, GFP_KERNEL); - if (!pl) - return ERR_PTR(-ENOMEM); - -- pl->page_list = kmalloc(size, GFP_KERNEL); -+ pl->page_list = kzalloc(size, GFP_KERNEL); - if (!pl->page_list) - goto err_free; - -@@ -355,57 +384,47 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, - struct 
ib_fmr_attr *fmr_attr) - { - struct qib_fmr *fmr; -- int m, i = 0; -+ int m; - struct ib_fmr *ret; -+ int rval = -ENOMEM; - - /* Allocate struct plus pointers to first level page tables. */ - m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; -- fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); -+ fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); - if (!fmr) - goto bail; - -- /* Allocate first level page tables. */ -- for (; i < m; i++) { -- fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0], -- GFP_KERNEL); -- if (!fmr->mr.map[i]) -- goto bail; -- } -- fmr->mr.mapsz = m; -+ rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); -+ if (rval) -+ goto bail; - - /* - * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & - * rkey. - */ -- if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr)) -- goto bail; -+ rval = qib_alloc_lkey(&fmr->mr, 0); -+ if (rval) -+ goto bail_mregion; - fmr->ibfmr.rkey = fmr->mr.lkey; - fmr->ibfmr.lkey = fmr->mr.lkey; - /* - * Resources are allocated but no valid mapping (RKEY can't be - * used). - */ -- fmr->mr.pd = pd; -- fmr->mr.user_base = 0; -- fmr->mr.iova = 0; -- fmr->mr.length = 0; -- fmr->mr.offset = 0; - fmr->mr.access_flags = mr_access_flags; - fmr->mr.max_segs = fmr_attr->max_pages; - fmr->mr.page_shift = fmr_attr->page_shift; - -- atomic_set(&fmr->mr.refcount, 0); - ret = &fmr->ibfmr; -- goto done; -+done: -+ return ret; - -+bail_mregion: -+ deinit_qib_mregion(&fmr->mr); - bail: -- while (i) -- kfree(fmr->mr.map[--i]); - kfree(fmr); -- ret = ERR_PTR(-ENOMEM); -- --done: -- return ret; -+ ret = ERR_PTR(rval); -+ goto done; - } - - /** -@@ -428,7 +447,8 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - u32 ps; - int ret; - -- if (atomic_read(&fmr->mr.refcount)) -+ i = atomic_read(&fmr->mr.refcount); -+ if (i > 2) - return -EBUSY; - - if (list_len > fmr->mr.max_segs) { -@@ -490,16 +510,20 @@ int qib_unmap_fmr(struct list_head *fmr_list) - int qib_dealloc_fmr(struct ib_fmr *ibfmr) - { - struct qib_fmr *fmr = to_ifmr(ibfmr); -- int ret; -- int i; -- -- ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr); -- if (ret) -- return ret; -- -- i = fmr->mr.mapsz; -- while (i) -- kfree(fmr->mr.map[--i]); -+ int ret = 0; -+ unsigned long timeout; -+ -+ qib_free_lkey(&fmr->mr); -+ qib_put_mr(&fmr->mr); /* will set completion if last */ -+ timeout = wait_for_completion_timeout(&fmr->mr.comp, -+ 5 * HZ); -+ if (!timeout) { -+ qib_get_mr(&fmr->mr); -+ ret = -EBUSY; -+ goto out; -+ } -+ deinit_qib_mregion(&fmr->mr); - kfree(fmr); -- return 0; -+out: -+ return ret; - } -diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c -index 1ce56b5..693041b 100644 ---- a/drivers/infiniband/hw/qib/qib_qp.c -+++ b/drivers/infiniband/hw/qib/qib_qp.c -@@ -406,18 +406,9 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) - unsigned n; - - if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) -- while (qp->s_rdma_read_sge.num_sge) { -- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); -- if (--qp->s_rdma_read_sge.num_sge) -- qp->s_rdma_read_sge.sge = -- *qp->s_rdma_read_sge.sg_list++; -- } -+ qib_put_ss(&qp->s_rdma_read_sge); - -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { -@@ -427,7 +418,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) - for (i = 0; i < wqe->wr.num_sge; 
i++) { - struct qib_sge *sge = &wqe->sg_list[i]; - -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || -@@ -437,7 +428,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) - qp->s_last = 0; - } - if (qp->s_rdma_mr) { -- atomic_dec(&qp->s_rdma_mr->refcount); -+ qib_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } -@@ -450,7 +441,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends) - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { -- atomic_dec(&e->rdma_sge.mr->refcount); -+ qib_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -@@ -495,7 +486,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) - if (!(qp->s_flags & QIB_S_BUSY)) { - qp->s_hdrwords = 0; - if (qp->s_rdma_mr) { -- atomic_dec(&qp->s_rdma_mr->refcount); -+ qib_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - if (qp->s_tx) { -diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c -index b641416..3ab3413 100644 ---- a/drivers/infiniband/hw/qib/qib_rc.c -+++ b/drivers/infiniband/hw/qib/qib_rc.c -@@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, - case OP(RDMA_READ_RESPONSE_ONLY): - e = &qp->s_ack_queue[qp->s_tail_ack_queue]; - if (e->rdma_sge.mr) { -- atomic_dec(&e->rdma_sge.mr->refcount); -+ qib_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - /* FALLTHROUGH */ -@@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, - /* Copy SGE state in case we need to resend */ - qp->s_rdma_mr = e->rdma_sge.mr; - if (qp->s_rdma_mr) -- atomic_inc(&qp->s_rdma_mr->refcount); -+ qib_get_mr(qp->s_rdma_mr); - qp->s_ack_rdma_sge.sge = e->rdma_sge; - qp->s_ack_rdma_sge.num_sge = 1; - qp->s_cur_sge = &qp->s_ack_rdma_sge; -@@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, - qp->s_cur_sge = &qp->s_ack_rdma_sge; - qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; - if (qp->s_rdma_mr) -- atomic_inc(&qp->s_rdma_mr->refcount); -+ qib_get_mr(qp->s_rdma_mr); - len = qp->s_ack_rdma_sge.sge.sge_length; - if (len > pmtu) - len = pmtu; -@@ -1012,7 +1012,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) - for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; - -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. */ - if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || -@@ -1068,7 +1068,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp, - for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; - -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - } - /* Post a send completion queue entry if requested. 
*/ - if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || -@@ -1730,7 +1730,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, - if (unlikely(offset + len != e->rdma_sge.sge_length)) - goto unlock_done; - if (e->rdma_sge.mr) { -- atomic_dec(&e->rdma_sge.mr->refcount); -+ qib_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - if (len != 0) { -@@ -2024,11 +2024,7 @@ send_last: - if (unlikely(wc.byte_len > qp->r_len)) - goto nack_inv; - qib_copy_sge(&qp->r_sge, data, tlen, 1); -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - qp->r_msn++; - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) - break; -@@ -2116,7 +2112,7 @@ send_last: - } - e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { -- atomic_dec(&e->rdma_sge.mr->refcount); -+ qib_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - reth = &ohdr->u.rc.reth; -@@ -2188,7 +2184,7 @@ send_last: - } - e = &qp->s_ack_queue[qp->r_head_ack_queue]; - if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { -- atomic_dec(&e->rdma_sge.mr->refcount); -+ qib_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - ateth = &ohdr->u.atomic_eth; -@@ -2210,7 +2206,7 @@ send_last: - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), - sdata); -- atomic_dec(&qp->r_sge.sge.mr->refcount); -+ qib_put_mr(qp->r_sge.sge.mr); - qp->r_sge.num_sge = 0; - e->opcode = opcode; - e->sent = 0; -diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c -index c0ee7e0..357b6cf 100644 ---- a/drivers/infiniband/hw/qib/qib_ruc.c -+++ b/drivers/infiniband/hw/qib/qib_ruc.c -@@ -110,7 +110,7 @@ bad_lkey: - while (j) { - struct qib_sge *sge = --j ? 
&ss->sg_list[j - 1] : &ss->sge; - -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - } - ss->num_sge = 0; - memset(&wc, 0, sizeof(wc)); -@@ -501,7 +501,7 @@ again: - (u64) atomic64_add_return(sdata, maddr) - sdata : - (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - sdata, wqe->wr.wr.atomic.swap); -- atomic_dec(&qp->r_sge.sge.mr->refcount); -+ qib_put_mr(qp->r_sge.sge.mr); - qp->r_sge.num_sge = 0; - goto send_comp; - -@@ -525,7 +525,7 @@ again: - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (!release) -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - if (--sqp->s_sge.num_sge) - *sge = *sqp->s_sge.sg_list++; - } else if (sge->length == 0 && sge->mr->lkey) { -@@ -542,11 +542,7 @@ again: - sqp->s_len -= len; - } - if (release) -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) - goto send_comp; -@@ -782,7 +778,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, - for (i = 0; i < wqe->wr.num_sge; i++) { - struct qib_sge *sge = &wqe->sg_list[i]; - -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || -diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c -index 70b4cb7..aa3a803 100644 ---- a/drivers/infiniband/hw/qib/qib_uc.c -+++ b/drivers/infiniband/hw/qib/qib_uc.c -@@ -281,11 +281,7 @@ inv: - set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); - qp->r_sge.num_sge = 0; - } else -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - qp->r_state = OP(SEND_LAST); - switch (opcode) { - case OP(SEND_FIRST): -@@ -404,12 +400,7 @@ send_last: - goto rewind; - wc.opcode = IB_WC_RECV; - qib_copy_sge(&qp->r_sge, data, tlen, 0); -- while (qp->s_rdma_read_sge.num_sge) { -- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount); -- if (--qp->s_rdma_read_sge.num_sge) -- qp->s_rdma_read_sge.sge = -- *qp->s_rdma_read_sge.sg_list++; -- } -+ qib_put_ss(&qp->s_rdma_read_sge); - last_imm: - wc.wr_id = qp->r_wr_id; - wc.status = IB_WC_SUCCESS; -@@ -493,13 +484,7 @@ rdma_last_imm: - if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) - goto drop; - if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) -- while (qp->s_rdma_read_sge.num_sge) { -- atomic_dec(&qp->s_rdma_read_sge.sge.mr-> -- refcount); -- if (--qp->s_rdma_read_sge.num_sge) -- qp->s_rdma_read_sge.sge = -- *qp->s_rdma_read_sge.sg_list++; -- } -+ qib_put_ss(&qp->s_rdma_read_sge); - else { - ret = qib_get_rwqe(qp, 1); - if (ret < 0) -@@ -510,11 +495,7 @@ rdma_last_imm: - wc.byte_len = qp->r_len; - wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; - qib_copy_sge(&qp->r_sge, data, tlen, 1); -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - goto last_imm; - - case OP(RDMA_WRITE_LAST): -@@ -530,11 +511,7 @@ rdma_last: - if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) - goto drop; - qib_copy_sge(&qp->r_sge, data, tlen, 1); -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - break; - - default: -diff --git a/drivers/infiniband/hw/qib/qib_ud.c 
b/drivers/infiniband/hw/qib/qib_ud.c -index a468bf2..d6c7fe7 100644 ---- a/drivers/infiniband/hw/qib/qib_ud.c -+++ b/drivers/infiniband/hw/qib/qib_ud.c -@@ -194,11 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) - } - length -= len; - } -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) - goto bail_unlock; - wc.wr_id = qp->r_wr_id; -@@ -556,11 +552,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, - } else - qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); - qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); -- while (qp->r_sge.num_sge) { -- atomic_dec(&qp->r_sge.sge.mr->refcount); -- if (--qp->r_sge.num_sge) -- qp->r_sge.sge = *qp->r_sge.sg_list++; -- } -+ qib_put_ss(&qp->r_sge); - if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) - return; - wc.wr_id = qp->r_wr_id; -diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c -index 7b6c3bf..76d7ce8 100644 ---- a/drivers/infiniband/hw/qib/qib_verbs.c -+++ b/drivers/infiniband/hw/qib/qib_verbs.c -@@ -183,7 +183,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (release) -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr->lkey) { -@@ -224,7 +224,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) - sge->sge_length -= len; - if (sge->sge_length == 0) { - if (release) -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - if (--ss->num_sge) - *sge = *ss->sg_list++; - } else if (sge->length == 0 && sge->mr->lkey) { -@@ -435,7 +435,7 @@ bail_inval_free: - while (j) { - struct qib_sge *sge = &wqe->sg_list[--j]; - -- atomic_dec(&sge->mr->refcount); -+ qib_put_mr(sge->mr); - } - bail_inval: - ret = -EINVAL; -@@ -978,7 +978,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx) - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - if (tx->mr) { -- atomic_dec(&tx->mr->refcount); -+ qib_put_mr(tx->mr); - tx->mr = NULL; - } - if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { -@@ -1336,7 +1336,7 @@ done: - } - qib_sendbuf_done(dd, pbufn); - if (qp->s_rdma_mr) { -- atomic_dec(&qp->s_rdma_mr->refcount); -+ qib_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - if (qp->s_wqe) { -diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h -index 4876060..4a2277b 100644 ---- a/drivers/infiniband/hw/qib/qib_verbs.h -+++ b/drivers/infiniband/hw/qib/qib_verbs.h -@@ -41,6 +41,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -302,6 +303,8 @@ struct qib_mregion { - u32 max_segs; /* number of qib_segs in all the arrays */ - u32 mapsz; /* size of the map array */ - u8 page_shift; /* 0 - non unform/non powerof2 sizes */ -+ u8 lkey_published; /* in global table */ -+ struct completion comp; /* complete when refcount goes to zero */ - atomic_t refcount; - struct qib_segarray *map[0]; /* the segments */ - }; -@@ -944,9 +947,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); - void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, - int has_grh, void *data, u32 tlen, struct qib_qp *qp); - --int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion 
*mr); -+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); - --int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr); -+void qib_free_lkey(struct qib_mregion *mr); - - int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, - struct qib_sge *isge, struct ib_sge *sge, int acc); -@@ -1014,6 +1017,27 @@ int qib_unmap_fmr(struct list_head *fmr_list); - - int qib_dealloc_fmr(struct ib_fmr *ibfmr); - -+static inline void qib_get_mr(struct qib_mregion *mr) -+{ -+ atomic_inc(&mr->refcount); -+} -+ -+static inline void qib_put_mr(struct qib_mregion *mr) -+{ -+ if (unlikely(atomic_dec_and_test(&mr->refcount))) -+ complete(&mr->comp); -+} -+ -+static inline void qib_put_ss(struct qib_sge_state *ss) -+{ -+ while (ss->num_sge) { -+ qib_put_mr(ss->sge.mr); -+ if (--ss->num_sge) -+ ss->sge = *ss->sg_list++; -+ } -+} -+ -+ - void qib_release_mmap_info(struct kref *ref); - - struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, --- -1.7.9.5 -
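
For reference, the reference-counting scheme described in the deleted patch reduces to
the qib_get_mr()/qib_put_mr() helpers plus a bounded wait at deregistration time. The
C sketch below is an illustrative reconstruction assembled from the deleted hunks
(qib_verbs.h, init_qib_mregion() and qib_dereg_mr()); the struct layout is simplified
and the example_* function names are hypothetical, so treat it as a sketch of the
scheme rather than the exact upstream code.

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/compiler.h>
#include <linux/jiffies.h>

/* Simplified stand-in for struct qib_mregion: only the fields the scheme needs. */
struct qib_mregion {
	struct completion comp;		/* completed when refcount reaches zero */
	atomic_t refcount;		/* 1 for the user, plus transient I/O references */
};

/* At registration time the region starts with one reference held by the user. */
static void example_init(struct qib_mregion *mr)
{
	init_completion(&mr->comp);
	atomic_set(&mr->refcount, 1);
}

static inline void qib_get_mr(struct qib_mregion *mr)
{
	atomic_inc(&mr->refcount);
}

static inline void qib_put_mr(struct qib_mregion *mr)
{
	if (unlikely(atomic_dec_and_test(&mr->refcount)))
		complete(&mr->comp);	/* wake a waiting deregistration */
}

/* Deregistration: drop the user's reference, then wait (bounded) for I/O to drain. */
static int example_dereg_wait(struct qib_mregion *mr)
{
	qib_put_mr(mr);			/* releases the "returned to user" count */
	if (!wait_for_completion_timeout(&mr->comp, 5 * HZ)) {
		qib_get_mr(mr);		/* still busy after the timeout; restore the count */
		return -EBUSY;
	}
	return 0;
}

In the deleted qib_dereg_mr() and qib_dealloc_fmr() hunks this bounded wait is what
replaces the earlier "return -EBUSY if refcount is non-zero" check, so deregistration
only fails if in-flight SDMA traffic is still holding references after the timeout.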