git.openfabrics.org - ~emulex/tmp/compat-rdma/.git/commitdiff
linux-next-pending: pull in more iw_cxgb4 fixes.
author Steve Wise <swise@opengridcomputing.com>
Thu, 10 Apr 2014 15:02:51 +0000 (10:02 -0500)
committer Steve Wise <swise@opengridcomputing.com>
Thu, 10 Apr 2014 15:02:51 +0000 (10:02 -0500)
Pull in these fixes heading into 3.15:

Hariprasad Shenai (1):
      iw_cxgb4: Use pr_warn_ratelimited

Steve Wise (9):
      iw_cxgb4: use the BAR2/WC path for kernel QPs and T5 devices
      iw_cxgb4: endpoint timeout fixes
      iw_cxgb4: rmb() after reading valid gen bit
      iw_cxgb4: SQ flush fix
      iw_cxgb4: Max fastreg depth depends on DSGL support
      iw_cxgb4: Initialize reserved fields in a FW work request
      iw_cxgb4: Add missing debug stats
      iw_cxgb4: Use uninitialized_var()
      iw_cxgb4: fix over-dereference when terminating

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch [new file with mode: 0644]
linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch [new file with mode: 0644]
linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch [new file with mode: 0644]
linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch [new file with mode: 0644]
linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch [new file with mode: 0644]
linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch [new file with mode: 0644]
linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch [new file with mode: 0644]
linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch [new file with mode: 0644]
linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch [new file with mode: 0644]
linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch [new file with mode: 0644]

diff --git a/linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch b/linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch
new file mode 100644 (file)
index 0000000..a4ae3e1
--- /dev/null
@@ -0,0 +1,305 @@
+commit e41266cc6e372af8353b34de320a4726ebb89e40
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:25 2014 -0500
+
+    iw_cxgb4: use the BAR2/WC path for kernel QPs and T5 devices
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
+index 9489a38..f4fa50a 100644
+--- a/drivers/infiniband/hw/cxgb4/device.c
++++ b/drivers/infiniband/hw/cxgb4/device.c
+@@ -682,7 +682,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
+       idr_destroy(&ctx->dev->hwtid_idr);
+       idr_destroy(&ctx->dev->stid_idr);
+       idr_destroy(&ctx->dev->atid_idr);
+-      iounmap(ctx->dev->rdev.oc_mw_kva);
++      if (ctx->dev->rdev.bar2_kva)
++              iounmap(ctx->dev->rdev.bar2_kva);
++      if (ctx->dev->rdev.oc_mw_kva)
++              iounmap(ctx->dev->rdev.oc_mw_kva);
+       ib_dealloc_device(&ctx->dev->ibdev);
+       ctx->dev = NULL;
+ }
+@@ -722,11 +725,31 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
+       }
+       devp->rdev.lldi = *infop;
+-      devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
+-              (pci_resource_len(devp->rdev.lldi.pdev, 2) -
+-               roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
+-      devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+-                                             devp->rdev.lldi.vr->ocq.size);
++      /*
++       * For T5 devices, we map all of BAR2 with WC.
++       * For T4 devices with onchip qp mem, we map only that part
++       * of BAR2 with WC.
++       */
++      devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
++      if (is_t5(devp->rdev.lldi.adapter_type)) {
++              devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
++                      pci_resource_len(devp->rdev.lldi.pdev, 2));
++              if (!devp->rdev.bar2_kva) {
++                      pr_err(MOD "Unable to ioremap BAR2\n");
++                      return ERR_PTR(-EINVAL);
++              }
++      } else if (ocqp_supported(infop)) {
++              devp->rdev.oc_mw_pa =
++                      pci_resource_start(devp->rdev.lldi.pdev, 2) +
++                      pci_resource_len(devp->rdev.lldi.pdev, 2) -
++                      roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
++              devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
++                      devp->rdev.lldi.vr->ocq.size);
++              if (!devp->rdev.oc_mw_kva) {
++                      pr_err(MOD "Unable to ioremap onchip mem\n");
++                      return ERR_PTR(-EINVAL);
++              }
++      }
+       PDBG(KERN_INFO MOD "ocq memory: "
+              "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
+@@ -1003,9 +1026,11 @@ static int enable_qp_db(int id, void *p, void *data)
+ static void resume_rc_qp(struct c4iw_qp *qp)
+ {
+       spin_lock(&qp->lock);
+-      t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc);
++      t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
++                    is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+       qp->wq.sq.wq_pidx_inc = 0;
+-      t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc);
++      t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
++                    is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+       qp->wq.rq.wq_pidx_inc = 0;
+       spin_unlock(&qp->lock);
+ }
+diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+index e872203..7b8c580 100644
+--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
++++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+@@ -149,6 +149,8 @@ struct c4iw_rdev {
+       struct gen_pool *ocqp_pool;
+       u32 flags;
+       struct cxgb4_lld_info lldi;
++      unsigned long bar2_pa;
++      void __iomem *bar2_kva;
+       unsigned long oc_mw_pa;
+       void __iomem *oc_mw_kva;
+       struct c4iw_stats stats;
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index cb76eb5..244b2ef 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+       wq->db = rdev->lldi.db_reg;
+       wq->gts = rdev->lldi.gts_reg;
+-      if (user) {
+-              wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
+-                                      (wq->sq.qid << rdev->qpshift);
+-              wq->sq.udb &= PAGE_MASK;
+-              wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
+-                                      (wq->rq.qid << rdev->qpshift);
+-              wq->rq.udb &= PAGE_MASK;
++      if (user || is_t5(rdev->lldi.adapter_type)) {
++              u32 off;
++
++              off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
++              if (user) {
++                      wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
++              } else {
++                      off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
++                      wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
++              }
++              off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
++              if (user) {
++                      wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
++              } else {
++                      off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
++                      wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
++              }
+       }
+       wq->rdev = rdev;
+       wq->rq.msn = 1;
+@@ -301,7 +311,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+       PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
+            __func__, wq->sq.qid, wq->rq.qid, wq->db,
+-           (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
++           (__force unsigned long long)wq->sq.udb,
++           (__force unsigned long long)wq->rq.udb);
+       return 0;
+ free_dma:
+@@ -650,9 +661,10 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
+       spin_lock_irqsave(&qhp->rhp->lock, flags);
+       spin_lock(&qhp->lock);
+-      if (qhp->rhp->db_state == NORMAL) {
+-              t4_ring_sq_db(&qhp->wq, inc);
+-      } else {
++      if (qhp->rhp->db_state == NORMAL)
++              t4_ring_sq_db(&qhp->wq, inc,
++                            is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
++      else {
+               add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+               qhp->wq.sq.wq_pidx_inc += inc;
+       }
+@@ -667,9 +679,10 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
+       spin_lock_irqsave(&qhp->rhp->lock, flags);
+       spin_lock(&qhp->lock);
+-      if (qhp->rhp->db_state == NORMAL) {
+-              t4_ring_rq_db(&qhp->wq, inc);
+-      } else {
++      if (qhp->rhp->db_state == NORMAL)
++              t4_ring_rq_db(&qhp->wq, inc,
++                            is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
++      else {
+               add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+               qhp->wq.rq.wq_pidx_inc += inc;
+       }
+@@ -686,7 +699,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+       enum fw_wr_opcodes fw_opcode = 0;
+       enum fw_ri_wr_flags fw_flags;
+       struct c4iw_qp *qhp;
+-      union t4_wr *wqe;
++      union t4_wr *wqe = NULL;
+       u32 num_wrs;
+       struct t4_swsqe *swsqe;
+       unsigned long flag;
+@@ -792,7 +805,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+               idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+       }
+       if (!qhp->rhp->rdev.status_page->db_off) {
+-              t4_ring_sq_db(&qhp->wq, idx);
++              t4_ring_sq_db(&qhp->wq, idx,
++                            is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+               spin_unlock_irqrestore(&qhp->lock, flag);
+       } else {
+               spin_unlock_irqrestore(&qhp->lock, flag);
+@@ -806,7 +820,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ {
+       int err = 0;
+       struct c4iw_qp *qhp;
+-      union t4_recv_wr *wqe;
++      union t4_recv_wr *wqe = NULL;
+       u32 num_wrs;
+       u8 len16 = 0;
+       unsigned long flag;
+@@ -858,7 +872,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+               num_wrs--;
+       }
+       if (!qhp->rhp->rdev.status_page->db_off) {
+-              t4_ring_rq_db(&qhp->wq, idx);
++              t4_ring_rq_db(&qhp->wq, idx,
++                            is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+               spin_unlock_irqrestore(&qhp->lock, flag);
+       } else {
+               spin_unlock_irqrestore(&qhp->lock, flag);
+@@ -1677,11 +1692,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
+               mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+               insert_mmap(ucontext, mm2);
+               mm3->key = uresp.sq_db_gts_key;
+-              mm3->addr = qhp->wq.sq.udb;
++              mm3->addr = (__force u64)qhp->wq.sq.udb;
+               mm3->len = PAGE_SIZE;
+               insert_mmap(ucontext, mm3);
+               mm4->key = uresp.rq_db_gts_key;
+-              mm4->addr = qhp->wq.rq.udb;
++              mm4->addr = (__force u64)qhp->wq.rq.udb;
+               mm4->len = PAGE_SIZE;
+               insert_mmap(ucontext, mm4);
+               if (mm5) {
+diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
+index eeca8b1..931bfd1 100644
+--- a/drivers/infiniband/hw/cxgb4/t4.h
++++ b/drivers/infiniband/hw/cxgb4/t4.h
+@@ -292,7 +292,7 @@ struct t4_sq {
+       unsigned long phys_addr;
+       struct t4_swsqe *sw_sq;
+       struct t4_swsqe *oldest_read;
+-      u64 udb;
++      u64 __iomem *udb;
+       size_t memsize;
+       u32 qid;
+       u16 in_use;
+@@ -314,7 +314,7 @@ struct t4_rq {
+       dma_addr_t dma_addr;
+       DEFINE_DMA_UNMAP_ADDR(mapping);
+       struct t4_swrqe *sw_rq;
+-      u64 udb;
++      u64 __iomem *udb;
+       size_t memsize;
+       u32 qid;
+       u32 msn;
+@@ -435,15 +435,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq)
+               return wq->sq.size * T4_SQ_NUM_SLOTS;
+ }
+-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
++/* This function copies 64 byte coalesced work request to memory
++ * mapped BAR2 space. For coalesced WRs, the SGE fetches data
++ * from the FIFO instead of from Host.
++ */
++static inline void pio_copy(u64 __iomem *dst, u64 *src)
++{
++      int count = 8;
++
++      while (count) {
++              writeq(*src, dst);
++              src++;
++              dst++;
++              count--;
++      }
++}
++
++static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
++                               union t4_wr *wqe)
+ {
++
++      /* Flush host queue memory writes. */
+       wmb();
++      if (t5) {
++              if (inc == 1 && wqe) {
++                      PDBG("%s: WC wq->sq.pidx = %d\n",
++                           __func__, wq->sq.pidx);
++                      pio_copy(wq->sq.udb + 7, (void *)wqe);
++              } else {
++                      PDBG("%s: DB wq->sq.pidx = %d\n",
++                           __func__, wq->sq.pidx);
++                      writel(PIDX_T5(inc), wq->sq.udb);
++              }
++
++              /* Flush user doorbell area writes. */
++              wmb();
++              return;
++      }
+       writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
+ }
+-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
++static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
++                               union t4_recv_wr *wqe)
+ {
++
++      /* Flush host queue memory writes. */
+       wmb();
++      if (t5) {
++              if (inc == 1 && wqe) {
++                      PDBG("%s: WC wq->rq.pidx = %d\n",
++                           __func__, wq->rq.pidx);
++                      pio_copy(wq->rq.udb + 7, (void *)wqe);
++              } else {
++                      PDBG("%s: DB wq->rq.pidx = %d\n",
++                           __func__, wq->rq.pidx);
++                      writel(PIDX_T5(inc), wq->rq.udb);
++              }
++
++              /* Flush user doorbell area writes. */
++              wmb();
++              return;
++      }
+       writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
+ }
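
For context on the WC path added above: when a single work request is posted
(inc == 1) and the WQE pointer is passed down, the whole 64-byte WR is
PIO-copied through the write-combined BAR2 mapping so the T5 SGE can fetch it
from its internal FIFO instead of DMAing it from host memory; any other post
just writes the producer-index doorbell. Below is a minimal userspace sketch
of that decision, under stated assumptions: writeq()/writel() and wmb() are
stubs for the kernel MMIO accessors and barrier, the PIDX_T5() field encoding
is omitted, and the harness is invented for illustration.

    #include <stdint.h>
    #include <stdio.h>

    /* Stubs standing in for the kernel MMIO accessors and barrier. */
    static void writeq(uint64_t val, volatile uint64_t *addr) { *addr = val; }
    static void writel(uint32_t val, volatile uint32_t *addr) { *addr = (uint64_t)val; }
    #define wmb() __sync_synchronize()

    /* 64-byte PIO copy, as in the patch: eight 8-byte MMIO writes. */
    static void pio_copy(volatile uint64_t *dst, const uint64_t *src)
    {
            int count;

            for (count = 0; count < 8; count++)
                    writeq(src[count], &dst[count]);
    }

    /*
     * Ring the SQ doorbell, T5 style: a lone coalescable WR goes through
     * the WC window; anything else just bumps the producer index.
     */
    static void ring_sq_db(volatile uint64_t *udb, uint16_t inc,
                           const uint64_t *wqe)
    {
            wmb();                          /* flush host queue writes */
            if (inc == 1 && wqe)
                    pio_copy(udb + 7, wqe); /* WC copy of the whole WR */
            else
                    writel(inc, (volatile uint32_t *)udb); /* doorbell only */
            wmb();                          /* flush doorbell area writes */
    }

    int main(void)
    {
            uint64_t bar2[16] = {0};        /* pretend WC-mapped page */
            uint64_t wr[8] = {1, 2, 3, 4, 5, 6, 7, 8};

            ring_sq_db(bar2, 1, wr);        /* takes the WC/PIO path */
            printf("bar2[7] = %llu\n", (unsigned long long)bar2[7]);
            return 0;
    }
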
diff --git a/linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch b/linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch
new file mode 100644 (file)
index 0000000..89d6ddc
--- /dev/null
@@ -0,0 +1,268 @@
+commit 3e1e783998f663db1c956b63f5212da21168990f
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:25 2014 -0500
+
+    iw_cxgb4: endpoint timeout fixes
+    
+    1) Timed-out endpoint processing can be starved.  If there is a
+    continual flow of CPL messages into the driver, the endpoint timeout
+    processing can be starved.  This condition exposed the other bugs below.
+    
+    Solution: In process_work(), call process_timedout_eps() after each CPL
+    is processed.
+    
+    2) Connection events can be processed even though the endpoint is on
+    the timeout list.  If the endpoint is scheduled for timeout processing,
+    then we must ignore MPA Start Requests and Replies.
+    
+    Solution: Change stop_ep_timer() to return 1 if the ep has already been
+    queued for timeout processing.  All the callers of stop_ep_timer() need
+    to check this and act accordingly.  There are just a few cases where
+    the caller needs to do something different if stop_ep_timer() returns 1:
+    
+    1) in process_mpa_reply(), ignore the reply and process_timeout()
+    will abort the connection.
+    
+    2) in process_mpa_request, ignore the request and process_timeout()
+    will abort the connection.
+    
+    It is ok for callers of stop_ep_timer() to abort the connection since
+    that will leave the state in ABORTING or DEAD, and process_timeout()
+    now ignores timeouts when the ep is in these states.
+    
+    3) Double insertion on the timeout list.  Since the endpoint timers are
+    used for connection setup and teardown, we need to guard against the
+    possibility that an endpoint is already on the timeout list.  This is
+    a rare condition and only seen under heavy load and in the presence of
+    the above 2 bugs.
+    
+    Solution: In ep_timeout(), don't queue the endpoint if it is already on
+    the queue.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index 02436d5..185452a 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -173,12 +173,15 @@ static void start_ep_timer(struct c4iw_ep *ep)
+       add_timer(&ep->timer);
+ }
+-static void stop_ep_timer(struct c4iw_ep *ep)
++static int stop_ep_timer(struct c4iw_ep *ep)
+ {
+       PDBG("%s ep %p stopping\n", __func__, ep);
+       del_timer_sync(&ep->timer);
+-      if (!test_and_set_bit(TIMEOUT, &ep->com.flags))
++      if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
+               c4iw_put_ep(&ep->com);
++              return 0;
++      }
++      return 1;
+ }
+ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
+@@ -1165,12 +1168,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+       PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+       /*
+-       * Stop mpa timer.  If it expired, then the state has
+-       * changed and we bail since ep_timeout already aborted
+-       * the connection.
++       * Stop mpa timer.  If it expired, then
++       * we ignore the MPA reply.  process_timeout()
++       * will abort the connection.
+        */
+-      stop_ep_timer(ep);
+-      if (ep->com.state != MPA_REQ_SENT)
++      if (stop_ep_timer(ep))
+               return;
+       /*
+@@ -1375,15 +1377,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+       PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+-      if (ep->com.state != MPA_REQ_WAIT)
+-              return;
+-
+       /*
+        * If we get more than the supported amount of private data
+        * then we must fail this connection.
+        */
+       if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               abort_connection(ep, skb, GFP_KERNEL);
+               return;
+       }
+@@ -1413,13 +1412,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+       if (mpa->revision > mpa_rev) {
+               printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+                      " Received = %d\n", __func__, mpa_rev, mpa->revision);
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               abort_connection(ep, skb, GFP_KERNEL);
+               return;
+       }
+       if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               abort_connection(ep, skb, GFP_KERNEL);
+               return;
+       }
+@@ -1430,7 +1429,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+        * Fail if there's too much private data.
+        */
+       if (plen > MPA_MAX_PRIVATE_DATA) {
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               abort_connection(ep, skb, GFP_KERNEL);
+               return;
+       }
+@@ -1439,7 +1438,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+        * If plen does not account for pkt size
+        */
+       if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               abort_connection(ep, skb, GFP_KERNEL);
+               return;
+       }
+@@ -1496,18 +1495,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+            ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+            ep->mpa_attr.p2p_type);
+-      __state_set(&ep->com, MPA_REQ_RCVD);
+-      stop_ep_timer(ep);
+-
+-      /* drive upcall */
+-      mutex_lock(&ep->parent_ep->com.mutex);
+-      if (ep->parent_ep->com.state != DEAD) {
+-              if (connect_request_upcall(ep))
++      /*
++       * If the endpoint timer already expired, then we ignore
++       * the start request.  process_timeout() will abort
++       * the connection.
++       */
++      if (!stop_ep_timer(ep)) {
++              __state_set(&ep->com, MPA_REQ_RCVD);
++
++              /* drive upcall */
++              mutex_lock(&ep->parent_ep->com.mutex);
++              if (ep->parent_ep->com.state != DEAD) {
++                      if (connect_request_upcall(ep))
++                              abort_connection(ep, skb, GFP_KERNEL);
++              } else {
+                       abort_connection(ep, skb, GFP_KERNEL);
+-      } else {
+-              abort_connection(ep, skb, GFP_KERNEL);
++              }
++              mutex_unlock(&ep->parent_ep->com.mutex);
+       }
+-      mutex_unlock(&ep->parent_ep->com.mutex);
+       return;
+ }
+@@ -2265,7 +2270,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
+               disconnect = 0;
+               break;
+       case MORIBUND:
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               if (ep->com.cm_id && ep->com.qp) {
+                       attrs.next_state = C4IW_QP_STATE_IDLE;
+                       c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+@@ -2325,10 +2330,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
+       case CONNECTING:
+               break;
+       case MPA_REQ_WAIT:
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               break;
+       case MPA_REQ_SENT:
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+                       connect_reply_upcall(ep, -ECONNRESET);
+               else {
+@@ -2433,7 +2438,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+               __state_set(&ep->com, MORIBUND);
+               break;
+       case MORIBUND:
+-              stop_ep_timer(ep);
++              (void)stop_ep_timer(ep);
+               if ((ep->com.cm_id) && (ep->com.qp)) {
+                       attrs.next_state = C4IW_QP_STATE_IDLE;
+                       c4iw_modify_qp(ep->com.qp->rhp,
+@@ -3028,7 +3033,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
+               if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+                       close = 1;
+                       if (abrupt) {
+-                              stop_ep_timer(ep);
++                              (void)stop_ep_timer(ep);
+                               ep->com.state = ABORTING;
+                       } else
+                               ep->com.state = MORIBUND;
+@@ -3462,6 +3467,16 @@ static void process_timeout(struct c4iw_ep *ep)
+               __state_set(&ep->com, ABORTING);
+               close_complete_upcall(ep, -ETIMEDOUT);
+               break;
++      case ABORTING:
++      case DEAD:
++
++              /*
++               * These states are expected if the ep timed out at the same
++               * time as another thread was calling stop_ep_timer().
++               * So we silently do nothing for these states.
++               */
++              abort = 0;
++              break;
+       default:
+               WARN(1, "%s unexpected state ep %p tid %u state %u\n",
+                       __func__, ep, ep->hwtid, ep->com.state);
+@@ -3483,6 +3498,8 @@ static void process_timedout_eps(void)
+               tmp = timeout_list.next;
+               list_del(tmp);
++              tmp->next = NULL;
++              tmp->prev = NULL;
+               spin_unlock_irq(&timeout_lock);
+               ep = list_entry(tmp, struct c4iw_ep, entry);
+               process_timeout(ep);
+@@ -3499,6 +3516,7 @@ static void process_work(struct work_struct *work)
+       unsigned int opcode;
+       int ret;
++      process_timedout_eps();
+       while ((skb = skb_dequeue(&rxq))) {
+               rpl = cplhdr(skb);
+               dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
+@@ -3508,8 +3526,8 @@ static void process_work(struct work_struct *work)
+               ret = work_handlers[opcode](dev, skb);
+               if (!ret)
+                       kfree_skb(skb);
++              process_timedout_eps();
+       }
+-      process_timedout_eps();
+ }
+ static DECLARE_WORK(skb_work, process_work);
+@@ -3521,8 +3539,13 @@ static void ep_timeout(unsigned long arg)
+       spin_lock(&timeout_lock);
+       if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
+-              list_add_tail(&ep->entry, &timeout_list);
+-              kickit = 1;
++              /*
++               * Only insert if it is not already on the list.
++               */
++              if (!ep->entry.next) {
++                      list_add_tail(&ep->entry, &timeout_list);
++                      kickit = 1;
++              }
+       }
+       spin_unlock(&timeout_lock);
+       if (kickit)
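
The heart of fix #2 above is the test_and_set_bit(TIMEOUT) handshake:
whichever of stop_ep_timer() and ep_timeout() sets the bit first owns the
endpoint's fate, and the loser backs off.  A hedged, self-contained model of
that handshake, using a C11 atomic flag in place of the kernel bitops (the
names and harness here are illustrative only, not driver code):

    #include <stdatomic.h>
    #include <stdio.h>

    struct ep {
            atomic_flag timeout;    /* stands in for the TIMEOUT bit */
    };

    /* Returns 0 if we stopped the timer first, 1 if the timeout won. */
    static int stop_ep_timer(struct ep *ep)
    {
            if (!atomic_flag_test_and_set(&ep->timeout))
                    return 0;       /* we own the ep; drop the timer ref */
            return 1;               /* process_timeout() will abort */
    }

    /* Timer callback: only queue the ep if we were first to set the bit. */
    static void ep_timeout(struct ep *ep)
    {
            if (!atomic_flag_test_and_set(&ep->timeout))
                    printf("queue ep for timeout processing\n");
    }

    int main(void)
    {
            struct ep ep = { ATOMIC_FLAG_INIT };

            ep_timeout(&ep);        /* the timeout fires first ... */
            if (stop_ep_timer(&ep))
                    printf("ignore the MPA message; timeout path aborts\n");
            return 0;
    }
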
diff --git a/linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch b/linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch
new file mode 100644 (file)
index 0000000..eca0ca7
--- /dev/null
@@ -0,0 +1,26 @@
+commit 6dc9592394753f4bd1bbe8bfed01feba0202c594
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:26 2014 -0500
+
+    iw_cxgb4: rmb() after reading valid gen bit
+    
+    Some HW platforms can reorder read operations, so we must rmb() after
+    we see a valid gen bit in a CQE but before we read any other fields from
+    the CQE.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
+index 931bfd1..1f329fa 100644
+--- a/drivers/infiniband/hw/cxgb4/t4.h
++++ b/drivers/infiniband/hw/cxgb4/t4.h
+@@ -620,6 +620,9 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
+               printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+               BUG_ON(1);
+       } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
++
++              /* Ensure CQE is flushed to memory */
++              rmb();
+               *cqe = &cq->queue[cq->cidx];
+               ret = 0;
+       } else
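
The bug here is the classic dependent-read ordering problem for a DMA'd ring:
the valid/gen bit must be observed before any other CQE field is loaded, or a
weakly-ordered CPU may return stale fields read earlier.  A userspace
analogue, assuming a C11 acquire load in place of the kernel's rmb() and an
invented two-field entry layout:

    #include <stdatomic.h>
    #include <stdint.h>

    struct cqe {
            uint64_t payload;               /* written by the device first */
            _Atomic uint32_t valid_gen;     /* written by the device last */
    };

    /* Returns nonzero and fills *out once a valid entry is observed. */
    static int poll_cqe(struct cqe *e, uint32_t gen, uint64_t *out)
    {
            /* Acquire orders the payload read after the gen-bit read,
             * playing the role of the rmb() added by the patch. */
            if (atomic_load_explicit(&e->valid_gen,
                                     memory_order_acquire) != gen)
                    return 0;
            *out = e->payload;
            return 1;
    }

    int main(void)
    {
            struct cqe e = { .payload = 42 };
            uint64_t v;

            atomic_store_explicit(&e.valid_gen, 1, memory_order_release);
            return poll_cqe(&e, 1, &v) && v == 42 ? 0 : 1;
    }
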
diff --git a/linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch b/linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch
new file mode 100644 (file)
index 0000000..5eb6e80
--- /dev/null
@@ -0,0 +1,114 @@
+commit f8ac55a8417e89b32cb44d8e8598fe0e2ccbfdbc
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:26 2014 -0500
+
+    iw_cxgb4: SQ flush fix
+    
+    There is a race when moving a QP from RTS->CLOSING where a SQ work
+    request could be posted after the FW receives the RDMA_RI/FINI WR.
+    The SQ work request will never get processed, and should be completed
+    with FLUSHED status.  Function c4iw_flush_sq(), however, was dropping
+    the oldest SQ work request when in CLOSING or IDLE states, instead of
+    completing the pending work request. If that oldest pending work request
+    was actually complete and has a CQE in the CQ, then when that CQE is
+    processed in poll_cq, we'll BUG_ON() due to the inconsistent SQ/CQ state.
+    
+    This is a very small timing hole and has only been hit once so far.
+    
+    The fix is two-fold:
+    
+    1) c4iw_flush_sq() MUST always flush all non-completed WRs with FLUSHED
+    status regardless of the QP state.
+    
+    2) In c4iw_modify_rc_qp(), always set the "in error" bit on the queue
+    before moving the state out of RTS.  This ensures that the state
+    transition will not happen while another thread is in post_rc_send(),
+    because set_state() and post_rc_send() both acquire the qp spinlock.
+    Also, once we transition the state out of RTS, subsequent calls to
+    post_rc_send() will fail because the "in error" bit is set.  I don't
+    think this fully closes the race where the FW can get a FINI followed by
+    a SQ work request being posted (because they are posted to different EQs),
+    but the #1 fix will handle the issue by flushing the SQ work request.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
+index ce468e5..e17b155 100644
+--- a/drivers/infiniband/hw/cxgb4/cq.c
++++ b/drivers/infiniband/hw/cxgb4/cq.c
+@@ -235,27 +235,21 @@ int c4iw_flush_sq(struct c4iw_qp *qhp)
+       struct t4_cq *cq = &chp->cq;
+       int idx;
+       struct t4_swsqe *swsqe;
+-      int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
+-                      qhp->attr.state != C4IW_QP_STATE_IDLE);
+       if (wq->sq.flush_cidx == -1)
+               wq->sq.flush_cidx = wq->sq.cidx;
+       idx = wq->sq.flush_cidx;
+       BUG_ON(idx >= wq->sq.size);
+       while (idx != wq->sq.pidx) {
+-              if (error) {
+-                      swsqe = &wq->sq.sw_sq[idx];
+-                      BUG_ON(swsqe->flushed);
+-                      swsqe->flushed = 1;
+-                      insert_sq_cqe(wq, cq, swsqe);
+-                      if (wq->sq.oldest_read == swsqe) {
+-                              BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+-                              advance_oldest_read(wq);
+-                      }
+-                      flushed++;
+-              } else {
+-                      t4_sq_consume(wq);
++              swsqe = &wq->sq.sw_sq[idx];
++              BUG_ON(swsqe->flushed);
++              swsqe->flushed = 1;
++              insert_sq_cqe(wq, cq, swsqe);
++              if (wq->sq.oldest_read == swsqe) {
++                      BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
++                      advance_oldest_read(wq);
+               }
++              flushed++;
+               if (++idx == wq->sq.size)
+                       idx = 0;
+       }
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index 244b2ef..ed7751c 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -1367,6 +1367,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+               switch (attrs->next_state) {
+               case C4IW_QP_STATE_CLOSING:
+                       BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
++                      t4_set_wq_in_error(&qhp->wq);
+                       set_state(qhp, C4IW_QP_STATE_CLOSING);
+                       ep = qhp->ep;
+                       if (!internal) {
+@@ -1374,16 +1375,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+                               disconnect = 1;
+                               c4iw_get_ep(&qhp->ep->com);
+                       }
+-                      t4_set_wq_in_error(&qhp->wq);
+                       ret = rdma_fini(rhp, qhp, ep);
+                       if (ret)
+                               goto err;
+                       break;
+               case C4IW_QP_STATE_TERMINATE:
++                      t4_set_wq_in_error(&qhp->wq);
+                       set_state(qhp, C4IW_QP_STATE_TERMINATE);
+                       qhp->attr.layer_etype = attrs->layer_etype;
+                       qhp->attr.ecode = attrs->ecode;
+-                      t4_set_wq_in_error(&qhp->wq);
+                       ep = qhp->ep;
+                       disconnect = 1;
+                       if (!internal)
+@@ -1396,8 +1396,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+                       c4iw_get_ep(&qhp->ep->com);
+                       break;
+               case C4IW_QP_STATE_ERROR:
+-                      set_state(qhp, C4IW_QP_STATE_ERROR);
+                       t4_set_wq_in_error(&qhp->wq);
++                      set_state(qhp, C4IW_QP_STATE_ERROR);
+                       if (!internal) {
+                               abort = 1;
+                               disconnect = 1;
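
Fix #2 rests on an ordering invariant: the "in error" bit must become visible
to any poster before the state leaves RTS, and both sides take the same qp
spinlock, so a post can no longer slip in between the state change and the
error marking.  A sketch of that invariant under stated assumptions (pthreads
standing in for the qp spinlock; all names hypothetical):

    #include <pthread.h>
    #include <stdbool.h>
    #include <errno.h>

    enum qp_state { RTS, CLOSING };

    struct qp {
            pthread_mutex_t lock;   /* stands in for the qp spinlock */
            bool wq_in_error;       /* t4_set_wq_in_error() analogue */
            enum qp_state state;
    };

    /* Posting path: fails as soon as the error bit is set. */
    static int post_send(struct qp *qp)
    {
            int ret = 0;

            pthread_mutex_lock(&qp->lock);
            if (qp->wq_in_error)
                    ret = -EINVAL;  /* WR will be completed as FLUSHED */
            /* ... else build and ring the work request ... */
            pthread_mutex_unlock(&qp->lock);
            return ret;
    }

    /* Transition path: error bit BEFORE the state change, same lock. */
    static void move_to_closing(struct qp *qp)
    {
            pthread_mutex_lock(&qp->lock);
            qp->wq_in_error = true; /* order matters: set this first */
            qp->state = CLOSING;
            pthread_mutex_unlock(&qp->lock);
    }

    int main(void)
    {
            struct qp qp = { PTHREAD_MUTEX_INITIALIZER, false, RTS };

            move_to_closing(&qp);
            return post_send(&qp) == -EINVAL ? 0 : 1;
    }
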
diff --git a/linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch b/linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch
new file mode 100644 (file)
index 0000000..82970d6
--- /dev/null
@@ -0,0 +1,59 @@
+commit a250cb1b779d43f54e5e729bcdc7859e59580a4a
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:27 2014 -0500
+
+    iw_cxgb4: Max fastreg depth depends on DSGL support
+    
+    The max depth of a fastreg mr depends on whether the device supports
+    DSGL or not.  So compute it dynamically based on the device support and
+    the module use_dsgl option.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
+index 7942925..a94a3e1 100644
+--- a/drivers/infiniband/hw/cxgb4/provider.c
++++ b/drivers/infiniband/hw/cxgb4/provider.c
+@@ -328,7 +328,7 @@ static int c4iw_query_device(struct ib_device *ibdev,
+       props->max_mr = c4iw_num_stags(&dev->rdev);
+       props->max_pd = T4_MAX_NUM_PD;
+       props->local_ca_ack_delay = 0;
+-      props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH;
++      props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl);
+       return 0;
+ }
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index ed7751c..9250127 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -566,7 +566,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
+       int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+       int rem;
+-      if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH)
++      if (wr->wr.fast_reg.page_list_len >
++          t4_max_fr_depth(use_dsgl))
+               return -EINVAL;
+       wqe->fr.qpbinde_to_dcacpu = 0;
+diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
+index 1f329fa..2178f31 100644
+--- a/drivers/infiniband/hw/cxgb4/t4.h
++++ b/drivers/infiniband/hw/cxgb4/t4.h
+@@ -84,7 +84,14 @@ struct t4_status_page {
+                       sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
+ #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
+                       sizeof(struct fw_ri_immd)) & ~31UL)
+-#define T4_MAX_FR_DEPTH (1024 / sizeof(u64))
++#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
++#define T4_MAX_FR_DSGL 1024
++#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64))
++
++static inline int t4_max_fr_depth(int use_dsgl)
++{
++      return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH;
++}
+ #define T4_RQ_NUM_SLOTS 2
+ #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
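
For concreteness, the DSGL branch of the new helper works out to
1024 / sizeof(u64) = 128 page-list entries, while the immediate-data branch
is bounded by how much of the SQ work request can carry the page list
(T4_MAX_FR_IMMD / sizeof(u64)).  A small standalone check, assuming a made-up
immediate-data budget in place of the real T4_MAX_FR_IMMD:

    #include <stdio.h>
    #include <stdint.h>

    #define T4_MAX_FR_DSGL  1024    /* bytes of page list via DSGL */
    #define MAX_FR_IMMD     480     /* hypothetical immediate budget */

    static int max_fr_depth(int use_dsgl)
    {
            return (int)((use_dsgl ? T4_MAX_FR_DSGL : MAX_FR_IMMD) /
                         sizeof(uint64_t));
    }

    int main(void)
    {
            printf("dsgl depth = %d\n", max_fr_depth(1));   /* 128 */
            printf("immd depth = %d\n", max_fr_depth(0));   /* 60 */
            return 0;
    }
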
diff --git a/linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch b/linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch
new file mode 100644 (file)
index 0000000..663ed46
--- /dev/null
@@ -0,0 +1,23 @@
+commit 3100a13f0ddfa805fa6596b9efc3d93037025ef1
+Author: Hariprasad Shenai <hariprasad@chelsio.com>
+Date:   Wed Apr 9 09:38:27 2014 -0500
+
+    iw_cxgb4: Use pr_warn_ratelimited
+    
+    Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
+index cdef4d7..94b5fd9 100644
+--- a/drivers/infiniband/hw/cxgb4/resource.c
++++ b/drivers/infiniband/hw/cxgb4/resource.c
+@@ -322,8 +322,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
+       unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
+       PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+       if (!addr)
+-              printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n",
+-                     pci_name(rdev->lldi.pdev));
++              pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
++                                  pci_name(rdev->lldi.pdev));
+       mutex_lock(&rdev->stats.lock);
+       if (addr) {
+               rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
diff --git a/linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch b/linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch
new file mode 100644 (file)
index 0000000..f8ad379
--- /dev/null
@@ -0,0 +1,21 @@
+commit d9d8593b9727371cf3cd0221b51a2563d58286c6
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:27 2014 -0500
+
+    iw_cxgb4: Initialize reserved fields in a FW work request
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index 9250127..8e04ecf 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -436,6 +436,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
+       default:
+               return -EINVAL;
+       }
++      wqe->send.r3 = 0;
++      wqe->send.r4 = 0;
+       plen = 0;
+       if (wr->num_sge) {
diff --git a/linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch b/linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch
new file mode 100644 (file)
index 0000000..c694ad1
--- /dev/null
@@ -0,0 +1,44 @@
+commit 40bf01fc14ba86833a0134c5f02795298aeb817f
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:28 2014 -0500
+
+    iw_cxgb4: Add missing debug stats
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
+index f9ca072..ec7a298 100644
+--- a/drivers/infiniband/hw/cxgb4/mem.c
++++ b/drivers/infiniband/hw/cxgb4/mem.c
+@@ -259,8 +259,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
+       if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
+               stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
+-              if (!stag_idx)
++              if (!stag_idx) {
++                      mutex_lock(&rdev->stats.lock);
++                      rdev->stats.stag.fail++;
++                      mutex_unlock(&rdev->stats.lock);
+                       return -ENOMEM;
++              }
+               mutex_lock(&rdev->stats.lock);
+               rdev->stats.stag.cur += 32;
+               if (rdev->stats.stag.cur > rdev->stats.stag.max)
+diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
+index 94b5fd9..67df71a 100644
+--- a/drivers/infiniband/hw/cxgb4/resource.c
++++ b/drivers/infiniband/hw/cxgb4/resource.c
+@@ -179,8 +179,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
+               kfree(entry);
+       } else {
+               qid = c4iw_get_resource(&rdev->resource.qid_table);
+-              if (!qid)
++              if (!qid) {
++                      mutex_lock(&rdev->stats.lock);
++                      rdev->stats.qid.fail++;
++                      mutex_unlock(&rdev->stats.lock);
+                       goto out;
++              }
+               mutex_lock(&rdev->stats.lock);
+               rdev->stats.qid.cur += rdev->qpmask + 1;
+               mutex_unlock(&rdev->stats.lock);
diff --git a/linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch b/linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch
new file mode 100644 (file)
index 0000000..b4e58d4
--- /dev/null
@@ -0,0 +1,21 @@
+commit ce77b41daa0acdb7258b0462a48399c9924d6226
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:38:28 2014 -0500
+
+    iw_cxgb4: Use uninitialized_var()
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
+index e17b155..cfaa56a 100644
+--- a/drivers/infiniband/hw/cxgb4/cq.c
++++ b/drivers/infiniband/hw/cxgb4/cq.c
+@@ -672,7 +672,7 @@ skip_cqe:
+ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+ {
+       struct c4iw_qp *qhp = NULL;
+-      struct t4_cqe cqe = {0, 0}, *rd_cqe;
++      struct t4_cqe uninitialized_var(cqe), *rd_cqe;
+       struct t4_wq *wq;
+       u32 credit = 0;
+       u8 cqe_flushed;
diff --git a/linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch b/linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch
new file mode 100644 (file)
index 0000000..de39ee4
--- /dev/null
@@ -0,0 +1,32 @@
+commit 6f22a5596f9e0899bdb2d021377b969910492dc9
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Wed Apr 9 09:40:37 2014 -0500
+
+    iw_cxgb4: fix over-dereference when terminating
+    
+    Need to get the endpoint reference before calling
+    rdma_fini(), which might fail, causing us to never
+    take the reference.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index 8e04ecf..a558aa7 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -1389,6 +1389,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+                       qhp->attr.ecode = attrs->ecode;
+                       ep = qhp->ep;
+                       disconnect = 1;
++                      c4iw_get_ep(&qhp->ep->com);
+                       if (!internal)
+                               terminate = 1;
+                       else {
+@@ -1396,7 +1397,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+                               if (ret)
+                                       goto err;
+                       }
+-                      c4iw_get_ep(&qhp->ep->com);
+                       break;
+               case C4IW_QP_STATE_ERROR:
+                       t4_set_wq_in_error(&qhp->wq);
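
The fix above encodes a general refcounting rule: take the reference before
the call whose failure path assumes it is held, otherwise the error path puts
a reference that was never taken.  A minimal model of the corrected ordering,
with plain counters and hypothetical names:

    #include <stdio.h>

    struct ep { int refcnt; };

    static void get_ep(struct ep *ep) { ep->refcnt++; }
    static void put_ep(struct ep *ep) { ep->refcnt--; }

    /* Stand-in for rdma_fini(): may fail. */
    static int rdma_fini(int fail) { return fail ? -1 : 0; }

    static int terminate(struct ep *ep, int internal, int fail)
    {
            get_ep(ep);             /* moved BEFORE the fallible call */
            if (internal && rdma_fini(fail)) {
                    put_ep(ep);     /* error-path put is now balanced */
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            struct ep ep = { 1 };

            terminate(&ep, 1, 1);
            printf("refcnt = %d\n", ep.refcnt); /* stays balanced at 1 */
            return 0;
    }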