From 8019591ef73697c5715f55030511016c36a8dc40 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 10 Apr 2014 10:02:51 -0500 Subject: [PATCH] linux-next-pending: pull in more iw_cxgb4 fixes. Pull in these heading into 3.15: Hariprasad Shenai (1): iw_cxgb4: Use pr_warn_ratelimited Steve Wise (9): iw_cxgb4: use the BAR2/WC path for kernel QPs and T5 devices iw_cxgb4: endpoint timeout fixes iw_cxgb4: rmb() after reading valid gen bit iw_cxgb4: SQ flush fix iw_cxgb4: Max fastreg depth depends on DSGL support iw_cxgb4: Initialize reserved fields in a FW work request iw_cxgb4: Add missing debug stats iw_cxgb4: Use uninitialized_var() iw_cxgb4: fix over-dereference when terminating Signed-off-by: Steve Wise --- ...C-path-for-kernel-QPs-and-T5-devices.patch | 305 ++++++++++++++++++ ...049-iw_cxgb4--endpoint-timeout-fixes.patch | 268 +++++++++++++++ ...4--rmb()-after-reading-valid-gen-bit.patch | 26 ++ .../0051-iw_cxgb4--SQ-flush-fix.patch | 114 +++++++ ...astreg-depth-depends-on-DSGL-support.patch | 59 ++++ ...53-iw_cxgb4--Use-pr_warn_ratelimited.patch | 23 ++ ...reserved-fields-in-a-FW-work-request.patch | 21 ++ ...55-iw_cxgb4--Add-missing-debug-stats.patch | 44 +++ ...56-iw_cxgb4--Use-uninitialized_var().patch | 21 ++ ...ix-over-dereference-when-terminating.patch | 32 ++ 10 files changed, 913 insertions(+) create mode 100644 linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch create mode 100644 linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch create mode 100644 linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch create mode 100644 linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch create mode 100644 linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch create mode 100644 linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch create mode 100644 linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch create mode 100644 linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch create mode 100644 linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch create mode 100644 linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch diff --git a/linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch b/linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch new file mode 100644 index 0000000..a4ae3e1 --- /dev/null +++ b/linux-next-pending/0048-iw_cxgb4--use-the-BAR2-WC-path-for-kernel-QPs-and-T5-devices.patch @@ -0,0 +1,305 @@ +commit e41266cc6e372af8353b34de320a4726ebb89e40 +Author: Steve Wise +Date: Wed Apr 9 09:38:25 2014 -0500 + + iw_cxgb4: use the BAR2/WC path for kernel QPs and T5 devices + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c +index 9489a38..f4fa50a 100644 +--- a/drivers/infiniband/hw/cxgb4/device.c ++++ b/drivers/infiniband/hw/cxgb4/device.c +@@ -682,7 +682,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx) + idr_destroy(&ctx->dev->hwtid_idr); + idr_destroy(&ctx->dev->stid_idr); + idr_destroy(&ctx->dev->atid_idr); +- iounmap(ctx->dev->rdev.oc_mw_kva); ++ if (ctx->dev->rdev.bar2_kva) ++ iounmap(ctx->dev->rdev.bar2_kva); ++ if (ctx->dev->rdev.oc_mw_kva) ++ iounmap(ctx->dev->rdev.oc_mw_kva); + ib_dealloc_device(&ctx->dev->ibdev); + ctx->dev = NULL; + } +@@ -722,11 +725,31 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) + } + devp->rdev.lldi = *infop; + +- devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + +- (pci_resource_len(devp->rdev.lldi.pdev, 2) - +- roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); +- devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, +- devp->rdev.lldi.vr->ocq.size); ++ /* ++ * For T5 devices, we map all of BAR2 with WC. ++ * For T4 devices with onchip qp mem, we map only that part ++ * of BAR2 with WC. ++ */ ++ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); ++ if (is_t5(devp->rdev.lldi.adapter_type)) { ++ devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, ++ pci_resource_len(devp->rdev.lldi.pdev, 2)); ++ if (!devp->rdev.bar2_kva) { ++ pr_err(MOD "Unable to ioremap BAR2\n"); ++ return ERR_PTR(-EINVAL); ++ } ++ } else if (ocqp_supported(infop)) { ++ devp->rdev.oc_mw_pa = ++ pci_resource_start(devp->rdev.lldi.pdev, 2) + ++ pci_resource_len(devp->rdev.lldi.pdev, 2) - ++ roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size); ++ devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, ++ devp->rdev.lldi.vr->ocq.size); ++ if (!devp->rdev.oc_mw_kva) { ++ pr_err(MOD "Unable to ioremap onchip mem\n"); ++ return ERR_PTR(-EINVAL); ++ } ++ } + + PDBG(KERN_INFO MOD "ocq memory: " + "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", +@@ -1003,9 +1026,11 @@ static int enable_qp_db(int id, void *p, void *data) + static void resume_rc_qp(struct c4iw_qp *qp) + { + spin_lock(&qp->lock); +- t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc); ++ t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, ++ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.sq.wq_pidx_inc = 0; +- t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc); ++ t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, ++ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); + } +diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +index e872203..7b8c580 100644 +--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h ++++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +@@ -149,6 +149,8 @@ struct c4iw_rdev { + struct gen_pool *ocqp_pool; + u32 flags; + struct cxgb4_lld_info lldi; ++ unsigned long bar2_pa; ++ void __iomem *bar2_kva; + unsigned long oc_mw_pa; + void __iomem *oc_mw_kva; + struct c4iw_stats stats; +diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c +index cb76eb5..244b2ef 100644 +--- a/drivers/infiniband/hw/cxgb4/qp.c ++++ b/drivers/infiniband/hw/cxgb4/qp.c +@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + + wq->db = rdev->lldi.db_reg; + wq->gts = rdev->lldi.gts_reg; +- if (user) { +- wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + +- (wq->sq.qid << rdev->qpshift); +- wq->sq.udb &= PAGE_MASK; +- wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + +- (wq->rq.qid << rdev->qpshift); +- wq->rq.udb &= PAGE_MASK; ++ if (user || is_t5(rdev->lldi.adapter_type)) { ++ u32 off; ++ ++ off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; ++ if (user) { ++ wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); ++ } else { ++ off += 128 * (wq->sq.qid & rdev->qpmask) + 8; ++ wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); ++ } ++ off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; ++ if (user) { ++ wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); ++ } else { ++ off += 128 * (wq->rq.qid & rdev->qpmask) + 8; ++ wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); ++ } + } + wq->rdev = rdev; + wq->rq.msn = 1; +@@ -301,7 +311,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", + __func__, wq->sq.qid, wq->rq.qid, wq->db, +- (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); ++ (__force unsigned long long)wq->sq.udb, ++ (__force unsigned long long)wq->rq.udb); + + return 0; + free_dma: +@@ -650,9 +661,10 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); +- if (qhp->rhp->db_state == NORMAL) { +- t4_ring_sq_db(&qhp->wq, inc); +- } else { ++ if (qhp->rhp->db_state == NORMAL) ++ t4_ring_sq_db(&qhp->wq, inc, ++ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); ++ else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } +@@ -667,9 +679,10 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); +- if (qhp->rhp->db_state == NORMAL) { +- t4_ring_rq_db(&qhp->wq, inc); +- } else { ++ if (qhp->rhp->db_state == NORMAL) ++ t4_ring_rq_db(&qhp->wq, inc, ++ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); ++ else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } +@@ -686,7 +699,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + enum fw_wr_opcodes fw_opcode = 0; + enum fw_ri_wr_flags fw_flags; + struct c4iw_qp *qhp; +- union t4_wr *wqe; ++ union t4_wr *wqe = NULL; + u32 num_wrs; + struct t4_swsqe *swsqe; + unsigned long flag; +@@ -792,7 +805,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + } + if (!qhp->rhp->rdev.status_page->db_off) { +- t4_ring_sq_db(&qhp->wq, idx); ++ t4_ring_sq_db(&qhp->wq, idx, ++ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); +@@ -806,7 +820,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + { + int err = 0; + struct c4iw_qp *qhp; +- union t4_recv_wr *wqe; ++ union t4_recv_wr *wqe = NULL; + u32 num_wrs; + u8 len16 = 0; + unsigned long flag; +@@ -858,7 +872,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + num_wrs--; + } + if (!qhp->rhp->rdev.status_page->db_off) { +- t4_ring_rq_db(&qhp->wq, idx); ++ t4_ring_rq_db(&qhp->wq, idx, ++ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); +@@ -1677,11 +1692,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, + mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, mm2); + mm3->key = uresp.sq_db_gts_key; +- mm3->addr = qhp->wq.sq.udb; ++ mm3->addr = (__force u64)qhp->wq.sq.udb; + mm3->len = PAGE_SIZE; + insert_mmap(ucontext, mm3); + mm4->key = uresp.rq_db_gts_key; +- mm4->addr = qhp->wq.rq.udb; ++ mm4->addr = (__force u64)qhp->wq.rq.udb; + mm4->len = PAGE_SIZE; + insert_mmap(ucontext, mm4); + if (mm5) { +diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h +index eeca8b1..931bfd1 100644 +--- a/drivers/infiniband/hw/cxgb4/t4.h ++++ b/drivers/infiniband/hw/cxgb4/t4.h +@@ -292,7 +292,7 @@ struct t4_sq { + unsigned long phys_addr; + struct t4_swsqe *sw_sq; + struct t4_swsqe *oldest_read; +- u64 udb; ++ u64 __iomem *udb; + size_t memsize; + u32 qid; + u16 in_use; +@@ -314,7 +314,7 @@ struct t4_rq { + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; +- u64 udb; ++ u64 __iomem *udb; + size_t memsize; + u32 qid; + u32 msn; +@@ -435,15 +435,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq) + return wq->sq.size * T4_SQ_NUM_SLOTS; + } + +-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) ++/* This function copies 64 byte coalesced work request to memory ++ * mapped BAR2 space. For coalesced WRs, the SGE fetches data ++ * from the FIFO instead of from Host. ++ */ ++static inline void pio_copy(u64 __iomem *dst, u64 *src) ++{ ++ int count = 8; ++ ++ while (count) { ++ writeq(*src, dst); ++ src++; ++ dst++; ++ count--; ++ } ++} ++ ++static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, ++ union t4_wr *wqe) + { ++ ++ /* Flush host queue memory writes. */ + wmb(); ++ if (t5) { ++ if (inc == 1 && wqe) { ++ PDBG("%s: WC wq->sq.pidx = %d\n", ++ __func__, wq->sq.pidx); ++ pio_copy(wq->sq.udb + 7, (void *)wqe); ++ } else { ++ PDBG("%s: DB wq->sq.pidx = %d\n", ++ __func__, wq->sq.pidx); ++ writel(PIDX_T5(inc), wq->sq.udb); ++ } ++ ++ /* Flush user doorbell area writes. */ ++ wmb(); ++ return; ++ } + writel(QID(wq->sq.qid) | PIDX(inc), wq->db); + } + +-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) ++static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, ++ union t4_recv_wr *wqe) + { ++ ++ /* Flush host queue memory writes. */ + wmb(); ++ if (t5) { ++ if (inc == 1 && wqe) { ++ PDBG("%s: WC wq->rq.pidx = %d\n", ++ __func__, wq->rq.pidx); ++ pio_copy(wq->rq.udb + 7, (void *)wqe); ++ } else { ++ PDBG("%s: DB wq->rq.pidx = %d\n", ++ __func__, wq->rq.pidx); ++ writel(PIDX_T5(inc), wq->rq.udb); ++ } ++ ++ /* Flush user doorbell area writes. */ ++ wmb(); ++ return; ++ } + writel(QID(wq->rq.qid) | PIDX(inc), wq->db); + } + diff --git a/linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch b/linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch new file mode 100644 index 0000000..89d6ddc --- /dev/null +++ b/linux-next-pending/0049-iw_cxgb4--endpoint-timeout-fixes.patch @@ -0,0 +1,268 @@ +commit 3e1e783998f663db1c956b63f5212da21168990f +Author: Steve Wise +Date: Wed Apr 9 09:38:25 2014 -0500 + + iw_cxgb4: endpoint timeout fixes + + 1) timedout endpoint processing can be starved. If there is continual + CPL messages flowing into the driver, the endpoint timeout processing + can be starved. This condition exposed the other bugs below. + + Solution: In process_work(), call process_timedout_eps() after each CPL + is processed. + + 2) Connection events can be processed even though the endpoint is on + the timeout list. If the endpoint is scheduled for timeout processing, + then we must ignore MPA Start Requests and Replies. + + Solution: Change stop_ep_timer() to return 1 if the ep has already been + queued for timeout processing. All the callers of stop_ep_timer() need + to check this and act accordingly. There are just a few cases where + the caller needs to do something different if stop_ep_timer() returns 1: + + 1) in process_mpa_reply(), ignore the reply and process_timeout() + will abort the connection. + + 2) in process_mpa_request, ignore the request and process_timeout() + will abort the connection. + + It is ok for callers of stop_ep_timer() to abort the connection since + that will leave the state in ABORTING or DEAD, and process_timeout() + now ignores timeouts when the ep is in these states. + + 3) Double insertion on the timeout list. Since the endpoint timers are + used for connection setup and teardown, we need to guard against the + possibility that an endpoint is already on the timeout list. This is + a rare condition and only seen under heavy load and in the presense of + the above 2 bugs. + + Solution: In ep_timeout(), don't queue the endpoint if it is already on + the queue. + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c +index 02436d5..185452a 100644 +--- a/drivers/infiniband/hw/cxgb4/cm.c ++++ b/drivers/infiniband/hw/cxgb4/cm.c +@@ -173,12 +173,15 @@ static void start_ep_timer(struct c4iw_ep *ep) + add_timer(&ep->timer); + } + +-static void stop_ep_timer(struct c4iw_ep *ep) ++static int stop_ep_timer(struct c4iw_ep *ep) + { + PDBG("%s ep %p stopping\n", __func__, ep); + del_timer_sync(&ep->timer); +- if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) ++ if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + c4iw_put_ep(&ep->com); ++ return 0; ++ } ++ return 1; + } + + static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, +@@ -1165,12 +1168,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + /* +- * Stop mpa timer. If it expired, then the state has +- * changed and we bail since ep_timeout already aborted +- * the connection. ++ * Stop mpa timer. If it expired, then ++ * we ignore the MPA reply. process_timeout() ++ * will abort the connection. + */ +- stop_ep_timer(ep); +- if (ep->com.state != MPA_REQ_SENT) ++ if (stop_ep_timer(ep)) + return; + + /* +@@ -1375,15 +1377,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + +- if (ep->com.state != MPA_REQ_WAIT) +- return; +- + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } +@@ -1413,13 +1412,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } +@@ -1430,7 +1429,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } +@@ -1439,7 +1438,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } +@@ -1496,18 +1495,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type); + +- __state_set(&ep->com, MPA_REQ_RCVD); +- stop_ep_timer(ep); +- +- /* drive upcall */ +- mutex_lock(&ep->parent_ep->com.mutex); +- if (ep->parent_ep->com.state != DEAD) { +- if (connect_request_upcall(ep)) ++ /* ++ * If the endpoint timer already expired, then we ignore ++ * the start request. process_timeout() will abort ++ * the connection. ++ */ ++ if (!stop_ep_timer(ep)) { ++ __state_set(&ep->com, MPA_REQ_RCVD); ++ ++ /* drive upcall */ ++ mutex_lock(&ep->parent_ep->com.mutex); ++ if (ep->parent_ep->com.state != DEAD) { ++ if (connect_request_upcall(ep)) ++ abort_connection(ep, skb, GFP_KERNEL); ++ } else { + abort_connection(ep, skb, GFP_KERNEL); +- } else { +- abort_connection(ep, skb, GFP_KERNEL); ++ } ++ mutex_unlock(&ep->parent_ep->com.mutex); + } +- mutex_unlock(&ep->parent_ep->com.mutex); + return; + } + +@@ -2265,7 +2270,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) + disconnect = 0; + break; + case MORIBUND: +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, +@@ -2325,10 +2330,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) + case CONNECTING: + break; + case MPA_REQ_WAIT: +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + break; + case MPA_REQ_SENT: +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) + connect_reply_upcall(ep, -ECONNRESET); + else { +@@ -2433,7 +2438,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, +@@ -3028,7 +3033,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { +- stop_ep_timer(ep); ++ (void)stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; +@@ -3462,6 +3467,16 @@ static void process_timeout(struct c4iw_ep *ep) + __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); + break; ++ case ABORTING: ++ case DEAD: ++ ++ /* ++ * These states are expected if the ep timed out at the same ++ * time as another thread was calling stop_ep_timer(). ++ * So we silently do nothing for these states. ++ */ ++ abort = 0; ++ break; + default: + WARN(1, "%s unexpected state ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, ep->com.state); +@@ -3483,6 +3498,8 @@ static void process_timedout_eps(void) + + tmp = timeout_list.next; + list_del(tmp); ++ tmp->next = NULL; ++ tmp->prev = NULL; + spin_unlock_irq(&timeout_lock); + ep = list_entry(tmp, struct c4iw_ep, entry); + process_timeout(ep); +@@ -3499,6 +3516,7 @@ static void process_work(struct work_struct *work) + unsigned int opcode; + int ret; + ++ process_timedout_eps(); + while ((skb = skb_dequeue(&rxq))) { + rpl = cplhdr(skb); + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); +@@ -3508,8 +3526,8 @@ static void process_work(struct work_struct *work) + ret = work_handlers[opcode](dev, skb); + if (!ret) + kfree_skb(skb); ++ process_timedout_eps(); + } +- process_timedout_eps(); + } + + static DECLARE_WORK(skb_work, process_work); +@@ -3521,8 +3539,13 @@ static void ep_timeout(unsigned long arg) + + spin_lock(&timeout_lock); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { +- list_add_tail(&ep->entry, &timeout_list); +- kickit = 1; ++ /* ++ * Only insert if it is not already on the list. ++ */ ++ if (!ep->entry.next) { ++ list_add_tail(&ep->entry, &timeout_list); ++ kickit = 1; ++ } + } + spin_unlock(&timeout_lock); + if (kickit) diff --git a/linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch b/linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch new file mode 100644 index 0000000..eca0ca7 --- /dev/null +++ b/linux-next-pending/0050-iw_cxgb4--rmb()-after-reading-valid-gen-bit.patch @@ -0,0 +1,26 @@ +commit 6dc9592394753f4bd1bbe8bfed01feba0202c594 +Author: Steve Wise +Date: Wed Apr 9 09:38:26 2014 -0500 + + iw_cxgb4: rmb() after reading valid gen bit + + Some HW platforms can reorder read operations, so we must rmb() after + we see a valid gen bit in a CQE but before we read any other fields from + the CQE. + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h +index 931bfd1..1f329fa 100644 +--- a/drivers/infiniband/hw/cxgb4/t4.h ++++ b/drivers/infiniband/hw/cxgb4/t4.h +@@ -620,6 +620,9 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) + printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); + BUG_ON(1); + } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { ++ ++ /* Ensure CQE is flushed to memory */ ++ rmb(); + *cqe = &cq->queue[cq->cidx]; + ret = 0; + } else diff --git a/linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch b/linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch new file mode 100644 index 0000000..5eb6e80 --- /dev/null +++ b/linux-next-pending/0051-iw_cxgb4--SQ-flush-fix.patch @@ -0,0 +1,114 @@ +commit f8ac55a8417e89b32cb44d8e8598fe0e2ccbfdbc +Author: Steve Wise +Date: Wed Apr 9 09:38:26 2014 -0500 + + iw_cxgb4: SQ flush fix + + There is a race when moving a QP from RTS->CLOSING where a SQ work + request could be posted after the FW receives the RDMA_RI/FINI WR. + The SQ work request will never get processed, and should be completed + with FLUSHED status. Function c4iw_flush_sq(), however was dropping + the oldest SQ work request when in CLOSING or IDLE states, instead of + completing the pending work request. If that oldest pending work request + was actually complete and has a CQE in the CQ, then when that CQE is + proceessed in poll_cq, we'll BUG_ON() due to the inconsistent SQ/CQ state. + + This is a very small timing hole and has only been hit once so far. + + The fix is two-fold: + + 1) c4iw_flush_sq() MUST always flush all non-completed WRs with FLUSHED + status regardless of the QP state. + + 2) In c4iw_modify_rc_qp(), always set the "in error" bit on the queue + before moving the state out of RTS. This ensures that the state + transition will not happen while another thread is in post_rc_send(), + because set_state() and post_rc_send() both aquire the qp spinlock. + Also, once we transition the state out of RTS, subsequent calls to + post_rc_send() will fail because the "in error" bit is set. I don't + think this fully closes the race where the FW can get a FINI followed a + SQ work request being posted (because they are posted to differente EQs), + but the #1 fix will handle the issue by flushing the SQ work request. + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c +index ce468e5..e17b155 100644 +--- a/drivers/infiniband/hw/cxgb4/cq.c ++++ b/drivers/infiniband/hw/cxgb4/cq.c +@@ -235,27 +235,21 @@ int c4iw_flush_sq(struct c4iw_qp *qhp) + struct t4_cq *cq = &chp->cq; + int idx; + struct t4_swsqe *swsqe; +- int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING && +- qhp->attr.state != C4IW_QP_STATE_IDLE); + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + idx = wq->sq.flush_cidx; + BUG_ON(idx >= wq->sq.size); + while (idx != wq->sq.pidx) { +- if (error) { +- swsqe = &wq->sq.sw_sq[idx]; +- BUG_ON(swsqe->flushed); +- swsqe->flushed = 1; +- insert_sq_cqe(wq, cq, swsqe); +- if (wq->sq.oldest_read == swsqe) { +- BUG_ON(swsqe->opcode != FW_RI_READ_REQ); +- advance_oldest_read(wq); +- } +- flushed++; +- } else { +- t4_sq_consume(wq); ++ swsqe = &wq->sq.sw_sq[idx]; ++ BUG_ON(swsqe->flushed); ++ swsqe->flushed = 1; ++ insert_sq_cqe(wq, cq, swsqe); ++ if (wq->sq.oldest_read == swsqe) { ++ BUG_ON(swsqe->opcode != FW_RI_READ_REQ); ++ advance_oldest_read(wq); + } ++ flushed++; + if (++idx == wq->sq.size) + idx = 0; + } +diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c +index 244b2ef..ed7751c 100644 +--- a/drivers/infiniband/hw/cxgb4/qp.c ++++ b/drivers/infiniband/hw/cxgb4/qp.c +@@ -1367,6 +1367,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + switch (attrs->next_state) { + case C4IW_QP_STATE_CLOSING: + BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); ++ t4_set_wq_in_error(&qhp->wq); + set_state(qhp, C4IW_QP_STATE_CLOSING); + ep = qhp->ep; + if (!internal) { +@@ -1374,16 +1375,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + disconnect = 1; + c4iw_get_ep(&qhp->ep->com); + } +- t4_set_wq_in_error(&qhp->wq); + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + break; + case C4IW_QP_STATE_TERMINATE: ++ t4_set_wq_in_error(&qhp->wq); + set_state(qhp, C4IW_QP_STATE_TERMINATE); + qhp->attr.layer_etype = attrs->layer_etype; + qhp->attr.ecode = attrs->ecode; +- t4_set_wq_in_error(&qhp->wq); + ep = qhp->ep; + disconnect = 1; + if (!internal) +@@ -1396,8 +1396,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + c4iw_get_ep(&qhp->ep->com); + break; + case C4IW_QP_STATE_ERROR: +- set_state(qhp, C4IW_QP_STATE_ERROR); + t4_set_wq_in_error(&qhp->wq); ++ set_state(qhp, C4IW_QP_STATE_ERROR); + if (!internal) { + abort = 1; + disconnect = 1; diff --git a/linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch b/linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch new file mode 100644 index 0000000..82970d6 --- /dev/null +++ b/linux-next-pending/0052-iw_cxgb4--Max-fastreg-depth-depends-on-DSGL-support.patch @@ -0,0 +1,59 @@ +commit a250cb1b779d43f54e5e729bcdc7859e59580a4a +Author: Steve Wise +Date: Wed Apr 9 09:38:27 2014 -0500 + + iw_cxgb4: Max fastreg depth depends on DSGL support + + The max depth of a fastreg mr depends on whether the device supports + DSGL or not. So compute it dynamically based on the device support and + the module use_dsgl option. + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c +index 7942925..a94a3e1 100644 +--- a/drivers/infiniband/hw/cxgb4/provider.c ++++ b/drivers/infiniband/hw/cxgb4/provider.c +@@ -328,7 +328,7 @@ static int c4iw_query_device(struct ib_device *ibdev, + props->max_mr = c4iw_num_stags(&dev->rdev); + props->max_pd = T4_MAX_NUM_PD; + props->local_ca_ack_delay = 0; +- props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH; ++ props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); + + return 0; + } +diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c +index ed7751c..9250127 100644 +--- a/drivers/infiniband/hw/cxgb4/qp.c ++++ b/drivers/infiniband/hw/cxgb4/qp.c +@@ -566,7 +566,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, + int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int rem; + +- if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) ++ if (wr->wr.fast_reg.page_list_len > ++ t4_max_fr_depth(use_dsgl)) + return -EINVAL; + + wqe->fr.qpbinde_to_dcacpu = 0; +diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h +index 1f329fa..2178f31 100644 +--- a/drivers/infiniband/hw/cxgb4/t4.h ++++ b/drivers/infiniband/hw/cxgb4/t4.h +@@ -84,7 +84,14 @@ struct t4_status_page { + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) + #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ + sizeof(struct fw_ri_immd)) & ~31UL) +-#define T4_MAX_FR_DEPTH (1024 / sizeof(u64)) ++#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) ++#define T4_MAX_FR_DSGL 1024 ++#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) ++ ++static inline int t4_max_fr_depth(int use_dsgl) ++{ ++ return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; ++} + + #define T4_RQ_NUM_SLOTS 2 + #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) diff --git a/linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch b/linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch new file mode 100644 index 0000000..663ed46 --- /dev/null +++ b/linux-next-pending/0053-iw_cxgb4--Use-pr_warn_ratelimited.patch @@ -0,0 +1,23 @@ +commit 3100a13f0ddfa805fa6596b9efc3d93037025ef1 +Author: Hariprasad Shenai +Date: Wed Apr 9 09:38:27 2014 -0500 + + iw_cxgb4: Use pr_warn_ratelimited + + Signed-off-by: Hariprasad Shenai + +diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c +index cdef4d7..94b5fd9 100644 +--- a/drivers/infiniband/hw/cxgb4/resource.c ++++ b/drivers/infiniband/hw/cxgb4/resource.c +@@ -322,8 +322,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) + unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + if (!addr) +- printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", +- pci_name(rdev->lldi.pdev)); ++ pr_warn_ratelimited(MOD "%s: Out of RQT memory\n", ++ pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); diff --git a/linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch b/linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch new file mode 100644 index 0000000..f8ad379 --- /dev/null +++ b/linux-next-pending/0054-iw_cxgb4--Initialize-reserved-fields-in-a-FW-work-request.patch @@ -0,0 +1,21 @@ +commit d9d8593b9727371cf3cd0221b51a2563d58286c6 +Author: Steve Wise +Date: Wed Apr 9 09:38:27 2014 -0500 + + iw_cxgb4: Initialize reserved fields in a FW work request + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c +index 9250127..8e04ecf 100644 +--- a/drivers/infiniband/hw/cxgb4/qp.c ++++ b/drivers/infiniband/hw/cxgb4/qp.c +@@ -436,6 +436,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, + default: + return -EINVAL; + } ++ wqe->send.r3 = 0; ++ wqe->send.r4 = 0; + + plen = 0; + if (wr->num_sge) { diff --git a/linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch b/linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch new file mode 100644 index 0000000..c694ad1 --- /dev/null +++ b/linux-next-pending/0055-iw_cxgb4--Add-missing-debug-stats.patch @@ -0,0 +1,44 @@ +commit 40bf01fc14ba86833a0134c5f02795298aeb817f +Author: Steve Wise +Date: Wed Apr 9 09:38:28 2014 -0500 + + iw_cxgb4: Add missing debug stats + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c +index f9ca072..ec7a298 100644 +--- a/drivers/infiniband/hw/cxgb4/mem.c ++++ b/drivers/infiniband/hw/cxgb4/mem.c +@@ -259,8 +259,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, + + if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { + stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); +- if (!stag_idx) ++ if (!stag_idx) { ++ mutex_lock(&rdev->stats.lock); ++ rdev->stats.stag.fail++; ++ mutex_unlock(&rdev->stats.lock); + return -ENOMEM; ++ } + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur += 32; + if (rdev->stats.stag.cur > rdev->stats.stag.max) +diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c +index 94b5fd9..67df71a 100644 +--- a/drivers/infiniband/hw/cxgb4/resource.c ++++ b/drivers/infiniband/hw/cxgb4/resource.c +@@ -179,8 +179,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_table); +- if (!qid) ++ if (!qid) { ++ mutex_lock(&rdev->stats.lock); ++ rdev->stats.qid.fail++; ++ mutex_unlock(&rdev->stats.lock); + goto out; ++ } + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); diff --git a/linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch b/linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch new file mode 100644 index 0000000..b4e58d4 --- /dev/null +++ b/linux-next-pending/0056-iw_cxgb4--Use-uninitialized_var().patch @@ -0,0 +1,21 @@ +commit ce77b41daa0acdb7258b0462a48399c9924d6226 +Author: Steve Wise +Date: Wed Apr 9 09:38:28 2014 -0500 + + iw_cxgb4: Use uninitialized_var() + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c +index e17b155..cfaa56a 100644 +--- a/drivers/infiniband/hw/cxgb4/cq.c ++++ b/drivers/infiniband/hw/cxgb4/cq.c +@@ -672,7 +672,7 @@ skip_cqe: + static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) + { + struct c4iw_qp *qhp = NULL; +- struct t4_cqe cqe = {0, 0}, *rd_cqe; ++ struct t4_cqe uninitialized_var(cqe), *rd_cqe; + struct t4_wq *wq; + u32 credit = 0; + u8 cqe_flushed; diff --git a/linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch b/linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch new file mode 100644 index 0000000..de39ee4 --- /dev/null +++ b/linux-next-pending/0057-iw_cxgb4--fix-over-dereference-when-terminating.patch @@ -0,0 +1,32 @@ +commit 6f22a5596f9e0899bdb2d021377b969910492dc9 +Author: Steve Wise +Date: Wed Apr 9 09:40:37 2014 -0500 + + iw_cxgb4: fix over-dereference when terminating + + Need to get the endpoint reference before calling + rdma_fini(), which might fail causing us to not + get the reference. + + Signed-off-by: Steve Wise + +diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c +index 8e04ecf..a558aa7 100644 +--- a/drivers/infiniband/hw/cxgb4/qp.c ++++ b/drivers/infiniband/hw/cxgb4/qp.c +@@ -1389,6 +1389,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + qhp->attr.ecode = attrs->ecode; + ep = qhp->ep; + disconnect = 1; ++ c4iw_get_ep(&qhp->ep->com); + if (!internal) + terminate = 1; + else { +@@ -1396,7 +1397,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + if (ret) + goto err; + } +- c4iw_get_ep(&qhp->ep->com); + break; + case C4IW_QP_STATE_ERROR: + t4_set_wq_in_error(&qhp->wq); -- 2.41.0