git.openfabrics.org - ~emulex/compat-rdma_3.12.git/commitdiff
More iw_cxgb4 fixes
author Steve Wise <swise@opengridcomputing.com>
Tue, 29 Apr 2014 01:56:43 +0000 (20:56 -0500)
committer Steve Wise <swise@opengridcomputing.com>
Tue, 29 Apr 2014 01:56:43 +0000 (20:56 -0500)
Pull in these fixes:

c2f9da9 RDMA/cxgb4: Only allow kernel db ringing for T4 devs
92e5011 RDMA/cxgb4: Force T5 connections to use TAHOE congestion control
cc18b93 RDMA/cxgb4: Fix endpoint mutex deadlocks

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
linux-next-pending/0060-RDMA-cxgb4--Fix-endpoint-mutex-deadlocks.patch [new file with mode: 0644]
linux-next-pending/0061-RDMA-cxgb4--Force-T5-connections-to-use-TAHOE-congestion-control.patch [new file with mode: 0644]
linux-next-pending/0062-RDMA-cxgb4--Only-allow-kernel-db-ringing-for-T4-devs.patch [new file with mode: 0644]

diff --git a/linux-next-pending/0060-RDMA-cxgb4--Fix-endpoint-mutex-deadlocks.patch b/linux-next-pending/0060-RDMA-cxgb4--Fix-endpoint-mutex-deadlocks.patch
new file mode 100644 (file)
index 0000000..b4f43f0
--- /dev/null
@@ -0,0 +1,206 @@
+commit cc18b939e1efbc2a47f62dbd2b1df53d974df6b7
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Thu Apr 24 14:31:53 2014 -0500
+
+    RDMA/cxgb4: Fix endpoint mutex deadlocks
+    
+    In cases where the cm calls c4iw_modify_rc_qp() with the endpoint
+    mutex held, it must be called with internal == 1.  rx_data() and
+    process_mpa_reply() are not doing this.  This causes a deadlock
+    because c4iw_modify_rc_qp() might call c4iw_ep_disconnect() in some
+    !internal cases, and c4iw_ep_disconnect() acquires the endpoint mutex.
+    The design was intended to only do the disconnect for !internal calls.
+    
+    Change rx_data(), FPDU_MODE case, to call c4iw_modify_rc_qp() with
+    internal == 1, and then disconnect only after releasing the mutex.
+    
+    Change process_mpa_reply() to call c4iw_modify_rc_qp(TERMINATE) with
+    internal == 1 and set a new attr flag telling it to send a TERMINATE
+    message.  Previously this was implied by !internal.
+    
+    Change process_mpa_reply() to return whether the caller should
+    disconnect after releasing the endpoint mutex.  Now rx_data() will do
+    the disconnect in the cases where process_mpa_reply() wants to
+    disconnect after the TERMINATE is sent.
+    
+    Change c4iw_modify_rc_qp() RTS->TERM to only disconnect if !internal,
+    and to send a TERMINATE message if attrs->send_term is 1.
+    
+    Change abort_connection() to not acquire the ep mutex for setting the
+    state, and make all calls to abort_connection() do so with the mutex
+    held.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+    Signed-off-by: Roland Dreier <roland@purestorage.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index 185452a..f9b04bc 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -996,7 +996,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
+ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
+ {
+       PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+-      state_set(&ep->com, ABORTING);
++      __state_set(&ep->com, ABORTING);
+       set_bit(ABORT_CONN, &ep->com.history);
+       return send_abort(ep, skb, gfp);
+ }
+@@ -1154,7 +1154,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
+       return credits;
+ }
+-static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
++static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ {
+       struct mpa_message *mpa;
+       struct mpa_v2_conn_params *mpa_v2_params;
+@@ -1164,6 +1164,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+       struct c4iw_qp_attributes attrs;
+       enum c4iw_qp_attr_mask mask;
+       int err;
++      int disconnect = 0;
+       PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+@@ -1173,7 +1174,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+        * will abort the connection.
+        */
+       if (stop_ep_timer(ep))
+-              return;
++              return 0;
+       /*
+        * If we get more than the supported amount of private data
+@@ -1195,7 +1196,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+        * if we don't even have the mpa message, then bail.
+        */
+       if (ep->mpa_pkt_len < sizeof(*mpa))
+-              return;
++              return 0;
+       mpa = (struct mpa_message *) ep->mpa_pkt;
+       /* Validate MPA header. */
+@@ -1235,7 +1236,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+        * We'll continue process when more data arrives.
+        */
+       if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+-              return;
++              return 0;
+       if (mpa->flags & MPA_REJECT) {
+               err = -ECONNREFUSED;
+@@ -1337,9 +1338,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+               attrs.layer_etype = LAYER_MPA | DDP_LLP;
+               attrs.ecode = MPA_NOMATCH_RTR;
+               attrs.next_state = C4IW_QP_STATE_TERMINATE;
++              attrs.send_term = 1;
+               err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+-                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
++                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+               err = -ENOMEM;
++              disconnect = 1;
+               goto out;
+       }
+@@ -1355,9 +1358,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+               attrs.layer_etype = LAYER_MPA | DDP_LLP;
+               attrs.ecode = MPA_INSUFF_IRD;
+               attrs.next_state = C4IW_QP_STATE_TERMINATE;
++              attrs.send_term = 1;
+               err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+-                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
++                              C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+               err = -ENOMEM;
++              disconnect = 1;
+               goto out;
+       }
+       goto out;
+@@ -1366,7 +1371,7 @@ err:
+       send_abort(ep, skb, GFP_KERNEL);
+ out:
+       connect_reply_upcall(ep, err);
+-      return;
++      return disconnect;
+ }
+ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+@@ -1524,6 +1529,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+       unsigned int tid = GET_TID(hdr);
+       struct tid_info *t = dev->rdev.lldi.tids;
+       __u8 status = hdr->status;
++      int disconnect = 0;
+       ep = lookup_tid(t, tid);
+       if (!ep)
+@@ -1539,7 +1545,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+       switch (ep->com.state) {
+       case MPA_REQ_SENT:
+               ep->rcv_seq += dlen;
+-              process_mpa_reply(ep, skb);
++              disconnect = process_mpa_reply(ep, skb);
+               break;
+       case MPA_REQ_WAIT:
+               ep->rcv_seq += dlen;
+@@ -1555,13 +1561,16 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+                              ep->com.state, ep->hwtid, status);
+               attrs.next_state = C4IW_QP_STATE_TERMINATE;
+               c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+-                             C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
++                             C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
++              disconnect = 1;
+               break;
+       }
+       default:
+               break;
+       }
+       mutex_unlock(&ep->com.mutex);
++      if (disconnect)
++              c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+       return 0;
+ }
+@@ -3482,9 +3491,9 @@ static void process_timeout(struct c4iw_ep *ep)
+                       __func__, ep, ep->hwtid, ep->com.state);
+               abort = 0;
+       }
+-      mutex_unlock(&ep->com.mutex);
+       if (abort)
+               abort_connection(ep, NULL, GFP_KERNEL);
++      mutex_unlock(&ep->com.mutex);
+       c4iw_put_ep(&ep->com);
+ }
+diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+index 7b8c580..7474b49 100644
+--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
++++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+@@ -435,6 +435,7 @@ struct c4iw_qp_attributes {
+       u8 ecode;
+       u16 sq_db_inc;
+       u16 rq_db_inc;
++      u8 send_term;
+ };
+ struct c4iw_qp {
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index 7b5114c..f18ef34 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -1388,11 +1388,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+                       qhp->attr.layer_etype = attrs->layer_etype;
+                       qhp->attr.ecode = attrs->ecode;
+                       ep = qhp->ep;
+-                      disconnect = 1;
+-                      c4iw_get_ep(&qhp->ep->com);
+-                      if (!internal)
++                      if (!internal) {
++                              c4iw_get_ep(&qhp->ep->com);
+                               terminate = 1;
+-                      else {
++                              disconnect = 1;
++                      } else {
++                              terminate = qhp->attr.send_term;
+                               ret = rdma_fini(rhp, qhp, ep);
+                               if (ret)
+                                       goto err;
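As a rough illustration of the locking pattern the patch above establishes, here is a minimal standalone sketch (hypothetical names such as struct ep, process_reply() and do_disconnect(); these are not the actual cxgb4 symbols): a handler that holds the endpoint mutex must not call a teardown routine that re-acquires the same mutex, so the callee only reports that a disconnect is needed and the caller acts on it after dropping the lock.

/*
 * Minimal sketch of the deadlock-avoidance pattern described above.
 * All names here are illustrative stand-ins, not driver code.
 * Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

struct ep {
	pthread_mutex_t mutex;
	int state;
};

/* Would deadlock if called while ep->mutex is already held. */
static void do_disconnect(struct ep *ep)
{
	pthread_mutex_lock(&ep->mutex);
	ep->state = 0;			/* CLOSED */
	pthread_mutex_unlock(&ep->mutex);
	printf("disconnected\n");
}

/* Returns 1 if the caller should disconnect after dropping the mutex. */
static int process_reply(struct ep *ep)
{
	(void)ep;
	/* ... decide that the connection must be torn down ... */
	return 1;
}

static void handle_rx(struct ep *ep)
{
	int disconnect;

	pthread_mutex_lock(&ep->mutex);
	disconnect = process_reply(ep);	/* must NOT disconnect here */
	pthread_mutex_unlock(&ep->mutex);

	if (disconnect)			/* safe: mutex no longer held */
		do_disconnect(ep);
}

int main(void)
{
	struct ep ep = { .mutex = PTHREAD_MUTEX_INITIALIZER, .state = 1 };

	handle_rx(&ep);
	return 0;
}

This mirrors the reworked rx_data()/process_mpa_reply() split in the patch: process_mpa_reply() now returns a disconnect flag, and rx_data() calls c4iw_ep_disconnect() only after mutex_unlock().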
diff --git a/linux-next-pending/0061-RDMA-cxgb4--Force-T5-connections-to-use-TAHOE-congestion-control.patch b/linux-next-pending/0061-RDMA-cxgb4--Force-T5-connections-to-use-TAHOE-congestion-control.patch
new file mode 100644 (file)
index 0000000..3081298
--- /dev/null
@@ -0,0 +1,60 @@
+commit 92e5011ab0e073ab8fbb726c11529021e5e63973
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Thu Apr 24 14:31:59 2014 -0500
+
+    RDMA/cxgb4: Force T5 connections to use TAHOE congestion control
+    
+    This is required to work around a T5 HW issue.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+    Signed-off-by: Roland Dreier <roland@purestorage.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index f9b04bc..1f863a9 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -587,6 +587,10 @@ static int send_connect(struct c4iw_ep *ep)
+               opt2 |= SACK_EN(1);
+       if (wscale && enable_tcp_window_scaling)
+               opt2 |= WND_SCALE_EN(1);
++      if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
++              opt2 |= T5_OPT_2_VALID;
++              opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
++      }
+       t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
+       if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
+@@ -2018,6 +2022,10 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
+               if (tcph->ece && tcph->cwr)
+                       opt2 |= CCTRL_ECN(1);
+       }
++      if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
++              opt2 |= T5_OPT_2_VALID;
++              opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
++      }
+       rpl = cplhdr(skb);
+       INIT_TP_WR(rpl, ep->hwtid);
+diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+index dc193c2..6121ca0 100644
+--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
++++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+@@ -836,4 +836,18 @@ struct ulptx_idata {
+ #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE)
+ #define F_RX_DACK_CHANGE    V_RX_DACK_CHANGE(1U)
++enum {                     /* TCP congestion control algorithms */
++      CONG_ALG_RENO,
++      CONG_ALG_TAHOE,
++      CONG_ALG_NEWRENO,
++      CONG_ALG_HIGHSPEED
++};
++
++#define S_CONG_CNTRL    14
++#define M_CONG_CNTRL    0x3
++#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
++#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
++
++#define T5_OPT_2_VALID       (1 << 31)
++
+ #endif /* _T4FW_RI_API_H_ */
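To illustrate how the new CONG_CNTRL field macros compose into opt2, here is a small standalone sketch that reuses the values added by the patch above (user-space test harness only; the real driver sets these bits in send_connect() and accept_cr() on T5 adapters, and the kernel header defines T5_OPT_2_VALID as (1 << 31)):

/* Sketch: packing and extracting the congestion-control field in opt2. */
#include <stdint.h>
#include <stdio.h>

enum {			/* TCP congestion control algorithms */
	CONG_ALG_RENO,
	CONG_ALG_TAHOE,
	CONG_ALG_NEWRENO,
	CONG_ALG_HIGHSPEED
};

#define S_CONG_CNTRL    14
#define M_CONG_CNTRL    0x3
#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)

#define T5_OPT_2_VALID  (1U << 31)

int main(void)
{
	uint32_t opt2 = 0;

	/* What the T5 branch of send_connect()/accept_cr() now does: */
	opt2 |= T5_OPT_2_VALID;
	opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);

	printf("opt2 = 0x%08x, alg = %u\n",
	       (unsigned)opt2, (unsigned)G_CONG_CNTRL(opt2));
	/* prints: opt2 = 0x80004000, alg = 1 (TAHOE) */
	return 0;
}

The V_/G_ pair follows the usual cxgb4 field-macro convention: V_CONG_CNTRL() shifts a 2-bit algorithm value into bits 15:14, and G_CONG_CNTRL() recovers it.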
diff --git a/linux-next-pending/0062-RDMA-cxgb4--Only-allow-kernel-db-ringing-for-T4-devs.patch b/linux-next-pending/0062-RDMA-cxgb4--Only-allow-kernel-db-ringing-for-T4-devs.patch
new file mode 100644 (file)
index 0000000..a7857f8
--- /dev/null
@@ -0,0 +1,32 @@
+commit c2f9da92f2fd6dbf8f40ef4d5e00db688cc0416a
+Author: Steve Wise <swise@opengridcomputing.com>
+Date:   Thu Apr 24 14:32:04 2014 -0500
+
+    RDMA/cxgb4: Only allow kernel db ringing for T4 devs
+    
+    The whole db drop avoidance stuff is for T4 only.  So we cannot allow
+    that to be enabled for T5 devices.
+    
+    Signed-off-by: Steve Wise <swise@opengridcomputing.com>
+    Signed-off-by: Roland Dreier <roland@purestorage.com>
+
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index f18ef34..086f62f 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -1777,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+       /*
+        * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+        * ringing the queue db when we're in DB_FULL mode.
++       * Only allow this on T4 devices.
+        */
+       attrs.sq_db_inc = attr->sq_psn;
+       attrs.rq_db_inc = attr->rq_psn;
+       mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+       mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
++      if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
++          (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
++              return -EINVAL;
+       return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
+ }
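The guard added above makes the doorbell-recovery path T4-only. A small sketch of the check in isolation (the attr mask bit values and is_t5() below are illustrative stand-ins for the driver's own definitions; only the mask test itself comes from the patch):

/* Sketch: reject SQ/RQ doorbell-increment attributes on T5 adapters. */
#include <errno.h>
#include <stdio.h>

#define C4IW_QP_ATTR_SQ_DB (1 << 0)	/* illustrative values only */
#define C4IW_QP_ATTR_RQ_DB (1 << 1)

static int is_t5(int adapter_type)
{
	return adapter_type == 5;	/* placeholder for the real check */
}

static int modify_qp_check(int adapter_type, unsigned int mask)
{
	if (is_t5(adapter_type) &&
	    (mask & (C4IW_QP_ATTR_SQ_DB | C4IW_QP_ATTR_RQ_DB)))
		return -EINVAL;		/* db drop avoidance is T4-only */
	return 0;
}

int main(void)
{
	printf("T4: %d\n", modify_qp_check(4, C4IW_QP_ATTR_SQ_DB)); /* 0 */
	printf("T5: %d\n", modify_qp_check(5, C4IW_QP_ATTR_SQ_DB)); /* -EINVAL */
	return 0;
}

With this check in place, a T5 user who passes IDX_INC values via SQ_PSN/RQ_PSN gets -EINVAL before c4iw_modify_qp() is ever reached.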