From: Steve Wise
Date: Tue, 29 Apr 2014 01:56:43 +0000 (-0500)
Subject: More iw_cxgb4 fixes
X-Git-Tag: vofed-3.12~2
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=77f67fc1c8b8edcc9755c4051e9b86b07ef9ab2f;p=~emulex%2Fcompat-rdma_3.12.git

More iw_cxgb4 fixes

Pull in these fixes:

c2f9da9 RDMA/cxgb4: Only allow kernel db ringing for T4 devs
92e5011 RDMA/cxgb4: Force T5 connections to use TAHOE congestion control
cc18b93 RDMA/cxgb4: Fix endpoint mutex deadlocks

Signed-off-by: Steve Wise
---

diff --git a/linux-next-pending/0060-RDMA-cxgb4--Fix-endpoint-mutex-deadlocks.patch b/linux-next-pending/0060-RDMA-cxgb4--Fix-endpoint-mutex-deadlocks.patch
new file mode 100644
index 0000000..b4f43f0
--- /dev/null
+++ b/linux-next-pending/0060-RDMA-cxgb4--Fix-endpoint-mutex-deadlocks.patch
@@ -0,0 +1,206 @@
+commit cc18b939e1efbc2a47f62dbd2b1df53d974df6b7
+Author: Steve Wise
+Date:   Thu Apr 24 14:31:53 2014 -0500
+
+    RDMA/cxgb4: Fix endpoint mutex deadlocks
+
+    In cases where the cm calls c4iw_modify_rc_qp() with the endpoint
+    mutex held, they must be called with internal == 1. rx_data() and
+    process_mpa_reply() are not doing this. This causes a deadlock
+    because c4iw_modify_rc_qp() might call c4iw_ep_disconnect() in some
+    !internal cases, and c4iw_ep_disconnect() acquires the endpoint mutex.
+    The design was intended to only do the disconnect for !internal calls.
+
+    Change rx_data(), FPDU_MODE case, to call c4iw_modify_rc_qp() with
+    internal == 1, and then disconnect only after releasing the mutex.
+
+    Change process_mpa_reply() to call c4iw_modify_rc_qp(TERMINATE) with
+    internal == 1 and set a new attr flag telling it to send a TERMINATE
+    message. Previously this was implied by !internal.
+
+    Change process_mpa_reply() to return whether the caller should
+    disconnect after releasing the endpoint mutex. Now rx_data() will do
+    the disconnect in the cases where process_mpa_reply() wants to
+    disconnect after the TERMINATE is sent.
+
+    Change c4iw_modify_rc_qp() RTS->TERM to only disconnect if !internal,
+    and to send a TERMINATE message if attrs->send_term is 1.
+
+    Change abort_connection() to not acquire the ep mutex for setting the
+    state, and make all calls to abort_connection() do so with the mutex
+    held.
+
+    Signed-off-by: Steve Wise
+    Signed-off-by: Roland Dreier
+
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index 185452a..f9b04bc 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -996,7 +996,7 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
+ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
+ {
+ 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+-	state_set(&ep->com, ABORTING);
++	__state_set(&ep->com, ABORTING);
+ 	set_bit(ABORT_CONN, &ep->com.history);
+ 	return send_abort(ep, skb, gfp);
+ }
+@@ -1154,7 +1154,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
+ 	return credits;
+ }
+ 
+-static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
++static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ {
+ 	struct mpa_message *mpa;
+ 	struct mpa_v2_conn_params *mpa_v2_params;
+@@ -1164,6 +1164,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ 	struct c4iw_qp_attributes attrs;
+ 	enum c4iw_qp_attr_mask mask;
+ 	int err;
++	int disconnect = 0;
+ 
+ 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ 
+@@ -1173,7 +1174,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ 	 * will abort the connection.
+ 	 */
+ 	if (stop_ep_timer(ep))
+-		return;
++		return 0;
+ 
+ 	/*
+ 	 * If we get more than the supported amount of private data
+@@ -1195,7 +1196,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ 	 * if we don't even have the mpa message, then bail.
+ 	 */
+ 	if (ep->mpa_pkt_len < sizeof(*mpa))
+-		return;
++		return 0;
+ 	mpa = (struct mpa_message *) ep->mpa_pkt;
+ 
+ 	/* Validate MPA header. */
+@@ -1235,7 +1236,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ 	 * We'll continue process when more data arrives.
+ 	 */
+ 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+-		return;
++		return 0;
+ 
+ 	if (mpa->flags & MPA_REJECT) {
+ 		err = -ECONNREFUSED;
+@@ -1337,9 +1338,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ 			attrs.layer_etype = LAYER_MPA | DDP_LLP;
+ 			attrs.ecode = MPA_NOMATCH_RTR;
+ 			attrs.next_state = C4IW_QP_STATE_TERMINATE;
++			attrs.send_term = 1;
+ 			err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+-					C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
++					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ 			err = -ENOMEM;
++			disconnect = 1;
+ 			goto out;
+ 		}
+ 
+@@ -1355,9 +1358,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+ 			attrs.layer_etype = LAYER_MPA | DDP_LLP;
+ 			attrs.ecode = MPA_INSUFF_IRD;
+ 			attrs.next_state = C4IW_QP_STATE_TERMINATE;
++			attrs.send_term = 1;
+ 			err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+-					C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
++					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ 			err = -ENOMEM;
++			disconnect = 1;
+ 			goto out;
+ 		}
+ 	goto out;
+@@ -1366,7 +1371,7 @@ err:
+ 	send_abort(ep, skb, GFP_KERNEL);
+ out:
+ 	connect_reply_upcall(ep, err);
+-	return;
++	return disconnect;
+ }
+ 
+ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
+@@ -1524,6 +1529,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+ 	unsigned int tid = GET_TID(hdr);
+ 	struct tid_info *t = dev->rdev.lldi.tids;
+ 	__u8 status = hdr->status;
++	int disconnect = 0;
+ 
+ 	ep = lookup_tid(t, tid);
+ 	if (!ep)
+@@ -1539,7 +1545,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+ 	switch (ep->com.state) {
+ 	case MPA_REQ_SENT:
+ 		ep->rcv_seq += dlen;
+-		process_mpa_reply(ep, skb);
++		disconnect = process_mpa_reply(ep, skb);
+ 		break;
+ 	case MPA_REQ_WAIT:
+ 		ep->rcv_seq += dlen;
+@@ -1555,13 +1561,16 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
+ 		       ep->com.state, ep->hwtid, status);
+ 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ 		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+-			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
++			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
++		disconnect = 1;
+ 		break;
+ 	}
+ 	default:
+ 		break;
+ 	}
+ 	mutex_unlock(&ep->com.mutex);
++	if (disconnect)
++		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ 	return 0;
+ }
+ 
+@@ -3482,9 +3491,9 @@ static void process_timeout(struct c4iw_ep *ep)
+ 			__func__, ep, ep->hwtid, ep->com.state);
+ 		abort = 0;
+ 	}
+-	mutex_unlock(&ep->com.mutex);
+ 	if (abort)
+ 		abort_connection(ep, NULL, GFP_KERNEL);
++	mutex_unlock(&ep->com.mutex);
+ 	c4iw_put_ep(&ep->com);
+ }
+ 
+diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+index 7b8c580..7474b49 100644
+--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
++++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+@@ -435,6 +435,7 @@ struct c4iw_qp_attributes {
+ 	u8 ecode;
+ 	u16 sq_db_inc;
+ 	u16 rq_db_inc;
++	u8 send_term;
+ };
+ 
+ struct c4iw_qp {
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index 7b5114c..f18ef34 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -1388,11 +1388,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+ 		qhp->attr.layer_etype = attrs->layer_etype;
+ 		qhp->attr.ecode = attrs->ecode;
+ 		ep = qhp->ep;
+-		disconnect = 1;
+-		c4iw_get_ep(&qhp->ep->com);
+-		if (!internal)
++		if (!internal) {
++			c4iw_get_ep(&qhp->ep->com);
+ 			terminate = 1;
+-		else {
++			disconnect = 1;
++		} else {
++			terminate = qhp->attr.send_term;
+ 			ret = rdma_fini(rhp, qhp, ep);
+ 			if (ret)
+ 				goto err;
diff --git a/linux-next-pending/0061-RDMA-cxgb4--Force-T5-connections-to-use-TAHOE-congestion-control.patch b/linux-next-pending/0061-RDMA-cxgb4--Force-T5-connections-to-use-TAHOE-congestion-control.patch
new file mode 100644
index 0000000..3081298
--- /dev/null
+++ b/linux-next-pending/0061-RDMA-cxgb4--Force-T5-connections-to-use-TAHOE-congestion-control.patch
@@ -0,0 +1,60 @@
+commit 92e5011ab0e073ab8fbb726c11529021e5e63973
+Author: Steve Wise
+Date:   Thu Apr 24 14:31:59 2014 -0500
+
+    RDMA/cxgb4: Force T5 connections to use TAHOE congestion control
+
+    This is required to work around a T5 HW issue.
+
+    Signed-off-by: Steve Wise
+    Signed-off-by: Roland Dreier
+
+diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
+index f9b04bc..1f863a9 100644
+--- a/drivers/infiniband/hw/cxgb4/cm.c
++++ b/drivers/infiniband/hw/cxgb4/cm.c
+@@ -587,6 +587,10 @@ static int send_connect(struct c4iw_ep *ep)
+ 		opt2 |= SACK_EN(1);
+ 	if (wscale && enable_tcp_window_scaling)
+ 		opt2 |= WND_SCALE_EN(1);
++	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
++		opt2 |= T5_OPT_2_VALID;
++		opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
++	}
+ 	t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
+ 
+ 	if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
+@@ -2018,6 +2022,10 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
+ 		if (tcph->ece && tcph->cwr)
+ 			opt2 |= CCTRL_ECN(1);
+ 	}
++	if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
++		opt2 |= T5_OPT_2_VALID;
++		opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
++	}
+ 
+ 	rpl = cplhdr(skb);
+ 	INIT_TP_WR(rpl, ep->hwtid);
+diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+index dc193c2..6121ca0 100644
+--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
++++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+@@ -836,4 +836,18 @@ struct ulptx_idata {
+ #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE)
+ #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U)
+ 
++enum { /* TCP congestion control algorithms */
++	CONG_ALG_RENO,
++	CONG_ALG_TAHOE,
++	CONG_ALG_NEWRENO,
++	CONG_ALG_HIGHSPEED
++};
++
++#define S_CONG_CNTRL 14
++#define M_CONG_CNTRL 0x3
++#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
++#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
++
++#define T5_OPT_2_VALID (1 << 31)
++
+ #endif /* _T4FW_RI_API_H_ */
diff --git a/linux-next-pending/0062-RDMA-cxgb4--Only-allow-kernel-db-ringing-for-T4-devs.patch b/linux-next-pending/0062-RDMA-cxgb4--Only-allow-kernel-db-ringing-for-T4-devs.patch
new file mode 100644
index 0000000..a7857f8
--- /dev/null
+++ b/linux-next-pending/0062-RDMA-cxgb4--Only-allow-kernel-db-ringing-for-T4-devs.patch
@@ -0,0 +1,32 @@
+commit c2f9da92f2fd6dbf8f40ef4d5e00db688cc0416a
+Author: Steve Wise
+Date:   Thu Apr 24 14:32:04 2014 -0500
+
+    RDMA/cxgb4: Only allow kernel db ringing for T4 devs
+
+    The whole db drop avoidance stuff is for T4 only. So we cannot allow
+    that to be enabled for T5 devices.
+
+    Signed-off-by: Steve Wise
+    Signed-off-by: Roland Dreier
+
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index f18ef34..086f62f 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -1777,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ 	/*
+ 	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+ 	 * ringing the queue db when we're in DB_FULL mode.
++	 * Only allow this on T4 devices.
+ 	 */
+ 	attrs.sq_db_inc = attr->sq_psn;
+ 	attrs.rq_db_inc = attr->rq_psn;
+ 	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
+ 	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
++	if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
++	    (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
++		return -EINVAL;
+ 
+ 	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
+ }
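
Editor's note: the following standalone C sketch is not part of the patches above. It only illustrates the locking rule that the endpoint-mutex fix in patch 0060 establishes: a helper that is called with the endpoint mutex held must run as "internal" and never take that mutex itself; it merely reports that a disconnect is wanted, and the caller performs the disconnect after dropping the lock. All names here (ep_ctx, modify_qp_sketch, rx_data_sketch, ep_disconnect) are illustrative placeholders, not the driver's real API.

/*
 * Minimal sketch of the deferred-disconnect pattern, assuming a single
 * mutex that models ep->com.mutex.  Build with: gcc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

struct ep_ctx {
	pthread_mutex_t mutex;	/* stands in for ep->com.mutex */
	int connected;
};

static void ep_disconnect(struct ep_ctx *ep)
{
	/* The real c4iw_ep_disconnect() acquires the endpoint mutex, so it
	 * must never be called by code that already holds that mutex. */
	pthread_mutex_lock(&ep->mutex);
	ep->connected = 0;
	pthread_mutex_unlock(&ep->mutex);
	printf("disconnected\n");
}

/* Returns 1 when the caller should disconnect after unlocking, 0 otherwise. */
static int modify_qp_sketch(struct ep_ctx *ep, int internal)
{
	if (!internal) {
		/* Caller does not hold ep->mutex, so disconnecting here is safe. */
		ep_disconnect(ep);
		return 0;
	}
	/* Caller holds ep->mutex: only record that a disconnect is wanted. */
	return 1;
}

static void rx_data_sketch(struct ep_ctx *ep)
{
	int disconnect;

	pthread_mutex_lock(&ep->mutex);
	disconnect = modify_qp_sketch(ep, 1);	/* internal == 1: no re-lock */
	pthread_mutex_unlock(&ep->mutex);

	if (disconnect)		/* deferred until the mutex is released */
		ep_disconnect(ep);
}

int main(void)
{
	struct ep_ctx ep = { .mutex = PTHREAD_MUTEX_INITIALIZER, .connected = 1 };

	rx_data_sketch(&ep);
	return 0;
}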