From: Selvin Xavier Date: Fri, 28 Sep 2018 04:33:28 +0000 (-0700) Subject: bnxt_re: Fix system crash during RDMA resource initialization X-Git-Tag: vofed-4.17-rc1^0 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=78b04f2aa3d21e59ba97f96c7f805392591c1548;p=compat-rdma%2Fcompat-rdma.git bnxt_re: Fix system crash during RDMA resource initialization Pull from 4.19 rc tree. Fixing a crash due to a call back from L2 driver during resource initialization. Signed-off-by: Selvin Xavier --- diff --git a/linux-next-cherry-picks/0051-RDMA-bnxt_re-Fix-system-crash-during-RDMA-resource-i.patch b/linux-next-cherry-picks/0051-RDMA-bnxt_re-Fix-system-crash-during-RDMA-resource-i.patch new file mode 100644 index 0000000..b700a4f --- /dev/null +++ b/linux-next-cherry-picks/0051-RDMA-bnxt_re-Fix-system-crash-during-RDMA-resource-i.patch @@ -0,0 +1,412 @@ +From de5c95d0f518537f59ee5aef762abc46f868c377 Mon Sep 17 00:00:00 2001 +From: Selvin Xavier +Date: Thu, 20 Sep 2018 22:33:00 -0700 +Subject: [PATCH] RDMA/bnxt_re: Fix system crash during RDMA resource + initialization + +bnxt_re_ib_reg acquires and releases the rtnl lock whenever it accesses +the L2 driver. + +The following sequence can trigger a crash + +Acquires the rtnl_lock -> + Registers roce driver callback with L2 driver -> + release the rtnl lock +bnxt_re acquires the rtnl_lock -> + Request for MSIx vectors -> + release the rtnl_lock + +Issue happens when bnxt_re proceeds with remaining part of initialization +and L2 driver invokes bnxt_ulp_irq_stop as a part of bnxt_open_nic. + +The crash is in bnxt_qplib_nq_stop_irq as the NQ structures are +not initialized yet, + + +[ 3551.726647] BUG: unable to handle kernel NULL pointer dereference at (null) +[ 3551.726656] IP: [] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] +[ 3551.726674] PGD 0 +[ 3551.726679] Oops: 0002 1 SMP +... +[ 3551.726822] Hardware name: Dell Inc. PowerEdge R720/08RW36, BIOS 2.4.3 07/09/2014 +[ 3551.726826] task: ffff97e30eec5ee0 ti: ffff97e3173bc000 task.ti: ffff97e3173bc000 +[ 3551.726829] RIP: 0010:[] [] +bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] +... +[ 3551.726872] Call Trace: +[ 3551.726886] [] bnxt_re_stop_irq+0x4e/0x70 [bnxt_re] +[ 3551.726899] [] bnxt_ulp_irq_stop+0x43/0x70 [bnxt_en] +[ 3551.726908] [] bnxt_reserve_rings+0x174/0x1e0 [bnxt_en] +[ 3551.726917] [] __bnxt_open_nic+0x368/0x9a0 [bnxt_en] +[ 3551.726925] [] bnxt_open_nic+0x1b/0x50 [bnxt_en] +[ 3551.726934] [] bnxt_setup_mq_tc+0x11f/0x260 [bnxt_en] +[ 3551.726943] [] bnxt_dcbnl_ieee_setets+0xb8/0x1f0 [bnxt_en] +[ 3551.726954] [] dcbnl_ieee_set+0x9a/0x250 +[ 3551.726966] [] ? __alloc_skb+0xa1/0x2d0 +[ 3551.726972] [] dcb_doit+0x13a/0x210 +[ 3551.726981] [] rtnetlink_rcv_msg+0xa7/0x260 +[ 3551.726989] [] ? rtnl_unicast+0x20/0x30 +[ 3551.726996] [] ? __kmalloc_node_track_caller+0x58/0x290 +[ 3551.727002] [] ? dcb_doit+0x166/0x210 +[ 3551.727007] [] ? __alloc_skb+0x8d/0x2d0 +[ 3551.727012] [] ? rtnl_newlink+0x880/0x880 +... +[ 3551.727104] [] system_call_fastpath+0x1c/0x21 +... +[ 3551.727164] RIP [] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] +[ 3551.727175] RSP +[ 3551.727177] CR2: 0000000000000000 + +Avoid this inconsistent state and system crash by acquiring +the rtnl lock for the entire duration of device initialization. +Re-factor the code to remove the rtnl lock from the individual function +and acquire and release it from the caller. + +Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") +Fixes: 6e04b1035689 ("RDMA/bnxt_re: Fix broken RoCE driver due to recent L2 driver changes") +Signed-off-by: Selvin Xavier +Signed-off-by: Jason Gunthorpe + +diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c +index 20b9f31..85cd1a3 100644 +--- a/drivers/infiniband/hw/bnxt_re/main.c ++++ b/drivers/infiniband/hw/bnxt_re/main.c +@@ -78,7 +78,7 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list); + /* Mutex to protect the list of bnxt_re devices added */ + static DEFINE_MUTEX(bnxt_re_dev_lock); + static struct workqueue_struct *bnxt_re_wq; +-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait); ++static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev); + + /* SR-IOV helper functions */ + +@@ -182,7 +182,7 @@ static void bnxt_re_shutdown(void *p) + if (!rdev) + return; + +- bnxt_re_ib_unreg(rdev, false); ++ bnxt_re_ib_unreg(rdev); + } + + static void bnxt_re_stop_irq(void *handle) +@@ -251,7 +251,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { + /* Driver registration routines used to let the networking driver (bnxt_en) + * to know that the RoCE driver is now installed + */ +-static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait) ++static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev) + { + struct bnxt_en_dev *en_dev; + int rc; +@@ -260,14 +260,9 @@ static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait) + return -EINVAL; + + en_dev = rdev->en_dev; +- /* Acquire rtnl lock if it is not invokded from netdev event */ +- if (lock_wait) +- rtnl_lock(); + + rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev, + BNXT_ROCE_ULP); +- if (lock_wait) +- rtnl_unlock(); + return rc; + } + +@@ -281,14 +276,12 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) + + en_dev = rdev->en_dev; + +- rtnl_lock(); + rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP, + &bnxt_re_ulp_ops, rdev); +- rtnl_unlock(); + return rc; + } + +-static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) ++static int bnxt_re_free_msix(struct bnxt_re_dev *rdev) + { + struct bnxt_en_dev *en_dev; + int rc; +@@ -298,13 +291,9 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait) + + en_dev = rdev->en_dev; + +- if (lock_wait) +- rtnl_lock(); + + rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP); + +- if (lock_wait) +- rtnl_unlock(); + return rc; + } + +@@ -320,7 +309,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) + + num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus()); + +- rtnl_lock(); + num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, + rdev->msix_entries, + num_msix_want); +@@ -335,7 +323,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) + } + rdev->num_msix = num_msix_got; + done: +- rtnl_unlock(); + return rc; + } + +@@ -358,24 +345,18 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, + fw_msg->timeout = timeout; + } + +-static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, +- bool lock_wait) ++static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) + { + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_ring_free_input req = {0}; + struct hwrm_ring_free_output resp; + struct bnxt_fw_msg fw_msg; +- bool do_unlock = false; + int rc = -EINVAL; + + if (!en_dev) + return rc; + + memset(&fw_msg, 0, sizeof(fw_msg)); +- if (lock_wait) { +- rtnl_lock(); +- do_unlock = true; +- } + + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); + req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; +@@ -386,8 +367,6 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, + if (rc) + dev_err(rdev_to_dev(rdev), + "Failed to free HW ring:%d :%#x", req.ring_id, rc); +- if (do_unlock) +- rtnl_unlock(); + return rc; + } + +@@ -405,7 +384,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, + return rc; + + memset(&fw_msg, 0, sizeof(fw_msg)); +- rtnl_lock(); + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); + req.enables = 0; + req.page_tbl_addr = cpu_to_le64(dma_arr[0]); +@@ -426,27 +404,21 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr, + if (!rc) + *fw_ring_id = le16_to_cpu(resp.ring_id); + +- rtnl_unlock(); + return rc; + } + + static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, +- u32 fw_stats_ctx_id, bool lock_wait) ++ u32 fw_stats_ctx_id) + { + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_stat_ctx_free_input req = {0}; + struct bnxt_fw_msg fw_msg; +- bool do_unlock = false; + int rc = -EINVAL; + + if (!en_dev) + return rc; + + memset(&fw_msg, 0, sizeof(fw_msg)); +- if (lock_wait) { +- rtnl_lock(); +- do_unlock = true; +- } + + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1); + req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); +@@ -457,8 +429,6 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, + dev_err(rdev_to_dev(rdev), + "Failed to free HW stats context %#x", rc); + +- if (do_unlock) +- rtnl_unlock(); + return rc; + } + +@@ -478,7 +448,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, + return rc; + + memset(&fw_msg, 0, sizeof(fw_msg)); +- rtnl_lock(); + + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); + req.update_period_ms = cpu_to_le32(1000); +@@ -490,7 +459,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, + if (!rc) + *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id); + +- rtnl_unlock(); + return rc; + } + +@@ -929,19 +897,19 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) + return rc; + } + +-static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev, bool lock_wait) ++static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) + { + int i; + + for (i = 0; i < rdev->num_msix - 1; i++) { +- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait); ++ bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id); + bnxt_qplib_free_nq(&rdev->nq[i]); + } + } + +-static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait) ++static void bnxt_re_free_res(struct bnxt_re_dev *rdev) + { +- bnxt_re_free_nq_res(rdev, lock_wait); ++ bnxt_re_free_nq_res(rdev); + + if (rdev->qplib_res.dpi_tbl.max) { + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, +@@ -1219,7 +1187,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) + return 0; + } + +-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait) ++static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) + { + int i, rc; + +@@ -1234,28 +1202,27 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait) + cancel_delayed_work(&rdev->worker); + + bnxt_re_cleanup_res(rdev); +- bnxt_re_free_res(rdev, lock_wait); ++ bnxt_re_free_res(rdev); + + if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { + rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); + if (rc) + dev_warn(rdev_to_dev(rdev), + "Failed to deinitialize RCFW: %#x", rc); +- bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, +- lock_wait); ++ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); + bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); + bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); +- bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait); ++ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); + bnxt_qplib_free_rcfw_channel(&rdev->rcfw); + } + if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) { +- rc = bnxt_re_free_msix(rdev, lock_wait); ++ rc = bnxt_re_free_msix(rdev); + if (rc) + dev_warn(rdev_to_dev(rdev), + "Failed to free MSI-X vectors: %#x", rc); + } + if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { +- rc = bnxt_re_unregister_netdev(rdev, lock_wait); ++ rc = bnxt_re_unregister_netdev(rdev); + if (rc) + dev_warn(rdev_to_dev(rdev), + "Failed to unregister with netdev: %#x", rc); +@@ -1276,6 +1243,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) + { + int i, j, rc; + ++ bool locked; ++ ++ /* Acquire rtnl lock through out this function */ ++ rtnl_lock(); ++ locked = true; ++ + /* Registered a new RoCE device instance to netdev */ + rc = bnxt_re_register_netdev(rdev); + if (rc) { +@@ -1374,12 +1347,16 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) + schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); + } + ++ rtnl_unlock(); ++ locked = false; ++ + /* Register ib dev */ + rc = bnxt_re_register_ib(rdev); + if (rc) { + pr_err("Failed to register with IB: %#x\n", rc); + goto fail; + } ++ set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); + dev_info(rdev_to_dev(rdev), "Device registered successfully"); + for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) { + rc = device_create_file(&rdev->ibdev.dev, +@@ -1395,7 +1372,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) + goto fail; + } + } +- set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); + ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, + &rdev->active_width); + set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); +@@ -1404,17 +1380,21 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) + + return 0; + free_sctx: +- bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true); ++ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); + free_ctx: + bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx); + disable_rcfw: + bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); + free_ring: +- bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true); ++ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id); + free_rcfw: + bnxt_qplib_free_rcfw_channel(&rdev->rcfw); + fail: +- bnxt_re_ib_unreg(rdev, true); ++ if (!locked) ++ rtnl_lock(); ++ bnxt_re_ib_unreg(rdev); ++ rtnl_unlock(); ++ + return rc; + } + +@@ -1567,7 +1547,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, + */ + if (atomic_read(&rdev->sched_count) > 0) + goto exit; +- bnxt_re_ib_unreg(rdev, false); ++ bnxt_re_ib_unreg(rdev); + bnxt_re_remove_one(rdev); + bnxt_re_dev_unreg(rdev); + break; +@@ -1646,7 +1626,10 @@ static void __exit bnxt_re_mod_exit(void) + */ + flush_workqueue(bnxt_re_wq); + bnxt_re_dev_stop(rdev); +- bnxt_re_ib_unreg(rdev, true); ++ /* Acquire the rtnl_lock as the L2 resources are freed here */ ++ rtnl_lock(); ++ bnxt_re_ib_unreg(rdev); ++ rtnl_unlock(); + bnxt_re_remove_one(rdev); + bnxt_re_dev_unreg(rdev); + } +-- +2.5.5 +