From f61268ba02ea03bff48d6fb54c28d29b04a6c0e5 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Sun, 5 Aug 2018 22:37:48 -0700 Subject: [PATCH] Add backport patches for vmw_pvrdma Signed-off-by: Adit Ranadive --- ...se-netdev-when-vmxnet3-module-is-rem.patch | 135 ++++++++ patches/0019-BACKPORT-vmw_pvrdma-irq.patch | 323 ++++++++++++++++++ 2 files changed, 458 insertions(+) create mode 100644 linux-next-cherry-picks/0001-vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-rem.patch create mode 100644 patches/0019-BACKPORT-vmw_pvrdma-irq.patch diff --git a/linux-next-cherry-picks/0001-vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-rem.patch b/linux-next-cherry-picks/0001-vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-rem.patch new file mode 100644 index 0000000..de620ff --- /dev/null +++ b/linux-next-cherry-picks/0001-vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-rem.patch @@ -0,0 +1,135 @@ +From: Neil Horman +Subject: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed + +On repeated module load/unload cycles, it's possible for the pvrdma driver +to encounter this crash: + +...
+[ 297.032448] RIP: 0010:[] [] netdev_walk_all_upper_dev_rcu+0x50/0xb0 +[ 297.034078] RSP: 0018:ffff95087780bd08 EFLAGS: 00010286 +[ 297.034986] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff95087a0c0000 +[ 297.036196] RDX: ffff95087a0c0000 RSI: ffffffff839e44e0 RDI: ffff950835d0c000 +[ 297.037421] RBP: ffff95087780bd40 R08: ffff95087a0e0ea0 R09: abddacd03f8e0ea0 +[ 297.038636] R10: abddacd03f8e0ea0 R11: ffffef5901e9dbc0 R12: ffff95087a0c0000 +[ 297.039854] R13: ffffffff839e44e0 R14: ffff95087a0c0000 R15: ffff950835d0c828 +[ 297.041071] FS: 0000000000000000(0000) GS:ffff95087fc00000(0000) knlGS:0000000000000000 +[ 297.042443] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 297.043429] CR2: ffffffffffffffe8 CR3: 000000007a652000 CR4: 00000000003607f0 +[ 297.044674] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 297.045893] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 297.047109] Call Trace: +[ 297.047545] [] netdev_has_upper_dev_all_rcu+0x18/0x20 +[ 297.048691] [] is_eth_port_of_netdev+0x2f/0xa0 [ib_core] +[ 297.049886] [] ? is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core] +... + +This occurs because vmw_pvrdma on probe stores a pointer to the netdev +that exists on function 0 of the same bus/device/slot (which represents +the vmxnet3 ethernet driver). However, it never removes this pointer if +the vmxnet3 module is removed, leading to crashes resulting from use after +free dereferencing incidents like the one above. + +The fix is pretty straightforward. vmw_pvrdma should listen for +NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code +block, and update the stored netdev pointer accordingly. This solution +has been tested by myself and the reporter with successful results. This +fix also allows the pvrdma driver to find its underlying ethernet device +in the event that vmxnet3 is loaded after pvrdma, which it was not able to +do before. 
+ +Signed-off-by: Neil Horman +Reported-by: ruquin@redhat.com +Tested-by: Adit Ranadive +Acked-by: Adit Ranadive +Signed-off-by: Jason Gunthorpe +--- + drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 39 ++++++++++++++++++++++++-- + 1 file changed, 37 insertions(+), 2 deletions(-) + +diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +index xxxxxxx..xxxxxxx xxxxxx +--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c ++++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +@@ -853,8 +853,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context) + } + + static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, ++ struct net_device *ndev, + unsigned long event) + { ++ struct pci_dev *pdev_net; ++ unsigned int slot; ++ + switch (event) { + case NETDEV_REBOOT: + case NETDEV_DOWN: +@@ -872,6 +876,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, + else + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + break; ++ case NETDEV_UNREGISTER: ++ dev_put(dev->netdev); ++ dev->netdev = NULL; ++ break; ++ case NETDEV_REGISTER: ++ /* vmxnet3 will have same bus, slot. 
But func will be 0 */ ++ slot = PCI_SLOT(dev->pdev->devfn); ++ pdev_net = pci_get_slot(dev->pdev->bus, ++ PCI_DEVFN(slot, 0)); ++ if ((dev->netdev == NULL) && ++ (pci_get_drvdata(pdev_net) == ndev)) { ++ /* this is our netdev */ ++ dev->netdev = ndev; ++ dev_hold(ndev); ++ } ++ pci_dev_put(pdev_net); ++ break; ++ + default: + dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", + event, dev->ib_dev.name); +@@ -888,8 +910,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work) + + mutex_lock(&pvrdma_device_list_lock); + list_for_each_entry(dev, &pvrdma_device_list, device_link) { +- if (dev->netdev == netdev_work->event_netdev) { +- pvrdma_netdevice_event_handle(dev, netdev_work->event); ++ if ((netdev_work->event == NETDEV_REGISTER) || ++ (dev->netdev == netdev_work->event_netdev)) { ++ pvrdma_netdevice_event_handle(dev, ++ netdev_work->event_netdev, ++ netdev_work->event); + break; + } + } +@@ -1122,6 +1147,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, + ret = -ENODEV; + goto err_free_cq_ring; + } ++ dev_hold(dev->netdev); + + dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); + +@@ -1198,6 +1224,10 @@ err_free_intrs: + pvrdma_disable_msi_all(dev); + #endif + err_free_cq_ring: ++ if (dev->netdev) { ++ dev_put(dev->netdev); ++ dev->netdev = NULL; ++ } + pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); + err_free_async_ring: + pvrdma_page_dir_cleanup(dev, &dev->async_pdir); +@@ -1237,6 +1267,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) + + flush_workqueue(event_wq); + ++ if (dev->netdev) { ++ dev_put(dev->netdev); ++ dev->netdev = NULL; ++ } ++ + /* Unregister ib device */ + ib_unregister_device(&dev->ib_dev); + diff --git a/patches/0019-BACKPORT-vmw_pvrdma-irq.patch b/patches/0019-BACKPORT-vmw_pvrdma-irq.patch new file mode 100644 index 0000000..8a1888f --- /dev/null +++ b/patches/0019-BACKPORT-vmw_pvrdma-irq.patch @@ -0,0 +1,323 @@ +From: Adit Ranadive +Subject: [PATCH] RDMA/vmw_pvrdma: Add backport patch 
for PCI IRQ functions + +Signed-off-by: Adit Ranadive +--- + drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 16 ++- + drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 8 ++ + drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 162 ++++++++++++++++++++++ + 3 files changed, 185 insertions(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +index xxxxxxx..xxxxxxx xxxxxx +--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h ++++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +@@ -53,6 +53,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -65,8 +66,11 @@ + #define PVRDMA_MASK(n) ((n << 1) - 1) + + /* +- * VMware PVRDMA PCI device id. ++ * VMware VMXNET3 + PVRDMA PCI device ids. + */ ++#ifndef PCI_DEVICE_ID_VMWARE_VMXNET3 ++#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07b0 ++#endif + #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 + + #define PVRDMA_NUM_RING_PAGES 4 +@@ -217,7 +221,17 @@ struct pvrdma_dev { + spinlock_t cmd_lock; /* Command lock. */ + struct semaphore cmd_sema; + struct completion cmd_done; ++#ifdef HAVE_PCI_IRQ_API + unsigned int nr_vectors; ++#else ++ struct { ++ enum pvrdma_intr_type type; /* Intr type */ ++ struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS]; ++ irq_handler_t handler[PVRDMA_MAX_INTERRUPTS]; ++ u8 enabled[PVRDMA_MAX_INTERRUPTS]; ++ u8 size; ++ } intr; ++#endif /* HAVE_PCI_IRQ_API */ + + /* RDMA-related device information. */ + union ib_gid *sgid_tbl; +diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +index xxxxxxx..xxxxxxx xxxxxx +--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h ++++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +@@ -182,6 +182,14 @@ enum pvrdma_intr_cause { + PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ), + }; + ++#ifndef HAVE_PCI_IRQ_API ++enum pvrdma_intr_type { ++ PVRDMA_INTR_TYPE_INTX, /* Legacy. */ ++ PVRDMA_INTR_TYPE_MSI, /* MSI. 
*/ ++ PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */ ++}; ++#endif ++ + enum pvrdma_gos_bits { + PVRDMA_GOS_BITS_UNK, /* Unknown. */ + PVRDMA_GOS_BITS_32, /* 32-bit. */ +diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +index xxxxxxx..xxxxxxx xxxxxx +--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c ++++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +@@ -306,7 +306,11 @@ static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id) + + dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n"); + ++#ifdef HAVE_PCI_IRQ_API + if (!dev->pdev->msix_enabled) { ++#else ++ if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) { ++#endif /* HAVE_PCI_IRQ_API */ + /* Legacy intr */ + icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR); + if (icr == 0) +@@ -540,13 +544,39 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) + return IRQ_HANDLED; + } + ++#ifndef HAVE_PCI_IRQ_API ++static void pvrdma_disable_msi_all(struct pvrdma_dev *dev) ++{ ++ if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) ++ pci_disable_msix(dev->pdev); ++ else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI) ++ pci_disable_msi(dev->pdev); ++} ++#endif ++ ++ + static void pvrdma_free_irq(struct pvrdma_dev *dev) + { + int i; + + dev_dbg(&dev->pdev->dev, "freeing interrupts\n"); ++ ++#ifdef HAVE_PCI_IRQ_API + for (i = 0; i < dev->nr_vectors; i++) + free_irq(pci_irq_vector(dev->pdev, i), dev); ++#else ++ if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) { ++ for (i = 0; i < dev->intr.size; i++) { ++ if (dev->intr.enabled[i]) { ++ free_irq(dev->intr.msix_entry[i].vector, dev); ++ dev->intr.enabled[i] = 0; ++ } ++ } ++ } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX || ++ dev->intr.type == PVRDMA_INTR_TYPE_MSI) { ++ free_irq(dev->pdev->irq, dev); ++ } ++#endif /* HAVE_PCI_IRQ_API */ + } + + static void pvrdma_enable_intrs(struct pvrdma_dev *dev) +@@ -561,11 +591,62 @@ static void pvrdma_disable_intrs(struct pvrdma_dev *dev) + pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0); 
+ } + ++#ifndef HAVE_PCI_IRQ_API ++static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev) ++{ ++ int i; ++ int ret; ++ ++ for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) { ++ dev->intr.msix_entry[i].entry = i; ++ dev->intr.msix_entry[i].vector = i; ++ ++ switch (i) { ++ case 0: ++ /* CMD ring handler */ ++ dev->intr.handler[i] = pvrdma_intr0_handler; ++ break; ++ case 1: ++ /* Async event ring handler */ ++ dev->intr.handler[i] = pvrdma_intr1_handler; ++ break; ++ default: ++ /* Completion queue handler */ ++ dev->intr.handler[i] = pvrdma_intrx_handler; ++ break; ++ } ++ } ++ ++ ret = pci_enable_msix(pdev, dev->intr.msix_entry, ++ PVRDMA_MAX_INTERRUPTS); ++ if (!ret) { ++ dev->intr.type = PVRDMA_INTR_TYPE_MSIX; ++ dev->intr.size = PVRDMA_MAX_INTERRUPTS; ++ } else if (ret > 0) { ++ ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret); ++ if (!ret) { ++ dev->intr.type = PVRDMA_INTR_TYPE_MSIX; ++ dev->intr.size = ret; ++ } else { ++ dev->intr.size = 0; ++ } ++ } ++ ++ dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n", ++ dev->intr.type, dev->intr.size); ++ ++ return ret; ++} ++#endif /* HAVE_PCI_IRQ_API */ ++ + static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) + { ++#ifdef HAVE_PCI_IRQ_API + struct pci_dev *pdev = dev->pdev; ++#endif + int ret = 0, i; + ++#ifdef HAVE_PCI_IRQ_API + ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS, + PCI_IRQ_MSIX); + if (ret < 0) { +@@ -575,34 +656,107 @@ static int pvrdma_alloc_intrs(struct pvrdma_dev *dev) + return ret; + } + dev->nr_vectors = ret; ++#else ++ if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) && ++ pvrdma_enable_msix(dev->pdev, dev)) { ++ /* Try MSI */ ++ ret = pci_enable_msi(dev->pdev); ++ if (!ret) { ++ dev->intr.type = PVRDMA_INTR_TYPE_MSI; ++ } else { ++ /* Legacy INTR */ ++ dev->intr.type = PVRDMA_INTR_TYPE_INTX; ++ } ++ } ++#endif /* HAVE_PCI_IRQ_API */ + ++#ifdef HAVE_PCI_IRQ_API + ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler, + pdev->msix_enabled 
? 0 : IRQF_SHARED, DRV_NAME, dev); + if (ret) { + dev_err(&dev->pdev->dev, + "failed to request interrupt 0\n"); + goto out_free_vectors; ++#else ++ /* Request First IRQ */ ++ switch (dev->intr.type) { ++ case PVRDMA_INTR_TYPE_INTX: ++ case PVRDMA_INTR_TYPE_MSI: ++ ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler, ++ IRQF_SHARED, DRV_NAME, dev); ++ if (ret) { ++ dev_err(&dev->pdev->dev, ++ "failed to request interrupt\n"); ++ goto disable_msi; ++ } ++ break; ++ case PVRDMA_INTR_TYPE_MSIX: ++ ret = request_irq(dev->intr.msix_entry[0].vector, ++ pvrdma_intr0_handler, 0, DRV_NAME, dev); ++ if (ret) { ++ dev_err(&dev->pdev->dev, ++ "failed to request interrupt 0\n"); ++ goto disable_msi; ++ } ++ dev->intr.enabled[0] = 1; ++ break; ++ default: ++ /* Not reached */ ++ break; ++#endif /* HAVE_PCI_IRQ_API */ + } + ++#ifdef HAVE_PCI_IRQ_API + for (i = 1; i < dev->nr_vectors; i++) { + ret = request_irq(pci_irq_vector(dev->pdev, i), + i == 1 ? pvrdma_intr1_handler : + pvrdma_intrx_handler, + 0, DRV_NAME, dev); ++#else ++ /* For MSIX: request intr for each vector */ ++ if (dev->intr.size > 1) { ++ ret = request_irq(dev->intr.msix_entry[1].vector, ++ pvrdma_intr1_handler, 0, DRV_NAME, dev); ++#endif /* HAVE_PCI_IRQ_API */ + if (ret) { + dev_err(&dev->pdev->dev, ++#ifdef HAVE_PCI_IRQ_API + "failed to request interrupt %d\n", i); + goto free_irqs; ++#else ++ "failed to request interrupt 1\n"); ++ goto free_irq; ++ } ++ dev->intr.enabled[1] = 1; ++ ++ for (i = 2; i < dev->intr.size; i++) { ++ ret = request_irq(dev->intr.msix_entry[i].vector, ++ pvrdma_intrx_handler, 0, ++ DRV_NAME, dev); ++ if (ret) { ++ dev_err(&dev->pdev->dev, ++ "failed to request interrupt %d\n", i); ++ goto free_irq; ++ } ++ dev->intr.enabled[i] = 1; ++#endif /* HAVE_PCI_IRQ_API */ + } + } + + return 0; + ++#ifdef HAVE_PCI_IRQ_API + free_irqs: + while (--i >= 0) + free_irq(pci_irq_vector(dev->pdev, i), dev); + out_free_vectors: + pci_free_irq_vectors(pdev); ++#else ++free_irq: ++ pvrdma_free_irq(dev); 
++disable_msi: ++ pvrdma_disable_msi_all(dev); ++#endif /* HAVE_PCI_IRQ_API */ + return ret; + } + +@@ -1038,7 +1192,11 @@ err_free_uar_table: + pvrdma_uar_table_cleanup(dev); + err_free_intrs: + pvrdma_free_irq(dev); ++#ifdef HAVE_PCI_IRQ_API + pci_free_irq_vectors(pdev); ++#else ++ pvrdma_disable_msi_all(dev); ++#endif + err_free_cq_ring: + pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); + err_free_async_ring: +@@ -1088,7 +1246,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev) + + pvrdma_disable_intrs(dev); + pvrdma_free_irq(dev); ++#ifdef HAVE_PCI_IRQ_API + pci_free_irq_vectors(pdev); ++#else ++ pvrdma_disable_msi_all(dev); ++#endif /* HAVE_PCI_IRQ_API */ + + /* Deactivate pvrdma device */ + pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET); -- 2.46.0