From dfd24515d50f9de0e010739916361567bb4a2fd2 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Tue, 25 Nov 2014 10:42:36 -0800 Subject: [PATCH] IB/qib: streamline qib xeon-phi patches --- .../0169-IB-qib-add-missing-braces.patch | 52 + .../0170-IB-qib-change-SDMA-progression.patch | 241 + .../0171-IB-qib-fix-debugfs-ordering.patch | 74 + .../0172-IB-qib-add-missing-serdes-init.patch | 34 + .../0038-IB-qib-add-RHEL7-support.patch | 39 +- ...0010-Update-qib-for-XEON-PHI-support.patch | 1883 +++++-- .../xeon-phi/0013-Updates-to-qib-driver.patch | 4787 ----------------- 7 files changed, 1770 insertions(+), 5340 deletions(-) create mode 100644 linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch create mode 100644 linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch create mode 100644 linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch create mode 100644 linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch rename tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch => patches/0038-IB-qib-add-RHEL7-support.patch (78%) delete mode 100644 tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch diff --git a/linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch b/linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch new file mode 100644 index 0000000..87d247a --- /dev/null +++ b/linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch @@ -0,0 +1,52 @@ +IB/qib: add missing braces in do_qib_user_sdma_queue_create() + +From: Yann Droneaud + +Commit c804f07248895ff9c moved qib_assign_ctxt() to +do_qib_user_sdma_queue_create() but dropped the braces +around the statements. + +This was spotted by coccicheck (coccinelle/spatch): + +$ make C=2 CHECK=scripts/coccicheck drivers/infiniband/hw/qib/ + + CHECK drivers/infiniband/hw/qib/qib_file_ops.c +drivers/infiniband/hw/qib/qib_file_ops.c:1583:2-23: code aligned with following code on line 1587 + +This patch adds braces back. 
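In reduced form the hazard looks like the stand-alone sketch below
(made-up names, not the driver code). Without the braces only the
assignment is guarded, so the !pq test runs unconditionally and a
context on hardware without send DMA takes the -ENOMEM path even
though no allocation was ever attempted:

        #include <stdlib.h>
        #include <errno.h>

        struct queue { int unit; };

        static int create_queue_broken(int has_send_dma, struct queue **pq)
        {
                if (has_send_dma)

                        *pq = malloc(sizeof(**pq));
                        if (!*pq)               /* indented as if guarded, */
                                return -ENOMEM; /* but runs unconditionally */

                return 0;
        }

        static int create_queue_fixed(int has_send_dma, struct queue **pq)
        {
                if (has_send_dma) {
                        *pq = malloc(sizeof(**pq));
                        if (!*pq)
                                return -ENOMEM; /* only an error if we allocated */
                }

                return 0;
        }

        int main(void)
        {
                struct queue *a = NULL, *b = NULL;

                /* without send DMA the broken variant wrongly reports -ENOMEM */
                return create_queue_broken(0, &a) != create_queue_fixed(0, &b);
        }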
+ +Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com +Cc: Mike Marciniszyn +Cc: infinipath@intel.com +Cc: Julia Lawall +Cc: cocci@systeme.lip6.fr +Cc: stable@vger.kernel.org +Signed-off-by: Yann Droneaud +Tested-by: Mike Marciniszyn +Acked-by: Mike Marciniszyn +Signed-off-by: Roland Dreier +--- + drivers/infiniband/hw/qib/qib_file_ops.c | 3 ++- + 1 files changed, 2 insertions(+), 1 deletions(-) + +diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c +index 2037630..c062c60 100644 +--- a/drivers/infiniband/hw/qib/qib_file_ops.c ++++ b/drivers/infiniband/hw/qib/qib_file_ops.c +@@ -1587,7 +1587,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) + struct qib_ctxtdata *rcd = fd->rcd; + struct qib_devdata *dd = rcd->dd; + +- if (dd->flags & QIB_HAS_SEND_DMA) ++ if (dd->flags & QIB_HAS_SEND_DMA) { + + fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, + dd->unit, +@@ -1595,6 +1595,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) + fd->subctxt); + if (!fd->pq) + return -ENOMEM; ++ } + + return 0; + } diff --git a/linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch b/linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch new file mode 100644 index 0000000..a11dd66 --- /dev/null +++ b/linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch @@ -0,0 +1,241 @@ +IB/qib: Change SDMA progression mode depending on single- or multi-rail + +From: CQ Tang + +Improve performance by changing the behavour of the driver when all +SDMA descriptors are in use, and the processes adding new descriptors +are single- or multi-rail. + +For single-rail processes, the driver will block the call and finish +posting all SDMA descriptors onto the hardware queue before returning +back to PSM. Repeated kernel calls are slower than blocking. + +For multi-rail processes, the driver will return to PSM as quick as +possible so PSM can feed packets to other rail. If all hardware +queues are full, PSM will buffer the remaining SDMA descriptors until +notified by interrupt that space is available. + +This patch builds a red-black tree to track the number rails opened by +a particular PID. If the number is more than one, it is a multi-rail +PSM process, otherwise, it is a single-rail process. + +Reviewed-by: Dean Luick +Reviewed-by: John A Gregor +Reviewed-by: Mitko Haralanov +Signed-off-by: CQ Tang +Signed-off-by: Mike Marciniszyn +Signed-off-by: Roland Dreier +--- + drivers/infiniband/hw/qib/qib_user_sdma.c | 136 ++++++++++++++++++++++++++--- + 1 files changed, 123 insertions(+), 13 deletions(-) + +diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c +index 165aee2..d2806ca 100644 +--- a/drivers/infiniband/hw/qib/qib_user_sdma.c ++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c +@@ -52,6 +52,17 @@ + /* attempt to drain the queue for 5secs */ + #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 + ++/* ++ * track how many times a process open this driver. ++ */ ++static struct rb_root qib_user_sdma_rb_root = RB_ROOT; ++ ++struct qib_user_sdma_rb_node { ++ struct rb_node node; ++ int refcount; ++ pid_t pid; ++}; ++ + struct qib_user_sdma_pkt { + struct list_head list; /* list element */ + +@@ -120,15 +131,60 @@ struct qib_user_sdma_queue { + /* dma page table */ + struct rb_root dma_pages_root; + ++ struct qib_user_sdma_rb_node *sdma_rb_node; ++ + /* protect everything above... 
*/ + struct mutex lock; + }; + ++static struct qib_user_sdma_rb_node * ++qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) ++{ ++ struct qib_user_sdma_rb_node *sdma_rb_node; ++ struct rb_node *node = root->rb_node; ++ ++ while (node) { ++ sdma_rb_node = container_of(node, ++ struct qib_user_sdma_rb_node, node); ++ if (pid < sdma_rb_node->pid) ++ node = node->rb_left; ++ else if (pid > sdma_rb_node->pid) ++ node = node->rb_right; ++ else ++ return sdma_rb_node; ++ } ++ return NULL; ++} ++ ++static int ++qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) ++{ ++ struct rb_node **node = &(root->rb_node); ++ struct rb_node *parent = NULL; ++ struct qib_user_sdma_rb_node *got; ++ ++ while (*node) { ++ got = container_of(*node, struct qib_user_sdma_rb_node, node); ++ parent = *node; ++ if (new->pid < got->pid) ++ node = &((*node)->rb_left); ++ else if (new->pid > got->pid) ++ node = &((*node)->rb_right); ++ else ++ return 0; ++ } ++ ++ rb_link_node(&new->node, parent, node); ++ rb_insert_color(&new->node, root); ++ return 1; ++} ++ + struct qib_user_sdma_queue * + qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) + { + struct qib_user_sdma_queue *pq = + kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); ++ struct qib_user_sdma_rb_node *sdma_rb_node; + + if (!pq) + goto done; +@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) + pq->num_pending = 0; + pq->num_sending = 0; + pq->added = 0; ++ pq->sdma_rb_node = NULL; + + INIT_LIST_HEAD(&pq->sent); + spin_lock_init(&pq->sent_lock); +@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) + + pq->dma_pages_root = RB_ROOT; + ++ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, ++ current->pid); ++ if (sdma_rb_node) { ++ sdma_rb_node->refcount++; ++ } else { ++ int ret; ++ sdma_rb_node = kmalloc(sizeof( ++ struct qib_user_sdma_rb_node), GFP_KERNEL); ++ if (!sdma_rb_node) ++ goto err_rb; ++ ++ sdma_rb_node->refcount = 1; ++ sdma_rb_node->pid = current->pid; ++ ++ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, ++ sdma_rb_node); ++ BUG_ON(ret == 0); ++ } ++ pq->sdma_rb_node = sdma_rb_node; ++ + goto done; + ++err_rb: ++ dma_pool_destroy(pq->header_cache); + err_slab: + kmem_cache_destroy(pq->pkt_slab); + err_kfree: +@@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) + if (!pq) + return; + +- kmem_cache_destroy(pq->pkt_slab); ++ pq->sdma_rb_node->refcount--; ++ if (pq->sdma_rb_node->refcount == 0) { ++ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); ++ kfree(pq->sdma_rb_node); ++ } + dma_pool_destroy(pq->header_cache); ++ kmem_cache_destroy(pq->pkt_slab); + kfree(pq); + } + +@@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist, int count) + { +- int ret = 0; + unsigned long flags; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; + +- spin_lock_irqsave(&ppd->sdma_lock, flags); +- +- if (unlikely(!__qib_sdma_running(ppd))) { +- ret = -ECOMM; +- goto unlock; ++ /* non-blocking mode */ ++ if (pq->sdma_rb_node->refcount > 1) { ++ spin_lock_irqsave(&ppd->sdma_lock, flags); ++ if (unlikely(!__qib_sdma_running(ppd))) { ++ spin_unlock_irqrestore(&ppd->sdma_lock, flags); ++ return -ECOMM; ++ } ++ pq->num_pending += count; ++ list_splice_tail_init(pktlist, &ppd->sdma_userpending); ++ qib_user_sdma_send_desc(ppd, 
&ppd->sdma_userpending); ++ spin_unlock_irqrestore(&ppd->sdma_lock, flags); ++ return 0; + } + ++ /* In this case, descriptors from this process are not ++ * linked to ppd pending queue, interrupt handler ++ * won't update this process, it is OK to directly ++ * modify without sdma lock. ++ */ ++ ++ + pq->num_pending += count; +- list_splice_tail_init(pktlist, &ppd->sdma_userpending); +- qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); ++ /* ++ * Blocking mode for single rail process, we must ++ * release/regain sdma_lock to give other process ++ * chance to make progress. This is important for ++ * performance. ++ */ ++ do { ++ spin_lock_irqsave(&ppd->sdma_lock, flags); ++ if (unlikely(!__qib_sdma_running(ppd))) { ++ spin_unlock_irqrestore(&ppd->sdma_lock, flags); ++ return -ECOMM; ++ } ++ qib_user_sdma_send_desc(ppd, pktlist); ++ if (!list_empty(pktlist)) ++ qib_sdma_make_progress(ppd); ++ spin_unlock_irqrestore(&ppd->sdma_lock, flags); ++ } while (!list_empty(pktlist)); + +-unlock: +- spin_unlock_irqrestore(&ppd->sdma_lock, flags); +- return ret; ++ return 0; + } + + int qib_user_sdma_writev(struct qib_ctxtdata *rcd, +@@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, + qib_user_sdma_queue_clean(ppd, pq); + + while (dim) { +- int mxp = 8; ++ int mxp = 1; + int ndesc = 0; + + ret = qib_user_sdma_queue_pkts(dd, ppd, pq, diff --git a/linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch b/linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch new file mode 100644 index 0000000..6f0744c --- /dev/null +++ b/linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch @@ -0,0 +1,74 @@ +IB/qib: Fix debugfs ordering issue with multiple HCAs + +From: Mike Marciniszyn + +The debugfs init code was incorrectly called before the idr mechanism +is used to get the unit number, so the dd->unit hasn't been +initialized. This caused the unit relative directory creation to fail +after the first. + +This patch moves the init for the debugfs stuff until after all of the +failures and after the unit number has been determined. + +A bug in unwind code in qib_alloc_devdata() is also fixed. 
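In reduced form (stub names standing in for the idr and debugfs
calls, not the driver code) the ordering problem and the fix look
like this:

        #include <stdio.h>

        struct dev_data { int unit; };

        static int next_unit;                  /* stand-in for the idr */

        static int idr_alloc_unit(void)
        {
                return next_unit++;
        }

        static void dbg_ibdev_init(int unit)   /* stand-in for debugfs init */
        {
                printf("create debugfs dir qib%d\n", unit);
        }

        static void alloc_devdata_broken(struct dev_data *dd)
        {
                /* the directory is keyed on dd->unit, still the zeroed
                 * default here, so every device after the first collides
                 * on "qib0" */
                dbg_ibdev_init(dd->unit);
                dd->unit = idr_alloc_unit();
        }

        static int alloc_devdata_fixed(struct dev_data *dd)
        {
                int ret = idr_alloc_unit();

                if (ret < 0)
                        return ret; /* nothing to unwind, debugfs untouched */
                dd->unit = ret;
                dbg_ibdev_init(dd->unit); /* keyed on the real unit number */
                return 0;
        }

        int main(void)
        {
                struct dev_data a = { 0 }, b = { 0 };

                alloc_devdata_broken(&a); /* qib0 */
                alloc_devdata_broken(&b); /* qib0 again: collision */

                next_unit = 0;
                alloc_devdata_fixed(&a);  /* qib0 */
                alloc_devdata_fixed(&b);  /* qib1 */
                return 0;
        }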
+ +Cc: +Reviewed-by: Dennis Dalessandro +Signed-off-by: Mike Marciniszyn +Signed-off-by: Roland Dreier +--- + drivers/infiniband/hw/qib/qib_init.c | 25 +++++++++++-------------- + 1 files changed, 11 insertions(+), 14 deletions(-) + +diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c +index c1248a6..17e0831 100644 +--- a/drivers/infiniband/hw/qib/qib_init.c ++++ b/drivers/infiniband/hw/qib/qib_init.c +@@ -1097,14 +1097,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) + int ret; + + dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); +- if (!dd) { +- dd = ERR_PTR(-ENOMEM); +- goto bail; +- } ++ if (!dd) ++ return ERR_PTR(-ENOMEM); + +-#ifdef CONFIG_DEBUG_FS +- qib_dbg_ibdev_init(&dd->verbs_dev); +-#endif ++ INIT_LIST_HEAD(&dd->list); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&qib_devs_lock, flags); +@@ -1121,11 +1117,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) + if (ret < 0) { + qib_early_err(&pdev->dev, + "Could not allocate unit ID: error %d\n", -ret); +-#ifdef CONFIG_DEBUG_FS +- qib_dbg_ibdev_exit(&dd->verbs_dev); +-#endif +- ib_dealloc_device(&dd->verbs_dev.ibdev); +- dd = ERR_PTR(ret); + goto bail; + } + +@@ -1139,9 +1130,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) + qib_early_err(&pdev->dev, + "Could not alloc cpulist info, cpu affinity might be wrong\n"); + } +- +-bail: ++#ifdef CONFIG_DEBUG_FS ++ qib_dbg_ibdev_init(&dd->verbs_dev); ++#endif + return dd; ++bail: ++ if (!list_empty(&dd->list)) ++ list_del_init(&dd->list); ++ ib_dealloc_device(&dd->verbs_dev.ibdev); ++ return ERR_PTR(ret); + } + + /* diff --git a/linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch b/linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch new file mode 100644 index 0000000..1d43891 --- /dev/null +++ b/linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch @@ -0,0 +1,34 @@ +IB/qib: Add missing serdes init sequence + +From: Mike Marciniszyn + +Research has shown that commit a77fcf895046 ("IB/qib: Use a single +txselect module parameter for serdes tuning") missed a key serdes init +sequence. + +This patch adds that sequence. 
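The idea behind the added write, in a reduced sketch (hypothetical
register and bit names, not the real 7322 register map): a previous
bringup may leave Tx deemphasis values forced, and negotiation must
not inherit them.

        #include <stdint.h>

        #define RESET_TX_DEEMPHASIS_OVERRIDE (1u << 0) /* made-up bit */

        static uint32_t krp_tx_deemph_override; /* stand-in for the kreg */

        static void write_kreg_port(uint32_t *kreg, uint32_t val)
        {
                *kreg = val; /* an MMIO write in the driver */
        }

        static void bringup_serdes(void)
        {
                /* clear any Tx deemphasis values still forced by an
                 * earlier bringup before negotiation starts -- the
                 * step commit a77fcf895046 lost */
                write_kreg_port(&krp_tx_deemph_override,
                                RESET_TX_DEEMPHASIS_OVERRIDE);

                /* ... rest of bringup: IBC config, DDR negotiation ... */
        }

        int main(void)
        {
                bringup_serdes();
                return 0;
        }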
+ +Cc: +Reviewed-by: Dennis Dalessandro +Signed-off-by: Mike Marciniszyn +Signed-off-by: Roland Dreier +--- + drivers/infiniband/hw/qib/qib_iba7322.c | 5 +++++ + 1 files changed, 5 insertions(+), 0 deletions(-) + +diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c +index 016e742..9b642d4 100644 +--- a/drivers/infiniband/hw/qib/qib_iba7322.c ++++ b/drivers/infiniband/hw/qib/qib_iba7322.c +@@ -2395,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + ++ /* ensure previous Tx parameters are not still forced */ ++ qib_write_kreg_port(ppd, krp_tx_deemph_override, ++ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, ++ reset_tx_deemphasis_override)); ++ + if (qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, diff --git a/tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch b/patches/0038-IB-qib-add-RHEL7-support.patch similarity index 78% rename from tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch rename to patches/0038-IB-qib-add-RHEL7-support.patch index 42f5399..9470c66 100644 --- a/tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch +++ b/patches/0038-IB-qib-add-RHEL7-support.patch @@ -1,17 +1,17 @@ -From e90045185670bbdb315d50e5f89bf3f16249ee42 Mon Sep 17 00:00:00 2001 +IB/qib: add RHEL7 support + From: Jubin John -Date: Mon, 20 Oct 2014 23:53:59 -0700 -Subject: [PATCH] qib add RHEL7 support +Reviewed-by: Mike Marciniszyn +Signed-off-by: Jubin John --- drivers/infiniband/hw/qib/qib_file_ops.c | 9 +++++++++ drivers/infiniband/hw/qib/qib_fs.c | 5 +++++ drivers/infiniband/hw/qib/qib_init.c | 23 +++++++++++++++++++++-- - drivers/infiniband/hw/qib/qib_knx.c | 1 + - 4 files changed, 36 insertions(+), 2 deletions(-) + 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c -index 376961d..ea5bdd5 100644 +index 275f247..2037630 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -39,11 +39,16 @@ @@ -31,7 +31,7 @@ index 376961d..ea5bdd5 100644 #include "qib.h" #include "qib_common.h" -@@ -1163,7 +1168,11 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, +@@ -971,7 +976,11 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &qib_file_vm_ops; @@ -62,10 +62,10 @@ index f247fc6..cbe6e3c 100644 inode->i_atime = CURRENT_TIME; inode->i_mtime = inode->i_atime; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c -index 0e83ed4..995d301 100644 +index 24e802f..c1248a6 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c -@@ -1226,9 +1226,15 @@ void qib_disable_after_error(struct qib_devdata *dd) +@@ -1177,9 +1177,15 @@ void qib_disable_after_error(struct qib_devdata *dd) if (dd->devstatusp) *dd->devstatusp |= QIB_STATUS_HWERROR; } @@ -83,7 +83,7 @@ index 0e83ed4..995d301 100644 #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " -@@ -1245,7 +1251,11 @@ MODULE_DEVICE_TABLE(pci, qib_pci_tbl); +@@ -1196,7 +1202,11 @@ MODULE_DEVICE_TABLE(pci, qib_pci_tbl); static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, @@ -95,7 +95,7 @@ index 0e83ed4..995d301 100644 .id_table = qib_pci_tbl, .err_handler = 
&qib_pci_err_handler, }; -@@ -1486,7 +1496,12 @@ static void qib_postinit_cleanup(struct qib_devdata *dd) +@@ -1417,7 +1427,12 @@ static void qib_postinit_cleanup(struct qib_devdata *dd) qib_free_devdata(dd); } @@ -108,7 +108,7 @@ index 0e83ed4..995d301 100644 { int ret, j, pidx, initfail; struct qib_devdata *dd = NULL; -@@ -1593,7 +1608,11 @@ bail: +@@ -1522,7 +1537,11 @@ bail: return ret; } @@ -120,18 +120,3 @@ index 0e83ed4..995d301 100644 { struct qib_devdata *dd = pci_get_drvdata(pdev); int ret; -diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c -index f692913..efe79d6 100644 ---- a/drivers/infiniband/hw/qib/qib_knx.c -+++ b/drivers/infiniband/hw/qib/qib_knx.c -@@ -29,6 +29,7 @@ - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -+#include - #include - #include - #include --- -1.7.1 - diff --git a/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch b/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch index 2cc8740..ca875cb 100644 --- a/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch +++ b/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch @@ -1,23 +1,24 @@ -From 0ed3bd45f3b358e5f32ff4e6e87b94fd80da69b5 Mon Sep 17 00:00:00 2001 -From: Phil Cayton -Date: Thu, 6 Feb 2014 13:45:33 -0800 -Subject: [PATCH 10/12] Update qib for XEON PHI support +IB/qib: Update qib for XEON PHI support +From: Jubin John + +Reviewed-by: Mike Marciniszyn +Signed-off-by: Jubin John --- - drivers/infiniband/hw/qib/Makefile | 5 + - drivers/infiniband/hw/qib/qib.h | 41 +- - drivers/infiniband/hw/qib/qib_common.h | 8 +- - drivers/infiniband/hw/qib/qib_file_ops.c | 369 +++++++++++- - drivers/infiniband/hw/qib/qib_init.c | 61 +- - drivers/infiniband/hw/qib/qib_knx.c | 923 +++++++++++++++++++++++++++++ - drivers/infiniband/hw/qib/qib_knx.h | 63 ++ - drivers/infiniband/hw/qib/qib_knx_sdma.h | 105 ++++ - drivers/infiniband/hw/qib/qib_knx_tidrcv.h | 48 ++ - 9 files changed, 1596 insertions(+), 27 deletions(-) + drivers/infiniband/hw/qib/Makefile | 5 + drivers/infiniband/hw/qib/qib.h | 19 + drivers/infiniband/hw/qib/qib_common.h | 7 + drivers/infiniband/hw/qib/qib_file_ops.c | 334 +++++- + drivers/infiniband/hw/qib/qib_init.c | 16 + drivers/infiniband/hw/qib/qib_knx.c | 1532 ++++++++++++++++++++++++++++ + drivers/infiniband/hw/qib/qib_knx.h | 74 + + drivers/infiniband/hw/qib/qib_knx_common.h | 126 ++ + drivers/infiniband/hw/qib/qib_user_sdma.c | 173 +-- + drivers/infiniband/hw/qib/qib_user_sdma.h | 106 ++ + 10 files changed, 2241 insertions(+), 151 deletions(-) create mode 100644 drivers/infiniband/hw/qib/qib_knx.c create mode 100644 drivers/infiniband/hw/qib/qib_knx.h - create mode 100644 drivers/infiniband/hw/qib/qib_knx_sdma.h - create mode 100644 drivers/infiniband/hw/qib/qib_knx_tidrcv.h + create mode 100644 drivers/infiniband/hw/qib/qib_knx_common.h diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile index 57f8103..ba2a49d 100644 @@ -33,61 +34,33 @@ index 57f8103..ba2a49d 100644 +ccflags-y += -DQIB_CONFIG_KNX +endif diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h -index 1946101..ad87abd 100644 +index 1946101..85c078e 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h -@@ -112,7 +112,20 @@ struct qib_eep_log_mask { - }; - - /* -- * Below contains all data related to a single context (formerly called port). 
-+ * Indicates to the driver that the loadable parameter could be -+ * configured by it as it was not configured by the user. -+ */ -+#define QIB_DRIVER_AUTO_CONFIGURATION 10 -+ -+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) -+#define qib_configure_numa(a) \ -+ (a.x86_vendor == X86_VENDOR_INTEL && a.x86 == 6 && a.x86_model == 45) -+#else -+#define qib_configure_numa(a) 0 +@@ -234,6 +234,10 @@ struct qib_ctxtdata { + u32 lookaside_qpn; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; ++#ifdef QIB_CONFIG_KNX ++ /* KNX Receive Context Data */ ++ struct qib_knx_ctxt *krcd; +#endif -+ -+/* -+ * Below contains all data related to a single context (formerly called port). - */ - #ifdef CONFIG_DEBUG_FS -@@ -739,6 +752,12 @@ struct qib_devdata { - - /* mem-mapped pointer to base of chip regs */ - u64 __iomem *kregbase; -+ -+ /* mem-mapped base of chip regs plus offset of the SendBufAvail0 -+ * register -+ */ -+ u64 sendbufavail0; -+ - /* end of mem-mapped chip space excluding sendbuf and user regs */ - u64 __iomem *kregend; - /* physical address of chip for io_remap, etc. */ -@@ -1103,7 +1122,15 @@ struct qib_devdata { - /* per device cq worker */ + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +@@ -1104,6 +1108,11 @@ struct qib_devdata { struct kthread_worker *worker; -+ int local_node_id; /* NUMA node closest to HCA */ int assigned_node_id; /* NUMA node closest to HCA */ + +#ifdef QIB_CONFIG_KNX -+ /* peer node id of connected KNX node */ -+ u16 node_id; -+ struct qib_knx *knx; ++ /* number of KNx nodes using this device */ ++ u16 num_knx; +#endif -+ }; /* hol_state values */ -@@ -1132,6 +1159,9 @@ struct qib_filedata { +@@ -1132,6 +1141,9 @@ struct qib_filedata { unsigned tidcursor; struct qib_user_sdma_queue *pq; int rec_cpu_num; /* for cpu affinity; -1 if none */ @@ -97,7 +70,7 @@ index 1946101..ad87abd 100644 }; extern struct list_head qib_dev_list; -@@ -1209,6 +1239,13 @@ int qib_set_uevent_bits(struct qib_pportdata *, const int); +@@ -1209,6 +1221,13 @@ int qib_set_uevent_bits(struct qib_pportdata *, const int); (((struct qib_filedata *)(fp)->private_data)->tidcursor) #define user_sdma_queue_fp(fp) \ (((struct qib_filedata *)(fp)->private_data)->pq) @@ -111,17 +84,8 @@ index 1946101..ad87abd 100644 static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd) { -@@ -1476,6 +1513,8 @@ extern unsigned qib_n_krcv_queues; - extern unsigned qib_sdma_fetch_arb; - extern unsigned qib_compat_ddr_negotiate; - extern int qib_special_trigger; -+extern unsigned qib_pio_avail_bits; -+extern unsigned qib_rcvhdrpoll; - extern unsigned qib_numa_aware; - - extern struct mutex qib_mutex; diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h -index 5670ace..9182d02 100644 +index 5670ace..39eef25 100644 --- a/drivers/infiniband/hw/qib/qib_common.h +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -1,4 +1,5 @@ @@ -144,62 +108,29 @@ index 5670ace..9182d02 100644 /* size of struct base_info to write to */ __u32 spu_base_info_size; -@@ -360,7 +365,6 @@ struct qib_user_info { - * address of struct base_info to write to - */ - __u64 spu_base_info; -- - } __attribute__ ((aligned(8))); - - /* User commands. 
*/ diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c -index 275f247..6eebad0 100644 +index c062c60..93ca1f9 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c -@@ -48,6 +48,42 @@ +@@ -53,6 +53,7 @@ #include "qib.h" #include "qib_common.h" #include "qib_user_sdma.h" -+#ifdef QIB_CONFIG_KNX +#include "qib_knx.h" -+#endif -+ -+/* -+ * Option for a user application to read from the SendBufAvailn registers -+ * for the send buffer status as a memory IO operation or from main memory. -+ * The default mode of operation is to have the user process read this -+ * register from mapped memory when running on the local socket and have -+ * it read from the register directly (memory IO) when running on the far -+ * socket. For older applications, ie.., with QIB_USER_SWMINOR less than -+ * 12, all processes will read the register from main memory. -+ */ -+unsigned qib_pio_avail_bits = 1; -+module_param_named(pio_avail_bits, qib_pio_avail_bits, uint, S_IRUGO); -+MODULE_PARM_DESC(pio_avail_bits, "send buffer status read: " -+ "0=memory read on local NUMA node & MMIO read on far nodes, " -+ "1=memory read(default), 2=MMIO read, " -+ "10=option 1 for AMD & <= Intel Westmere cpus and option 0 for newer cpus"); -+ -+/* -+ * Option for a user application to read from the RcvHdrTailn registers -+ * for the next empty receive header queue entry as a memory IO operation -+ * or from main memory. The default mode of operation is to have the user -+ * process read this register from mapped memory when running on the local -+ * socket and have it read from the register directly (memory IO) when -+ * running on the far socket. For older applications, ie.., with -+ * QIB_USER_SWMINOR less than 12, all user processes will read the -+ * register from main memory. 
-+ */ -+unsigned qib_rcvhdrpoll = 1; -+module_param_named(rcvhdrpoll, qib_rcvhdrpoll, uint, S_IRUGO); -+MODULE_PARM_DESC(rcvhdrpoll, "receive buffer status read: " -+ "0=memory read on local NUMA node & MMIO read on far nodes, " -+ "1=memory read(default), 2=MMIO read, " -+ "10=option 1 for AMD & <= Intel Westmere cpus and option 0 for newer cpus"); #undef pr_fmt #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt -@@ -89,6 +125,73 @@ static u64 cvt_kvaddr(void *p) +@@ -64,6 +65,9 @@ static ssize_t qib_aio_write(struct kiocb *, const struct iovec *, + unsigned long, loff_t); + static unsigned int qib_poll(struct file *, struct poll_table_struct *); + static int qib_mmapf(struct file *, struct vm_area_struct *); ++static int subctxt_search_ctxts(struct qib_devdata *, struct file *, ++ const struct qib_user_info *); ++ + + static const struct file_operations qib_file_ops = { + .owner = THIS_MODULE, +@@ -94,6 +98,64 @@ static u64 cvt_kvaddr(void *p) return paddr; } @@ -235,15 +166,6 @@ index 275f247..6eebad0 100644 + if (ret < 0) + goto bail_free; + -+ switch (qib_rcvhdrpoll) { -+ case 0: -+ if (local_node) -+ break; -+ case 2: -+ kinfo->spi_runtime_flags &= ~QIB_RUNTIME_NODMA_RTAIL; -+ break; -+ } -+ + if (rcd->subctxt_cnt && !subctxt_fp(fp)) + kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER; + @@ -273,19 +195,10 @@ index 275f247..6eebad0 100644 static int qib_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { -@@ -100,6 +203,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, - unsigned subctxt_cnt; - int shared, master; - size_t sz; -+ int local_node = (numa_node_id() == pcibus_to_node(dd->pcidev->bus)); - - subctxt_cnt = rcd->subctxt_cnt; - if (!subctxt_cnt) { -@@ -176,15 +280,91 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, - * both can be enabled and used. +@@ -182,14 +244,43 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, */ kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys; -- kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys; + /* + * In the case of KNX, qib_do_user_init() would call into the + * KNX-specific memory allocation/registration functions. These @@ -300,64 +213,16 @@ index 275f247..6eebad0 100644 + if (knx_node_fp(fp)) + kinfo->spi_runtime_flags = + qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_FLAGS, fp); -+ else { -+ switch (qib_rcvhdrpoll) { -+ case 0: -+ if (local_node) -+ kinfo->spi_rcvhdr_tailaddr = -+ (u64) rcd->rcvhdrqtailaddr_phys; -+ else { -+ kinfo->spi_rcvhdr_tailaddr = -+ (u64) (kinfo->spi_uregbase + -+ ur_rcvhdrtail); -+ kinfo->spi_runtime_flags &= -+ ~QIB_RUNTIME_NODMA_RTAIL; -+ } -+ break; -+ case 1: -+ kinfo->spi_rcvhdr_tailaddr = -+ (u64) rcd->rcvhdrqtailaddr_phys; -+ break; -+ case 2: -+ kinfo->spi_rcvhdr_tailaddr = -+ (u64) (kinfo->spi_uregbase + ur_rcvhdrtail); -+ kinfo->spi_runtime_flags &= ~QIB_RUNTIME_NODMA_RTAIL; -+ break; -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ } -+ kinfo->spi_rhf_offset = dd->rhf_offset; kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys; - kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys; + + /* see comment for spi_uregbase above */ -+ if (knx_node_fp(fp)) { ++ if (knx_node_fp(fp)) + kinfo->spi_pioavailaddr = + qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_PIOAVAIL, fp); -+ } else { -+ switch (qib_pio_avail_bits) { -+ case 0: -+ kinfo->spi_pioavailaddr = local_node ? 
-+ (u64)dd->pioavailregs_phys : -+ (u64)dd->sendbufavail0; -+ break; -+ case 1: -+ kinfo->spi_pioavailaddr = (u64)dd->pioavailregs_phys; -+ break; -+ case 2: -+ kinfo->spi_pioavailaddr = (u64)dd->sendbufavail0; -+ break; -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ } -+ -+ if (ret < 0) -+ goto bail; ++ else ++ kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys; + /* setup per-unit (not port) status area for user programs */ - kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + @@ -379,7 +244,7 @@ index 275f247..6eebad0 100644 if (!shared) { kinfo->spi_piocnt = rcd->piocnt; kinfo->spi_piobufbase = (u64) rcd->piobufs; -@@ -204,7 +384,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, +@@ -209,7 +300,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, dd->palign * kinfo->spi_piocnt * slave; } @@ -392,7 +257,7 @@ index 275f247..6eebad0 100644 kinfo->spi_sendbuf_status = cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]); /* only spi_subctxt_* fields should be set in this block! */ -@@ -225,6 +409,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, +@@ -230,6 +325,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase, kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) / dd->palign; kinfo->spi_pioalign = dd->palign; @@ -404,65 +269,7 @@ index 275f247..6eebad0 100644 kinfo->spi_qpair = QIB_KD_QP; /* * user mode PIO buffers are always 2KB, even when 4KB can -@@ -978,6 +1167,35 @@ bail: - return ret; - } - -+static int mmap_sendbufavail(struct vm_area_struct *vma, struct qib_devdata *dd, -+ u64 ureg) -+{ -+ unsigned long phys; -+ unsigned long sz; -+ int ret; -+ -+ /* -+ * This is real hardware, so use io_remap. This is the mechanism -+ * for the user process to update the head registers for their ctxt -+ * in the chip. 
-+ */ -+ sz = PAGE_SIZE; -+ if ((vma->vm_end - vma->vm_start) > sz) -+ ret = -EFAULT; -+ else { -+ phys = dd->physaddr + ureg; -+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); -+ -+ vma->vm_flags &= ~VM_MAYWRITE; -+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ; -+ -+ ret = io_remap_pfn_range(vma, vma->vm_start, -+ phys >> PAGE_SHIFT, -+ vma->vm_end - vma->vm_start, -+ vma->vm_page_prot); -+ } -+ return ret; -+} - /** - * qib_mmapf - mmap various structures into user space - * @fp: the file pointer -@@ -1056,6 +1274,8 @@ static int qib_mmapf(struct file *fp, struct vm_area_struct *vma) - - if (pgaddr == ureg) - ret = mmap_ureg(vma, dd, ureg); -+ else if (pgaddr == dd->sendbufavail0) -+ ret = mmap_sendbufavail(vma, dd, pgaddr - (u64)dd->kregbase); - else if (pgaddr == piobufs) - ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt); - else if (pgaddr == dd->pioavailregs_phys) -@@ -1187,11 +1407,7 @@ static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) - int cpu; - cpu = find_first_zero_bit(qib_cpulist, - qib_cpulist_count); -- if (cpu == qib_cpulist_count) -- qib_dev_err(dd, -- "no cpus avail for affinity PID %u\n", -- current->pid); -- else { -+ if (cpu != qib_cpulist_count) { - __set_bit(cpu, qib_cpulist); - fd->rec_cpu_num = cpu; - } -@@ -1261,6 +1477,17 @@ static int init_subctxts(struct qib_devdata *dd, +@@ -1270,6 +1370,17 @@ static int init_subctxts(struct qib_devdata *dd, goto bail; } @@ -480,7 +287,7 @@ index 275f247..6eebad0 100644 rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts); if (!rcd->subctxt_uregbase) { ret = -ENOMEM; -@@ -1283,6 +1510,9 @@ static int init_subctxts(struct qib_devdata *dd, +@@ -1292,6 +1403,9 @@ static int init_subctxts(struct qib_devdata *dd, goto bail_rhdr; } @@ -490,36 +297,131 @@ index 275f247..6eebad0 100644 rcd->subctxt_cnt = uinfo->spu_subctxt_cnt; rcd->subctxt_id = uinfo->spu_subctxt_id; rcd->active_slaves = 1; -@@ -1333,6 +1563,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, +@@ -1326,6 +1440,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); + ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) ++ /* ++ * Skip allocation of page pointer list for TID ++ * receives. This will be done on the KNX. ++ */ ++ goto no_page_list; ++#endif + /* + * Allocate memory for use in qib_tid_update() at open to + * reduce cost of expected send setup per message segment +@@ -1341,7 +1463,11 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, + ret = -ENOMEM; goto bailerr; } ++#ifdef QIB_CONFIG_KNX ++no_page_list: ++#endif rcd->userversion = uinfo->spu_userversion; + ret = init_subctxts(dd, rcd, uinfo); if (ret) goto bailerr; -@@ -1496,7 +1727,16 @@ static int find_shared_ctxt(struct file *fp, +@@ -1498,43 +1624,68 @@ done: + static int find_shared_ctxt(struct file *fp, + const struct qib_user_info *uinfo) + { +- int devmax, ndev, i; ++ int devmax, ndev; + int ret = 0; ++ struct qib_devdata *dd; - for (ndev = 0; ndev < devmax; ndev++) { - struct qib_devdata *dd = qib_lookup(ndev); -- +#ifdef QIB_CONFIG_KNX -+ /* -+ * In the case we are allocating a context for a KNX process, -+ * reject any device that is not associated with the -+ * requesting KNX. -+ */ -+ if ((uinfo->spu_knx_node_id && -+ dd->node_id != uinfo->spu_knx_node_id)) -+ continue; ++ /* ++ * In the case we are allocating a context for a KNX process, ++ * Don't loop over all devices but use the one assosiated with the ++ * requesting KNX. 
++ */ ++ if (uinfo->spu_knx_node_id) { ++ dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id); ++ if (dd && dd->num_knx) ++ ret = subctxt_search_ctxts(dd, fp, uinfo); ++ goto done; ++ } +#endif + devmax = qib_count_units(NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { +- struct qib_devdata *dd = qib_lookup(ndev); +- ++ dd = qib_lookup(ndev); /* device portion of usable() */ if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase)) continue; -@@ -1617,6 +1857,14 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) +- for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { +- struct qib_ctxtdata *rcd = dd->rcd[i]; ++ ret = subctxt_search_ctxts(dd, fp, uinfo); ++ if (ret) ++ break; ++ } ++#ifdef QIB_CONFIG_KNX ++done: ++#endif ++ return ret; ++} + +- /* Skip ctxts which are not yet open */ +- if (!rcd || !rcd->cnt) +- continue; +- /* Skip ctxt if it doesn't match the requested one */ +- if (rcd->subctxt_id != uinfo->spu_subctxt_id) +- continue; +- /* Verify the sharing process matches the master */ +- if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || +- rcd->userversion != uinfo->spu_userversion || +- rcd->cnt >= rcd->subctxt_cnt) { +- ret = -EINVAL; +- goto done; +- } +- ctxt_fp(fp) = rcd; +- subctxt_fp(fp) = rcd->cnt++; +- rcd->subpid[subctxt_fp(fp)] = current->pid; +- tidcursor_fp(fp) = 0; +- rcd->active_slaves |= 1 << subctxt_fp(fp); +- ret = 1; ++static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp, ++ const struct qib_user_info *uinfo) ++{ ++ int ret = 0, i; ++ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { ++ struct qib_ctxtdata *rcd = dd->rcd[i]; ++ ++ /* Skip ctxts which are not yet open */ ++ if (!rcd || !rcd->cnt) ++ continue; ++ /* Skip ctxt if it doesn't match the requested one */ ++ if (rcd->subctxt_id != uinfo->spu_subctxt_id) ++ continue; ++ /* Verify the sharing process matches the master */ ++ if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || ++ rcd->userversion != uinfo->spu_userversion || ++ rcd->cnt >= rcd->subctxt_cnt) { ++ ret = -EINVAL; + goto done; + } ++ ctxt_fp(fp) = rcd; ++ subctxt_fp(fp) = rcd->cnt++; ++ rcd->subpid[subctxt_fp(fp)] = current->pid; ++ tidcursor_fp(fp) = 0; ++ rcd->active_slaves |= 1 << subctxt_fp(fp); ++ ret = 1; ++ break; + } +- + done: + return ret; + } +@@ -1626,6 +1777,13 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) + if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT) alg = uinfo->spu_port_alg; - +#ifdef QIB_CONFIG_KNX + /* Make sure we have a connection to the KNX module on the right node */ + if (uinfo->spu_knx_node_id && !qib_knx_get(uinfo->spu_knx_node_id)) { @@ -527,38 +429,73 @@ index 275f247..6eebad0 100644 + goto done; + } +#endif -+ + mutex_lock(&qib_mutex); - if (qib_compatible_subctxts(swmajor, swminor) && -@@ -1638,6 +1886,24 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) +@@ -1633,13 +1791,38 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) + uinfo->spu_subctxt_cnt) { + ret = find_shared_ctxt(fp, uinfo); + if (ret > 0) { +- ret = do_qib_user_sdma_queue_create(fp); ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) { ++ ret = qib_knx_sdma_queue_create(fp); ++ } else ++#endif ++ ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; + } + } + ++#ifdef QIB_CONFIG_KNX ++ /* ++ * If there is a KNX node set, we pick the device that is ++ * associate with that KNX node ++ */ ++ if 
(uinfo->spu_knx_node_id) { ++ struct qib_devdata *dd = ++ qib_knx_node_to_dd(uinfo->spu_knx_node_id); ++ if (dd) { ++ ret = find_free_ctxt(dd->unit, fp, uinfo); ++ if (!ret) ++ ret = qib_knx_alloc_ctxt( ++ uinfo->spu_knx_node_id, ++ ctxt_fp(fp)->ctxt); ++ } else ++ ret = -ENXIO; ++ goto done_chk_sdma; ++ } ++ ++#endif + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; + if (i_minor) + ret = find_free_ctxt(i_minor - 1, fp, uinfo); +@@ -1648,7 +1831,6 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) const unsigned int cpu = cpumask_first(¤t->cpus_allowed); const unsigned int weight = cpumask_weight(¤t->cpus_allowed); +- + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { +@@ -1659,9 +1841,21 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) + } + + done_chk_sdma: +- if (!ret) ++ if (!ret) { +#ifdef QIB_CONFIG_KNX -+ /* -+ * If there is a KNX node set, we pick the device that is on -+ * the same NUMA node as the KNX. -+ */ + if (uinfo->spu_knx_node_id) { -+ struct qib_devdata *dd = -+ qib_knx_node_to_dd(uinfo->spu_knx_node_id); -+ if (dd) { -+ ret = find_free_ctxt(dd->unit, fp, uinfo); -+ if (!ret) -+ ret = qib_knx_alloc_ctxt(dd, -+ ctxt_fp(fp)->ctxt); -+ } else -+ ret = -ENXIO; -+ goto done_chk_sdma; ++ ret = qib_knx_sdma_queue_create(fp); ++ /*if (!ret) ++ ret = qib_knx_setup_tidrcv(fp);*/ ++ goto done_ok; + } +#endif - - if (weight == 1 && !test_bit(cpu, qib_cpulist)) - if (!find_hca(cpu, &unit) && unit >= 0) -@@ -1652,6 +1918,9 @@ done_chk_sdma: - if (!ret) ret = do_qib_user_sdma_queue_create(fp); ++ } done_ok: +#ifdef QIB_CONFIG_KNX + knx_node_fp(fp) = uinfo->spu_knx_node_id; @@ -566,7 +503,7 @@ index 275f247..6eebad0 100644 mutex_unlock(&qib_mutex); done: -@@ -1666,11 +1935,25 @@ static int qib_do_user_init(struct file *fp, +@@ -1676,11 +1870,25 @@ static int qib_do_user_init(struct file *fp, struct qib_ctxtdata *rcd = ctxt_fp(fp); struct qib_devdata *dd; unsigned uctxt; @@ -592,7 +529,7 @@ index 275f247..6eebad0 100644 goto bail; } -@@ -1721,6 +2004,41 @@ static int qib_do_user_init(struct file *fp, +@@ -1731,6 +1939,41 @@ static int qib_do_user_init(struct file *fp, */ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); @@ -634,23 +571,31 @@ index 275f247..6eebad0 100644 /* * Now allocate the rcvhdr Q and eager TIDs; skip the TID * array for time being. If rcd->ctxt > chip-supported, -@@ -1730,6 +2048,7 @@ static int qib_do_user_init(struct file *fp, +@@ -1740,6 +1983,9 @@ static int qib_do_user_init(struct file *fp, ret = qib_create_rcvhdrq(dd, rcd); if (!ret) ret = qib_setup_eagerbufs(rcd); ++#ifdef QIB_CONFIG_KNX +cont_init: ++#endif if (ret) goto bail_pio; -@@ -1752,7 +2071,6 @@ static int qib_do_user_init(struct file *fp, - */ - if (rcd->rcvhdrtail_kvaddr) - qib_clear_rcvhdrtail(rcd); -- - dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB, - rcd->ctxt); +@@ -1837,6 +2083,13 @@ static int qib_close(struct inode *in, struct file *fp) -@@ -1884,6 +2202,12 @@ static int qib_close(struct inode *in, struct file *fp) + /* drain user sdma queue */ + if (fd->pq) { ++#ifdef QIB_CONFIG_KNX ++ /* ++ * The thread should be stopped first before attempting ++ * to clean the queue. 
++ */ ++ qib_knx_sdma_queue_destroy(fd); ++#endif + qib_user_sdma_queue_drain(rcd->ppd, fd->pq); + qib_user_sdma_queue_destroy(fd->pq); + } +@@ -1894,6 +2147,12 @@ static int qib_close(struct inode *in, struct file *fp) } mutex_unlock(&qib_mutex); @@ -663,7 +608,7 @@ index 275f247..6eebad0 100644 qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */ bail: -@@ -2169,15 +2493,22 @@ static ssize_t qib_write(struct file *fp, const char __user *data, +@@ -2179,6 +2438,13 @@ static ssize_t qib_write(struct file *fp, const char __user *data, ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); if (ret) goto bail; @@ -677,22 +622,8 @@ index 275f247..6eebad0 100644 break; case QIB_CMD_USER_INIT: - ret = qib_do_user_init(fp, &cmd.cmd.user_info); -- if (ret) -- goto bail; -- ret = qib_get_base_info(fp, (void __user *) (unsigned long) -- cmd.cmd.user_info.spu_base_info, -- cmd.cmd.user_info.spu_base_info_size); -+ if (!ret) -+ ret = qib_get_base_info( -+ fp, (void __user *) (unsigned long) -+ cmd.cmd.user_info.spu_base_info, -+ cmd.cmd.user_info.spu_base_info_size); - break; - - case QIB_CMD_RECV_CTRL: diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c -index 24e802f..84b3222 100644 +index 17e0831..f8992f7 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -51,6 +51,10 @@ @@ -706,72 +637,7 @@ index 24e802f..84b3222 100644 #undef pr_fmt #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt -@@ -64,6 +68,14 @@ - #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62) - - /* -+ * Select the NUMA node id on which to allocate the receive header -+ * queue, eager buffers and send pioavail register. -+ */ -+int qib_numa_node; -+module_param_named(numa_node, qib_numa_node, int, S_IRUGO); -+MODULE_PARM_DESC(numa_node, "NUMA node on which memory is allocated"); -+ -+/* - * Number of ctxts we are configured to use (to allow for more pio - * buffers per ctxt, etc.) Zero means use chip value. - */ -@@ -71,11 +83,6 @@ ushort qib_cfgctxts; - module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); - MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); - --unsigned qib_numa_aware; --module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); --MODULE_PARM_DESC(numa_aware, -- "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); -- - /* - * If set, do not write to any regs if avoidable, hack to allow - * check for deranged default register values. -@@ -84,6 +91,12 @@ ushort qib_mini_init; - module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO); - MODULE_PARM_DESC(mini_init, "If set, do minimal diag init"); - -+unsigned qib_numa_aware = QIB_DRIVER_AUTO_CONFIGURATION; -+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); -+MODULE_PARM_DESC(numa_aware, "Use NUMA aware allocations: " -+ "0=disabled, 1=enabled, " -+ "10=option 0 for AMD & <= Intel Westmere cpus and option 1 for newer cpus(default)"); -+ - unsigned qib_n_krcv_queues; - module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); - MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); -@@ -1095,6 +1108,24 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) - unsigned long flags; - struct qib_devdata *dd; - int ret; -+ int node_id; -+ int local_node_id = pcibus_to_node(dd->pcidev->bus); -+ s64 new_node_id = qib_numa_node; -+ -+ if (local_node_id < 0) -+ local_node_id = numa_node_id(); -+ -+ if (new_node_id < 0) -+ new_node_id = local_node_id; -+ -+ new_node_id = node_online(new_node_id) ? 
new_node_id : -+ local_node_id; -+ -+ dd->local_node_id = local_node_id; -+ dd->assigned_node_id = new_node_id; -+ -+ node_id = qib_numa_aware ? dd->local_node_id : -+ dd->assigned_node_id; - - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { -@@ -1263,6 +1294,13 @@ static int __init qlogic_ib_init(void) +@@ -1270,6 +1274,12 @@ static int __init qlogic_ib_init(void) /* not fatal if it doesn't work */ if (qib_init_qibfs()) pr_err("Unable to register ipathfs\n"); @@ -779,47 +645,39 @@ index 24e802f..84b3222 100644 +#ifdef QIB_CONFIG_KNX + ret = qib_knx_server_init(); + if (ret < 0) -+ pr_err("Unable to start KNX listen thread\n"); ++ pr_err(": Unable to start KNX listen thread\n"); +#endif -+ goto bail; /* all OK */ bail_dev: -@@ -1287,6 +1325,10 @@ static void __exit qlogic_ib_cleanup(void) +@@ -1294,6 +1304,9 @@ static void __exit qlogic_ib_cleanup(void) { int ret; +#ifdef QIB_CONFIG_KNX + qib_knx_server_exit(); +#endif -+ ret = qib_exit_qibfs(); if (ret) pr_err( -@@ -1754,6 +1796,15 @@ int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen) - iounmap(dd->kregbase); - dd->kregbase = NULL; +@@ -1546,6 +1559,9 @@ static void __devexit qib_remove_one(struct pci_dev *pdev) + /* unregister from IB core */ + qib_unregister_ib_device(dd); -+ if (qib_numa_aware == QIB_DRIVER_AUTO_CONFIGURATION) -+ qib_numa_aware = qib_configure_numa(boot_cpu_data) ? 1 : 0; -+ -+ if (qib_rcvhdrpoll == QIB_DRIVER_AUTO_CONFIGURATION) -+ qib_rcvhdrpoll = qib_configure_numa(boot_cpu_data) ? 0 : 1; -+ -+ if (qib_pio_avail_bits == QIB_DRIVER_AUTO_CONFIGURATION) -+ qib_pio_avail_bits = qib_configure_numa(boot_cpu_data) ? 0 : 1; -+ ++#ifdef QIB_CONFIG_KNX ++ qib_knx_remove_device(dd); ++#endif /* - * Assumes chip address space looks like: - * - kregs + sregs + cregs + uregs (in any order) + * Disable the IB link, disable interrupts on the device, + * clear dma engines, etc. diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c new file mode 100644 -index 0000000..c15276f +index 0000000..5a9bdaa --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_knx.c -@@ -0,0 +1,923 @@ +@@ -0,0 +1,1532 @@ +/* -+ * Copyright (c) 2012 Intel Corporation. All rights reserved. ++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU @@ -849,6 +707,7 @@ index 0000000..c15276f + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ ++#include +#include +#include +#include @@ -857,12 +716,21 @@ index 0000000..c15276f + +#include "qib.h" +#include "qib_knx.h" ++#include "qib_user_sdma.h" ++#include "qib_knx_common.h" + +unsigned int qib_knx_nconns = 5; +module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO); +MODULE_PARM_DESC(num_conns, "Max number of pending connections"); + +#define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9 ++#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x) ++ ++#define knx_sdma_next(sdma) \ ++ (sdma->head = ((sdma->head + 1) % sdma->desc_num)) ++#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub) ++#define QIB_KNX_SDMA_STATUS(sdma, st) \ ++ QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)st << 32) | 1) + +struct qib_knx_server { + struct task_struct *kthread; @@ -902,7 +770,16 @@ index 0000000..c15276f + struct scif_range *pages; +}; + ++struct qib_knx_tidrcv { ++ struct qib_knx_rma tidmem; ++ u64 tidbase; ++ u32 tidcnt; ++}; ++ +struct qib_knx_ctxt { ++ u16 ctxt; ++ struct qib_knx *knx; ++ struct qib_pportdata *ppd; + /* local registered memory for PIO buffers */ + struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT]; + /* local registered memory for user registers */ @@ -924,6 +801,23 @@ index 0000000..c15276f + __u64 status; + __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT]; + __u32 runtime_flags; ++ ++ struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT]; ++}; ++ ++struct qib_knx_sdma { ++ /* KNX flags page */ ++ struct scif_range *mflag_pages; ++ struct qib_knx_sdma_mflags *mflags; ++ /* KNX descriptor queue */ ++ struct scif_range *queue_pages; ++ struct qib_knx_sdma_desc *queue; ++ u32 desc_num; ++ /* host flags (in host memory) */ ++ struct qib_knx_rma hflags_mem; ++ struct qib_knx_sdma_hflags *hflags; ++ u32 head; /* shadow */ ++ u32 complete; +}; + +struct qib_knx { @@ -934,10 +828,16 @@ index 0000000..c15276f + int numa_node; + struct qib_devdata *dd; + struct qib_knx_ctxt **ctxts; ++ spinlock_t ctxt_lock; ++ resource_size_t bar; ++ u64 barlen; ++ struct qib_knx_sdma *sdma; ++ struct task_struct *sdma_poll; ++ atomic_t tref; ++ char tname[64]; ++ struct qib_knx_rma tidmem; +}; + -+#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x) -+ +static struct qib_knx_server *server; + +static int qib_knx_init(struct qib_knx_server *); @@ -947,19 +847,20 @@ index 0000000..c15276f + void *, size_t, int, const char *); +static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *, + const char *); ++static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t); +static ssize_t qib_show_knx_node(struct device *, struct device_attribute *, + char *); -+ -+static DEVICE_ATTR(knx_node, S_IRUGO, qib_show_knx_node, NULL); -+static ssize_t qib_show_knx_node(struct device *dev, -+ struct device_attribute *attr, char *buf) -+{ -+ struct qib_ibdev *ibdev = -+ container_of(dev, struct qib_ibdev, ibdev.dev); -+ struct qib_devdata *dd = dd_from_dev(ibdev); -+ -+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->knx->peer.node); -+} ++static int qib_knx_sdma_init(struct qib_knx *); ++static void qib_knx_sdma_teardown(struct qib_knx *); ++static __always_inline struct page * ++qib_knx_phys_to_page(struct qib_knx *, unsigned long); ++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *, ++ struct qib_knx_sdma_desc *, ++ struct qib_user_sdma_queue *, ++ int *, struct list_head *); ++static int qib_knx_sdma_poll(void *); ++static int qib_knx_tidrcv_init(struct qib_knx *); ++static int qib_knx_tidrcv_teardown(struct qib_knx *); + +inline struct qib_knx *qib_knx_get(u16 
nodeid) +{ @@ -982,10 +883,11 @@ index 0000000..c15276f + +static int qib_knx_init(struct qib_knx_server *server) +{ -+ int ret = 0, num_devs = 0, i; -+ struct qib_devdata *dd; ++ int ret = 0, num_devs = 0, i, seen = 0; ++ unsigned fewest = -1U; ++ struct qib_devdata *dd = NULL, *dd_no_numa = NULL; + struct qib_knx *knx; -+ struct ib_device *ibdev; ++ struct qib_device_info info = { -1 }; + + knx = kzalloc(sizeof(*knx), GFP_KERNEL); + if (!knx) { @@ -999,10 +901,14 @@ index 0000000..c15276f + } + + INIT_LIST_HEAD(&knx->list); ++ spin_lock_init(&knx->ctxt_lock); + knx->numa_node = -1; + ret = scif_pci_info(knx->peer.node, &knx->pci_info); -+ if (!ret) ++ if (!ret) { + knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus); ++ knx->bar = pci_resource_start(knx->pci_info.pdev, 0); ++ knx->barlen = pci_resource_len(knx->pci_info.pdev, 0); ++ } + + if (knx->numa_node < 0) + knx->numa_node = numa_node_id(); @@ -1010,40 +916,58 @@ index 0000000..c15276f + num_devs = qib_count_units(NULL, NULL); + if (unlikely(!num_devs)) { + ret = -ENODEV; ++ /* we have to send this */ ++ scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); + goto done; + } + -+ for (i = 0; i < num_devs; i++) { ++ /* ++ * Attempt to find an HCA on the same NUMA node as the card. Save ++ * the first HCA that hasn't been associated with a card in case ++ * there is no HCA on the same NUMA node. ++ */ ++ for (i = 0; seen < num_devs; i++) { + dd = qib_lookup(i); -+ if (dd && dd->local_node_id == knx->numa_node) -+ knx->dd = dd; ++ if (dd) { ++ if (dd->assigned_node_id == knx->numa_node) { ++ knx->dd = dd; ++ break; ++ } else if (dd->num_knx < fewest) ++ dd_no_numa = dd; ++ seen++; ++ } + } + /* + * We didn't find a QIB device on the same NUMA node, -+ * round-robin across all devices. ++ * use the "backup". + */ + if (unlikely(!knx->dd)) { -+ knx->dd = qib_lookup(server->nclients % num_devs); -+ /* it is possible for qib_lookup to return NULL */ -+ if (unlikely(!knx->dd)) { ++ if (!dd_no_numa) { + ret = -ENODEV; ++ /* we have to send this */ ++ scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); + goto done; + } ++ knx->dd = dd_no_numa; + } -+ knx->dd->node_id = knx->peer.node; -+ knx->dd->knx = knx; ++ knx->dd->num_knx++; ++ + knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts), + GFP_KERNEL, knx->numa_node); + if (!knx->ctxts) + ret = -ENOMEM; -+ ibdev = &knx->dd->verbs_dev.ibdev; -+ ret = device_create_file(&ibdev->dev, &dev_attr_knx_node); ++ /* Give the KNX the associated device information. */ ++ info.unit = knx->dd->unit; ++ ret = scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); ++ ++ ret = qib_knx_sdma_init(knx); + if (ret) -+ /* -+ * clear the error code since we don't want to fail the -+ * initialization. -+ */ -+ ret = 0; ++ goto done; ++ atomic_set(&knx->tref, 0); ++ ret = qib_knx_tidrcv_init(knx); +done: + spin_lock(&server->client_lock); + list_add_tail(&knx->list, &server->clients); @@ -1057,13 +981,12 @@ index 0000000..c15276f +static void qib_knx_free(struct qib_knx *knx, int unload) +{ + struct qib_devdata *dd = knx->dd; -+ struct ib_device *ibdev; + int i; + -+ if (dd) { -+ ibdev = &dd->verbs_dev.ibdev; -+ device_remove_file(&ibdev->dev, &dev_attr_knx_node); -+ } ++ qib_knx_tidrcv_teardown(knx); ++ qib_knx_sdma_teardown(knx); ++ if (dd) ++ dd->num_knx--; + /* + * If this function is called with unload set, we can + * free the context data. 
Otherwise, we are here @@ -1180,9 +1103,16 @@ index 0000000..c15276f + return ret; +} + -+int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt) ++static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src, ++ size_t size) +{ -+ struct qib_knx *knx = dd_to_knx(dd); ++ memcpy_fromio(dst, src, size); ++} ++ ++int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt) ++{ ++ struct qib_knx *knx = qib_knx_get(node_id); ++ struct qib_devdata *dd = knx->dd; + struct qib_knx_ctxt *ptr; + int ret = 0; + @@ -1199,7 +1129,14 @@ index 0000000..c15276f + ret = -ENOMEM; + goto bail; + } ++ ptr->knx = knx; ++ ptr->ctxt = ctxt; ++ ptr->ppd = dd->rcd[ctxt]->ppd; ++ ++ spin_lock(&knx->ctxt_lock); + knx->ctxts[ctxt] = ptr; ++ dd->rcd[ctxt]->krcd = ptr; ++ spin_unlock(&knx->ctxt_lock); +bail: + return ret; +} @@ -1208,10 +1145,11 @@ index 0000000..c15276f + enum qib_knx_ctxtinfo_type type, + struct file *fp) +{ -+ struct qib_knx *knx = dd_to_knx(rcd->dd); ++ struct qib_knx *knx = rcd->krcd->knx; + __u16 subctxt; + __u64 ret = 0; + ++ spin_lock(&knx->ctxt_lock); + if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt]) + goto done; + @@ -1234,6 +1172,7 @@ index 0000000..c15276f + break; + } +done: ++ spin_unlock(&knx->ctxt_lock); + return ret; +} + @@ -1244,7 +1183,7 @@ index 0000000..c15276f + char buf[16]; + off_t offset; + int ret = 0; -+ struct qib_knx *knx = dd_to_knx(dd); ++ struct qib_knx *knx = rcd->krcd->knx; + + if (unlikely(!knx)) { + ret = -ENODEV; @@ -1292,7 +1231,7 @@ index 0000000..c15276f +{ + int ret = 0; + off_t offset; -+ struct qib_knx *knx = dd_to_knx(dd); ++ struct qib_knx *knx = rcd->krcd->knx; + + if (unlikely(!knx)) { + ret = -ENODEV; @@ -1353,7 +1292,7 @@ index 0000000..c15276f +{ + struct qib_knx_mem_map_sg *mapsg; + struct qib_knx_mem_map *map; -+ struct qib_knx *knx = dd_to_knx(dd); ++ struct qib_knx *knx = rcd->krcd->knx; + dma_addr_t offset; + struct scatterlist *sg; + unsigned num_pages; @@ -1410,7 +1349,8 @@ index 0000000..c15276f + * can use 64bit addresses for DMA but the CPU might not. + * (see pci_set_dma_mask() in qib_pcie.c). + */ -+ mapsg->sglist = kzalloc(num_pages * sizeof(*mapsg->sglist), GFP_KERNEL); ++ mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist), ++ GFP_KERNEL, knx->numa_node); + if (!mapsg->sglist) { + ret = -ENOMEM; + goto bail_rcvq_pages; @@ -1426,7 +1366,7 @@ index 0000000..c15276f + } + /* + * pci_map_sg() will remap all 128 pages of the -+ * scatterlist seperately (without coalescing them). ++ * scatterlist separately (without coalescing them). + * However, since the buffer is contiguous, as long + * as the base address is mapped correctly, everything + * should work. 
In any case, check that the mapped @@ -1520,7 +1460,7 @@ index 0000000..c15276f + struct qib_knx_mem_map_sg *map; + struct scatterlist *sg; + struct qib_devdata *dd = rcd->dd; -+ struct qib_knx *knx = dd_to_knx(dd); ++ struct qib_knx *knx = rcd->krcd->knx; + unsigned size, egrsize, egrcnt, num_pages, bufs_ppage, + egrbufcnt; + dma_addr_t dma_addr, page; @@ -1598,7 +1538,8 @@ index 0000000..c15276f + + map->size = size; + map->dir = DMA_BIDIRECTIONAL; -+ map->sglist = kzalloc(num_pages * sizeof(*map->sglist), GFP_KERNEL); ++ map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL, ++ knx->numa_node); + if (!map->sglist) { + ret = -ENOMEM; + goto bail_free_rcvegr_phys; @@ -1619,10 +1560,10 @@ index 0000000..c15276f + rcd->rcvegrbuf[i] = map->pages->va[i]; + } + -+ for (egrbufcnt = i = 0; i < num_pages ; i++) { ++ for (egrbufcnt = i = 0; i < num_pages; i++) { + page = rcd->rcvegrbuf_phys[i]; + dma_addr = page; -+ for (bufcnt = 0 ; egrbufcnt < egrcnt && bufcnt < bufs_ppage; ++ for (bufcnt = 0; egrbufcnt < egrcnt && bufcnt < bufs_ppage; + egrbufcnt++, bufcnt++) { + dd->f_put_tid(dd, rcd->rcvegr_tid_base + + egrbufcnt + @@ -1650,7 +1591,7 @@ index 0000000..c15276f + +void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) +{ -+ struct qib_knx *knx = dd_to_knx(dd); ++ struct qib_knx *knx = rcd->krcd->knx; + struct qib_knx_ctxt *ctxt; + char buf[16]; + int i, ret = 0; @@ -1658,7 +1599,11 @@ index 0000000..c15276f + if (!rcd || !knx || !knx->ctxts) + return; + ++ spin_lock(&knx->ctxt_lock); + ctxt = knx->ctxts[rcd->ctxt]; ++ knx->ctxts[rcd->ctxt] = NULL; ++ spin_unlock(&knx->ctxt_lock); ++ + if (!ctxt) + return; + @@ -1704,12 +1649,535 @@ index 0000000..c15276f + qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf); + } + -+ /* MITKO XXX: handle rcd->tid_pg_list */ -+ knx->ctxts[rcd->ctxt] = NULL; + kfree(ctxt); + kfree(rcd); +} + ++/* ++ * TID management for processes on the MIC happens on the MIC. Therefore, ++ * we only register the HW TID array here. ++ * The MIC will calculate TID array offsets using the same algorithm is ++ * the host. Therefore, it is OK that the entire HW TID array is mapped ++ * since neither side should step on the other. 
++ */ ++static int qib_knx_tidrcv_init(struct qib_knx *knx) ++{ ++ struct qib_devdata *dd = knx->dd; ++ struct qib_knx_tid_info info; ++ void *tidbase; ++ int ret = 0; ++ off_t offset = 0; ++ size_t len; ++ char buf[64]; ++ ++ memset(&info, 0, sizeof(info)); ++ ++ info.tidcnt = dd->rcvtidcnt; ++ tidbase = ((char *)dd->kregbase + dd->rcvtidbase); ++ info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase); ++ info.tidtemplate = dd->tidtemplate; ++ info.invalidtid = dd->tidinvalid; ++ /* information needed to properly calculate DMA address to MIC pages */ ++ info.bar_addr = knx->bar; ++ info.bar_len = knx->barlen; ++ ++ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node); ++ offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase, ++ info.tidbase_len, SCIF_PROT_WRITE, ++ buf); ++ info.tidbase_offset = offset; ++ if (IS_ERR_VALUE(offset)) ++ ret = offset; ++ len = scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); ++ if (len < sizeof(info)) ++ ret = -EFAULT; ++ return ret; ++} ++ ++static int qib_knx_tidrcv_teardown(struct qib_knx *knx) ++{ ++ char buf[64]; ++ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node); ++ return qib_knx_unregister_memory(knx, &knx->tidmem, buf); ++} ++ ++static int qib_knx_sdma_init(struct qib_knx *knx) ++{ ++ struct qib_knx_host_mem flags; ++ struct qib_knx_knc_mem mflags; ++ struct qib_knx_sdma *sdma; ++ char buf[64]; ++ int ret = 0; ++ ++ sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node); ++ if (!sdma) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node); ++ if (!sdma->hflags) { ++ ret = -ENOMEM; ++ goto done_free; ++ } ++ snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node); ++ flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem, ++ sdma->hflags, ++ PAGE_SIZE, ++ SCIF_PROT_WRITE, ++ buf); ++ if (IS_ERR_VALUE(flags.flags_offset)) { ++ ret = flags.flags_offset; ++ goto free_flags; ++ } ++ sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt; ++ flags.desc_num = sdma->desc_num; ++ ret = scif_send(knx->epd.epd, &flags, sizeof(flags), ++ SCIF_SEND_BLOCK); ++ if (ret < sizeof(flags)) ++ goto unregister; ++ ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags), ++ SCIF_RECV_BLOCK); ++ if (ret < sizeof(mflags)) { ++ ret = -EINVAL; ++ goto unregister; ++ } ++ ret = scif_get_pages(knx->epd.epd, mflags.flags_offset, ++ PAGE_SIZE, &sdma->mflag_pages); ++ if (ret < 0 || !sdma->mflag_pages->nr_pages) { ++ ret = -EFAULT; ++ goto unregister; ++ } ++ sdma->mflags = sdma->mflag_pages->va[0]; ++ ret = scif_get_pages(knx->epd.epd, mflags.queue_offset, ++ mflags.queue_len, &sdma->queue_pages); ++ if (ret < 0) ++ goto put_flags; ++ if ((sdma->queue_pages->nr_pages * PAGE_SIZE) != ++ mflags.queue_len) { ++ ret = -EFAULT; ++ goto put_queue; ++ } ++ sdma->queue = sdma->queue_pages->va[0]; ++ sdma->complete = -1; ++ sdma->head = -1; ++ /* set the initial trigger value */ ++ QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1); ++ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete); ++ snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll", ++ knx->peer.node); ++ knx->sdma = sdma; ++ ret = 0; ++ goto done; ++put_queue: ++ scif_put_pages(sdma->queue_pages); ++put_flags: ++ scif_put_pages(sdma->mflag_pages); ++unregister: ++ qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf); ++free_flags: ++ kfree(sdma->hflags); ++done_free: ++ kfree(sdma); ++done: ++ /* ++ * we have to respond to the MIC so it doesn't get stuck ++ * in the scif_recv call ++ */ 
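++ /*
++ * A sketch of the exchange performed above, for reference: the
++ * host sends {flags_offset, desc_num}, receives {flags_offset,
++ * queue_offset, queue_len} back from the MIC, and maps both
++ * remote regions with scif_get_pages() so the trigger/complete
++ * words can be polled directly through shared memory instead of
++ * exchanging a message per packet.
++ */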
++ scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK);
++ return ret;
++}
++
++static void qib_knx_sdma_teardown(struct qib_knx *knx)
++{
++ int ret;
++ if (knx->sdma_poll)
++ ret = kthread_stop(knx->sdma_poll);
++ if (knx->sdma) {
++ if (knx->sdma->queue_pages->nr_pages) {
++ knx->sdma->queue = NULL;
++ scif_put_pages(knx->sdma->queue_pages);
++ }
++ if (knx->sdma->mflag_pages->nr_pages) {
++ knx->sdma->mflags = NULL;
++ scif_put_pages(knx->sdma->mflag_pages);
++ }
++ kfree(knx->sdma->hflags);
++ kfree(knx->sdma);
++ knx->sdma = NULL;
++ }
++}
++
++int qib_knx_sdma_queue_create(struct file *fd)
++{
++ struct qib_ctxtdata *rcd = ctxt_fp(fd);
++ struct qib_devdata *dd = rcd->dd;
++ struct qib_knx *knx = rcd->krcd->knx;
++ struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt];
++ u8 subctxt = subctxt_fp(fd);
++ int ret = 0;
++
++ if (!ctxt) {
++ ret = -EINVAL;
++ goto done;
++ }
++ ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev,
++ dd->unit, rcd->ctxt,
++ subctxt);
++ if (!ctxt->pq[subctxt])
++ ret = -ENOMEM;
++ user_sdma_queue_fp(fd) = ctxt->pq[subctxt];
++ /*
++ * We start the polling thread the first time a user SDMA
++ * queue is created. There is no reason to take up CPU
++ * cycles before then.
++ */
++ if (atomic_inc_return(&knx->tref) == 1) {
++ knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx,
++ knx->tname);
++ if (IS_ERR(knx->sdma_poll)) {
++ ret = PTR_ERR(knx->sdma_poll);
++ atomic_dec(&knx->tref);
++ goto free_queue;
++ }
++ }
++ goto done;
++free_queue:
++ user_sdma_queue_fp(fd) = NULL;
++ qib_user_sdma_queue_destroy(ctxt->pq[subctxt]);
++ ctxt->pq[subctxt] = NULL;
++done:
++ return ret;
++}
++
++void qib_knx_sdma_queue_destroy(struct qib_filedata *fd)
++{
++ struct qib_ctxtdata *rcd = fd->rcd;
++ struct qib_knx *knx;
++ unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt;
++
++ /* Host processes do not have a KNX rcd pointer. */
++ if (!rcd->krcd)
++ return;
++ knx = rcd->krcd->knx;
++ /* We still have the memory pointer through fd->pq */
++ spin_lock(&knx->ctxt_lock);
++ if (knx->ctxts[ctxt])
++ knx->ctxts[ctxt]->pq[subctxt] = NULL;
++ spin_unlock(&knx->ctxt_lock);
++ if (atomic_dec_and_test(&knx->tref)) {
++ int ret = kthread_stop(knx->sdma_poll);
++ knx->sdma_poll = NULL;
++ }
++}
++
++/*
++ * Convert a MIC physical address to the corresponding host page.
++ */
++static __always_inline struct page *
++qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr) {
++ unsigned long paddr;
++ if ((knx->bar + addr + PAGE_SIZE) >
++ (knx->bar + knx->barlen))
++ return NULL;
++ paddr = knx->bar + addr;
++ return pfn_to_page(paddr >> PAGE_SHIFT);
++}
++
++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt,
++ struct qib_knx_sdma_desc *desc,
++ struct qib_user_sdma_queue *pq,
++ int *ndesc, struct list_head *list)
++{
++ struct qib_knx *knx = ctxt->knx;
++ struct qib_user_sdma_pkt *pkt;
++ dma_addr_t pbc_dma_addr;
++ unsigned pktnw, pbcnw;
++ u32 counter;
++ u16 frag_size;
++ int ret = 0;
++ __le32 *pbc;
++
++ counter = pq->counter;
++
++ pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr);
++ if (!pbc) {
++ ret = -ENOMEM;
++ goto done;
++ }
++ memcpy(pbc, desc->pbc, desc->pbclen);
++
++ pktnw = (le32_to_cpu(*pbc) & 0xFFFF);
++ /*
++ * This assignment is a bit strange. It's because the
++ * pbc counts the number of 32-bit words in the full
++ * packet _except_ the first word of the pbc itself...
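++ *
++ * A quick check of the arithmetic (illustrative values): a 16-byte
++ * pbc is 4 words, so pbcnw = (16 >> 2) - 1 = 3; a 64-byte payload
++ * adds 64 >> 2 = 16 words, so a well-formed request arrives with
++ * pktnw = 19 in the low 16 bits of the first pbc word.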
++ */ ++ pbcnw = (desc->pbclen >> 2) - 1; ++ ++ if (pktnw < pbcnw) { ++ ret = -EINVAL; ++ goto free_pbc; ++ } ++ ++ if (pktnw != ((desc->length >> 2) + pbcnw)) { ++ ret = -EINVAL; ++ goto free_pbc; ++ } ++ ++ frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF; ++ if (((frag_size ? frag_size : desc->length) + desc->pbclen) > ++ ctxt->ppd->ibmaxlen) { ++ ret = -EINVAL; ++ goto free_pbc; ++ } ++ if (frag_size) { ++ /* new SDMA "protocol" */ ++ unsigned pktsize, n; ++ ++ n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1); ++ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n; ++ ++ pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL); ++ if (!pkt) { ++ ret = -ENOMEM; ++ goto free_pbc; ++ } ++ pkt->largepkt = 1; ++ pkt->frag_size = frag_size; ++ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); ++ ++ if (desc->tidlen) { ++ char *tidsmptr = (char *)pkt + pktsize; ++ memcpy(tidsmptr, desc->tidsm, desc->tidlen); ++ pkt->tidsm = ++ (struct qib_tid_session_member *)tidsmptr; ++ pkt->tidsmcount = desc->tidlen / ++ sizeof(*desc->tidsm); ++ pkt->tidsmidx = 0; ++ } ++ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); ++ } else { ++ /* old SDMA */ ++ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); ++ if (!pkt) { ++ ret = -ENOMEM; ++ goto free_pbc; ++ } ++ pkt->largepkt = 0; ++ pkt->frag_size = desc->length; ++ pkt->addrlimit = ARRAY_SIZE(pkt->addr); ++ } ++ pkt->bytes_togo = desc->length; ++ pkt->payload_size = 0; ++ pkt->counter = counter; ++ pkt->tiddma = !!desc->tidlen; ++ /* ++ * The generic user SDMA code will use this as a flag to ++ * decide whether to call the KNx-specific pkt free ++ * function. However, it doesn't know what the value ++ * actually means. ++ */ ++ pkt->remote = (u64)knx; ++ ++ qib_user_sdma_init_frag(pkt, 0, ++ 0, desc->pbclen, ++ 1, 0, ++ 0, 0, ++ NULL, pbc, ++ pbc_dma_addr, desc->pbclen); ++ pkt->index = 0; ++ pkt->naddr = 1; ++ ++ if (desc->npages) { ++ /* we have user data */ ++ int i; ++ struct page *page; ++ unsigned plen = 0, len = desc->length; ++ for (i = 0; i < desc->npages; i++) { ++ unsigned long off = (i == 0 ? desc->offset : 0); ++ plen = (len > PAGE_SIZE ? PAGE_SIZE : len); ++ page = qib_knx_phys_to_page(knx, desc->pages[i]); ++ ret = qib_user_sdma_page_to_frags(knx->dd, pq, ++ pkt, page, 0, off, ++ (off + plen > PAGE_SIZE ? 
++ PAGE_SIZE - off : plen), ++ NULL); ++ if (ret < 0) ++ goto free_sdma; ++ len -= plen - off; ++ } ++ } else { ++ pkt->addr[0].last_desc = 1; ++ if (pbc_dma_addr == 0) { ++ pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev, ++ pbc, desc->pbclen, ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(&knx->dd->pcidev->dev, ++ pbc_dma_addr)) { ++ ret = -ENOMEM; ++ goto free_sdma; ++ } ++ pkt->addr[0].addr = pbc_dma_addr; ++ pkt->addr[0].dma_mapped = 1; ++ } ++ } ++ counter++; ++ pkt->pq = pq; ++ pkt->index = 0; ++ *ndesc = pkt->naddr; ++ ++ list_add_tail(&pkt->list, list); ++ goto done; ++free_sdma: ++ if (pkt->largepkt) ++ kfree(pkt); ++ else ++ kmem_cache_free(pq->pkt_slab, pkt); ++free_pbc: ++ if (pbc_dma_addr) ++ dma_pool_free(pq->header_cache, pbc, pbc_dma_addr); ++ else ++ kfree(pbc); ++done: ++ return ret; ++} ++ ++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt) ++{ ++ struct qib_knx *knx = (struct qib_knx *)pkt->remote; ++ struct qib_knx_sdma *sdma = knx->sdma; ++ sdma_next(sdma, complete); ++ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete); ++} ++ ++static int qib_knx_sdma_poll(void *data) ++{ ++ struct qib_knx *knx = (struct qib_knx *)data; ++ struct qib_knx_ctxt *ctxt; ++ struct qib_knx_sdma_desc desc; ++ struct qib_knx_sdma *sdma = knx->sdma; ++ struct qib_user_sdma_queue *pq; ++ struct list_head list; ++ u32 new_head; ++ int ret = 0, ndesc = 0, added; ++ ++ if (!sdma) ++ return -EFAULT; ++ ++ while (!kthread_should_stop()) { ++ added = 0; ++ new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger); ++ while (sdma->head != new_head) { ++ knx_sdma_next(sdma); ++ qib_knx_memcpy(&desc, sdma->queue + sdma->head, ++ sizeof(desc)); ++ if (!desc.ctxt) { ++ QIB_KNX_SDMA_STATUS(sdma, -EINVAL); ++ continue; ++ } ++ spin_lock(&knx->ctxt_lock); ++ ctxt = knx->ctxts[desc.ctxt]; ++ if (!ctxt) { ++ /* we should never get here */ ++ QIB_KNX_SDMA_STATUS(sdma, -EINVAL); ++ goto done_unlock; ++ } ++ pq = ctxt->pq[desc.subctxt]; ++ if (!pq) { ++ QIB_KNX_SDMA_STATUS(sdma, -EFAULT); ++ goto done_unlock; ++ } ++ mutex_lock(&pq->lock); ++ if (pq->added > ctxt->ppd->sdma_descq_removed) ++ qib_user_sdma_hwqueue_clean(ctxt->ppd); ++ if (pq->num_sending) ++ qib_user_sdma_queue_clean(ctxt->ppd, pq); ++ ++ INIT_LIST_HEAD(&list); ++ ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq, ++ &ndesc, &list); ++ QIB_KNX_SDMA_STATUS(sdma, ret); ++ if (!list_empty(&list)) { ++ if (qib_sdma_descq_freecnt(ctxt->ppd) < ++ ndesc) { ++ qib_user_sdma_hwqueue_clean( ++ ctxt->ppd); ++ if (pq->num_sending) ++ qib_user_sdma_queue_clean( ++ ctxt->ppd, pq); ++ } ++ ret = qib_user_sdma_push_pkts(ctxt->ppd, ++ pq, &list, 1); ++ if (ret < 0) ++ goto free_pkts; ++ else { ++ pq->counter++; ++ added++; ++ } ++ } ++free_pkts: ++ if (!list_empty(&list)) ++ qib_user_sdma_free_pkt_list( ++ &knx->dd->pcidev->dev, pq, &list); ++ mutex_unlock(&pq->lock); ++done_unlock: ++ spin_unlock(&knx->ctxt_lock); ++ } ++ if (!added) { ++ int i; ++ /* ++ * Push the queues along ++ * The polling thread will enter the inner loop only ++ * if the KNX has posted new descriptors to the queue. ++ * However, any packets that have been completed by ++ * the HW need to be cleaned and that won't happen ++ * unless we explicitly check. 
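++ *
++ * The sweep below walks every (ctxt, subctxt) pair; with, say, 16
++ * configured contexts and 4 subcontexts each (illustrative
++ * numbers) that is 64 queue checks per idle pass, a cost paid
++ * only when no new descriptors arrived.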
++ */ ++ for (i = 0; ++ i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT; ++ i++) { ++ int c = i / QLOGIC_IB_MAX_SUBCTXT, ++ s = i % QLOGIC_IB_MAX_SUBCTXT; ++ spin_lock(&knx->ctxt_lock); ++ ctxt = knx->ctxts[c]; ++ if (!ctxt) ++ goto loop_unlock; ++ pq = ctxt->pq[s]; ++ if (!pq) ++ goto loop_unlock; ++ mutex_lock(&pq->lock); ++ if (pq->num_sending) ++ qib_user_sdma_queue_clean(ctxt->ppd, ++ pq); ++ mutex_unlock(&pq->lock); ++loop_unlock: ++ spin_unlock(&knx->ctxt_lock); ++ } ++ might_sleep(); ++ } ++ } ++ return ret; ++} ++ ++void qib_knx_remove_device(struct qib_devdata *dd) ++{ ++ if (server && dd->num_knx) { ++ struct qib_knx *knx, *knxp; ++ list_for_each_entry_safe(knx, knxp, &server->clients, list) { ++ if (knx->dd == dd) { ++ spin_lock(&server->client_lock); ++ list_del(&knx->list); ++ server->nclients--; ++ spin_unlock(&server->client_lock); ++ qib_knx_free(knx, 0); ++ kfree(knx); ++ } ++ } ++ } ++ return; ++} ++ +int __init qib_knx_server_init(void) +{ + server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL); @@ -1728,7 +2196,6 @@ index 0000000..c15276f +{ + if (server) { + struct qib_knx *t, *tt; -+ + /* Stop the thread so we don't accept any new connections. */ + kthread_stop(server->kthread); + list_for_each_entry_safe(t, tt, &server->clients, list) { @@ -1743,12 +2210,12 @@ index 0000000..c15276f +} diff --git a/drivers/infiniband/hw/qib/qib_knx.h b/drivers/infiniband/hw/qib/qib_knx.h new file mode 100644 -index 0000000..d767a60 +index 0000000..0e8d7ce --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_knx.h -@@ -0,0 +1,63 @@ +@@ -0,0 +1,74 @@ +/* -+ * Copyright (c) 2012 Intel Corporation. All rights reserved. ++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU @@ -1791,15 +2258,15 @@ index 0000000..d767a60 + QIB_KNX_CTXTINFO_FLAGS +}; + ++#ifdef QIB_CONFIG_KNX +int __init qib_knx_server_init(void); +void __exit qib_knx_server_exit(void); -+static __always_inline struct qib_knx *dd_to_knx(struct qib_devdata *dd) -+{ -+ return (struct qib_knx *)dd->knx; -+} ++ ++void qib_knx_remove_device(struct qib_devdata *); ++ +inline struct qib_knx *qib_knx_get(uint16_t); +inline struct qib_devdata *qib_knx_node_to_dd(uint16_t); -+int qib_knx_alloc_ctxt(struct qib_devdata *, unsigned); ++int qib_knx_alloc_ctxt(u16, unsigned); +int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16); +int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *, + struct qib_base_info *); @@ -1809,13 +2276,24 @@ index 0000000..d767a60 +void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); +__u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type, + struct file *); ++int qib_knx_sdma_queue_create(struct file *); ++void qib_knx_sdma_queue_destroy(struct qib_filedata *); ++#else ++static inline u64 qib_knx_ctxt_info( ++ struct qib_ctxtdata *rcd, ++ enum qib_knx_ctxtinfo_type type, ++ struct file *fp) ++{ ++ return 0; ++} ++#endif +#endif /* _QIB_KNX_H */ -diff --git a/drivers/infiniband/hw/qib/qib_knx_sdma.h b/drivers/infiniband/hw/qib/qib_knx_sdma.h +diff --git a/drivers/infiniband/hw/qib/qib_knx_common.h b/drivers/infiniband/hw/qib/qib_knx_common.h new file mode 100644 -index 0000000..8c67b1f +index 0000000..53c521f --- /dev/null -+++ b/drivers/infiniband/hw/qib/qib_knx_sdma.h -@@ -0,0 +1,105 @@ ++++ b/drivers/infiniband/hw/qib/qib_knx_common.h +@@ -0,0 +1,126 @@ +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * @@ -1847,11 +2325,15 @@ index 0000000..8c67b1f + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ -+#ifndef _QIB_KNX_SDMA_H -+#define _QIB_KNX_SDMA_H ++#ifndef _QIB_KNX_COMMON_H ++#define _QIB_KNX_COMMON_H ++ ++struct qib_device_info { ++ u16 unit; ++}; + +#define QIB_SDMA_MAX_NPAGES 33 -+#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld ++#define QIB_KNX_SDMA_VALUE(fld) ((volatile u64)fld) +#define QIB_KNX_SDMA_SET(fld, val) \ + do { \ + fld = (u64)(val); \ @@ -1870,9 +2352,9 @@ index 0000000..8c67b1f +}; + +struct qib_tid_sm { -+ __u16 tid; -+ __u16 offset; -+ __u16 length; ++ __u16 tid; ++ __u16 offset; ++ __u16 length; +}; + +/* @@ -1889,7 +2371,7 @@ index 0000000..8c67b1f + u64 length; + u32 npages; + unsigned tidlen; -+ off_t offset; ++ off_t offset; + unsigned long pages[QIB_SDMA_MAX_NPAGES]; + /* This array is 198B so the compiler will pad + * it by 2B to make it multiple of 8B. */ @@ -1913,6 +2395,9 @@ index 0000000..8c67b1f + u64 __padding[7]; +}; + ++#define sdma_next(s, fld) \ ++ ((s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1)) ++ +struct qib_knx_sdma_mflags { + u64 status; + u64 __padding1[7]; @@ -1920,61 +2405,407 @@ index 0000000..8c67b1f + u64 __padding2[7]; +}; + -+#endif /* _QIB_KNX_SDMA_H */ -diff --git a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h -new file mode 100644 -index 0000000..842fca1 ---- /dev/null -+++ b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (c) 2013 Intel Corporation. All rights reserved. -+ * -+ * This software is available to you under a choice of one of two -+ * licenses. 
You may choose to be licensed under the terms of the GNU -+ * General Public License (GPL) Version 2, available from the file -+ * COPYING in the main directory of this source tree, or the -+ * OpenIB.org BSD license below: -+ * -+ * Redistribution and use in source and binary forms, with or -+ * without modification, are permitted provided that the following -+ * conditions are met: -+ * -+ * - Redistributions of source code must retain the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer. -+ * -+ * - Redistributions in binary form must reproduce the above -+ * copyright notice, this list of conditions and the following -+ * disclaimer in the documentation and/or other materials -+ * provided with the distribution. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -+ * SOFTWARE. -+ */ -+#ifndef _QIB_KNX_TIDRCV_H -+ +struct qib_knx_tid_info { + /* this is the entire set of 512 entries (= 4K) so -+ * we can resgister. subctxt devision will be done -+ * in MIC driver. */ -+ off_t tidbase_offset; -+ size_t tidbase_len; -+ u64 tidbase; -+ unsigned tidcnt; -+ u64 tidtemplate; -+ unsigned long invalidtid; -+ u64 bar_addr; -+ u64 bar_len; ++ * we can resgister. subctxt devision will be done ++ * in MIC driver. */ ++ off_t tidbase_offset; ++ size_t tidbase_len; ++ u64 tidbase; ++ unsigned tidcnt; ++ u64 tidtemplate; ++ unsigned long invalidtid; ++ u64 bar_addr; ++ u64 bar_len; +}; + -+#endif /* QIB_KNX_TIDRCV_H */ --- -1.8.3.1 - ++#endif /* _QIB_KNX_COMMON_H */ +diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c +index d2806ca..c25bd5a 100644 +--- a/drivers/infiniband/hw/qib/qib_user_sdma.c ++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c +@@ -63,80 +63,6 @@ struct qib_user_sdma_rb_node { + pid_t pid; + }; + +-struct qib_user_sdma_pkt { +- struct list_head list; /* list element */ +- +- u8 tiddma; /* if this is NEW tid-sdma */ +- u8 largepkt; /* this is large pkt from kmalloc */ +- u16 frag_size; /* frag size used by PSM */ +- u16 index; /* last header index or push index */ +- u16 naddr; /* dimension of addr (1..3) ... */ +- u16 addrlimit; /* addr array size */ +- u16 tidsmidx; /* current tidsm index */ +- u16 tidsmcount; /* tidsm array item count */ +- u16 payload_size; /* payload size so far for header */ +- u32 bytes_togo; /* bytes for processing */ +- u32 counter; /* sdma pkts queued counter for this entry */ +- struct qib_tid_session_member *tidsm; /* tid session member array */ +- struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ +- u64 added; /* global descq number of entries */ +- +- struct { +- u16 offset; /* offset for kvaddr, addr */ +- u16 length; /* length in page */ +- u16 first_desc; /* first desc */ +- u16 last_desc; /* last desc */ +- u16 put_page; /* should we put_page? */ +- u16 dma_mapped; /* is page dma_mapped? 
*/ +- u16 dma_length; /* for dma_unmap_page() */ +- u16 padding; +- struct page *page; /* may be NULL (coherent mem) */ +- void *kvaddr; /* FIXME: only for pio hack */ +- dma_addr_t addr; +- } addr[4]; /* max pages, any more and we coalesce */ +-}; +- +-struct qib_user_sdma_queue { +- /* +- * pkts sent to dma engine are queued on this +- * list head. the type of the elements of this +- * list are struct qib_user_sdma_pkt... +- */ +- struct list_head sent; +- +- /* +- * Because above list will be accessed by both process and +- * signal handler, we need a spinlock for it. +- */ +- spinlock_t sent_lock ____cacheline_aligned_in_smp; +- +- /* headers with expected length are allocated from here... */ +- char header_cache_name[64]; +- struct dma_pool *header_cache; +- +- /* packets are allocated from the slab cache... */ +- char pkt_slab_name[64]; +- struct kmem_cache *pkt_slab; +- +- /* as packets go on the queued queue, they are counted... */ +- u32 counter; +- u32 sent_counter; +- /* pending packets, not sending yet */ +- u32 num_pending; +- /* sending packets, not complete yet */ +- u32 num_sending; +- /* global descq number of entry of last sending packet */ +- u64 added; +- +- /* dma page table */ +- struct rb_root dma_pages_root; +- +- struct qib_user_sdma_rb_node *sdma_rb_node; +- +- /* protect everything above... */ +- struct mutex lock; +-}; +- + static struct qib_user_sdma_rb_node * + qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) + { +@@ -254,12 +180,12 @@ done: + return pq; + } + +-static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, +- int i, u16 offset, u16 len, +- u16 first_desc, u16 last_desc, +- u16 put_page, u16 dma_mapped, +- struct page *page, void *kvaddr, +- dma_addr_t dma_addr, u16 dma_length) ++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, ++ int i, u16 offset, u16 len, ++ u16 first_desc, u16 last_desc, ++ u16 put_page, u16 dma_mapped, ++ struct page *page, void *kvaddr, ++ dma_addr_t dma_addr, u16 dma_length) + { + pkt->addr[i].offset = offset; + pkt->addr[i].length = len; +@@ -273,7 +199,7 @@ static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, + pkt->addr[i].dma_length = dma_length; + } + +-static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, ++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) + { + void *hdr; +@@ -295,11 +221,11 @@ static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + return hdr; + } + +-static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, +- struct qib_user_sdma_queue *pq, +- struct qib_user_sdma_pkt *pkt, +- struct page *page, u16 put, +- u16 offset, u16 len, void *kvaddr) ++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, ++ struct qib_user_sdma_queue *pq, ++ struct qib_user_sdma_pkt *pkt, ++ struct page *page, u16 put, ++ u16 offset, u16 len, void *kvaddr) + { + __le16 *pbc16; + void *pbcvaddr; +@@ -314,21 +240,27 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { +- /* +- * dma mapping error, pkt has not managed +- * this page yet, return the page here so +- * the caller can ignore this page. 
+- */ +- if (put) { +- put_page(page); +- } else { +- /* coalesce case */ +- kunmap(page); +- __free_page(page); ++#ifdef QIB_CONFIG_KNX ++ if (!pkt->remote) { ++#endif ++ /* ++ * dma mapping error, pkt has not managed ++ * this page yet, return the page here so ++ * the caller can ignore this page. ++ */ ++ if (put) { ++ put_page(page); ++ } else { ++ /* coalesce case */ ++ kunmap(page); ++ __free_page(page); ++ } ++ ret = -ENOMEM; ++ goto done; + } +- ret = -ENOMEM; +- goto done; ++#ifdef QIB_CONFIG_KNX + } ++#endif + offset = 0; + dma_mapped = 1; + +@@ -630,13 +562,19 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + +- if (pkt->addr[i].kvaddr) +- kunmap(pkt->addr[i].page); ++#ifdef QIB_CONFIG_KNX ++ if (!pkt->remote) { ++#endif ++ if (pkt->addr[i].kvaddr) ++ kunmap(pkt->addr[i].page); + +- if (pkt->addr[i].put_page) +- put_page(pkt->addr[i].page); +- else +- __free_page(pkt->addr[i].page); ++ if (pkt->addr[i].put_page) ++ put_page(pkt->addr[i].page); ++ else ++ __free_page(pkt->addr[i].page); ++#ifdef QIB_CONFIG_KNX ++ } ++#endif + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { +@@ -775,9 +713,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd, + } + + /* free a packet list -- return counter value of last packet */ +-static void qib_user_sdma_free_pkt_list(struct device *dev, +- struct qib_user_sdma_queue *pq, +- struct list_head *list) ++void qib_user_sdma_free_pkt_list(struct device *dev, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *list) + { + struct qib_user_sdma_pkt *pkt, *pkt_next; + +@@ -787,6 +725,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev, + for (i = 0; i < pkt->naddr; i++) + qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); + ++#ifdef QIB_CONFIG_KNX ++ if (pkt->remote) ++ qib_knx_sdma_free_pkt(pkt); ++#endif + if (pkt->largepkt) + kfree(pkt); + else +@@ -970,6 +912,9 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; ++#ifdef QIB_CONFIG_KNX ++ pkt->remote = 0; ++#endif + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ +@@ -1045,8 +990,8 @@ static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq, + } + + /* try to clean out queue -- needs pq->lock */ +-static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, +- struct qib_user_sdma_queue *pq) ++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq) + { + struct qib_devdata *dd = ppd->dd; + struct list_head free_list; +@@ -1110,7 +1055,7 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) + } + + /* clean descriptor queue, returns > 0 if some elements cleaned */ +-static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd) ++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd) + { + int ret; + unsigned long flags; +@@ -1321,9 +1266,9 @@ retry: + } + + /* pq->lock must be held, get packets on the wire... 
*/ +-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, +- struct qib_user_sdma_queue *pq, +- struct list_head *pktlist, int count) ++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *pktlist, int count) + { + unsigned long flags; + +diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h +index ce8cbaf..07d5bc5 100644 +--- a/drivers/infiniband/hw/qib/qib_user_sdma.h ++++ b/drivers/infiniband/hw/qib/qib_user_sdma.h +@@ -31,12 +31,108 @@ + */ + #include + +-struct qib_user_sdma_queue; ++struct qib_user_sdma_pkt { ++ struct list_head list; /* list element */ ++ ++ u8 tiddma; /* if this is NEW tid-sdma */ ++ u8 largepkt; /* this is large pkt from kmalloc */ ++ u16 frag_size; /* frag size used by PSM */ ++ u16 index; /* last header index or push index */ ++ u16 naddr; /* dimension of addr (1..3) ... */ ++ u16 addrlimit; /* addr array size */ ++ u16 tidsmidx; /* current tidsm index */ ++ u16 tidsmcount; /* tidsm array item count */ ++ u16 payload_size; /* payload size so far for header */ ++ u32 bytes_togo; /* bytes for processing */ ++ u32 counter; /* sdma pkts queued counter for this entry */ ++ struct qib_tid_session_member *tidsm; /* tid session member array */ ++ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ ++ u64 added; /* global descq number of entries */ ++#ifdef QIB_CONFIG_KNX ++ u64 remote; /* does the packet originate on the host */ ++#endif ++ ++ struct { ++ u16 offset; /* offset for kvaddr, addr */ ++ u16 length; /* length in page */ ++ u16 first_desc; /* first desc */ ++ u16 last_desc; /* last desc */ ++ u16 put_page; /* should we put_page? */ ++ u16 dma_mapped; /* is page dma_mapped? */ ++ u16 dma_length; /* for dma_unmap_page() */ ++ u16 padding; ++ struct page *page; /* may be NULL (coherent mem) */ ++ void *kvaddr; /* FIXME: only for pio hack */ ++ dma_addr_t addr; ++ } addr[4]; /* max pages, any more and we coalesce */ ++}; ++ ++struct qib_user_sdma_queue { ++ /* ++ * pkts sent to dma engine are queued on this ++ * list head. the type of the elements of this ++ * list are struct qib_user_sdma_pkt... ++ */ ++ struct list_head sent; ++ ++ /* ++ * Because above list will be accessed by both process and ++ * signal handler, we need a spinlock for it. ++ */ ++ spinlock_t sent_lock ____cacheline_aligned_in_smp; ++ ++ /* headers with expected length are allocated from here... */ ++ char header_cache_name[64]; ++ struct dma_pool *header_cache; ++ ++ /* packets are allocated from the slab cache... */ ++ char pkt_slab_name[64]; ++ struct kmem_cache *pkt_slab; ++ ++ /* as packets go on the queued queue, they are counted... */ ++ u32 counter; ++ u32 sent_counter; ++ /* pending packets, not sending yet */ ++ u32 num_pending; ++ /* sending packets, not complete yet */ ++ u32 num_sending; ++ /* global descq number of entry of last sending packet */ ++ u64 added; ++ ++ /* dma page table */ ++ struct rb_root dma_pages_root; ++ ++ struct qib_user_sdma_rb_node *sdma_rb_node; ++ ++ /* protect everything above... 
*/ ++ struct mutex lock; ++}; + + struct qib_user_sdma_queue * + qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); + void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq); +- ++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, ++ size_t len, dma_addr_t *dma_addr); ++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, ++ int i, u16 offset, u16 len, ++ u16 first_desc, u16 last_desc, ++ u16 put_page, u16 dma_mapped, ++ struct page *page, void *kvaddr, ++ dma_addr_t dma_addr, u16 dma_length); ++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, ++ struct qib_user_sdma_queue *pq, ++ struct qib_user_sdma_pkt *pkt, ++ struct page *page, u16 put, ++ u16 offset, u16 len, void *kvaddr); ++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd); ++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq); ++void qib_user_sdma_free_pkt_list(struct device *dev, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *list); ++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *pktlist, int count); + int qib_user_sdma_writev(struct qib_ctxtdata *pd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, +@@ -50,3 +146,9 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, + + u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq); + u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq); ++ ++/* ++ * This function prototype somewhat polutes this header file ++ * but I don't want to create a new header file just for it. ++ */ ++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt); diff --git a/tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch b/tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch deleted file mode 100644 index abc3a3a..0000000 --- a/tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch +++ /dev/null @@ -1,4787 +0,0 @@ -From 6975d8e44fc3f04c14cf4f83e2df6f69a25546dc Mon Sep 17 00:00:00 2001 -From: Jubin John -Date: Fri, 26 Sep 2014 09:41:32 -0700 -Subject: [PATCH] Updates to qib driver - ---- - drivers/infiniband/hw/qib/Makefile | 2 +- - drivers/infiniband/hw/qib/qib.h | 172 +++++- - drivers/infiniband/hw/qib/qib_driver.c | 223 +++++++- - drivers/infiniband/hw/qib/qib_file_ops.c | 166 ++++-- - drivers/infiniband/hw/qib/qib_iba6120.c | 12 +- - drivers/infiniband/hw/qib/qib_iba7220.c | 20 +- - drivers/infiniband/hw/qib/qib_iba7322.c | 122 ++-- - drivers/infiniband/hw/qib/qib_init.c | 118 +++-- - drivers/infiniband/hw/qib/qib_knx.c | 721 +++++++++++++++++++-- - drivers/infiniband/hw/qib/qib_knx.h | 13 +- - drivers/infiniband/hw/qib/qib_knx_common.h | 126 ++++ - drivers/infiniband/hw/qib/qib_knx_sdma.h | 105 --- - drivers/infiniband/hw/qib/qib_knx_tidrcv.h | 48 -- - drivers/infiniband/hw/qib/qib_mad.c | 3 +- - drivers/infiniband/hw/qib/qib_pcie.c | 21 +- - drivers/infiniband/hw/qib/qib_qp.c | 6 +- - drivers/infiniband/hw/qib/qib_sdma.c | 11 +- - drivers/infiniband/hw/qib/qib_snoop.c | 970 ++++++++++++++++++++++++++++ - drivers/infiniband/hw/qib/qib_user_sdma.c | 296 +++++---- - drivers/infiniband/hw/qib/qib_user_sdma.h | 105 +++- - drivers/infiniband/hw/qib/qib_verbs.c | 116 ++++- - 21 files changed, 2831 insertions(+), 545 deletions(-) - create mode 100644 drivers/infiniband/hw/qib/qib_knx_common.h - delete mode 100644 drivers/infiniband/hw/qib/qib_knx_sdma.h - delete mode 100644 drivers/infiniband/hw/qib/qib_knx_tidrcv.h - create mode 100644 
drivers/infiniband/hw/qib/qib_snoop.c - -diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile -index ba2a49d..047d191 100644 ---- a/drivers/infiniband/hw/qib/Makefile -+++ b/drivers/infiniband/hw/qib/Makefile -@@ -6,7 +6,7 @@ ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \ - qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ - qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ - qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \ -- qib_sd7220.o qib_iba7322.o qib_verbs.o -+ qib_sd7220.o qib_iba7322.o qib_snoop.o qib_verbs.o - - # 6120 has no fallback if no MSI interrupts, others can do INTx - ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o -diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h -index ad87abd..e34b0f7 100644 ---- a/drivers/infiniband/hw/qib/qib.h -+++ b/drivers/infiniband/hw/qib/qib.h -@@ -52,6 +52,7 @@ - #include - #include - #include -+#include - - #include "qib_common.h" - #include "qib_verbs.h" -@@ -247,6 +248,10 @@ struct qib_ctxtdata { - u32 lookaside_qpn; - /* QPs waiting for context processing */ - struct list_head qp_wait_list; -+#ifdef QIB_CONFIG_KNX -+ /* KNX Receive Context Data */ -+ struct qib_knx_ctxt *krcd; -+#endif - #ifdef CONFIG_DEBUG_FS - /* verbs stats per CTX */ - struct qib_opcode_stats_perctx *opstats; -@@ -546,6 +551,11 @@ struct xmit_wait { - * clarifies things a bit. Note that to conform to IB conventions, - * port-numbers are one-based. The first or only port is port1. - */ -+#define QIB_CHAR_DEVICES_PER_PORT 2 -+/* Extract packet length from LRH header */ -+#define QIB_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2) -+#define QIB_SNOOP_DEV_INDEX 0 -+#define QIB_CAPTURE_DEV_INDEX 1 - struct qib_pportdata { - struct qib_ibport ibport_data; - -@@ -656,6 +666,7 @@ struct qib_pportdata { - u8 link_speed_active; - u8 vls_supported; - u8 vls_operational; -+ u8 n_krcv_queues; - /* Rx Polarity inversion (compensate for ~tx on partner) */ - u8 rx_pol_inv; - -@@ -675,6 +686,22 @@ struct qib_pportdata { - struct xmit_wait cong_stats; - struct timer_list symerr_clear_timer; - -+ /* snoop/capture related fields */ -+ unsigned int mode_flag; -+ void *filter_value; -+ int (*filter_callback)(void *hdr, void *data, void *value); -+ /* lock while sending packet out */ -+ spinlock_t snoop_write_lock; -+ struct qib_aux_device { -+ struct cdev *snoop_cdev; -+ struct device *snoop_class_dev; -+ /* snooping lock */ -+ spinlock_t snoop_lock; -+ struct list_head snoop_queue; -+ wait_queue_head_t snoop_waitq; -+ struct qib_pportdata *pport; -+ } sc_device[QIB_CHAR_DEVICES_PER_PORT]; -+ - /* Synchronize access between driver writes and sysfs reads */ - spinlock_t cc_shadow_lock - ____cacheline_aligned_in_smp; -@@ -755,14 +782,14 @@ struct qib_devdata { - - /* mem-mapped base of chip regs plus offset of the SendBufAvail0 - * register -- */ -+ */ - u64 sendbufavail0; - - /* end of mem-mapped chip space excluding sendbuf and user regs */ - u64 __iomem *kregend; - /* physical address of chip for io_remap, etc. 
*/ - resource_size_t physaddr; -- /* qib_cfgctxts pointers */ -+ /* cfgctxts pointers */ - struct qib_ctxtdata **rcd; /* Receive Context Data */ - - /* qib_pportdata, points to array of (physical) port-specific -@@ -1079,7 +1106,6 @@ struct qib_devdata { - u8 num_pports; - /* Lowest context number which can be used by user processes */ - u8 first_user_ctxt; -- u8 n_krcv_queues; - u8 qpn_mask; - u8 skip_kctxt_mask; - -@@ -1126,13 +1152,119 @@ struct qib_devdata { - int assigned_node_id; /* NUMA node closest to HCA */ - - #ifdef QIB_CONFIG_KNX -- /* peer node id of connected KNX node */ -- u16 node_id; -- struct qib_knx *knx; -+ /* number of KNx nodes using this device */ -+ u16 num_knx; - #endif -+}; - -+enum qib_mod_param_t { -+ qib_mod_param_drv, -+ qib_mod_param_unit, -+ qib_mod_param_port - }; - -+typedef int (*param_set_func_t)(struct qib_devdata *, u8, u64); -+ -+struct qib_mod_param { -+ const char *name; -+ enum qib_mod_param_t type; -+ param_set_func_t func; -+ ulong dflt; -+ struct list_head list; -+ struct list_head pport; -+}; -+ -+extern int qib_set_mod_param(const char *, struct kernel_param *); -+extern int qib_get_mod_param(char *, struct kernel_param *); -+extern u64 qib_read_mod_param(struct qib_mod_param *, u16, u8); -+extern void qib_clean_mod_param(void); -+ -+#define MAX_QIB_PARAM_LEN 128 -+/** -+ * QIB_MODPARAM_GLOBAL - define a global module parameter -+ * @N: name of the module parameter -+ * -+ * Define a global module parameter for use in multiple files. -+ */ -+#define QIB_MODPARAM_GLOBAL(N) \ -+extern struct qib_mod_param qmp_##N -+/** -+ * QIB_MODPARAM_DRV - define a driver-scope module parameter -+ * @N: name of the module parameter -+ * @D: default value -+ * @P: visibility in sysfs -+ * @S: description -+ * -+ * Define a driver-scope (global to the driver instance) module -+ * parameter. -+ */ -+#define QIB_MODPARAM_DRV(N, D, P, S) \ -+ struct qib_mod_param qmp_##N = { \ -+ .name = __stringify(N), \ -+ .type = qib_mod_param_drv, \ -+ .dflt = (ulong)D, \ -+ .pport = { NULL, NULL } \ -+ }; \ -+ module_param_named(N, qmp_##N.dflt, ulong, P); \ -+ MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")") -+/** -+ * QIB_MODPARAM_UNIT - define a unit-scope module parameter -+ * @N: name of the module parameter -+ * @F: callback function for dynamic value settings -+ * @D: default value -+ * @P: visibility in sysfs -+ * @D: description -+ * -+ * Define a unit-scope module parameter. Unit-scope module -+ * parameters allows specifying individual values for each of the -+ * QIB units. -+ */ -+#define QIB_MODPARAM_UNIT(N, F, D, P, S) \ -+ struct qib_mod_param qmp_##N = { \ -+ .name = __stringify(N), \ -+ .func = ((P) & S_IWUGO ? F : NULL), \ -+ .type = qib_mod_param_unit, \ -+ .dflt = (ulong)D, \ -+ .pport = { NULL, NULL } \ -+ }; \ -+ module_param_call(N, qib_set_mod_param, qib_get_mod_param, \ -+ &qmp_##N, (P)); \ -+ MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")") -+/** -+ * QIB_MODPARAM_PORT - define a port-scope module parameter -+ * @N: name of the module parameter -+ * @F: callback function for dynamic value settings -+ * @D: default value -+ * @P: visibility in sysfs -+ * @D: description -+ * -+ * Define a port-scope module parameter. Port-scope module -+ * parameters allow specifying individual values foe each of the -+ * ports on any of the QIB units. -+ */ -+#define QIB_MODPARAM_PORT(N, F, D, P, S) \ -+ struct qib_mod_param qmp_##N = { \ -+ .name = __stringify(N), \ -+ .func = ((P) & S_IWUGO ? 
F : NULL), \ -+ .type = qib_mod_param_port, \ -+ .dflt = (ulong)D, \ -+ .pport = { NULL, NULL } \ -+ }; \ -+ module_param_call(N, qib_set_mod_param, qib_get_mod_param, \ -+ &qmp_##N, (P)); \ -+ MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")") -+/** -+ * QIB_MODPARAM_GET - retrieve a module parameter value -+ * @N: name of the module parameter -+ * @U: unit number -+ * @P: port number -+ * -+ * Get the value for the specific unit/port. The macro will return -+ * the correct value regardless of a specific value for the -+ * specified unit/port is present or the default should be used. -+ */ -+#define QIB_MODPARAM_GET(N, U, P) qib_read_mod_param(&qmp_##N, U, P) -+ - /* hol_state values */ - #define QIB_HOL_UP 0 - #define QIB_HOL_INIT 1 -@@ -1165,12 +1297,14 @@ struct qib_filedata { - }; - - extern struct list_head qib_dev_list; -+extern struct list_head qib_mod_param_list; - extern spinlock_t qib_devs_lock; - extern struct qib_devdata *qib_lookup(int unit); - extern u32 qib_cpulist_count; - extern unsigned long *qib_cpulist; - - extern unsigned qib_wc_pat; -+extern unsigned int snoop_enable; - extern unsigned qib_cc_table_size; - int qib_init(struct qib_devdata *, int); - int init_chip_wc_pat(struct qib_devdata *dd, u32); -@@ -1230,6 +1364,24 @@ void qib_hol_event(unsigned long); - void qib_disable_after_error(struct qib_devdata *); - int qib_set_uevent_bits(struct qib_pportdata *, const int); - -+#define QIB_PORT_SNOOP_MODE 1U -+#define QIB_PORT_CAPTURE_MODE 2U -+ -+struct snoop_packet { -+ struct list_head list; -+ u32 total_len; -+ u8 data[]; -+}; -+ -+int qib_snoop_add(struct qib_devdata *); -+void qib_snoop_remove(struct qib_devdata *); -+int qib_snoop_rcv_queue_packet(struct qib_pportdata *, void *, -+ void *, u32); -+void qib_snoop_send_queue_packet(struct qib_pportdata *, -+ struct snoop_packet *); -+int snoop_get_header_size(struct qib_devdata *, struct qib_ib_header *, -+ void *, u32); -+ - /* for use in system calls, where we want to know device type, etc. */ - #define ctxt_fp(fp) \ - (((struct qib_filedata *)(fp)->private_data)->rcd) -@@ -1367,7 +1519,7 @@ void qib_sdma_intr(struct qib_pportdata *); - void qib_user_sdma_send_desc(struct qib_pportdata *dd, - struct list_head *pktlist); - int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, -- u32, struct qib_verbs_txreq *); -+ u32, struct qib_verbs_txreq *, struct snoop_packet *); - /* ppd->sdma_lock should be locked before calling this. */ - int qib_sdma_make_progress(struct qib_pportdata *dd); - -@@ -1505,9 +1657,9 @@ const char *qib_get_unit_name(int unit); - #endif - - /* global module parameter variables */ --extern unsigned qib_ibmtu; --extern ushort qib_cfgctxts; --extern ushort qib_num_cfg_vls; -+QIB_MODPARAM_GLOBAL(ibmtu); -+QIB_MODPARAM_GLOBAL(cfgctxts); -+QIB_MODPARAM_GLOBAL(krcvqs); - extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */ - extern unsigned qib_n_krcv_queues; - extern unsigned qib_sdma_fetch_arb; -diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c -index 5bee08f..e5fb836 100644 ---- a/drivers/infiniband/hw/qib/qib_driver.c -+++ b/drivers/infiniband/hw/qib/qib_driver.c -@@ -43,6 +43,9 @@ - - #include "qib.h" - -+#undef pr_fmt -+#define pr_fmt(fmt) QIB_DRV_NAME " " fmt -+ - /* - * The size has to be longer than this string, so we can append - * board/chip information to it in the init code. 
-@@ -51,11 +54,21 @@ const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; - - DEFINE_SPINLOCK(qib_devs_lock); - LIST_HEAD(qib_dev_list); -+LIST_HEAD(qib_mod_param_list); - DEFINE_MUTEX(qib_mutex); /* general driver use */ - --unsigned qib_ibmtu; --module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO); --MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096"); -+/* Per-unit/port module parameter value structure -+ * linked to the qib_mod_param structure - one per -+ * unit/port */ -+struct qib_mod_param_pport { -+ struct list_head list; -+ u16 unit; -+ u8 port; -+ u64 value; -+}; -+ -+QIB_MODPARAM_PORT(ibmtu, NULL, 5, S_IRUGO, -+ "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096"); - - unsigned qib_compat_ddr_negotiate = 1; - module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint, -@@ -90,6 +103,178 @@ const char *qib_get_unit_name(int unit) - return iname; - } - -+int qib_set_mod_param(const char *str, struct kernel_param *kp) -+{ -+ char *next = (char *)str, *tmp; -+ unsigned long val = 0, dft; -+ u32 unit = 0, port = 0; -+ struct qib_mod_param *param = -+ (struct qib_mod_param *)kp->arg; -+ struct qib_mod_param_pport *pport, *p; -+ int ret = 0; -+ -+ if (strlen(str) >= MAX_QIB_PARAM_LEN) { -+ pr_warn("parameter value too long\n"); -+ ret = -ENOSPC; -+ goto done; -+ } -+ -+ /* qib_dev_list will be empty only when the driver is initially -+ * loading. */ -+ if (list_empty(&qib_dev_list) || !param->pport.next) -+ INIT_LIST_HEAD(¶m->pport); -+ tmp = next; -+ dft = simple_strtoul(tmp, &next, 0); -+ if (next == tmp) { -+ pr_warn("invalid parameter value\n"); -+ ret = -EINVAL; -+ goto done; -+ } -+ /* clear any previously added port entries */ -+ list_for_each_entry_safe(pport, p, ¶m->pport, list) { -+ list_del(&pport->list); -+ kfree(pport); -+ } -+ if (!*next || *next == '\n' || *next == ',') -+ param->dflt = dft; -+ else if (*next && *next == ':') -+ /* no default, rewind the string */ -+ next = tmp; -+ else -+ pr_warn("invalid parameter value\n"); -+ while (*next && next[1]) { -+ if (*next == ',') -+ tmp = ++next; -+ unit = simple_strtoul(tmp, &next, 0); -+ if (param->type == qib_mod_param_port) { -+ if (next == tmp || !*next || *next != ':') { -+ pr_warn("Invalid unit:port argument at \"%s\".\n", -+ tmp); -+ while (*next && *next++ != ',') -+ ; -+ tmp = next; -+ continue; -+ } -+ tmp = ++next; -+ port = simple_strtoul(tmp, &next, 0); -+ if (!port) { -+ /* port numbers start at 1, 0 is invalid */ -+ pr_warn("Invalid argument at \"%s\". Port numbers start at 1.\n", -+ tmp); -+ while (*next && *next++ != ',') -+ ; -+ tmp = next; -+ continue; -+ } -+ } -+ if (next == tmp || *next != '=') { -+ pr_warn("Invalid %s argument at \"%s\".\n", -+ (param->type == qib_mod_param_port ? 
-+ "port" : "unit"), tmp); -+ while (*next && *next++ != ',') -+ ; -+ tmp = next; -+ continue; -+ } -+ tmp = ++next; -+ val = simple_strtoul(tmp, &next, 0); -+ if (next == tmp) { -+ pr_warn("Invalid value string at \"%s\"\n", tmp); -+ while (*next && *next++ != ',') -+ ; -+ tmp = next; -+ continue; -+ } -+ pport = kzalloc(sizeof(struct qib_mod_param_pport), -+ GFP_KERNEL); -+ if (!pport) { -+ pr_err("no memory for module parameter.\n"); -+ ret = -ENOMEM; -+ goto done; -+ } -+ pport->unit = unit; -+ pport->port = port; -+ pport->value = val; -+ list_add_tail(&pport->list, ¶m->pport); -+ if (!*next || *next == '\n') -+ break; -+ tmp = ++next; -+ } -+ /* add parameter to list so it can be cleaned up */ -+ if (!param->list.next) -+ list_add(¶m->list, &qib_mod_param_list); -+ -+ if (param->func && qib_count_units(NULL, NULL)) { -+ struct qib_devdata *dd; -+ list_for_each_entry(pport, ¶m->pport, list) { -+ param_set_func_t setfunc = param->func; -+ list_for_each_entry(dd, &qib_dev_list, list) -+ if (dd->unit == pport->unit) -+ break; -+ if (!setfunc(dd, pport->port, pport->value)) -+ pr_err("Error setting module parameter %s for IB%u:%u", -+ param->name, -+ pport->unit, -+ pport->port); -+ } -+ } -+done: -+ return ret; -+} -+ -+int qib_get_mod_param(char *buffer, struct kernel_param *kp) -+{ -+ struct qib_mod_param *param = -+ (struct qib_mod_param *)kp->arg; -+ struct qib_mod_param_pport *pport; -+ char *p = buffer; -+ int s = 0; -+ -+ s = scnprintf(p, PAGE_SIZE, "%lu", param->dflt); -+ p += s; -+ -+ if (param->pport.next) -+ list_for_each_entry(pport, ¶m->pport, list) { -+ *p++ = ','; -+ if (param->type == qib_mod_param_unit) -+ s = scnprintf(p, PAGE_SIZE, "%u=%llu", -+ pport->unit, pport->value); -+ else if (param->type == qib_mod_param_port) -+ s = scnprintf(p, PAGE_SIZE, "%u:%u=%llu", -+ pport->unit, pport->port, -+ pport->value); -+ p += s; -+ } -+ return strlen(buffer); -+} -+ -+u64 qib_read_mod_param(struct qib_mod_param *param, u16 unit, u8 port) -+{ -+ struct qib_mod_param_pport *pport; -+ u64 ret = param->dflt; -+ -+ if (param->type != qib_mod_param_drv) -+ if (param->pport.next && !list_empty(¶m->pport)) -+ list_for_each_entry(pport, ¶m->pport, list) -+ if (pport->unit == unit && -+ pport->port == port) -+ ret = pport->value; -+ return ret; -+} -+ -+void qib_clean_mod_param(void) -+{ -+ struct qib_mod_param *p; -+ struct qib_mod_param_pport *pp, *pps; -+ -+ list_for_each_entry(p, &qib_mod_param_list, list) { -+ list_for_each_entry_safe(pp, pps, &p->pport, list) { -+ list_del(&pp->list); -+ kfree(pp); -+ } -+ } -+} -+ - /* - * Return count of units with at least one port ACTIVE. 
- */ -@@ -456,6 +641,8 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) - int last; - u64 lval; - struct qib_qp *qp, *nqp; -+ struct snoop_packet *packet = NULL; -+ u32 hdr_len = 0; - - l = rcd->head; - rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; -@@ -478,6 +665,25 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) - /* total length */ - tlen = qib_hdrget_length_in_bytes(rhf_addr); - ebuf = NULL; -+ /* applicable only for capture */ -+ if (unlikely(ppd->mode_flag & QIB_PORT_CAPTURE_MODE)) { -+ int nomatch = 0; -+ /* We want to filter packet before copying it */ -+ if (ppd->filter_callback) -+ nomatch = ppd->filter_callback(hdr, ebuf, -+ ppd->filter_value); -+ if (nomatch == 0) { -+ packet = kzalloc(sizeof(*packet) + tlen, -+ GFP_ATOMIC); -+ if (packet) { -+ /* copy header first */ -+ packet->total_len = tlen; -+ INIT_LIST_HEAD(&packet->list); -+ hdr_len = (u8 *)rhf_addr - (u8 *)hdr; -+ memcpy(packet->data, hdr, hdr_len); -+ } -+ } -+ } - if ((dd->flags & QIB_NODMA_RTAIL) ? - qib_hdrget_use_egr_buf(rhf_addr) : - (etype != RCVHQ_RCV_TYPE_EXPECTED)) { -@@ -512,6 +718,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) - crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l, - etail, rhf_addr, hdr); - else if (etype == RCVHQ_RCV_TYPE_NON_KD) { -+ /* copy packet data */ -+ if (ebuf && packet) -+ memcpy((packet->data + hdr_len), ebuf, -+ (tlen - hdr_len)); - qib_ib_rcv(rcd, hdr, ebuf, tlen); - if (crcs) - crcs--; -@@ -519,6 +729,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) - --*llic; - } - move_along: -+ if (packet) { -+ qib_snoop_send_queue_packet(ppd, packet); -+ packet = NULL; -+ } - l += rsize; - if (l >= maxcnt) - l = 0; -@@ -619,7 +833,8 @@ int qib_set_mtu(struct qib_pportdata *ppd, u16 arg) - ret = -EINVAL; - goto bail; - } -- chk = ib_mtu_enum_to_int(qib_ibmtu); -+ chk = ib_mtu_enum_to_int( -+ QIB_MODPARAM_GET(ibmtu, ppd->dd->unit, ppd->port)); - if (chk > 0 && arg > chk) { - ret = -EINVAL; - goto bail; -diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c -index 6eebad0..376961d 100644 ---- a/drivers/infiniband/hw/qib/qib_file_ops.c -+++ b/drivers/infiniband/hw/qib/qib_file_ops.c -@@ -95,6 +95,9 @@ static ssize_t qib_aio_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t); - static unsigned int qib_poll(struct file *, struct poll_table_struct *); - static int qib_mmapf(struct file *, struct vm_area_struct *); -+static int subctxt_search_ctxts(struct qib_devdata *, struct file *, -+ const struct qib_user_info *); -+ - - static const struct file_operations qib_file_ops = { - .owner = THIS_MODULE, -@@ -1547,6 +1550,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, - - rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); - -+#ifdef QIB_CONFIG_KNX -+ if (uinfo->spu_knx_node_id) -+ /* -+ * Skip allocation of page pointer list for TID -+ * receives. This will be done on the KNX. 
-+ */ -+ goto no_page_list; -+#endif - /* - * Allocate memory for use in qib_tid_update() at open to - * reduce cost of expected send setup per message segment -@@ -1562,6 +1573,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, - ret = -ENOMEM; - goto bailerr; - } -+#ifdef QIB_CONFIG_KNX -+no_page_list: -+#endif - rcd->userversion = uinfo->spu_userversion; - - ret = init_subctxts(dd, rcd, uinfo); -@@ -1720,52 +1734,66 @@ done: - static int find_shared_ctxt(struct file *fp, - const struct qib_user_info *uinfo) - { -- int devmax, ndev, i; -+ int devmax, ndev; - int ret = 0; -+ struct qib_devdata *dd; - -+#ifdef QIB_CONFIG_KNX -+ /* -+ * In the case we are allocating a context for a KNX process, -+ * Don't loop over all devices but use the one assosiated with the -+ * requesting KNX. -+ */ -+ if (uinfo->spu_knx_node_id) { -+ dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id); -+ if (dd && dd->num_knx) -+ ret = subctxt_search_ctxts(dd, fp, uinfo); -+ goto done; -+ } -+#endif - devmax = qib_count_units(NULL, NULL); - - for (ndev = 0; ndev < devmax; ndev++) { -- struct qib_devdata *dd = qib_lookup(ndev); --#ifdef QIB_CONFIG_KNX -- /* -- * In the case we are allocating a context for a KNX process, -- * reject any device that is not associated with the -- * requesting KNX. -- */ -- if ((uinfo->spu_knx_node_id && -- dd->node_id != uinfo->spu_knx_node_id)) -- continue; --#endif -+ dd = qib_lookup(ndev); - /* device portion of usable() */ - if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase)) - continue; -- for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { -- struct qib_ctxtdata *rcd = dd->rcd[i]; -+ ret = subctxt_search_ctxts(dd, fp, uinfo); -+ if (ret) -+ break; -+ } -+done: -+ return ret; -+} - -- /* Skip ctxts which are not yet open */ -- if (!rcd || !rcd->cnt) -- continue; -- /* Skip ctxt if it doesn't match the requested one */ -- if (rcd->subctxt_id != uinfo->spu_subctxt_id) -- continue; -- /* Verify the sharing process matches the master */ -- if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || -- rcd->userversion != uinfo->spu_userversion || -- rcd->cnt >= rcd->subctxt_cnt) { -- ret = -EINVAL; -- goto done; -- } -- ctxt_fp(fp) = rcd; -- subctxt_fp(fp) = rcd->cnt++; -- rcd->subpid[subctxt_fp(fp)] = current->pid; -- tidcursor_fp(fp) = 0; -- rcd->active_slaves |= 1 << subctxt_fp(fp); -- ret = 1; -+static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp, -+ const struct qib_user_info *uinfo) -+{ -+ int ret = 0, i; -+ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { -+ struct qib_ctxtdata *rcd = dd->rcd[i]; -+ -+ /* Skip ctxts which are not yet open */ -+ if (!rcd || !rcd->cnt) -+ continue; -+ /* Skip ctxt if it doesn't match the requested one */ -+ if (rcd->subctxt_id != uinfo->spu_subctxt_id) -+ continue; -+ /* Verify the sharing process matches the master */ -+ if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || -+ rcd->userversion != uinfo->spu_userversion || -+ rcd->cnt >= rcd->subctxt_cnt) { -+ ret = -EINVAL; - goto done; - } -+ ctxt_fp(fp) = rcd; -+ subctxt_fp(fp) = rcd->cnt++; -+ rcd->subpid[subctxt_fp(fp)] = current->pid; -+ tidcursor_fp(fp) = 0; -+ rcd->active_slaves |= 1 << subctxt_fp(fp); -+ ret = 1; -+ break; - } -- - done: - return ret; - } -@@ -1856,6 +1884,10 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) - - if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT) - alg = uinfo->spu_port_alg; -+ if (swminor <= 11) { -+ qib_pio_avail_bits = 1; -+ qib_rcvhdrpoll = 1; -+ } - - #ifdef QIB_CONFIG_KNX - /* 
Make sure we have a connection to the KNX module on the right node */ -@@ -1871,13 +1903,38 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) - uinfo->spu_subctxt_cnt) { - ret = find_shared_ctxt(fp, uinfo); - if (ret > 0) { -- ret = do_qib_user_sdma_queue_create(fp); -+#ifdef QIB_CONFIG_KNX -+ if (uinfo->spu_knx_node_id) { -+ ret = qib_knx_sdma_queue_create(fp); -+ } else -+#endif -+ ret = do_qib_user_sdma_queue_create(fp); - if (!ret) - assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); - goto done_ok; - } - } - -+#ifdef QIB_CONFIG_KNX -+ /* -+ * If there is a KNX node set, we pick the device that is -+ * associate with that KNX node -+ */ -+ if (uinfo->spu_knx_node_id) { -+ struct qib_devdata *dd = -+ qib_knx_node_to_dd(uinfo->spu_knx_node_id); -+ if (dd) { -+ ret = find_free_ctxt(dd->unit, fp, uinfo); -+ if (!ret) -+ ret = qib_knx_alloc_ctxt( -+ uinfo->spu_knx_node_id, -+ ctxt_fp(fp)->ctxt); -+ } else -+ ret = -ENXIO; -+ goto done_chk_sdma; -+ } -+ -+#endif - i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; - if (i_minor) - ret = find_free_ctxt(i_minor - 1, fp, uinfo); -@@ -1886,25 +1943,6 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) - const unsigned int cpu = cpumask_first(¤t->cpus_allowed); - const unsigned int weight = - cpumask_weight(¤t->cpus_allowed); --#ifdef QIB_CONFIG_KNX -- /* -- * If there is a KNX node set, we pick the device that is on -- * the same NUMA node as the KNX. -- */ -- if (uinfo->spu_knx_node_id) { -- struct qib_devdata *dd = -- qib_knx_node_to_dd(uinfo->spu_knx_node_id); -- if (dd) { -- ret = find_free_ctxt(dd->unit, fp, uinfo); -- if (!ret) -- ret = qib_knx_alloc_ctxt(dd, -- ctxt_fp(fp)->ctxt); -- } else -- ret = -ENXIO; -- goto done_chk_sdma; -- } --#endif -- - if (weight == 1 && !test_bit(cpu, qib_cpulist)) - if (!find_hca(cpu, &unit) && unit >= 0) - if (!find_free_ctxt(unit, fp, uinfo)) { -@@ -1915,8 +1953,17 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo) - } - - done_chk_sdma: -- if (!ret) -+ if (!ret) { -+#ifdef QIB_CONFIG_KNX -+ if (uinfo->spu_knx_node_id) { -+ ret = qib_knx_sdma_queue_create(fp); -+ /*if (!ret) -+ ret = qib_knx_setup_tidrcv(fp);*/ -+ goto done_ok; -+ } -+#endif - ret = do_qib_user_sdma_queue_create(fp); -+ } - done_ok: - #ifdef QIB_CONFIG_KNX - knx_node_fp(fp) = uinfo->spu_knx_node_id; -@@ -2145,6 +2192,13 @@ static int qib_close(struct inode *in, struct file *fp) - - /* drain user sdma queue */ - if (fd->pq) { -+#ifdef QIB_CONFIG_KNX -+ /* -+ * The thread should be stopped first before attempting -+ * to clean the queue. 
-+ */ -+ qib_knx_sdma_queue_destroy(fd); -+#endif - qib_user_sdma_queue_drain(rcd->ppd, fd->pq); - qib_user_sdma_queue_destroy(fd->pq); - } -@@ -2737,4 +2791,6 @@ void qib_device_remove(struct qib_devdata *dd) - { - qib_user_remove(dd); - qib_diag_remove(dd); -+ if (snoop_enable) -+ qib_snoop_remove(dd); - } -diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c -index 84e593d..9ab46ed 100644 ---- a/drivers/infiniband/hw/qib/qib_iba6120.c -+++ b/drivers/infiniband/hw/qib/qib_iba6120.c -@@ -2070,15 +2070,16 @@ qib_6120_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) - - static void qib_6120_config_ctxts(struct qib_devdata *dd) - { -+ u32 nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, 0); - dd->ctxtcnt = qib_read_kreg32(dd, kr_portcnt); -- if (qib_n_krcv_queues > 1) { -- dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; -+ if (nkrcvqs > 1) { -+ dd->first_user_ctxt = nkrcvqs * dd->num_pports; - if (dd->first_user_ctxt > dd->ctxtcnt) - dd->first_user_ctxt = dd->ctxtcnt; - dd->qpn_mask = dd->first_user_ctxt <= 2 ? 2 : 6; - } else - dd->first_user_ctxt = dd->num_pports; -- dd->n_krcv_queues = dd->first_user_ctxt; -+ dd->pport[0].n_krcv_queues = dd->first_user_ctxt; - } - - static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd, -@@ -3133,7 +3134,7 @@ static void get_6120_chip_params(struct qib_devdata *dd) - dd->piosize2k = val & ~0U; - dd->piosize4k = val >> 32; - -- mtu = ib_mtu_enum_to_int(qib_ibmtu); -+ mtu = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1)); - if (mtu == -1) - mtu = QIB_DEFAULT_MTU; - dd->pport->ibmtu = (u32)mtu; -@@ -3282,7 +3283,7 @@ static int init_6120_variables(struct qib_devdata *dd) - dd->rhf_offset = 0; - - /* we always allocate at least 2048 bytes for eager buffers */ -- ret = ib_mtu_enum_to_int(qib_ibmtu); -+ ret = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1)); - dd->rcvegrbufsize = ret != -1 ? 
max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); - dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); -@@ -3322,7 +3323,6 @@ static int init_6120_variables(struct qib_devdata *dd) - if (qib_mini_init) - goto bail; - -- qib_num_cfg_vls = 1; /* if any 6120's, only one VL */ - - ret = qib_create_ctxts(dd); - init_6120_cntrnames(dd); -diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c -index 454c2e7..19ad170 100644 ---- a/drivers/infiniband/hw/qib/qib_iba7220.c -+++ b/drivers/infiniband/hw/qib/qib_iba7220.c -@@ -2299,19 +2299,21 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd) - { - unsigned long flags; - u32 nchipctxts; -+ u32 cfgctxts = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0); -+ u32 nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, 0); - - nchipctxts = qib_read_kreg32(dd, kr_portcnt); - dd->cspec->numctxts = nchipctxts; -- if (qib_n_krcv_queues > 1) { -+ if (nkrcvqs > 1) { - dd->qpn_mask = 0x3e; -- dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; -+ dd->first_user_ctxt = nkrcvqs * dd->num_pports; - if (dd->first_user_ctxt > nchipctxts) - dd->first_user_ctxt = nchipctxts; - } else - dd->first_user_ctxt = dd->num_pports; -- dd->n_krcv_queues = dd->first_user_ctxt; -+ dd->pport[0].n_krcv_queues = dd->first_user_ctxt; - -- if (!qib_cfgctxts) { -+ if (!cfgctxts) { - int nctxts = dd->first_user_ctxt + num_online_cpus(); - - if (nctxts <= 5) -@@ -2320,8 +2322,8 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd) - dd->ctxtcnt = 9; - else if (nctxts <= nchipctxts) - dd->ctxtcnt = nchipctxts; -- } else if (qib_cfgctxts <= nchipctxts) -- dd->ctxtcnt = qib_cfgctxts; -+ } else if (cfgctxts <= nchipctxts) -+ dd->ctxtcnt = cfgctxts; - if (!dd->ctxtcnt) /* none of the above, set to max */ - dd->ctxtcnt = nchipctxts; - -@@ -3846,7 +3848,7 @@ static void get_7220_chip_params(struct qib_devdata *dd) - dd->piosize2k = val & ~0U; - dd->piosize4k = val >> 32; - -- mtu = ib_mtu_enum_to_int(qib_ibmtu); -+ mtu = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1)); - if (mtu == -1) - mtu = QIB_DEFAULT_MTU; - dd->pport->ibmtu = (u32)mtu; -@@ -4084,15 +4086,13 @@ static int qib_init_7220_variables(struct qib_devdata *dd) - ppd->cpspec->chase_timer.function = reenable_7220_chase; - ppd->cpspec->chase_timer.data = (unsigned long)ppd; - -- qib_num_cfg_vls = 1; /* if any 7220's, only one VL */ -- - dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE; - dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE; - dd->rhf_offset = - dd->rcvhdrentsize - sizeof(u64) / sizeof(u32); - - /* we always allocate at least 2048 bytes for eager buffers */ -- ret = ib_mtu_enum_to_int(qib_ibmtu); -+ ret = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1)); - dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); - dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); -diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c -index 016e742..35fc492 100644 ---- a/drivers/infiniband/hw/qib/qib_iba7322.c -+++ b/drivers/infiniband/hw/qib/qib_iba7322.c -@@ -107,9 +107,8 @@ static const unsigned sdma_idle_cnt = 64; - * Number of VLs we are configured to use (to allow for more - * credits per vl, etc.) 
- */ --ushort qib_num_cfg_vls = 2; --module_param_named(num_vls, qib_num_cfg_vls, ushort, S_IRUGO); --MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)"); -+static QIB_MODPARAM_PORT(num_vls, NULL, 2, S_IRUGO, -+ "Set number of Virtual Lanes to use (1-8)"); - - static ushort qib_chase = 1; - module_param_named(chase, qib_chase, ushort, S_IRUGO); -@@ -120,9 +119,8 @@ module_param_named(long_attenuation, qib_long_atten, ushort, S_IRUGO); - MODULE_PARM_DESC(long_attenuation, \ - "attenuation cutoff (dB) for long copper cable setup"); - --static ushort qib_singleport; --module_param_named(singleport, qib_singleport, ushort, S_IRUGO); --MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space"); -+static QIB_MODPARAM_UNIT(singleport, NULL, 0, S_IRUGO, -+ "Use only IB port 1; more per-port buffer space"); - - static ushort qib_krcvq01_no_msi; - module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO); -@@ -2395,6 +2393,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) - qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); - qib_write_kreg(dd, kr_scratch, 0ULL); - -+ /* ensure previous Tx parameters are not still forced */ -+ qib_write_kreg_port(ppd, krp_tx_deemph_override, -+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, -+ reset_tx_deemphasis_override)); -+ - if (qib_compat_ddr_negotiate) { - ppd->cpspec->ibdeltainprog = 1; - ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, -@@ -3515,7 +3518,8 @@ try_intx: - snprintf(dd->cspec->msix_entries[msixnum].name, - sizeof(dd->cspec->msix_entries[msixnum].name) - - 1, -- QIB_DRV_NAME "%d (kctx)", dd->unit); -+ QIB_DRV_NAME "%d:%d (kctx)", dd->unit, -+ ((struct qib_ctxtdata *)arg)->ppd->port); - } - ret = request_irq( - dd->cspec->msix_entries[msixnum].msix.vector, -@@ -3651,10 +3655,10 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd) - dd->majrev, dd->minrev, - (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); - -- if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { -- qib_devinfo(dd->pcidev, -- "IB%u: Forced to single port mode by module parameter\n", -- dd->unit); -+ if (QIB_MODPARAM_GET(singleport, dd->unit, 0) && -+ (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { -+ qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode" -+ " by module param\n", dd->unit); - features &= PORT_SPD_CAP; - } - -@@ -3941,22 +3945,30 @@ qib_7322_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) - static void qib_7322_config_ctxts(struct qib_devdata *dd) - { - unsigned long flags; -- u32 nchipctxts; -+ u32 nchipctxts, nkrcvqs; -+ u32 cfgctxts = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0); -+ u8 pidx; - - nchipctxts = qib_read_kreg32(dd, kr_contextcnt); - dd->cspec->numctxts = nchipctxts; -- if (qib_n_krcv_queues > 1 && dd->num_pports) { -- dd->first_user_ctxt = NUM_IB_PORTS + -- (qib_n_krcv_queues - 1) * dd->num_pports; -- if (dd->first_user_ctxt > nchipctxts) -- dd->first_user_ctxt = nchipctxts; -- dd->n_krcv_queues = dd->first_user_ctxt / dd->num_pports; -- } else { -- dd->first_user_ctxt = NUM_IB_PORTS; -- dd->n_krcv_queues = 1; -+ dd->first_user_ctxt = NUM_IB_PORTS; -+ -+ for (pidx = 0; pidx < dd->num_pports; pidx++) { -+ nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, pidx+1); -+ if (nkrcvqs > 1) { -+ if (nkrcvqs - 1 > nchipctxts - dd->first_user_ctxt) -+ dd->pport[pidx].n_krcv_queues = -+ (nchipctxts - dd->first_user_ctxt) + 1; -+ else -+ dd->pport[pidx].n_krcv_queues = nkrcvqs; -+ dd->first_user_ctxt += -+ dd->pport[pidx].n_krcv_queues - 1; -+ } 
else -+ /* Account for the HW ctxt */ -+ dd->pport[pidx].n_krcv_queues = 1; - } - -- if (!qib_cfgctxts) { -+ if (!cfgctxts) { - int nctxts = dd->first_user_ctxt + num_online_cpus(); - - if (nctxts <= 6) -@@ -3965,10 +3977,10 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd) - dd->ctxtcnt = 10; - else if (nctxts <= nchipctxts) - dd->ctxtcnt = nchipctxts; -- } else if (qib_cfgctxts < dd->num_pports) -+ } else if (cfgctxts < dd->num_pports) - dd->ctxtcnt = dd->num_pports; -- else if (qib_cfgctxts <= nchipctxts) -- dd->ctxtcnt = qib_cfgctxts; -+ else if (cfgctxts <= nchipctxts) -+ dd->ctxtcnt = cfgctxts; - if (!dd->ctxtcnt) /* none of the above, set to max */ - dd->ctxtcnt = nchipctxts; - -@@ -5799,7 +5811,6 @@ static void get_7322_chip_params(struct qib_devdata *dd) - { - u64 val; - u32 piobufs; -- int mtu; - - dd->palign = qib_read_kreg32(dd, kr_pagealign); - -@@ -5818,11 +5829,10 @@ static void get_7322_chip_params(struct qib_devdata *dd) - dd->piosize2k = val & ~0U; - dd->piosize4k = val >> 32; - -- mtu = ib_mtu_enum_to_int(qib_ibmtu); -- if (mtu == -1) -- mtu = QIB_DEFAULT_MTU; -- dd->pport[0].ibmtu = (u32)mtu; -- dd->pport[1].ibmtu = (u32)mtu; -+ dd->pport[0].ibmtu = ib_mtu_enum_to_int( -+ QIB_MODPARAM_GET(ibmtu, dd->unit, 1)); -+ dd->pport[1].ibmtu = ib_mtu_enum_to_int( -+ QIB_MODPARAM_GET(ibmtu, dd->unit, 2)); - - /* these may be adjusted in init_chip_wc_pat() */ - dd->pio2kbase = (u32 __iomem *) -@@ -6342,11 +6352,11 @@ static void write_7322_initregs(struct qib_devdata *dd) - qib_write_kreg(dd, KREG_IDX(RcvQPMulticastContext_1), 1); - - for (pidx = 0; pidx < dd->num_pports; ++pidx) { -- unsigned n, regno; -+ unsigned i, n, regno, ctxts[18]; - unsigned long flags; - -- if (dd->n_krcv_queues < 2 || -- !dd->pport[pidx].link_speed_supported) -+ if (dd->pport[pidx].n_krcv_queues == 1 || -+ !dd->pport[pidx].link_speed_supported) - continue; - - ppd = &dd->pport[pidx]; -@@ -6359,19 +6369,18 @@ static void write_7322_initregs(struct qib_devdata *dd) - /* Initialize QP to context mapping */ - regno = krp_rcvqpmaptable; - val = 0; -- if (dd->num_pports > 1) -- n = dd->first_user_ctxt / dd->num_pports; -- else -- n = dd->first_user_ctxt - 1; -+ for (i = 0, n = 0; n < dd->first_user_ctxt; n++) { -+ if (dd->skip_kctxt_mask & (1 << n)) -+ continue; -+ if (dd->rcd[n]->ppd->port == pidx+1) -+ ctxts[i++] = n; -+ if (i == ppd->n_krcv_queues) -+ break; -+ } - for (i = 0; i < 32; ) { - unsigned ctxt; - -- if (dd->num_pports > 1) -- ctxt = (i % n) * dd->num_pports + pidx; -- else if (i % n) -- ctxt = (i % n) + 1; -- else -- ctxt = ppd->hw_pidx; -+ ctxt = ctxts[i % ppd->n_krcv_queues]; - val |= ctxt << (5 * (i % 6)); - i++; - if (i % 6 == 0) { -@@ -6419,7 +6428,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - { - struct qib_pportdata *ppd; - unsigned features, pidx, sbufcnt; -- int ret, mtu; -+ int ret, maxmtu = 0; - u32 sbufs, updthresh; - - /* pport structs are contiguous, allocated after devdata */ -@@ -6496,10 +6505,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - */ - qib_7322_set_baseaddrs(dd); - -- mtu = ib_mtu_enum_to_int(qib_ibmtu); -- if (mtu == -1) -- mtu = QIB_DEFAULT_MTU; -- - dd->cspec->int_enable_mask = QIB_I_BITSEXTANT; - /* all hwerrors become interrupts, unless special purposed */ - dd->cspec->hwerrmask = ~0ULL; -@@ -6509,9 +6514,14 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - ~(SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_0) | - SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_1) | - HWE_MASK(LATriggered)); -- - for (pidx = 
0; pidx < NUM_IB_PORTS; ++pidx) { - struct qib_chippport_specific *cp = ppd->cpspec; -+ int mtu = ib_mtu_enum_to_int( -+ QIB_MODPARAM_GET(ibmtu, dd->unit, pidx+1)); -+ u8 vls = QIB_MODPARAM_GET(num_vls, dd->unit, pidx+1); -+ if (mtu == -1) -+ mtu = QIB_DEFAULT_MTU; -+ maxmtu = max(maxmtu, mtu); - ppd->link_speed_supported = features & PORT_SPD_CAP; - features >>= PORT_SPD_CAP_SHIFT; - if (!ppd->link_speed_supported) { -@@ -6565,7 +6575,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - ppd->link_width_active = IB_WIDTH_4X; - ppd->link_speed_active = QIB_IB_SDR; - ppd->delay_mult = ib_rate_to_delay[IB_RATE_10_GBPS]; -- switch (qib_num_cfg_vls) { -+ switch (vls) { - case 1: - ppd->vls_supported = IB_VL_VL0; - break; -@@ -6575,8 +6585,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - default: - qib_devinfo(dd->pcidev, - "Invalid num_vls %u, using 4 VLs\n", -- qib_num_cfg_vls); -- qib_num_cfg_vls = 4; -+ vls); - /* fall through */ - case 4: - ppd->vls_supported = IB_VL_VL0_3; -@@ -6588,9 +6597,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - qib_devinfo(dd->pcidev, - "Invalid num_vls %u for MTU %d " - ", using 4 VLs\n", -- qib_num_cfg_vls, mtu); -+ vls, mtu); - ppd->vls_supported = IB_VL_VL0_3; -- qib_num_cfg_vls = 4; - } - break; - } -@@ -6640,7 +6648,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32); - - /* we always allocate at least 2048 bytes for eager buffers */ -- dd->rcvegrbufsize = max(mtu, 2048); -+ dd->rcvegrbufsize = max(maxmtu, 2048); - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); - dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); - -@@ -6698,8 +6706,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd) - goto bail; /* no error, so can still figure out why err */ - } - -- write_7322_initregs(dd); - ret = qib_create_ctxts(dd); -+ write_7322_initregs(dd); - init_7322_cntrnames(dd); - - updthresh = 8U; /* update threshold */ -diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c -index 84b3222..0e83ed4 100644 ---- a/drivers/infiniband/hw/qib/qib_init.c -+++ b/drivers/infiniband/hw/qib/qib_init.c -@@ -67,6 +67,11 @@ - #define QLOGIC_IB_R_SOFTWARE_SHIFT 24 - #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62) - -+unsigned int snoop_enable; /* By default (0) snooping is disabled */ -+ -+module_param_named(snoop_enable, snoop_enable , int, 0644); -+MODULE_PARM_DESC(snoop_enable, "snooping mode "); -+ - /* - * Select the NUMA node id on which to allocate the receive header - * queue, eager buffers and send pioavail register. -@@ -79,9 +84,8 @@ MODULE_PARM_DESC(numa_node, "NUMA node on which memory is allocated"); - * Number of ctxts we are configured to use (to allow for more pio - * buffers per ctxt, etc.) Zero means use chip value. 
- */ --ushort qib_cfgctxts; --module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); --MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); -+QIB_MODPARAM_UNIT(cfgctxts, NULL, 0, S_IRUGO, -+ "Set max number of contexts to use"); - - /* - * If set, do not write to any regs if avoidable, hack to allow -@@ -97,9 +101,8 @@ MODULE_PARM_DESC(numa_aware, "Use NUMA aware allocations: " - "0=disabled, 1=enabled, " - "10=option 0 for AMD & <= Intel Westmere cpus and option 1 for newer cpus(default)"); - --unsigned qib_n_krcv_queues; --module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); --MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); -+QIB_MODPARAM_PORT(krcvqs, NULL, 0, S_IRUGO, -+ "number of kernel receive queues per IB port"); - - unsigned qib_cc_table_size; - module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); -@@ -123,14 +126,15 @@ unsigned long *qib_cpulist; - /* set number of contexts we'll actually use */ - void qib_set_ctxtcnt(struct qib_devdata *dd) - { -- if (!qib_cfgctxts) { -+ u64 val = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0); -+ if (!val) { - dd->cfgctxts = dd->first_user_ctxt + num_online_cpus(); - if (dd->cfgctxts > dd->ctxtcnt) - dd->cfgctxts = dd->ctxtcnt; -- } else if (qib_cfgctxts < dd->num_pports) -+ } else if (val < dd->num_pports) - dd->cfgctxts = dd->ctxtcnt; -- else if (qib_cfgctxts <= dd->ctxtcnt) -- dd->cfgctxts = qib_cfgctxts; -+ else if (val <= dd->ctxtcnt) -+ dd->cfgctxts = val; - else - dd->cfgctxts = dd->ctxtcnt; - dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 : -@@ -142,13 +146,27 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) - */ - int qib_create_ctxts(struct qib_devdata *dd) - { -- unsigned i; -+ unsigned i, c, p; -+ unsigned port; - int ret; -+ int node_id; - int local_node_id = pcibus_to_node(dd->pcidev->bus); -+ s64 new_node_id = qib_numa_node; - - if (local_node_id < 0) - local_node_id = numa_node_id(); -- dd->assigned_node_id = local_node_id; -+ -+ if (new_node_id < 0) -+ new_node_id = local_node_id; -+ -+ new_node_id = node_online(new_node_id) ? new_node_id : -+ local_node_id; -+ -+ dd->local_node_id = local_node_id; -+ dd->assigned_node_id = new_node_id; -+ -+ node_id = qib_numa_aware ? dd->local_node_id : -+ dd->assigned_node_id; - - /* - * Allocate full ctxtcnt array, rather than just cfgctxts, because -@@ -162,17 +180,29 @@ int qib_create_ctxts(struct qib_devdata *dd) - goto done; - } - -+ c = dd->num_pports ? min( -+ (unsigned)dd->pport[0].n_krcv_queues, -+ (dd->num_pports > 1 ? -+ (unsigned)dd->pport[1].n_krcv_queues : (unsigned)-1)) -+ : 0; -+ p = dd->num_pports > 1 ? -+ (dd->pport[0].n_krcv_queues > dd->pport[1].n_krcv_queues ? 
-+ 0 : 1) : 0; -+ - /* create (one or more) kctxt */ -- for (i = 0; i < dd->first_user_ctxt; ++i) { -+ for (port = 0, i = 0; i < dd->first_user_ctxt; ++i) { - struct qib_pportdata *ppd; - struct qib_ctxtdata *rcd; - - if (dd->skip_kctxt_mask & (1 << i)) - continue; - -- ppd = dd->pport + (i % dd->num_pports); -+ if (i < (c * dd->num_pports)) -+ ppd = dd->pport + (i % dd->num_pports); -+ else -+ ppd = dd->pport + p; - -- rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); -+ rcd = qib_create_ctxtdata(ppd, i, node_id); - if (!rcd) { - qib_dev_err(dd, - "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); -@@ -722,10 +752,10 @@ int qib_init(struct qib_devdata *dd, int reinit) - if (lastfail) - ret = lastfail; - ppd = dd->pport + pidx; -- mtu = ib_mtu_enum_to_int(qib_ibmtu); -+ mtu = ib_mtu_enum_to_int( -+ QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port)); - if (mtu == -1) { - mtu = QIB_DEFAULT_MTU; -- qib_ibmtu = 0; /* don't leave invalid value */ - } - /* set max we can ever have for this driver load */ - ppd->init_ibmaxlen = min(mtu > 2048 ? -@@ -750,6 +780,11 @@ int qib_init(struct qib_devdata *dd, int reinit) - lastfail = -ENETDOWN; - continue; - } -+ if (snoop_enable) { -+ ppd->filter_callback = NULL; -+ ppd->filter_value = NULL; -+ ppd->mode_flag = 0; -+ } - - portok++; - } -@@ -1108,24 +1143,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) - unsigned long flags; - struct qib_devdata *dd; - int ret; -- int node_id; -- int local_node_id = pcibus_to_node(dd->pcidev->bus); -- s64 new_node_id = qib_numa_node; -- -- if (local_node_id < 0) -- local_node_id = numa_node_id(); -- -- if (new_node_id < 0) -- new_node_id = local_node_id; -- -- new_node_id = node_online(new_node_id) ? new_node_id : -- local_node_id; -- -- dd->local_node_id = local_node_id; -- dd->assigned_node_id = new_node_id; - -- node_id = qib_numa_aware ? dd->local_node_id : -- dd->assigned_node_id; - - dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { -@@ -1273,6 +1291,15 @@ static int __init qlogic_ib_init(void) - if (ret) - goto bail; - -+ if (qib_numa_aware == QIB_DRIVER_AUTO_CONFIGURATION) -+ qib_numa_aware = qib_configure_numa(boot_cpu_data) ? 1 : 0; -+ -+ if (qib_rcvhdrpoll == QIB_DRIVER_AUTO_CONFIGURATION) -+ qib_rcvhdrpoll = qib_configure_numa(boot_cpu_data) ? 0 : 1; -+ -+ if (qib_pio_avail_bits == QIB_DRIVER_AUTO_CONFIGURATION) -+ qib_pio_avail_bits = qib_configure_numa(boot_cpu_data) ? 0 : 1; -+ - /* - * These must be called before the driver is registered with - * the PCI subsystem. 
-@@ -1298,13 +1325,13 @@ static int __init qlogic_ib_init(void) - #ifdef QIB_CONFIG_KNX - ret = qib_knx_server_init(); - if (ret < 0) -- pr_err("Unable to start KNX listen thread\n"); -+ printk(KERN_ERR QIB_DRV_NAME -+ ": Unable to start KNX listen thread\n"); - #endif -- - goto bail; /* all OK */ - - bail_dev: --#ifdef CONFIG_INFINIBAND_QIB_DCA -+ #ifdef CONFIG_INFINIBAND_QIB_DCA - dca_unregister_notify(&dca_notifier); - #endif - #ifdef CONFIG_DEBUG_FS -@@ -1328,7 +1355,6 @@ static void __exit qlogic_ib_cleanup(void) - #ifdef QIB_CONFIG_KNX - qib_knx_server_exit(); - #endif -- - ret = qib_exit_qibfs(); - if (ret) - pr_err( -@@ -1348,6 +1374,7 @@ static void __exit qlogic_ib_cleanup(void) - - idr_destroy(&qib_unit_table); - qib_dev_cleanup(); -+ qib_clean_mod_param(); - } - - module_exit(qlogic_ib_cleanup); -@@ -1560,6 +1587,8 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) - } - - qib_verify_pioperf(dd); -+ if (snoop_enable) -+ qib_snoop_add(dd); - bail: - return ret; - } -@@ -1572,6 +1601,9 @@ static void qib_remove_one(struct pci_dev *pdev) - /* unregister from IB core */ - qib_unregister_ib_device(dd); - -+#ifdef QIB_CONFIG_KNX -+ qib_knx_remove_device(dd); -+#endif - /* - * Disable the IB link, disable interrupts on the device, - * clear dma engines, etc. -@@ -1686,7 +1718,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) - unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; - size_t size; - gfp_t gfp_flags; -- int old_node_id; -+ int old_dev_node; - - /* - * GFP_USER, but without GFP_FS, so buffer cache can be -@@ -1706,14 +1738,14 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) - if (!rcd->rcvegrbuf) { - rcd->rcvegrbuf = - kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), -- GFP_KERNEL, rcd->node_id); -+ GFP_KERNEL, rcd->node_id); - if (!rcd->rcvegrbuf) - goto bail; - } - if (!rcd->rcvegrbuf_phys) { - rcd->rcvegrbuf_phys = - kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), -- GFP_KERNEL, rcd->node_id); -+ GFP_KERNEL, rcd->node_id); - if (!rcd->rcvegrbuf_phys) - goto bail_rcvegrbuf; - } -@@ -1721,13 +1753,13 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) - if (rcd->rcvegrbuf[e]) - continue; - -- old_node_id = dev_to_node(&dd->pcidev->dev); -+ old_dev_node = dev_to_node(&dd->pcidev->dev); - set_dev_node(&dd->pcidev->dev, rcd->node_id); - rcd->rcvegrbuf[e] = - dma_alloc_coherent(&dd->pcidev->dev, size, - &rcd->rcvegrbuf_phys[e], - gfp_flags); -- set_dev_node(&dd->pcidev->dev, old_node_id); -+ set_dev_node(&dd->pcidev->dev, old_dev_node); - if (!rcd->rcvegrbuf[e]) - goto bail_rcvegrbuf_phys; - } -diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c -index c15276f..f692913 100644 ---- a/drivers/infiniband/hw/qib/qib_knx.c -+++ b/drivers/infiniband/hw/qib/qib_knx.c -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2012 Intel Corporation. All rights reserved. -+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU -@@ -37,12 +37,21 @@ - - #include "qib.h" - #include "qib_knx.h" -+#include "qib_user_sdma.h" -+#include "qib_knx_common.h" - - unsigned int qib_knx_nconns = 5; - module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO); - MODULE_PARM_DESC(num_conns, "Max number of pending connections"); - - #define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9 -+#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x) -+ -+#define knx_sdma_next(sdma) \ -+ sdma->head = ((sdma->head + 1) % sdma->desc_num) -+#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub) -+#define QIB_KNX_SDMA_STATUS(sdma, st) \ -+ QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)st << 32) | 1) - - struct qib_knx_server { - struct task_struct *kthread; -@@ -82,7 +91,16 @@ struct qib_knx_mem_map_sg { - struct scif_range *pages; - }; - -+struct qib_knx_tidrcv { -+ struct qib_knx_rma tidmem; -+ u64 tidbase; -+ u32 tidcnt; -+}; -+ - struct qib_knx_ctxt { -+ u16 ctxt; -+ struct qib_knx *knx; -+ struct qib_pportdata *ppd; - /* local registered memory for PIO buffers */ - struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT]; - /* local registered memory for user registers */ -@@ -104,6 +122,23 @@ struct qib_knx_ctxt { - __u64 status; - __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT]; - __u32 runtime_flags; -+ -+ struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT]; -+}; -+ -+struct qib_knx_sdma { -+ /* KNX flags page */ -+ struct scif_range *mflag_pages; -+ struct qib_knx_sdma_mflags *mflags; -+ /* KNX descriptor queue */ -+ struct scif_range *queue_pages; -+ struct qib_knx_sdma_desc *queue; -+ u32 desc_num; -+ /* host flags (in host memory) */ -+ struct qib_knx_rma hflags_mem; -+ struct qib_knx_sdma_hflags *hflags; -+ u32 head; /* shadow */ -+ u32 complete; - }; - - struct qib_knx { -@@ -114,10 +149,16 @@ struct qib_knx { - int numa_node; - struct qib_devdata *dd; - struct qib_knx_ctxt **ctxts; -+ spinlock_t ctxt_lock; -+ resource_size_t bar; -+ u64 barlen; -+ struct qib_knx_sdma *sdma; -+ struct task_struct *sdma_poll; -+ atomic_t tref; -+ char tname[64]; -+ struct qib_knx_rma tidmem; - }; - --#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x) -- - static struct qib_knx_server *server; - - static int qib_knx_init(struct qib_knx_server *); -@@ -127,19 +168,20 @@ static off_t qib_knx_register_memory(struct qib_knx *, struct qib_knx_rma *, - void *, size_t, int, const char *); - static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *, - const char *); -+static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t); - static ssize_t qib_show_knx_node(struct device *, struct device_attribute *, - char *); -- --static DEVICE_ATTR(knx_node, S_IRUGO, qib_show_knx_node, NULL); --static ssize_t qib_show_knx_node(struct device *dev, -- struct device_attribute *attr, char *buf) --{ -- struct qib_ibdev *ibdev = -- container_of(dev, struct qib_ibdev, ibdev.dev); -- struct qib_devdata *dd = dd_from_dev(ibdev); -- -- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->knx->peer.node); --} -+static int qib_knx_sdma_init(struct qib_knx *); -+static void qib_knx_sdma_teardown(struct qib_knx *); -+static __always_inline struct page * -+qib_knx_phys_to_page(struct qib_knx *, unsigned long); -+static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *, -+ struct qib_knx_sdma_desc *, -+ struct qib_user_sdma_queue *, -+ int *, struct list_head *); -+static int qib_knx_sdma_poll(void *); -+static int qib_knx_tidrcv_init(struct qib_knx *); -+static int qib_knx_tidrcv_teardown(struct 
qib_knx *); - - inline struct qib_knx *qib_knx_get(u16 nodeid) - { -@@ -162,10 +204,11 @@ inline struct qib_devdata *qib_knx_node_to_dd(u16 node) - - static int qib_knx_init(struct qib_knx_server *server) - { -- int ret = 0, num_devs = 0, i; -- struct qib_devdata *dd; -+ int ret = 0, num_devs = 0, i, seen = 0; -+ unsigned fewest = -1U; -+ struct qib_devdata *dd = NULL, *dd_no_numa = NULL; - struct qib_knx *knx; -- struct ib_device *ibdev; -+ struct qib_device_info info = { -1 }; - - knx = kzalloc(sizeof(*knx), GFP_KERNEL); - if (!knx) { -@@ -179,10 +222,14 @@ static int qib_knx_init(struct qib_knx_server *server) - } - - INIT_LIST_HEAD(&knx->list); -+ spin_lock_init(&knx->ctxt_lock); - knx->numa_node = -1; - ret = scif_pci_info(knx->peer.node, &knx->pci_info); -- if (!ret) -+ if (!ret) { - knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus); -+ knx->bar = pci_resource_start(knx->pci_info.pdev, 0); -+ knx->barlen = pci_resource_len(knx->pci_info.pdev, 0); -+ } - - if (knx->numa_node < 0) - knx->numa_node = numa_node_id(); -@@ -190,40 +237,58 @@ static int qib_knx_init(struct qib_knx_server *server) - num_devs = qib_count_units(NULL, NULL); - if (unlikely(!num_devs)) { - ret = -ENODEV; -+ /* we have to send this */ -+ scif_send(knx->epd.epd, &info, sizeof(info), -+ SCIF_SEND_BLOCK); - goto done; - } - -- for (i = 0; i < num_devs; i++) { -+ /* -+ * Attempt to find an HCA on the same NUMA node as the card. Save -+ * the first HCA that hasn't been associated with a card in case -+ * there is no HCA on the same NUMA node. -+ */ -+ for (i = 0; seen < num_devs; i++) { - dd = qib_lookup(i); -- if (dd && dd->local_node_id == knx->numa_node) -- knx->dd = dd; -+ if (dd) { -+ if (dd->local_node_id == knx->numa_node) { -+ knx->dd = dd; -+ break; -+ } else if (dd->num_knx < fewest) -+ dd_no_numa = dd; -+ seen++; -+ } - } - /* - * We didn't find a QIB device on the same NUMA node, -- * round-robin across all devices. -+ * use the "backup". - */ - if (unlikely(!knx->dd)) { -- knx->dd = qib_lookup(server->nclients % num_devs); -- /* it is possible for qib_lookup to return NULL */ -- if (unlikely(!knx->dd)) { -+ if (!dd_no_numa) { - ret = -ENODEV; -+ /* we have to send this */ -+ scif_send(knx->epd.epd, &info, sizeof(info), -+ SCIF_SEND_BLOCK); - goto done; - } -+ knx->dd = dd_no_numa; - } -- knx->dd->node_id = knx->peer.node; -- knx->dd->knx = knx; -+ knx->dd->num_knx++; -+ - knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts), - GFP_KERNEL, knx->numa_node); - if (!knx->ctxts) - ret = -ENOMEM; -- ibdev = &knx->dd->verbs_dev.ibdev; -- ret = device_create_file(&ibdev->dev, &dev_attr_knx_node); -+ /* Give the KNX the associated device information. */ -+ info.unit = knx->dd->unit; -+ ret = scif_send(knx->epd.epd, &info, sizeof(info), -+ SCIF_SEND_BLOCK); -+ -+ ret = qib_knx_sdma_init(knx); - if (ret) -- /* -- * clear the error code since we don't want to fail the -- * initialization. 
-- */ -- ret = 0; -+ goto done; -+ atomic_set(&knx->tref, 0); -+ ret = qib_knx_tidrcv_init(knx); - done: - spin_lock(&server->client_lock); - list_add_tail(&knx->list, &server->clients); -@@ -237,13 +302,12 @@ bail: - static void qib_knx_free(struct qib_knx *knx, int unload) - { - struct qib_devdata *dd = knx->dd; -- struct ib_device *ibdev; - int i; - -- if (dd) { -- ibdev = &dd->verbs_dev.ibdev; -- device_remove_file(&ibdev->dev, &dev_attr_knx_node); -- } -+ qib_knx_tidrcv_teardown(knx); -+ qib_knx_sdma_teardown(knx); -+ if (dd) -+ dd->num_knx--; - /* - * If this function is called with unload set, we can - * free the context data. Otherwise, we are here -@@ -360,9 +424,16 @@ done: - return ret; - } - --int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt) -+static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src, -+ size_t size) - { -- struct qib_knx *knx = dd_to_knx(dd); -+ memcpy_fromio(dst, src, size); -+} -+ -+int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt) -+{ -+ struct qib_knx *knx = qib_knx_get(node_id); -+ struct qib_devdata *dd = knx->dd; - struct qib_knx_ctxt *ptr; - int ret = 0; - -@@ -379,7 +450,14 @@ int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt) - ret = -ENOMEM; - goto bail; - } -+ ptr->knx = knx; -+ ptr->ctxt = ctxt; -+ ptr->ppd = dd->rcd[ctxt]->ppd; -+ -+ spin_lock(&knx->ctxt_lock); - knx->ctxts[ctxt] = ptr; -+ dd->rcd[ctxt]->krcd = ptr; -+ spin_unlock(&knx->ctxt_lock); - bail: - return ret; - } -@@ -388,10 +466,11 @@ __u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd, - enum qib_knx_ctxtinfo_type type, - struct file *fp) - { -- struct qib_knx *knx = dd_to_knx(rcd->dd); -+ struct qib_knx *knx = rcd->krcd->knx; - __u16 subctxt; - __u64 ret = 0; - -+ spin_lock(&knx->ctxt_lock); - if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt]) - goto done; - -@@ -414,6 +493,7 @@ __u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd, - break; - } - done: -+ spin_unlock(&knx->ctxt_lock); - return ret; - } - -@@ -424,7 +504,7 @@ int qib_knx_setup_piobufs(struct qib_devdata *dd, struct qib_ctxtdata *rcd, - char buf[16]; - off_t offset; - int ret = 0; -- struct qib_knx *knx = dd_to_knx(dd); -+ struct qib_knx *knx = rcd->krcd->knx; - - if (unlikely(!knx)) { - ret = -ENODEV; -@@ -472,7 +552,7 @@ int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd, - { - int ret = 0; - off_t offset; -- struct qib_knx *knx = dd_to_knx(dd); -+ struct qib_knx *knx = rcd->krcd->knx; - - if (unlikely(!knx)) { - ret = -ENODEV; -@@ -496,7 +576,7 @@ int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd, - goto bail; - } - knx->ctxts[rcd->ctxt]->uregbase = offset; -- -+ - /* - * register the PIO availability registers. - * user status 64bit values are part of the page containing the -@@ -533,7 +613,7 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd, - { - struct qib_knx_mem_map_sg *mapsg; - struct qib_knx_mem_map *map; -- struct qib_knx *knx = dd_to_knx(dd); -+ struct qib_knx *knx = rcd->krcd->knx; - dma_addr_t offset; - struct scatterlist *sg; - unsigned num_pages; -@@ -590,7 +670,8 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd, - * can use 64bit addresses for DMA but the CPU might not. - * (see pci_set_dma_mask() in qib_pcie.c). 
- */
-- mapsg->sglist = kzalloc(num_pages * sizeof(*mapsg->sglist), GFP_KERNEL);
-+ mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist),
-+ GFP_KERNEL, knx->numa_node);
- if (!mapsg->sglist) {
- ret = -ENOMEM;
- goto bail_rcvq_pages;
-@@ -625,7 +706,7 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- }
- rcd->rcvhdrq_phys = sg_dma_address(mapsg->sglist);
- rcd->rcvhdrq = mapsg->pages->va[0];
--
-+
- map = &knx->ctxts[rcd->ctxt]->sbufstatus;
- ret = scif_get_pages(knx->epd.epd, binfo->spi_sendbuf_status,
- PAGE_SIZE, &map->pages);
-@@ -700,7 +781,7 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
- struct qib_knx_mem_map_sg *map;
- struct scatterlist *sg;
- struct qib_devdata *dd = rcd->dd;
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
- unsigned size, egrsize, egrcnt, num_pages, bufs_ppage,
- egrbufcnt;
- dma_addr_t dma_addr, page;
-@@ -761,7 +842,7 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
- goto bail_free_scif;
- }
- }
--
-+
- /*
- * Allocate array of DMA addresses for each of the mapped
- * pages.
-@@ -775,10 +856,11 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
- goto bail_free_rcvegr;
- }
- }
--
-+
- map->size = size;
- map->dir = DMA_BIDIRECTIONAL;
-- map->sglist = kzalloc(num_pages * sizeof(*map->sglist), GFP_KERNEL);
-+ map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL,
-+ knx->numa_node);
- if (!map->sglist) {
- ret = -ENOMEM;
- goto bail_free_rcvegr_phys;
-@@ -830,7 +912,7 @@ bail:
-
- void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- {
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
- struct qib_knx_ctxt *ctxt;
- char buf[16];
- int i, ret = 0;
-@@ -838,7 +920,11 @@ void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- if (!rcd || !knx || !knx->ctxts)
- return;
-
-+ spin_lock(&knx->ctxt_lock);
- ctxt = knx->ctxts[rcd->ctxt];
-+ knx->ctxts[rcd->ctxt] = NULL;
-+ spin_unlock(&knx->ctxt_lock);
-+
- if (!ctxt)
- return;
-
-@@ -884,12 +970,535 @@ void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf);
- }
-
-- /* MITKO XXX: handle rcd->tid_pg_list */
-- knx->ctxts[rcd->ctxt] = NULL;
- kfree(ctxt);
- kfree(rcd);
- }
-
-+/*
-+ * TID management for processes on the MIC happens on the MIC. Therefore,
-+ * we only register the HW TID array here.
-+ * The MIC will calculate TID array offsets using the same algorithm as
-+ * the host. Therefore, it is OK that the entire HW TID array is mapped
-+ * since neither side should step on the other.
-+ */ -+static int qib_knx_tidrcv_init(struct qib_knx *knx) -+{ -+ struct qib_devdata *dd = knx->dd; -+ struct qib_knx_tid_info info; -+ void *tidbase; -+ int ret = 0; -+ off_t offset = 0; -+ size_t len; -+ char buf[64]; -+ -+ memset(&info, 0, sizeof(info)); -+ -+ info.tidcnt = dd->rcvtidcnt; -+ tidbase = ((char *)dd->kregbase + dd->rcvtidbase); -+ info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase); -+ info.tidtemplate = dd->tidtemplate; -+ info.invalidtid = dd->tidinvalid; -+ /* information needed to properly calculate DMA address to MIC pages */ -+ info.bar_addr = knx->bar; -+ info.bar_len = knx->barlen; -+ -+ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node); -+ offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase, -+ info.tidbase_len, SCIF_PROT_WRITE, -+ buf); -+ info.tidbase_offset = offset; -+ if (IS_ERR_VALUE(offset)) -+ ret = offset; -+ len = scif_send(knx->epd.epd, &info, sizeof(info), -+ SCIF_SEND_BLOCK); -+ if (len < sizeof(info)) -+ ret = -EFAULT; -+ return ret; -+} -+ -+static int qib_knx_tidrcv_teardown(struct qib_knx *knx) -+{ -+ char buf[64]; -+ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node); -+ return qib_knx_unregister_memory(knx, &knx->tidmem, buf); -+} -+ -+static int qib_knx_sdma_init(struct qib_knx *knx) -+{ -+ struct qib_knx_host_mem flags; -+ struct qib_knx_knc_mem mflags; -+ struct qib_knx_sdma *sdma; -+ char buf[64]; -+ int ret = 0; -+ -+ sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node); -+ if (!sdma) { -+ ret = -ENOMEM; -+ goto done; -+ } -+ sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node); -+ if (!sdma->hflags) { -+ ret = -ENOMEM; -+ goto done_free; -+ } -+ snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node); -+ flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem, -+ sdma->hflags, -+ PAGE_SIZE, -+ SCIF_PROT_WRITE, -+ buf); -+ if (IS_ERR_VALUE(flags.flags_offset)) { -+ ret = flags.flags_offset; -+ goto free_flags; -+ } -+ sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt; -+ flags.desc_num = sdma->desc_num; -+ ret = scif_send(knx->epd.epd, &flags, sizeof(flags), -+ SCIF_SEND_BLOCK); -+ if (ret < sizeof(flags)) -+ goto unregister; -+ ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags), -+ SCIF_RECV_BLOCK); -+ if (ret < sizeof(mflags)) { -+ ret = -EINVAL; -+ goto unregister; -+ } -+ ret = scif_get_pages(knx->epd.epd, mflags.flags_offset, -+ PAGE_SIZE, &sdma->mflag_pages); -+ if (ret < 0 || !sdma->mflag_pages->nr_pages) { -+ ret = -EFAULT; -+ goto unregister; -+ } -+ sdma->mflags = sdma->mflag_pages->va[0]; -+ ret = scif_get_pages(knx->epd.epd, mflags.queue_offset, -+ mflags.queue_len, &sdma->queue_pages); -+ if (ret < 0) -+ goto put_flags; -+ if ((sdma->queue_pages->nr_pages * PAGE_SIZE) != -+ mflags.queue_len) { -+ ret = -EFAULT; -+ goto put_queue; -+ } -+ sdma->queue = sdma->queue_pages->va[0]; -+ sdma->complete = -1; -+ sdma->head = -1; -+ /* set the initial trigger value */ -+ QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1); -+ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete); -+ snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll", -+ knx->peer.node); -+ knx->sdma = sdma; -+ ret = 0; -+ goto done; -+put_queue: -+ scif_put_pages(sdma->queue_pages); -+put_flags: -+ scif_put_pages(sdma->mflag_pages); -+unregister: -+ qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf); -+free_flags: -+ kfree(sdma->hflags); -+done_free: -+ kfree(sdma); -+done: -+ /* -+ * we have to respond to the MIC so it doesn't get stuck -+ * in the scif_recv call -+ */ 
-+ scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK);
-+ return ret;
-+}
-+
-+static void qib_knx_sdma_teardown(struct qib_knx *knx)
-+{
-+ int ret;
-+ if (knx->sdma_poll)
-+ ret = kthread_stop(knx->sdma_poll);
-+ if (knx->sdma) {
-+ if (knx->sdma->queue_pages->nr_pages) {
-+ knx->sdma->queue = NULL;
-+ scif_put_pages(knx->sdma->queue_pages);
-+ }
-+ if (knx->sdma->mflag_pages->nr_pages) {
-+ knx->sdma->mflags = NULL;
-+ scif_put_pages(knx->sdma->mflag_pages);
-+ }
-+ kfree(knx->sdma->hflags);
-+ kfree(knx->sdma);
-+ knx->sdma = NULL;
-+ }
-+}
-+
-+int qib_knx_sdma_queue_create(struct file *fd)
-+{
-+ struct qib_ctxtdata *rcd = ctxt_fp(fd);
-+ struct qib_devdata *dd = rcd->dd;
-+ struct qib_knx *knx = rcd->krcd->knx;
-+ struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt];
-+ u8 subctxt = subctxt_fp(fd);
-+ int ret = 0;
-+
-+ if (!ctxt) {
-+ ret = -EINVAL;
-+ goto done;
-+ }
-+ ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev,
-+ dd->unit, rcd->ctxt,
-+ subctxt);
-+ if (!ctxt->pq[subctxt])
-+ ret = -ENOMEM;
-+ user_sdma_queue_fp(fd) = ctxt->pq[subctxt];
-+ /*
-+ * We start the polling thread the first time a user SDMA
-+ * queue is created. There is no reason to take up CPU
-+ * cycles before then.
-+ */
-+ if (atomic_inc_return(&knx->tref) == 1) {
-+ knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx,
-+ knx->tname);
-+ if (IS_ERR(knx->sdma_poll)) {
-+ ret = -PTR_ERR(knx->sdma_poll);
-+ atomic_dec(&knx->tref);
-+ goto free_queue;
-+ }
-+ }
-+ goto done;
-+free_queue:
-+ user_sdma_queue_fp(fd) = NULL;
-+ qib_user_sdma_queue_destroy(ctxt->pq[subctxt]);
-+ ctxt->pq[subctxt] = NULL;
-+done:
-+ return ret;
-+}
-+
-+void qib_knx_sdma_queue_destroy(struct qib_filedata *fd)
-+{
-+ struct qib_ctxtdata *rcd = fd->rcd;
-+ struct qib_knx *knx;
-+ unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt;
-+
-+ /* Host processes do not have a KNX rcd pointer. */
-+ if (!rcd->krcd)
-+ return;
-+ knx = rcd->krcd->knx;
-+ /* We still have the memory pointer through fd->pq */
-+ spin_lock(&knx->ctxt_lock);
-+ if (knx->ctxts[ctxt])
-+ knx->ctxts[ctxt]->pq[subctxt] = NULL;
-+ spin_unlock(&knx->ctxt_lock);
-+ if (atomic_dec_and_test(&knx->tref)) {
-+ int ret = kthread_stop(knx->sdma_poll);
-+ knx->sdma_poll = NULL;
-+ }
-+}
-+
-+/*
-+ * Convert a MIC physical address to the corresponding host page.
-+ */
-+static __always_inline struct page *
-+qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr) {
-+ unsigned long paddr;
-+ if ((knx->bar + addr + PAGE_SIZE) >
-+ (knx->bar + knx->barlen))
-+ return NULL;
-+ paddr = knx->bar + addr;
-+ return pfn_to_page(paddr >> PAGE_SHIFT);
-+}
-+
-+static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt,
-+ struct qib_knx_sdma_desc *desc,
-+ struct qib_user_sdma_queue *pq,
-+ int *ndesc, struct list_head *list)
-+{
-+ struct qib_knx *knx = ctxt->knx;
-+ struct qib_user_sdma_pkt *pkt;
-+ dma_addr_t pbc_dma_addr;
-+ unsigned pktnw, pbcnw;
-+ u32 counter;
-+ u16 frag_size;
-+ int ret = 0;
-+ __le32 *pbc;
-+
-+ counter = pq->counter;
-+
-+ pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr);
-+ if (!pbc) {
-+ ret = -ENOMEM;
-+ goto done;
-+ }
-+ memcpy(pbc, desc->pbc, desc->pbclen);
-+
-+ pktnw = (le32_to_cpu(*pbc) & 0xFFFF);
-+ /*
-+ * This assignment is a bit strange. It's because the
-+ * pbc counts the number of 32 bit words in the full
-+ * packet _except_ the first word of the pbc itself...
-+ */ -+ pbcnw = (desc->pbclen >> 2) - 1; -+ -+ if (pktnw < pbcnw) { -+ ret = -EINVAL; -+ goto free_pbc; -+ } -+ -+ if (pktnw != ((desc->length >> 2) + pbcnw)) { -+ ret = -EINVAL; -+ goto free_pbc; -+ } -+ -+ frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF; -+ if (((frag_size ? frag_size : desc->length) + desc->pbclen) > -+ ctxt->ppd->ibmaxlen) { -+ ret = -EINVAL; -+ goto free_pbc; -+ } -+ if (frag_size) { -+ /* new SDMA "protocol" */ -+ unsigned pktsize, n; -+ -+ n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1); -+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n; -+ -+ pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL); -+ if (!pkt) { -+ ret = -ENOMEM; -+ goto free_pbc; -+ } -+ pkt->largepkt = 1; -+ pkt->frag_size = frag_size; -+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); -+ -+ if (desc->tidlen) { -+ char *tidsmptr = (char *)pkt + pktsize; -+ memcpy(tidsmptr, desc->tidsm, desc->tidlen); -+ pkt->tidsm = -+ (struct qib_tid_session_member *)tidsmptr; -+ pkt->tidsmcount = desc->tidlen / -+ sizeof(*desc->tidsm); -+ pkt->tidsmidx = 0; -+ } -+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); -+ } else { -+ /* old SDMA */ -+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); -+ if (!pkt) { -+ ret = -ENOMEM; -+ goto free_pbc; -+ } -+ pkt->largepkt = 0; -+ pkt->frag_size = desc->length; -+ pkt->addrlimit = ARRAY_SIZE(pkt->addr); -+ } -+ pkt->bytes_togo = desc->length; -+ pkt->payload_size = 0; -+ pkt->counter = counter; -+ pkt->tiddma = !!desc->tidlen; -+ /* -+ * The generic user SDMA code will use this as a flag to -+ * decide whether to call the KNx-specific pkt free -+ * function. However, it doesn't know what the value -+ * actually means. -+ */ -+ pkt->remote = (u64)knx; -+ -+ qib_user_sdma_init_frag(pkt, 0, -+ 0, desc->pbclen, -+ 1, 0, -+ 0, 0, -+ NULL, pbc, -+ pbc_dma_addr, desc->pbclen); -+ pkt->index = 0; -+ pkt->naddr = 1; -+ -+ if (desc->npages) { -+ /* we have user data */ -+ int i; -+ struct page *page; -+ unsigned plen = 0, len = desc->length; -+ for (i = 0; i < desc->npages; i++) { -+ unsigned long off = (i == 0 ? desc->offset : 0); -+ plen = (len > PAGE_SIZE ? PAGE_SIZE : len); -+ page = qib_knx_phys_to_page(knx, desc->pages[i]); -+ ret = qib_user_sdma_page_to_frags(knx->dd, pq, -+ pkt, page, 0, off, -+ (off + plen > PAGE_SIZE ? 
-+ PAGE_SIZE - off : plen), -+ NULL); -+ if (ret < 0) -+ goto free_sdma; -+ len -= plen - off; -+ } -+ } else { -+ pkt->addr[0].last_desc = 1; -+ if (pbc_dma_addr == 0) { -+ pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev, -+ pbc, desc->pbclen, -+ DMA_TO_DEVICE); -+ if (dma_mapping_error(&knx->dd->pcidev->dev, -+ pbc_dma_addr)) { -+ ret = -ENOMEM; -+ goto free_sdma; -+ } -+ pkt->addr[0].addr = pbc_dma_addr; -+ pkt->addr[0].dma_mapped = 1; -+ } -+ } -+ counter++; -+ pkt->pq = pq; -+ pkt->index = 0; -+ *ndesc = pkt->naddr; -+ -+ list_add_tail(&pkt->list, list); -+ goto done; -+free_sdma: -+ if (pkt->largepkt) -+ kfree(pkt); -+ else -+ kmem_cache_free(pq->pkt_slab, pkt); -+free_pbc: -+ if (pbc_dma_addr) -+ dma_pool_free(pq->header_cache, pbc, pbc_dma_addr); -+ else -+ kfree(pbc); -+done: -+ return ret; -+} -+ -+void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt) -+{ -+ struct qib_knx *knx = (struct qib_knx *)pkt->remote; -+ struct qib_knx_sdma *sdma = knx->sdma; -+ sdma_next(sdma, complete); -+ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete); -+} -+ -+static int qib_knx_sdma_poll(void *data) -+{ -+ struct qib_knx *knx = (struct qib_knx *)data; -+ struct qib_knx_ctxt *ctxt; -+ struct qib_knx_sdma_desc desc; -+ struct qib_knx_sdma *sdma = knx->sdma; -+ struct qib_user_sdma_queue *pq; -+ struct list_head list; -+ u32 new_head; -+ int ret = 0, ndesc = 0, added; -+ -+ if (!sdma) -+ return -EFAULT; -+ -+ while (!kthread_should_stop()) { -+ added = 0; -+ new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger); -+ while (sdma->head != new_head) { -+ knx_sdma_next(sdma); -+ qib_knx_memcpy(&desc, sdma->queue + sdma->head, -+ sizeof(desc)); -+ if (!desc.ctxt) { -+ QIB_KNX_SDMA_STATUS(sdma, -EINVAL); -+ continue; -+ } -+ spin_lock(&knx->ctxt_lock); -+ ctxt = knx->ctxts[desc.ctxt]; -+ if (!ctxt) { -+ /* we should never get here */ -+ QIB_KNX_SDMA_STATUS(sdma, -EINVAL); -+ goto done_unlock; -+ } -+ pq = ctxt->pq[desc.subctxt]; -+ if (!pq) { -+ QIB_KNX_SDMA_STATUS(sdma, -EFAULT); -+ goto done_unlock; -+ } -+ mutex_lock(&pq->lock); -+ if (pq->added > ctxt->ppd->sdma_descq_removed) -+ qib_user_sdma_hwqueue_clean(ctxt->ppd); -+ if (pq->num_sending) -+ qib_user_sdma_queue_clean(ctxt->ppd, pq); -+ -+ INIT_LIST_HEAD(&list); -+ ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq, -+ &ndesc, &list); -+ QIB_KNX_SDMA_STATUS(sdma, ret); -+ if (!list_empty(&list)) { -+ if (qib_sdma_descq_freecnt(ctxt->ppd) < -+ ndesc) { -+ qib_user_sdma_hwqueue_clean( -+ ctxt->ppd); -+ if (pq->num_sending) -+ qib_user_sdma_queue_clean( -+ ctxt->ppd, pq); -+ } -+ ret = qib_user_sdma_push_pkts(ctxt->ppd, -+ pq, &list, 1); -+ if (ret < 0) -+ goto free_pkts; -+ else { -+ pq->counter++; -+ added++; -+ } -+ } -+free_pkts: -+ if (!list_empty(&list)) -+ qib_user_sdma_free_pkt_list( -+ &knx->dd->pcidev->dev, pq, &list); -+ mutex_unlock(&pq->lock); -+done_unlock: -+ spin_unlock(&knx->ctxt_lock); -+ } -+ if (!added) { -+ int i; -+ /* -+ * Push the queues along -+ * The polling thread will enter the inner loop only -+ * if the KNX has posted new descriptors to the queue. -+ * However, any packets that have been completed by -+ * the HW need to be cleaned and that won't happen -+ * unless we explicitly check. 
-+ */ -+ for (i = 0; -+ i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT; -+ i++) { -+ int c = i / QLOGIC_IB_MAX_SUBCTXT, -+ s = i % QLOGIC_IB_MAX_SUBCTXT; -+ spin_lock(&knx->ctxt_lock); -+ ctxt = knx->ctxts[c]; -+ if (!ctxt) -+ goto loop_unlock; -+ pq = ctxt->pq[s]; -+ if (!pq) -+ goto loop_unlock; -+ mutex_lock(&pq->lock); -+ if (pq->num_sending) -+ qib_user_sdma_queue_clean(ctxt->ppd, -+ pq); -+ mutex_unlock(&pq->lock); -+loop_unlock: -+ spin_unlock(&knx->ctxt_lock); -+ } -+ might_sleep(); -+ } -+ } -+ return ret; -+} -+ -+void qib_knx_remove_device(struct qib_devdata *dd) -+{ -+ if (server && dd->num_knx) { -+ struct qib_knx *knx, *knxp; -+ list_for_each_entry_safe(knx, knxp, &server->clients, list) { -+ if (knx->dd == dd) { -+ spin_lock(&server->client_lock); -+ list_del(&knx->list); -+ server->nclients--; -+ spin_unlock(&server->client_lock); -+ qib_knx_free(knx, 0); -+ kfree(knx); -+ } -+ } -+ } -+ return; -+} -+ - int __init qib_knx_server_init(void) - { - server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL); -@@ -908,7 +1517,6 @@ void __exit qib_knx_server_exit(void) - { - if (server) { - struct qib_knx *t, *tt; -- - /* Stop the thread so we don't accept any new connections. */ - kthread_stop(server->kthread); - list_for_each_entry_safe(t, tt, &server->clients, list) { -@@ -921,3 +1529,4 @@ void __exit qib_knx_server_exit(void) - kfree(server); - } - } -+ -diff --git a/drivers/infiniband/hw/qib/qib_knx.h b/drivers/infiniband/hw/qib/qib_knx.h -index d767a60..fcb5a3e 100644 ---- a/drivers/infiniband/hw/qib/qib_knx.h -+++ b/drivers/infiniband/hw/qib/qib_knx.h -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2012 Intel Corporation. All rights reserved. -+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU -@@ -44,13 +44,12 @@ enum qib_knx_ctxtinfo_type { - - int __init qib_knx_server_init(void); - void __exit qib_knx_server_exit(void); --static __always_inline struct qib_knx *dd_to_knx(struct qib_devdata *dd) --{ -- return (struct qib_knx *)dd->knx; --} -+ -+void qib_knx_remove_device(struct qib_devdata *); -+ - inline struct qib_knx *qib_knx_get(uint16_t); - inline struct qib_devdata *qib_knx_node_to_dd(uint16_t); --int qib_knx_alloc_ctxt(struct qib_devdata *, unsigned); -+int qib_knx_alloc_ctxt(u16, unsigned); - int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16); - int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *, - struct qib_base_info *); -@@ -60,4 +59,6 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *, struct qib_base_info *); - void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); - __u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type, - struct file *); -+int qib_knx_sdma_queue_create(struct file *); -+void qib_knx_sdma_queue_destroy(struct qib_filedata *); - #endif /* _QIB_KNX_H */ -diff --git a/drivers/infiniband/hw/qib/qib_knx_common.h b/drivers/infiniband/hw/qib/qib_knx_common.h -new file mode 100644 -index 0000000..9639592 ---- /dev/null -+++ b/drivers/infiniband/hw/qib/qib_knx_common.h -@@ -0,0 +1,126 @@ -+/* -+ * Copyright (c) 2013 Intel Corporation. All rights reserved. -+ * -+ * This software is available to you under a choice of one of two -+ * licenses. 
You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ * Redistribution and use in source and binary forms, with or
-+ * without modification, are permitted provided that the following
-+ * conditions are met:
-+ *
-+ * - Redistributions of source code must retain the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer in the documentation and/or other materials
-+ * provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+#ifndef _QIB_KNX_COMMON_H
-+#define _QIB_KNX_COMMON_H
-+
-+struct qib_device_info {
-+ u16 unit;
-+};
-+
-+#define QIB_SDMA_MAX_NPAGES 33
-+#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld
-+#define QIB_KNX_SDMA_SET(fld, val) \
-+ do { \
-+ fld = (u64)(val); \
-+ smp_mb(); \
-+ } while (0)
-+
-+struct qib_knx_host_mem {
-+ off_t flags_offset;
-+ unsigned desc_num;
-+};
-+
-+struct qib_knx_knc_mem {
-+ off_t flags_offset;
-+ off_t queue_offset;
-+ size_t queue_len;
-+};
-+
-+struct qib_tid_sm {
-+ __u16 tid;
-+ __u16 offset;
-+ __u16 length;
-+};
-+
-+/*
-+ * SDMA transfer descriptor. This structure communicates the SDMA
-+ * transfers from the MIC to the host. It is very important for
-+ * performance reasons that its size is multiple of 64B in order
-+ * to guarantee proper alignment in the descriptor array.
-+ */
-+struct qib_knx_sdma_desc {
-+ u16 ctxt;
-+ u16 subctxt;
-+ u32 pbclen;
-+ __le32 pbc[16];
-+ u64 length;
-+ u32 npages;
-+ unsigned tidlen;
-+ off_t offset;
-+ unsigned long pages[QIB_SDMA_MAX_NPAGES];
-+ /* This array is 198B so the compiler will pad
-+ * it by 2B to make it multiple of 8B. */
-+ struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES];
-+ /*
-+ * The two paddings below are included in order to
-+ * make the size of the entire struct 576B (multiple
-+ * of 64B). The goal is that all elements in an array
-+ * of struct qib_knx_sdma_desc are 64B aligned.
-+ */
-+ u16 __padding0;
-+ u64 __padding1[2];
-+};
-+
-+/*
-+ * trigger, status, and complete fields are each padded to 8 u64s
-+ * so that each is cacheline sized.
-+ */
-+struct qib_knx_sdma_hflags {
-+ u64 trigger;
-+ u64 __padding[7];
-+};
-+
-+#define sdma_next(s, fld) \
-+ (s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1)
-+
-+struct qib_knx_sdma_mflags {
-+ u64 status;
-+ u64 __padding1[7];
-+ u64 complete;
-+ u64 __padding2[7];
-+};
-+
-+struct qib_knx_tid_info {
-+ /* this is the entire set of 512 entries (= 4K) so
-+ * we can register. subctxt division will be done
-+ * in the MIC driver.
*/ -+ off_t tidbase_offset; -+ size_t tidbase_len; -+ u64 tidbase; -+ unsigned tidcnt; -+ u64 tidtemplate; -+ unsigned long invalidtid; -+ u64 bar_addr; -+ u64 bar_len; -+}; -+ -+#endif /* _QIB_KNX_COMMON_H */ -diff --git a/drivers/infiniband/hw/qib/qib_knx_sdma.h b/drivers/infiniband/hw/qib/qib_knx_sdma.h -deleted file mode 100644 -index 8c67b1f..0000000 ---- a/drivers/infiniband/hw/qib/qib_knx_sdma.h -+++ /dev/null -@@ -1,105 +0,0 @@ --/* -- * Copyright (c) 2013 Intel Corporation. All rights reserved. -- * -- * This software is available to you under a choice of one of two -- * licenses. You may choose to be licensed under the terms of the GNU -- * General Public License (GPL) Version 2, available from the file -- * COPYING in the main directory of this source tree, or the -- * OpenIB.org BSD license below: -- * -- * Redistribution and use in source and binary forms, with or -- * without modification, are permitted provided that the following -- * conditions are met: -- * -- * - Redistributions of source code must retain the above -- * copyright notice, this list of conditions and the following -- * disclaimer. -- * -- * - Redistributions in binary form must reproduce the above -- * copyright notice, this list of conditions and the following -- * disclaimer in the documentation and/or other materials -- * provided with the distribution. -- * -- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -- * SOFTWARE. -- */ --#ifndef _QIB_KNX_SDMA_H --#define _QIB_KNX_SDMA_H -- --#define QIB_SDMA_MAX_NPAGES 33 --#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld --#define QIB_KNX_SDMA_SET(fld, val) \ -- do { \ -- fld = (u64)(val); \ -- smp_mb(); \ -- } while (0) -- --struct qib_knx_host_mem { -- off_t flags_offset; -- unsigned desc_num; --}; -- --struct qib_knx_knc_mem { -- off_t flags_offset; -- off_t queue_offset; -- size_t queue_len; --}; -- --struct qib_tid_sm { -- __u16 tid; -- __u16 offset; -- __u16 length; --}; -- --/* -- * SDMA transfer descriptor. This structure communicates the SDMA -- * transfers from the MIC to the host. It is very important for -- * performance reasons that its size is multiple of 64B in order -- * to guarantee proper alignment in the descriptor array. -- */ --struct qib_knx_sdma_desc { -- u16 ctxt; -- u16 subctxt; -- u32 pbclen; -- __le32 pbc[16]; -- u64 length; -- u32 npages; -- unsigned tidlen; -- off_t offset; -- unsigned long pages[QIB_SDMA_MAX_NPAGES]; -- /* This array is 198B so the compiler will pad -- * it by 2B to make it multiple of 8B. */ -- struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES]; -- /* -- * The two paddings below are included in order to -- * make the size of the entire struct 576B (multiple -- * of 64B). The goal is that all elements in an array -- * of struct qib_knx_sdma_desc are 64B aligned. -- */ -- u16 __padding0; -- u64 __padding1[2]; --}; -- --/* -- * trigger, status, and complete fields are by 8 to be -- * cacheline size. 
-- */
--struct qib_knx_sdma_hflags {
--	u64 trigger;
--	u64 __padding[7];
--};
--
--struct qib_knx_sdma_mflags {
--	u64 status;
--	u64 __padding1[7];
--	u64 complete;
--	u64 __padding2[7];
--};
--
--#endif /* _QIB_KNX_SDMA_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-deleted file mode 100644
-index 842fca1..0000000
---- a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-+++ /dev/null
-@@ -1,48 +0,0 @@
--/*
-- * Copyright (c) 2013 Intel Corporation. All rights reserved.
-- *
-- * This software is available to you under a choice of one of two
-- * licenses. You may choose to be licensed under the terms of the GNU
-- * General Public License (GPL) Version 2, available from the file
-- * COPYING in the main directory of this source tree, or the
-- * OpenIB.org BSD license below:
-- *
-- * Redistribution and use in source and binary forms, with or
-- * without modification, are permitted provided that the following
-- * conditions are met:
-- *
-- * - Redistributions of source code must retain the above
-- * copyright notice, this list of conditions and the following
-- * disclaimer.
-- *
-- * - Redistributions in binary form must reproduce the above
-- * copyright notice, this list of conditions and the following
-- * disclaimer in the documentation and/or other materials
-- * provided with the distribution.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- * SOFTWARE.
-- */
--#ifndef _QIB_KNX_TIDRCV_H
--
--struct qib_knx_tid_info {
--	/* this is the entire set of 512 entries (= 4K) so
--	 * we can register. subctxt division will be done
--	 * in MIC driver. */
--	off_t tidbase_offset;
--	size_t tidbase_len;
--	u64 tidbase;
--	unsigned tidcnt;
--	u64 tidtemplate;
--	unsigned long invalidtid;
--	u64 bar_addr;
--	u64 bar_len;
--};
--
--#endif /* QIB_KNX_TIDRCV_H */
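Both deleted headers carried the same QIB_KNX_SDMA_SET() idiom: store the value, then issue a full barrier (smp_mb()) so the peer on the other side of the PCIe bus never observes a torn or reordered descriptor update. A user-space sketch of that publish pattern, with a C11 fence standing in for the kernel barrier; the two-field slot is illustrative, not the driver's layout:

    #include <stdatomic.h>
    #include <stdint.h>

    struct slot {
            uint64_t length;   /* payload, written first... */
            uint64_t trigger;  /* ...then the flag the consumer polls */
    };

    /* Publish: payload stores must be visible before the trigger store.
     * The kernel code uses smp_mb(); seq_cst is the closest portable analog. */
    static void slot_publish(struct slot *s, uint64_t len)
    {
            s->length = len;
            atomic_thread_fence(memory_order_seq_cst);
            *(volatile uint64_t *)&s->trigger = 1;
    }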
-diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
-index ccb1191..4b46f6c 100644
---- a/drivers/infiniband/hw/qib/qib_mad.c
-+++ b/drivers/infiniband/hw/qib/qib_mad.c
-@@ -536,7 +536,8 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
- 	pip->vl_arb_low_cap =
- 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_LOW_CAP);
- 	/* InitTypeReply = 0 */
--	pip->inittypereply_mtucap = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
-+	pip->inittypereply_mtucap =
-+		QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port);
- 	/* HCAs ignore VLStallCount and HOQLife */
- 	/* pip->vlstallcnt_hoqlife; */
- 	pip->operationalvl_pei_peo_fpi_fpo =
-diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
-index 3f14009..d7eebfb 100644
---- a/drivers/infiniband/hw/qib/qib_pcie.c
-+++ b/drivers/infiniband/hw/qib/qib_pcie.c
-@@ -501,9 +501,8 @@ static int val2fld(int wd, int mask)
- 	return wd;
- }
-
--static int qib_pcie_coalesce;
--module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
--MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
-+static QIB_MODPARAM_UNIT(pcie_coalesce, NULL, 0, S_IRUGO,
-+	"tune PCIe coalescing on some Intel chipsets");
-
- /*
-  * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
-@@ -518,7 +517,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
- 	u16 devid;
- 	u32 mask, bits, val;
-
--	if (!qib_pcie_coalesce)
-+	if (!QIB_MODPARAM_GET(pcie_coalesce, dd->unit, 0))
- 		return 0;
-
- 	/* Find out supported and configured values for parent (root) */
-@@ -576,9 +575,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
-  * BIOS may not set PCIe bus-utilization parameters for best performance.
-  * Check and optionally adjust them to maximize our throughput.
-  */
--static int qib_pcie_caps;
--module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
--MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-+static QIB_MODPARAM_UNIT(pcie_caps, NULL, 0, S_IRUGO,
-+	"Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)");
-
- static int qib_tune_pcie_caps(struct qib_devdata *dd)
- {
-@@ -587,6 +585,7 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
- 	u16 pcaps, pctl, ecaps, ectl;
- 	int rc_sup, ep_sup;
- 	int rc_cur, ep_cur;
-+	int caps = QIB_MODPARAM_GET(pcie_caps, dd->unit, 0);
-
- 	/* Find out supported and configured values for parent (root) */
- 	parent = dd->pcidev->bus->self;
-@@ -614,8 +613,8 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
- 	ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
-
- 	/* If Supported greater than limit in module param, limit it */
--	if (rc_sup > (qib_pcie_caps & 7))
--		rc_sup = qib_pcie_caps & 7;
-+	if (rc_sup > (caps & 7))
-+		rc_sup = caps & 7;
- 	/* If less than (allowed, supported), bump root payload */
- 	if (rc_sup > rc_cur) {
- 		rc_cur = rc_sup;
-@@ -637,8 +636,8 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
- 	 * which is code '5' (log2(4096) - 7)
- 	 */
- 	rc_sup = 5;
--	if (rc_sup > ((qib_pcie_caps >> 4) & 7))
--		rc_sup = (qib_pcie_caps >> 4) & 7;
-+	if (rc_sup > ((caps >> 4) & 7))
-+		rc_sup = (caps >> 4) & 7;
- 	rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
- 	ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
-
-diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
-index 3cca55b..4208b20 100644
---- a/drivers/infiniband/hw/qib/qib_qp.c
-+++ b/drivers/infiniband/hw/qib/qib_qp.c
-@@ -124,6 +124,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- 		     enum ib_qp_type type, u8 port)
- {
- 	u32 i, offset, max_scan, qpn;
-+	unsigned krcvqs;
- 	struct qpn_map *map;
- 	u32 ret;
-
-@@ -141,10 +142,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- 		goto bail;
- 	}
-
-+	krcvqs = dd->pport[port-1].n_krcv_queues;
- 	qpn = qpt->last + 2;
- 	if (qpn >= QPN_MAX)
- 		qpn = 2;
--	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
-+	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= krcvqs)
- 		qpn = (qpn | qpt->mask) + 2;
- 	offset = qpn & BITS_PER_PAGE_MASK;
- 	map = &qpt->map[qpn / BITS_PER_PAGE];
-@@ -162,7 +164,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- 			goto bail;
- 		}
- 		offset = find_next_offset(qpt, map, offset,
--					  dd->n_krcv_queues);
-+					  krcvqs);
- 		qpn = mk_qpn(qpt, map, offset);
- 		/*
- 		 * This test differs from alloc_pidmap().
-diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
-index c6d6a54..1e08943 100644
---- a/drivers/infiniband/hw/qib/qib_sdma.c
-+++ b/drivers/infiniband/hw/qib/qib_sdma.c
-@@ -532,7 +532,8 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd,
-  */
- int qib_sdma_verbs_send(struct qib_pportdata *ppd,
- 			struct qib_sge_state *ss, u32 dwords,
--			struct qib_verbs_txreq *tx)
-+			struct qib_verbs_txreq *tx,
-+			struct snoop_packet *packet)
- {
- 	unsigned long flags;
- 	struct qib_sge *sge;
-@@ -543,6 +544,10 @@ int qib_sdma_verbs_send(struct qib_pportdata *ppd,
- 	u64 sdmadesc[2];
- 	u32 dwoffset;
- 	dma_addr_t addr;
-+	u8 *packet_data = NULL;
-+
-+	if (packet)
-+		packet_data = packet->data + ((tx->hdr_dwords-2) << 2);
-
- 	spin_lock_irqsave(&ppd->sdma_lock, flags);
-
-@@ -599,6 +604,10 @@ retry:
- 			   dw << 2, DMA_TO_DEVICE);
- 		if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
- 			goto unmap;
-+		if (packet) {
-+			memcpy(packet_data, sge->vaddr, len);
-+			packet_data += len;
-+		}
- 		sdmadesc[0] = 0;
- 		make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
- 		/* SDmaUseLargeBuf has to be set in every descriptor */
-diff --git a/drivers/infiniband/hw/qib/qib_snoop.c b/drivers/infiniband/hw/qib/qib_snoop.c
-new file mode 100644
-index 0000000..3c62bbb
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_snoop.c
-@@ -0,0 +1,970 @@
-+/*
-+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
-+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses. You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ * Redistribution and use in source and binary forms, with or
-+ * without modification, are permitted provided that the following
-+ * conditions are met:
-+ *
-+ * - Redistributions of source code must retain the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer in the documentation and/or other materials
-+ * provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+
-+/*
-+ * This file implements a raw read/raw write interface for snooping raw
-+ * packets from the wire and injecting raw packets to the wire.
-+ *
-+ * Other things that this interface could do at some point are:
-+ * - Allow packets to be injected back into the stack
-+ * - Provide an intercept for packets coming from the upper layers to
-+ *   move them back into user-space.
-+ */
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#include /* for ioctl constants */
-+#include
-+
-+
-+#include "qib.h"
-+#include "qib_verbs.h"
-+#include "qib_common.h"
-+#include
-+
-+#define QIB_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
-+#define QIB_SNOOP_IOC_BASE_SEQ 0x80
-+/* This starts our ioctl sequence
-+ * numbers *way* off from the ones
-+ * defined in ib_core
-+ */
-+#define QIB_SNOOP_IOCGETLINKSTATE \
-+	_IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ)
-+#define QIB_SNOOP_IOCSETLINKSTATE \
-+	_IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+1)
-+#define QIB_SNOOP_IOCCLEARQUEUE \
-+	_IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+2)
-+#define QIB_SNOOP_IOCCLEARFILTER \
-+	_IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+3)
-+#define QIB_SNOOP_IOCSETFILTER \
-+	_IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+4)
-+
-+/* local prototypes */
-+static int qib_snoop_open(struct inode *in, struct file *fp);
-+static unsigned int qib_snoop_poll(struct file *fp,
-+	struct poll_table_struct *wait);
-+static ssize_t qib_snoop_read(struct file *fp, char __user *data,
-+	size_t pkt_len, loff_t *off);
-+static int qib_snoop_release(struct inode *in, struct file *fp);
-+
-+static long qib_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
-+
-+static ssize_t qib_snoop_write(struct file *fp, const char __user *data,
-+	size_t pkt_len, loff_t *off);
-+
-+#include
-+
-+struct qib_packet_filter_command {
-+	int opcode;
-+	int length;
-+	void *value_ptr;
-+};
-+
-+enum qib_packet_filter_opcodes {
-+	FILTER_BY_LID,
-+	FILTER_BY_DLID,
-+	FILTER_BY_MAD_MGMT_CLASS,
-+	FILTER_BY_QP_NUMBER,
-+	FILTER_BY_PKT_TYPE,
-+	FILTER_BY_SERVICE_LEVEL,
-+	FILTER_BY_PKEY
-+};
-+
-+static const struct file_operations snoop_file_ops = {
-+	.owner = THIS_MODULE,
-+	.open = qib_snoop_open,
-+	.read = qib_snoop_read,
-+	.unlocked_ioctl = qib_ioctl,
-+	.poll = qib_snoop_poll,
-+	.write = qib_snoop_write,
-+	.release = qib_snoop_release
-+};
-+
-+struct qib_filter_array {
-+	int (*filter)(void *, void *, void *);
-+};
-+
-+static int qib_filter_lid(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_dlid(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-+	void *value);
-+static int qib_filter_qp_number(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_ibpacket_type(void *ibhdr, void *packet_data,
-+	void *value);
-+static int qib_filter_ib_service_level(void *ibhdr, void *packet_data,
-+	void *value);
-+static int qib_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
-+
-+static struct qib_filter_array qib_filters[] = {
-+	{ qib_filter_lid },
-+	{ qib_filter_dlid },
-+	{ qib_filter_mad_mgmt_class },
-+	{ qib_filter_qp_number },
-+	{ qib_filter_ibpacket_type },
-+	{ qib_filter_ib_service_level },
-+	{ qib_filter_ib_pkey }
-+};
-+
-+#define QIB_MAX_FILTERS ARRAY_SIZE(qib_filters)
-+#define QIB_DRV_NAME "ib_qib"
-+#define QIB_MAJOR 233
-+#define QIB_USER_MINOR_BASE 0
-+#define QIB_DIAG_MINOR_BASE 129
-+#define QIB_SNOOP_MINOR_BASE 160
-+#define QIB_CAPTURE_MINOR_BASE 200
-+#define QIB_NMINORS 255
-+#define PORT_BITS 2
-+#define PORT_MASK ((1U << PORT_BITS) - 1)
-+#define GET_HCA(x) ((unsigned int)((x) >> PORT_BITS))
-+#define GET_PORT(x) ((unsigned int)((x) & PORT_MASK))
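The snoop and capture nodes pack the HCA unit and port index into the character-device minor: minor = ((unit << PORT_BITS) | port) + base, decoded again with GET_HCA()/GET_PORT() once the base is subtracted. A stand-alone sketch of that round trip; the macros mirror the ones above, and main() is purely illustrative:

    #include <stdio.h>

    #define PORT_BITS 2
    #define PORT_MASK ((1U << PORT_BITS) - 1)
    #define GET_HCA(x)  ((unsigned int)((x) >> PORT_BITS))
    #define GET_PORT(x) ((unsigned int)((x) & PORT_MASK))
    #define QIB_SNOOP_MINOR_BASE 160

    int main(void)
    {
            unsigned unit = 1, port = 1;  /* second HCA, second port (0-based) */
            unsigned minor = ((unit << PORT_BITS) | port) + QIB_SNOOP_MINOR_BASE;
            unsigned x = minor - QIB_SNOOP_MINOR_BASE;

            /* prints "minor 165 -> hca 1 port 1" */
            printf("minor %u -> hca %u port %u\n", minor, GET_HCA(x), GET_PORT(x));
            return 0;
    }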
-+
-+int qib_snoop_add(struct qib_devdata *dd)
-+{
-+	char name[32];
-+	int ret = 0;
-+	int i;
-+	int j;
-+	int minor = 0;
-+
-+	for (i = 0; i < dd->num_pports; i++) {
-+		spin_lock_init(&dd->pport[i].snoop_write_lock);
-+		for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++) {
-+			spin_lock_init(&dd->pport[i].sc_device[j].snoop_lock);
-+			INIT_LIST_HEAD(
-+				&(dd->pport[i].sc_device[j].snoop_queue));
-+			init_waitqueue_head(
-+				&dd->pport[i].sc_device[j].snoop_waitq);
-+
-+			if (j == 0) {
-+				minor = (((dd->unit << PORT_BITS) | i)) +
-+					QIB_SNOOP_MINOR_BASE;
-+				snprintf(name, sizeof(name),
-+					"ipath_snoop_%02d_%02d", dd->unit, i+1);
-+			} else {
-+				minor = (((dd->unit << PORT_BITS) | i)) +
-+					QIB_CAPTURE_MINOR_BASE;
-+				snprintf(name, sizeof(name),
-+					"ipath_capture_%02d_%02d",
-+					dd->unit, i+1);
-+			}
-+
-+			ret = qib_cdev_init(
-+				minor, name,
-+				&snoop_file_ops,
-+				&dd->pport[i].sc_device[j].snoop_cdev,
-+				&dd->pport[i].sc_device[j].snoop_class_dev);
-+			if (ret)
-+				goto bail;
-+		}
-+		pr_info("qib%d: snoop dev for hca %02d enabled port %02d\n"
-+			"qib%d: capture dev for hca %02d enabled port %02d\n",
-+			dd->unit, dd->unit, i+1, dd->unit, dd->unit, i+1);
-+		dd->pport[i].mode_flag = 0;
-+	}
-+out:
-+	return ret;
-+bail:
-+	qib_dev_err(dd, "Couldn't create %s device: %d", name, ret);
-+	i--;
-+	if (i != dd->num_pports) {
-+		for (; i >= 0 ; i--) {
-+			for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++)
-+				qib_cdev_cleanup(
-+					&dd->pport[i].
-+					sc_device[j].
-+					snoop_cdev,
-+					&dd->pport[i].
-+					sc_device[j].
-+					snoop_class_dev);
-+			dd->pport[i].mode_flag = 0;
-+		}
-+	}
-+	goto out;
-+}
-+
-+/* this must be called w/ dd->snoop_in_lock held */
-+static void drain_snoop_list(struct qib_aux_device *sc_device)
-+{
-+	struct list_head *pos, *q;
-+	struct snoop_packet *packet;
-+
-+	list_for_each_safe(pos, q, &(sc_device->snoop_queue)) {
-+		packet = list_entry(pos, struct snoop_packet, list);
-+		list_del(pos);
-+		kfree(packet);
-+	}
-+}
-+
-+void qib_snoop_remove(struct qib_devdata *dd)
-+{
-+	unsigned long flags = 0;
-+	int i;
-+	int j;
-+
-+	for (i = 0; i < dd->num_pports; i++) {
-+		dd->pport[i].mode_flag = 0;
-+		for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++) {
-+			spin_lock_irqsave(&dd->pport[i].sc_device[j].snoop_lock,
-+					  flags);
-+			drain_snoop_list(&dd->pport[i].sc_device[j]);
-+			qib_cdev_cleanup(&dd->pport[i].sc_device[j].snoop_cdev,
-+				&dd->pport[i].sc_device[j].snoop_class_dev);
-+			spin_unlock_irqrestore(
-+				&dd->pport[i].sc_device[j].snoop_lock,
-+				flags);
-+		}
-+	}
-+}
-+
-+static int qib_snoop_open(struct inode *in, struct file *fp)
-+{
-+	int unit = iminor(in);
-+	int devnum;
-+	int portnum = 0;
-+	int ret;
-+	int mode_flag = 0;
-+	unsigned long flags;
-+	struct qib_devdata *dd;
-+
-+	mutex_lock(&qib_mutex);
-+
-+	if (unit >= QIB_CAPTURE_MINOR_BASE) {
-+		unit -= QIB_CAPTURE_MINOR_BASE;
-+		devnum = 1;
-+		mode_flag = QIB_PORT_CAPTURE_MODE;
-+	} else {
-+		unit -= QIB_SNOOP_MINOR_BASE;
-+		devnum = 0;
-+		mode_flag = QIB_PORT_SNOOP_MODE;
-+	}
-+
-+	dd = qib_lookup(GET_HCA(unit));
-+	if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
-+	    !dd->kregbase) {
-+		ret = -ENODEV;
-+		goto bail;
-+	}
-+	portnum = GET_PORT(unit);
-+
-+	spin_lock_irqsave(&dd->pport[portnum].sc_device[devnum].snoop_lock,
-+			  flags);
-+
-+	if (dd->pport[portnum].mode_flag & mode_flag) {
-+		ret = -EBUSY;
-+		spin_unlock_irqrestore(
-+			&dd->pport[portnum].sc_device[devnum].snoop_lock,
-+			flags);
-+		goto bail;
-+	}
-+
-+	drain_snoop_list(&dd->pport[portnum].sc_device[devnum]);
-+	spin_unlock_irqrestore(
-+		&dd->pport[portnum].sc_device[devnum].snoop_lock,
-+		flags);
-+	if (devnum)
-+		pr_alert("capture device for hca %02d port %02d is opened\n",
-+			 GET_HCA(unit), portnum+1);
-+	else
-+		pr_alert("snoop device for hca %02d port %02d is opened\n",
-+			 GET_HCA(unit), portnum+1);
-+
-+	dd->pport[portnum].sc_device[devnum].pport = &dd->pport[portnum];
-+	fp->private_data = &dd->pport[portnum].sc_device[devnum];
-+	ret = 0;
-+	dd->pport[portnum].mode_flag |= mode_flag;
-+
-+bail:
-+	mutex_unlock(&qib_mutex);
-+
-+	return ret;
-+}
-+
-+static int qib_snoop_release(struct inode *in, struct file *fp)
-+{
-+	struct qib_aux_device *sc_device = fp->private_data;
-+	struct qib_pportdata *pport = sc_device->pport;
-+	unsigned long flags = 0;
-+	int devnum = iminor(in);
-+
-+	if (devnum >= QIB_CAPTURE_MINOR_BASE)
-+		devnum = 1;
-+	else
-+		devnum = 0;
-+
-+	spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+	if (devnum)
-+		pport->mode_flag = pport->mode_flag & (~QIB_PORT_CAPTURE_MODE);
-+	else
-+		pport->mode_flag = pport->mode_flag & (~QIB_PORT_SNOOP_MODE);
-+
-+	drain_snoop_list(sc_device);
-+	/* Clear filters before going out */
-+	pport->filter_callback = NULL;
-+	kfree(pport->filter_value);
-+	pport->filter_value = NULL;
-+
-+	spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+	if (devnum)
-+		pr_alert("capture device for hca %02d port %02d is closed\n",
-+			 pport->dd->unit, pport->port);
-+	else
-+		pr_alert("snoop device for hca %02d port %02d is closed\n",
-+			 pport->dd->unit, pport->port);
-+
-+	fp->private_data = NULL;
-+	return 0;
-+}
-+
-+static unsigned int qib_snoop_poll(struct file *fp,
-+				   struct poll_table_struct *wait)
-+{
-+	struct qib_aux_device *sc_device = fp->private_data;
-+	int ret = 0;
-+	unsigned long flags = 0;
-+
-+	spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+
-+	poll_wait(fp, &sc_device->snoop_waitq, wait);
-+	if (!list_empty(&sc_device->snoop_queue))
-+		ret |= POLLIN | POLLRDNORM;
-+
-+	spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+	return ret;
-+
-+}
-+
-+static ssize_t qib_snoop_read(struct file *fp, char __user *data,
-+			      size_t pkt_len, loff_t *off)
-+{
-+	struct qib_aux_device *sc_device = fp->private_data;
-+	ssize_t ret = 0;
-+	unsigned long flags = 0;
-+	struct snoop_packet *packet = NULL;
-+
-+	spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+
-+	while (list_empty(&sc_device->snoop_queue)) {
-+		spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+		if (fp->f_flags & O_NONBLOCK)
-+			return -EAGAIN;
-+
-+
-+		if (wait_event_interruptible(sc_device->snoop_waitq,
-+		    !list_empty(&sc_device->snoop_queue)))
-+			return -EINTR;
-+
-+		spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+	}
-+
-+	if (!list_empty(&(sc_device->snoop_queue))) {
-+		packet = list_entry(sc_device->snoop_queue.next,
-+				    struct snoop_packet, list);
-+		list_del(&packet->list);
-+		spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+		if (pkt_len >= packet->total_len) {
-+			if (copy_to_user(data, packet->data,
-+			    packet->total_len))
-+				ret = -EFAULT;
-+			else
-+				ret = packet->total_len;
-+		} else
-+			ret = -EINVAL;
-+
-+		kfree(packet);
-+	} else
-+		spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+	return ret;
-+}
-+
-+static long qib_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-+{
-+	struct qib_aux_device *sc_device = fp->private_data;
-+	struct qib_pportdata *ppd = sc_device->pport;
-+	struct qib_devdata *dd = ppd->dd;
-+	void *filter_value = NULL;
-+	long ret = 0;
-+	int value = 0;
-+	u8 physState = 0;
-+	u8 linkState = 0;
-+	u16 devState = 0;
-+	unsigned long flags = 0;
-+	unsigned long *argp = NULL;
-+	struct qib_packet_filter_command filter_cmd = {0};
-+
-+	if (((_IOC_DIR(cmd) & _IOC_READ)
-+	    && !access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)))
-+	    || ((_IOC_DIR(cmd) & _IOC_WRITE)
-+	    && !access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)))) {
-+		ret = -EFAULT;
-+	} else if (!capable(CAP_SYS_ADMIN)) {
-+		ret = -EPERM;
-+	} else if (sc_device != (&ppd->sc_device[QIB_SNOOP_DEV_INDEX])
-+		   && cmd != QIB_SNOOP_IOCCLEARQUEUE
-+		   && cmd != QIB_SNOOP_IOCCLEARFILTER
-+		   && cmd != QIB_SNOOP_IOCSETFILTER) {
-+		/* Capture devices are allowed only 3 operations
-+		 * 1.Clear capture queue
-+		 * 2.Clear capture filter
-+		 * 3.Set capture filter
-+		 * Others are invalid.
-+		 */
-+		ret = -EINVAL;
-+	} else {
-+		switch (cmd) {
-+		case QIB_SNOOP_IOCSETLINKSTATE:
-+			ret = __get_user(value, (int __user *) arg);
-+			if (ret != 0)
-+				break;
-+
-+			physState = (value >> 4) & 0xF;
-+			linkState = value & 0xF;
-+
-+			switch (linkState) {
-+			case IB_PORT_NOP:
-+				if (physState == 0)
-+					break;
-+				/* fall through */
-+			case IB_PORT_DOWN:
-+				switch (physState) {
-+				case 0:
-+					if (dd->f_ibphys_portstate &&
-+					    (dd->f_ibphys_portstate(ppd->lastibcstat)
-+					    & 0xF & IB_PHYSPORTSTATE_SLEEP))
-+						devState =
-+							QIB_IB_LINKDOWN_SLEEP;
-+					else
-+						devState =
-+							QIB_IB_LINKDOWN;
-+					break;
-+				case 1:
-+					devState = QIB_IB_LINKDOWN_SLEEP;
-+					break;
-+				case 2:
-+					devState = QIB_IB_LINKDOWN;
-+					break;
-+				case 3:
-+					devState = QIB_IB_LINKDOWN_DISABLE;
-+					break;
-+				default:
-+					ret = -EINVAL;
-+					goto done;
-+					break;
-+				}
-+				ret = qib_set_linkstate(ppd, devState);
-+				break;
-+			case IB_PORT_ARMED:
-+				if (!(dd->flags &
-+				    (QIB_IB_LINKARM | QIB_IB_LINKACTIVE))) {
-+					ret = -EINVAL;
-+					break;
-+				}
-+				ret = qib_set_linkstate(ppd, QIB_IB_LINKARM);
-+				break;
-+			case IB_PORT_ACTIVE:
-+				if (!(dd->flags & QIB_IB_LINKARM)) {
-+					ret = -EINVAL;
-+					break;
-+				}
-+				ret = qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
-+				break;
-+			default:
-+				ret = -EINVAL;
-+				break;
-+			}
-+
-+			if (ret)
-+				break;
-+			/* fall through */
-+
-+		case QIB_SNOOP_IOCGETLINKSTATE:
-+			value = dd->f_ibphys_portstate(ppd->lastibcstat);
-+			value <<= 4;
-+			value |= dd->f_iblink_state(ppd->lastibcstat);
-+			ret = __put_user(value, (int __user *)arg);
-+			break;
-+
-+		case QIB_SNOOP_IOCCLEARQUEUE:
-+			spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+			drain_snoop_list(sc_device);
-+			spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+			break;
-+
-+		case QIB_SNOOP_IOCCLEARFILTER:
-+			spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+			if (ppd->filter_callback) {
-+				/* Drain packets first */
-+				drain_snoop_list(sc_device);
-+				ppd->filter_callback = NULL;
-+			}
-+			kfree(ppd->filter_value);
-+			ppd->filter_value = NULL;
-+			spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+			break;
-+
-+		case QIB_SNOOP_IOCSETFILTER:
-+			/* just copy command structure */
-+			argp = (unsigned long *)arg;
-+			ret = copy_from_user(&filter_cmd, (u8 *)argp,
-+					     sizeof(filter_cmd));
-+			if (ret < 0) {
-+				pr_alert("Error copying filter command\n");
-+				break;
-+			}
-+			if (filter_cmd.opcode >= QIB_MAX_FILTERS) {
-+				pr_alert("Invalid opcode in request\n");
-+				ret = -EINVAL;
-+				break;
-+			}
-+			filter_value = kzalloc(
-+				filter_cmd.length * sizeof(u8),
-+				GFP_KERNEL);
-+			if (!filter_value) {
-+				pr_alert("Not enough memory\n");
-+				ret = -ENOMEM;
-+				break;
-+			}
-+			/* copy remaining data from userspace */
-+			ret = copy_from_user((u8 *)filter_value,
-+					     (u8 *)filter_cmd.value_ptr,
-+					     filter_cmd.length);
-+			if (ret < 0) {
-+				kfree(filter_value);
-+				pr_alert("Error copying filter data\n");
-+				break;
-+			}
-+			/* Drain packets first */
-+			spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+			drain_snoop_list(sc_device);
-+			ppd->filter_callback =
-+				qib_filters[filter_cmd.opcode].filter;
-+			/* just in case we see back to back sets */
-+			kfree(ppd->filter_value);
-+			ppd->filter_value = filter_value;
-+			spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+			break;
-+
-+		default:
-+			ret = -ENOTTY;
-+			break;
-+		}
-+	}
-+done:
-+	return ret;
-+}
-+
-+
-+static ssize_t qib_pio_send_pkt(struct qib_pportdata *ppd,
-+				u32 *data, u32 pkt_len)
-+{
-+	int i;
-+	u64 pbc;
-+	u32 __iomem *piobuf;
-+	u32 pnum, control, len;
-+	struct qib_devdata *dd = ppd->dd;
-+	u32 dwords = pkt_len >> 2;
-+	unsigned long flags;
-+	ssize_t ret = -EINVAL;
-+
-+	i = 0;
-+	len = dwords + 1;
-+	control = dd->f_setpbc_control(ppd, len, 0,
-+				       (((u8 *)data)[0] >> 4) & 0xf);
-+	pbc = ((u64) control << 32) | len;
-+	while (!(piobuf = dd->f_getsendbuf(ppd, pbc, &pnum))) {
-+		if (i > 15) {
-+			ret = -ENOMEM;
-+			goto Err;
-+		}
-+		i++;
-+		/* let's try to flush all of it */
-+		dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL);
-+		udelay(100);
-+	}
-+	spin_lock_irqsave(&ppd->snoop_write_lock, flags);
-+	/* disable header check on this packet, since it can't be valid */
-+	dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_DIS1, NULL);
-+	writeq(pbc, piobuf);
-+	qib_flush_wc();
-+	if (dd->flags & QIB_PIO_FLUSH_WC) {
-+		qib_flush_wc();
-+		qib_pio_copy(piobuf + 2, data, dwords - 1);
-+		qib_flush_wc();
-+		__raw_writel(data[dwords - 1], piobuf + dwords + 1);
-+	} else
-+		qib_pio_copy(piobuf + 2, data, dwords);
-+	if (dd->flags & QIB_USE_SPCL_TRIG) {
-+		u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023;
-+
-+		qib_flush_wc();
-+		__raw_writel(0xaebecede, piobuf + spcl_off);
-+	}
-+	qib_sendbuf_done(dd, pnum);
-+	qib_flush_wc();
-+	/* and re-enable hdr check */
-+	dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
-+	spin_unlock_irqrestore(&ppd->snoop_write_lock, flags);
-+	ret = pkt_len;
-+Err:
-+	return ret;
-+}
-+
-+
-+static ssize_t qib_snoop_write(struct file *fp, const char __user *data,
-+			       size_t pkt_len, loff_t *off)
-+{
-+	struct qib_aux_device *sc_device = fp->private_data;
-+	struct qib_pportdata *ppd = sc_device->pport;
-+	struct qib_devdata *dd = ppd->dd;
-+	ssize_t ret = 0;
-+	u32 *buffer = NULL;
-+	u32 plen, clen;
-+
-+	/* capture device should not be entertaining writes */
-+	if (sc_device != (&ppd->sc_device[QIB_SNOOP_DEV_INDEX])) {
-+		ret = -EINVAL;
-+		goto bail;
-+	}
-+
-+	if (pkt_len == 0)
-+		goto bail;
-+
-+	if (pkt_len & 3) {
-+		ret = -EINVAL;
-+		goto bail;
-+	}
-+
-+	clen = pkt_len >> 2;
-+
-+	if (!dd || !(dd->flags & QIB_PRESENT) ||
-+	    !dd->kregbase) {
-+		ret = -ENODEV;
-+		goto bail;
-+	}
-+
-+	if (!(dd->flags & QIB_INITTED)) {
-+		/* no hardware, freeze, etc. */
-+		ret = -ENODEV;
-+		goto bail;
-+	}
-+
-+	plen = sizeof(u32) + pkt_len;
-+
-+	if ((plen + 4) > ppd->ibmaxlen) {
-+		ret = -EINVAL;
-+		goto bail; /* before writing pbc */
-+	}
-+
-+	buffer = vmalloc(plen);
-+	if (!buffer) {
-+		ret = -ENOMEM;
-+		goto bail;
-+	}
-+	if (copy_from_user(buffer,
-+	    (const void __user *) (unsigned long) data, pkt_len)) {
-+		ret = -EFAULT;
-+		goto bail;
-+	}
-+
-+	ret = qib_pio_send_pkt(ppd, buffer, pkt_len);
-+
-+bail:
-+	vfree(buffer);
-+
-+	return ret;
-+}
-+
-+int snoop_get_header_size(struct qib_devdata *dd,
-+			  struct qib_ib_header *hdr,
-+			  void *data, u32 tlen)
-+{
-+	int lnh, header_size = -1;
-+	u8 opcode, opcode_major;
-+	struct qib_other_headers *ohdr;
-+
-+	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+
-+	if (lnh == QIB_LRH_BTH)
-+		ohdr = &hdr->u.oth;
-+	else if (lnh == QIB_LRH_GRH)
-+		ohdr = &hdr->u.l.oth;
-+	else
-+		goto bail;
-+
-+	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-+
-+	opcode_major = (opcode >> 5) & 0x7;
-+
-+	switch (opcode_major) {
-+	case 0x03: /* UD */
-+		if (lnh == QIB_LRH_BTH)
-+			header_size = 8 + 12 + 8 /* LRH + BTH + DETH */;
-+		else if (lnh == QIB_LRH_GRH) {
-+
-+			/* LRH + GRH + BTH + DETH */;
-+			header_size = 8 + 40 + 12 + 8;
-+			/* Some of the header data is in the data segment */
-+			if (dd->rcvhdrentsize == 16)
-+				header_size -= 12;
-+		} else
-+			header_size = -1;
-+
-+		break;
-+	case 0x0: /* RC */
-+	case 0x1: /* UC */
-+	case 0x2: /* RD */
-+	default:
-+		header_size = -1;
-+		break;
-+	}
-+
-+bail:
-+	return header_size;
-+}
-+
-+static void qib_snoop_list_add_tail(struct snoop_packet *packet,
-+				    struct qib_pportdata *ppd,
-+				    int dev_index)
-+{
-+	unsigned long flags = 0;
-+	struct qib_aux_device *sc_device = &ppd->sc_device[dev_index];
-+
-+	spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+	if (likely((dev_index == QIB_CAPTURE_DEV_INDEX &&
-+		    (ppd->mode_flag & QIB_PORT_CAPTURE_MODE)) ||
-+		   (dev_index == QIB_SNOOP_DEV_INDEX &&
-+		    (ppd->mode_flag & QIB_PORT_SNOOP_MODE))))
-+		list_add_tail(&(packet->list), &sc_device->snoop_queue);
-+	spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+	wake_up_interruptible(&sc_device->snoop_waitq);
-+}
-+
-+void qib_snoop_send_queue_packet(struct qib_pportdata *ppd,
-+				 struct snoop_packet *packet)
-+{
-+	/* If we are dealing with mix mode then we need to make another copy
-+	 * of the same packet and queue it in the snoop device as well.
-+	 * However, if we do not get sufficient memory here then we just
-+	 * add the packet to the capture queue by default so that we at least
-+	 * have one packet with us in the capture queue.
-+	 */
-+	if (unlikely(ppd->mode_flag ==
-+	    (QIB_PORT_SNOOP_MODE | QIB_PORT_CAPTURE_MODE))) {
-+		struct snoop_packet *pcopy;
-+		pcopy = kmalloc(sizeof(*pcopy) + packet->total_len, GFP_ATOMIC);
-+		if (pcopy != NULL) {
-+			memcpy(pcopy, packet,
-+			       packet->total_len + sizeof(*pcopy));
-+			qib_snoop_list_add_tail(pcopy, ppd,
-+						QIB_SNOOP_DEV_INDEX);
-+		}
-+		qib_snoop_list_add_tail(packet, ppd, QIB_CAPTURE_DEV_INDEX);
-+	} else if (ppd->mode_flag == QIB_PORT_CAPTURE_MODE)
-+		qib_snoop_list_add_tail(packet, ppd, QIB_CAPTURE_DEV_INDEX);
-+	else if (ppd->mode_flag == QIB_PORT_SNOOP_MODE)
-+		qib_snoop_list_add_tail(packet, ppd, QIB_SNOOP_DEV_INDEX);
-+}
-+
-+/*
-+ * qib_snoop_rcv_queue_packet - receive a packet for snoop interface
-+ * @port - HCA port on which this packet is received.
-+ * @rhdr - Packet header
-+ * @data - Packet data/payload
-+ * @tlen - total length of packet including header and payload.
-+ *
-+ * Called for every packet received when snooping/mix mode is turned on.
-+ * Copies the received packet to an internal buffer and appends it to
-+ * the packet list.
-+ *
-+ * Returns,
-+ * 0 if this packet needs to be forwarded by driver
-+ * 1 if this packet needs to be dropped by driver
-+ */
-+
-+int qib_snoop_rcv_queue_packet(struct qib_pportdata *port, void *rhdr,
-+			       void *data, u32 tlen)
-+{
-+	int header_size = 0;
-+	struct qib_ib_header *hdr = rhdr;
-+	struct snoop_packet *packet = NULL;
-+
-+	header_size = snoop_get_header_size(port->dd, hdr, data, tlen);
-+	if (header_size <= 0)
-+		return 0;
-+
-+	/* qib_snoop_send_queue_packet takes care of mix mode,
-+	 * so just return from here.
-+	 */
-+	if (port->mode_flag == (QIB_PORT_SNOOP_MODE | QIB_PORT_CAPTURE_MODE))
-+		return 0;
-+
-+	packet = kmalloc(sizeof(struct snoop_packet) + tlen,
-+			 GFP_ATOMIC);
-+	if (likely(packet)) {
-+		memcpy(packet->data, rhdr, header_size);
-+		memcpy(packet->data + header_size, data,
-+		       tlen - header_size);
-+		packet->total_len = tlen;
-+		qib_snoop_list_add_tail(packet, port, QIB_SNOOP_DEV_INDEX);
-+		return 1;
-+	}
-+
-+	return 0;
-+}
-+
-+static int qib_filter_lid(void *ibhdr, void *packet_data, void *value)
-+{
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+	if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3]))
-+		return 0; /* matched */
-+	return 1; /* Not matched */
-+}
-+
-+static int qib_filter_dlid(void *ibhdr, void *packet_data, void *value)
-+{
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+	if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
-+		return 0;
-+	return 1;
-+}
-+
-+static int qib_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-+				     void *value)
-+{
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+	struct qib_other_headers *ohdr = NULL;
-+	struct ib_smp *smp = NULL;
-+	u32 qpn = 0;
-+
-+	/* packet_data could be null if only header is captured */
-+	if (packet_data == NULL)
-+		return 1;
-+	/* Check for GRH */
-+	if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
-+		ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-+	else
-+		ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-+	qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
-+	if (qpn <= 1) {
-+		smp = (struct ib_smp *)packet_data;
-+		if (*((u8 *)value) == smp->mgmt_class)
-+			return 0;
-+		else
-+			return 1;
-+	}
-+	return 1;
-+}
-+
-+static int qib_filter_qp_number(void *ibhdr, void *packet_data, void *value)
-+{
-+
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+	struct qib_other_headers *ohdr = NULL;
-+
-+	/* Check for GRH */
-+	if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
-+		ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-+	else
-+		ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-+	if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
-+		return 0;
-+	return 1;
-+}
-+
-+
-+static int qib_filter_ibpacket_type(void *ibhdr, void *packet_data,
-+				    void *value)
-+{
-+	u32 lnh = 0;
-+	u8 opcode = 0;
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+	struct qib_other_headers *ohdr = NULL;
-+
-+	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+
-+	if (lnh == QIB_LRH_BTH)
-+		ohdr = &hdr->u.oth;
-+	else if (lnh == QIB_LRH_GRH)
-+		ohdr = &hdr->u.l.oth;
-+	else
-+		return 1;
-+
-+	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-+
-+	if (*((u8 *)value) == ((opcode >> 5) & 0x7))
-+		return 0;
-+	return 1;
-+}
-+
-+static int qib_filter_ib_service_level(void *ibhdr, void *packet_data,
-+				       void *value)
-+{
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+
-+	if ((*((u8 *)value)) == (be16_to_cpu(hdr->lrh[0] >> 4) & 0xF))
-+		return 0;
-+	return 1;
-+}
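Each filter above returns 0 on match, and qib_ib_rcv()/the send paths only queue a packet when the installed callback matches. From user space, a filter is armed with QIB_SNOOP_IOCSETFILTER and a qib_packet_filter_command whose opcode indexes qib_filters[]. A hedged user-space sketch; the device path, the assumption that IB_IOCTL_MAGIC is 0x1b, and the local struct copy are all illustrative rather than a documented ABI:

    /* Sketch: arm the DLID filter on a snoop node, then read one packet. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    struct qib_packet_filter_command {
            int opcode;        /* FILTER_BY_DLID == 1 in the enum above */
            int length;        /* size of the value buffer */
            void *value_ptr;   /* kernel copies this many bytes */
    };

    /* _IO(magic, BASE_SEQ + 4); IB_IOCTL_MAGIC assumed to be 0x1b */
    #define QIB_SNOOP_IOCSETFILTER _IO(0x1b, 0x80 + 4)

    int main(void)
    {
            uint16_t dlid = 7;
            struct qib_packet_filter_command cmd = { 1, sizeof(dlid), &dlid };
            char buf[8192];
            /* node name follows the "ipath_snoop_%02d_%02d" pattern above */
            int fd = open("/dev/ipath_snoop_00_01", O_RDONLY);

            if (fd < 0 || ioctl(fd, QIB_SNOOP_IOCSETFILTER, &cmd) < 0)
                    return 1;
            /* each read() returns exactly one queued packet, or blocks */
            ssize_t n = read(fd, buf, sizeof(buf));
            printf("got %zd bytes\n", n);
            close(fd);
            return 0;
    }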
-+
-+static int qib_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
-+{
-+
-+	u32 lnh = 0;
-+	struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+	struct qib_other_headers *ohdr = NULL;
-+
-+	lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+	if (lnh == QIB_LRH_BTH)
-+		ohdr = &hdr->u.oth;
-+	else if (lnh == QIB_LRH_GRH)
-+		ohdr = &hdr->u.l.oth;
-+	else
-+		return 1;
-+
-+	/* The P_Key is a 16-bit entity, however the top-most bit indicates
-+	 * the type of membership: 0 for Limited and 1 for Full.
-+	 * Limited members cannot accept information from other
-+	 * Limited members, but communication is allowed between
-+	 * every other combination of membership.
-+	 * Hence we'll omit comparing the top-most bit while filtering.
-+	 */
-+
-+	if ((*(u16 *)value & 0x7FFF) ==
-+	    ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
-+		return 0;
-+	return 1;
-+}
-diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
-index d0a0ea0..a98635d 100644
---- a/drivers/infiniband/hw/qib/qib_user_sdma.c
-+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
-@@ -1,4 +1,5 @@
- /*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
-  * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
-  *
-  * This software is available to you under a choice of one of two
-@@ -52,83 +53,65 @@
- /* attempt to drain the queue for 5secs */
- #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
-
--struct qib_user_sdma_pkt {
--	struct list_head list; /* list element */
--
--	u8 tiddma; /* if this is NEW tid-sdma */
--	u8 largepkt; /* this is large pkt from kmalloc */
--	u16 frag_size; /* frag size used by PSM */
--	u16 index; /* last header index or push index */
--	u16 naddr; /* dimension of addr (1..3) ... */
--	u16 addrlimit; /* addr array size */
--	u16 tidsmidx; /* current tidsm index */
--	u16 tidsmcount; /* tidsm array item count */
--	u16 payload_size; /* payload size so far for header */
--	u32 bytes_togo; /* bytes for processing */
--	u32 counter; /* sdma pkts queued counter for this entry */
--	struct qib_tid_session_member *tidsm; /* tid session member array */
--	struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
--	u64 added; /* global descq number of entries */
--
--	struct {
--		u16 offset; /* offset for kvaddr, addr */
--		u16 length; /* length in page */
--		u16 first_desc; /* first desc */
--		u16 last_desc; /* last desc */
--		u16 put_page; /* should we put_page? */
--		u16 dma_mapped; /* is page dma_mapped? */
--		u16 dma_length; /* for dma_unmap_page() */
--		u16 padding;
--		struct page *page; /* may be NULL (coherent mem) */
--		void *kvaddr; /* FIXME: only for pio hack */
--		dma_addr_t addr;
--	} addr[4]; /* max pages, any more and we coalesce */
-+/*
-+ * track how many times a process opens this driver.
-+ */
-+struct rb_root qib_user_sdma_rb_root = RB_ROOT;
-+
-+struct qib_user_sdma_rb_node {
-+	struct rb_node node;
-+	int refcount;
-+	pid_t pid;
- };
-
--struct qib_user_sdma_queue {
--	/*
--	 * pkts sent to dma engine are queued on this
--	 * list head. the type of the elements of this
--	 * list are struct qib_user_sdma_pkt...
-- */
--	struct list_head sent;
-+static struct qib_user_sdma_rb_node *
-+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
-+{
-+	struct qib_user_sdma_rb_node *sdma_rb_node;
-+	struct rb_node *node = root->rb_node;
-+
-+	while (node) {
-+		sdma_rb_node = container_of(node,
-+			struct qib_user_sdma_rb_node, node);
-+		if (pid < sdma_rb_node->pid)
-+			node = node->rb_left;
-+		else if (pid > sdma_rb_node->pid)
-+			node = node->rb_right;
-+		else
-+			return sdma_rb_node;
-+	}
-+	return NULL;
-+}
-
--	/*
--	 * Because above list will be accessed by both process and
--	 * signal handler, we need a spinlock for it.
--	 */
--	spinlock_t sent_lock ____cacheline_aligned_in_smp;
--
--	/* headers with expected length are allocated from here... */
--	char header_cache_name[64];
--	struct dma_pool *header_cache;
--
--	/* packets are allocated from the slab cache... */
--	char pkt_slab_name[64];
--	struct kmem_cache *pkt_slab;
--
--	/* as packets go on the queued queue, they are counted... */
--	u32 counter;
--	u32 sent_counter;
--	/* pending packets, not sending yet */
--	u32 num_pending;
--	/* sending packets, not complete yet */
--	u32 num_sending;
--	/* global descq number of entry of last sending packet */
--	u64 added;
--
--	/* dma page table */
--	struct rb_root dma_pages_root;
--
--	/* protect everything above... */
--	struct mutex lock;
--};
-+static int
-+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
-+{
-+	struct rb_node **node = &(root->rb_node);
-+	struct rb_node *parent = NULL;
-+	struct qib_user_sdma_rb_node *got;
-+
-+	while (*node) {
-+		got = container_of(*node, struct qib_user_sdma_rb_node, node);
-+		parent = *node;
-+		if (new->pid < got->pid)
-+			node = &((*node)->rb_left);
-+		else if (new->pid > got->pid)
-+			node = &((*node)->rb_right);
-+		else
-+			return 0;
-+	}
-+
-+	rb_link_node(&new->node, parent, node);
-+	rb_insert_color(&new->node, root);
-+	return 1;
-+}
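The two helpers above give the driver a PID-keyed lookup table: queue creation bumps the refcount of an existing node or inserts a fresh one, and a refcount above 1 later marks the process as multi-rail. A user-space sketch of the same bookkeeping, with a plain array standing in for the kernel rbtree; every name here is illustrative:

    #include <sys/types.h>

    struct pid_ref { pid_t pid; int refcount; };

    /* Toy stand-in for qib_user_sdma_rb_search()/_insert(): linear scan
     * instead of an rbtree, same refcounting contract. */
    static struct pid_ref table[64];
    static int nentries;

    static struct pid_ref *pid_ref_get(pid_t pid)
    {
            for (int i = 0; i < nentries; i++)
                    if (table[i].pid == pid) {
                            table[i].refcount++;  /* another rail for this PID */
                            return &table[i];
                    }
            if (nentries == 64)
                    return 0;                     /* table full */
            table[nentries] = (struct pid_ref){ .pid = pid, .refcount = 1 };
            return &table[nentries++];
    }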
-
- struct qib_user_sdma_queue *
- qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
- {
- 	struct qib_user_sdma_queue *pq =
- 		kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
-+	struct qib_user_sdma_rb_node *sdma_rb_node;
-
- 	if (!pq)
- 		goto done;
-@@ -138,6 +121,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
- 	pq->num_pending = 0;
- 	pq->num_sending = 0;
- 	pq->added = 0;
-+	pq->sdma_rb_node = NULL;
-
- 	INIT_LIST_HEAD(&pq->sent);
- 	spin_lock_init(&pq->sent_lock);
-@@ -163,8 +147,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
-
- 	pq->dma_pages_root = RB_ROOT;
-
-+	sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
-+					       current->pid);
-+	if (sdma_rb_node) {
-+		sdma_rb_node->refcount++;
-+	} else {
-+		int ret;
-+		sdma_rb_node = kmalloc(sizeof(
-+			struct qib_user_sdma_rb_node), GFP_KERNEL);
-+		if (!sdma_rb_node)
-+			goto err_rb;
-+
-+		sdma_rb_node->refcount = 1;
-+		sdma_rb_node->pid = current->pid;
-+
-+		ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
-+					      sdma_rb_node);
-+		BUG_ON(ret == 0);
-+	}
-+	pq->sdma_rb_node = sdma_rb_node;
-+
- 	goto done;
-
-+err_rb:
-+	dma_pool_destroy(pq->header_cache);
- err_slab:
- 	kmem_cache_destroy(pq->pkt_slab);
- err_kfree:
-@@ -175,12 +181,12 @@ done:
- 	return pq;
- }
-
--static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
--				    int i, u16 offset, u16 len,
--				    u16 first_desc, u16 last_desc,
--				    u16 put_page, u16 dma_mapped,
--				    struct page *page, void *kvaddr,
--				    dma_addr_t dma_addr, u16 dma_length)
-+void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-+			     int i, u16 offset, u16 len,
-+			     u16 first_desc, u16 last_desc,
-+			     u16 put_page, u16 dma_mapped,
-+			     struct page *page, void *kvaddr,
-+			     dma_addr_t dma_addr, u16 dma_length)
- {
- 	pkt->addr[i].offset = offset;
- 	pkt->addr[i].length = len;
-@@ -194,7 +200,7 @@ static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
- 	pkt->addr[i].dma_length = dma_length;
- }
-
--static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-+void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
- 				 size_t len, dma_addr_t *dma_addr)
- {
- 	void *hdr;
-@@ -216,11 +222,11 @@ static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
- 	return hdr;
- }
-
--static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
--				       struct qib_user_sdma_queue *pq,
--				       struct qib_user_sdma_pkt *pkt,
--				       struct page *page, u16 put,
--				       u16 offset, u16 len, void *kvaddr)
-+int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-+				struct qib_user_sdma_queue *pq,
-+				struct qib_user_sdma_pkt *pkt,
-+				struct page *page, u16 put,
-+				u16 offset, u16 len, void *kvaddr)
- {
- 	__le16 *pbc16;
- 	void *pbcvaddr;
-@@ -235,21 +241,27 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
- 	int ret = 0;
-
- 	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
--		/*
--		 * dma mapping error, pkt has not managed
--		 * this page yet, return the page here so
--		 * the caller can ignore this page.
--		 */
--		if (put) {
--			put_page(page);
--		} else {
--			/* coalesce case */
--			kunmap(page);
--			__free_page(page);
-+#ifdef QIB_CONFIG_KNX
-+		if (!pkt->remote) {
-+#endif
-+			/*
-+			 * dma mapping error, pkt has not managed
-+			 * this page yet, return the page here so
-+			 * the caller can ignore this page.
-+			 */
-+			if (put) {
-+				put_page(page);
-+			} else {
-+				/* coalesce case */
-+				kunmap(page);
-+				__free_page(page);
-+			}
-+			ret = -ENOMEM;
-+			goto done;
- 		}
--		ret = -ENOMEM;
--		goto done;
-+#ifdef QIB_CONFIG_KNX
- 	}
-+#endif
- 	offset = 0;
- 	dma_mapped = 1;
-
-@@ -551,13 +563,19 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
- 					 pkt->addr[i].dma_length,
- 					 DMA_TO_DEVICE);
-
--		if (pkt->addr[i].kvaddr)
--			kunmap(pkt->addr[i].page);
-+#ifdef QIB_CONFIG_KNX
-+		if (!pkt->remote) {
-+#endif
-+			if (pkt->addr[i].kvaddr)
-+				kunmap(pkt->addr[i].page);
-
--		if (pkt->addr[i].put_page)
--			put_page(pkt->addr[i].page);
--		else
--			__free_page(pkt->addr[i].page);
-+			if (pkt->addr[i].put_page)
-+				put_page(pkt->addr[i].page);
-+			else
-+				__free_page(pkt->addr[i].page);
-+#ifdef QIB_CONFIG_KNX
-+		}
-+#endif
- 	} else if (pkt->addr[i].kvaddr) {
- 		/* for headers */
- 		if (pkt->addr[i].dma_mapped) {
-@@ -697,9 +715,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
- 	}
-
- /* free a packet list -- return counter value of last packet */
--static void qib_user_sdma_free_pkt_list(struct device *dev,
--					struct qib_user_sdma_queue *pq,
--					struct list_head *list)
-+void qib_user_sdma_free_pkt_list(struct device *dev,
-+				 struct qib_user_sdma_queue *pq,
-+				 struct list_head *list)
- {
- 	struct qib_user_sdma_pkt *pkt, *pkt_next;
-
-@@ -709,6 +727,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
- 		for (i = 0; i < pkt->naddr; i++)
- 			qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-+#ifdef QIB_CONFIG_KNX
-+		if (pkt->remote)
-+			qib_knx_sdma_free_pkt(pkt);
-+#endif
- 		if (pkt->largepkt)
- 			kfree(pkt);
- 		else
-@@ -892,6 +914,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
- 		pkt->payload_size = 0;
- 		pkt->counter = counter;
- 		pkt->tiddma = tiddma;
-+		pkt->remote = 0;
-
- 		/* setup the first header */
- 		qib_user_sdma_init_frag(pkt, 0, /* index */
-@@ -967,8 +990,8 @@ static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
- }
-
- /* try to clean out queue -- needs pq->lock */
--static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
--				     struct qib_user_sdma_queue *pq)
-+int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-+			      struct qib_user_sdma_queue *pq)
- {
- 	struct qib_devdata *dd = ppd->dd;
- 	struct list_head free_list;
-@@ -1021,13 +1044,18 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
- 	if (!pq)
- 		return;
-
--	kmem_cache_destroy(pq->pkt_slab);
-+	pq->sdma_rb_node->refcount--;
-+	if (pq->sdma_rb_node->refcount == 0) {
-+		rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
-+		kfree(pq->sdma_rb_node);
-+	}
- 	dma_pool_destroy(pq->header_cache);
-+	kmem_cache_destroy(pq->pkt_slab);
- 	kfree(pq);
- }
-
- /* clean descriptor queue, returns > 0 if some elements cleaned */
--static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
-+int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
- {
- 	int ret;
- 	unsigned long flags;
-@@ -1238,30 +1266,56 @@ retry:
- }
-
- /* pq->lock must be held, get packets on the wire... */
--static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
--				   struct qib_user_sdma_queue *pq,
--				   struct list_head *pktlist, int count)
-+int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-+			    struct qib_user_sdma_queue *pq,
-+			    struct list_head *pktlist, int count)
- {
--	int ret = 0;
- 	unsigned long flags;
-
- 	if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
- 		return -ECOMM;
-
--	spin_lock_irqsave(&ppd->sdma_lock, flags);
--
--	if (unlikely(!__qib_sdma_running(ppd))) {
--		ret = -ECOMM;
--		goto unlock;
-+	/* non-blocking mode */
-+	if (pq->sdma_rb_node->refcount > 1) {
-+		spin_lock_irqsave(&ppd->sdma_lock, flags);
-+		if (unlikely(!__qib_sdma_running(ppd))) {
-+			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+			return -ECOMM;
-+		}
-+		pq->num_pending += count;
-+		list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-+		qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
-+		spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+		return 0;
- 	}
-
-+	/* In this case, descriptors from this process are not
-+	 * linked to the ppd pending queue and the interrupt handler
-+	 * won't update this process, so it is OK to modify directly
-+	 * without the sdma lock.
-+	 */
-+
-+
- 	pq->num_pending += count;
--	list_splice_tail_init(pktlist, &ppd->sdma_userpending);
--	qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
-+	/*
-+	 * Blocking mode for a single-rail process; we must
-+	 * release/regain the sdma_lock to give other processes
-+	 * a chance to make progress. This is important for
-+	 * performance.
-+	 */
-+	do {
-+		spin_lock_irqsave(&ppd->sdma_lock, flags);
-+		if (unlikely(!__qib_sdma_running(ppd))) {
-+			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+			return -ECOMM;
-+		}
-+		qib_user_sdma_send_desc(ppd, pktlist);
-+		if (!list_empty(pktlist))
-+			qib_sdma_make_progress(ppd);
-+		spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+	} while (!list_empty(pktlist));
-
--unlock:
--	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
--	return ret;
-+	return 0;
- }
-
- int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
-@@ -1291,7 +1345,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
- 		qib_user_sdma_queue_clean(ppd, pq);
-
- 	while (dim) {
--		int mxp = 8;
-+		int mxp = 1;
- 		int ndesc = 0;
-
- 		down_write(&current->mm->mmap_sem);
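The hunk above is the heart of the behavioral change: a PID whose rb-node refcount is above 1 has several rails open, so the call returns as soon as the descriptors are queued, while a single-rail PID loops, dropping and retaking the lock, until everything is on the hardware ring. A compressed, runnable user-space model of that control flow; the 4-slot ring and all names are toy stand-ins, not the driver's internals:

    #include <stdio.h>

    /* Toy model: the "ring" holds at most 4 descriptors; resetting it
     * stands in for qib_sdma_make_progress()/interrupt completions. */
    static int ring;

    static int push(int *pending)
    {
            while (*pending && ring < 4) {
                    ring++;
                    (*pending)--;
            }
            return *pending;  /* descriptors the caller still holds */
    }

    static int push_pkts_sketch(int refcount, int pending)
    {
            if (refcount > 1)         /* multi-rail: queue what fits, return */
                    return push(&pending);

            while (push(&pending))    /* single-rail: block until drained */
                    ring = 0;         /* progress would free slots here */
            return 0;
    }

    int main(void)
    {
            printf("multi-rail leftover: %d\n", push_pkts_sketch(2, 10));  /* 6 */
            ring = 0;
            printf("single-rail leftover: %d\n", push_pkts_sketch(1, 10)); /* 0 */
            return 0;
    }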
-diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
-index ce8cbaf..93ce40b 100644
---- a/drivers/infiniband/hw/qib/qib_user_sdma.h
-+++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
-@@ -31,12 +31,108 @@
-  */
- #include
-
--struct qib_user_sdma_queue;
-+struct qib_user_sdma_pkt {
-+	struct list_head list; /* list element */
-+
-+	u8 tiddma; /* if this is NEW tid-sdma */
-+	u8 largepkt; /* this is large pkt from kmalloc */
-+	u16 frag_size; /* frag size used by PSM */
-+	u16 index; /* last header index or push index */
-+	u16 naddr; /* dimension of addr (1..3) ... */
-+	u16 addrlimit; /* addr array size */
-+	u16 tidsmidx; /* current tidsm index */
-+	u16 tidsmcount; /* tidsm array item count */
-+	u16 payload_size; /* payload size so far for header */
-+	u32 bytes_togo; /* bytes for processing */
-+	u32 counter; /* sdma pkts queued counter for this entry */
-+	struct qib_tid_session_member *tidsm; /* tid session member array */
-+	struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
-+	u64 added; /* global descq number of entries */
-+#ifdef QIB_CONFIG_KNX
-+	u64 remote; /* does the packet originate on the host */
-+#endif
-+
-+	struct {
-+		u16 offset; /* offset for kvaddr, addr */
-+		u16 length; /* length in page */
-+		u16 first_desc; /* first desc */
-+		u16 last_desc; /* last desc */
-+		u16 put_page; /* should we put_page? */
-+		u16 dma_mapped; /* is page dma_mapped? */
-+		u16 dma_length; /* for dma_unmap_page() */
-+		u16 padding;
-+		struct page *page; /* may be NULL (coherent mem) */
-+		void *kvaddr; /* FIXME: only for pio hack */
-+		dma_addr_t addr;
-+	} addr[4]; /* max pages, any more and we coalesce */
-+};
-+
-+struct qib_user_sdma_queue {
-+	/*
-+	 * pkts sent to dma engine are queued on this
-+	 * list head. the type of the elements of this
-+	 * list are struct qib_user_sdma_pkt...
-+	 */
-+	struct list_head sent;
-+
-+	/*
-+	 * Because above list will be accessed by both process and
-+	 * signal handler, we need a spinlock for it.
-+	 */
-+	spinlock_t sent_lock ____cacheline_aligned_in_smp;
-+
-+	/* headers with expected length are allocated from here... */
-+	char header_cache_name[64];
-+	struct dma_pool *header_cache;
-+
-+	/* packets are allocated from the slab cache... */
-+	char pkt_slab_name[64];
-+	struct kmem_cache *pkt_slab;
-+
-+	/* as packets go on the queued queue, they are counted... */
-+	u32 counter;
-+	u32 sent_counter;
-+	/* pending packets, not sending yet */
-+	u32 num_pending;
-+	/* sending packets, not complete yet */
-+	u32 num_sending;
-+	/* global descq number of entry of last sending packet */
-+	u64 added;
-+
-+	/* dma page table */
-+	struct rb_root dma_pages_root;
-+
-+	struct qib_user_sdma_rb_node *sdma_rb_node;
-+
-+	/* protect everything above... */
-+	struct mutex lock;
-+};
-
- struct qib_user_sdma_queue *
- qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
- void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
--
-+void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-+				 size_t len, dma_addr_t *dma_addr);
-+void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-+			     int i, u16 offset, u16 len,
-+			     u16 first_desc, u16 last_desc,
-+			     u16 put_page, u16 dma_mapped,
-+			     struct page *page, void *kvaddr,
-+			     dma_addr_t dma_addr, u16 dma_length);
-+int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-+				struct qib_user_sdma_queue *pq,
-+				struct qib_user_sdma_pkt *pkt,
-+				struct page *page, u16 put,
-+				u16 offset, u16 len, void *kvaddr);
-+int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd);
-+int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-+			      struct qib_user_sdma_queue *pq);
-+void qib_user_sdma_free_pkt_list(struct device *dev,
-+				 struct qib_user_sdma_queue *pq,
-+				 struct list_head *list);
-+int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-+			    struct qib_user_sdma_queue *pq,
-+			    struct list_head *pktlist, int count);
- int qib_user_sdma_writev(struct qib_ctxtdata *pd,
- 			 struct qib_user_sdma_queue *pq,
- 			 const struct iovec *iov,
-@@ -50,3 +146,8 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
-
- u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
- u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
-+/*
-+ * This function prototype somewhat pollutes this header file,
-+ * but I don't want to create a new header file just for it.
-+ */
-+void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt);
-diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
-index 092b0bb..687c216 100644
---- a/drivers/infiniband/hw/qib/qib_verbs.c
-+++ b/drivers/infiniband/hw/qib/qib_verbs.c
-@@ -621,6 +621,15 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
- 	if (unlikely(tlen < 24))
- 		goto drop;
-
-+	if (ppd->mode_flag & QIB_PORT_SNOOP_MODE) {
-+		int nomatch = 0;
-+		if (ppd->filter_callback)
-+			nomatch = ppd->filter_callback(hdr, data,
-+						       ppd->filter_value);
-+		if (nomatch == 0 &&
-+		    qib_snoop_rcv_queue_packet(ppd, rhdr, data, tlen))
-+			goto drop;
-+	}
- 	/* Check for a valid destination LID (see ch. 7.11.1). */
- 	lid = be16_to_cpu(hdr->lrh[1]);
- 	if (lid < QIB_MULTICAST_LID_BASE) {
-@@ -789,11 +798,17 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
- #endif
-
- static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
--		    u32 length, unsigned flush_wc)
-+		    u32 length, unsigned flush_wc, struct snoop_packet *packet,
-+		    u8 *data_orig)
- {
- 	u32 extra = 0;
- 	u32 data = 0;
- 	u32 last;
-+	u32 *packet_data = NULL;
-+
-+	/* This ensures copying a word at a time */
-+	if (packet)
-+		packet_data = (u32 *)data_orig;
-
- 	while (1) {
- 		u32 len = ss->sge.length;
-@@ -825,6 +840,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- 			}
- 			__raw_writel(data, piobuf);
- 			piobuf++;
-+			if (packet_data) {
-+				*packet_data = data;
-+				packet_data++;
-+			}
- 			extra = 0;
- 			data = 0;
- 		} else {
-@@ -851,6 +870,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- 				data = get_upper_bits(v, ushift);
- 				piobuf++;
- 				addr++;
-+				if (packet_data) {
-+					*packet_data = data;
-+					packet_data++;
-+				}
- 				l -= sizeof(u32);
- 			}
- 			/*
-@@ -868,6 +891,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- 			}
- 			__raw_writel(data, piobuf);
- 			piobuf++;
-+			if (packet_data) {
-+				*packet_data = data;
-+				packet_data++;
-+			}
- 			extra = 0;
- 			data = 0;
- 		} else {
-@@ -894,12 +921,20 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- 			qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
- 			piobuf += w - 1;
- 			last = ((u32 *) ss->sge.vaddr)[w - 1];
-+			if (packet_data) {
-+				memcpy(packet_data, ss->sge.vaddr, len);
-+				packet_data += w;
-+			}
- 			break;
- 		} else {
- 			u32 w = len >> 2;
-
- 			qib_pio_copy(piobuf, ss->sge.vaddr, w);
- 			piobuf += w;
-+			if (packet_data) {
-+				memcpy(packet_data, ss->sge.vaddr, len);
-+				packet_data += w;
-+			}
-
- 			extra = len & (sizeof(u32) - 1);
- 			if (extra) {
-@@ -1144,12 +1179,13 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- 	u32 control;
- 	u32 ndesc;
- 	int ret;
-+	struct snoop_packet *packet = NULL;
-
- 	tx = qp->s_tx;
- 	if (tx) {
- 		qp->s_tx = NULL;
- 		/* resend previously constructed packet */
--		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
-+		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx, NULL);
- 		goto bail;
- 	}
-
-@@ -1173,6 +1209,19 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- 	if (plen + 1 > dd->piosize2kmax_dwords)
- 		tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
-
-+	if (ppd->mode_flag) {
-+		int nomatch = 0;
-+		if (ppd->filter_callback)
-+			nomatch = ppd->filter_callback(hdr, NULL,
-+						       ppd->filter_value);
-+		if (nomatch == 0) {
-+			packet = kzalloc(sizeof(*packet)+QIB_GET_PKT_LEN(hdr),
-+					 GFP_ATOMIC);
-+			if (packet)
-+				packet->total_len = QIB_GET_PKT_LEN(hdr);
-+		}
-+	}
-+
- 	if (len) {
- 		/*
- 		 * Don't try to DMA if it takes more descriptors than
-@@ -1193,7 +1242,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- 		tx->txreq.addr = dev->pio_hdrs_phys +
- 			tx->hdr_inx * sizeof(struct qib_pio_header);
- 		tx->hdr_dwords = hdrwords + 2; /* add PBC length */
--		ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
-+		if (packet)
-+			memcpy(packet->data, hdr, (hdrwords << 2));
-+		ret = qib_sdma_verbs_send(ppd, ss, dwords, tx, packet);
- 		goto bail;
- 	}
-
-@@ -1206,6 +1257,12 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- 	phdr->pbc[1] = cpu_to_le32(control);
- 	memcpy(&phdr->hdr, hdr, hdrwords << 2);
- 	qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
-+	if (packet) {
-+		memcpy(packet->data, &phdr->hdr, (hdrwords << 2));
-+		memcpy(packet->data+(hdrwords << 2),
-+		       (u8 *)((u32 *) &phdr->hdr + hdrwords),
-+		       len);
-+	}
-
- 	tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
- 					tx->hdr_dwords << 2, DMA_TO_DEVICE);
-@@ -1214,7 +1271,7 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- 	tx->align_buf = phdr;
- 	tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
- 	tx->txreq.sg_count = 1;
--	ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
-+	ret = qib_sdma_verbs_send(ppd, NULL, 0, tx, NULL);
- 	goto unaligned;
-
- map_err:
-@@ -1222,9 +1279,24 @@ map_err:
- err_tx:
- 	qib_put_txreq(tx);
- 	ret = wait_kmem(dev, qp);
-+	/* If wait_kmem returns 0 then
-+	 * (ret == 0) will hold true, and we don't want
-+	 * that as it would add an ignored packet to the list,
-+	 * so free the packet here.
-+	 */
-+	kfree(packet);
-+	packet = NULL;
- unaligned:
- 	ibp->n_unaligned++;
- bail:
-+	if (packet) {
-+		if (ret == 0)
-+			qib_snoop_send_queue_packet(ppd, packet);
-+		else {
-+			kfree(packet);
-+			packet = NULL;
-+		}
-+	}
- 	return ret;
- bail_tx:
- 	ret = PTR_ERR(tx);
-@@ -1280,6 +1352,8 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- 	unsigned flush_wc;
- 	u32 control;
- 	u32 pbufn;
-+	u8 *data_orig = NULL;
-+	struct snoop_packet *packet = NULL;
-
- 	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
- 				       be16_to_cpu(ibhdr->lrh[0]) >> 12);
-@@ -1288,6 +1362,20 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- 	if (unlikely(piobuf == NULL))
- 		return no_bufs_available(qp);
-
-+	if (snoop_enable && ppd->mode_flag) {
-+		int nomatch = 0;
-+		if (ppd->filter_callback)
-+			nomatch = ppd->filter_callback(ibhdr, NULL,
-+						       ppd->filter_value);
-+		if (nomatch == 0) {
-+			packet = kzalloc(sizeof(*packet)+QIB_GET_PKT_LEN(ibhdr),
-+					 GFP_ATOMIC);
-+			if (packet) {
-+				INIT_LIST_HEAD(&packet->list);
-+				packet->total_len = QIB_GET_PKT_LEN(ibhdr);
-+			}
-+		}
-+	}
- 	/*
- 	 * Write the pbc.
- 	 * We have to flush after the PBC for correctness on some cpus
-@@ -1297,6 +1385,12 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- 	piobuf_orig = piobuf;
- 	piobuf += 2;
-
-+	if (packet) {
-+		/* Copy header */
-+		data_orig = packet->data;
-+		memcpy(data_orig, hdr, (hdrwords << 2));
-+		data_orig += (hdrwords << 2);
-+	}
- 	flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
- 	if (len == 0) {
- 		/*
-@@ -1336,10 +1430,19 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- 			qib_flush_wc();
- 		} else
- 			qib_pio_copy(piobuf, addr, dwords);
-+		if (packet) {
-+			/* Copy data */
-+			memcpy(data_orig, addr, len);
-+			data_orig += len;
-+		}
- 		goto done;
- 	}
--	copy_io(piobuf, ss, len, flush_wc);
-+	copy_io(piobuf, ss, len, flush_wc, packet, data_orig);
- done:
-+	if (packet) {
-+		qib_snoop_send_queue_packet(ppd, packet);
-+		packet = NULL;
-+	}
- 	if (dd->flags & QIB_USE_SPCL_TRIG) {
- 		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
- 		qib_flush_wc();
-@@ -1623,7 +1726,8 @@ static int qib_query_port(struct ib_device *ibdev, u8 port,
- 	props->max_vl_num = qib_num_vls(ppd->vls_supported);
- 	props->init_type_reply = 0;
-
--	props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
-+	props->max_mtu = QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port) ?
-+		QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port) : IB_MTU_4096;
- 	switch (ppd->ibmtu) {
- 	case 4096:
- 		mtu = IB_MTU_4096;
--- 
-1.7.1
-
-- 
2.41.0