git.openfabrics.org - ~emulex/compat-rdma.git/commitdiff
IB/qib: streamline qib xeon-phi patches ofed_3_12_1
author Jubin John <jubin.john@intel.com>
Tue, 25 Nov 2014 18:42:36 +0000 (10:42 -0800)
committer Jubin John <jubin.john@intel.com>
Tue, 25 Nov 2014 18:42:36 +0000 (10:42 -0800)
linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch [new file with mode: 0644]
linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch [new file with mode: 0644]
linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch [new file with mode: 0644]
linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch [new file with mode: 0644]
patches/0038-IB-qib-add-RHEL7-support.patch [new file with mode: 0644]
tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch
tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch [deleted file]
tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch [deleted file]

diff --git a/linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch b/linux-next-cherry-picks/0169-IB-qib-add-missing-braces.patch
new file mode 100644 (file)
index 0000000..87d247a
--- /dev/null
@@ -0,0 +1,52 @@
+IB/qib: add missing braces in do_qib_user_sdma_queue_create()
+
+From: Yann Droneaud <ydroneaud@opteya.com>
+
+Commit c804f07248895ff9c moved qib_assign_ctxt() to
+do_qib_user_sdma_queue_create() but dropped the braces
+around the statements.
+
+This was spotted by coccicheck (coccinelle/spatch):
+
+$ make C=2 CHECK=scripts/coccicheck drivers/infiniband/hw/qib/
+
+  CHECK   drivers/infiniband/hw/qib/qib_file_ops.c
+drivers/infiniband/hw/qib/qib_file_ops.c:1583:2-23: code aligned with following code on line 1587
+
+This patch adds braces back.
+
+Link: http://marc.info/?i=cover.1394485254.git.ydroneaud@opteya.com
+Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Cc: infinipath@intel.com
+Cc: Julia Lawall <julia.lawall@lip6.fr>
+Cc: cocci@systeme.lip6.fr
+Cc: stable@vger.kernel.org
+Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
+Tested-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Acked-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/qib/qib_file_ops.c |    3 ++-
+ 1 files changed, 2 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
+index 2037630..c062c60 100644
+--- a/drivers/infiniband/hw/qib/qib_file_ops.c
++++ b/drivers/infiniband/hw/qib/qib_file_ops.c
+@@ -1587,7 +1587,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
+       struct qib_ctxtdata *rcd = fd->rcd;
+       struct qib_devdata *dd = rcd->dd;
+-      if (dd->flags & QIB_HAS_SEND_DMA)
++      if (dd->flags & QIB_HAS_SEND_DMA) {
+               fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
+                                                   dd->unit,
+@@ -1595,6 +1595,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
+                                                   fd->subctxt);
+               if (!fd->pq)
+                       return -ENOMEM;
++      }
+       return 0;
+ }
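
The failure mode fixed above is easy to reproduce outside the driver. Below is a minimal standalone sketch (plain C with illustrative names, not qib code): once the braces are dropped, the NULL check runs unconditionally, so a caller that never asked for a queue still fails with an error.

#include <stdio.h>

#define HAS_SEND_DMA 0x1

/* stand-in for the real allocator; returns NULL when nothing is created */
static void *queue_create(void) { return NULL; }

/* the broken shape: the NULL check looks guarded by the if above it,
 * but without braces it runs even when HAS_SEND_DMA is clear */
static int create_unbraced(int flags, void **pq)
{
	if (flags & HAS_SEND_DMA)
		*pq = queue_create();
	if (*pq == NULL)
		return -1;
	return 0;
}

/* the fixed shape: the NULL check only runs when a queue was requested */
static int create_braced(int flags, void **pq)
{
	if (flags & HAS_SEND_DMA) {
		*pq = queue_create();
		if (*pq == NULL)
			return -1;
	}
	return 0;
}

int main(void)
{
	void *pq = NULL;

	printf("unbraced: %d\n", create_unbraced(0, &pq)); /* -1: spurious failure */
	printf("braced:   %d\n", create_braced(0, &pq));   /*  0: correct */
	return 0;
}

Static analyzers such as coccinelle flag exactly this shape: a statement aligned as if guarded by the preceding if, but outside its scope.
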
diff --git a/linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch b/linux-next-cherry-picks/0170-IB-qib-change-SDMA-progression.patch
new file mode 100644 (file)
index 0000000..a11dd66
--- /dev/null
@@ -0,0 +1,241 @@
+IB/qib: Change SDMA progression mode depending on single- or multi-rail
+
+From: CQ Tang <cq.tang@intel.com>
+
+Improve performance by changing the behaviour of the driver when all
+SDMA descriptors are in use, depending on whether the processes adding
+new descriptors are single- or multi-rail.
+
+For single-rail processes, the driver will block the call and finish
+posting all SDMA descriptors onto the hardware queue before returning
+back to PSM.  Repeated kernel calls are slower than blocking.
+
+For multi-rail processes, the driver will return to PSM as quickly as
+possible so PSM can feed packets to the other rails.  If all hardware
+queues are full, PSM will buffer the remaining SDMA descriptors until
+notified by interrupt that space is available.
+
+This patch builds a red-black tree to track the number of rails opened
+by a particular PID. If the number is more than one, it is a multi-rail
+PSM process; otherwise, it is a single-rail process.
+
+Reviewed-by: Dean Luick <dean.luick@intel.com>
+Reviewed-by: John A Gregor <john.a.gregor@intel.com>
+Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
+Signed-off-by: CQ Tang <cq.tang@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/qib/qib_user_sdma.c |  136 ++++++++++++++++++++++++++---
+ 1 files changed, 123 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
+index 165aee2..d2806ca 100644
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
+@@ -52,6 +52,17 @@
+ /* attempt to drain the queue for 5secs */
+ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
++/*
++ * Track how many times a process opens this driver.
++ */
++static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
++
++struct qib_user_sdma_rb_node {
++      struct rb_node node;
++      int refcount;
++      pid_t pid;
++};
++
+ struct qib_user_sdma_pkt {
+       struct list_head list;  /* list element */
+@@ -120,15 +131,60 @@ struct qib_user_sdma_queue {
+       /* dma page table */
+       struct rb_root dma_pages_root;
++      struct qib_user_sdma_rb_node *sdma_rb_node;
++
+       /* protect everything above... */
+       struct mutex lock;
+ };
++static struct qib_user_sdma_rb_node *
++qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
++{
++      struct qib_user_sdma_rb_node *sdma_rb_node;
++      struct rb_node *node = root->rb_node;
++
++      while (node) {
++              sdma_rb_node = container_of(node,
++                      struct qib_user_sdma_rb_node, node);
++              if (pid < sdma_rb_node->pid)
++                      node = node->rb_left;
++              else if (pid > sdma_rb_node->pid)
++                      node = node->rb_right;
++              else
++                      return sdma_rb_node;
++      }
++      return NULL;
++}
++
++static int
++qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
++{
++      struct rb_node **node = &(root->rb_node);
++      struct rb_node *parent = NULL;
++      struct qib_user_sdma_rb_node *got;
++
++      while (*node) {
++              got = container_of(*node, struct qib_user_sdma_rb_node, node);
++              parent = *node;
++              if (new->pid < got->pid)
++                      node = &((*node)->rb_left);
++              else if (new->pid > got->pid)
++                      node = &((*node)->rb_right);
++              else
++                      return 0;
++      }
++
++      rb_link_node(&new->node, parent, node);
++      rb_insert_color(&new->node, root);
++      return 1;
++}
++
+ struct qib_user_sdma_queue *
+ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
+ {
+       struct qib_user_sdma_queue *pq =
+               kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
++      struct qib_user_sdma_rb_node *sdma_rb_node;
+       if (!pq)
+               goto done;
+@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
+       pq->num_pending = 0;
+       pq->num_sending = 0;
+       pq->added = 0;
++      pq->sdma_rb_node = NULL;
+       INIT_LIST_HEAD(&pq->sent);
+       spin_lock_init(&pq->sent_lock);
+@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
+       pq->dma_pages_root = RB_ROOT;
++      sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
++                                      current->pid);
++      if (sdma_rb_node) {
++              sdma_rb_node->refcount++;
++      } else {
++              int ret;
++              sdma_rb_node = kmalloc(sizeof(
++                      struct qib_user_sdma_rb_node), GFP_KERNEL);
++              if (!sdma_rb_node)
++                      goto err_rb;
++
++              sdma_rb_node->refcount = 1;
++              sdma_rb_node->pid = current->pid;
++
++              ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
++                                      sdma_rb_node);
++              BUG_ON(ret == 0);
++      }
++      pq->sdma_rb_node = sdma_rb_node;
++
+       goto done;
++err_rb:
++      dma_pool_destroy(pq->header_cache);
+ err_slab:
+       kmem_cache_destroy(pq->pkt_slab);
+ err_kfree:
+@@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
+       if (!pq)
+               return;
+-      kmem_cache_destroy(pq->pkt_slab);
++      pq->sdma_rb_node->refcount--;
++      if (pq->sdma_rb_node->refcount == 0) {
++              rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
++              kfree(pq->sdma_rb_node);
++      }
+       dma_pool_destroy(pq->header_cache);
++      kmem_cache_destroy(pq->pkt_slab);
+       kfree(pq);
+ }
+@@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+                                struct qib_user_sdma_queue *pq,
+                                struct list_head *pktlist, int count)
+ {
+-      int ret = 0;
+       unsigned long flags;
+       if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+               return -ECOMM;
+-      spin_lock_irqsave(&ppd->sdma_lock, flags);
+-
+-      if (unlikely(!__qib_sdma_running(ppd))) {
+-              ret = -ECOMM;
+-              goto unlock;
++      /* non-blocking mode */
++      if (pq->sdma_rb_node->refcount > 1) {
++              spin_lock_irqsave(&ppd->sdma_lock, flags);
++              if (unlikely(!__qib_sdma_running(ppd))) {
++                      spin_unlock_irqrestore(&ppd->sdma_lock, flags);
++                      return -ECOMM;
++              }
++              pq->num_pending += count;
++              list_splice_tail_init(pktlist, &ppd->sdma_userpending);
++              qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
++              spin_unlock_irqrestore(&ppd->sdma_lock, flags);
++              return 0;
+       }
++      /* In this case, descriptors from this process are not
++       * linked to the ppd pending queue and the interrupt handler
++       * won't update this process, so it is OK to modify them
++       * directly without the sdma lock.
++       */
++
++
+       pq->num_pending += count;
+-      list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+-      qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
++      /*
++       * Blocking mode for a single-rail process: we must
++       * release/regain sdma_lock to give other processes a
++       * chance to make progress. This is important for
++       * performance.
++       */
++      do {
++              spin_lock_irqsave(&ppd->sdma_lock, flags);
++              if (unlikely(!__qib_sdma_running(ppd))) {
++                      spin_unlock_irqrestore(&ppd->sdma_lock, flags);
++                      return -ECOMM;
++              }
++              qib_user_sdma_send_desc(ppd, pktlist);
++              if (!list_empty(pktlist))
++                      qib_sdma_make_progress(ppd);
++              spin_unlock_irqrestore(&ppd->sdma_lock, flags);
++      } while (!list_empty(pktlist));
+-unlock:
+-      spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+-      return ret;
++      return 0;
+ }
+ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
+@@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
+               qib_user_sdma_queue_clean(ppd, pq);
+       while (dim) {
+-              int mxp = 8;
++              int mxp = 1;
+               int ndesc = 0;
+               ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
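
The multi-rail detection above boils down to a refcount keyed by PID: every queue_create() for the same PID bumps the count, and a count above one switches qib_user_sdma_push_pkts() to the non-blocking path. A userspace sketch of that logic (a flat array stands in for the kernel rb-tree; all names are illustrative):

#include <stdio.h>

struct rail_node { int pid; int refcount; };

static struct rail_node nodes[64];	/* sketch only: no bounds checking */
static int nnodes;

/* find the node for this pid, creating one with refcount 0 if absent */
static struct rail_node *rail_get(int pid)
{
	int i;

	for (i = 0; i < nnodes; i++)
		if (nodes[i].pid == pid)
			return &nodes[i];
	nodes[nnodes].pid = pid;
	nodes[nnodes].refcount = 0;
	return &nodes[nnodes++];
}

/* called once per rail the process opens */
static int queue_create(int pid)
{
	return ++rail_get(pid)->refcount;
}

/* mirrors the pq->sdma_rb_node->refcount > 1 test in the patch */
static const char *push_mode(int pid)
{
	return rail_get(pid)->refcount > 1 ? "non-blocking" : "blocking";
}

int main(void)
{
	queue_create(100);		/* process 100 opens one rail */
	queue_create(200);		/* process 200 opens two rails */
	queue_create(200);
	printf("pid 100: %s\n", push_mode(100));
	printf("pid 200: %s\n", push_mode(200));
	return 0;
}

The kernel uses an rb-tree rather than a linear scan so the per-open lookup stays O(log n) no matter how many processes hold the device open.
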
diff --git a/linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch b/linux-next-cherry-picks/0171-IB-qib-fix-debugfs-ordering.patch
new file mode 100644 (file)
index 0000000..6f0744c
--- /dev/null
@@ -0,0 +1,74 @@
+IB/qib: Fix debugfs ordering issue with multiple HCAs
+
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+The debugfs init code was incorrectly called before the idr mechanism
+was used to get the unit number, so dd->unit had not been
+initialized.  This caused the unit-relative directory creation to fail
+for every HCA after the first.
+
+This patch moves the debugfs init until after all of the failure
+points, once the unit number has been determined.
+
+A bug in unwind code in qib_alloc_devdata() is also fixed.
+
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/qib/qib_init.c |   25 +++++++++++--------------
+ 1 files changed, 11 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
+index c1248a6..17e0831 100644
+--- a/drivers/infiniband/hw/qib/qib_init.c
++++ b/drivers/infiniband/hw/qib/qib_init.c
+@@ -1097,14 +1097,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
+       int ret;
+       dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
+-      if (!dd) {
+-              dd = ERR_PTR(-ENOMEM);
+-              goto bail;
+-      }
++      if (!dd)
++              return ERR_PTR(-ENOMEM);
+-#ifdef CONFIG_DEBUG_FS
+-      qib_dbg_ibdev_init(&dd->verbs_dev);
+-#endif
++      INIT_LIST_HEAD(&dd->list);
+       idr_preload(GFP_KERNEL);
+       spin_lock_irqsave(&qib_devs_lock, flags);
+@@ -1121,11 +1117,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
+       if (ret < 0) {
+               qib_early_err(&pdev->dev,
+                             "Could not allocate unit ID: error %d\n", -ret);
+-#ifdef CONFIG_DEBUG_FS
+-              qib_dbg_ibdev_exit(&dd->verbs_dev);
+-#endif
+-              ib_dealloc_device(&dd->verbs_dev.ibdev);
+-              dd = ERR_PTR(ret);
+               goto bail;
+       }
+@@ -1139,9 +1130,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
+                       qib_early_err(&pdev->dev,
+                               "Could not alloc cpulist info, cpu affinity might be wrong\n");
+       }
+-
+-bail:
++#ifdef CONFIG_DEBUG_FS
++      qib_dbg_ibdev_init(&dd->verbs_dev);
++#endif
+       return dd;
++bail:
++      if (!list_empty(&dd->list))
++              list_del_init(&dd->list);
++      ib_dealloc_device(&dd->verbs_dev.ibdev);
++      return ERR_PTR(ret);
+ }
+ /*
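
The rule this patch enforces generalizes: anything that consumes an identifier (here, debugfs directories named after dd->unit) must run after the step that produces it, and nothing should run before the failure points that would force it to be unwound. A standalone sketch of that init shape (plain C, illustrative names, not qib code):

#include <stdio.h>
#include <stdlib.h>

struct dev { int unit; int registered; };

static int alloc_unit(void) { return 0; }	/* stand-in for idr_alloc() */
static void register_dbg(struct dev *d) { d->registered = 1; }

static struct dev *dev_alloc(void)
{
	struct dev *d = calloc(1, sizeof(*d));

	if (!d)
		return NULL;
	d->unit = alloc_unit();
	if (d->unit < 0)
		goto bail;	/* nothing unit-dependent has run yet */
	/*
	 * Done last, after every failure point: the registration sees a
	 * valid d->unit and never needs to be unwound on this path.
	 */
	register_dbg(d);
	return d;
bail:
	free(d);
	return NULL;
}

int main(void)
{
	struct dev *d = dev_alloc();

	if (!d)
		return 1;
	printf("unit=%d registered=%d\n", d->unit, d->registered);
	free(d);
	return 0;
}
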
diff --git a/linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch b/linux-next-cherry-picks/0172-IB-qib-add-missing-serdes-init.patch
new file mode 100644 (file)
index 0000000..1d43891
--- /dev/null
@@ -0,0 +1,34 @@
+IB/qib: Add missing serdes init sequence
+
+From: Mike Marciniszyn <mike.marciniszyn@intel.com>
+
+Research has shown that commit a77fcf895046 ("IB/qib: Use a single
+txselect module parameter for serdes tuning") missed a key serdes init
+sequence.
+
+This patch adds that sequence.
+
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
+Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Roland Dreier <roland@purestorage.com>
+---
+ drivers/infiniband/hw/qib/qib_iba7322.c |    5 +++++
+ 1 files changed, 5 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
+index 016e742..9b642d4 100644
+--- a/drivers/infiniband/hw/qib/qib_iba7322.c
++++ b/drivers/infiniband/hw/qib/qib_iba7322.c
+@@ -2395,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
+       qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
+       qib_write_kreg(dd, kr_scratch, 0ULL);
++      /* ensure previous Tx parameters are not still forced */
++      qib_write_kreg_port(ppd, krp_tx_deemph_override,
++              SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
++              reset_tx_deemphasis_override));
++
+       if (qib_compat_ddr_negotiate) {
+               ppd->cpspec->ibdeltainprog = 1;
+               ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
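
The override write above is built with SYM_MASK(), which expands a named register field into its shifted bit mask. A sketch of that macro style with made-up field parameters (the real 7322 shifts and widths live in the generated register headers, not here):

#include <stdio.h>
#include <stdint.h>

/* hypothetical field placement -- not the 7322's actual layout */
#define FIELD_SHIFT 4
#define FIELD_WIDTH 1

/* shifted mask for a field: ((2^width) - 1) << shift */
#define SYM_MASK(shift, width) \
	((uint64_t)((((uint64_t)1 << (width)) - 1) << (shift)))

int main(void)
{
	/* writing this mask to the override register resets just that field */
	printf("mask=0x%llx\n",
	       (unsigned long long)SYM_MASK(FIELD_SHIFT, FIELD_WIDTH));
	return 0;
}
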
diff --git a/patches/0038-IB-qib-add-RHEL7-support.patch b/patches/0038-IB-qib-add-RHEL7-support.patch
new file mode 100644 (file)
index 0000000..9470c66
--- /dev/null
@@ -0,0 +1,122 @@
+IB/qib: add RHEL7 support
+
+From: Jubin John <jubin.john@intel.com>
+
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Jubin John <jubin.john@intel.com>
+---
+ drivers/infiniband/hw/qib/qib_file_ops.c |    9 +++++++++
+ drivers/infiniband/hw/qib/qib_fs.c       |    5 +++++
+ drivers/infiniband/hw/qib/qib_init.c     |   23 +++++++++++++++++++++--
+ 3 files changed, 35 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
+index 275f247..2037630 100644
+--- a/drivers/infiniband/hw/qib/qib_file_ops.c
++++ b/drivers/infiniband/hw/qib/qib_file_ops.c
+@@ -39,11 +39,16 @@
+ #include <linux/vmalloc.h>
+ #include <linux/highmem.h>
+ #include <linux/io.h>
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
+ #include <linux/aio.h>
++#else
++#include <linux/uio.h>
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) */
+ #include <linux/jiffies.h>
+ #include <asm/pgtable.h>
+ #include <linux/delay.h>
+ #include <linux/export.h>
++#include <linux/moduleparam.h>
+ #include "qib.h"
+ #include "qib_common.h"
+@@ -971,7 +976,11 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+       vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+       vma->vm_ops = &qib_file_vm_ops;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
++#else
++      vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+       ret = 1;
+ bail:
+diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
+index f247fc6..cbe6e3c 100644
+--- a/drivers/infiniband/hw/qib/qib_fs.c
++++ b/drivers/infiniband/hw/qib/qib_fs.c
+@@ -61,8 +61,13 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
+       inode->i_ino = get_next_ino();
+       inode->i_mode = mode;
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+       inode->i_uid = GLOBAL_ROOT_UID;
+       inode->i_gid = GLOBAL_ROOT_GID;
++#else
++      inode->i_uid = 0;
++      inode->i_gid = 0;
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+       inode->i_blocks = 0;
+       inode->i_atime = CURRENT_TIME;
+       inode->i_mtime = inode->i_atime;
+diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
+index 24e802f..c1248a6 100644
+--- a/drivers/infiniband/hw/qib/qib_init.c
++++ b/drivers/infiniband/hw/qib/qib_init.c
+@@ -1177,9 +1177,15 @@ void qib_disable_after_error(struct qib_devdata *dd)
+       if (dd->devstatusp)
+               *dd->devstatusp |= QIB_STATUS_HWERROR;
+ }
+-
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ static void qib_remove_one(struct pci_dev *);
+-static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
++static int qib_init_one(struct pci_dev *,
++                                const struct pci_device_id *);
++#else
++static void __devexit qib_remove_one(struct pci_dev *);
++static int __devinit qib_init_one(struct pci_dev *,
++                                const struct pci_device_id *);
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
+ #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
+ #define PFX QIB_DRV_NAME ": "
+@@ -1196,7 +1202,11 @@ MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
+ static struct pci_driver qib_driver = {
+       .name = QIB_DRV_NAME,
+       .probe = qib_init_one,
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+       .remove = qib_remove_one,
++#else
++      .remove = __devexit_p(qib_remove_one),
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
+       .id_table = qib_pci_tbl,
+       .err_handler = &qib_pci_err_handler,
+ };
+@@ -1417,7 +1427,12 @@ static void qib_postinit_cleanup(struct qib_devdata *dd)
+       qib_free_devdata(dd);
+ }
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
++#else
++static int __devinit qib_init_one(struct pci_dev *pdev,
++                                const struct pci_device_id *ent)
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
+ {
+       int ret, j, pidx, initfail;
+       struct qib_devdata *dd = NULL;
+@@ -1522,7 +1537,11 @@ bail:
+       return ret;
+ }
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+ static void qib_remove_one(struct pci_dev *pdev)
++#else
++static void __devexit qib_remove_one(struct pci_dev *pdev)
++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
+ {
+       struct qib_devdata *dd = pci_get_drvdata(pdev);
+       int ret;
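
Every conditional in the patch above keys off LINUX_VERSION_CODE, which (in this era's <linux/version.h>) packs the kernel version as (major << 16) + (minor << 8) + patch, so the gates are plain integer comparisons resolved by the preprocessor. A standalone sketch with the macro inlined, since outside a kernel tree there is no <linux/version.h>:

#include <stdio.h>

#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
#define LINUX_VERSION_CODE KERNEL_VERSION(3, 10, 0)	/* pretend RHEL7 base */

int main(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
	puts(".remove = qib_remove_one              /* >= 3.8 prototype */");
#else
	puts(".remove = __devexit_p(qib_remove_one) /* pre-3.8 prototype */");
#endif
	return 0;
}

RHEL7 ships a 3.10-based kernel with many newer interfaces backported, which is why the patch gates on 3.7/3.8/3.10 rather than on a distribution name.
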
index 2cc8740337a802cc6164aa3704c1a734c74324ab..ca875cb19f980690ff09e04164583e04fafc1e11 100644 (file)
@@ -1,23 +1,24 @@
-From 0ed3bd45f3b358e5f32ff4e6e87b94fd80da69b5 Mon Sep 17 00:00:00 2001
-From: Phil Cayton <phil.cayton@intel.com>
-Date: Thu, 6 Feb 2014 13:45:33 -0800
-Subject: [PATCH 10/12] Update qib for XEON PHI support
+IB/qib: Update qib for XEON PHI support
 
+From: Jubin John <jubin.john@intel.com>
+
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Jubin John <jubin.john@intel.com>
 ---
- drivers/infiniband/hw/qib/Makefile         |   5 +
- drivers/infiniband/hw/qib/qib.h            |  41 +-
- drivers/infiniband/hw/qib/qib_common.h     |   8 +-
- drivers/infiniband/hw/qib/qib_file_ops.c   | 369 +++++++++++-
- drivers/infiniband/hw/qib/qib_init.c       |  61 +-
- drivers/infiniband/hw/qib/qib_knx.c        | 923 +++++++++++++++++++++++++++++
- drivers/infiniband/hw/qib/qib_knx.h        |  63 ++
- drivers/infiniband/hw/qib/qib_knx_sdma.h   | 105 ++++
- drivers/infiniband/hw/qib/qib_knx_tidrcv.h |  48 ++
- 9 files changed, 1596 insertions(+), 27 deletions(-)
+ drivers/infiniband/hw/qib/Makefile         |    5 
+ drivers/infiniband/hw/qib/qib.h            |   19 
+ drivers/infiniband/hw/qib/qib_common.h     |    7 
+ drivers/infiniband/hw/qib/qib_file_ops.c   |  334 +++++-
+ drivers/infiniband/hw/qib/qib_init.c       |   16 
+ drivers/infiniband/hw/qib/qib_knx.c        | 1532 ++++++++++++++++++++++++++++
+ drivers/infiniband/hw/qib/qib_knx.h        |   74 +
+ drivers/infiniband/hw/qib/qib_knx_common.h |  126 ++
+ drivers/infiniband/hw/qib/qib_user_sdma.c  |  173 +--
+ drivers/infiniband/hw/qib/qib_user_sdma.h  |  106 ++
+ 10 files changed, 2241 insertions(+), 151 deletions(-)
  create mode 100644 drivers/infiniband/hw/qib/qib_knx.c
  create mode 100644 drivers/infiniband/hw/qib/qib_knx.h
- create mode 100644 drivers/infiniband/hw/qib/qib_knx_sdma.h
- create mode 100644 drivers/infiniband/hw/qib/qib_knx_tidrcv.h
+ create mode 100644 drivers/infiniband/hw/qib/qib_knx_common.h
 
 diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
 index 57f8103..ba2a49d 100644
@@ -33,61 +34,33 @@ index 57f8103..ba2a49d 100644
 +ccflags-y += -DQIB_CONFIG_KNX
 +endif
 diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
-index 1946101..ad87abd 100644
+index 1946101..85c078e 100644
 --- a/drivers/infiniband/hw/qib/qib.h
 +++ b/drivers/infiniband/hw/qib/qib.h
-@@ -112,7 +112,20 @@ struct qib_eep_log_mask {
- };
- /*
-- * Below contains all data related to a single context (formerly called port).
-+ * Indicates to the driver that the loadable parameter could be
-+ * configured by it as it was not configured by the user.
-+ */
-+#define QIB_DRIVER_AUTO_CONFIGURATION 10
-+
-+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
-+#define qib_configure_numa(a) \
-+      (a.x86_vendor == X86_VENDOR_INTEL && a.x86 == 6 && a.x86_model == 45)
-+#else
-+#define qib_configure_numa(a) 0
+@@ -234,6 +234,10 @@ struct qib_ctxtdata {
+       u32 lookaside_qpn;
+       /* QPs waiting for context processing */
+       struct list_head qp_wait_list;
++#ifdef QIB_CONFIG_KNX
++      /* KNX Receive Context Data */
++      struct qib_knx_ctxt *krcd;
 +#endif
-+
-+/*
-+  * Below contains all data related to a single context (formerly called port).
-  */
  #ifdef CONFIG_DEBUG_FS
-@@ -739,6 +752,12 @@ struct qib_devdata {
-       /* mem-mapped pointer to base of chip regs */
-       u64 __iomem *kregbase;
-+
-+      /* mem-mapped base of chip regs plus offset of the SendBufAvail0
-+       * register
-+       */
-+      u64 sendbufavail0;
-+
-       /* end of mem-mapped chip space excluding sendbuf and user regs */
-       u64 __iomem *kregend;
-       /* physical address of chip for io_remap, etc. */
-@@ -1103,7 +1122,15 @@ struct qib_devdata {
-       /* per device cq worker */
+       /* verbs stats per CTX */
+       struct qib_opcode_stats_perctx *opstats;
+@@ -1104,6 +1108,11 @@ struct qib_devdata {
        struct kthread_worker *worker;
  
-+      int local_node_id; /* NUMA node closest to HCA */
        int assigned_node_id; /* NUMA node closest to HCA */
 +
 +#ifdef QIB_CONFIG_KNX
-+      /* peer node id of connected KNX node */
-+      u16 node_id;
-+      struct qib_knx *knx;
++      /* number of KNx nodes using this device */
++      u16 num_knx;
 +#endif
-+
  };
  
  /* hol_state values */
-@@ -1132,6 +1159,9 @@ struct qib_filedata {
+@@ -1132,6 +1141,9 @@ struct qib_filedata {
        unsigned tidcursor;
        struct qib_user_sdma_queue *pq;
        int rec_cpu_num; /* for cpu affinity; -1 if none */
@@ -97,7 +70,7 @@ index 1946101..ad87abd 100644
  };
  
  extern struct list_head qib_dev_list;
-@@ -1209,6 +1239,13 @@ int qib_set_uevent_bits(struct qib_pportdata *, const int);
+@@ -1209,6 +1221,13 @@ int qib_set_uevent_bits(struct qib_pportdata *, const int);
        (((struct qib_filedata *)(fp)->private_data)->tidcursor)
  #define user_sdma_queue_fp(fp) \
        (((struct qib_filedata *)(fp)->private_data)->pq)
@@ -111,17 +84,8 @@ index 1946101..ad87abd 100644
  
  static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd)
  {
-@@ -1476,6 +1513,8 @@ extern unsigned qib_n_krcv_queues;
- extern unsigned qib_sdma_fetch_arb;
- extern unsigned qib_compat_ddr_negotiate;
- extern int qib_special_trigger;
-+extern unsigned qib_pio_avail_bits;
-+extern unsigned qib_rcvhdrpoll;
- extern unsigned qib_numa_aware;
- extern struct mutex qib_mutex;
 diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
-index 5670ace..9182d02 100644
+index 5670ace..39eef25 100644
 --- a/drivers/infiniband/hw/qib/qib_common.h
 +++ b/drivers/infiniband/hw/qib/qib_common.h
 @@ -1,4 +1,5 @@
@@ -144,62 +108,29 @@ index 5670ace..9182d02 100644
  
        /* size of struct base_info to write to */
        __u32 spu_base_info_size;
-@@ -360,7 +365,6 @@ struct qib_user_info {
-        * address of struct base_info to write to
-        */
-       __u64 spu_base_info;
--
- } __attribute__ ((aligned(8)));
- /* User commands. */
 diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
-index 275f247..6eebad0 100644
+index c062c60..93ca1f9 100644
 --- a/drivers/infiniband/hw/qib/qib_file_ops.c
 +++ b/drivers/infiniband/hw/qib/qib_file_ops.c
-@@ -48,6 +48,42 @@
+@@ -53,6 +53,7 @@
  #include "qib.h"
  #include "qib_common.h"
  #include "qib_user_sdma.h"
-+#ifdef QIB_CONFIG_KNX
 +#include "qib_knx.h"
-+#endif
-+
-+/*
-+ * Option for a user application to read from the SendBufAvailn registers
-+ * for the send buffer status as a memory IO operation or from main memory.
-+ * The default mode of operation is to have the user process read this
-+ * register from mapped memory when running on the local socket and have
-+ * it read from the register directly (memory IO) when running on the far
-+ * socket. For older applications, ie.., with QIB_USER_SWMINOR less than
-+ * 12, all processes will read the register from main memory.
-+ */
-+unsigned qib_pio_avail_bits = 1;
-+module_param_named(pio_avail_bits, qib_pio_avail_bits, uint, S_IRUGO);
-+MODULE_PARM_DESC(pio_avail_bits, "send buffer status read: "
-+      "0=memory read on local NUMA node & MMIO read on far nodes, "
-+      "1=memory read(default), 2=MMIO read, "
-+      "10=option 1 for AMD & <= Intel Westmere cpus and option 0 for newer cpus");
-+
-+/*
-+ * Option for a user application to read from the RcvHdrTailn registers
-+ * for the next empty receive header queue entry as a memory IO operation
-+ * or from main memory. The default mode of operation is to have the user
-+ * process read this register from mapped memory when running on the local
-+ * socket and have it read from the register directly (memory IO) when
-+ * running on the far socket. For older applications, ie.., with
-+ * QIB_USER_SWMINOR less than 12, all user processes will read the
-+ * register from main memory.
-+ */
-+unsigned qib_rcvhdrpoll = 1;
-+module_param_named(rcvhdrpoll, qib_rcvhdrpoll, uint, S_IRUGO);
-+MODULE_PARM_DESC(rcvhdrpoll, "receive buffer status read: "
-+      "0=memory read on local NUMA node & MMIO read on far nodes, "
-+      "1=memory read(default), 2=MMIO read, "
-+      "10=option 1 for AMD & <= Intel Westmere cpus and option 0 for newer cpus");
  
  #undef pr_fmt
  #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
-@@ -89,6 +125,73 @@ static u64 cvt_kvaddr(void *p)
+@@ -64,6 +65,9 @@ static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
+                            unsigned long, loff_t);
+ static unsigned int qib_poll(struct file *, struct poll_table_struct *);
+ static int qib_mmapf(struct file *, struct vm_area_struct *);
++static int subctxt_search_ctxts(struct qib_devdata *, struct file *,
++                              const struct qib_user_info *);
++
+ static const struct file_operations qib_file_ops = {
+       .owner = THIS_MODULE,
+@@ -94,6 +98,64 @@ static u64 cvt_kvaddr(void *p)
        return paddr;
  }
  
@@ -235,15 +166,6 @@ index 275f247..6eebad0 100644
 +      if (ret < 0)
 +              goto bail_free;
 +
-+      switch (qib_rcvhdrpoll) {
-+      case 0:
-+              if (local_node)
-+                      break;
-+      case 2:
-+              kinfo->spi_runtime_flags &= ~QIB_RUNTIME_NODMA_RTAIL;
-+              break;
-+      }
-+
 +      if (rcd->subctxt_cnt && !subctxt_fp(fp))
 +              kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER;
 +
@@ -273,19 +195,10 @@ index 275f247..6eebad0 100644
  static int qib_get_base_info(struct file *fp, void __user *ubase,
                             size_t ubase_size)
  {
-@@ -100,6 +203,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
-       unsigned subctxt_cnt;
-       int shared, master;
-       size_t sz;
-+      int local_node = (numa_node_id() == pcibus_to_node(dd->pcidev->bus));
-       subctxt_cnt = rcd->subctxt_cnt;
-       if (!subctxt_cnt) {
-@@ -176,15 +280,91 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
-        * both can be enabled and used.
+@@ -182,14 +244,43 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
         */
        kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys;
--      kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys;
+       kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys;
 +      /*
 +       * In the case of KNX, qib_do_user_init() would call into the
 +       * KNX-specific memory allocation/registration functions. These
@@ -300,64 +213,16 @@ index 275f247..6eebad0 100644
 +      if (knx_node_fp(fp))
 +              kinfo->spi_runtime_flags =
 +                      qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_FLAGS, fp);
-+      else {
-+              switch (qib_rcvhdrpoll) {
-+              case 0:
-+                      if (local_node)
-+                              kinfo->spi_rcvhdr_tailaddr =
-+                                      (u64) rcd->rcvhdrqtailaddr_phys;
-+                      else {
-+                              kinfo->spi_rcvhdr_tailaddr =
-+                                      (u64) (kinfo->spi_uregbase +
-+                                             ur_rcvhdrtail);
-+                              kinfo->spi_runtime_flags &=
-+                                      ~QIB_RUNTIME_NODMA_RTAIL;
-+                      }
-+                      break;
-+              case 1:
-+                      kinfo->spi_rcvhdr_tailaddr =
-+                              (u64) rcd->rcvhdrqtailaddr_phys;
-+                      break;
-+              case 2:
-+                      kinfo->spi_rcvhdr_tailaddr =
-+                              (u64) (kinfo->spi_uregbase + ur_rcvhdrtail);
-+                      kinfo->spi_runtime_flags &= ~QIB_RUNTIME_NODMA_RTAIL;
-+                      break;
-+              default:
-+                      ret = -EINVAL;
-+                      break;
-+              }
-+      }
-+
        kinfo->spi_rhf_offset = dd->rhf_offset;
        kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys;
 -      kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys;
 +
 +      /* see comment for spi_uregbase above */
-+      if (knx_node_fp(fp)) {
++      if (knx_node_fp(fp))
 +              kinfo->spi_pioavailaddr =
 +                      qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_PIOAVAIL, fp);
-+      } else {
-+              switch (qib_pio_avail_bits) {
-+              case 0:
-+                      kinfo->spi_pioavailaddr = local_node ?
-+                              (u64)dd->pioavailregs_phys :
-+                      (u64)dd->sendbufavail0;
-+                      break;
-+              case 1:
-+                      kinfo->spi_pioavailaddr = (u64)dd->pioavailregs_phys;
-+                      break;
-+              case 2:
-+                      kinfo->spi_pioavailaddr = (u64)dd->sendbufavail0;
-+                      break;
-+              default:
-+                      ret = -EINVAL;
-+                      break;
-+              }
-+      }
-+
-+      if (ret < 0)
-+              goto bail;
++      else
++              kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys;
 +
        /* setup per-unit (not port) status area for user programs */
 -      kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
@@ -379,7 +244,7 @@ index 275f247..6eebad0 100644
        if (!shared) {
                kinfo->spi_piocnt = rcd->piocnt;
                kinfo->spi_piobufbase = (u64) rcd->piobufs;
-@@ -204,7 +384,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
+@@ -209,7 +300,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
                        dd->palign * kinfo->spi_piocnt * slave;
        }
  
@@ -392,7 +257,7 @@ index 275f247..6eebad0 100644
                kinfo->spi_sendbuf_status =
                        cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]);
                /* only spi_subctxt_* fields should be set in this block! */
-@@ -225,6 +409,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
+@@ -230,6 +325,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
        kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) /
                dd->palign;
        kinfo->spi_pioalign = dd->palign;
@@ -404,65 +269,7 @@ index 275f247..6eebad0 100644
        kinfo->spi_qpair = QIB_KD_QP;
        /*
         * user mode PIO buffers are always 2KB, even when 4KB can
-@@ -978,6 +1167,35 @@ bail:
-       return ret;
- }
-+static int mmap_sendbufavail(struct vm_area_struct *vma, struct qib_devdata *dd,
-+                   u64 ureg)
-+{
-+      unsigned long phys;
-+      unsigned long sz;
-+      int ret;
-+
-+      /*
-+       * This is real hardware, so use io_remap.  This is the mechanism
-+       * for the user process to update the head registers for their ctxt
-+       * in the chip.
-+       */
-+      sz = PAGE_SIZE;
-+      if ((vma->vm_end - vma->vm_start) > sz)
-+              ret = -EFAULT;
-+      else {
-+              phys = dd->physaddr + ureg;
-+              vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-+
-+              vma->vm_flags &= ~VM_MAYWRITE;
-+              vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ;
-+
-+              ret = io_remap_pfn_range(vma, vma->vm_start,
-+                                       phys >> PAGE_SHIFT,
-+                                       vma->vm_end - vma->vm_start,
-+                                       vma->vm_page_prot);
-+      }
-+      return ret;
-+}
- /**
-  * qib_mmapf - mmap various structures into user space
-  * @fp: the file pointer
-@@ -1056,6 +1274,8 @@ static int qib_mmapf(struct file *fp, struct vm_area_struct *vma)
-       if (pgaddr == ureg)
-               ret = mmap_ureg(vma, dd, ureg);
-+      else if (pgaddr == dd->sendbufavail0)
-+              ret = mmap_sendbufavail(vma, dd, pgaddr - (u64)dd->kregbase);
-       else if (pgaddr == piobufs)
-               ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt);
-       else if (pgaddr == dd->pioavailregs_phys)
-@@ -1187,11 +1407,7 @@ static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
-               int cpu;
-               cpu = find_first_zero_bit(qib_cpulist,
-                                         qib_cpulist_count);
--              if (cpu == qib_cpulist_count)
--                      qib_dev_err(dd,
--                      "no cpus avail for affinity PID %u\n",
--                      current->pid);
--              else {
-+              if (cpu != qib_cpulist_count) {
-                       __set_bit(cpu, qib_cpulist);
-                       fd->rec_cpu_num = cpu;
-               }
-@@ -1261,6 +1477,17 @@ static int init_subctxts(struct qib_devdata *dd,
+@@ -1270,6 +1370,17 @@ static int init_subctxts(struct qib_devdata *dd,
                goto bail;
        }
  
@@ -480,7 +287,7 @@ index 275f247..6eebad0 100644
        rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts);
        if (!rcd->subctxt_uregbase) {
                ret = -ENOMEM;
-@@ -1283,6 +1510,9 @@ static int init_subctxts(struct qib_devdata *dd,
+@@ -1292,6 +1403,9 @@ static int init_subctxts(struct qib_devdata *dd,
                goto bail_rhdr;
        }
  
@@ -490,36 +297,131 @@ index 275f247..6eebad0 100644
        rcd->subctxt_cnt = uinfo->spu_subctxt_cnt;
        rcd->subctxt_id = uinfo->spu_subctxt_id;
        rcd->active_slaves = 1;
-@@ -1333,6 +1563,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+@@ -1326,6 +1440,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+       rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
++#ifdef QIB_CONFIG_KNX
++      if (uinfo->spu_knx_node_id)
++              /*
++               * Skip allocation of page pointer list for TID
++               * receives. This will be done on the KNX.
++               */
++              goto no_page_list;
++#endif
+       /*
+        * Allocate memory for use in qib_tid_update() at open to
+        * reduce cost of expected send setup per message segment
+@@ -1341,7 +1463,11 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+               ret = -ENOMEM;
                goto bailerr;
        }
++#ifdef QIB_CONFIG_KNX
++no_page_list:
++#endif
        rcd->userversion = uinfo->spu_userversion;
 +
        ret = init_subctxts(dd, rcd, uinfo);
        if (ret)
                goto bailerr;
-@@ -1496,7 +1727,16 @@ static int find_shared_ctxt(struct file *fp,
+@@ -1498,43 +1624,68 @@ done:
+ static int find_shared_ctxt(struct file *fp,
+                           const struct qib_user_info *uinfo)
+ {
+-      int devmax, ndev, i;
++      int devmax, ndev;
+       int ret = 0;
++      struct qib_devdata *dd;
  
-       for (ndev = 0; ndev < devmax; ndev++) {
-               struct qib_devdata *dd = qib_lookup(ndev);
--
 +#ifdef QIB_CONFIG_KNX
-+              /*
-+               * In the case we are allocating a context for a KNX process,
-+               * reject any device that is not associated with the
-+               * requesting KNX.
-+               */
-+              if ((uinfo->spu_knx_node_id &&
-+                   dd->node_id != uinfo->spu_knx_node_id))
-+                      continue;
++      /*
++       * When allocating a context for a KNX process, don't loop
++       * over all devices but use the one associated with the
++       * requesting KNX.
++       */
++      if (uinfo->spu_knx_node_id) {
++              dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id);
++              if (dd && dd->num_knx)
++                      ret = subctxt_search_ctxts(dd, fp, uinfo);
++              goto done;
++      }
 +#endif
+       devmax = qib_count_units(NULL, NULL);
+       for (ndev = 0; ndev < devmax; ndev++) {
+-              struct qib_devdata *dd = qib_lookup(ndev);
+-
++              dd = qib_lookup(ndev);
                /* device portion of usable() */
                if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
                        continue;
-@@ -1617,6 +1857,14 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+-              for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
+-                      struct qib_ctxtdata *rcd = dd->rcd[i];
++              ret = subctxt_search_ctxts(dd, fp, uinfo);
++              if (ret)
++                      break;
++      }
++#ifdef QIB_CONFIG_KNX
++done:
++#endif
++      return ret;
++}
+-                      /* Skip ctxts which are not yet open */
+-                      if (!rcd || !rcd->cnt)
+-                              continue;
+-                      /* Skip ctxt if it doesn't match the requested one */
+-                      if (rcd->subctxt_id != uinfo->spu_subctxt_id)
+-                              continue;
+-                      /* Verify the sharing process matches the master */
+-                      if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
+-                          rcd->userversion != uinfo->spu_userversion ||
+-                          rcd->cnt >= rcd->subctxt_cnt) {
+-                              ret = -EINVAL;
+-                              goto done;
+-                      }
+-                      ctxt_fp(fp) = rcd;
+-                      subctxt_fp(fp) = rcd->cnt++;
+-                      rcd->subpid[subctxt_fp(fp)] = current->pid;
+-                      tidcursor_fp(fp) = 0;
+-                      rcd->active_slaves |= 1 << subctxt_fp(fp);
+-                      ret = 1;
++static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp,
++                              const struct qib_user_info *uinfo)
++{
++      int ret = 0, i;
++      for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
++              struct qib_ctxtdata *rcd = dd->rcd[i];
++
++              /* Skip ctxts which are not yet open */
++              if (!rcd || !rcd->cnt)
++                      continue;
++              /* Skip ctxt if it doesn't match the requested one */
++              if (rcd->subctxt_id != uinfo->spu_subctxt_id)
++                      continue;
++              /* Verify the sharing process matches the master */
++              if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
++                  rcd->userversion != uinfo->spu_userversion ||
++                  rcd->cnt >= rcd->subctxt_cnt) {
++                      ret = -EINVAL;
+                       goto done;
+               }
++              ctxt_fp(fp) = rcd;
++              subctxt_fp(fp) = rcd->cnt++;
++              rcd->subpid[subctxt_fp(fp)] = current->pid;
++              tidcursor_fp(fp) = 0;
++              rcd->active_slaves |= 1 << subctxt_fp(fp);
++              ret = 1;
++              break;
+       }
+-
+ done:
+       return ret;
+ }
+@@ -1626,6 +1777,13 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
        if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
                alg = uinfo->spu_port_alg;
 +#ifdef QIB_CONFIG_KNX
 +      /* Make sure we have a connection to the KNX module on the right node */
 +      if (uinfo->spu_knx_node_id && !qib_knx_get(uinfo->spu_knx_node_id)) {
@@ -527,38 +429,73 @@ index 275f247..6eebad0 100644
 +              goto done;
 +      }
 +#endif
-+
        mutex_lock(&qib_mutex);
  
-       if (qib_compatible_subctxts(swmajor, swminor) &&
-@@ -1638,6 +1886,24 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+@@ -1633,13 +1791,38 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+           uinfo->spu_subctxt_cnt) {
+               ret = find_shared_ctxt(fp, uinfo);
+               if (ret > 0) {
+-                      ret = do_qib_user_sdma_queue_create(fp);
++#ifdef QIB_CONFIG_KNX
++                      if (uinfo->spu_knx_node_id) {
++                              ret = qib_knx_sdma_queue_create(fp);
++                      } else
++#endif
++                              ret = do_qib_user_sdma_queue_create(fp);
+                       if (!ret)
+                               assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+                       goto done_ok;
+               }
+       }
++#ifdef QIB_CONFIG_KNX
++      /*
++       * If there is a KNX node set, we pick the device that is
++       * associated with that KNX node.
++       */
++      if (uinfo->spu_knx_node_id) {
++              struct qib_devdata *dd =
++                      qib_knx_node_to_dd(uinfo->spu_knx_node_id);
++              if (dd) {
++                      ret = find_free_ctxt(dd->unit, fp, uinfo);
++                      if (!ret)
++                              ret = qib_knx_alloc_ctxt(
++                                      uinfo->spu_knx_node_id,
++                                      ctxt_fp(fp)->ctxt);
++              } else
++                      ret = -ENXIO;
++              goto done_chk_sdma;
++      }
++
++#endif
+       i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
+       if (i_minor)
+               ret = find_free_ctxt(i_minor - 1, fp, uinfo);
+@@ -1648,7 +1831,6 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
                const unsigned int cpu = cpumask_first(&current->cpus_allowed);
                const unsigned int weight =
                        cpumask_weight(&current->cpus_allowed);
+-
+               if (weight == 1 && !test_bit(cpu, qib_cpulist))
+                       if (!find_hca(cpu, &unit) && unit >= 0)
+                               if (!find_free_ctxt(unit, fp, uinfo)) {
+@@ -1659,9 +1841,21 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+       }
+ done_chk_sdma:
+-      if (!ret)
++      if (!ret) {
 +#ifdef QIB_CONFIG_KNX
-+              /*
-+               * If there is a KNX node set, we pick the device that is on
-+               * the same NUMA node as the KNX.
-+               */
 +              if (uinfo->spu_knx_node_id) {
-+                      struct qib_devdata *dd =
-+                              qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-+                      if (dd) {
-+                              ret = find_free_ctxt(dd->unit, fp, uinfo);
-+                              if (!ret)
-+                                      ret = qib_knx_alloc_ctxt(dd,
-+                                                      ctxt_fp(fp)->ctxt);
-+                      } else
-+                              ret = -ENXIO;
-+                      goto done_chk_sdma;
++                      ret = qib_knx_sdma_queue_create(fp);
++                      /*if (!ret)
++                        ret = qib_knx_setup_tidrcv(fp);*/
++                      goto done_ok;
 +              }
 +#endif
-               if (weight == 1 && !test_bit(cpu, qib_cpulist))
-                       if (!find_hca(cpu, &unit) && unit >= 0)
-@@ -1652,6 +1918,9 @@ done_chk_sdma:
-       if (!ret)
                ret = do_qib_user_sdma_queue_create(fp);
++      }
  done_ok:
 +#ifdef QIB_CONFIG_KNX
 +      knx_node_fp(fp) = uinfo->spu_knx_node_id;
@@ -566,7 +503,7 @@ index 275f247..6eebad0 100644
        mutex_unlock(&qib_mutex);
  
  done:
-@@ -1666,11 +1935,25 @@ static int qib_do_user_init(struct file *fp,
+@@ -1676,11 +1870,25 @@ static int qib_do_user_init(struct file *fp,
        struct qib_ctxtdata *rcd = ctxt_fp(fp);
        struct qib_devdata *dd;
        unsigned uctxt;
@@ -592,7 +529,7 @@ index 275f247..6eebad0 100644
                goto bail;
        }
  
-@@ -1721,6 +2004,41 @@ static int qib_do_user_init(struct file *fp,
+@@ -1731,6 +1939,41 @@ static int qib_do_user_init(struct file *fp,
         */
        dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
  
@@ -634,23 +571,31 @@ index 275f247..6eebad0 100644
        /*
         * Now allocate the rcvhdr Q and eager TIDs; skip the TID
         * array for time being.  If rcd->ctxt > chip-supported,
-@@ -1730,6 +2048,7 @@ static int qib_do_user_init(struct file *fp,
+@@ -1740,6 +1983,9 @@ static int qib_do_user_init(struct file *fp,
        ret = qib_create_rcvhdrq(dd, rcd);
        if (!ret)
                ret = qib_setup_eagerbufs(rcd);
++#ifdef QIB_CONFIG_KNX
 +cont_init:
++#endif
        if (ret)
                goto bail_pio;
  
-@@ -1752,7 +2071,6 @@ static int qib_do_user_init(struct file *fp,
-        */
-       if (rcd->rcvhdrtail_kvaddr)
-               qib_clear_rcvhdrtail(rcd);
--
-       dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB,
-                     rcd->ctxt);
+@@ -1837,6 +2083,13 @@ static int qib_close(struct inode *in, struct file *fp)
  
-@@ -1884,6 +2202,12 @@ static int qib_close(struct inode *in, struct file *fp)
+       /* drain user sdma queue */
+       if (fd->pq) {
++#ifdef QIB_CONFIG_KNX
++              /*
++               * The thread should be stopped first before attempting
++               * The thread should be stopped before attempting
++               * to clean the queue.
++              qib_knx_sdma_queue_destroy(fd);
++#endif
+               qib_user_sdma_queue_drain(rcd->ppd, fd->pq);
+               qib_user_sdma_queue_destroy(fd->pq);
+       }
+@@ -1894,6 +2147,12 @@ static int qib_close(struct inode *in, struct file *fp)
        }
  
        mutex_unlock(&qib_mutex);
@@ -663,7 +608,7 @@ index 275f247..6eebad0 100644
        qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */
  
  bail:
-@@ -2169,15 +2493,22 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
+@@ -2179,6 +2438,13 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
                ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
                if (ret)
                        goto bail;
@@ -677,22 +622,8 @@ index 275f247..6eebad0 100644
                break;
  
        case QIB_CMD_USER_INIT:
-               ret = qib_do_user_init(fp, &cmd.cmd.user_info);
--              if (ret)
--                      goto bail;
--              ret = qib_get_base_info(fp, (void __user *) (unsigned long)
--                                      cmd.cmd.user_info.spu_base_info,
--                                      cmd.cmd.user_info.spu_base_info_size);
-+              if (!ret)
-+                      ret = qib_get_base_info(
-+                              fp, (void __user *) (unsigned long)
-+                              cmd.cmd.user_info.spu_base_info,
-+                              cmd.cmd.user_info.spu_base_info_size);
-               break;
-       case QIB_CMD_RECV_CTRL:
 diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
-index 24e802f..84b3222 100644
+index 17e0831..f8992f7 100644
 --- a/drivers/infiniband/hw/qib/qib_init.c
 +++ b/drivers/infiniband/hw/qib/qib_init.c
 @@ -51,6 +51,10 @@
@@ -706,72 +637,7 @@ index 24e802f..84b3222 100644
  #undef pr_fmt
  #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
  
-@@ -64,6 +68,14 @@
- #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)
- /*
-+ * Select the NUMA node id on which to allocate the receive header
-+ * queue, eager buffers and send pioavail register.
-+ */
-+int qib_numa_node;
-+module_param_named(numa_node, qib_numa_node, int, S_IRUGO);
-+MODULE_PARM_DESC(numa_node, "NUMA node on which memory is allocated");
-+
-+/*
-  * Number of ctxts we are configured to use (to allow for more pio
-  * buffers per ctxt, etc.)  Zero means use chip value.
-  */
-@@ -71,11 +83,6 @@ ushort qib_cfgctxts;
- module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
- MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
--unsigned qib_numa_aware;
--module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
--MODULE_PARM_DESC(numa_aware,
--      "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
--
- /*
-  * If set, do not write to any regs if avoidable, hack to allow
-  * check for deranged default register values.
-@@ -84,6 +91,12 @@ ushort qib_mini_init;
- module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
- MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");
-+unsigned qib_numa_aware = QIB_DRIVER_AUTO_CONFIGURATION;
-+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
-+MODULE_PARM_DESC(numa_aware, "Use NUMA aware allocations: "
-+      "0=disabled, 1=enabled, "
-+      "10=option 0 for AMD & <= Intel Westmere cpus and option 1 for newer cpus(default)");
-+
- unsigned qib_n_krcv_queues;
- module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
- MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
-@@ -1095,6 +1108,24 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
-       unsigned long flags;
-       struct qib_devdata *dd;
-       int ret;
-+      int node_id;
-+      int local_node_id = pcibus_to_node(dd->pcidev->bus);
-+      s64 new_node_id = qib_numa_node;
-+
-+      if (local_node_id < 0)
-+              local_node_id = numa_node_id();
-+
-+      if (new_node_id < 0)
-+              new_node_id = local_node_id;
-+
-+      new_node_id = node_online(new_node_id) ? new_node_id :
-+              local_node_id;
-+
-+      dd->local_node_id = local_node_id;
-+      dd->assigned_node_id = new_node_id;
-+
-+      node_id = qib_numa_aware ? dd->local_node_id :
-+              dd->assigned_node_id;
-       dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
-       if (!dd) {
-@@ -1263,6 +1294,13 @@ static int __init qlogic_ib_init(void)
+@@ -1270,6 +1274,12 @@ static int __init qlogic_ib_init(void)
        /* not fatal if it doesn't work */
        if (qib_init_qibfs())
                pr_err("Unable to register ipathfs\n");
@@ -779,47 +645,39 @@ index 24e802f..84b3222 100644
 +#ifdef QIB_CONFIG_KNX
 +      ret = qib_knx_server_init();
 +      if (ret < 0)
-+              pr_err("Unable to start KNX listen thread\n");
++              pr_err("Unable to start KNX listen thread\n");
 +#endif
-+
        goto bail; /* all OK */
  
  bail_dev:
-@@ -1287,6 +1325,10 @@ static void __exit qlogic_ib_cleanup(void)
+@@ -1294,6 +1304,9 @@ static void __exit qlogic_ib_cleanup(void)
  {
        int ret;
  
 +#ifdef QIB_CONFIG_KNX
 +      qib_knx_server_exit();
 +#endif
-+
        ret = qib_exit_qibfs();
        if (ret)
                pr_err(
-@@ -1754,6 +1796,15 @@ int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
-       iounmap(dd->kregbase);
-       dd->kregbase = NULL;
+@@ -1546,6 +1559,9 @@ static void __devexit qib_remove_one(struct pci_dev *pdev)
+       /* unregister from IB core */
+       qib_unregister_ib_device(dd);
  
-+      if (qib_numa_aware == QIB_DRIVER_AUTO_CONFIGURATION)
-+              qib_numa_aware = qib_configure_numa(boot_cpu_data) ? 1 : 0;
-+
-+      if (qib_rcvhdrpoll == QIB_DRIVER_AUTO_CONFIGURATION)
-+              qib_rcvhdrpoll = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
-+      if (qib_pio_avail_bits == QIB_DRIVER_AUTO_CONFIGURATION)
-+              qib_pio_avail_bits = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
++#ifdef QIB_CONFIG_KNX
++      qib_knx_remove_device(dd);
++#endif
        /*
-        * Assumes chip address space looks like:
-        *      - kregs + sregs + cregs + uregs (in any order)
+        * Disable the IB link, disable interrupts on the device,
+        * clear dma engines, etc.
 diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c
 new file mode 100644
-index 0000000..c15276f
+index 0000000..5a9bdaa
 --- /dev/null
 +++ b/drivers/infiniband/hw/qib/qib_knx.c
-@@ -0,0 +1,923 @@
+@@ -0,0 +1,1532 @@
 +/*
-+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
 + *
 + * This software is available to you under a choice of one of two
 + * licenses.  You may choose to be licensed under the terms of the GNU
@@ -849,6 +707,7 @@ index 0000000..c15276f
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 + * SOFTWARE.
 + */
++#include <linux/module.h>
 +#include <linux/kthread.h>
 +#include <linux/kernel.h>
 +#include <linux/dma-mapping.h>
@@ -857,12 +716,21 @@ index 0000000..c15276f
 +
 +#include "qib.h"
 +#include "qib_knx.h"
++#include "qib_user_sdma.h"
++#include "qib_knx_common.h"
 +
 +unsigned int qib_knx_nconns = 5;
 +module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO);
 +MODULE_PARM_DESC(num_conns, "Max number of pending connections");
 +
 +#define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9
++#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
++
++#define knx_sdma_next(sdma) \
++      (sdma->head = ((sdma->head + 1) % sdma->desc_num))
++#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub)
++#define QIB_KNX_SDMA_STATUS(sdma, st) \
++      QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)(st) << 32) | 1)
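++/*
++ * The status word packs a 32-bit status code into its upper half and a
++ * "valid" flag into bit 0; the MIC side would presumably decode it as
++ * (illustrative sketch, the MIC driver is not part of this patch):
++ *
++ *     int st = (int)(QIB_KNX_SDMA_VALUE(mflags->status) >> 32);
++ *     int valid = QIB_KNX_SDMA_VALUE(mflags->status) & 1;
++ */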
 +
 +struct qib_knx_server {
 +      struct task_struct *kthread;
@@ -902,7 +770,16 @@ index 0000000..c15276f
 +      struct scif_range *pages;
 +};
 +
++struct qib_knx_tidrcv {
++      struct qib_knx_rma tidmem;
++      u64 tidbase;
++      u32 tidcnt;
++};
++
 +struct qib_knx_ctxt {
++      u16 ctxt;
++      struct qib_knx *knx;
++      struct qib_pportdata *ppd;
 +      /* local registered memory for PIO buffers */
 +      struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT];
 +      /* local registered memory for user registers */
@@ -924,6 +801,23 @@ index 0000000..c15276f
 +      __u64 status;
 +      __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT];
 +      __u32 runtime_flags;
++
++      struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT];
++};
++
++struct qib_knx_sdma {
++      /* KNX flags page */
++      struct scif_range *mflag_pages;
++      struct qib_knx_sdma_mflags *mflags;
++      /* KNX descriptor queue */
++      struct scif_range *queue_pages;
++      struct qib_knx_sdma_desc *queue;
++      u32 desc_num;
++      /* host flags (in host memory) */
++      struct qib_knx_rma hflags_mem;
++      struct qib_knx_sdma_hflags *hflags;
++      u32 head;                           /* shadow */
++      u32 complete;
 +};
 +
 +struct qib_knx {
@@ -934,10 +828,16 @@ index 0000000..c15276f
 +      int numa_node;
 +      struct qib_devdata *dd;
 +      struct qib_knx_ctxt **ctxts;
++      spinlock_t ctxt_lock;
++      resource_size_t bar;
++      u64 barlen;
++      struct qib_knx_sdma *sdma;
++      struct task_struct *sdma_poll;
++      atomic_t tref;
++      char tname[64];
++      struct qib_knx_rma tidmem;
 +};
 +
-+#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
-+
 +static struct qib_knx_server *server;
 +
 +static int qib_knx_init(struct qib_knx_server *);
@@ -947,19 +847,20 @@ index 0000000..c15276f
 +                                   void *, size_t, int, const char *);
 +static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *,
 +                                   const char *);
++static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t);
 +static ssize_t qib_show_knx_node(struct device *, struct device_attribute *,
 +                               char *);
-+
-+static DEVICE_ATTR(knx_node, S_IRUGO, qib_show_knx_node, NULL);
-+static ssize_t qib_show_knx_node(struct device *dev,
-+                               struct device_attribute *attr, char *buf)
-+{
-+      struct qib_ibdev *ibdev =
-+              container_of(dev, struct qib_ibdev, ibdev.dev);
-+      struct qib_devdata *dd = dd_from_dev(ibdev);
-+
-+      return scnprintf(buf, PAGE_SIZE, "%u\n", dd->knx->peer.node);
-+}
++static int qib_knx_sdma_init(struct qib_knx *);
++static void qib_knx_sdma_teardown(struct qib_knx *);
++static __always_inline struct page *
++qib_knx_phys_to_page(struct qib_knx *, unsigned long);
++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *,
++                                    struct qib_knx_sdma_desc *,
++                                    struct qib_user_sdma_queue *,
++                                    int *, struct list_head *);
++static int qib_knx_sdma_poll(void *);
++static int qib_knx_tidrcv_init(struct qib_knx *);
++static int qib_knx_tidrcv_teardown(struct qib_knx *);
 +
 +inline struct qib_knx *qib_knx_get(u16 nodeid)
 +{
@@ -982,10 +883,11 @@ index 0000000..c15276f
 +
 +static int qib_knx_init(struct qib_knx_server *server)
 +{
-+      int ret = 0, num_devs = 0, i;
-+      struct qib_devdata *dd;
++      int ret = 0, num_devs = 0, i, seen = 0;
++      unsigned fewest = -1U;
++      struct qib_devdata *dd = NULL, *dd_no_numa = NULL;
 +      struct qib_knx *knx;
-+      struct ib_device *ibdev;
++      struct qib_device_info info = { -1 };
 +
 +      knx = kzalloc(sizeof(*knx), GFP_KERNEL);
 +      if (!knx) {
@@ -999,10 +901,14 @@ index 0000000..c15276f
 +      }
 +
 +      INIT_LIST_HEAD(&knx->list);
++      spin_lock_init(&knx->ctxt_lock);
 +      knx->numa_node = -1;
 +      ret = scif_pci_info(knx->peer.node, &knx->pci_info);
-+      if (!ret)
++      if (!ret) {
 +              knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus);
++              knx->bar = pci_resource_start(knx->pci_info.pdev, 0);
++              knx->barlen = pci_resource_len(knx->pci_info.pdev, 0);
++      }
 +
 +      if (knx->numa_node < 0)
 +              knx->numa_node = numa_node_id();
@@ -1010,40 +916,58 @@ index 0000000..c15276f
 +      num_devs = qib_count_units(NULL, NULL);
 +      if (unlikely(!num_devs)) {
 +              ret = -ENODEV;
++              /* still reply so the MIC isn't left blocked in scif_recv() */
++              scif_send(knx->epd.epd, &info, sizeof(info),
++                        SCIF_SEND_BLOCK);
 +              goto done;
 +      }
 +
-+      for (i = 0; i < num_devs; i++) {
++      /*
++       * Attempt to find an HCA on the same NUMA node as the card. Save
++       * the HCA with the fewest associated cards in case
++       * there is no HCA on the same NUMA node.
++       */
++      for (i = 0; seen < num_devs; i++) {
 +              dd = qib_lookup(i);
-+              if (dd && dd->local_node_id == knx->numa_node)
-+                      knx->dd = dd;
++              if (dd) {
++                      if (dd->assigned_node_id == knx->numa_node) {
++                              knx->dd = dd;
++                              break;
++                      } else if (dd->num_knx < fewest) {
++                              dd_no_numa = dd;
++                              fewest = dd->num_knx;
++                      }
++                      seen++;
++              }
 +      }
 +      /*
 +       * We didn't find a QIB device on the same NUMA node,
-+       * round-robin across all devices.
++       * use the "backup".
 +       */
 +      if (unlikely(!knx->dd)) {
-+              knx->dd = qib_lookup(server->nclients % num_devs);
-+              /* it is possible for qib_lookup to return NULL */
-+              if (unlikely(!knx->dd)) {
++              if (!dd_no_numa) {
 +                      ret = -ENODEV;
++                      /* still reply so the MIC isn't left blocked in scif_recv() */
++                      scif_send(knx->epd.epd, &info, sizeof(info),
++                                SCIF_SEND_BLOCK);
 +                      goto done;
 +              }
++              knx->dd = dd_no_numa;
 +      }
-+      knx->dd->node_id = knx->peer.node;
-+      knx->dd->knx = knx;
++      knx->dd->num_knx++;
++
 +      knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts),
 +                                GFP_KERNEL, knx->numa_node);
++      if (!knx->ctxts) {
++              ret = -ENOMEM;
++              /* still reply so the MIC isn't left blocked in scif_recv() */
++              scif_send(knx->epd.epd, &info, sizeof(info),
++                        SCIF_SEND_BLOCK);
++              goto done;
++      }
-+      ibdev = &knx->dd->verbs_dev.ibdev;
-+      ret = device_create_file(&ibdev->dev, &dev_attr_knx_node);
++      /* Give the KNX the associated device information. */
++      info.unit = knx->dd->unit;
++      ret = scif_send(knx->epd.epd, &info, sizeof(info),
++                      SCIF_SEND_BLOCK);
++
++      ret = qib_knx_sdma_init(knx);
 +      if (ret)
-+              /*
-+               * clear the error code since we don't want to fail the
-+               * initialization.
-+               */
-+              ret = 0;
++              goto done;
++      atomic_set(&knx->tref, 0);
++      ret = qib_knx_tidrcv_init(knx);
 +done:
 +      spin_lock(&server->client_lock);
 +      list_add_tail(&knx->list, &server->clients);
@@ -1057,13 +981,12 @@ index 0000000..c15276f
 +static void qib_knx_free(struct qib_knx *knx, int unload)
 +{
 +      struct qib_devdata *dd = knx->dd;
-+      struct ib_device *ibdev;
 +      int i;
 +
-+      if (dd) {
-+              ibdev = &dd->verbs_dev.ibdev;
-+              device_remove_file(&ibdev->dev, &dev_attr_knx_node);
-+      }
++      qib_knx_tidrcv_teardown(knx);
++      qib_knx_sdma_teardown(knx);
++      if (dd)
++              dd->num_knx--;
 +      /*
 +       * If this function is called with unload set, we can
 +       * free the context data. Otherwise, we are here
@@ -1180,9 +1103,16 @@ index 0000000..c15276f
 +      return ret;
 +}
 +
-+int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt)
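++/*
++ * Descriptor reads cross PCIe into mapped MIC memory, so they are
++ * funnelled through memcpy_fromio() rather than a plain memcpy().
++ */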
++static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src,
++                                         size_t size)
 +{
-+      struct qib_knx *knx = dd_to_knx(dd);
++      memcpy_fromio(dst, src, size);
++}
++
++int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt)
++{
++      struct qib_knx *knx = qib_knx_get(node_id);
++      struct qib_devdata *dd = knx->dd;
 +      struct qib_knx_ctxt *ptr;
 +      int ret = 0;
 +
@@ -1199,7 +1129,14 @@ index 0000000..c15276f
 +              ret = -ENOMEM;
 +              goto bail;
 +      }
++      ptr->knx = knx;
++      ptr->ctxt = ctxt;
++      ptr->ppd = dd->rcd[ctxt]->ppd;
++
++      spin_lock(&knx->ctxt_lock);
 +      knx->ctxts[ctxt] = ptr;
++      dd->rcd[ctxt]->krcd = ptr;
++      spin_unlock(&knx->ctxt_lock);
 +bail:
 +      return ret;
 +}
@@ -1208,10 +1145,11 @@ index 0000000..c15276f
 +                      enum qib_knx_ctxtinfo_type type,
 +                      struct file *fp)
 +{
-+      struct qib_knx *knx = dd_to_knx(rcd->dd);
++      struct qib_knx *knx = rcd->krcd->knx;
 +      __u16 subctxt;
 +      __u64 ret = 0;
 +
++      spin_lock(&knx->ctxt_lock);
 +      if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt])
 +              goto done;
 +
@@ -1234,6 +1172,7 @@ index 0000000..c15276f
 +              break;
 +      }
 +done:
++      spin_unlock(&knx->ctxt_lock);
 +      return ret;
 +}
 +
@@ -1244,7 +1183,7 @@ index 0000000..c15276f
 +      char buf[16];
 +      off_t offset;
 +      int ret = 0;
-+      struct qib_knx *knx = dd_to_knx(dd);
++      struct qib_knx *knx = rcd->krcd->knx;
 +
 +      if (unlikely(!knx)) {
 +              ret = -ENODEV;
@@ -1292,7 +1231,7 @@ index 0000000..c15276f
 +{
 +      int ret = 0;
 +      off_t offset;
-+      struct qib_knx *knx = dd_to_knx(dd);
++      struct qib_knx *knx = rcd->krcd->knx;
 +
 +      if (unlikely(!knx)) {
 +              ret = -ENODEV;
@@ -1353,7 +1292,7 @@ index 0000000..c15276f
 +{
 +      struct qib_knx_mem_map_sg *mapsg;
 +      struct qib_knx_mem_map *map;
-+      struct qib_knx *knx = dd_to_knx(dd);
++      struct qib_knx *knx = rcd->krcd->knx;
 +      dma_addr_t offset;
 +      struct scatterlist *sg;
 +      unsigned num_pages;
@@ -1410,7 +1349,8 @@ index 0000000..c15276f
 +       * can use 64bit addresses for DMA but the CPU might not.
 +       * (see pci_set_dma_mask() in qib_pcie.c).
 +       */
-+      mapsg->sglist = kzalloc(num_pages * sizeof(*mapsg->sglist), GFP_KERNEL);
++      mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist),
++                                   GFP_KERNEL, knx->numa_node);
 +      if (!mapsg->sglist) {
 +              ret = -ENOMEM;
 +              goto bail_rcvq_pages;
@@ -1426,7 +1366,7 @@ index 0000000..c15276f
 +      }
 +      /*
 +       * pci_map_sg() will remap all 128 pages of the
-+       * scatterlist seperately (without coalescing them).
++       * scatterlist separately (without coalescing them).
 +       * However, since the buffer is contiguous, as long
 +       * as the base address is mapped correctly, everything
 +       * should work. In any case, check that the mapped
@@ -1520,7 +1460,7 @@ index 0000000..c15276f
 +      struct qib_knx_mem_map_sg *map;
 +      struct scatterlist *sg;
 +      struct qib_devdata *dd = rcd->dd;
-+      struct qib_knx *knx = dd_to_knx(dd);
++      struct qib_knx *knx = rcd->krcd->knx;
 +      unsigned size, egrsize, egrcnt, num_pages, bufs_ppage,
 +              egrbufcnt;
 +      dma_addr_t dma_addr, page;
@@ -1598,7 +1538,8 @@ index 0000000..c15276f
 +
 +      map->size = size;
 +      map->dir = DMA_BIDIRECTIONAL;
-+      map->sglist = kzalloc(num_pages * sizeof(*map->sglist), GFP_KERNEL);
++      map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL,
++                                 knx->numa_node);
 +      if (!map->sglist) {
 +              ret = -ENOMEM;
 +              goto bail_free_rcvegr_phys;
@@ -1619,10 +1560,10 @@ index 0000000..c15276f
 +              rcd->rcvegrbuf[i] = map->pages->va[i];
 +      }
 +
-+      for (egrbufcnt = i = 0; i < num_pages ; i++) {
++      for (egrbufcnt = i = 0; i < num_pages; i++) {
 +              page = rcd->rcvegrbuf_phys[i];
 +              dma_addr = page;
-+              for (bufcnt = 0 ; egrbufcnt < egrcnt && bufcnt < bufs_ppage;
++              for (bufcnt = 0; egrbufcnt < egrcnt && bufcnt < bufs_ppage;
 +                   egrbufcnt++, bufcnt++) {
 +                      dd->f_put_tid(dd, rcd->rcvegr_tid_base +
 +                                         egrbufcnt +
@@ -1650,7 +1591,7 @@ index 0000000..c15276f
 +
 +void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 +{
-+      struct qib_knx *knx = dd_to_knx(dd);
++      struct qib_knx *knx = rcd->krcd->knx;
 +      struct qib_knx_ctxt *ctxt;
 +      char buf[16];
 +      int i, ret = 0;
@@ -1658,7 +1599,11 @@ index 0000000..c15276f
 +      if (!rcd || !knx || !knx->ctxts)
 +              return;
 +
++      spin_lock(&knx->ctxt_lock);
 +      ctxt = knx->ctxts[rcd->ctxt];
++      knx->ctxts[rcd->ctxt] = NULL;
++      spin_unlock(&knx->ctxt_lock);
++
 +      if (!ctxt)
 +              return;
 +
@@ -1704,12 +1649,535 @@ index 0000000..c15276f
 +              qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf);
 +      }
 +
-+      /* MITKO XXX: handle rcd->tid_pg_list */
-+      knx->ctxts[rcd->ctxt] = NULL;
 +      kfree(ctxt);
 +      kfree(rcd);
 +}
 +
++/*
++ * TID management for processes on the MIC happens on the MIC. Therefore,
++ * we only register the HW TID array here.
++ * The MIC will calculate TID array offsets using the same algorithm as
++ * the host. Therefore, it is OK that the entire HW TID array is mapped
++ * since neither side should step on the other.
++ */
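++/*
++ * Illustrative layout (the 8-byte entry size is an assumption, not
++ * spelled out in this patch): the HW TID entry for (ctxt, tid) would
++ * live at
++ *
++ *     tidbase + (ctxt * tidcnt + tid) * sizeof(u64)
++ *
++ * which is consistent with tidbase_len below covering all contexts.
++ */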
++static int qib_knx_tidrcv_init(struct qib_knx *knx)
++{
++      struct qib_devdata *dd = knx->dd;
++      struct qib_knx_tid_info info;
++      void *tidbase;
++      int ret = 0;
++      off_t offset = 0;
++      ssize_t len;
++      char buf[64];
++
++      memset(&info, 0, sizeof(info));
++
++      info.tidcnt = dd->rcvtidcnt;
++      tidbase = ((char *)dd->kregbase + dd->rcvtidbase);
++      info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase);
++      info.tidtemplate = dd->tidtemplate;
++      info.invalidtid = dd->tidinvalid;
++      /* information needed to properly calculate DMA address to MIC pages */
++      info.bar_addr = knx->bar;
++      info.bar_len = knx->barlen;
++
++      snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
++      offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase,
++                                       info.tidbase_len, SCIF_PROT_WRITE,
++                                       buf);
++      info.tidbase_offset = offset;
++      if (IS_ERR_VALUE(offset))
++              ret = offset;
++      len = scif_send(knx->epd.epd, &info, sizeof(info),
++                      SCIF_SEND_BLOCK);
++      if (len < (ssize_t)sizeof(info))
++              ret = -EFAULT;
++      return ret;
++}
++
++static int qib_knx_tidrcv_teardown(struct qib_knx *knx)
++{
++      char buf[64];
++      snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
++      return qib_knx_unregister_memory(knx, &knx->tidmem, buf);
++}
++
++static int qib_knx_sdma_init(struct qib_knx *knx)
++{
++      struct qib_knx_host_mem flags;
++      struct qib_knx_knc_mem mflags;
++      struct qib_knx_sdma *sdma;
++      char buf[64];
++      int ret = 0;
++
++      sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node);
++      if (!sdma) {
++              ret = -ENOMEM;
++              goto done;
++      }
++      sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node);
++      if (!sdma->hflags) {
++              ret = -ENOMEM;
++              goto done_free;
++      }
++      snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node);
++      flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem,
++                                                   sdma->hflags,
++                                                   PAGE_SIZE,
++                                                   SCIF_PROT_WRITE,
++                                                   buf);
++      if (IS_ERR_VALUE(flags.flags_offset)) {
++              ret = flags.flags_offset;
++              goto free_flags;
++      }
++      sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt;
++      flags.desc_num = sdma->desc_num;
++      ret = scif_send(knx->epd.epd, &flags, sizeof(flags),
++                      SCIF_SEND_BLOCK);
++      if (ret < (int)sizeof(flags)) {
++              ret = -EFAULT;
++              goto unregister;
++      }
++      ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags),
++                      SCIF_RECV_BLOCK);
++      if (ret < (int)sizeof(mflags)) {
++              ret = -EINVAL;
++              goto unregister;
++      }
++      ret = scif_get_pages(knx->epd.epd, mflags.flags_offset,
++                           PAGE_SIZE, &sdma->mflag_pages);
++      if (ret < 0 || !sdma->mflag_pages->nr_pages) {
++              ret = -EFAULT;
++              goto unregister;
++      }
++      sdma->mflags = sdma->mflag_pages->va[0];
++      ret = scif_get_pages(knx->epd.epd, mflags.queue_offset,
++                           mflags.queue_len, &sdma->queue_pages);
++      if (ret < 0)
++              goto put_flags;
++      if ((sdma->queue_pages->nr_pages * PAGE_SIZE) !=
++          mflags.queue_len) {
++              ret = -EFAULT;
++              goto put_queue;
++      }
++      sdma->queue = sdma->queue_pages->va[0];
++      sdma->complete = -1;
++      sdma->head = -1;
++      /* set the initial trigger value */
++      QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1);
++      QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
++      snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll",
++               knx->peer.node);
++      knx->sdma = sdma;
++      ret = 0;
++      goto done;
++put_queue:
++      scif_put_pages(sdma->queue_pages);
++put_flags:
++      scif_put_pages(sdma->mflag_pages);
++unregister:
++      qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf);
++free_flags:
++      kfree(sdma->hflags);
++done_free:
++      kfree(sdma);
++done:
++      /*
++       * we have to respond to the MIC so it doesn't get stuck
++       * in the scif_recv call
++       */
++      scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK);
++      return ret;
++}
++
++static void qib_knx_sdma_teardown(struct qib_knx *knx)
++{
++      if (knx->sdma_poll)
++              kthread_stop(knx->sdma_poll);
++      if (knx->sdma) {
++              if (knx->sdma->queue_pages->nr_pages) {
++                      knx->sdma->queue = NULL;
++                      scif_put_pages(knx->sdma->queue_pages);
++              }
++              if (knx->sdma->mflag_pages->nr_pages) {
++                      knx->sdma->mflags = NULL;
++                      scif_put_pages(knx->sdma->mflag_pages);
++              }
++              kfree(knx->sdma->hflags);
++              kfree(knx->sdma);
++              knx->sdma = NULL;
++      }
++}
++
++int qib_knx_sdma_queue_create(struct file *fd)
++{
++      struct qib_ctxtdata *rcd = ctxt_fp(fd);
++      struct qib_devdata *dd = rcd->dd;
++      struct qib_knx *knx = rcd->krcd->knx;
++      struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt];
++      u8 subctxt = subctxt_fp(fd);
++      int ret = 0;
++
++      if (!ctxt) {
++              ret = -EINVAL;
++              goto done;
++      }
++      ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev,
++                                                     dd->unit, rcd->ctxt,
++                                                     subctxt);
++      if (!ctxt->pq[subctxt]) {
++              ret = -ENOMEM;
++              goto done;
++      }
++      user_sdma_queue_fp(fd) = ctxt->pq[subctxt];
++      /*
++       * We start the polling thread the first time a user SDMA
++       * queue is created. There is no reason to take up CPU
++       * cycles before then.
++       */
++      if (atomic_inc_return(&knx->tref) == 1) {
++              knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx,
++                                           knx->tname);
++              if (IS_ERR(knx->sdma_poll)) {
++                      ret = PTR_ERR(knx->sdma_poll);
++                      atomic_dec(&knx->tref);
++                      goto free_queue;
++              }
++      }
++      goto done;
++free_queue:
++      user_sdma_queue_fp(fd) = NULL;
++      qib_user_sdma_queue_destroy(ctxt->pq[subctxt]);
++      ctxt->pq[subctxt] = NULL;
++done:
++      return ret;
++}
++
++void qib_knx_sdma_queue_destroy(struct qib_filedata *fd)
++{
++      struct qib_ctxtdata *rcd = fd->rcd;
++      struct qib_knx *knx;
++      unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt;
++
++      /* Host processes do not have a KNX rcd pointer. */
++      if (!rcd->krcd)
++              return;
++      knx = rcd->krcd->knx;
++      /* We still have the memory pointer through fd->pq */
++      spin_lock(&knx->ctxt_lock);
++      if (knx->ctxts[ctxt])
++              knx->ctxts[ctxt]->pq[subctxt] = NULL;
++      spin_unlock(&knx->ctxt_lock);
++      if (atomic_dec_and_test(&knx->tref)) {
++              kthread_stop(knx->sdma_poll);
++              knx->sdma_poll = NULL;
++      }
++}
++
++/*
++ * Convert a MIC physical address to the corresponding host page.
++ */
++static __always_inline struct page *
++qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr)
++{
++      unsigned long paddr;
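++      /* addr is an offset into the MIC's BAR 0 aperture; reject pages past its end */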
++      if ((knx->bar + addr + PAGE_SIZE) >
++          (knx->bar + knx->barlen))
++              return NULL;
++      paddr = knx->bar + addr;
++      return pfn_to_page(paddr >> PAGE_SHIFT);
++}
++
++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt,
++                                    struct qib_knx_sdma_desc *desc,
++                                    struct qib_user_sdma_queue *pq,
++                                    int *ndesc, struct list_head *list)
++{
++      struct qib_knx *knx = ctxt->knx;
++      struct qib_user_sdma_pkt *pkt;
++      dma_addr_t pbc_dma_addr;
++      unsigned pktnw, pbcnw;
++      u32 counter;
++      u16 frag_size;
++      int ret = 0;
++      __le32 *pbc;
++
++      counter = pq->counter;
++
++      pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr);
++      if (!pbc) {
++              ret = -ENOMEM;
++              goto done;
++      }
++      memcpy(pbc, desc->pbc, desc->pbclen);
++
++      pktnw = (le32_to_cpu(*pbc) & 0xFFFF);
++      /*
++       * This assignment is a bit strange.  it's because the
++       * the pbc counts the number of 32 bit words in the full
++       * packet _except_ the first word of the pbc itself...
++       */
++      pbcnw = (desc->pbclen >> 2) - 1;
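++      /*
++       * e.g. a 16-byte pbc gives pbcnw = 3; a payload of desc->length
++       * bytes must then show up as pktnw = (length >> 2) + 3, which is
++       * exactly what the checks below enforce.
++       */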
++
++      if (pktnw < pbcnw) {
++              ret = -EINVAL;
++              goto free_pbc;
++      }
++
++      if (pktnw != ((desc->length >> 2) + pbcnw)) {
++              ret = -EINVAL;
++              goto free_pbc;
++      }
++
++      frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF;
++      if (((frag_size ? frag_size : desc->length) + desc->pbclen) >
++          ctxt->ppd->ibmaxlen) {
++              ret = -EINVAL;
++              goto free_pbc;
++      }
++      if (frag_size) {
++              /* new SDMA "protocol" */
++              unsigned pktsize, n;
++
++              n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1);
++              pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n;
++
++              pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL);
++              if (!pkt) {
++                      ret = -ENOMEM;
++                      goto free_pbc;
++              }
++              pkt->largepkt = 1;
++              pkt->frag_size = frag_size;
++              pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
++
++              if (desc->tidlen) {
++                      char *tidsmptr = (char *)pkt + pktsize;
++                      memcpy(tidsmptr, desc->tidsm, desc->tidlen);
++                      pkt->tidsm =
++                              (struct qib_tid_session_member *)tidsmptr;
++                      pkt->tidsmcount = desc->tidlen /
++                              sizeof(*desc->tidsm);
++                      pkt->tidsmidx = 0;
++              }
++              *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
++      } else {
++              /* old SDMA */
++              pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
++              if (!pkt) {
++                      ret = -ENOMEM;
++                      goto free_pbc;
++              }
++              pkt->largepkt = 0;
++              pkt->frag_size = desc->length;
++              pkt->addrlimit = ARRAY_SIZE(pkt->addr);
++      }
++      pkt->bytes_togo = desc->length;
++      pkt->payload_size = 0;
++      pkt->counter = counter;
++      pkt->tiddma = !!desc->tidlen;
++      /*
++       * The generic user SDMA code will use this as a flag to
++       * decide whether to call the KNx-specific pkt free
++       * function. However, it doesn't know what the value
++       * actually means.
++       */
++      pkt->remote = (u64)knx;
++
++      qib_user_sdma_init_frag(pkt, 0,
++                              0, desc->pbclen,
++                              1, 0,
++                              0, 0,
++                              NULL, pbc,
++                              pbc_dma_addr, desc->pbclen);
++      pkt->index = 0;
++      pkt->naddr = 1;
++
++      if (desc->npages) {
++              /* we have user data */
++              int i;
++              struct page *page;
++              unsigned plen = 0, len = desc->length;
++              for (i = 0; i < desc->npages; i++) {
++                      unsigned long off = (i == 0 ? desc->offset : 0);
++                      plen = (len > PAGE_SIZE ? PAGE_SIZE : len);
++                      page = qib_knx_phys_to_page(knx, desc->pages[i]);
++                      ret = qib_user_sdma_page_to_frags(knx->dd, pq,
++                                 pkt, page, 0, off,
++                                 (off + plen > PAGE_SIZE ?
++                                  PAGE_SIZE - off : plen),
++                                 NULL);
++                      if (ret < 0)
++                              goto free_sdma;
++                      len -= plen - off;
++              }
++      } else {
++              pkt->addr[0].last_desc = 1;
++              if (pbc_dma_addr == 0) {
++                      pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev,
++                                                    pbc, desc->pbclen,
++                                                    DMA_TO_DEVICE);
++                      if (dma_mapping_error(&knx->dd->pcidev->dev,
++                                            pbc_dma_addr)) {
++                              ret = -ENOMEM;
++                              goto free_sdma;
++                      }
++                      pkt->addr[0].addr = pbc_dma_addr;
++                      pkt->addr[0].dma_mapped = 1;
++              }
++      }
++      counter++;
++      pkt->pq = pq;
++      pkt->index = 0;
++      *ndesc = pkt->naddr;
++
++      list_add_tail(&pkt->list, list);
++      goto done;
++free_sdma:
++      if (pkt->largepkt)
++              kfree(pkt);
++      else
++              kmem_cache_free(pq->pkt_slab, pkt);
++free_pbc:
++      if (pbc_dma_addr)
++              dma_pool_free(pq->header_cache, pbc, pbc_dma_addr);
++      else
++              kfree(pbc);
++done:
++      return ret;
++}
++
++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt)
++{
++      struct qib_knx *knx = (struct qib_knx *)pkt->remote;
++      struct qib_knx_sdma *sdma = knx->sdma;
++      sdma_next(sdma, complete);
++      QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
++}
++
++static int qib_knx_sdma_poll(void *data)
++{
++      struct qib_knx *knx = (struct qib_knx *)data;
++      struct qib_knx_ctxt *ctxt;
++      struct qib_knx_sdma_desc desc;
++      struct qib_knx_sdma *sdma = knx->sdma;
++      struct qib_user_sdma_queue *pq;
++      struct list_head list;
++      u32 new_head;
++      int ret = 0, ndesc = 0, added;
++
++      if (!sdma)
++              return -EFAULT;
++
++      while (!kthread_should_stop()) {
++              added = 0;
++              new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger);
++              while (sdma->head != new_head) {
++                      knx_sdma_next(sdma);
++                      qib_knx_memcpy(&desc, sdma->queue + sdma->head,
++                                     sizeof(desc));
++                      if (!desc.ctxt) {
++                              QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
++                              continue;
++                      }
++                      spin_lock(&knx->ctxt_lock);
++                      ctxt = knx->ctxts[desc.ctxt];
++                      if (!ctxt) {
++                              /* we should never get here */
++                              QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
++                              goto done_unlock;
++                      }
++                      pq = ctxt->pq[desc.subctxt];
++                      if (!pq) {
++                              QIB_KNX_SDMA_STATUS(sdma, -EFAULT);
++                              goto done_unlock;
++                      }
++                      mutex_lock(&pq->lock);
++                      if (pq->added > ctxt->ppd->sdma_descq_removed)
++                              qib_user_sdma_hwqueue_clean(ctxt->ppd);
++                      if (pq->num_sending)
++                              qib_user_sdma_queue_clean(ctxt->ppd, pq);
++
++                      INIT_LIST_HEAD(&list);
++                      ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq,
++                                                       &ndesc, &list);
++                      QIB_KNX_SDMA_STATUS(sdma, ret);
++                      if (!list_empty(&list)) {
++                              if (qib_sdma_descq_freecnt(ctxt->ppd) <
++                                  ndesc) {
++                                      qib_user_sdma_hwqueue_clean(
++                                              ctxt->ppd);
++                                      if (pq->num_sending)
++                                              qib_user_sdma_queue_clean(
++                                                      ctxt->ppd, pq);
++                              }
++                              ret = qib_user_sdma_push_pkts(ctxt->ppd,
++                                                            pq, &list, 1);
++                              if (ret < 0)
++                                      goto free_pkts;
++                              else {
++                                      pq->counter++;
++                                      added++;
++                              }
++                      }
++free_pkts:
++                      if (!list_empty(&list))
++                              qib_user_sdma_free_pkt_list(
++                                      &knx->dd->pcidev->dev, pq, &list);
++                      mutex_unlock(&pq->lock);
++done_unlock:
++                      spin_unlock(&knx->ctxt_lock);
++              }
++              if (!added) {
++                      int i;
++                      /*
++                       * Push the queues along
++                       * The polling thread will enter the inner loop only
++                       * if the KNX has posted new descriptors to the queue.
++                       * However, any packets that have been completed by
++                       * the HW need to be cleaned and that won't happen
++                       * unless we explicitly check.
++                       */
++                      for (i = 0;
++                           i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT;
++                           i++) {
++                              int c = i / QLOGIC_IB_MAX_SUBCTXT,
++                                      s = i % QLOGIC_IB_MAX_SUBCTXT;
++                              spin_lock(&knx->ctxt_lock);
++                              ctxt = knx->ctxts[c];
++                              if (!ctxt)
++                                      goto loop_unlock;
++                              pq = ctxt->pq[s];
++                              if (!pq)
++                                      goto loop_unlock;
++                              mutex_lock(&pq->lock);
++                              if (pq->num_sending)
++                                      qib_user_sdma_queue_clean(ctxt->ppd,
++                                                                pq);
++                              mutex_unlock(&pq->lock);
++loop_unlock:
++                              spin_unlock(&knx->ctxt_lock);
++                      }
++                      cond_resched();
++              }
++      }
++      return ret;
++}
++
++void qib_knx_remove_device(struct qib_devdata *dd)
++{
++      if (server && dd->num_knx) {
++              struct qib_knx *knx, *knxp;
++              list_for_each_entry_safe(knx, knxp, &server->clients, list) {
++                      if (knx->dd == dd) {
++                              spin_lock(&server->client_lock);
++                              list_del(&knx->list);
++                              server->nclients--;
++                              spin_unlock(&server->client_lock);
++                              qib_knx_free(knx, 0);
++                              kfree(knx);
++                      }
++              }
++      }
++}
++
 +int __init qib_knx_server_init(void)
 +{
 +      server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL);
@@ -1728,7 +2196,6 @@ index 0000000..c15276f
 +{
 +      if (server) {
 +              struct qib_knx *t, *tt;
-+
 +              /* Stop the thread so we don't accept any new connections. */
 +              kthread_stop(server->kthread);
 +              list_for_each_entry_safe(t, tt, &server->clients, list) {
@@ -1743,12 +2210,12 @@ index 0000000..c15276f
 +}
 diff --git a/drivers/infiniband/hw/qib/qib_knx.h b/drivers/infiniband/hw/qib/qib_knx.h
 new file mode 100644
-index 0000000..d767a60
+index 0000000..0e8d7ce
 --- /dev/null
 +++ b/drivers/infiniband/hw/qib/qib_knx.h
-@@ -0,0 +1,63 @@
+@@ -0,0 +1,74 @@
 +/*
-+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
 + *
 + * This software is available to you under a choice of one of two
 + * licenses.  You may choose to be licensed under the terms of the GNU
@@ -1791,15 +2258,15 @@ index 0000000..d767a60
 +      QIB_KNX_CTXTINFO_FLAGS
 +};
 +
++#ifdef QIB_CONFIG_KNX
 +int __init qib_knx_server_init(void);
 +void __exit qib_knx_server_exit(void);
-+static __always_inline struct qib_knx *dd_to_knx(struct qib_devdata *dd)
-+{
-+      return (struct qib_knx *)dd->knx;
-+}
++
++void qib_knx_remove_device(struct qib_devdata *);
++
 +inline struct qib_knx *qib_knx_get(uint16_t);
 +inline struct qib_devdata *qib_knx_node_to_dd(uint16_t);
-+int qib_knx_alloc_ctxt(struct qib_devdata *, unsigned);
++int qib_knx_alloc_ctxt(u16, unsigned);
 +int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16);
 +int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *,
 +                        struct qib_base_info *);
@@ -1809,13 +2276,24 @@ index 0000000..d767a60
 +void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
 +__u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type,
 +                      struct file *);
++int qib_knx_sdma_queue_create(struct file *);
++void qib_knx_sdma_queue_destroy(struct qib_filedata *);
++#else
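++/* Non-KNX builds get a no-op stub so callers need no #ifdef at the call site. */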
++static inline u64 qib_knx_ctxt_info(
++      struct qib_ctxtdata *rcd,
++      enum qib_knx_ctxtinfo_type type,
++      struct file *fp)
++{
++      return 0;
++}
++#endif
 +#endif /* _QIB_KNX_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_sdma.h b/drivers/infiniband/hw/qib/qib_knx_sdma.h
+diff --git a/drivers/infiniband/hw/qib/qib_knx_common.h b/drivers/infiniband/hw/qib/qib_knx_common.h
 new file mode 100644
-index 0000000..8c67b1f
+index 0000000..53c521f
 --- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_knx_sdma.h
-@@ -0,0 +1,105 @@
++++ b/drivers/infiniband/hw/qib/qib_knx_common.h
+@@ -0,0 +1,126 @@
 +/*
 + * Copyright (c) 2013 Intel Corporation. All rights reserved.
 + *
@@ -1847,11 +2325,15 @@ index 0000000..8c67b1f
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 + * SOFTWARE.
 + */
-+#ifndef _QIB_KNX_SDMA_H
-+#define _QIB_KNX_SDMA_H
++#ifndef _QIB_KNX_COMMON_H
++#define _QIB_KNX_COMMON_H
++
++struct qib_device_info {
++      u16 unit;
++};
 +
 +#define QIB_SDMA_MAX_NPAGES 33
-+#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld
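++/* The flag words are updated by the other side; force a fresh load on each read. */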
++#define QIB_KNX_SDMA_VALUE(fld) (*(volatile u64 *)&(fld))
 +#define QIB_KNX_SDMA_SET(fld, val)            \
 +      do {                                    \
 +              fld = (u64)(val);               \
@@ -1870,9 +2352,9 @@ index 0000000..8c67b1f
 +};
 +
 +struct qib_tid_sm {
-+        __u16 tid;
-+        __u16 offset;
-+        __u16 length;
++      __u16 tid;
++      __u16 offset;
++      __u16 length;
 +};
 +
 +/*
@@ -1889,7 +2371,7 @@ index 0000000..8c67b1f
 +      u64 length;
 +      u32 npages;
 +      unsigned tidlen;
-+        off_t offset;
++      off_t offset;
 +      unsigned long pages[QIB_SDMA_MAX_NPAGES];
 +      /* This array is 198B so the compiler will pad
 +       * it by 2B to make it multiple of 8B. */
@@ -1913,6 +2395,9 @@ index 0000000..8c67b1f
 +      u64 __padding[7];
 +};
 +
++#define sdma_next(s, fld) \
++      ((s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1))
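++/* Ring advance with wraparound; equivalent to ((s)->fld + 1) % (s)->desc_num. */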
++
 +struct qib_knx_sdma_mflags {
 +      u64 status;
 +      u64 __padding1[7];
@@ -1920,61 +2405,407 @@ index 0000000..8c67b1f
 +      u64 __padding2[7];
 +};
 +
-+#endif /* _QIB_KNX_SDMA_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-new file mode 100644
-index 0000000..842fca1
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-@@ -0,0 +1,48 @@
-+/*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses.  You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ *     Redistribution and use in source and binary forms, with or
-+ *     without modification, are permitted provided that the following
-+ *     conditions are met:
-+ *
-+ *      - Redistributions of source code must retain the above
-+ *        copyright notice, this list of conditions and the following
-+ *        disclaimer.
-+ *
-+ *      - Redistributions in binary form must reproduce the above
-+ *        copyright notice, this list of conditions and the following
-+ *        disclaimer in the documentation and/or other materials
-+ *        provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+#ifndef _QIB_KNX_TIDRCV_H
-+
 +struct qib_knx_tid_info {
 +      /* this is the entire set of 512 entries (= 4K) so
-+         * we can resgister. subctxt devision will be done
-+         * in MIC driver. */
-+        off_t tidbase_offset;
-+        size_t tidbase_len;
-+        u64 tidbase;
-+        unsigned tidcnt;
-+        u64 tidtemplate;
-+        unsigned long invalidtid;
-+        u64 bar_addr;
-+        u64 bar_len;
++       * we can register. Subctxt division will be done
++       * in the MIC driver. */
++      off_t tidbase_offset;
++      size_t tidbase_len;
++      u64 tidbase;
++      unsigned tidcnt;
++      u64 tidtemplate;
++      unsigned long invalidtid;
++      u64 bar_addr;
++      u64 bar_len;
 +};
 +
-+#endif /* QIB_KNX_TIDRCV_H */
--- 
-1.8.3.1
-
++#endif /* _QIB_KNX_COMMON_H */
+diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
+index d2806ca..c25bd5a 100644
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
+@@ -63,80 +63,6 @@ struct qib_user_sdma_rb_node {
+       pid_t pid;
+ };
+-struct qib_user_sdma_pkt {
+-      struct list_head list;  /* list element */
+-
+-      u8  tiddma;             /* if this is NEW tid-sdma */
+-      u8  largepkt;           /* this is large pkt from kmalloc */
+-      u16 frag_size;          /* frag size used by PSM */
+-      u16 index;              /* last header index or push index */
+-      u16 naddr;              /* dimension of addr (1..3) ... */
+-      u16 addrlimit;          /* addr array size */
+-      u16 tidsmidx;           /* current tidsm index */
+-      u16 tidsmcount;         /* tidsm array item count */
+-      u16 payload_size;       /* payload size so far for header */
+-      u32 bytes_togo;         /* bytes for processing */
+-      u32 counter;            /* sdma pkts queued counter for this entry */
+-      struct qib_tid_session_member *tidsm;   /* tid session member array */
+-      struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
+-      u64 added;              /* global descq number of entries */
+-
+-      struct {
+-              u16 offset;                     /* offset for kvaddr, addr */
+-              u16 length;                     /* length in page */
+-              u16 first_desc;                 /* first desc */
+-              u16 last_desc;                  /* last desc */
+-              u16 put_page;                   /* should we put_page? */
+-              u16 dma_mapped;                 /* is page dma_mapped? */
+-              u16 dma_length;                 /* for dma_unmap_page() */
+-              u16 padding;
+-              struct page *page;              /* may be NULL (coherent mem) */
+-              void *kvaddr;                   /* FIXME: only for pio hack */
+-              dma_addr_t addr;
+-      } addr[4];   /* max pages, any more and we coalesce */
+-};
+-
+-struct qib_user_sdma_queue {
+-      /*
+-       * pkts sent to dma engine are queued on this
+-       * list head.  the type of the elements of this
+-       * list are struct qib_user_sdma_pkt...
+-       */
+-      struct list_head sent;
+-
+-      /*
+-       * Because above list will be accessed by both process and
+-       * signal handler, we need a spinlock for it.
+-       */
+-      spinlock_t sent_lock ____cacheline_aligned_in_smp;
+-
+-      /* headers with expected length are allocated from here... */
+-      char header_cache_name[64];
+-      struct dma_pool *header_cache;
+-
+-      /* packets are allocated from the slab cache... */
+-      char pkt_slab_name[64];
+-      struct kmem_cache *pkt_slab;
+-
+-      /* as packets go on the queued queue, they are counted... */
+-      u32 counter;
+-      u32 sent_counter;
+-      /* pending packets, not sending yet */
+-      u32 num_pending;
+-      /* sending packets, not complete yet */
+-      u32 num_sending;
+-      /* global descq number of entry of last sending packet */
+-      u64 added;
+-
+-      /* dma page table */
+-      struct rb_root dma_pages_root;
+-
+-      struct qib_user_sdma_rb_node *sdma_rb_node;
+-
+-      /* protect everything above... */
+-      struct mutex lock;
+-};
+-
+ static struct qib_user_sdma_rb_node *
+ qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+ {
+@@ -254,12 +180,12 @@ done:
+       return pq;
+ }
+-static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+-                                  int i, u16 offset, u16 len,
+-                                  u16 first_desc, u16 last_desc,
+-                                  u16 put_page, u16 dma_mapped,
+-                                  struct page *page, void *kvaddr,
+-                                  dma_addr_t dma_addr, u16 dma_length)
++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
++                           int i, u16 offset, u16 len,
++                           u16 first_desc, u16 last_desc,
++                           u16 put_page, u16 dma_mapped,
++                           struct page *page, void *kvaddr,
++                           dma_addr_t dma_addr, u16 dma_length)
+ {
+       pkt->addr[i].offset = offset;
+       pkt->addr[i].length = len;
+@@ -273,7 +199,7 @@ static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+       pkt->addr[i].dma_length = dma_length;
+ }
+-static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+                               size_t len, dma_addr_t *dma_addr)
+ {
+       void *hdr;
+@@ -295,11 +221,11 @@ static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+       return hdr;
+ }
+-static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+-                                     struct qib_user_sdma_queue *pq,
+-                                     struct qib_user_sdma_pkt *pkt,
+-                                     struct page *page, u16 put,
+-                                     u16 offset, u16 len, void *kvaddr)
++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
++                              struct qib_user_sdma_queue *pq,
++                              struct qib_user_sdma_pkt *pkt,
++                              struct page *page, u16 put,
++                              u16 offset, u16 len, void *kvaddr)
+ {
+       __le16 *pbc16;
+       void *pbcvaddr;
+@@ -314,21 +240,27 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+       int ret = 0;
+       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+-              /*
+-               * dma mapping error, pkt has not managed
+-               * this page yet, return the page here so
+-               * the caller can ignore this page.
+-               */
+-              if (put) {
+-                      put_page(page);
+-              } else {
+-                      /* coalesce case */
+-                      kunmap(page);
+-                      __free_page(page);
++#ifdef QIB_CONFIG_KNX
++              if (!pkt->remote) {
++#endif
++                      /*
++                       * dma mapping error, pkt has not managed
++                       * this page yet, return the page here so
++                       * the caller can ignore this page.
++                       */
++                      if (put) {
++                              put_page(page);
++                      } else {
++                              /* coalesce case */
++                              kunmap(page);
++                              __free_page(page);
++                      }
++                      ret = -ENOMEM;
++                      goto done;
+               }
+-              ret = -ENOMEM;
+-              goto done;
++#ifdef QIB_CONFIG_KNX
+       }
++#endif
+       offset = 0;
+       dma_mapped = 1;
+@@ -630,13 +562,19 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
+                                      pkt->addr[i].dma_length,
+                                      DMA_TO_DEVICE);
+-              if (pkt->addr[i].kvaddr)
+-                      kunmap(pkt->addr[i].page);
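++              /* Pages that came from the MIC (pkt->remote) are not host-owned. */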
++#ifdef QIB_CONFIG_KNX
++              if (!pkt->remote) {
++#endif
++                      if (pkt->addr[i].kvaddr)
++                              kunmap(pkt->addr[i].page);
+-              if (pkt->addr[i].put_page)
+-                      put_page(pkt->addr[i].page);
+-              else
+-                      __free_page(pkt->addr[i].page);
++                      if (pkt->addr[i].put_page)
++                              put_page(pkt->addr[i].page);
++                      else
++                              __free_page(pkt->addr[i].page);
++#ifdef QIB_CONFIG_KNX
++              }
++#endif
+       } else if (pkt->addr[i].kvaddr) {
+               /* for headers */
+               if (pkt->addr[i].dma_mapped) {
+@@ -775,9 +713,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
+ }
+ /* free a packet list -- return counter value of last packet */
+-static void qib_user_sdma_free_pkt_list(struct device *dev,
+-                                      struct qib_user_sdma_queue *pq,
+-                                      struct list_head *list)
++void qib_user_sdma_free_pkt_list(struct device *dev,
++                               struct qib_user_sdma_queue *pq,
++                               struct list_head *list)
+ {
+       struct qib_user_sdma_pkt *pkt, *pkt_next;
+@@ -787,6 +725,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
+               for (i = 0; i < pkt->naddr; i++)
+                       qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
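++              /*
++               * Freeing a KNX-originated packet advances the shared
++               * completion index so the MIC can reclaim its slot.
++               */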
++#ifdef QIB_CONFIG_KNX
++              if (pkt->remote)
++                      qib_knx_sdma_free_pkt(pkt);
++#endif
+               if (pkt->largepkt)
+                       kfree(pkt);
+               else
+@@ -970,6 +912,9 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+               pkt->payload_size = 0;
+               pkt->counter = counter;
+               pkt->tiddma = tiddma;
++#ifdef QIB_CONFIG_KNX
++              pkt->remote = 0;
++#endif
+               /* setup the first header */
+               qib_user_sdma_init_frag(pkt, 0, /* index */
+@@ -1045,8 +990,8 @@ static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
+ }
+ /* try to clean out queue -- needs pq->lock */
+-static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
+-                                   struct qib_user_sdma_queue *pq)
++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
++                            struct qib_user_sdma_queue *pq)
+ {
+       struct qib_devdata *dd = ppd->dd;
+       struct list_head free_list;
+@@ -1110,7 +1055,7 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
+ }
+ /* clean descriptor queue, returns > 0 if some elements cleaned */
+-static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
+ {
+       int ret;
+       unsigned long flags;
+@@ -1321,9 +1266,9 @@ retry:
+ }
+ /* pq->lock must be held, get packets on the wire... */
+-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+-                               struct qib_user_sdma_queue *pq,
+-                               struct list_head *pktlist, int count)
++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
++                          struct qib_user_sdma_queue *pq,
++                          struct list_head *pktlist, int count)
+ {
+       unsigned long flags;
+diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
+index ce8cbaf..07d5bc5 100644
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.h
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
+@@ -31,12 +31,108 @@
+  */
+ #include <linux/device.h>
+-struct qib_user_sdma_queue;
++struct qib_user_sdma_pkt {
++      struct list_head list;  /* list element */
++
++      u8  tiddma;             /* if this is NEW tid-sdma */
++      u8  largepkt;           /* this is large pkt from kmalloc */
++      u16 frag_size;          /* frag size used by PSM */
++      u16 index;              /* last header index or push index */
++      u16 naddr;              /* dimension of addr (1..3) ... */
++      u16 addrlimit;          /* addr array size */
++      u16 tidsmidx;           /* current tidsm index */
++      u16 tidsmcount;         /* tidsm array item count */
++      u16 payload_size;       /* payload size so far for header */
++      u32 bytes_togo;         /* bytes for processing */
++      u32 counter;            /* sdma pkts queued counter for this entry */
++      struct qib_tid_session_member *tidsm;   /* tid session member array */
++      struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
++      u64 added;              /* global descq number of entries */
++#ifdef QIB_CONFIG_KNX
++      u64 remote;             /* set (to a struct qib_knx *) if the pkt came from the KNX */
++#endif
++
++      struct {
++              u16 offset;                     /* offset for kvaddr, addr */
++              u16 length;                     /* length in page */
++              u16 first_desc;                 /* first desc */
++              u16 last_desc;                  /* last desc */
++              u16 put_page;                   /* should we put_page? */
++              u16 dma_mapped;                 /* is page dma_mapped? */
++              u16 dma_length;                 /* for dma_unmap_page() */
++              u16 padding;
++              struct page *page;              /* may be NULL (coherent mem) */
++              void *kvaddr;                   /* FIXME: only for pio hack */
++              dma_addr_t addr;
++      } addr[4];   /* max pages, any more and we coalesce */
++};
++
++struct qib_user_sdma_queue {
++      /*
++       * pkts sent to dma engine are queued on this
++       * list head.  the type of the elements of this
++       * list are struct qib_user_sdma_pkt...
++       */
++      struct list_head sent;
++
++      /*
++       * Because above list will be accessed by both process and
++       * signal handler, we need a spinlock for it.
++       */
++      spinlock_t sent_lock ____cacheline_aligned_in_smp;
++
++      /* headers with expected length are allocated from here... */
++      char header_cache_name[64];
++      struct dma_pool *header_cache;
++
++      /* packets are allocated from the slab cache... */
++      char pkt_slab_name[64];
++      struct kmem_cache *pkt_slab;
++
++      /* as packets go on the queued queue, they are counted... */
++      u32 counter;
++      u32 sent_counter;
++      /* pending packets, not sending yet */
++      u32 num_pending;
++      /* sending packets, not complete yet */
++      u32 num_sending;
++      /* global descq number of entry of last sending packet */
++      u64 added;
++
++      /* dma page table */
++      struct rb_root dma_pages_root;
++
++      struct qib_user_sdma_rb_node *sdma_rb_node;
++
++      /* protect everything above... */
++      struct mutex lock;
++};
+ struct qib_user_sdma_queue *
+ qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
+-
++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
++                               size_t len, dma_addr_t *dma_addr);
++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
++                           int i, u16 offset, u16 len,
++                           u16 first_desc, u16 last_desc,
++                           u16 put_page, u16 dma_mapped,
++                           struct page *page, void *kvaddr,
++                           dma_addr_t dma_addr, u16 dma_length);
++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
++                              struct qib_user_sdma_queue *pq,
++                              struct qib_user_sdma_pkt *pkt,
++                              struct page *page, u16 put,
++                              u16 offset, u16 len, void *kvaddr);
++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd);
++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
++                            struct qib_user_sdma_queue *pq);
++void qib_user_sdma_free_pkt_list(struct device *dev,
++                               struct qib_user_sdma_queue *pq,
++                               struct list_head *list);
++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
++                          struct qib_user_sdma_queue *pq,
++                          struct list_head *pktlist, int count);
+ int qib_user_sdma_writev(struct qib_ctxtdata *pd,
+                        struct qib_user_sdma_queue *pq,
+                        const struct iovec *iov,
+@@ -50,3 +146,9 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+ u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
+ u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
++
++/*
++ * This function prototype somewhat pollutes this header file,
++ * but I don't want to create a new header file just for it.
++ */
++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt);
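Editor's note: the header above now exports fragment-level SDMA helpers
that were previously private to qib_user_sdma.c. A rough, hypothetical
sketch of filling the first fragment of a packet with one DMA-mapped
user page, based only on the prototypes above (example_fill_first_frag
and its flag choices are illustrative, not part of the patch):

	static void example_fill_first_frag(struct qib_user_sdma_pkt *pkt,
					    struct page *page,
					    dma_addr_t busaddr, u16 len)
	{
		/* Frag 0: offset 0 in the page, marked as both first and
		 * last descriptor; put_page and dma_mapped are set so that
		 * teardown will put_page() and dma_unmap_page() it. */
		qib_user_sdma_init_frag(pkt, 0, 0, len,
					1 /* first_desc */, 1 /* last_desc */,
					1 /* put_page */, 1 /* dma_mapped */,
					page, NULL /* kvaddr */,
					busaddr, len /* dma_length */);
	}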
diff --git a/tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch b/tech-preview/xeon-phi/0013-Updates-to-qib-driver.patch
deleted file mode 100644 (file)
index abc3a3a..0000000
+++ /dev/null
@@ -1,4787 +0,0 @@
-From 6975d8e44fc3f04c14cf4f83e2df6f69a25546dc Mon Sep 17 00:00:00 2001
-From: Jubin John <jubin.john@intel.com>
-Date: Fri, 26 Sep 2014 09:41:32 -0700
-Subject: [PATCH] Updates to qib driver
-
----
- drivers/infiniband/hw/qib/Makefile         |    2 +-
- drivers/infiniband/hw/qib/qib.h            |  172 +++++-
- drivers/infiniband/hw/qib/qib_driver.c     |  223 +++++++-
- drivers/infiniband/hw/qib/qib_file_ops.c   |  166 ++++--
- drivers/infiniband/hw/qib/qib_iba6120.c    |   12 +-
- drivers/infiniband/hw/qib/qib_iba7220.c    |   20 +-
- drivers/infiniband/hw/qib/qib_iba7322.c    |  122 ++--
- drivers/infiniband/hw/qib/qib_init.c       |  118 +++--
- drivers/infiniband/hw/qib/qib_knx.c        |  721 +++++++++++++++++++--
- drivers/infiniband/hw/qib/qib_knx.h        |   13 +-
- drivers/infiniband/hw/qib/qib_knx_common.h |  126 ++++
- drivers/infiniband/hw/qib/qib_knx_sdma.h   |  105 ---
- drivers/infiniband/hw/qib/qib_knx_tidrcv.h |   48 --
- drivers/infiniband/hw/qib/qib_mad.c        |    3 +-
- drivers/infiniband/hw/qib/qib_pcie.c       |   21 +-
- drivers/infiniband/hw/qib/qib_qp.c         |    6 +-
- drivers/infiniband/hw/qib/qib_sdma.c       |   11 +-
- drivers/infiniband/hw/qib/qib_snoop.c      |  970 ++++++++++++++++++++++++++++
- drivers/infiniband/hw/qib/qib_user_sdma.c  |  296 +++++----
- drivers/infiniband/hw/qib/qib_user_sdma.h  |  105 +++-
- drivers/infiniband/hw/qib/qib_verbs.c      |  116 ++++-
- 21 files changed, 2831 insertions(+), 545 deletions(-)
- create mode 100644 drivers/infiniband/hw/qib/qib_knx_common.h
- delete mode 100644 drivers/infiniband/hw/qib/qib_knx_sdma.h
- delete mode 100644 drivers/infiniband/hw/qib/qib_knx_tidrcv.h
- create mode 100644 drivers/infiniband/hw/qib/qib_snoop.c
-
-diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
-index ba2a49d..047d191 100644
---- a/drivers/infiniband/hw/qib/Makefile
-+++ b/drivers/infiniband/hw/qib/Makefile
-@@ -6,7 +6,7 @@ ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
-       qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
-       qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
-       qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
--      qib_sd7220.o qib_iba7322.o qib_verbs.o
-+      qib_sd7220.o qib_iba7322.o qib_snoop.o qib_verbs.o
- # 6120 has no fallback if no MSI interrupts, others can do INTx
- ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
-diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
-index ad87abd..e34b0f7 100644
---- a/drivers/infiniband/hw/qib/qib.h
-+++ b/drivers/infiniband/hw/qib/qib.h
-@@ -52,6 +52,7 @@
- #include <linux/kref.h>
- #include <linux/sched.h>
- #include <linux/kthread.h>
-+#include <linux/moduleparam.h>
- #include "qib_common.h"
- #include "qib_verbs.h"
-@@ -247,6 +248,10 @@ struct qib_ctxtdata {
-       u32 lookaside_qpn;
-       /* QPs waiting for context processing */
-       struct list_head qp_wait_list;
-+#ifdef QIB_CONFIG_KNX
-+      /* KNX Receive Context Data */
-+      struct qib_knx_ctxt *krcd;
-+#endif
- #ifdef CONFIG_DEBUG_FS
-       /* verbs stats per CTX */
-       struct qib_opcode_stats_perctx *opstats;
-@@ -546,6 +551,11 @@ struct xmit_wait {
-  * clarifies things a bit. Note that to conform to IB conventions,
-  * port-numbers are one-based. The first or only port is port1.
-  */
-+#define QIB_CHAR_DEVICES_PER_PORT     2
-+/* Extract packet length from LRH header */
-+#define QIB_GET_PKT_LEN(x)    (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
-+#define QIB_SNOOP_DEV_INDEX   0
-+#define QIB_CAPTURE_DEV_INDEX 1
- struct qib_pportdata {
-       struct qib_ibport ibport_data;
-@@ -656,6 +666,7 @@ struct qib_pportdata {
-       u8 link_speed_active;
-       u8 vls_supported;
-       u8 vls_operational;
-+      u8 n_krcv_queues;
-       /* Rx Polarity inversion (compensate for ~tx on partner) */
-       u8 rx_pol_inv;
-@@ -675,6 +686,22 @@ struct qib_pportdata {
-       struct xmit_wait cong_stats;
-       struct timer_list symerr_clear_timer;
-+      /* snoop/capture related fields */
-+      unsigned int mode_flag;
-+      void *filter_value;
-+      int (*filter_callback)(void *hdr, void *data, void *value);
-+      /* lock while sending packet out */
-+      spinlock_t snoop_write_lock;
-+      struct qib_aux_device {
-+              struct cdev *snoop_cdev;
-+              struct device *snoop_class_dev;
-+              /* snooping lock */
-+              spinlock_t snoop_lock;
-+              struct list_head snoop_queue;
-+              wait_queue_head_t snoop_waitq;
-+              struct qib_pportdata *pport;
-+      } sc_device[QIB_CHAR_DEVICES_PER_PORT];
-+
-       /* Synchronize access between driver writes and sysfs reads */
-       spinlock_t cc_shadow_lock
-               ____cacheline_aligned_in_smp;
-@@ -755,14 +782,14 @@ struct qib_devdata {
-       /* mem-mapped base of chip regs plus offset of the SendBufAvail0
-        * register
--       */
-+       */
-       u64 sendbufavail0;
-       /* end of mem-mapped chip space excluding sendbuf and user regs */
-       u64 __iomem *kregend;
-       /* physical address of chip for io_remap, etc. */
-       resource_size_t physaddr;
--      /* qib_cfgctxts pointers */
-+      /* cfgctxts pointers */
-       struct qib_ctxtdata **rcd; /* Receive Context Data */
-       /* qib_pportdata, points to array of (physical) port-specific
-@@ -1079,7 +1106,6 @@ struct qib_devdata {
-       u8 num_pports;
-       /* Lowest context number which can be used by user processes */
-       u8 first_user_ctxt;
--      u8 n_krcv_queues;
-       u8 qpn_mask;
-       u8 skip_kctxt_mask;
-@@ -1126,13 +1152,119 @@ struct qib_devdata {
-       int assigned_node_id; /* NUMA node closest to HCA */
- #ifdef QIB_CONFIG_KNX
--      /* peer node id of connected KNX node */
--      u16 node_id;
--      struct qib_knx *knx;
-+      /* number of KNx nodes using this device */
-+      u16 num_knx;
- #endif
-+};
-+enum qib_mod_param_t {
-+      qib_mod_param_drv,
-+      qib_mod_param_unit,
-+      qib_mod_param_port
- };
-+typedef int (*param_set_func_t)(struct qib_devdata *, u8, u64);
-+
-+struct qib_mod_param {
-+      const char *name;
-+      enum qib_mod_param_t type;
-+      param_set_func_t func;
-+      ulong dflt;
-+      struct list_head list;
-+      struct list_head pport;
-+};
-+
-+extern int qib_set_mod_param(const char *, struct kernel_param *);
-+extern int qib_get_mod_param(char *, struct kernel_param *);
-+extern u64 qib_read_mod_param(struct qib_mod_param *, u16, u8);
-+extern void qib_clean_mod_param(void);
-+
-+#define MAX_QIB_PARAM_LEN 128
-+/**
-+ * QIB_MODPARAM_GLOBAL - define a global module parameter
-+ * @N: name of the module parameter
-+ *
-+ * Define a global module parameter for use in multiple files.
-+ */
-+#define QIB_MODPARAM_GLOBAL(N) \
-+extern struct qib_mod_param qmp_##N
-+/**
-+ * QIB_MODPARAM_DRV - define a driver-scope module parameter
-+ * @N: name of the module parameter
-+ * @D: default value
-+ * @P: visibility in sysfs
-+ * @S: description
-+ *
-+ * Define a driver-scope (global to the driver instance) module
-+ * parameter.
-+ */
-+#define QIB_MODPARAM_DRV(N, D, P, S)                            \
-+      struct qib_mod_param qmp_##N = {                          \
-+              .name = __stringify(N),                           \
-+              .type = qib_mod_param_drv,                        \
-+              .dflt = (ulong)D,                                 \
-+              .pport = { NULL, NULL }                           \
-+      };                                                        \
-+      module_param_named(N, qmp_##N.dflt, ulong, P);            \
-+      MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")")
-+/**
-+ * QIB_MODPARAM_UNIT - define a unit-scope module parameter
-+ * @N: name of the module parameter
-+ * @F: callback function for dynamic value settings
-+ * @D: default value
-+ * @P: visibility in sysfs
-+ * @S: description
-+ *
-+ * Define a unit-scope module parameter. Unit-scope module
-+ * parameters allow specifying individual values for each of the
-+ * QIB units.
-+ */
-+#define QIB_MODPARAM_UNIT(N, F, D, P, S)                         \
-+      struct qib_mod_param qmp_##N = {                           \
-+              .name = __stringify(N),                            \
-+              .func = ((P) & S_IWUGO ? F : NULL),                \
-+              .type = qib_mod_param_unit,                        \
-+              .dflt = (ulong)D,                                  \
-+              .pport = { NULL, NULL }                            \
-+      };                                                         \
-+      module_param_call(N, qib_set_mod_param, qib_get_mod_param, \
-+                        &qmp_##N, (P));                          \
-+      MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")")
-+/**
-+ * QIB_MODPARAM_PORT - define a port-scope module parameter
-+ * @N: name of the module parameter
-+ * @F: callback function for dynamic value settings
-+ * @D: default value
-+ * @P: visibility in sysfs
-+ * @S: description
-+ *
-+ * Define a port-scope module parameter. Port-scope module
-+ * parameters allow specifying individual values for each of the
-+ * ports on any of the QIB units.
-+ */
-+#define QIB_MODPARAM_PORT(N, F, D, P, S)                         \
-+      struct qib_mod_param qmp_##N = {                           \
-+              .name = __stringify(N),                            \
-+              .func = ((P) & S_IWUGO ? F : NULL),                \
-+              .type = qib_mod_param_port,                        \
-+              .dflt = (ulong)D,                                  \
-+              .pport = { NULL, NULL }                            \
-+      };                                                         \
-+      module_param_call(N, qib_set_mod_param, qib_get_mod_param, \
-+                        &qmp_##N, (P));                          \
-+      MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")")
-+/**
-+ * QIB_MODPARAM_GET - retrieve a module parameter value
-+ * @N: name of the module parameter
-+ * @U: unit number
-+ * @P: port number
-+ *
-+ * Get the value for the specific unit/port. The macro will return
-+ * the correct value regardless of whether a specific value for the
-+ * specified unit/port is present or the default should be used.
-+ */
-+#define QIB_MODPARAM_GET(N, U, P) qib_read_mod_param(&qmp_##N, U, P)
-+
- /* hol_state values */
- #define QIB_HOL_UP       0
- #define QIB_HOL_INIT     1
-@@ -1165,12 +1297,14 @@ struct qib_filedata {
- };
- extern struct list_head qib_dev_list;
-+extern struct list_head qib_mod_param_list;
- extern spinlock_t qib_devs_lock;
- extern struct qib_devdata *qib_lookup(int unit);
- extern u32 qib_cpulist_count;
- extern unsigned long *qib_cpulist;
- extern unsigned qib_wc_pat;
-+extern unsigned int snoop_enable;
- extern unsigned qib_cc_table_size;
- int qib_init(struct qib_devdata *, int);
- int init_chip_wc_pat(struct qib_devdata *dd, u32);
-@@ -1230,6 +1364,24 @@ void qib_hol_event(unsigned long);
- void qib_disable_after_error(struct qib_devdata *);
- int qib_set_uevent_bits(struct qib_pportdata *, const int);
-+#define QIB_PORT_SNOOP_MODE   1U
-+#define QIB_PORT_CAPTURE_MODE 2U
-+
-+struct snoop_packet {
-+      struct list_head list;
-+      u32 total_len;
-+      u8 data[];
-+};
-+
-+int qib_snoop_add(struct qib_devdata *);
-+void qib_snoop_remove(struct qib_devdata *);
-+int qib_snoop_rcv_queue_packet(struct qib_pportdata *, void *,
-+                              void *, u32);
-+void qib_snoop_send_queue_packet(struct qib_pportdata *,
-+                              struct snoop_packet *);
-+int snoop_get_header_size(struct qib_devdata *, struct qib_ib_header *,
-+                              void *, u32);
-+
- /* for use in system calls, where we want to know device type, etc. */
- #define ctxt_fp(fp) \
-       (((struct qib_filedata *)(fp)->private_data)->rcd)
-@@ -1367,7 +1519,7 @@ void qib_sdma_intr(struct qib_pportdata *);
- void qib_user_sdma_send_desc(struct qib_pportdata *dd,
-                       struct list_head *pktlist);
- int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
--                      u32, struct qib_verbs_txreq *);
-+                      u32, struct qib_verbs_txreq *, struct snoop_packet *);
- /* ppd->sdma_lock should be locked before calling this. */
- int qib_sdma_make_progress(struct qib_pportdata *dd);
-@@ -1505,9 +1657,9 @@ const char *qib_get_unit_name(int unit);
- #endif
- /* global module parameter variables */
--extern unsigned qib_ibmtu;
--extern ushort qib_cfgctxts;
--extern ushort qib_num_cfg_vls;
-+QIB_MODPARAM_GLOBAL(ibmtu);
-+QIB_MODPARAM_GLOBAL(cfgctxts);
-+QIB_MODPARAM_GLOBAL(krcvqs);
- extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */
- extern unsigned qib_n_krcv_queues;
- extern unsigned qib_sdma_fetch_arb;
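Editor's note: a minimal, hypothetical sketch of the module parameter
machinery declared above; example_param, its default of 2, and
example_read are invented for illustration, but follow the same pattern
the patch itself uses for num_vls and krcvqs:

	/* shows up as /sys/module/ib_qib/parameters/example_param */
	static QIB_MODPARAM_PORT(example_param, NULL, 2, S_IRUGO,
				 "Example per-port tunable");

	static void example_read(struct qib_devdata *dd, u8 port)
	{
		/* Returns the unit:port override if one was supplied on
		 * the command line, otherwise the default (2). */
		u64 v = QIB_MODPARAM_GET(example_param, dd->unit, port);
		(void)v;
	}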
-diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
-index 5bee08f..e5fb836 100644
---- a/drivers/infiniband/hw/qib/qib_driver.c
-+++ b/drivers/infiniband/hw/qib/qib_driver.c
-@@ -43,6 +43,9 @@
- #include "qib.h"
-+#undef pr_fmt
-+#define pr_fmt(fmt) QIB_DRV_NAME " " fmt
-+
- /*
-  * The size has to be longer than this string, so we can append
-  * board/chip information to it in the init code.
-@@ -51,11 +54,21 @@ const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
- DEFINE_SPINLOCK(qib_devs_lock);
- LIST_HEAD(qib_dev_list);
-+LIST_HEAD(qib_mod_param_list);
- DEFINE_MUTEX(qib_mutex);      /* general driver use */
--unsigned qib_ibmtu;
--module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
--MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096)");
-+/* Per-unit/port module parameter value structure
-+ * linked to the qib_mod_param structure - one per
-+ * unit/port */
-+struct qib_mod_param_pport {
-+      struct list_head list;
-+      u16 unit;
-+      u8 port;
-+      u64 value;
-+};
-+
-+QIB_MODPARAM_PORT(ibmtu, NULL, 5, S_IRUGO,
-+                "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
- unsigned qib_compat_ddr_negotiate = 1;
- module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
-@@ -90,6 +103,178 @@ const char *qib_get_unit_name(int unit)
-       return iname;
- }
-+int qib_set_mod_param(const char *str, struct kernel_param *kp)
-+{
-+      char *next = (char *)str, *tmp;
-+      unsigned long val = 0, dft;
-+      u32 unit = 0, port = 0;
-+      struct qib_mod_param *param =
-+              (struct qib_mod_param *)kp->arg;
-+      struct qib_mod_param_pport *pport, *p;
-+      int ret = 0;
-+
-+      if (strlen(str) >= MAX_QIB_PARAM_LEN) {
-+              pr_warn("parameter value too long\n");
-+              ret = -ENOSPC;
-+              goto done;
-+      }
-+
-+      /* qib_dev_list will be empty only when the driver is initially
-+       * loading. */
-+      if (list_empty(&qib_dev_list) || !param->pport.next)
-+              INIT_LIST_HEAD(&param->pport);
-+      tmp = next;
-+      dft = simple_strtoul(tmp, &next, 0);
-+      if (next == tmp) {
-+              pr_warn("invalid parameter value\n");
-+              ret = -EINVAL;
-+              goto done;
-+      }
-+      /* clear any previously added port entries */
-+      list_for_each_entry_safe(pport, p, &param->pport, list) {
-+              list_del(&pport->list);
-+              kfree(pport);
-+      }
-+      if (!*next || *next == '\n' || *next == ',')
-+              param->dflt = dft;
-+      else if (*next && *next == ':')
-+              /* no default, rewind the string */
-+              next = tmp;
-+      else
-+              pr_warn("invalid parameter value\n");
-+      while (*next && next[1]) {
-+              if (*next == ',')
-+                      tmp = ++next;
-+              unit = simple_strtoul(tmp, &next, 0);
-+              if (param->type == qib_mod_param_port) {
-+                      if (next == tmp || !*next || *next != ':') {
-+                              pr_warn("Invalid unit:port argument at \"%s\".\n",
-+                                      tmp);
-+                              while (*next && *next++ != ',')
-+                                      ;
-+                              tmp = next;
-+                              continue;
-+                      }
-+                      tmp = ++next;
-+                      port = simple_strtoul(tmp, &next, 0);
-+                      if (!port) {
-+                              /* port numbers start at 1, 0 is invalid */
-+                              pr_warn("Invalid argument at \"%s\". Port numbers start at 1.\n",
-+                                      tmp);
-+                              while (*next && *next++ != ',')
-+                                      ;
-+                              tmp = next;
-+                              continue;
-+                      }
-+              }
-+              if (next == tmp || *next != '=') {
-+                      pr_warn("Invalid %s argument at \"%s\".\n",
-+                              (param->type == qib_mod_param_port ?
-+                              "port" : "unit"), tmp);
-+                      while (*next && *next++ != ',')
-+                              ;
-+                      tmp = next;
-+                      continue;
-+              }
-+              tmp = ++next;
-+              val = simple_strtoul(tmp, &next, 0);
-+              if (next == tmp) {
-+                      pr_warn("Invalid value string at \"%s\"\n", tmp);
-+                      while (*next && *next++ != ',')
-+                              ;
-+                      tmp = next;
-+                      continue;
-+              }
-+              pport = kzalloc(sizeof(struct qib_mod_param_pport),
-+                              GFP_KERNEL);
-+              if (!pport) {
-+                      pr_err("no memory for module parameter.\n");
-+                      ret = -ENOMEM;
-+                      goto done;
-+              }
-+              pport->unit = unit;
-+              pport->port = port;
-+              pport->value = val;
-+              list_add_tail(&pport->list, &param->pport);
-+              if (!*next || *next == '\n')
-+                      break;
-+              tmp = ++next;
-+      }
-+      /* add parameter to list so it can be cleaned up */
-+      if (!param->list.next)
-+              list_add(&param->list, &qib_mod_param_list);
-+
-+      if (param->func && qib_count_units(NULL, NULL)) {
-+              struct qib_devdata *dd;
-+              list_for_each_entry(pport, &param->pport, list) {
-+                      param_set_func_t setfunc = param->func;
-+                      list_for_each_entry(dd, &qib_dev_list, list)
-+                              if (dd->unit == pport->unit)
-+                                      break;
-+                      if (!setfunc(dd, pport->port, pport->value))
-+                              pr_err("Error setting module parameter %s for IB%u:%u",
-+                                     param->name,
-+                                     pport->unit,
-+                                     pport->port);
-+              }
-+      }
-+done:
-+      return ret;
-+}
-+
-+int qib_get_mod_param(char *buffer, struct kernel_param *kp)
-+{
-+      struct qib_mod_param *param =
-+              (struct qib_mod_param *)kp->arg;
-+      struct qib_mod_param_pport *pport;
-+      char *p = buffer;
-+      int s = 0;
-+
-+      s = scnprintf(p, PAGE_SIZE, "%lu", param->dflt);
-+      p += s;
-+
-+      if (param->pport.next)
-+              list_for_each_entry(pport, &param->pport, list) {
-+                      *p++ = ',';
-+                      if (param->type == qib_mod_param_unit)
-+                              s = scnprintf(p, PAGE_SIZE, "%u=%llu",
-+                                            pport->unit, pport->value);
-+                      else if (param->type == qib_mod_param_port)
-+                              s = scnprintf(p, PAGE_SIZE, "%u:%u=%llu",
-+                                            pport->unit, pport->port,
-+                                            pport->value);
-+                      p += s;
-+              }
-+      return strlen(buffer);
-+}
-+
-+u64 qib_read_mod_param(struct qib_mod_param *param, u16 unit, u8 port)
-+{
-+      struct qib_mod_param_pport *pport;
-+      u64 ret = param->dflt;
-+
-+      if (param->type != qib_mod_param_drv)
-+              if (param->pport.next && !list_empty(&param->pport))
-+                      list_for_each_entry(pport, &param->pport, list)
-+                              if (pport->unit == unit &&
-+                                  pport->port == port)
-+                                      ret = pport->value;
-+      return ret;
-+}
-+
-+void qib_clean_mod_param(void)
-+{
-+      struct qib_mod_param *p;
-+      struct qib_mod_param_pport *pp, *pps;
-+
-+      list_for_each_entry(p, &qib_mod_param_list, list) {
-+              list_for_each_entry_safe(pp, pps, &p->pport, list) {
-+                      list_del(&pp->list);
-+                      kfree(pp);
-+              }
-+      }
-+}
-+
- /*
-  * Return count of units with at least one port ACTIVE.
-  */
-@@ -456,6 +641,8 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
-       int last;
-       u64 lval;
-       struct qib_qp *qp, *nqp;
-+      struct snoop_packet *packet = NULL;
-+      u32 hdr_len = 0;
-       l = rcd->head;
-       rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
-@@ -478,6 +665,25 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
-               /* total length */
-               tlen = qib_hdrget_length_in_bytes(rhf_addr);
-               ebuf = NULL;
-+              /* applicable only for capture */
-+              if (unlikely(ppd->mode_flag & QIB_PORT_CAPTURE_MODE)) {
-+                      int nomatch = 0;
-+                      /* We want to filter packet before copying it */
-+                      if (ppd->filter_callback)
-+                              nomatch = ppd->filter_callback(hdr, ebuf,
-+                                      ppd->filter_value);
-+                      if (nomatch == 0) {
-+                              packet = kzalloc(sizeof(*packet) + tlen,
-+                                              GFP_ATOMIC);
-+                              if (packet) {
-+                                      /* copy header first */
-+                                      packet->total_len = tlen;
-+                                      INIT_LIST_HEAD(&packet->list);
-+                                      hdr_len = (u8 *)rhf_addr - (u8 *)hdr;
-+                                      memcpy(packet->data, hdr, hdr_len);
-+                              }
-+                      }
-+              }
-               if ((dd->flags & QIB_NODMA_RTAIL) ?
-                   qib_hdrget_use_egr_buf(rhf_addr) :
-                   (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-@@ -512,6 +718,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
-                       crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
-                                              etail, rhf_addr, hdr);
-               else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-+                      /* copy packet data */
-+                      if (ebuf && packet)
-+                              memcpy((packet->data + hdr_len), ebuf,
-+                                      (tlen - hdr_len));
-                       qib_ib_rcv(rcd, hdr, ebuf, tlen);
-                       if (crcs)
-                               crcs--;
-@@ -519,6 +729,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
-                               --*llic;
-               }
- move_along:
-+              if (packet) {
-+                      qib_snoop_send_queue_packet(ppd, packet);
-+                      packet = NULL;
-+              }
-               l += rsize;
-               if (l >= maxcnt)
-                       l = 0;
-@@ -619,7 +833,8 @@ int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
-               ret = -EINVAL;
-               goto bail;
-       }
--      chk = ib_mtu_enum_to_int(qib_ibmtu);
-+      chk = ib_mtu_enum_to_int(
-+              QIB_MODPARAM_GET(ibmtu, ppd->dd->unit, ppd->port));
-       if (chk > 0 && arg > chk) {
-               ret = -EINVAL;
-               goto bail;
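Editor's note: from the parser in qib_set_mod_param() above, a
parameter value string is an optional leading default followed by
comma-separated overrides, "unit=value" for unit-scope parameters and
"unit:port=value" for port-scope ones. A hypothetical load line
combining both forms:

	# default MTU enum 5 (4096) everywhere, but enum 0 (2KB) on
	# unit 0 port 2; cap unit 1 at 6 contexts, chip default elsewhere
	modprobe ib_qib ibmtu=5,0:2=0 cfgctxts=0,1=6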
-diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
-index 6eebad0..376961d 100644
---- a/drivers/infiniband/hw/qib/qib_file_ops.c
-+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
-@@ -95,6 +95,9 @@ static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
-                            unsigned long, loff_t);
- static unsigned int qib_poll(struct file *, struct poll_table_struct *);
- static int qib_mmapf(struct file *, struct vm_area_struct *);
-+static int subctxt_search_ctxts(struct qib_devdata *, struct file *,
-+                              const struct qib_user_info *);
-+
- static const struct file_operations qib_file_ops = {
-       .owner = THIS_MODULE,
-@@ -1547,6 +1550,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
-       rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
-+#ifdef QIB_CONFIG_KNX
-+      if (uinfo->spu_knx_node_id)
-+              /*
-+               * Skip allocation of page pointer list for TID
-+               * receives. This will be done on the KNX.
-+               */
-+              goto no_page_list;
-+#endif
-       /*
-        * Allocate memory for use in qib_tid_update() at open to
-        * reduce cost of expected send setup per message segment
-@@ -1562,6 +1573,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
-               ret = -ENOMEM;
-               goto bailerr;
-       }
-+#ifdef QIB_CONFIG_KNX
-+no_page_list:
-+#endif
-       rcd->userversion = uinfo->spu_userversion;
-       ret = init_subctxts(dd, rcd, uinfo);
-@@ -1720,52 +1734,66 @@ done:
- static int find_shared_ctxt(struct file *fp,
-                           const struct qib_user_info *uinfo)
- {
--      int devmax, ndev, i;
-+      int devmax, ndev;
-       int ret = 0;
-+      struct qib_devdata *dd;
-+#ifdef QIB_CONFIG_KNX
-+      /*
-+       * In the case we are allocating a context for a KNX process,
-+       * Don't loop over all devices but use the one assosiated with the
-+       * requesting KNX.
-+       */
-+      if (uinfo->spu_knx_node_id) {
-+              dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-+              if (dd && dd->num_knx)
-+                      ret = subctxt_search_ctxts(dd, fp, uinfo);
-+              goto done;
-+      }
-+#endif
-       devmax = qib_count_units(NULL, NULL);
-       for (ndev = 0; ndev < devmax; ndev++) {
--              struct qib_devdata *dd = qib_lookup(ndev);
--#ifdef QIB_CONFIG_KNX
--              /*
--               * In the case we are allocating a context for a KNX process,
--               * reject any device that is not associated with the
--               * requesting KNX.
--               */
--              if ((uinfo->spu_knx_node_id &&
--                   dd->node_id != uinfo->spu_knx_node_id))
--                      continue;
--#endif
-+              dd = qib_lookup(ndev);
-               /* device portion of usable() */
-               if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
-                       continue;
--              for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
--                      struct qib_ctxtdata *rcd = dd->rcd[i];
-+              ret = subctxt_search_ctxts(dd, fp, uinfo);
-+              if (ret)
-+                      break;
-+      }
-+done:
-+      return ret;
-+}
--                      /* Skip ctxts which are not yet open */
--                      if (!rcd || !rcd->cnt)
--                              continue;
--                      /* Skip ctxt if it doesn't match the requested one */
--                      if (rcd->subctxt_id != uinfo->spu_subctxt_id)
--                              continue;
--                      /* Verify the sharing process matches the master */
--                      if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
--                          rcd->userversion != uinfo->spu_userversion ||
--                          rcd->cnt >= rcd->subctxt_cnt) {
--                              ret = -EINVAL;
--                              goto done;
--                      }
--                      ctxt_fp(fp) = rcd;
--                      subctxt_fp(fp) = rcd->cnt++;
--                      rcd->subpid[subctxt_fp(fp)] = current->pid;
--                      tidcursor_fp(fp) = 0;
--                      rcd->active_slaves |= 1 << subctxt_fp(fp);
--                      ret = 1;
-+static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp,
-+                              const struct qib_user_info *uinfo)
-+{
-+      int ret = 0, i;
-+      for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
-+              struct qib_ctxtdata *rcd = dd->rcd[i];
-+
-+              /* Skip ctxts which are not yet open */
-+              if (!rcd || !rcd->cnt)
-+                      continue;
-+              /* Skip ctxt if it doesn't match the requested one */
-+              if (rcd->subctxt_id != uinfo->spu_subctxt_id)
-+                      continue;
-+              /* Verify the sharing process matches the master */
-+              if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
-+                  rcd->userversion != uinfo->spu_userversion ||
-+                  rcd->cnt >= rcd->subctxt_cnt) {
-+                      ret = -EINVAL;
-                       goto done;
-               }
-+              ctxt_fp(fp) = rcd;
-+              subctxt_fp(fp) = rcd->cnt++;
-+              rcd->subpid[subctxt_fp(fp)] = current->pid;
-+              tidcursor_fp(fp) = 0;
-+              rcd->active_slaves |= 1 << subctxt_fp(fp);
-+              ret = 1;
-+              break;
-       }
--
- done:
-       return ret;
- }
-@@ -1856,6 +1884,10 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
-       if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
-               alg = uinfo->spu_port_alg;
-+      if (swminor <= 11) {
-+              qib_pio_avail_bits = 1;
-+              qib_rcvhdrpoll = 1;
-+      }
- #ifdef QIB_CONFIG_KNX
-       /* Make sure we have a connection to the KNX module on the right node */
-@@ -1871,13 +1903,38 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
-           uinfo->spu_subctxt_cnt) {
-               ret = find_shared_ctxt(fp, uinfo);
-               if (ret > 0) {
--                      ret = do_qib_user_sdma_queue_create(fp);
-+#ifdef QIB_CONFIG_KNX
-+                      if (uinfo->spu_knx_node_id) {
-+                              ret = qib_knx_sdma_queue_create(fp);
-+                      } else
-+#endif
-+                              ret = do_qib_user_sdma_queue_create(fp);
-                       if (!ret)
-                               assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
-                       goto done_ok;
-               }
-       }
-+#ifdef QIB_CONFIG_KNX
-+      /*
-+       * If there is a KNX node set, we pick the device that is
-+       * associated with that KNX node
-+       */
-+      if (uinfo->spu_knx_node_id) {
-+              struct qib_devdata *dd =
-+                      qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-+              if (dd) {
-+                      ret = find_free_ctxt(dd->unit, fp, uinfo);
-+                      if (!ret)
-+                              ret = qib_knx_alloc_ctxt(
-+                                      uinfo->spu_knx_node_id,
-+                                      ctxt_fp(fp)->ctxt);
-+              } else
-+                      ret = -ENXIO;
-+              goto done_chk_sdma;
-+      }
-+
-+#endif
-       i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
-       if (i_minor)
-               ret = find_free_ctxt(i_minor - 1, fp, uinfo);
-@@ -1886,25 +1943,6 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
-               const unsigned int cpu = cpumask_first(&current->cpus_allowed);
-               const unsigned int weight =
-                       cpumask_weight(&current->cpus_allowed);
--#ifdef QIB_CONFIG_KNX
--              /*
--               * If there is a KNX node set, we pick the device that is on
--               * the same NUMA node as the KNX.
--               */
--              if (uinfo->spu_knx_node_id) {
--                      struct qib_devdata *dd =
--                              qib_knx_node_to_dd(uinfo->spu_knx_node_id);
--                      if (dd) {
--                              ret = find_free_ctxt(dd->unit, fp, uinfo);
--                              if (!ret)
--                                      ret = qib_knx_alloc_ctxt(dd,
--                                                      ctxt_fp(fp)->ctxt);
--                      } else
--                              ret = -ENXIO;
--                      goto done_chk_sdma;
--              }
--#endif
--
-               if (weight == 1 && !test_bit(cpu, qib_cpulist))
-                       if (!find_hca(cpu, &unit) && unit >= 0)
-                               if (!find_free_ctxt(unit, fp, uinfo)) {
-@@ -1915,8 +1953,17 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
-       }
- done_chk_sdma:
--      if (!ret)
-+      if (!ret) {
-+#ifdef QIB_CONFIG_KNX
-+              if (uinfo->spu_knx_node_id) {
-+                      ret = qib_knx_sdma_queue_create(fp);
-+                      /*if (!ret)
-+                        ret = qib_knx_setup_tidrcv(fp);*/
-+                      goto done_ok;
-+              }
-+#endif
-               ret = do_qib_user_sdma_queue_create(fp);
-+      }
- done_ok:
- #ifdef QIB_CONFIG_KNX
-       knx_node_fp(fp) = uinfo->spu_knx_node_id;
-@@ -2145,6 +2192,13 @@ static int qib_close(struct inode *in, struct file *fp)
-       /* drain user sdma queue */
-       if (fd->pq) {
-+#ifdef QIB_CONFIG_KNX
-+              /*
-+               * The thread should be stopped first before attempting
-+               * to clean the queue.
-+               */
-+              qib_knx_sdma_queue_destroy(fd);
-+#endif
-               qib_user_sdma_queue_drain(rcd->ppd, fd->pq);
-               qib_user_sdma_queue_destroy(fd->pq);
-       }
-@@ -2737,4 +2791,6 @@ void qib_device_remove(struct qib_devdata *dd)
- {
-       qib_user_remove(dd);
-       qib_diag_remove(dd);
-+      if (snoop_enable)
-+              qib_snoop_remove(dd);
- }
-diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
-index 84e593d..9ab46ed 100644
---- a/drivers/infiniband/hw/qib/qib_iba6120.c
-+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
-@@ -2070,15 +2070,16 @@ qib_6120_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
- static void qib_6120_config_ctxts(struct qib_devdata *dd)
- {
-+      u32 nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, 0);
-       dd->ctxtcnt = qib_read_kreg32(dd, kr_portcnt);
--      if (qib_n_krcv_queues > 1) {
--              dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
-+      if (nkrcvqs > 1) {
-+              dd->first_user_ctxt = nkrcvqs * dd->num_pports;
-               if (dd->first_user_ctxt > dd->ctxtcnt)
-                       dd->first_user_ctxt = dd->ctxtcnt;
-               dd->qpn_mask = dd->first_user_ctxt <= 2 ? 2 : 6;
-       } else
-               dd->first_user_ctxt = dd->num_pports;
--      dd->n_krcv_queues = dd->first_user_ctxt;
-+      dd->pport[0].n_krcv_queues = dd->first_user_ctxt;
- }
- static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd,
-@@ -3133,7 +3134,7 @@ static void get_6120_chip_params(struct qib_devdata *dd)
-       dd->piosize2k = val & ~0U;
-       dd->piosize4k = val >> 32;
--      mtu = ib_mtu_enum_to_int(qib_ibmtu);
-+      mtu = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
-       if (mtu == -1)
-               mtu = QIB_DEFAULT_MTU;
-       dd->pport->ibmtu = (u32)mtu;
-@@ -3282,7 +3283,7 @@ static int init_6120_variables(struct qib_devdata *dd)
-       dd->rhf_offset = 0;
-       /* we always allocate at least 2048 bytes for eager buffers */
--      ret = ib_mtu_enum_to_int(qib_ibmtu);
-+      ret = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
-       dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
-       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
-       dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
-@@ -3322,7 +3323,6 @@ static int init_6120_variables(struct qib_devdata *dd)
-       if (qib_mini_init)
-               goto bail;
--      qib_num_cfg_vls = 1; /* if any 6120's, only one VL */
-       ret = qib_create_ctxts(dd);
-       init_6120_cntrnames(dd);
-diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
-index 454c2e7..19ad170 100644
---- a/drivers/infiniband/hw/qib/qib_iba7220.c
-+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
-@@ -2299,19 +2299,21 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
- {
-       unsigned long flags;
-       u32 nchipctxts;
-+      u32 cfgctxts = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0);
-+      u32 nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, 0);
-       nchipctxts = qib_read_kreg32(dd, kr_portcnt);
-       dd->cspec->numctxts = nchipctxts;
--      if (qib_n_krcv_queues > 1) {
-+      if (nkrcvqs > 1) {
-               dd->qpn_mask = 0x3e;
--              dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
-+              dd->first_user_ctxt = nkrcvqs * dd->num_pports;
-               if (dd->first_user_ctxt > nchipctxts)
-                       dd->first_user_ctxt = nchipctxts;
-       } else
-               dd->first_user_ctxt = dd->num_pports;
--      dd->n_krcv_queues = dd->first_user_ctxt;
-+      dd->pport[0].n_krcv_queues = dd->first_user_ctxt;
--      if (!qib_cfgctxts) {
-+      if (!cfgctxts) {
-               int nctxts = dd->first_user_ctxt + num_online_cpus();
-               if (nctxts <= 5)
-@@ -2320,8 +2322,8 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
-                       dd->ctxtcnt = 9;
-               else if (nctxts <= nchipctxts)
-                       dd->ctxtcnt = nchipctxts;
--      } else if (qib_cfgctxts <= nchipctxts)
--              dd->ctxtcnt = qib_cfgctxts;
-+      } else if (cfgctxts <= nchipctxts)
-+              dd->ctxtcnt = cfgctxts;
-       if (!dd->ctxtcnt) /* none of the above, set to max */
-               dd->ctxtcnt = nchipctxts;
-@@ -3846,7 +3848,7 @@ static void get_7220_chip_params(struct qib_devdata *dd)
-       dd->piosize2k = val & ~0U;
-       dd->piosize4k = val >> 32;
--      mtu = ib_mtu_enum_to_int(qib_ibmtu);
-+      mtu = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
-       if (mtu == -1)
-               mtu = QIB_DEFAULT_MTU;
-       dd->pport->ibmtu = (u32)mtu;
-@@ -4084,15 +4086,13 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
-       ppd->cpspec->chase_timer.function = reenable_7220_chase;
-       ppd->cpspec->chase_timer.data = (unsigned long)ppd;
--      qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
--
-       dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
-       dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
-       dd->rhf_offset =
-               dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
-       /* we always allocate at least 2048 bytes for eager buffers */
--      ret = ib_mtu_enum_to_int(qib_ibmtu);
-+      ret = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
-       dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
-       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
-       dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
-diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
-index 016e742..35fc492 100644
---- a/drivers/infiniband/hw/qib/qib_iba7322.c
-+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
-@@ -107,9 +107,8 @@ static const unsigned sdma_idle_cnt = 64;
-  * Number of VLs we are configured to use (to allow for more
-  * credits per vl, etc.)
-  */
--ushort qib_num_cfg_vls = 2;
--module_param_named(num_vls, qib_num_cfg_vls, ushort, S_IRUGO);
--MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
-+static QIB_MODPARAM_PORT(num_vls, NULL, 2, S_IRUGO,
-+                       "Set number of Virtual Lanes to use (1-8)");
- static ushort qib_chase = 1;
- module_param_named(chase, qib_chase, ushort, S_IRUGO);
-@@ -120,9 +119,8 @@ module_param_named(long_attenuation, qib_long_atten, ushort, S_IRUGO);
- MODULE_PARM_DESC(long_attenuation, \
-                "attenuation cutoff (dB) for long copper cable setup");
--static ushort qib_singleport;
--module_param_named(singleport, qib_singleport, ushort, S_IRUGO);
--MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space");
-+static QIB_MODPARAM_UNIT(singleport, NULL, 0, S_IRUGO,
-+                       "Use only IB port 1; more per-port buffer space");
- static ushort qib_krcvq01_no_msi;
- module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO);
-@@ -2395,6 +2393,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
-       qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
-       qib_write_kreg(dd, kr_scratch, 0ULL);
-+      /* ensure previous Tx parameters are not still forced */
-+      qib_write_kreg_port(ppd, krp_tx_deemph_override,
-+              SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
-+              reset_tx_deemphasis_override));
-+
-       if (qib_compat_ddr_negotiate) {
-               ppd->cpspec->ibdeltainprog = 1;
-               ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
-@@ -3515,7 +3518,8 @@ try_intx:
-                       snprintf(dd->cspec->msix_entries[msixnum].name,
-                               sizeof(dd->cspec->msix_entries[msixnum].name)
-                                - 1,
--                              QIB_DRV_NAME "%d (kctx)", dd->unit);
-+                               QIB_DRV_NAME "%d:%d (kctx)", dd->unit,
-+                               ((struct qib_ctxtdata *)arg)->ppd->port);
-               }
-               ret = request_irq(
-                       dd->cspec->msix_entries[msixnum].msix.vector,
-@@ -3651,10 +3655,10 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
-                dd->majrev, dd->minrev,
-                (unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
--      if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
--              qib_devinfo(dd->pcidev,
--                      "IB%u: Forced to single port mode by module parameter\n",
--                      dd->unit);
-+      if (QIB_MODPARAM_GET(singleport, dd->unit, 0) &&
-+          (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
-+              qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode"
-+                          " by module param\n", dd->unit);
-               features &= PORT_SPD_CAP;
-       }
-@@ -3941,22 +3945,30 @@ qib_7322_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
- static void qib_7322_config_ctxts(struct qib_devdata *dd)
- {
-       unsigned long flags;
--      u32 nchipctxts;
-+      u32 nchipctxts, nkrcvqs;
-+      u32 cfgctxts = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0);
-+      u8 pidx;
-       nchipctxts = qib_read_kreg32(dd, kr_contextcnt);
-       dd->cspec->numctxts = nchipctxts;
--      if (qib_n_krcv_queues > 1 && dd->num_pports) {
--              dd->first_user_ctxt = NUM_IB_PORTS +
--                      (qib_n_krcv_queues - 1) * dd->num_pports;
--              if (dd->first_user_ctxt > nchipctxts)
--                      dd->first_user_ctxt = nchipctxts;
--              dd->n_krcv_queues = dd->first_user_ctxt / dd->num_pports;
--      } else {
--              dd->first_user_ctxt = NUM_IB_PORTS;
--              dd->n_krcv_queues = 1;
-+      dd->first_user_ctxt = NUM_IB_PORTS;
-+
-+      for (pidx = 0; pidx < dd->num_pports; pidx++) {
-+              nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, pidx+1);
-+              if (nkrcvqs > 1) {
-+                      if (nkrcvqs - 1 > nchipctxts - dd->first_user_ctxt)
-+                              dd->pport[pidx].n_krcv_queues =
-+                                      (nchipctxts - dd->first_user_ctxt) + 1;
-+                      else
-+                              dd->pport[pidx].n_krcv_queues = nkrcvqs;
-+                      dd->first_user_ctxt +=
-+                              dd->pport[pidx].n_krcv_queues - 1;
-+              } else
-+                      /* Account for the HW ctxt */
-+                      dd->pport[pidx].n_krcv_queues = 1;
-       }
--      if (!qib_cfgctxts) {
-+      if (!cfgctxts) {
-               int nctxts = dd->first_user_ctxt + num_online_cpus();
-               if (nctxts <= 6)
-@@ -3965,10 +3977,10 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
-                       dd->ctxtcnt = 10;
-               else if (nctxts <= nchipctxts)
-                       dd->ctxtcnt = nchipctxts;
--      } else if (qib_cfgctxts < dd->num_pports)
-+      } else if (cfgctxts < dd->num_pports)
-               dd->ctxtcnt = dd->num_pports;
--      else if (qib_cfgctxts <= nchipctxts)
--              dd->ctxtcnt = qib_cfgctxts;
-+      else if (cfgctxts <= nchipctxts)
-+              dd->ctxtcnt = cfgctxts;
-       if (!dd->ctxtcnt) /* none of the above, set to max */
-               dd->ctxtcnt = nchipctxts;
-@@ -5799,7 +5811,6 @@ static void get_7322_chip_params(struct qib_devdata *dd)
- {
-       u64 val;
-       u32 piobufs;
--      int mtu;
-       dd->palign = qib_read_kreg32(dd, kr_pagealign);
-@@ -5818,11 +5829,10 @@ static void get_7322_chip_params(struct qib_devdata *dd)
-       dd->piosize2k = val & ~0U;
-       dd->piosize4k = val >> 32;
--      mtu = ib_mtu_enum_to_int(qib_ibmtu);
--      if (mtu == -1)
--              mtu = QIB_DEFAULT_MTU;
--      dd->pport[0].ibmtu = (u32)mtu;
--      dd->pport[1].ibmtu = (u32)mtu;
-+      dd->pport[0].ibmtu = ib_mtu_enum_to_int(
-+              QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
-+      dd->pport[1].ibmtu = ib_mtu_enum_to_int(
-+              QIB_MODPARAM_GET(ibmtu, dd->unit, 2));
-       /* these may be adjusted in init_chip_wc_pat() */
-       dd->pio2kbase = (u32 __iomem *)
-@@ -6342,11 +6352,11 @@ static void write_7322_initregs(struct qib_devdata *dd)
-       qib_write_kreg(dd, KREG_IDX(RcvQPMulticastContext_1), 1);
-       for (pidx = 0; pidx < dd->num_pports; ++pidx) {
--              unsigned n, regno;
-+              unsigned i, n, regno, ctxts[18];
-               unsigned long flags;
--              if (dd->n_krcv_queues < 2 ||
--                      !dd->pport[pidx].link_speed_supported)
-+              if (dd->pport[pidx].n_krcv_queues == 1 ||
-+                  !dd->pport[pidx].link_speed_supported)
-                       continue;
-               ppd = &dd->pport[pidx];
-@@ -6359,19 +6369,18 @@ static void write_7322_initregs(struct qib_devdata *dd)
-               /* Initialize QP to context mapping */
-               regno = krp_rcvqpmaptable;
-               val = 0;
--              if (dd->num_pports > 1)
--                      n = dd->first_user_ctxt / dd->num_pports;
--              else
--                      n = dd->first_user_ctxt - 1;
-+              for (i = 0, n = 0; n < dd->first_user_ctxt; n++) {
-+                      if (dd->skip_kctxt_mask & (1 << n))
-+                              continue;
-+                      if (dd->rcd[n]->ppd->port == pidx+1)
-+                              ctxts[i++] = n;
-+                      if (i == ppd->n_krcv_queues)
-+                              break;
-+              }
-               for (i = 0; i < 32; ) {
-                       unsigned ctxt;
--                      if (dd->num_pports > 1)
--                              ctxt = (i % n) * dd->num_pports + pidx;
--                      else if (i % n)
--                              ctxt = (i % n) + 1;
--                      else
--                              ctxt = ppd->hw_pidx;
-+                      ctxt = ctxts[i % ppd->n_krcv_queues];
-                       val |= ctxt << (5 * (i % 6));
-                       i++;
-                       if (i % 6 == 0) {
-@@ -6419,7 +6428,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- {
-       struct qib_pportdata *ppd;
-       unsigned features, pidx, sbufcnt;
--      int ret, mtu;
-+      int ret, maxmtu = 0;
-       u32 sbufs, updthresh;
-       /* pport structs are contiguous, allocated after devdata */
-@@ -6496,10 +6505,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-        */
-       qib_7322_set_baseaddrs(dd);
--      mtu = ib_mtu_enum_to_int(qib_ibmtu);
--      if (mtu == -1)
--              mtu = QIB_DEFAULT_MTU;
--
-       dd->cspec->int_enable_mask = QIB_I_BITSEXTANT;
-       /* all hwerrors become interrupts, unless special purposed */
-       dd->cspec->hwerrmask = ~0ULL;
-@@ -6509,9 +6514,14 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-               ~(SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_0) |
-                 SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_1) |
-                 HWE_MASK(LATriggered));
--
-       for (pidx = 0; pidx < NUM_IB_PORTS; ++pidx) {
-               struct qib_chippport_specific *cp = ppd->cpspec;
-+              int mtu = ib_mtu_enum_to_int(
-+                      QIB_MODPARAM_GET(ibmtu, dd->unit, pidx+1));
-+              u8 vls = QIB_MODPARAM_GET(num_vls, dd->unit, pidx+1);
-+              if (mtu == -1)
-+                      mtu = QIB_DEFAULT_MTU;
-+              maxmtu = max(maxmtu, mtu);
-               ppd->link_speed_supported = features & PORT_SPD_CAP;
-               features >>=  PORT_SPD_CAP_SHIFT;
-               if (!ppd->link_speed_supported) {
-@@ -6565,7 +6575,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-               ppd->link_width_active = IB_WIDTH_4X;
-               ppd->link_speed_active = QIB_IB_SDR;
-               ppd->delay_mult = ib_rate_to_delay[IB_RATE_10_GBPS];
--              switch (qib_num_cfg_vls) {
-+              switch (vls) {
-               case 1:
-                       ppd->vls_supported = IB_VL_VL0;
-                       break;
-@@ -6575,8 +6585,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-               default:
-                       qib_devinfo(dd->pcidev,
-                                   "Invalid num_vls %u, using 4 VLs\n",
--                                  qib_num_cfg_vls);
--                      qib_num_cfg_vls = 4;
-+                                  vls);
-                       /* fall through */
-               case 4:
-                       ppd->vls_supported = IB_VL_VL0_3;
-@@ -6588,9 +6597,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-                               qib_devinfo(dd->pcidev,
-                                           "Invalid num_vls %u for MTU %d "
-                                           ", using 4 VLs\n",
--                                          qib_num_cfg_vls, mtu);
-+                                          vls, mtu);
-                               ppd->vls_supported = IB_VL_VL0_3;
--                              qib_num_cfg_vls = 4;
-                       }
-                       break;
-               }
-@@ -6640,7 +6648,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-       dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
-       /* we always allocate at least 2048 bytes for eager buffers */
--      dd->rcvegrbufsize = max(mtu, 2048);
-+      dd->rcvegrbufsize = max(maxmtu, 2048);
-       BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
-       dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
-@@ -6698,8 +6706,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
-               goto bail; /* no error, so can still figure out why err */
-       }
--      write_7322_initregs(dd);
-       ret = qib_create_ctxts(dd);
-+      write_7322_initregs(dd);
-       init_7322_cntrnames(dd);
-       updthresh = 8U; /* update threshold */
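Editor's note: the reworked write_7322_initregs() above first collects,
per port, the kernel receive context numbers owned by that port into
ctxts[], then fills the 32-slot QP-to-context map round-robin. A
standalone sketch of just that mapping step (names invented for
illustration):

	/* ctxts[] holds this port's kernel receive context numbers;
	 * nq == ppd->n_krcv_queues entries are valid */
	static void example_qp_map(const unsigned *ctxts, unsigned nq,
				   unsigned *map /* 32 slots */)
	{
		unsigned i;

		for (i = 0; i < 32; i++)
			/* same round-robin as the patch: slot i gets the
			 * (i % nq)-th kernel context of this port */
			map[i] = ctxts[i % nq];
	}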
-diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
-index 84b3222..0e83ed4 100644
---- a/drivers/infiniband/hw/qib/qib_init.c
-+++ b/drivers/infiniband/hw/qib/qib_init.c
-@@ -67,6 +67,11 @@
- #define QLOGIC_IB_R_SOFTWARE_SHIFT 24
- #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)
-+unsigned int snoop_enable; /* By default (0) snooping is disabled */
-+
-+module_param_named(snoop_enable, snoop_enable, int, 0644);
-+MODULE_PARM_DESC(snoop_enable, "Enable snooping mode (default 0, disabled)");
-+
- /*
-  * Select the NUMA node id on which to allocate the receive header
-  * queue, eager buffers and send pioavail register.
-@@ -79,9 +84,8 @@ MODULE_PARM_DESC(numa_node, "NUMA node on which memory is allocated");
-  * Number of ctxts we are configured to use (to allow for more pio
-  * buffers per ctxt, etc.)  Zero means use chip value.
-  */
--ushort qib_cfgctxts;
--module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
--MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
-+QIB_MODPARAM_UNIT(cfgctxts, NULL, 0, S_IRUGO,
-+                "Set max number of contexts to use");
- /*
-  * If set, do not write to any regs if avoidable, hack to allow
-@@ -97,9 +101,8 @@ MODULE_PARM_DESC(numa_aware, "Use NUMA aware allocations: "
-       "0=disabled, 1=enabled, "
-       "10=option 0 for AMD & <= Intel Westmere cpus and option 1 for newer cpus(default)");
--unsigned qib_n_krcv_queues;
--module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
--MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
-+QIB_MODPARAM_PORT(krcvqs, NULL, 0, S_IRUGO,
-+                "number of kernel receive queues per IB port");
- unsigned qib_cc_table_size;
- module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
-@@ -123,14 +126,15 @@ unsigned long *qib_cpulist;
- /* set number of contexts we'll actually use */
- void qib_set_ctxtcnt(struct qib_devdata *dd)
- {
--      if (!qib_cfgctxts) {
-+      u64 val = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0);
-+      if (!val) {
-               dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
-               if (dd->cfgctxts > dd->ctxtcnt)
-                       dd->cfgctxts = dd->ctxtcnt;
--      } else if (qib_cfgctxts < dd->num_pports)
-+      } else if (val < dd->num_pports)
-               dd->cfgctxts = dd->ctxtcnt;
--      else if (qib_cfgctxts <= dd->ctxtcnt)
--              dd->cfgctxts = qib_cfgctxts;
-+      else if (val <= dd->ctxtcnt)
-+              dd->cfgctxts = val;
-       else
-               dd->cfgctxts = dd->ctxtcnt;
-       dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
-@@ -142,13 +146,27 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
-  */
- int qib_create_ctxts(struct qib_devdata *dd)
- {
--      unsigned i;
-+      unsigned i, c, p;
-+      unsigned port;
-       int ret;
-+      int node_id;
-       int local_node_id = pcibus_to_node(dd->pcidev->bus);
-+      s64 new_node_id = qib_numa_node;
-       if (local_node_id < 0)
-               local_node_id = numa_node_id();
--      dd->assigned_node_id = local_node_id;
-+
-+      if (new_node_id < 0)
-+              new_node_id = local_node_id;
-+
-+      new_node_id = node_online(new_node_id) ? new_node_id :
-+              local_node_id;
-+
-+      dd->local_node_id = local_node_id;
-+      dd->assigned_node_id = new_node_id;
-+
-+      node_id = qib_numa_aware ? dd->local_node_id :
-+              dd->assigned_node_id;
-       /*
-        * Allocate full ctxtcnt array, rather than just cfgctxts, because
-@@ -162,17 +180,29 @@ int qib_create_ctxts(struct qib_devdata *dd)
-               goto done;
-       }
-+      c = dd->num_pports ? min(
-+              (unsigned)dd->pport[0].n_krcv_queues,
-+              (dd->num_pports > 1 ?
-+               (unsigned)dd->pport[1].n_krcv_queues : (unsigned)-1))
-+              : 0;
-+      p = dd->num_pports > 1 ?
-+              (dd->pport[0].n_krcv_queues > dd->pport[1].n_krcv_queues ?
-+               0 : 1) : 0;
-+
-       /* create (one or more) kctxt */
--      for (i = 0; i < dd->first_user_ctxt; ++i) {
-+      for (port = 0, i = 0; i < dd->first_user_ctxt; ++i) {
-               struct qib_pportdata *ppd;
-               struct qib_ctxtdata *rcd;
-               if (dd->skip_kctxt_mask & (1 << i))
-                       continue;
--              ppd = dd->pport + (i % dd->num_pports);
-+              if (i < (c * dd->num_pports))
-+                      ppd = dd->pport + (i % dd->num_pports);
-+              else
-+                      ppd = dd->pport + p;
--              rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
-+              rcd = qib_create_ctxtdata(ppd, i, node_id);
-               if (!rcd) {
-                       qib_dev_err(dd,
-                               "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
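To make the new kernel-context distribution concrete: with port 1 configured for krcvqs=4 and port 2 for krcvqs=2, c = min(4, 2) = 2 and p = 0 (the index of the port with more queues). The first c * num_pports = 4 kernel contexts then alternate between the two ports, and every remaining kernel context is assigned to port 0, instead of the old unconditional i % num_pports round-robin.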
-@@ -722,10 +752,10 @@ int qib_init(struct qib_devdata *dd, int reinit)
-               if (lastfail)
-                       ret = lastfail;
-               ppd = dd->pport + pidx;
--              mtu = ib_mtu_enum_to_int(qib_ibmtu);
-+              mtu = ib_mtu_enum_to_int(
-+                      QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port));
-               if (mtu == -1) {
-                       mtu = QIB_DEFAULT_MTU;
--                      qib_ibmtu = 0; /* don't leave invalid value */
-               }
-               /* set max we can ever have for this driver load */
-               ppd->init_ibmaxlen = min(mtu > 2048 ?
-@@ -750,6 +780,11 @@ int qib_init(struct qib_devdata *dd, int reinit)
-                       lastfail = -ENETDOWN;
-                       continue;
-               }
-+              if (snoop_enable) {
-+                      ppd->filter_callback = NULL;
-+                      ppd->filter_value = NULL;
-+                      ppd->mode_flag = 0;
-+              }
-               portok++;
-       }
-@@ -1108,24 +1143,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
-       unsigned long flags;
-       struct qib_devdata *dd;
-       int ret;
--      int node_id;
--      int local_node_id = pcibus_to_node(dd->pcidev->bus);
--      s64 new_node_id = qib_numa_node;
--
--      if (local_node_id < 0)
--              local_node_id = numa_node_id();
--
--      if (new_node_id < 0)
--              new_node_id = local_node_id;
--
--      new_node_id = node_online(new_node_id) ? new_node_id :
--              local_node_id;
--
--      dd->local_node_id = local_node_id;
--      dd->assigned_node_id = new_node_id;
--      node_id = qib_numa_aware ? dd->local_node_id :
--              dd->assigned_node_id;
-       dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
-       if (!dd) {
-@@ -1273,6 +1291,15 @@ static int __init qlogic_ib_init(void)
-       if (ret)
-               goto bail;
-+      if (qib_numa_aware == QIB_DRIVER_AUTO_CONFIGURATION)
-+              qib_numa_aware = qib_configure_numa(boot_cpu_data) ? 1 : 0;
-+
-+      if (qib_rcvhdrpoll == QIB_DRIVER_AUTO_CONFIGURATION)
-+              qib_rcvhdrpoll = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
-+      if (qib_pio_avail_bits == QIB_DRIVER_AUTO_CONFIGURATION)
-+              qib_pio_avail_bits = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
-       /*
-        * These must be called before the driver is registered with
-        * the PCI subsystem.
-@@ -1298,13 +1325,13 @@ static int __init qlogic_ib_init(void)
- #ifdef QIB_CONFIG_KNX
-       ret = qib_knx_server_init();
-       if (ret < 0)
--              pr_err("Unable to start KNX listen thread\n");
-+              printk(KERN_ERR QIB_DRV_NAME
-+                     ": Unable to start KNX listen thread\n");
- #endif
--
-       goto bail; /* all OK */
- bail_dev:
--#ifdef CONFIG_INFINIBAND_QIB_DCA
-+#ifdef CONFIG_INFINIBAND_QIB_DCA
-       dca_unregister_notify(&dca_notifier);
- #endif
- #ifdef CONFIG_DEBUG_FS
-@@ -1328,7 +1355,6 @@ static void __exit qlogic_ib_cleanup(void)
- #ifdef QIB_CONFIG_KNX
-       qib_knx_server_exit();
- #endif
--
-       ret = qib_exit_qibfs();
-       if (ret)
-               pr_err(
-@@ -1348,6 +1374,7 @@ static void __exit qlogic_ib_cleanup(void)
-       idr_destroy(&qib_unit_table);
-       qib_dev_cleanup();
-+      qib_clean_mod_param();
- }
- module_exit(qlogic_ib_cleanup);
-@@ -1560,6 +1587,8 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-       }
-       qib_verify_pioperf(dd);
-+      if (snoop_enable)
-+              qib_snoop_add(dd);
- bail:
-       return ret;
- }
-@@ -1572,6 +1601,9 @@ static void qib_remove_one(struct pci_dev *pdev)
-       /* unregister from IB core */
-       qib_unregister_ib_device(dd);
-+#ifdef QIB_CONFIG_KNX
-+      qib_knx_remove_device(dd);
-+#endif
-       /*
-        * Disable the IB link, disable interrupts on the device,
-        * clear dma engines, etc.
-@@ -1686,7 +1718,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
-       unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
-       size_t size;
-       gfp_t gfp_flags;
--      int old_node_id;
-+      int old_dev_node;
-       /*
-        * GFP_USER, but without GFP_FS, so buffer cache can be
-@@ -1706,14 +1738,14 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
-       if (!rcd->rcvegrbuf) {
-               rcd->rcvegrbuf =
-                       kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
--                              GFP_KERNEL, rcd->node_id);
-+                                   GFP_KERNEL, rcd->node_id);
-               if (!rcd->rcvegrbuf)
-                       goto bail;
-       }
-       if (!rcd->rcvegrbuf_phys) {
-               rcd->rcvegrbuf_phys =
-                       kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
--                              GFP_KERNEL, rcd->node_id);
-+                                   GFP_KERNEL, rcd->node_id);
-               if (!rcd->rcvegrbuf_phys)
-                       goto bail_rcvegrbuf;
-       }
-@@ -1721,13 +1753,13 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
-               if (rcd->rcvegrbuf[e])
-                       continue;
--              old_node_id = dev_to_node(&dd->pcidev->dev);
-+              old_dev_node = dev_to_node(&dd->pcidev->dev);
-               set_dev_node(&dd->pcidev->dev, rcd->node_id);
-               rcd->rcvegrbuf[e] =
-                       dma_alloc_coherent(&dd->pcidev->dev, size,
-                                          &rcd->rcvegrbuf_phys[e],
-                                          gfp_flags);
--              set_dev_node(&dd->pcidev->dev, old_node_id);
-+              set_dev_node(&dd->pcidev->dev, old_dev_node);
-               if (!rcd->rcvegrbuf[e])
-                       goto bail_rcvegrbuf_phys;
-       }
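The rename from old_node_id to old_dev_node is cosmetic, but the pattern around it is the point of this function: dma_alloc_coherent() takes its NUMA placement hint from the device, so the code temporarily re-homes the PCI device onto the context's node. Condensed, the sequence used above is:

	int old_dev_node = dev_to_node(&dd->pcidev->dev);

	set_dev_node(&dd->pcidev->dev, rcd->node_id);	/* placement hint */
	buf = dma_alloc_coherent(&dd->pcidev->dev, size, &phys, gfp_flags);
	set_dev_node(&dd->pcidev->dev, old_dev_node);	/* restore */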
-diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c
-index c15276f..f692913 100644
---- a/drivers/infiniband/hw/qib/qib_knx.c
-+++ b/drivers/infiniband/hw/qib/qib_knx.c
-@@ -1,5 +1,5 @@
- /*
-- * Copyright (c) 2012 Intel Corporation. All rights reserved.
-+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
-  *
-  * This software is available to you under a choice of one of two
-  * licenses.  You may choose to be licensed under the terms of the GNU
-@@ -37,12 +37,21 @@
- #include "qib.h"
- #include "qib_knx.h"
-+#include "qib_user_sdma.h"
-+#include "qib_knx_common.h"
- unsigned int qib_knx_nconns = 5;
- module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO);
- MODULE_PARM_DESC(num_conns, "Max number of pending connections");
- #define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9
-+#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
-+
-+#define knx_sdma_next(sdma) \
-+      sdma->head = ((sdma->head + 1) % sdma->desc_num)
-+#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub)
-+#define QIB_KNX_SDMA_STATUS(sdma, st) \
-+      QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)st << 32) | 1)
- struct qib_knx_server {
-       struct task_struct *kthread;
-@@ -82,7 +91,16 @@ struct qib_knx_mem_map_sg {
-       struct scif_range *pages;
- };
-+struct qib_knx_tidrcv {
-+      struct qib_knx_rma tidmem;
-+      u64 tidbase;
-+      u32 tidcnt;
-+};
-+
- struct qib_knx_ctxt {
-+      u16 ctxt;
-+      struct qib_knx *knx;
-+      struct qib_pportdata *ppd;
-       /* local registered memory for PIO buffers */
-       struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT];
-       /* local registered memory for user registers */
-@@ -104,6 +122,23 @@ struct qib_knx_ctxt {
-       __u64 status;
-       __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT];
-       __u32 runtime_flags;
-+
-+      struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT];
-+};
-+
-+struct qib_knx_sdma {
-+      /* KNX flags page */
-+      struct scif_range *mflag_pages;
-+      struct qib_knx_sdma_mflags *mflags;
-+      /* KNX descriptor queue */
-+      struct scif_range *queue_pages;
-+      struct qib_knx_sdma_desc *queue;
-+      u32 desc_num;
-+      /* host flags (in host memory) */
-+      struct qib_knx_rma hflags_mem;
-+      struct qib_knx_sdma_hflags *hflags;
-+      u32 head;                           /* shadow */
-+      u32 complete;
- };
- struct qib_knx {
-@@ -114,10 +149,16 @@ struct qib_knx {
-       int numa_node;
-       struct qib_devdata *dd;
-       struct qib_knx_ctxt **ctxts;
-+      spinlock_t ctxt_lock;
-+      resource_size_t bar;
-+      u64 barlen;
-+      struct qib_knx_sdma *sdma;
-+      struct task_struct *sdma_poll;
-+      atomic_t tref;
-+      char tname[64];
-+      struct qib_knx_rma tidmem;
- };
--#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
--
- static struct qib_knx_server *server;
- static int qib_knx_init(struct qib_knx_server *);
-@@ -127,19 +168,20 @@ static off_t qib_knx_register_memory(struct qib_knx *, struct qib_knx_rma *,
-                                    void *, size_t, int, const char *);
- static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *,
-                                    const char *);
-+static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t);
- static ssize_t qib_show_knx_node(struct device *, struct device_attribute *,
-                                char *);
--
--static DEVICE_ATTR(knx_node, S_IRUGO, qib_show_knx_node, NULL);
--static ssize_t qib_show_knx_node(struct device *dev,
--                               struct device_attribute *attr, char *buf)
--{
--      struct qib_ibdev *ibdev =
--              container_of(dev, struct qib_ibdev, ibdev.dev);
--      struct qib_devdata *dd = dd_from_dev(ibdev);
--
--      return scnprintf(buf, PAGE_SIZE, "%u\n", dd->knx->peer.node);
--}
-+static int qib_knx_sdma_init(struct qib_knx *);
-+static void qib_knx_sdma_teardown(struct qib_knx *);
-+static __always_inline struct page *
-+qib_knx_phys_to_page(struct qib_knx *, unsigned long);
-+static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *,
-+                                    struct qib_knx_sdma_desc *,
-+                                    struct qib_user_sdma_queue *,
-+                                    int *, struct list_head *);
-+static int qib_knx_sdma_poll(void *);
-+static int qib_knx_tidrcv_init(struct qib_knx *);
-+static int qib_knx_tidrcv_teardown(struct qib_knx *);
- inline struct qib_knx *qib_knx_get(u16 nodeid)
- {
-@@ -162,10 +204,11 @@ inline struct qib_devdata *qib_knx_node_to_dd(u16 node)
- static int qib_knx_init(struct qib_knx_server *server)
- {
--      int ret = 0, num_devs = 0, i;
--      struct qib_devdata *dd;
-+      int ret = 0, num_devs = 0, i, seen = 0;
-+      unsigned fewest = -1U;
-+      struct qib_devdata *dd = NULL, *dd_no_numa = NULL;
-       struct qib_knx *knx;
--      struct ib_device *ibdev;
-+      struct qib_device_info info = { -1 };
-       knx = kzalloc(sizeof(*knx), GFP_KERNEL);
-       if (!knx) {
-@@ -179,10 +222,14 @@ static int qib_knx_init(struct qib_knx_server *server)
-       }
-       INIT_LIST_HEAD(&knx->list);
-+      spin_lock_init(&knx->ctxt_lock);
-       knx->numa_node = -1;
-       ret = scif_pci_info(knx->peer.node, &knx->pci_info);
--      if (!ret)
-+      if (!ret) {
-               knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus);
-+              knx->bar = pci_resource_start(knx->pci_info.pdev, 0);
-+              knx->barlen = pci_resource_len(knx->pci_info.pdev, 0);
-+      }
-       if (knx->numa_node < 0)
-               knx->numa_node = numa_node_id();
-@@ -190,40 +237,58 @@ static int qib_knx_init(struct qib_knx_server *server)
-       num_devs = qib_count_units(NULL, NULL);
-       if (unlikely(!num_devs)) {
-               ret = -ENODEV;
-+              /* we have to send this */
-+              scif_send(knx->epd.epd, &info, sizeof(info),
-+                        SCIF_SEND_BLOCK);
-               goto done;
-       }
--      for (i = 0; i < num_devs; i++) {
-+      /*
-+       * Attempt to find an HCA on the same NUMA node as the card. Save
-+       * the HCA with the fewest cards already assigned to it in case
-+       * there is no HCA on the same NUMA node.
-+       */
-+      for (i = 0; seen < num_devs; i++) {
-               dd = qib_lookup(i);
--              if (dd && dd->local_node_id == knx->numa_node)
--                      knx->dd = dd;
-+              if (dd) {
-+                      if (dd->local_node_id == knx->numa_node) {
-+                              knx->dd = dd;
-+                              break;
-+                      } else if (dd->num_knx < fewest) {
-+                              dd_no_numa = dd;
-+                              fewest = dd->num_knx;
-+                      }
-+                      seen++;
-+              }
-       }
-       /*
-        * We didn't find a QIB device on the same NUMA node,
--       * round-robin across all devices.
-+       * use the "backup".
-        */
-       if (unlikely(!knx->dd)) {
--              knx->dd = qib_lookup(server->nclients % num_devs);
--              /* it is possible for qib_lookup to return NULL */
--              if (unlikely(!knx->dd)) {
-+              if (!dd_no_numa) {
-                       ret = -ENODEV;
-+                      /* we have to send this */
-+                      scif_send(knx->epd.epd, &info, sizeof(info),
-+                                SCIF_SEND_BLOCK);
-                       goto done;
-               }
-+              knx->dd = dd_no_numa;
-       }
--      knx->dd->node_id = knx->peer.node;
--      knx->dd->knx = knx;
-+      knx->dd->num_knx++;
-+
-       knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts),
-                                 GFP_KERNEL, knx->numa_node);
-       if (!knx->ctxts)
-               ret = -ENOMEM;
--      ibdev = &knx->dd->verbs_dev.ibdev;
--      ret = device_create_file(&ibdev->dev, &dev_attr_knx_node);
-+      /* Give the KNX the associated device information. */
-+      info.unit = knx->dd->unit;
-+      ret = scif_send(knx->epd.epd, &info, sizeof(info),
-+                      SCIF_SEND_BLOCK);
-+
-+      ret = qib_knx_sdma_init(knx);
-       if (ret)
--              /*
--               * clear the error code since we don't want to fail the
--               * initialization.
--               */
--              ret = 0;
-+              goto done;
-+      atomic_set(&knx->tref, 0);
-+      ret = qib_knx_tidrcv_init(knx);
- done:
-       spin_lock(&server->client_lock);
-       list_add_tail(&knx->list, &server->clients);
-@@ -237,13 +302,12 @@ bail:
- static void qib_knx_free(struct qib_knx *knx, int unload)
- {
-       struct qib_devdata *dd = knx->dd;
--      struct ib_device *ibdev;
-       int i;
--      if (dd) {
--              ibdev = &dd->verbs_dev.ibdev;
--              device_remove_file(&ibdev->dev, &dev_attr_knx_node);
--      }
-+      qib_knx_tidrcv_teardown(knx);
-+      qib_knx_sdma_teardown(knx);
-+      if (dd)
-+              dd->num_knx--;
-       /*
-        * If this function is called with unload set, we can
-        * free the context data. Otherwise, we are here
-@@ -360,9 +424,16 @@ done:
-       return ret;
- }
--int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt)
-+static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src,
-+                                         size_t size)
- {
--      struct qib_knx *knx = dd_to_knx(dd);
-+      memcpy_fromio(dst, src, size);
-+}
-+
-+int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt)
-+{
-+      struct qib_knx *knx = qib_knx_get(node_id);
-+      struct qib_devdata *dd = knx->dd;
-       struct qib_knx_ctxt *ptr;
-       int ret = 0;
-@@ -379,7 +450,14 @@ int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt)
-               ret = -ENOMEM;
-               goto bail;
-       }
-+      ptr->knx = knx;
-+      ptr->ctxt = ctxt;
-+      ptr->ppd = dd->rcd[ctxt]->ppd;
-+
-+      spin_lock(&knx->ctxt_lock);
-       knx->ctxts[ctxt] = ptr;
-+      dd->rcd[ctxt]->krcd = ptr;
-+      spin_unlock(&knx->ctxt_lock);
- bail:
-       return ret;
- }
-@@ -388,10 +466,11 @@ __u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd,
-                       enum qib_knx_ctxtinfo_type type,
-                       struct file *fp)
- {
--      struct qib_knx *knx = dd_to_knx(rcd->dd);
-+      struct qib_knx *knx = rcd->krcd->knx;
-       __u16 subctxt;
-       __u64 ret = 0;
-+      spin_lock(&knx->ctxt_lock);
-       if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt])
-               goto done;
-@@ -414,6 +493,7 @@ __u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd,
-               break;
-       }
- done:
-+      spin_unlock(&knx->ctxt_lock);
-       return ret;
- }
-@@ -424,7 +504,7 @@ int qib_knx_setup_piobufs(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
-       char buf[16];
-       off_t offset;
-       int ret = 0;
--      struct qib_knx *knx = dd_to_knx(dd);
-+      struct qib_knx *knx = rcd->krcd->knx;
-       if (unlikely(!knx)) {
-               ret = -ENODEV;
-@@ -472,7 +552,7 @@ int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- {
-       int ret = 0;
-       off_t offset;
--      struct qib_knx *knx = dd_to_knx(dd);
-+      struct qib_knx *knx = rcd->krcd->knx;
-       if (unlikely(!knx)) {
-               ret = -ENODEV;
-@@ -496,7 +576,7 @@ int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
-               goto bail;
-       }
-       knx->ctxts[rcd->ctxt]->uregbase = offset;
--
-+
-       /*
-        * register the PIO availability registers.
-        * user status 64bit values are part of the page containing the
-@@ -533,7 +613,7 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- {
-       struct qib_knx_mem_map_sg *mapsg;
-       struct qib_knx_mem_map *map;
--      struct qib_knx *knx = dd_to_knx(dd);
-+      struct qib_knx *knx = rcd->krcd->knx;
-       dma_addr_t offset;
-       struct scatterlist *sg;
-       unsigned num_pages;
-@@ -590,7 +670,8 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
-        * can use 64bit addresses for DMA but the CPU might not.
-        * (see pci_set_dma_mask() in qib_pcie.c).
-        */
--      mapsg->sglist = kzalloc(num_pages * sizeof(*mapsg->sglist), GFP_KERNEL);
-+      mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist),
-+                                   GFP_KERNEL, knx->numa_node);
-       if (!mapsg->sglist) {
-               ret = -ENOMEM;
-               goto bail_rcvq_pages;
-@@ -625,7 +706,7 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
-       }
-       rcd->rcvhdrq_phys = sg_dma_address(mapsg->sglist);
-       rcd->rcvhdrq = mapsg->pages->va[0];
--
-+
-       map = &knx->ctxts[rcd->ctxt]->sbufstatus;
-       ret = scif_get_pages(knx->epd.epd, binfo->spi_sendbuf_status,
-                            PAGE_SIZE, &map->pages);
-@@ -700,7 +781,7 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
-       struct qib_knx_mem_map_sg *map;
-       struct scatterlist *sg;
-       struct qib_devdata *dd = rcd->dd;
--      struct qib_knx *knx = dd_to_knx(dd);
-+      struct qib_knx *knx = rcd->krcd->knx;
-       unsigned size, egrsize, egrcnt, num_pages, bufs_ppage,
-               egrbufcnt;
-       dma_addr_t dma_addr, page;
-@@ -761,7 +842,7 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
-                       goto bail_free_scif;
-               }
-       }
--
-+
-       /*
-        * Allocate array of DMA addresses for each of the mapped
-        * pages.
-@@ -775,10 +856,11 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
-                       goto bail_free_rcvegr;
-               }
-       }
--
-+
-       map->size = size;
-       map->dir = DMA_BIDIRECTIONAL;
--      map->sglist = kzalloc(num_pages * sizeof(*map->sglist), GFP_KERNEL);
-+      map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL,
-+                                 knx->numa_node);
-       if (!map->sglist) {
-               ret = -ENOMEM;
-               goto bail_free_rcvegr_phys;
-@@ -830,7 +912,7 @@ bail:
- void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- {
--      struct qib_knx *knx = dd_to_knx(dd);
-+      struct qib_knx *knx = rcd->krcd->knx;
-       struct qib_knx_ctxt *ctxt;
-       char buf[16];
-       int i, ret = 0;
-@@ -838,7 +920,11 @@ void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
-       if (!rcd || !knx || !knx->ctxts)
-               return;
-+      spin_lock(&knx->ctxt_lock);
-       ctxt = knx->ctxts[rcd->ctxt];
-+      knx->ctxts[rcd->ctxt] = NULL;
-+      spin_unlock(&knx->ctxt_lock);
-+
-       if (!ctxt)
-               return;
-@@ -884,12 +970,535 @@ void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
-               qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf);
-       }
--      /* MITKO XXX: handle rcd->tid_pg_list */
--      knx->ctxts[rcd->ctxt] = NULL;
-       kfree(ctxt);
-       kfree(rcd);
- }
-+/*
-+ * TID management for processes on the MIC happens on the MIC. Therefore,
-+ * we only register the HW TID array here.
-+ * The MIC will calculate TID array offsets using the same algorithm as
-+ * the host. Therefore, it is OK that the entire HW TID array is mapped
-+ * since neither side should step on the other.
-+ */
-+static int qib_knx_tidrcv_init(struct qib_knx *knx)
-+{
-+      struct qib_devdata *dd = knx->dd;
-+      struct qib_knx_tid_info info;
-+      void *tidbase;
-+      int ret = 0;
-+      off_t offset = 0;
-+      ssize_t len;
-+      char buf[64];
-+
-+      memset(&info, 0, sizeof(info));
-+
-+      info.tidcnt = dd->rcvtidcnt;
-+      tidbase = ((char *)dd->kregbase + dd->rcvtidbase);
-+      info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase);
-+      info.tidtemplate = dd->tidtemplate;
-+      info.invalidtid = dd->tidinvalid;
-+      /* information needed to properly calculate DMA addresses for MIC pages */
-+      info.bar_addr = knx->bar;
-+      info.bar_len = knx->barlen;
-+
-+      snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
-+      offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase,
-+                                       info.tidbase_len, SCIF_PROT_WRITE,
-+                                       buf);
-+      info.tidbase_offset = offset;
-+      if (IS_ERR_VALUE(offset))
-+              ret = offset;
-+      len = scif_send(knx->epd.epd, &info, sizeof(info),
-+                      SCIF_SEND_BLOCK);
-+      if (len < (ssize_t)sizeof(info))
-+              ret = -EFAULT;
-+      return ret;
-+}
-+
-+static int qib_knx_tidrcv_teardown(struct qib_knx *knx)
-+{
-+      char buf[64];
-+      snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
-+      return qib_knx_unregister_memory(knx, &knx->tidmem, buf);
-+}
-+
-+static int qib_knx_sdma_init(struct qib_knx *knx)
-+{
-+      struct qib_knx_host_mem flags;
-+      struct qib_knx_knc_mem mflags;
-+      struct qib_knx_sdma *sdma;
-+      char buf[64];
-+      int ret = 0;
-+
-+      sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node);
-+      if (!sdma) {
-+              ret = -ENOMEM;
-+              goto done;
-+      }
-+      sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node);
-+      if (!sdma->hflags) {
-+              ret = -ENOMEM;
-+              goto done_free;
-+      }
-+      snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node);
-+      flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem,
-+                                                   sdma->hflags,
-+                                                   PAGE_SIZE,
-+                                                   SCIF_PROT_WRITE,
-+                                                   buf);
-+      if (IS_ERR_VALUE(flags.flags_offset)) {
-+              ret = flags.flags_offset;
-+              goto free_flags;
-+      }
-+      sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt;
-+      flags.desc_num = sdma->desc_num;
-+      ret = scif_send(knx->epd.epd, &flags, sizeof(flags),
-+                      SCIF_SEND_BLOCK);
-+      if (ret < (int)sizeof(flags)) {
-+              ret = -EFAULT;
-+              goto unregister;
-+      }
-+      ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags),
-+                      SCIF_RECV_BLOCK);
-+      if (ret < (int)sizeof(mflags)) {
-+              ret = -EINVAL;
-+              goto unregister;
-+      }
-+      ret = scif_get_pages(knx->epd.epd, mflags.flags_offset,
-+                           PAGE_SIZE, &sdma->mflag_pages);
-+      if (ret < 0 || !sdma->mflag_pages->nr_pages) {
-+              ret = -EFAULT;
-+              goto unregister;
-+      }
-+      sdma->mflags = sdma->mflag_pages->va[0];
-+      ret = scif_get_pages(knx->epd.epd, mflags.queue_offset,
-+                           mflags.queue_len, &sdma->queue_pages);
-+      if (ret < 0)
-+              goto put_flags;
-+      if ((sdma->queue_pages->nr_pages * PAGE_SIZE) !=
-+          mflags.queue_len) {
-+              ret = -EFAULT;
-+              goto put_queue;
-+      }
-+      sdma->queue = sdma->queue_pages->va[0];
-+      sdma->complete = -1;
-+      sdma->head = -1;
-+      /* set the initial trigger value */
-+      QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1);
-+      QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
-+      snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll",
-+               knx->peer.node);
-+      knx->sdma = sdma;
-+      ret = 0;
-+      goto done;
-+put_queue:
-+      scif_put_pages(sdma->queue_pages);
-+put_flags:
-+      scif_put_pages(sdma->mflag_pages);
-+unregister:
-+      qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf);
-+free_flags:
-+      kfree(sdma->hflags);
-+done_free:
-+      kfree(sdma);
-+done:
-+      /*
-+       * we have to respond to the MIC so it doesn't get stuck
-+       * in the scif_recv call
-+       */
-+      scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK);
-+      return ret;
-+}
-+
-+static void qib_knx_sdma_teardown(struct qib_knx *knx)
-+{
-+      if (knx->sdma_poll)
-+              kthread_stop(knx->sdma_poll);
-+      if (knx->sdma) {
-+              if (knx->sdma->queue_pages->nr_pages) {
-+                      knx->sdma->queue = NULL;
-+                      scif_put_pages(knx->sdma->queue_pages);
-+              }
-+              if (knx->sdma->mflag_pages->nr_pages) {
-+                      knx->sdma->mflags = NULL;
-+                      scif_put_pages(knx->sdma->mflag_pages);
-+              }
-+              kfree(knx->sdma->hflags);
-+              kfree(knx->sdma);
-+              knx->sdma = NULL;
-+      }
-+}
-+
-+int qib_knx_sdma_queue_create(struct file *fd)
-+{
-+      struct qib_ctxtdata *rcd = ctxt_fp(fd);
-+      struct qib_devdata *dd = rcd->dd;
-+      struct qib_knx *knx = rcd->krcd->knx;
-+      struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt];
-+      u8 subctxt = subctxt_fp(fd);
-+      int ret = 0;
-+
-+      if (!ctxt) {
-+              ret = -EINVAL;
-+              goto done;
-+      }
-+      ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev,
-+                                                     dd->unit, rcd->ctxt,
-+                                                     subctxt);
-+      if (!ctxt->pq[subctxt]) {
-+              ret = -ENOMEM;
-+              goto done;
-+      }
-+      user_sdma_queue_fp(fd) = ctxt->pq[subctxt];
-+      /*
-+       * We start the polling thread the first time a user SDMA
-+       * queue is created. There is no reason to take up CPU
-+       * cycles before then.
-+       */
-+      if (atomic_inc_return(&knx->tref) == 1) {
-+              knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx,
-+                                           knx->tname);
-+              if (IS_ERR(knx->sdma_poll)) {
-+                      ret = PTR_ERR(knx->sdma_poll);
-+                      atomic_dec(&knx->tref);
-+                      goto free_queue;
-+              }
-+      }
-+      goto done;
-+free_queue:
-+      user_sdma_queue_fp(fd) = NULL;
-+      qib_user_sdma_queue_destroy(ctxt->pq[subctxt]);
-+      ctxt->pq[subctxt] = NULL;
-+done:
-+      return ret;
-+}
-+
-+void qib_knx_sdma_queue_destroy(struct qib_filedata *fd)
-+{
-+      struct qib_ctxtdata *rcd = fd->rcd;
-+      struct qib_knx *knx;
-+      unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt;
-+
-+      /* Host processes do not have a KNX rcd pointer. */
-+      if (!rcd->krcd)
-+              return;
-+      knx = rcd->krcd->knx;
-+      /* We still have the memory pointer through fd->pq */
-+      spin_lock(&knx->ctxt_lock);
-+      if (knx->ctxts[ctxt])
-+              knx->ctxts[ctxt]->pq[subctxt] = NULL;
-+      spin_unlock(&knx->ctxt_lock);
-+      if (atomic_dec_and_test(&knx->tref)) {
-+              kthread_stop(knx->sdma_poll);
-+              knx->sdma_poll = NULL;
-+      }
-+}
-+
-+/*
-+ * Convert a MIC physical address to the corresponding host page.
-+ */
-+static __always_inline struct page *
-+qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr) {
-+      unsigned long paddr;
-+      if ((knx->bar + addr + PAGE_SIZE) >
-+          (knx->bar + knx->barlen))
-+              return NULL;
-+      paddr = knx->bar + addr;
-+      return pfn_to_page(paddr >> PAGE_SHIFT);
-+}
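The translation above works because the MIC's memory is exposed to the host through BAR 0: a MIC physical address addr lands at host physical address knx->bar + addr, and the guard rejects any page that would fall outside the knx->barlen aperture. As a purely illustrative example, with a BAR at 0x380000000000 and addr = 0x1000, the host page is pfn_to_page((0x380000000000 + 0x1000) >> PAGE_SHIFT).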
-+
-+static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt,
-+                                    struct qib_knx_sdma_desc *desc,
-+                                    struct qib_user_sdma_queue *pq,
-+                                    int *ndesc, struct list_head *list)
-+{
-+      struct qib_knx *knx = ctxt->knx;
-+      struct qib_user_sdma_pkt *pkt;
-+      dma_addr_t pbc_dma_addr;
-+      unsigned pktnw, pbcnw;
-+      u32 counter;
-+      u16 frag_size;
-+      int ret = 0;
-+      __le32 *pbc;
-+
-+      counter = pq->counter;
-+
-+      pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr);
-+      if (!pbc) {
-+              ret = -ENOMEM;
-+              goto done;
-+      }
-+      memcpy(pbc, desc->pbc, desc->pbclen);
-+
-+      pktnw = (le32_to_cpu(*pbc) & 0xFFFF);
-+      /*
-+       * This assignment is a bit strange. It's because the
-+       * pbc counts the number of 32 bit words in the full
-+       * packet _except_ the first word of the pbc itself...
-+       */
-+      pbcnw = (desc->pbclen >> 2) - 1;
-+
-+      if (pktnw < pbcnw) {
-+              ret = -EINVAL;
-+              goto free_pbc;
-+      }
-+
-+      if (pktnw != ((desc->length >> 2) + pbcnw)) {
-+              ret = -EINVAL;
-+              goto free_pbc;
-+      }
-+
-+      frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF;
-+      if (((frag_size ? frag_size : desc->length) + desc->pbclen) >
-+          ctxt->ppd->ibmaxlen) {
-+              ret = -EINVAL;
-+              goto free_pbc;
-+      }
-+      if (frag_size) {
-+              /* new SDMA "protocol" */
-+              unsigned pktsize, n;
-+
-+              n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1);
-+              pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n;
-+
-+              pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL);
-+              if (!pkt) {
-+                      ret = -ENOMEM;
-+                      goto free_pbc;
-+              }
-+              pkt->largepkt = 1;
-+              pkt->frag_size = frag_size;
-+              pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
-+
-+              if (desc->tidlen) {
-+                      char *tidsmptr = (char *)pkt + pktsize;
-+                      memcpy(tidsmptr, desc->tidsm, desc->tidlen);
-+                      pkt->tidsm =
-+                              (struct qib_tid_session_member *)tidsmptr;
-+                      pkt->tidsmcount = desc->tidlen /
-+                              sizeof(*desc->tidsm);
-+                      pkt->tidsmidx = 0;
-+              }
-+              *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
-+      } else {
-+              /* old SDMA */
-+              pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-+              if (!pkt) {
-+                      ret = -ENOMEM;
-+                      goto free_pbc;
-+              }
-+              pkt->largepkt = 0;
-+              pkt->frag_size = desc->length;
-+              pkt->addrlimit = ARRAY_SIZE(pkt->addr);
-+      }
-+      pkt->bytes_togo = desc->length;
-+      pkt->payload_size = 0;
-+      pkt->counter = counter;
-+      pkt->tiddma = !!desc->tidlen;
-+      /*
-+       * The generic user SDMA code will use this as a flag to
-+       * decide whether to call the KNx-specific pkt free
-+       * function. However, it doesn't know what the value
-+       * actually means.
-+       */
-+      pkt->remote = (u64)knx;
-+
-+      qib_user_sdma_init_frag(pkt, 0,
-+                              0, desc->pbclen,
-+                              1, 0,
-+                              0, 0,
-+                              NULL, pbc,
-+                              pbc_dma_addr, desc->pbclen);
-+      pkt->index = 0;
-+      pkt->naddr = 1;
-+
-+      if (desc->npages) {
-+              /* we have user data */
-+              int i;
-+              struct page *page;
-+              unsigned plen = 0, len = desc->length;
-+              for (i = 0; i < desc->npages; i++) {
-+                      unsigned long off = (i == 0 ? desc->offset : 0);
-+                      plen = (len > PAGE_SIZE ? PAGE_SIZE : len);
-+                      page = qib_knx_phys_to_page(knx, desc->pages[i]);
-+                      ret = qib_user_sdma_page_to_frags(knx->dd, pq,
-+                                 pkt, page, 0, off,
-+                                 (off + plen > PAGE_SIZE ?
-+                                  PAGE_SIZE - off : plen),
-+                                 NULL);
-+                      if (ret < 0)
-+                              goto free_sdma;
-+                      len -= plen - off;
-+              }
-+      } else {
-+              pkt->addr[0].last_desc = 1;
-+              if (pbc_dma_addr == 0) {
-+                      pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev,
-+                                                    pbc, desc->pbclen,
-+                                                    DMA_TO_DEVICE);
-+                      if (dma_mapping_error(&knx->dd->pcidev->dev,
-+                                            pbc_dma_addr)) {
-+                              ret = -ENOMEM;
-+                              goto free_sdma;
-+                      }
-+                      pkt->addr[0].addr = pbc_dma_addr;
-+                      pkt->addr[0].dma_mapped = 1;
-+              }
-+      }
-+      counter++;
-+      pkt->pq = pq;
-+      pkt->index = 0;
-+      *ndesc = pkt->naddr;
-+
-+      list_add_tail(&pkt->list, list);
-+      goto done;
-+free_sdma:
-+      if (pkt->largepkt)
-+              kfree(pkt);
-+      else
-+              kmem_cache_free(pq->pkt_slab, pkt);
-+free_pbc:
-+      if (pbc_dma_addr)
-+              dma_pool_free(pq->header_cache, pbc, pbc_dma_addr);
-+      else
-+              kfree(pbc);
-+done:
-+      return ret;
-+}
-+
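A worked instance of the PBC length checks above: for pbclen = 16 (four 32-bit words), pbcnw = (16 >> 2) - 1 = 3, because the count carried in the PBC excludes the PBC's own first word. A 64-byte payload is desc->length >> 2 = 16 words, so the low 16 bits of the first PBC word must read pktnw = 16 + 3 = 19; anything else fails the checks with -EINVAL.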
-+void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt)
-+{
-+      struct qib_knx *knx = (struct qib_knx *)pkt->remote;
-+      struct qib_knx_sdma *sdma = knx->sdma;
-+      sdma_next(sdma, complete);
-+      QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
-+}
-+
-+static int qib_knx_sdma_poll(void *data)
-+{
-+      struct qib_knx *knx = (struct qib_knx *)data;
-+      struct qib_knx_ctxt *ctxt;
-+      struct qib_knx_sdma_desc desc;
-+      struct qib_knx_sdma *sdma = knx->sdma;
-+      struct qib_user_sdma_queue *pq;
-+      struct list_head list;
-+      u32 new_head;
-+      int ret = 0, ndesc = 0, added;
-+
-+      if (!sdma)
-+              return -EFAULT;
-+
-+      while (!kthread_should_stop()) {
-+              added = 0;
-+              new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger);
-+              while (sdma->head != new_head) {
-+                      knx_sdma_next(sdma);
-+                      qib_knx_memcpy(&desc, sdma->queue + sdma->head,
-+                                     sizeof(desc));
-+                      if (!desc.ctxt) {
-+                              QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
-+                              continue;
-+                      }
-+                      spin_lock(&knx->ctxt_lock);
-+                      ctxt = knx->ctxts[desc.ctxt];
-+                      if (!ctxt) {
-+                              /* we should never get here */
-+                              QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
-+                              goto done_unlock;
-+                      }
-+                      pq = ctxt->pq[desc.subctxt];
-+                      if (!pq) {
-+                              QIB_KNX_SDMA_STATUS(sdma, -EFAULT);
-+                              goto done_unlock;
-+                      }
-+                      mutex_lock(&pq->lock);
-+                      if (pq->added > ctxt->ppd->sdma_descq_removed)
-+                              qib_user_sdma_hwqueue_clean(ctxt->ppd);
-+                      if (pq->num_sending)
-+                              qib_user_sdma_queue_clean(ctxt->ppd, pq);
-+
-+                      INIT_LIST_HEAD(&list);
-+                      ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq,
-+                                                       &ndesc, &list);
-+                      QIB_KNX_SDMA_STATUS(sdma, ret);
-+                      if (!list_empty(&list)) {
-+                              if (qib_sdma_descq_freecnt(ctxt->ppd) <
-+                                  ndesc) {
-+                                      qib_user_sdma_hwqueue_clean(
-+                                              ctxt->ppd);
-+                                      if (pq->num_sending)
-+                                              qib_user_sdma_queue_clean(
-+                                                      ctxt->ppd, pq);
-+                              }
-+                              ret = qib_user_sdma_push_pkts(ctxt->ppd,
-+                                                            pq, &list, 1);
-+                              if (ret < 0)
-+                                      goto free_pkts;
-+                              else {
-+                                      pq->counter++;
-+                                      added++;
-+                              }
-+                      }
-+free_pkts:
-+                      if (!list_empty(&list))
-+                              qib_user_sdma_free_pkt_list(
-+                                      &knx->dd->pcidev->dev, pq, &list);
-+                      mutex_unlock(&pq->lock);
-+done_unlock:
-+                      spin_unlock(&knx->ctxt_lock);
-+              }
-+              if (!added) {
-+                      int i;
-+                      /*
-+                       * Push the queues along
-+                       * The polling thread will enter the inner loop only
-+                       * if the KNX has posted new descriptors to the queue.
-+                       * However, any packets that have been completed by
-+                       * the HW need to be cleaned and that won't happen
-+                       * unless we explicitly check.
-+                       */
-+                      for (i = 0;
-+                           i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT;
-+                           i++) {
-+                              int c = i / QLOGIC_IB_MAX_SUBCTXT,
-+                                      s = i % QLOGIC_IB_MAX_SUBCTXT;
-+                              spin_lock(&knx->ctxt_lock);
-+                              ctxt = knx->ctxts[c];
-+                              if (!ctxt)
-+                                      goto loop_unlock;
-+                              pq = ctxt->pq[s];
-+                              if (!pq)
-+                                      goto loop_unlock;
-+                              mutex_lock(&pq->lock);
-+                              if (pq->num_sending)
-+                                      qib_user_sdma_queue_clean(ctxt->ppd,
-+                                                                pq);
-+                              mutex_unlock(&pq->lock);
-+loop_unlock:
-+                              spin_unlock(&knx->ctxt_lock);
-+                      }
-+                      cond_resched(); /* yield between poll sweeps */
-+              }
-+      }
-+      return ret;
-+}
-+
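Together with qib_knx_sdma_free_pkt() above, this poll loop completes a single-producer/single-consumer protocol over three shared 64-bit words:

	/*
	 *  MIC (producer)                        host (consumer)
	 *  fill queue[head+1..n], then       ->  memcpy_fromio() each desc
	 *  bump hflags->trigger (host mem)   ->  poll trigger vs. shadow head
	 *  poll mflags->status               <-  per-descriptor result code
	 *  poll mflags->complete             <-  bumped as each pkt is freed
	 */

The for-loop in the !added branch exists because hardware completions arrive without a new trigger, so the queues must still be cleaned on idle sweeps.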
-+void qib_knx_remove_device(struct qib_devdata *dd)
-+{
-+      if (server && dd->num_knx) {
-+              struct qib_knx *knx, *knxp;
-+              list_for_each_entry_safe(knx, knxp, &server->clients, list) {
-+                      if (knx->dd == dd) {
-+                              spin_lock(&server->client_lock);
-+                              list_del(&knx->list);
-+                              server->nclients--;
-+                              spin_unlock(&server->client_lock);
-+                              qib_knx_free(knx, 0);
-+                              kfree(knx);
-+                      }
-+              }
-+      }
-+      return;
-+}
-+
- int __init qib_knx_server_init(void)
- {
-       server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL);
-@@ -908,7 +1517,6 @@ void __exit qib_knx_server_exit(void)
- {
-       if (server) {
-               struct qib_knx *t, *tt;
--
-               /* Stop the thread so we don't accept any new connections. */
-               kthread_stop(server->kthread);
-               list_for_each_entry_safe(t, tt, &server->clients, list) {
-@@ -921,3 +1529,4 @@ void __exit qib_knx_server_exit(void)
-               kfree(server);
-       }
- }
-+
-diff --git a/drivers/infiniband/hw/qib/qib_knx.h b/drivers/infiniband/hw/qib/qib_knx.h
-index d767a60..fcb5a3e 100644
---- a/drivers/infiniband/hw/qib/qib_knx.h
-+++ b/drivers/infiniband/hw/qib/qib_knx.h
-@@ -1,5 +1,5 @@
- /*
-- * Copyright (c) 2012 Intel Corporation. All rights reserved.
-+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
-  *
-  * This software is available to you under a choice of one of two
-  * licenses.  You may choose to be licensed under the terms of the GNU
-@@ -44,13 +44,12 @@ enum qib_knx_ctxtinfo_type {
- int __init qib_knx_server_init(void);
- void __exit qib_knx_server_exit(void);
--static __always_inline struct qib_knx *dd_to_knx(struct qib_devdata *dd)
--{
--      return (struct qib_knx *)dd->knx;
--}
-+
-+void qib_knx_remove_device(struct qib_devdata *);
-+
- inline struct qib_knx *qib_knx_get(uint16_t);
- inline struct qib_devdata *qib_knx_node_to_dd(uint16_t);
--int qib_knx_alloc_ctxt(struct qib_devdata *, unsigned);
-+int qib_knx_alloc_ctxt(u16, unsigned);
- int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16);
- int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *,
-                         struct qib_base_info *);
-@@ -60,4 +59,6 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *, struct qib_base_info *);
- void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
- __u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type,
-                       struct file *);
-+int qib_knx_sdma_queue_create(struct file *);
-+void qib_knx_sdma_queue_destroy(struct qib_filedata *);
- #endif /* _QIB_KNX_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_common.h b/drivers/infiniband/hw/qib/qib_knx_common.h
-new file mode 100644
-index 0000000..9639592
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_knx_common.h
-@@ -0,0 +1,126 @@
-+/*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses.  You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ *     Redistribution and use in source and binary forms, with or
-+ *     without modification, are permitted provided that the following
-+ *     conditions are met:
-+ *
-+ *      - Redistributions of source code must retain the above
-+ *        copyright notice, this list of conditions and the following
-+ *        disclaimer.
-+ *
-+ *      - Redistributions in binary form must reproduce the above
-+ *        copyright notice, this list of conditions and the following
-+ *        disclaimer in the documentation and/or other materials
-+ *        provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+#ifndef _QIB_KNX_COMMON_H
-+#define _QIB_KNX_COMMON_H
-+
-+struct qib_device_info {
-+      u16 unit;
-+};
-+
-+#define QIB_SDMA_MAX_NPAGES 33
-+#define QIB_KNX_SDMA_VALUE(fld) (*(volatile u64 *)&(fld))
-+#define QIB_KNX_SDMA_SET(fld, val)            \
-+      do {                                    \
-+              fld = (u64)(val);               \
-+              smp_mb();                       \
-+      } while (0)
-+
-+struct qib_knx_host_mem {
-+      off_t flags_offset;
-+      unsigned desc_num;
-+};
-+
-+struct qib_knx_knc_mem {
-+      off_t flags_offset;
-+      off_t queue_offset;
-+      size_t queue_len;
-+};
-+
-+struct qib_tid_sm {
-+        __u16 tid;
-+        __u16 offset;
-+        __u16 length;
-+};
-+
-+/*
-+ * SDMA transfer descriptor. This structure communicates the SDMA
-+ * transfers from the MIC to the host. It is very important for
-+ * performance reasons that its size is a multiple of 64B in order
-+ * to guarantee proper alignment in the descriptor array.
-+ */
-+struct qib_knx_sdma_desc {
-+      u16 ctxt;
-+      u16 subctxt;
-+      u32 pbclen;
-+      __le32 pbc[16];
-+      u64 length;
-+      u32 npages;
-+      unsigned tidlen;
-+        off_t offset;
-+      unsigned long pages[QIB_SDMA_MAX_NPAGES];
-+      /* This array is 198B so the compiler will pad
-+       * it by 2B to make it a multiple of 8B. */
-+      struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES];
-+      /*
-+       * The two paddings below are included in order to
-+       * make the size of the entire struct 576B (multiple
-+       * of 64B). The goal is that all elements in an array
-+       * of struct qib_knx_sdma_desc are 64B aligned.
-+       */
-+      u16 __padding0;
-+      u64 __padding1[2];
-+};
-+
-+/*
-+ * trigger, status, and complete fields are each padded out to
-+ * 8 u64s so that each one sits in its own 64B cacheline.
-+ */
-+struct qib_knx_sdma_hflags {
-+      u64 trigger;
-+      u64 __padding[7];
-+};
-+
-+#define sdma_next(s, fld) \
-+      (s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1)
-+
-+struct qib_knx_sdma_mflags {
-+      u64 status;
-+      u64 __padding1[7];
-+      u64 complete;
-+      u64 __padding2[7];
-+};
-+
-+struct qib_knx_tid_info {
-+      /* this is the entire set of 512 entries (= 4K) so
-+       * we can register. subctxt division will be done
-+       * in the MIC driver. */
-+        off_t tidbase_offset;
-+        size_t tidbase_len;
-+        u64 tidbase;
-+        unsigned tidcnt;
-+        u64 tidtemplate;
-+        unsigned long invalidtid;
-+        u64 bar_addr;
-+        u64 bar_len;
-+};
-+
-+#endif /* _QIB_KNX_COMMON_H */
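The size comments in this header carry real weight (the descriptor queue is indexed modulo desc_num, and the flag words are spaced to avoid false sharing), so they are worth enforcing at compile time. A minimal sketch, assuming it is placed inside some function that includes this header, since the kernel's BUILD_BUG_ON() must be used at function scope:

	/* each element of a qib_knx_sdma_desc array stays 64B aligned */
	BUILD_BUG_ON(sizeof(struct qib_knx_sdma_desc) % 64);
	/* trigger, status and complete each own a full 64B cacheline */
	BUILD_BUG_ON(sizeof(struct qib_knx_sdma_hflags) != 64);
	BUILD_BUG_ON(sizeof(struct qib_knx_sdma_mflags) != 128);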
-diff --git a/drivers/infiniband/hw/qib/qib_knx_sdma.h b/drivers/infiniband/hw/qib/qib_knx_sdma.h
-deleted file mode 100644
-index 8c67b1f..0000000
---- a/drivers/infiniband/hw/qib/qib_knx_sdma.h
-+++ /dev/null
-@@ -1,105 +0,0 @@
--/*
-- * Copyright (c) 2013 Intel Corporation. All rights reserved.
-- *
-- * This software is available to you under a choice of one of two
-- * licenses.  You may choose to be licensed under the terms of the GNU
-- * General Public License (GPL) Version 2, available from the file
-- * COPYING in the main directory of this source tree, or the
-- * OpenIB.org BSD license below:
-- *
-- *     Redistribution and use in source and binary forms, with or
-- *     without modification, are permitted provided that the following
-- *     conditions are met:
-- *
-- *      - Redistributions of source code must retain the above
-- *        copyright notice, this list of conditions and the following
-- *        disclaimer.
-- *
-- *      - Redistributions in binary form must reproduce the above
-- *        copyright notice, this list of conditions and the following
-- *        disclaimer in the documentation and/or other materials
-- *        provided with the distribution.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- * SOFTWARE.
-- */
--#ifndef _QIB_KNX_SDMA_H
--#define _QIB_KNX_SDMA_H
--
--#define QIB_SDMA_MAX_NPAGES 33
--#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld
--#define QIB_KNX_SDMA_SET(fld, val)            \
--      do {                                    \
--              fld = (u64)(val);               \
--              smp_mb();                       \
--      } while (0)
--
--struct qib_knx_host_mem {
--      off_t flags_offset;
--      unsigned desc_num;
--};
--
--struct qib_knx_knc_mem {
--      off_t flags_offset;
--      off_t queue_offset;
--      size_t queue_len;
--};
--
--struct qib_tid_sm {
--        __u16 tid;
--        __u16 offset;
--        __u16 length;
--};
--
--/*
-- * SDMA transfer descriptor. This structure communicates the SDMA
-- * transfers from the MIC to the host. It is very important for
-- * performance reasons that its size is multiple of 64B in order
-- * to guarantee proper alignment in the descriptor array.
-- */
--struct qib_knx_sdma_desc {
--      u16 ctxt;
--      u16 subctxt;
--      u32 pbclen;
--      __le32 pbc[16];
--      u64 length;
--      u32 npages;
--      unsigned tidlen;
--        off_t offset;
--      unsigned long pages[QIB_SDMA_MAX_NPAGES];
--      /* This array is 198B so the compiler will pad
--       * it by 2B to make it multiple of 8B. */
--      struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES];
--      /*
--       * The two paddings below are included in order to
--       * make the size of the entire struct 576B (multiple
--       * of 64B). The goal is that all elements in an array
--       * of struct qib_knx_sdma_desc are 64B aligned.
--       */
--      u16 __padding0;
--      u64 __padding1[2];
--};
--
--/*
-- * trigger, status, and complete fields are by 8 to be
-- * cacheline size.
-- */
--struct qib_knx_sdma_hflags {
--      u64 trigger;
--      u64 __padding[7];
--};
--
--struct qib_knx_sdma_mflags {
--      u64 status;
--      u64 __padding1[7];
--      u64 complete;
--      u64 __padding2[7];
--};
--
--#endif /* _QIB_KNX_SDMA_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-deleted file mode 100644
-index 842fca1..0000000
---- a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-+++ /dev/null
-@@ -1,48 +0,0 @@
--/*
-- * Copyright (c) 2013 Intel Corporation. All rights reserved.
-- *
-- * This software is available to you under a choice of one of two
-- * licenses.  You may choose to be licensed under the terms of the GNU
-- * General Public License (GPL) Version 2, available from the file
-- * COPYING in the main directory of this source tree, or the
-- * OpenIB.org BSD license below:
-- *
-- *     Redistribution and use in source and binary forms, with or
-- *     without modification, are permitted provided that the following
-- *     conditions are met:
-- *
-- *      - Redistributions of source code must retain the above
-- *        copyright notice, this list of conditions and the following
-- *        disclaimer.
-- *
-- *      - Redistributions in binary form must reproduce the above
-- *        copyright notice, this list of conditions and the following
-- *        disclaimer in the documentation and/or other materials
-- *        provided with the distribution.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- * SOFTWARE.
-- */
--#ifndef _QIB_KNX_TIDRCV_H
--
--struct qib_knx_tid_info {
--      /* this is the entire set of 512 entries (= 4K) so
--         * we can resgister. subctxt devision will be done
--         * in MIC driver. */
--        off_t tidbase_offset;
--        size_t tidbase_len;
--        u64 tidbase;
--        unsigned tidcnt;
--        u64 tidtemplate;
--        unsigned long invalidtid;
--        u64 bar_addr;
--        u64 bar_len;
--};
--
--#endif /* QIB_KNX_TIDRCV_H */
-diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
-index ccb1191..4b46f6c 100644
---- a/drivers/infiniband/hw/qib/qib_mad.c
-+++ b/drivers/infiniband/hw/qib/qib_mad.c
-@@ -536,7 +536,8 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
-       pip->vl_arb_low_cap =
-               dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_LOW_CAP);
-       /* InitTypeReply = 0 */
--      pip->inittypereply_mtucap = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
-+      pip->inittypereply_mtucap =
-+              QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port);
-       /* HCAs ignore VLStallCount and HOQLife */
-       /* pip->vlstallcnt_hoqlife; */
-       pip->operationalvl_pei_peo_fpi_fpo =
-diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
-index 3f14009..d7eebfb 100644
---- a/drivers/infiniband/hw/qib/qib_pcie.c
-+++ b/drivers/infiniband/hw/qib/qib_pcie.c
-@@ -501,9 +501,8 @@ static int val2fld(int wd, int mask)
-       return wd;
- }
--static int qib_pcie_coalesce;
--module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
--MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
-+static QIB_MODPARAM_UNIT(pcie_coalesce, NULL, 0, S_IRUGO,
-+                       "tune PCIe coalescing on some Intel chipsets");
- /*
-  * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
-@@ -518,7 +517,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
-       u16 devid;
-       u32 mask, bits, val;
--      if (!qib_pcie_coalesce)
-+      if (!QIB_MODPARAM_GET(pcie_coalesce, dd->unit, 0))
-               return 0;
-       /* Find out supported and configured values for parent (root) */
-@@ -576,9 +575,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
-  * BIOS may not set PCIe bus-utilization parameters for best performance.
-  * Check and optionally adjust them to maximize our throughput.
-  */
--static int qib_pcie_caps;
--module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
--MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-+static QIB_MODPARAM_UNIT(pcie_caps, NULL, 0, S_IRUGO,
-+                       "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)");
- static int qib_tune_pcie_caps(struct qib_devdata *dd)
- {
-@@ -587,6 +585,7 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
-       u16 pcaps, pctl, ecaps, ectl;
-       int rc_sup, ep_sup;
-       int rc_cur, ep_cur;
-+      int caps = QIB_MODPARAM_GET(pcie_caps, dd->unit, 0);
-       /* Find out supported and configured values for parent (root) */
-       parent = dd->pcidev->bus->self;
-@@ -614,8 +613,8 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
-       ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
-       /* If Supported greater than limit in module param, limit it */
--      if (rc_sup > (qib_pcie_caps & 7))
--              rc_sup = qib_pcie_caps & 7;
-+      if (rc_sup > (caps & 7))
-+              rc_sup = caps & 7;
-       /* If less than (allowed, supported), bump root payload */
-       if (rc_sup > rc_cur) {
-               rc_cur = rc_sup;
-@@ -637,8 +636,8 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
-        * which is code '5' (log2(4096) - 7)
-        */
-       rc_sup = 5;
--      if (rc_sup > ((qib_pcie_caps >> 4) & 7))
--              rc_sup = (qib_pcie_caps >> 4) & 7;
-+      if (rc_sup > ((caps >> 4) & 7))
-+              rc_sup = (caps >> 4) & 7;
-       rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
-       ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
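
A minimal stand-alone sketch (not part of the patch) of how qib_tune_pcie_caps() decodes the packed pcie_caps value above; the 0x51 example value is assumed:

#include <stdio.h>

int main(void)
{
	int caps = 0x51;               /* example pcie_caps value (assumed) */
	int payload = caps & 7;        /* low nibble: max payload size code */
	int readreq = (caps >> 4) & 7; /* high nibble: max read request code */

	/* PCIe encodes sizes as log2(bytes) - 7, so code 0 = 128 bytes */
	printf("payload %d bytes, readreq %d bytes\n",
	       128 << payload, 128 << readreq);
	return 0;
}
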
-diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
-index 3cca55b..4208b20 100644
---- a/drivers/infiniband/hw/qib/qib_qp.c
-+++ b/drivers/infiniband/hw/qib/qib_qp.c
-@@ -124,6 +124,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-                    enum ib_qp_type type, u8 port)
- {
-       u32 i, offset, max_scan, qpn;
-+      unsigned krcvqs;
-       struct qpn_map *map;
-       u32 ret;
-@@ -141,10 +142,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-               goto bail;
-       }
-+      krcvqs = dd->pport[port-1].n_krcv_queues;
-       qpn = qpt->last + 2;
-       if (qpn >= QPN_MAX)
-               qpn = 2;
--      if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
-+      if (qpt->mask && ((qpn & qpt->mask) >> 1) >= krcvqs)
-               qpn = (qpn | qpt->mask) + 2;
-       offset = qpn & BITS_PER_PAGE_MASK;
-       map = &qpt->map[qpn / BITS_PER_PAGE];
-@@ -162,7 +164,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-                               goto bail;
-                       }
-                       offset = find_next_offset(qpt, map, offset,
--                              dd->n_krcv_queues);
-+                              krcvqs);
-                       qpn = mk_qpn(qpt, map, offset);
-                       /*
-                        * This test differs from alloc_pidmap().
-diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
-index c6d6a54..1e08943 100644
---- a/drivers/infiniband/hw/qib/qib_sdma.c
-+++ b/drivers/infiniband/hw/qib/qib_sdma.c
-@@ -532,7 +532,8 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd,
-  */
- int qib_sdma_verbs_send(struct qib_pportdata *ppd,
-                       struct qib_sge_state *ss, u32 dwords,
--                      struct qib_verbs_txreq *tx)
-+                      struct qib_verbs_txreq *tx,
-+                      struct snoop_packet *packet)
- {
-       unsigned long flags;
-       struct qib_sge *sge;
-@@ -543,6 +544,10 @@ int qib_sdma_verbs_send(struct qib_pportdata *ppd,
-       u64 sdmadesc[2];
-       u32 dwoffset;
-       dma_addr_t addr;
-+      u8 *packet_data = NULL;
-+
-+      if (packet)
-+              packet_data = packet->data + ((tx->hdr_dwords-2) << 2);
-       spin_lock_irqsave(&ppd->sdma_lock, flags);
-@@ -599,6 +604,10 @@ retry:
-                                     dw << 2, DMA_TO_DEVICE);
-               if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
-                       goto unmap;
-+              if (packet) {
-+                      memcpy(packet_data, sge->vaddr, len);
-+                      packet_data += len;
-+              }
-               sdmadesc[0] = 0;
-               make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
-               /* SDmaUseLargeBuf has to be set in every descriptor */
-diff --git a/drivers/infiniband/hw/qib/qib_snoop.c b/drivers/infiniband/hw/qib/qib_snoop.c
-new file mode 100644
-index 0000000..3c62bbb
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_snoop.c
-@@ -0,0 +1,970 @@
-+/*
-+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
-+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses.  You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ *     Redistribution and use in source and binary forms, with or
-+ *     without modification, are permitted provided that the following
-+ *     conditions are met:
-+ *
-+ *      - Redistributions of source code must retain the above
-+ *        copyright notice, this list of conditions and the following
-+ *        disclaimer.
-+ *
-+ *      - Redistributions in binary form must reproduce the above
-+ *        copyright notice, this list of conditions and the following
-+ *        disclaimer in the documentation and/or other materials
-+ *        provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+
-+/*
-+ * This file implements a raw read/raw write interface for snooping raw
-+ * packets from the wire and injecting raw packets to the wire.
-+ *
-+ * Other things that this interface could do at some point are:
-+ *   - Allow packets to be injected back into the stack
-+ *   - Provide an intercept for packets coming from the upper layers to
-+ *     move them back into user-space.
-+ */
-+
-+#include <linux/pci.h>
-+#include <linux/vmalloc.h>
-+#include <linux/uaccess.h>
-+#include <linux/module.h>
-+
-+#include <rdma/ib_user_mad.h> /* for ioctl constants */
-+#include <rdma/ib_smi.h>
-+
-+
-+#include "qib.h"
-+#include "qib_verbs.h"
-+#include "qib_common.h"
-+#include <linux/poll.h>
-+
-+#define QIB_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
-+#define QIB_SNOOP_IOC_BASE_SEQ 0x80
-+/*
-+ * This starts our ioctl sequence numbers *way* off from the ones
-+ * defined in ib_core.
-+ */
-+#define QIB_SNOOP_IOCGETLINKSTATE \
-+      _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ)
-+#define QIB_SNOOP_IOCSETLINKSTATE \
-+      _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+1)
-+#define QIB_SNOOP_IOCCLEARQUEUE \
-+      _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+2)
-+#define QIB_SNOOP_IOCCLEARFILTER \
-+      _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+3)
-+#define QIB_SNOOP_IOCSETFILTER \
-+      _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+4)
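
A hedged user-space sketch of driving these ioctls; the device path is assumed, and the magic/sequence values are replicated from the definitions above (IB_IOCTL_MAGIC is 0x1b):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define QIB_SNOOP_IOC_MAGIC 0x1b	/* IB_IOCTL_MAGIC, replicated */
#define QIB_SNOOP_IOCGETLINKSTATE _IO(QIB_SNOOP_IOC_MAGIC, 0x80)

int main(void)
{
	int state;
	int fd = open("/dev/ipath_snoop_00_01", O_RDONLY); /* assumed path */

	if (fd < 0)
		return 1;
	/* the driver packs (phys state << 4) | link state into one int */
	if (ioctl(fd, QIB_SNOOP_IOCGETLINKSTATE, &state) == 0)
		printf("phys state %d, link state %d\n",
		       (state >> 4) & 0xF, state & 0xF);
	close(fd);
	return 0;
}
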
-+
-+/* local prototypes */
-+static int qib_snoop_open(struct inode *in, struct file *fp);
-+static unsigned int qib_snoop_poll(struct file *fp,
-+                                      struct poll_table_struct *wait);
-+static ssize_t qib_snoop_read(struct file *fp, char __user *data,
-+                              size_t pkt_len, loff_t *off);
-+static int qib_snoop_release(struct inode *in, struct file *fp);
-+
-+static long qib_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
-+
-+static ssize_t qib_snoop_write(struct file *fp, const char __user *data,
-+                                size_t pkt_len, loff_t *off);
-+
-+#include <linux/delay.h>
-+
-+struct qib_packet_filter_command {
-+      int opcode;
-+      int length;
-+      void *value_ptr;
-+};
-+
-+enum qib_packet_filter_opcodes {
-+      FILTER_BY_LID,
-+      FILTER_BY_DLID,
-+      FILTER_BY_MAD_MGMT_CLASS,
-+      FILTER_BY_QP_NUMBER,
-+      FILTER_BY_PKT_TYPE,
-+      FILTER_BY_SERVICE_LEVEL,
-+      FILTER_BY_PKEY
-+};
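
Similarly, a sketch (assumed device path; the struct and ioctl values are replicated from above) of installing a DLID filter on the capture device; the match policy itself is applied in the receive path, not shown here:

#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define QIB_SNOOP_IOC_MAGIC 0x1b	/* IB_IOCTL_MAGIC, replicated */
#define QIB_SNOOP_IOCSETFILTER _IO(QIB_SNOOP_IOC_MAGIC, 0x80 + 4)

struct qib_packet_filter_command {	/* replicated from above */
	int opcode;
	int length;
	void *value_ptr;
};

int main(void)
{
	uint16_t dlid = 5;	/* example LID to filter on (assumed) */
	struct qib_packet_filter_command cmd = {
		.opcode = 1,	/* FILTER_BY_DLID */
		.length = sizeof(dlid),
		.value_ptr = &dlid,
	};
	int fd = open("/dev/ipath_capture_00_01", O_RDONLY); /* assumed path */

	if (fd < 0)
		return 1;
	ioctl(fd, QIB_SNOOP_IOCSETFILTER, &cmd);
	close(fd);
	return 0;
}
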
-+
-+static const struct file_operations snoop_file_ops = {
-+      .owner = THIS_MODULE,
-+      .open = qib_snoop_open,
-+      .read = qib_snoop_read,
-+      .unlocked_ioctl = qib_ioctl,
-+      .poll = qib_snoop_poll,
-+      .write = qib_snoop_write,
-+      .release = qib_snoop_release
-+};
-+
-+struct qib_filter_array {
-+      int (*filter)(void *, void *, void *);
-+};
-+
-+static int qib_filter_lid(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_dlid(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-+                                   void *value);
-+static int qib_filter_qp_number(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_ibpacket_type(void *ibhdr, void *packet_data,
-+                                  void *value);
-+static int qib_filter_ib_service_level(void *ibhdr, void *packet_data,
-+                                     void *value);
-+static int qib_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
-+
-+static struct qib_filter_array qib_filters[] = {
-+      { qib_filter_lid },
-+      { qib_filter_dlid },
-+      { qib_filter_mad_mgmt_class },
-+      { qib_filter_qp_number },
-+      { qib_filter_ibpacket_type },
-+      { qib_filter_ib_service_level },
-+      { qib_filter_ib_pkey }
-+};
-+
-+#define QIB_MAX_FILTERS       ARRAY_SIZE(qib_filters)
-+#define QIB_DRV_NAME          "ib_qib"
-+#define QIB_MAJOR             233
-+#define QIB_USER_MINOR_BASE   0
-+#define QIB_DIAG_MINOR_BASE   129
-+#define QIB_SNOOP_MINOR_BASE  160
-+#define QIB_CAPTURE_MINOR_BASE        200
-+#define QIB_NMINORS           255
-+#define PORT_BITS             2
-+#define PORT_MASK             ((1U << PORT_BITS) - 1)
-+#define GET_HCA(x)            ((unsigned int)((x) >> PORT_BITS))
-+#define GET_PORT(x)           ((unsigned int)((x) & PORT_MASK))
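
A worked example of the minor-number scheme (values assumed): with PORT_BITS = 2, unit 1 / first port (index 0) yields snoop minor ((1 << 2) | 0) + 160 = 164 and capture minor 204, e.g.:

	int minor = ((1 << PORT_BITS) | 0) + QIB_SNOOP_MINOR_BASE; /* 164 */
	int hca   = GET_HCA(minor - QIB_SNOOP_MINOR_BASE);         /* 1 */
	int port  = GET_PORT(minor - QIB_SNOOP_MINOR_BASE);        /* 0 */

qib_snoop_open() below performs exactly this inverse mapping after subtracting the base.
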
-+
-+int qib_snoop_add(struct qib_devdata *dd)
-+{
-+      char name[32];
-+      int ret = 0;
-+      int i;
-+      int j;
-+      int minor = 0;
-+
-+      for (i = 0; i < dd->num_pports; i++) {
-+              spin_lock_init(&dd->pport[i].snoop_write_lock);
-+              for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++) {
-+                      spin_lock_init(&dd->pport[i].sc_device[j].snoop_lock);
-+                      INIT_LIST_HEAD(
-+                              &(dd->pport[i].sc_device[j].snoop_queue));
-+                      init_waitqueue_head(
-+                              &dd->pport[i].sc_device[j].snoop_waitq);
-+
-+                      if (j == 0) {
-+                              minor = (((dd->unit << PORT_BITS) | i)) +
-+                                      QIB_SNOOP_MINOR_BASE;
-+                              snprintf(name, sizeof(name),
-+                                      "ipath_snoop_%02d_%02d", dd->unit, i+1);
-+                      } else {
-+                              minor = (((dd->unit << PORT_BITS) | i)) +
-+                                              QIB_CAPTURE_MINOR_BASE;
-+                              snprintf(name, sizeof(name),
-+                                      "ipath_capture_%02d_%02d",
-+                                      dd->unit, i+1);
-+                      }
-+
-+                      ret = qib_cdev_init(
-+                              minor, name,
-+                              &snoop_file_ops,
-+                              &dd->pport[i].sc_device[j].snoop_cdev,
-+                              &dd->pport[i].sc_device[j].snoop_class_dev);
-+                      if (ret)
-+                              goto bail;
-+              }
-+              pr_info("qib%d: snoop dev for hca %02d enabled port %02d\n"
-+                      "qib%d: capture dev for hca %02d enabled port %02d\n",
-+                      dd->unit, dd->unit, i+1, dd->unit, dd->unit, i+1);
-+              dd->pport[i].mode_flag = 0;
-+      }
-+out:
-+      return ret;
-+bail:
-+      qib_dev_err(dd, "Couldn't create %s device: %d\n", name, ret);
-+      i--;
-+      if (i != dd->num_pports) {
-+              for (; i >= 0 ; i--) {
-+                      for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++)
-+                              qib_cdev_cleanup(
-+                                       &dd->pport[i].
-+                                       sc_device[j].
-+                                       snoop_cdev,
-+                                       &dd->pport[i].
-+                                       sc_device[j].
-+                                       snoop_class_dev);
-+                      dd->pport[i].mode_flag = 0;
-+              }
-+      }
-+      goto out;
-+}
-+
-+/* this must be called w/ dd->snoop_in_lock held */
-+static void drain_snoop_list(struct qib_aux_device *sc_device)
-+{
-+      struct list_head *pos, *q;
-+      struct snoop_packet *packet;
-+
-+      list_for_each_safe(pos, q, &(sc_device->snoop_queue)) {
-+              packet = list_entry(pos, struct snoop_packet, list);
-+              list_del(pos);
-+              kfree(packet);
-+      }
-+}
-+
-+void qib_snoop_remove(struct qib_devdata *dd)
-+{
-+      unsigned long flags = 0;
-+      int i;
-+      int j;
-+
-+      for (i = 0; i < dd->num_pports; i++) {
-+              dd->pport[i].mode_flag = 0;
-+              for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++) {
-+                      spin_lock_irqsave(&dd->pport[i].sc_device[j].snoop_lock,
-+                              flags);
-+                      drain_snoop_list(&dd->pport[i].sc_device[j]);
-+                      qib_cdev_cleanup(&dd->pport[i].sc_device[j].snoop_cdev,
-+                              &dd->pport[i].sc_device[j].snoop_class_dev);
-+                      spin_unlock_irqrestore(
-+                              &dd->pport[i].sc_device[j].snoop_lock,
-+                              flags);
-+              }
-+      }
-+}
-+
-+static int qib_snoop_open(struct inode *in, struct file *fp)
-+{
-+      int unit = iminor(in);
-+      int devnum;
-+      int portnum = 0;
-+      int ret;
-+      int mode_flag = 0;
-+      unsigned long flags;
-+      struct qib_devdata *dd;
-+
-+      mutex_lock(&qib_mutex);
-+
-+      if (unit >= QIB_CAPTURE_MINOR_BASE) {
-+              unit -= QIB_CAPTURE_MINOR_BASE;
-+              devnum = 1;
-+              mode_flag = QIB_PORT_CAPTURE_MODE;
-+      } else {
-+              unit -= QIB_SNOOP_MINOR_BASE;
-+              devnum = 0;
-+              mode_flag = QIB_PORT_SNOOP_MODE;
-+      }
-+
-+      dd = qib_lookup(GET_HCA(unit));
-+      if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
-+          !dd->kregbase) {
-+              ret = -ENODEV;
-+              goto bail;
-+      }
-+      portnum = GET_PORT(unit);
-+
-+      spin_lock_irqsave(&dd->pport[portnum].sc_device[devnum].snoop_lock,
-+              flags);
-+
-+      if (dd->pport[portnum].mode_flag & mode_flag) {
-+              ret = -EBUSY;
-+              spin_unlock_irqrestore(
-+                      &dd->pport[portnum].sc_device[devnum].snoop_lock,
-+                      flags);
-+              goto bail;
-+      }
-+
-+      drain_snoop_list(&dd->pport[portnum].sc_device[devnum]);
-+      spin_unlock_irqrestore(
-+              &dd->pport[portnum].sc_device[devnum].snoop_lock, flags);
-+      if (devnum)
-+              pr_alert("capture device for hca %02d port %02d is opened\n",
-+                      GET_HCA(unit), portnum+1);
-+      else
-+              pr_alert("snoop device for hca %02d port %02d is opened\n",
-+                      GET_HCA(unit), portnum+1);
-+
-+      dd->pport[portnum].sc_device[devnum].pport = &dd->pport[portnum];
-+      fp->private_data = &dd->pport[portnum].sc_device[devnum];
-+      ret = 0;
-+      dd->pport[portnum].mode_flag |= mode_flag;
-+
-+bail:
-+      mutex_unlock(&qib_mutex);
-+
-+      return ret;
-+}
-+
-+static int qib_snoop_release(struct inode *in, struct file *fp)
-+{
-+      struct qib_aux_device *sc_device = fp->private_data;
-+      struct qib_pportdata *pport = sc_device->pport;
-+      unsigned long flags = 0;
-+      int devnum = iminor(in);
-+
-+      if (devnum >= QIB_CAPTURE_MINOR_BASE)
-+              devnum = 1;
-+      else
-+              devnum = 0;
-+
-+      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+      if (devnum)
-+              pport->mode_flag = pport->mode_flag & (~QIB_PORT_CAPTURE_MODE);
-+      else
-+              pport->mode_flag = pport->mode_flag & (~QIB_PORT_SNOOP_MODE);
-+
-+      drain_snoop_list(sc_device);
-+      /* Clear filters before going out */
-+      pport->filter_callback = NULL;
-+      kfree(pport->filter_value);
-+      pport->filter_value = NULL;
-+
-+      spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+      if (devnum)
-+              pr_alert("capture device for hca %02d port %02d is closed\n",
-+                       pport->dd->unit, pport->port);
-+      else
-+              pr_alert("snoop device for hca %02d port %02d is closed\n",
-+                       pport->dd->unit, pport->port);
-+
-+      fp->private_data = NULL;
-+      return 0;
-+}
-+
-+static unsigned int qib_snoop_poll(struct file *fp,
-+              struct poll_table_struct *wait)
-+{
-+      struct qib_aux_device *sc_device = fp->private_data;
-+      int ret = 0;
-+      unsigned long flags = 0;
-+
-+      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+
-+      poll_wait(fp, &sc_device->snoop_waitq, wait);
-+      if (!list_empty(&sc_device->snoop_queue))
-+              ret |= POLLIN | POLLRDNORM;
-+
-+      spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+      return ret;
-+
-+}
-+
-+static ssize_t qib_snoop_read(struct file *fp, char __user *data,
-+                             size_t pkt_len, loff_t *off)
-+{
-+      struct qib_aux_device *sc_device = fp->private_data;
-+      ssize_t ret = 0;
-+      unsigned long flags = 0;
-+      struct snoop_packet *packet = NULL;
-+
-+      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+
-+      while (list_empty(&sc_device->snoop_queue)) {
-+              spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+              if (fp->f_flags & O_NONBLOCK)
-+                      return -EAGAIN;
-+
-+              if (wait_event_interruptible(sc_device->snoop_waitq,
-+                                      !list_empty(&sc_device->snoop_queue)))
-+                      return -EINTR;
-+
-+              spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+      }
-+
-+      if (!list_empty(&(sc_device->snoop_queue))) {
-+              packet = list_entry(sc_device->snoop_queue.next,
-+                              struct snoop_packet, list);
-+              list_del(&packet->list);
-+              spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+              if (pkt_len >= packet->total_len) {
-+                      if (copy_to_user(data, packet->data,
-+                              packet->total_len))
-+                              ret = -EFAULT;
-+                      else
-+                              ret = packet->total_len;
-+              } else
-+                      ret = -EINVAL;
-+
-+              kfree(packet);
-+      } else
-+              spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+      return ret;
-+}
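
A user-space sketch of consuming snooped packets (device path assumed; the buffer is sized generously for one full packet, since the read above fails with EINVAL when the buffer is smaller than the packet):

#include <poll.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	unsigned char buf[8192];	/* >= largest expected packet */
	struct pollfd pfd;
	ssize_t n;

	pfd.fd = open("/dev/ipath_snoop_00_01", O_RDONLY); /* assumed path */
	pfd.events = POLLIN;
	if (pfd.fd < 0)
		return 1;
	while (poll(&pfd, 1, -1) > 0) {
		n = read(pfd.fd, buf, sizeof(buf));
		if (n > 0)
			printf("snooped %zd bytes\n", n);
	}
	return 0;
}
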
-+
-+static long qib_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-+{
-+      struct qib_aux_device *sc_device = fp->private_data;
-+      struct qib_pportdata *ppd = sc_device->pport;
-+      struct qib_devdata *dd = ppd->dd;
-+      void *filter_value = NULL;
-+      long ret = 0;
-+      int value = 0;
-+      u8 physState = 0;
-+      u8 linkState = 0;
-+      u16 devState = 0;
-+      unsigned long flags = 0;
-+      unsigned long *argp = NULL;
-+      struct qib_packet_filter_command filter_cmd = {0};
-+
-+      if (((_IOC_DIR(cmd) & _IOC_READ)
-+      && !access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)))
-+      || ((_IOC_DIR(cmd) & _IOC_WRITE)
-+      && !access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)))) {
-+              ret = -EFAULT;
-+      } else if (!capable(CAP_SYS_ADMIN)) {
-+              ret = -EPERM;
-+      } else if (sc_device != (&ppd->sc_device[QIB_SNOOP_DEV_INDEX])
-+              && cmd != QIB_SNOOP_IOCCLEARQUEUE
-+              && cmd != QIB_SNOOP_IOCCLEARFILTER
-+              && cmd != QIB_SNOOP_IOCSETFILTER) {
-+              /* Capture devices are allowed only 3 operations:
-+               * 1. Clear capture queue
-+               * 2. Clear capture filter
-+               * 3. Set capture filter
-+               * Others are invalid.
-+               */
-+              ret = -EINVAL;
-+      } else {
-+              switch (cmd) {
-+              case QIB_SNOOP_IOCSETLINKSTATE:
-+                      ret = __get_user(value, (int __user *) arg);
-+                      if (ret !=  0)
-+                              break;
-+
-+                      physState = (value >> 4) & 0xF;
-+                      linkState = value & 0xF;
-+
-+                      switch (linkState) {
-+                      case IB_PORT_NOP:
-+                              if (physState == 0)
-+                                      break;
-+                                      /* fall through */
-+                      case IB_PORT_DOWN:
-+                              switch (physState) {
-+                              case 0:
-+                                      if (dd->f_ibphys_portstate &&
-+                              (dd->f_ibphys_portstate(ppd->lastibcstat)
-+                                      & 0xF & IB_PHYSPORTSTATE_SLEEP))
-+                                              devState =
-+                                              QIB_IB_LINKDOWN_SLEEP;
-+                                      else
-+                                              devState =
-+                                              QIB_IB_LINKDOWN;
-+                                      break;
-+                              case 1:
-+                                      devState = QIB_IB_LINKDOWN_SLEEP;
-+                                      break;
-+                              case 2:
-+                                      devState = QIB_IB_LINKDOWN;
-+                                      break;
-+                              case 3:
-+                                      devState = QIB_IB_LINKDOWN_DISABLE;
-+                                      break;
-+                              default:
-+                                      ret = -EINVAL;
-+                                      goto done;
-+                                      break;
-+                              }
-+                              ret = qib_set_linkstate(ppd, devState);
-+                              break;
-+                      case IB_PORT_ARMED:
-+                              if (!(dd->flags &
-+                                    (QIB_IB_LINKARM | QIB_IB_LINKACTIVE))) {
-+                                      ret = -EINVAL;
-+                                      break;
-+                              }
-+                              ret = qib_set_linkstate(ppd, QIB_IB_LINKARM);
-+                              break;
-+                      case IB_PORT_ACTIVE:
-+                              if (!(dd->flags & QIB_IB_LINKARM)) {
-+                                      ret = -EINVAL;
-+                                      break;
-+                              }
-+                              ret = qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
-+                              break;
-+                      default:
-+                              ret = -EINVAL;
-+                              break;
-+                      }
-+
-+                      if (ret)
-+                              break;
-+                      /* fall through */
-+
-+              case QIB_SNOOP_IOCGETLINKSTATE:
-+                      value = dd->f_ibphys_portstate(ppd->lastibcstat);
-+                      value <<= 4;
-+                      value |= dd->f_iblink_state(ppd->lastibcstat);
-+                      ret = __put_user(value, (int __user *)arg);
-+                      break;
-+
-+              case QIB_SNOOP_IOCCLEARQUEUE:
-+                      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+                      drain_snoop_list(sc_device);
-+                      spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+                      break;
-+
-+              case QIB_SNOOP_IOCCLEARFILTER:
-+                      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+                      if (ppd->filter_callback) {
-+                              /* Drain packets first */
-+                              drain_snoop_list(sc_device);
-+                              ppd->filter_callback = NULL;
-+                      }
-+                      kfree(ppd->filter_value);
-+                      ppd->filter_value = NULL;
-+                      spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+                      break;
-+
-+              case QIB_SNOOP_IOCSETFILTER:
-+                      /* just copy command structure */
-+                      argp = (unsigned long *)arg;
-+                      if (copy_from_user(&filter_cmd, (u8 *)argp,
-+                                         sizeof(filter_cmd))) {
-+                              pr_alert("Error copying filter command\n");
-+                              ret = -EFAULT;
-+                              break;
-+                      }
-+                      if (filter_cmd.opcode >= QIB_MAX_FILTERS) {
-+                              pr_alert("Invalid opcode in request\n");
-+                              ret = -EINVAL;
-+                              break;
-+                      }
-+                      filter_value = kzalloc(
-+                                              filter_cmd.length * sizeof(u8),
-+                                              GFP_KERNEL);
-+                      if (!filter_value) {
-+                              pr_alert("Not enough memory\n");
-+                              ret = -ENOMEM;
-+                              break;
-+                      }
-+                      /* copy remaining data from userspace */
-+                      if (copy_from_user((u8 *)filter_value,
-+                                         (u8 *)filter_cmd.value_ptr,
-+                                         filter_cmd.length)) {
-+                              kfree(filter_value);
-+                              pr_alert("Error copying filter data\n");
-+                              ret = -EFAULT;
-+                              break;
-+                      }
-+                      /* Drain packets first */
-+                      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+                      drain_snoop_list(sc_device);
-+                      ppd->filter_callback =
-+                              qib_filters[filter_cmd.opcode].filter;
-+                      /* just in case we see back to back sets */
-+                      kfree(ppd->filter_value);
-+                      ppd->filter_value = filter_value;
-+                      spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+                      break;
-+
-+              default:
-+                      ret = -ENOTTY;
-+                      break;
-+              }
-+      }
-+done:
-+      return ret;
-+}
-+
-+
-+static ssize_t qib_pio_send_pkt(struct qib_pportdata *ppd,
-+                              u32 *data, u32 pkt_len)
-+{
-+      int i;
-+      u64 pbc;
-+      u32 __iomem *piobuf;
-+      u32 pnum, control, len;
-+      struct qib_devdata *dd = ppd->dd;
-+      u32 dwords = pkt_len >> 2;
-+      unsigned long flags;
-+      ssize_t ret = -EINVAL;
-+
-+      i = 0;
-+      len = dwords + 1;
-+      control = dd->f_setpbc_control(ppd, len, 0,
-+                (((u8 *)data)[0] >> 4) & 0xf);
-+      pbc = ((u64) control << 32) | len;
-+      while (!(piobuf = dd->f_getsendbuf(ppd, pbc, &pnum))) {
-+              if (i > 15) {
-+                      ret = -ENOMEM;
-+                      goto Err;
-+              }
-+              i++;
-+              /* lets try to flush all of it */
-+              dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL);
-+              udelay(100);
-+      }
-+      spin_lock_irqsave(&ppd->snoop_write_lock, flags);
-+      /* disable header check on this packet, since it can't be valid */
-+      dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_DIS1, NULL);
-+      writeq(pbc, piobuf);
-+      qib_flush_wc();
-+      if (dd->flags & QIB_PIO_FLUSH_WC) {
-+              qib_flush_wc();
-+              qib_pio_copy(piobuf + 2, data, dwords - 1);
-+              qib_flush_wc();
-+              __raw_writel(data[dwords - 1], piobuf + dwords + 1);
-+      } else
-+              qib_pio_copy(piobuf + 2, data, dwords);
-+      if (dd->flags & QIB_USE_SPCL_TRIG) {
-+              u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023;
-+
-+              qib_flush_wc();
-+              __raw_writel(0xaebecede, piobuf + spcl_off);
-+      }
-+      qib_sendbuf_done(dd, pnum);
-+      qib_flush_wc();
-+      /* and re-enable hdr check */
-+      dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
-+      spin_unlock_irqrestore(&ppd->snoop_write_lock, flags);
-+      ret = pkt_len;
-+Err:
-+      return ret;
-+}
-+
-+
-+static ssize_t qib_snoop_write(struct file *fp, const char __user *data,
-+                                      size_t pkt_len, loff_t *off)
-+{
-+      struct qib_aux_device *sc_device = fp->private_data;
-+      struct qib_pportdata *ppd = sc_device->pport;
-+      struct qib_devdata *dd = ppd->dd;
-+      ssize_t ret = 0;
-+      u32 *buffer = NULL;
-+      u32 plen, clen;
-+
-+      /* capture device should not be entertaining writes */
-+      if (sc_device != (&ppd->sc_device[QIB_SNOOP_DEV_INDEX])) {
-+              ret = -EINVAL;
-+              goto bail;
-+      }
-+
-+      if (pkt_len == 0)
-+              goto bail;
-+
-+      if (pkt_len & 3) {
-+              ret = -EINVAL;
-+              goto bail;
-+      }
-+
-+      clen = pkt_len >> 2;
-+
-+      if (!dd || !(dd->flags & QIB_PRESENT) ||
-+                      !dd->kregbase) {
-+              ret = -ENODEV;
-+              goto bail;
-+      }
-+
-+      if (!(dd->flags & QIB_INITTED)) {
-+              /* no hardware, freeze, etc. */
-+              ret = -ENODEV;
-+              goto bail;
-+      }
-+
-+      plen = sizeof(u32) + pkt_len;
-+
-+      if ((plen + 4) > ppd->ibmaxlen) {
-+              ret = -EINVAL;
-+              goto bail;  /* before writing pbc */
-+      }
-+
-+      buffer = vmalloc(plen);
-+      if (!buffer) {
-+              ret = -ENOMEM;
-+              goto bail;
-+      }
-+      if (copy_from_user(buffer,
-+              (const void __user *) (unsigned long) data, pkt_len)) {
-+              ret = -EFAULT;
-+              goto bail;
-+      }
-+
-+      ret = qib_pio_send_pkt(ppd, buffer, pkt_len);
-+
-+bail:
-+      vfree(buffer);
-+
-+      return ret;
-+}
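
qib_snoop_write() above is the injection side; a matching user-space sketch (path assumed, packet contents left to the caller, length must be a multiple of 4 bytes and fit within ibmaxlen):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	/* raw LRH + BTH + ... + payload, built by the caller (assumed) */
	unsigned char pkt[64] = { 0 };
	int fd = open("/dev/ipath_snoop_00_01", O_WRONLY); /* assumed path */

	if (fd < 0)
		return 1;
	if (write(fd, pkt, sizeof(pkt)) < 0) {
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
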
-+
-+int snoop_get_header_size(struct qib_devdata *dd,
-+                      struct qib_ib_header *hdr,
-+                      void *data, u32 tlen)
-+{
-+      int lnh, header_size = -1;
-+      u8 opcode, opcode_major;
-+      struct qib_other_headers *ohdr;
-+
-+      lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+
-+      if (lnh == QIB_LRH_BTH)
-+              ohdr = &hdr->u.oth;
-+      else if (lnh == QIB_LRH_GRH)
-+              ohdr = &hdr->u.l.oth;
-+      else
-+              goto bail;
-+
-+      opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-+
-+      opcode_major = (opcode >> 5) & 0x7;
-+
-+      switch (opcode_major) {
-+      case 0x03: /* UD */
-+              if (lnh == QIB_LRH_BTH)
-+                      header_size = 8 + 12 + 8 /* LRH + BTH + DETH */;
-+              else if (lnh == QIB_LRH_GRH) {
-+
-+                      /* LRH + GRH + BTH + DETH */
-+                      header_size = 8 + 40 + 12 + 8;
-+                      /* Some of the header data is in the data segment */
-+                      if (dd->rcvhdrentsize == 16)
-+                              header_size -= 12;
-+              } else
-+                      header_size = -1;
-+
-+              break;
-+      case 0x0: /* RC */
-+      case 0x1: /* UC */
-+      case 0x2: /* RD */
-+      default:
-+              header_size = -1;
-+              break;
-+      }
-+
-+bail:
-+      return header_size;
-+}
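
For reference, the sizes computed above work out to 8 + 12 + 8 = 28 bytes for a UD packet without a GRH, and 8 + 40 + 12 + 8 = 68 bytes with one, less the 12 bytes that arrive in the data segment when rcvhdrentsize is 16.
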
-+
-+static void qib_snoop_list_add_tail(struct snoop_packet *packet,
-+                                      struct qib_pportdata *ppd,
-+                                      int dev_index)
-+{
-+      unsigned long flags = 0;
-+      struct qib_aux_device *sc_device = &ppd->sc_device[dev_index];
-+
-+      spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+      if (likely((dev_index == QIB_CAPTURE_DEV_INDEX &&
-+              (ppd->mode_flag & QIB_PORT_CAPTURE_MODE)) ||
-+              (dev_index == QIB_SNOOP_DEV_INDEX &&
-+              (ppd->mode_flag & QIB_PORT_SNOOP_MODE))))
-+              list_add_tail(&(packet->list), &sc_device->snoop_queue);
-+      spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+      wake_up_interruptible(&sc_device->snoop_waitq);
-+}
-+
-+void qib_snoop_send_queue_packet(struct qib_pportdata *ppd,
-+                              struct snoop_packet *packet)
-+{
-+      /* If we are dealing with mix mode then we need to make another copy
-+       * of the same packet and queue it on the snoop device as well.
-+       * However, if we cannot get sufficient memory here, we just add
-+       * the packet to the capture queue by default so that at least one
-+       * copy of it is queued.
-+       */
-+      if (unlikely(ppd->mode_flag ==
-+              (QIB_PORT_SNOOP_MODE | QIB_PORT_CAPTURE_MODE))) {
-+              struct snoop_packet *pcopy;
-+              pcopy = kmalloc(sizeof(*pcopy) + packet->total_len, GFP_ATOMIC);
-+              if (pcopy != NULL) {
-+                      memcpy(pcopy, packet,
-+                              packet->total_len + sizeof(*pcopy));
-+                      qib_snoop_list_add_tail(pcopy, ppd,
-+                              QIB_SNOOP_DEV_INDEX);
-+              }
-+              qib_snoop_list_add_tail(packet, ppd, QIB_CAPTURE_DEV_INDEX);
-+      } else if (ppd->mode_flag == QIB_PORT_CAPTURE_MODE)
-+              qib_snoop_list_add_tail(packet, ppd, QIB_CAPTURE_DEV_INDEX);
-+      else if (ppd->mode_flag == QIB_PORT_SNOOP_MODE)
-+              qib_snoop_list_add_tail(packet, ppd, QIB_SNOOP_DEV_INDEX);
-+}
-+
-+/*
-+ * qib_snoop_rcv_queue_packet - receive a packet for the snoop interface
-+ * @port: HCA port on which this packet was received.
-+ * @rhdr: Packet header
-+ * @data: Packet data/payload
-+ * @tlen: Total length of the packet, including header and payload.
-+ *
-+ * Called for every packet received while snooping/mix mode is turned on.
-+ * Copies the received packet to an internal buffer and appends it to
-+ * the packet list.
-+ *
-+ * Returns:
-+ * 0 if this packet needs to be forwarded by the driver
-+ * 1 if this packet needs to be dropped by the driver
-+ */
-+
-+int qib_snoop_rcv_queue_packet(struct qib_pportdata *port, void *rhdr,
-+                              void *data, u32 tlen)
-+{
-+      int header_size = 0;
-+      struct qib_ib_header *hdr = rhdr;
-+      struct snoop_packet *packet = NULL;
-+
-+      header_size = snoop_get_header_size(port->dd, hdr, data, tlen);
-+      if (header_size <= 0)
-+              return 0;
-+
-+      /* qib_snoop_send_queue_packet takes care of mix mode,
-+       * so just return from here.
-+       */
-+      if (port->mode_flag == (QIB_PORT_SNOOP_MODE | QIB_PORT_CAPTURE_MODE))
-+              return 0;
-+
-+      packet = kmalloc(sizeof(struct snoop_packet) + tlen,
-+                                      GFP_ATOMIC);
-+      if (likely(packet)) {
-+              memcpy(packet->data, rhdr, header_size);
-+              memcpy(packet->data + header_size, data,
-+              tlen - header_size);
-+              packet->total_len = tlen;
-+              qib_snoop_list_add_tail(packet, port, QIB_SNOOP_DEV_INDEX);
-+              return 1;
-+      }
-+
-+      return 0;
-+}
-+
-+static int qib_filter_lid(void *ibhdr, void *packet_data, void *value)
-+{
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+      if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3]))
-+              return 0; /* matched */
-+      return 1; /* Not matched */
-+}
-+
-+static int qib_filter_dlid(void *ibhdr, void *packet_data, void *value)
-+{
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+      if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
-+              return 0;
-+      return 1;
-+}
-+
-+static int qib_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-+                                       void *value)
-+{
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+      struct qib_other_headers *ohdr = NULL;
-+      struct ib_smp *smp = NULL;
-+      u32 qpn = 0;
-+
-+      /* packet_data could be null if only header is captured */
-+      if (packet_data == NULL)
-+              return 1;
-+      /* Check for GRH */
-+      if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
-+              ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-+      else
-+              ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-+      qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
-+      if (qpn <= 1) {
-+              smp = (struct ib_smp *)packet_data;
-+              if (*((u8 *)value) == smp->mgmt_class)
-+                      return 0;
-+              else
-+                      return 1;
-+      }
-+      return 1;
-+}
-+
-+static int qib_filter_qp_number(void *ibhdr, void *packet_data, void *value)
-+{
-+
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+      struct qib_other_headers *ohdr = NULL;
-+
-+      /* Check for GRH */
-+      if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
-+              ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-+      else
-+              ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-+      if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
-+              return 0;
-+      return 1;
-+}
-+
-+
-+static int qib_filter_ibpacket_type(void *ibhdr, void *packet_data,
-+                                      void *value)
-+{
-+      u32 lnh = 0;
-+      u8 opcode = 0;
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+      struct qib_other_headers *ohdr = NULL;
-+
-+      lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+
-+      if (lnh == QIB_LRH_BTH)
-+              ohdr = &hdr->u.oth;
-+      else if (lnh == QIB_LRH_GRH)
-+              ohdr = &hdr->u.l.oth;
-+      else
-+              return 1;
-+
-+      opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-+
-+      if (*((u8 *)value) == ((opcode >> 5) & 0x7))
-+              return 0;
-+      return 1;
-+}
-+
-+static int qib_filter_ib_service_level(void *ibhdr, void *packet_data,
-+                                         void *value)
-+{
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+
-+      if ((*((u8 *)value)) == (be16_to_cpu(hdr->lrh[0] >> 4) & 0xF))
-+              return 0;
-+      return 1;
-+}
-+
-+static int qib_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
-+{
-+
-+      u32 lnh = 0;
-+      struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+      struct qib_other_headers *ohdr = NULL;
-+
-+      lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+      if (lnh == QIB_LRH_BTH)
-+              ohdr = &hdr->u.oth;
-+      else if (lnh == QIB_LRH_GRH)
-+              ohdr = &hdr->u.l.oth;
-+      else
-+              return 1;
-+
-+      /* The P_Key is a 16-bit entity, but its topmost bit indicates
-+       * the type of membership: 0 for limited and 1 for full.
-+       * Limited members cannot accept information from other
-+       * limited members, but communication is allowed between
-+       * every other combination of membership.
-+       * Hence we omit the topmost bit when comparing while filtering.
-+       */
-+
-+      if ((*(u16 *)value & 0x7FFF) ==
-+              ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
-+              return 0;
-+      return 1;
-+}
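
As a worked example of the masking above: P_Keys 0x8001 (full member) and 0x0001 (limited member) both match a filter value of 0x0001 once the membership bit is dropped.
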
-diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
-index d0a0ea0..a98635d 100644
---- a/drivers/infiniband/hw/qib/qib_user_sdma.c
-+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
-@@ -1,4 +1,5 @@
- /*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
-  * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
-  *
-  * This software is available to you under a choice of one of two
-@@ -52,83 +53,65 @@
- /* attempt to drain the queue for 5secs */
- #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
--struct qib_user_sdma_pkt {
--      struct list_head list;  /* list element */
--
--      u8  tiddma;             /* if this is NEW tid-sdma */
--      u8  largepkt;           /* this is large pkt from kmalloc */
--      u16 frag_size;          /* frag size used by PSM */
--      u16 index;              /* last header index or push index */
--      u16 naddr;              /* dimension of addr (1..3) ... */
--      u16 addrlimit;          /* addr array size */
--      u16 tidsmidx;           /* current tidsm index */
--      u16 tidsmcount;         /* tidsm array item count */
--      u16 payload_size;       /* payload size so far for header */
--      u32 bytes_togo;         /* bytes for processing */
--      u32 counter;            /* sdma pkts queued counter for this entry */
--      struct qib_tid_session_member *tidsm;   /* tid session member array */
--      struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
--      u64 added;              /* global descq number of entries */
--
--      struct {
--              u16 offset;                     /* offset for kvaddr, addr */
--              u16 length;                     /* length in page */
--              u16 first_desc;                 /* first desc */
--              u16 last_desc;                  /* last desc */
--              u16 put_page;                   /* should we put_page? */
--              u16 dma_mapped;                 /* is page dma_mapped? */
--              u16 dma_length;                 /* for dma_unmap_page() */
--              u16 padding;
--              struct page *page;              /* may be NULL (coherent mem) */
--              void *kvaddr;                   /* FIXME: only for pio hack */
--              dma_addr_t addr;
--      } addr[4];   /* max pages, any more and we coalesce */
-+/*
-+ * Track how many times a process opens this driver.
-+ */
-+struct rb_root qib_user_sdma_rb_root = RB_ROOT;
-+
-+struct qib_user_sdma_rb_node {
-+      struct rb_node node;
-+      int refcount;
-+      pid_t pid;
- };
--struct qib_user_sdma_queue {
--      /*
--       * pkts sent to dma engine are queued on this
--       * list head.  the type of the elements of this
--       * list are struct qib_user_sdma_pkt...
--       */
--      struct list_head sent;
-+static struct qib_user_sdma_rb_node *
-+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
-+{
-+      struct qib_user_sdma_rb_node *sdma_rb_node;
-+      struct rb_node *node = root->rb_node;
-+
-+      while (node) {
-+              sdma_rb_node = container_of(node,
-+                      struct qib_user_sdma_rb_node, node);
-+              if (pid < sdma_rb_node->pid)
-+                      node = node->rb_left;
-+              else if (pid > sdma_rb_node->pid)
-+                      node = node->rb_right;
-+              else
-+                      return sdma_rb_node;
-+      }
-+      return NULL;
-+}
--      /*
--       * Because above list will be accessed by both process and
--       * signal handler, we need a spinlock for it.
--       */
--      spinlock_t sent_lock ____cacheline_aligned_in_smp;
--
--      /* headers with expected length are allocated from here... */
--      char header_cache_name[64];
--      struct dma_pool *header_cache;
--
--      /* packets are allocated from the slab cache... */
--      char pkt_slab_name[64];
--      struct kmem_cache *pkt_slab;
--
--      /* as packets go on the queued queue, they are counted... */
--      u32 counter;
--      u32 sent_counter;
--      /* pending packets, not sending yet */
--      u32 num_pending;
--      /* sending packets, not complete yet */
--      u32 num_sending;
--      /* global descq number of entry of last sending packet */
--      u64 added;
--
--      /* dma page table */
--      struct rb_root dma_pages_root;
--
--      /* protect everything above... */
--      struct mutex lock;
--};
-+static int
-+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
-+{
-+      struct rb_node **node = &(root->rb_node);
-+      struct rb_node *parent = NULL;
-+      struct qib_user_sdma_rb_node *got;
-+
-+      while (*node) {
-+              got = container_of(*node, struct qib_user_sdma_rb_node, node);
-+              parent = *node;
-+              if (new->pid < got->pid)
-+                      node = &((*node)->rb_left);
-+              else if (new->pid > got->pid)
-+                      node = &((*node)->rb_right);
-+              else
-+                      return 0;
-+      }
-+
-+      rb_link_node(&new->node, parent, node);
-+      rb_insert_color(&new->node, root);
-+      return 1;
-+}
- struct qib_user_sdma_queue *
- qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
- {
-       struct qib_user_sdma_queue *pq =
-               kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
-+      struct qib_user_sdma_rb_node *sdma_rb_node;
-       if (!pq)
-               goto done;
-@@ -138,6 +121,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
-       pq->num_pending = 0;
-       pq->num_sending = 0;
-       pq->added = 0;
-+      pq->sdma_rb_node = NULL;
-       INIT_LIST_HEAD(&pq->sent);
-       spin_lock_init(&pq->sent_lock);
-@@ -163,8 +147,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
-       pq->dma_pages_root = RB_ROOT;
-+      sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
-+                                      current->pid);
-+      if (sdma_rb_node) {
-+              sdma_rb_node->refcount++;
-+      } else {
-+              int ret;
-+              sdma_rb_node = kmalloc(sizeof(
-+                      struct qib_user_sdma_rb_node), GFP_KERNEL);
-+              if (!sdma_rb_node)
-+                      goto err_rb;
-+
-+              sdma_rb_node->refcount = 1;
-+              sdma_rb_node->pid = current->pid;
-+
-+              ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
-+                                      sdma_rb_node);
-+              BUG_ON(ret == 0);
-+      }
-+      pq->sdma_rb_node = sdma_rb_node;
-+
-       goto done;
-+err_rb:
-+      dma_pool_destroy(pq->header_cache);
- err_slab:
-       kmem_cache_destroy(pq->pkt_slab);
- err_kfree:
-@@ -175,12 +181,12 @@ done:
-       return pq;
- }
--static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
--                                  int i, u16 offset, u16 len,
--                                  u16 first_desc, u16 last_desc,
--                                  u16 put_page, u16 dma_mapped,
--                                  struct page *page, void *kvaddr,
--                                  dma_addr_t dma_addr, u16 dma_length)
-+void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-+                           int i, u16 offset, u16 len,
-+                           u16 first_desc, u16 last_desc,
-+                           u16 put_page, u16 dma_mapped,
-+                           struct page *page, void *kvaddr,
-+                           dma_addr_t dma_addr, u16 dma_length)
- {
-       pkt->addr[i].offset = offset;
-       pkt->addr[i].length = len;
-@@ -194,7 +200,7 @@ static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-       pkt->addr[i].dma_length = dma_length;
- }
--static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-+void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-                               size_t len, dma_addr_t *dma_addr)
- {
-       void *hdr;
-@@ -216,11 +222,11 @@ static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-       return hdr;
- }
--static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
--                                     struct qib_user_sdma_queue *pq,
--                                     struct qib_user_sdma_pkt *pkt,
--                                     struct page *page, u16 put,
--                                     u16 offset, u16 len, void *kvaddr)
-+int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-+                              struct qib_user_sdma_queue *pq,
-+                              struct qib_user_sdma_pkt *pkt,
-+                              struct page *page, u16 put,
-+                              u16 offset, u16 len, void *kvaddr)
- {
-       __le16 *pbc16;
-       void *pbcvaddr;
-@@ -235,21 +241,27 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-       int ret = 0;
-       if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
--              /*
--               * dma mapping error, pkt has not managed
--               * this page yet, return the page here so
--               * the caller can ignore this page.
--               */
--              if (put) {
--                      put_page(page);
--              } else {
--                      /* coalesce case */
--                      kunmap(page);
--                      __free_page(page);
-+#ifdef QIB_CONFIG_KNX
-+              if (!pkt->remote) {
-+#endif
-+                      /*
-+                       * dma mapping error, pkt has not managed
-+                       * this page yet, return the page here so
-+                       * the caller can ignore this page.
-+                       */
-+                      if (put) {
-+                              put_page(page);
-+                      } else {
-+                              /* coalesce case */
-+                              kunmap(page);
-+                              __free_page(page);
-+                      }
-+                      ret = -ENOMEM;
-+                      goto done;
-               }
--              ret = -ENOMEM;
--              goto done;
-+#ifdef QIB_CONFIG_KNX
-       }
-+#endif
-       offset = 0;
-       dma_mapped = 1;
-@@ -551,13 +563,19 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
-                                      pkt->addr[i].dma_length,
-                                      DMA_TO_DEVICE);
--              if (pkt->addr[i].kvaddr)
--                      kunmap(pkt->addr[i].page);
-+#ifdef QIB_CONFIG_KNX
-+              if (!pkt->remote) {
-+#endif
-+                      if (pkt->addr[i].kvaddr)
-+                              kunmap(pkt->addr[i].page);
--              if (pkt->addr[i].put_page)
--                      put_page(pkt->addr[i].page);
--              else
--                      __free_page(pkt->addr[i].page);
-+                      if (pkt->addr[i].put_page)
-+                              put_page(pkt->addr[i].page);
-+                      else
-+                              __free_page(pkt->addr[i].page);
-+#ifdef QIB_CONFIG_KNX
-+              }
-+#endif
-       } else if (pkt->addr[i].kvaddr) {
-               /* for headers */
-               if (pkt->addr[i].dma_mapped) {
-@@ -697,9 +715,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
- }
- /* free a packet list -- return counter value of last packet */
--static void qib_user_sdma_free_pkt_list(struct device *dev,
--                                      struct qib_user_sdma_queue *pq,
--                                      struct list_head *list)
-+void qib_user_sdma_free_pkt_list(struct device *dev,
-+                               struct qib_user_sdma_queue *pq,
-+                               struct list_head *list)
- {
-       struct qib_user_sdma_pkt *pkt, *pkt_next;
-@@ -709,6 +727,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
-               for (i = 0; i < pkt->naddr; i++)
-                       qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-+#ifdef QIB_CONFIG_KNX
-+              if (pkt->remote)
-+                      qib_knx_sdma_free_pkt(pkt);
-+#endif
-               if (pkt->largepkt)
-                       kfree(pkt);
-               else
-@@ -892,6 +914,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
-               pkt->payload_size = 0;
-               pkt->counter = counter;
-               pkt->tiddma = tiddma;
-+              pkt->remote = 0;
-               /* setup the first header */
-               qib_user_sdma_init_frag(pkt, 0, /* index */
-@@ -967,8 +990,8 @@ static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
- }
- /* try to clean out queue -- needs pq->lock */
--static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
--                                   struct qib_user_sdma_queue *pq)
-+int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-+                            struct qib_user_sdma_queue *pq)
- {
-       struct qib_devdata *dd = ppd->dd;
-       struct list_head free_list;
-@@ -1021,13 +1044,18 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
-       if (!pq)
-               return;
--      kmem_cache_destroy(pq->pkt_slab);
-+      pq->sdma_rb_node->refcount--;
-+      if (pq->sdma_rb_node->refcount == 0) {
-+              rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
-+              kfree(pq->sdma_rb_node);
-+      }
-       dma_pool_destroy(pq->header_cache);
-+      kmem_cache_destroy(pq->pkt_slab);
-       kfree(pq);
- }
- /* clean descriptor queue, returns > 0 if some elements cleaned */
--static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
-+int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
- {
-       int ret;
-       unsigned long flags;
-@@ -1238,30 +1266,56 @@ retry:
- }
- /* pq->lock must be held, get packets on the wire... */
--static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
--                               struct qib_user_sdma_queue *pq,
--                               struct list_head *pktlist, int count)
-+int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-+                          struct qib_user_sdma_queue *pq,
-+                          struct list_head *pktlist, int count)
- {
--      int ret = 0;
-       unsigned long flags;
-       if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
-               return -ECOMM;
--      spin_lock_irqsave(&ppd->sdma_lock, flags);
--
--      if (unlikely(!__qib_sdma_running(ppd))) {
--              ret = -ECOMM;
--              goto unlock;
-+      /* non-blocking mode */
-+      if (pq->sdma_rb_node->refcount > 1) {
-+              spin_lock_irqsave(&ppd->sdma_lock, flags);
-+              if (unlikely(!__qib_sdma_running(ppd))) {
-+                      spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+                      return -ECOMM;
-+              }
-+              pq->num_pending += count;
-+              list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-+              qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
-+              spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+              return 0;
-       }
-+      /* In this case, descriptors from this process are not
-+       * linked to the ppd pending queue and the interrupt handler
-+       * won't update this process, so it is OK to modify
-+       * num_pending directly without taking the sdma lock.
-+       */
-+
-       pq->num_pending += count;
--      list_splice_tail_init(pktlist, &ppd->sdma_userpending);
--      qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
-+      /*
-+       * Blocking mode for a single-rail process: we must
-+       * release and reacquire sdma_lock to give other
-+       * processes a chance to make progress; this matters
-+       * for performance (sketch follows these hunks).
-+       */
-+      do {
-+              spin_lock_irqsave(&ppd->sdma_lock, flags);
-+              if (unlikely(!__qib_sdma_running(ppd))) {
-+                      spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+                      return -ECOMM;
-+              }
-+              qib_user_sdma_send_desc(ppd, pktlist);
-+              if (!list_empty(pktlist))
-+                      qib_sdma_make_progress(ppd);
-+              spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+      } while (!list_empty(pktlist));
--unlock:
--      spin_unlock_irqrestore(&ppd->sdma_lock, flags);
--      return ret;
-+      return 0;
- }
- int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
-@@ -1291,7 +1345,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
-               qib_user_sdma_queue_clean(ppd, pq);
-       while (dim) {
--              int mxp = 8;
-+              int mxp = 1;
-               int ndesc = 0;
-               down_write(&current->mm->mmap_sem);
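
As flagged in the blocking-mode comment above, the single-rail path reduces to a release-and-retry loop: post what fits, reap completions while the ring is full, and drop sdma_lock on every pass so other processes can run. A standalone sketch, assuming the qib types and helpers behave as in the hunks above; not a drop-in replacement for the patched function:

    #include <linux/errno.h>
    #include <linux/list.h>
    #include <linux/spinlock.h>
    /* struct qib_pportdata and the helpers come from qib.h */

    static int push_pkts_blocking(struct qib_pportdata *ppd,
                                  struct list_head *pktlist)
    {
            unsigned long flags;

            do {
                    spin_lock_irqsave(&ppd->sdma_lock, flags);
                    if (unlikely(!__qib_sdma_running(ppd))) {
                            spin_unlock_irqrestore(&ppd->sdma_lock, flags);
                            return -ECOMM;  /* link no longer active */
                    }
                    /* post as many descriptors as currently fit */
                    qib_user_sdma_send_desc(ppd, pktlist);
                    /* ring full: reap completed descriptors */
                    if (!list_empty(pktlist))
                            qib_sdma_make_progress(ppd);
                    /* release so other processes can make progress */
                    spin_unlock_irqrestore(&ppd->sdma_lock, flags);
            } while (!list_empty(pktlist));

            return 0;
    }
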
-diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
-index ce8cbaf..93ce40b 100644
---- a/drivers/infiniband/hw/qib/qib_user_sdma.h
-+++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
-@@ -31,12 +31,108 @@
-  */
- #include <linux/device.h>
--struct qib_user_sdma_queue;
-+struct qib_user_sdma_pkt {
-+      struct list_head list;  /* list element */
-+
-+      u8  tiddma;             /* if this is NEW tid-sdma */
-+      u8  largepkt;           /* this is large pkt from kmalloc */
-+      u16 frag_size;          /* frag size used by PSM */
-+      u16 index;              /* last header index or push index */
-+      u16 naddr;              /* dimension of addr (1..3) ... */
-+      u16 addrlimit;          /* addr array size */
-+      u16 tidsmidx;           /* current tidsm index */
-+      u16 tidsmcount;         /* tidsm array item count */
-+      u16 payload_size;       /* payload size so far for header */
-+      u32 bytes_togo;         /* bytes for processing */
-+      u32 counter;            /* sdma pkts queued counter for this entry */
-+      struct qib_tid_session_member *tidsm;   /* tid session member array */
-+      struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
-+      u64 added;              /* global descq number of entries */
-+#ifdef QIB_CONFIG_KNX
-+      u64 remote;             /* did the packet originate on the host? */
-+#endif
-+
-+      struct {
-+              u16 offset;                     /* offset for kvaddr, addr */
-+              u16 length;                     /* length in page */
-+              u16 first_desc;                 /* first desc */
-+              u16 last_desc;                  /* last desc */
-+              u16 put_page;                   /* should we put_page? */
-+              u16 dma_mapped;                 /* is page dma_mapped? */
-+              u16 dma_length;                 /* for dma_unmap_page() */
-+              u16 padding;
-+              struct page *page;              /* may be NULL (coherent mem) */
-+              void *kvaddr;                   /* FIXME: only for pio hack */
-+              dma_addr_t addr;
-+      } addr[4];   /* max pages, any more and we coalesce */
-+};
-+
-+struct qib_user_sdma_queue {
-+      /*
-+       * pkts sent to the dma engine are queued on this
-+       * list head; the elements of this list are of type
-+       * struct qib_user_sdma_pkt...
-+       */
-+      struct list_head sent;
-+
-+      /*
-+       * Because the above list will be accessed by both the
-+       * process and the signal handler, we need a spinlock for it.
-+       */
-+      spinlock_t sent_lock ____cacheline_aligned_in_smp;
-+
-+      /* headers with expected length are allocated from here... */
-+      char header_cache_name[64];
-+      struct dma_pool *header_cache;
-+
-+      /* packets are allocated from the slab cache... */
-+      char pkt_slab_name[64];
-+      struct kmem_cache *pkt_slab;
-+
-+      /* as packets go on the queue, they are counted... */
-+      u32 counter;
-+      u32 sent_counter;
-+      /* pending packets, not sending yet */
-+      u32 num_pending;
-+      /* sending packets, not complete yet */
-+      u32 num_sending;
-+      /* global descq entry number of the last packet sent */
-+      u64 added;
-+
-+      /* dma page table */
-+      struct rb_root dma_pages_root;
-+
-+      struct qib_user_sdma_rb_node *sdma_rb_node;
-+
-+      /* protect everything above... */
-+      struct mutex lock;
-+};
- struct qib_user_sdma_queue *
- qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
- void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
--
-+void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-+                               size_t len, dma_addr_t *dma_addr);
-+void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-+                           int i, u16 offset, u16 len,
-+                           u16 first_desc, u16 last_desc,
-+                           u16 put_page, u16 dma_mapped,
-+                           struct page *page, void *kvaddr,
-+                           dma_addr_t dma_addr, u16 dma_length);
-+int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-+                              struct qib_user_sdma_queue *pq,
-+                              struct qib_user_sdma_pkt *pkt,
-+                              struct page *page, u16 put,
-+                              u16 offset, u16 len, void *kvaddr);
-+int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd);
-+int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-+                            struct qib_user_sdma_queue *pq);
-+void qib_user_sdma_free_pkt_list(struct device *dev,
-+                               struct qib_user_sdma_queue *pq,
-+                               struct list_head *list);
-+int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-+                          struct qib_user_sdma_queue *pq,
-+                          struct list_head *pktlist, int count);
- int qib_user_sdma_writev(struct qib_ctxtdata *pd,
-                        struct qib_user_sdma_queue *pq,
-                        const struct iovec *iov,
-@@ -50,3 +146,8 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
- u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
- u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
-+/*
-+ * This function prototype somewhat pollutes this header file,
-+ * but I don't want to create a new header file just for it.
-+ */
-+void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt);
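
With these prototypes exported, other compilation units (the KNX code, by the look of the free-pkt hook) can drive a user SDMA queue directly. A hypothetical caller, with the function name and error handling ours rather than the patch's:

    /* Assumes qib.h / qib_user_sdma.h for the types and prototypes. */
    static int example_push_and_reap(struct qib_pportdata *ppd,
                                     struct qib_user_sdma_queue *pq,
                                     struct list_head *pkts, int npkts)
    {
            int ret;

            mutex_lock(&pq->lock);          /* protects the queue state */
            ret = qib_user_sdma_push_pkts(ppd, pq, pkts, npkts);
            if (!ret)
                    /* reap anything the hardware already completed */
                    qib_user_sdma_queue_clean(ppd, pq);
            mutex_unlock(&pq->lock);
            return ret;
    }
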
-diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
-index 092b0bb..687c216 100644
---- a/drivers/infiniband/hw/qib/qib_verbs.c
-+++ b/drivers/infiniband/hw/qib/qib_verbs.c
-@@ -621,6 +621,15 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
-       if (unlikely(tlen < 24))
-               goto drop;
-+      if (ppd->mode_flag & QIB_PORT_SNOOP_MODE) {
-+              int nomatch = 0;
-+              if (ppd->filter_callback)
-+                      nomatch = ppd->filter_callback(hdr, data,
-+                              ppd->filter_value);
-+              if (nomatch == 0 &&
-+                      qib_snoop_rcv_queue_packet(ppd, rhdr, data, tlen))
-+                      goto drop;
-+      }
-       /* Check for a valid destination LID (see ch. 7.11.1). */
-       lid = be16_to_cpu(hdr->lrh[1]);
-       if (lid < QIB_MULTICAST_LID_BASE) {
-@@ -789,11 +798,17 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
- #endif
- static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
--                  u32 length, unsigned flush_wc)
-+                  u32 length, unsigned flush_wc, struct snoop_packet *packet,
-+                  u8 *data_orig)
- {
-       u32 extra = 0;
-       u32 data = 0;
-       u32 last;
-+      u32 *packet_data = NULL;
-+
-+      /* This ensures the shadow copy advances a word at a time */
-+      if (packet)
-+              packet_data = (u32 *)data_orig;
-       while (1) {
-               u32 len = ss->sge.length;
-@@ -825,6 +840,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
-                               }
-                               __raw_writel(data, piobuf);
-                               piobuf++;
-+                              if (packet_data) {
-+                                      *packet_data = data;
-+                                      packet_data++;
-+                              }
-                               extra = 0;
-                               data = 0;
-                       } else {
-@@ -851,6 +870,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
-                               data = get_upper_bits(v, ushift);
-                               piobuf++;
-                               addr++;
-+                              if (packet_data) {
-+                                      *packet_data = data;
-+                                      packet_data++;
-+                              }
-                               l -= sizeof(u32);
-                       }
-                       /*
-@@ -868,6 +891,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
-                                       }
-                                       __raw_writel(data, piobuf);
-                                       piobuf++;
-+                                      if (packet_data) {
-+                                              *packet_data = data;
-+                                              packet_data++;
-+                                      }
-                                       extra = 0;
-                                       data = 0;
-                               } else {
-@@ -894,12 +921,20 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
-                       qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
-                       piobuf += w - 1;
-                       last = ((u32 *) ss->sge.vaddr)[w - 1];
-+                      if (packet_data) {
-+                              memcpy(packet_data, ss->sge.vaddr, len);
-+                              packet_data += w;
-+                      }
-                       break;
-               } else {
-                       u32 w = len >> 2;
-                       qib_pio_copy(piobuf, ss->sge.vaddr, w);
-                       piobuf += w;
-+                      if (packet_data) {
-+                              memcpy(packet_data, ss->sge.vaddr, len);
-+                              packet_data += w;
-+                      }
-                       extra = len & (sizeof(u32) - 1);
-                       if (extra) {
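
All three copy_io() hunks repeat one idiom: every 32-bit word written to the device PIO buffer is duplicated into the snoop shadow buffer whenever one was allocated. Factored out it would look like this sketch; the helper name is ours, the patch open-codes it at each site:

    #include <linux/io.h>

    static inline void pio_write_mirrored(u32 __iomem **piobuf,
                                          u32 **shadow, u32 word)
    {
            __raw_writel(word, *piobuf);    /* device write, as before */
            (*piobuf)++;
            if (*shadow) {                  /* snoop copy, word at a time */
                    **shadow = word;
                    (*shadow)++;
            }
    }
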
-@@ -1144,12 +1179,13 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-       u32 control;
-       u32 ndesc;
-       int ret;
-+      struct snoop_packet *packet = NULL;
-       tx = qp->s_tx;
-       if (tx) {
-               qp->s_tx = NULL;
-               /* resend previously constructed packet */
--              ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
-+              ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx, NULL);
-               goto bail;
-       }
-@@ -1173,6 +1209,19 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-       if (plen + 1 > dd->piosize2kmax_dwords)
-               tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
-+      if (ppd->mode_flag) {
-+              int nomatch = 0;
-+              if (ppd->filter_callback)
-+                      nomatch = ppd->filter_callback(hdr, NULL,
-+                              ppd->filter_value);
-+              if (nomatch == 0) {
-+                      packet = kzalloc(sizeof(*packet)+QIB_GET_PKT_LEN(hdr),
-+                                      GFP_ATOMIC);
-+                      if (packet)
-+                              packet->total_len = QIB_GET_PKT_LEN(hdr);
-+              }
-+      }
-+
-       if (len) {
-               /*
-                * Don't try to DMA if it takes more descriptors than
-@@ -1193,7 +1242,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-               tx->txreq.addr = dev->pio_hdrs_phys +
-                       tx->hdr_inx * sizeof(struct qib_pio_header);
-               tx->hdr_dwords = hdrwords + 2; /* add PBC length */
--              ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
-+              if (packet)
-+                      memcpy(packet->data, hdr, (hdrwords << 2));
-+              ret = qib_sdma_verbs_send(ppd, ss, dwords, tx, packet);
-               goto bail;
-       }
-@@ -1206,6 +1257,12 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-       phdr->pbc[1] = cpu_to_le32(control);
-       memcpy(&phdr->hdr, hdr, hdrwords << 2);
-       qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
-+      if (packet) {
-+              memcpy(packet->data, &phdr->hdr, (hdrwords << 2));
-+              memcpy(packet->data+(hdrwords << 2),
-+                      (u8 *)((u32 *) &phdr->hdr + hdrwords),
-+                      len);
-+      }
-       tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
-                                       tx->hdr_dwords << 2, DMA_TO_DEVICE);
-@@ -1214,7 +1271,7 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-       tx->align_buf = phdr;
-       tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
-       tx->txreq.sg_count = 1;
--      ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
-+      ret = qib_sdma_verbs_send(ppd, NULL, 0, tx, NULL);
-       goto unaligned;
- map_err:
-@@ -1222,9 +1279,24 @@ map_err:
- err_tx:
-       qib_put_txreq(tx);
-       ret = wait_kmem(dev, qp);
-+      /* If wait_kmem() returns 0, (ret == 0) holds at the
-+       * bail label below and would queue a packet for a
-+       * send that never happened, so free the packet
-+       * here instead.
-+       */
-+      kfree(packet);
-+      packet = NULL;
- unaligned:
-       ibp->n_unaligned++;
- bail:
-+      if (packet) {
-+              if (ret == 0)
-+                      qib_snoop_send_queue_packet(ppd, packet);
-+              else {
-+                      kfree(packet);
-+                      packet = NULL;
-+              }
-+      }
-       return ret;
- bail_tx:
-       ret = PTR_ERR(tx);
-@@ -1280,6 +1352,8 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-       unsigned flush_wc;
-       u32 control;
-       u32 pbufn;
-+      u8 *data_orig = NULL;
-+      struct snoop_packet *packet = NULL;
-       control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
-               be16_to_cpu(ibhdr->lrh[0]) >> 12);
-@@ -1288,6 +1362,20 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-       if (unlikely(piobuf == NULL))
-               return no_bufs_available(qp);
-+      if (snoop_enable && ppd->mode_flag) {
-+              int nomatch = 0;
-+              if (ppd->filter_callback)
-+                      nomatch = ppd->filter_callback(ibhdr, NULL,
-+                                                      ppd->filter_value);
-+              if (nomatch == 0) {
-+                      packet = kzalloc(sizeof(*packet)+QIB_GET_PKT_LEN(ibhdr),
-+                                      GFP_ATOMIC);
-+                      if (packet) {
-+                              INIT_LIST_HEAD(&packet->list);
-+                              packet->total_len = QIB_GET_PKT_LEN(ibhdr);
-+                      }
-+              }
-+      }
-       /*
-        * Write the pbc.
-        * We have to flush after the PBC for correctness on some cpus
-@@ -1297,6 +1385,12 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-       piobuf_orig = piobuf;
-       piobuf += 2;
-+      if (packet) {
-+              /* Copy header */
-+              data_orig = packet->data;
-+              memcpy(data_orig, hdr, (hdrwords << 2));
-+              data_orig += (hdrwords << 2);
-+      }
-       flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
-       if (len == 0) {
-               /*
-@@ -1336,10 +1430,19 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-                       qib_flush_wc();
-               } else
-                       qib_pio_copy(piobuf, addr, dwords);
-+              if (packet) {
-+                      /* Copy data */
-+                      memcpy(data_orig, addr, len);
-+                      data_orig += len;
-+              }
-               goto done;
-       }
--      copy_io(piobuf, ss, len, flush_wc);
-+      copy_io(piobuf, ss, len, flush_wc, packet, data_orig);
- done:
-+      if (packet) {
-+              qib_snoop_send_queue_packet(ppd, packet);
-+              packet = NULL;
-+      }
-       if (dd->flags & QIB_USE_SPCL_TRIG) {
-               u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
-               qib_flush_wc();
-@@ -1623,7 +1726,8 @@ static int qib_query_port(struct ib_device *ibdev, u8 port,
-       props->max_vl_num = qib_num_vls(ppd->vls_supported);
-       props->init_type_reply = 0;
--      props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
-+      props->max_mtu = QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port) ?
-+              QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port) : IB_MTU_4096;
-       switch (ppd->ibmtu) {
-       case 4096:
-               mtu = IB_MTU_4096;
--- 
-1.7.1
-
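
Stepping back from the individual hunks of this deleted patch: every send and receive path gates capture on ppd->mode_flag, asks the optional filter_callback whether the packet matters (0 meaning "matched, capture it"), and clones the packet into a GFP_ATOMIC allocation for the snoop queue. Condensed into one hedged sketch, assuming struct snoop_packet and QIB_GET_PKT_LEN() as they appear above:

    static struct snoop_packet *maybe_capture(struct qib_pportdata *ppd,
                                              struct qib_ib_header *hdr)
    {
            struct snoop_packet *packet;

            if (!ppd->mode_flag)
                    return NULL;            /* snooping disabled */
            if (ppd->filter_callback &&
                ppd->filter_callback(hdr, NULL, ppd->filter_value))
                    return NULL;            /* filtered out */

            /* send paths may hold spinlocks, hence GFP_ATOMIC */
            packet = kzalloc(sizeof(*packet) + QIB_GET_PKT_LEN(hdr),
                             GFP_ATOMIC);
            if (packet) {
                    INIT_LIST_HEAD(&packet->list);
                    packet->total_len = QIB_GET_PKT_LEN(hdr);
            }
            return packet;                  /* NULL simply skips capture */
    }
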
diff --git a/tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch b/tech-preview/xeon-phi/0014-qib-add-RHEL7-support.patch
deleted file mode 100644 (file)
index 42f5399..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-From e90045185670bbdb315d50e5f89bf3f16249ee42 Mon Sep 17 00:00:00 2001
-From: Jubin John <jubin.john@intel.com>
-Date: Mon, 20 Oct 2014 23:53:59 -0700
-Subject: [PATCH] qib add RHEL7 support
-
----
- drivers/infiniband/hw/qib/qib_file_ops.c |    9 +++++++++
- drivers/infiniband/hw/qib/qib_fs.c       |    5 +++++
- drivers/infiniband/hw/qib/qib_init.c     |   23 +++++++++++++++++++++--
- drivers/infiniband/hw/qib/qib_knx.c      |    1 +
- 4 files changed, 36 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
-index 376961d..ea5bdd5 100644
---- a/drivers/infiniband/hw/qib/qib_file_ops.c
-+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
-@@ -39,11 +39,16 @@
- #include <linux/vmalloc.h>
- #include <linux/highmem.h>
- #include <linux/io.h>
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)
- #include <linux/aio.h>
-+#else
-+#include <linux/uio.h>
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0) */
- #include <linux/jiffies.h>
- #include <asm/pgtable.h>
- #include <linux/delay.h>
- #include <linux/export.h>
-+#include <linux/moduleparam.h>
- #include "qib.h"
- #include "qib_common.h"
-@@ -1163,7 +1168,11 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
-       vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
-       vma->vm_ops = &qib_file_vm_ops;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-+#else
-+      vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
-       ret = 1;
- bail:
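
The vm_flags hunk above is the standard backport idiom: VM_RESERVED went away in 3.7, replaced for this purpose by VM_DONTEXPAND | VM_DONTDUMP. A compat macro can hide the version check once instead of repeating it at every mmap site; the macro name below is ours, not the patch's:

    #include <linux/version.h>
    #include <linux/mm.h>

    #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
    #define QIB_VM_LOCAL_FLAGS      (VM_DONTEXPAND | VM_DONTDUMP)
    #else
    #define QIB_VM_LOCAL_FLAGS      (VM_RESERVED | VM_DONTEXPAND)
    #endif

    /* at the mmap site: vma->vm_flags |= QIB_VM_LOCAL_FLAGS; */
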
-diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
-index f247fc6..cbe6e3c 100644
---- a/drivers/infiniband/hw/qib/qib_fs.c
-+++ b/drivers/infiniband/hw/qib/qib_fs.c
-@@ -61,8 +61,13 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
-       inode->i_ino = get_next_ino();
-       inode->i_mode = mode;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
-       inode->i_uid = GLOBAL_ROOT_UID;
-       inode->i_gid = GLOBAL_ROOT_GID;
-+#else
-+      inode->i_uid = 0;
-+      inode->i_gid = 0;
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
-       inode->i_blocks = 0;
-       inode->i_atime = CURRENT_TIME;
-       inode->i_mtime = inode->i_atime;
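
The inode-ownership hunk exists because newer kernels type i_uid/i_gid as kuid_t/kgid_t, so a bare 0 no longer compiles; GLOBAL_ROOT_UID/GLOBAL_ROOT_GID are the typed equivalents. A small helper (name ours) would keep the call site version-agnostic:

    #include <linux/version.h>
    #include <linux/fs.h>

    static inline void qib_set_root_owner(struct inode *inode)
    {
    #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
            inode->i_uid = GLOBAL_ROOT_UID; /* typed kuid_t root */
            inode->i_gid = GLOBAL_ROOT_GID;
    #else
            inode->i_uid = 0;               /* plain integer ids */
            inode->i_gid = 0;
    #endif
    }
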
-diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
-index 0e83ed4..995d301 100644
---- a/drivers/infiniband/hw/qib/qib_init.c
-+++ b/drivers/infiniband/hw/qib/qib_init.c
-@@ -1226,9 +1226,15 @@ void qib_disable_after_error(struct qib_devdata *dd)
-       if (dd->devstatusp)
-               *dd->devstatusp |= QIB_STATUS_HWERROR;
- }
--
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
- static void qib_remove_one(struct pci_dev *);
--static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
-+static int qib_init_one(struct pci_dev *,
-+                                const struct pci_device_id *);
-+#else
-+static void __devexit qib_remove_one(struct pci_dev *);
-+static int __devinit qib_init_one(struct pci_dev *,
-+                                const struct pci_device_id *);
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
- #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
- #define PFX QIB_DRV_NAME ": "
-@@ -1245,7 +1251,11 @@ MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
- static struct pci_driver qib_driver = {
-       .name = QIB_DRV_NAME,
-       .probe = qib_init_one,
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
-       .remove = qib_remove_one,
-+#else
-+      .remove = __devexit_p(qib_remove_one),
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
-       .id_table = qib_pci_tbl,
-       .err_handler = &qib_pci_err_handler,
- };
-@@ -1486,7 +1496,12 @@ static void qib_postinit_cleanup(struct qib_devdata *dd)
-       qib_free_devdata(dd);
- }
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
- static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
-+#else
-+static int __devinit qib_init_one(struct pci_dev *pdev,
-+                                const struct pci_device_id *ent)
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
- {
-       int ret, j, pidx, initfail;
-       struct qib_devdata *dd = NULL;
-@@ -1593,7 +1608,11 @@ bail:
-       return ret;
- }
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
- static void qib_remove_one(struct pci_dev *pdev)
-+#else
-+static void __devexit qib_remove_one(struct pci_dev *pdev)
-+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
- {
-       struct qib_devdata *dd = pci_get_drvdata(pdev);
-       int ret;
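
The qib_init.c hunks all work around one change: 3.8 removed the __devinit/__devexit section annotations, so older kernels still need them and newer ones must not see them. Defining the markers away keeps a single set of probe/remove definitions; a sketch of that compat shim:

    #include <linux/version.h>

    #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
    /* annotations no longer exist; make them no-ops */
    #define __devinit
    #define __devexit
    #define __devexit_p(fn) (fn)
    #endif
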
-diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c
-index f692913..efe79d6 100644
---- a/drivers/infiniband/hw/qib/qib_knx.c
-+++ b/drivers/infiniband/hw/qib/qib_knx.c
-@@ -29,6 +29,7 @@
-  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-  * SOFTWARE.
-  */
-+#include <linux/module.h>
- #include <linux/kthread.h>
- #include <linux/kernel.h>
- #include <linux/dma-mapping.h>
--- 
-1.7.1
-