-From 0ed3bd45f3b358e5f32ff4e6e87b94fd80da69b5 Mon Sep 17 00:00:00 2001
-From: Phil Cayton <phil.cayton@intel.com>
-Date: Thu, 6 Feb 2014 13:45:33 -0800
-Subject: [PATCH 10/12] Update qib for XEON PHI support
+IB/qib: Update qib for XEON PHI support
+From: Jubin John <jubin.john@intel.com>
+
+Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
+Signed-off-by: Jubin John <jubin.john@intel.com>
---
- drivers/infiniband/hw/qib/Makefile | 5 +
- drivers/infiniband/hw/qib/qib.h | 41 +-
- drivers/infiniband/hw/qib/qib_common.h | 8 +-
- drivers/infiniband/hw/qib/qib_file_ops.c | 369 +++++++++++-
- drivers/infiniband/hw/qib/qib_init.c | 61 +-
- drivers/infiniband/hw/qib/qib_knx.c | 923 +++++++++++++++++++++++++++++
- drivers/infiniband/hw/qib/qib_knx.h | 63 ++
- drivers/infiniband/hw/qib/qib_knx_sdma.h | 105 ++++
- drivers/infiniband/hw/qib/qib_knx_tidrcv.h | 48 ++
- 9 files changed, 1596 insertions(+), 27 deletions(-)
+ drivers/infiniband/hw/qib/Makefile | 5
+ drivers/infiniband/hw/qib/qib.h | 19
+ drivers/infiniband/hw/qib/qib_common.h | 7
+ drivers/infiniband/hw/qib/qib_file_ops.c | 334 +++++-
+ drivers/infiniband/hw/qib/qib_init.c | 16
+ drivers/infiniband/hw/qib/qib_knx.c | 1532 ++++++++++++++++++++++++++++
+ drivers/infiniband/hw/qib/qib_knx.h | 74 +
+ drivers/infiniband/hw/qib/qib_knx_common.h | 126 ++
+ drivers/infiniband/hw/qib/qib_user_sdma.c | 173 +--
+ drivers/infiniband/hw/qib/qib_user_sdma.h | 106 ++
+ 10 files changed, 2241 insertions(+), 151 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_knx.c
create mode 100644 drivers/infiniband/hw/qib/qib_knx.h
- create mode 100644 drivers/infiniband/hw/qib/qib_knx_sdma.h
- create mode 100644 drivers/infiniband/hw/qib/qib_knx_tidrcv.h
+ create mode 100644 drivers/infiniband/hw/qib/qib_knx_common.h
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index 57f8103..ba2a49d 100644
+ccflags-y += -DQIB_CONFIG_KNX
+endif
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
-index 1946101..ad87abd 100644
+index 1946101..85c078e 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
-@@ -112,7 +112,20 @@ struct qib_eep_log_mask {
- };
-
- /*
-- * Below contains all data related to a single context (formerly called port).
-+ * Indicates to the driver that the loadable parameter could be
-+ * configured by it as it was not configured by the user.
-+ */
-+#define QIB_DRIVER_AUTO_CONFIGURATION 10
-+
-+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
-+#define qib_configure_numa(a) \
-+ (a.x86_vendor == X86_VENDOR_INTEL && a.x86 == 6 && a.x86_model == 45)
-+#else
-+#define qib_configure_numa(a) 0
+@@ -234,6 +234,10 @@ struct qib_ctxtdata {
+ u32 lookaside_qpn;
+ /* QPs waiting for context processing */
+ struct list_head qp_wait_list;
++#ifdef QIB_CONFIG_KNX
++ /* KNX Receive Context Data */
++ struct qib_knx_ctxt *krcd;
+#endif
-+
-+/*
-+ * Below contains all data related to a single context (formerly called port).
- */
-
#ifdef CONFIG_DEBUG_FS
-@@ -739,6 +752,12 @@ struct qib_devdata {
-
- /* mem-mapped pointer to base of chip regs */
- u64 __iomem *kregbase;
-+
-+ /* mem-mapped base of chip regs plus offset of the SendBufAvail0
-+ * register
-+ */
-+ u64 sendbufavail0;
-+
- /* end of mem-mapped chip space excluding sendbuf and user regs */
- u64 __iomem *kregend;
- /* physical address of chip for io_remap, etc. */
-@@ -1103,7 +1122,15 @@ struct qib_devdata {
- /* per device cq worker */
+ /* verbs stats per CTX */
+ struct qib_opcode_stats_perctx *opstats;
+@@ -1104,6 +1108,11 @@ struct qib_devdata {
struct kthread_worker *worker;
-+ int local_node_id; /* NUMA node closest to HCA */
int assigned_node_id; /* NUMA node closest to HCA */
+
+#ifdef QIB_CONFIG_KNX
-+ /* peer node id of connected KNX node */
-+ u16 node_id;
-+ struct qib_knx *knx;
++ /* number of KNx nodes using this device */
++ u16 num_knx;
+#endif
-+
};
/* hol_state values */
-@@ -1132,6 +1159,9 @@ struct qib_filedata {
+@@ -1132,6 +1141,9 @@ struct qib_filedata {
unsigned tidcursor;
struct qib_user_sdma_queue *pq;
int rec_cpu_num; /* for cpu affinity; -1 if none */
};
extern struct list_head qib_dev_list;
-@@ -1209,6 +1239,13 @@ int qib_set_uevent_bits(struct qib_pportdata *, const int);
+@@ -1209,6 +1221,13 @@ int qib_set_uevent_bits(struct qib_pportdata *, const int);
(((struct qib_filedata *)(fp)->private_data)->tidcursor)
#define user_sdma_queue_fp(fp) \
(((struct qib_filedata *)(fp)->private_data)->pq)
static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd)
{
-@@ -1476,6 +1513,8 @@ extern unsigned qib_n_krcv_queues;
- extern unsigned qib_sdma_fetch_arb;
- extern unsigned qib_compat_ddr_negotiate;
- extern int qib_special_trigger;
-+extern unsigned qib_pio_avail_bits;
-+extern unsigned qib_rcvhdrpoll;
- extern unsigned qib_numa_aware;
-
- extern struct mutex qib_mutex;
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
-index 5670ace..9182d02 100644
+index 5670ace..39eef25 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -1,4 +1,5 @@
/* size of struct base_info to write to */
__u32 spu_base_info_size;
-@@ -360,7 +365,6 @@ struct qib_user_info {
- * address of struct base_info to write to
- */
- __u64 spu_base_info;
--
- } __attribute__ ((aligned(8)));
-
- /* User commands. */
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
-index 275f247..6eebad0 100644
+index c062c60..93ca1f9 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
-@@ -48,6 +48,42 @@
+@@ -53,6 +53,7 @@
#include "qib.h"
#include "qib_common.h"
#include "qib_user_sdma.h"
-+#ifdef QIB_CONFIG_KNX
+#include "qib_knx.h"
-+#endif
-+
-+/*
-+ * Option for a user application to read from the SendBufAvailn registers
-+ * for the send buffer status as a memory IO operation or from main memory.
-+ * The default mode of operation is to have the user process read this
-+ * register from mapped memory when running on the local socket and have
-+ * it read from the register directly (memory IO) when running on the far
-+ * socket. For older applications, ie.., with QIB_USER_SWMINOR less than
-+ * 12, all processes will read the register from main memory.
-+ */
-+unsigned qib_pio_avail_bits = 1;
-+module_param_named(pio_avail_bits, qib_pio_avail_bits, uint, S_IRUGO);
-+MODULE_PARM_DESC(pio_avail_bits, "send buffer status read: "
-+ "0=memory read on local NUMA node & MMIO read on far nodes, "
-+ "1=memory read(default), 2=MMIO read, "
-+ "10=option 1 for AMD & <= Intel Westmere cpus and option 0 for newer cpus");
-+
-+/*
-+ * Option for a user application to read from the RcvHdrTailn registers
-+ * for the next empty receive header queue entry as a memory IO operation
-+ * or from main memory. The default mode of operation is to have the user
-+ * process read this register from mapped memory when running on the local
-+ * socket and have it read from the register directly (memory IO) when
-+ * running on the far socket. For older applications, ie.., with
-+ * QIB_USER_SWMINOR less than 12, all user processes will read the
-+ * register from main memory.
-+ */
-+unsigned qib_rcvhdrpoll = 1;
-+module_param_named(rcvhdrpoll, qib_rcvhdrpoll, uint, S_IRUGO);
-+MODULE_PARM_DESC(rcvhdrpoll, "receive buffer status read: "
-+ "0=memory read on local NUMA node & MMIO read on far nodes, "
-+ "1=memory read(default), 2=MMIO read, "
-+ "10=option 1 for AMD & <= Intel Westmere cpus and option 0 for newer cpus");
#undef pr_fmt
#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
-@@ -89,6 +125,73 @@ static u64 cvt_kvaddr(void *p)
+@@ -64,6 +65,9 @@ static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+ static unsigned int qib_poll(struct file *, struct poll_table_struct *);
+ static int qib_mmapf(struct file *, struct vm_area_struct *);
++static int subctxt_search_ctxts(struct qib_devdata *, struct file *,
++ const struct qib_user_info *);
++
+
+ static const struct file_operations qib_file_ops = {
+ .owner = THIS_MODULE,
+@@ -94,6 +98,64 @@ static u64 cvt_kvaddr(void *p)
return paddr;
}
+ if (ret < 0)
+ goto bail_free;
+
-+ switch (qib_rcvhdrpoll) {
-+ case 0:
-+ if (local_node)
-+ break;
-+ case 2:
-+ kinfo->spi_runtime_flags &= ~QIB_RUNTIME_NODMA_RTAIL;
-+ break;
-+ }
-+
+ if (rcd->subctxt_cnt && !subctxt_fp(fp))
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER;
+
static int qib_get_base_info(struct file *fp, void __user *ubase,
size_t ubase_size)
{
-@@ -100,6 +203,7 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
- unsigned subctxt_cnt;
- int shared, master;
- size_t sz;
-+ int local_node = (numa_node_id() == pcibus_to_node(dd->pcidev->bus));
-
- subctxt_cnt = rcd->subctxt_cnt;
- if (!subctxt_cnt) {
-@@ -176,15 +280,91 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
- * both can be enabled and used.
+@@ -182,14 +244,43 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
*/
kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys;
-- kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys;
+ kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys;
+ /*
+ * In the case of KNX, qib_do_user_init() would call into the
+ * KNX-specific memory allocation/registration functions. These
+ if (knx_node_fp(fp))
+ kinfo->spi_runtime_flags =
+ qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_FLAGS, fp);
-+ else {
-+ switch (qib_rcvhdrpoll) {
-+ case 0:
-+ if (local_node)
-+ kinfo->spi_rcvhdr_tailaddr =
-+ (u64) rcd->rcvhdrqtailaddr_phys;
-+ else {
-+ kinfo->spi_rcvhdr_tailaddr =
-+ (u64) (kinfo->spi_uregbase +
-+ ur_rcvhdrtail);
-+ kinfo->spi_runtime_flags &=
-+ ~QIB_RUNTIME_NODMA_RTAIL;
-+ }
-+ break;
-+ case 1:
-+ kinfo->spi_rcvhdr_tailaddr =
-+ (u64) rcd->rcvhdrqtailaddr_phys;
-+ break;
-+ case 2:
-+ kinfo->spi_rcvhdr_tailaddr =
-+ (u64) (kinfo->spi_uregbase + ur_rcvhdrtail);
-+ kinfo->spi_runtime_flags &= ~QIB_RUNTIME_NODMA_RTAIL;
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ break;
-+ }
-+ }
-+
kinfo->spi_rhf_offset = dd->rhf_offset;
kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys;
- kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys;
+
+ /* see comment for spi_uregbase above */
-+ if (knx_node_fp(fp)) {
++ if (knx_node_fp(fp))
+ kinfo->spi_pioavailaddr =
+ qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_PIOAVAIL, fp);
-+ } else {
-+ switch (qib_pio_avail_bits) {
-+ case 0:
-+ kinfo->spi_pioavailaddr = local_node ?
-+ (u64)dd->pioavailregs_phys :
-+ (u64)dd->sendbufavail0;
-+ break;
-+ case 1:
-+ kinfo->spi_pioavailaddr = (u64)dd->pioavailregs_phys;
-+ break;
-+ case 2:
-+ kinfo->spi_pioavailaddr = (u64)dd->sendbufavail0;
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ break;
-+ }
-+ }
-+
-+ if (ret < 0)
-+ goto bail;
++ else
++ kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys;
+
/* setup per-unit (not port) status area for user programs */
- kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
if (!shared) {
kinfo->spi_piocnt = rcd->piocnt;
kinfo->spi_piobufbase = (u64) rcd->piobufs;
-@@ -204,7 +384,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
+@@ -209,7 +300,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
dd->palign * kinfo->spi_piocnt * slave;
}
kinfo->spi_sendbuf_status =
cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]);
/* only spi_subctxt_* fields should be set in this block! */
-@@ -225,6 +409,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
+@@ -230,6 +325,11 @@ static int qib_get_base_info(struct file *fp, void __user *ubase,
kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) /
dd->palign;
kinfo->spi_pioalign = dd->palign;
kinfo->spi_qpair = QIB_KD_QP;
/*
* user mode PIO buffers are always 2KB, even when 4KB can
-@@ -978,6 +1167,35 @@ bail:
- return ret;
- }
-
-+static int mmap_sendbufavail(struct vm_area_struct *vma, struct qib_devdata *dd,
-+ u64 ureg)
-+{
-+ unsigned long phys;
-+ unsigned long sz;
-+ int ret;
-+
-+ /*
-+ * This is real hardware, so use io_remap. This is the mechanism
-+ * for the user process to update the head registers for their ctxt
-+ * in the chip.
-+ */
-+ sz = PAGE_SIZE;
-+ if ((vma->vm_end - vma->vm_start) > sz)
-+ ret = -EFAULT;
-+ else {
-+ phys = dd->physaddr + ureg;
-+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-+
-+ vma->vm_flags &= ~VM_MAYWRITE;
-+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ;
-+
-+ ret = io_remap_pfn_range(vma, vma->vm_start,
-+ phys >> PAGE_SHIFT,
-+ vma->vm_end - vma->vm_start,
-+ vma->vm_page_prot);
-+ }
-+ return ret;
-+}
- /**
- * qib_mmapf - mmap various structures into user space
- * @fp: the file pointer
-@@ -1056,6 +1274,8 @@ static int qib_mmapf(struct file *fp, struct vm_area_struct *vma)
-
- if (pgaddr == ureg)
- ret = mmap_ureg(vma, dd, ureg);
-+ else if (pgaddr == dd->sendbufavail0)
-+ ret = mmap_sendbufavail(vma, dd, pgaddr - (u64)dd->kregbase);
- else if (pgaddr == piobufs)
- ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt);
- else if (pgaddr == dd->pioavailregs_phys)
-@@ -1187,11 +1407,7 @@ static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
- int cpu;
- cpu = find_first_zero_bit(qib_cpulist,
- qib_cpulist_count);
-- if (cpu == qib_cpulist_count)
-- qib_dev_err(dd,
-- "no cpus avail for affinity PID %u\n",
-- current->pid);
-- else {
-+ if (cpu != qib_cpulist_count) {
- __set_bit(cpu, qib_cpulist);
- fd->rec_cpu_num = cpu;
- }
-@@ -1261,6 +1477,17 @@ static int init_subctxts(struct qib_devdata *dd,
+@@ -1270,6 +1370,17 @@ static int init_subctxts(struct qib_devdata *dd,
goto bail;
}
rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts);
if (!rcd->subctxt_uregbase) {
ret = -ENOMEM;
-@@ -1283,6 +1510,9 @@ static int init_subctxts(struct qib_devdata *dd,
+@@ -1292,6 +1403,9 @@ static int init_subctxts(struct qib_devdata *dd,
goto bail_rhdr;
}
rcd->subctxt_cnt = uinfo->spu_subctxt_cnt;
rcd->subctxt_id = uinfo->spu_subctxt_id;
rcd->active_slaves = 1;
-@@ -1333,6 +1563,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+@@ -1326,6 +1440,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+
+ rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
+
++#ifdef QIB_CONFIG_KNX
++ if (uinfo->spu_knx_node_id)
++ /*
++ * Skip allocation of page pointer list for TID
++ * receives. This will be done on the KNX.
++ */
++ goto no_page_list;
++#endif
+ /*
+ * Allocate memory for use in qib_tid_update() at open to
+ * reduce cost of expected send setup per message segment
+@@ -1341,7 +1463,11 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
+ ret = -ENOMEM;
goto bailerr;
}
++#ifdef QIB_CONFIG_KNX
++no_page_list:
++#endif
rcd->userversion = uinfo->spu_userversion;
+
ret = init_subctxts(dd, rcd, uinfo);
if (ret)
goto bailerr;
-@@ -1496,7 +1727,16 @@ static int find_shared_ctxt(struct file *fp,
+@@ -1498,43 +1624,68 @@ done:
+ static int find_shared_ctxt(struct file *fp,
+ const struct qib_user_info *uinfo)
+ {
+- int devmax, ndev, i;
++ int devmax, ndev;
+ int ret = 0;
++ struct qib_devdata *dd;
- for (ndev = 0; ndev < devmax; ndev++) {
- struct qib_devdata *dd = qib_lookup(ndev);
--
+#ifdef QIB_CONFIG_KNX
-+ /*
-+ * In the case we are allocating a context for a KNX process,
-+ * reject any device that is not associated with the
-+ * requesting KNX.
-+ */
-+ if ((uinfo->spu_knx_node_id &&
-+ dd->node_id != uinfo->spu_knx_node_id))
-+ continue;
++ /*
++	 * In the case we are allocating a context for a KNX process,
++	 * don't loop over all devices but use the one associated with the
++	 * requesting KNX.
++ */
++ if (uinfo->spu_knx_node_id) {
++ dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id);
++ if (dd && dd->num_knx)
++ ret = subctxt_search_ctxts(dd, fp, uinfo);
++ goto done;
++ }
+#endif
+ devmax = qib_count_units(NULL, NULL);
+
+ for (ndev = 0; ndev < devmax; ndev++) {
+- struct qib_devdata *dd = qib_lookup(ndev);
+-
++ dd = qib_lookup(ndev);
/* device portion of usable() */
if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
continue;
-@@ -1617,6 +1857,14 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+- for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
+- struct qib_ctxtdata *rcd = dd->rcd[i];
++ ret = subctxt_search_ctxts(dd, fp, uinfo);
++ if (ret)
++ break;
++ }
++#ifdef QIB_CONFIG_KNX
++done:
++#endif
++ return ret;
++}
+
+- /* Skip ctxts which are not yet open */
+- if (!rcd || !rcd->cnt)
+- continue;
+- /* Skip ctxt if it doesn't match the requested one */
+- if (rcd->subctxt_id != uinfo->spu_subctxt_id)
+- continue;
+- /* Verify the sharing process matches the master */
+- if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
+- rcd->userversion != uinfo->spu_userversion ||
+- rcd->cnt >= rcd->subctxt_cnt) {
+- ret = -EINVAL;
+- goto done;
+- }
+- ctxt_fp(fp) = rcd;
+- subctxt_fp(fp) = rcd->cnt++;
+- rcd->subpid[subctxt_fp(fp)] = current->pid;
+- tidcursor_fp(fp) = 0;
+- rcd->active_slaves |= 1 << subctxt_fp(fp);
+- ret = 1;
++static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp,
++ const struct qib_user_info *uinfo)
++{
++ int ret = 0, i;
++ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
++ struct qib_ctxtdata *rcd = dd->rcd[i];
++
++ /* Skip ctxts which are not yet open */
++ if (!rcd || !rcd->cnt)
++ continue;
++ /* Skip ctxt if it doesn't match the requested one */
++ if (rcd->subctxt_id != uinfo->spu_subctxt_id)
++ continue;
++ /* Verify the sharing process matches the master */
++ if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
++ rcd->userversion != uinfo->spu_userversion ||
++ rcd->cnt >= rcd->subctxt_cnt) {
++ ret = -EINVAL;
+ goto done;
+ }
++ ctxt_fp(fp) = rcd;
++ subctxt_fp(fp) = rcd->cnt++;
++ rcd->subpid[subctxt_fp(fp)] = current->pid;
++ tidcursor_fp(fp) = 0;
++ rcd->active_slaves |= 1 << subctxt_fp(fp);
++ ret = 1;
++ break;
+ }
+-
+ done:
+ return ret;
+ }
+@@ -1626,6 +1777,13 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+
if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
alg = uinfo->spu_port_alg;
-
+#ifdef QIB_CONFIG_KNX
+ /* Make sure we have a connection to the KNX module on the right node */
+ if (uinfo->spu_knx_node_id && !qib_knx_get(uinfo->spu_knx_node_id)) {
+ goto done;
+ }
+#endif
-+
+
mutex_lock(&qib_mutex);
- if (qib_compatible_subctxts(swmajor, swminor) &&
-@@ -1638,6 +1886,24 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+@@ -1633,13 +1791,38 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+ uinfo->spu_subctxt_cnt) {
+ ret = find_shared_ctxt(fp, uinfo);
+ if (ret > 0) {
+- ret = do_qib_user_sdma_queue_create(fp);
++#ifdef QIB_CONFIG_KNX
++ if (uinfo->spu_knx_node_id) {
++ ret = qib_knx_sdma_queue_create(fp);
++ } else
++#endif
++ ret = do_qib_user_sdma_queue_create(fp);
+ if (!ret)
+ assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+ goto done_ok;
+ }
+ }
+
++#ifdef QIB_CONFIG_KNX
++ /*
++ * If there is a KNX node set, we pick the device that is
++	 * associated with that KNX node
++ */
++ if (uinfo->spu_knx_node_id) {
++ struct qib_devdata *dd =
++ qib_knx_node_to_dd(uinfo->spu_knx_node_id);
++ if (dd) {
++ ret = find_free_ctxt(dd->unit, fp, uinfo);
++ if (!ret)
++ ret = qib_knx_alloc_ctxt(
++ uinfo->spu_knx_node_id,
++ ctxt_fp(fp)->ctxt);
++ } else
++ ret = -ENXIO;
++ goto done_chk_sdma;
++ }
++
++#endif
+ i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
+ if (i_minor)
+ ret = find_free_ctxt(i_minor - 1, fp, uinfo);
+@@ -1648,7 +1831,6 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
const unsigned int cpu = cpumask_first(¤t->cpus_allowed);
const unsigned int weight =
cpumask_weight(¤t->cpus_allowed);
+-
+ if (weight == 1 && !test_bit(cpu, qib_cpulist))
+ if (!find_hca(cpu, &unit) && unit >= 0)
+ if (!find_free_ctxt(unit, fp, uinfo)) {
+@@ -1659,9 +1841,21 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+ }
+
+ done_chk_sdma:
+- if (!ret)
++ if (!ret) {
+#ifdef QIB_CONFIG_KNX
-+ /*
-+ * If there is a KNX node set, we pick the device that is on
-+ * the same NUMA node as the KNX.
-+ */
+ if (uinfo->spu_knx_node_id) {
-+ struct qib_devdata *dd =
-+ qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-+ if (dd) {
-+ ret = find_free_ctxt(dd->unit, fp, uinfo);
-+ if (!ret)
-+ ret = qib_knx_alloc_ctxt(dd,
-+ ctxt_fp(fp)->ctxt);
-+ } else
-+ ret = -ENXIO;
-+ goto done_chk_sdma;
++ ret = qib_knx_sdma_queue_create(fp);
++ /*if (!ret)
++ ret = qib_knx_setup_tidrcv(fp);*/
++ goto done_ok;
+ }
+#endif
-
- if (weight == 1 && !test_bit(cpu, qib_cpulist))
- if (!find_hca(cpu, &unit) && unit >= 0)
-@@ -1652,6 +1918,9 @@ done_chk_sdma:
- if (!ret)
ret = do_qib_user_sdma_queue_create(fp);
++ }
done_ok:
+#ifdef QIB_CONFIG_KNX
+ knx_node_fp(fp) = uinfo->spu_knx_node_id;
mutex_unlock(&qib_mutex);
done:
-@@ -1666,11 +1935,25 @@ static int qib_do_user_init(struct file *fp,
+@@ -1676,11 +1870,25 @@ static int qib_do_user_init(struct file *fp,
struct qib_ctxtdata *rcd = ctxt_fp(fp);
struct qib_devdata *dd;
unsigned uctxt;
goto bail;
}
-@@ -1721,6 +2004,41 @@ static int qib_do_user_init(struct file *fp,
+@@ -1731,6 +1939,41 @@ static int qib_do_user_init(struct file *fp,
*/
dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
/*
* Now allocate the rcvhdr Q and eager TIDs; skip the TID
* array for time being. If rcd->ctxt > chip-supported,
-@@ -1730,6 +2048,7 @@ static int qib_do_user_init(struct file *fp,
+@@ -1740,6 +1983,9 @@ static int qib_do_user_init(struct file *fp,
ret = qib_create_rcvhdrq(dd, rcd);
if (!ret)
ret = qib_setup_eagerbufs(rcd);
++#ifdef QIB_CONFIG_KNX
+cont_init:
++#endif
if (ret)
goto bail_pio;
-@@ -1752,7 +2071,6 @@ static int qib_do_user_init(struct file *fp,
- */
- if (rcd->rcvhdrtail_kvaddr)
- qib_clear_rcvhdrtail(rcd);
--
- dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB,
- rcd->ctxt);
+@@ -1837,6 +2083,13 @@ static int qib_close(struct inode *in, struct file *fp)
-@@ -1884,6 +2202,12 @@ static int qib_close(struct inode *in, struct file *fp)
+ /* drain user sdma queue */
+ if (fd->pq) {
++#ifdef QIB_CONFIG_KNX
++ /*
++ * The thread should be stopped first before attempting
++ * to clean the queue.
++ */
++ qib_knx_sdma_queue_destroy(fd);
++#endif
+ qib_user_sdma_queue_drain(rcd->ppd, fd->pq);
+ qib_user_sdma_queue_destroy(fd->pq);
+ }
+@@ -1894,6 +2147,12 @@ static int qib_close(struct inode *in, struct file *fp)
}
mutex_unlock(&qib_mutex);
qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */
bail:
-@@ -2169,15 +2493,22 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
+@@ -2179,6 +2438,13 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
if (ret)
goto bail;
break;
case QIB_CMD_USER_INIT:
- ret = qib_do_user_init(fp, &cmd.cmd.user_info);
-- if (ret)
-- goto bail;
-- ret = qib_get_base_info(fp, (void __user *) (unsigned long)
-- cmd.cmd.user_info.spu_base_info,
-- cmd.cmd.user_info.spu_base_info_size);
-+ if (!ret)
-+ ret = qib_get_base_info(
-+ fp, (void __user *) (unsigned long)
-+ cmd.cmd.user_info.spu_base_info,
-+ cmd.cmd.user_info.spu_base_info_size);
- break;
-
- case QIB_CMD_RECV_CTRL:
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
-index 24e802f..84b3222 100644
+index 17e0831..f8992f7 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -51,6 +51,10 @@
#undef pr_fmt
#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
-@@ -64,6 +68,14 @@
- #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)
-
- /*
-+ * Select the NUMA node id on which to allocate the receive header
-+ * queue, eager buffers and send pioavail register.
-+ */
-+int qib_numa_node;
-+module_param_named(numa_node, qib_numa_node, int, S_IRUGO);
-+MODULE_PARM_DESC(numa_node, "NUMA node on which memory is allocated");
-+
-+/*
- * Number of ctxts we are configured to use (to allow for more pio
- * buffers per ctxt, etc.) Zero means use chip value.
- */
-@@ -71,11 +83,6 @@ ushort qib_cfgctxts;
- module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
- MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
-
--unsigned qib_numa_aware;
--module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
--MODULE_PARM_DESC(numa_aware,
-- "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
--
- /*
- * If set, do not write to any regs if avoidable, hack to allow
- * check for deranged default register values.
-@@ -84,6 +91,12 @@ ushort qib_mini_init;
- module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
- MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");
-
-+unsigned qib_numa_aware = QIB_DRIVER_AUTO_CONFIGURATION;
-+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
-+MODULE_PARM_DESC(numa_aware, "Use NUMA aware allocations: "
-+ "0=disabled, 1=enabled, "
-+ "10=option 0 for AMD & <= Intel Westmere cpus and option 1 for newer cpus(default)");
-+
- unsigned qib_n_krcv_queues;
- module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
- MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
-@@ -1095,6 +1108,24 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
- unsigned long flags;
- struct qib_devdata *dd;
- int ret;
-+ int node_id;
-+ int local_node_id = pcibus_to_node(dd->pcidev->bus);
-+ s64 new_node_id = qib_numa_node;
-+
-+ if (local_node_id < 0)
-+ local_node_id = numa_node_id();
-+
-+ if (new_node_id < 0)
-+ new_node_id = local_node_id;
-+
-+ new_node_id = node_online(new_node_id) ? new_node_id :
-+ local_node_id;
-+
-+ dd->local_node_id = local_node_id;
-+ dd->assigned_node_id = new_node_id;
-+
-+ node_id = qib_numa_aware ? dd->local_node_id :
-+ dd->assigned_node_id;
-
- dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
- if (!dd) {
-@@ -1263,6 +1294,13 @@ static int __init qlogic_ib_init(void)
+@@ -1270,6 +1274,12 @@ static int __init qlogic_ib_init(void)
/* not fatal if it doesn't work */
if (qib_init_qibfs())
pr_err("Unable to register ipathfs\n");
+#ifdef QIB_CONFIG_KNX
+ ret = qib_knx_server_init();
+ if (ret < 0)
-+ pr_err("Unable to start KNX listen thread\n");
++ pr_err(": Unable to start KNX listen thread\n");
+#endif
-+
goto bail; /* all OK */
bail_dev:
-@@ -1287,6 +1325,10 @@ static void __exit qlogic_ib_cleanup(void)
+@@ -1294,6 +1304,9 @@ static void __exit qlogic_ib_cleanup(void)
{
int ret;
+#ifdef QIB_CONFIG_KNX
+ qib_knx_server_exit();
+#endif
-+
ret = qib_exit_qibfs();
if (ret)
pr_err(
-@@ -1754,6 +1796,15 @@ int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
- iounmap(dd->kregbase);
- dd->kregbase = NULL;
+@@ -1546,6 +1559,9 @@ static void __devexit qib_remove_one(struct pci_dev *pdev)
+ /* unregister from IB core */
+ qib_unregister_ib_device(dd);
-+ if (qib_numa_aware == QIB_DRIVER_AUTO_CONFIGURATION)
-+ qib_numa_aware = qib_configure_numa(boot_cpu_data) ? 1 : 0;
-+
-+ if (qib_rcvhdrpoll == QIB_DRIVER_AUTO_CONFIGURATION)
-+ qib_rcvhdrpoll = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
-+ if (qib_pio_avail_bits == QIB_DRIVER_AUTO_CONFIGURATION)
-+ qib_pio_avail_bits = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
++#ifdef QIB_CONFIG_KNX
++ qib_knx_remove_device(dd);
++#endif
/*
- * Assumes chip address space looks like:
- * - kregs + sregs + cregs + uregs (in any order)
+ * Disable the IB link, disable interrupts on the device,
+ * clear dma engines, etc.
diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c
new file mode 100644
-index 0000000..c15276f
+index 0000000..5a9bdaa
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_knx.c
-@@ -0,0 +1,923 @@
+@@ -0,0 +1,1532 @@
+/*
-+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
++#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+
+#include "qib.h"
+#include "qib_knx.h"
++#include "qib_user_sdma.h"
++#include "qib_knx_common.h"
+
+unsigned int qib_knx_nconns = 5;
+module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO);
+MODULE_PARM_DESC(num_conns, "Max number of pending connections");
+
+#define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9
++#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
++
++#define knx_sdma_next(sdma) \
++ (sdma->head = ((sdma->head + 1) % sdma->desc_num))
++#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub)
++#define QIB_KNX_SDMA_STATUS(sdma, st) \
++ QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)st << 32) | 1)
+
+struct qib_knx_server {
+ struct task_struct *kthread;
+ struct scif_range *pages;
+};
+
++struct qib_knx_tidrcv {
++ struct qib_knx_rma tidmem;
++ u64 tidbase;
++ u32 tidcnt;
++};
++
+struct qib_knx_ctxt {
++ u16 ctxt;
++ struct qib_knx *knx;
++ struct qib_pportdata *ppd;
+ /* local registered memory for PIO buffers */
+ struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT];
+ /* local registered memory for user registers */
+ __u64 status;
+ __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT];
+ __u32 runtime_flags;
++
++ struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT];
++};
++
++struct qib_knx_sdma {
++ /* KNX flags page */
++ struct scif_range *mflag_pages;
++ struct qib_knx_sdma_mflags *mflags;
++ /* KNX descriptor queue */
++ struct scif_range *queue_pages;
++ struct qib_knx_sdma_desc *queue;
++ u32 desc_num;
++ /* host flags (in host memory) */
++ struct qib_knx_rma hflags_mem;
++ struct qib_knx_sdma_hflags *hflags;
++ u32 head; /* shadow */
++ u32 complete;
+};
+
+struct qib_knx {
+ int numa_node;
+ struct qib_devdata *dd;
+ struct qib_knx_ctxt **ctxts;
++ spinlock_t ctxt_lock;
++ resource_size_t bar;
++ u64 barlen;
++ struct qib_knx_sdma *sdma;
++ struct task_struct *sdma_poll;
++ atomic_t tref;
++ char tname[64];
++ struct qib_knx_rma tidmem;
+};
+
-+#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
-+
+static struct qib_knx_server *server;
+
+static int qib_knx_init(struct qib_knx_server *);
+ void *, size_t, int, const char *);
+static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *,
+ const char *);
++static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t);
+static ssize_t qib_show_knx_node(struct device *, struct device_attribute *,
+ char *);
-+
-+static DEVICE_ATTR(knx_node, S_IRUGO, qib_show_knx_node, NULL);
-+static ssize_t qib_show_knx_node(struct device *dev,
-+ struct device_attribute *attr, char *buf)
-+{
-+ struct qib_ibdev *ibdev =
-+ container_of(dev, struct qib_ibdev, ibdev.dev);
-+ struct qib_devdata *dd = dd_from_dev(ibdev);
-+
-+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->knx->peer.node);
-+}
++static int qib_knx_sdma_init(struct qib_knx *);
++static void qib_knx_sdma_teardown(struct qib_knx *);
++static __always_inline struct page *
++qib_knx_phys_to_page(struct qib_knx *, unsigned long);
++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *,
++ struct qib_knx_sdma_desc *,
++ struct qib_user_sdma_queue *,
++ int *, struct list_head *);
++static int qib_knx_sdma_poll(void *);
++static int qib_knx_tidrcv_init(struct qib_knx *);
++static int qib_knx_tidrcv_teardown(struct qib_knx *);
+
+inline struct qib_knx *qib_knx_get(u16 nodeid)
+{
+
+static int qib_knx_init(struct qib_knx_server *server)
+{
-+ int ret = 0, num_devs = 0, i;
-+ struct qib_devdata *dd;
++ int ret = 0, num_devs = 0, i, seen = 0;
++ unsigned fewest = -1U;
++ struct qib_devdata *dd = NULL, *dd_no_numa = NULL;
+ struct qib_knx *knx;
-+ struct ib_device *ibdev;
++ struct qib_device_info info = { -1 };
+
+ knx = kzalloc(sizeof(*knx), GFP_KERNEL);
+ if (!knx) {
+ }
+
+ INIT_LIST_HEAD(&knx->list);
++ spin_lock_init(&knx->ctxt_lock);
+ knx->numa_node = -1;
+ ret = scif_pci_info(knx->peer.node, &knx->pci_info);
-+ if (!ret)
++ if (!ret) {
+ knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus);
++ knx->bar = pci_resource_start(knx->pci_info.pdev, 0);
++ knx->barlen = pci_resource_len(knx->pci_info.pdev, 0);
++ }
+
+ if (knx->numa_node < 0)
+ knx->numa_node = numa_node_id();
+ num_devs = qib_count_units(NULL, NULL);
+ if (unlikely(!num_devs)) {
+ ret = -ENODEV;
++ /* we have to send this */
++ scif_send(knx->epd.epd, &info, sizeof(info),
++ SCIF_SEND_BLOCK);
+ goto done;
+ }
+
-+ for (i = 0; i < num_devs; i++) {
++ /*
++ * Attempt to find an HCA on the same NUMA node as the card. Save
++ * the first HCA that hasn't been associated with a card in case
++ * there is no HCA on the same NUMA node.
++ */
++ for (i = 0; seen < num_devs; i++) {
+ dd = qib_lookup(i);
-+ if (dd && dd->local_node_id == knx->numa_node)
-+ knx->dd = dd;
++ if (dd) {
++ if (dd->assigned_node_id == knx->numa_node) {
++ knx->dd = dd;
++ break;
++ } else if (dd->num_knx < fewest)
++ dd_no_numa = dd;
++ seen++;
++ }
+ }
+ /*
+ * We didn't find a QIB device on the same NUMA node,
-+ * round-robin across all devices.
++ * use the "backup".
+ */
+ if (unlikely(!knx->dd)) {
-+ knx->dd = qib_lookup(server->nclients % num_devs);
-+ /* it is possible for qib_lookup to return NULL */
-+ if (unlikely(!knx->dd)) {
++ if (!dd_no_numa) {
+ ret = -ENODEV;
++ /* we have to send this */
++ scif_send(knx->epd.epd, &info, sizeof(info),
++ SCIF_SEND_BLOCK);
+ goto done;
+ }
++ knx->dd = dd_no_numa;
+ }
-+ knx->dd->node_id = knx->peer.node;
-+ knx->dd->knx = knx;
++ knx->dd->num_knx++;
++
+ knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts),
+ GFP_KERNEL, knx->numa_node);
+ if (!knx->ctxts)
+ ret = -ENOMEM;
-+ ibdev = &knx->dd->verbs_dev.ibdev;
-+ ret = device_create_file(&ibdev->dev, &dev_attr_knx_node);
++ /* Give the KNX the associated device information. */
++ info.unit = knx->dd->unit;
++ ret = scif_send(knx->epd.epd, &info, sizeof(info),
++ SCIF_SEND_BLOCK);
++
++ ret = qib_knx_sdma_init(knx);
+ if (ret)
-+ /*
-+ * clear the error code since we don't want to fail the
-+ * initialization.
-+ */
-+ ret = 0;
++ goto done;
++ atomic_set(&knx->tref, 0);
++ ret = qib_knx_tidrcv_init(knx);
+done:
+ spin_lock(&server->client_lock);
+ list_add_tail(&knx->list, &server->clients);
+static void qib_knx_free(struct qib_knx *knx, int unload)
+{
+ struct qib_devdata *dd = knx->dd;
-+ struct ib_device *ibdev;
+ int i;
+
-+ if (dd) {
-+ ibdev = &dd->verbs_dev.ibdev;
-+ device_remove_file(&ibdev->dev, &dev_attr_knx_node);
-+ }
++ qib_knx_tidrcv_teardown(knx);
++ qib_knx_sdma_teardown(knx);
++ if (dd)
++ dd->num_knx--;
+ /*
+ * If this function is called with unload set, we can
+ * free the context data. Otherwise, we are here
+ return ret;
+}
+
-+int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt)
++static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src,
++ size_t size)
+{
-+ struct qib_knx *knx = dd_to_knx(dd);
++ memcpy_fromio(dst, src, size);
++}
++
++int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt)
++{
++ struct qib_knx *knx = qib_knx_get(node_id);
++ struct qib_devdata *dd = knx->dd;
+ struct qib_knx_ctxt *ptr;
+ int ret = 0;
+
+ ret = -ENOMEM;
+ goto bail;
+ }
++ ptr->knx = knx;
++ ptr->ctxt = ctxt;
++ ptr->ppd = dd->rcd[ctxt]->ppd;
++
++ spin_lock(&knx->ctxt_lock);
+ knx->ctxts[ctxt] = ptr;
++ dd->rcd[ctxt]->krcd = ptr;
++ spin_unlock(&knx->ctxt_lock);
+bail:
+ return ret;
+}
+ enum qib_knx_ctxtinfo_type type,
+ struct file *fp)
+{
-+ struct qib_knx *knx = dd_to_knx(rcd->dd);
++ struct qib_knx *knx = rcd->krcd->knx;
+ __u16 subctxt;
+ __u64 ret = 0;
+
++ spin_lock(&knx->ctxt_lock);
+ if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt])
+ goto done;
+
+ break;
+ }
+done:
++ spin_unlock(&knx->ctxt_lock);
+ return ret;
+}
+
+ char buf[16];
+ off_t offset;
+ int ret = 0;
-+ struct qib_knx *knx = dd_to_knx(dd);
++ struct qib_knx *knx = rcd->krcd->knx;
+
+ if (unlikely(!knx)) {
+ ret = -ENODEV;
+{
+ int ret = 0;
+ off_t offset;
-+ struct qib_knx *knx = dd_to_knx(dd);
++ struct qib_knx *knx = rcd->krcd->knx;
+
+ if (unlikely(!knx)) {
+ ret = -ENODEV;
+{
+ struct qib_knx_mem_map_sg *mapsg;
+ struct qib_knx_mem_map *map;
-+ struct qib_knx *knx = dd_to_knx(dd);
++ struct qib_knx *knx = rcd->krcd->knx;
+ dma_addr_t offset;
+ struct scatterlist *sg;
+ unsigned num_pages;
+ * can use 64bit addresses for DMA but the CPU might not.
+ * (see pci_set_dma_mask() in qib_pcie.c).
+ */
-+ mapsg->sglist = kzalloc(num_pages * sizeof(*mapsg->sglist), GFP_KERNEL);
++ mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist),
++ GFP_KERNEL, knx->numa_node);
+ if (!mapsg->sglist) {
+ ret = -ENOMEM;
+ goto bail_rcvq_pages;
+ }
+ /*
+ * pci_map_sg() will remap all 128 pages of the
-+ * scatterlist seperately (without coalescing them).
++ * scatterlist separately (without coalescing them).
+ * However, since the buffer is contiguous, as long
+ * as the base address is mapped correctly, everything
+ * should work. In any case, check that the mapped
+ struct qib_knx_mem_map_sg *map;
+ struct scatterlist *sg;
+ struct qib_devdata *dd = rcd->dd;
-+ struct qib_knx *knx = dd_to_knx(dd);
++ struct qib_knx *knx = rcd->krcd->knx;
+ unsigned size, egrsize, egrcnt, num_pages, bufs_ppage,
+ egrbufcnt;
+ dma_addr_t dma_addr, page;
+
+ map->size = size;
+ map->dir = DMA_BIDIRECTIONAL;
-+ map->sglist = kzalloc(num_pages * sizeof(*map->sglist), GFP_KERNEL);
++ map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL,
++ knx->numa_node);
+ if (!map->sglist) {
+ ret = -ENOMEM;
+ goto bail_free_rcvegr_phys;
+ rcd->rcvegrbuf[i] = map->pages->va[i];
+ }
+
-+ for (egrbufcnt = i = 0; i < num_pages ; i++) {
++ for (egrbufcnt = i = 0; i < num_pages; i++) {
+ page = rcd->rcvegrbuf_phys[i];
+ dma_addr = page;
-+ for (bufcnt = 0 ; egrbufcnt < egrcnt && bufcnt < bufs_ppage;
++ for (bufcnt = 0; egrbufcnt < egrcnt && bufcnt < bufs_ppage;
+ egrbufcnt++, bufcnt++) {
+ dd->f_put_tid(dd, rcd->rcvegr_tid_base +
+ egrbufcnt +
+
+void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
+{
-+ struct qib_knx *knx = dd_to_knx(dd);
++ struct qib_knx *knx = rcd->krcd->knx;
+ struct qib_knx_ctxt *ctxt;
+ char buf[16];
+ int i, ret = 0;
+ if (!rcd || !knx || !knx->ctxts)
+ return;
+
++ spin_lock(&knx->ctxt_lock);
+ ctxt = knx->ctxts[rcd->ctxt];
++ knx->ctxts[rcd->ctxt] = NULL;
++ spin_unlock(&knx->ctxt_lock);
++
+ if (!ctxt)
+ return;
+
+ qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf);
+ }
+
-+ /* MITKO XXX: handle rcd->tid_pg_list */
-+ knx->ctxts[rcd->ctxt] = NULL;
+ kfree(ctxt);
+ kfree(rcd);
+}
+
++/*
++ * TID management for processes on the MIC happens on the MIC. Therefore,
++ * we only register the HW TID array here.
++ * The MIC will calculate TID array offsets using the same algorithm as
++ * the host. Therefore, it is OK that the entire HW TID array is mapped
++ * since neither side should step on the other.
++ */
++static int qib_knx_tidrcv_init(struct qib_knx *knx)
++{
++	struct qib_devdata *dd = knx->dd;
++	struct qib_knx_tid_info info;
++	void *tidbase;
++	int ret = 0;
++	off_t offset = 0;
++	/* scif_send() returns a negative errno on failure; the counter
++	 * must be signed or the error check below never fires. */
++	ssize_t len;
++	char buf[64];
++
++	memset(&info, 0, sizeof(info));
++
++	info.tidcnt = dd->rcvtidcnt;
++	tidbase = ((char *)dd->kregbase + dd->rcvtidbase);
++	info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase);
++	info.tidtemplate = dd->tidtemplate;
++	info.invalidtid = dd->tidinvalid;
++	/* information needed to properly calculate DMA address to MIC pages */
++	info.bar_addr = knx->bar;
++	info.bar_len = knx->barlen;
++
++	snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
++	offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase,
++					 info.tidbase_len, SCIF_PROT_WRITE,
++					 buf);
++	info.tidbase_offset = offset;
++	if (IS_ERR_VALUE(offset))
++		ret = offset;
++	len = scif_send(knx->epd.epd, &info, sizeof(info),
++			SCIF_SEND_BLOCK);
++	if (len < (ssize_t)sizeof(info))
++		ret = -EFAULT;
++	return ret;
++}
++
++static int qib_knx_tidrcv_teardown(struct qib_knx *knx)
++{
++ char buf[64];
++ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
++ return qib_knx_unregister_memory(knx, &knx->tidmem, buf);
++}
++
++static int qib_knx_sdma_init(struct qib_knx *knx)
++{
++	struct qib_knx_host_mem flags;
++	struct qib_knx_knc_mem mflags;
++	struct qib_knx_sdma *sdma;
++	char buf[64];
++	int ret = 0;
++
++	sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node);
++	if (!sdma) {
++		ret = -ENOMEM;
++		goto done;
++	}
++	sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node);
++	if (!sdma->hflags) {
++		ret = -ENOMEM;
++		goto done_free;
++	}
++	snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node);
++	flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem,
++						     sdma->hflags,
++						     PAGE_SIZE,
++						     SCIF_PROT_WRITE,
++						     buf);
++	if (IS_ERR_VALUE(flags.flags_offset)) {
++		ret = flags.flags_offset;
++		goto free_flags;
++	}
++	sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt;
++	flags.desc_num = sdma->desc_num;
++	ret = scif_send(knx->epd.epd, &flags, sizeof(flags),
++			SCIF_SEND_BLOCK);
++	/*
++	 * Compare against a signed size: a negative scif_send() error
++	 * would otherwise be promoted to a huge unsigned value and the
++	 * failure silently ignored. Also make sure we leave with a
++	 * negative errno rather than a positive byte count.
++	 */
++	if (ret < (int)sizeof(flags)) {
++		ret = -EFAULT;
++		goto unregister;
++	}
++	ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags),
++			SCIF_RECV_BLOCK);
++	if (ret < (int)sizeof(mflags)) {
++		ret = -EINVAL;
++		goto unregister;
++	}
++	ret = scif_get_pages(knx->epd.epd, mflags.flags_offset,
++			     PAGE_SIZE, &sdma->mflag_pages);
++	if (ret < 0 || !sdma->mflag_pages->nr_pages) {
++		ret = -EFAULT;
++		goto unregister;
++	}
++	sdma->mflags = sdma->mflag_pages->va[0];
++	ret = scif_get_pages(knx->epd.epd, mflags.queue_offset,
++			     mflags.queue_len, &sdma->queue_pages);
++	if (ret < 0)
++		goto put_flags;
++	if ((sdma->queue_pages->nr_pages * PAGE_SIZE) !=
++	    mflags.queue_len) {
++		ret = -EFAULT;
++		goto put_queue;
++	}
++	sdma->queue = sdma->queue_pages->va[0];
++	sdma->complete = -1;
++	sdma->head = -1;
++	/* set the initial trigger value */
++	QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1);
++	QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
++	snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll",
++		 knx->peer.node);
++	knx->sdma = sdma;
++	ret = 0;
++	goto done;
++put_queue:
++	scif_put_pages(sdma->queue_pages);
++put_flags:
++	scif_put_pages(sdma->mflag_pages);
++unregister:
++	qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf);
++free_flags:
++	kfree(sdma->hflags);
++done_free:
++	kfree(sdma);
++done:
++	/*
++	 * we have to respond to the MIC so it doesn't get stuck
++	 * in the scif_recv call
++	 */
++	scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK);
++	return ret;
++}
++
++static void qib_knx_sdma_teardown(struct qib_knx *knx)
++{
++ int ret;
++ if (knx->sdma_poll)
++ ret = kthread_stop(knx->sdma_poll);
++ if (knx->sdma) {
++ if (knx->sdma->queue_pages->nr_pages) {
++ knx->sdma->queue = NULL;
++ scif_put_pages(knx->sdma->queue_pages);
++ }
++ if (knx->sdma->mflag_pages->nr_pages) {
++ knx->sdma->mflags = NULL;
++ scif_put_pages(knx->sdma->mflag_pages);
++ }
++ kfree(knx->sdma->hflags);
++ kfree(knx->sdma);
++ knx->sdma = NULL;
++ }
++}
++
++int qib_knx_sdma_queue_create(struct file *fd)
++{
++	struct qib_ctxtdata *rcd = ctxt_fp(fd);
++	struct qib_devdata *dd = rcd->dd;
++	struct qib_knx *knx = rcd->krcd->knx;
++	struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt];
++	u8 subctxt = subctxt_fp(fd);
++	int ret = 0;
++
++	if (!ctxt) {
++		ret = -EINVAL;
++		goto done;
++	}
++	ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev,
++						       dd->unit, rcd->ctxt,
++						       subctxt);
++	/* Bail out on allocation failure instead of falling through and
++	 * starting the polling thread for a NULL queue. */
++	if (!ctxt->pq[subctxt]) {
++		ret = -ENOMEM;
++		goto done;
++	}
++	user_sdma_queue_fp(fd) = ctxt->pq[subctxt];
++	/*
++	 * We start the polling thread the first time a user SDMA
++	 * queue is created. There is no reason to take up CPU
++	 * cycles before then.
++	 */
++	if (atomic_inc_return(&knx->tref) == 1) {
++		knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx,
++					     knx->tname);
++		if (IS_ERR(knx->sdma_poll)) {
++			/* PTR_ERR() is already a negative errno */
++			ret = PTR_ERR(knx->sdma_poll);
++			atomic_dec(&knx->tref);
++			goto free_queue;
++		}
++	}
++	goto done;
++free_queue:
++	user_sdma_queue_fp(fd) = NULL;
++	qib_user_sdma_queue_destroy(ctxt->pq[subctxt]);
++	ctxt->pq[subctxt] = NULL;
++done:
++	return ret;
++}
++
++void qib_knx_sdma_queue_destroy(struct qib_filedata *fd)
++{
++	struct qib_ctxtdata *rcd = fd->rcd;
++	struct qib_knx *knx;
++	unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt;
++
++	/* Host processes do not have a KNX rcd pointer. */
++	if (!rcd->krcd)
++		return;
++	knx = rcd->krcd->knx;
++	/* We still have the memory pointer through fd->pq */
++	spin_lock(&knx->ctxt_lock);
++	if (knx->ctxts[ctxt])
++		knx->ctxts[ctxt]->pq[subctxt] = NULL;
++	spin_unlock(&knx->ctxt_lock);
++	if (atomic_dec_and_test(&knx->tref)) {
++		/* last queue gone; stop the poll thread (result unused) */
++		kthread_stop(knx->sdma_poll);
++		knx->sdma_poll = NULL;
++	}
++}
++
++/*
++ * Convert a MIC physical address to the corresponding host page.
++ */
++static __always_inline struct page *
++qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr) {
++ unsigned long paddr;
++ if ((knx->bar + addr + PAGE_SIZE) >
++ (knx->bar + knx->barlen))
++ return NULL;
++ paddr = knx->bar + addr;
++ return pfn_to_page(paddr >> PAGE_SHIFT);
++}
++
++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt,
++ struct qib_knx_sdma_desc *desc,
++ struct qib_user_sdma_queue *pq,
++ int *ndesc, struct list_head *list)
++{
++ struct qib_knx *knx = ctxt->knx;
++ struct qib_user_sdma_pkt *pkt;
++ dma_addr_t pbc_dma_addr;
++ unsigned pktnw, pbcnw;
++ u32 counter;
++ u16 frag_size;
++ int ret = 0;
++ __le32 *pbc;
++
++ counter = pq->counter;
++
++ pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr);
++ if (!pbc) {
++ ret = -ENOMEM;
++ goto done;
++ }
++ memcpy(pbc, desc->pbc, desc->pbclen);
++
++ pktnw = (le32_to_cpu(*pbc) & 0xFFFF);
++ /*
++ * This assignment is a bit strange. It's because
++ * the pbc counts the number of 32 bit words in the full
++ * packet _except_ the first word of the pbc itself...
++ */
++ pbcnw = (desc->pbclen >> 2) - 1;
++
++ if (pktnw < pbcnw) {
++ ret = -EINVAL;
++ goto free_pbc;
++ }
++
++ if (pktnw != ((desc->length >> 2) + pbcnw)) {
++ ret = -EINVAL;
++ goto free_pbc;
++ }
++
++ frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF;
++ if (((frag_size ? frag_size : desc->length) + desc->pbclen) >
++ ctxt->ppd->ibmaxlen) {
++ ret = -EINVAL;
++ goto free_pbc;
++ }
++ if (frag_size) {
++ /* new SDMA "protocol" */
++ unsigned pktsize, n;
++
++ n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1);
++ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n;
++
++ pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL);
++ if (!pkt) {
++ ret = -ENOMEM;
++ goto free_pbc;
++ }
++ pkt->largepkt = 1;
++ pkt->frag_size = frag_size;
++ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
++
++ if (desc->tidlen) {
++ char *tidsmptr = (char *)pkt + pktsize;
++ memcpy(tidsmptr, desc->tidsm, desc->tidlen);
++ pkt->tidsm =
++ (struct qib_tid_session_member *)tidsmptr;
++ pkt->tidsmcount = desc->tidlen /
++ sizeof(*desc->tidsm);
++ pkt->tidsmidx = 0;
++ }
++ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
++ } else {
++ /* old SDMA */
++ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
++ if (!pkt) {
++ ret = -ENOMEM;
++ goto free_pbc;
++ }
++ pkt->largepkt = 0;
++ pkt->frag_size = desc->length;
++ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
++ }
++ pkt->bytes_togo = desc->length;
++ pkt->payload_size = 0;
++ pkt->counter = counter;
++ pkt->tiddma = !!desc->tidlen;
++ /*
++ * The generic user SDMA code will use this as a flag to
++ * decide whether to call the KNx-specific pkt free
++ * function. However, it doesn't know what the value
++ * actually means.
++ */
++ pkt->remote = (u64)knx;
++
++ qib_user_sdma_init_frag(pkt, 0,
++ 0, desc->pbclen,
++ 1, 0,
++ 0, 0,
++ NULL, pbc,
++ pbc_dma_addr, desc->pbclen);
++ pkt->index = 0;
++ pkt->naddr = 1;
++
++ if (desc->npages) {
++ /* we have user data */
++ int i;
++ struct page *page;
++ unsigned plen = 0, len = desc->length;
++ for (i = 0; i < desc->npages; i++) {
++ unsigned long off = (i == 0 ? desc->offset : 0);
++ plen = (len > PAGE_SIZE ? PAGE_SIZE : len);
++ page = qib_knx_phys_to_page(knx, desc->pages[i]);
++ ret = qib_user_sdma_page_to_frags(knx->dd, pq,
++ pkt, page, 0, off,
++ (off + plen > PAGE_SIZE ?
++ PAGE_SIZE - off : plen),
++ NULL);
++ if (ret < 0)
++ goto free_sdma;
++ len -= plen - off;
++ }
++ } else {
++ pkt->addr[0].last_desc = 1;
++ if (pbc_dma_addr == 0) {
++ pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev,
++ pbc, desc->pbclen,
++ DMA_TO_DEVICE);
++ if (dma_mapping_error(&knx->dd->pcidev->dev,
++ pbc_dma_addr)) {
++ ret = -ENOMEM;
++ goto free_sdma;
++ }
++ pkt->addr[0].addr = pbc_dma_addr;
++ pkt->addr[0].dma_mapped = 1;
++ }
++ }
++ counter++;
++ pkt->pq = pq;
++ pkt->index = 0;
++ *ndesc = pkt->naddr;
++
++ list_add_tail(&pkt->list, list);
++ goto done;
++free_sdma:
++ if (pkt->largepkt)
++ kfree(pkt);
++ else
++ kmem_cache_free(pq->pkt_slab, pkt);
++free_pbc:
++ if (pbc_dma_addr)
++ dma_pool_free(pq->header_cache, pbc, pbc_dma_addr);
++ else
++ kfree(pbc);
++done:
++ return ret;
++}
++
++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt)
++{
++ struct qib_knx *knx = (struct qib_knx *)pkt->remote;
++ struct qib_knx_sdma *sdma = knx->sdma;
++ sdma_next(sdma, complete);
++ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
++}
++
++static int qib_knx_sdma_poll(void *data)
++{
++ struct qib_knx *knx = (struct qib_knx *)data;
++ struct qib_knx_ctxt *ctxt;
++ struct qib_knx_sdma_desc desc;
++ struct qib_knx_sdma *sdma = knx->sdma;
++ struct qib_user_sdma_queue *pq;
++ struct list_head list;
++ u32 new_head;
++ int ret = 0, ndesc = 0, added;
++
++ if (!sdma)
++ return -EFAULT;
++
++ while (!kthread_should_stop()) {
++ added = 0;
++ new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger);
++ while (sdma->head != new_head) {
++ knx_sdma_next(sdma);
++ qib_knx_memcpy(&desc, sdma->queue + sdma->head,
++ sizeof(desc));
++ if (!desc.ctxt) {
++ QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
++ continue;
++ }
++ spin_lock(&knx->ctxt_lock);
++ ctxt = knx->ctxts[desc.ctxt];
++ if (!ctxt) {
++ /* we should never get here */
++ QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
++ goto done_unlock;
++ }
++ pq = ctxt->pq[desc.subctxt];
++ if (!pq) {
++ QIB_KNX_SDMA_STATUS(sdma, -EFAULT);
++ goto done_unlock;
++ }
++ mutex_lock(&pq->lock);
++ if (pq->added > ctxt->ppd->sdma_descq_removed)
++ qib_user_sdma_hwqueue_clean(ctxt->ppd);
++ if (pq->num_sending)
++ qib_user_sdma_queue_clean(ctxt->ppd, pq);
++
++ INIT_LIST_HEAD(&list);
++ ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq,
++ &ndesc, &list);
++ QIB_KNX_SDMA_STATUS(sdma, ret);
++ if (!list_empty(&list)) {
++ if (qib_sdma_descq_freecnt(ctxt->ppd) <
++ ndesc) {
++ qib_user_sdma_hwqueue_clean(
++ ctxt->ppd);
++ if (pq->num_sending)
++ qib_user_sdma_queue_clean(
++ ctxt->ppd, pq);
++ }
++ ret = qib_user_sdma_push_pkts(ctxt->ppd,
++ pq, &list, 1);
++ if (ret < 0)
++ goto free_pkts;
++ else {
++ pq->counter++;
++ added++;
++ }
++ }
++free_pkts:
++ if (!list_empty(&list))
++ qib_user_sdma_free_pkt_list(
++ &knx->dd->pcidev->dev, pq, &list);
++ mutex_unlock(&pq->lock);
++done_unlock:
++ spin_unlock(&knx->ctxt_lock);
++ }
++ if (!added) {
++ int i;
++ /*
++ * Push the queues along
++ * The polling thread will enter the inner loop only
++ * if the KNX has posted new descriptors to the queue.
++ * However, any packets that have been completed by
++ * the HW need to be cleaned and that won't happen
++ * unless we explicitly check.
++ */
++ for (i = 0;
++ i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT;
++ i++) {
++ int c = i / QLOGIC_IB_MAX_SUBCTXT,
++ s = i % QLOGIC_IB_MAX_SUBCTXT;
++ spin_lock(&knx->ctxt_lock);
++ ctxt = knx->ctxts[c];
++ if (!ctxt)
++ goto loop_unlock;
++ pq = ctxt->pq[s];
++ if (!pq)
++ goto loop_unlock;
++ mutex_lock(&pq->lock);
++ if (pq->num_sending)
++ qib_user_sdma_queue_clean(ctxt->ppd,
++ pq);
++ mutex_unlock(&pq->lock);
++loop_unlock:
++ spin_unlock(&knx->ctxt_lock);
++ }
++ might_sleep();
++ }
++ }
++ return ret;
++}
++
++void qib_knx_remove_device(struct qib_devdata *dd)
++{
++ if (server && dd->num_knx) {
++ struct qib_knx *knx, *knxp;
++ list_for_each_entry_safe(knx, knxp, &server->clients, list) {
++ if (knx->dd == dd) {
++ spin_lock(&server->client_lock);
++ list_del(&knx->list);
++ server->nclients--;
++ spin_unlock(&server->client_lock);
++ qib_knx_free(knx, 0);
++ kfree(knx);
++ }
++ }
++ }
++ return;
++}
++
+int __init qib_knx_server_init(void)
+{
+ server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL);
+{
+ if (server) {
+ struct qib_knx *t, *tt;
-+
+ /* Stop the thread so we don't accept any new connections. */
+ kthread_stop(server->kthread);
+ list_for_each_entry_safe(t, tt, &server->clients, list) {
+}
diff --git a/drivers/infiniband/hw/qib/qib_knx.h b/drivers/infiniband/hw/qib/qib_knx.h
new file mode 100644
-index 0000000..d767a60
+index 0000000..0e8d7ce
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_knx.h
-@@ -0,0 +1,63 @@
+@@ -0,0 +1,74 @@
+/*
-+ * Copyright (c) 2012 Intel Corporation. All rights reserved.
++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ QIB_KNX_CTXTINFO_FLAGS
+};
+
++#ifdef QIB_CONFIG_KNX
+int __init qib_knx_server_init(void);
+void __exit qib_knx_server_exit(void);
-+static __always_inline struct qib_knx *dd_to_knx(struct qib_devdata *dd)
-+{
-+ return (struct qib_knx *)dd->knx;
-+}
++
++void qib_knx_remove_device(struct qib_devdata *);
++
+inline struct qib_knx *qib_knx_get(uint16_t);
+inline struct qib_devdata *qib_knx_node_to_dd(uint16_t);
-+int qib_knx_alloc_ctxt(struct qib_devdata *, unsigned);
++int qib_knx_alloc_ctxt(u16, unsigned);
+int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16);
+int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *,
+ struct qib_base_info *);
+void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
+__u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type,
+ struct file *);
++int qib_knx_sdma_queue_create(struct file *);
++void qib_knx_sdma_queue_destroy(struct qib_filedata *);
++#else
++static inline u64 qib_knx_ctxt_info(
++ struct qib_ctxtdata *rcd,
++ enum qib_knx_ctxtinfo_type type,
++ struct file *fp)
++{
++ return 0;
++}
++#endif
+#endif /* _QIB_KNX_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_sdma.h b/drivers/infiniband/hw/qib/qib_knx_sdma.h
+diff --git a/drivers/infiniband/hw/qib/qib_knx_common.h b/drivers/infiniband/hw/qib/qib_knx_common.h
new file mode 100644
-index 0000000..8c67b1f
+index 0000000..53c521f
--- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_knx_sdma.h
-@@ -0,0 +1,105 @@
++++ b/drivers/infiniband/hw/qib/qib_knx_common.h
+@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ *
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
-+#ifndef _QIB_KNX_SDMA_H
-+#define _QIB_KNX_SDMA_H
++#ifndef _QIB_KNX_COMMON_H
++#define _QIB_KNX_COMMON_H
++
++struct qib_device_info {
++ u16 unit;
++};
+
+#define QIB_SDMA_MAX_NPAGES 33
-+#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld
++#define QIB_KNX_SDMA_VALUE(fld) ((volatile u64)fld)
+#define QIB_KNX_SDMA_SET(fld, val) \
+ do { \
+ fld = (u64)(val); \
+};
+
+struct qib_tid_sm {
-+ __u16 tid;
-+ __u16 offset;
-+ __u16 length;
++ __u16 tid;
++ __u16 offset;
++ __u16 length;
+};
+
+/*
+ u64 length;
+ u32 npages;
+ unsigned tidlen;
-+ off_t offset;
++ off_t offset;
+ unsigned long pages[QIB_SDMA_MAX_NPAGES];
+ /* This array is 198B so the compiler will pad
+ * it by 2B to make it multiple of 8B. */
+ u64 __padding[7];
+};
+
++#define sdma_next(s, fld) \
++ ((s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1))
++
+struct qib_knx_sdma_mflags {
+ u64 status;
+ u64 __padding1[7];
+ u64 __padding2[7];
+};
+
-+#endif /* _QIB_KNX_SDMA_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-new file mode 100644
-index 0000000..842fca1
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-@@ -0,0 +1,48 @@
-+/*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses. You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ * Redistribution and use in source and binary forms, with or
-+ * without modification, are permitted provided that the following
-+ * conditions are met:
-+ *
-+ * - Redistributions of source code must retain the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer in the documentation and/or other materials
-+ * provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+#ifndef _QIB_KNX_TIDRCV_H
-+
+struct qib_knx_tid_info {
+ /* this is the entire set of 512 entries (= 4K) so
-+ * we can resgister. subctxt devision will be done
-+ * in MIC driver. */
-+ off_t tidbase_offset;
-+ size_t tidbase_len;
-+ u64 tidbase;
-+ unsigned tidcnt;
-+ u64 tidtemplate;
-+ unsigned long invalidtid;
-+ u64 bar_addr;
-+ u64 bar_len;
++ * we can register. subctxt division will be done
++ * in MIC driver. */
++ off_t tidbase_offset;
++ size_t tidbase_len;
++ u64 tidbase;
++ unsigned tidcnt;
++ u64 tidtemplate;
++ unsigned long invalidtid;
++ u64 bar_addr;
++ u64 bar_len;
+};
+
-+#endif /* QIB_KNX_TIDRCV_H */
---
-1.8.3.1
-
++#endif /* _QIB_KNX_COMMON_H */
+diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
+index d2806ca..c25bd5a 100644
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
+@@ -63,80 +63,6 @@ struct qib_user_sdma_rb_node {
+ pid_t pid;
+ };
+
+-struct qib_user_sdma_pkt {
+- struct list_head list; /* list element */
+-
+- u8 tiddma; /* if this is NEW tid-sdma */
+- u8 largepkt; /* this is large pkt from kmalloc */
+- u16 frag_size; /* frag size used by PSM */
+- u16 index; /* last header index or push index */
+- u16 naddr; /* dimension of addr (1..3) ... */
+- u16 addrlimit; /* addr array size */
+- u16 tidsmidx; /* current tidsm index */
+- u16 tidsmcount; /* tidsm array item count */
+- u16 payload_size; /* payload size so far for header */
+- u32 bytes_togo; /* bytes for processing */
+- u32 counter; /* sdma pkts queued counter for this entry */
+- struct qib_tid_session_member *tidsm; /* tid session member array */
+- struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
+- u64 added; /* global descq number of entries */
+-
+- struct {
+- u16 offset; /* offset for kvaddr, addr */
+- u16 length; /* length in page */
+- u16 first_desc; /* first desc */
+- u16 last_desc; /* last desc */
+- u16 put_page; /* should we put_page? */
+- u16 dma_mapped; /* is page dma_mapped? */
+- u16 dma_length; /* for dma_unmap_page() */
+- u16 padding;
+- struct page *page; /* may be NULL (coherent mem) */
+- void *kvaddr; /* FIXME: only for pio hack */
+- dma_addr_t addr;
+- } addr[4]; /* max pages, any more and we coalesce */
+-};
+-
+-struct qib_user_sdma_queue {
+- /*
+- * pkts sent to dma engine are queued on this
+- * list head. the type of the elements of this
+- * list are struct qib_user_sdma_pkt...
+- */
+- struct list_head sent;
+-
+- /*
+- * Because above list will be accessed by both process and
+- * signal handler, we need a spinlock for it.
+- */
+- spinlock_t sent_lock ____cacheline_aligned_in_smp;
+-
+- /* headers with expected length are allocated from here... */
+- char header_cache_name[64];
+- struct dma_pool *header_cache;
+-
+- /* packets are allocated from the slab cache... */
+- char pkt_slab_name[64];
+- struct kmem_cache *pkt_slab;
+-
+- /* as packets go on the queued queue, they are counted... */
+- u32 counter;
+- u32 sent_counter;
+- /* pending packets, not sending yet */
+- u32 num_pending;
+- /* sending packets, not complete yet */
+- u32 num_sending;
+- /* global descq number of entry of last sending packet */
+- u64 added;
+-
+- /* dma page table */
+- struct rb_root dma_pages_root;
+-
+- struct qib_user_sdma_rb_node *sdma_rb_node;
+-
+- /* protect everything above... */
+- struct mutex lock;
+-};
+-
+ static struct qib_user_sdma_rb_node *
+ qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+ {
+@@ -254,12 +180,12 @@ done:
+ return pq;
+ }
+
+-static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+- int i, u16 offset, u16 len,
+- u16 first_desc, u16 last_desc,
+- u16 put_page, u16 dma_mapped,
+- struct page *page, void *kvaddr,
+- dma_addr_t dma_addr, u16 dma_length)
++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
++ int i, u16 offset, u16 len,
++ u16 first_desc, u16 last_desc,
++ u16 put_page, u16 dma_mapped,
++ struct page *page, void *kvaddr,
++ dma_addr_t dma_addr, u16 dma_length)
+ {
+ pkt->addr[i].offset = offset;
+ pkt->addr[i].length = len;
+@@ -273,7 +199,7 @@ static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+ pkt->addr[i].dma_length = dma_length;
+ }
+
+-static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ size_t len, dma_addr_t *dma_addr)
+ {
+ void *hdr;
+@@ -295,11 +221,11 @@ static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
+ return hdr;
+ }
+
+-static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+- struct qib_user_sdma_queue *pq,
+- struct qib_user_sdma_pkt *pkt,
+- struct page *page, u16 put,
+- u16 offset, u16 len, void *kvaddr)
++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
++ struct qib_user_sdma_queue *pq,
++ struct qib_user_sdma_pkt *pkt,
++ struct page *page, u16 put,
++ u16 offset, u16 len, void *kvaddr)
+ {
+ __le16 *pbc16;
+ void *pbcvaddr;
+@@ -314,21 +240,27 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
+ int ret = 0;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+- /*
+- * dma mapping error, pkt has not managed
+- * this page yet, return the page here so
+- * the caller can ignore this page.
+- */
+- if (put) {
+- put_page(page);
+- } else {
+- /* coalesce case */
+- kunmap(page);
+- __free_page(page);
++#ifdef QIB_CONFIG_KNX
++ if (!pkt->remote) {
++#endif
++ /*
++ * dma mapping error, pkt has not managed
++ * this page yet, return the page here so
++ * the caller can ignore this page.
++ */
++ if (put) {
++ put_page(page);
++ } else {
++ /* coalesce case */
++ kunmap(page);
++ __free_page(page);
++ }
++ ret = -ENOMEM;
++ goto done;
+ }
+- ret = -ENOMEM;
+- goto done;
++#ifdef QIB_CONFIG_KNX
+ }
++#endif
+ offset = 0;
+ dma_mapped = 1;
+
+@@ -630,13 +562,19 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
+ pkt->addr[i].dma_length,
+ DMA_TO_DEVICE);
+
+- if (pkt->addr[i].kvaddr)
+- kunmap(pkt->addr[i].page);
++#ifdef QIB_CONFIG_KNX
++ if (!pkt->remote) {
++#endif
++ if (pkt->addr[i].kvaddr)
++ kunmap(pkt->addr[i].page);
+
+- if (pkt->addr[i].put_page)
+- put_page(pkt->addr[i].page);
+- else
+- __free_page(pkt->addr[i].page);
++ if (pkt->addr[i].put_page)
++ put_page(pkt->addr[i].page);
++ else
++ __free_page(pkt->addr[i].page);
++#ifdef QIB_CONFIG_KNX
++ }
++#endif
+ } else if (pkt->addr[i].kvaddr) {
+ /* for headers */
+ if (pkt->addr[i].dma_mapped) {
+@@ -775,9 +713,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
+ }
+
+ /* free a packet list -- return counter value of last packet */
+-static void qib_user_sdma_free_pkt_list(struct device *dev,
+- struct qib_user_sdma_queue *pq,
+- struct list_head *list)
++void qib_user_sdma_free_pkt_list(struct device *dev,
++ struct qib_user_sdma_queue *pq,
++ struct list_head *list)
+ {
+ struct qib_user_sdma_pkt *pkt, *pkt_next;
+
+@@ -787,6 +725,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
+ for (i = 0; i < pkt->naddr; i++)
+ qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
++#ifdef QIB_CONFIG_KNX
++ if (pkt->remote)
++ qib_knx_sdma_free_pkt(pkt);
++#endif
+ if (pkt->largepkt)
+ kfree(pkt);
+ else
+@@ -970,6 +912,9 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ pkt->payload_size = 0;
+ pkt->counter = counter;
+ pkt->tiddma = tiddma;
++#ifdef QIB_CONFIG_KNX
++ pkt->remote = 0;
++#endif
+
+ /* setup the first header */
+ qib_user_sdma_init_frag(pkt, 0, /* index */
+@@ -1045,8 +990,8 @@ static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
+ }
+
+ /* try to clean out queue -- needs pq->lock */
+-static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
+- struct qib_user_sdma_queue *pq)
++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
++ struct qib_user_sdma_queue *pq)
+ {
+ struct qib_devdata *dd = ppd->dd;
+ struct list_head free_list;
+@@ -1110,7 +1055,7 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
+ }
+
+ /* clean descriptor queue, returns > 0 if some elements cleaned */
+-static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
+ {
+ int ret;
+ unsigned long flags;
+@@ -1321,9 +1266,9 @@ retry:
+ }
+
+ /* pq->lock must be held, get packets on the wire... */
+-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+- struct qib_user_sdma_queue *pq,
+- struct list_head *pktlist, int count)
++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
++ struct qib_user_sdma_queue *pq,
++ struct list_head *pktlist, int count)
+ {
+ unsigned long flags;
+
+diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
+index ce8cbaf..07d5bc5 100644
+--- a/drivers/infiniband/hw/qib/qib_user_sdma.h
++++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
+@@ -31,12 +31,108 @@
+ */
+ #include <linux/device.h>
+
+-struct qib_user_sdma_queue;
++struct qib_user_sdma_pkt {
++ struct list_head list; /* list element */
++
++ u8 tiddma; /* if this is NEW tid-sdma */
++ u8 largepkt; /* this is large pkt from kmalloc */
++ u16 frag_size; /* frag size used by PSM */
++ u16 index; /* last header index or push index */
++ u16 naddr; /* dimension of addr (1..3) ... */
++ u16 addrlimit; /* addr array size */
++ u16 tidsmidx; /* current tidsm index */
++ u16 tidsmcount; /* tidsm array item count */
++ u16 payload_size; /* payload size so far for header */
++ u32 bytes_togo; /* bytes for processing */
++ u32 counter; /* sdma pkts queued counter for this entry */
++ struct qib_tid_session_member *tidsm; /* tid session member array */
++ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
++ u64 added; /* global descq number of entries */
++#ifdef QIB_CONFIG_KNX
++ u64 remote; /* does the packet originate on the host */
++#endif
++
++ struct {
++ u16 offset; /* offset for kvaddr, addr */
++ u16 length; /* length in page */
++ u16 first_desc; /* first desc */
++ u16 last_desc; /* last desc */
++ u16 put_page; /* should we put_page? */
++ u16 dma_mapped; /* is page dma_mapped? */
++ u16 dma_length; /* for dma_unmap_page() */
++ u16 padding;
++ struct page *page; /* may be NULL (coherent mem) */
++ void *kvaddr; /* FIXME: only for pio hack */
++ dma_addr_t addr;
++ } addr[4]; /* max pages, any more and we coalesce */
++};
++
++struct qib_user_sdma_queue {
++ /*
++ * pkts sent to dma engine are queued on this
++ * list head. the type of the elements of this
++ * list are struct qib_user_sdma_pkt...
++ */
++ struct list_head sent;
++
++ /*
++ * Because above list will be accessed by both process and
++ * signal handler, we need a spinlock for it.
++ */
++ spinlock_t sent_lock ____cacheline_aligned_in_smp;
++
++ /* headers with expected length are allocated from here... */
++ char header_cache_name[64];
++ struct dma_pool *header_cache;
++
++ /* packets are allocated from the slab cache... */
++ char pkt_slab_name[64];
++ struct kmem_cache *pkt_slab;
++
++ /* as packets go on the queued queue, they are counted... */
++ u32 counter;
++ u32 sent_counter;
++ /* pending packets, not sending yet */
++ u32 num_pending;
++ /* sending packets, not complete yet */
++ u32 num_sending;
++ /* global descq number of entry of last sending packet */
++ u64 added;
++
++ /* dma page table */
++ struct rb_root dma_pages_root;
++
++ struct qib_user_sdma_rb_node *sdma_rb_node;
++
++ /* protect everything above... */
++ struct mutex lock;
++};
+
+ struct qib_user_sdma_queue *
+ qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
+-
++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
++ size_t len, dma_addr_t *dma_addr);
++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
++ int i, u16 offset, u16 len,
++ u16 first_desc, u16 last_desc,
++ u16 put_page, u16 dma_mapped,
++ struct page *page, void *kvaddr,
++ dma_addr_t dma_addr, u16 dma_length);
++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
++ struct qib_user_sdma_queue *pq,
++ struct qib_user_sdma_pkt *pkt,
++ struct page *page, u16 put,
++ u16 offset, u16 len, void *kvaddr);
++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd);
++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
++ struct qib_user_sdma_queue *pq);
++void qib_user_sdma_free_pkt_list(struct device *dev,
++ struct qib_user_sdma_queue *pq,
++ struct list_head *list);
++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
++ struct qib_user_sdma_queue *pq,
++ struct list_head *pktlist, int count);
+ int qib_user_sdma_writev(struct qib_ctxtdata *pd,
+ struct qib_user_sdma_queue *pq,
+ const struct iovec *iov,
+@@ -50,3 +146,9 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+
+ u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
+ u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
++
++/*
++ * This function prototype somewhat pollutes this header file
++ * but I don't want to create a new header file just for it.
++ */
++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt);
+++ /dev/null
-From 6975d8e44fc3f04c14cf4f83e2df6f69a25546dc Mon Sep 17 00:00:00 2001
-From: Jubin John <jubin.john@intel.com>
-Date: Fri, 26 Sep 2014 09:41:32 -0700
-Subject: [PATCH] Updates to qib driver
-
----
- drivers/infiniband/hw/qib/Makefile | 2 +-
- drivers/infiniband/hw/qib/qib.h | 172 +++++-
- drivers/infiniband/hw/qib/qib_driver.c | 223 +++++++-
- drivers/infiniband/hw/qib/qib_file_ops.c | 166 ++++--
- drivers/infiniband/hw/qib/qib_iba6120.c | 12 +-
- drivers/infiniband/hw/qib/qib_iba7220.c | 20 +-
- drivers/infiniband/hw/qib/qib_iba7322.c | 122 ++--
- drivers/infiniband/hw/qib/qib_init.c | 118 +++--
- drivers/infiniband/hw/qib/qib_knx.c | 721 +++++++++++++++++++--
- drivers/infiniband/hw/qib/qib_knx.h | 13 +-
- drivers/infiniband/hw/qib/qib_knx_common.h | 126 ++++
- drivers/infiniband/hw/qib/qib_knx_sdma.h | 105 ---
- drivers/infiniband/hw/qib/qib_knx_tidrcv.h | 48 --
- drivers/infiniband/hw/qib/qib_mad.c | 3 +-
- drivers/infiniband/hw/qib/qib_pcie.c | 21 +-
- drivers/infiniband/hw/qib/qib_qp.c | 6 +-
- drivers/infiniband/hw/qib/qib_sdma.c | 11 +-
- drivers/infiniband/hw/qib/qib_snoop.c | 970 ++++++++++++++++++++++++++++
- drivers/infiniband/hw/qib/qib_user_sdma.c | 296 +++++----
- drivers/infiniband/hw/qib/qib_user_sdma.h | 105 +++-
- drivers/infiniband/hw/qib/qib_verbs.c | 116 ++++-
- 21 files changed, 2831 insertions(+), 545 deletions(-)
- create mode 100644 drivers/infiniband/hw/qib/qib_knx_common.h
- delete mode 100644 drivers/infiniband/hw/qib/qib_knx_sdma.h
- delete mode 100644 drivers/infiniband/hw/qib/qib_knx_tidrcv.h
- create mode 100644 drivers/infiniband/hw/qib/qib_snoop.c
-
-diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
-index ba2a49d..047d191 100644
---- a/drivers/infiniband/hw/qib/Makefile
-+++ b/drivers/infiniband/hw/qib/Makefile
-@@ -6,7 +6,7 @@ ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
- qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
- qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
- qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
-- qib_sd7220.o qib_iba7322.o qib_verbs.o
-+ qib_sd7220.o qib_iba7322.o qib_snoop.o qib_verbs.o
-
- # 6120 has no fallback if no MSI interrupts, others can do INTx
- ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
-diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
-index ad87abd..e34b0f7 100644
---- a/drivers/infiniband/hw/qib/qib.h
-+++ b/drivers/infiniband/hw/qib/qib.h
-@@ -52,6 +52,7 @@
- #include <linux/kref.h>
- #include <linux/sched.h>
- #include <linux/kthread.h>
-+#include <linux/moduleparam.h>
-
- #include "qib_common.h"
- #include "qib_verbs.h"
-@@ -247,6 +248,10 @@ struct qib_ctxtdata {
- u32 lookaside_qpn;
- /* QPs waiting for context processing */
- struct list_head qp_wait_list;
-+#ifdef QIB_CONFIG_KNX
-+ /* KNX Receive Context Data */
-+ struct qib_knx_ctxt *krcd;
-+#endif
- #ifdef CONFIG_DEBUG_FS
- /* verbs stats per CTX */
- struct qib_opcode_stats_perctx *opstats;
-@@ -546,6 +551,11 @@ struct xmit_wait {
- * clarifies things a bit. Note that to conform to IB conventions,
- * port-numbers are one-based. The first or only port is port1.
- */
-+#define QIB_CHAR_DEVICES_PER_PORT 2
-+/* Extract packet length from LRH header */
-+#define QIB_GET_PKT_LEN(x) (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
-+#define QIB_SNOOP_DEV_INDEX 0
-+#define QIB_CAPTURE_DEV_INDEX 1
- struct qib_pportdata {
- struct qib_ibport ibport_data;
-
-@@ -656,6 +666,7 @@ struct qib_pportdata {
- u8 link_speed_active;
- u8 vls_supported;
- u8 vls_operational;
-+ u8 n_krcv_queues;
- /* Rx Polarity inversion (compensate for ~tx on partner) */
- u8 rx_pol_inv;
-
-@@ -675,6 +686,22 @@ struct qib_pportdata {
- struct xmit_wait cong_stats;
- struct timer_list symerr_clear_timer;
-
-+ /* snoop/capture related fields */
-+ unsigned int mode_flag;
-+ void *filter_value;
-+ int (*filter_callback)(void *hdr, void *data, void *value);
-+ /* lock while sending packet out */
-+ spinlock_t snoop_write_lock;
-+ struct qib_aux_device {
-+ struct cdev *snoop_cdev;
-+ struct device *snoop_class_dev;
-+ /* snooping lock */
-+ spinlock_t snoop_lock;
-+ struct list_head snoop_queue;
-+ wait_queue_head_t snoop_waitq;
-+ struct qib_pportdata *pport;
-+ } sc_device[QIB_CHAR_DEVICES_PER_PORT];
-+
- /* Synchronize access between driver writes and sysfs reads */
- spinlock_t cc_shadow_lock
- ____cacheline_aligned_in_smp;
-@@ -755,14 +782,14 @@ struct qib_devdata {
-
- /* mem-mapped base of chip regs plus offset of the SendBufAvail0
- * register
-- */
-+ */
- u64 sendbufavail0;
-
- /* end of mem-mapped chip space excluding sendbuf and user regs */
- u64 __iomem *kregend;
- /* physical address of chip for io_remap, etc. */
- resource_size_t physaddr;
-- /* qib_cfgctxts pointers */
-+ /* cfgctxts pointers */
- struct qib_ctxtdata **rcd; /* Receive Context Data */
-
- /* qib_pportdata, points to array of (physical) port-specific
-@@ -1079,7 +1106,6 @@ struct qib_devdata {
- u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
-- u8 n_krcv_queues;
- u8 qpn_mask;
- u8 skip_kctxt_mask;
-
-@@ -1126,13 +1152,119 @@ struct qib_devdata {
- int assigned_node_id; /* NUMA node closest to HCA */
-
- #ifdef QIB_CONFIG_KNX
-- /* peer node id of connected KNX node */
-- u16 node_id;
-- struct qib_knx *knx;
-+ /* number of KNx nodes using this device */
-+ u16 num_knx;
- #endif
-+};
-
-+enum qib_mod_param_t {
-+ qib_mod_param_drv,
-+ qib_mod_param_unit,
-+ qib_mod_param_port
- };
-
-+typedef int (*param_set_func_t)(struct qib_devdata *, u8, u64);
-+
-+struct qib_mod_param {
-+ const char *name;
-+ enum qib_mod_param_t type;
-+ param_set_func_t func;
-+ ulong dflt;
-+ struct list_head list;
-+ struct list_head pport;
-+};
-+
-+extern int qib_set_mod_param(const char *, struct kernel_param *);
-+extern int qib_get_mod_param(char *, struct kernel_param *);
-+extern u64 qib_read_mod_param(struct qib_mod_param *, u16, u8);
-+extern void qib_clean_mod_param(void);
-+
-+#define MAX_QIB_PARAM_LEN 128
-+/**
-+ * QIB_MODPARAM_GLOBAL - define a global module parameter
-+ * @N: name of the module parameter
-+ *
-+ * Define a global module parameter for use in multiple files.
-+ */
-+#define QIB_MODPARAM_GLOBAL(N) \
-+extern struct qib_mod_param qmp_##N
-+/**
-+ * QIB_MODPARAM_DRV - define a driver-scope module parameter
-+ * @N: name of the module parameter
-+ * @D: default value
-+ * @P: visibility in sysfs
-+ * @S: description
-+ *
-+ * Define a driver-scope (global to the driver instance) module
-+ * parameter.
-+ */
-+#define QIB_MODPARAM_DRV(N, D, P, S) \
-+ struct qib_mod_param qmp_##N = { \
-+ .name = __stringify(N), \
-+ .type = qib_mod_param_drv, \
-+ .dflt = (ulong)D, \
-+ .pport = { NULL, NULL } \
-+ }; \
-+ module_param_named(N, qmp_##N.dflt, ulong, P); \
-+ MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")")
-+/**
-+ * QIB_MODPARAM_UNIT - define a unit-scope module parameter
-+ * @N: name of the module parameter
-+ * @F: callback function for dynamic value settings
-+ * @D: default value
-+ * @P: visibility in sysfs
-+ * @D: description
-+ *
-+ * Define a unit-scope module parameter. Unit-scope module
-+ * parameters allows specifying individual values for each of the
-+ * QIB units.
-+ */
-+#define QIB_MODPARAM_UNIT(N, F, D, P, S) \
-+ struct qib_mod_param qmp_##N = { \
-+ .name = __stringify(N), \
-+ .func = ((P) & S_IWUGO ? F : NULL), \
-+ .type = qib_mod_param_unit, \
-+ .dflt = (ulong)D, \
-+ .pport = { NULL, NULL } \
-+ }; \
-+ module_param_call(N, qib_set_mod_param, qib_get_mod_param, \
-+ &qmp_##N, (P)); \
-+ MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")")
-+/**
-+ * QIB_MODPARAM_PORT - define a port-scope module parameter
-+ * @N: name of the module parameter
-+ * @F: callback function for dynamic value settings
-+ * @D: default value
-+ * @P: visibility in sysfs
-+ * @D: description
-+ *
-+ * Define a port-scope module parameter. Port-scope module
-+ * parameters allow specifying individual values foe each of the
-+ * ports on any of the QIB units.
-+ */
-+#define QIB_MODPARAM_PORT(N, F, D, P, S) \
-+ struct qib_mod_param qmp_##N = { \
-+ .name = __stringify(N), \
-+ .func = ((P) & S_IWUGO ? F : NULL), \
-+ .type = qib_mod_param_port, \
-+ .dflt = (ulong)D, \
-+ .pport = { NULL, NULL } \
-+ }; \
-+ module_param_call(N, qib_set_mod_param, qib_get_mod_param, \
-+ &qmp_##N, (P)); \
-+ MODULE_PARM_DESC(N, S " (dflt: " __stringify(D) ")")
-+/**
-+ * QIB_MODPARAM_GET - retrieve a module parameter value
-+ * @N: name of the module parameter
-+ * @U: unit number
-+ * @P: port number
-+ *
-+ * Get the value for the specific unit/port. The macro will return
-+ * the correct value regardless of a specific value for the
-+ * specified unit/port is present or the default should be used.
-+ */
-+#define QIB_MODPARAM_GET(N, U, P) qib_read_mod_param(&qmp_##N, U, P)
-+
- /* hol_state values */
- #define QIB_HOL_UP 0
- #define QIB_HOL_INIT 1
-@@ -1165,12 +1297,14 @@ struct qib_filedata {
- };
-
- extern struct list_head qib_dev_list;
-+extern struct list_head qib_mod_param_list;
- extern spinlock_t qib_devs_lock;
- extern struct qib_devdata *qib_lookup(int unit);
- extern u32 qib_cpulist_count;
- extern unsigned long *qib_cpulist;
-
- extern unsigned qib_wc_pat;
-+extern unsigned int snoop_enable;
- extern unsigned qib_cc_table_size;
- int qib_init(struct qib_devdata *, int);
- int init_chip_wc_pat(struct qib_devdata *dd, u32);
-@@ -1230,6 +1364,24 @@ void qib_hol_event(unsigned long);
- void qib_disable_after_error(struct qib_devdata *);
- int qib_set_uevent_bits(struct qib_pportdata *, const int);
-
-+#define QIB_PORT_SNOOP_MODE 1U
-+#define QIB_PORT_CAPTURE_MODE 2U
-+
-+struct snoop_packet {
-+ struct list_head list;
-+ u32 total_len;
-+ u8 data[];
-+};
-+
-+int qib_snoop_add(struct qib_devdata *);
-+void qib_snoop_remove(struct qib_devdata *);
-+int qib_snoop_rcv_queue_packet(struct qib_pportdata *, void *,
-+ void *, u32);
-+void qib_snoop_send_queue_packet(struct qib_pportdata *,
-+ struct snoop_packet *);
-+int snoop_get_header_size(struct qib_devdata *, struct qib_ib_header *,
-+ void *, u32);
-+
- /* for use in system calls, where we want to know device type, etc. */
- #define ctxt_fp(fp) \
- (((struct qib_filedata *)(fp)->private_data)->rcd)
-@@ -1367,7 +1519,7 @@ void qib_sdma_intr(struct qib_pportdata *);
- void qib_user_sdma_send_desc(struct qib_pportdata *dd,
- struct list_head *pktlist);
- int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
-- u32, struct qib_verbs_txreq *);
-+ u32, struct qib_verbs_txreq *, struct snoop_packet *);
- /* ppd->sdma_lock should be locked before calling this. */
- int qib_sdma_make_progress(struct qib_pportdata *dd);
-
-@@ -1505,9 +1657,9 @@ const char *qib_get_unit_name(int unit);
- #endif
-
- /* global module parameter variables */
--extern unsigned qib_ibmtu;
--extern ushort qib_cfgctxts;
--extern ushort qib_num_cfg_vls;
-+QIB_MODPARAM_GLOBAL(ibmtu);
-+QIB_MODPARAM_GLOBAL(cfgctxts);
-+QIB_MODPARAM_GLOBAL(krcvqs);
- extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */
- extern unsigned qib_n_krcv_queues;
- extern unsigned qib_sdma_fetch_arb;
-diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
-index 5bee08f..e5fb836 100644
---- a/drivers/infiniband/hw/qib/qib_driver.c
-+++ b/drivers/infiniband/hw/qib/qib_driver.c
-@@ -43,6 +43,9 @@
-
- #include "qib.h"
-
-+#undef pr_fmt
-+#define pr_fmt(fmt) QIB_DRV_NAME " " fmt
-+
- /*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the init code.
-@@ -51,11 +54,21 @@ const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
-
- DEFINE_SPINLOCK(qib_devs_lock);
- LIST_HEAD(qib_dev_list);
-+LIST_HEAD(qib_mod_param_list);
- DEFINE_MUTEX(qib_mutex); /* general driver use */
-
--unsigned qib_ibmtu;
--module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
--MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
-+/* Per-unit/port module parameter value structure
-+ * linked to the qib_mod_param structure - one per
-+ * unit/port */
-+struct qib_mod_param_pport {
-+ struct list_head list;
-+ u16 unit;
-+ u8 port;
-+ u64 value;
-+};
-+
-+QIB_MODPARAM_PORT(ibmtu, NULL, 5, S_IRUGO,
-+ "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096");
-
- unsigned qib_compat_ddr_negotiate = 1;
- module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
-@@ -90,6 +103,178 @@ const char *qib_get_unit_name(int unit)
- return iname;
- }
-
-+int qib_set_mod_param(const char *str, struct kernel_param *kp)
-+{
-+ char *next = (char *)str, *tmp;
-+ unsigned long val = 0, dft;
-+ u32 unit = 0, port = 0;
-+ struct qib_mod_param *param =
-+ (struct qib_mod_param *)kp->arg;
-+ struct qib_mod_param_pport *pport, *p;
-+ int ret = 0;
-+
-+ if (strlen(str) >= MAX_QIB_PARAM_LEN) {
-+ pr_warn("parameter value too long\n");
-+ ret = -ENOSPC;
-+ goto done;
-+ }
-+
-+ /* qib_dev_list will be empty only when the driver is initially
-+ * loading. */
-+ if (list_empty(&qib_dev_list) || !param->pport.next)
-+ INIT_LIST_HEAD(¶m->pport);
-+ tmp = next;
-+ dft = simple_strtoul(tmp, &next, 0);
-+ if (next == tmp) {
-+ pr_warn("invalid parameter value\n");
-+ ret = -EINVAL;
-+ goto done;
-+ }
-+ /* clear any previously added port entries */
-+ list_for_each_entry_safe(pport, p, ¶m->pport, list) {
-+ list_del(&pport->list);
-+ kfree(pport);
-+ }
-+ if (!*next || *next == '\n' || *next == ',')
-+ param->dflt = dft;
-+ else if (*next && *next == ':')
-+ /* no default, rewind the string */
-+ next = tmp;
-+ else
-+ pr_warn("invalid parameter value\n");
-+ while (*next && next[1]) {
-+ if (*next == ',')
-+ tmp = ++next;
-+ unit = simple_strtoul(tmp, &next, 0);
-+ if (param->type == qib_mod_param_port) {
-+ if (next == tmp || !*next || *next != ':') {
-+ pr_warn("Invalid unit:port argument at \"%s\".\n",
-+ tmp);
-+ while (*next && *next++ != ',')
-+ ;
-+ tmp = next;
-+ continue;
-+ }
-+ tmp = ++next;
-+ port = simple_strtoul(tmp, &next, 0);
-+ if (!port) {
-+ /* port numbers start at 1, 0 is invalid */
-+ pr_warn("Invalid argument at \"%s\". Port numbers start at 1.\n",
-+ tmp);
-+ while (*next && *next++ != ',')
-+ ;
-+ tmp = next;
-+ continue;
-+ }
-+ }
-+ if (next == tmp || *next != '=') {
-+ pr_warn("Invalid %s argument at \"%s\".\n",
-+ (param->type == qib_mod_param_port ?
-+ "port" : "unit"), tmp);
-+ while (*next && *next++ != ',')
-+ ;
-+ tmp = next;
-+ continue;
-+ }
-+ tmp = ++next;
-+ val = simple_strtoul(tmp, &next, 0);
-+ if (next == tmp) {
-+ pr_warn("Invalid value string at \"%s\"\n", tmp);
-+ while (*next && *next++ != ',')
-+ ;
-+ tmp = next;
-+ continue;
-+ }
-+ pport = kzalloc(sizeof(struct qib_mod_param_pport),
-+ GFP_KERNEL);
-+ if (!pport) {
-+ pr_err("no memory for module parameter.\n");
-+ ret = -ENOMEM;
-+ goto done;
-+ }
-+ pport->unit = unit;
-+ pport->port = port;
-+ pport->value = val;
-+ list_add_tail(&pport->list, ¶m->pport);
-+ if (!*next || *next == '\n')
-+ break;
-+ tmp = ++next;
-+ }
-+ /* add parameter to list so it can be cleaned up */
-+ if (!param->list.next)
-+ list_add(¶m->list, &qib_mod_param_list);
-+
-+ if (param->func && qib_count_units(NULL, NULL)) {
-+ struct qib_devdata *dd;
-+ list_for_each_entry(pport, ¶m->pport, list) {
-+ param_set_func_t setfunc = param->func;
-+ list_for_each_entry(dd, &qib_dev_list, list)
-+ if (dd->unit == pport->unit)
-+ break;
-+ if (!setfunc(dd, pport->port, pport->value))
-+ pr_err("Error setting module parameter %s for IB%u:%u",
-+ param->name,
-+ pport->unit,
-+ pport->port);
-+ }
-+ }
-+done:
-+ return ret;
-+}
-+
-+int qib_get_mod_param(char *buffer, struct kernel_param *kp)
-+{
-+ struct qib_mod_param *param =
-+ (struct qib_mod_param *)kp->arg;
-+ struct qib_mod_param_pport *pport;
-+ char *p = buffer;
-+ int s = 0;
-+
-+ s = scnprintf(p, PAGE_SIZE, "%lu", param->dflt);
-+ p += s;
-+
-+ if (param->pport.next)
-+ list_for_each_entry(pport, ¶m->pport, list) {
-+ *p++ = ',';
-+ if (param->type == qib_mod_param_unit)
-+ s = scnprintf(p, PAGE_SIZE, "%u=%llu",
-+ pport->unit, pport->value);
-+ else if (param->type == qib_mod_param_port)
-+ s = scnprintf(p, PAGE_SIZE, "%u:%u=%llu",
-+ pport->unit, pport->port,
-+ pport->value);
-+ p += s;
-+ }
-+ return strlen(buffer);
-+}
-+
-+u64 qib_read_mod_param(struct qib_mod_param *param, u16 unit, u8 port)
-+{
-+ struct qib_mod_param_pport *pport;
-+ u64 ret = param->dflt;
-+
-+ if (param->type != qib_mod_param_drv)
-+ if (param->pport.next && !list_empty(¶m->pport))
-+ list_for_each_entry(pport, ¶m->pport, list)
-+ if (pport->unit == unit &&
-+ pport->port == port)
-+ ret = pport->value;
-+ return ret;
-+}
-+
-+void qib_clean_mod_param(void)
-+{
-+ struct qib_mod_param *p;
-+ struct qib_mod_param_pport *pp, *pps;
-+
-+ list_for_each_entry(p, &qib_mod_param_list, list) {
-+ list_for_each_entry_safe(pp, pps, &p->pport, list) {
-+ list_del(&pp->list);
-+ kfree(pp);
-+ }
-+ }
-+}
-+
- /*
- * Return count of units with at least one port ACTIVE.
- */
-@@ -456,6 +641,8 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
- int last;
- u64 lval;
- struct qib_qp *qp, *nqp;
-+ struct snoop_packet *packet = NULL;
-+ u32 hdr_len = 0;
-
- l = rcd->head;
- rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
-@@ -478,6 +665,25 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
- /* total length */
- tlen = qib_hdrget_length_in_bytes(rhf_addr);
- ebuf = NULL;
-+ /* applicable only for capture */
-+ if (unlikely(ppd->mode_flag & QIB_PORT_CAPTURE_MODE)) {
-+ int nomatch = 0;
-+ /* We want to filter packet before copying it */
-+ if (ppd->filter_callback)
-+ nomatch = ppd->filter_callback(hdr, ebuf,
-+ ppd->filter_value);
-+ if (nomatch == 0) {
-+ packet = kzalloc(sizeof(*packet) + tlen,
-+ GFP_ATOMIC);
-+ if (packet) {
-+ /* copy header first */
-+ packet->total_len = tlen;
-+ INIT_LIST_HEAD(&packet->list);
-+ hdr_len = (u8 *)rhf_addr - (u8 *)hdr;
-+ memcpy(packet->data, hdr, hdr_len);
-+ }
-+ }
-+ }
- if ((dd->flags & QIB_NODMA_RTAIL) ?
- qib_hdrget_use_egr_buf(rhf_addr) :
- (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
-@@ -512,6 +718,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
- crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l,
- etail, rhf_addr, hdr);
- else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
-+ /* copy packet data */
-+ if (ebuf && packet)
-+ memcpy((packet->data + hdr_len), ebuf,
-+ (tlen - hdr_len));
- qib_ib_rcv(rcd, hdr, ebuf, tlen);
- if (crcs)
- crcs--;
-@@ -519,6 +729,10 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
- --*llic;
- }
- move_along:
-+ if (packet) {
-+ qib_snoop_send_queue_packet(ppd, packet);
-+ packet = NULL;
-+ }
- l += rsize;
- if (l >= maxcnt)
- l = 0;
-@@ -619,7 +833,8 @@ int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
- ret = -EINVAL;
- goto bail;
- }
-- chk = ib_mtu_enum_to_int(qib_ibmtu);
-+ chk = ib_mtu_enum_to_int(
-+ QIB_MODPARAM_GET(ibmtu, ppd->dd->unit, ppd->port));
- if (chk > 0 && arg > chk) {
- ret = -EINVAL;
- goto bail;
-diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
-index 6eebad0..376961d 100644
---- a/drivers/infiniband/hw/qib/qib_file_ops.c
-+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
-@@ -95,6 +95,9 @@ static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
- static unsigned int qib_poll(struct file *, struct poll_table_struct *);
- static int qib_mmapf(struct file *, struct vm_area_struct *);
-+static int subctxt_search_ctxts(struct qib_devdata *, struct file *,
-+ const struct qib_user_info *);
-+
-
- static const struct file_operations qib_file_ops = {
- .owner = THIS_MODULE,
-@@ -1547,6 +1550,14 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
-
- rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
-
-+#ifdef QIB_CONFIG_KNX
-+ if (uinfo->spu_knx_node_id)
-+ /*
-+ * Skip allocation of page pointer list for TID
-+ * receives. This will be done on the KNX.
-+ */
-+ goto no_page_list;
-+#endif
- /*
- * Allocate memory for use in qib_tid_update() at open to
- * reduce cost of expected send setup per message segment
-@@ -1562,6 +1573,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
- ret = -ENOMEM;
- goto bailerr;
- }
-+#ifdef QIB_CONFIG_KNX
-+no_page_list:
-+#endif
- rcd->userversion = uinfo->spu_userversion;
-
- ret = init_subctxts(dd, rcd, uinfo);
-@@ -1720,52 +1734,66 @@ done:
- static int find_shared_ctxt(struct file *fp,
- const struct qib_user_info *uinfo)
- {
-- int devmax, ndev, i;
-+ int devmax, ndev;
- int ret = 0;
-+ struct qib_devdata *dd;
-
-+#ifdef QIB_CONFIG_KNX
-+ /*
-+ * In the case we are allocating a context for a KNX process,
-+ * Don't loop over all devices but use the one assosiated with the
-+ * requesting KNX.
-+ */
-+ if (uinfo->spu_knx_node_id) {
-+ dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-+ if (dd && dd->num_knx)
-+ ret = subctxt_search_ctxts(dd, fp, uinfo);
-+ goto done;
-+ }
-+#endif
- devmax = qib_count_units(NULL, NULL);
-
- for (ndev = 0; ndev < devmax; ndev++) {
-- struct qib_devdata *dd = qib_lookup(ndev);
--#ifdef QIB_CONFIG_KNX
-- /*
-- * In the case we are allocating a context for a KNX process,
-- * reject any device that is not associated with the
-- * requesting KNX.
-- */
-- if ((uinfo->spu_knx_node_id &&
-- dd->node_id != uinfo->spu_knx_node_id))
-- continue;
--#endif
-+ dd = qib_lookup(ndev);
- /* device portion of usable() */
- if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase))
- continue;
-- for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
-- struct qib_ctxtdata *rcd = dd->rcd[i];
-+ ret = subctxt_search_ctxts(dd, fp, uinfo);
-+ if (ret)
-+ break;
-+ }
-+done:
-+ return ret;
-+}
-
-- /* Skip ctxts which are not yet open */
-- if (!rcd || !rcd->cnt)
-- continue;
-- /* Skip ctxt if it doesn't match the requested one */
-- if (rcd->subctxt_id != uinfo->spu_subctxt_id)
-- continue;
-- /* Verify the sharing process matches the master */
-- if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
-- rcd->userversion != uinfo->spu_userversion ||
-- rcd->cnt >= rcd->subctxt_cnt) {
-- ret = -EINVAL;
-- goto done;
-- }
-- ctxt_fp(fp) = rcd;
-- subctxt_fp(fp) = rcd->cnt++;
-- rcd->subpid[subctxt_fp(fp)] = current->pid;
-- tidcursor_fp(fp) = 0;
-- rcd->active_slaves |= 1 << subctxt_fp(fp);
-- ret = 1;
-+static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp,
-+ const struct qib_user_info *uinfo)
-+{
-+ int ret = 0, i;
-+ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) {
-+ struct qib_ctxtdata *rcd = dd->rcd[i];
-+
-+ /* Skip ctxts which are not yet open */
-+ if (!rcd || !rcd->cnt)
-+ continue;
-+ /* Skip ctxt if it doesn't match the requested one */
-+ if (rcd->subctxt_id != uinfo->spu_subctxt_id)
-+ continue;
-+ /* Verify the sharing process matches the master */
-+ if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt ||
-+ rcd->userversion != uinfo->spu_userversion ||
-+ rcd->cnt >= rcd->subctxt_cnt) {
-+ ret = -EINVAL;
- goto done;
- }
-+ ctxt_fp(fp) = rcd;
-+ subctxt_fp(fp) = rcd->cnt++;
-+ rcd->subpid[subctxt_fp(fp)] = current->pid;
-+ tidcursor_fp(fp) = 0;
-+ rcd->active_slaves |= 1 << subctxt_fp(fp);
-+ ret = 1;
-+ break;
- }
--
- done:
- return ret;
- }
-@@ -1856,6 +1884,10 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
-
- if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
- alg = uinfo->spu_port_alg;
-+ if (swminor <= 11) {
-+ qib_pio_avail_bits = 1;
-+ qib_rcvhdrpoll = 1;
-+ }
-
- #ifdef QIB_CONFIG_KNX
- /* Make sure we have a connection to the KNX module on the right node */
-@@ -1871,13 +1903,38 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
- uinfo->spu_subctxt_cnt) {
- ret = find_shared_ctxt(fp, uinfo);
- if (ret > 0) {
-- ret = do_qib_user_sdma_queue_create(fp);
-+#ifdef QIB_CONFIG_KNX
-+ if (uinfo->spu_knx_node_id) {
-+ ret = qib_knx_sdma_queue_create(fp);
-+ } else
-+#endif
-+ ret = do_qib_user_sdma_queue_create(fp);
- if (!ret)
- assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
- goto done_ok;
- }
- }
-
-+#ifdef QIB_CONFIG_KNX
-+ /*
-+ * If there is a KNX node set, we pick the device that is
-+ * associate with that KNX node
-+ */
-+ if (uinfo->spu_knx_node_id) {
-+ struct qib_devdata *dd =
-+ qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-+ if (dd) {
-+ ret = find_free_ctxt(dd->unit, fp, uinfo);
-+ if (!ret)
-+ ret = qib_knx_alloc_ctxt(
-+ uinfo->spu_knx_node_id,
-+ ctxt_fp(fp)->ctxt);
-+ } else
-+ ret = -ENXIO;
-+ goto done_chk_sdma;
-+ }
-+
-+#endif
- i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
- if (i_minor)
- ret = find_free_ctxt(i_minor - 1, fp, uinfo);
-@@ -1886,25 +1943,6 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
- const unsigned int cpu = cpumask_first(¤t->cpus_allowed);
- const unsigned int weight =
- cpumask_weight(¤t->cpus_allowed);
--#ifdef QIB_CONFIG_KNX
-- /*
-- * If there is a KNX node set, we pick the device that is on
-- * the same NUMA node as the KNX.
-- */
-- if (uinfo->spu_knx_node_id) {
-- struct qib_devdata *dd =
-- qib_knx_node_to_dd(uinfo->spu_knx_node_id);
-- if (dd) {
-- ret = find_free_ctxt(dd->unit, fp, uinfo);
-- if (!ret)
-- ret = qib_knx_alloc_ctxt(dd,
-- ctxt_fp(fp)->ctxt);
-- } else
-- ret = -ENXIO;
-- goto done_chk_sdma;
-- }
--#endif
--
- if (weight == 1 && !test_bit(cpu, qib_cpulist))
- if (!find_hca(cpu, &unit) && unit >= 0)
- if (!find_free_ctxt(unit, fp, uinfo)) {
-@@ -1915,8 +1953,17 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
- }
-
- done_chk_sdma:
-- if (!ret)
-+ if (!ret) {
-+#ifdef QIB_CONFIG_KNX
-+ if (uinfo->spu_knx_node_id) {
-+ ret = qib_knx_sdma_queue_create(fp);
-+ /*if (!ret)
-+ ret = qib_knx_setup_tidrcv(fp);*/
-+ goto done_ok;
-+ }
-+#endif
- ret = do_qib_user_sdma_queue_create(fp);
-+ }
- done_ok:
- #ifdef QIB_CONFIG_KNX
- knx_node_fp(fp) = uinfo->spu_knx_node_id;
-@@ -2145,6 +2192,13 @@ static int qib_close(struct inode *in, struct file *fp)
-
- /* drain user sdma queue */
- if (fd->pq) {
-+#ifdef QIB_CONFIG_KNX
-+ /*
-+ * The thread should be stopped first before attempting
-+ * to clean the queue.
-+ */
-+ qib_knx_sdma_queue_destroy(fd);
-+#endif
- qib_user_sdma_queue_drain(rcd->ppd, fd->pq);
- qib_user_sdma_queue_destroy(fd->pq);
- }
-@@ -2737,4 +2791,6 @@ void qib_device_remove(struct qib_devdata *dd)
- {
- qib_user_remove(dd);
- qib_diag_remove(dd);
-+ if (snoop_enable)
-+ qib_snoop_remove(dd);
- }
-diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
-index 84e593d..9ab46ed 100644
---- a/drivers/infiniband/hw/qib/qib_iba6120.c
-+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
-@@ -2070,15 +2070,16 @@ qib_6120_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
-
- static void qib_6120_config_ctxts(struct qib_devdata *dd)
- {
-+ u32 nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, 0);
- dd->ctxtcnt = qib_read_kreg32(dd, kr_portcnt);
-- if (qib_n_krcv_queues > 1) {
-- dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
-+ if (nkrcvqs > 1) {
-+ dd->first_user_ctxt = nkrcvqs * dd->num_pports;
- if (dd->first_user_ctxt > dd->ctxtcnt)
- dd->first_user_ctxt = dd->ctxtcnt;
- dd->qpn_mask = dd->first_user_ctxt <= 2 ? 2 : 6;
- } else
- dd->first_user_ctxt = dd->num_pports;
-- dd->n_krcv_queues = dd->first_user_ctxt;
-+ dd->pport[0].n_krcv_queues = dd->first_user_ctxt;
- }
-
- static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd,
-@@ -3133,7 +3134,7 @@ static void get_6120_chip_params(struct qib_devdata *dd)
- dd->piosize2k = val & ~0U;
- dd->piosize4k = val >> 32;
-
-- mtu = ib_mtu_enum_to_int(qib_ibmtu);
-+ mtu = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
- if (mtu == -1)
- mtu = QIB_DEFAULT_MTU;
- dd->pport->ibmtu = (u32)mtu;
-@@ -3282,7 +3283,7 @@ static int init_6120_variables(struct qib_devdata *dd)
- dd->rhf_offset = 0;
-
- /* we always allocate at least 2048 bytes for eager buffers */
-- ret = ib_mtu_enum_to_int(qib_ibmtu);
-+ ret = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
- dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
- dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
-@@ -3322,7 +3323,6 @@ static int init_6120_variables(struct qib_devdata *dd)
- if (qib_mini_init)
- goto bail;
-
-- qib_num_cfg_vls = 1; /* if any 6120's, only one VL */
-
- ret = qib_create_ctxts(dd);
- init_6120_cntrnames(dd);
-diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
-index 454c2e7..19ad170 100644
---- a/drivers/infiniband/hw/qib/qib_iba7220.c
-+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
-@@ -2299,19 +2299,21 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
- {
- unsigned long flags;
- u32 nchipctxts;
-+ u32 cfgctxts = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0);
-+ u32 nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, 0);
-
- nchipctxts = qib_read_kreg32(dd, kr_portcnt);
- dd->cspec->numctxts = nchipctxts;
-- if (qib_n_krcv_queues > 1) {
-+ if (nkrcvqs > 1) {
- dd->qpn_mask = 0x3e;
-- dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports;
-+ dd->first_user_ctxt = nkrcvqs * dd->num_pports;
- if (dd->first_user_ctxt > nchipctxts)
- dd->first_user_ctxt = nchipctxts;
- } else
- dd->first_user_ctxt = dd->num_pports;
-- dd->n_krcv_queues = dd->first_user_ctxt;
-+ dd->pport[0].n_krcv_queues = dd->first_user_ctxt;
-
-- if (!qib_cfgctxts) {
-+ if (!cfgctxts) {
- int nctxts = dd->first_user_ctxt + num_online_cpus();
-
- if (nctxts <= 5)
-@@ -2320,8 +2322,8 @@ static void qib_7220_config_ctxts(struct qib_devdata *dd)
- dd->ctxtcnt = 9;
- else if (nctxts <= nchipctxts)
- dd->ctxtcnt = nchipctxts;
-- } else if (qib_cfgctxts <= nchipctxts)
-- dd->ctxtcnt = qib_cfgctxts;
-+ } else if (cfgctxts <= nchipctxts)
-+ dd->ctxtcnt = cfgctxts;
- if (!dd->ctxtcnt) /* none of the above, set to max */
- dd->ctxtcnt = nchipctxts;
-
-@@ -3846,7 +3848,7 @@ static void get_7220_chip_params(struct qib_devdata *dd)
- dd->piosize2k = val & ~0U;
- dd->piosize4k = val >> 32;
-
-- mtu = ib_mtu_enum_to_int(qib_ibmtu);
-+ mtu = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
- if (mtu == -1)
- mtu = QIB_DEFAULT_MTU;
- dd->pport->ibmtu = (u32)mtu;
-@@ -4084,15 +4086,13 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
- ppd->cpspec->chase_timer.function = reenable_7220_chase;
- ppd->cpspec->chase_timer.data = (unsigned long)ppd;
-
-- qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
--
- dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
- dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
- dd->rhf_offset =
- dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
-
- /* we always allocate at least 2048 bytes for eager buffers */
-- ret = ib_mtu_enum_to_int(qib_ibmtu);
-+ ret = ib_mtu_enum_to_int(QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
- dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
- dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
-diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
-index 016e742..35fc492 100644
---- a/drivers/infiniband/hw/qib/qib_iba7322.c
-+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
-@@ -107,9 +107,8 @@ static const unsigned sdma_idle_cnt = 64;
- * Number of VLs we are configured to use (to allow for more
- * credits per vl, etc.)
- */
--ushort qib_num_cfg_vls = 2;
--module_param_named(num_vls, qib_num_cfg_vls, ushort, S_IRUGO);
--MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
-+static QIB_MODPARAM_PORT(num_vls, NULL, 2, S_IRUGO,
-+ "Set number of Virtual Lanes to use (1-8)");
-
- static ushort qib_chase = 1;
- module_param_named(chase, qib_chase, ushort, S_IRUGO);
-@@ -120,9 +119,8 @@ module_param_named(long_attenuation, qib_long_atten, ushort, S_IRUGO);
- MODULE_PARM_DESC(long_attenuation, \
- "attenuation cutoff (dB) for long copper cable setup");
-
--static ushort qib_singleport;
--module_param_named(singleport, qib_singleport, ushort, S_IRUGO);
--MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space");
-+static QIB_MODPARAM_UNIT(singleport, NULL, 0, S_IRUGO,
-+ "Use only IB port 1; more per-port buffer space");
-
- static ushort qib_krcvq01_no_msi;
- module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO);
-@@ -2395,6 +2393,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
- qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
- qib_write_kreg(dd, kr_scratch, 0ULL);
-
-+ /* ensure previous Tx parameters are not still forced */
-+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
-+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
-+ reset_tx_deemphasis_override));
-+
- if (qib_compat_ddr_negotiate) {
- ppd->cpspec->ibdeltainprog = 1;
- ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
-@@ -3515,7 +3518,8 @@ try_intx:
- snprintf(dd->cspec->msix_entries[msixnum].name,
- sizeof(dd->cspec->msix_entries[msixnum].name)
- - 1,
-- QIB_DRV_NAME "%d (kctx)", dd->unit);
-+ QIB_DRV_NAME "%d:%d (kctx)", dd->unit,
-+ ((struct qib_ctxtdata *)arg)->ppd->port);
- }
- ret = request_irq(
- dd->cspec->msix_entries[msixnum].msix.vector,
-@@ -3651,10 +3655,10 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
- dd->majrev, dd->minrev,
- (unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
-
-- if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
-- qib_devinfo(dd->pcidev,
-- "IB%u: Forced to single port mode by module parameter\n",
-- dd->unit);
-+ if (QIB_MODPARAM_GET(singleport, dd->unit, 0) &&
-+ (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
-+ qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode"
-+ " by module param\n", dd->unit);
- features &= PORT_SPD_CAP;
- }
-
-@@ -3941,22 +3945,30 @@ qib_7322_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
- static void qib_7322_config_ctxts(struct qib_devdata *dd)
- {
- unsigned long flags;
-- u32 nchipctxts;
-+ u32 nchipctxts, nkrcvqs;
-+ u32 cfgctxts = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0);
-+ u8 pidx;
-
- nchipctxts = qib_read_kreg32(dd, kr_contextcnt);
- dd->cspec->numctxts = nchipctxts;
-- if (qib_n_krcv_queues > 1 && dd->num_pports) {
-- dd->first_user_ctxt = NUM_IB_PORTS +
-- (qib_n_krcv_queues - 1) * dd->num_pports;
-- if (dd->first_user_ctxt > nchipctxts)
-- dd->first_user_ctxt = nchipctxts;
-- dd->n_krcv_queues = dd->first_user_ctxt / dd->num_pports;
-- } else {
-- dd->first_user_ctxt = NUM_IB_PORTS;
-- dd->n_krcv_queues = 1;
-+ dd->first_user_ctxt = NUM_IB_PORTS;
-+
-+ for (pidx = 0; pidx < dd->num_pports; pidx++) {
-+ nkrcvqs = QIB_MODPARAM_GET(krcvqs, dd->unit, pidx+1);
-+ if (nkrcvqs > 1) {
-+ if (nkrcvqs - 1 > nchipctxts - dd->first_user_ctxt)
-+ dd->pport[pidx].n_krcv_queues =
-+ (nchipctxts - dd->first_user_ctxt) + 1;
-+ else
-+ dd->pport[pidx].n_krcv_queues = nkrcvqs;
-+ dd->first_user_ctxt +=
-+ dd->pport[pidx].n_krcv_queues - 1;
-+ } else
-+ /* Account for the HW ctxt */
-+ dd->pport[pidx].n_krcv_queues = 1;
- }
-
-- if (!qib_cfgctxts) {
-+ if (!cfgctxts) {
- int nctxts = dd->first_user_ctxt + num_online_cpus();
-
- if (nctxts <= 6)
-@@ -3965,10 +3977,10 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
- dd->ctxtcnt = 10;
- else if (nctxts <= nchipctxts)
- dd->ctxtcnt = nchipctxts;
-- } else if (qib_cfgctxts < dd->num_pports)
-+ } else if (cfgctxts < dd->num_pports)
- dd->ctxtcnt = dd->num_pports;
-- else if (qib_cfgctxts <= nchipctxts)
-- dd->ctxtcnt = qib_cfgctxts;
-+ else if (cfgctxts <= nchipctxts)
-+ dd->ctxtcnt = cfgctxts;
- if (!dd->ctxtcnt) /* none of the above, set to max */
- dd->ctxtcnt = nchipctxts;
-
-@@ -5799,7 +5811,6 @@ static void get_7322_chip_params(struct qib_devdata *dd)
- {
- u64 val;
- u32 piobufs;
-- int mtu;
-
- dd->palign = qib_read_kreg32(dd, kr_pagealign);
-
-@@ -5818,11 +5829,10 @@ static void get_7322_chip_params(struct qib_devdata *dd)
- dd->piosize2k = val & ~0U;
- dd->piosize4k = val >> 32;
-
-- mtu = ib_mtu_enum_to_int(qib_ibmtu);
-- if (mtu == -1)
-- mtu = QIB_DEFAULT_MTU;
-- dd->pport[0].ibmtu = (u32)mtu;
-- dd->pport[1].ibmtu = (u32)mtu;
-+ dd->pport[0].ibmtu = ib_mtu_enum_to_int(
-+ QIB_MODPARAM_GET(ibmtu, dd->unit, 1));
-+ dd->pport[1].ibmtu = ib_mtu_enum_to_int(
-+ QIB_MODPARAM_GET(ibmtu, dd->unit, 2));
-
- /* these may be adjusted in init_chip_wc_pat() */
- dd->pio2kbase = (u32 __iomem *)
-@@ -6342,11 +6352,11 @@ static void write_7322_initregs(struct qib_devdata *dd)
- qib_write_kreg(dd, KREG_IDX(RcvQPMulticastContext_1), 1);
-
- for (pidx = 0; pidx < dd->num_pports; ++pidx) {
-- unsigned n, regno;
-+ unsigned i, n, regno, ctxts[18];
- unsigned long flags;
-
-- if (dd->n_krcv_queues < 2 ||
-- !dd->pport[pidx].link_speed_supported)
-+ if (dd->pport[pidx].n_krcv_queues == 1 ||
-+ !dd->pport[pidx].link_speed_supported)
- continue;
-
- ppd = &dd->pport[pidx];
-@@ -6359,19 +6369,18 @@ static void write_7322_initregs(struct qib_devdata *dd)
- /* Initialize QP to context mapping */
- regno = krp_rcvqpmaptable;
- val = 0;
-- if (dd->num_pports > 1)
-- n = dd->first_user_ctxt / dd->num_pports;
-- else
-- n = dd->first_user_ctxt - 1;
-+ for (i = 0, n = 0; n < dd->first_user_ctxt; n++) {
-+ if (dd->skip_kctxt_mask & (1 << n))
-+ continue;
-+ if (dd->rcd[n]->ppd->port == pidx+1)
-+ ctxts[i++] = n;
-+ if (i == ppd->n_krcv_queues)
-+ break;
-+ }
- for (i = 0; i < 32; ) {
- unsigned ctxt;
-
-- if (dd->num_pports > 1)
-- ctxt = (i % n) * dd->num_pports + pidx;
-- else if (i % n)
-- ctxt = (i % n) + 1;
-- else
-- ctxt = ppd->hw_pidx;
-+ ctxt = ctxts[i % ppd->n_krcv_queues];
- val |= ctxt << (5 * (i % 6));
- i++;
- if (i % 6 == 0) {
-@@ -6419,7 +6428,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- {
- struct qib_pportdata *ppd;
- unsigned features, pidx, sbufcnt;
-- int ret, mtu;
-+ int ret, maxmtu = 0;
- u32 sbufs, updthresh;
-
- /* pport structs are contiguous, allocated after devdata */
-@@ -6496,10 +6505,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- */
- qib_7322_set_baseaddrs(dd);
-
-- mtu = ib_mtu_enum_to_int(qib_ibmtu);
-- if (mtu == -1)
-- mtu = QIB_DEFAULT_MTU;
--
- dd->cspec->int_enable_mask = QIB_I_BITSEXTANT;
- /* all hwerrors become interrupts, unless special purposed */
- dd->cspec->hwerrmask = ~0ULL;
-@@ -6509,9 +6514,14 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- ~(SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_0) |
- SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_1) |
- HWE_MASK(LATriggered));
--
- for (pidx = 0; pidx < NUM_IB_PORTS; ++pidx) {
- struct qib_chippport_specific *cp = ppd->cpspec;
-+ int mtu = ib_mtu_enum_to_int(
-+ QIB_MODPARAM_GET(ibmtu, dd->unit, pidx+1));
-+ u8 vls = QIB_MODPARAM_GET(num_vls, dd->unit, pidx+1);
-+ if (mtu == -1)
-+ mtu = QIB_DEFAULT_MTU;
-+ maxmtu = max(maxmtu, mtu);
- ppd->link_speed_supported = features & PORT_SPD_CAP;
- features >>= PORT_SPD_CAP_SHIFT;
- if (!ppd->link_speed_supported) {
-@@ -6565,7 +6575,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- ppd->link_width_active = IB_WIDTH_4X;
- ppd->link_speed_active = QIB_IB_SDR;
- ppd->delay_mult = ib_rate_to_delay[IB_RATE_10_GBPS];
-- switch (qib_num_cfg_vls) {
-+ switch (vls) {
- case 1:
- ppd->vls_supported = IB_VL_VL0;
- break;
-@@ -6575,8 +6585,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- default:
- qib_devinfo(dd->pcidev,
- "Invalid num_vls %u, using 4 VLs\n",
-- qib_num_cfg_vls);
-- qib_num_cfg_vls = 4;
-+ vls);
- /* fall through */
- case 4:
- ppd->vls_supported = IB_VL_VL0_3;
-@@ -6588,9 +6597,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- qib_devinfo(dd->pcidev,
- "Invalid num_vls %u for MTU %d "
- ", using 4 VLs\n",
-- qib_num_cfg_vls, mtu);
-+ vls, mtu);
- ppd->vls_supported = IB_VL_VL0_3;
-- qib_num_cfg_vls = 4;
- }
- break;
- }
-@@ -6640,7 +6648,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
-
- /* we always allocate at least 2048 bytes for eager buffers */
-- dd->rcvegrbufsize = max(mtu, 2048);
-+ dd->rcvegrbufsize = max(maxmtu, 2048);
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
- dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
-
-@@ -6698,8 +6706,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
- goto bail; /* no error, so can still figure out why err */
- }
-
-- write_7322_initregs(dd);
- ret = qib_create_ctxts(dd);
-+ write_7322_initregs(dd);
- init_7322_cntrnames(dd);
-
- updthresh = 8U; /* update threshold */
-diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
-index 84b3222..0e83ed4 100644
---- a/drivers/infiniband/hw/qib/qib_init.c
-+++ b/drivers/infiniband/hw/qib/qib_init.c
-@@ -67,6 +67,11 @@
- #define QLOGIC_IB_R_SOFTWARE_SHIFT 24
- #define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)
-
-+unsigned int snoop_enable; /* By default (0) snooping is disabled */
-+
-+module_param_named(snoop_enable, snoop_enable , int, 0644);
-+MODULE_PARM_DESC(snoop_enable, "snooping mode ");
-+
- /*
- * Select the NUMA node id on which to allocate the receive header
- * queue, eager buffers and send pioavail register.
-@@ -79,9 +84,8 @@ MODULE_PARM_DESC(numa_node, "NUMA node on which memory is allocated");
- * Number of ctxts we are configured to use (to allow for more pio
- * buffers per ctxt, etc.) Zero means use chip value.
- */
--ushort qib_cfgctxts;
--module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
--MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
-+QIB_MODPARAM_UNIT(cfgctxts, NULL, 0, S_IRUGO,
-+ "Set max number of contexts to use");
-
- /*
- * If set, do not write to any regs if avoidable, hack to allow
-@@ -97,9 +101,8 @@ MODULE_PARM_DESC(numa_aware, "Use NUMA aware allocations: "
- "0=disabled, 1=enabled, "
- "10=option 0 for AMD & <= Intel Westmere cpus and option 1 for newer cpus(default)");
-
--unsigned qib_n_krcv_queues;
--module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
--MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
-+QIB_MODPARAM_PORT(krcvqs, NULL, 0, S_IRUGO,
-+ "number of kernel receive queues per IB port");
-
- unsigned qib_cc_table_size;
- module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
-@@ -123,14 +126,15 @@ unsigned long *qib_cpulist;
- /* set number of contexts we'll actually use */
- void qib_set_ctxtcnt(struct qib_devdata *dd)
- {
-- if (!qib_cfgctxts) {
-+ u64 val = QIB_MODPARAM_GET(cfgctxts, dd->unit, 0);
-+ if (!val) {
- dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
- if (dd->cfgctxts > dd->ctxtcnt)
- dd->cfgctxts = dd->ctxtcnt;
-- } else if (qib_cfgctxts < dd->num_pports)
-+ } else if (val < dd->num_pports)
- dd->cfgctxts = dd->ctxtcnt;
-- else if (qib_cfgctxts <= dd->ctxtcnt)
-- dd->cfgctxts = qib_cfgctxts;
-+ else if (val <= dd->ctxtcnt)
-+ dd->cfgctxts = val;
- else
- dd->cfgctxts = dd->ctxtcnt;
- dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
-@@ -142,13 +146,27 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
- */
- int qib_create_ctxts(struct qib_devdata *dd)
- {
-- unsigned i;
-+ unsigned i, c, p;
-+ unsigned port;
- int ret;
-+ int node_id;
- int local_node_id = pcibus_to_node(dd->pcidev->bus);
-+ s64 new_node_id = qib_numa_node;
-
- if (local_node_id < 0)
- local_node_id = numa_node_id();
-- dd->assigned_node_id = local_node_id;
-+
-+ if (new_node_id < 0)
-+ new_node_id = local_node_id;
-+
-+ new_node_id = node_online(new_node_id) ? new_node_id :
-+ local_node_id;
-+
-+ dd->local_node_id = local_node_id;
-+ dd->assigned_node_id = new_node_id;
-+
-+ node_id = qib_numa_aware ? dd->local_node_id :
-+ dd->assigned_node_id;
-
- /*
- * Allocate full ctxtcnt array, rather than just cfgctxts, because
-@@ -162,17 +180,29 @@ int qib_create_ctxts(struct qib_devdata *dd)
- goto done;
- }
-
-+ c = dd->num_pports ? min(
-+ (unsigned)dd->pport[0].n_krcv_queues,
-+ (dd->num_pports > 1 ?
-+ (unsigned)dd->pport[1].n_krcv_queues : (unsigned)-1))
-+ : 0;
-+ p = dd->num_pports > 1 ?
-+ (dd->pport[0].n_krcv_queues > dd->pport[1].n_krcv_queues ?
-+ 0 : 1) : 0;
-+
- /* create (one or more) kctxt */
-- for (i = 0; i < dd->first_user_ctxt; ++i) {
-+ for (port = 0, i = 0; i < dd->first_user_ctxt; ++i) {
- struct qib_pportdata *ppd;
- struct qib_ctxtdata *rcd;
-
- if (dd->skip_kctxt_mask & (1 << i))
- continue;
-
-- ppd = dd->pport + (i % dd->num_pports);
-+ if (i < (c * dd->num_pports))
-+ ppd = dd->pport + (i % dd->num_pports);
-+ else
-+ ppd = dd->pport + p;
-
-- rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
-+ rcd = qib_create_ctxtdata(ppd, i, node_id);
- if (!rcd) {
- qib_dev_err(dd,
- "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
-@@ -722,10 +752,10 @@ int qib_init(struct qib_devdata *dd, int reinit)
- if (lastfail)
- ret = lastfail;
- ppd = dd->pport + pidx;
-- mtu = ib_mtu_enum_to_int(qib_ibmtu);
-+ mtu = ib_mtu_enum_to_int(
-+ QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port));
- if (mtu == -1) {
- mtu = QIB_DEFAULT_MTU;
-- qib_ibmtu = 0; /* don't leave invalid value */
- }
- /* set max we can ever have for this driver load */
- ppd->init_ibmaxlen = min(mtu > 2048 ?
-@@ -750,6 +780,11 @@ int qib_init(struct qib_devdata *dd, int reinit)
- lastfail = -ENETDOWN;
- continue;
- }
-+ if (snoop_enable) {
-+ ppd->filter_callback = NULL;
-+ ppd->filter_value = NULL;
-+ ppd->mode_flag = 0;
-+ }
-
- portok++;
- }
-@@ -1108,24 +1143,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
- unsigned long flags;
- struct qib_devdata *dd;
- int ret;
-- int node_id;
-- int local_node_id = pcibus_to_node(dd->pcidev->bus);
-- s64 new_node_id = qib_numa_node;
--
-- if (local_node_id < 0)
-- local_node_id = numa_node_id();
--
-- if (new_node_id < 0)
-- new_node_id = local_node_id;
--
-- new_node_id = node_online(new_node_id) ? new_node_id :
-- local_node_id;
--
-- dd->local_node_id = local_node_id;
-- dd->assigned_node_id = new_node_id;
-
-- node_id = qib_numa_aware ? dd->local_node_id :
-- dd->assigned_node_id;
-
- dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
- if (!dd) {
-@@ -1273,6 +1291,15 @@ static int __init qlogic_ib_init(void)
- if (ret)
- goto bail;
-
-+ if (qib_numa_aware == QIB_DRIVER_AUTO_CONFIGURATION)
-+ qib_numa_aware = qib_configure_numa(boot_cpu_data) ? 1 : 0;
-+
-+ if (qib_rcvhdrpoll == QIB_DRIVER_AUTO_CONFIGURATION)
-+ qib_rcvhdrpoll = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
-+ if (qib_pio_avail_bits == QIB_DRIVER_AUTO_CONFIGURATION)
-+ qib_pio_avail_bits = qib_configure_numa(boot_cpu_data) ? 0 : 1;
-+
- /*
- * These must be called before the driver is registered with
- * the PCI subsystem.
-@@ -1298,13 +1325,13 @@ static int __init qlogic_ib_init(void)
- #ifdef QIB_CONFIG_KNX
- ret = qib_knx_server_init();
- if (ret < 0)
-- pr_err("Unable to start KNX listen thread\n");
-+ printk(KERN_ERR QIB_DRV_NAME
-+ ": Unable to start KNX listen thread\n");
- #endif
--
- goto bail; /* all OK */
-
- bail_dev:
--#ifdef CONFIG_INFINIBAND_QIB_DCA
-+ #ifdef CONFIG_INFINIBAND_QIB_DCA
- dca_unregister_notify(&dca_notifier);
- #endif
- #ifdef CONFIG_DEBUG_FS
-@@ -1328,7 +1355,6 @@ static void __exit qlogic_ib_cleanup(void)
- #ifdef QIB_CONFIG_KNX
- qib_knx_server_exit();
- #endif
--
- ret = qib_exit_qibfs();
- if (ret)
- pr_err(
-@@ -1348,6 +1374,7 @@ static void __exit qlogic_ib_cleanup(void)
-
- idr_destroy(&qib_unit_table);
- qib_dev_cleanup();
-+ qib_clean_mod_param();
- }
-
- module_exit(qlogic_ib_cleanup);
-@@ -1560,6 +1587,8 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
- }
-
- qib_verify_pioperf(dd);
-+ if (snoop_enable)
-+ qib_snoop_add(dd);
- bail:
- return ret;
- }
-@@ -1572,6 +1601,9 @@ static void qib_remove_one(struct pci_dev *pdev)
- /* unregister from IB core */
- qib_unregister_ib_device(dd);
-
-+#ifdef QIB_CONFIG_KNX
-+ qib_knx_remove_device(dd);
-+#endif
- /*
- * Disable the IB link, disable interrupts on the device,
- * clear dma engines, etc.
-@@ -1686,7 +1718,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
- unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
- size_t size;
- gfp_t gfp_flags;
-- int old_node_id;
-+ int old_dev_node;
-
- /*
- * GFP_USER, but without GFP_FS, so buffer cache can be
-@@ -1706,14 +1738,14 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
- if (!rcd->rcvegrbuf) {
- rcd->rcvegrbuf =
- kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
-- GFP_KERNEL, rcd->node_id);
-+ GFP_KERNEL, rcd->node_id);
- if (!rcd->rcvegrbuf)
- goto bail;
- }
- if (!rcd->rcvegrbuf_phys) {
- rcd->rcvegrbuf_phys =
- kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
-- GFP_KERNEL, rcd->node_id);
-+ GFP_KERNEL, rcd->node_id);
- if (!rcd->rcvegrbuf_phys)
- goto bail_rcvegrbuf;
- }
-@@ -1721,13 +1753,13 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
- if (rcd->rcvegrbuf[e])
- continue;
-
-- old_node_id = dev_to_node(&dd->pcidev->dev);
-+ old_dev_node = dev_to_node(&dd->pcidev->dev);
- set_dev_node(&dd->pcidev->dev, rcd->node_id);
- rcd->rcvegrbuf[e] =
- dma_alloc_coherent(&dd->pcidev->dev, size,
- &rcd->rcvegrbuf_phys[e],
- gfp_flags);
-- set_dev_node(&dd->pcidev->dev, old_node_id);
-+ set_dev_node(&dd->pcidev->dev, old_dev_node);
- if (!rcd->rcvegrbuf[e])
- goto bail_rcvegrbuf_phys;
- }
-diff --git a/drivers/infiniband/hw/qib/qib_knx.c b/drivers/infiniband/hw/qib/qib_knx.c
-index c15276f..f692913 100644
---- a/drivers/infiniband/hw/qib/qib_knx.c
-+++ b/drivers/infiniband/hw/qib/qib_knx.c
-@@ -1,5 +1,5 @@
- /*
-- * Copyright (c) 2012 Intel Corporation. All rights reserved.
-+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
-@@ -37,12 +37,21 @@
-
- #include "qib.h"
- #include "qib_knx.h"
-+#include "qib_user_sdma.h"
-+#include "qib_knx_common.h"
-
- unsigned int qib_knx_nconns = 5;
- module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO);
- MODULE_PARM_DESC(num_conns, "Max number of pending connections");
-
- #define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9
-+#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
-+
-+#define knx_sdma_next(sdma) \
-+ sdma->head = ((sdma->head + 1) % sdma->desc_num)
-+#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub)
-+#define QIB_KNX_SDMA_STATUS(sdma, st) \
-+ QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)st << 32) | 1)
-
- struct qib_knx_server {
- struct task_struct *kthread;
-@@ -82,7 +91,16 @@ struct qib_knx_mem_map_sg {
- struct scif_range *pages;
- };
-
-+struct qib_knx_tidrcv {
-+ struct qib_knx_rma tidmem;
-+ u64 tidbase;
-+ u32 tidcnt;
-+};
-+
- struct qib_knx_ctxt {
-+ u16 ctxt;
-+ struct qib_knx *knx;
-+ struct qib_pportdata *ppd;
- /* local registered memory for PIO buffers */
- struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT];
- /* local registered memory for user registers */
-@@ -104,6 +122,23 @@ struct qib_knx_ctxt {
- __u64 status;
- __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT];
- __u32 runtime_flags;
-+
-+ struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT];
-+};
-+
-+struct qib_knx_sdma {
-+ /* KNX flags page */
-+ struct scif_range *mflag_pages;
-+ struct qib_knx_sdma_mflags *mflags;
-+ /* KNX descriptor queue */
-+ struct scif_range *queue_pages;
-+ struct qib_knx_sdma_desc *queue;
-+ u32 desc_num;
-+ /* host flags (in host memory) */
-+ struct qib_knx_rma hflags_mem;
-+ struct qib_knx_sdma_hflags *hflags;
-+ u32 head; /* shadow */
-+ u32 complete;
- };
-
- struct qib_knx {
-@@ -114,10 +149,16 @@ struct qib_knx {
- int numa_node;
- struct qib_devdata *dd;
- struct qib_knx_ctxt **ctxts;
-+ spinlock_t ctxt_lock;
-+ resource_size_t bar;
-+ u64 barlen;
-+ struct qib_knx_sdma *sdma;
-+ struct task_struct *sdma_poll;
-+ atomic_t tref;
-+ char tname[64];
-+ struct qib_knx_rma tidmem;
- };
-
--#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x)
--
- static struct qib_knx_server *server;
-
- static int qib_knx_init(struct qib_knx_server *);
-@@ -127,19 +168,20 @@ static off_t qib_knx_register_memory(struct qib_knx *, struct qib_knx_rma *,
- void *, size_t, int, const char *);
- static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *,
- const char *);
-+static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t);
- static ssize_t qib_show_knx_node(struct device *, struct device_attribute *,
- char *);
--
--static DEVICE_ATTR(knx_node, S_IRUGO, qib_show_knx_node, NULL);
--static ssize_t qib_show_knx_node(struct device *dev,
-- struct device_attribute *attr, char *buf)
--{
-- struct qib_ibdev *ibdev =
-- container_of(dev, struct qib_ibdev, ibdev.dev);
-- struct qib_devdata *dd = dd_from_dev(ibdev);
--
-- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->knx->peer.node);
--}
-+static int qib_knx_sdma_init(struct qib_knx *);
-+static void qib_knx_sdma_teardown(struct qib_knx *);
-+static __always_inline struct page *
-+qib_knx_phys_to_page(struct qib_knx *, unsigned long);
-+static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *,
-+ struct qib_knx_sdma_desc *,
-+ struct qib_user_sdma_queue *,
-+ int *, struct list_head *);
-+static int qib_knx_sdma_poll(void *);
-+static int qib_knx_tidrcv_init(struct qib_knx *);
-+static int qib_knx_tidrcv_teardown(struct qib_knx *);
-
- inline struct qib_knx *qib_knx_get(u16 nodeid)
- {
-@@ -162,10 +204,11 @@ inline struct qib_devdata *qib_knx_node_to_dd(u16 node)
-
- static int qib_knx_init(struct qib_knx_server *server)
- {
-- int ret = 0, num_devs = 0, i;
-- struct qib_devdata *dd;
-+ int ret = 0, num_devs = 0, i, seen = 0;
-+ unsigned fewest = -1U;
-+ struct qib_devdata *dd = NULL, *dd_no_numa = NULL;
- struct qib_knx *knx;
-- struct ib_device *ibdev;
-+ struct qib_device_info info = { -1 };
-
- knx = kzalloc(sizeof(*knx), GFP_KERNEL);
- if (!knx) {
-@@ -179,10 +222,14 @@ static int qib_knx_init(struct qib_knx_server *server)
- }
-
- INIT_LIST_HEAD(&knx->list);
-+ spin_lock_init(&knx->ctxt_lock);
- knx->numa_node = -1;
- ret = scif_pci_info(knx->peer.node, &knx->pci_info);
-- if (!ret)
-+ if (!ret) {
- knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus);
-+ knx->bar = pci_resource_start(knx->pci_info.pdev, 0);
-+ knx->barlen = pci_resource_len(knx->pci_info.pdev, 0);
-+ }
-
- if (knx->numa_node < 0)
- knx->numa_node = numa_node_id();
-@@ -190,40 +237,58 @@ static int qib_knx_init(struct qib_knx_server *server)
- num_devs = qib_count_units(NULL, NULL);
- if (unlikely(!num_devs)) {
- ret = -ENODEV;
-+ /* we have to send this */
-+ scif_send(knx->epd.epd, &info, sizeof(info),
-+ SCIF_SEND_BLOCK);
- goto done;
- }
-
-- for (i = 0; i < num_devs; i++) {
-+ /*
-+ * Attempt to find an HCA on the same NUMA node as the card. Save
-+ * the first HCA that hasn't been associated with a card in case
-+ * there is no HCA on the same NUMA node.
-+ */
-+ for (i = 0; seen < num_devs; i++) {
- dd = qib_lookup(i);
-- if (dd && dd->local_node_id == knx->numa_node)
-- knx->dd = dd;
-+ if (dd) {
-+ if (dd->local_node_id == knx->numa_node) {
-+ knx->dd = dd;
-+ break;
-+ } else if (dd->num_knx < fewest)
-+ dd_no_numa = dd;
-+ seen++;
-+ }
- }
- /*
- * We didn't find a QIB device on the same NUMA node,
-- * round-robin across all devices.
-+ * use the "backup".
- */
- if (unlikely(!knx->dd)) {
-- knx->dd = qib_lookup(server->nclients % num_devs);
-- /* it is possible for qib_lookup to return NULL */
-- if (unlikely(!knx->dd)) {
-+ if (!dd_no_numa) {
- ret = -ENODEV;
-+ /* we have to send this */
-+ scif_send(knx->epd.epd, &info, sizeof(info),
-+ SCIF_SEND_BLOCK);
- goto done;
- }
-+ knx->dd = dd_no_numa;
- }
-- knx->dd->node_id = knx->peer.node;
-- knx->dd->knx = knx;
-+ knx->dd->num_knx++;
-+
- knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts),
- GFP_KERNEL, knx->numa_node);
- if (!knx->ctxts)
- ret = -ENOMEM;
-- ibdev = &knx->dd->verbs_dev.ibdev;
-- ret = device_create_file(&ibdev->dev, &dev_attr_knx_node);
-+ /* Give the KNX the associated device information. */
-+ info.unit = knx->dd->unit;
-+ ret = scif_send(knx->epd.epd, &info, sizeof(info),
-+ SCIF_SEND_BLOCK);
-+
-+ ret = qib_knx_sdma_init(knx);
- if (ret)
-- /*
-- * clear the error code since we don't want to fail the
-- * initialization.
-- */
-- ret = 0;
-+ goto done;
-+ atomic_set(&knx->tref, 0);
-+ ret = qib_knx_tidrcv_init(knx);
- done:
- spin_lock(&server->client_lock);
- list_add_tail(&knx->list, &server->clients);
-@@ -237,13 +302,12 @@ bail:
- static void qib_knx_free(struct qib_knx *knx, int unload)
- {
- struct qib_devdata *dd = knx->dd;
-- struct ib_device *ibdev;
- int i;
-
-- if (dd) {
-- ibdev = &dd->verbs_dev.ibdev;
-- device_remove_file(&ibdev->dev, &dev_attr_knx_node);
-- }
-+ qib_knx_tidrcv_teardown(knx);
-+ qib_knx_sdma_teardown(knx);
-+ if (dd)
-+ dd->num_knx--;
- /*
- * If this function is called with unload set, we can
- * free the context data. Otherwise, we are here
-@@ -360,9 +424,16 @@ done:
- return ret;
- }
-
--int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt)
-+static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src,
-+ size_t size)
- {
-- struct qib_knx *knx = dd_to_knx(dd);
-+ memcpy_fromio(dst, src, size);
-+}
-+
-+int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt)
-+{
-+ struct qib_knx *knx = qib_knx_get(node_id);
-+ struct qib_devdata *dd = knx->dd;
- struct qib_knx_ctxt *ptr;
- int ret = 0;
-
-@@ -379,7 +450,14 @@ int qib_knx_alloc_ctxt(struct qib_devdata *dd, unsigned ctxt)
- ret = -ENOMEM;
- goto bail;
- }
-+ ptr->knx = knx;
-+ ptr->ctxt = ctxt;
-+ ptr->ppd = dd->rcd[ctxt]->ppd;
-+
-+ spin_lock(&knx->ctxt_lock);
- knx->ctxts[ctxt] = ptr;
-+ dd->rcd[ctxt]->krcd = ptr;
-+ spin_unlock(&knx->ctxt_lock);
- bail:
- return ret;
- }
-@@ -388,10 +466,11 @@ __u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd,
- enum qib_knx_ctxtinfo_type type,
- struct file *fp)
- {
-- struct qib_knx *knx = dd_to_knx(rcd->dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
- __u16 subctxt;
- __u64 ret = 0;
-
-+ spin_lock(&knx->ctxt_lock);
- if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt])
- goto done;
-
-@@ -414,6 +493,7 @@ __u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd,
- break;
- }
- done:
-+ spin_unlock(&knx->ctxt_lock);
- return ret;
- }
-
-@@ -424,7 +504,7 @@ int qib_knx_setup_piobufs(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- char buf[16];
- off_t offset;
- int ret = 0;
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
-
- if (unlikely(!knx)) {
- ret = -ENODEV;
-@@ -472,7 +552,7 @@ int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- {
- int ret = 0;
- off_t offset;
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
-
- if (unlikely(!knx)) {
- ret = -ENODEV;
-@@ -496,7 +576,7 @@ int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- goto bail;
- }
- knx->ctxts[rcd->ctxt]->uregbase = offset;
--
-+
- /*
- * register the PIO availability registers.
- * user status 64bit values are part of the page containing the
-@@ -533,7 +613,7 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- {
- struct qib_knx_mem_map_sg *mapsg;
- struct qib_knx_mem_map *map;
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
- dma_addr_t offset;
- struct scatterlist *sg;
- unsigned num_pages;
-@@ -590,7 +670,8 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- * can use 64bit addresses for DMA but the CPU might not.
- * (see pci_set_dma_mask() in qib_pcie.c).
- */
-- mapsg->sglist = kzalloc(num_pages * sizeof(*mapsg->sglist), GFP_KERNEL);
-+ mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist),
-+ GFP_KERNEL, knx->numa_node);
- if (!mapsg->sglist) {
- ret = -ENOMEM;
- goto bail_rcvq_pages;
-@@ -625,7 +706,7 @@ int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd,
- }
- rcd->rcvhdrq_phys = sg_dma_address(mapsg->sglist);
- rcd->rcvhdrq = mapsg->pages->va[0];
--
-+
- map = &knx->ctxts[rcd->ctxt]->sbufstatus;
- ret = scif_get_pages(knx->epd.epd, binfo->spi_sendbuf_status,
- PAGE_SIZE, &map->pages);
-@@ -700,7 +781,7 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
- struct qib_knx_mem_map_sg *map;
- struct scatterlist *sg;
- struct qib_devdata *dd = rcd->dd;
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
- unsigned size, egrsize, egrcnt, num_pages, bufs_ppage,
- egrbufcnt;
- dma_addr_t dma_addr, page;
-@@ -761,7 +842,7 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
- goto bail_free_scif;
- }
- }
--
-+
- /*
- * Allocate array of DMA addresses for each of the mapped
- * pages.
-@@ -775,10 +856,11 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd,
- goto bail_free_rcvegr;
- }
- }
--
-+
- map->size = size;
- map->dir = DMA_BIDIRECTIONAL;
-- map->sglist = kzalloc(num_pages * sizeof(*map->sglist), GFP_KERNEL);
-+ map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL,
-+ knx->numa_node);
- if (!map->sglist) {
- ret = -ENOMEM;
- goto bail_free_rcvegr_phys;
-@@ -830,7 +912,7 @@ bail:
-
- void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- {
-- struct qib_knx *knx = dd_to_knx(dd);
-+ struct qib_knx *knx = rcd->krcd->knx;
- struct qib_knx_ctxt *ctxt;
- char buf[16];
- int i, ret = 0;
-@@ -838,7 +920,11 @@ void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- if (!rcd || !knx || !knx->ctxts)
- return;
-
-+ spin_lock(&knx->ctxt_lock);
- ctxt = knx->ctxts[rcd->ctxt];
-+ knx->ctxts[rcd->ctxt] = NULL;
-+ spin_unlock(&knx->ctxt_lock);
-+
- if (!ctxt)
- return;
-
-@@ -884,12 +970,535 @@ void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
- qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf);
- }
-
-- /* MITKO XXX: handle rcd->tid_pg_list */
-- knx->ctxts[rcd->ctxt] = NULL;
- kfree(ctxt);
- kfree(rcd);
- }
-
-+/*
-+ * TID management for processes on the MIC happens on the MIC. Therefore,
-+ * we only register the HW TID array here.
-+ * The MIC will calculate TID array offsets using the same algorithm is
-+ * the host. Therefore, it is OK that the entire HW TID array is mapped
-+ * since neither side should step on the other.
-+ */
-+static int qib_knx_tidrcv_init(struct qib_knx *knx)
-+{
-+ struct qib_devdata *dd = knx->dd;
-+ struct qib_knx_tid_info info;
-+ void *tidbase;
-+ int ret = 0;
-+ off_t offset = 0;
-+ size_t len;
-+ char buf[64];
-+
-+ memset(&info, 0, sizeof(info));
-+
-+ info.tidcnt = dd->rcvtidcnt;
-+ tidbase = ((char *)dd->kregbase + dd->rcvtidbase);
-+ info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase);
-+ info.tidtemplate = dd->tidtemplate;
-+ info.invalidtid = dd->tidinvalid;
-+ /* information needed to properly calculate DMA address to MIC pages */
-+ info.bar_addr = knx->bar;
-+ info.bar_len = knx->barlen;
-+
-+ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
-+ offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase,
-+ info.tidbase_len, SCIF_PROT_WRITE,
-+ buf);
-+ info.tidbase_offset = offset;
-+ if (IS_ERR_VALUE(offset))
-+ ret = offset;
-+ len = scif_send(knx->epd.epd, &info, sizeof(info),
-+ SCIF_SEND_BLOCK);
-+ if (len < sizeof(info))
-+ ret = -EFAULT;
-+ return ret;
-+}
-+
-+static int qib_knx_tidrcv_teardown(struct qib_knx *knx)
-+{
-+ char buf[64];
-+ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node);
-+ return qib_knx_unregister_memory(knx, &knx->tidmem, buf);
-+}
-+
-+static int qib_knx_sdma_init(struct qib_knx *knx)
-+{
-+ struct qib_knx_host_mem flags;
-+ struct qib_knx_knc_mem mflags;
-+ struct qib_knx_sdma *sdma;
-+ char buf[64];
-+ int ret = 0;
-+
-+ sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node);
-+ if (!sdma) {
-+ ret = -ENOMEM;
-+ goto done;
-+ }
-+ sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node);
-+ if (!sdma->hflags) {
-+ ret = -ENOMEM;
-+ goto done_free;
-+ }
-+ snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node);
-+ flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem,
-+ sdma->hflags,
-+ PAGE_SIZE,
-+ SCIF_PROT_WRITE,
-+ buf);
-+ if (IS_ERR_VALUE(flags.flags_offset)) {
-+ ret = flags.flags_offset;
-+ goto free_flags;
-+ }
-+ sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt;
-+ flags.desc_num = sdma->desc_num;
-+ ret = scif_send(knx->epd.epd, &flags, sizeof(flags),
-+ SCIF_SEND_BLOCK);
-+ if (ret < sizeof(flags))
-+ goto unregister;
-+ ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags),
-+ SCIF_RECV_BLOCK);
-+ if (ret < sizeof(mflags)) {
-+ ret = -EINVAL;
-+ goto unregister;
-+ }
-+ ret = scif_get_pages(knx->epd.epd, mflags.flags_offset,
-+ PAGE_SIZE, &sdma->mflag_pages);
-+ if (ret < 0 || !sdma->mflag_pages->nr_pages) {
-+ ret = -EFAULT;
-+ goto unregister;
-+ }
-+ sdma->mflags = sdma->mflag_pages->va[0];
-+ ret = scif_get_pages(knx->epd.epd, mflags.queue_offset,
-+ mflags.queue_len, &sdma->queue_pages);
-+ if (ret < 0)
-+ goto put_flags;
-+ if ((sdma->queue_pages->nr_pages * PAGE_SIZE) !=
-+ mflags.queue_len) {
-+ ret = -EFAULT;
-+ goto put_queue;
-+ }
-+ sdma->queue = sdma->queue_pages->va[0];
-+ sdma->complete = -1;
-+ sdma->head = -1;
-+ /* set the initial trigger value */
-+ QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1);
-+ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
-+ snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll",
-+ knx->peer.node);
-+ knx->sdma = sdma;
-+ ret = 0;
-+ goto done;
-+put_queue:
-+ scif_put_pages(sdma->queue_pages);
-+put_flags:
-+ scif_put_pages(sdma->mflag_pages);
-+unregister:
-+ qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf);
-+free_flags:
-+ kfree(sdma->hflags);
-+done_free:
-+ kfree(sdma);
-+done:
-+ /*
-+ * we have to respond to the MIC so it doesn't get stuck
-+ * in the scif_recv call
-+ */
-+ scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK);
-+ return ret;
-+}
-+
-+static void qib_knx_sdma_teardown(struct qib_knx *knx)
-+{
-+ int ret;
-+ if (knx->sdma_poll)
-+ ret = kthread_stop(knx->sdma_poll);
-+ if (knx->sdma) {
-+ if (knx->sdma->queue_pages->nr_pages) {
-+ knx->sdma->queue = NULL;
-+ scif_put_pages(knx->sdma->queue_pages);
-+ }
-+ if (knx->sdma->mflag_pages->nr_pages) {
-+ knx->sdma->mflags = NULL;
-+ scif_put_pages(knx->sdma->mflag_pages);
-+ }
-+ kfree(knx->sdma->hflags);
-+ kfree(knx->sdma);
-+ knx->sdma = NULL;
-+ }
-+}
-+
-+int qib_knx_sdma_queue_create(struct file *fd)
-+{
-+ struct qib_ctxtdata *rcd = ctxt_fp(fd);
-+ struct qib_devdata *dd = rcd->dd;
-+ struct qib_knx *knx = rcd->krcd->knx;
-+ struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt];
-+ u8 subctxt = subctxt_fp(fd);
-+ int ret = 0;
-+
-+ if (!ctxt) {
-+ ret = -EINVAL;
-+ goto done;
-+ }
-+ ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev,
-+ dd->unit, rcd->ctxt,
-+ subctxt);
-+ if (!ctxt->pq[subctxt])
-+ ret = -ENOMEM;
-+ user_sdma_queue_fp(fd) = ctxt->pq[subctxt];
-+ /*
-+ * We start the polling thread the first time a user SDMA
-+ * queue is created. There is no reason to take up CPU
-+ * cycles before then.
-+ */
-+ if (atomic_inc_return(&knx->tref) == 1) {
-+ knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx,
-+ knx->tname);
-+ if (IS_ERR(knx->sdma_poll)) {
-+ ret = -PTR_ERR(knx->sdma_poll);
-+ atomic_dec(&knx->tref);
-+ goto free_queue;
-+ }
-+ }
-+ goto done;
-+free_queue:
-+ user_sdma_queue_fp(fd) = NULL;
-+ qib_user_sdma_queue_destroy(ctxt->pq[subctxt]);
-+ ctxt->pq[subctxt] = NULL;
-+done:
-+ return ret;
-+}
-+
-+void qib_knx_sdma_queue_destroy(struct qib_filedata *fd)
-+{
-+ struct qib_ctxtdata *rcd = fd->rcd;
-+ struct qib_knx *knx;
-+ unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt;
-+
-+ /* Host processes do not have a KNX rcd pointer. */
-+ if (!rcd->krcd)
-+ return;
-+ knx = rcd->krcd->knx;
-+ /* We still have the memory pointer through fd->pq */
-+ spin_lock(&knx->ctxt_lock);
-+ if (knx->ctxts[ctxt])
-+ knx->ctxts[ctxt]->pq[subctxt] = NULL;
-+ spin_unlock(&knx->ctxt_lock);
-+ if (atomic_dec_and_test(&knx->tref)) {
-+ int ret = kthread_stop(knx->sdma_poll);
-+ knx->sdma_poll = NULL;
-+ }
-+}
-+
-+/*
-+ * Convert a MIC physical address to the corresponding host page.
-+ */
-+static __always_inline struct page *
-+qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr) {
-+ unsigned long paddr;
-+ if ((knx->bar + addr + PAGE_SIZE) >
-+ (knx->bar + knx->barlen))
-+ return NULL;
-+ paddr = knx->bar + addr;
-+ return pfn_to_page(paddr >> PAGE_SHIFT);
-+}
-+
-+static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt,
-+ struct qib_knx_sdma_desc *desc,
-+ struct qib_user_sdma_queue *pq,
-+ int *ndesc, struct list_head *list)
-+{
-+ struct qib_knx *knx = ctxt->knx;
-+ struct qib_user_sdma_pkt *pkt;
-+ dma_addr_t pbc_dma_addr;
-+ unsigned pktnw, pbcnw;
-+ u32 counter;
-+ u16 frag_size;
-+ int ret = 0;
-+ __le32 *pbc;
-+
-+ counter = pq->counter;
-+
-+ pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr);
-+ if (!pbc) {
-+ ret = -ENOMEM;
-+ goto done;
-+ }
-+ memcpy(pbc, desc->pbc, desc->pbclen);
-+
-+ pktnw = (le32_to_cpu(*pbc) & 0xFFFF);
-+ /*
-+ * This assignment is a bit strange. it's because the
-+ * the pbc counts the number of 32 bit words in the full
-+ * packet _except_ the first word of the pbc itself...
-+ */
-+ pbcnw = (desc->pbclen >> 2) - 1;
-+
-+ if (pktnw < pbcnw) {
-+ ret = -EINVAL;
-+ goto free_pbc;
-+ }
-+
-+ if (pktnw != ((desc->length >> 2) + pbcnw)) {
-+ ret = -EINVAL;
-+ goto free_pbc;
-+ }
-+
-+ frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF;
-+ if (((frag_size ? frag_size : desc->length) + desc->pbclen) >
-+ ctxt->ppd->ibmaxlen) {
-+ ret = -EINVAL;
-+ goto free_pbc;
-+ }
-+ if (frag_size) {
-+ /* new SDMA "protocol" */
-+ unsigned pktsize, n;
-+
-+ n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1);
-+ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n;
-+
-+ pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL);
-+ if (!pkt) {
-+ ret = -ENOMEM;
-+ goto free_pbc;
-+ }
-+ pkt->largepkt = 1;
-+ pkt->frag_size = frag_size;
-+ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr);
-+
-+ if (desc->tidlen) {
-+ char *tidsmptr = (char *)pkt + pktsize;
-+ memcpy(tidsmptr, desc->tidsm, desc->tidlen);
-+ pkt->tidsm =
-+ (struct qib_tid_session_member *)tidsmptr;
-+ pkt->tidsmcount = desc->tidlen /
-+ sizeof(*desc->tidsm);
-+ pkt->tidsmidx = 0;
-+ }
-+ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
-+ } else {
-+ /* old SDMA */
-+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
-+ if (!pkt) {
-+ ret = -ENOMEM;
-+ goto free_pbc;
-+ }
-+ pkt->largepkt = 0;
-+ pkt->frag_size = desc->length;
-+ pkt->addrlimit = ARRAY_SIZE(pkt->addr);
-+ }
-+ pkt->bytes_togo = desc->length;
-+ pkt->payload_size = 0;
-+ pkt->counter = counter;
-+ pkt->tiddma = !!desc->tidlen;
-+ /*
-+ * The generic user SDMA code will use this as a flag to
-+ * decide whether to call the KNx-specific pkt free
-+ * function. However, it doesn't know what the value
-+ * actually means.
-+ */
-+ pkt->remote = (u64)knx;
-+
-+ qib_user_sdma_init_frag(pkt, 0,
-+ 0, desc->pbclen,
-+ 1, 0,
-+ 0, 0,
-+ NULL, pbc,
-+ pbc_dma_addr, desc->pbclen);
-+ pkt->index = 0;
-+ pkt->naddr = 1;
-+
-+ if (desc->npages) {
-+ /* we have user data */
-+ int i;
-+ struct page *page;
-+ unsigned plen = 0, len = desc->length;
-+ for (i = 0; i < desc->npages; i++) {
-+ unsigned long off = (i == 0 ? desc->offset : 0);
-+ plen = (len > PAGE_SIZE ? PAGE_SIZE : len);
-+ page = qib_knx_phys_to_page(knx, desc->pages[i]);
-+ ret = qib_user_sdma_page_to_frags(knx->dd, pq,
-+ pkt, page, 0, off,
-+ (off + plen > PAGE_SIZE ?
-+ PAGE_SIZE - off : plen),
-+ NULL);
-+ if (ret < 0)
-+ goto free_sdma;
-+ len -= plen - off;
-+ }
-+ } else {
-+ pkt->addr[0].last_desc = 1;
-+ if (pbc_dma_addr == 0) {
-+ pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev,
-+ pbc, desc->pbclen,
-+ DMA_TO_DEVICE);
-+ if (dma_mapping_error(&knx->dd->pcidev->dev,
-+ pbc_dma_addr)) {
-+ ret = -ENOMEM;
-+ goto free_sdma;
-+ }
-+ pkt->addr[0].addr = pbc_dma_addr;
-+ pkt->addr[0].dma_mapped = 1;
-+ }
-+ }
-+ counter++;
-+ pkt->pq = pq;
-+ pkt->index = 0;
-+ *ndesc = pkt->naddr;
-+
-+ list_add_tail(&pkt->list, list);
-+ goto done;
-+free_sdma:
-+ if (pkt->largepkt)
-+ kfree(pkt);
-+ else
-+ kmem_cache_free(pq->pkt_slab, pkt);
-+free_pbc:
-+ if (pbc_dma_addr)
-+ dma_pool_free(pq->header_cache, pbc, pbc_dma_addr);
-+ else
-+ kfree(pbc);
-+done:
-+ return ret;
-+}
-+
-+void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt)
-+{
-+ struct qib_knx *knx = (struct qib_knx *)pkt->remote;
-+ struct qib_knx_sdma *sdma = knx->sdma;
-+ sdma_next(sdma, complete);
-+ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete);
-+}
-+
-+static int qib_knx_sdma_poll(void *data)
-+{
-+ struct qib_knx *knx = (struct qib_knx *)data;
-+ struct qib_knx_ctxt *ctxt;
-+ struct qib_knx_sdma_desc desc;
-+ struct qib_knx_sdma *sdma = knx->sdma;
-+ struct qib_user_sdma_queue *pq;
-+ struct list_head list;
-+ u32 new_head;
-+ int ret = 0, ndesc = 0, added;
-+
-+ if (!sdma)
-+ return -EFAULT;
-+
-+ while (!kthread_should_stop()) {
-+ added = 0;
-+ new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger);
-+ while (sdma->head != new_head) {
-+ knx_sdma_next(sdma);
-+ qib_knx_memcpy(&desc, sdma->queue + sdma->head,
-+ sizeof(desc));
-+ if (!desc.ctxt) {
-+ QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
-+ continue;
-+ }
-+ spin_lock(&knx->ctxt_lock);
-+ ctxt = knx->ctxts[desc.ctxt];
-+ if (!ctxt) {
-+ /* we should never get here */
-+ QIB_KNX_SDMA_STATUS(sdma, -EINVAL);
-+ goto done_unlock;
-+ }
-+ pq = ctxt->pq[desc.subctxt];
-+ if (!pq) {
-+ QIB_KNX_SDMA_STATUS(sdma, -EFAULT);
-+ goto done_unlock;
-+ }
-+ mutex_lock(&pq->lock);
-+ if (pq->added > ctxt->ppd->sdma_descq_removed)
-+ qib_user_sdma_hwqueue_clean(ctxt->ppd);
-+ if (pq->num_sending)
-+ qib_user_sdma_queue_clean(ctxt->ppd, pq);
-+
-+ INIT_LIST_HEAD(&list);
-+ ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq,
-+ &ndesc, &list);
-+ QIB_KNX_SDMA_STATUS(sdma, ret);
-+ if (!list_empty(&list)) {
-+ if (qib_sdma_descq_freecnt(ctxt->ppd) <
-+ ndesc) {
-+ qib_user_sdma_hwqueue_clean(
-+ ctxt->ppd);
-+ if (pq->num_sending)
-+ qib_user_sdma_queue_clean(
-+ ctxt->ppd, pq);
-+ }
-+ ret = qib_user_sdma_push_pkts(ctxt->ppd,
-+ pq, &list, 1);
-+ if (ret < 0)
-+ goto free_pkts;
-+ else {
-+ pq->counter++;
-+ added++;
-+ }
-+ }
-+free_pkts:
-+ if (!list_empty(&list))
-+ qib_user_sdma_free_pkt_list(
-+ &knx->dd->pcidev->dev, pq, &list);
-+ mutex_unlock(&pq->lock);
-+done_unlock:
-+ spin_unlock(&knx->ctxt_lock);
-+ }
-+ if (!added) {
-+ int i;
-+ /*
-+ * Push the queues along
-+ * The polling thread will enter the inner loop only
-+ * if the KNX has posted new descriptors to the queue.
-+ * However, any packets that have been completed by
-+ * the HW need to be cleaned and that won't happen
-+ * unless we explicitly check.
-+ */
-+ for (i = 0;
-+ i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT;
-+ i++) {
-+ int c = i / QLOGIC_IB_MAX_SUBCTXT,
-+ s = i % QLOGIC_IB_MAX_SUBCTXT;
-+ spin_lock(&knx->ctxt_lock);
-+ ctxt = knx->ctxts[c];
-+ if (!ctxt)
-+ goto loop_unlock;
-+ pq = ctxt->pq[s];
-+ if (!pq)
-+ goto loop_unlock;
-+ mutex_lock(&pq->lock);
-+ if (pq->num_sending)
-+ qib_user_sdma_queue_clean(ctxt->ppd,
-+ pq);
-+ mutex_unlock(&pq->lock);
-+loop_unlock:
-+ spin_unlock(&knx->ctxt_lock);
-+ }
-+ might_sleep();
-+ }
-+ }
-+ return ret;
-+}
-+
-+void qib_knx_remove_device(struct qib_devdata *dd)
-+{
-+ if (server && dd->num_knx) {
-+ struct qib_knx *knx, *knxp;
-+ list_for_each_entry_safe(knx, knxp, &server->clients, list) {
-+ if (knx->dd == dd) {
-+ spin_lock(&server->client_lock);
-+ list_del(&knx->list);
-+ server->nclients--;
-+ spin_unlock(&server->client_lock);
-+ qib_knx_free(knx, 0);
-+ kfree(knx);
-+ }
-+ }
-+ }
-+ return;
-+}
-+
- int __init qib_knx_server_init(void)
- {
- server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL);
-@@ -908,7 +1517,6 @@ void __exit qib_knx_server_exit(void)
- {
- if (server) {
- struct qib_knx *t, *tt;
--
- /* Stop the thread so we don't accept any new connections. */
- kthread_stop(server->kthread);
- list_for_each_entry_safe(t, tt, &server->clients, list) {
-@@ -921,3 +1529,4 @@ void __exit qib_knx_server_exit(void)
- kfree(server);
- }
- }
-+
-diff --git a/drivers/infiniband/hw/qib/qib_knx.h b/drivers/infiniband/hw/qib/qib_knx.h
-index d767a60..fcb5a3e 100644
---- a/drivers/infiniband/hw/qib/qib_knx.h
-+++ b/drivers/infiniband/hw/qib/qib_knx.h
-@@ -1,5 +1,5 @@
- /*
-- * Copyright (c) 2012 Intel Corporation. All rights reserved.
-+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
-@@ -44,13 +44,12 @@ enum qib_knx_ctxtinfo_type {
-
- int __init qib_knx_server_init(void);
- void __exit qib_knx_server_exit(void);
--static __always_inline struct qib_knx *dd_to_knx(struct qib_devdata *dd)
--{
-- return (struct qib_knx *)dd->knx;
--}
-+
-+void qib_knx_remove_device(struct qib_devdata *);
-+
- inline struct qib_knx *qib_knx_get(uint16_t);
- inline struct qib_devdata *qib_knx_node_to_dd(uint16_t);
--int qib_knx_alloc_ctxt(struct qib_devdata *, unsigned);
-+int qib_knx_alloc_ctxt(u16, unsigned);
- int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16);
- int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *,
- struct qib_base_info *);
-@@ -60,4 +59,6 @@ int qib_knx_setup_eagerbufs(struct qib_ctxtdata *, struct qib_base_info *);
- void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
- __u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type,
- struct file *);
-+int qib_knx_sdma_queue_create(struct file *);
-+void qib_knx_sdma_queue_destroy(struct qib_filedata *);
- #endif /* _QIB_KNX_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_common.h b/drivers/infiniband/hw/qib/qib_knx_common.h
-new file mode 100644
-index 0000000..9639592
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_knx_common.h
-@@ -0,0 +1,126 @@
-+/*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses. You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ * Redistribution and use in source and binary forms, with or
-+ * without modification, are permitted provided that the following
-+ * conditions are met:
-+ *
-+ * - Redistributions of source code must retain the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer in the documentation and/or other materials
-+ * provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+#ifndef _QIB_KNX_COMMON_H
-+#define _QIB_KNX_COMMON_H
-+
-+struct qib_device_info {
-+ u16 unit;
-+};
-+
-+#define QIB_SDMA_MAX_NPAGES 33
-+#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld
-+#define QIB_KNX_SDMA_SET(fld, val) \
-+ do { \
-+ fld = (u64)(val); \
-+ smp_mb(); \
-+ } while (0)
-+
-+struct qib_knx_host_mem {
-+ off_t flags_offset;
-+ unsigned desc_num;
-+};
-+
-+struct qib_knx_knc_mem {
-+ off_t flags_offset;
-+ off_t queue_offset;
-+ size_t queue_len;
-+};
-+
-+struct qib_tid_sm {
-+ __u16 tid;
-+ __u16 offset;
-+ __u16 length;
-+};
-+
-+/*
-+ * SDMA transfer descriptor. This structure communicates the SDMA
-+ * transfers from the MIC to the host. It is very important for
-+ * performance reasons that its size is multiple of 64B in order
-+ * to guarantee proper alignment in the descriptor array.
-+ */
-+struct qib_knx_sdma_desc {
-+ u16 ctxt;
-+ u16 subctxt;
-+ u32 pbclen;
-+ __le32 pbc[16];
-+ u64 length;
-+ u32 npages;
-+ unsigned tidlen;
-+ off_t offset;
-+ unsigned long pages[QIB_SDMA_MAX_NPAGES];
-+ /* This array is 198B so the compiler will pad
-+ * it by 2B to make it multiple of 8B. */
-+ struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES];
-+ /*
-+ * The two paddings below are included in order to
-+ * make the size of the entire struct 576B (multiple
-+ * of 64B). The goal is that all elements in an array
-+ * of struct qib_knx_sdma_desc are 64B aligned.
-+ */
-+ u16 __padding0;
-+ u64 __padding1[2];
-+};
-+
-+/*
-+ * trigger, status, and complete fields are by 8 to be
-+ * cacheline size.
-+ */
-+struct qib_knx_sdma_hflags {
-+ u64 trigger;
-+ u64 __padding[7];
-+};
-+
-+#define sdma_next(s, fld) \
-+ (s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1)
-+
-+struct qib_knx_sdma_mflags {
-+ u64 status;
-+ u64 __padding1[7];
-+ u64 complete;
-+ u64 __padding2[7];
-+};
-+
-+struct qib_knx_tid_info {
-+ /* this is the entire set of 512 entries (= 4K) so
-+ * we can resgister. subctxt devision will be done
-+ * in MIC driver. */
-+ off_t tidbase_offset;
-+ size_t tidbase_len;
-+ u64 tidbase;
-+ unsigned tidcnt;
-+ u64 tidtemplate;
-+ unsigned long invalidtid;
-+ u64 bar_addr;
-+ u64 bar_len;
-+};
-+
-+#endif /* _QIB_KNX_COMMON_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_sdma.h b/drivers/infiniband/hw/qib/qib_knx_sdma.h
-deleted file mode 100644
-index 8c67b1f..0000000
---- a/drivers/infiniband/hw/qib/qib_knx_sdma.h
-+++ /dev/null
-@@ -1,105 +0,0 @@
--/*
-- * Copyright (c) 2013 Intel Corporation. All rights reserved.
-- *
-- * This software is available to you under a choice of one of two
-- * licenses. You may choose to be licensed under the terms of the GNU
-- * General Public License (GPL) Version 2, available from the file
-- * COPYING in the main directory of this source tree, or the
-- * OpenIB.org BSD license below:
-- *
-- * Redistribution and use in source and binary forms, with or
-- * without modification, are permitted provided that the following
-- * conditions are met:
-- *
-- * - Redistributions of source code must retain the above
-- * copyright notice, this list of conditions and the following
-- * disclaimer.
-- *
-- * - Redistributions in binary form must reproduce the above
-- * copyright notice, this list of conditions and the following
-- * disclaimer in the documentation and/or other materials
-- * provided with the distribution.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- * SOFTWARE.
-- */
--#ifndef _QIB_KNX_SDMA_H
--#define _QIB_KNX_SDMA_H
--
--#define QIB_SDMA_MAX_NPAGES 33
--#define QIB_KNX_SDMA_VALUE(fld) (volatile u64)fld
--#define QIB_KNX_SDMA_SET(fld, val) \
-- do { \
-- fld = (u64)(val); \
-- smp_mb(); \
-- } while (0)
--
--struct qib_knx_host_mem {
-- off_t flags_offset;
-- unsigned desc_num;
--};
--
--struct qib_knx_knc_mem {
-- off_t flags_offset;
-- off_t queue_offset;
-- size_t queue_len;
--};
--
--struct qib_tid_sm {
-- __u16 tid;
-- __u16 offset;
-- __u16 length;
--};
--
--/*
-- * SDMA transfer descriptor. This structure communicates the SDMA
-- * transfers from the MIC to the host. It is very important for
-- * performance reasons that its size is multiple of 64B in order
-- * to guarantee proper alignment in the descriptor array.
-- */
--struct qib_knx_sdma_desc {
-- u16 ctxt;
-- u16 subctxt;
-- u32 pbclen;
-- __le32 pbc[16];
-- u64 length;
-- u32 npages;
-- unsigned tidlen;
-- off_t offset;
-- unsigned long pages[QIB_SDMA_MAX_NPAGES];
-- /* This array is 198B so the compiler will pad
-- * it by 2B to make it multiple of 8B. */
-- struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES];
-- /*
-- * The two paddings below are included in order to
-- * make the size of the entire struct 576B (multiple
-- * of 64B). The goal is that all elements in an array
-- * of struct qib_knx_sdma_desc are 64B aligned.
-- */
-- u16 __padding0;
-- u64 __padding1[2];
--};
--
--/*
-- * trigger, status, and complete fields are by 8 to be
-- * cacheline size.
-- */
--struct qib_knx_sdma_hflags {
-- u64 trigger;
-- u64 __padding[7];
--};
--
--struct qib_knx_sdma_mflags {
-- u64 status;
-- u64 __padding1[7];
-- u64 complete;
-- u64 __padding2[7];
--};
--
--#endif /* _QIB_KNX_SDMA_H */
-diff --git a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h b/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-deleted file mode 100644
-index 842fca1..0000000
---- a/drivers/infiniband/hw/qib/qib_knx_tidrcv.h
-+++ /dev/null
-@@ -1,48 +0,0 @@
--/*
-- * Copyright (c) 2013 Intel Corporation. All rights reserved.
-- *
-- * This software is available to you under a choice of one of two
-- * licenses. You may choose to be licensed under the terms of the GNU
-- * General Public License (GPL) Version 2, available from the file
-- * COPYING in the main directory of this source tree, or the
-- * OpenIB.org BSD license below:
-- *
-- * Redistribution and use in source and binary forms, with or
-- * without modification, are permitted provided that the following
-- * conditions are met:
-- *
-- * - Redistributions of source code must retain the above
-- * copyright notice, this list of conditions and the following
-- * disclaimer.
-- *
-- * - Redistributions in binary form must reproduce the above
-- * copyright notice, this list of conditions and the following
-- * disclaimer in the documentation and/or other materials
-- * provided with the distribution.
-- *
-- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- * SOFTWARE.
-- */
--#ifndef _QIB_KNX_TIDRCV_H
--
--struct qib_knx_tid_info {
-- /* this is the entire set of 512 entries (= 4K) so
-- * we can resgister. subctxt devision will be done
-- * in MIC driver. */
-- off_t tidbase_offset;
-- size_t tidbase_len;
-- u64 tidbase;
-- unsigned tidcnt;
-- u64 tidtemplate;
-- unsigned long invalidtid;
-- u64 bar_addr;
-- u64 bar_len;
--};
--
--#endif /* QIB_KNX_TIDRCV_H */
-diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
-index ccb1191..4b46f6c 100644
---- a/drivers/infiniband/hw/qib/qib_mad.c
-+++ b/drivers/infiniband/hw/qib/qib_mad.c
-@@ -536,7 +536,8 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
- pip->vl_arb_low_cap =
- dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_LOW_CAP);
- /* InitTypeReply = 0 */
-- pip->inittypereply_mtucap = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
-+ pip->inittypereply_mtucap =
-+ QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port);
- /* HCAs ignore VLStallCount and HOQLife */
- /* pip->vlstallcnt_hoqlife; */
- pip->operationalvl_pei_peo_fpi_fpo =
-diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
-index 3f14009..d7eebfb 100644
---- a/drivers/infiniband/hw/qib/qib_pcie.c
-+++ b/drivers/infiniband/hw/qib/qib_pcie.c
-@@ -501,9 +501,8 @@ static int val2fld(int wd, int mask)
- return wd;
- }
-
--static int qib_pcie_coalesce;
--module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
--MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
-+static QIB_MODPARAM_UNIT(pcie_coalesce, NULL, 0, S_IRUGO,
-+ "tune PCIe colescing on some Intel chipsets");
-
- /*
- * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
-@@ -518,7 +517,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
- u16 devid;
- u32 mask, bits, val;
-
-- if (!qib_pcie_coalesce)
-+ if (!QIB_MODPARAM_GET(pcie_coalesce, dd->unit, 0))
- return 0;
-
- /* Find out supported and configured values for parent (root) */
-@@ -576,9 +575,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
- * BIOS may not set PCIe bus-utilization parameters for best performance.
- * Check and optionally adjust them to maximize our throughput.
- */
--static int qib_pcie_caps;
--module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
--MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-+static QIB_MODPARAM_UNIT(pcie_caps, NULL, 0, S_IRUGO,
-+ "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)");
-
- static int qib_tune_pcie_caps(struct qib_devdata *dd)
- {
-@@ -587,6 +585,7 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
- u16 pcaps, pctl, ecaps, ectl;
- int rc_sup, ep_sup;
- int rc_cur, ep_cur;
-+ int caps = QIB_MODPARAM_GET(pcie_caps, dd->unit, 0);
-
- /* Find out supported and configured values for parent (root) */
- parent = dd->pcidev->bus->self;
-@@ -614,8 +613,8 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
-
- /* If Supported greater than limit in module param, limit it */
-- if (rc_sup > (qib_pcie_caps & 7))
-- rc_sup = qib_pcie_caps & 7;
-+ if (rc_sup > (caps & 7))
-+ rc_sup = caps & 7;
- /* If less than (allowed, supported), bump root payload */
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
-@@ -637,8 +636,8 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
- * which is code '5' (log2(4096) - 7)
- */
- rc_sup = 5;
-- if (rc_sup > ((qib_pcie_caps >> 4) & 7))
-- rc_sup = (qib_pcie_caps >> 4) & 7;
-+ if (rc_sup > ((caps >> 4) & 7))
-+ rc_sup = (caps >> 4) & 7;
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
-
-diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
-index 3cca55b..4208b20 100644
---- a/drivers/infiniband/hw/qib/qib_qp.c
-+++ b/drivers/infiniband/hw/qib/qib_qp.c
-@@ -124,6 +124,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- enum ib_qp_type type, u8 port)
- {
- u32 i, offset, max_scan, qpn;
-+ unsigned krcvqs;
- struct qpn_map *map;
- u32 ret;
-
-@@ -141,10 +142,11 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- goto bail;
- }
-
-+ krcvqs = dd->pport[port-1].n_krcv_queues;
- qpn = qpt->last + 2;
- if (qpn >= QPN_MAX)
- qpn = 2;
-- if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
-+ if (qpt->mask && ((qpn & qpt->mask) >> 1) >= krcvqs)
- qpn = (qpn | qpt->mask) + 2;
- offset = qpn & BITS_PER_PAGE_MASK;
- map = &qpt->map[qpn / BITS_PER_PAGE];
-@@ -162,7 +164,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- goto bail;
- }
- offset = find_next_offset(qpt, map, offset,
-- dd->n_krcv_queues);
-+ krcvqs);
- qpn = mk_qpn(qpt, map, offset);
- /*
- * This test differs from alloc_pidmap().
-diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
-index c6d6a54..1e08943 100644
---- a/drivers/infiniband/hw/qib/qib_sdma.c
-+++ b/drivers/infiniband/hw/qib/qib_sdma.c
-@@ -532,7 +532,8 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd,
- */
- int qib_sdma_verbs_send(struct qib_pportdata *ppd,
- struct qib_sge_state *ss, u32 dwords,
-- struct qib_verbs_txreq *tx)
-+ struct qib_verbs_txreq *tx,
-+ struct snoop_packet *packet)
- {
- unsigned long flags;
- struct qib_sge *sge;
-@@ -543,6 +544,10 @@ int qib_sdma_verbs_send(struct qib_pportdata *ppd,
- u64 sdmadesc[2];
- u32 dwoffset;
- dma_addr_t addr;
-+ u8 *packet_data = NULL;
-+
-+ if (packet)
-+ packet_data = packet->data + ((tx->hdr_dwords-2) << 2);
-
- spin_lock_irqsave(&ppd->sdma_lock, flags);
-
-@@ -599,6 +604,10 @@ retry:
- dw << 2, DMA_TO_DEVICE);
- if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
- goto unmap;
-+ if (packet) {
-+ memcpy(packet_data, sge->vaddr, len);
-+ packet_data += len;
-+ }
- sdmadesc[0] = 0;
- make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
- /* SDmaUseLargeBuf has to be set in every descriptor */
-diff --git a/drivers/infiniband/hw/qib/qib_snoop.c b/drivers/infiniband/hw/qib/qib_snoop.c
-new file mode 100644
-index 0000000..3c62bbb
---- /dev/null
-+++ b/drivers/infiniband/hw/qib/qib_snoop.c
-@@ -0,0 +1,970 @@
-+/*
-+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
-+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
-+ *
-+ * This software is available to you under a choice of one of two
-+ * licenses. You may choose to be licensed under the terms of the GNU
-+ * General Public License (GPL) Version 2, available from the file
-+ * COPYING in the main directory of this source tree, or the
-+ * OpenIB.org BSD license below:
-+ *
-+ * Redistribution and use in source and binary forms, with or
-+ * without modification, are permitted provided that the following
-+ * conditions are met:
-+ *
-+ * - Redistributions of source code must retain the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer.
-+ *
-+ * - Redistributions in binary form must reproduce the above
-+ * copyright notice, this list of conditions and the following
-+ * disclaimer in the documentation and/or other materials
-+ * provided with the distribution.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-+ * SOFTWARE.
-+ */
-+
-+/*
-+ * This file implements a raw read/raw write interface for snooping raw
-+ * packets from the wire and injecting raw packets to the wire.
-+ *
-+ * Other things that this interface could do at somepoint are:
-+ * - Allow packets to be injected back into the stack
-+ * - Provide an intercept for packets coming from the upper layers to
-+ * move them back into user-space.
-+ */
-+
-+#include <linux/pci.h>
-+#include <linux/vmalloc.h>
-+#include <linux/uaccess.h>
-+#include <linux/module.h>
-+
-+#include <rdma/ib_user_mad.h> /* for ioctl constants */
-+#include <rdma/ib_smi.h>
-+
-+
-+#include "qib.h"
-+#include "qib_verbs.h"
-+#include "qib_common.h"
-+#include <linux/poll.h>
-+
-+#define QIB_SNOOP_IOC_MAGIC IB_IOCTL_MAGIC
-+#define QIB_SNOOP_IOC_BASE_SEQ 0x80
-+/* This starts our ioctl sequence
-+ * numbers *way* off from the ones
-+ * defined in ib_core
-+ */
-+#define QIB_SNOOP_IOCGETLINKSTATE \
-+ _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ)
-+#define QIB_SNOOP_IOCSETLINKSTATE \
-+ _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+1)
-+#define QIB_SNOOP_IOCCLEARQUEUE \
-+ _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+2)
-+#define QIB_SNOOP_IOCCLEARFILTER \
-+ _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+3)
-+#define QIB_SNOOP_IOCSETFILTER \
-+ _IO(QIB_SNOOP_IOC_MAGIC, QIB_SNOOP_IOC_BASE_SEQ+4)
-+
-+/* local prototypes */
-+static int qib_snoop_open(struct inode *in, struct file *fp);
-+static unsigned int qib_snoop_poll(struct file *fp,
-+ struct poll_table_struct *wait);
-+static ssize_t qib_snoop_read(struct file *fp, char __user *data,
-+ size_t pkt_len, loff_t *off);
-+static int qib_snoop_release(struct inode *in, struct file *fp);
-+
-+static long qib_ioctl(struct file *fp, unsigned int cmd, unsigned long arg);
-+
-+static ssize_t qib_snoop_write(struct file *fp, const char __user *data,
-+ size_t pkt_len, loff_t *off);
-+
-+#include <linux/delay.h>
-+
-+struct qib_packet_filter_command {
-+ int opcode;
-+ int length;
-+ void *value_ptr;
-+};
-+
-+enum qib_packet_filter_opcodes {
-+ FILTER_BY_LID,
-+ FILTER_BY_DLID,
-+ FILTER_BY_MAD_MGMT_CLASS,
-+ FILTER_BY_QP_NUMBER,
-+ FILTER_BY_PKT_TYPE,
-+ FILTER_BY_SERVICE_LEVEL,
-+ FILTER_BY_PKEY
-+};
-+
-+static const struct file_operations snoop_file_ops = {
-+ .owner = THIS_MODULE,
-+ .open = qib_snoop_open,
-+ .read = qib_snoop_read,
-+ .unlocked_ioctl = qib_ioctl,
-+ .poll = qib_snoop_poll,
-+ .write = qib_snoop_write,
-+ .release = qib_snoop_release
-+};
-+
-+struct qib_filter_array {
-+ int (*filter)(void *, void *, void *);
-+};
-+
-+static int qib_filter_lid(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_dlid(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-+ void *value);
-+static int qib_filter_qp_number(void *ibhdr, void *packet_data, void *value);
-+static int qib_filter_ibpacket_type(void *ibhdr, void *packet_data,
-+ void *value);
-+static int qib_filter_ib_service_level(void *ibhdr, void *packet_data,
-+ void *value);
-+static int qib_filter_ib_pkey(void *ibhdr, void *packet_data, void *value);
-+
-+static struct qib_filter_array qib_filters[] = {
-+ { qib_filter_lid },
-+ { qib_filter_dlid },
-+ { qib_filter_mad_mgmt_class },
-+ { qib_filter_qp_number },
-+ { qib_filter_ibpacket_type },
-+ { qib_filter_ib_service_level },
-+ { qib_filter_ib_pkey }
-+};
-+
-+#define QIB_MAX_FILTERS ARRAY_SIZE(qib_filters)
-+#define QIB_DRV_NAME "ib_qib"
-+#define QIB_MAJOR 233
-+#define QIB_USER_MINOR_BASE 0
-+#define QIB_DIAG_MINOR_BASE 129
-+#define QIB_SNOOP_MINOR_BASE 160
-+#define QIB_CAPTURE_MINOR_BASE 200
-+#define QIB_NMINORS 255
-+#define PORT_BITS 2
-+#define PORT_MASK ((1U << PORT_BITS) - 1)
-+#define GET_HCA(x) ((unsigned int)((x) >> PORT_BITS))
-+#define GET_PORT(x) ((unsigned int)((x) & PORT_MASK))
-+
-+int qib_snoop_add(struct qib_devdata *dd)
-+{
-+ char name[32];
-+ int ret = 0;
-+ int i;
-+ int j;
-+ int minor = 0;
-+
-+ for (i = 0; i < dd->num_pports; i++) {
-+ spin_lock_init(&dd->pport[i].snoop_write_lock);
-+ for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++) {
-+ spin_lock_init(&dd->pport[i].sc_device[j].snoop_lock);
-+ INIT_LIST_HEAD(
-+ &(dd->pport[i].sc_device[j].snoop_queue));
-+ init_waitqueue_head(
-+ &dd->pport[i].sc_device[j].snoop_waitq);
-+
-+ if (j == 0) {
-+ minor = (((dd->unit << PORT_BITS) | i)) +
-+ QIB_SNOOP_MINOR_BASE;
-+ snprintf(name, sizeof(name),
-+ "ipath_snoop_%02d_%02d", dd->unit, i+1);
-+ } else {
-+ minor = (((dd->unit << PORT_BITS) | i)) +
-+ QIB_CAPTURE_MINOR_BASE;
-+ snprintf(name, sizeof(name),
-+ "ipath_capture_%02d_%02d",
-+ dd->unit, i+1);
-+ }
-+
-+ ret = qib_cdev_init(
-+ minor, name,
-+ &snoop_file_ops,
-+ &dd->pport[i].sc_device[j].snoop_cdev,
-+ &dd->pport[i].sc_device[j].snoop_class_dev);
-+ if (ret)
-+ goto bail;
-+ }
-+ pr_info("qib%d: snoop dev for hca %02d enabled port %02d\n"
-+ "qib%d: capture dev for hca %02d enabled port %02d\n",
-+ dd->unit, dd->unit, i+1, dd->unit, dd->unit, i+1);
-+ dd->pport[i].mode_flag = 0;
-+ }
-+out:
-+ return ret;
-+bail:
-+ qib_dev_err(dd, "Couldn't create %s device: %d", name, ret);
-+ i--;
-+ if (i != dd->num_pports) {
-+ for (; i >= 0 ; i--) {
-+ for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++)
-+ qib_cdev_cleanup(
-+ &dd->pport[i].
-+ sc_device[j].
-+ snoop_cdev,
-+ &dd->pport[i].
-+ sc_device[j].
-+ snoop_class_dev);
-+ dd->pport[i].mode_flag = 0;
-+ }
-+ }
-+ goto out;
-+}
-+
-+/* this must be called w/ dd->snoop_in_lock held */
-+static void drain_snoop_list(struct qib_aux_device *sc_device)
-+{
-+ struct list_head *pos, *q;
-+ struct snoop_packet *packet;
-+
-+ list_for_each_safe(pos, q, &(sc_device->snoop_queue)) {
-+ packet = list_entry(pos, struct snoop_packet, list);
-+ list_del(pos);
-+ kfree(packet);
-+ }
-+}
-+
-+void qib_snoop_remove(struct qib_devdata *dd)
-+{
-+ unsigned long flags = 0;
-+ int i;
-+ int j;
-+
-+ for (i = 0; i < dd->num_pports; i++) {
-+ dd->pport[i].mode_flag = 0;
-+ for (j = 0; j < QIB_CHAR_DEVICES_PER_PORT; j++) {
-+ spin_lock_irqsave(&dd->pport[i].sc_device[j].snoop_lock,
-+ flags);
-+ drain_snoop_list(&dd->pport[i].sc_device[j]);
-+ qib_cdev_cleanup(&dd->pport[i].sc_device[j].snoop_cdev,
-+ &dd->pport[i].sc_device[j].snoop_class_dev);
-+ spin_unlock_irqrestore(
-+ &dd->pport[i].sc_device[j].snoop_lock,
-+ flags);
-+ }
-+ }
-+}
-+
-+static int qib_snoop_open(struct inode *in, struct file *fp)
-+{
-+ int unit = iminor(in);
-+ int devnum;
-+ int portnum = 0;
-+ int ret;
-+ int mode_flag = 0;
-+ unsigned long flags;
-+ struct qib_devdata *dd;
-+
-+ mutex_lock(&qib_mutex);
-+
-+ if (unit >= QIB_CAPTURE_MINOR_BASE) {
-+ unit -= QIB_CAPTURE_MINOR_BASE;
-+ devnum = 1;
-+ mode_flag = QIB_PORT_CAPTURE_MODE;
-+ } else {
-+ unit -= QIB_SNOOP_MINOR_BASE;
-+ devnum = 0;
-+ mode_flag = QIB_PORT_SNOOP_MODE;
-+ }
-+
-+ dd = qib_lookup(GET_HCA(unit));
-+ if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
-+ !dd->kregbase) {
-+ ret = -ENODEV;
-+ goto bail;
-+ }
-+ portnum = GET_PORT(unit);
-+
-+ spin_lock_irqsave(&dd->pport[portnum].sc_device[devnum].snoop_lock,
-+ flags);
-+
-+ if (dd->pport[portnum].mode_flag & mode_flag) {
-+ ret = -EBUSY;
-+ spin_unlock_irqrestore(
-+ &dd->pport[portnum].sc_device[devnum].snoop_lock,
-+ flags);
-+ goto bail;
-+ }
-+
-+ drain_snoop_list(&dd->pport[portnum].sc_device[devnum]);
-+ spin_unlock_irqrestore(
-+ &dd->pport[portnum].sc_device[devnum].snoop_lock, flags);
-+ if (devnum)
-+ pr_alert("capture device for hca %02d port %02d is opened\n",
-+ GET_HCA(unit), portnum+1);
-+ else
-+ pr_alert("snoop device for hca %02d port %02d is opened\n",
-+ GET_HCA(unit), portnum+1);
-+
-+ dd->pport[portnum].sc_device[devnum].pport = &dd->pport[portnum];
-+ fp->private_data = &dd->pport[portnum].sc_device[devnum];
-+ ret = 0;
-+ dd->pport[portnum].mode_flag |= mode_flag;
-+
-+bail:
-+ mutex_unlock(&qib_mutex);
-+
-+ return ret;
-+}
-+
-+static int qib_snoop_release(struct inode *in, struct file *fp)
-+{
-+ struct qib_aux_device *sc_device = fp->private_data;
-+ struct qib_pportdata *pport = sc_device->pport;
-+ unsigned long flags = 0;
-+ int devnum = iminor(in);
-+
-+ if (devnum >= QIB_CAPTURE_MINOR_BASE)
-+ devnum = 1;
-+ else
-+ devnum = 0;
-+
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+ if (devnum)
-+ pport->mode_flag = pport->mode_flag & (~QIB_PORT_CAPTURE_MODE);
-+ else
-+ pport->mode_flag = pport->mode_flag & (~QIB_PORT_SNOOP_MODE);
-+
-+ drain_snoop_list(sc_device);
-+ /* Clear filters before going out */
-+ pport->filter_callback = NULL;
-+ kfree(pport->filter_value);
-+ pport->filter_value = NULL;
-+
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+ if (devnum)
-+ pr_alert("capture device for hca %02d port %02d is closed\n",
-+ pport->dd->unit, pport->port);
-+ else
-+ pr_alert("snoop device for hca %02d port %02d is closed\n",
-+ pport->dd->unit, pport->port);
-+
-+ fp->private_data = NULL;
-+ return 0;
-+}
-+
-+static unsigned int qib_snoop_poll(struct file *fp,
-+ struct poll_table_struct *wait)
-+{
-+ struct qib_aux_device *sc_device = fp->private_data;
-+ int ret = 0;
-+ unsigned long flags = 0;
-+
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+
-+ poll_wait(fp, &sc_device->snoop_waitq, wait);
-+ if (!list_empty(&sc_device->snoop_queue))
-+ ret |= POLLIN | POLLRDNORM;
-+
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+ return ret;
-+
-+}
-+
-+static ssize_t qib_snoop_read(struct file *fp, char __user *data,
-+ size_t pkt_len, loff_t *off)
-+{
-+ struct qib_aux_device *sc_device = fp->private_data;
-+ ssize_t ret = 0;
-+ unsigned long flags = 0;
-+ struct snoop_packet *packet = NULL;
-+
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+
-+ while (list_empty(&sc_device->snoop_queue)) {
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+ if (fp->f_flags & O_NONBLOCK)
-+ return -EAGAIN;
-+
-+
-+ if (wait_event_interruptible(sc_device->snoop_waitq,
-+ !list_empty(&sc_device->snoop_queue)))
-+ return -EINTR;
-+
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+ }
-+
-+ if (!list_empty(&(sc_device->snoop_queue))) {
-+ packet = list_entry(sc_device->snoop_queue.next,
-+ struct snoop_packet, list);
-+ list_del(&packet->list);
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+ if (pkt_len >= packet->total_len) {
-+ if (copy_to_user(data, packet->data,
-+ packet->total_len))
-+ ret = -EFAULT;
-+ else
-+ ret = packet->total_len;
-+ } else
-+ ret = -EINVAL;
-+
-+ kfree(packet);
-+ } else
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+
-+ return ret;
-+}
-+
-+static long qib_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
-+{
-+ struct qib_aux_device *sc_device = fp->private_data;
-+ struct qib_pportdata *ppd = sc_device->pport;
-+ struct qib_devdata *dd = ppd->dd;
-+ void *filter_value = NULL;
-+ long ret = 0;
-+ int value = 0;
-+ u8 physState = 0;
-+ u8 linkState = 0;
-+ u16 devState = 0;
-+ unsigned long flags = 0;
-+ unsigned long *argp = NULL;
-+ struct qib_packet_filter_command filter_cmd = {0};
-+
-+ if (((_IOC_DIR(cmd) & _IOC_READ)
-+ && !access_ok(VERIFY_WRITE, (void __user *)arg, _IOC_SIZE(cmd)))
-+ || ((_IOC_DIR(cmd) & _IOC_WRITE)
-+ && !access_ok(VERIFY_READ, (void __user *)arg, _IOC_SIZE(cmd)))) {
-+ ret = -EFAULT;
-+ } else if (!capable(CAP_SYS_ADMIN)) {
-+ ret = -EPERM;
-+ } else if (sc_device != (&ppd->sc_device[QIB_SNOOP_DEV_INDEX])
-+ && cmd != QIB_SNOOP_IOCCLEARQUEUE
-+ && cmd != QIB_SNOOP_IOCCLEARFILTER
-+ && cmd != QIB_SNOOP_IOCSETFILTER) {
-+ /* Capture devices are allowed only 3 operations
-+ * 1.Clear capture queue
-+ * 2.Clear capture filter
-+ * 3.Set capture filter
-+ * Other are invalid.
-+ */
-+ ret = -EINVAL;
-+ } else {
-+ switch (cmd) {
-+ case QIB_SNOOP_IOCSETLINKSTATE:
-+ ret = __get_user(value, (int __user *) arg);
-+ if (ret != 0)
-+ break;
-+
-+ physState = (value >> 4) & 0xF;
-+ linkState = value & 0xF;
-+
-+ switch (linkState) {
-+ case IB_PORT_NOP:
-+ if (physState == 0)
-+ break;
-+ /* fall through */
-+ case IB_PORT_DOWN:
-+ switch (physState) {
-+ case 0:
-+ if (dd->f_ibphys_portstate &&
-+ (dd->f_ibphys_portstate(ppd->lastibcstat)
-+ & 0xF & IB_PHYSPORTSTATE_SLEEP))
-+ devState =
-+ QIB_IB_LINKDOWN_SLEEP;
-+ else
-+ devState =
-+ QIB_IB_LINKDOWN;
-+ break;
-+ case 1:
-+ devState = QIB_IB_LINKDOWN_SLEEP;
-+ break;
-+ case 2:
-+ devState = QIB_IB_LINKDOWN;
-+ break;
-+ case 3:
-+ devState = QIB_IB_LINKDOWN_DISABLE;
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ goto done;
-+ break;
-+ }
-+ ret = qib_set_linkstate(ppd, devState);
-+ break;
-+ case IB_PORT_ARMED:
-+ if (!(dd->flags &
-+ (QIB_IB_LINKARM | QIB_IB_LINKACTIVE))) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+ ret = qib_set_linkstate(ppd, QIB_IB_LINKARM);
-+ break;
-+ case IB_PORT_ACTIVE:
-+ if (!(dd->flags & QIB_IB_LINKARM)) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+ ret = qib_set_linkstate(ppd, QIB_IB_LINKACTIVE);
-+ break;
-+ default:
-+ ret = -EINVAL;
-+ break;
-+ }
-+
-+ if (ret)
-+ break;
-+ /* fall through */
-+
-+ case QIB_SNOOP_IOCGETLINKSTATE:
-+ value = dd->f_ibphys_portstate(ppd->lastibcstat);
-+ value <<= 4;
-+ value |= dd->f_iblink_state(ppd->lastibcstat);
-+ ret = __put_user(value, (int __user *)arg);
-+ break;
-+
-+ case QIB_SNOOP_IOCCLEARQUEUE:
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+ drain_snoop_list(sc_device);
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+ break;
-+
-+ case QIB_SNOOP_IOCCLEARFILTER:
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+ if (ppd->filter_callback) {
-+ /* Drain packets first */
-+ drain_snoop_list(sc_device);
-+ ppd->filter_callback = NULL;
-+ }
-+ kfree(ppd->filter_value);
-+ ppd->filter_value = NULL;
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+ break;
-+
-+ case QIB_SNOOP_IOCSETFILTER:
-+ /* just copy command structure */
-+ argp = (unsigned long *)arg;
-+ ret = copy_from_user(&filter_cmd, (u8 *)argp,
-+ sizeof(filter_cmd));
-+ if (ret < 0) {
-+ pr_alert("Error copying filter command\n");
-+ break;
-+ }
-+ if (filter_cmd.opcode >= QIB_MAX_FILTERS) {
-+ pr_alert("Invalid opcode in request\n");
-+ ret = -EINVAL;
-+ break;
-+ }
-+ filter_value = kzalloc(
-+ filter_cmd.length * sizeof(u8),
-+ GFP_KERNEL);
-+ if (!filter_value) {
-+ pr_alert("Not enough memory\n");
-+ ret = -ENOMEM;
-+ break;
-+ }
-+ /* copy remaining data from userspace */
-+ ret = copy_from_user((u8 *)filter_value,
-+ (u8 *)filter_cmd.value_ptr,
-+ filter_cmd.length);
-+ if (ret < 0) {
-+ kfree(filter_value);
-+ pr_alert("Error copying filter data\n");
-+ break;
-+ }
-+ /* Drain packets first */
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+ drain_snoop_list(sc_device);
-+ ppd->filter_callback =
-+ qib_filters[filter_cmd.opcode].filter;
-+ /* just in case we see back to back sets */
-+ kfree(ppd->filter_value);
-+ ppd->filter_value = filter_value;
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+ break;
-+
-+ default:
-+ ret = -ENOTTY;
-+ break;
-+ }
-+ }
-+done:
-+ return ret;
-+}
-+
-+
-+static ssize_t qib_pio_send_pkt(struct qib_pportdata *ppd,
-+ u32 *data, u32 pkt_len)
-+{
-+ int i;
-+ u64 pbc;
-+ u32 __iomem *piobuf;
-+ u32 pnum, control, len;
-+ struct qib_devdata *dd = ppd->dd;
-+ u32 dwords = pkt_len >> 2;
-+ unsigned long flags;
-+ ssize_t ret = -EINVAL;
-+
-+ i = 0;
-+ len = dwords + 1;
-+ control = dd->f_setpbc_control(ppd, len, 0,
-+ (((u8 *)data)[0] >> 4) & 0xf);
-+ pbc = ((u64) control << 32) | len;
-+ while (!(piobuf = dd->f_getsendbuf(ppd, pbc, &pnum))) {
-+ if (i > 15) {
-+ ret = -ENOMEM;
-+ goto Err;
-+ }
-+ i++;
-+ /* lets try to flush all of it */
-+ dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL);
-+ udelay(100);
-+ }
-+ spin_lock_irqsave(&ppd->snoop_write_lock, flags);
-+ /* disable header check on this packet, since it can't be valid */
-+ dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_DIS1, NULL);
-+ writeq(pbc, piobuf);
-+ qib_flush_wc();
-+ if (dd->flags & QIB_PIO_FLUSH_WC) {
-+ qib_flush_wc();
-+ qib_pio_copy(piobuf + 2, data, dwords - 1);
-+ qib_flush_wc();
-+ __raw_writel(data[dwords - 1], piobuf + dwords + 1);
-+ } else
-+ qib_pio_copy(piobuf + 2, data, dwords);
-+ if (dd->flags & QIB_USE_SPCL_TRIG) {
-+ u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023;
-+
-+ qib_flush_wc();
-+ __raw_writel(0xaebecede, piobuf + spcl_off);
-+ }
-+ qib_sendbuf_done(dd, pnum);
-+ qib_flush_wc();
-+ /* and re-enable hdr check */
-+ dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
-+ spin_unlock_irqrestore(&ppd->snoop_write_lock, flags);
-+ ret = pkt_len;
-+Err:
-+ return ret;
-+}
-+
-+
-+static ssize_t qib_snoop_write(struct file *fp, const char __user *data,
-+ size_t pkt_len, loff_t *off)
-+{
-+ struct qib_aux_device *sc_device = fp->private_data;
-+ struct qib_pportdata *ppd = sc_device->pport;
-+ struct qib_devdata *dd = ppd->dd;
-+ ssize_t ret = 0;
-+ u32 *buffer = NULL;
-+ u32 plen, clen;
-+
-+ /* capture device should not be entertaining writes */
-+ if (sc_device != (&ppd->sc_device[QIB_SNOOP_DEV_INDEX])) {
-+ ret = -EINVAL;
-+ goto bail;
-+ }
-+
-+ if (pkt_len == 0)
-+ goto bail;
-+
-+ if (pkt_len & 3) {
-+ ret = -EINVAL;
-+ goto bail;
-+ }
-+
-+ clen = pkt_len >> 2;
-+
-+ if (!dd || !(dd->flags & QIB_PRESENT) ||
-+ !dd->kregbase) {
-+ ret = -ENODEV;
-+ goto bail;
-+ }
-+
-+ if (!(dd->flags & QIB_INITTED)) {
-+ /* no hardware, freeze, etc. */
-+ ret = -ENODEV;
-+ goto bail;
-+ }
-+
-+ plen = sizeof(u32) + pkt_len;
-+
-+ if ((plen + 4) > ppd->ibmaxlen) {
-+ ret = -EINVAL;
-+ goto bail; /* before writing pbc */
-+ }
-+
-+ buffer = vmalloc(plen);
-+ if (!buffer) {
-+ ret = -ENOMEM;
-+ goto bail;
-+ }
-+ if (copy_from_user(buffer,
-+ (const void __user *) (unsigned long) data, pkt_len)) {
-+ ret = -EFAULT;
-+ goto bail;
-+ }
-+
-+ ret = qib_pio_send_pkt(ppd, buffer, pkt_len);
-+
-+bail:
-+ vfree(buffer);
-+
-+ return ret;
-+}
-+
-+int snoop_get_header_size(struct qib_devdata *dd,
-+ struct qib_ib_header *hdr,
-+ void *data, u32 tlen)
-+{
-+ int lnh, header_size = -1;
-+ u8 opcode, opcode_major;
-+ struct qib_other_headers *ohdr;
-+
-+ lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+
-+ if (lnh == QIB_LRH_BTH)
-+ ohdr = &hdr->u.oth;
-+ else if (lnh == QIB_LRH_GRH)
-+ ohdr = &hdr->u.l.oth;
-+ else
-+ goto bail;
-+
-+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-+
-+ opcode_major = (opcode >> 5) & 0x7;
-+
-+ switch (opcode_major) {
-+ case 0x03: /* UD */
-+ if (lnh == QIB_LRH_BTH)
-+ header_size = 8 + 12 + 8 /* LRH + BTH + DETH */;
-+ else if (lnh == QIB_LRH_GRH) {
-+
-+ /* LRH + GRH + BTH + DETH */;
-+ header_size = 8 + 40 + 12 + 8;
-+ /* Some of the header data is in the data segment */
-+ if (dd->rcvhdrentsize == 16)
-+ header_size -= 12;
-+ } else
-+ header_size = -1;
-+
-+ break;
-+ case 0x0: /* RC */
-+ case 0x1: /* UC */
-+ case 0x2: /* RD */
-+ default:
-+ header_size = -1;
-+ break;
-+ }
-+
-+bail:
-+ return header_size;
-+}
-+
-+static void qib_snoop_list_add_tail(struct snoop_packet *packet,
-+ struct qib_pportdata *ppd,
-+ int dev_index)
-+{
-+ unsigned long flags = 0;
-+ struct qib_aux_device *sc_device = &ppd->sc_device[dev_index];
-+
-+ spin_lock_irqsave(&sc_device->snoop_lock, flags);
-+ if (likely((dev_index == QIB_CAPTURE_DEV_INDEX &&
-+ (ppd->mode_flag & QIB_PORT_CAPTURE_MODE)) ||
-+ (dev_index == QIB_SNOOP_DEV_INDEX &&
-+ (ppd->mode_flag & QIB_PORT_SNOOP_MODE))))
-+ list_add_tail(&(packet->list), &sc_device->snoop_queue);
-+ spin_unlock_irqrestore(&sc_device->snoop_lock, flags);
-+ wake_up_interruptible(&sc_device->snoop_waitq);
-+}
-+
-+void qib_snoop_send_queue_packet(struct qib_pportdata *ppd,
-+ struct snoop_packet *packet)
-+{
-+ /* If we are dealing with mix mode then we need to make another copy
-+ * of same packet and queue it in snoop device as well.
-+ * However if we do not get sufficient memory here then we just
-+ * add packet to capture queue by default so that we atleast have one
-+ * packet with us in capture queue.
-+ */
-+ if (unlikely(ppd->mode_flag ==
-+ (QIB_PORT_SNOOP_MODE | QIB_PORT_CAPTURE_MODE))) {
-+ struct snoop_packet *pcopy;
-+ pcopy = kmalloc(sizeof(*pcopy) + packet->total_len, GFP_ATOMIC);
-+ if (pcopy != NULL) {
-+ memcpy(pcopy, packet,
-+ packet->total_len + sizeof(*pcopy));
-+ qib_snoop_list_add_tail(pcopy, ppd,
-+ QIB_SNOOP_DEV_INDEX);
-+ }
-+ qib_snoop_list_add_tail(packet, ppd, QIB_CAPTURE_DEV_INDEX);
-+ } else if (ppd->mode_flag == QIB_PORT_CAPTURE_MODE)
-+ qib_snoop_list_add_tail(packet, ppd, QIB_CAPTURE_DEV_INDEX);
-+ else if (ppd->mode_flag == QIB_PORT_SNOOP_MODE)
-+ qib_snoop_list_add_tail(packet, ppd, QIB_SNOOP_DEV_INDEX);
-+}
-+
-+/*
-+ * qib_snoop_rcv_queue_packet - receive a packet for snoop interface
-+ * @port - Hca port on which this packet is received.
-+ * @rhdr - Packet header
-+ * @data - Packet data/payloaa
-+ * @tlen - total length of packet including header and payload.
-+ *
-+ * Called on for every packet received when snooping/mix mode is turned on
-+ * Copies received packet to internal buffer and appends it to
-+ * packet list.
-+ *
-+ * Returns,
-+ * 0 if this packet needs to be forwarded by driver
-+ * 1 if this packet needs to be dropped by driver
-+ */
-+
-+int qib_snoop_rcv_queue_packet(struct qib_pportdata *port, void *rhdr,
-+ void *data, u32 tlen)
-+{
-+ int header_size = 0;
-+ struct qib_ib_header *hdr = rhdr;
-+ struct snoop_packet *packet = NULL;
-+
-+ header_size = snoop_get_header_size(port->dd, hdr, data, tlen);
-+ if (header_size <= 0)
-+ return 0;
-+
-+ /* qib_snoop_send_queue_packet takes care or mix mode,
-+ * so just return from here.
-+ */
-+ if (port->mode_flag == (QIB_PORT_SNOOP_MODE | QIB_PORT_CAPTURE_MODE))
-+ return 0;
-+
-+ packet = kmalloc(sizeof(struct snoop_packet) + tlen,
-+ GFP_ATOMIC);
-+ if (likely(packet)) {
-+ memcpy(packet->data, rhdr, header_size);
-+ memcpy(packet->data + header_size, data,
-+ tlen - header_size);
-+ packet->total_len = tlen;
-+ qib_snoop_list_add_tail(packet, port, QIB_SNOOP_DEV_INDEX);
-+ return 1;
-+ }
-+
-+ return 0;
-+}
-+
-+static int qib_filter_lid(void *ibhdr, void *packet_data, void *value)
-+{
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+ if (*((u16 *)value) == be16_to_cpu(hdr->lrh[3]))
-+ return 0; /* matched */
-+ return 1; /* Not matched */
-+}
-+
-+static int qib_filter_dlid(void *ibhdr, void *packet_data, void *value)
-+{
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+ if (*((u16 *)value) == be16_to_cpu(hdr->lrh[1]))
-+ return 0;
-+ return 1;
-+}
-+
-+static int qib_filter_mad_mgmt_class(void *ibhdr, void *packet_data,
-+ void *value)
-+{
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+ struct qib_other_headers *ohdr = NULL;
-+ struct ib_smp *smp = NULL;
-+ u32 qpn = 0;
-+
-+ /* packet_data could be null if only header is captured */
-+ if (packet_data == NULL)
-+ return 1;
-+ /* Check for GRH */
-+ if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
-+ ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-+ else
-+ ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-+ qpn = be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF;
-+ if (qpn <= 1) {
-+ smp = (struct ib_smp *)packet_data;
-+ if (*((u8 *)value) == smp->mgmt_class)
-+ return 0;
-+ else
-+ return 1;
-+ }
-+ return 1;
-+}
-+
-+static int qib_filter_qp_number(void *ibhdr, void *packet_data, void *value)
-+{
-+
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+ struct qib_other_headers *ohdr = NULL;
-+
-+ /* Check for GRH */
-+ if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
-+ ohdr = &hdr->u.oth; /* LRH + BTH + DETH */
-+ else
-+ ohdr = &hdr->u.l.oth; /* LRH + GRH + BTH + DETH */
-+ if (*((u32 *)value) == (be32_to_cpu(ohdr->bth[1]) & 0x00FFFFFF))
-+ return 0;
-+ return 1;
-+}
-+
-+
-+static int qib_filter_ibpacket_type(void *ibhdr, void *packet_data,
-+ void *value)
-+{
-+ u32 lnh = 0;
-+ u8 opcode = 0;
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+ struct qib_other_headers *ohdr = NULL;
-+
-+ lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+
-+ if (lnh == QIB_LRH_BTH)
-+ ohdr = &hdr->u.oth;
-+ else if (lnh == QIB_LRH_GRH)
-+ ohdr = &hdr->u.l.oth;
-+ else
-+ return 1;
-+
-+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
-+
-+ if (*((u8 *)value) == ((opcode >> 5) & 0x7))
-+ return 0;
-+ return 1;
-+}
-+
-+static int qib_filter_ib_service_level(void *ibhdr, void *packet_data,
-+ void *value)
-+{
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+
-+ if ((*((u8 *)value)) == (be16_to_cpu(hdr->lrh[0] >> 4) & 0xF))
-+ return 0;
-+ return 1;
-+}
-+
-+static int qib_filter_ib_pkey(void *ibhdr, void *packet_data, void *value)
-+{
-+
-+ u32 lnh = 0;
-+ struct qib_ib_header *hdr = (struct qib_ib_header *)ibhdr;
-+ struct qib_other_headers *ohdr = NULL;
-+
-+ lnh = (be16_to_cpu(hdr->lrh[0]) & 3);
-+ if (lnh == QIB_LRH_BTH)
-+ ohdr = &hdr->u.oth;
-+ else if (lnh == QIB_LRH_GRH)
-+ ohdr = &hdr->u.l.oth;
-+ else
-+ return 1;
-+
-+ /* P_key is 16-bit entity, however top most bit indicates
-+ * type of membership. 0 for limited and 1 for Full.
-+ * Limited members cannot accept information from other
-+ * Limited members, but communication is allowed between
-+ * every other combination of membership.
-+ * Hence we'll omitt comparing top-most bit while filtering
-+ */
-+
-+ if ((*(u16 *)value & 0x7FFF) ==
-+ ((be32_to_cpu(ohdr->bth[0])) & 0x7FFF))
-+ return 0;
-+ return 1;
-+}
-diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
-index d0a0ea0..a98635d 100644
---- a/drivers/infiniband/hw/qib/qib_user_sdma.c
-+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
-@@ -1,4 +1,5 @@
- /*
-+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
- * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
-@@ -52,83 +53,65 @@
- /* attempt to drain the queue for 5secs */
- #define QIB_USER_SDMA_DRAIN_TIMEOUT 500
-
--struct qib_user_sdma_pkt {
-- struct list_head list; /* list element */
--
-- u8 tiddma; /* if this is NEW tid-sdma */
-- u8 largepkt; /* this is large pkt from kmalloc */
-- u16 frag_size; /* frag size used by PSM */
-- u16 index; /* last header index or push index */
-- u16 naddr; /* dimension of addr (1..3) ... */
-- u16 addrlimit; /* addr array size */
-- u16 tidsmidx; /* current tidsm index */
-- u16 tidsmcount; /* tidsm array item count */
-- u16 payload_size; /* payload size so far for header */
-- u32 bytes_togo; /* bytes for processing */
-- u32 counter; /* sdma pkts queued counter for this entry */
-- struct qib_tid_session_member *tidsm; /* tid session member array */
-- struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
-- u64 added; /* global descq number of entries */
--
-- struct {
-- u16 offset; /* offset for kvaddr, addr */
-- u16 length; /* length in page */
-- u16 first_desc; /* first desc */
-- u16 last_desc; /* last desc */
-- u16 put_page; /* should we put_page? */
-- u16 dma_mapped; /* is page dma_mapped? */
-- u16 dma_length; /* for dma_unmap_page() */
-- u16 padding;
-- struct page *page; /* may be NULL (coherent mem) */
-- void *kvaddr; /* FIXME: only for pio hack */
-- dma_addr_t addr;
-- } addr[4]; /* max pages, any more and we coalesce */
-+/*
-+ * track how many times a process open this driver.
-+ */
-+struct rb_root qib_user_sdma_rb_root = RB_ROOT;
-+
-+struct qib_user_sdma_rb_node {
-+ struct rb_node node;
-+ int refcount;
-+ pid_t pid;
- };
-
--struct qib_user_sdma_queue {
-- /*
-- * pkts sent to dma engine are queued on this
-- * list head. the type of the elements of this
-- * list are struct qib_user_sdma_pkt...
-- */
-- struct list_head sent;
-+static struct qib_user_sdma_rb_node *
-+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
-+{
-+ struct qib_user_sdma_rb_node *sdma_rb_node;
-+ struct rb_node *node = root->rb_node;
-+
-+ while (node) {
-+ sdma_rb_node = container_of(node,
-+ struct qib_user_sdma_rb_node, node);
-+ if (pid < sdma_rb_node->pid)
-+ node = node->rb_left;
-+ else if (pid > sdma_rb_node->pid)
-+ node = node->rb_right;
-+ else
-+ return sdma_rb_node;
-+ }
-+ return NULL;
-+}
-
-- /*
-- * Because above list will be accessed by both process and
-- * signal handler, we need a spinlock for it.
-- */
-- spinlock_t sent_lock ____cacheline_aligned_in_smp;
--
-- /* headers with expected length are allocated from here... */
-- char header_cache_name[64];
-- struct dma_pool *header_cache;
--
-- /* packets are allocated from the slab cache... */
-- char pkt_slab_name[64];
-- struct kmem_cache *pkt_slab;
--
-- /* as packets go on the queued queue, they are counted... */
-- u32 counter;
-- u32 sent_counter;
-- /* pending packets, not sending yet */
-- u32 num_pending;
-- /* sending packets, not complete yet */
-- u32 num_sending;
-- /* global descq number of entry of last sending packet */
-- u64 added;
--
-- /* dma page table */
-- struct rb_root dma_pages_root;
--
-- /* protect everything above... */
-- struct mutex lock;
--};
-+static int
-+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
-+{
-+ struct rb_node **node = &(root->rb_node);
-+ struct rb_node *parent = NULL;
-+ struct qib_user_sdma_rb_node *got;
-+
-+ while (*node) {
-+ got = container_of(*node, struct qib_user_sdma_rb_node, node);
-+ parent = *node;
-+ if (new->pid < got->pid)
-+ node = &((*node)->rb_left);
-+ else if (new->pid > got->pid)
-+ node = &((*node)->rb_right);
-+ else
-+ return 0;
-+ }
-+
-+ rb_link_node(&new->node, parent, node);
-+ rb_insert_color(&new->node, root);
-+ return 1;
-+}
-
- struct qib_user_sdma_queue *
- qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
- {
- struct qib_user_sdma_queue *pq =
- kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
-+ struct qib_user_sdma_rb_node *sdma_rb_node;
-
- if (!pq)
- goto done;
-@@ -138,6 +121,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
- pq->num_pending = 0;
- pq->num_sending = 0;
- pq->added = 0;
-+ pq->sdma_rb_node = NULL;
-
- INIT_LIST_HEAD(&pq->sent);
- spin_lock_init(&pq->sent_lock);
-@@ -163,8 +147,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
-
- pq->dma_pages_root = RB_ROOT;
-
-+ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
-+ current->pid);
-+ if (sdma_rb_node) {
-+ sdma_rb_node->refcount++;
-+ } else {
-+ int ret;
-+ sdma_rb_node = kmalloc(sizeof(
-+ struct qib_user_sdma_rb_node), GFP_KERNEL);
-+ if (!sdma_rb_node)
-+ goto err_rb;
-+
-+ sdma_rb_node->refcount = 1;
-+ sdma_rb_node->pid = current->pid;
-+
-+ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
-+ sdma_rb_node);
-+ BUG_ON(ret == 0);
-+ }
-+ pq->sdma_rb_node = sdma_rb_node;
-+
- goto done;
-
-+err_rb:
-+ dma_pool_destroy(pq->header_cache);
- err_slab:
- kmem_cache_destroy(pq->pkt_slab);
- err_kfree:
-@@ -175,12 +181,12 @@ done:
- return pq;
- }
-
--static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-- int i, u16 offset, u16 len,
-- u16 first_desc, u16 last_desc,
-- u16 put_page, u16 dma_mapped,
-- struct page *page, void *kvaddr,
-- dma_addr_t dma_addr, u16 dma_length)
-+void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-+ int i, u16 offset, u16 len,
-+ u16 first_desc, u16 last_desc,
-+ u16 put_page, u16 dma_mapped,
-+ struct page *page, void *kvaddr,
-+ dma_addr_t dma_addr, u16 dma_length)
- {
- pkt->addr[i].offset = offset;
- pkt->addr[i].length = len;
-@@ -194,7 +200,7 @@ static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
- pkt->addr[i].dma_length = dma_length;
- }
-
--static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-+void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
- size_t len, dma_addr_t *dma_addr)
- {
- void *hdr;
-@@ -216,11 +222,11 @@ static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
- return hdr;
- }
-
--static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-- struct qib_user_sdma_queue *pq,
-- struct qib_user_sdma_pkt *pkt,
-- struct page *page, u16 put,
-- u16 offset, u16 len, void *kvaddr)
-+int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-+ struct qib_user_sdma_queue *pq,
-+ struct qib_user_sdma_pkt *pkt,
-+ struct page *page, u16 put,
-+ u16 offset, u16 len, void *kvaddr)
- {
- __le16 *pbc16;
- void *pbcvaddr;
-@@ -235,21 +241,27 @@ static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
- int ret = 0;
-
- if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
-- /*
-- * dma mapping error, pkt has not managed
-- * this page yet, return the page here so
-- * the caller can ignore this page.
-- */
-- if (put) {
-- put_page(page);
-- } else {
-- /* coalesce case */
-- kunmap(page);
-- __free_page(page);
-+#ifdef QIB_CONFIG_KNX
-+ if (!pkt->remote) {
-+#endif
-+ /*
-+ * dma mapping error, pkt has not managed
-+ * this page yet, return the page here so
-+ * the caller can ignore this page.
-+ */
-+ if (put) {
-+ put_page(page);
-+ } else {
-+ /* coalesce case */
-+ kunmap(page);
-+ __free_page(page);
-+ }
-+ ret = -ENOMEM;
-+ goto done;
- }
-- ret = -ENOMEM;
-- goto done;
-+#ifdef QIB_CONFIG_KNX
- }
-+#endif
- offset = 0;
- dma_mapped = 1;
-
-@@ -551,13 +563,19 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev,
- pkt->addr[i].dma_length,
- DMA_TO_DEVICE);
-
-- if (pkt->addr[i].kvaddr)
-- kunmap(pkt->addr[i].page);
-+#ifdef QIB_CONFIG_KNX
-+ if (!pkt->remote) {
-+#endif
-+ if (pkt->addr[i].kvaddr)
-+ kunmap(pkt->addr[i].page);
-
-- if (pkt->addr[i].put_page)
-- put_page(pkt->addr[i].page);
-- else
-- __free_page(pkt->addr[i].page);
-+ if (pkt->addr[i].put_page)
-+ put_page(pkt->addr[i].page);
-+ else
-+ __free_page(pkt->addr[i].page);
-+#ifdef QIB_CONFIG_KNX
-+ }
-+#endif
- } else if (pkt->addr[i].kvaddr) {
- /* for headers */
- if (pkt->addr[i].dma_mapped) {
-@@ -697,9 +715,9 @@ static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
- }
-
- /* free a packet list -- return counter value of last packet */
--static void qib_user_sdma_free_pkt_list(struct device *dev,
-- struct qib_user_sdma_queue *pq,
-- struct list_head *list)
-+void qib_user_sdma_free_pkt_list(struct device *dev,
-+ struct qib_user_sdma_queue *pq,
-+ struct list_head *list)
- {
- struct qib_user_sdma_pkt *pkt, *pkt_next;
-
-@@ -709,6 +727,10 @@ static void qib_user_sdma_free_pkt_list(struct device *dev,
- for (i = 0; i < pkt->naddr; i++)
- qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
-
-+#ifdef QIB_CONFIG_KNX
-+ if (pkt->remote)
-+ qib_knx_sdma_free_pkt(pkt);
-+#endif
- if (pkt->largepkt)
- kfree(pkt);
- else
-@@ -892,6 +914,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
- pkt->payload_size = 0;
- pkt->counter = counter;
- pkt->tiddma = tiddma;
-+ pkt->remote = 0;
-
- /* setup the first header */
- qib_user_sdma_init_frag(pkt, 0, /* index */
-@@ -967,8 +990,8 @@ static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
- }
-
- /* try to clean out queue -- needs pq->lock */
--static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-- struct qib_user_sdma_queue *pq)
-+int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-+ struct qib_user_sdma_queue *pq)
- {
- struct qib_devdata *dd = ppd->dd;
- struct list_head free_list;
-@@ -1021,13 +1044,18 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
- if (!pq)
- return;
-
-- kmem_cache_destroy(pq->pkt_slab);
-+ pq->sdma_rb_node->refcount--;
-+ if (pq->sdma_rb_node->refcount == 0) {
-+ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
-+ kfree(pq->sdma_rb_node);
-+ }
- dma_pool_destroy(pq->header_cache);
-+ kmem_cache_destroy(pq->pkt_slab);
- kfree(pq);
- }
-
- /* clean descriptor queue, returns > 0 if some elements cleaned */
--static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
-+int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
- {
- int ret;
- unsigned long flags;
-@@ -1238,30 +1266,56 @@ retry:
- }
-
- /* pq->lock must be held, get packets on the wire... */
--static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-- struct qib_user_sdma_queue *pq,
-- struct list_head *pktlist, int count)
-+int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-+ struct qib_user_sdma_queue *pq,
-+ struct list_head *pktlist, int count)
- {
-- int ret = 0;
- unsigned long flags;
-
- if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
- return -ECOMM;
-
-- spin_lock_irqsave(&ppd->sdma_lock, flags);
--
-- if (unlikely(!__qib_sdma_running(ppd))) {
-- ret = -ECOMM;
-- goto unlock;
-+ /* non-blocking mode */
-+ if (pq->sdma_rb_node->refcount > 1) {
-+ spin_lock_irqsave(&ppd->sdma_lock, flags);
-+ if (unlikely(!__qib_sdma_running(ppd))) {
-+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+ return -ECOMM;
-+ }
-+ pq->num_pending += count;
-+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
-+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+ return 0;
- }
-
-+ /* In this case, descriptors from this process are not
-+ * linked to ppd pending queue, interrupt handler
-+ * won't update this process, it is OK to directly
-+ * modify without sdma lock.
-+ */
-+
-+
- pq->num_pending += count;
-- list_splice_tail_init(pktlist, &ppd->sdma_userpending);
-- qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
-+ /*
-+ * Blocking mode for single rail process, we must
-+ * release/regain sdma_lock to give other process
-+ * chance to make progress. This is important for
-+ * performance.
-+ */
-+ do {
-+ spin_lock_irqsave(&ppd->sdma_lock, flags);
-+ if (unlikely(!__qib_sdma_running(ppd))) {
-+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+ return -ECOMM;
-+ }
-+ qib_user_sdma_send_desc(ppd, pktlist);
-+ if (!list_empty(pktlist))
-+ qib_sdma_make_progress(ppd);
-+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-+ } while (!list_empty(pktlist));
-
--unlock:
-- spin_unlock_irqrestore(&ppd->sdma_lock, flags);
-- return ret;
-+ return 0;
- }
-
- int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
-@@ -1291,7 +1345,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
- qib_user_sdma_queue_clean(ppd, pq);
-
- while (dim) {
-- int mxp = 8;
-+ int mxp = 1;
- int ndesc = 0;
-
- down_write(¤t->mm->mmap_sem);
-diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
-index ce8cbaf..93ce40b 100644
---- a/drivers/infiniband/hw/qib/qib_user_sdma.h
-+++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
-@@ -31,12 +31,108 @@
- */
- #include <linux/device.h>
-
--struct qib_user_sdma_queue;
-+struct qib_user_sdma_pkt {
-+ struct list_head list; /* list element */
-+
-+ u8 tiddma; /* if this is NEW tid-sdma */
-+ u8 largepkt; /* this is large pkt from kmalloc */
-+ u16 frag_size; /* frag size used by PSM */
-+ u16 index; /* last header index or push index */
-+ u16 naddr; /* dimension of addr (1..3) ... */
-+ u16 addrlimit; /* addr array size */
-+ u16 tidsmidx; /* current tidsm index */
-+ u16 tidsmcount; /* tidsm array item count */
-+ u16 payload_size; /* payload size so far for header */
-+ u32 bytes_togo; /* bytes for processing */
-+ u32 counter; /* sdma pkts queued counter for this entry */
-+ struct qib_tid_session_member *tidsm; /* tid session member array */
-+ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
-+ u64 added; /* global descq number of entries */
-+#ifdef QIB_CONFIG_KNX
-+ u64 remote; /* does the packet original on the host */
-+#endif
-+
-+ struct {
-+ u16 offset; /* offset for kvaddr, addr */
-+ u16 length; /* length in page */
-+ u16 first_desc; /* first desc */
-+ u16 last_desc; /* last desc */
-+ u16 put_page; /* should we put_page? */
-+ u16 dma_mapped; /* is page dma_mapped? */
-+ u16 dma_length; /* for dma_unmap_page() */
-+ u16 padding;
-+ struct page *page; /* may be NULL (coherent mem) */
-+ void *kvaddr; /* FIXME: only for pio hack */
-+ dma_addr_t addr;
-+ } addr[4]; /* max pages, any more and we coalesce */
-+};
-+
-+struct qib_user_sdma_queue {
-+ /*
-+ * pkts sent to dma engine are queued on this
-+ * list head. the type of the elements of this
-+ * list are struct qib_user_sdma_pkt...
-+ */
-+ struct list_head sent;
-+
-+ /*
-+ * Because above list will be accessed by both process and
-+ * signal handler, we need a spinlock for it.
-+ */
-+ spinlock_t sent_lock ____cacheline_aligned_in_smp;
-+
-+ /* headers with expected length are allocated from here... */
-+ char header_cache_name[64];
-+ struct dma_pool *header_cache;
-+
-+ /* packets are allocated from the slab cache... */
-+ char pkt_slab_name[64];
-+ struct kmem_cache *pkt_slab;
-+
-+ /* as packets go on the queued queue, they are counted... */
-+ u32 counter;
-+ u32 sent_counter;
-+ /* pending packets, not sending yet */
-+ u32 num_pending;
-+ /* sending packets, not complete yet */
-+ u32 num_sending;
-+ /* global descq number of entry of last sending packet */
-+ u64 added;
-+
-+ /* dma page table */
-+ struct rb_root dma_pages_root;
-+
-+ struct qib_user_sdma_rb_node *sdma_rb_node;
-+
-+ /* protect everything above... */
-+ struct mutex lock;
-+};
-
- struct qib_user_sdma_queue *
- qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
- void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
--
-+void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
-+ size_t len, dma_addr_t *dma_addr);
-+void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
-+ int i, u16 offset, u16 len,
-+ u16 first_desc, u16 last_desc,
-+ u16 put_page, u16 dma_mapped,
-+ struct page *page, void *kvaddr,
-+ dma_addr_t dma_addr, u16 dma_length);
-+int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
-+ struct qib_user_sdma_queue *pq,
-+ struct qib_user_sdma_pkt *pkt,
-+ struct page *page, u16 put,
-+ u16 offset, u16 len, void *kvaddr);
-+int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd);
-+int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
-+ struct qib_user_sdma_queue *pq);
-+void qib_user_sdma_free_pkt_list(struct device *dev,
-+ struct qib_user_sdma_queue *pq,
-+ struct list_head *list);
-+int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
-+ struct qib_user_sdma_queue *pq,
-+ struct list_head *pktlist, int count);
- int qib_user_sdma_writev(struct qib_ctxtdata *pd,
- struct qib_user_sdma_queue *pq,
- const struct iovec *iov,
-@@ -50,3 +146,8 @@ void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
-
- u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
- u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
-+/*
-+ * This function prototype somewhat polutes this header file
-+ * but I don't want to create a new header file just for it.
-+ */
-+void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt);
-diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
-index 092b0bb..687c216 100644
---- a/drivers/infiniband/hw/qib/qib_verbs.c
-+++ b/drivers/infiniband/hw/qib/qib_verbs.c
-@@ -621,6 +621,15 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
- if (unlikely(tlen < 24))
- goto drop;
-
-+ if (ppd->mode_flag & QIB_PORT_SNOOP_MODE) {
-+ int nomatch = 0;
-+ if (ppd->filter_callback)
-+ nomatch = ppd->filter_callback(hdr, data,
-+ ppd->filter_value);
-+ if (nomatch == 0 &&
-+ qib_snoop_rcv_queue_packet(ppd, rhdr, data, tlen))
-+ goto drop;
-+ }
- /* Check for a valid destination LID (see ch. 7.11.1). */
- lid = be16_to_cpu(hdr->lrh[1]);
- if (lid < QIB_MULTICAST_LID_BASE) {
-@@ -789,11 +798,17 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
- #endif
-
- static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
-- u32 length, unsigned flush_wc)
-+ u32 length, unsigned flush_wc, struct snoop_packet *packet,
-+ u8 *data_orig)
- {
- u32 extra = 0;
- u32 data = 0;
- u32 last;
-+ u32 *packet_data = NULL;
-+
-+ /* This ensures copying word at a time */
-+ if (packet)
-+ packet_data = (u32 *)data_orig;
-
- while (1) {
- u32 len = ss->sge.length;
-@@ -825,6 +840,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- }
- __raw_writel(data, piobuf);
- piobuf++;
-+ if (packet_data) {
-+ *packet_data = data;
-+ packet_data++;
-+ }
- extra = 0;
- data = 0;
- } else {
-@@ -851,6 +870,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- data = get_upper_bits(v, ushift);
- piobuf++;
- addr++;
-+ if (packet_data) {
-+ *packet_data = data;
-+ packet_data++;
-+ }
- l -= sizeof(u32);
- }
- /*
-@@ -868,6 +891,10 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- }
- __raw_writel(data, piobuf);
- piobuf++;
-+ if (packet_data) {
-+ *packet_data = data;
-+ packet_data++;
-+ }
- extra = 0;
- data = 0;
- } else {
-@@ -894,12 +921,20 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
- qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
- piobuf += w - 1;
- last = ((u32 *) ss->sge.vaddr)[w - 1];
-+ if (packet_data) {
-+ memcpy(packet_data, ss->sge.vaddr, len);
-+ packet_data += w;
-+ }
- break;
- } else {
- u32 w = len >> 2;
-
- qib_pio_copy(piobuf, ss->sge.vaddr, w);
- piobuf += w;
-+ if (packet_data) {
-+ memcpy(packet_data, ss->sge.vaddr, len);
-+ packet_data += w;
-+ }
-
- extra = len & (sizeof(u32) - 1);
- if (extra) {
-@@ -1144,12 +1179,13 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- u32 control;
- u32 ndesc;
- int ret;
-+ struct snoop_packet *packet = NULL;
-
- tx = qp->s_tx;
- if (tx) {
- qp->s_tx = NULL;
- /* resend previously constructed packet */
-- ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
-+ ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx, NULL);
- goto bail;
- }
-
-@@ -1173,6 +1209,19 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- if (plen + 1 > dd->piosize2kmax_dwords)
- tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
-
-+ if (ppd->mode_flag) {
-+ int nomatch = 0;
-+ if (ppd->filter_callback)
-+ nomatch = ppd->filter_callback(hdr, NULL,
-+ ppd->filter_value);
-+ if (nomatch == 0) {
-+ packet = kzalloc(sizeof(*packet)+QIB_GET_PKT_LEN(hdr),
-+ GFP_ATOMIC);
-+ if (packet)
-+ packet->total_len = QIB_GET_PKT_LEN(hdr);
-+ }
-+ }
-+
- if (len) {
- /*
- * Don't try to DMA if it takes more descriptors than
-@@ -1193,7 +1242,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- tx->txreq.addr = dev->pio_hdrs_phys +
- tx->hdr_inx * sizeof(struct qib_pio_header);
- tx->hdr_dwords = hdrwords + 2; /* add PBC length */
-- ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
-+ if (packet)
-+ memcpy(packet->data, hdr, (hdrwords << 2));
-+ ret = qib_sdma_verbs_send(ppd, ss, dwords, tx, packet);
- goto bail;
- }
-
-@@ -1206,6 +1257,12 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- phdr->pbc[1] = cpu_to_le32(control);
- memcpy(&phdr->hdr, hdr, hdrwords << 2);
- qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
-+ if (packet) {
-+ memcpy(packet->data, &phdr->hdr, (hdrwords << 2));
-+ memcpy(packet->data+(hdrwords << 2),
-+ (u8 *)((u32 *) &phdr->hdr + hdrwords),
-+ len);
-+ }
-
- tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
- tx->hdr_dwords << 2, DMA_TO_DEVICE);
-@@ -1214,7 +1271,7 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
- tx->align_buf = phdr;
- tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
- tx->txreq.sg_count = 1;
-- ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
-+ ret = qib_sdma_verbs_send(ppd, NULL, 0, tx, NULL);
- goto unaligned;
-
- map_err:
-@@ -1222,9 +1279,24 @@ map_err:
- err_tx:
- qib_put_txreq(tx);
- ret = wait_kmem(dev, qp);
-+ /* If wait_kmem returns 0 then
-+ * (ret==0) will hold true and we don't want
-+ * that as it will add ignored packet in list,
-+ * so free packet here.
-+ */
-+ kfree(packet);
-+ packet = NULL;
- unaligned:
- ibp->n_unaligned++;
- bail:
-+ if (packet) {
-+ if (ret == 0)
-+ qib_snoop_send_queue_packet(ppd, packet);
-+ else {
-+ kfree(packet);
-+ packet = NULL;
-+ }
-+ }
- return ret;
- bail_tx:
- ret = PTR_ERR(tx);
-@@ -1280,6 +1352,8 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- unsigned flush_wc;
- u32 control;
- u32 pbufn;
-+ u8 *data_orig = NULL;
-+ struct snoop_packet *packet = NULL;
-
- control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
- be16_to_cpu(ibhdr->lrh[0]) >> 12);
-@@ -1288,6 +1362,20 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- if (unlikely(piobuf == NULL))
- return no_bufs_available(qp);
-
-+ if (snoop_enable && ppd->mode_flag) {
-+ int nomatch = 0;
-+ if (ppd->filter_callback)
-+ nomatch = ppd->filter_callback(ibhdr, NULL,
-+ ppd->filter_value);
-+ if (nomatch == 0) {
-+ packet = kzalloc(sizeof(*packet)+QIB_GET_PKT_LEN(ibhdr),
-+ GFP_ATOMIC);
-+ if (packet) {
-+ INIT_LIST_HEAD(&packet->list);
-+ packet->total_len = QIB_GET_PKT_LEN(ibhdr);
-+ }
-+ }
-+ }
- /*
- * Write the pbc.
- * We have to flush after the PBC for correctness on some cpus
-@@ -1297,6 +1385,12 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- piobuf_orig = piobuf;
- piobuf += 2;
-
-+ if (packet) {
-+ /* Copy header */
-+ data_orig = packet->data;
-+ memcpy(data_orig, hdr, (hdrwords << 2));
-+ data_orig += (hdrwords << 2);
-+ }
- flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
- if (len == 0) {
- /*
-@@ -1336,10 +1430,19 @@ static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
- qib_flush_wc();
- } else
- qib_pio_copy(piobuf, addr, dwords);
-+ if (packet) {
-+ /* Copy data */
-+ memcpy(data_orig, addr, len);
-+ data_orig += len;
-+ }
- goto done;
- }
-- copy_io(piobuf, ss, len, flush_wc);
-+ copy_io(piobuf, ss, len, flush_wc, packet, data_orig);
- done:
-+ if (packet) {
-+ qib_snoop_send_queue_packet(ppd, packet);
-+ packet = NULL;
-+ }
- if (dd->flags & QIB_USE_SPCL_TRIG) {
- u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
- qib_flush_wc();
-@@ -1623,7 +1726,8 @@ static int qib_query_port(struct ib_device *ibdev, u8 port,
- props->max_vl_num = qib_num_vls(ppd->vls_supported);
- props->init_type_reply = 0;
-
-- props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
-+ props->max_mtu = QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port) ?
-+ QIB_MODPARAM_GET(ibmtu, dd->unit, ppd->port) : IB_MTU_4096;
- switch (ppd->ibmtu) {
- case 4096:
- mtu = IB_MTU_4096;
---
-1.7.1
-