From: Vladimir Sokolovsky Date: Tue, 3 Mar 2015 09:10:26 +0000 (+0200) Subject: Added XEON Phi X-Git-Tag: vofed-3.18~32 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=5e4a77ee6847ce1f94296947e3051ee2409fe27d;p=~emulex%2Ffor-vlad%2Fcompat-rdma.git Added XEON Phi Signed-off-by: Phil Cayton Signed-off-by: Vladimir Sokolovsky --- diff --git a/ofed_scripts/ofed-mic b/ofed_scripts/ofed-mic index 238b46d..9132e70 100755 --- a/ofed_scripts/ofed-mic +++ b/ofed_scripts/ofed-mic @@ -60,6 +60,7 @@ foreach_card() names=(${host}-${card} ${card} ${host}-${card}.${domn}) else eval $_failure + errors+=1 continue fi diff --git a/tech-preview/xeon-phi/0001-ib_core-add-mic-node-and-scif-transport-types.patch b/tech-preview/xeon-phi/0001-ib_core-add-mic-node-and-scif-transport-types.patch new file mode 100644 index 0000000..c3b2c55 --- /dev/null +++ b/tech-preview/xeon-phi/0001-ib_core-add-mic-node-and-scif-transport-types.patch @@ -0,0 +1,82 @@ +From c01faf2a8053f8968b9bac84a4cbd54a9952d472 Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Tue, 21 Jan 2014 08:59:29 -0800 +Subject: [PATCH 01/12] ib_core add mic node and scif transport types + +The OFED SCIF driver implements a software-emulated RDMA device to allow OFED +based applications, such as Intel MPI, to run on Intel(R) MIC Architecture +without the presence of a physical HCA. OFED SCIF is only targeted for inter- +node communication within a single platform, where a node is a coprocessor +or the host processor. This patch adds new node and transport types to the +ib_core kernel module to distinguish this new RDMA interface type. +--- +diff -urN a0/drivers/infiniband/core/sysfs.c a1/drivers/infiniband/core/sysfs.c +--- a0/drivers/infiniband/core/sysfs.c 2015-01-05 13:35:35.692687746 -0800 ++++ a1/drivers/infiniband/core/sysfs.c 2015-01-05 13:46:38.792659814 -0800 +@@ -253,6 +253,8 @@ + return sprintf(buf, "%s\n", "InfiniBand"); + case IB_LINK_LAYER_ETHERNET: + return sprintf(buf, "%s\n", "Ethernet"); ++ case IB_LINK_LAYER_SCIF: ++ return sprintf(buf, "%s\n", "SCIF"); + default: + return sprintf(buf, "%s\n", "Unknown"); + } +@@ -623,6 +625,7 @@ + case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); + case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); + case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); ++ case RDMA_NODE_MIC: return sprintf(buf, "%d: MIC\n", dev->node_type); + default: return sprintf(buf, "%d: \n", dev->node_type); + } + } +diff -urN a0/drivers/infiniband/core/verbs.c a1/drivers/infiniband/core/verbs.c +--- a0/drivers/infiniband/core/verbs.c 2015-01-05 13:35:35.693687746 -0800 ++++ a1/drivers/infiniband/core/verbs.c 2015-01-05 13:49:08.470653509 -0800 +@@ -121,6 +121,8 @@ + return RDMA_TRANSPORT_USNIC; + case RDMA_NODE_USNIC_UDP: + return RDMA_TRANSPORT_USNIC_UDP; ++ case RDMA_NODE_MIC: ++ return RDMA_TRANSPORT_SCIF; + default: + BUG(); + return 0; +@@ -140,6 +142,8 @@ + case RDMA_TRANSPORT_USNIC: + case RDMA_TRANSPORT_USNIC_UDP: + return IB_LINK_LAYER_ETHERNET; ++ case RDMA_TRANSPORT_SCIF: ++ return IB_LINK_LAYER_SCIF; + default: + return IB_LINK_LAYER_UNSPECIFIED; + } +diff -urN a0/include/rdma/ib_verbs.h a1/include/rdma/ib_verbs.h +--- a0/include/rdma/ib_verbs.h 2015-01-05 13:45:40.299662278 -0800 ++++ a1/include/rdma/ib_verbs.h 2015-01-05 13:50:57.590648913 -0800 +@@ -75,13 +75,15 @@ + RDMA_NODE_RNIC, + RDMA_NODE_USNIC, + RDMA_NODE_USNIC_UDP, ++ RDMA_NODE_MIC, + }; + + enum rdma_transport_type { + RDMA_TRANSPORT_IB, + 
RDMA_TRANSPORT_IWARP, + RDMA_TRANSPORT_USNIC, +- RDMA_TRANSPORT_USNIC_UDP ++ RDMA_TRANSPORT_USNIC_UDP, ++ RDMA_TRANSPORT_SCIF, + }; + + __attribute_const__ enum rdma_transport_type +@@ -91,6 +93,7 @@ + IB_LINK_LAYER_UNSPECIFIED, + IB_LINK_LAYER_INFINIBAND, + IB_LINK_LAYER_ETHERNET, ++ IB_LINK_LAYER_SCIF + }; + + enum ib_device_cap_flags { +Binary files a0/include/rdma/.ib_verbs.h.rej.swp and a1/include/rdma/.ib_verbs.h.rej.swp differ diff --git a/tech-preview/xeon-phi/0002-rdma_cm-add-mic-node-and-scif-transport-types.patch b/tech-preview/xeon-phi/0002-rdma_cm-add-mic-node-and-scif-transport-types.patch new file mode 100644 index 0000000..27e90b2 --- /dev/null +++ b/tech-preview/xeon-phi/0002-rdma_cm-add-mic-node-and-scif-transport-types.patch @@ -0,0 +1,117 @@ +From faf3b3f931806d4f044068c4e9b2ca4482a9177a Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Tue, 3 Jun 2014 09:50:57 -0700 +Subject: [PATCH 02/12] rdma_cm add mic node and scif transport types + +The OFED SCIF driver can leverage the iWARP cm calls to establish connections. +This patch utilizes the new node and transport types in the rdma cm to call +the underlying driver as needed. +--- +diff -urN a1/drivers/infiniband/core/cma.c a2/drivers/infiniband/core/cma.c +--- a1/drivers/infiniband/core/cma.c 2015-01-05 13:46:27.953660271 -0800 ++++ a2/drivers/infiniband/core/cma.c 2015-01-05 14:05:11.897612926 -0800 +@@ -747,6 +747,7 @@ + qp_attr->rq_psn = id_priv->seq_num; + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + if (!id_priv->cm_id.iw) { + qp_attr->qp_access_flags = 0; + *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; +@@ -1043,6 +1044,7 @@ + ib_destroy_cm_id(id_priv->cm_id.ib); + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + if (id_priv->cm_id.iw) + iw_destroy_cm_id(id_priv->cm_id.iw); + break; +@@ -1994,6 +1996,7 @@ + } + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + ret = cma_resolve_iw_route(id_priv, timeout_ms); + break; + default: +@@ -2184,6 +2187,25 @@ + return ret; + } + ++static int cma_resolve_scif(struct rdma_id_private *id_priv) ++{ ++ struct cma_work *work; ++ ++ work = kzalloc(sizeof *work, GFP_KERNEL); ++ if (!work) ++ return -ENOMEM; ++ ++ /* we probably can leave it empty here */ ++ ++ work->id = id_priv; ++ INIT_WORK(&work->work, cma_work_handler); ++ work->old_state = RDMA_CM_ADDR_QUERY; ++ work->new_state = RDMA_CM_ADDR_RESOLVED; ++ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; ++ queue_work(cma_wq, &work->work); ++ return 0; ++} ++ + static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + struct sockaddr *dst_addr) + { +@@ -2225,9 +2247,12 @@ + if (cma_any_addr(dst_addr)) { + ret = cma_resolve_loopback(id_priv); + } else { +- if (dst_addr->sa_family == AF_IB) { ++ if (dst_addr->sa_family == AF_IB) + ret = cma_resolve_ib_addr(id_priv); +- } else { ++ else if ((id_priv->id.device != NULL) && ++ (rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF)) ++ ret = cma_resolve_scif(id_priv); ++ else { + ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), + dst_addr, &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); +@@ -2598,6 +2623,7 @@ + goto err; + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + ret = cma_iw_listen(id_priv, backlog); + if (ret) + goto err; +@@ -2946,6 +2972,7 @@ + ret = cma_connect_ib(id_priv, conn_param); + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + ret = cma_connect_iw(id_priv, conn_param); + break; + default: +@@ 
-3073,6 +3100,7 @@ + } + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + ret = cma_accept_iw(id_priv, conn_param); + break; + default: +@@ -3133,6 +3161,7 @@ + 0, private_data, private_data_len); + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + ret = iw_cm_reject(id_priv->cm_id.iw, + private_data, private_data_len); + break; +@@ -3163,6 +3192,7 @@ + ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); + break; + case RDMA_TRANSPORT_IWARP: ++ case RDMA_TRANSPORT_SCIF: + ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); + break; + default: diff --git a/tech-preview/xeon-phi/0003-add-context-based-udata-support.patch b/tech-preview/xeon-phi/0003-add-context-based-udata-support.patch new file mode 100644 index 0000000..113a3e9 --- /dev/null +++ b/tech-preview/xeon-phi/0003-add-context-based-udata-support.patch @@ -0,0 +1,100 @@ +From 2ddd9c09050d6f74a2ea9e3e21a76510bbdff155 Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Thu, 6 Feb 2014 14:23:36 -0800 +Subject: [PATCH 03/12] add context based udata support + +Normally the copy_to_user and copy_from_user calls are used to access vendor +private data when allocating resources from processes. However, when the +processes are running on MIC, this communication is proxied to the host kernel +via SCIF. This patch allows setup of context-based udata access routines. +--- +diff -urN a2/drivers/infiniband/core/uverbs_cmd.c a3/drivers/infiniband/core/uverbs_cmd.c +--- a2/drivers/infiniband/core/uverbs_cmd.c 2015-01-05 13:59:55.217626266 -0800 ++++ a3/drivers/infiniband/core/uverbs_cmd.c 2015-01-05 14:30:40.647548530 -0800 +@@ -57,6 +57,21 @@ + static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; + static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; + ++static int uverbs_copy_from_udata(void *dst, struct ib_udata *udata, size_t len) ++{ ++ return copy_from_user(dst, udata->inbuf, len) ? -EFAULT : 0; ++} ++ ++static int uverbs_copy_to_udata(struct ib_udata *udata, void *src, size_t len) ++{ ++ return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; ++} ++ ++struct ib_udata_ops uverbs_copy = { ++ .copy_from = uverbs_copy_from_udata, ++ .copy_to = uverbs_copy_to_udata ++}; ++ + /* + * The ib_uobject locking scheme is as follows: + * +@@ -330,6 +345,7 @@ + goto err; + } + ++ ucontext->umem_ops = NULL; + ucontext->device = ibdev; + INIT_LIST_HEAD(&ucontext->pd_list); + INIT_LIST_HEAD(&ucontext->mr_list); +Binary files a2/drivers/infiniband/core/.uverbs_cmd.c.rej.swp and a3/drivers/infiniband/core/.uverbs_cmd.c.rej.swp differ +diff -urN a2/drivers/infiniband/core/uverbs.h a3/drivers/infiniband/core/uverbs.h +--- a2/drivers/infiniband/core/uverbs.h 2015-01-05 13:59:55.216626266 -0800 ++++ a3/drivers/infiniband/core/uverbs.h 2015-01-05 14:29:27.559551609 -0800 +@@ -47,8 +47,11 @@ + #include + #include + ++extern struct ib_udata_ops uverbs_copy; ++ + #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + do { \ ++ (udata)->ops = &uverbs_copy; \ + (udata)->inbuf = (const void __user *) (ibuf); \ + (udata)->outbuf = (void __user *) (obuf); \ + (udata)->inlen = (ilen); \ +@@ -57,6 +60,7 @@ + + #define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \ + do { \ ++ (udata)->ops = &uverbs_copy; \ + (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \ + (udata)->outbuf = (olen) ? 
(void __user *) (obuf) : NULL; \ + (udata)->inlen = (ilen); \ +diff -urN a2/include/rdma/ib_verbs.h a3/include/rdma/ib_verbs.h +--- a2/include/rdma/ib_verbs.h 2015-01-05 13:59:55.219626266 -0800 ++++ a3/include/rdma/ib_verbs.h 2015-01-05 14:18:48.871578512 -0800 +@@ -1147,7 +1147,14 @@ + int live; + }; + ++struct ib_udata; ++struct ib_udata_ops { ++ int (*copy_from)(void *dest, struct ib_udata *udata, size_t len); ++ int (*copy_to)(struct ib_udata *udata, void *src, size_t len); ++}; ++ + struct ib_udata { ++ struct ib_udata_ops *ops; + const void __user *inbuf; + void __user *outbuf; + size_t inlen; +@@ -1664,12 +1671,12 @@ + + static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) + { +- return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; ++ return udata->ops->copy_from(dest, udata, len); + } + + static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) + { +- return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; ++ return udata->ops->copy_to(udata, src, len); + } + + /** diff --git a/tech-preview/xeon-phi/0004-add-context-based-umem-support.patch b/tech-preview/xeon-phi/0004-add-context-based-umem-support.patch new file mode 100644 index 0000000..41970be --- /dev/null +++ b/tech-preview/xeon-phi/0004-add-context-based-umem-support.patch @@ -0,0 +1,353 @@ +From 8b06f1090da0e12c6012d0d13d8b48c69640a6a7 Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Thu, 6 Feb 2014 14:08:02 -0800 +Subject: [PATCH 04/12] add context based umem support + +The ib_umem_get routine calls get_user_pages to pin pages and create the +ib_umem structure. Memory on MIC, however, must be mapped through SCIF for +access across PCI. This patch allows setup of context-based ib_umem mapping +routines. + +Also update mthca to support these changes +--- +diff -urN a3/drivers/infiniband/core/umem.c a4/drivers/infiniband/core/umem.c +--- a3/drivers/infiniband/core/umem.c 2015-01-05 14:12:52.117593540 -0800 ++++ a4/drivers/infiniband/core/umem.c 2015-01-05 14:41:51.927520253 -0800 +@@ -57,6 +57,10 @@ + for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { + + page = sg_page(sg); ++ ++ if (!pfn_valid(page_to_pfn(page))) ++ continue; ++ + if (umem->writable && dirty) + set_page_dirty_lock(page); + put_page(page); +@@ -68,14 +72,71 @@ + } + + /** +- * ib_umem_get - Pin and DMA map userspace memory. ++ * get_remap_pages() - get pages remapped to user virtual space ++ * @mm: mm struct of target mm ++ * @start: starting user address ++ * @nr_pages: number of pages to lookup ++ * @write flag to verify if vma is writable ++ * @pages: array that receives pointers to the pages. Should ++ * be at least nr_pages long. Or NULL, if caller only ++ * intends to ensure the pages are valid. ++ * @vmas: array of pointers to vmas corresponding to each page. ++ * Or NULL if the caller does not require them. ++ * ++ * Pages may be system ram or io space mmapped to user virtual ++ * space via remap_pfn_range or io_remap_page_range, respectively. ++ * ++ * Returns number of pages found, which may be less than the number ++ * requested. Returns 0 if nr_pages is 0. ++ * ++ * Must be called with mmap_sem held for read or write. ++ */ ++static long get_remap_pages(struct mm_struct *mm, unsigned long start, ++ unsigned long nr_pages, int write, ++ struct page **pages, struct vm_area_struct **vmas) ++{ ++ struct vm_area_struct *vma; ++ unsigned long pfn; ++ long i = 0; ++ int ret; ++ ++ while (nr_pages) { ++ if (!(vma = find_vma(mm, start))) ++ return i ? 
: -EFAULT; ++ if (write && !(vma->vm_flags & VM_WRITE)) ++ return i ? : -EFAULT; ++ ++ do { ++ ret = follow_pfn(vma, start, &pfn); ++ if (ret) ++ return i ? : ret; ++ ++ if (pages) { ++ pages[i] = pfn_to_page(pfn); ++ if (pfn_valid(pfn)) ++ get_page(pages[i]); ++ } ++ if (vmas) ++ vmas[i] = vma; ++ ++ start += PAGE_SIZE; ++ nr_pages--; ++ i++; ++ } while (nr_pages && start < vma->vm_end); ++ } ++ ++ return i; ++} ++ ++/** ++ * ib_get_umem - Pin and DMA map userspace memory. + * @context: userspace context to pin memory for + * @addr: userspace virtual address to start at + * @size: length of region to pin + * @access: IB_ACCESS_xxx flags for memory being pinned + * @dmasync: flush in-flight DMA when the memory region is written + */ +-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ++struct ib_umem *ib_get_umem(struct ib_ucontext *context, unsigned long addr, + size_t size, int access, int dmasync) + { + struct ib_umem *umem; +@@ -101,7 +162,6 @@ + if (!umem) + return ERR_PTR(-ENOMEM); + +- umem->context = context; + umem->length = size; + umem->offset = addr & ~PAGE_MASK; + umem->page_size = PAGE_SIZE; +@@ -163,11 +223,18 @@ + sg_list_start = umem->sg_head.sgl; + + while (npages) { ++ + ret = get_user_pages(current, current->mm, cur_base, + min_t(unsigned long, npages, + PAGE_SIZE / sizeof (struct page *)), + 1, !umem->writable, page_list, vma_list); + ++ if (ret == -EFAULT) /* may be a remapped area; try again */ ++ ret = get_remap_pages(current->mm, cur_base, ++ min_t(unsigned long, npages, ++ PAGE_SIZE / sizeof (struct page *)), ++ !umem->writable, page_list, vma_list); ++ + if (ret < 0) + goto out; + +@@ -219,7 +286,6 @@ + + return ret < 0 ? ERR_PTR(ret) : umem; + } +-EXPORT_SYMBOL(ib_umem_get); + + static void ib_umem_account(struct work_struct *work) + { +@@ -237,10 +303,10 @@ + } + + /** +- * ib_umem_release - release memory pinned with ib_umem_get ++ * ib_release_umem - release memory pinned with ib_umem_get + * @umem: umem struct to release + */ +-void ib_umem_release(struct ib_umem *umem) ++void ib_release_umem(struct ib_umem *umem) + { + struct ib_ucontext *context = umem->context; + struct mm_struct *mm; +@@ -290,9 +356,8 @@ + out: + kfree(umem); + } +-EXPORT_SYMBOL(ib_umem_release); + +-int ib_umem_page_count(struct ib_umem *umem) ++int ib_page_count_umem(struct ib_umem *umem) + { + int shift; + int i; +@@ -307,4 +372,40 @@ + + return n; + } ++ ++struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ++ size_t size, int access, int dmasync) ++{ ++ struct ib_umem_ops *ops = context->umem_ops; ++ struct ib_umem *umem; ++ ++ umem = (ops && ops->get) ? ++ ops->get(context, addr, size, access, dmasync) : ++ ib_get_umem(context, addr, size, access, dmasync); ++ ++ if (!IS_ERR(umem)) ++ umem->context = context; ++ ++ return umem; ++} ++EXPORT_SYMBOL(ib_umem_get); ++ ++void ib_umem_release(struct ib_umem *umem) ++{ ++ struct ib_umem_ops *ops = umem->context->umem_ops; ++ ++ if (ops && ops->release) ++ ops->release(umem); ++ else ++ ib_release_umem(umem); ++} ++EXPORT_SYMBOL(ib_umem_release); ++ ++int ib_umem_page_count(struct ib_umem *umem) ++{ ++ struct ib_umem_ops *ops = umem->context->umem_ops; ++ ++ return (ops && ops->page_count) ? 
++ ops->page_count(umem) : ib_page_count_umem(umem); ++} + EXPORT_SYMBOL(ib_umem_page_count); +diff -urN a3/drivers/infiniband/hw/mthca/mthca_memfree.c a4/drivers/infiniband/hw/mthca/mthca_memfree.c +--- a3/drivers/infiniband/hw/mthca/mthca_memfree.c 2015-01-05 14:12:52.112593540 -0800 ++++ a4/drivers/infiniband/hw/mthca/mthca_memfree.c 2015-01-05 14:36:00.825535043 -0800 +@@ -39,6 +39,12 @@ + + #include + ++/* Must use the ib_umem routines to support the IB proxy server. */ ++#define MTHCA_IB_UMEM ++#ifdef MTHCA_IB_UMEM ++#include ++#endif ++ + #include "mthca_memfree.h" + #include "mthca_dev.h" + #include "mthca_cmd.h" +@@ -56,7 +62,11 @@ + struct mutex mutex; + struct { + u64 uvirt; ++#ifdef MTHCA_IB_UMEM ++ struct ib_umem *umem; ++#else + struct scatterlist mem; ++#endif + int refcount; + } page[0]; + }; +@@ -446,7 +456,12 @@ + int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr) + { ++#ifdef MTHCA_IB_UMEM ++ struct mthca_ucontext *context; ++ struct ib_umem_chunk *chunk; ++#else + struct page *pages[1]; ++#endif + int ret = 0; + int i; + +@@ -472,6 +487,22 @@ + goto out; + } + ++#ifdef MTHCA_IB_UMEM ++ context = container_of(uar, struct mthca_ucontext, uar); ++ ++ db_tab->page[i].umem = ib_umem_get(&context->ibucontext, ++ uaddr & PAGE_MASK, PAGE_SIZE, 0, 0); ++ if (IS_ERR(db_tab->page[i].umem)) { ++ ret = PTR_ERR(db_tab->page[i].umem); ++ goto out; ++ } ++ ++ chunk = list_entry(db_tab->page[i].umem->chunk_list.next, ++ struct ib_umem_chunk, list); ++ ++ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&chunk->page_list[0]), ++ mthca_uarc_virt(dev, uar, i)); ++#else + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, + pages, NULL); + if (ret < 0) +@@ -488,9 +519,14 @@ + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i)); ++#endif + if (ret) { ++#ifdef MTHCA_IB_UMEM ++ ib_umem_release(db_tab->page[i].umem); ++#else + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(sg_page(&db_tab->page[i].mem)); ++#endif + goto out; + } + +@@ -505,6 +541,9 @@ + void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) + { ++#ifdef MTHCA_IB_UMEM ++ int i; ++#endif + if (!mthca_is_memfree(dev)) + return; + +@@ -515,7 +554,16 @@ + + mutex_lock(&db_tab->mutex); + ++#ifdef MTHCA_IB_UMEM ++ i = index / MTHCA_DB_REC_PER_PAGE; ++ if (!--db_tab->page[i].refcount) { ++ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1); ++ ib_umem_release(db_tab->page[i].umem); ++ db_tab->page[i].uvirt = 0; ++ } ++#else + --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; ++#endif + + mutex_unlock(&db_tab->mutex); + } +@@ -538,7 +586,11 @@ + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; ++#ifdef MTHCA_IB_UMEM ++ db_tab->page[i].umem = NULL; ++#else + sg_init_table(&db_tab->page[i].mem, 1); ++#endif + } + + return db_tab; +@@ -555,8 +607,12 @@ + for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1); ++#ifdef MTHCA_IB_UMEM ++ ib_umem_release(db_tab->page[i].umem); ++#else + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(sg_page(&db_tab->page[i].mem)); ++#endif + } + } + +diff -urN a3/include/rdma/ib_verbs.h a4/include/rdma/ib_verbs.h +--- a3/include/rdma/ib_verbs.h 2015-01-05 14:18:48.871578512 -0800 ++++ 
a4/include/rdma/ib_verbs.h 2015-01-05 14:36:00.826535043 -0800 +@@ -1122,7 +1122,18 @@ + u8 page_shift; + }; + ++struct ib_ucontext; ++struct ib_umem_ops { ++ struct ib_umem *(*get)(struct ib_ucontext *context, ++ unsigned long addr, size_t size, ++ int access, int dmasync); ++ void (*release)(struct ib_umem *umem); ++ int (*page_count)(struct ib_umem *umem); ++}; ++ + struct ib_ucontext { ++ struct ib_umem_ops *umem_ops; /* set to NULL for default ops */ ++ void *umem_private_data; + struct ib_device *device; + struct list_head pd_list; + struct list_head mr_list; diff --git a/tech-preview/xeon-phi/0005-allow-mic-ipoib-qp-creation.patch b/tech-preview/xeon-phi/0005-allow-mic-ipoib-qp-creation.patch new file mode 100644 index 0000000..7c97357 --- /dev/null +++ b/tech-preview/xeon-phi/0005-allow-mic-ipoib-qp-creation.patch @@ -0,0 +1,94 @@ +From 8e3cff460efe00954b4c99ea23e42527c234c3f9 Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Tue, 4 Feb 2014 12:22:38 -0800 +Subject: [PATCH 05/12] allow mic ipoib qp creation + +From the host point of view, each MIC kernel appears as a "user-mode process" +to allow address translation to access the correct coprocessor mapped across +PCI. To enable the IPoIB driver in MIC kernel, some QP creation flags must +be checked regardless of whether the call originates from kernel or user +space. Because these create_flags cannot be set by normal user-mode calls +through ib_uverbs, moving the check is not an issue. This patch allows the +IPoIB driver on MIC to create QPs correctly. +--- +diff -urN a4/drivers/infiniband/hw/mlx4/qp.c a5/drivers/infiniband/hw/mlx4/qp.c +--- a4/drivers/infiniband/hw/mlx4/qp.c 2015-01-05 14:35:38.055536002 -0800 ++++ a5/drivers/infiniband/hw/mlx4/qp.c 2015-01-08 09:50:29.971123797 -0800 +@@ -692,6 +692,12 @@ + + qp->mlx4_ib_qp_type = qp_type; + ++ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) ++ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; ++ ++ if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) ++ qp->flags |= MLX4_IB_QP_LSO; ++ + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); +@@ -744,13 +750,7 @@ + } + } else { + qp->sq_no_prefetch = 0; +- +- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) +- qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; +- +- if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) +- qp->flags |= MLX4_IB_QP_LSO; +- ++/* + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { + if (dev->steering_support == + MLX4_STEERING_MODE_DEVICE_MANAGED) +@@ -758,7 +758,7 @@ + else + goto err; + } +- ++*/ + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); + if (err) + goto err; +@@ -1060,6 +1060,7 @@ + + gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? + GFP_NOIO : GFP_KERNEL; ++#if 0 /* Removed to allow Xeon Phi's use of ib_ipoib via CCL-Direct (ibp) */ + /* + * We only support LSO, vendor flag1, and multicast loopback blocking, + * and only for kernel UD QPs. 
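/*
 * Editorial sketch, not part of the patch above: with the create_flags
 * handling moved into create_qp_common(), a UD QP requested on behalf of the
 * MIC IPoIB driver may carry the multicast-loopback-blocking and UD LSO
 * flags even when the request arrives through the proxied (udata) path.
 * The helper name and capability sizes below are hypothetical placeholders.
 */
#include <rdma/ib_verbs.h>

static struct ib_qp *example_create_ipoib_ud_qp(struct ib_pd *pd,
						struct ib_cq *send_cq,
						struct ib_cq *recv_cq)
{
	struct ib_qp_init_attr init_attr = {
		.send_cq      = send_cq,
		.recv_cq      = recv_cq,
		.cap          = {
			.max_send_wr  = 64,	/* placeholder queue depths */
			.max_recv_wr  = 256,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
		.sq_sig_type  = IB_SIGNAL_ALL_WR,
		.qp_type      = IB_QPT_UD,
		.create_flags = IB_QP_CREATE_IPOIB_UD_LSO |
				IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
	};

	return ib_create_qp(pd, &init_attr);	/* ERR_PTR on failure */
}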
+@@ -1084,6 +1085,7 @@ + ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && + init_attr->qp_type > IB_QPT_GSI))) + return ERR_PTR(-EINVAL); ++#endif /* if 0 */ + + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: +@@ -1120,9 +1122,11 @@ + case IB_QPT_SMI: + case IB_QPT_GSI: + { ++#if 0 /* Removed to allow Xeon Phi's use of ib_ipoib via CCL-Direct (ibp) */ + /* Userspace is not allowed to create special QPs: */ + if (udata) + return ERR_PTR(-EINVAL); ++#endif /* if 0 */ + + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, + get_sqp_num(to_mdev(pd->device), init_attr), +diff -urN a4/drivers/infiniband/hw/mlx5/qp.c a5/drivers/infiniband/hw/mlx5/qp.c +--- a4/drivers/infiniband/hw/mlx5/qp.c 2015-01-05 14:35:38.065536002 -0800 ++++ a5/drivers/infiniband/hw/mlx5/qp.c 2015-01-05 14:46:41.322508063 -0800 +@@ -852,6 +852,9 @@ + } + + if (pd) { ++ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) ++ qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; ++ + if (pd->uobject) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); + if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || diff --git a/tech-preview/xeon-phi/0006-add-scif.h-to-the-include-directory-matching-the-loc.patch b/tech-preview/xeon-phi/0006-add-scif.h-to-the-include-directory-matching-the-loc.patch new file mode 100644 index 0000000..ca0529c --- /dev/null +++ b/tech-preview/xeon-phi/0006-add-scif.h-to-the-include-directory-matching-the-loc.patch @@ -0,0 +1,1760 @@ +From 129a1e301d8567b8d79abe19fd2d998738951cda Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Tue, 4 Feb 2014 12:23:56 -0800 +Subject: [PATCH 06/12] add scif.h to the include directory matching the + location that is in the mpss installation + +Signed-off-by: Phil Cayton +--- +diff -urN a5/include/modules/scif.h a6/include/modules/scif.h +--- a5/include/modules/scif.h 1969-12-31 16:00:00.000000000 -0800 ++++ a6/include/modules/scif.h 2015-01-05 14:59:07.370476637 -0800 +@@ -0,0 +1,1748 @@ ++/* ++ * Copyright 2010-2013 Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License, version 2, ++ * as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Disclaimer: The codes contained in these modules may be specific to ++ * the Intel Software Development Platform codenamed Knights Ferry, ++ * and the Intel product codenamed Knights Corner, and are not backward ++ * compatible with other Intel products. Additionally, Intel will NOT ++ * support the codes or instruction set in future products. ++ * ++ * Intel offers no warranty of any kind regarding the code. This code is ++ * licensed on an "AS IS" basis and Intel is not obligated to provide ++ * any support, assistance, installation, training, or other services ++ * of any kind. Intel is also not obligated to provide any updates, ++ * enhancements or extensions. Intel specifically disclaims any warranty ++ * of merchantability, non-infringement, fitness for any particular ++ * purpose, and any other warranty. 
++ * ++ * Further, Intel disclaims all liability of any kind, including but ++ * not limited to liability for infringement of any proprietary rights, ++ * relating to the use of the code, even if Intel is notified of the ++ * possibility of such liability. Except as expressly stated in an Intel ++ * license agreement provided with this code and agreed upon with Intel, ++ * no license, express or implied, by estoppel or otherwise, to any ++ * intellectual property rights is granted herein. ++ */ ++ ++/* ++ * Revised 15:05 11/24/2010 ++ * Derived from SCIF SAS v0.41 with additional corrections ++ */ ++ ++#ifndef __SCIF_H__ ++#define __SCIF_H__ ++ ++#include ++#include ++#include ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SCIF_ACCEPT_SYNC 1 ++#define SCIF_SEND_BLOCK 1 ++#define SCIF_RECV_BLOCK 1 ++ ++/** ++ * The purpose of SCIF_VERSION is to check for compatibility between host and ++ * card SCIF modules. This version should be incremented whenever any changes ++ * are made to the SCIF driver code that is common to both card and the host. ++ * Whenever this version is incremented, SCIF_LIB_VERSION in user mode libscif ++ * scif.h file should be incremented and vice versa. Both the versions should ++ * always match. ++ */ ++#define SCIF_VERSION 1 ++ ++/* Start: Deprecated Temporary definition for compatability */ ++#define ACCEPT_SYNC SCIF_ACCEPT_SYNC ++#define SEND_BLOCK SCIF_SEND_BLOCK ++#define RECV_BLOCK SCIF_RECV_BLOCK ++/* End: Deprecated Temporary definition for compatability */ ++ ++enum { ++ SCIF_PROT_READ = (1<<0), ++ SCIF_PROT_WRITE = (1<<1) ++}; ++ ++enum { ++ SCIF_MAP_FIXED = 0x10, ++ SCIF_MAP_KERNEL = 0x20 ++}; ++ ++enum { ++ SCIF_FENCE_INIT_SELF = (1<<0), ++ SCIF_FENCE_INIT_PEER = (1<<1) ++}; ++ ++enum { ++ SCIF_FENCE_RAS_SELF = (1<<2), ++ SCIF_FENCE_RAS_PEER = (1<<3) ++}; ++ ++enum { ++ SCIF_SIGNAL_LOCAL = (1<<4), ++ SCIF_SIGNAL_REMOTE = (1<<5) ++}; ++ ++#define SCIF_RMA_USECPU 1 ++#define SCIF_RMA_USECACHE (1<<1) ++#define SCIF_RMA_SYNC (1<<2) ++#define SCIF_RMA_ORDERED (1<<3) ++//! 
@cond (Prevent doxygen from including these) ++#define SCIF_POLLIN POLLIN ++#define SCIF_POLLOUT POLLOUT ++#define SCIF_POLLERR POLLERR ++#define SCIF_POLLHUP POLLHUP ++#define SCIF_POLLNVAL POLLNVAL ++ ++/* SCIF Reserved Ports */ ++/* COI */ ++#define SCIF_COI_PORT_0 40 ++#define SCIF_COI_PORT_1 41 ++#define SCIF_COI_PORT_2 42 ++#define SCIF_COI_PORT_3 43 ++#define SCIF_COI_PORT_4 44 ++#define SCIF_COI_PORT_5 45 ++#define SCIF_COI_PORT_6 46 ++#define SCIF_COI_PORT_7 47 ++#define SCIF_COI_PORT_8 48 ++#define SCIF_COI_PORT_9 49 ++ ++/* OFED */ ++#define SCIF_OFED_PORT_0 60 ++#define SCIF_OFED_PORT_1 61 ++#define SCIF_OFED_PORT_2 62 ++#define SCIF_OFED_PORT_3 63 ++#define SCIF_OFED_PORT_4 64 ++#define SCIF_OFED_PORT_5 65 ++#define SCIF_OFED_PORT_6 66 ++#define SCIF_OFED_PORT_7 67 ++#define SCIF_OFED_PORT_8 68 ++#define SCIF_OFED_PORT_9 69 ++ ++/* NETDEV */ ++#define SCIF_NETDEV_PORT_0 80 ++#define SCIF_NETDEV_PORT_1 81 ++#define SCIF_NETDEV_PORT_2 82 ++#define SCIF_NETDEV_PORT_3 83 ++#define SCIF_NETDEV_PORT_4 84 ++#define SCIF_NETDEV_PORT_5 85 ++#define SCIF_NETDEV_PORT_6 86 ++#define SCIF_NETDEV_PORT_7 87 ++#define SCIF_NETDEV_PORT_8 88 ++#define SCIF_NETDEV_PORT_9 89 ++ ++/* RAS */ ++#define SCIF_RAS_PORT_0 100 ++#define SCIF_RAS_PORT_1 101 ++#define SCIF_RAS_PORT_2 102 ++#define SCIF_RAS_PORT_3 103 ++#define SCIF_RAS_PORT_4 104 ++#define SCIF_RAS_PORT_5 105 ++#define SCIF_RAS_PORT_6 106 ++#define SCIF_RAS_PORT_7 107 ++#define SCIF_RAS_PORT_8 108 ++#define SCIF_RAS_PORT_9 109 ++ ++/* Power Management */ ++#define SCIF_PM_PORT_0 120 ++#define SCIF_PM_PORT_1 121 ++#define SCIF_PM_PORT_2 122 ++#define SCIF_PM_PORT_3 123 ++#define SCIF_PM_PORT_4 124 ++#define SCIF_PM_PORT_5 125 ++#define SCIF_PM_PORT_6 126 ++#define SCIF_PM_PORT_7 127 ++#define SCIF_PM_PORT_8 128 ++#define SCIF_PM_PORT_9 129 ++ ++/* Board Tools */ ++#define SCIF_BT_PORT_0 130 ++#define SCIF_BT_PORT_1 131 ++#define SCIF_BT_PORT_2 132 ++#define SCIF_BT_PORT_3 133 ++#define SCIF_BT_PORT_4 134 ++#define SCIF_BT_PORT_5 135 ++#define SCIF_BT_PORT_6 136 ++#define SCIF_BT_PORT_7 137 ++#define SCIF_BT_PORT_8 138 ++#define SCIF_BT_PORT_9 139 ++ ++/* MIC Boot/Configuration support */ ++#define MPSSD_DOWNLOAD 160 ++#define MIC_NOTIFY 161 ++ ++#define SCIF_ADMIN_PORT_END 1024 ++ ++/* MYO */ ++#define SCIF_MYO_PORT_0 1025 ++#define SCIF_MYO_PORT_1 1026 ++#define SCIF_MYO_PORT_2 1027 ++#define SCIF_MYO_PORT_3 1028 ++#define SCIF_MYO_PORT_4 1029 ++#define SCIF_MYO_PORT_5 1030 ++#define SCIF_MYO_PORT_6 1031 ++#define SCIF_MYO_PORT_7 1032 ++#define SCIF_MYO_PORT_8 1033 ++#define SCIF_MYO_PORT_9 1034 ++ ++/* SSG Tools */ ++#define SCIF_ST_PORT_0 1044 ++#define SCIF_ST_PORT_1 1045 ++#define SCIF_ST_PORT_2 1046 ++#define SCIF_ST_PORT_3 1047 ++#define SCIF_ST_PORT_4 1048 ++#define SCIF_ST_PORT_5 1049 ++#define SCIF_ST_PORT_6 1050 ++#define SCIF_ST_PORT_7 1051 ++#define SCIF_ST_PORT_8 1052 ++#define SCIF_ST_PORT_9 1053 ++ ++/* End of SCIF Reserved Ports */ ++#define SCIF_PORT_RSVD 1088 ++//! @endcond ++ ++typedef struct endpt *scif_epd_t; ++ ++typedef struct scif_pinned_pages *scif_pinned_pages_t; ++ ++struct scif_range { ++ void *cookie; /* cookie */ ++ int nr_pages; /* Number of Pages */ ++ int prot_flags; /* R/W protection */ ++ /* Arrays phys_addr/va below are virtually contiguous */ ++ dma_addr_t *phys_addr; /* Array of physical addresses */ ++ void **va; /* Array of virtual addresses ++ * and populated only when called ++ * on the host for a remote SCIF ++ * connection on MIC. 
++ */ ++}; ++ ++struct scif_pollepd { ++ scif_epd_t epd; /* endpoint descriptor */ ++ short events; /* requested events */ ++ short revents; /* returned events */ ++}; ++enum scif_event_type { ++ SCIF_NODE_ADDED = 1<<0, ++ SCIF_NODE_REMOVED = 1<<1 ++}; ++ ++union eventd { ++ uint16_t scif_node_added; ++ uint16_t scif_node_removed; ++}; ++ ++typedef void (*scif_callback_t)(enum scif_event_type event, union eventd ++data); ++ ++struct scif_callback { ++ struct list_head list_member; ++ scif_callback_t callback_handler; ++}; ++ ++#define SCIF_OPEN_FAILED ((scif_epd_t)-1) ++#define SCIF_REGISTER_FAILED ((off_t)-1) ++#define SCIF_MMAP_FAILED ((void *)-1) ++ ++struct scif_portID { ++ uint16_t node; /* node on which port resides */ ++ uint16_t port; /* Local port number */ ++}; ++ ++/* Start: Deprecated Temporary definition for compatability */ ++#define portID scif_portID ++typedef struct portID portID_t; ++/* End: Deprecated Temporary definition for compatability */ ++ ++/** ++ * scif_open - Create an endpoint ++ * ++ *\return ++ * The scif_open() function creates a new endpoint. ++ * ++ * Upon successful completion, scif_open() returns an endpoint descriptor to ++ * be used in subsequent SCIF functions calls to refer to that endpoint; ++ * otherwise: in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is ++ * returned and errno is set to indicate the error; in kernel mode a NULL ++ * scif_epd_t is returned. ++ * ++ *\par Errors: ++ *- ENOMEM ++ * - Insufficient kernel memory was available. ++ *- ENXIO ++ * - Version mismatch between micscif driver and libscif. ++ */ ++scif_epd_t scif_open(void); ++ ++/** ++ * scif _bind - Bind an endpoint to a port ++ * \param epd endpoint descriptor ++ * \param pn port number ++ * ++ * scif_bind() binds endpoint epd to port pn, where pn is a port number on the ++ * local node. If pn is zero, a port number greater than or equal to ++ * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to ++ * exactly one local port. Ports less than 1024 when requested can only be bound ++ * by system (or root) processes or by processes executed by privileged users. ++ * ++ *\return ++ * Upon successful completion, scif_bind() returns the port number to which epd ++ * is bound; otherwise: in user mode -1 is returned and errno is set to ++ * indicate the error; in kernel mode the negative of one of the following ++ * errors is returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - The endpoint or the port are already bound. ++ *- EISCONN ++ * - The endpoint is already connected. ++ *- ENOSPC ++ * - No port number available for assignment (when pn==0). ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- EACCES ++ * - The port requested is protected and the user is not the superuser. ++*/ ++int scif_bind(scif_epd_t epd, uint16_t pn); ++ ++/** ++ * scif_listen - Listen for connections on an endpoint ++ * ++ * \param epd endpoint descriptor ++ * \param backlog maximum pending connection requests ++ * ++ * scif_listen() marks the endpoint epd as a listening endpoint - that is, as ++ * an endpoint that will be used to accept incoming connection requests. Once ++ * so marked, the endpoint is said to be in the listening state and may not be ++ * used as the endpoint of a connection. ++ * ++ * The endpoint, epd, must have been bound to a port. 
++ * ++ * The backlog argument defines the maximum length to which the queue of ++ * pending connections for epd may grow. If a connection request arrives when ++ * the queue is full, the client may receive an error with an indication that ++ * the connection was refused. ++ * ++ *\return ++ * Upon successful completion, scif_listen() returns 0; otherwise: in user mode ++ * -1 is returned and errno is set to indicate the error; in kernel mode the ++ * negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - The endpoint is not bound to a port ++ *- EISCONN ++ * - The endpoint is already connected or listening ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++*/ ++int scif_listen(scif_epd_t epd, int backlog); ++ ++/** ++ * scif_connect - Initiate a connection on a port ++ * \param epd endpoint descriptor ++ * \param dst global id of port to which to connect ++ * ++ * The scif_connect() function requests the connection of endpoint epd to remote ++ * port dst. If the connection is successful, a peer endpoint, bound to dst, is ++ * created on node dst.node. On successful return, the connection is complete. ++ * ++ * If the endpoint epd has not already been bound to a port, scif_connect() ++ * will bind it to an unused local port. ++ * ++ * A connection is terminated when an endpoint of the connection is closed, ++ * either explicitly by scif_close(), or when a process that owns one of the ++ * endpoints of a connection is terminated. ++ * ++ *\return ++ * Upon successful completion, scif_connect() returns the port ID to which the ++ * endpoint, epd, is bound; otherwise: in user mode -1 is returned and errno is ++ * set to indicate the error; in kernel mode the negative of one of the ++ * following errors is returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNREFUSED ++ * - The destination was not listening for connections or refused the ++ * connection request. ++ *- EINTR ++ * - Interrupted function ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - dst.port is not a valid port ID ++ *- EISCONN ++ * - The endpoint is already connected ++ *- ENOBUFS ++ * - No buffer space is available ++ *- ENODEV ++ * - The destination node does not exist, or ++ * - The node is lost. ++ *- ENOSPC ++ * - No port number available for assignment (when pn==0). ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- EOPNOTSUPP ++ * - The endpoint is listening and cannot be connected ++*/ ++int scif_connect(scif_epd_t epd, struct scif_portID *dst); ++ ++/** ++ * scif_accept - Accept a connection on an endpoint ++ * \param epd endpoint descriptor ++ * \param peer global id of port to which connected ++ * \param newepd new connected endpoint descriptor ++ * \param flags flags ++ * ++ * The scif_accept() call extracts the first connection request on the queue of ++ * pending connections for the port on which epd is listening. scif_accept() ++ * creates a new endpoint, bound to the same port as epd, and allocates a new ++ * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new ++ * endpoint is connected to the endpoint through which the connection was ++ * requested. epd is unaffected by this call, and remains in the listening ++ * state. 
++ * ++ * On successful return, peer holds the global port identifier (node id and ++ * local port number) of the port which requested the connection. ++ * ++ * If the peer endpoint which requested the connection is closed, the endpoint ++ * returned by scif_accept() is closed. ++ * ++ * The number of connections that can (subsequently) be accepted on epd is only ++ * limited by system resources (memory). ++ * ++ * The flags argument is formed by OR'ing together zero or more of the ++ * following values: ++ *- SCIF_ACCEPT_SYNC: block until a connection request is presented. If ++ * SCIF_ACCEPT_SYNC is not in flags, and no pending ++ * connections are present on the queue, scif_accept()fails ++ * with an EAGAIN error ++ * ++ * On Linux in user mode, the select() and poll() functions can be used to ++ * determine when there is a connection request. On Microsoft Windows* and on ++ * Linux in kernel mode, the scif_poll() function may be used for this purpose. ++ * A readable event will be delivered when a connection is requested. ++ * ++ *\return ++ * Upon successful completion, scif_accept() returns 0; otherwise: in user mode ++ * -1 is returned and errno is set to indicate the error; in kernel mode the ++ * negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EAGAIN ++ * - SCIF_ACCEPT_SYNC is not set and no connections are present to be accepted, or ++ * - SCIF_ACCEPT_SYNC is not set and remote node failed to complete its ++ * connection request ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- EINTR ++ * - Interrupted function ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - epd is not a listening endpoint ++ * - flags is invalid ++ * - peer is NULL ++ * - newepd is NULL ++ *- ENOBUFS ++ * - No buffer space is available ++ *- ENODEV ++ * - The requesting node is lost. ++ *- ENOMEM ++ * - Not enough space ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENOENT ++ * - Secondary part of epd registeration failed. ++*/ ++int scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t ++*newepd, int flags); ++ ++/** ++ * scif_close - Close an endpoint ++ * \param epd endpoint descriptor ++ * ++ * scif_close() closes an endpoint and performs necessary teardown of ++ * facilities associated with that endpoint. ++ * ++ * If epd is a listening endpoint then it will no longer accept connection ++ * requests on the port to which it is bound. Any pending connection requests ++ * are rejected. ++ * ++ * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs ++ * which are in-process through epd or its peer endpoint will complete before ++ * scif_close() returns. Registered windows of the local and peer endpoints are ++ * released as if scif_unregister() was called against each window. ++ * ++ * Closing an endpoint does not affect mappings to remote memory. These remain ++ * until explicitly removed by calling scif_munmap(). ++ * ++ * If the peer endpoint's receive queue is not empty at the time that epd is ++ * closed, then the peer endpoint can be passed as the endpoint parameter to ++ * scif_recv() until the receive queue is empty. ++ * ++ * If epd is bound to a port, then the port is returned to the pool of ++ * available ports. ++ * ++ * epd is freed and may no longer be accessed. 
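/*
 * Editorial sketch (not part of the scif.h header being added): the
 * connection life cycle documented by scif_open()/scif_bind()/scif_listen()/
 * scif_connect()/scif_accept()/scif_close() above, using the user-mode
 * failure conventions.  The port number is arbitrary; node 0 denotes the
 * host in SCIF node numbering.
 */
#define EXAMPLE_PORT 2000

static int example_server(void)			/* listening side */
{
	scif_epd_t lep, cep;
	struct scif_portID peer;

	lep = scif_open();
	if (lep == SCIF_OPEN_FAILED)
		return -1;
	if (scif_bind(lep, EXAMPLE_PORT) < 0 ||
	    scif_listen(lep, 1) < 0 ||
	    scif_accept(lep, &peer, &cep, SCIF_ACCEPT_SYNC) < 0) {
		scif_close(lep);
		return -1;
	}
	/* ... exchange messages on cep ... */
	scif_close(cep);
	return scif_close(lep);
}

static int example_client(uint16_t node)	/* connecting side */
{
	struct scif_portID dst = { .node = node, .port = EXAMPLE_PORT };
	scif_epd_t epd = scif_open();

	if (epd == SCIF_OPEN_FAILED)
		return -1;
	if (scif_connect(epd, &dst) < 0) {
		scif_close(epd);
		return -1;
	}
	/* ... exchange messages on epd ... */
	return scif_close(epd);
}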
++ * ++ *\return ++ * Upon successful completion, scif_close() returns 0; otherwise: in user mode ++ * -1 is returned and errno is set to indicate the error; in kernel mode the ++ * negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor ++ */ ++int scif_close(scif_epd_t epd); ++ ++/** ++ * scif_send - Send a message ++ * \param epd endpoint descriptor ++ * \param msg message buffer address ++ * \param len message length ++ * \param flags blocking mode flags ++ * ++ * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data ++ * are copied from memory starting at address msg. On successful execution the ++ * return value of scif_send() is the number of bytes that were sent, and is ++ * zero if no bytes were sent because len was zero. scif_send() may be called ++ * only when the endpoint is in a connected state. ++ * ++ * If a scif_send() call is non-blocking, then it sends only those bytes which ++ * can be sent without waiting, up to a maximum of len bytes. ++ * ++ * If a scif_send() call is blocking, then it normally returns after sending ++ * all len bytes. If a blocking call is interrupted or the connection is ++ * forcibly closed, the call is considered successful if some bytes were sent ++ * or len is zero, otherwise the call is considered unsuccessful. ++ * ++ * On Linux in user mode, the select() and poll() functions can be used to ++ * determine when the send queue is not full. On Microsoft Windows* and on ++ * Linux in kernel mode, the scif_poll() function may be used for this purpose. ++ * ++ * It is recommended that scif_send()/scif_recv() only be used for short ++ * control-type message communication between SCIF endpoints. The SCIF RMA ++ * APIs are expected to provide better performance for transfer sizes of ++ * 1024 bytes or longer. ++ * ++ * The flags argument is formed by ORing together zero or more of the following ++ * values: ++ *- SCIF_SEND_BLOCK: block until the entire message is sent. ++ * ++ *\return ++ * Upon successful completion, scif_send() returns the number of bytes sent; ++ * otherwise: in user mode -1 is returned and errno is set to indicate the ++ * error; in kernel mode the negative of one of the following errors is ++ * returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EFAULT ++ * - An invalid address was specified for a parameter. ++ *- EINTR ++ * - epd was closed by scif_close() ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - flags is invalid ++ * - len is negative ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOMEM ++ * - Not enough space ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ */ ++int scif_send(scif_epd_t epd, void *msg, int len, int flags); ++ ++/** ++ * scif_recv - Receive a message ++ * \param epd endpoint descriptor ++ * \param msg message buffer address ++ * \param len message buffer length ++ * \param flags blocking mode flags ++ * ++ * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of ++ * data are copied to memory starting at address msg. On successful execution ++ * the return value of scif_recv() is the number of bytes that were received, ++ * and is zero if no bytes were received because len was zero. 
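/*
 * Editorial sketch: the blocking send/receive pattern recommended above for
 * short control-type messages.  Message contents and sizes are arbitrary.
 */
static int example_ping(scif_epd_t epd)
{
	char req[64] = "ping", rsp[64];
	int ret;

	ret = scif_send(epd, req, sizeof(req), SCIF_SEND_BLOCK);
	if (ret < 0)
		return ret;	/* kernel mode: -errno; user mode: -1 with errno set */

	ret = scif_recv(epd, rsp, sizeof(rsp), SCIF_RECV_BLOCK);
	return ret < 0 ? ret : 0;
}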
scif_recv() may ++ * be called only when the endpoint is in a connected state. ++ * ++ * If a scif_recv() call is non-blocking, then it receives only those bytes ++ * which can be received without waiting, up to a maximum of len bytes. ++ * ++ * If a scif_recv() call is blocking, then it normally returns after receiving ++ * all len bytes. If a blocking call is interrupted or the connection is ++ * forcibly closed, the call is considered successful if some bytes were ++ * received or len is zero, otherwise the call is considered unsuccessful; ++ * subsequent calls to scif_recv() will successfully receive all data sent ++ * through peer endpoint interruption or the connection was forcibly closed. ++ * ++ * On Linux in user mode, the select() and poll() functions can be used to ++ * determine when data is available to be received. On Microsoft Windows* and ++ * on Linux in kernel mode, the scif_poll() function may be used for this ++ * purpose. ++ * ++ * It is recommended that scif_send()/scif_recv() only be used for short ++ * control-type message communication between SCIF endpoints. The SCIF RMA ++ * APIs are expected to provide better performance for transfer sizes of ++ * 1024 bytes or longer. ++ * ++ * The flags argument is formed by ORing together zero or more of the following ++ * values: ++ *- SCIF_RECV_BLOCK: block until the entire message is received. ++ * ++ *\return ++ * Upon successful completion, scif_recv() returns the number of bytes ++ * received; otherwise: in user mode -1 is returned and errno is set to ++ * indicate the error; in kernel mode the negative of one of the following ++ * errors is returned. ++ * ++ *\par Errors: ++ *- EAGAIN ++ * - The destination node is returning from a low power state. ++ *- EBADF ++ * - epd is not a valid endpoint descriptor . ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EFAULT ++ * - An invalid address was specified for a parameter. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - flags is invalid, or ++ * - len is negative. ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOMEM ++ * - Not enough space. ++ *- ENOTCONN ++ * - The endpoint is not connected. ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ */ ++int scif_recv(scif_epd_t epd, void *msg, int len, int flags); ++ ++/** ++ * scif_register - Mark a memory region for remote access. ++ * \param epd endpoint descriptor ++ * \param addr starting virtual address ++ * \param len length of range ++ * \param offset offset of window ++ * \param prot_flags read/write protection flags ++ * \param map_flags mapping flags ++ * ++ * The scif_register() function opens a window, a range of whole pages of the ++ * registered address space of the endpoint epd, starting at offset po and ++ * continuing for len bytes. The value of po, further described below, is a ++ * function of the parameters offset and len, and the value of map_flags. Each ++ * page of the window represents the physical memory page which backs the ++ * corresponding page of the range of virtual address pages starting at addr ++ * and continuing for len bytes. addr and len are constrained to be multiples ++ * of the page size. addr is interpreted as a user space address. A successful ++ * scif_register() call returns po as the return value. ++ * ++ * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset ++ * exactly, and offset is constrained to be a multiple of the page size. 
The ++ * mapping established by scif_register() will not replace any existing ++ * registration; an error is returned if any page within the range [offset, ++ * offset+len-1] intersects an existing window. ++ * Note: When SCIF_MAP_FIXED is set the current implementation limits ++ * offset to the range [0..2^62-1] and returns EADDRINUSE if the offset ++ * requested with SCIF_MAP_FIXED is in the range [2^62..2^63-1]. ++ * ++ * When SCIF_MAP_FIXED is not set, the implementation uses offset in an ++ * implementation-defined manner to arrive at po. The po value so chosen will ++ * be an area of the registered address space that the implementation deems ++ * suitable for a mapping of len bytes. An offset value of 0 is interpreted as ++ * granting the implementation complete freedom in selecting po, subject to ++ * constraints described below. A non-zero value of offset is taken to be a ++ * suggestion of an offset near which the mapping should be placed. When the ++ * implementation selects a value for po, it does not replace any extant ++ * window. In all cases, po will be a multiple of the page size. ++ * ++ * The physical pages which are so represented by a window are available for ++ * access in calls to scif_mmap(), scif_readfrom(), scif_writeto(), ++ * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the ++ * physical pages represented by the window will not be reused by the memory ++ * subsystem for any other purpose. Note that the same physical page may be ++ * represented by multiple windows. ++ * ++ * Subsequent operations which change the memory pages to which virtual ++ * addresses are mapped (such as mmap(), munmap(), scif_mmap() and ++ * scif_munmap()) have no effect on existing windows. ++ * ++ * On Linux, if the process will fork(), it is recommended that the registered ++ * virtual address range be marked with MADV_DONTFORK. Doing so will prevent ++ * problems due to copy-on-write semantics. ++ * ++ * The prot_flags argument is formed by OR'ing together one or more of the ++ * following values: ++ *- SCIF_PROT_READ: allow read operations from the window ++ *- SCIF_PROT_WRITE: allow write operations to the window ++ * ++ * The map_flags argument is formed by OR'ing together zero or more of ++ * the following values: ++ *- SCIF_MAP_FIXED: interpret offset exactly ++ * ++ *\return ++ * Upon successful completion, scif_register() returns the offset at which the ++ * mapping was placed (po); otherwise: in user mode SCIF_REGISTER_FAILED (that ++ * is (off_t *)-1) is returned and errno is set to indicate the error; in ++ * kernel mode the negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EADDRINUSE ++ * - SCIF_MAP_FIXED is set in map_flags, and pages in the range [offset, ++ * offset+len-1] are already registered ++ *- EAGAIN ++ * - The mapping could not be performed due to lack of resources ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EFAULT ++ * - Addresses in the range [addr , addr + len - 1] are invalid ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - map_flags is invalid, or ++ * - prot_flags is invalid, or ++ * - SCIF_MAP_FIXED is set in flags, and offset is not a multiple of ++ * the page size, or ++ * - addr is not a multiple of the page size, or ++ * - len is not a multiple of the page size, or is 0, or ++ * - offset is negative ++ *- ENODEV ++ * - The remote node is lost. 
++ *- ENOMEM ++ * - Not enough space ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ */ ++off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, ++int prot_flags, int map_flags); ++ ++/** ++ * scif_unregister - Mark a memory region for remote access. ++ * \param epd endpoint descriptor ++ * \param offset start of range to unregister ++ * \param len length of range to unregister ++ * ++ * The scif_unregister() function closes those previously registered windows ++ * which are entirely within the range [offset,offset+len-1]. It is an error to ++ * specify a range which intersects only a subrange of a window. ++ * ++ * On a successful return, pages within the window may no longer be specified ++ * in calls to scif_mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), ++ * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, however, ++ * continues to exist until all previous references against it are removed. A ++ * window is referenced if there is a mapping to it created by scif_mmap(), or if ++ * scif_get_pages() was called against the window (and the pages have not been ++ * returned via scif_put_pages()). A window is also referenced while an RMA, in ++ * which some range of the window is a source or destination, is in progress. ++ * Finally a window is referenced while some offset in that window was specified ++ * to scif_fence_signal(), and the RMAs marked by that call to ++ * scif_fence_signal() have not completed. While a window is in this state, its ++ * registered address space pages are not available for use in a new registered ++ * window. ++ * ++ * When all such references to the window have been removed, its references to ++ * all the physical pages which it represents are removed. Similarly, the ++ * registered address space pages of the window become available for ++ * registration in a new window. ++ * ++ *\return ++ * Upon successful completion, scif_unregister() returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. In the event of an ++ * error, no windows are unregistered. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - The range [offset,offset+len-1] intersects a subrange of a window, or ++ * - offset is negative ++ *- ENODEV ++ * -The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENXIO ++ * - Addresses in the range [offset,offset+len-1] are invalid for the ++ * registered address space of epd. ++ */ ++int scif_unregister(scif_epd_t epd, off_t offset, size_t len); ++ ++ ++/** ++ * scif_readfrom - Copy from a remote address space ++ * \param epd endpoint descriptor ++ * \param loffset offset in local registered address space to ++ * which to copy ++ * \param len length of range to copy ++ * \param roffset offset in remote registered address space ++ * from which to copy ++ * \param rma_flags transfer mode flags ++ * ++ * scif_readfrom() copies len bytes from the remote registered address space of ++ * the peer of endpoint epd, starting at the offset roffset to the local ++ * registered address space of epd, starting at the offset loffset. 
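/*
 * Editorial sketch: registering a local window and pulling data from the
 * peer's registered address space, per the scif_register()/scif_readfrom()
 * descriptions above.  User-mode allocation and a 4 KiB page size are
 * assumed, and len must already be a multiple of the page size.
 */
#include <stdlib.h>

static int example_rma_read(scif_epd_t epd, size_t len, off_t roffset)
{
	void *buf;
	off_t loffset;
	int err = -1;

	if (posix_memalign(&buf, 0x1000, len))
		return -1;

	/* offset 0 without SCIF_MAP_FIXED lets SCIF choose the window offset */
	loffset = scif_register(epd, buf, len, 0,
				SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
	if (loffset == SCIF_REGISTER_FAILED)
		goto out_free;

	/* copy len bytes from the peer's window at roffset into our window */
	err = scif_readfrom(epd, loffset, len, roffset, SCIF_RMA_SYNC);

	scif_unregister(epd, loffset, len);
out_free:
	free(buf);
	return err;
}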
++ * ++ * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ ++ * len-1] must be within some registered window or windows of the local and ++ * remote nodes respectively. A range may intersect multiple registered ++ * windows, but only if those windows are contiguous in the registered address ++ * space. ++ * ++ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using ++ * programmed read/writes. Otherwise the data is copied using DMA. If rma_- ++ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the ++ * transfer is complete. Otherwise, the transfer may be performed asynchron- ++ * ously. The order in which any two aynchronous RMA operations complete ++ * is non-deterministic. The synchronization functions, scif_fence_mark()/ ++ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to ++ * the completion of asynchronous RMA operations. ++ * ++ * The DMA transfer of individual bytes is not guaranteed to complete in ++ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last ++ * cacheline or partial cacheline of the source range will become visible on ++ * the destination node after all other transferred data in the source ++ * range has become visible on the destination node. ++ * ++ * The optimal DMA performance will likely be realized if both ++ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower ++ * performance will likely be realized if loffset and roffset are not ++ * cacheline aligned but are separated by some multiple of 64. The lowest level ++ * of performance is likely if loffset and roffset are not separated by a ++ * multiple of 64. ++ * ++ * The rma_flags argument is formed by ORing together zero or more of the ++ * following values: ++ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA ++ * engine. ++ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the ++ * transfer has completed. Passing this flag might result in ++ * the API busy waiting and consuming CPU cycles while the DMA ++ * transfer is in progress. ++ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of ++ * the source range becomes visible on the destination node ++ * after all other transferred data in the source range has ++ * become visible on the destination ++ * ++ *\return ++ * Upon successful completion, scif_readfrom() returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. ++ * ++ *\par Errors ++ *- EACCESS ++ * - Attempt to write to a read-only range or read from a write-only range ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - rma_flags is invalid ++ *- ENODEV ++ * -The remote node is lost. 
++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENXIO ++ * - The range [loffset,loffset+len-1] is invalid for the registered address ++ * space of epd, or, ++ * - The range [roffset,roffset+len-1] is invalid for the registered address ++ * space of the peer of epd, or ++ * - loffset or roffset is negative ++*/ ++int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t ++roffset, int rma_flags); ++ ++/** ++ * scif_writeto - Copy to a remote address space ++ * \param epd endpoint descriptor ++ * \param loffset offset in local registered address space ++ * from which to copy ++ * \param len length of range to copy ++ * \param roffset offset in remote registered address space to ++ * which to copy ++ * \param rma_flags transfer mode flags ++ * ++ * scif_writeto() copies len bytes from the local registered address space of ++ * epd, starting at the offset loffset to the remote registered address space ++ * of the peer of endpoint epd, starting at the offset roffset. ++ * ++ * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ ++ * len-1] must be within some registered window or windows of the local and ++ * remote nodes respectively. A range may intersect multiple registered ++ * windows, but only if those windows are contiguous in the registered address ++ * space. ++ * ++ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using ++ * programmed read/writes. Otherwise the data is copied using DMA. If rma_- ++ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the ++ * transfer is complete. Otherwise, the transfer may be performed asynchron- ++ * ously. The order in which any two aynchronous RMA operations complete ++ * is non-deterministic. The synchronization functions, scif_fence_mark()/ ++ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to ++ * the completion of asynchronous RMA operations. ++ * ++ * The DMA transfer of individual bytes is not guaranteed to complete in ++ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last ++ * cacheline or partial cacheline of the source range will become visible on ++ * the destination node after all other transferred data in the source ++ * range has become visible on the destination node. ++ * ++ * The optimal DMA performance will likely be realized if both ++ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower ++ * performance will likely be realized if loffset and roffset are not cacheline ++ * aligned but are separated by some multiple of 64. The lowest level of ++ * performance is likely if loffset and roffset are not separated by a multiple ++ * of 64. ++ * ++ * The rma_flags argument is formed by ORing together zero or more of the ++ * following values: ++ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA ++ * engine. ++ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the ++ * transfer has completed. Passing this flag might result in ++ * the API busy waiting and consuming CPU cycles while the DMA ++ * transfer is in progress. 
++ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of ++ * the source range becomes visible on the destination node ++ * after all other transferred data in the source range has ++ * become visible on the destination ++ * ++ *\return ++ * Upon successful completion, scif_readfrom() returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EACCESS ++ * - Attempt to write to a read-only range or read from a write-only range ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - rma_flags is invalid ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENXIO ++ * - The range [loffset,loffset+len-1] is invalid for the registered address ++ * space of epd, or, ++ * - The range [roffset , roffset + len -1] is invalid for the registered ++ * address space of the peer of epd, or ++ * - loffset or roffset is negative ++ */ ++int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t ++roffset, int rma_flags); ++ ++/** ++ * scif_vreadfrom - Copy from a remote address space ++ * \param epd endpoint descriptor ++ * \param addr address to which to copy ++ * \param len length of range to copy ++ * \param roffset offset in remote registered address space ++ * from which to copy ++ * \param rma_flags transfer mode flags ++ * ++ * scif_vreadfrom() copies len bytes from the remote registered address ++ * space of the peer of endpoint epd, starting at the offset roffset, to local ++ * memory, starting at addr. addr is interpreted as a user space address. ++ * ++ * The specified range [roffset,roffset+len-1] must be within some registered ++ * window or windows of the remote nodes respectively. The range may intersect ++ * multiple registered windows, but only if those windows are contiguous in the ++ * registered address space. ++ * ++ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using ++ * programmed read/writes. Otherwise the data is copied using DMA. If rma_- ++ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the ++ * transfer is complete. Otherwise, the transfer may be performed asynchron- ++ * ously. The order in which any two aynchronous RMA operations complete ++ * is non-deterministic. The synchronization functions, scif_fence_mark()/ ++ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to ++ * the completion of asynchronous RMA operations. ++ * ++ * The DMA transfer of individual bytes is not guaranteed to complete in ++ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last ++ * cacheline or partial cacheline of the source range will become visible on ++ * the destination node after all other transferred data in the source ++ * range has become visible on the destination node. ++ * ++ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back ++ * the specified local memory range may be remain in a pinned state even after ++ * the specified transfer completes. This may reduce overhead if some or all of ++ * the same virtual address range is referenced in a subsequent call of ++ * scif_vreadfrom() or scif_vwriteto(). 
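Returning to the scif_readfrom()/scif_writeto() calls documented above, a small sketch (not part of the patch) of a synchronous push followed by an ordered pull; loff and roff are assumed to be offsets previously returned by scif_register() locally and on the peer.

#include <scif.h>      /* assumed user-space header */

static int mirror_window(scif_epd_t epd, off_t loff, off_t roff, size_t len)
{
	/* Push the local window to the peer and wait for the DMA to finish. */
	if (scif_writeto(epd, loff, len, roff, SCIF_RMA_SYNC) < 0)
		return -1;

	/* Pull it back; SCIF_RMA_ORDERED makes the last cacheline visible only
	 * after the rest of the range, which suits a trailing "done" flag. */
	return scif_readfrom(epd, loff, len, roff,
			     SCIF_RMA_SYNC | SCIF_RMA_ORDERED);
}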
++ * ++ * The optimal DMA performance will likely be realized if both ++ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower ++ * performance will likely be realized if loffset and roffset are not ++ * cacheline aligned but are separated by some multiple of 64. The lowest level ++ * of performance is likely if loffset and roffset are not separated by a ++ * multiple of 64. ++ * ++ * The rma_flags argument is formed by ORing together zero or more of the ++ * following values: ++ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA ++ * engine. ++ *- SCIF_RMA_USECACHE: enable registration caching ++ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the ++ * transfer has completed. Passing this flag might result in ++ * the API busy waiting and consuming CPU cycles while the DMA ++ * transfer is in progress. ++ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of ++ * the source range becomes visible on the destination node ++ * after all other transferred data in the source range has ++ * become visible on the destination ++ * ++ *\return ++ * Upon successful completion, scif_vreadfrom() returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EACCESS ++ * - Attempt to write to a read-only range or read from a write-only range ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EFAULT ++ * - Addresses in the range [addr,addr+len-1] are invalid ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - rma_flags is invalid ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENXIO ++ * - Addresses in the range [roffset,roffset+len-1] are invalid for the ++ * registered address space of epd. ++ */ ++int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t offset, ++int rma_flags); ++ ++/** ++ * scif_vwriteto - Copy to a remote address space ++ * \param epd endpoint descriptor ++ * \param addr address from which to copy ++ * \param len length of range to copy ++ * \param roffset offset in remote registered address space to ++ * which to copy ++ * \param rma_flags transfer mode flags ++ * ++ * scif_vwriteto() copies len bytes from the local memory, starting at addr, to ++ * the remote registered address space of the peer of endpoint epd, starting at ++ * the offset roffset. addr is interpreted as a user space address. ++ * ++ * The specified range [roffset,roffset+len-1] must be within some registered ++ * window or windows of the remote nodes respectively. The range may intersect ++ * multiple registered windows, but only if those windows are contiguous in the ++ * registered address space. ++ * ++ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using ++ * programmed read/writes. Otherwise the data is copied using DMA. If rma_- ++ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the ++ * transfer is complete. Otherwise, the transfer may be performed asynchron- ++ * ously. The order in which any two aynchronous RMA operations complete ++ * is non-deterministic. 
The synchronization functions, scif_fence_mark()/ ++ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to ++ * the completion of asynchronous RMA operations. ++ * ++ * The DMA transfer of individual bytes is not guaranteed to complete in ++ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last ++ * cacheline or partial cacheline of the source range will become visible on ++ * the destination node after all other transferred data in the source ++ * range has become visible on the destination node. ++ * ++ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back ++ * the specified local memory range may be remain in a pinned state even after ++ * the specified transfer completes. This may reduce overhead if some or all of ++ * the same virtual address range is referenced in a subsequent call of ++ * scif_vreadfrom() or scif_vwriteto(). ++ * ++ * The optimal DMA performance will likely be realized if both ++ * addr and offset are cacheline aligned (are a multiple of 64). Lower ++ * performance will likely be realized if addr and offset are not cacheline ++ * aligned but are separated by some multiple of 64. The lowest level of ++ * performance is likely if addr and offset are not separated by a multiple of ++ * 64. ++ * ++ * The rma_flags argument is formed by ORing together zero or more of the ++ * following values: ++ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA ++ * engine. ++ *- SCIF_RMA_USECACHE: allow registration caching ++ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the ++ * transfer has completed. Passing this flag might result in ++ * the API busy waiting and consuming CPU cycles while the DMA ++ * transfer is in progress. ++ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of ++ * the source range becomes visible on the destination node ++ * after all other transferred data in the source range has ++ * become visible on the destination ++ * ++ *\return ++ * Upon successful completion, scif_vwriteto () returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EACCESS ++ * - Attempt to write to a read-only range or read from a write-only range ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EFAULT ++ * - Addresses in the range [addr,addr+len-1] are invalid ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - rma_flags is invalid ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENXIO ++ * - Addresses in the range [roffset,roffset+len-1] are invalid for the ++ * registered address space of epd. ++ */ ++int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t offset, ++int rma_flags); ++ ++/** ++ * scif_fence_mark - Mark previously issued RMAs ++ * \param epd endpoint descriptor ++ * \param flags control flags ++ * \param mark marked handle returned as output. ++ * ++ * scif_fence_mark() returns after marking the current set of all uncompleted ++ * RMAs initiated through the endpoint epd or the current set of all ++ * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are ++ * marked with a value returned at mark. 
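For the scif_vreadfrom()/scif_vwriteto() variants documented above, which take a local virtual address instead of a registered local offset, a brief sketch (not part of the patch); buf is assumed to be an ordinary user buffer and roff a registered offset in the peer's address space.

#include <scif.h>      /* assumed user-space header */

static int stage_to_peer(scif_epd_t epd, void *buf, size_t len, off_t roff)
{
	/* SCIF_RMA_USECACHE keeps the pages behind buf pinned so that a later
	 * transfer over the same range may skip the pinning cost. */
	if (scif_vwriteto(epd, buf, len, roff,
			  SCIF_RMA_SYNC | SCIF_RMA_USECACHE) < 0)
		return -1;

	/* Read the peer's copy back into the same buffer. */
	return scif_vreadfrom(epd, buf, len, roff,
			      SCIF_RMA_SYNC | SCIF_RMA_USECACHE);
}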
The application may subsequently call ++ * scif_fence_wait(), passing the value returned at mark, to await completion ++ * of all RMAs so marked. ++ * ++ * The flags argument has exactly one of the following values: ++ *- SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint ++ * epd are marked ++ *- SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer ++ * of endpoint epd are marked ++ * ++ * \return ++ * Upon successful completion, scif_fence_mark() returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - flags is invalid, or ++ * - epd is not a valid endpoint descriptor, or ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOMEM ++ * - Insufficient kernel memory was available. ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ */ ++int scif_fence_mark(scif_epd_t epd, int flags, int *mark); ++ ++/** ++ * scif_fence_wait - Wait for completion of marked RMAs ++ * ++ * \param epd endpoint descriptor ++ * \param mark mark request ++ * ++ * scif_fence_wait() returns after all RMAs marked with mark have completed. ++ * The value passed in mark must have been obtained in a previous call to ++ * scif_fence_mark(). ++ * ++ *\return ++ * Upon successful completion, scif_fence_wait() returns 0; otherwise: in user ++ * mode -1 is returned and errno is set to indicate the error; in kernel mode ++ * the negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOMEM ++ * - Insufficient kernel memory was available. ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ */ ++int scif_fence_wait(scif_epd_t epd, int mark); ++ ++/** ++ * scif_fence_signal - Request a signal on completion of RMAs ++ * \param loff local offset ++ * \param lval local value to write to loffset ++ * \param roff remote offset ++ * \param rval remote value to write to roffset ++ * \param flags flags ++ * ++ * scif_fence_signal() returns after marking the current set of all uncompleted ++ * RMAs initiated through the endpoint epd or marking the current set of all ++ * uncompleted RMAs initiated through the peer of endpoint epd. ++ * ++ * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the ++ * marked set, lval is written to memory at the address corresponding to offset ++ * loff in the local registered address space of epd. loff must be within a ++ * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion ++ * of the RMAs in the marked set, rval is written to memory at the * address ++ * corresponding to offset roff in the remote registered address space of epd. ++ * roff must be within a remote registered window of the peer of epd. Note ++ * that any specified offset must be DWORD (4 byte / 32 bit) aligned. 
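To tie the asynchronous RMA calls and the fence functions above together, a short sketch (not part of the patch) that issues an asynchronous write and then blocks until every RMA issued so far through epd has completed.

#include <scif.h>      /* assumed user-space header */

static int write_then_fence(scif_epd_t epd, off_t loff, off_t roff, size_t len)
{
	int mark;

	/* No SCIF_RMA_SYNC: the DMA may still be in flight when this returns. */
	if (scif_writeto(epd, loff, len, roff, 0) < 0)
		return -1;

	/* Mark the RMAs initiated through this endpoint, then wait for them. */
	if (scif_fence_mark(epd, SCIF_FENCE_INIT_SELF, &mark) < 0)
		return -1;
	return scif_fence_wait(epd, mark);
}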
++ * ++ * The flags argument is formed by OR'ing together the following: ++ *- Exactly one of the following values: ++ * - SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint ++ * epd are marked ++ * - SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer ++ * of endpoint epd are marked ++ *- One or more of the following values: ++ * - SCIF_SIGNAL_LOCAL: On completion of the marked set of RMAs, write lval to ++ * memory at the address corresponding to offset loff in the local registered ++ * address space of epd. ++ * - SCIF_SIGNAL_REMOTE: On completion of the marked set of RMAs, write lval to ++ * memory at the address corresponding to offset roff in the remote registered ++ * address space of epd. ++ * ++ *\return ++ * Upon successful completion, scif_fence_signal() returns 0; otherwise: in ++ * user mode -1 is returned and errno is set to indicate the error; in kernel ++ * mode the negative of one of the following errors is returned. ++ *\par Errors: ++ *- EBADF ++ * - epd is not a valid endpoint descriptor ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - flags is invalid, or ++ * - loff or roff are not DWORD aligned ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENOTTY ++ * - epd is not a valid endpoint descriptor ++ *- ENXIO ++ * - loff is invalid for the registered address of epd, or ++ * - roff is invalid for the registered address space, of the peer of epd ++ */ ++int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff, ++uint64_t rval, int flags); ++ ++/** ++ * scif_get_nodeIDs - Return information about online nodes ++ * \param nodes array in which to return online node IDs ++ * \param len number of entries in the nodes array ++ * \param self address to place the node ID of the local node ++ * ++ * scif_get_nodeIDs() fills in the nodes array with up to len node IDs of the ++ * nodes in the SCIF network. If there is not enough space in nodes, as ++ * indicated by the len parameter, only len node IDs are returned in nodes. The ++ * return value of scif_get_nodeID() is the total number of nodes currently in ++ * the SCIF network. By checking the return value against the len parameter, the user may ++ * determine if enough space for nodes was allocated. ++ * ++ * The node ID of the local node is returned at self. ++ * ++ *\return ++ * Upon successful completion, scif_get_nodeIDs() returns the actual number of ++ * online nodes in the SCIF network including 'self'; otherwise: in user mode ++ * -1 is returned and errno is set to indicate the error; in kernel mode no ++ * errors are returned. ++ * ++ *\par Errors: ++ *- EFAULT ++ * - Bad address ++ */ ++int scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self); ++ ++ ++/** ++ * scif_pin_pages - Pin a set of pages ++ * \param addr Virtual address of range to pin ++ * \param len Length of range to pin ++ * \param prot_flags Page protection flags ++ * \param map_flags Page classification flags ++ * \param pinned_pages Opaque handle of pinned pages ++ * ++ * scif_pin_pages() pins (locks in physical memory) the physical pages which ++ * back the range of virtual address pages starting at addr and continuing for ++ * len bytes. addr and len are constrained to be multiples of the page size. A ++ * successful scif_register() call returns an opaque pointer value at ++ * pinned_pages which may be used in subsequent calls to ++ * scif_register_pinned_pages(). 
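A small sketch (not part of the patch) of the scif_get_nodeIDs() enumeration documented above, assuming interest in at most 16 nodes:

#include <scif.h>      /* assumed user-space header */
#include <stdio.h>
#include <stdint.h>

static void list_nodes(void)
{
	uint16_t nodes[16], self;
	int total, shown, i;

	total = scif_get_nodeIDs(nodes, 16, &self);
	if (total < 0) {
		perror("scif_get_nodeIDs");
		return;
	}

	shown = total < 16 ? total : 16;   /* only len entries are filled in */
	printf("local node %u, %d node(s) online\n", self, total);
	for (i = 0; i < shown; i++)
		printf("  node %u\n", nodes[i]);
}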
++ * ++ * The pages will remain pinned as long as there is a reference against the ++ * scif_pinned_pages_t value returned by scif_pin_pages() and until ++ * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A ++ * reference is added to a scif_pinned_pages_t value each time a window is ++ * created by calling scif_register_pinned_pages() and passing the ++ * scif_pinned_pages_t value. A reference is removed from a scif_pinned_pages_t value ++ * each time such a window is deleted. ++ * ++ * Subsequent operations which change the memory pages to which virtual ++ * addresses are mapped (such as mmap(), munmap(), scif_mmap() and ++ * scif_munmap()) have no effect on the scif_pinned_pages_t value or windows ++ * created against it. ++ * ++ * On Linux, if the process will fork(), it is recommended that the registered ++ * virtual address range be marked with MADV_DONTFORK. Doing so will prevent ++ * problems due to copy-on-write semantics. ++ * ++ * The prot_flags argument is formed by OR'ing together one or more of the ++ * following values: ++ *- SCIF_PROT_READ: allow read operations against the pages ++ *- SCIF_PROT_WRITE: allow write operations against the pages ++ * The map_flags argument is formed by OR'ing together zero or more of the ++ * following values: ++ *- SCIF_MAP_KERNEL: interpret addr as a kernel space address. By default, addr ++ * is interpreted as a user space address. ++ * ++ *\return ++ * Upon successful completion, scif_register() returns 0; otherwise the ++ * negative of one of the following errors is returned. ++ *\par Errors: ++ *- EFAULT ++ * - Addresses in the range [addr,addr+len-1] are invalid ++ *- EINVAL ++ * - prot_flags is invalid, ++ * - map_flags is invalid, or ++ * - offset is negative ++ *- ENOMEM ++ * - Not enough space ++ */ ++int ++scif_pin_pages( ++ void *addr, ++ size_t len, ++ int prot_flags, ++ int map_flags, ++ scif_pinned_pages_t *pinned_pages); ++ ++/** ++ * scif_unpin_pages - Unpin a set of pages ++ * \param pinned_pages Opaque handle of pages to be unpinned ++ * ++ * scif_unpin_pages() prevents scif_register_pinned_pages()from registering new ++ * windows against pinned_pages. The physical pages represented by pinned_pages ++ * will remain pinned until all windows previously registered against ++ * pinned_pages are deleted (the window is scif_unregister()'d and all ++ * references to the window are removed (see scif_unregister()). ++ * ++ * pinned_pages must have been obtain from a previous call to scif_pin_pages(). ++ * After calling scif_unpin_pages(), it is an error to pass pinned_pages to ++ * scif_register_pinned_pages(). ++ * ++ *\return: ++ * Upon successful completion, scif_unpin_pages() returns 0; otherwise the ++ * negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EINVAL ++ * - pinned_pages is not valid ++ */ ++int ++scif_unpin_pages( ++ scif_pinned_pages_t pinned_pages); ++ ++/** ++ * scif_register_pinned_pages - Mark a memory region for remote access. ++ * \param epd Endpoint descriptor ++ * \param pinned_pages Opaque handle of pinned pages ++ * \param offset Registered address space offset ++ * \param map_flags Flags which control where pages are mapped ++ * ++ * The scif_register_pinned_pages() function opens a window, a range of whole ++ * pages of the registered address space of the endpoint epd, starting at ++ * offset po. The value of po, further described below, is a function of the ++ * parameters offset and pinned_pages, and the value of map_flags. 
Each page of ++ * the window represents a corresponding physical memory page of the range ++ * represented by pinned_pages; the length of the window is the same as the ++ * length of range represented by pinned_pages. A successful scif_register() ++ * call returns po as the return value. ++ * ++ * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset ++ * exactly, and offset is constrained to be a multiple of the page size. The ++ * mapping established by scif_register() will not replace any existing ++ * registration; an error is returned if any page of the new window would ++ * intersect an existing window. ++ * ++ * When SCIF_MAP_FIXED is not set, the implementation uses offset in an ++ * implementation-defined manner to arrive at po. The po so chosen will be an ++ * area of the registered address space that the implementation deems suitable ++ * for a mapping of the required size. An offset value of 0 is interpreted as ++ * granting the implementation complete freedom in selecting po, subject to ++ * constraints described below. A non-zero value of offset is taken to be a ++ * suggestion of an offset near which the mapping should be placed. When the ++ * implementation selects a value for po, it does not replace any extant ++ * window. In all cases, po will be a multiple of the page size. ++ * ++ * The physical pages which are so represented by a window are available for ++ * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), ++ * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the ++ * physical pages represented by the window will not be reused by the memory ++ * subsytem for any other purpose. Note that the same physical page may be ++ * represented by multiple windows. ++ * ++ * Windows created by scif_register_pinned_pages() are unregistered by ++ * scif_unregister(). ++ * ++ * The map_flags argument is formed by OR'ing together zero or more of the ++ * following values: ++ *- SCIF_MAP_FIXED: interpret offset exactly ++ * ++ *\return ++ * Upon successful completion, scif_register_pinned_pages() returns the offset ++ * at which the mapping was placed (po); otherwise the negative of one of the ++ * following errors is returned. ++ *\par Errors: ++ *- EADDRINUSE ++ * - SCIF_MAP_FIXED is set in map_flags and pages in the new ++ * window would intersect an existing window ++ *- EAGAIN ++ * - The mapping could not be performed due to lack of resources ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - map_flags is invalid, or ++ * - SCIF_MAP_FIXED is set in map_flags, and offset is not a ++ * multiple of the page size, or ++ * - offset is negative ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOMEM ++ * - Not enough space ++ *- ENOTCONN ++ * - The endpoint is not connected ++ */ ++off_t ++scif_register_pinned_pages( ++ scif_epd_t epd, ++ scif_pinned_pages_t pinned_pages, ++ off_t offset, ++ int map_flags); ++ ++/** ++ * scif_get_pages - Add references to remote registered pages ++ * \param epd endpoint descriptor ++ * \param offset registered address space offset ++ * \param len length of range of pages ++ * \param pages returned scif_range structure ++ * ++ * scif_get_pages() returns the addresses of the physical pages represented by ++ * those pages of the registered address space of the peer of epd, starting at ++ * offset and continuing for len bytes. offset and len are constrained to be ++ * multiples of the page size. 
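For the kernel-mode pinning path above, a minimal sketch (not part of the patch) that pins a page-aligned kernel buffer and publishes it as a window on epd; kbuf and len are assumed to be multiples of the page size, and the include path of the kernel SCIF header is an assumption.

#include "scif.h"      /* the header added by this patch; path assumed */

static off_t publish_kernel_buffer(scif_epd_t epd, void *kbuf, size_t len)
{
	scif_pinned_pages_t pp;
	off_t ro;
	int err;

	err = scif_pin_pages(kbuf, len, SCIF_PROT_READ | SCIF_PROT_WRITE,
			     SCIF_MAP_KERNEL, &pp);
	if (err)
		return err;			/* negative errno */

	/* offset 0 without SCIF_MAP_FIXED: the implementation chooses po */
	ro = scif_register_pinned_pages(epd, pp, 0, 0);
	if (ro < 0)
		scif_unpin_pages(pp);		/* no window was created */

	return ro;				/* window offset, or negative errno */
}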
++ * ++ * All of the pages in the specified range [offset,offset+len-1] must be within ++ * a single window of the registered address space of the peer of epd. ++ * ++ * The addresses are returned as a virtually contiguous array pointed to by the ++ * phys_addr component of the scif_range structure whose address is returned in ++ * pages. The nr_pages component of scif_range is the length of the array. The ++ * prot_flags component of scif_range holds the protection flag value passed ++ * when the pages were registered. ++ * ++ * Each physical page whose address is returned by scif_get_pages() remains ++ * available and will not be released for reuse until the scif_range structure ++ * is returned in a call to scif_put_pages(). The scif_range structure returned ++ * by scif_get_pages() must be unmodified. ++ * ++ * It is an error to call scif_close() on an endpoint on which a scif_range ++ * structure of that endpoint has not been returned to scif_put_pages(). ++ * ++ *\return ++ * Upon successful completion, scif_get_pages() returns 0; otherwise the ++ * negative of one of the following errors is returned. ++ *\par Errors: ++ *- ECONNRESET ++ * - A connection was forcibly closed by a peer. ++ *- EINVAL ++ * - epd is not a valid endpoint descriptor, or ++ * - offset is not a multiple of the page size, or ++ * - offset is negative, or ++ * - len is not a multiple of the page size ++ *- ENODEV ++ * -The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected ++ *- ENXIO ++ * - Addresses in the range [offset,offset+len-1] are invalid ++ * for the registered address space of the peer epd. ++ */ ++int scif_get_pages( ++ scif_epd_t epd, ++ off_t offset, ++ size_t len, ++ struct scif_range **pages); ++ ++/** ++ * scif_put_pages - Remove references from remote registered pages ++ * \param pages pages to be returned ++ * ++ * scif_put_pages() releases a scif_range structure previously obtained by ++ * calling scif_get_pages(). The physical pages represented by pages may ++ * be reused when the window which represented those pages is unregistered. ++ * Therefore, those pages must not be accessed after calling scif_put_pages(). ++ * ++ *\return ++ * Upon successful completion, scif_put_pages() returns 0; otherwise the ++ * negative of one of the following errors is returned. ++ *\par Errors: ++ *- EINVAL ++ * - pages does not point to a valid scif_range structure, or ++ * - the scif_range structure pointed to by pages was already returned. ++ *- ENODEV ++ * - The remote node is lost. ++ *- ENOTCONN ++ * - The endpoint is not connected. ++ */ ++int scif_put_pages( ++ struct scif_range *pages); ++ ++/** ++ * scif_poll - Wait for some event on an endpoint ++ * \param epds Array of endpoint descriptors ++ * \param nepds Length of epds ++ * \param timeout Upper limit on time for which scif_poll() will ++ * block ++ * ++ * scif_poll() waits for one of a set of endpoints to become ready to perform ++ * an I/O operation. scif_poll() exposes a subset of the functionality of the ++ * POSIX standard poll() function. ++ * ++ * The epds argument specifies the endpoint descriptors to be examined and the ++ * events of interest for each endpoint descriptor. epds is a pointer to an ++ * array with one member for each open endpoint descriptor of interest. ++ * ++ * The number of items in the epds array is specified in nepds. The epd field ++ * of scif_pollepd is an endpoint descriptor of an open endpoint. 
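As an illustration of the scif_get_pages()/scif_put_pages() pair documented above (not part of the patch), a kernel-mode sketch that looks up the physical pages behind one page of a peer window at offset roff and then drops the references again. The scif_range field names follow the description above; their exact types are assumptions.

#include <linux/kernel.h>
#include "scif.h"      /* the header added by this patch; path assumed */

static int peek_peer_page(scif_epd_t epd, off_t roff, size_t page_size)
{
	struct scif_range *range;
	int err;

	err = scif_get_pages(epd, roff, page_size, &range);
	if (err)
		return err;			/* negative errno */

	pr_info("peer window: %d page(s), prot 0x%x, first phys %#llx\n",
		range->nr_pages, range->prot_flags,
		(unsigned long long)range->phys_addr[0]);

	/* Release the references so the peer window can eventually go away. */
	return scif_put_pages(range);
}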
The field ++ * events is a bitmask specifying the events which the application is ++ * interested in. The field revents is an output parameter, filled by the ++ * kernel with the events that actually occurred. The bits returned in revents ++ * can include any of those specified in events, or one of the values ++ * SCIF_POLLERR, SCIF_POLLHUP, or SCIF_POLLNVAL. (These three bits are ++ * meaningless in the events field, and will be set in the revents field ++ * whenever the corresponding condition is true.) ++ * ++ * If none of the events requested (and no error) has occurred for any of the ++ * endpoint descriptors, then scif_poll() blocks until one of the events occurs. ++ * ++ * The timeout argument specifies an upper limit on the time for which ++ * scif_poll() will block, in milliseconds. Specifying a negative value in ++ * timeout means an infinite timeout. ++ * ++ * The following bits may be set in events and returned in revents: ++ *- SCIF_POLLIN: Data may be received without blocking. For a connected ++ * endpoint, this means that scif_recv() may be called without blocking. For a ++ * listening endpoint, this means that scif_accept() may be called without ++ * blocking. ++ *- SCIF_POLLOUT: Data may be sent without blocking. For a connected endpoint, ++ * this means that scif_send() may be called without blocking. This bit value ++ * has no meaning for a listening endpoint and is ignored if specified. ++ * ++ * The following bits are only returned in revents, and are ignored if set in ++ * events: ++ *- SCIF_POLLERR: An error occurred on the endpoint ++ *- SCIF_POLLHUP: The connection to the peer endpoint was disconnected ++ *- SCIF_POLLNVAL: The specified endpoint descriptor is invalid. ++ * ++ *\return ++ * Upon successful completion, scif_poll()returns a non-negative value. A ++ * positive value indicates the total number of endpoint descriptors that have ++ * been selected (that is, endpoint descriptors for which the revents member is ++ * non-zero. A value of 0 indicates that the call timed out and no endpoint ++ * descriptors have been selected. Otherwise: in user mode -1 is returned and ++ * errno is set to indicate the error; in kernel mode the negative of one of ++ * the following errors is returned. ++ * ++ *\par Errors: ++ *- EFAULT ++ * - The array given as argument was not contained in the calling program's ++ * address space. ++ *- EINTR ++ * - A signal occurred before any requested event. ++ *- EINVAL ++ * - The nepds argument is greater than {OPEN_MAX} ++ *- ENOMEM ++ * - There was no space to allocate file descriptor tables. ++*/ ++int ++scif_poll( ++ struct scif_pollepd *epds, ++ unsigned int nepds, ++ long timeout); ++ ++/** ++ * scif_event_register - Register an event handler ++ * \param handler Event handler to be registered ++ * ++ * scif_event_register() registers a routine, handler, to be called when some ++ * event occurs. The event parameter to handler indicates the type of event ++ * which has occurred, and the corresponding component of the data parameter to ++ * handler provides additional data about the event. ++ * ++ * The following events are defined: ++ *- SCIF_NODE_ADDED: A node has been added to the SCIF network. The ++ * scif_node_added component of the data parameter to handler identifies the ++ * node. This event is informational. There are no requirements on the event ++ * handler. ++ *- SCIF_NODE_REMOVED: A node is being removed from the SCIF network. 
The ++ * scif_node_removed component of the data parameter to handler identifies the ++ * node. Upon being called, and before returning, the event handler must ++ * return, using scif_put_pages(), all structures obtained using ++ * scif_get_pages() against an endpoint connected to the lost node. It is ++ * recommended and expected that the handler will also scif_close() all ++ * endpoints connected to the lost node. ++ * ++ *\return ++ * Upon successful completion scif_event_register() returns 0. ++ * ++ *\par Errors: ++ *- ENOMEM ++ * - There was no space to allocate file descriptor tables. ++*/ ++ ++int ++scif_event_register( ++ scif_callback_t handler); ++ ++/** ++ * scif_event_unregister - Unregister event handler ++ * \param handler Event handler to be unregistered ++ * ++ * scif_event_unregister() unregisters the handler which was registered ++ * previously by using scif_event_register(). ++ * ++ * WARNING: scif_event_unregister must be called before the module ++ * (that registered handles) exits for every handler that is registered. ++ * Failure to do so will result in crash of the scif module. ++ * ++ *\return ++ * Upon successful completion scif_event_unregister() returns 0. ++ *\par Errors: ++ *- EINVAL ++ * -If the event handler was not found/registered. ++*/ ++int ++scif_event_unregister( ++ scif_callback_t handler); ++ ++/* ++ * Note: The callee can use pci_resource_start(dev, index) and ++ * pci_resource_len(dev, index) to obtain the PCI resource starting ++ * physical address and length for valid non null indexes of the va ++ * array. MMIO bars will not have IORESOURCE_PREFETCH set in the ++ * flags obtained from pci_resource_flags(dev, index). va[index] ++ * will be set to NULL for invalid resources. ++ */ ++struct scif_pci_info { ++ /* pci_dev pointer associated with a node */ ++ struct pci_dev *pdev; ++ /* Ioremapped virtual address base for every valid PCIe resource */ ++ void __iomem *va[PCI_NUM_RESOURCES]; ++}; ++ ++/** ++ * scif_pci_info - Populate the scif_pci_info structure for a node. ++ * \param node The node to query ++ * \param dev The scif_pci_info structure to populate. ++ * ++ * scif_pci_info() populates the provided scif_pci_info structure ++ * associated with a node. The requested node ID cannot be the same as ++ * the current node. This routine will only return success when called from ++ * the host. ++ * ++ *\return ++ * Upon successful completion, scif_pci_info() returns 0; otherwise the ++ * negative of one of the following errors is returned. ++ * ++ *\par Errors: ++ *- EINVAL ++ * - The requested node is not valid. ++ * - Called on MIC instead of the host. ++ *- ENODEV ++ * - No pci_dev association exists for the node. 
++ */ ++int ++scif_pci_info( ++ uint16_t node, ++ struct scif_pci_info *dev); ++ ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* __SCIF_H__ */ diff --git a/tech-preview/xeon-phi/0007-Add-CCL-Direct-ibp-drivers-to-Infiniband.patch b/tech-preview/xeon-phi/0007-Add-CCL-Direct-ibp-drivers-to-Infiniband.patch new file mode 100644 index 0000000..86e2eef --- /dev/null +++ b/tech-preview/xeon-phi/0007-Add-CCL-Direct-ibp-drivers-to-Infiniband.patch @@ -0,0 +1,9643 @@ +From a6d3fc7a6f6d3b3b621dfbd71babbff5ae58d1dd Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Wed, 28 May 2014 15:50:26 -0700 +Subject: [PATCH 07/13] Add CCL-Direct (ibp) drivers to Infiniband + +This includes the base ibp server module as well as +the server modules for sa and cm + +Signed-off-by: Phil Cayton +--- +diff -urN a6/drivers/infiniband/ibp/cm/cm_ibp_abi.h a7/drivers/infiniband/ibp/cm/cm_ibp_abi.h +--- a6/drivers/infiniband/ibp/cm/cm_ibp_abi.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/cm_ibp_abi.h 2015-02-23 10:01:30.289769309 -0800 +@@ -0,0 +1,399 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef CM_IBP_ABI_H ++#define CM_IBP_ABI_H ++ ++#include ++#include ++#include ++ ++/* Increment this value if any changes break compatibility. */ ++#define IBP_CM_ABI_VERSION 1 ++ ++/* ++ * Make sure that all structs defined in this file are laid out to pack ++ * the same way on different architectures to avoid incompatibility. ++ * ++ * Specifically: ++ * - Do not use pointer types -- pass pointers in a u64 instead. ++ * - Make sure that any structure larger than 4 bytes is padded ++ * to a multiple of 8 bytes; otherwise the structure size may ++ * be different between architectures. 
++ */ ++ ++struct ibp_event_msg { ++ struct ibp_msg_header header; ++ u64 length; ++ u8 event[0]; ++}; ++ ++ ++struct ibp_sa_path_rec { ++ __be64 service_id; ++ u64 dgid_prefix; ++ u64 dgid_id; ++ u64 sgid_prefix; ++ u64 sgid_id; ++ __be16 dlid; ++ __be16 slid; ++ u32 raw_traffic; ++ __be32 flow_label; ++ u8 hop_limit; ++ u8 traffic_class; ++ u32 reversible; ++ u8 numb_path; ++ __be16 pkey; ++ __be16 qos_class; ++ u8 sl; ++ u8 mtu_selector; ++ u8 mtu; ++ u8 rate_selector; ++ u8 rate; ++ u8 packet_life_time_selector; ++ u8 packet_life_time; ++ u8 preference; ++}; ++ ++struct ibp_create_cm_id_cmd { ++ struct ibp_msg_header header; ++ u64 device; ++}; ++ ++struct ibp_create_cm_id_resp { ++ u64 ibp_cm_id; ++ __be64 service_id; ++ __be64 service_mask; ++ __be32 local_id; ++ __be32 remote_id; ++ u32 remote_cm_qpn; ++ u32 filler; ++}; ++ ++struct ibp_destroy_cm_id_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++}; ++ ++struct ibp_cm_listen_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ __be64 service_id; ++ __be64 service_mask; ++ u64 null_comp_data; ++ struct ib_cm_compare_data compare_data; ++}; ++ ++struct ibp_send_cm_req_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ struct ibp_sa_path_rec primary_path; ++ struct ibp_sa_path_rec alternate_path; ++ __be64 service_id; ++ u32 qp_num; ++ enum ib_qp_type qp_type; ++ u32 starting_psn; ++ u8 peer_to_peer; ++ u8 responder_resources; ++ u8 initiator_depth; ++ u8 remote_cm_response_timeout; ++ u8 flow_control; ++ u8 local_cm_response_timeout; ++ u8 retry_count; ++ u8 rnr_retry_count; ++ u8 max_cm_retries; ++ u8 srq; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_rep_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u32 qp_num; ++ u32 starting_psn; ++ u8 responder_resources; ++ u8 initiator_depth; ++ u8 failover_accepted; ++ u8 flow_control; ++ u8 rnr_retry_count; ++ u8 srq; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_rtu_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_dreq_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_drep_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_rej_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u64 reason; ++ u8 private_data_len; ++ u8 ari_length; ++ char data[0]; ++}; ++ ++struct ibp_send_cm_mra_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u8 service_timeout; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_lap_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ struct ibp_sa_path_rec alternate_path; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_apr_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u64 status; ++ u8 private_data_len; ++ u8 info_length; ++ char data[0]; ++}; ++ ++struct ibp_send_cm_sidr_req_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ struct ibp_sa_path_rec path; ++ __be64 service_id; ++ int timeout_ms; ++ u8 max_cm_retries; ++ u8 private_data_len; ++ char private_data[0]; ++}; ++ ++struct ibp_send_cm_sidr_rep_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u32 qp_num; ++ u32 qkey; ++ u64 status; ++ u8 info_length; ++ u8 private_data_len; ++ char data[0]; ++}; ++ ++struct ibp_cm_notify_cmd { ++ struct 
ibp_msg_header header; ++ u64 ibp_cm_id; ++ u64 event; ++}; ++ ++struct ibp_cm_init_qp_attr_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_cm_id; ++ u64 qp_attr_state; ++}; ++ ++struct ibp_cm_init_qp_attr_resp { ++ u64 qp_attr_mask; ++ u64 qp_access_flags; ++ u64 qp_state; ++ u64 cur_qp_state; ++ u64 path_mtu; ++ u64 path_mig_state; ++ u32 qkey; ++ u32 rq_psn; ++ u32 sq_psn; ++ u64 dest_qp_num; ++ ++ u32 cap_max_send_wr; ++ u32 cap_max_recv_wr; ++ u32 cap_max_send_sge; ++ u32 cap_max_recv_sge; ++ u32 cap_max_inline_data; ++ ++ u64 ah_attr_grh_dgid_subnet_prefix; ++ u64 ah_attr_grh_dgid_interface_id; ++ u32 ah_attr_grh_flow_label; ++ u8 ah_attr_grh_sgid_index; ++ u8 ah_attr_grh_hop_limit; ++ u8 ah_attr_grh_traffic_class; ++ u16 ah_attr_dlid; ++ u8 ah_attr_sl; ++ u8 ah_attr_src_path_bits; ++ u8 ah_attr_static_rate; ++ u8 ah_attr_ah_flags; ++ u8 ah_attr_port_num; ++ ++ u64 alt_attr_grh_dgid_subnet_prefix; ++ u64 alt_attr_grh_dgid_interface_id; ++ u32 alt_attr_grh_flow_label; ++ u8 alt_attr_grh_sgid_index; ++ u8 alt_attr_grh_hop_limit; ++ u8 alt_attr_grh_traffic_class; ++ u16 alt_attr_dlid; ++ u8 alt_attr_sl; ++ u8 alt_attr_src_path_bits; ++ u8 alt_attr_static_rate; ++ u8 alt_attr_ah_flags; ++ u8 alt_attr_port_num; ++ ++ u16 pkey_index; ++ u16 alt_pkey_index; ++ u8 en_sqd_async_notify; ++ u8 sq_draining; ++ u8 max_rd_atomic; ++ u8 max_dest_rd_atomic; ++ u8 min_rnr_timer; ++ u8 port_num; ++ u8 timeout; ++ u8 retry_cnt; ++ u8 rnr_retry; ++ u8 alt_port_num; ++ u8 alt_timeout; ++ ++}; ++ ++struct ibp_cm_req_event_resp { ++ struct ibp_sa_path_rec primary_path; ++ struct ibp_sa_path_rec alternate_path; ++ u64 listen_id; ++ __be64 remote_ca_guid; ++ __u32 remote_qkey; ++ __u32 remote_qpn; ++ __u32 qp_type; ++ __u32 starting_psn; ++ __u8 responder_resources; ++ __u8 initiator_depth; ++ __u8 local_cm_response_timeout; ++ __u8 flow_control; ++ __u8 remote_cm_response_timeout; ++ __u8 retry_count; ++ __u8 rnr_retry_count; ++ __u8 srq; ++ __u8 port; ++ __u8 reserved[7]; ++}; ++ ++struct ibp_cm_rep_event_resp { ++ __be64 remote_ca_guid; ++ __u32 remote_qkey; ++ __u32 remote_qpn; ++ __u32 starting_psn; ++ __u8 responder_resources; ++ __u8 initiator_depth; ++ __u8 target_ack_delay; ++ __u8 failover_accepted; ++ __u8 flow_control; ++ __u8 rnr_retry_count; ++ __u8 srq; ++ __u8 reserved[5]; ++}; ++ ++struct ibp_cm_rej_event_resp { ++ __u32 reason; ++}; ++ ++struct ibp_cm_mra_event_resp { ++ __u8 timeout; ++ __u8 reserved[3]; ++}; ++ ++struct ibp_cm_lap_event_resp { ++ struct ibp_sa_path_rec path; ++}; ++ ++struct ibp_cm_rtu_event_resp { ++ __u32 status; ++ __be32 local_id; ++ __be32 remote_id; ++}; ++ ++struct ibp_cm_apr_event_resp { ++ __u32 status; ++}; ++ ++struct ibp_cm_sidr_req_event_resp { ++ u64 listen_id; ++ __u16 pkey; ++ __u8 port; ++ __u8 reserved; ++}; ++ ++struct ibp_cm_sidr_rep_event_resp { ++ __u32 status; ++ __u32 qkey; ++ __u32 qpn; ++}; ++ ++struct ibp_cm_event { ++ enum ib_event_type event_type; ++ union { ++ struct ibp_cm_req_event_resp req_resp; ++ struct ibp_cm_rep_event_resp rep_resp; ++ struct ibp_cm_rej_event_resp rej_resp; ++ struct ibp_cm_rtu_event_resp rtu_resp; ++ struct ibp_cm_mra_event_resp mra_resp; ++ struct ibp_cm_lap_event_resp lap_resp; ++ struct ibp_cm_apr_event_resp apr_resp; ++ struct ibp_cm_sidr_req_event_resp sidr_req_resp; ++ struct ibp_cm_sidr_rep_event_resp sidr_rep_resp; ++ ++ __u32 send_status; ++ } u; ++ ++ u64 event_cm_id; ++ u64 ibp_cm_id; ++ u64 data_length; ++ u64 info_length; ++ ++ u8 data[0]; ++}; ++ ++#endif /* CM_IBP_ABI_H */ +diff -urN 
a6/drivers/infiniband/ibp/cm/cm_server_msg.c a7/drivers/infiniband/ibp/cm/cm_server_msg.c +--- a6/drivers/infiniband/ibp/cm/cm_server_msg.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/cm_server_msg.c 2015-02-23 10:18:09.042820508 -0800 +@@ -0,0 +1,1058 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "server.h" ++ ++LIST_HEAD(cm_entry_list); ++ ++void ibp_copy_sa_path_rec(struct ibp_sa_path_rec *a, struct ib_sa_path_rec *b) ++{ ++ /*Copy ibp_sa_path_rec to ib_sa_path_rec*/ ++ b->service_id = a->service_id; ++ b->dgid.global.subnet_prefix = a->dgid_prefix; ++ b->dgid.global.interface_id = a->dgid_id; ++ b->sgid.global.subnet_prefix = a->sgid_prefix; ++ b->sgid.global.interface_id = a->sgid_id; ++ b->dlid = a->dlid; ++ b->slid = a->slid; ++ b->raw_traffic = a->raw_traffic; ++ b->flow_label = a->flow_label; ++ b->hop_limit = a->hop_limit; ++ b->traffic_class = a->traffic_class; ++ b->reversible = a->reversible; ++ b->numb_path = a->numb_path; ++ b->pkey = a->pkey; ++ b->qos_class = a->qos_class; ++ b->sl = a->sl; ++ b->mtu_selector = a->mtu_selector; ++ b->mtu = a->mtu; ++ b->rate_selector = a->rate_selector; ++ b->rate = a->rate; ++ b->packet_life_time_selector = a->packet_life_time_selector; ++ b->packet_life_time = a->packet_life_time; ++ b->preference = a->preference; ++} ++ ++void ib_copy_sa_path_rec(struct ibp_sa_path_rec *a, struct ib_sa_path_rec *b) ++{ ++ /*Copy ib_sa_path_rec to ibp_sa_path_rec*/ ++ a->service_id = b->service_id; ++ a->dgid_prefix = b->dgid.global.subnet_prefix; ++ a->dgid_id = b->dgid.global.interface_id; ++ a->sgid_prefix = b->sgid.global.subnet_prefix; ++ a->sgid_id = b->sgid.global.interface_id; ++ a->dlid = b->dlid; ++ a->slid = b->slid; ++ a->raw_traffic = b->raw_traffic; ++ a->flow_label = b->flow_label; ++ a->hop_limit = b->hop_limit; ++ a->traffic_class = b->traffic_class; ++ a->reversible = b->reversible; ++ a->numb_path = b->numb_path; ++ a->pkey = b->pkey; ++ a->qos_class = b->qos_class; ++ a->sl = b->sl; ++ a->mtu_selector = b->mtu_selector; ++ a->mtu = b->mtu; ++ a->rate_selector = b->rate_selector; ++ a->rate = b->rate; ++ 
a->packet_life_time_selector = b->packet_life_time_selector; ++ a->packet_life_time = b->packet_life_time; ++ a->preference = b->preference; ++} ++ ++void cleanup_cm_entry_list(void) ++{ ++ struct cm_entry *entry; ++ struct cm_entry *next; ++ ++ down_write(&list_rwsem); ++ ++ list_for_each_entry_safe(entry, next, &cm_entry_list, list) ++ kfree(entry); ++ ++ up_write(&list_rwsem); ++} ++ ++static struct cm_entry *find_cm_entry(struct ib_cm_id *cm_id) ++{ ++ struct cm_entry *entry; ++ ++ down_read(&list_rwsem); ++ ++ list_for_each_entry(entry, &cm_entry_list, list) ++ if (entry->cm_id == cm_id) ++ goto out; ++ ++ print_err("Could not find cm id %p\n", cm_id); ++ entry = NULL; ++ ++out: ++ up_read(&list_rwsem); ++ ++ return entry; ++} ++ ++/* find the entry id for the listen cm id so we can add the new cm id ++ * that is being accepted to the list so it can be found on future events ++ */ ++static struct cm_entry *find_cm_entry_and_add(struct ib_cm_id *listen_id, ++ struct ib_cm_id *cm_id) ++{ ++ struct cm_entry *entry; ++ struct cm_entry *listen_entry; ++ ++ listen_entry = find_cm_entry(listen_id); ++ if (!listen_entry) { ++ print_err("Could not find listen id %p\n", listen_id); ++ return NULL; ++ } ++ ++ entry = kzalloc(sizeof(struct cm_entry), GFP_KERNEL); ++ if (!entry) { ++ print_err("kzalloc failed\n"); ++ return NULL; ++ } ++ ++ entry->client = listen_entry->client; ++ entry->cm_id = cm_id; ++ ++ down_write(&list_rwsem); ++ list_add(&entry->list, &cm_entry_list); ++ up_write(&list_rwsem); ++ ++ return listen_entry; ++} ++ ++static void ibp_event_req_get(struct ibp_cm_req_event_resp *proxy_req, ++ struct ib_cm_req_event_param *req) ++ ++{ ++ proxy_req->listen_id = (u64) req->listen_id; ++ proxy_req->remote_ca_guid = req->remote_ca_guid; ++ proxy_req->remote_qkey = req->remote_qkey; ++ proxy_req->remote_qpn = req->remote_qpn; ++ proxy_req->qp_type = req->qp_type; ++ proxy_req->starting_psn = req->starting_psn; ++ proxy_req->responder_resources = req->responder_resources; ++ proxy_req->initiator_depth = req->initiator_depth; ++ proxy_req->local_cm_response_timeout = req->local_cm_response_timeout; ++ proxy_req->flow_control = req->flow_control; ++ proxy_req->remote_cm_response_timeout = req->remote_cm_response_timeout; ++ proxy_req->retry_count = req->retry_count; ++ proxy_req->rnr_retry_count = req->rnr_retry_count; ++ proxy_req->srq = req->srq; ++ proxy_req->port = req->port; ++ ib_copy_sa_path_rec(&proxy_req->primary_path, req->primary_path); ++ if (req->alternate_path) ++ ib_copy_sa_path_rec(&proxy_req->alternate_path, ++ req->alternate_path); ++} ++ ++static void ibp_event_rep_get(struct ibp_cm_rep_event_resp *proxy_rep, ++ struct ib_cm_rep_event_param *rep) ++{ ++ proxy_rep->remote_ca_guid = rep->remote_ca_guid; ++ proxy_rep->remote_qkey = rep->remote_qkey; ++ proxy_rep->remote_qpn = rep->remote_qpn; ++ proxy_rep->starting_psn = rep->starting_psn; ++ proxy_rep->responder_resources = rep->responder_resources; ++ proxy_rep->initiator_depth = rep->initiator_depth; ++ proxy_rep->target_ack_delay = rep->target_ack_delay; ++ proxy_rep->failover_accepted = rep->failover_accepted; ++ proxy_rep->flow_control = rep->flow_control; ++ proxy_rep->rnr_retry_count = rep->rnr_retry_count; ++ proxy_rep->srq = rep->srq; ++} ++ ++static ++void ibp_event_sidr_rep_get(struct ibp_cm_sidr_rep_event_resp *proxy_resp, ++ struct ib_cm_sidr_rep_event_param *rep) ++{ ++ proxy_resp->status = rep->status; ++ proxy_resp->qkey = rep->qkey; ++ proxy_resp->qpn = rep->qpn; ++} ++ ++static void ibp_event(struct 
work_struct *work) ++{ ++ struct ibp_event *event_work; ++ struct ibp_event_msg *msg; ++ int msg_len; ++ int event_len; ++ ++ print_trace("in\n"); ++ ++ event_work = (struct ibp_event *) work; ++ ++ event_len = event_work->event.data_length + ++ event_work->event.info_length + ++ sizeof(struct ibp_cm_event); ++ ++ msg_len = sizeof(struct ibp_event_msg) + event_len; ++ ++ msg = kzalloc(msg_len, GFP_KERNEL); ++ if (!msg) { ++ print_err("kzmalloc failed\n"); ++ goto err; ++ } ++ ++ memcpy(msg->event, &(event_work->event), event_len); ++ msg->length = event_len; ++ ++ IBP_INIT_MSG(NULL, msg, msg_len, IBP_EVENT); ++ ++ ibp_send(event_work->client->ep, msg, msg_len); ++err: ++ kfree(event_work); ++} ++ ++static int ibp_event_handler(struct ib_cm_id *cm_id, ++ struct ib_cm_event *ib_cm_event) ++{ ++ struct ibp_event *event_work; ++ struct ibp_client *client; ++ struct cm_entry *entry; ++ void *info = NULL; ++ int info_length = 0; ++ int data_length = 0; ++ ++ print_trace("in\n"); ++ ++ switch (ib_cm_event->event) { ++ case IB_CM_REQ_RECEIVED: ++ data_length = IB_CM_REQ_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_REP_RECEIVED: ++ data_length = IB_CM_REP_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_RTU_RECEIVED: ++ data_length = IB_CM_RTU_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_DREQ_RECEIVED: ++ data_length = IB_CM_DREQ_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_DREP_RECEIVED: ++ data_length = IB_CM_DREP_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_MRA_RECEIVED: ++ data_length = IB_CM_MRA_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_REJ_RECEIVED: ++ data_length = IB_CM_REJ_PRIVATE_DATA_SIZE; ++ info_length = ib_cm_event->param.rej_rcvd.ari_length; ++ break; ++ case IB_CM_LAP_RECEIVED: ++ data_length = IB_CM_LAP_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_APR_RECEIVED: ++ data_length = IB_CM_APR_PRIVATE_DATA_SIZE; ++ info_length = ib_cm_event->param.apr_rcvd.info_len; ++ break; ++ case IB_CM_SIDR_REQ_RECEIVED: ++ data_length = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; ++ break; ++ case IB_CM_SIDR_REP_RECEIVED: ++ data_length = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; ++ info_length = ib_cm_event->param.sidr_rep_rcvd.info_len; ++ break; ++ default: ++ break; ++ } ++ event_work = kzalloc((sizeof(struct ibp_event)) + ++ data_length + info_length, GFP_KERNEL); ++ if (!event_work) { ++ print_err("kzalloc failed\n"); ++ return -ENOMEM; ++ } ++ ++ if (ib_cm_event->event == IB_CM_REQ_RECEIVED) { ++ struct ib_cm_req_event_param *param; ++ param = &ib_cm_event->param.req_rcvd; ++ entry = find_cm_entry_and_add(param->listen_id, cm_id); ++ } else if (ib_cm_event->event == IB_CM_SIDR_REQ_RECEIVED) { ++ struct ib_cm_sidr_req_event_param *param; ++ param = &ib_cm_event->param.sidr_req_rcvd; ++ entry = find_cm_entry_and_add(param->listen_id, cm_id); ++ } else ++ entry = find_cm_entry(cm_id); ++ ++ if (!entry) { ++ kfree(event_work); ++ return -EINVAL; ++ } ++ ++ client = entry->client; ++ ++ event_work->client = client; ++ event_work->event.ibp_cm_id = (u64) entry->cm_id; ++ event_work->event.event_cm_id = (u64) cm_id; ++ event_work->event.event_type = ib_cm_event->event; ++ event_work->event.data_length = data_length; ++ event_work->event.info_length = info_length; ++ ++ /* parse and copy the proper event */ ++ switch (ib_cm_event->event) { ++ case IB_CM_REQ_RECEIVED: ++ print_dbg("IB_CM_REQ_RECEIVED (%d)\n", ib_cm_event->event); ++ ibp_event_req_get(&event_work->event.u.req_resp, ++ &ib_cm_event->param.req_rcvd); ++ break; ++ case IB_CM_REP_RECEIVED: ++ print_dbg("IB_CM_REP_RECEIVED (%d)\n", ib_cm_event->event); ++ 
ibp_event_rep_get(&event_work->event.u.rep_resp, ++ &ib_cm_event->param.rep_rcvd); ++ break; ++ case IB_CM_MRA_RECEIVED: ++ print_dbg("IB_CM_MRA_RECEIVED (%d)\n", ib_cm_event->event); ++ event_work->event.u.mra_resp.timeout = ++ ib_cm_event->param.mra_rcvd.service_timeout; ++ break; ++ case IB_CM_REJ_RECEIVED: ++ print_dbg("IB_CM_REJ_RECEIVED (%d)\n", ib_cm_event->event); ++ event_work->event.u.rej_resp.reason = ++ ib_cm_event->param.rej_rcvd.reason; ++ info = ib_cm_event->param.rej_rcvd.ari; ++ break; ++ case IB_CM_RTU_RECEIVED: ++ print_dbg("IB_CM_RTU_RECEIVED (%d)\n", ib_cm_event->event); ++ event_work->event.u.rtu_resp.status = ++ ib_cm_event->param.send_status; ++ event_work->event.u.rtu_resp.local_id = cm_id->local_id; ++ event_work->event.u.rtu_resp.remote_id = cm_id->remote_id; ++ break; ++ case IB_CM_LAP_RECEIVED: ++ print_dbg("IB_CM_LAP_RECEIVED (%d)\n", ib_cm_event->event); ++ ib_copy_sa_path_rec(&event_work->event.u.lap_resp.path, ++ ib_cm_event->param.lap_rcvd.alternate_path); ++ break; ++ case IB_CM_APR_RECEIVED: ++ print_dbg("IB_CM_APR_RECEIVED (%d)\n", ib_cm_event->event); ++ event_work->event.u.apr_resp.status = ++ ib_cm_event->param.apr_rcvd.ap_status; ++ info = ib_cm_event->param.apr_rcvd.apr_info; ++ break; ++ case IB_CM_SIDR_REQ_RECEIVED: ++ print_dbg("IB_CM_SIDR_REQ_RECEIVED (%d)\n", ++ ib_cm_event->event); ++ event_work->event.u.sidr_req_resp.listen_id = ++ (u64) ib_cm_event->param.sidr_req_rcvd.listen_id; ++ event_work->event.u.sidr_req_resp.pkey = ++ ib_cm_event->param.sidr_req_rcvd.pkey; ++ event_work->event.u.sidr_req_resp.port = ++ ib_cm_event->param.sidr_req_rcvd.port; ++ break; ++ case IB_CM_SIDR_REP_RECEIVED: ++ print_dbg("IB_CM_SIDR_REP_RECEIVED (%d)\n", ++ ib_cm_event->event); ++ ibp_event_sidr_rep_get(&event_work->event.u.sidr_rep_resp, ++ &ib_cm_event->param.sidr_rep_rcvd); ++ info = ib_cm_event->param.sidr_rep_rcvd.info; ++ break; ++ case IB_CM_TIMEWAIT_EXIT: ++ case IB_CM_REQ_ERROR: ++ case IB_CM_REP_ERROR: ++ case IB_CM_DREQ_ERROR: ++ case IB_CM_LAP_ERROR: ++ case IB_CM_SIDR_REQ_ERROR: ++ print_dbg("IB_CM_..._ERROR (%d)\n", ib_cm_event->event); ++ event_work->event.u.send_status = ++ ib_cm_event->param.send_status; ++ break; ++ ++ case IB_CM_USER_ESTABLISHED: ++ print_dbg("IB_CM_USER_ESTABLISHED (%d)\n", ++ ib_cm_event->event); ++ event_work->event.u.send_status = ++ ib_cm_event->param.send_status; ++ break; ++ case IB_CM_DREQ_RECEIVED: ++ print_dbg("IB_CM_DREQ_RECEIVED (%d)\n", ib_cm_event->event); ++ event_work->event.u.send_status = ++ ib_cm_event->param.send_status; ++ break; ++ case IB_CM_DREP_RECEIVED: ++ print_dbg("IB_CM_DREP_RECEIVED (%d)\n", ib_cm_event->event); ++ event_work->event.u.send_status = ++ ib_cm_event->param.send_status; ++ break; ++ default: ++ print_dbg("event not handled %d\n", ib_cm_event->event); ++ break; ++ } ++ ++ if (data_length) ++ memcpy(event_work->event.data, ib_cm_event->private_data, ++ data_length); ++ ++ if (info_length) ++ memcpy(event_work->event.data + data_length, info, info_length); ++ ++ INIT_WORK(&event_work->work, ibp_event); ++ queue_work(client->workqueue, &event_work->work); ++ ++ return 0; ++} ++ ++int ibp_cmd_create_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_create_cm_id_cmd *cmd; ++ struct ibp_create_cm_id_resp *resp; ++ struct ib_device *ib_device; ++ struct ib_cm_id *cm_id = NULL; ++ struct cm_entry *entry; ++ size_t len; ++ int status = 0; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_create_cm_id_cmd *) hdr; ++ 
ib_device = (struct ib_device *) cmd->device; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ entry = kzalloc(sizeof(struct cm_entry), GFP_KERNEL); ++ if (!entry) { ++ print_err("kzalloc failed\n"); ++ status = -ENOMEM; ++ goto send_resp; ++ } ++ ++ cm_id = ib_create_cm_id(ib_device, ++ (ib_cm_handler) ibp_event_handler, ++ NULL); ++ if (IS_ERR(cm_id)) { ++ status = PTR_ERR(cm_id); ++ print_err("ib_create_cm_id returned %d\n", status); ++ goto send_resp; ++ } ++ ++ len += sizeof(*resp); ++ ++ resp = (struct ibp_create_cm_id_resp *) msg->data; ++ ++ resp->ibp_cm_id = (u64) cm_id; ++ resp->service_id = cm_id->service_id; ++ resp->service_mask = cm_id->service_mask; ++ resp->local_id = cm_id->local_id; ++ resp->remote_id = cm_id->remote_id; ++ resp->remote_cm_qpn = cm_id->remote_cm_qpn; ++ ++send_resp: ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, status); ++ ++ ret = ibp_send(client->ep, msg, len); ++ if (ret) { ++ kfree(entry); ++ print_err("ibp_send returned %d\n", ret); ++ return ret; ++ } ++ if (status) { ++ kfree(entry); ++ return status; ++ } ++ ++ entry->client = client; ++ entry->cm_id = cm_id; ++ ++ down_write(&list_rwsem); ++ list_add(&entry->list, &cm_entry_list); ++ up_write(&list_rwsem); ++ ++ return 0; ++} ++ ++int ibp_cmd_destroy_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_destroy_cm_id_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ struct cm_entry *entry; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_destroy_cm_id_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ entry = find_cm_entry(cm_id); ++ if (!entry) ++ goto send_resp; ++ ++ down_write(&list_rwsem); ++ list_del(&entry->list); ++ up_write(&list_rwsem); ++ ++ kfree(entry); ++ ++ ib_destroy_cm_id(cm_id); ++ ++send_resp: ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_cm_listen(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_cm_listen_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ struct ib_cm_compare_data *data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_cm_listen_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (!cmd->null_comp_data) ++ data = &(cmd->compare_data); ++ ++ ret = ib_cm_listen(cm_id, cmd->service_id, cmd->service_mask, data); ++ if (ret) ++ print_err("ib_cm_listen returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_req(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_req_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ struct ib_cm_req_param param = {0}; ++ struct ib_sa_path_rec primary_path; ++ struct ib_sa_path_rec alternate_path; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_req_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->alternate_path.pkey) { ++ param.alternate_path = &alternate_path; ++ ibp_copy_sa_path_rec(&cmd->alternate_path, &alternate_path); ++ } ++ ++ param.primary_path = &primary_path; 
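++ /* rebuild the kernel ib_sa_path_rec from the flattened proxy path fields */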
++ ibp_copy_sa_path_rec(&cmd->primary_path, &primary_path); ++ ++ param.service_id = cmd->service_id; ++ param.qp_num = cmd->qp_num; ++ param.qp_type = cmd->qp_type; ++ param.starting_psn = cmd->starting_psn; ++ param.peer_to_peer = cmd->peer_to_peer; ++ param.responder_resources = cmd->responder_resources; ++ param.initiator_depth = cmd->initiator_depth; ++ param.remote_cm_response_timeout = cmd->remote_cm_response_timeout; ++ param.flow_control = cmd->flow_control; ++ param.local_cm_response_timeout = cmd->local_cm_response_timeout; ++ param.retry_count = cmd->retry_count; ++ param.rnr_retry_count = cmd->rnr_retry_count; ++ param.max_cm_retries = cmd->max_cm_retries; ++ param.srq = cmd->srq; ++ param.private_data_len = cmd->private_data_len; ++ ++ if (cmd->private_data_len) ++ param.private_data = cmd->private_data; ++ ++ ret = ib_send_cm_req(cm_id, ¶m); ++ ++ if (ret) ++ print_err("send_cm_req returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_rep(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_rep_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ struct ib_cm_rep_param param = {0}; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_rep_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ param.qp_num = cmd->qp_num; ++ param.starting_psn = cmd->starting_psn; ++ param.responder_resources = cmd->responder_resources; ++ param.initiator_depth = cmd->initiator_depth; ++ param.failover_accepted = cmd->failover_accepted; ++ param.rnr_retry_count = cmd->rnr_retry_count; ++ param.srq = cmd->srq; ++ param.private_data_len = cmd->private_data_len; ++ ++ if (cmd->private_data_len) ++ param.private_data = cmd->private_data; ++ ++ ret = ib_send_cm_rep(cm_id, ¶m); ++ if (ret) ++ print_err("send_cm_rep returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_rtu(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_send_cm_rtu_cmd *cmd; ++ struct ibp_response_msg *msg; ++ struct ib_cm_id *cm_id; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_rtu_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->private_data; ++ ++ ret = ib_send_cm_rtu(cm_id, private_data, cmd->private_data_len); ++ if (ret) ++ print_err("send_cm_rtu returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_dreq(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_dreq_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_dreq_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->private_data; ++ ++ ret = ib_send_cm_dreq(cm_id, private_data, cmd->private_data_len); ++ if (ret) ++ print_dbg("send_cm_dreq returned 
%d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_drep(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_drep_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_drep_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->private_data; ++ ++ ret = ib_send_cm_drep(cm_id, private_data, cmd->private_data_len); ++ if (ret) ++ print_dbg("send_cm_drep returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_rej(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_rej_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ void *ari; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_rej_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->data; ++ ++ ari = &(cmd->data[cmd->private_data_len]); ++ ++ ret = ib_send_cm_rej(cm_id, cmd->reason, ari, cmd->ari_length, ++ private_data, cmd->private_data_len); ++ if (ret) ++ print_err("send_cm_rej returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_mra(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_mra_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_mra_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->private_data; ++ ++ ret = ib_send_cm_mra(cm_id, cmd->service_timeout, ++ private_data, cmd->private_data_len); ++ if (ret) ++ print_err("send_cm_mra returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_lap(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_lap_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ struct ib_sa_path_rec alt_path; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_lap_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->private_data; ++ ++ ibp_copy_sa_path_rec(&cmd->alternate_path, &alt_path); ++ ++ ret = ib_send_cm_lap(cm_id, &alt_path, ++ private_data, cmd->private_data_len); ++ if (ret) ++ print_err("send_cm_lap returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_send_cm_apr(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct 
ibp_send_cm_apr_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ void *info = NULL; ++ void *private_data = NULL; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_apr_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ if (cmd->private_data_len) ++ private_data = cmd->data; ++ if (cmd->info_length) ++ info = &(cmd->data[cmd->private_data_len]); ++ ++ ret = ib_send_cm_apr(cm_id, cmd->status, info, cmd->info_length, ++ private_data, cmd->private_data_len); ++ if (ret) ++ print_err("send_cm_apr returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ++ibp_cmd_send_cm_sidr_req(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_sidr_req_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ struct ib_cm_sidr_req_param param = {0}; ++ struct ib_sa_path_rec path; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_sidr_req_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ param.path = &path; ++ ibp_copy_sa_path_rec(&cmd->path, &path); ++ ++ param.service_id = cmd->service_id; ++ param.timeout_ms = cmd->timeout_ms; ++ param.max_cm_retries = cmd->max_cm_retries; ++ param.private_data_len = cmd->private_data_len; ++ ++ if (cmd->private_data_len) ++ param.private_data = cmd->private_data; ++ ++ ret = ib_send_cm_sidr_req(cm_id, ¶m); ++ if (ret) ++ print_err("send_cm_sidr_req returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ++ibp_cmd_send_cm_sidr_rep(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_send_cm_sidr_rep_cmd *cmd; ++ struct ib_cm_sidr_rep_param param = {0}; ++ struct ib_cm_id *cm_id; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_send_cm_sidr_rep_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ ++ param.qp_num = cmd->qp_num; ++ param.qkey = cmd->qkey; ++ param.status = cmd->status; ++ param.info_length = cmd->info_length; ++ param.private_data_len = cmd->private_data_len; ++ ++ if (cmd->private_data_len) ++ param.private_data = cmd->data; ++ if (cmd->info_length) ++ param.info = &(cmd->data[cmd->private_data_len]); ++ ++ ret = ib_send_cm_sidr_rep(cm_id, ¶m); ++ if (ret) ++ print_err("send_cm_sidr_rep returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ibp_cmd_cm_notify(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_cm_notify_cmd *cmd; ++ struct ib_cm_id *cm_id; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_cm_notify_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_cm_notify(cm_id, cmd->event); ++ if (ret) ++ print_err("cm_notify returned %d\n", ret); ++ ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} ++ ++int ++ibp_cmd_cm_init_qp_attr(struct ibp_client *client, struct ibp_msg_header 
*hdr) ++{ ++ struct ibp_response_msg *msg; ++ struct ibp_cm_init_qp_attr_cmd *cmd; ++ struct ibp_cm_init_qp_attr_resp *resp; ++ struct ib_cm_id *cm_id; ++ struct ib_qp_attr qp_attr; ++ int qp_attr_mask; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_cm_init_qp_attr_cmd *) hdr; ++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id; ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ qp_attr.qp_state = cmd->qp_attr_state; ++ ++ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); ++ if (ret) { ++ print_err("init_qp_attr returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ /* Workaround to avoid modify_qp error from Xeon Phi IPoIB connected mode */ ++ qp_attr_mask &= ~IB_QP_SMAC; ++ ++ len += sizeof(*resp); ++ ++ resp = (struct ibp_cm_init_qp_attr_resp *) msg->data; ++ ++ resp->qp_attr_mask = qp_attr_mask; ++ resp->qp_access_flags = qp_attr.qp_access_flags; ++ resp->qp_state = qp_attr.qp_state; ++ resp->cur_qp_state = qp_attr.cur_qp_state; ++ resp->path_mtu = qp_attr.path_mtu; ++ resp->path_mig_state = qp_attr.path_mig_state; ++ resp->qkey = qp_attr.qkey; ++ resp->rq_psn = qp_attr.rq_psn; ++ resp->sq_psn = qp_attr.sq_psn; ++ resp->dest_qp_num = qp_attr.dest_qp_num; ++ ++ resp->cap_max_send_wr = qp_attr.cap.max_send_wr; ++ resp->cap_max_recv_wr = qp_attr.cap.max_recv_wr; ++ resp->cap_max_send_sge = qp_attr.cap.max_send_sge; ++ resp->cap_max_recv_sge = qp_attr.cap.max_recv_sge; ++ resp->cap_max_inline_data = qp_attr.cap.max_inline_data; ++ ++ resp->ah_attr_grh_dgid_subnet_prefix = ++ qp_attr.ah_attr.grh.dgid.global.subnet_prefix; ++ resp->ah_attr_grh_dgid_interface_id = ++ qp_attr.ah_attr.grh.dgid.global.interface_id; ++ resp->ah_attr_grh_flow_label = qp_attr.ah_attr.grh.flow_label; ++ resp->ah_attr_grh_sgid_index = qp_attr.ah_attr.grh.sgid_index; ++ resp->ah_attr_grh_hop_limit = qp_attr.ah_attr.grh.hop_limit; ++ resp->ah_attr_grh_traffic_class = qp_attr.ah_attr.grh.traffic_class; ++ resp->ah_attr_dlid = qp_attr.ah_attr.dlid; ++ resp->ah_attr_sl = qp_attr.ah_attr.sl; ++ resp->ah_attr_src_path_bits = qp_attr.ah_attr.src_path_bits; ++ resp->ah_attr_static_rate = qp_attr.ah_attr.static_rate; ++ resp->ah_attr_ah_flags = qp_attr.ah_attr.ah_flags; ++ resp->ah_attr_port_num = qp_attr.ah_attr.port_num; ++ ++ resp->alt_attr_grh_dgid_subnet_prefix = ++ qp_attr.alt_ah_attr.grh.dgid.global.subnet_prefix; ++ resp->alt_attr_grh_dgid_interface_id = ++ qp_attr.alt_ah_attr.grh.dgid.global.interface_id; ++ resp->alt_attr_grh_flow_label = qp_attr.alt_ah_attr.grh.flow_label; ++ resp->alt_attr_grh_sgid_index = qp_attr.alt_ah_attr.grh.sgid_index; ++ resp->alt_attr_grh_hop_limit = qp_attr.alt_ah_attr.grh.hop_limit; ++ resp->alt_attr_grh_traffic_class ++ = qp_attr.alt_ah_attr.grh.traffic_class; ++ resp->alt_attr_dlid = qp_attr.alt_ah_attr.dlid; ++ resp->alt_attr_sl = qp_attr.alt_ah_attr.sl; ++ resp->alt_attr_src_path_bits = qp_attr.alt_ah_attr.src_path_bits; ++ resp->alt_attr_static_rate = qp_attr.alt_ah_attr.static_rate; ++ resp->alt_attr_ah_flags = qp_attr.alt_ah_attr.ah_flags; ++ resp->alt_attr_port_num = qp_attr.alt_ah_attr.port_num; ++ ++ resp->pkey_index = qp_attr.pkey_index; ++ resp->alt_pkey_index = qp_attr.alt_pkey_index; ++ resp->en_sqd_async_notify = qp_attr.en_sqd_async_notify; ++ resp->sq_draining = qp_attr.sq_draining; ++ resp->max_rd_atomic = qp_attr.max_rd_atomic; ++ resp->max_dest_rd_atomic = qp_attr.max_dest_rd_atomic; ++ resp->min_rnr_timer = qp_attr.min_rnr_timer; ++ resp->port_num = qp_attr.port_num; ++ resp->timeout = qp_attr.timeout; ++ 
resp->retry_cnt = qp_attr.retry_cnt; ++ resp->rnr_retry = qp_attr.rnr_retry; ++ resp->alt_port_num = qp_attr.alt_port_num; ++ resp->alt_timeout = qp_attr.alt_timeout; ++ ++send_resp: ++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret); ++ ++ return ibp_send(client->ep, msg, len); ++} +diff -urN a6/drivers/infiniband/ibp/cm/common.h a7/drivers/infiniband/ibp/cm/common.h +--- a6/drivers/infiniband/ibp/cm/common.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/common.h 2015-02-23 10:01:30.289769309 -0800 +@@ -0,0 +1,106 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef COMMON_H ++#define COMMON_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DRV_DESC "CCL Direct CM " DRV_ROLE ++#define DRV_VERSION "1.0" ++#define DRV_BASE "ibp_cm" ++#define PFX DRV_BASE "_" ++#define DRV_PFX DRV_NAME ": " ++ ++#define DRV_COPYRIGHT "Copyright (c) 2011-2013 Intel Corporation" ++#define DRV_SIGNON DRV_DESC " v" DRV_VERSION "\n" DRV_COPYRIGHT "\n" ++ ++#define MODULE_PARAM(name, var, type, value, desc) \ ++ type var = value; \ ++ module_param_named(name, var, type, 0644); \ ++ MODULE_PARM_DESC(name, desc) ++ ++#ifdef IBP_DEBUG ++extern int debug_level; ++#endif ++ ++enum { ++ IBP_DEBUG_NONE, ++ IBP_DEBUG_TARGETED, ++ IBP_DEBUG_VERBOSE, ++}; ++ ++#define _PRINTK(l, f, arg...) \ ++ printk(l DRV_PFX "%s(%d) " f, __func__, __LINE__, ##arg) ++ ++#ifdef IBP_DEBUG ++#define PRINTK(dbg, l, f, arg...) \ ++ do { \ ++ if (debug_level >= dbg) \ ++ printk(l DRV_PFX "%s(%d) " f, \ ++ __func__, __LINE__, ##arg); \ ++ } while (0) ++#else ++#define PRINTK(dbg, l, f, arg...) do { } while (0) ++#endif ++ ++#define print_dbg(f, arg...) PRINTK(IBP_DEBUG_TARGETED, KERN_DEBUG, f, ##arg) ++#define print_err(f, arg...) _PRINTK(KERN_ERR, f, ##arg) ++#define print_info(f, arg...) pr_info(f, ##arg) ++ ++#if 0 ++#define FORCED_FUNCTION_TRACING ++#endif ++ ++#ifdef FORCED_FUNCTION_TRACING ++#define print_trace(f, arg...) _PRINTK(KERN_ERR, f, ##arg) ++#else ++#define print_trace(f, arg...) 
PRINTK(IBP_DEBUG_VERBOSE, KERN_ERR, f, ##arg) ++#endif ++ ++#ifndef IBP_CM_PORT /* unique scif port for this service */ ++#define IBP_CM_PORT SCIF_OFED_PORT_3 ++#endif ++ ++int ibp_send(scif_epd_t ep, void *buf, size_t len); ++int ibp_recv(scif_epd_t ep, void *buf, size_t len); ++ ++#endif /* COMMON_H */ +diff -urN a6/drivers/infiniband/ibp/cm/ibp-abi.h a7/drivers/infiniband/ibp/cm/ibp-abi.h +--- a6/drivers/infiniband/ibp/cm/ibp-abi.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/ibp-abi.h 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,94 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef IBP_ABI_H ++#define IBP_ABI_H ++ ++#include ++#include ++#include ++ ++/* Increment this value if any changes break compatibility. */ ++#define IBP_CM_ABI_VERSION 1 ++ ++/* Client to server message enums. */ ++enum { ++ IBP_CREATE_CM_ID, ++ IBP_DESTROY_CM_ID, ++ IBP_CM_LISTEN, ++ IBP_CM_NOTIFY, ++ IBP_SEND_CM_REQ, ++ IBP_SEND_CM_REP, ++ IBP_SEND_CM_RTU, ++ IBP_SEND_CM_DREQ, ++ IBP_SEND_CM_DREP, ++ IBP_SEND_CM_REJ, ++ IBP_SEND_CM_MRA, ++ IBP_SEND_CM_LAP, ++ IBP_SEND_CM_APR, ++ IBP_SEND_CM_SIDR_REQ, ++ IBP_SEND_CM_SIDR_REP, ++ IBP_CM_INIT_QP_ATTR, ++}; ++ ++/* Server to client message enums. */ ++enum { ++ IBP_IBP_EVENT, ++ IBP_IBP_RESPONSE, ++}; ++ ++/* ++ * Make sure that all structs defined in this file are laid out to pack ++ * the same way on different architectures to avoid incompatibility. ++ * ++ * Specifically: ++ * - Do not use pointer types -- pass pointers in a u64 instead. ++ * - Make sure that any structure larger than 4 bytes is padded ++ * to a multiple of 8 bytes; otherwise the structure size may ++ * be different between architectures. 
++ */ ++ ++struct ibp_msg_header { /* present in all messages */ ++ u32 opcode; ++ u32 length; ++ u32 status; ++ u32 reserved; ++ u64 request; ++ u64 data[0]; ++}; ++ ++struct ibp_response_msg { ++ struct ibp_msg_header header; ++ u64 data[0]; ++}; ++ ++#endif /* IBP_ABI_H */ +diff -urN a6/drivers/infiniband/ibp/cm/ibp_exports.h a7/drivers/infiniband/ibp/cm/ibp_exports.h +--- a6/drivers/infiniband/ibp/cm/ibp_exports.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/ibp_exports.h 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef IBP_EXPORTS_H ++#define IBP_EXPORTS_H ++ ++#include ++ ++/* ++ ibp_resolve_ib_device - Return the host ib_device handle ++ @ibdev:Card IB device ++ ++ Upper level drivers may require the host ib_device handle associated ++ with the card ib_device. This routine resolves the card ib_device to ++ the cooresponding host ib_device handle. A value of 0 is returned if ++ no match was found. ++*/ ++u64 ibp_resolve_ib_device(struct ib_device *ibdev); ++ ++ ++#endif /* IBP_EXPORTS_H */ +diff -urN a6/drivers/infiniband/ibp/cm/Makefile a7/drivers/infiniband/ibp/cm/Makefile +--- a6/drivers/infiniband/ibp/cm/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/Makefile 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,21 @@ ++KDIR ?= /lib/modules/`uname -r`/build ++ ++obj-$(CONFIG_IBP_SERVER) += ibp_cm_server.o ++ ++ccflags-$(CONFIG_IBP_DEBUG) += -g -DIBP_DEBUG ++ ++ibp_cm_server-y := server.o \ ++ server_msg.o \ ++ cm_server_msg.o ++ ++default: ++ $(MAKE) -C $(KDIR) M=`pwd` ++ ++modules_install: ++ $(MAKE) -C $(KDIR) M=`pwd` modules_install ++ ++clean: ++ rm -rf *.ko *.o .*.ko.cmd .*.o.cmd *.mod.c Module.* modules.order .tmp_versions ++ ++unix: ++ dos2unix *.[ch] Kconfig Makefile +diff -urN a6/drivers/infiniband/ibp/cm/server.c a7/drivers/infiniband/ibp/cm/server.c +--- a6/drivers/infiniband/ibp/cm/server.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/server.c 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. 
All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "server.h" ++ ++MODULE_AUTHOR("Jerrie Coffman"); ++MODULE_AUTHOR("Phil Cayton"); ++MODULE_AUTHOR("Jay Sternberg"); ++MODULE_LICENSE("Dual BSD/GPL"); ++MODULE_DESCRIPTION(DRV_DESC); ++MODULE_VERSION(DRV_VERSION); ++ ++MODULE_PARAM(port, port, int, IBP_CM_PORT, "Connection port"); ++MODULE_PARAM(backlog, backlog, int, 8, "Connection backlog"); ++MODULE_PARAM(timeout, timeout, int, 1000, "Listen/Poll time in milliseconds"); ++ ++#ifdef IBP_DEBUG ++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all"); ++#endif ++ ++struct rw_semaphore list_rwsem; ++ ++LIST_HEAD(client_list); ++ ++static struct task_struct *listen_thread; ++ ++static struct ibp_client *ibp_create_client(scif_epd_t ep, uint16_t node) ++{ ++ struct ibp_client *client; ++ int ret = -ENOMEM; ++ ++ client = kzalloc(sizeof(*client), GFP_KERNEL); ++ if (!client) { ++ print_err("kzalloc failed\n"); ++ return ERR_PTR(ret); ++ } ++ ++ client->ep = ep; ++ ++ client->rx_buf = (void *)__get_free_page(GFP_KERNEL); ++ if (!client->rx_buf) { ++ print_err("__get_free_page rx_buf failed\n"); ++ goto err0; ++ } ++ ++ client->tx_buf = (void *)__get_free_page(GFP_KERNEL); ++ if (!client->tx_buf) { ++ print_err("__get_free_page tx_buf failed\n"); ++ goto err1; ++ } ++ ++ client->workqueue = create_singlethread_workqueue(DRV_NAME); ++ if (!client->workqueue) { ++ print_err("create_singlethread_workqueue failed\n"); ++ goto err2; ++ } ++ ++ down_write(&list_rwsem); ++ list_add(&client->list, &client_list); ++ up_write(&list_rwsem); ++ ++ client->ibp_cm_client_thread = kthread_run(ibp_process_recvs, ++ client, DRV_NAME); ++ if (!client->ibp_cm_client_thread) { ++ print_err("create cleint thread failed\n"); ++ goto err3; ++ } ++ ++ return client; ++err3: ++ down_write(&list_rwsem); ++ list_del(&client->list); ++ up_write(&list_rwsem); ++ ++ destroy_workqueue(client->workqueue); ++err2: ++ free_page((uintptr_t)client->tx_buf); ++err1: ++ free_page((uintptr_t)client->rx_buf); ++err0: ++ kfree(client); ++ return ERR_PTR(ret); ++} ++ ++static int ibp_cm_listen(void *data) ++{ ++ struct ibp_client *client; ++ struct scif_pollepd listen; ++ struct scif_portID 
peer; ++ scif_epd_t ep; ++ int ret; ++ ++ listen.epd = scif_open(); ++ if (!listen.epd) { ++ print_err("scif_open failed\n"); ++ ret = -EIO; ++ goto err0; ++ } ++ listen.events = POLLIN; ++ ++ ret = scif_bind(listen.epd, port); ++ if (ret < 0) { ++ print_err("scif_bind returned %d\n", ret); ++ goto err1; ++ } ++ ++ ret = scif_listen(listen.epd, backlog); ++ if (ret) { ++ print_err("scif_listen returned %d\n", ret); ++ goto err1; ++ } ++ ++ while (!kthread_should_stop()) { ++ ++ schedule(); ++ ++ ret = scif_poll(&listen, 1, timeout); ++ if (ret == 0) /* timeout */ ++ continue; ++ if (ret < 0) { ++ print_err("scif_poll revents 0x%x\n", listen.revents); ++ continue; ++ } ++ ++ ret = scif_accept(listen.epd, &peer, &ep, 0); ++ if (ret) { ++ print_err("scif_accept returned %d\n", ret); ++ continue; ++ } ++ ++ print_dbg("accepted node %d port %d\n", peer.node, peer.port); ++ ++ client = ibp_create_client(ep, peer.node); ++ if (IS_ERR(client)) { ++ ret = PTR_ERR(client); ++ print_err("ibp_create_client returned %d\n", ret); ++ scif_close(ep); ++ } ++ } ++err1: ++ scif_close(listen.epd); ++err0: ++ return ret; ++} ++ ++static int __init ibp_cm_server_init(void) ++{ ++ int ret = 0; ++ ++ print_info(DRV_SIGNON); ++ ++ init_rwsem(&list_rwsem); ++ ++ /* Start a thread for inbound connections. */ ++ listen_thread = kthread_run(ibp_cm_listen, NULL, DRV_NAME); ++ if (IS_ERR(listen_thread)) { ++ ret = PTR_ERR(listen_thread); ++ print_err("kthread_run returned %d\n", ret); ++ } ++ ++ return ret; ++} ++ ++static void __exit ibp_cm_server_exit(void) ++{ ++ struct ibp_client *client, *next; ++ struct completion done; ++ ++ kthread_stop(listen_thread); ++ ++ down_write(&list_rwsem); ++ list_for_each_entry_safe(client, next, &client_list, list) { ++ init_completion(&done); ++ client->done = &done; ++ ++ /* Close scif ep to unblock the client thread scif_recv */ ++ scif_close(client->ep); ++ ++ up_write(&list_rwsem); ++ ++ /* Wait for client thread to finish */ ++ wait_for_completion(&done); ++ ++ down_write(&list_rwsem); ++ } ++ up_write(&list_rwsem); ++ ++ print_info(DRV_DESC " unloaded\n"); ++} ++ ++module_init(ibp_cm_server_init); ++module_exit(ibp_cm_server_exit); +diff -urN a6/drivers/infiniband/ibp/cm/server.h a7/drivers/infiniband/ibp/cm/server.h +--- a6/drivers/infiniband/ibp/cm/server.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/server.h 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,128 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef SERVER_H ++#define SERVER_H ++ ++#include ++#include ++#include ++#include ++#include "ibp-abi.h" ++#include "cm_ibp_abi.h" ++#include "common.h" ++ ++#define DRV_ROLE "Server" ++#define DRV_NAME "ibp_cm_server" ++ ++#define MAX_MSG_SIZE PAGE_SIZE ++ ++extern int timeout; ++extern struct rw_semaphore list_rwsem; ++extern struct list_head client_list; ++extern struct list_head cm_entry_list; ++ ++struct ibp_client { ++ struct list_head list; ++ scif_epd_t ep; ++ void *rx_buf; ++ void *tx_buf; ++ struct completion *done; ++ struct workqueue_struct *workqueue; ++ struct task_struct *ibp_cm_client_thread; ++}; ++ ++struct cm_entry { ++ struct list_head list; ++ struct ib_cm_id *cm_id; ++ struct ibp_client *client; ++}; ++ ++struct ibp_event_get { ++ __u64 response; ++ __u64 data; ++ __u64 info; ++ __u8 data_len; ++ __u8 info_len; ++ __u8 reserved[6]; ++}; ++ ++struct ibp_event { ++ struct work_struct work; ++ struct ibp_client *client; ++ struct ibp_cm_event event; ++}; ++ ++#define IBP_INIT_MSG(device, msg, size, op) \ ++ do { \ ++ (msg)->header.opcode = IBP_##op; \ ++ (msg)->header.length = (size); \ ++ (msg)->header.status = 0; \ ++ (msg)->header.reserved = 0; \ ++ (msg)->header.request = 0; \ ++ } while (0) ++ ++#define IBP_INIT_RESP(device, resp, size, op, req, stat) \ ++ do { \ ++ (resp)->header.opcode = IBP_##op; \ ++ (resp)->header.length = (size); \ ++ (resp)->header.status = (stat); \ ++ (resp)->header.reserved = 0; \ ++ (resp)->header.request = (req); \ ++ } while (0) ++ ++int ibp_process_recvs(void *p); ++void cleanup_cm_entry_list(void); ++ ++int ibp_cmd_create_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_destroy_cm_id(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_cm_listen(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_cm_notify(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_req(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_rep(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_rtu(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_dreq(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_drep(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_rej(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_mra(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_lap(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_apr(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_sidr_req(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_send_cm_sidr_rep(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_cm_event(struct ibp_client *client, struct ibp_msg_header *hdr); ++int ibp_cmd_cm_init_qp_attr(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++ ++#endif /* SERVER_H */ +diff 
-urN a6/drivers/infiniband/ibp/cm/server_msg.c a7/drivers/infiniband/ibp/cm/server_msg.c +--- a6/drivers/infiniband/ibp/cm/server_msg.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/cm/server_msg.c 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,176 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "server.h" ++#include "cm_ibp_abi.h" ++ ++int ibp_send(scif_epd_t ep, void *buf, size_t len) ++{ ++ int ret; ++ ++ while (len) { ++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK); ++ if (ret < 0) { ++ print_dbg("scif_send returned %d\n", ret); ++ return ret; ++ } ++ buf += ret; ++ len -= ret; ++ } ++ ++ return 0; ++} ++ ++int ibp_recv(scif_epd_t ep, void *buf, size_t len) ++{ ++ int ret; ++ ++ while (len) { ++ ret = scif_recv(ep, buf, (uint32_t)len, SCIF_RECV_BLOCK); ++ if (ret < 0) { ++ print_dbg("scif_recv returned %d\n", ret); ++ return ret; ++ } ++ buf += ret; ++ len -= ret; ++ } ++ ++ return 0; ++} ++ ++static int ++ibp_cmd_bad_request(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_response_msg *msg; ++ size_t len; ++ int status = -EBADRQC; ++ ++ print_dbg("opcode 0x%x\n", hdr->opcode); ++ ++ msg = (struct ibp_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ IBP_INIT_RESP(NULL, msg, len, IBP_RESPONSE, hdr->request, status); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static void ++ibp_cm_destroy_client(struct ibp_client *client) ++{ ++ struct cm_entry *cm, *tmp; ++ ++ down_write(&list_rwsem); ++ list_del(&client->list); ++ list_for_each_entry_safe(cm, tmp, &cm_entry_list, list) ++ if (cm->client == client) { ++ ib_destroy_cm_id(cm->cm_id); ++ list_del(&cm->list); ++ kfree(cm); ++ } ++ up_write(&list_rwsem); ++ ++ destroy_workqueue(client->workqueue); ++ ++ free_page((uintptr_t)client->tx_buf); ++ free_page((uintptr_t)client->rx_buf); ++ ++ if (client->done) ++ complete(client->done); ++ else ++ scif_close(client->ep); ++ ++ kfree(client); ++} ++ ++static int ++(*ibp_msg_table[])(struct ibp_client *c, struct ibp_msg_header *h) = { ++ [IBP_CREATE_CM_ID] = ibp_cmd_create_cm_id, ++ [IBP_DESTROY_CM_ID] = ibp_cmd_destroy_cm_id, ++ 
[IBP_CM_LISTEN] = ibp_cmd_cm_listen, ++ [IBP_CM_NOTIFY] = ibp_cmd_cm_notify, ++ [IBP_SEND_CM_REQ] = ibp_cmd_send_cm_req, ++ [IBP_SEND_CM_REP] = ibp_cmd_send_cm_rep, ++ [IBP_SEND_CM_RTU] = ibp_cmd_send_cm_rtu, ++ [IBP_SEND_CM_DREQ] = ibp_cmd_send_cm_dreq, ++ [IBP_SEND_CM_DREP] = ibp_cmd_send_cm_drep, ++ [IBP_SEND_CM_REJ] = ibp_cmd_send_cm_rej, ++ [IBP_SEND_CM_MRA] = ibp_cmd_send_cm_mra, ++ [IBP_SEND_CM_LAP] = ibp_cmd_send_cm_lap, ++ [IBP_SEND_CM_APR] = ibp_cmd_send_cm_apr, ++ [IBP_SEND_CM_SIDR_REQ] = ibp_cmd_send_cm_sidr_req, ++ [IBP_SEND_CM_SIDR_REP] = ibp_cmd_send_cm_sidr_rep, ++ [IBP_CM_INIT_QP_ATTR] = ibp_cmd_cm_init_qp_attr, ++}; ++ ++int ibp_process_recvs(void *p) ++{ ++ struct ibp_client *client; ++ struct ibp_msg_header *hdr; ++ int ret; ++ ++ client = (struct ibp_client *) p; ++ hdr = (struct ibp_msg_header *) client->rx_buf; ++ ++ for (;;) { ++ ret = ibp_recv(client->ep, hdr, sizeof(*hdr)); ++ if (ret) ++ break; ++ ++ if (hdr->length > MAX_MSG_SIZE) { ++ print_err("message too large, len %u max %lu\n", ++ hdr->length, MAX_MSG_SIZE); ++ ret = -EMSGSIZE; ++ break; ++ } ++ ++ if (hdr->length > sizeof(*hdr)) { ++ ret = ibp_recv(client->ep, hdr->data, ++ hdr->length - sizeof(*hdr)); ++ if (ret) ++ break; ++ } ++ ++ if ((hdr->opcode >= ARRAY_SIZE(ibp_msg_table)) || ++ !ibp_msg_table[hdr->opcode]) { ++ ibp_cmd_bad_request(client, hdr); ++ continue; ++ } ++ ++ ret = ibp_msg_table[hdr->opcode](client, hdr); ++ if (ret) ++ break; ++ } ++ ++ ibp_cm_destroy_client(client); ++ ++ return ret; ++} +diff -urN a6/drivers/infiniband/ibp/drv/common.h a7/drivers/infiniband/ibp/drv/common.h +--- a6/drivers/infiniband/ibp/drv/common.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/common.h 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,109 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#ifndef COMMON_H ++#define COMMON_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DRV_DESC "CCL Direct " DRV_ROLE ++#define DRV_VERSION "1.0" ++#define DRV_BASE "ibp" ++#define PFX DRV_BASE "_" ++#define DRV_PFX DRV_NAME ": " ++ ++#define DRV_COPYRIGHT "Copyright (c) 2011-2013 Intel Corporation" ++#define DRV_SIGNON DRV_DESC " v" DRV_VERSION "\n" DRV_COPYRIGHT "\n" ++ ++#define MODULE_PARAM(name, var, type, value, desc) \ ++ type var = value; \ ++ module_param_named(name, var, type, 0644); \ ++ MODULE_PARM_DESC(name, desc) ++ ++#ifdef IBP_DEBUG ++extern int debug_level; ++#endif ++ ++enum { ++ IBP_DEBUG_NONE, ++ IBP_DEBUG_TARGETED, ++ IBP_DEBUG_VERBOSE, ++}; ++ ++#define _PRINTK(l, f, arg...) \ ++ printk(l DRV_PFX "%s(%d) " f, __func__, __LINE__, ##arg) ++ ++#ifdef IBP_DEBUG ++#define PRINTK(dbg, l, f, arg...) \ ++ do { \ ++ if (debug_level >= dbg) \ ++ printk(l DRV_PFX "%s(%d) " f, \ ++ __func__, __LINE__, ##arg); \ ++ } while (0) ++#else ++#define PRINTK(dbg, l, f, arg...) do { } while (0) ++#endif ++ ++#define print_dbg(f, arg...) PRINTK(IBP_DEBUG_TARGETED, KERN_DEBUG, f, ##arg) ++#define print_err(f, arg...) _PRINTK(KERN_ERR, f, ##arg) ++#define print_info(f, arg...) pr_info(f, ##arg) ++ ++#if 0 ++#define FORCED_FUNCTION_TRACING ++#endif ++ ++#ifdef FORCED_FUNCTION_TRACING ++#define print_trace(f, arg...) _PRINTK(KERN_ERR, f, ##arg) ++#else ++#define print_trace(f, arg...) PRINTK(IBP_DEBUG_VERBOSE, KERN_ERR, f, ##arg) ++#endif ++ ++#ifndef IBP_PORT /* unique scif port for this service */ ++#define IBP_PORT SCIF_OFED_PORT_2 ++#endif ++ ++#define IS_NULL_OR_ERR(p) (!(p) || IS_ERR_VALUE((unsigned long)p)) ++ ++int ibp_init(void); ++ ++void ibp_cleanup(void); ++ ++#endif /* COMMON_H */ +diff -urN a6/drivers/infiniband/ibp/drv/ibp-abi.h a7/drivers/infiniband/ibp/drv/ibp-abi.h +--- a6/drivers/infiniband/ibp/drv/ibp-abi.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/ibp-abi.h 2015-02-23 10:01:30.290769309 -0800 +@@ -0,0 +1,649 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#ifndef IBP_ABI_H ++#define IBP_ABI_H ++ ++#include ++ ++/* Increment this value if any changes break compatibility. */ ++#define IBP_ABI_VERSION 2 ++ ++/* Client to server message enums. */ ++enum { ++ IBP_VERB_GET_PROTOCOL_STATS, ++ IBP_VERB_QUERY_DEVICE, ++ IBP_VERB_QUERY_PORT, ++ IBP_VERB_GET_LINK_LAYER, ++ IBP_VERB_QUERY_GID, ++ IBP_VERB_QUERY_PKEY, ++ IBP_VERB_MODIFY_DEVICE, ++ IBP_VERB_MODIFY_PORT, ++ IBP_VERB_ALLOC_UCONTEXT, ++ IBP_VERB_DEALLOC_UCONTEXT, ++ IBP_VERB_REG_BUF, ++ IBP_VERB_DEREG_BUF, ++ IBP_VERB_MMAP, ++ IBP_VERB_UNMMAP, ++ IBP_VERB_ALLOC_PD, ++ IBP_VERB_DEALLOC_PD, ++ IBP_VERB_CREATE_AH, ++ IBP_VERB_MODIFY_AH, ++ IBP_VERB_QUERY_AH, ++ IBP_VERB_DESTROY_AH, ++ IBP_VERB_CREATE_SRQ, ++ IBP_VERB_MODIFY_SRQ, ++ IBP_VERB_QUERY_SRQ, ++ IBP_VERB_DESTROY_SRQ, ++ IBP_VERB_POST_SRQ_RECV, ++ IBP_VERB_CREATE_QP, ++ IBP_VERB_MODIFY_QP, ++ IBP_VERB_QUERY_QP, ++ IBP_VERB_DESTROY_QP, ++ IBP_VERB_POST_SEND, ++ IBP_VERB_POST_RECV, ++ IBP_VERB_CREATE_CQ, ++ IBP_VERB_MODIFY_CQ, ++ IBP_VERB_DESTROY_CQ, ++ IBP_VERB_RESIZE_CQ, ++ IBP_VERB_POLL_CQ, ++ IBP_VERB_PEEK_CQ, ++ IBP_VERB_REQ_NOTIFY_CQ, ++ IBP_VERB_REQ_NCOMP_NOTIF, ++ IBP_VERB_GET_DMA_MR, ++ IBP_VERB_REG_PHYS_MR, ++ IBP_VERB_REG_USER_MR, ++ IBP_VERB_QUERY_MR, ++ IBP_VERB_DEREG_MR, ++ IBP_VERB_ALLOC_FAST_REG_MR, ++ IBP_VERB_ALLOC_FAST_REG_PAGE_LIST, ++ IBP_VERB_FREE_FAST_REG_PAGE_LIST, ++ IBP_VERB_REREG_PHYS_MR, ++ IBP_VERB_ALLOC_MW, ++ IBP_VERB_BIND_MW, ++ IBP_VERB_DEALLOC_MW, ++ IBP_VERB_ALLOC_FMR, ++ IBP_VERB_MAP_PHYS_FMR, ++ IBP_VERB_UNMAP_FMR, ++ IBP_VERB_DEALLOC_FMR, ++ IBP_VERB_ATTACH_MCAST, ++ IBP_VERB_DETACH_MCAST, ++ IBP_VERB_PROCESS_MAD, ++ IBP_VERB_ALLOC_XRCD, ++ IBP_VERB_DEALLOC_XRCD, ++}; ++ ++/* Server to client message enums. */ ++enum { ++ IBP_ADD_DEVICE, ++ IBP_REMOVE_DEVICE, ++ IBP_VERB_RESPONSE, ++ IBP_QUEUED_RESPONSE, ++ IBP_ASYNC_EVENT, ++ IBP_CQ_COMP, ++}; ++ ++/* ++ * Make sure that all structs defined in this file are laid out to pack ++ * the same way on different architectures to avoid incompatibility. ++ * ++ * Specifically: ++ * - Do not use pointer types -- pass pointers in a u64 instead. ++ * - Make sure that any structure larger than 4 bytes is padded ++ * to a multiple of 8 bytes; otherwise the structure size may ++ * be different between architectures. 
++ */ ++ ++struct ibp_msg_header { /* present in all messages */ ++ u32 opcode; ++ u32 length; ++ u32 status; ++ u32 reserved; ++ u64 device; ++ u64 request; ++ u64 data[0]; ++}; ++ ++#define IBP_DEVICE_NAME_MAX 64 ++ ++struct ibp_add_device { ++ u8 name[IBP_DEVICE_NAME_MAX]; ++ u32 vendor_id; ++ u32 device_id; ++ u64 ib_device; ++ u64 device; ++ __be64 node_guid; ++ u64 uverbs_cmd_mask; ++ u32 uverbs_abi_ver; ++ u32 ibp_abi_ver; ++ u32 num_comp_vectors; ++ u8 phys_port_cnt; ++ u8 reserved[7]; ++}; ++ ++struct ibp_add_device_msg { ++ struct ibp_msg_header header; ++ struct ibp_add_device data; ++}; ++ ++struct ibp_remove_device_msg { ++ struct ibp_msg_header header; ++}; ++ ++struct ibp_verb_response_msg { ++ struct ibp_msg_header header; ++ u64 data[0]; ++}; ++ ++struct ibp_queued_response_msg { ++ struct ibp_msg_header header; ++ u64 data[0]; ++}; ++ ++struct ibp_async_event { ++ u64 ibdev; ++ u64 context; ++ u32 type; ++ u8 reserved[4]; ++}; ++ ++struct ibp_async_event_msg { ++ struct ibp_msg_header header; ++ struct ibp_async_event data; ++}; ++ ++struct ibp_cq_comp { ++ u64 cq_context; ++}; ++ ++struct ibp_cq_comp_msg { ++ struct ibp_msg_header header; ++ struct ibp_cq_comp data; ++}; ++ ++struct ibp_alloc_ucontext_cmd { ++ struct ibp_msg_header header; ++ u64 ibdev; ++ u64 data[0]; ++}; ++ ++struct ibp_alloc_ucontext_resp { ++ u64 ucontext; ++ u64 data[0]; ++}; ++ ++struct ibp_dealloc_ucontext_cmd { ++ struct ibp_msg_header header; ++ u64 ucontext; ++}; ++ ++struct ibp_mmap_cmd { ++ struct ibp_msg_header header; ++ u64 len; ++ u64 prot; ++ u64 flags; ++ u64 pgoff; ++ u64 ucontext; ++}; ++ ++struct ibp_mmap_resp { ++ u64 mmap; ++ u64 scif_addr; ++}; ++ ++struct ibp_unmmap_cmd { ++ struct ibp_msg_header header; ++ u64 mmap; ++}; ++ ++struct ibp_reg_buf_cmd { ++ struct ibp_msg_header header; ++ u64 ucontext; ++ u64 virt_addr; ++ u64 scif_addr; ++ u64 length; ++ u32 offset; ++ u32 access; ++}; ++ ++struct ibp_reg_buf_resp { ++ u64 reg; ++}; ++ ++struct ibp_dereg_buf_cmd { ++ struct ibp_msg_header header; ++ u64 reg; ++}; ++ ++struct ibp_query_device_cmd { ++ struct ibp_msg_header header; ++}; ++ ++struct ibp_query_device_resp { ++ u64 fw_ver; ++ __be64 sys_image_guid; ++ u64 max_mr_size; ++ u64 page_size_cap; ++ u32 vendor_id; ++ u32 vendor_part_id; ++ u32 hw_ver; ++ u32 max_qp; ++ u32 max_qp_wr; ++ u32 device_cap_flags; ++ u32 max_sge; ++ u32 max_sge_rd; ++ u32 max_cq; ++ u32 max_cqe; ++ u32 max_mr; ++ u32 max_pd; ++ u32 max_qp_rd_atom; ++ u32 max_ee_rd_atom; ++ u32 max_res_rd_atom; ++ u32 max_qp_init_rd_atom; ++ u32 max_ee_init_rd_atom; ++ u32 atomic_cap; ++ u32 masked_atomic_cap; ++ u32 max_ee; ++ u32 max_rdd; ++ u32 max_mw; ++ u32 max_raw_ipv6_qp; ++ u32 max_raw_ethy_qp; ++ u32 max_mcast_grp; ++ u32 max_mcast_qp_attach; ++ u32 max_total_mcast_qp_attach; ++ u32 max_ah; ++ u32 max_fmr; ++ u32 max_map_per_fmr; ++ u32 max_srq; ++ u32 max_srq_wr; ++ u32 max_srq_sge; ++ u32 max_fast_reg_page_list_len; ++ u16 max_pkeys; ++ u8 local_ca_ack_delay; ++ u8 reserved[5]; ++}; ++ ++struct ibp_query_port_cmd { ++ struct ibp_msg_header header; ++ u8 port_num; ++ u8 reserved[7]; ++}; ++ ++struct ibp_query_port_resp { ++ u32 port_cap_flags; ++ u32 max_msg_sz; ++ u32 bad_pkey_cntr; ++ u32 qkey_viol_cntr; ++ u32 gid_tbl_len; ++ u16 pkey_tbl_len; ++ u16 lid; ++ u16 sm_lid; ++ u8 state; ++ u8 max_mtu; ++ u8 active_mtu; ++ u8 lmc; ++ u8 max_vl_num; ++ u8 sm_sl; ++ u8 subnet_timeout; ++ u8 init_type_reply; ++ u8 active_width; ++ u8 active_speed; ++ u8 phys_state; ++ u8 link_layer; ++ u8 reserved[2]; ++}; ++ 
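++/*
++ * Illustrative sketch (not defined by this header): the sending side frames
++ * a query-port request by filling an ibp_query_port_cmd and expects an
++ * ibp_verb_response_msg whose data[] area carries an ibp_query_port_resp.
++ * Here "device" stands for the opaque u64 handle reported in ibp_add_device.
++ *
++ *	struct ibp_query_port_cmd cmd;
++ *
++ *	memset(&cmd, 0, sizeof(cmd));
++ *	cmd.header.opcode = IBP_VERB_QUERY_PORT;
++ *	cmd.header.length = sizeof(cmd);
++ *	cmd.header.device = device;
++ *	cmd.port_num      = 1;
++ *	(send cmd over the SCIF endpoint, then read back the response)
++ */
++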
++struct ibp_query_gid_cmd { ++ struct ibp_msg_header header; ++ u32 index; ++ u8 port_num; ++ u8 reserved[3]; ++}; ++ ++struct ibp_query_gid_resp { ++ __be64 subnet_prefix; ++ __be64 interface_id; ++}; ++ ++struct ibp_query_pkey_cmd { ++ struct ibp_msg_header header; ++ u32 index; ++ u8 port_num; ++ u8 reserved[3]; ++}; ++ ++struct ibp_query_pkey_resp { ++ u16 pkey; ++ u8 reserved[6]; ++}; ++ ++struct ibp_alloc_pd_cmd { ++ struct ibp_msg_header header; ++ u64 ucontext; ++ u64 data[0]; ++}; ++ ++struct ibp_alloc_pd_resp { ++ u64 pd; ++ u64 data[0]; ++}; ++ ++struct ibp_dealloc_pd_cmd { ++ struct ibp_msg_header header; ++ u64 pd; ++}; ++ ++struct ibp_global_route { ++ __be64 dgid_subnet_prefix; ++ __be64 dgid_interface_id; ++ u32 flow_label; ++ u8 sgid_index; ++ u8 hop_limit; ++ u8 traffic_class; ++ u8 reserved[1]; ++}; ++ ++struct ibp_ah_attr { ++ struct ibp_global_route grh; ++ u16 dlid; ++ u8 sl; ++ u8 src_path_bits; ++ u8 static_rate; ++ u8 ah_flags; ++ u8 port_num; ++ u8 reserved[1]; ++}; ++ ++struct ibp_create_ah_cmd { ++ struct ibp_msg_header header; ++ u64 pd; ++ struct ibp_ah_attr ah_attr; ++}; ++ ++struct ibp_create_ah_resp { ++ u64 ah; ++}; ++ ++struct ibp_query_ah_cmd { ++ struct ibp_msg_header header; ++ u64 ah; ++}; ++ ++struct ibp_query_ah_resp { ++ struct ibp_ah_attr attr; ++}; ++ ++struct ibp_destroy_ah_cmd { ++ struct ibp_msg_header header; ++ u64 ah; ++}; ++ ++struct ibp_srq_attr { ++ u32 max_wr; ++ u32 max_sge; ++ u32 srq_limit; ++ u8 reserved[4]; ++}; ++ ++struct ibp_create_srq_cmd { ++ struct ibp_msg_header header; ++ u64 pd; ++ u64 srq_context; ++ struct ibp_srq_attr attr; ++ u64 data[0]; ++}; ++ ++struct ibp_create_srq_resp { ++ u64 srq; ++ struct ibp_srq_attr attr; ++ u64 data[0]; ++}; ++ ++struct ibp_query_srq_cmd { ++ struct ibp_msg_header header; ++ u64 srq; ++}; ++ ++struct ibp_query_srq_resp { ++ struct ibp_srq_attr attr; ++}; ++ ++struct ibp_modify_srq_cmd { ++ struct ibp_msg_header header; ++ u64 srq; ++ struct ibp_srq_attr attr; ++ u32 srq_attr_mask; ++ u8 reserved[4]; ++ u64 data[0]; ++}; ++ ++struct ibp_modify_srq_resp { ++ struct ibp_srq_attr attr; ++ u64 data[0]; ++}; ++ ++struct ibp_destroy_srq_cmd { ++ struct ibp_msg_header header; ++ u64 srq; ++}; ++ ++struct ibp_qp_cap { ++ u32 max_send_wr; ++ u32 max_recv_wr; ++ u32 max_send_sge; ++ u32 max_recv_sge; ++ u32 max_inline_data; ++ u8 reserved[4]; ++}; ++ ++struct ibp_create_qp_cmd { ++ struct ibp_msg_header header; ++ u64 pd; ++ u64 send_cq; ++ u64 recv_cq; ++ u64 srq; ++ u64 xrc_domain; ++ u64 qp_context; ++ struct ibp_qp_cap cap; ++ u8 sq_sig_type; ++ u8 qp_type; ++ u8 create_flags; ++ u8 port_num; ++ u64 data[0]; ++}; ++ ++struct ibp_create_qp_resp { ++ u64 qp; ++ struct ibp_qp_cap cap; ++ u32 qpn; ++ u8 reserved[4]; ++ u64 data[0]; ++}; ++ ++struct ibp_query_qp_cmd { ++ struct ibp_msg_header header; ++ u64 qp; ++ u32 qp_attr_mask; ++ u8 reserved[4]; ++}; ++ ++struct ibp_query_qp_resp { ++ u32 qp_state; ++ u32 cur_qp_state; ++ u32 path_mtu; ++ u32 path_mig_state; ++ u32 qkey; ++ u32 rq_psn; ++ u32 sq_psn; ++ u32 dest_qp_num; ++ u32 qp_access_flags; ++ u32 init_create_flags; ++ struct ibp_qp_cap init_cap; ++ struct ibp_qp_cap cap; ++ struct ibp_ah_attr ah; ++ struct ibp_ah_attr alt_ah; ++ u16 pkey_index; ++ u16 alt_pkey_index; ++ u8 en_sqd_async_notify; ++ u8 sq_draining; ++ u8 max_rd_atomic; ++ u8 max_dest_rd_atomic; ++ u8 min_rnr_timer; ++ u8 port_num; ++ u8 timeout; ++ u8 retry_cnt; ++ u8 rnr_retry; ++ u8 alt_port_num; ++ u8 alt_timeout; ++ u8 init_sq_sig_type; ++}; ++ ++struct ibp_modify_qp_cmd { 
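++	/* qp_attr_mask selects which of the ib_qp_attr-style fields below apply */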
++ struct ibp_msg_header header; ++ u64 qp; ++ u32 qp_attr_mask; ++ u32 qp_state; ++ u32 cur_qp_state; ++ u32 path_mtu; ++ u32 path_mig_state; ++ u32 qkey; ++ u32 rq_psn; ++ u32 sq_psn; ++ u32 dest_qp_num; ++ u32 qp_access_flags; ++ struct ibp_qp_cap cap; ++ struct ibp_ah_attr ah; ++ struct ibp_ah_attr alt_ah; ++ u16 pkey_index; ++ u16 alt_pkey_index; ++ u8 en_sqd_async_notify; ++ u8 sq_draining; ++ u8 max_rd_atomic; ++ u8 max_dest_rd_atomic; ++ u8 min_rnr_timer; ++ u8 port_num; ++ u8 timeout; ++ u8 retry_cnt; ++ u8 rnr_retry; ++ u8 alt_port_num; ++ u8 alt_timeout; ++ u8 reserved[1]; ++ u64 data[0]; ++}; ++ ++struct ibp_modify_qp_resp { ++ struct ibp_qp_cap cap; ++ u64 data[0]; ++}; ++ ++struct ibp_destroy_qp_cmd { ++ struct ibp_msg_header header; ++ u64 qp; ++}; ++ ++struct ibp_create_cq_cmd { ++ struct ibp_msg_header header; ++ u64 ucontext; ++ u64 cq_context; ++ u32 cqe; ++ u32 vector; ++ u64 data[0]; ++}; ++ ++struct ibp_create_cq_resp { ++ u64 cq; ++ u32 cqe; ++ u8 reserved[4]; ++ u64 data[0]; ++}; ++ ++struct ibp_resize_cq_cmd { ++ struct ibp_msg_header header; ++ u64 cq; ++ u32 cqe; ++ u8 reserved[4]; ++ u64 data[0]; ++}; ++ ++struct ibp_resize_cq_resp { ++ u32 cqe; ++ u8 reserved[4]; ++ u64 data[0]; ++}; ++ ++struct ibp_destroy_cq_cmd { ++ struct ibp_msg_header header; ++ u64 cq; ++}; ++ ++struct ibp_reg_user_mr_cmd { ++ struct ibp_msg_header header; ++ u64 pd; ++ u64 hca_va; ++ u64 scif_addr; ++ u64 length; ++ u32 offset; ++ u32 access; ++ u64 data[0]; ++}; ++ ++struct ibp_reg_user_mr_resp { ++ u64 mr; ++ u32 lkey; ++ u32 rkey; ++ u64 data[0]; ++}; ++ ++struct ibp_dereg_mr_cmd { ++ struct ibp_msg_header header; ++ u64 mr; ++}; ++ ++struct ibp_attach_mcast_cmd { ++ struct ibp_msg_header header; ++ u64 qp; ++ __be64 subnet_prefix; ++ __be64 interface_id; ++ u16 lid; ++ u8 data[6]; ++}; ++ ++struct ibp_detach_mcast_cmd { ++ struct ibp_msg_header header; ++ u64 qp; ++ __be64 subnet_prefix; ++ __be64 interface_id; ++ u16 lid; ++ u8 data[6]; ++}; ++ ++#endif /* IBP_ABI_H */ +diff -urN a6/drivers/infiniband/ibp/drv/ibp.h a7/drivers/infiniband/ibp/drv/ibp.h +--- a6/drivers/infiniband/ibp/drv/ibp.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/ibp.h 2015-02-23 10:01:30.291769309 -0800 +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_H
++#define IBP_H
++
++#include
++#include "ibp-abi.h"
++
++struct ibp_device {
++	char name[IBP_DEVICE_NAME_MAX];
++	u32 vendor_id;
++	u32 device_id;
++	u64 ib_device;
++	u64 device;
++	__be64 node_guid;
++	u64 uverbs_cmd_mask;
++	u32 uverbs_abi_ver;
++	u32 ibp_abi_ver;
++	struct device *linux_dev;
++	struct list_head list;
++	u64 driver_data;
++	int abi_version;
++	int num_comp_vectors;
++	u8 phys_port_cnt;
++};
++
++struct ibp_id_table {
++	u32 vendor_id;
++	u32 device_id;
++};
++
++struct ibp_driver {
++	const char *name;
++	const struct ibp_id_table *id_table;
++	int (*add)(struct ibp_device *device);
++	void (*remove)(struct ibp_device *device);
++	u64 (*resolve)(struct ib_device *ibdev);
++
++	struct list_head list;
++};
++
++struct ibp_rb {
++	u64 handle;
++};
++
++struct ibp_iomem {
++	void *cookie;
++	void __iomem *addr;
++};
++
++/**
++ * ibp_resolve_ib_device - Return the host ib_device handle
++ * @ibdev:Card IB device
++ *
++ * Upper level drivers may require the host ib_device handle associated
++ * with the card ib_device. This routine resolves the card ib_device to
++ * the corresponding host ib_device handle. A value of 0 is returned if
++ * no match was found.
++ */
++u64 ibp_resolve_ib_device(struct ib_device *ibdev);
++
++/**
++ * ibp_register_driver - Register this driver
++ * @driver:Driver to register
++ *
++ * Lower level drivers use ibp_register_driver to register for callbacks
++ * on IB device addition and removal. Only one low level driver registration
++ * is allowed for each vendor/device id pair. When an IB device is added,
++ * it is compared with each registered driver vendor and device id. The add
++ * callback routine for the matching driver will be called.
++ */
++int ibp_register_driver(struct ibp_driver *driver);
++
++/**
++ * ibp_unregister_driver - Unregister this driver
++ * @driver:Driver to unregister
++ *
++ * Lower level drivers use ibp_unregister_driver() to remove their
++ * registration. When ibp_unregister_driver() is called, the driver
++ * will receive a remove callback for each IB device with matching vendor
++ * and device ids.
++ */
++void ibp_unregister_driver(struct ibp_driver *driver);
++
++static inline void ibp_set_driver_data(struct ibp_device *device, u64 data)
++{
++	device->driver_data = data;
++}
++
++static inline u64 ibp_get_driver_data(struct ibp_device *device)
++{
++	return device->driver_data;
++}
++
++int ibp_cmd_alloc_ucontext(struct ibp_device *device, struct ib_device *ibdev,
++	u64 *ucontext, struct ibp_alloc_ucontext_cmd *cmd,
++	size_t cmd_size,
++	struct ibp_alloc_ucontext_resp *resp,
++	size_t resp_size);
++
++int ibp_cmd_dealloc_ucontext(struct ibp_device *device, u64 ucontext);
++
++/**
++ * ibp_reg_buf - Register a private buffer with this driver
++ * @device: the device on which to register
++ * @ucontext: peer driver ucontext handle
++ * @vaddr: starting virtual address of the buffer
++ * @length: length of the buffer
++ * @access: IB_ACCESS_xxx flags for buffer
++ *
++ * Lower level drivers use ibp_reg_buf() to register private buffers.
++ * Upon success, a pointer to a registered buffer structure is returned
++ * which contains an addr handle. 
The addr handle can be shared with ++ * a peer driver on the host server for its use with ib_umem_get(). ++ * This routine should not be used to register IB memory regions. ++ */ ++struct ibp_rb *ibp_reg_buf(struct ibp_device *device, u64 ucontext, ++ unsigned long vaddr, size_t length, int access); ++ ++/** ++ * ibp_dereg_buf - Deregister a private buffer through this driver ++ * @device: the device on which to deregister ++ * @rb: pointer to the registered buffer structure; may be ERR or NULL ++ * ++ * Lower level drivers use ibp_dereg_buf() to deregister a private buffer. ++ */ ++int ibp_dereg_buf(struct ibp_device *device, struct ibp_rb *rb); ++ ++int ibp_cmd_mmap(struct ibp_device *device, u64 ucontext, ++ struct vm_area_struct *vma); ++ ++struct ibp_iomem *ibp_cmd_ioremap(struct ibp_device *device, u64 ucontext, ++ phys_addr_t offset, unsigned long size); ++ ++int ibp_cmd_iounmap(struct ibp_iomem *iomem); ++ ++int ibp_cmd_query_device(struct ibp_device *device, ++ struct ib_device_attr *device_attr); ++ ++int ibp_cmd_query_port(struct ibp_device *device, u8 port_num, ++ struct ib_port_attr *port_attr); ++ ++int ibp_cmd_query_gid(struct ibp_device *device, u8 port_num, int index, ++ union ib_gid *gid); ++ ++int ibp_cmd_query_pkey(struct ibp_device *device, u8 port_num, int index, ++ u16 *pkey); ++ ++int ibp_cmd_alloc_pd(struct ibp_device *device, u64 ucontext, u64 *pd, ++ struct ibp_alloc_pd_cmd *cmd, size_t cmd_size, ++ struct ibp_alloc_pd_resp *resp, size_t resp_size); ++ ++int ibp_cmd_dealloc_pd(struct ibp_device *device, u64 pd); ++ ++int ibp_cmd_create_ah(struct ibp_device *device, u64 pd, ++ struct ib_ah_attr *ah_attr, ++ u64 *ah); ++ ++int ibp_cmd_query_ah(struct ibp_device *device, u64 ah, ++ struct ib_ah_attr *ah_attr); ++ ++int ibp_cmd_destroy_ah(struct ibp_device *device, u64 ah); ++ ++int ibp_cmd_create_srq(struct ibp_device *device, u64 pd, ++ struct ib_srq_init_attr *init_attr, ++ u64 *srq, struct ib_srq *ibsrq, ++ struct ibp_create_srq_cmd *cmd, size_t cmd_size, ++ struct ibp_create_srq_resp *resp, size_t resp_size); ++ ++int ibp_cmd_query_srq(struct ibp_device *device, u64 srq, ++ struct ib_srq_attr *attr); ++ ++int ibp_cmd_modify_srq(struct ibp_device *device, u64 srq, ++ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, ++ struct ibp_modify_srq_cmd *cmd, size_t cmd_size, ++ struct ibp_modify_srq_resp *resp, size_t resp_size); ++ ++int ibp_cmd_destroy_srq(struct ibp_device *device, u64 srq); ++ ++int ibp_cmd_create_qp(struct ibp_device *device, u64 pd, ++ u64 send_cq, u64 recv_cq, u64 srq, ++ struct ib_qp_init_attr *init_attr, ++ u64 *qp, struct ib_qp *ibqp, ++ struct ibp_create_qp_cmd *cmd, size_t cmd_size, ++ struct ibp_create_qp_resp *resp, size_t resp_size); ++ ++int ibp_cmd_query_qp(struct ibp_device *device, u64 qp, ++ struct ib_qp_attr *attr, int qp_attr_mask, ++ struct ib_qp_init_attr *init_attr); ++ ++int ibp_cmd_modify_qp(struct ibp_device *device, u64 qp, ++ struct ib_qp_attr *attr, int qp_attr_mask, ++ struct ibp_modify_qp_cmd *cmd, size_t cmd_size, ++ struct ibp_modify_qp_resp *resp, size_t resp_size); ++ ++int ibp_cmd_destroy_qp(struct ibp_device *device, u64 qp); ++ ++int ibp_cmd_create_cq(struct ibp_device *device, u64 ucontext, ++ int entries, int vector, u64 *cq, struct ib_cq *ibcq, ++ struct ibp_create_cq_cmd *cmd, size_t cmd_size, ++ struct ibp_create_cq_resp *resp, size_t resp_size); ++ ++int ibp_cmd_resize_cq(struct ibp_device *device, u64 cq, ++ int entries, struct ib_cq *ibcq, ++ struct ibp_resize_cq_cmd *cmd, size_t cmd_size, ++ struct 
ibp_resize_cq_resp *resp, size_t resp_size); ++ ++int ibp_cmd_destroy_cq(struct ibp_device *device, u64 cq); ++ ++int ibp_cmd_reg_user_mr(struct ibp_device *device, u64 pd, u64 start, ++ u64 length, u64 virt_addr, int access, u64 *mr, ++ u32 *lkey, u32 *rkey, ++ struct ibp_reg_user_mr_cmd *cmd, size_t cmd_size, ++ struct ibp_reg_user_mr_resp *resp, size_t resp_size); ++ ++int ibp_cmd_dereg_mr(struct ibp_device *device, u64 mr); ++ ++int ibp_cmd_get_dma_mr(struct ibp_device *device, u64 pd, int access, ++ u64 *mr, u32 *lkey, u32 *rkey); ++ ++int ibp_cmd_attach_mcast(struct ibp_device *device, u64 qp, ++ union ib_gid *gid, u16 lid); ++ ++int ibp_cmd_detach_mcast(struct ibp_device *device, u64 qp, ++ union ib_gid *gid, u16 lid); ++ ++#endif /* IBP_H */ +diff -urN a6/drivers/infiniband/ibp/drv/Makefile a7/drivers/infiniband/ibp/drv/Makefile +--- a6/drivers/infiniband/ibp/drv/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/Makefile 2015-02-23 10:01:30.291769309 -0800 +@@ -0,0 +1,21 @@ ++KDIR ?= /lib/modules/`uname -r`/build ++ ++obj-$(CONFIG_IBP_SERVER) += ibp_server.o ++ ++ccflags-$(CONFIG_IBP_DEBUG) += -g -DIBP_DEBUG ++ ++ibp_server-y := server.o \ ++ stack.o \ ++ server_msg.o ++ ++default: ++ $(MAKE) -C $(KDIR) M=`pwd` ++ ++modules_install: ++ $(MAKE) -C $(KDIR) M=`pwd` modules_install ++ ++clean: ++ rm -rf *.ko *.o .*.ko.cmd .*.o.cmd *.mod.c Module.* modules.order .tmp_versions ++ ++unix: ++ dos2unix *.[ch] Kconfig Makefile +diff -urN a6/drivers/infiniband/ibp/drv/server.c a7/drivers/infiniband/ibp/drv/server.c +--- a6/drivers/infiniband/ibp/drv/server.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/server.c 2015-02-23 10:01:30.291769309 -0800 +@@ -0,0 +1,548 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#include "server.h" ++ ++MODULE_AUTHOR("Jerrie Coffman"); ++MODULE_AUTHOR("Phil Cayton"); ++MODULE_AUTHOR("Jay Sternberg"); ++MODULE_LICENSE("Dual BSD/GPL"); ++MODULE_DESCRIPTION(DRV_DESC); ++MODULE_VERSION(DRV_VERSION); ++ ++MODULE_PARAM(port, port, int, IBP_PORT, "Connection port"); ++MODULE_PARAM(backlog, backlog, int, 8, "Connection backlog"); ++MODULE_PARAM(timeout, timeout, int, 1000, "Listen/Poll time in milliseconds"); ++ ++#ifdef IBP_DEBUG ++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all"); ++#endif ++ ++#ifdef MOFED ++void *ibp_peer_mem_handle; ++invalidate_peer_memory ib_invalidate; ++#endif ++ ++struct rw_semaphore list_rwsem; ++ ++static struct class *ibp_class; ++static struct task_struct *listen_thread; ++ ++static LIST_HEAD(device_list); ++static LIST_HEAD(client_list); ++static LIST_HEAD(cdev_list); ++ ++static void ibp_add_one(struct ib_device *ib_dev); ++static void ibp_remove_one(struct ib_device *ib_dev); ++ ++static struct ib_client ib_client = { ++ .name = DRV_NAME, ++ .add = ibp_add_one, ++ .remove = ibp_remove_one ++}; ++ ++static int ibp_open(struct inode *inode, struct file *filp); ++static ssize_t ibp_write(struct file *filp, const char __user *buf, ++ size_t count, loff_t *pos); ++static int ibp_close(struct inode *inode, struct file *filp); ++ ++static const struct file_operations ibp_fops = { ++ .owner = THIS_MODULE, ++ .open = ibp_open, ++ .write = ibp_write, ++ .release = ibp_close, ++}; ++ ++static int ibp_create_cdev(struct ibp_client *client, uint16_t node) ++{ ++ struct device *device; ++ dev_t devt; ++ int ret; ++ ++ ret = alloc_chrdev_region(&devt, 0, 1, DRV_BASE); ++ if (ret) { ++ print_err("alloc_chrdev_region returned %d\n", ret); ++ return ret; ++ } ++ ++ cdev_init(&client->cdev, &ibp_fops); ++ client->cdev.owner = THIS_MODULE; ++ ++ ret = cdev_add(&client->cdev, devt, 1); ++ if (ret) { ++ print_err("cdev_add returned %d\n", ret); ++ goto err0; ++ } ++ ++ device = device_create(ibp_class, NULL, devt, ++ NULL, DRV_BASE "%u", node); ++ if (IS_ERR(device)) { ++ ret = PTR_ERR(device); ++ goto err1; ++ } ++ ++ /* Start on the cdev_list (until ibp_register_client). */ ++ down_write(&list_rwsem); ++ list_add_tail(&client->list, &cdev_list); ++ up_write(&list_rwsem); ++ ++ return 0; ++err1: ++ cdev_del(&client->cdev); ++err0: ++ unregister_chrdev_region(devt, 1); ++ return ret; ++} ++ ++static void ibp_destroy_cdev(struct ibp_client *client) ++{ ++ device_destroy(ibp_class, client->cdev.dev); ++ cdev_del(&client->cdev); ++ unregister_chrdev_region(client->cdev.dev, 1); ++} ++ ++static struct ibp_client *ibp_create_client(scif_epd_t ep, uint16_t node) ++{ ++ struct ibp_client *client; ++ int ret; ++ ++ /* If a reconnect occurs while on the cdev_list just update the ep. 
*/ ++ down_read(&list_rwsem); ++ list_for_each_entry(client, &cdev_list, list) { ++ if (client->node == node) { ++ up_read(&list_rwsem); ++ scif_close(client->ep); ++ client->ep = ep; ++ return client; ++ } ++ } ++ up_read(&list_rwsem); ++ ++ client = kzalloc(sizeof(*client), GFP_KERNEL); ++ if (!client) { ++ print_err("kzalloc failed\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ client->ep = ep; ++ client->node = node; ++ atomic_set(&client->busy, 0); ++ atomic_set(&client->rx_in_process, 0); ++ init_waitqueue_head(&client->rx_wait_queue); ++ mutex_init(&client->ucontext_mutex); ++ INIT_LIST_HEAD(&client->ucontext_list); ++ ++ client->workqueue = create_singlethread_workqueue(DRV_NAME); ++ if (!client->workqueue) { ++ print_err("create_singlethread_workqueue failed\n"); ++ goto err0; ++ } ++ ++ ret = ibp_create_cdev(client, node); ++ if (ret) ++ goto err1; ++ ++ return client; ++err1: ++ destroy_workqueue(client->workqueue); ++err0: ++ kfree(client); ++ return ERR_PTR(ret); ++} ++ ++static void ibp_destroy_client(struct ibp_client *client) ++{ ++ ibp_cleanup_ucontext(&client->ucontext_list); ++ scif_close(client->ep); ++ flush_workqueue(client->workqueue); ++ destroy_workqueue(client->workqueue); ++ ibp_destroy_cdev(client); ++ kfree(client); ++} ++ ++static void ibp_register_client(struct ibp_client *client) ++{ ++ struct ibp_device *device; ++ ++ down_write(&list_rwsem); ++ ++ list_move(&client->list, &client_list); ++ ++ list_for_each_entry(device, &device_list, list) ++ ibp_send_add(client, device); ++ ++ up_write(&list_rwsem); ++} ++ ++static void ibp_unregister_client(struct ibp_client *client) ++{ ++ struct ibp_device *device; ++ ++ flush_workqueue(client->workqueue); ++ ++ down_write(&list_rwsem); ++ ++ list_del(&client->list); ++ ++ list_for_each_entry(device, &device_list, list) ++ ibp_send_remove(client, device); ++ ++ up_write(&list_rwsem); ++} ++ ++static int ibp_open(struct inode *inode, struct file *filp) ++{ ++ struct ibp_client *client; ++ ++ client = container_of(inode->i_cdev, struct ibp_client, cdev); ++ ++ filp->private_data = client; ++ ++ if (atomic_add_return(1, &client->busy) == 1) ++ ibp_register_client(client); ++ ++ return 0; ++} ++ ++static ssize_t ibp_write(struct file *filp, const char __user *buf, ++ size_t count, loff_t *pos) ++{ ++ struct ibp_client *client; ++ void *rx_buf; ++ void *tx_buf; ++ int ret = -ENOMEM; ++ ++ client = filp->private_data; ++ ++ rx_buf = (void *) __get_free_page(GFP_KERNEL); ++ if (!rx_buf) { ++ print_err("__get_free_page rx_buf failed\n"); ++ goto err0; ++ } ++ ++ tx_buf = (void *) __get_free_page(GFP_KERNEL); ++ if (!tx_buf) { ++ print_err("__get_free_page tx_buf failed\n"); ++ goto err1; ++ } ++ ++ ret = ibp_process_recvs(client, rx_buf, tx_buf); ++ ++ free_page((uintptr_t) tx_buf); ++err1: ++ free_page((uintptr_t) rx_buf); ++err0: ++ return ret; ++} ++ ++static int ibp_close(struct inode *inode, struct file *filp) ++{ ++ struct ibp_client *client; ++ ++ client = filp->private_data; ++ ++ if (atomic_sub_and_test(1, &client->busy)) { ++ ibp_unregister_client(client); ++ device_destroy(ibp_class, client->cdev.dev); ++ ibp_destroy_client(client); ++ } ++ ++ return 0; ++} ++ ++int ibp_get_device(struct ibp_device *device) ++{ ++ struct ibp_device *entry; ++ ++ down_read(&list_rwsem); ++ ++ list_for_each_entry(entry, &device_list, list) { ++ if (entry == device) { ++ kref_get(&device->ref); ++ break; ++ } ++ } ++ ++ up_read(&list_rwsem); ++ ++ return (entry == device) ? 
0 : -ENODEV; ++} ++ ++static void ibp_complete_device(struct kref *ref) ++{ ++ struct ibp_device *device; ++ ++ device = container_of(ref, struct ibp_device, ref); ++ complete(&device->done); ++} ++ ++void ibp_put_device(struct ibp_device *device) ++{ ++ kref_put(&device->ref, ibp_complete_device); ++} ++ ++static struct ibp_device *ibp_create_device(struct ib_device *ib_dev) ++{ ++ struct ibp_device *device; ++ ++ device = kzalloc(sizeof(*device), GFP_KERNEL); ++ if (!device) { ++ print_err("kzalloc failed\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ device->ib_dev = ib_dev; ++ kref_init(&device->ref); ++ init_completion(&device->done); ++ ++ ib_set_client_data(ib_dev, &ib_client, device); ++ ++ return device; ++} ++ ++static void ibp_destroy_device(struct ibp_device *device) ++{ ++ ibp_put_device(device); ++ wait_for_completion(&device->done); ++ ++ ib_set_client_data(device->ib_dev, &ib_client, NULL); ++ kfree(device); ++} ++ ++static void ibp_register_device(struct ibp_device *device) ++{ ++ struct ibp_client *client; ++ ++ down_write(&list_rwsem); ++ ++ list_add_tail(&device->list, &device_list); ++ list_for_each_entry(client, &client_list, list) ++ ibp_send_add(client, device); ++ ++ up_write(&list_rwsem); ++} ++ ++static void ibp_unregister_device(struct ibp_device *device) ++{ ++ struct ibp_client *client; ++ ++ down_write(&list_rwsem); ++ ++ list_for_each_entry(client, &client_list, list) ++ ibp_send_remove(client, device); ++ ++ list_del(&device->list); ++ ++ up_write(&list_rwsem); ++} ++ ++static int ibp_ignore_ib_dev(struct ib_device *ib_dev) ++{ ++ /* ++ * Only allow PCI-based channel adapters and RNICs. ++ * PCI is required in order to read the vendor id. ++ */ ++ return (!ib_dev->dma_device->bus || ++ !ib_dev->dma_device->bus->name || ++ strnicmp(ib_dev->dma_device->bus->name, "pci", 3) || ++ ((ib_dev->node_type != RDMA_NODE_IB_CA) && ++ (ib_dev->node_type != RDMA_NODE_RNIC))) ? 
1 : 0; ++} ++ ++static void ibp_add_one(struct ib_device *ib_dev) ++{ ++ struct ibp_device *device; ++ ++ if (ibp_ignore_ib_dev(ib_dev)) ++ return; ++ ++ device = ibp_create_device(ib_dev); ++ if (IS_ERR(device)) ++ return; ++ ++ ibp_register_device(device); ++} ++ ++static void ibp_remove_one(struct ib_device *ib_dev) ++{ ++ struct ibp_device *device; ++ ++ device = ib_get_client_data(ib_dev, &ib_client); ++ if (!device) ++ return; ++ ++ ibp_unregister_device(device); ++ ibp_destroy_device(device); ++} ++ ++static int ibp_listen(void *data) ++{ ++ struct ibp_client *client; ++ struct scif_pollepd listen; ++ struct scif_portID peer; ++ scif_epd_t ep; ++ int ret; ++ ++ listen.epd = scif_open(); ++ if (!listen.epd) { ++ print_err("scif_open failed\n"); ++ ret = -EIO; ++ goto err0; ++ } ++ listen.events = POLLIN; ++ ++ ret = scif_bind(listen.epd, port); ++ if (ret < 0) { ++ print_err("scif_bind returned %d\n", ret); ++ goto err1; ++ } ++ ++ ret = scif_listen(listen.epd, backlog); ++ if (ret) { ++ print_err("scif_listen returned %d\n", ret); ++ goto err1; ++ } ++ ++ while (!kthread_should_stop()) { ++ ++ schedule(); ++ ++ ret = scif_poll(&listen, 1, timeout); ++ if (ret == 0) /* timeout */ ++ continue; ++ if (ret < 0) { ++ print_err("scif_poll revents 0x%x\n", listen.revents); ++ continue; ++ } ++ ++ ret = scif_accept(listen.epd, &peer, &ep, 0); ++ if (ret) { ++ print_err("scif_accept returned %d\n", ret); ++ continue; ++ } ++ ++ print_dbg("accepted node %d port %d\n", peer.node, peer.port); ++ ++ client = ibp_create_client(ep, peer.node); ++ if (IS_ERR(client)) { ++ ret = PTR_ERR(client); ++ print_err("ibp_create_client returned %d\n", ret); ++ scif_close(ep); ++ } ++ } ++err1: ++ scif_close(listen.epd); ++err0: ++ return ret; ++} ++ ++static int __init ibp_server_init(void) ++{ ++ int ret; ++ ++ print_info(DRV_SIGNON); ++ ++ init_rwsem(&list_rwsem); ++ ++ ret = ibp_init(); ++ if (ret) { ++ print_err("ibp_init_server returned %d\n", ret); ++ return ret; ++ } ++ ++ ibp_class = class_create(THIS_MODULE, "infiniband_proxy"); ++ if (IS_ERR(ibp_class)) { ++ ret = PTR_ERR(ibp_class); ++ print_err("class_create returned %d\n", ret); ++ goto err0; ++ } ++ ++ ret = ib_register_client(&ib_client); ++ if (ret) { ++ print_err("ib_register_client returned %d\n", ret); ++ goto err1; ++ } ++ ++#ifdef MOFED ++ ibp_peer_mem_handle = ib_register_peer_memory_client(&ibp_peer_mem, ++ &ib_invalidate); ++ if (IS_ERR(ibp_peer_mem_handle)) { ++ ret = PTR_ERR(ibp_peer_mem_handle); ++ print_err("ib_register_peer_memory_client returned %d\n", ret); ++ goto err2; ++ } ++#endif ++ ++ /* Start a thread for inbound connections. 
*/ ++ listen_thread = kthread_run(ibp_listen, NULL, DRV_NAME); ++ if (IS_ERR(listen_thread)) { ++ ret = PTR_ERR(listen_thread); ++ print_err("kthread_run returned %d\n", ret); ++ goto err3; ++ } ++ ++ return 0; ++err3: ++#ifdef MOFED ++ ib_unregister_peer_memory_client(ibp_peer_mem_handle); ++err2: ++#endif ++ ib_unregister_client(&ib_client); ++err1: ++ class_destroy(ibp_class); ++err0: ++ ibp_cleanup(); ++ return ret; ++} ++ ++static void __exit ibp_server_exit(void) ++{ ++ struct ibp_client *client; ++ struct ibp_client *next; ++ ++ kthread_stop(listen_thread); ++ ++ list_for_each_entry_safe(client, next, &cdev_list, list) ++ ibp_destroy_client(client); ++ ++#ifdef MOFED ++ ib_unregister_peer_memory_client(ibp_peer_mem_handle); ++#endif ++ ib_unregister_client(&ib_client); ++ class_destroy(ibp_class); ++ ++ ibp_cleanup(); ++ ++ print_info(DRV_DESC " unloaded\n"); ++} ++ ++module_init(ibp_server_init); ++module_exit(ibp_server_exit); +diff -urN a6/drivers/infiniband/ibp/drv/server.h a7/drivers/infiniband/ibp/drv/server.h +--- a6/drivers/infiniband/ibp/drv/server.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/server.h 2015-02-23 10:01:30.291769309 -0800 +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#ifndef SERVER_H ++#define SERVER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "ibp-abi.h" ++#include "common.h" ++ ++#define DRV_ROLE "Server" ++#define DRV_NAME "ibp_server" ++ ++#define MAX_MSG_SIZE PAGE_SIZE ++ ++extern int timeout; ++extern struct rw_semaphore list_rwsem; ++ ++struct ibp_device { ++ struct list_head list; ++ struct ib_device *ib_dev; ++ struct kref ref; ++ struct completion done; ++}; ++ ++struct ibp_client { ++ struct list_head list; ++ scif_epd_t ep; ++ struct workqueue_struct *workqueue; ++ struct mutex ucontext_mutex; ++ struct list_head ucontext_list; ++ wait_queue_head_t rx_wait_queue; ++ atomic_t rx_in_process; ++ struct cdev cdev; ++ atomic_t busy; ++ uint16_t node; ++}; ++ ++struct ibp_queued_response { ++ struct ibp_client *client; ++ struct work_struct work; ++ u64 msg[0]; ++}; ++ ++struct ibp_event { ++ struct ibp_client *client; ++ struct work_struct work; ++ u64 context; ++ u64 ibdev; ++ enum ib_event_type type; ++}; ++ ++struct ibp_comp { ++ struct ibp_client *client; ++ struct work_struct work; ++ void *cq_context; ++}; ++ ++struct ibp_ucontext { ++ struct ib_ucontext *ibucontext; ++ struct ibp_client *client; ++ struct ibp_device *device; ++ struct file *filp; ++ struct ib_event_handler event_handler; ++ u64 ibdev; ++ struct mutex mutex; ++ struct list_head list; ++ struct list_head mmap_list; ++ struct rb_root reg_tree; ++}; ++ ++struct ibp_qp { ++ struct ib_qp *ibqp; ++ struct list_head mcast; ++}; ++ ++struct ibp_mcast_entry { ++ struct list_head list; ++ union ib_gid gid; ++ u16 lid; ++}; ++ ++struct ibp_mmap { ++ struct list_head list; ++ struct ibp_ucontext *ucontext; ++ u64 len; ++ u64 prot; ++ u64 vaddr; ++ void __iomem *io_addr; ++ off_t scif_addr; ++}; ++ ++struct ibp_reg { ++ struct rb_node node; ++ struct scif_range *range; ++ struct ibp_ucontext *ucontext; ++ struct kref ref; ++ u64 virt_addr; ++ u64 length; ++ off_t offset; ++ u32 access; ++}; ++ ++struct ibp_mr { ++ struct ib_mr *ibmr; ++ struct ibp_reg *reg; ++}; ++ ++#ifdef MOFED ++#include ++extern struct peer_memory_client ibp_peer_mem; ++extern void *ibp_peer_mem_handle; ++extern invalidate_peer_memory ib_invalidate; ++#else ++#define IBP_UMEM_MAX_PAGE_CHUNK \ ++ ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ ++ ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ ++ (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) ++#endif ++ ++#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ ++ do { \ ++ (udata)->ops = &ibp_copy; \ ++ (udata)->inbuf = (void *)(ibuf); \ ++ (udata)->outbuf = (void *)(obuf); \ ++ (udata)->inlen = (ilen); \ ++ (udata)->outlen = (olen); \ ++ } while (0) ++ ++#define IBP_INIT_MSG(handle, msg, size, op) \ ++ do { \ ++ (msg)->header.opcode = IBP_##op; \ ++ (msg)->header.length = (size); \ ++ (msg)->header.status = 0; \ ++ (msg)->header.reserved = 0; \ ++ (msg)->header.device = (uintptr_t)(handle); \ ++ (msg)->header.request = 0; \ ++ } while (0) ++ ++#define IBP_INIT_RESP(handle, resp, size, op, req, stat) \ ++ do { \ ++ (resp)->header.opcode = IBP_##op; \ ++ (resp)->header.length = (size); \ ++ (resp)->header.status = (stat); \ ++ (resp)->header.reserved = 0; \ ++ (resp)->header.device = (uintptr_t)(handle); \ ++ (resp)->header.request = (req); \ ++ } while (0) ++ ++int ibp_process_recvs(struct ibp_client *client, void *rx_buf, void *tx_buf); ++void ibp_cleanup_ucontext(struct list_head *ucontext_list); ++int ibp_send_add(struct ibp_client *client, struct ibp_device *device); ++int 
ibp_send_remove(struct ibp_client *client, struct ibp_device *device); ++int ibp_get_device(struct ibp_device *device); ++void ibp_put_device(struct ibp_device *device); ++ ++#endif /* SERVER_H */ +diff -urN a6/drivers/infiniband/ibp/drv/server_msg.c a7/drivers/infiniband/ibp/drv/server_msg.c +--- a6/drivers/infiniband/ibp/drv/server_msg.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/server_msg.c 2015-02-23 10:01:30.292769309 -0800 +@@ -0,0 +1,3098 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include ++ ++#include "server.h" ++#include "stack.h" ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ++ #define MUNMAP(x,y,z) \ ++ do { \ ++ down_write(¤t->mm->mmap_sem); \ ++ do_munmap(x,y,z); \ ++ up_write(¤t->mm->mmap_sem); \ ++ } while (0) ++#else ++ #define MUNMAP(x,y,z) \ ++ vm_munmap((unsigned long)y,z) ++#endif ++ ++static struct ibp_stack *o_stack; ++static struct ibp_stack *a_stack; ++static struct ibp_stack *c_stack; ++ ++/* ++ * umem functions ++ */ ++static int ibp_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) ++{ ++ size_t bytes; ++ ++ bytes = min(len, udata->inlen); ++ ++ memcpy(dest, udata->inbuf, bytes); ++ if (bytes < len) { ++ memset(dest + bytes, 0, len - bytes); ++ return -EFAULT; ++ } ++ return 0; ++} ++ ++static int ibp_copy_to_udata(struct ib_udata *udata, void *src, size_t len) ++{ ++ size_t bytes; ++ ++ bytes = min(len, udata->outlen); ++ ++ memcpy(udata->outbuf, src, bytes); ++ udata->outlen -= bytes; ++ ++ return (bytes < len) ? 
-EFAULT : 0; ++} ++ ++static struct ib_udata_ops ibp_copy = { ++ .copy_from = ibp_copy_from_udata, ++ .copy_to = ibp_copy_to_udata ++}; ++ ++#ifdef MOFED ++ ++static struct ibp_reg *__ibp_find_reg(struct ibp_ucontext *ucontext, ++ unsigned long virt, size_t size) ++{ ++ struct rb_node *node; ++ struct ibp_reg *reg; ++ ++ node = ucontext->reg_tree.rb_node; ++ ++ while (node) { ++ reg = rb_entry(node, struct ibp_reg, node); ++ ++ if ((virt == reg->virt_addr) && ++ (size == reg->length)) ++ return reg; ++ ++ if (virt < reg->virt_addr) ++ node = node->rb_left; ++ else if (virt > reg->virt_addr) ++ node = node->rb_right; ++ else if (size < reg->length) ++ node = node->rb_left; ++ else if (size > reg->length) ++ node = node->rb_right; ++ else ++ node = node->rb_right; ++ } ++ ++ return ERR_PTR(-EFAULT); ++} ++ ++static struct ibp_reg *ibp_find_reg(struct ibp_ucontext *ucontext, ++ unsigned long virt, size_t size) ++{ ++ struct ibp_reg *reg; ++ ++ mutex_lock(&ucontext->mutex); ++ reg = __ibp_find_reg(ucontext, virt, size); ++ mutex_unlock(&ucontext->mutex); ++ ++ return reg; ++} ++ ++/* ibp_peer_acquire return code: 1 mine, 0 not mine */ ++static int ibp_peer_acquire(unsigned long addr, ++ size_t size, void* peer_mem_private_data, ++ char* peer_mem_name, void** client_context) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_reg *reg; ++ ++ /* Verify private data is ours before ibp_ucontext cast. */ ++ if (!peer_mem_name || !peer_mem_private_data || ++ strncmp(peer_mem_name, ibp_peer_mem.name, ++ sizeof(ibp_peer_mem.name))) ++ return 0; ++ ++ ucontext = (struct ibp_ucontext *) peer_mem_private_data; ++ ++ reg = ibp_find_reg(ucontext, addr, size); ++ if (IS_ERR(reg)) { ++ print_err("ibp_find_reg returned %d\n", (int)PTR_ERR(reg)); ++ return 0; ++ } ++ ++ *client_context = (void *) reg; ++ ++ return 1; ++} ++ ++static int ibp_peer_get_pages(unsigned long addr, size_t size, int write, ++ int force, struct sg_table *sg_head, ++ void* client_context, void* core_context) ++{ ++ struct ibp_reg *reg; ++ struct page *page; ++ struct scatterlist *sg; ++ void **va; ++ int npages, off, i, ret; ++ ++ reg = (struct ibp_reg *) client_context; ++ ++ off = (addr - reg->virt_addr) + reg->offset; ++ npages = PAGE_ALIGN(size + (off & ~PAGE_MASK)) >> PAGE_SHIFT; ++ ++ ret = sg_alloc_table(sg_head, npages, GFP_KERNEL); ++ if (ret) ++ return ret; ++ ++ va = reg->range->va; ++ ++ for_each_sg(sg_head->sgl, sg, npages, i) { ++ page = vmalloc_to_page(va[i]); ++ if (!page) { ++ print_err("vmalloc_to_page failed\n"); ++ ret = -EINVAL; ++ goto err; ++ } ++ sg_set_page(sg, page, PAGE_SIZE, 0); ++ } ++ ++ return 0; ++err: ++ sg_free_table(sg_head); ++ return ret; ++} ++ ++static int ibp_peer_dma_map(struct sg_table *sg_head, void *client_context, ++ struct device *dma_device, int dmasync, int *nmap) ++{ ++ DEFINE_DMA_ATTRS(attrs); ++ int ret = 0; ++ ++ if (dmasync) ++ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); ++ ++ *nmap = dma_map_sg_attrs(dma_device, ++ sg_head->sgl, ++ sg_head->orig_nents, ++ DMA_BIDIRECTIONAL, ++ &attrs); ++ ++ if (*nmap > 0) ++ sg_head->nents = *nmap; ++ else ++ ret = -ENOMEM; ++ ++ return ret; ++} ++ ++static int ibp_peer_dma_umap(struct sg_table *sg_head, void *client_context, ++ struct device *dma_device) ++{ ++ dma_unmap_sg(dma_device, ++ sg_head->sgl, ++ sg_head->nents, ++ DMA_BIDIRECTIONAL); ++ return 0; ++} ++ ++static void ibp_peer_put_pages(struct sg_table *sg_head, void *client_context) ++{ ++ sg_free_table(sg_head); ++} ++ ++static unsigned long ibp_peer_get_page_size(void *client_context) 
++{ ++ return PAGE_SIZE; ++} ++ ++struct peer_memory_client ibp_peer_mem = { ++ .name = DRV_NAME, ++ .version = DRV_VERSION, ++ .acquire = &ibp_peer_acquire, ++ .get_pages = &ibp_peer_get_pages, ++ .dma_map = &ibp_peer_dma_map, ++ .dma_unmap = &ibp_peer_dma_umap, ++ .put_pages = &ibp_peer_put_pages, ++ .get_page_size = &ibp_peer_get_page_size, ++}; ++ ++#else /* MOFED */ ++ ++static struct ibp_reg *__ibp_find_reg(struct ibp_ucontext *ucontext, ++ unsigned long virt, size_t size, ++ int access) ++{ ++ struct rb_node *node; ++ struct ibp_reg *reg; ++ ++ node = ucontext->reg_tree.rb_node; ++ ++ while (node) { ++ reg = rb_entry(node, struct ibp_reg, node); ++ ++ if ((virt == reg->virt_addr) && ++ (size == reg->length) && ++ (access == reg->access)) ++ return reg; ++ ++ if (virt < reg->virt_addr) ++ node = node->rb_left; ++ else if (virt > reg->virt_addr) ++ node = node->rb_right; ++ else if (size < reg->length) ++ node = node->rb_left; ++ else if (size > reg->length) ++ node = node->rb_right; ++ else if (access < reg->access) ++ node = node->rb_left; ++ else ++ node = node->rb_right; ++ } ++ ++ return ERR_PTR(-EFAULT); ++} ++ ++static struct ibp_reg *ibp_find_reg(struct ibp_ucontext *ucontext, ++ unsigned long virt, size_t size, ++ int access) ++{ ++ struct ibp_reg *reg; ++ ++ mutex_lock(&ucontext->mutex); ++ reg = __ibp_find_reg(ucontext, virt, size, access); ++ mutex_unlock(&ucontext->mutex); ++ ++ return reg; ++} ++ ++static void __ibp_umem_release(struct ib_device *dev, struct ib_umem *umem, ++ int dirty) ++{ ++ struct scatterlist *sg; ++ int i; ++ ++ if (umem->nmap > 0) ++ ib_dma_unmap_sg(dev, umem->sg_head.sgl, ++ umem->nmap, DMA_BIDIRECTIONAL); ++ ++ if (umem->writable && dirty) ++ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) ++ set_page_dirty_lock(sg_page(sg)); ++ ++ sg_free_table(&umem->sg_head); ++} ++ ++static struct ib_umem *ibp_umem_get(struct ib_ucontext *ibucontext, ++ unsigned long addr, size_t size, ++ int access, int dmasync) ++{ ++ struct ibp_reg *reg; ++ struct ib_umem *umem; ++ struct device *dma_device; ++ struct page *page; ++ struct scatterlist *sg; ++ void **va; ++ dma_addr_t *pa; ++ dma_addr_t daddr; ++ unsigned int dsize; ++ int npages; ++ int off; ++ int i; ++ int ret = 0; ++ ++ DEFINE_DMA_ATTRS(attrs); ++ ++ reg = ibp_find_reg(ibucontext->umem_private_data, addr, size, access); ++ if (IS_ERR(reg)) ++ return ERR_CAST(reg); ++ ++ if (dmasync) ++ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); ++ ++ umem = kzalloc(sizeof(*umem), GFP_KERNEL); ++ if (!umem) { ++ print_err("kalloc failed\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ umem->length = size; ++ umem->offset = addr & ~PAGE_MASK; ++ umem->page_size = PAGE_SIZE; ++ umem->pid = get_task_pid(current, PIDTYPE_PID); ++ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); ++ ++ dsize = 0; ++ daddr = 0; ++ va = reg->range->va; ++ pa = reg->range->phys_addr; ++ dma_device = ibucontext->device->dma_device; ++ off = (addr - reg->virt_addr) + reg->offset; ++ npages = PAGE_ALIGN(size + (off & ~PAGE_MASK)) >> PAGE_SHIFT; ++ off >>= PAGE_SHIFT; ++ ++ ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); ++ if (ret) { ++ print_err("sg_alloc_table failed\n"); ++ goto err1; ++ } ++ ++ /* Assume hugetlb unless proven otherwise. */ ++ umem->hugetlb = 1; ++ for (i = 0; i < npages && umem->hugetlb; i++) { ++ if (!dsize) { ++ dsize = PAGE_SIZE; ++ daddr = pa[i + off]; ++ /* Page must start on a huge page boundary. 
*/ ++ if ((daddr & ~HPAGE_MASK) >= PAGE_SIZE) ++ umem->hugetlb = 0; ++ } else if (daddr + dsize != pa[i + off]) ++ /* Pages must be contiguous. */ ++ umem->hugetlb = 0; ++ else { ++ dsize += PAGE_SIZE; ++ if (dsize == HPAGE_SIZE) ++ dsize = 0; ++ } ++ } ++ /* Page must end on a huge page boundary.*/ ++ if (umem->hugetlb && ((daddr + dsize) & ~HPAGE_MASK)) ++ umem->hugetlb = 0; ++ ++ for_each_sg(umem->sg_head.sgl, sg, npages, i) { ++ page = vmalloc_to_page(va[i]); ++ if (!page) { ++ print_err("vmalloc_to_page failed\n"); ++ ret = -EINVAL; ++ goto err2; ++ } ++ sg_set_page(sg, page, PAGE_SIZE, 0); ++ } ++ ++ umem->npages = npages; ++ ++ umem->nmap = ib_dma_map_sg_attrs(ibucontext->device, ++ umem->sg_head.sgl, ++ umem->npages, ++ DMA_BIDIRECTIONAL, ++ &attrs); ++ if (umem->nmap <= 0) { ++ print_err("map_sg_attrs failed\n"); ++ ret = -ENOMEM; ++ goto err2; ++ } ++ ++ return umem; ++err2: ++ __ibp_umem_release(ibucontext->device, umem, 0); ++err1: ++ put_pid(umem->pid); ++ kfree(umem); ++ return ERR_PTR(ret); ++} ++ ++static void ibp_umem_release(struct ib_umem *umem) ++{ ++ struct ib_ucontext *ibucontext; ++ ++ ibucontext = umem->context; ++ ++ __ibp_umem_release(ibucontext->device, umem, 0); ++ ++ put_pid(umem->pid); ++ kfree(umem); ++} ++ ++static struct ib_umem_ops ibp_umem = { ++ .get = &ibp_umem_get, ++ .release = &ibp_umem_release, ++}; ++ ++#endif /* MOFED */ ++ ++static int ibp_send(scif_epd_t ep, void *buf, size_t len) ++{ ++ int ret; ++ ++ while (len) { ++ ret = scif_send(ep, buf, (uint32_t) len, SCIF_SEND_BLOCK); ++ if (ret < 0) { ++ print_dbg("scif_send returned %d\n", ret); ++ return ret; ++ } ++ buf += ret; ++ len -= ret; ++ } ++ ++ return 0; ++} ++ ++static int ibp_recv(scif_epd_t ep, void *buf, size_t len) ++{ ++ int ret; ++ ++ while (len) { ++ ret = scif_recv(ep, buf, (uint32_t) len, SCIF_RECV_BLOCK); ++ if (ret < 0) { ++ print_dbg("scif_recv returned %d\n", ret); ++ return ret; ++ } ++ buf += ret; ++ len -= ret; ++ } ++ ++ return 0; ++} ++ ++int ibp_send_add(struct ibp_client *client, struct ibp_device *device) ++{ ++ struct pci_dev *pdev; ++ struct ibp_add_device_msg msg; ++ ++ print_trace("in\n"); ++ ++ pdev = to_pci_dev(device->ib_dev->dma_device); ++ ++ IBP_INIT_MSG(device, &msg, sizeof(msg), ADD_DEVICE); ++ ++ strncpy(msg.data.name, device->ib_dev->name, sizeof(msg.data.name)); ++ msg.data.vendor_id = pdev->vendor; ++ msg.data.device_id = pdev->device; ++ ++ msg.data.ib_device = (uintptr_t) device->ib_dev; ++ msg.data.device = (uintptr_t) device; ++ msg.data.node_guid = device->ib_dev->node_guid; ++ msg.data.uverbs_cmd_mask = device->ib_dev->uverbs_cmd_mask; ++ msg.data.uverbs_abi_ver = device->ib_dev->uverbs_abi_ver; ++ msg.data.ibp_abi_ver = IBP_ABI_VERSION; ++ msg.data.num_comp_vectors = device->ib_dev->num_comp_vectors; ++ msg.data.phys_port_cnt = device->ib_dev->phys_port_cnt; ++ ++ return ibp_send(client->ep, &msg, sizeof(msg)); ++} ++ ++int ibp_send_remove(struct ibp_client *client, struct ibp_device *device) ++{ ++ struct ibp_remove_device_msg msg; ++ ++ print_trace("in\n"); ++ ++ IBP_INIT_MSG(device, &msg, sizeof(msg), REMOVE_DEVICE); ++ return ibp_send(client->ep, &msg, sizeof(msg)); ++} ++ ++static void ibp_send_queued_response(struct work_struct *work) ++{ ++ struct ibp_queued_response_msg *msg; ++ struct ibp_queued_response *resp; ++ ++ resp = container_of(work, struct ibp_queued_response, work); ++ msg = (struct ibp_queued_response_msg *) resp->msg; ++ ++ ibp_send(resp->client->ep, msg, msg->header.length); ++ kfree(resp); ++} ++ ++static int 
ibp_queue_response(struct ibp_client *client,
++			struct ibp_queued_response_msg *msg)
++{
++	struct ibp_queued_response *resp;
++	size_t len;
++
++	len = sizeof(*resp) + msg->header.length;
++
++	resp = kmalloc(len, GFP_ATOMIC);
++	if (!resp) {
++		print_err("kmalloc failed\n");
++		return -ENOMEM;
++	}
++
++	resp->client = client;
++	memcpy(&resp->msg, msg, msg->header.length);
++
++	/* Queue to serialize behind any associated events. */
++	INIT_WORK(&resp->work, ibp_send_queued_response);
++	queue_work(client->workqueue, &resp->work);
++
++	return 0;
++}
++
++static int ibp_cmd_error(struct ibp_client *client,
++			 struct ibp_msg_header *hdr, void *tx_buf, int ret)
++{
++	struct ibp_verb_response_msg *msg;
++	size_t len;
++
++	msg = (struct ibp_verb_response_msg *) tx_buf;
++	len = sizeof(*msg);
++
++	IBP_INIT_RESP(hdr->device, msg, len, VERB_RESPONSE, hdr->request, ret);
++	return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_bad_request(struct ibp_client *client,
++			       struct ibp_msg_header *hdr, void *tx_buf)
++{
++	print_dbg("opcode 0x%x\n", hdr->opcode);
++	return ibp_cmd_error(client, hdr, tx_buf, -EBADRQC);
++}
++
++static int ibp_cmd_not_supported(struct ibp_client *client,
++				 struct ibp_msg_header *hdr, void *tx_buf)
++{
++	print_dbg("opcode 0x%x\n", hdr->opcode);
++	return ibp_cmd_error(client, hdr, tx_buf, -ENOSYS);
++}
++
++static int ibp_cmd_query_device(struct ibp_client *client,
++				struct ibp_msg_header *hdr, void *tx_buf)
++{
++	struct ibp_device *device;
++	struct ibp_verb_response_msg *msg;
++	struct ibp_query_device_resp *resp;
++	struct ib_device_attr attr;
++	size_t len;
++	int ret;
++
++	print_trace("in\n");
++
++	device = (struct ibp_device *) hdr->device;
++	msg = (struct ibp_verb_response_msg *) tx_buf;
++	len = sizeof(*msg);
++
++	ret = ib_query_device(device->ib_dev, &attr);
++	if (ret) {
++		print_err("ib_query_device returned %d\n", ret);
++		goto send_resp;
++	}
++
++	resp = (struct ibp_query_device_resp *) msg->data;
++	len += sizeof(*resp);
++
++	resp->fw_ver = attr.fw_ver;
++	resp->sys_image_guid = attr.sys_image_guid;
++	resp->max_mr_size = attr.max_mr_size;
++	resp->page_size_cap = attr.page_size_cap;
++	resp->vendor_id = attr.vendor_id;
++	resp->vendor_part_id = attr.vendor_part_id;
++	resp->hw_ver = attr.hw_ver;
++	resp->max_qp = attr.max_qp;
++	resp->max_qp_wr = attr.max_qp_wr;
++	resp->device_cap_flags = attr.device_cap_flags;
++	resp->max_sge = attr.max_sge;
++	resp->max_sge_rd = attr.max_sge_rd;
++	resp->max_cq = attr.max_cq;
++	resp->max_cqe = attr.max_cqe;
++	resp->max_mr = attr.max_mr;
++	resp->max_pd = attr.max_pd;
++	resp->max_qp_rd_atom = attr.max_qp_rd_atom;
++	resp->max_ee_rd_atom = attr.max_ee_rd_atom;
++	resp->max_res_rd_atom = attr.max_res_rd_atom;
++	resp->max_qp_init_rd_atom = attr.max_qp_init_rd_atom;
++	resp->max_ee_init_rd_atom = attr.max_ee_init_rd_atom;
++	resp->atomic_cap = attr.atomic_cap;
++	resp->masked_atomic_cap = attr.masked_atomic_cap;
++	resp->max_ee = attr.max_ee;
++	resp->max_rdd = attr.max_rdd;
++	resp->max_mw = attr.max_mw;
++	resp->max_raw_ipv6_qp = attr.max_raw_ipv6_qp;
++	resp->max_raw_ethy_qp = attr.max_raw_ethy_qp;
++	resp->max_mcast_grp = attr.max_mcast_grp;
++	resp->max_mcast_qp_attach = attr.max_mcast_qp_attach;
++	resp->max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
++	resp->max_ah = attr.max_ah;
++	resp->max_fmr = attr.max_fmr;
++	resp->max_map_per_fmr = attr.max_map_per_fmr;
++	resp->max_srq = attr.max_srq;
++	resp->max_srq_wr = attr.max_srq_wr;
++	resp->max_srq_sge = attr.max_srq_sge;
++	
resp->max_fast_reg_page_list_len = attr.max_fast_reg_page_list_len; ++ resp->max_pkeys = attr.max_pkeys; ++ resp->local_ca_ack_delay = attr.local_ca_ack_delay; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_query_port(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_query_port_cmd *cmd; ++ struct ibp_query_port_resp *resp; ++ struct ib_port_attr attr; ++ size_t len; ++ int ret; ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_query_port_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_query_port(device->ib_dev, cmd->port_num, &attr); ++ if (ret) { ++ print_err("ib_query_port returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ resp = (struct ibp_query_port_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->state = attr.state; ++ resp->max_mtu = attr.max_mtu; ++ resp->active_mtu = attr.active_mtu; ++ resp->gid_tbl_len = attr.gid_tbl_len; ++ resp->port_cap_flags = attr.port_cap_flags; ++ resp->max_msg_sz = attr.max_msg_sz; ++ resp->bad_pkey_cntr = attr.bad_pkey_cntr; ++ resp->qkey_viol_cntr = attr.qkey_viol_cntr; ++ resp->pkey_tbl_len = attr.pkey_tbl_len; ++ resp->lid = attr.lid; ++ resp->sm_lid = attr.sm_lid; ++ resp->lmc = attr.lmc; ++ resp->max_vl_num = attr.max_vl_num; ++ resp->sm_sl = attr.sm_sl; ++ resp->subnet_timeout = attr.subnet_timeout; ++ resp->init_type_reply = attr.init_type_reply; ++ resp->active_width = attr.active_width; ++ resp->active_speed = attr.active_speed; ++ resp->phys_state = attr.phys_state; ++ resp->link_layer = rdma_port_get_link_layer(device->ib_dev, ++ cmd->port_num); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_query_gid(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_query_gid_cmd *cmd; ++ struct ibp_query_gid_resp *resp; ++ size_t len; ++ union ib_gid gid; ++ int ret; ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_query_gid_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_query_gid(device->ib_dev, cmd->port_num, cmd->index, &gid); ++ if (ret) { ++ print_err("ib_query_gid returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ resp = (struct ibp_query_gid_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->subnet_prefix = gid.global.subnet_prefix; ++ resp->interface_id = gid.global.interface_id; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_query_pkey(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_query_pkey_cmd *cmd; ++ struct ibp_query_pkey_resp *resp; ++ size_t len; ++ u16 pkey; ++ int ret; ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_query_pkey_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_query_pkey(device->ib_dev, cmd->port_num, cmd->index, &pkey); ++ if (ret) { ++ print_err("ib_query_pkey returned %d\n", ret); ++ goto send_resp; ++ } ++ resp = (struct ibp_query_pkey_resp *) 
msg->data; ++ len += sizeof(*resp); ++ ++ resp->pkey = pkey; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static void ibp_async_event(struct work_struct *work) ++{ ++ struct ibp_event *event; ++ struct ibp_async_event_msg msg; ++ ++ event = container_of(work, struct ibp_event, work); ++ ++ IBP_INIT_MSG(NULL, &msg, sizeof(msg), ASYNC_EVENT); ++ ++ msg.data.context = (uintptr_t) event->context; ++ msg.data.type = event->type; ++ ++ ibp_send(event->client->ep, &msg, sizeof(msg)); ++ ++ ibp_add_to_stack(a_stack, (void *) event); ++} ++ ++static void ibp_event_handler(struct ib_event_handler *handler, ++ struct ib_event *ibevent) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_client *client; ++ struct ibp_event *event; ++ ++ ucontext = container_of(handler, struct ibp_ucontext, event_handler); ++ ++ if (ucontext->ibucontext->closing) { ++ print_dbg("ignoring event, connection closing\n"); ++ return; ++ } ++ ++ event = (struct ibp_event *) ++ ibp_pull_from_stack(a_stack, sizeof(*event), GFP_ATOMIC); ++ if (!event) { ++ print_err("kalloc failed\n"); ++ return; ++ } ++ ++ client = ucontext->client; ++ ++ event->client = client; ++ event->context = ibevent->element.port_num; ++ event->type = ibevent->event; ++ event->ibdev = ucontext->ibdev; ++ ++ INIT_WORK(&event->work, ibp_async_event); ++ queue_work(client->workqueue, &event->work); ++} ++ ++static int ibp_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ib_ucontext *ibucontext; ++ ++ ucontext = filp->private_data; ++ ibucontext = ucontext->ibucontext; ++ ++ return (ibucontext->device->mmap) ? ++ ibucontext->device->mmap(ibucontext, vma) : -ENOSYS; ++} ++ ++static const struct file_operations ibp_fops = { ++ .mmap = ibp_mmap, ++}; ++ ++static int ibp_cmd_alloc_ucontext(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_alloc_ucontext_cmd *cmd; ++ struct ibp_alloc_ucontext_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ib_ucontext *ibucontext; ++ struct ib_udata udata; ++ size_t len; ++ size_t outlen; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_alloc_ucontext_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_alloc_ucontext_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ /* Workaround for len check in mlx5 driver (no impact to others) */ ++ len += sizeof(struct ib_uverbs_cmd_hdr); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ ret = ibp_get_device(device); ++ if (ret) { ++ print_err("ibp_get_device returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL); ++ if (!ucontext) { ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto err1; ++ } ++ ucontext->device = device; ++ ++ ibucontext = device->ib_dev->alloc_ucontext(device->ib_dev, &udata); ++ if (IS_ERR(ibucontext)) { ++ ret = PTR_ERR(ibucontext); ++ print_err("Invalid ibucontext %p\n", ibucontext); ++ goto err2; ++ } ++ ++#ifdef MOFED ++ ibucontext->peer_mem_name = ibp_peer_mem.name; ++ ibucontext->peer_mem_private_data = ucontext; ++#else ++ ibucontext->umem_ops = &ibp_umem; ++ ibucontext->umem_private_data = ucontext; ++#endif ++ ++ ibucontext->device = device->ib_dev; 
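++ /* Initialize the core ib_ucontext fields and per-resource lists by hand; ib_uverbs is not involved on this path. */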
++ ibucontext->closing = 0; ++ ++ INIT_LIST_HEAD(&ibucontext->pd_list); ++ INIT_LIST_HEAD(&ibucontext->mr_list); ++ INIT_LIST_HEAD(&ibucontext->mw_list); ++ INIT_LIST_HEAD(&ibucontext->cq_list); ++ INIT_LIST_HEAD(&ibucontext->qp_list); ++ INIT_LIST_HEAD(&ibucontext->srq_list); ++ INIT_LIST_HEAD(&ibucontext->ah_list); ++ INIT_LIST_HEAD(&ibucontext->xrcd_list); ++ ++ ucontext->filp = anon_inode_getfile("["DRV_NAME"]", &ibp_fops, ++ ucontext, O_RDWR); ++ if (IS_ERR(ucontext->filp)) { ++ ret = PTR_ERR(ucontext->filp); ++ print_err("anon_inode_getfile returned %d\n", ret); ++ goto err3; ++ } ++ ++ if (cmd->ibdev) { ++ ucontext->ibdev = cmd->ibdev; ++ INIT_IB_EVENT_HANDLER(&ucontext->event_handler, device->ib_dev, ++ ibp_event_handler); ++ ret = ib_register_event_handler(&ucontext->event_handler); ++ if (ret) { ++ print_err("event_handler returned %d\n", ret); ++ goto err4; ++ } ++ } ++ ++ ucontext->client = client; ++ ucontext->ibucontext = ibucontext; ++ mutex_init(&ucontext->mutex); ++ INIT_LIST_HEAD(&ucontext->mmap_list); ++ ucontext->reg_tree = RB_ROOT; ++ ++ mutex_lock(&client->ucontext_mutex); ++ list_add_tail(&ucontext->list, &client->ucontext_list); ++ mutex_unlock(&client->ucontext_mutex); ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->ucontext = (uintptr_t)ucontext; ++ ++ goto send_resp; ++ ++err4: ++ fput(ucontext->filp); ++err3: ++ device->ib_dev->dealloc_ucontext(ibucontext); ++err2: ++ kfree(ucontext); ++err1: ++ ibp_put_device(device); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_dealloc_ucontext(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_dealloc_ucontext_cmd *cmd; ++ struct ibp_queued_response_msg *msg; ++ struct ibp_ucontext *ucontext; ++ struct ib_ucontext *ibucontext; ++ size_t len; ++ int ret = -EINVAL; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_dealloc_ucontext_cmd *) hdr; ++ ucontext = (struct ibp_ucontext *) cmd->ucontext; ++ msg = (struct ibp_queued_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ if (IS_NULL_OR_ERR(ucontext)) { ++ print_err("Invalid ucontext %p\n", ucontext); ++ goto send_resp; ++ } ++ ++ ibucontext = ucontext->ibucontext; ++ ++ if (ucontext->ibdev) ++ ib_unregister_event_handler(&ucontext->event_handler); ++ ++ fput(ucontext->filp); ++ ++ if (device && device->ib_dev) { ++ ret = device->ib_dev->dealloc_ucontext(ibucontext); ++ if (ret) { ++ print_err("ib_dealloc_ucontext returned %d\n", ret); ++ goto send_resp; ++ } ++ } ++ ++ mutex_lock(&client->ucontext_mutex); ++ list_del(&ucontext->list); ++ mutex_unlock(&client->ucontext_mutex); ++ ++ ibp_put_device(device); ++ kfree(ucontext); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret); ++ return ibp_queue_response(client, msg); ++} ++ ++static void ibp_dereg_buf(struct kref *ref) ++{ ++ struct ibp_reg *reg; ++ struct ibp_ucontext *ucontext; ++ ++ reg = container_of(ref, struct ibp_reg, ref); ++ ucontext = reg->ucontext; ++ ++ if (!RB_EMPTY_NODE(®->node)) { ++ mutex_lock(&ucontext->mutex); ++ rb_erase(®->node, &ucontext->reg_tree); ++ mutex_unlock(&ucontext->mutex); ++ } ++ ++ if (reg->range) ++ scif_put_pages(reg->range); ++ ++ kfree(reg); ++} ++ ++static struct ibp_reg *__ibp_insert_reg_buf(struct ibp_ucontext *ucontext, ++ struct ibp_reg *reg) ++{ ++ struct rb_node 
**link; ++ struct rb_node *parent; ++ struct ibp_reg *cur_reg; ++ ++ link = &ucontext->reg_tree.rb_node; ++ parent = NULL; ++ ++ while (*link) { ++ parent = *link; ++ cur_reg = rb_entry(parent, struct ibp_reg, node); ++ ++#ifdef MOFED ++ if ((reg->virt_addr == cur_reg->virt_addr) && ++ (reg->length == cur_reg->length)) ++ return cur_reg; ++#else ++ if ((reg->virt_addr == cur_reg->virt_addr) && ++ (reg->length == cur_reg->length) && ++ (reg->access == cur_reg->access)) ++ return cur_reg; ++#endif ++ ++ if (reg->virt_addr < cur_reg->virt_addr) ++ link = &(*link)->rb_left; ++ else if (reg->virt_addr > cur_reg->virt_addr) ++ link = &(*link)->rb_right; ++ else if (reg->length < cur_reg->length) ++ link = &(*link)->rb_left; ++ else if (reg->length > cur_reg->length) ++ link = &(*link)->rb_right; ++#ifndef MOFED ++ else if (reg->access < cur_reg->access) ++ link = &(*link)->rb_left; ++#endif ++ else ++ link = &(*link)->rb_right; ++ } ++ ++ rb_link_node(®->node, parent, link); ++ rb_insert_color(®->node, &ucontext->reg_tree); ++ ++ return NULL; ++} ++ ++static struct ibp_reg *ibp_reg_buf(struct ibp_ucontext *ucontext, ++ u64 virt_addr, u64 scif_addr, u64 length, ++ u64 offset, u32 access) ++{ ++ struct ibp_reg *reg; ++ struct ibp_reg *cur_reg; ++ int ret; ++ ++ reg = kzalloc(sizeof(*reg), GFP_KERNEL); ++ if (!reg) { ++ print_err("kzalloc failed\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ kref_init(®->ref); ++ RB_CLEAR_NODE(®->node); ++ reg->ucontext = ucontext; ++ reg->virt_addr = virt_addr; ++ reg->length = length; ++ reg->offset = offset; ++ reg->access = access; ++ ++ ret = scif_get_pages(ucontext->client->ep, scif_addr, ++ PAGE_ALIGN(reg->length + ++ (reg->virt_addr & ~PAGE_MASK)), ++ ®->range); ++ if (ret) { ++ print_err("scif_get_pages returned %d\n", ret); ++ kref_put(®->ref, ibp_dereg_buf); ++ return ERR_PTR(ret); ++ } ++ ++ mutex_lock(&ucontext->mutex); ++ ++ cur_reg = __ibp_insert_reg_buf(ucontext, reg); ++ if (cur_reg) { ++ print_dbg("__ibp_insert_reg_buf duplicate entry\n"); ++ kref_get(&cur_reg->ref); ++ } ++ ++ mutex_unlock(&ucontext->mutex); ++ ++ if (cur_reg) { ++ kref_put(®->ref, ibp_dereg_buf); ++ reg = cur_reg; ++ } ++ ++ return reg; ++} ++ ++static int ibp_cmd_reg_buf(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_reg_buf_cmd *cmd; ++ struct ibp_reg_buf_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_reg *reg; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_reg_buf_cmd *) hdr; ++ ucontext = (struct ibp_ucontext *) cmd->ucontext; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ reg = ibp_reg_buf(ucontext, cmd->virt_addr, cmd->scif_addr, ++ cmd->length, cmd->offset, cmd->access); ++ if (IS_ERR(reg)) { ++ ret = PTR_ERR(reg); ++ print_err("ibp_reg_buf returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ resp = (struct ibp_reg_buf_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->reg = (uintptr_t)reg; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_dereg_buf(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_dereg_buf_cmd *cmd; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_reg *reg; ++ size_t len; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) 
hdr->device; ++ cmd = (struct ibp_dereg_buf_cmd *) hdr; ++ reg = (struct ibp_reg *) cmd->reg; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ kref_put(®->ref, ibp_dereg_buf); ++ ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, 0); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_convert_prot_flags(unsigned long prot) ++{ ++ int prot_flags; ++ ++ prot_flags = 0; ++ ++ if (prot & PROT_READ) ++ prot_flags |= SCIF_PROT_READ; ++ ++ if (prot & PROT_WRITE) ++ prot_flags |= SCIF_PROT_WRITE; ++ ++ return prot_flags; ++} ++ ++static int ibp_convert_map_flags(unsigned long flags) ++{ ++ int map_flags; ++ ++ map_flags = SCIF_MAP_KERNEL; ++ ++ if (flags & MAP_FIXED) ++ map_flags |= SCIF_MAP_FIXED; ++ ++ return map_flags; ++} ++ ++static int ibp_scif_register(struct ibp_client *client, struct ibp_mmap *mmap, ++ unsigned long flags) ++{ ++ struct vm_area_struct *vma; ++ unsigned long npages; ++ unsigned long pfn; ++ int offset; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ offset = mmap->vaddr & ~PAGE_MASK; ++ npages = PAGE_ALIGN(mmap->len + offset) >> PAGE_SHIFT; ++ if (npages != 1) { ++ print_err("request %lu but only one page supported\n", npages); ++ return -EINVAL; ++ } ++ ++ down_write(¤t->mm->mmap_sem); ++ vma = find_vma(current->mm, mmap->vaddr); ++ if (!vma) { ++ up_write(¤t->mm->mmap_sem); ++ print_err("find_vma failed\n"); ++ return -EFAULT; ++ } ++ ++ ret = follow_pfn(vma, mmap->vaddr, &pfn); ++ ++ up_write(¤t->mm->mmap_sem); ++ if (ret) { ++ print_err("follow_pfn returned %d\n", ret); ++ return ret; ++ } ++ ++ mmap->io_addr = ioremap(page_to_phys(pfn_to_page(pfn)), mmap->len); ++ if (!mmap->io_addr) { ++ print_err("ioremap failed\n"); ++ return -ENOMEM; ++ } ++ ++ mmap->scif_addr = scif_register(client->ep, (void *) mmap->io_addr, ++ mmap->len, (off_t) mmap->io_addr, ++ ibp_convert_prot_flags(mmap->prot), ++ ibp_convert_map_flags(flags)); ++ if (IS_ERR_VALUE(mmap->scif_addr)) { ++ ret = mmap->scif_addr; ++ print_err("scif_register returned %d\n", ret); ++ goto err0; ++ ++ } ++ ++ return 0; ++err0: ++ iounmap(mmap->io_addr); ++ return ret; ++} ++ ++static ++void ibp_scif_unregister(struct ibp_client *client, struct ibp_mmap *mmap) ++{ ++ int ret; ++ ++ print_trace("in\n"); ++ ++ ret = scif_unregister(client->ep, mmap->scif_addr, mmap->len); ++ if (ret) { ++ if (ret == -ECONNRESET) ++ print_dbg("scif connection reset\n"); ++ else ++ print_err("scif_unregister returned %d\n", ret); ++ } ++ ++ iounmap(mmap->io_addr); ++} ++ ++static int ibp_cmd_mmap(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_mmap_cmd *cmd; ++ struct ibp_mmap_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_mmap *mmap; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_mmap_cmd *) hdr; ++ ucontext = (struct ibp_ucontext *) cmd->ucontext; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ mmap = kzalloc(sizeof(*mmap), GFP_KERNEL); ++ if (!mmap) { ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto send_resp; ++ } ++ mmap->ucontext = ucontext; ++ mmap->len = cmd->len; ++ mmap->prot = cmd->prot; ++ ++ /* The mmap syscall ignores these bits; do the same here. 
*/ ++ cmd->flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ++ down_write(¤t->mm->mmap_sem); ++ mmap->vaddr = do_mmap_pgoff(ucontext->filp, 0, cmd->len, ++ cmd->prot, cmd->flags, cmd->pgoff); ++ up_write(¤t->mm->mmap_sem); ++#else ++ mmap->vaddr = vm_mmap(ucontext->filp, 0, cmd->len, cmd->prot, ++ cmd->flags, cmd->pgoff << PAGE_SHIFT); ++#endif ++ ++ if (mmap->vaddr & ~PAGE_MASK) { ++ ret = mmap->vaddr; ++ print_err("mmap returned %d\n", ret); ++ goto err1; ++ } ++ ++ ret = ibp_scif_register(client, mmap, cmd->flags); ++ if (ret) { ++ print_err("ibp_scif_register returned %d\n", ret); ++ goto err2; ++ } ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&mmap->list, &ucontext->mmap_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ resp = (struct ibp_mmap_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->scif_addr = mmap->scif_addr; ++ resp->mmap = (uintptr_t)mmap; ++ ++ goto send_resp; ++err2: ++ MUNMAP(current->mm, mmap->vaddr, cmd->len); ++err1: ++ kfree(mmap); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_unmmap(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_unmmap_cmd *cmd; ++ struct ibp_mmap *mmap; ++ struct ibp_verb_response_msg *msg; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_unmmap_cmd *) hdr; ++ mmap = (struct ibp_mmap *) cmd->mmap; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ if (IS_NULL_OR_ERR(mmap)) { ++ print_err("Invalid mmap %p\n", mmap); ++ ret = -EINVAL; ++ goto send_resp; ++ } ++ ++ ibp_scif_unregister(client, mmap); ++ ++ if (IS_NULL_OR_ERR(current) || IS_NULL_OR_ERR(current->mm)) { ++ print_err("Invalid current mm pointer\n"); ++ ret = -EINVAL; ++ goto send_resp; ++ } ++ ++ MUNMAP(current->mm, mmap->vaddr, mmap->len); ++ ++ if (mmap->ucontext) { ++ mutex_lock(&mmap->ucontext->mutex); ++ list_del(&mmap->list); ++ mutex_unlock(&mmap->ucontext->mutex); ++ } ++ ++ kfree(mmap); ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static struct ib_uobject *ibp_create_uobj(struct ibp_ucontext *ucontext) ++{ ++ static struct lock_class_key __key; ++ struct ib_uobject *uobj; ++ ++ if (IS_NULL_OR_ERR(ucontext)) ++ return ERR_PTR(-EINVAL); ++ ++ uobj = (struct ib_uobject *) ++ ibp_pull_from_stack(o_stack, sizeof(*uobj), GFP_ATOMIC); ++ if (!uobj) ++ return ERR_PTR(-ENOMEM); ++ ++ /* ++ * the uobj struct is updated since this is kernel-to-kernel, ++ * so this structure is not fully setup as in ib_uverbs. 
++ */ ++ uobj->context = ucontext->ibucontext; ++ uobj->user_handle = (uintptr_t)ucontext; ++ kref_init(&uobj->ref); ++ init_rwsem(&uobj->mutex); ++ lockdep_set_class(&uobj->mutex, &__key); ++ uobj->live = 1; ++ ++ return uobj; ++} ++ ++static void ibp_destroy_uobj(struct ib_uobject *uobj) ++{ ++ struct ibp_ucontext *ucontext; ++ ++ if (!IS_NULL_OR_ERR(uobj)) { ++ ucontext = (struct ibp_ucontext *) uobj->user_handle; ++ if (ucontext) { ++ mutex_lock(&ucontext->mutex); ++ list_del(&uobj->list); ++ mutex_unlock(&ucontext->mutex); ++ } ++ ++ ibp_add_to_stack(o_stack, (void *) uobj); ++ } ++} ++ ++static int ibp_cmd_alloc_pd(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_alloc_pd_cmd *cmd; ++ struct ibp_alloc_pd_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ib_uobject *uobj; ++ struct ib_udata udata; ++ struct ib_pd *pd; ++ size_t len; ++ size_t outlen; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_alloc_pd_cmd *) hdr; ++ ucontext = (struct ibp_ucontext *) cmd->ucontext; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_alloc_pd_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ uobj = ibp_create_uobj(ucontext); ++ if (IS_ERR(uobj)) { ++ ret = PTR_ERR(uobj); ++ print_err("ibp_create_uobj returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ pd = device->ib_dev->alloc_pd(device->ib_dev, ucontext->ibucontext, ++ &udata); ++ if (IS_ERR(pd)) { ++ ret = PTR_ERR(pd); ++ print_err("ib_alloc_pd returned %d\n", ret); ++ /* ++ * Clear uobj's user_handle as destroy_uobj tries to list_del ++ * uobj from the list and uobj has NOT been added yet ++ */ ++ uobj->user_handle = 0; ++ ibp_destroy_uobj(uobj); ++ goto send_resp; ++ } ++ ++ pd->device = device->ib_dev; ++ atomic_set(&pd->usecnt, 0); ++ ++ pd->uobject = uobj; ++ uobj->object = pd; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&uobj->list, &ucontext->ibucontext->pd_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->pd = (uintptr_t)pd; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_dealloc_pd(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_dealloc_pd_cmd *cmd; ++ struct ibp_verb_response_msg *msg; ++ struct ib_uobject *uobj; ++ struct ib_pd *pd; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_dealloc_pd_cmd *) hdr; ++ pd = (struct ib_pd *) cmd->pd; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ if (IS_NULL_OR_ERR(pd)) { ++ print_err("Invalid pd %p\n", pd); ++ ret = -EINVAL; ++ goto send_resp; ++ } ++ ++ uobj = pd->uobject; ++ ++ ret = ib_dealloc_pd(pd); ++ if (unlikely(ret == -EBUSY)) { ++ msleep(100); ++ ret = ib_dealloc_pd(pd); ++ } ++ if (ret) { ++ print_err("ib_dealloc_pd returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ibp_destroy_uobj(uobj); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int 
ibp_cmd_create_ah(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_create_ah_cmd *cmd; ++ struct ibp_create_ah_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ib_uobject *uobj; ++ struct ib_pd *pd; ++ struct ib_ah *ah; ++ struct ib_ah_attr attr; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_create_ah_cmd *) hdr; ++ pd = (struct ib_pd *) cmd->pd; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle; ++ ++ uobj = ibp_create_uobj(ucontext); ++ if (IS_ERR(uobj)) { ++ ret = PTR_ERR(uobj); ++ print_err("ibp_create_uobj returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ memset(&attr, 0, sizeof(attr)); ++ ++ attr.dlid = cmd->ah_attr.dlid; ++ attr.sl = cmd->ah_attr.sl; ++ attr.src_path_bits = cmd->ah_attr.src_path_bits; ++ attr.static_rate = cmd->ah_attr.static_rate; ++ attr.ah_flags = cmd->ah_attr.ah_flags; ++ attr.port_num = cmd->ah_attr.port_num; ++ attr.grh.dgid.global.subnet_prefix = ++ cmd->ah_attr.grh.dgid_subnet_prefix; ++ attr.grh.dgid.global.interface_id = cmd->ah_attr.grh.dgid_interface_id; ++ attr.grh.flow_label = cmd->ah_attr.grh.flow_label; ++ attr.grh.sgid_index = cmd->ah_attr.grh.sgid_index; ++ attr.grh.hop_limit = cmd->ah_attr.grh.hop_limit; ++ attr.grh.traffic_class = cmd->ah_attr.grh.traffic_class; ++ ++ ah = ib_create_ah(pd, &attr); ++ if (IS_ERR(ah)) { ++ ret = PTR_ERR(ah); ++ print_err("ib_create_ah returned %d\n", ret); ++ /* ++ * Clear uobj's user_handle as destroy_uobj tries to list_del ++ * uobj from the list and uobj has NOT been added yet ++ */ ++ uobj->user_handle = 0; ++ ibp_destroy_uobj(uobj); ++ goto send_resp; ++ } ++ ++ ah->uobject = uobj; ++ uobj->object = ah; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&uobj->list, &ucontext->ibucontext->ah_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ resp = (struct ibp_create_ah_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->ah = (uintptr_t) ah; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_query_ah(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_query_ah_cmd *cmd; ++ struct ibp_query_ah_resp *resp; ++ struct ibp_verb_response_msg *msg; ++ struct ib_ah *ah; ++ struct ib_ah_attr attr; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_query_ah_cmd *) hdr; ++ ah = (struct ib_ah *) cmd->ah; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_query_ah(ah, &attr); ++ if (ret) { ++ print_err("ib_query_ah returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ resp = (struct ibp_query_ah_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->attr.dlid = attr.dlid; ++ resp->attr.sl = attr.sl; ++ resp->attr.src_path_bits = attr.src_path_bits; ++ resp->attr.static_rate = attr.static_rate; ++ resp->attr.ah_flags = attr.ah_flags; ++ resp->attr.port_num = attr.port_num; ++ resp->attr.grh.dgid_subnet_prefix = attr.grh.dgid.global.subnet_prefix; ++ resp->attr.grh.dgid_interface_id = attr.grh.dgid.global.interface_id; ++ resp->attr.grh.flow_label = attr.grh.flow_label; ++ resp->attr.grh.sgid_index = attr.grh.sgid_index; ++ resp->attr.grh.hop_limit = 
attr.grh.hop_limit; ++ resp->attr.grh.traffic_class = attr.grh.traffic_class; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_destroy_ah(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_destroy_ah_cmd *cmd; ++ struct ib_uobject *uobj; ++ struct ib_ah *ah; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_destroy_ah_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ ah = (struct ib_ah *) cmd->ah; ++ len = sizeof(*msg); ++ ++ uobj = ah->uobject; ++ ++ ret = ib_destroy_ah(ah); ++ if (ret) { ++ print_err("ib_destroy_ah returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ibp_destroy_uobj(uobj); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static void ibp_ibsrq_event(struct ib_event *ibevent, void *srq_context) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_client *client; ++ struct ibp_event *event; ++ struct ib_uobject *uobj; ++ ++ print_trace("in\n"); ++ ++ event = kmalloc(sizeof(*event), GFP_ATOMIC); ++ if (!event) { ++ print_err("kalloc failed\n"); ++ return; ++ } ++ ++ uobj = ibevent->element.srq->uobject; ++ ucontext = (struct ibp_ucontext *) uobj->user_handle; ++ client = ucontext->client; ++ ++ event->client = client; ++ event->context = (uintptr_t) srq_context; ++ event->type = ibevent->event; ++ event->ibdev = ucontext->ibdev; ++ ++ INIT_WORK(&event->work, ibp_async_event); ++ queue_work(client->workqueue, &event->work); ++} ++ ++static int ibp_cmd_create_srq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_create_srq_cmd *cmd; ++ struct ibp_create_srq_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ib_uobject *uobj; ++ struct ib_pd *pd; ++ struct ib_srq *srq; ++ struct ib_srq_init_attr init_attr; ++ struct ib_udata udata; ++ size_t len; ++ size_t outlen; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_create_srq_cmd *) hdr; ++ pd = (struct ib_pd *) cmd->pd; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_create_srq_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle; ++ ++ uobj = ibp_create_uobj(ucontext); ++ if (IS_ERR(uobj)) { ++ ret = PTR_ERR(uobj); ++ print_err("ibp_create_uobj returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ memset(&init_attr, 0, sizeof(init_attr)); ++ ++ init_attr.event_handler = ibp_ibsrq_event; ++ init_attr.srq_context = (void *) cmd->srq_context; ++ init_attr.attr.max_wr = cmd->attr.max_wr; ++ init_attr.attr.max_sge = cmd->attr.max_sge; ++ init_attr.attr.srq_limit = cmd->attr.srq_limit; ++ ++ srq = device->ib_dev->create_srq(pd, &init_attr, &udata); ++ if (IS_ERR(srq)) { ++ ret = PTR_ERR(srq); ++ print_err("ib_create_srq returned %d\n", ret); ++ /* ++ * Clear uobj's user_handle as destroy_uobj tries to list_del ++ * uobj from the list and uobj has NOT been added yet ++ */ ++ uobj->user_handle = 0; ++ ibp_destroy_uobj(uobj); ++ goto send_resp; ++ 
} ++ ++ srq->device = device->ib_dev; ++ srq->pd = pd; ++ srq->event_handler = init_attr.event_handler; ++ srq->srq_context = init_attr.srq_context; ++ srq->srq_type = 0; ++ srq->ext.xrc.cq = NULL; ++ srq->ext.xrc.xrcd = NULL; ++ ++ atomic_inc(&pd->usecnt); ++ atomic_set(&srq->usecnt, 0); ++ ++ srq->uobject = uobj; ++ uobj->object = srq; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&uobj->list, &ucontext->ibucontext->srq_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->srq = (uintptr_t)srq; ++ resp->attr.max_wr = init_attr.attr.max_wr; ++ resp->attr.max_sge = init_attr.attr.max_sge; ++ resp->attr.srq_limit = init_attr.attr.srq_limit; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_modify_srq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_modify_srq_cmd *cmd; ++ struct ibp_modify_srq_resp *resp; ++ struct ib_srq *srq; ++ struct ib_srq_attr attr; ++ struct ib_udata udata; ++ size_t len; ++ size_t outlen; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_modify_srq_cmd *) hdr; ++ srq = (struct ib_srq *) cmd->srq; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_modify_srq_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ memset(&attr, 0, sizeof(attr)); ++ ++ attr.max_wr = cmd->attr.max_wr; ++ attr.max_sge = cmd->attr.max_sge; ++ attr.srq_limit = cmd->attr.srq_limit; ++ ++ ret = device->ib_dev->modify_srq(srq, &attr, cmd->srq_attr_mask, ++ &udata); ++ if (ret) { ++ print_err("ib_modify_srq returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->attr.max_wr = attr.max_wr; ++ resp->attr.max_sge = attr.max_sge; ++ resp->attr.srq_limit = attr.srq_limit; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_query_srq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_query_srq_cmd *cmd; ++ struct ibp_query_srq_resp *resp; ++ struct ib_srq *srq; ++ struct ib_srq_attr attr; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_query_srq_cmd *) hdr; ++ srq = (struct ib_srq *) cmd->srq; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_query_srq(srq, &attr); ++ if (ret) { ++ print_err("ib_query_srq returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ resp = (struct ibp_query_srq_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->attr.max_wr = attr.max_wr; ++ resp->attr.max_sge = attr.max_sge; ++ resp->attr.srq_limit = attr.srq_limit; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_destroy_srq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_queued_response_msg *msg; 
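++ /* Reply via the queued-response path so it stays serialized behind any async SRQ events already on the client workqueue. */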
++ struct ibp_destroy_srq_cmd *cmd; ++ struct ib_uobject *uobj; ++ struct ib_srq *srq; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_destroy_srq_cmd *) hdr; ++ srq = (struct ib_srq *) cmd->srq; ++ msg = (struct ibp_queued_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ uobj = srq->uobject; ++ ++ ret = ib_destroy_srq(srq); ++ if (unlikely(ret == -EBUSY)) { ++ msleep(100); ++ ret = ib_destroy_srq(srq); ++ } ++ if (ret) { ++ print_err("ib_destroy_srq returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ibp_destroy_uobj(uobj); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret); ++ return ibp_queue_response(client, msg); ++} ++ ++static void ibp_ibqp_event(struct ib_event *ibevent, void *qp_context) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_client *client; ++ struct ibp_event *event; ++ struct ib_uobject *uobj; ++ ++ event = kmalloc(sizeof(*event), GFP_ATOMIC); ++ if (!event) { ++ print_err("kalloc failed\n"); ++ return; ++ } ++ ++ uobj = ibevent->element.qp->uobject; ++ ucontext = (struct ibp_ucontext *) uobj->user_handle; ++ client = ucontext->client; ++ ++ event->client = client; ++ event->context = (uintptr_t) qp_context; ++ event->type = ibevent->event; ++ event->ibdev = ucontext->ibdev; ++ ++ INIT_WORK(&event->work, ibp_async_event); ++ queue_work(client->workqueue, &event->work); ++} ++ ++static int ibp_cmd_create_qp(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_create_qp_cmd *cmd; ++ struct ibp_create_qp_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ib_uobject *uobj; ++ struct ib_pd *pd; ++ struct ibp_qp *qp; ++ struct ib_qp_init_attr init_attr; ++ struct ib_udata udata; ++ size_t len; ++ size_t outlen; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_create_qp_cmd *) hdr; ++ pd = (struct ib_pd *) cmd->pd; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_create_qp_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ qp = kzalloc(sizeof *qp, GFP_KERNEL); ++ if (!qp) { ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto send_resp; ++ } ++ INIT_LIST_HEAD(&qp->mcast); ++ ++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle; ++ ++ uobj = ibp_create_uobj(ucontext); ++ if (IS_ERR(uobj)) { ++ ret = PTR_ERR(uobj); ++ print_err("ibp_create_uobj returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ memset(&init_attr, 0, sizeof(init_attr)); ++ ++ init_attr.send_cq = (struct ib_cq *) cmd->send_cq; ++ init_attr.recv_cq = (struct ib_cq *) cmd->recv_cq; ++ init_attr.srq = (struct ib_srq *) cmd->srq; ++ init_attr.xrcd = (struct ib_xrcd *) cmd->xrc_domain; ++ init_attr.cap.max_send_wr = cmd->cap.max_send_wr; ++ init_attr.cap.max_recv_wr = cmd->cap.max_recv_wr; ++ init_attr.cap.max_send_sge = cmd->cap.max_send_sge; ++ init_attr.cap.max_recv_sge = cmd->cap.max_recv_sge; ++ init_attr.cap.max_inline_data = cmd->cap.max_inline_data; ++ init_attr.sq_sig_type = cmd->sq_sig_type; ++ init_attr.qp_type = cmd->qp_type; ++ init_attr.create_flags = cmd->create_flags; ++ init_attr.port_num = cmd->port_num; ++ ++ qp->ibqp = device->ib_dev->create_qp(pd, &init_attr, &udata); ++ if (IS_ERR(qp->ibqp)) { ++ ret = 
PTR_ERR(qp->ibqp); ++ print_err("ib_create_qp returned %d\n", ret); ++ /* ++ * Clear uobj's user_handle as destroy_uobj tries to list_del ++ * uobj from the list and uobj has NOT been added yet ++ */ ++ uobj->user_handle = 0; ++ ibp_destroy_uobj(uobj); ++ goto send_resp; ++ } ++ ++ qp->ibqp->device = device->ib_dev; ++ qp->ibqp->pd = pd; ++ qp->ibqp->send_cq = init_attr.send_cq; ++ qp->ibqp->recv_cq = init_attr.recv_cq; ++ qp->ibqp->srq = init_attr.srq; ++ qp->ibqp->event_handler = ibp_ibqp_event; ++ qp->ibqp->qp_context = (void *) cmd->qp_context; ++ qp->ibqp->qp_type = init_attr.qp_type; ++ ++ if (qp->ibqp->qp_type == IB_QPT_XRC_TGT) { ++ qp->ibqp->xrcd = init_attr.xrcd; ++ atomic_inc(&qp->ibqp->xrcd->usecnt); ++ } else { ++ qp->ibqp->xrcd = NULL; ++ qp->ibqp->real_qp = qp->ibqp; ++ } ++ atomic_set(&qp->ibqp->usecnt, 0); ++ ++ atomic_inc(&pd->usecnt); ++ atomic_inc(&init_attr.send_cq->usecnt); ++ atomic_inc(&init_attr.recv_cq->usecnt); ++ ++ if (init_attr.srq) ++ atomic_inc(&init_attr.srq->usecnt); ++ ++ qp->ibqp->uobject = uobj; ++ uobj->object = qp; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&uobj->list, &ucontext->ibucontext->qp_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->qp = (uintptr_t) qp; ++ resp->qpn = qp->ibqp->qp_num; ++ resp->cap.max_send_wr = init_attr.cap.max_send_wr; ++ resp->cap.max_recv_wr = init_attr.cap.max_recv_wr; ++ resp->cap.max_send_sge = init_attr.cap.max_send_sge; ++ resp->cap.max_recv_sge = init_attr.cap.max_recv_sge; ++ resp->cap.max_inline_data = init_attr.cap.max_inline_data; ++ ++send_resp: ++ if (ret) ++ kfree(qp); ++ ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_modify_qp(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_modify_qp_cmd *cmd; ++ struct ibp_modify_qp_resp *resp; ++ struct ibp_qp *qp; ++ struct ib_qp_attr attr; ++ struct ib_udata udata; ++ size_t len; ++ size_t outlen; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_modify_qp_cmd *) hdr; ++ qp = (struct ibp_qp *) cmd->qp; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_modify_qp_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ memset(&attr, 0, sizeof(attr)); ++ ++ attr.qp_state = cmd->qp_state; ++ attr.cur_qp_state = cmd->cur_qp_state; ++ attr.path_mtu = cmd->path_mtu; ++ attr.path_mig_state = cmd->path_mig_state; ++ attr.qkey = cmd->qkey; ++ attr.rq_psn = cmd->rq_psn; ++ attr.sq_psn = cmd->sq_psn; ++ attr.dest_qp_num = cmd->dest_qp_num; ++ attr.qp_access_flags = cmd->qp_access_flags; ++ attr.cap.max_send_wr = cmd->cap.max_send_wr; ++ attr.cap.max_recv_wr = cmd->cap.max_recv_wr; ++ attr.cap.max_send_sge = cmd->cap.max_send_sge; ++ attr.cap.max_recv_sge = cmd->cap.max_recv_sge; ++ attr.cap.max_inline_data = cmd->cap.max_inline_data; ++ attr.ah_attr.grh.dgid.global.subnet_prefix = ++ cmd->ah.grh.dgid_subnet_prefix; ++ attr.ah_attr.grh.dgid.global.interface_id = ++ cmd->ah.grh.dgid_interface_id; ++ attr.ah_attr.grh.flow_label = cmd->ah.grh.flow_label; ++ attr.ah_attr.grh.sgid_index = cmd->ah.grh.sgid_index; ++ attr.ah_attr.grh.hop_limit = 
cmd->ah.grh.hop_limit; ++ attr.ah_attr.grh.traffic_class = cmd->ah.grh.traffic_class; ++ attr.ah_attr.dlid = cmd->ah.dlid; ++ attr.ah_attr.sl = cmd->ah.sl; ++ attr.ah_attr.src_path_bits = cmd->ah.src_path_bits; ++ attr.ah_attr.static_rate = cmd->ah.static_rate; ++ attr.ah_attr.ah_flags = cmd->ah.ah_flags; ++ attr.ah_attr.port_num = cmd->ah.port_num; ++ attr.alt_ah_attr.grh.dgid.global.subnet_prefix = ++ cmd->alt_ah.grh.dgid_subnet_prefix; ++ attr.alt_ah_attr.grh.dgid.global.interface_id = ++ cmd->alt_ah.grh.dgid_interface_id; ++ attr.alt_ah_attr.grh.flow_label = cmd->alt_ah.grh.flow_label; ++ attr.alt_ah_attr.grh.sgid_index = cmd->alt_ah.grh.sgid_index; ++ attr.alt_ah_attr.grh.hop_limit = cmd->alt_ah.grh.hop_limit; ++ attr.alt_ah_attr.grh.traffic_class = cmd->alt_ah.grh.traffic_class; ++ attr.alt_ah_attr.dlid = cmd->alt_ah.dlid; ++ attr.alt_ah_attr.sl = cmd->alt_ah.sl; ++ attr.alt_ah_attr.src_path_bits = cmd->alt_ah.src_path_bits; ++ attr.alt_ah_attr.static_rate = cmd->alt_ah.static_rate; ++ attr.alt_ah_attr.ah_flags = cmd->alt_ah.ah_flags; ++ attr.alt_ah_attr.port_num = cmd->alt_ah.port_num; ++ attr.pkey_index = cmd->pkey_index; ++ attr.alt_pkey_index = cmd->alt_pkey_index; ++ attr.en_sqd_async_notify = cmd->en_sqd_async_notify; ++ attr.sq_draining = cmd->sq_draining; ++ attr.max_rd_atomic = cmd->max_rd_atomic; ++ attr.max_dest_rd_atomic = cmd->max_dest_rd_atomic; ++ attr.min_rnr_timer = cmd->min_rnr_timer; ++ attr.port_num = cmd->port_num; ++ attr.timeout = cmd->timeout; ++ attr.retry_cnt = cmd->retry_cnt; ++ attr.rnr_retry = cmd->rnr_retry; ++ attr.alt_port_num = cmd->alt_port_num; ++ attr.alt_timeout = cmd->alt_timeout; ++ ++ ret = device->ib_dev->modify_qp(qp->ibqp, &attr, cmd->qp_attr_mask, &udata); ++ if (ret) { ++ print_err("ib_modify_qp returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->cap.max_send_wr = attr.cap.max_send_wr; ++ resp->cap.max_recv_wr = attr.cap.max_recv_wr; ++ resp->cap.max_send_sge = attr.cap.max_send_sge; ++ resp->cap.max_recv_sge = attr.cap.max_recv_sge; ++ resp->cap.max_inline_data = attr.cap.max_inline_data; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_query_qp(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_query_qp_cmd *cmd; ++ struct ibp_query_qp_resp *resp; ++ struct ibp_qp *qp; ++ struct ib_qp_attr qp_attr; ++ struct ib_qp_init_attr qp_init_attr; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_query_qp_cmd *) hdr; ++ qp = (struct ibp_qp *) cmd->qp; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ret = ib_query_qp(qp->ibqp, &qp_attr, cmd->qp_attr_mask, &qp_init_attr); ++ if (ret) { ++ print_err("ib_query_qp returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ resp = (struct ibp_query_qp_resp *) msg->data; ++ len += sizeof(*resp); ++ ++ resp->qp_state = qp_attr.qp_state; ++ resp->cur_qp_state = qp_attr.cur_qp_state; ++ resp->path_mtu = qp_attr.path_mtu; ++ resp->path_mig_state = qp_attr.path_mig_state; ++ resp->qkey = qp_attr.qkey; ++ resp->rq_psn = qp_attr.rq_psn; ++ resp->sq_psn = qp_attr.sq_psn; ++ resp->dest_qp_num = qp_attr.dest_qp_num; ++ resp->qp_access_flags = qp_attr.qp_access_flags; ++ ++ resp->init_cap.max_send_wr = 
qp_init_attr.cap.max_send_wr; ++ resp->init_cap.max_recv_wr = qp_init_attr.cap.max_recv_wr; ++ resp->init_cap.max_send_sge = qp_init_attr.cap.max_send_sge; ++ resp->init_cap.max_recv_sge = qp_init_attr.cap.max_recv_sge; ++ resp->init_cap.max_inline_data = qp_init_attr.cap.max_inline_data; ++ resp->init_create_flags = qp_init_attr.create_flags; ++ resp->init_sq_sig_type = qp_init_attr.sq_sig_type; ++ ++ resp->cap.max_send_wr = qp_attr.cap.max_send_wr; ++ resp->cap.max_recv_wr = qp_attr.cap.max_recv_wr; ++ resp->cap.max_send_sge = qp_attr.cap.max_send_sge; ++ resp->cap.max_recv_sge = qp_attr.cap.max_recv_sge; ++ resp->cap.max_inline_data = qp_attr.cap.max_inline_data; ++ ++ resp->ah.grh.dgid_subnet_prefix = ++ qp_attr.ah_attr.grh.dgid.global.subnet_prefix; ++ resp->ah.grh.dgid_interface_id = ++ qp_attr.ah_attr.grh.dgid.global.interface_id; ++ resp->ah.grh.flow_label = qp_attr.ah_attr.grh.flow_label; ++ resp->ah.grh.sgid_index = qp_attr.ah_attr.grh.sgid_index; ++ resp->ah.grh.hop_limit = qp_attr.ah_attr.grh.hop_limit; ++ resp->ah.grh.traffic_class = qp_attr.ah_attr.grh.traffic_class; ++ resp->ah.dlid = qp_attr.ah_attr.dlid; ++ resp->ah.sl = qp_attr.ah_attr.sl; ++ resp->ah.src_path_bits = qp_attr.ah_attr.src_path_bits; ++ resp->ah.static_rate = qp_attr.ah_attr.static_rate; ++ resp->ah.ah_flags = qp_attr.ah_attr.ah_flags; ++ resp->ah.port_num = qp_attr.ah_attr.port_num; ++ ++ resp->alt_ah.grh.dgid_subnet_prefix = ++ qp_attr.alt_ah_attr.grh.dgid.global.subnet_prefix; ++ resp->alt_ah.grh.dgid_interface_id = ++ qp_attr.alt_ah_attr.grh.dgid.global.interface_id; ++ resp->alt_ah.grh.flow_label = qp_attr.alt_ah_attr.grh.flow_label; ++ resp->alt_ah.grh.sgid_index = qp_attr.alt_ah_attr.grh.sgid_index; ++ resp->alt_ah.grh.hop_limit = qp_attr.alt_ah_attr.grh.hop_limit; ++ resp->alt_ah.grh.traffic_class = qp_attr.alt_ah_attr.grh.traffic_class; ++ resp->alt_ah.dlid = qp_attr.alt_ah_attr.dlid; ++ resp->alt_ah.sl = qp_attr.alt_ah_attr.sl; ++ resp->alt_ah.src_path_bits = qp_attr.alt_ah_attr.src_path_bits; ++ resp->alt_ah.static_rate = qp_attr.alt_ah_attr.static_rate; ++ resp->alt_ah.ah_flags = qp_attr.alt_ah_attr.ah_flags; ++ resp->alt_ah.port_num = qp_attr.alt_ah_attr.port_num; ++ ++ resp->pkey_index = qp_attr.pkey_index; ++ resp->alt_pkey_index = qp_attr.alt_pkey_index; ++ resp->en_sqd_async_notify = qp_attr.en_sqd_async_notify; ++ resp->sq_draining = qp_attr.sq_draining; ++ resp->max_rd_atomic = qp_attr.max_rd_atomic; ++ resp->max_dest_rd_atomic = qp_attr.max_dest_rd_atomic; ++ resp->min_rnr_timer = qp_attr.min_rnr_timer; ++ resp->port_num = qp_attr.port_num; ++ resp->timeout = qp_attr.timeout; ++ resp->retry_cnt = qp_attr.retry_cnt; ++ resp->rnr_retry = qp_attr.rnr_retry; ++ resp->alt_port_num = qp_attr.alt_port_num; ++ resp->alt_timeout = qp_attr.alt_timeout; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_destroy_qp(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_queued_response_msg *msg; ++ struct ibp_destroy_qp_cmd *cmd; ++ struct ib_uobject *uobj; ++ struct ibp_qp *qp; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_destroy_qp_cmd *) hdr; ++ qp = (struct ibp_qp *) cmd->qp; ++ msg = (struct ibp_queued_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ uobj = qp->ibqp->uobject; ++ ++ ret = ib_destroy_qp(qp->ibqp); ++ if (ret) { ++ 
print_err("ib_destroy_qp returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ibp_destroy_uobj(uobj); ++ ++ kfree(qp); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret); ++ return ibp_queue_response(client, msg); ++} ++ ++static void ibp_ibcq_event(struct ib_event *ibevent, void *cq_context) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_client *client; ++ struct ibp_event *event; ++ struct ib_uobject *uobj; ++ ++ event = kmalloc(sizeof(*event), GFP_ATOMIC); ++ if (!event) { ++ print_err("kalloc failed\n"); ++ return; ++ } ++ ++ uobj = (struct ib_uobject *) ibevent->element.cq->uobject; ++ ucontext = (void *) uobj->user_handle; ++ client = ucontext->client; ++ ++ event->client = client; ++ event->context = (uintptr_t) cq_context; ++ event->type = ibevent->event; ++ event->ibdev = ucontext->ibdev; ++ ++ INIT_WORK(&event->work, ibp_async_event); ++ queue_work(client->workqueue, &event->work); ++} ++ ++static void ibp_cq_comp(struct work_struct *work) ++{ ++ struct ibp_comp *comp; ++ struct ibp_cq_comp_msg msg; ++ ++ comp = container_of(work, struct ibp_comp, work); ++ ++ IBP_INIT_MSG(NULL, &msg, sizeof(msg), CQ_COMP); ++ ++ msg.data.cq_context = (uintptr_t) comp->cq_context; ++ ++ ibp_send(comp->client->ep, &msg, sizeof(msg)); ++ ++ ibp_add_to_stack(c_stack, (void *) comp); ++} ++ ++static void ibp_ibcq_comp(struct ib_cq *ibcq, void *cq_context) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_client *client; ++ struct ibp_comp *comp; ++ ++ ucontext = (struct ibp_ucontext *) ibcq->uobject->user_handle; ++ ++ if (ucontext->ibucontext->closing) { ++ print_dbg("ignoring cq completion, connection closing\n"); ++ return; ++ } ++ ++ comp = (struct ibp_comp *) ++ ibp_pull_from_stack(c_stack, sizeof(*comp), GFP_ATOMIC); ++ if (!comp) { ++ print_err("kalloc failed\n"); ++ return; ++ } ++ ++ client = ucontext->client; ++ ++ comp->client = client; ++ comp->cq_context = cq_context; ++ ++ INIT_WORK(&comp->work, ibp_cq_comp); ++ queue_work(client->workqueue, &comp->work); ++} ++ ++static int ibp_cmd_create_cq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_create_cq_cmd *cmd; ++ struct ibp_create_cq_resp *resp; ++ struct ibp_ucontext *ucontext; ++ struct ib_uobject *uobj; ++ struct ib_udata udata; ++ struct ib_cq *cq; ++ size_t len; ++ size_t outlen; ++ int ret = 0; ++#ifdef MOFED ++ struct ib_cq_init_attr attr; ++#endif ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_create_cq_cmd *) hdr; ++ ucontext = (struct ibp_ucontext *) cmd->ucontext; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_create_cq_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ uobj = ibp_create_uobj(ucontext); ++ if (IS_ERR(uobj)) { ++ ret = PTR_ERR(uobj); ++ print_err("ibp_create_uobj returned %d\n", ret); ++ goto send_resp; ++ } ++ ++#ifdef MOFED ++ memset(&attr, 0, sizeof(attr)); ++ attr.cqe = cmd->cqe; ++ attr.comp_vector = cmd->vector; ++ ++ cq = device->ib_dev->create_cq(device->ib_dev, &attr, ++ ucontext->ibucontext, &udata); ++#else ++ cq = device->ib_dev->create_cq(device->ib_dev, (int) cmd->cqe, ++ (int) cmd->vector, ++ ucontext->ibucontext, &udata); ++#endif ++ if (IS_ERR(cq)) { ++ ret = PTR_ERR(cq); ++ print_err("ib_create_cq returned %d\n", 
ret); ++ /* ++ * Clear uobj's user_handle as destroy_uobj tries to list_del ++ * uobj from the list and uobj has NOT been added yet ++ */ ++ uobj->user_handle = 0; ++ ibp_destroy_uobj(uobj); ++ goto send_resp; ++ } ++ ++ cq->device = device->ib_dev; ++ cq->event_handler = ibp_ibcq_event; ++ cq->comp_handler = ibp_ibcq_comp; ++ cq->cq_context = (void *) cmd->cq_context; ++ atomic_set(&cq->usecnt, 0); ++ ++ cq->uobject = uobj; ++ uobj->object = cq; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&uobj->list, &ucontext->ibucontext->cq_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->cq = (uintptr_t)cq; ++ resp->cqe = cq->cqe; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_destroy_cq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_queued_response_msg *msg; ++ struct ibp_destroy_cq_cmd *cmd; ++ struct ib_uobject *uobj; ++ struct ib_cq *cq; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_destroy_cq_cmd *) hdr; ++ cq = (struct ib_cq *) cmd->cq; ++ msg = (struct ibp_queued_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ uobj = cq->uobject; ++ ++ ret = ib_destroy_cq(cq); ++ if (unlikely(ret == -EBUSY)) { ++ msleep(100); ++ ret = ib_destroy_cq(cq); ++ } ++ if (ret) { ++ print_err("ib_destroy_cq returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ibp_destroy_uobj(uobj); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret); ++ return ibp_queue_response(client, msg); ++} ++ ++static int ibp_cmd_resize_cq(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_resize_cq_cmd *cmd; ++ struct ibp_resize_cq_resp *resp; ++ struct ib_cq *cq; ++ struct ib_udata udata; ++ size_t len; ++ size_t outlen; ++ int ret; ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_resize_cq_cmd *) hdr; ++ cq = (struct ib_cq *) cmd->cq; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_resize_cq_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ ret = device->ib_dev->resize_cq ? 
++ device->ib_dev->resize_cq(cq, (int) cmd->cqe, &udata) : -ENOSYS; ++ if (ret) { ++ print_err("ib_resize_cq returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->cqe = cq->cqe; ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_reg_user_mr(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_reg_user_mr_cmd *cmd; ++ struct ibp_reg_user_mr_resp *resp; ++ struct ibp_mr *mr; ++ struct ibp_ucontext *ucontext; ++ struct ib_uobject *uobj; ++ struct ib_udata udata; ++ struct ib_pd *pd; ++ size_t len; ++ size_t outlen; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_reg_user_mr_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ resp = (struct ibp_reg_user_mr_resp *) msg->data; ++ len = hdr->length - sizeof(*cmd); ++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp); ++ ++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen); ++ ++ len = sizeof(*msg); ++ ++ mr = kzalloc(sizeof(*mr), GFP_KERNEL); ++ if (!mr) { ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto send_resp; ++ } ++ ++ pd = (struct ib_pd *) cmd->pd; ++ ++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle; ++ ++ mr->reg = ibp_reg_buf(ucontext, cmd->hca_va, cmd->scif_addr, ++ cmd->length, cmd->offset, cmd->access); ++ if (IS_ERR(mr->reg)) { ++ ret = PTR_ERR(mr->reg); ++ print_err("ibp_reg_buf returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ uobj = ibp_create_uobj(ucontext); ++ if (IS_ERR(uobj)) { ++ ret = PTR_ERR(uobj); ++ print_err("ibp_create_uobj returned %d\n", ret); ++ kref_put(&mr->reg->ref, ibp_dereg_buf); ++ goto send_resp; ++ } ++ ++#ifdef MOFED ++ mr->ibmr = pd->device->reg_user_mr(pd, cmd->hca_va, cmd->length, ++ cmd->hca_va, cmd->access, &udata, 0); ++#else ++ mr->ibmr = pd->device->reg_user_mr(pd, cmd->hca_va, cmd->length, ++ cmd->hca_va, cmd->access, &udata); ++#endif ++ if (IS_ERR(mr->ibmr)) { ++ ret = PTR_ERR(mr->ibmr); ++ print_err("ib_reg_user_mr returned %d\n", ret); ++ kref_put(&mr->reg->ref, ibp_dereg_buf); ++ ibp_destroy_uobj(uobj); ++ goto send_resp; ++ } ++ ++ mr->ibmr->pd = pd; ++ mr->ibmr->device = pd->device; ++ atomic_inc(&pd->usecnt); ++ atomic_set(&mr->ibmr->usecnt, 0); ++ ++ mr->ibmr->uobject = uobj; ++ uobj->object = mr; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&uobj->list, &ucontext->ibucontext->mr_list); ++ mutex_unlock(&ucontext->mutex); ++ ++ len += sizeof(*resp); ++ len += outlen - udata.outlen; /* add driver private data */ ++ ++ resp->mr = (uintptr_t) mr; ++ resp->lkey = mr->ibmr->lkey; ++ resp->rkey = mr->ibmr->rkey; ++ ++send_resp: ++ if (ret) ++ kfree(mr); ++ ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_dereg_mr(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_dereg_mr_cmd *cmd; ++ struct ibp_mr *mr; ++ struct ib_uobject *uobj; ++ size_t len; ++ int ret; ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_dereg_mr_cmd *) hdr; ++ mr = (struct ibp_mr *) cmd->mr; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ if (IS_NULL_OR_ERR(mr)) 
{ ++ print_err("Invalid mr %p\n", mr); ++ ret = -EINVAL; ++ goto send_resp; ++ } ++ ++ uobj = mr->ibmr->uobject; ++ ++ ret = ib_dereg_mr(mr->ibmr); ++ if (unlikely(ret == -EBUSY)) { ++ msleep(100); ++ ret = ib_dereg_mr(mr->ibmr); ++ } ++ if (ret) { ++ print_err("ib_dereg_mr returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ ibp_destroy_uobj(uobj); ++ ++ if (mr->reg) ++ kref_put(&mr->reg->ref, ibp_dereg_buf); ++ ++ kfree(mr); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_attach_mcast(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_attach_mcast_cmd *cmd; ++ struct ibp_mcast_entry *mcast; ++ struct ibp_ucontext *ucontext; ++ struct ibp_qp *qp; ++ union ib_gid gid; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_attach_mcast_cmd *) hdr; ++ qp = (struct ibp_qp *) cmd->qp; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ucontext = (struct ibp_ucontext *) qp->ibqp->uobject->user_handle; ++ ++ mcast = kzalloc(sizeof *mcast, GFP_KERNEL); ++ if (!mcast) { ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto send_resp; ++ } ++ ++ gid.global.subnet_prefix = cmd->subnet_prefix; ++ gid.global.interface_id = cmd->interface_id; ++ ++ ret = ib_attach_mcast(qp->ibqp, &gid, cmd->lid); ++ if (ret) { ++ print_err("ib_attach_mcast returned %d\n", ret); ++ kfree(mcast); ++ goto send_resp; ++ } ++ ++ mcast->lid = cmd->lid; ++ mcast->gid.global.subnet_prefix = cmd->subnet_prefix; ++ mcast->gid.global.interface_id = cmd->interface_id; ++ ++ mutex_lock(&ucontext->mutex); ++ list_add_tail(&mcast->list, &qp->mcast); ++ mutex_unlock(&ucontext->mutex); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static int ibp_cmd_detach_mcast(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) ++{ ++ struct ibp_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_detach_mcast_cmd *cmd; ++ struct ibp_mcast_entry *mcast; ++ struct ibp_ucontext *ucontext; ++ struct ibp_qp *qp; ++ union ib_gid gid; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ device = (struct ibp_device *) hdr->device; ++ cmd = (struct ibp_detach_mcast_cmd *) hdr; ++ qp = (struct ibp_qp *) cmd->qp; ++ msg = (struct ibp_verb_response_msg *) tx_buf; ++ len = sizeof(*msg); ++ ++ ucontext = (struct ibp_ucontext *) qp->ibqp->uobject->user_handle; ++ ++ gid.global.subnet_prefix = cmd->subnet_prefix; ++ gid.global.interface_id = cmd->interface_id; ++ ++ ret = ib_detach_mcast(qp->ibqp, &gid, cmd->lid); ++ if (ret) { ++ print_err("ib_detach_mcast returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ mutex_lock(&ucontext->mutex); ++ list_for_each_entry(mcast, &qp->mcast, list) ++ if (cmd->lid == mcast->lid && ++ !memcmp(&gid , mcast->gid.raw, sizeof mcast->gid.raw)) { ++ list_del(&mcast->list); ++ kfree(mcast); ++ break; ++ } ++ mutex_unlock(&ucontext->mutex); ++ ++send_resp: ++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static void ibp_detach_mcast(struct ibp_qp *qp) ++{ ++ struct ibp_mcast_entry *mcast, *tmp; ++ ++ list_for_each_entry_safe(mcast, tmp, &qp->mcast, list) { ++ ib_detach_mcast(qp->ibqp, &mcast->gid, mcast->lid); ++ 
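/* Detach issued; drop the local bookkeeping entry. */ ++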
list_del(&mcast->list); ++ kfree(mcast); ++ } ++} ++ ++static void ibp_destroy_ucontext(struct ibp_ucontext *ucontext) ++{ ++ struct ib_ucontext *ibuctx; ++ struct ib_uobject *uobj; ++ struct ib_uobject *tmp; ++ struct ibp_mmap *mmap; ++ struct ibp_mmap *tmp_mmap; ++ ++ ibuctx = ucontext->ibucontext; ++ if (!ibuctx) ++ goto out; ++ ++ ibuctx->closing = 1; ++ ++ synchronize_sched(); ++ ++ down_write(&list_rwsem); ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->ah_list, list) { ++ struct ib_ah *ibah = uobj->object; ++ ib_destroy_ah(ibah); ++ ibp_destroy_uobj(uobj); ++ } ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->qp_list, list) { ++ struct ibp_qp *qp = uobj->object; ++ ibp_detach_mcast(qp); ++ ib_destroy_qp(qp->ibqp); ++ ibp_destroy_uobj(uobj); ++ kfree(qp); ++ } ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->cq_list, list) { ++ struct ib_cq *ibcq = uobj->object; ++ ib_destroy_cq(ibcq); ++ ibp_destroy_uobj(uobj); ++ } ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->srq_list, list) { ++ struct ib_srq *ibsrq = uobj->object; ++ ib_destroy_srq(ibsrq); ++ ibp_destroy_uobj(uobj); ++ } ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->mr_list, list) { ++ struct ibp_mr *mr = uobj->object; ++ ib_dereg_mr(mr->ibmr); ++ ibp_destroy_uobj(uobj); ++ kref_put(&mr->reg->ref, ibp_dereg_buf); ++ kfree(mr); ++ } ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->xrcd_list, list) { ++ struct ib_xrcd *ibxrcd = uobj->object; ++ ib_dealloc_xrcd(ibxrcd); ++ ibp_destroy_uobj(uobj); ++ } ++ ++ list_for_each_entry_safe(uobj, tmp, &ibuctx->pd_list, list) { ++ struct ib_pd *ibpd = uobj->object; ++ ib_dealloc_pd(ibpd); ++ ibp_destroy_uobj(uobj); ++ } ++ ++ up_write(&list_rwsem); ++ ++ synchronize_sched(); ++ ++ ibuctx->device->dealloc_ucontext(ibuctx); ++out: ++ if (ucontext->ibdev) ++ ib_unregister_event_handler(&ucontext->event_handler); ++ ++ list_for_each_entry_safe(mmap, tmp_mmap, &ucontext->mmap_list, list) { ++ ibp_scif_unregister(ucontext->client, mmap); ++ ++ if (!IS_NULL_OR_ERR(current) && !IS_NULL_OR_ERR(current->mm)) { ++ MUNMAP(current->mm, mmap->vaddr, mmap->len); ++ } ++ kfree(mmap); ++ } ++ ++ while (!RB_EMPTY_ROOT(&ucontext->reg_tree)) { ++ struct ibp_reg *reg; ++ reg = rb_entry(ucontext->reg_tree.rb_node, struct ibp_reg, ++ node); ++ kref_put(®->ref, ibp_dereg_buf); ++ } ++ ++ ibp_put_device(ucontext->device); ++ fput(ucontext->filp); ++ kfree(ucontext); ++} ++ ++void ibp_cleanup_ucontext(struct list_head *ucontext_list) ++{ ++ struct ibp_ucontext *ucontext; ++ struct ibp_ucontext *next; ++ ++ list_for_each_entry_safe(ucontext, next, ucontext_list, list) ++ ibp_destroy_ucontext(ucontext); ++} ++ ++static int (*ibp_msg_table[])(struct ibp_client *client, ++ struct ibp_msg_header *hdr, void *tx_buf) = { ++ [IBP_VERB_GET_PROTOCOL_STATS] = ibp_cmd_not_supported, ++ [IBP_VERB_QUERY_DEVICE] = ibp_cmd_query_device, ++ [IBP_VERB_QUERY_PORT] = ibp_cmd_query_port, ++ [IBP_VERB_GET_LINK_LAYER] = ibp_cmd_not_supported, ++ [IBP_VERB_QUERY_GID] = ibp_cmd_query_gid, ++ [IBP_VERB_QUERY_PKEY] = ibp_cmd_query_pkey, ++ [IBP_VERB_MODIFY_DEVICE] = ibp_cmd_not_supported, ++ [IBP_VERB_MODIFY_PORT] = ibp_cmd_not_supported, ++ [IBP_VERB_ALLOC_UCONTEXT] = ibp_cmd_alloc_ucontext, ++ [IBP_VERB_DEALLOC_UCONTEXT] = ibp_cmd_dealloc_ucontext, ++ [IBP_VERB_REG_BUF] = ibp_cmd_reg_buf, ++ [IBP_VERB_DEREG_BUF] = ibp_cmd_dereg_buf, ++ [IBP_VERB_MMAP] = ibp_cmd_mmap, ++ [IBP_VERB_UNMMAP] = ibp_cmd_unmmap, ++ [IBP_VERB_ALLOC_PD] = ibp_cmd_alloc_pd, ++ [IBP_VERB_DEALLOC_PD] = ibp_cmd_dealloc_pd, ++ [IBP_VERB_CREATE_AH] = 
ibp_cmd_create_ah, ++ [IBP_VERB_MODIFY_AH] = ibp_cmd_not_supported, ++ [IBP_VERB_QUERY_AH] = ibp_cmd_query_ah, ++ [IBP_VERB_DESTROY_AH] = ibp_cmd_destroy_ah, ++ [IBP_VERB_CREATE_SRQ] = ibp_cmd_create_srq, ++ [IBP_VERB_MODIFY_SRQ] = ibp_cmd_modify_srq, ++ [IBP_VERB_QUERY_SRQ] = ibp_cmd_query_srq, ++ [IBP_VERB_DESTROY_SRQ] = ibp_cmd_destroy_srq, ++ [IBP_VERB_POST_SRQ_RECV] = ibp_cmd_not_supported, ++ [IBP_VERB_CREATE_QP] = ibp_cmd_create_qp, ++ [IBP_VERB_MODIFY_QP] = ibp_cmd_modify_qp, ++ [IBP_VERB_QUERY_QP] = ibp_cmd_query_qp, ++ [IBP_VERB_DESTROY_QP] = ibp_cmd_destroy_qp, ++ [IBP_VERB_POST_SEND] = ibp_cmd_not_supported, ++ [IBP_VERB_POST_RECV] = ibp_cmd_not_supported, ++ [IBP_VERB_CREATE_CQ] = ibp_cmd_create_cq, ++ [IBP_VERB_MODIFY_CQ] = ibp_cmd_not_supported, ++ [IBP_VERB_DESTROY_CQ] = ibp_cmd_destroy_cq, ++ [IBP_VERB_RESIZE_CQ] = ibp_cmd_resize_cq, ++ [IBP_VERB_POLL_CQ] = ibp_cmd_not_supported, ++ [IBP_VERB_PEEK_CQ] = ibp_cmd_not_supported, ++ [IBP_VERB_REQ_NOTIFY_CQ] = ibp_cmd_not_supported, ++ [IBP_VERB_REQ_NCOMP_NOTIF] = ibp_cmd_not_supported, ++ [IBP_VERB_GET_DMA_MR] = ibp_cmd_not_supported, ++ [IBP_VERB_REG_PHYS_MR] = ibp_cmd_not_supported, ++ [IBP_VERB_REG_USER_MR] = ibp_cmd_reg_user_mr, ++ [IBP_VERB_QUERY_MR] = ibp_cmd_not_supported, ++ [IBP_VERB_DEREG_MR] = ibp_cmd_dereg_mr, ++ [IBP_VERB_ALLOC_FAST_REG_MR] = ibp_cmd_not_supported, ++ [IBP_VERB_ALLOC_FAST_REG_PAGE_LIST] = ibp_cmd_not_supported, ++ [IBP_VERB_FREE_FAST_REG_PAGE_LIST] = ibp_cmd_not_supported, ++ [IBP_VERB_REREG_PHYS_MR] = ibp_cmd_not_supported, ++ [IBP_VERB_ALLOC_MW] = ibp_cmd_not_supported, ++ [IBP_VERB_BIND_MW] = ibp_cmd_not_supported, ++ [IBP_VERB_DEALLOC_MW] = ibp_cmd_not_supported, ++ [IBP_VERB_ALLOC_FMR] = ibp_cmd_not_supported, ++ [IBP_VERB_MAP_PHYS_FMR] = ibp_cmd_not_supported, ++ [IBP_VERB_UNMAP_FMR] = ibp_cmd_not_supported, ++ [IBP_VERB_DEALLOC_FMR] = ibp_cmd_not_supported, ++ [IBP_VERB_ATTACH_MCAST] = ibp_cmd_attach_mcast, ++ [IBP_VERB_DETACH_MCAST] = ibp_cmd_detach_mcast, ++ [IBP_VERB_PROCESS_MAD] = ibp_cmd_not_supported, ++ [IBP_VERB_ALLOC_XRCD] = ibp_cmd_not_supported, ++ [IBP_VERB_DEALLOC_XRCD] = ibp_cmd_not_supported, ++}; ++ ++int ibp_init() ++{ ++ a_stack = ibp_init_stack(); ++ c_stack = ibp_init_stack(); ++ o_stack = ibp_init_stack(); ++ ++ if (!a_stack || !c_stack || !o_stack) { ++ print_err("stack allocation failed\n"); ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++void ibp_cleanup() ++{ ++ ibp_clear_stack(a_stack); ++ ibp_clear_stack(c_stack); ++ ibp_clear_stack(o_stack); ++} ++ ++int ibp_process_recvs(struct ibp_client *client, void *rx_buf, void *tx_buf) ++{ ++ struct ibp_msg_header *hdr; ++ int ret; ++ ++ hdr = (struct ibp_msg_header *) rx_buf; ++ ++ for (;;) { ++ wait_event_interruptible(client->rx_wait_queue, ++ !atomic_xchg(&client->rx_in_process, ++ 1)); ++ ++ ret = ibp_recv(client->ep, hdr, sizeof(*hdr)); ++ if (ret) ++ goto err; ++ ++ if (hdr->length > MAX_MSG_SIZE) { ++ print_err("message too large, len %u max %lu\n", ++ hdr->length, MAX_MSG_SIZE); ++ ret = -EMSGSIZE; ++ goto err; ++ } ++ ++ ret = ibp_recv(client->ep, hdr->data, ++ hdr->length - sizeof(*hdr)); ++ if (ret) ++ goto err; ++ ++ atomic_set(&client->rx_in_process, 0); ++ wake_up_interruptible(&client->rx_wait_queue); ++ ++ if ((hdr->opcode >= ARRAY_SIZE(ibp_msg_table)) || ++ !ibp_msg_table[hdr->opcode]) { ++ ibp_cmd_bad_request(client, hdr, tx_buf); ++ continue; ++ } ++ ++ ret = ibp_msg_table[hdr->opcode](client, hdr, tx_buf); ++ if (ret) ++ goto err; ++ } ++ ++ goto out; ++err: ++ atomic_set(&client->rx_in_process, 0); 
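++ /* on error, clear the rx_in_process flag and wake any waiting receiver before returning */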
++ wake_up_interruptible(&client->rx_wait_queue); ++ ++out: ++ return ret; ++} +diff -urN a6/drivers/infiniband/ibp/drv/stack.c a7/drivers/infiniband/ibp/drv/stack.c +--- a6/drivers/infiniband/ibp/drv/stack.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/stack.c 2015-02-23 10:01:30.292769309 -0800 +@@ -0,0 +1,102 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "common.h" ++#include "stack.h" ++ ++static DEFINE_SPINLOCK(stack_lock); ++ ++struct ibp_stack *ibp_init_stack(void) ++{ ++ struct ibp_stack *s; ++ ++ s = kzalloc(sizeof(struct ibp_stack), GFP_KERNEL); ++ if (s) ++ s->top_pointer = &s->base[0]; ++ ++ return s; ++} ++ ++void ibp_clear_stack(struct ibp_stack *s) ++{ ++ while (s->top_pointer != s->base) { ++ s->top_pointer--; ++ kfree(*s->top_pointer); ++ } ++ kfree(s); ++} ++ ++void ibp_add_to_stack(struct ibp_stack *s, void *p) ++{ ++ spin_lock_irq(&stack_lock); ++ ++ if (unlikely(++s->sample_cnt == STACK_GC_SAMPLE)) { ++ s->sample_cnt = 0; ++ if (unlikely(++s->gc_cnt == STACK_GC_RATE)) { ++ s->gc_cnt = 0; ++ while (s->current_count > s->high_water_mark) { ++ s->top_pointer--; ++ s->current_count--; ++ kfree(*s->top_pointer); ++ } ++ } else if (s->high_water_mark < s->current_count) ++ s->high_water_mark = s->current_count; ++ } ++ ++ if (likely(s->current_count < MAX_STACK)) { ++ *s->top_pointer++ = p; ++ s->current_count++; ++ } else ++ kfree(p); ++ ++ spin_unlock_irq(&stack_lock); ++} ++ ++void *ibp_pull_from_stack(struct ibp_stack *s, size_t size, int gfp_mask) ++{ ++ void *p; ++ unsigned long flag; ++ ++ spin_lock_irqsave(&stack_lock, flag); ++ ++ if (s->top_pointer == s->base) ++ p = kmalloc(size, gfp_mask); ++ else { ++ s->current_count--; ++ s->top_pointer--; ++ p = *s->top_pointer; ++ } ++ ++ spin_unlock_irqrestore(&stack_lock, flag); ++ ++ return p; ++} +diff -urN a6/drivers/infiniband/ibp/drv/stack.h a7/drivers/infiniband/ibp/drv/stack.h +--- a6/drivers/infiniband/ibp/drv/stack.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/drv/stack.h 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. 
All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef _IBP_STACK_H_ ++#define _IBP_STACK_H_ ++ ++#define STACK_GC_SAMPLE 5 ++#define STACK_GC_RATE 10 ++#define MAX_STACK 128 ++ ++struct ibp_stack { ++ int current_count; ++ int high_water_mark; ++ int gc_cnt; ++ int sample_cnt; ++ void **top_pointer; ++ void *base[MAX_STACK+1]; ++}; ++ ++struct ibp_stack *ibp_init_stack(void); ++ ++void ibp_add_to_stack(struct ibp_stack *s, void *p); ++ ++void *ibp_pull_from_stack(struct ibp_stack *s, size_t size, int gfp_mask); ++ ++void ibp_clear_stack(struct ibp_stack *s); ++ ++#endif /* _IBP_STACK_H_ */ +diff -urN a6/drivers/infiniband/ibp/Kconfig a7/drivers/infiniband/ibp/Kconfig +--- a6/drivers/infiniband/ibp/Kconfig 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/Kconfig 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,16 @@ ++config IBP_SERVER ++ tristate "CCL Direct IB Server drivers" ++ ---help--- ++ Server drivers for CCL Direct including server proxies for ++ hw drivers, and core drivers ib_sa and ib_cm. ++ Also includes is a kernel mode test module ++ ++ To compile this driver as a module, choose M here. ++ If unsure, say N. ++ ++config IBP_DEBUG ++ bool "CCL Direct debugging" ++ depends on IBP_SERVER ++ default y ++ ---help--- ++ This option causes debug code to be compiled into the CCL Direct drivers. +diff -urN a6/drivers/infiniband/ibp/Makefile a7/drivers/infiniband/ibp/Makefile +--- a6/drivers/infiniband/ibp/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/Makefile 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_IBP_SERVER) += drv/ ++obj-$(CONFIG_IBP_SERVER) += cm/ ++obj-$(CONFIG_IBP_SERVER) += sa/ +diff -urN a6/drivers/infiniband/ibp/sa/common.h a7/drivers/infiniband/ibp/sa/common.h +--- a6/drivers/infiniband/ibp/sa/common.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/common.h 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,108 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef COMMON_H ++#define COMMON_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define DRV_DESC "CCL Direct SA " DRV_ROLE ++#define DRV_VERSION "1.0" ++#define DRV_BASE "ibp_sa" ++#define PFX DRV_BASE "_" ++#define DRV_PFX DRV_NAME ": " ++ ++#define DRV_COPYRIGHT "Copyright (c) 2011-2013 Intel Corporation" ++#define DRV_SIGNON DRV_DESC " v" DRV_VERSION "\n" DRV_COPYRIGHT "\n" ++ ++#define MODULE_PARAM(name, var, type, value, desc) \ ++ type var = value; \ ++ module_param_named(name, var, type, 0644); \ ++ MODULE_PARM_DESC(name, desc) ++ ++#ifdef IBP_DEBUG ++extern int debug_level; ++#endif ++ ++enum { ++ IBP_DEBUG_NONE, ++ IBP_DEBUG_TARGETED, ++ IBP_DEBUG_VERBOSE, ++}; ++ ++#define _PRINTK(l, f, arg...) \ ++ printk(l DRV_PFX "%s(%d) " f, __func__, __LINE__, ##arg) ++ ++#ifdef IBP_DEBUG ++#define PRINTK(dbg, l, f, arg...) \ ++ do { \ ++ if (debug_level >= dbg) \ ++ printk(l DRV_PFX "%s(%d) " f, \ ++ __func__, __LINE__, ##arg); \ ++ } while (0) ++#else ++#define PRINTK(dbg, l, f, arg...) do { } while (0) ++#endif ++ ++#define print_dbg(f, arg...) PRINTK(IBP_DEBUG_TARGETED, KERN_DEBUG, f, ##arg) ++#define print_err(f, arg...) _PRINTK(KERN_ERR, f, ##arg) ++#define print_info(f, arg...) pr_info(f, ##arg) ++ ++#if 0 ++#define FORCED_FUNCTION_TRACING ++#endif ++ ++#ifdef FORCED_FUNCTION_TRACING ++#define print_trace(f, arg...) _PRINTK(KERN_ERR, f, ##arg) ++#else ++#define print_trace(f, arg...) PRINTK(IBP_DEBUG_VERBOSE, KERN_ERR, f, ##arg) ++#endif ++ ++#ifndef IBP_SA_PORT /* unique scif port for this service */ ++#define IBP_SA_PORT SCIF_OFED_PORT_4 ++#endif ++ ++#define IS_NULL_OR_ERR(p) (!(p) || IS_ERR_VALUE((unsigned long)p)) ++ ++int ibp_send(scif_epd_t ep, void *buf, size_t len); ++int ibp_recv(scif_epd_t ep, void *buf, size_t len); ++ ++#endif /* COMMON_H */ +diff -urN a6/drivers/infiniband/ibp/sa/ibp-abi.h a7/drivers/infiniband/ibp/sa/ibp-abi.h +--- a6/drivers/infiniband/ibp/sa/ibp-abi.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/ibp-abi.h 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,101 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. 
++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef IBP_ABI_H ++#define IBP_ABI_H ++ ++#include ++#include ++#include ++ ++/* Increment this value if any changes break compatibility. */ ++#define IBP_CM_ABI_VERSION 1 ++#define MAX_MSG_SIZE PAGE_SIZE ++ ++/* Client to server message enums. */ ++enum { ++ /* have callback */ ++ IBP_SA_PATH_REC_GET, ++ IBP_SA_JOIN_MCAST, ++ ++ /* no callback */ ++ IBP_SA_FREE_MCAST, ++ IBP_SA_GET_MCMEMBER_REC, ++ IBP_SA_REGISTER_CLIENT, ++ IBP_SA_UNREGISTER_CLIENT, ++ IBP_SA_CANCEL_QUERY, ++ IBP_INIT_AH_FROM_PATH, ++ IBP_INIT_AH_FROM_MCMEMBER, ++#if 0 ++ /* not used or local to client */ ++ IBP_SA_SERVICE_REC_QUERY, ++ IBP_SA_UNPACK_PATH, ++#endif ++}; ++ ++/* Server to client message enums. */ ++enum { ++ IBP_CALLBACK, ++ IBP_RESPONSE, ++}; ++ ++enum { ++ PATH_REC_GET_CB, ++ JOIN_MCAST_CB, ++}; ++ ++/* ++ * Make sure that all structs defined in this file are laid out to pack ++ * the same way on different architectures to avoid incompatibility. ++ * ++ * Specifically: ++ * - Do not use pointer types -- pass pointers in a u64 instead. ++ * - Make sure that any structure larger than 4 bytes is padded ++ * to a multiple of 8 bytes; otherwise the structure size may ++ * be different between architectures. ++ */ ++ ++struct ibp_msg_header { /* present in all messages */ ++ u32 opcode; ++ u32 length; ++ u32 status; ++ u32 reserved; ++ u64 request; ++ u64 data[0]; ++}; ++ ++struct ibp_verb_response_msg { ++ struct ibp_msg_header header; ++ u64 data[0]; ++}; ++ ++#endif /* IBP_ABI_H */ +diff -urN a6/drivers/infiniband/ibp/sa/ibp_exports.h a7/drivers/infiniband/ibp/sa/ibp_exports.h +--- a6/drivers/infiniband/ibp/sa/ibp_exports.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/ibp_exports.h 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,49 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef IBP_EXPORTS_H ++#define IBP_EXPORTS_H ++ ++#include ++ ++/* ++ ibp_resolve_ib_device - Return the host ib_device handle ++ @ibdev:Card IB device ++ ++ Upper level drivers may require the host ib_device handle associated ++ with the card ib_device. This routine resolves the card ib_device to ++ the cooresponding host ib_device handle. A value of 0 is returned if ++ no match was found. ++*/ ++u64 ibp_resolve_ib_device(struct ib_device *ibdev); ++ ++#endif /* IBP_EXPORTS_H */ +diff -urN a6/drivers/infiniband/ibp/sa/Makefile a7/drivers/infiniband/ibp/sa/Makefile +--- a6/drivers/infiniband/ibp/sa/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/Makefile 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,21 @@ ++KDIR ?= /lib/modules/`uname -r`/build ++ ++obj-$(CONFIG_IBP_SERVER) += ibp_sa_server.o ++ ++ccflags-$(CONFIG_IBP_DEBUG) += -g -DIBP_DEBUG ++ ++ibp_sa_server-y := server.o \ ++ server_msg.o \ ++ sa_server_msg.o ++ ++default: ++ $(MAKE) -C $(KDIR) M=`pwd` ++ ++modules_install: ++ $(MAKE) -C $(KDIR) M=`pwd` modules_install ++ ++clean: ++ rm -rf *.ko *.o .*.ko.cmd .*.o.cmd *.mod.c Module.* modules.order .tmp_versions ++ ++unix: ++ dos2unix *.[ch] Kconfig Makefile +diff -urN a6/drivers/infiniband/ibp/sa/sa_ibp_abi.h a7/drivers/infiniband/ibp/sa/sa_ibp_abi.h +--- a6/drivers/infiniband/ibp/sa/sa_ibp_abi.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/sa_ibp_abi.h 2015-02-23 10:01:30.293769309 -0800 +@@ -0,0 +1,251 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. 
++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#ifndef SA_IBP_ABI_H ++#define SA_IBP_ABI_H ++ ++#include ++#include ++#include ++ ++/* Increment this value if any changes break compatibility. */ ++#define IBP_SA_ABI_VERSION 1 ++ ++/* ++ * Make sure that all structs defined in this file are laid out to pack ++ * the same way on different architectures to avoid incompatibility. ++ * ++ * Specifically: ++ * - Do not use pointer types -- pass pointers in a u64 instead. ++ * - Make sure that any structure larger than 4 bytes is padded ++ * to a multiple of 8 bytes; otherwise the structure size may ++ * be different between architectures. ++ */ ++ ++struct cb_header { ++ u64 cb_type; ++ u64 status; ++ u64 ibp_client; ++}; ++ ++struct ibp_sa_path_rec { ++ __be64 service_id; ++ u64 dgid_prefix; ++ u64 dgid_id; ++ u64 sgid_prefix; ++ u64 sgid_id; ++ __be16 dlid; ++ __be16 slid; ++ u32 raw_traffic; ++ __be32 flow_label; ++ u8 hop_limit; ++ u8 traffic_class; ++ u32 reversible; ++ u8 numb_path; ++ __be16 pkey; ++ __be16 qos_class; ++ u8 sl; ++ u8 mtu_selector; ++ u8 mtu; ++ u8 rate_selector; ++ u8 rate; ++ u8 packet_life_time_selector; ++ u8 packet_life_time; ++ u8 preference; ++}; ++ ++struct path_rec_data { ++ u64 entry; ++ u64 query; ++ struct ibp_sa_path_rec resp; ++ u8 reserved[1]; ++}; ++ ++struct ibp_sa_mcmember_rec { ++ u64 mgid_prefix; ++ u64 mgid_id; ++ u64 port_gid_prefix; ++ u64 port_gid_id; ++ __be32 qkey; ++ __be16 mlid; ++ u8 mtu_selector; ++ u8 mtu; ++ u8 traffic_class; ++ __be16 pkey; ++ u8 rate_selector; ++ u8 rate; ++ u8 packet_life_time_selector; ++ u8 packet_life_time; ++ u8 sl; ++ __be32 flow_label; ++ u8 hop_limit; ++ u8 scope; ++ u8 join_state; ++ u64 proxy_join; ++ u8 reserved[1]; ++}; ++ ++struct mc_join_data { ++ u64 mcentry; ++ u64 ibp_mcast; ++ struct ibp_sa_mcmember_rec rec; ++}; ++ ++struct callback_msg { ++ struct cb_header header; ++ union { ++ struct path_rec_data path_rec; ++ struct mc_join_data mc_join; ++ } u; ++}; ++ ++struct ibp_callback_msg { ++ struct ibp_msg_header header; ++ u8 data[0]; ++}; ++ ++struct ibp_sa_path_rec_get_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_client; ++ u64 entry; ++ u64 query; ++ u64 device; ++ u64 port_num; ++ u64 comp_mask; ++ u64 timeout_ms; ++ u64 gfp_mask; ++ struct ibp_sa_path_rec rec; ++}; ++ ++struct ibp_sa_path_rec_get_resp { ++ u64 sa_query; ++ u64 query_id; ++}; ++ ++struct ibp_sa_register_client_cmd { ++ struct ibp_msg_header header; ++}; ++ ++struct ibp_sa_register_client_resp { ++ u64 ibp_client; ++}; ++ ++struct ibp_sa_unregister_client_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_client; ++}; ++ ++struct ibp_sa_cancel_query_cmd { ++ struct ibp_msg_header header; ++ u64 id; ++ u64 client; ++}; ++ ++struct ibp_init_ah_from_path_cmd { ++ struct ibp_msg_header header; ++ u64 device; ++ u8 port_num; ++ struct ibp_sa_path_rec 
rec; ++}; ++ ++struct ibp_ah_attr { ++ u64 dgid_prefix; ++ u64 dgid_id; ++ u32 flow_label; ++ u8 sgid_index; ++ u8 hop_limit; ++ u8 traffic_class; ++ u16 dlid; ++ u8 sl; ++ u8 src_path_bits; ++ u8 static_rate; ++ u8 ah_flags; ++ u8 port_num; ++}; ++struct ibp_init_ah_from_path_resp { ++ struct ibp_ah_attr attr; ++}; ++ ++struct ibp_init_ah_from_mcmember_cmd { ++ struct ibp_msg_header header; ++ u64 device; ++ u8 port_num; ++ struct ib_sa_mcmember_rec rec; ++}; ++ ++struct ibp_init_ah_from_mcmember_resp { ++ struct ibp_ah_attr attr; ++}; ++ ++struct ibp_sa_join_multicast_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_client; ++ u64 mcentry; ++ u64 device; ++ u8 port_num; ++ u64 comp_mask; ++ u64 gfp_mask; ++ struct ib_sa_mcmember_rec rec; ++}; ++ ++struct ibp_sa_join_multicast_resp { ++ u64 ibp_mcast; ++}; ++ ++struct ibp_sa_free_multicast_cmd { ++ struct ibp_msg_header header; ++ u64 ibp_mcast; ++}; ++ ++struct ibp_sa_get_mcmember_rec_cmd { ++ struct ibp_msg_header header; ++ u64 device; ++ u8 port_num; ++ u64 subnet_prefix; ++ u64 interface_id; ++}; ++ ++struct ibp_sa_get_mcmember_rec_resp { ++ struct ib_sa_mcmember_rec rec; ++}; ++ ++struct ibp_sa_event { ++ enum ib_event_type event_type; ++ u64 ibp_client; ++ union { ++ __u32 send_status; ++ } u; ++ u64 data_length; ++ u8 data[0]; ++}; ++ ++#endif /* SA_IBP_ABI_H */ +diff -urN a6/drivers/infiniband/ibp/sa/sa_server_msg.c a7/drivers/infiniband/ibp/sa/sa_server_msg.c +--- a6/drivers/infiniband/ibp/sa/sa_server_msg.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/sa_server_msg.c 2015-02-23 10:01:30.294769309 -0800 +@@ -0,0 +1,970 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#include "server.h" ++ ++LIST_HEAD(sa_entry_list); ++LIST_HEAD(query_list); ++LIST_HEAD(mcast_list); ++ ++static void free_query_list(struct sa_query_entry *entry) ++{ ++ if (entry) { ++ down_write(&list_rwsem); ++ ++ list_del(&entry->list); ++ ++ up_write(&list_rwsem); ++ } ++} ++ ++static struct sa_query_entry *add_query_list(struct ibp_client *client) ++{ ++ struct sa_query_entry *entry; ++ ++ print_trace("in\n"); ++ ++ entry = kzalloc(sizeof(struct sa_query_entry), GFP_KERNEL); ++ if (!entry) { ++ print_err("kzalloc failed\n"); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ entry->ibp_client = client; ++ ++ down_write(&list_rwsem); ++ ++ list_add(&entry->list, &query_list); ++ ++ up_write(&list_rwsem); ++ ++ return entry; ++} ++ ++static struct sa_query_entry *find_query_entry(struct ib_sa_client *client) ++{ ++ struct sa_query_entry *query = NULL; ++ ++ down_read(&list_rwsem); ++ ++ list_for_each_entry(query, &query_list, list) ++ if (query->sa_client == client) ++ goto out; ++ ++ print_err("Could not find sa_query_entry\n"); ++ ++out: ++ up_read(&list_rwsem); ++ ++ return query; ++} ++ ++static struct sa_entry *find_sa_entry(struct ib_sa_client *ib_client) ++{ ++ struct sa_entry *entry = NULL; ++ ++ down_read(&list_rwsem); ++ ++ list_for_each_entry(entry, &sa_entry_list, list) ++ if (&entry->ib_client == ib_client) ++ goto out; ++ ++ print_err("Could not find sa_entry\n"); ++ ++out: ++ up_read(&list_rwsem); ++ ++ return entry; ++} ++ ++/* Translate from server side "true" SA client to proxied SA client on the ++ * client ++ */ ++static struct ib_sa_client *find_ibp_client(struct ibp_client *ibp_client) ++{ ++ struct sa_entry *entry; ++ struct ib_sa_client *client = NULL; ++ ++ down_read(&list_rwsem); ++ ++ list_for_each_entry(entry, &sa_entry_list, list) ++ if (entry->client == ibp_client) { ++ client = &entry->ib_client; ++ goto out; ++ } ++ ++ print_err("Could not find proxied sa_client %p\n", ibp_client); ++ ++out: ++ up_read(&list_rwsem); ++ ++ return client; ++} ++ ++int ibp_cmd_sa_register_client(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct sa_entry *entry; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_sa_register_client_resp *resp; ++ size_t len; ++ int status = 0; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ entry = kzalloc((sizeof(struct sa_entry)), GFP_KERNEL); ++ if (!entry) { ++ print_err("kzalloc failed\n"); ++ status = -ENOMEM; ++ goto send_resp; ++ } ++ ++ entry->client = ibp_client; ++ ++ len += sizeof(*resp); ++ ++ resp = (struct ibp_sa_register_client_resp *) msg->data; ++ ++ resp->ibp_client = (u64) &entry->ib_client; ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, status); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) { ++ kfree(entry); ++ print_err("ibp_send returned %d\n", ret); ++ return ret; ++ } ++ if (status) ++ return status; ++ ++ ib_sa_register_client(&entry->ib_client); ++ ++ down_write(&list_rwsem); ++ list_add(&entry->list, &sa_entry_list); ++ up_write(&list_rwsem); ++ ++ return 0; ++} ++ ++int ibp_cmd_sa_unregister_client(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct sa_entry *entry; ++ struct ibp_sa_unregister_client_cmd *cmd; ++ struct ibp_verb_response_msg *msg; ++ struct ib_sa_client *client; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_sa_unregister_client_cmd *) hdr; ++ client = (struct ib_sa_client *) cmd->ibp_client; ++ 
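++ /* cmd->ibp_client holds the &sa_entry->ib_client pointer handed out by ibp_cmd_sa_register_client */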
msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ entry = find_sa_entry(client); ++ if (!entry) { ++ ret = -EINVAL; ++ goto send_resp; ++ } ++ ++ down_write(&list_rwsem); ++ list_del(&entry->list); ++ up_write(&list_rwsem); ++ ++ ib_sa_unregister_client(&entry->ib_client); ++ ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++int ibp_cmd_sa_cancel_query(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct sa_query_entry *entry; ++ struct ibp_sa_cancel_query_cmd *cmd; ++ struct ibp_verb_response_msg *msg; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_sa_cancel_query_cmd *) hdr; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ entry = find_query_entry((struct ib_sa_client *) cmd->client); ++ if (!entry) { ++ ret = -EINVAL; ++ goto send_resp; ++ } ++ ++ ib_sa_cancel_query(cmd->id, entry->query); ++ ++ free_query_list(entry); ++ ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++int ibp_cmd_init_ah_from_path(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct ib_device *device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_init_ah_from_path_cmd *cmd; ++ struct ibp_init_ah_from_path_resp *resp; ++ struct ib_sa_path_rec rec; ++ struct ib_ah_attr attr; ++ size_t len; ++ u8 port_num; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_init_ah_from_path_cmd *) hdr; ++ device = (struct ib_device *) cmd->device; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ port_num = cmd->port_num; ++ ++ rec.service_id = cmd->rec.service_id; ++ rec.dgid.global.interface_id ++ = cmd->rec.dgid_id; ++ rec.dgid.global.subnet_prefix ++ = cmd->rec.dgid_prefix; ++ rec.sgid.global.interface_id ++ = cmd->rec.sgid_id; ++ rec.sgid.global.subnet_prefix ++ = cmd->rec.sgid_prefix; ++ rec.dlid = cmd->rec.dlid; ++ rec.slid = cmd->rec.slid; ++ rec.raw_traffic = cmd->rec.raw_traffic; ++ rec.flow_label = cmd->rec.flow_label; ++ rec.hop_limit = cmd->rec.hop_limit; ++ rec.traffic_class = cmd->rec.traffic_class; ++ rec.reversible = cmd->rec.reversible; ++ rec.numb_path = cmd->rec.numb_path; ++ rec.pkey = cmd->rec.pkey; ++ rec.qos_class = cmd->rec.qos_class; ++ rec.sl = cmd->rec.sl; ++ rec.mtu_selector = cmd->rec.mtu_selector; ++ rec.mtu = cmd->rec.mtu; ++ rec.rate_selector = cmd->rec.rate_selector; ++ rec.rate = cmd->rec.rate; ++ rec.packet_life_time_selector ++ = cmd->rec.packet_life_time_selector; ++ rec.packet_life_time = cmd->rec.packet_life_time; ++ rec.preference = cmd->rec.preference; ++ ++ ret = ib_init_ah_from_path(device, port_num, &rec, &attr); ++ if (ret) ++ print_err("init_ah_from_path returned %d\n", ret); ++ ++ len += sizeof(*resp); ++ resp = (struct ibp_init_ah_from_path_resp *) msg->data; ++ ++ resp->attr.dgid_prefix = attr.grh.dgid.global.subnet_prefix; ++ resp->attr.dgid_id = attr.grh.dgid.global.interface_id; ++ resp->attr.flow_label = attr.grh.flow_label; ++ resp->attr.sgid_index = attr.grh.sgid_index; ++ resp->attr.hop_limit = attr.grh.hop_limit; ++ resp->attr.traffic_class ++ = attr.grh.traffic_class; ++ resp->attr.dlid = attr.dlid; ++ resp->attr.sl = attr.sl; ++ resp->attr.src_path_bits ++ = 
attr.src_path_bits; ++ resp->attr.static_rate = attr.static_rate; ++ resp->attr.ah_flags = attr.ah_flags; ++ resp->attr.port_num = attr.port_num; ++ ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++int ibp_cmd_init_ah_from_mcmember(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct ib_device *device; ++ struct ibp_init_ah_from_mcmember_cmd *cmd; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_init_ah_from_mcmember_resp *resp; ++ struct ib_sa_mcmember_rec rec; ++ struct ib_ah_attr attr; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_init_ah_from_mcmember_cmd *) hdr; ++ device = (struct ib_device *) cmd->device; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ rec.mgid.global.subnet_prefix = cmd->rec.mgid.global.subnet_prefix; ++ rec.mgid.global.interface_id = cmd->rec.mgid.global.interface_id; ++ rec.port_gid.global.subnet_prefix ++ = cmd->rec.port_gid.global.subnet_prefix; ++ rec.port_gid.global.interface_id ++ = cmd->rec.port_gid.global.interface_id; ++ rec.qkey = cmd->rec.qkey; ++ rec.mlid = cmd->rec.mlid; ++ rec.mtu_selector = cmd->rec.mtu_selector; ++ rec.mtu = cmd->rec.mtu; ++ rec.traffic_class = cmd->rec.traffic_class; ++ rec.pkey = cmd->rec.pkey; ++ rec.rate_selector = cmd->rec.rate_selector; ++ rec.rate = cmd->rec.rate; ++ rec.packet_life_time_selector ++ = cmd->rec.packet_life_time_selector; ++ rec.packet_life_time = cmd->rec.packet_life_time; ++ rec.sl = cmd->rec.sl; ++ rec.flow_label = cmd->rec.flow_label; ++ rec.hop_limit = cmd->rec.hop_limit; ++ rec.scope = cmd->rec.scope; ++ rec.join_state = cmd->rec.join_state; ++ rec.proxy_join = cmd->rec.proxy_join; ++ ++ ret = ib_init_ah_from_mcmember(device, cmd->port_num, &rec, &attr); ++ if (ret) { ++ print_err("ib_init_ah_from_mcmember returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ len += sizeof(*resp); ++ resp = (struct ibp_init_ah_from_mcmember_resp *) msg->data; ++ ++ resp->attr.dgid_prefix = attr.grh.dgid.global.subnet_prefix; ++ resp->attr.dgid_id = attr.grh.dgid.global.interface_id; ++ resp->attr.flow_label = attr.grh.flow_label; ++ resp->attr.sgid_index = attr.grh.sgid_index; ++ resp->attr.hop_limit = attr.grh.hop_limit; ++ resp->attr.traffic_class ++ = attr.grh.traffic_class; ++ resp->attr.dlid = attr.dlid; ++ resp->attr.sl = attr.sl; ++ resp->attr.src_path_bits ++ = attr.src_path_bits; ++ resp->attr.static_rate = attr.static_rate; ++ resp->attr.ah_flags = attr.ah_flags; ++ resp->attr.port_num = attr.port_num; ++ ++ ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++static void ibp_send_callback(struct work_struct *work) ++{ ++ struct callback_work *cb_work; ++ struct ibp_callback_msg *msg; ++ struct cb_header *header; ++ struct ibp_client *client; ++ size_t len; ++ int data_length; ++ int cb_type; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cb_work = (struct callback_work *) work; ++ len = sizeof(*msg); ++ ++ if (!cb_work) { ++ print_err("Invalid callback work_struct\n"); ++ return; ++ } ++ ++ header = &cb_work->msg.header; ++ cb_type = header->cb_type; ++ ++ client = cb_work->client; ++ if (!client) { ++ print_err("Invalid callback client\n"); ++ goto err; ++ } ++ if (!client->ep) { ++ print_err("Invalid callback client ep\n"); ++ 
goto err; ++ } ++ if (cb_work->data->ret) { ++ print_err("caller failed to send msg to card\n"); ++ goto err; ++ } ++ ++ data_length = cb_work->length; ++ ++ if (cb_type == PATH_REC_GET_CB) { ++ ret = sizeof(struct path_rec_data) + sizeof(struct cb_header); ++ if (data_length != ret) { ++ print_err("Invalid data length %d, expecting %d\n", ++ data_length, ret); ++ goto err; ++ } ++ } else if (cb_type == JOIN_MCAST_CB) { ++ ret = sizeof(struct mc_join_data) + sizeof(struct cb_header); ++ if (data_length != ret) { ++ print_err("Invalid data length %d, expecting %d\n", ++ data_length, ret); ++ goto err; ++ } ++ } else { ++ print_err("Invalid callback type %d\n", cb_type); ++ goto err; ++ } ++ ++ len += data_length; ++ ++ msg = kzalloc(len, GFP_KERNEL); ++ if (!msg) { ++ print_err("kzmalloc failed\n"); ++ goto err; ++ } ++ IBP_INIT_MSG(msg, len, CALLBACK); ++ ++ memcpy(msg->data, &cb_work->msg, data_length); ++ ++ /* wait for host to send message to card before processing cb */ ++ mutex_lock(&cb_work->data->lock); ++ ++ ret = ibp_send(client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ mutex_unlock(&cb_work->data->lock); ++ ++ kfree(msg); ++err: ++ if (cb_type == PATH_REC_GET_CB) ++ kfree(cb_work->data); ++ ++ kfree(cb_work); ++} ++ ++static void path_rec_get_callback(int status, struct ib_sa_path_rec *resp, ++ void *context) ++{ ++ struct path_rec_cb_data *data; ++ struct sa_query_entry *entry; ++ struct ibp_client *client; ++ struct ib_sa_client *ib_client; ++ struct callback_work *cb_work; ++ struct cb_header *header; ++ struct path_rec_data *path_rec; ++ ++ print_trace("in\n"); ++ ++ data = (struct path_rec_cb_data *) context; ++ entry = data->entry; ++ client = entry->ibp_client; ++ ++ cb_work = kzalloc(sizeof(struct callback_work), GFP_KERNEL); ++ if (!cb_work) { ++ print_err("kzalloc failed\n"); ++ goto err1; ++ } ++ ++ ib_client = find_ibp_client(client); ++ if (!ib_client) { ++ print_err("Could not find client for event handler\n"); ++ goto err2; ++ } ++ ++ if (!entry->query) { ++ print_err("Callback occurred before call returned\n"); ++ goto err2; ++ } ++ ++ cb_work->data = (struct generic_cb_data *) data; ++ cb_work->client = client; ++ cb_work->length = sizeof(*header) + sizeof(*path_rec); ++ ++ header = &cb_work->msg.header; ++ header->cb_type = PATH_REC_GET_CB; ++ header->status = status; ++ header->ibp_client = (u64) ib_client; ++ ++ path_rec = &cb_work->msg.u.path_rec; ++ path_rec->entry = data->ibp_entry; ++ path_rec->query = data->ibp_query; ++ ++ if (status) { ++ print_err("callback status %d\n", status); ++ // XXX How is data deallocated in error cases? 
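++ /* queue the callback anyway so the card-side caller sees the error status */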
++ goto queue_work; ++ } ++ ++ path_rec->resp.service_id = resp->service_id; ++ path_rec->resp.dgid_prefix = resp->dgid.global.subnet_prefix; ++ path_rec->resp.dgid_id = resp->dgid.global.interface_id; ++ path_rec->resp.sgid_prefix = resp->sgid.global.subnet_prefix; ++ path_rec->resp.sgid_id = resp->sgid.global.interface_id; ++ path_rec->resp.dlid = resp->dlid; ++ path_rec->resp.slid = resp->slid; ++ path_rec->resp.raw_traffic = resp->raw_traffic; ++ path_rec->resp.flow_label = resp->flow_label; ++ path_rec->resp.hop_limit = resp->hop_limit; ++ path_rec->resp.traffic_class = resp->traffic_class; ++ path_rec->resp.reversible = resp->reversible; ++ path_rec->resp.numb_path = resp->numb_path; ++ path_rec->resp.pkey = resp->pkey; ++ path_rec->resp.qos_class = resp->qos_class; ++ path_rec->resp.sl = resp->sl; ++ path_rec->resp.mtu_selector = resp->mtu_selector; ++ path_rec->resp.mtu = resp->mtu; ++ path_rec->resp.rate_selector = resp->rate_selector; ++ path_rec->resp.rate = resp->rate; ++ path_rec->resp.packet_life_time_selector ++ = resp->packet_life_time_selector; ++ path_rec->resp.packet_life_time = resp->packet_life_time; ++ path_rec->resp.preference = resp->preference; ++ ++queue_work: ++ free_query_list(entry); ++ ++ INIT_WORK(&cb_work->work, ibp_send_callback); ++ queue_work(client->workqueue, &cb_work->work); ++ return; ++err2: ++ kfree(cb_work); ++err1: ++ kfree(data); ++ return; ++} ++ ++int ibp_cmd_sa_path_rec_get(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct ib_device *ib_device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_sa_path_rec_get_cmd *cmd; ++ struct ibp_sa_path_rec_get_resp *resp; ++ struct ib_sa_client *client; ++ struct ib_sa_query *sa_query; ++ struct sa_query_entry *entry; ++ struct path_rec_cb_data *data = NULL; ++ struct ib_sa_path_rec rec; ++ size_t len; ++ int query_id; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_sa_path_rec_get_cmd *) hdr; ++ ib_device = (struct ib_device *) cmd->device; ++ client = (struct ib_sa_client *) cmd->ibp_client; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ entry = add_query_list(ibp_client); ++ if (IS_ERR(entry)) { ++ ret = PTR_ERR(entry); ++ goto send_resp; ++ } ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) { ++ free_query_list(entry); ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto send_resp; ++ } ++ ++ data->entry = entry; ++ data->ibp_entry = cmd->entry; ++ data->ibp_query = cmd->query; ++ ++ rec.service_id = cmd->rec.service_id; ++ rec.dgid.global.interface_id ++ = cmd->rec.dgid_id; ++ rec.dgid.global.subnet_prefix ++ = cmd->rec.dgid_prefix; ++ rec.sgid.global.interface_id ++ = cmd->rec.sgid_id; ++ rec.sgid.global.subnet_prefix ++ = cmd->rec.sgid_prefix; ++ rec.dlid = cmd->rec.dlid; ++ rec.slid = cmd->rec.slid; ++ rec.raw_traffic = cmd->rec.raw_traffic; ++ rec.flow_label = cmd->rec.flow_label; ++ rec.hop_limit = cmd->rec.hop_limit; ++ rec.traffic_class = cmd->rec.traffic_class; ++ rec.reversible = cmd->rec.reversible; ++ rec.numb_path = cmd->rec.numb_path; ++ rec.pkey = cmd->rec.pkey; ++ rec.qos_class = cmd->rec.qos_class; ++ rec.sl = cmd->rec.sl; ++ rec.mtu_selector = cmd->rec.mtu_selector; ++ rec.mtu = cmd->rec.mtu; ++ rec.rate_selector = cmd->rec.rate_selector; ++ rec.rate = cmd->rec.rate; ++ rec.packet_life_time_selector ++ = cmd->rec.packet_life_time_selector; ++ rec.packet_life_time = cmd->rec.packet_life_time; ++ rec.preference = cmd->rec.preference; ++ ++ mutex_init(&data->lock); ++ 
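++ /* hold data->lock until the response below is sent; ibp_send_callback() waits on it before forwarding the callback */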
mutex_lock(&data->lock); ++ ++ query_id = ib_sa_path_rec_get(client, ib_device, cmd->port_num, &rec, ++ cmd->comp_mask, cmd->timeout_ms, ++ GFP_KERNEL, path_rec_get_callback, data, ++ &sa_query); ++ if (query_id < 0) { ++ ret = query_id; ++ print_err("ib_sa_path_rec_get returned %d\n", ret); ++ free_query_list(entry); ++ mutex_unlock(&data->lock); ++ kfree(data); ++ data = NULL; ++ goto send_resp; ++ } ++ entry->query = sa_query; ++ entry->sa_client = client; ++ entry->id = query_id; ++ ++ len += sizeof(*resp); ++ resp = (struct ibp_sa_path_rec_get_resp *) msg->data; ++ resp->query_id = query_id; ++ resp->sa_query = (u64)sa_query; ++ ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ ++ if (data) { ++ data->ret = ret; ++ mutex_unlock(&data->lock); ++ } ++ ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++static int sa_join_callback(int status, struct ib_sa_multicast *multicast) ++{ ++ struct join_mcast_cb_data *data; ++ struct ibp_client *client; ++ struct ib_sa_client *ib_client; ++ struct callback_work *cb_work; ++ struct cb_header *header; ++ struct mc_join_data *mc_join; ++ struct ib_sa_mcmember_rec *ib_rec; ++ struct ibp_sa_mcmember_rec *ibp_rec; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ data = (struct join_mcast_cb_data *) multicast->context; ++ ++ if (status == -ENETRESET) ++ goto err1; ++ ++ cb_work = kzalloc(sizeof(struct callback_work), GFP_KERNEL); ++ if (!cb_work) { ++ print_err("kzalloc failed\n"); ++ ret = -ENOMEM; ++ goto err1; ++ } ++ ++ client = data->client; ++ ++ ib_client = find_ibp_client(client); ++ if (!ib_client) { ++ print_err("Could not find client for event handler\n"); ++ ret = -EINVAL; ++ goto err2; ++ } ++ ++ cb_work->data = (struct generic_cb_data *) data; ++ cb_work->client = client; ++ cb_work->length = sizeof(*header) + sizeof(*mc_join); ++ ++ header = &cb_work->msg.header; ++ header->cb_type = JOIN_MCAST_CB; ++ header->status = status; ++ header->ibp_client = (u64) ib_client; ++ ++ mc_join = &cb_work->msg.u.mc_join; ++ mc_join->ibp_mcast = (u64) multicast; ++ mc_join->mcentry = data->mcentry; ++ ++ if (status) { ++ print_err("callback status %d\n", status); ++ goto queue_work; ++ } ++ ++ ib_rec = &multicast->rec; ++ ibp_rec = &mc_join->rec; ++ ++ ibp_rec->mgid_prefix = ib_rec->mgid.global.subnet_prefix; ++ ibp_rec->mgid_id = ib_rec->mgid.global.interface_id; ++ ibp_rec->port_gid_prefix = ib_rec->port_gid.global.subnet_prefix; ++ ibp_rec->port_gid_id = ib_rec->port_gid.global.interface_id; ++ ibp_rec->qkey = ib_rec->qkey; ++ ibp_rec->mlid = ib_rec->mlid; ++ ibp_rec->mtu_selector = ib_rec->mtu_selector; ++ ibp_rec->mtu = ib_rec->mtu; ++ ibp_rec->traffic_class = ib_rec->traffic_class; ++ ibp_rec->pkey = ib_rec->pkey; ++ ibp_rec->rate_selector = ib_rec->rate_selector; ++ ibp_rec->rate = ib_rec->rate; ++ ibp_rec->packet_life_time_selector ++ = ib_rec->packet_life_time_selector; ++ ibp_rec->packet_life_time = ib_rec->packet_life_time; ++ ibp_rec->sl = ib_rec->sl; ++ ibp_rec->flow_label = ib_rec->flow_label; ++ ibp_rec->hop_limit = ib_rec->hop_limit; ++ ibp_rec->join_state = ib_rec->join_state; ++ ibp_rec->proxy_join = ib_rec->proxy_join; ++ ++queue_work: ++ INIT_WORK(&cb_work->work, ibp_send_callback); ++ queue_work(client->workqueue, &cb_work->work); ++ return 0; ++err2: ++ kfree(cb_work); ++err1: ++ return ret; ++} ++ ++int ibp_cmd_sa_join_multicast(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct ib_device *ib_device; ++ 
struct ibp_verb_response_msg *msg; ++ struct ibp_sa_join_multicast_cmd *cmd; ++ struct ibp_sa_join_multicast_resp *resp; ++ struct ib_sa_client *client; ++ struct ib_sa_multicast *multicast; ++ struct join_mcast_cb_data *data; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_sa_join_multicast_cmd *) hdr; ++ ib_device = (struct ib_device *) cmd->device; ++ client = (struct ib_sa_client *) cmd->ibp_client; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ data = kzalloc(sizeof(*data), GFP_KERNEL); ++ if (!data) { ++ ret = -ENOMEM; ++ goto send_resp; ++ } ++ ++ data->client = ibp_client; ++ data->mcentry = cmd->mcentry; ++ ++ mutex_init(&data->lock); ++ mutex_lock(&data->lock); ++ ++ down_write(&list_rwsem); ++ list_add(&data->list, &mcast_list); ++ up_write(&list_rwsem); ++ ++ multicast = ib_sa_join_multicast(client, ib_device, ++ cmd->port_num, &cmd->rec, ++ cmd->comp_mask, GFP_KERNEL, ++ sa_join_callback, data); ++ ++ if (IS_ERR(multicast)) { ++ ret = PTR_ERR(multicast); ++ print_err("ib_sa_join_multicast returned %d\n", ret); ++ mutex_unlock(&data->lock); ++ down_write(&list_rwsem); ++ list_del(&data->list); ++ up_write(&list_rwsem); ++ kfree(data); ++ data = NULL; ++ goto send_resp; ++ } ++ data->mcast = multicast; ++ ++ len += sizeof(*resp); ++ resp = (struct ibp_sa_join_multicast_resp *) msg->data; ++ ++ resp->ibp_mcast = (u64) multicast; ++ ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ ++ if (data) { ++ data->ret = ret; ++ mutex_unlock(&data->lock); ++ } ++ ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++int ibp_cmd_sa_free_multicast(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct ibp_verb_response_msg *msg; ++ struct ibp_sa_free_multicast_cmd *cmd; ++ struct ib_sa_multicast *multicast; ++ struct join_mcast_cb_data *data; ++ size_t len; ++ int ret = 0; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_sa_free_multicast_cmd *) hdr; ++ multicast = (struct ib_sa_multicast *) cmd->ibp_mcast; ++ data = (struct join_mcast_cb_data *) multicast->context; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ ib_sa_free_multicast(multicast); ++ ++ down_write(&list_rwsem); ++ list_del(&data->list); ++ up_write(&list_rwsem); ++ ++ kfree(data); ++ ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} ++ ++int ibp_cmd_sa_get_mcmember_rec(struct ibp_client *ibp_client, ++ struct ibp_msg_header *hdr) ++{ ++ struct ib_device *ib_device; ++ struct ibp_verb_response_msg *msg; ++ struct ibp_sa_get_mcmember_rec_cmd *cmd; ++ struct ibp_sa_get_mcmember_rec_resp *resp; ++ struct ib_sa_mcmember_rec rec; ++ union ib_gid mgid; ++ size_t len; ++ int ret; ++ ++ print_trace("in\n"); ++ ++ cmd = (struct ibp_sa_get_mcmember_rec_cmd *) hdr; ++ ib_device = (struct ib_device *) cmd->device; ++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf; ++ len = sizeof(*msg); ++ ++ mgid.global.subnet_prefix = cmd->subnet_prefix; ++ mgid.global.interface_id = cmd->interface_id; ++ ++ ret = ib_sa_get_mcmember_rec(ib_device, cmd->port_num, &mgid, &rec); ++ if (ret) { ++ print_err("ib_sa_get_mcmember_rec returned %d\n", ret); ++ goto send_resp; ++ } ++ ++ len += sizeof(*resp); ++ resp = (struct ibp_sa_get_mcmember_rec_resp *) msg->data; ++ ++ 
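++ /* copy the queried mcmember record into the response, field by field */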
resp->rec.mgid.global.subnet_prefix ++ = rec.mgid.global.subnet_prefix; ++ resp->rec.mgid.global.interface_id ++ = rec.mgid.global.interface_id; ++ resp->rec.port_gid.global.subnet_prefix ++ = rec.port_gid.global.subnet_prefix; ++ resp->rec.port_gid.global.interface_id ++ = rec.port_gid.global.interface_id; ++ resp->rec.qkey = rec.qkey; ++ resp->rec.mlid = rec.mlid; ++ resp->rec.mtu_selector = rec.mtu_selector; ++ resp->rec.mtu = rec.mtu; ++ resp->rec.traffic_class = rec.traffic_class; ++ resp->rec.pkey = rec.pkey; ++ resp->rec.rate_selector = rec.rate_selector; ++ resp->rec.rate = rec.rate; ++ resp->rec.packet_life_time_selector ++ = rec.packet_life_time_selector; ++ resp->rec.packet_life_time ++ = rec.packet_life_time; ++ resp->rec.sl = rec.sl; ++ resp->rec.flow_label = rec.flow_label; ++ resp->rec.hop_limit = rec.hop_limit; ++ resp->rec.scope = rec.scope; ++ resp->rec.join_state = rec.join_state; ++ resp->rec.proxy_join = rec.proxy_join; ++ ++send_resp: ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret); ++ ++ ret = ibp_send(ibp_client->ep, msg, len); ++ if (ret) ++ print_err("ibp_send returned %d\n", ret); ++ ++ return ret; ++} +diff -urN a6/drivers/infiniband/ibp/sa/sa_table.h a7/drivers/infiniband/ibp/sa/sa_table.h +--- a6/drivers/infiniband/ibp/sa/sa_table.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/sa_table.h 2015-02-23 10:01:30.294769309 -0800 +@@ -0,0 +1,131 @@ ++/*" ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#define PATH_REC_FIELD(field) \ ++ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ ++ .struct_size_bytes = sizeof((struct ib_sa_path_rec *) 0)->field, \ ++ .field_name = "sa_path_rec:" #field ++ ++static const struct ib_field path_rec_table[] = { ++ { PATH_REC_FIELD(service_id), ++ .offset_words = 0, ++ .offset_bits = 0, ++ .size_bits = 64 }, ++ { PATH_REC_FIELD(dgid), ++ .offset_words = 2, ++ .offset_bits = 0, ++ .size_bits = 128 }, ++ { PATH_REC_FIELD(sgid), ++ .offset_words = 6, ++ .offset_bits = 0, ++ .size_bits = 128 }, ++ { PATH_REC_FIELD(dlid), ++ .offset_words = 10, ++ .offset_bits = 0, ++ .size_bits = 16 }, ++ { PATH_REC_FIELD(slid), ++ .offset_words = 10, ++ .offset_bits = 16, ++ .size_bits = 16 }, ++ { PATH_REC_FIELD(raw_traffic), ++ .offset_words = 11, ++ .offset_bits = 0, ++ .size_bits = 1 }, ++ { RESERVED, ++ .offset_words = 11, ++ .offset_bits = 1, ++ .size_bits = 3 }, ++ { PATH_REC_FIELD(flow_label), ++ .offset_words = 11, ++ .offset_bits = 4, ++ .size_bits = 20 }, ++ { PATH_REC_FIELD(hop_limit), ++ .offset_words = 11, ++ .offset_bits = 24, ++ .size_bits = 8 }, ++ { PATH_REC_FIELD(traffic_class), ++ .offset_words = 12, ++ .offset_bits = 0, ++ .size_bits = 8 }, ++ { PATH_REC_FIELD(reversible), ++ .offset_words = 12, ++ .offset_bits = 8, ++ .size_bits = 1 }, ++ { PATH_REC_FIELD(numb_path), ++ .offset_words = 12, ++ .offset_bits = 9, ++ .size_bits = 7 }, ++ { PATH_REC_FIELD(pkey), ++ .offset_words = 12, ++ .offset_bits = 16, ++ .size_bits = 16 }, ++ { PATH_REC_FIELD(qos_class), ++ .offset_words = 13, ++ .offset_bits = 0, ++ .size_bits = 12 }, ++ { PATH_REC_FIELD(sl), ++ .offset_words = 13, ++ .offset_bits = 12, ++ .size_bits = 4 }, ++ { PATH_REC_FIELD(mtu_selector), ++ .offset_words = 13, ++ .offset_bits = 16, ++ .size_bits = 2 }, ++ { PATH_REC_FIELD(mtu), ++ .offset_words = 13, ++ .offset_bits = 18, ++ .size_bits = 6 }, ++ { PATH_REC_FIELD(rate_selector), ++ .offset_words = 13, ++ .offset_bits = 24, ++ .size_bits = 2 }, ++ { PATH_REC_FIELD(rate), ++ .offset_words = 13, ++ .offset_bits = 26, ++ .size_bits = 6 }, ++ { PATH_REC_FIELD(packet_life_time_selector), ++ .offset_words = 14, ++ .offset_bits = 0, ++ .size_bits = 2 }, ++ { PATH_REC_FIELD(packet_life_time), ++ .offset_words = 14, ++ .offset_bits = 2, ++ .size_bits = 6 }, ++ { PATH_REC_FIELD(preference), ++ .offset_words = 14, ++ .offset_bits = 8, ++ .size_bits = 8 }, ++ { RESERVED, ++ .offset_words = 14, ++ .offset_bits = 16, ++ .size_bits = 48 }, ++}; +diff -urN a6/drivers/infiniband/ibp/sa/server.c a7/drivers/infiniband/ibp/sa/server.c +--- a6/drivers/infiniband/ibp/sa/server.c 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/server.c 2015-02-23 10:01:30.294769309 -0800 +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. 
++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include "server.h" ++ ++MODULE_AUTHOR("Jerrie Coffman"); ++MODULE_AUTHOR("Phil Cayton"); ++MODULE_AUTHOR("Jay Sternberg"); ++MODULE_LICENSE("Dual BSD/GPL"); ++MODULE_DESCRIPTION(DRV_DESC); ++MODULE_VERSION(DRV_VERSION); ++ ++MODULE_PARAM(port, port, int, IBP_SA_PORT, "Connection port"); ++MODULE_PARAM(backlog, backlog, int, 8, "Connection backlog"); ++MODULE_PARAM(timeout, timeout, int, 1000, "Listen/Poll time in milliseconds"); ++ ++#ifdef IBP_DEBUG ++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all"); ++#endif ++ ++struct rw_semaphore list_rwsem; ++ ++LIST_HEAD(client_list); ++ ++static struct task_struct *listen_thread; ++ ++static struct ibp_client *ibp_create_client(scif_epd_t ep, uint16_t node) ++{ ++ struct ibp_client *client; ++ int ret = -ENOMEM; ++ ++ client = kzalloc(sizeof(*client), GFP_KERNEL); ++ if (!client) { ++ print_err("kzalloc failed\n"); ++ return ERR_PTR(ret); ++ } ++ ++ client->ep = ep; ++ ++ client->rx_buf = (void *)__get_free_page(GFP_KERNEL); ++ if (!client->rx_buf) { ++ print_err("__get_free_page rx_buf failed\n"); ++ goto err0; ++ } ++ ++ client->tx_buf = (void *)__get_free_page(GFP_KERNEL); ++ if (!client->tx_buf) { ++ print_err("__get_free_page tx_buf failed\n"); ++ goto err1; ++ } ++ ++ client->workqueue = create_singlethread_workqueue(DRV_NAME); ++ if (!client->workqueue) { ++ print_err("create_singlethread_workqueue failed\n"); ++ goto err2; ++ } ++ ++ down_write(&list_rwsem); ++ list_add(&client->list, &client_list); ++ up_write(&list_rwsem); ++ ++ client->ibp_sa_client_thread = kthread_run(ibp_process_recvs, ++ client, DRV_NAME); ++ if (!client->ibp_sa_client_thread) { ++ print_err("create client thread failed\n"); ++ goto err3; ++ } ++ ++ return client; ++err3: ++ down_write(&list_rwsem); ++ list_del(&client->list); ++ up_write(&list_rwsem); ++ ++ destroy_workqueue(client->workqueue); ++err2: ++ free_page((uintptr_t)client->tx_buf); ++err1: ++ free_page((uintptr_t)client->rx_buf); ++err0: ++ kfree(client); ++ return ERR_PTR(ret); ++} ++ ++static int ibp_sa_listen(void *data) ++{ ++ struct ibp_client *client; ++ struct scif_pollepd listen; ++ struct scif_portID peer; ++ scif_epd_t ep; ++ int ret; ++ ++ listen.epd = scif_open(); ++ if (IS_NULL_OR_ERR(listen.epd)) { ++ print_err("scif_open failed\n"); ++ ret = -EIO; ++ goto err0; ++ } ++ listen.events = POLLIN; ++ ++ ret = scif_bind(listen.epd, port); ++ if (ret < 0) { ++ print_err("scif_bind returned %d\n", ret); ++ goto err1; ++ } ++ ++ ret = scif_listen(listen.epd, backlog); ++ if (ret) { ++ print_err("scif_listen returned %d\n", ret); ++ goto err1; ++ } ++ ++ while (!kthread_should_stop()) { ++ ++ schedule(); ++ ++ ret = scif_poll(&listen, 1, timeout); ++ if (ret == 0) /* timeout */ ++ continue; ++ if (ret < 0) { ++ print_err("scif_poll 
revents 0x%x\n", listen.revents); ++ continue; ++ } ++ ++ ret = scif_accept(listen.epd, &peer, &ep, 0); ++ if (ret) { ++ print_err("scif_accept returned %d\n", ret); ++ continue; ++ } ++ ++ print_dbg("accepted node %d port %d\n", peer.node, peer.port); ++ ++ client = ibp_create_client(ep, peer.node); ++ if (IS_ERR(client)) { ++ ret = PTR_ERR(client); ++ print_err("ibp_create_client returned %d\n", ret); ++ scif_close(ep); ++ } ++ } ++err1: ++ scif_close(listen.epd); ++err0: ++ return ret; ++} ++ ++static int __init ibp_sa_server_init(void) ++{ ++ int ret = 0; ++ ++ print_info(DRV_SIGNON); ++ ++ init_rwsem(&list_rwsem); ++ ++ /* Start a thread for inbound connections. */ ++ listen_thread = kthread_run(ibp_sa_listen, NULL, DRV_NAME); ++ if (IS_NULL_OR_ERR(listen_thread)) { ++ ret = PTR_ERR(listen_thread); ++ print_err("kthread_run returned %d\n", ret); ++ } ++ ++ return ret; ++} ++ ++static void __exit ibp_sa_server_exit(void) ++{ ++ struct ibp_client *client, *next; ++ struct completion done; ++ ++ kthread_stop(listen_thread); ++ ++ down_write(&list_rwsem); ++ list_for_each_entry_safe(client, next, &client_list, list) { ++ init_completion(&done); ++ client->done = &done; ++ ++ /* Close scif ep to unblock the client thread scif_recv */ ++ scif_close(client->ep); ++ ++ up_write(&list_rwsem); ++ ++ /* Wait for client thread to finish */ ++ wait_for_completion(&done); ++ ++ down_write(&list_rwsem); ++ } ++ up_write(&list_rwsem); ++ ++ print_info(DRV_DESC " unloaded\n"); ++} ++ ++module_init(ibp_sa_server_init); ++module_exit(ibp_sa_server_exit); +diff -urN a6/drivers/infiniband/ibp/sa/server.h a7/drivers/infiniband/ibp/sa/server.h +--- a6/drivers/infiniband/ibp/sa/server.h 1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/server.h 2015-02-23 10:01:30.294769309 -0800 +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++ ++#ifndef SERVER_H ++#define SERVER_H ++ ++#include ++#include ++#include ++#include ++#include ++#include "ibp-abi.h" ++#include "sa_ibp_abi.h" ++#include "common.h" ++ ++#define DRV_ROLE "Server" ++#define DRV_NAME "ibp_sa_server" ++ ++extern int timeout; ++extern struct rw_semaphore list_rwsem; ++extern struct list_head client_list; ++extern struct list_head sa_entry_list; ++extern struct list_head query_list; ++extern struct list_head mcast_list; ++ ++struct ib_sa_sm_ah { ++ struct ib_ah *ah; ++ struct kref ref; ++ u16 pkey_index; ++ u8 src_path_mask; ++}; ++ ++struct ib_sa_port { ++ struct ib_mad_agent *agent; ++ struct ib_mad_agent *notice_agent; ++ struct ib_sa_sm_ah *sm_ah; ++ struct work_struct update_task; ++ spinlock_t ah_lock; ++ u8 port_num; ++ struct ib_device *device; ++}; ++ ++struct ib_sa_device { ++ int start_port, end_port; ++ struct ib_event_handler event_handler; ++ struct ib_sa_port port[0]; ++}; ++ ++struct ibp_client { ++ struct list_head list; ++ scif_epd_t ep; ++ void *rx_buf; ++ void *tx_buf; ++ struct completion *done; ++ struct workqueue_struct *workqueue; ++ struct task_struct *ibp_sa_client_thread; ++}; ++ ++struct sa_entry { ++ struct list_head list; ++ struct ib_sa_client ib_client; ++ struct ibp_client *client; ++}; ++ ++struct sa_query_entry { ++ struct list_head list; ++ int id; ++ struct ibp_client *ibp_client; ++ struct ib_sa_client *sa_client; ++ struct ib_sa_query *query; ++}; ++ ++struct path_rec_cb_data { ++ struct mutex lock; ++ int ret; ++ struct sa_query_entry *entry; ++ u64 ibp_entry; ++ u64 ibp_query; ++}; ++ ++struct join_mcast_cb_data { ++ struct mutex lock; ++ int ret; ++ struct ibp_client *client; ++ struct ib_sa_multicast *mcast; ++ struct list_head list; ++ u64 entry; ++ u64 mcentry; ++}; ++ ++struct generic_cb_data { ++ struct mutex lock; ++ int ret; ++}; ++ ++struct callback_work { ++ struct work_struct work; ++ struct ibp_client *client; ++ struct generic_cb_data *data; ++ int length; ++ struct callback_msg msg; ++}; ++ ++#define IBP_INIT_MSG(msg, size, op) \ ++ do { \ ++ (msg)->header.opcode = IBP_##op; \ ++ (msg)->header.length = (size); \ ++ (msg)->header.status = 0; \ ++ (msg)->header.reserved = 0; \ ++ (msg)->header.request = 0; \ ++ } while (0) ++ ++#define IBP_INIT_RESP(resp, size, op, req, stat) \ ++ do { \ ++ (resp)->header.opcode = IBP_##op; \ ++ (resp)->header.length = (size); \ ++ (resp)->header.status = (stat); \ ++ (resp)->header.reserved = 0; \ ++ (resp)->header.request = (req); \ ++ } while (0) ++ ++int ibp_process_recvs(void *p); ++ ++int ibp_cmd_sa_path_rec_get(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_sa_register_client(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_sa_unregister_client(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_sa_cancel_query(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_init_ah_from_path(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_init_ah_from_mcmember(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_sa_join_multicast(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_sa_free_multicast(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++int ibp_cmd_sa_get_mcmember_rec(struct ibp_client *client, ++ struct ibp_msg_header *hdr); ++ ++#endif /* SERVER_H */ +diff -urN a6/drivers/infiniband/ibp/sa/server_msg.c a7/drivers/infiniband/ibp/sa/server_msg.c +--- a6/drivers/infiniband/ibp/sa/server_msg.c 
1969-12-31 16:00:00.000000000 -0800 ++++ a7/drivers/infiniband/ibp/sa/server_msg.c 2015-02-23 10:01:30.294769309 -0800 +@@ -0,0 +1,185 @@ ++/* ++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++ ++#include ++ ++#include "server.h" ++#include "sa_ibp_abi.h" ++ ++int ibp_send(scif_epd_t ep, void *buf, size_t len) ++{ ++ int ret; ++ ++ while (len) { ++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK); ++ if (ret < 0) { ++ print_dbg("scif_send returned %d\n", ret); ++ return ret; ++ } ++ buf += ret; ++ len -= ret; ++ } ++ ++ return 0; ++} ++ ++int ibp_recv(scif_epd_t ep, void *buf, size_t len) ++{ ++ int ret; ++ ++ while (len) { ++ ret = scif_recv(ep, buf, (uint32_t)len, SCIF_RECV_BLOCK); ++ if (ret < 0) { ++ print_dbg("scif_recv returned %d\n", ret); ++ return ret; ++ } ++ buf += ret; ++ len -= ret; ++ } ++ ++ return 0; ++} ++ ++static int ++ibp_cmd_bad_request(struct ibp_client *client, struct ibp_msg_header *hdr) ++{ ++ struct ibp_verb_response_msg *msg; ++ size_t len; ++ int status = -EBADRQC; ++ ++ msg = (struct ibp_verb_response_msg *) client->tx_buf; ++ len = sizeof(*msg); ++ ++ print_dbg("opcode 0x%x\n", hdr->opcode); ++ ++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, status); ++ return ibp_send(client->ep, msg, len); ++} ++ ++static void ++ibp_sa_destroy_client(struct ibp_client *client) ++{ ++ struct join_mcast_cb_data *mcast, *next_mcast; ++ struct sa_query_entry *query, *next_query; ++ struct sa_entry *sa, *next_sa; ++ ++ down_write(&list_rwsem); ++ list_del(&client->list); ++ list_for_each_entry_safe(mcast, next_mcast, &mcast_list, list) ++ if (mcast->client == client) { ++ ib_sa_free_multicast(mcast->mcast); ++ list_del(&mcast->list); ++ kfree(mcast); ++ } ++ list_for_each_entry_safe(query, next_query, &query_list, list) ++ if (query->ibp_client == client) { ++ ib_sa_cancel_query(query->id, query->query); ++ list_del(&query->list); ++ kfree(query); ++ } ++ list_for_each_entry_safe(sa, next_sa, &sa_entry_list, list) ++ if (sa->client == client) { ++ ib_sa_unregister_client(&sa->ib_client); ++ list_del(&sa->list); ++ kfree(sa); ++ } ++ up_write(&list_rwsem); ++ ++ destroy_workqueue(client->workqueue); ++ ++ 
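++	/* Release the rx/tx message pages allocated in ibp_create_client(). */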
free_page((uintptr_t)client->tx_buf); ++ free_page((uintptr_t)client->rx_buf); ++ ++ if (client->done) ++ complete(client->done); ++ else ++ scif_close(client->ep); ++ ++ kfree(client); ++} ++ ++static int ++(*ibp_msg_table[])(struct ibp_client *c, struct ibp_msg_header *h) = { ++ [IBP_SA_PATH_REC_GET] = ibp_cmd_sa_path_rec_get, ++ [IBP_SA_REGISTER_CLIENT] = ibp_cmd_sa_register_client, ++ [IBP_SA_UNREGISTER_CLIENT] = ibp_cmd_sa_unregister_client, ++ [IBP_SA_CANCEL_QUERY] = ibp_cmd_sa_cancel_query, ++ [IBP_INIT_AH_FROM_PATH] = ibp_cmd_init_ah_from_path, ++ [IBP_INIT_AH_FROM_MCMEMBER] = ibp_cmd_init_ah_from_mcmember, ++ [IBP_SA_JOIN_MCAST] = ibp_cmd_sa_join_multicast, ++ [IBP_SA_FREE_MCAST] = ibp_cmd_sa_free_multicast, ++ [IBP_SA_GET_MCMEMBER_REC] = ibp_cmd_sa_get_mcmember_rec, ++}; ++ ++int ibp_process_recvs(void *p) ++{ ++ struct ibp_client *client; ++ struct ibp_msg_header *hdr; ++ int ret; ++ ++ client = (struct ibp_client *) p; ++ hdr = (struct ibp_msg_header *) client->rx_buf; ++ ++ for (;;) { ++ ret = ibp_recv(client->ep, hdr, sizeof(*hdr)); ++ if (ret) ++ break; ++ ++ if (hdr->length > MAX_MSG_SIZE) { ++ print_err("message too large, len %u max %lu\n", ++ hdr->length, MAX_MSG_SIZE); ++ ret = -EMSGSIZE; ++ break; ++ } ++ ++ if (hdr->length > sizeof(*hdr)) { ++ ret = ibp_recv(client->ep, hdr->data, ++ hdr->length - sizeof(*hdr)); ++ if (ret) ++ break; ++ } ++ ++ if ((hdr->opcode >= ARRAY_SIZE(ibp_msg_table)) || ++ !ibp_msg_table[hdr->opcode]) { ++ ibp_cmd_bad_request(client, hdr); ++ continue; ++ } ++ ++ ret = ibp_msg_table[hdr->opcode](client, hdr); ++ if (ret) ++ break; ++ } ++ ++ ibp_sa_destroy_client(client); ++ ++ return ret; ++} diff --git a/tech-preview/xeon-phi/0008-Add-ibscif-to-the-Infiniband-HW-directory.patch b/tech-preview/xeon-phi/0008-Add-ibscif-to-the-Infiniband-HW-directory.patch new file mode 100644 index 0000000..b34ff02 --- /dev/null +++ b/tech-preview/xeon-phi/0008-Add-ibscif-to-the-Infiniband-HW-directory.patch @@ -0,0 +1,9132 @@ +From 674c5e41008346a8d68f534d408e240b152dec5e Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Wed, 28 May 2014 15:53:58 -0700 +Subject: [PATCH 08/13] Add ibscif to the Infiniband HW directory + +Signed-off-by: Phil Cayton +--- +diff -urN a7/drivers/infiniband/hw/scif/ibscif_ah.c a8/drivers/infiniband/hw/scif/ibscif_ah.c +--- a7/drivers/infiniband/hw/scif/ibscif_ah.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_ah.c 2015-02-23 10:14:37.482809663 -0800 +@@ -0,0 +1,50 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++struct ib_ah *ibscif_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) ++{ ++ struct ibscif_ah *ah; ++ ++ ah = kzalloc(sizeof *ah, GFP_KERNEL); ++ if (!ah) ++ return ERR_PTR(-ENOMEM); ++ ++ ah->dlid = cpu_to_be16(attr->dlid); ++ ++ return &ah->ibah; ++} ++ ++int ibscif_destroy_ah(struct ib_ah *ibah) ++{ ++ kfree(to_ah(ibah)); ++ return 0; ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_cm.c a8/drivers/infiniband/hw/scif/ibscif_cm.c +--- a7/drivers/infiniband/hw/scif/ibscif_cm.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_cm.c 2015-02-23 10:14:37.482809663 -0800 +@@ -0,0 +1,515 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include "ibscif_driver.h" ++ ++static LIST_HEAD(listen_list); ++DEFINE_SPINLOCK(listen_list_lock); ++ ++static int sockaddr_in_to_node_id( struct sockaddr_in addr ) ++{ ++ u8 *p = (u8 *)&addr.sin_addr.s_addr; ++ ++ if (p[0]==192 && p[1]==0 && p[2]==2 && p[3]>=100 && p[3]<100+IBSCIF_MAX_DEVICES) ++ return (int)(p[3]-100); ++ ++ else ++ return -EINVAL; ++} ++ ++static struct sockaddr_in node_id_to_sockaddr_in( int node_id ) ++{ ++ struct sockaddr_in addr; ++ u8 *p = (u8 *)&addr.sin_addr.s_addr; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = 0; ++ addr.sin_port = 0; ++ ++ p[0] = 192; ++ p[1] = 0; ++ p[2] = 2; ++ p[3] = 100 + node_id; ++ ++ return addr; ++} ++ ++void free_cm(struct kref *kref) ++{ ++ struct ibscif_cm *cm_ctx; ++ cm_ctx = container_of(kref, struct ibscif_cm, kref); ++ if (cm_ctx->conn) ++ ibscif_put_conn(cm_ctx->conn); ++ kfree(cm_ctx); ++} ++ ++static inline void get_cm(struct ibscif_cm *cm_ctx) ++{ ++ kref_get(&cm_ctx->kref); ++} ++ ++static inline void put_cm(struct ibscif_cm *cm_ctx) ++{ ++ kref_put(&cm_ctx->kref, free_cm); ++} ++ ++void free_listen(struct kref *kref) ++{ ++ struct ibscif_listen *listen; ++ listen = container_of(kref, struct ibscif_listen, kref); ++ kfree(listen); ++} ++ ++static inline void get_listen(struct ibscif_listen *listen) ++{ ++ kref_get(&listen->kref); ++} ++ ++static inline void put_listen(struct ibscif_listen *listen) ++{ ++ kref_put(&listen->kref, free_listen); ++} ++ ++static int connect_qp(struct ibscif_cm *cm_ctx) ++{ ++ struct ibscif_qp *qp; ++ struct ib_qp_attr qp_attr; ++ int qp_attr_mask; ++ int err; ++ ++ qp = ibscif_get_qp(cm_ctx->qpn); ++ if (IS_ERR(qp)) { ++ printk(KERN_ERR PFX "%s: invalid QP number: %d\n", __func__, cm_ctx->qpn); ++ return -EINVAL; ++ } ++ ++ qp_attr_mask = IB_QP_STATE | ++ IB_QP_AV | ++ IB_QP_DEST_QPN | ++ IB_QP_ACCESS_FLAGS | ++ IB_QP_MAX_QP_RD_ATOMIC | ++ IB_QP_MAX_DEST_RD_ATOMIC; ++ ++ qp_attr.ah_attr.ah_flags = 0; ++ qp_attr.ah_attr.dlid = IBSCIF_NODE_ID_TO_LID(cm_ctx->remote_node_id); ++ qp_attr.dest_qp_num = cm_ctx->remote_qpn; ++ qp_attr.qp_state = IB_QPS_RTS; ++ qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE | ++ IB_ACCESS_REMOTE_WRITE | ++ IB_ACCESS_REMOTE_READ | ++ IB_ACCESS_REMOTE_ATOMIC; ++ qp_attr.max_rd_atomic = 16; /* 8-bit value, don't use MAX_OR */ ++ qp_attr.max_dest_rd_atomic = 16;/* 8-bit value, don't use MAX_IR */ ++ ++ err = ib_modify_qp(&qp->ibqp, &qp_attr, qp_attr_mask); ++ ++ if (!err) { ++ qp->cm_context = cm_ctx; ++ get_cm(cm_ctx); ++ } ++ ++ ibscif_put_qp(qp); ++ ++ return err; ++} ++ ++static void event_connection_close(struct ibscif_cm *cm_ctx) ++{ ++ struct iw_cm_event event; ++ ++ memset(&event, 0, sizeof(event)); ++ event.event = IW_CM_EVENT_CLOSE; ++ event.status = -ECONNRESET; ++ if (cm_ctx->cm_id) { ++ cm_ctx->cm_id->event_handler(cm_ctx->cm_id, &event); ++ cm_ctx->cm_id->rem_ref(cm_ctx->cm_id); ++ cm_ctx->cm_id = NULL; ++ } ++} ++ ++static void event_connection_reply(struct ibscif_cm *cm_ctx, int status) ++{ ++ struct iw_cm_event event; ++ ++ memset(&event, 0, sizeof(event)); ++ event.event = IW_CM_EVENT_CONNECT_REPLY; ++ event.status = status; ++ event.local_addr = *(struct sockaddr_storage *) &cm_ctx->local_addr; ++ event.remote_addr = *(struct sockaddr_storage *) &cm_ctx->remote_addr; ++ ++ if ((status == 0) || (status == -ECONNREFUSED)) { ++ event.private_data_len = cm_ctx->plen; ++ event.private_data = cm_ctx->pdata; ++ } ++ if (cm_ctx->cm_id) { ++ cm_ctx->cm_id->event_handler(cm_ctx->cm_id, &event); ++ if (status == -ECONNREFUSED) { ++ 
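++			/* The peer refused the connection; drop the cm_id reference taken in ibscif_cm_connect(). */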
cm_ctx->cm_id->rem_ref(cm_ctx->cm_id); ++ cm_ctx->cm_id = NULL; ++ } ++ } ++} ++ ++static void event_connection_request(struct ibscif_cm *cm_ctx) ++{ ++ struct iw_cm_event event; ++ ++ memset(&event, 0, sizeof(event)); ++ event.event = IW_CM_EVENT_CONNECT_REQUEST; ++ event.local_addr = *(struct sockaddr_storage *) &cm_ctx->local_addr; ++ event.remote_addr = *(struct sockaddr_storage *) &cm_ctx->remote_addr; ++ event.private_data_len = cm_ctx->plen; ++ event.private_data = cm_ctx->pdata; ++ event.provider_data = cm_ctx; ++ event.ird = 16; ++ event.ord = 16; ++ ++ if (cm_ctx->listen) { ++ cm_ctx->listen->cm_id->event_handler( cm_ctx->listen->cm_id, &event); ++ put_listen(cm_ctx->listen); ++ cm_ctx->listen = NULL; ++ } ++} ++ ++static void event_connection_established( struct ibscif_cm *cm_ctx ) ++{ ++ struct iw_cm_event event; ++ ++ memset(&event, 0, sizeof(event)); ++ event.event = IW_CM_EVENT_ESTABLISHED; ++ event.ird = 16; ++ event.ord = 16; ++ if (cm_ctx->cm_id) { ++ cm_ctx->cm_id->event_handler(cm_ctx->cm_id, &event); ++ } ++} ++ ++void ibscif_cm_async_callback(void *cm_context) ++{ ++ struct ibscif_cm *cm_ctx = cm_context; ++ ++ if (cm_ctx) { ++ event_connection_close(cm_ctx); ++ put_cm(cm_ctx); ++ } ++} ++ ++int ibscif_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ++{ ++ struct ibscif_cm *cm_ctx; ++ struct sockaddr_in *local_addr = (struct sockaddr_in *) &cm_id->local_addr; ++ struct sockaddr_in *remote_addr = (struct sockaddr_in *) &cm_id->remote_addr; ++ int node_id; ++ int remote_node_id; ++ int err = 0; ++ ++ cm_ctx = kzalloc(sizeof *cm_ctx, GFP_KERNEL); ++ if (!cm_ctx) { ++ printk(KERN_ALERT PFX "%s: cannot allocate cm_ctx\n", __func__); ++ return -ENOMEM; ++ } ++ ++ kref_init(&cm_ctx->kref); /* refcnt <- 1 */ ++ spin_lock_init(&cm_ctx->lock); ++ ++ node_id = sockaddr_in_to_node_id(*local_addr); ++ remote_node_id = sockaddr_in_to_node_id(*remote_addr); ++ if (node_id<0 || remote_node_id<0) { ++ printk(KERN_ALERT PFX "%s: invalid address, local_addr=%8x, remote_addr=%8x, node_id=%d, remote_node_id=%d\n", ++ __func__, local_addr->sin_addr.s_addr, remote_addr->sin_addr.s_addr, ++ node_id, remote_node_id); ++ err = -EINVAL; ++ goto out_free; ++ } ++ ++ cm_ctx->conn = ibscif_get_conn( node_id, remote_node_id, 0 ); ++ if (!cm_ctx->conn) { ++ printk(KERN_ALERT PFX "%s: failed to get connection %d-->%d\n", __func__, node_id, remote_node_id); ++ err = -EINVAL; ++ goto out_free; ++ } ++ ++ cm_id->add_ref(cm_id); ++ cm_id->provider_data = cm_ctx; ++ ++ cm_ctx->cm_id = cm_id; ++ cm_ctx->node_id = node_id; ++ cm_ctx->remote_node_id = remote_node_id; ++ cm_ctx->local_addr = *local_addr; ++ cm_ctx->remote_addr = *remote_addr; ++ cm_ctx->qpn = conn_param->qpn; ++ cm_ctx->plen = conn_param->private_data_len; ++ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) { ++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n", ++ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE); ++ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE; ++ } ++ if (cm_ctx->plen) ++ memcpy(cm_ctx->pdata, conn_param->private_data, cm_ctx->plen); ++ ++ err = ibscif_send_cm_req( cm_ctx ); ++ ++ return err; ++ ++out_free: ++ kfree(cm_ctx); ++ return err; ++} ++ ++int ibscif_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ++{ ++ struct ibscif_cm *cm_ctx = cm_id->provider_data; ++ int err = 0; ++ ++ cm_id->add_ref(cm_id); ++ cm_ctx->cm_id = cm_id; ++ cm_ctx->qpn = conn_param->qpn; ++ cm_ctx->plen = conn_param->private_data_len; ++ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) { ++ 
printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n", ++ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE); ++ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE; ++ } ++ if (cm_ctx->plen) ++ memcpy(cm_ctx->pdata, conn_param->private_data, cm_ctx->plen); ++ ++ err = connect_qp( cm_ctx ); ++ if (err) { ++ printk(KERN_ALERT PFX "%s: failed to modify QP into connected state\n", __func__); ++ goto err_out; ++ } ++ ++ err = ibscif_send_cm_rep( cm_ctx ); ++ if (err) { ++ printk(KERN_ALERT PFX "%s: failed to send REP\n", __func__); ++ goto err_out; ++ } ++ ++ return 0; ++ ++err_out: ++ cm_id->rem_ref(cm_id); ++ cm_ctx->cm_id = NULL; ++ put_cm(cm_ctx); ++ return err; ++} ++ ++int ibscif_cm_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) ++{ ++ struct ibscif_cm *cm_ctx = cm_id->provider_data; ++ int err = 0; ++ ++ err = ibscif_send_cm_rej( cm_ctx, pdata, pdata_len ); ++ ++ put_cm(cm_ctx); ++ return err; ++} ++ ++int ibscif_cm_create_listen(struct iw_cm_id *cm_id, int backlog) ++{ ++ struct ibscif_listen *listen; ++ struct sockaddr_in *local_addr = (struct sockaddr_in *) &cm_id->local_addr; ++ ++ listen = kzalloc(sizeof *listen, GFP_KERNEL); ++ if (!listen) { ++ printk(KERN_ALERT PFX "%s: cannot allocate listen object\n", __func__); ++ return -ENOMEM; ++ } ++ ++ kref_init(&listen->kref); /* refcnt <- 1 */ ++ ++ listen->cm_id = cm_id; ++ listen->port = local_addr->sin_port; ++ cm_id->provider_data = listen; ++ cm_id->add_ref(cm_id); ++ ++ spin_lock_bh(&listen_list_lock); ++ list_add(&listen->entry, &listen_list); ++ spin_unlock_bh(&listen_list_lock); ++ ++ return 0; ++} ++ ++int ibscif_cm_destroy_listen(struct iw_cm_id *cm_id) ++{ ++ struct ibscif_listen *listen = cm_id->provider_data; ++ ++ spin_lock_bh(&listen_list_lock); ++ list_del(&listen->entry); ++ spin_unlock_bh(&listen_list_lock); ++ cm_id->rem_ref(cm_id); ++ put_listen(listen); ++ ++ return 0; ++} ++ ++/* similar to ibscif_get_qp(), but differs in: ++ * (1) use the "irqsave" version of the lock functions to avoid the ++ * kernel warnings about "local_bh_enable_ip"; ++ * (2) don't hold the reference on success; ++ * (3) return NULL instead of error code on failure. ++ */ ++struct ib_qp *ibscif_cm_get_qp(struct ib_device *ibdev, int qpn) ++{ ++ struct ibscif_qp *qp; ++ unsigned long flags; ++ ++ read_lock_irqsave(&wiremap_lock, flags); ++ qp = idr_find(&wiremap, qpn); ++ if (likely(qp) && unlikely(qp->magic != QP_MAGIC)) ++ qp = NULL; ++ read_unlock_irqrestore(&wiremap_lock,flags); ++ ++ return qp ? 
&qp->ibqp : NULL; ++} ++ ++void ibscif_cm_add_ref(struct ib_qp *ibqp) ++{ ++ struct ibscif_qp *qp; ++ ++ if (likely(ibqp)) { ++ qp = to_qp(ibqp); ++ kref_get(&qp->ref); ++ } ++} ++ ++void ibscif_cm_rem_ref(struct ib_qp *ibqp) ++{ ++ struct ibscif_qp *qp; ++ ++ if (likely(ibqp)) { ++ qp = to_qp(ibqp); ++ ibscif_put_qp(qp); ++ } ++} ++ ++int ibscif_process_cm_skb(struct sk_buff *skb, struct ibscif_conn *conn) ++{ ++ union ibscif_pdu *pdu = (union ibscif_pdu *)skb->data; ++ struct ibscif_cm *cm_ctx; ++ struct ibscif_listen *listen; ++ int cmd, qpn, status, plen, err, port; ++ u64 req_ctx, rep_ctx; ++ ++ req_ctx = __be64_to_cpu(pdu->cm.req_ctx); ++ rep_ctx = __be64_to_cpu(pdu->cm.rep_ctx); ++ cmd = __be32_to_cpu(pdu->cm.cmd); ++ port = __be32_to_cpu(pdu->cm.port); ++ qpn = __be32_to_cpu(pdu->cm.qpn); ++ status = __be32_to_cpu(pdu->cm.status); ++ plen = __be32_to_cpu(pdu->cm.plen); ++ ++ switch (cmd) { ++ case IBSCIF_CM_REQ: ++ cm_ctx = kzalloc(sizeof *cm_ctx, GFP_KERNEL); ++ if (!cm_ctx) { ++ printk(KERN_ALERT PFX "%s: cannot allocate cm_ctx\n", __func__); ++ return -ENOMEM; ++ } ++ kref_init(&cm_ctx->kref); /* refcnt <- 1 */ ++ spin_lock_init(&cm_ctx->lock); ++ ++ spin_lock_bh(&listen_list_lock); ++ list_for_each_entry(listen, &listen_list, entry) { ++ if (listen->port == port) { ++ cm_ctx->listen = listen; ++ get_listen(listen); ++ } ++ } ++ spin_unlock_bh(&listen_list_lock); ++ ++ if (!cm_ctx->listen) { ++ printk(KERN_ALERT PFX "%s: no matching listener for connection request, port=%d\n", __func__, port); ++ put_cm(cm_ctx); ++ /* fix me: send CM_REJ */ ++ return -EINVAL; ++ } ++ ++ cm_ctx->cm_id = NULL; ++ cm_ctx->node_id = conn->dev->node_id; ++ cm_ctx->remote_node_id = conn->remote_node_id; ++ cm_ctx->local_addr = node_id_to_sockaddr_in(cm_ctx->node_id); ++ if (cm_ctx->listen) ++ cm_ctx->local_addr.sin_port = cm_ctx->listen->port; ++ cm_ctx->remote_addr = node_id_to_sockaddr_in(cm_ctx->remote_node_id); ++ cm_ctx->remote_qpn = qpn; ++ cm_ctx->plen = plen; ++ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) { ++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n", ++ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE); ++ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE; ++ } ++ if (cm_ctx->plen) ++ memcpy(cm_ctx->pdata, pdu->cm.pdata, cm_ctx->plen); ++ ++ cm_ctx->peer_context = req_ctx; ++ cm_ctx->conn = conn; ++ atomic_inc(&conn->refcnt); ++ ++ event_connection_request(cm_ctx); ++ break; ++ ++ case IBSCIF_CM_REP: ++ cm_ctx = (struct ibscif_cm *)req_ctx; ++ cm_ctx->plen = plen; ++ memcpy(cm_ctx->pdata, pdu->cm.pdata, plen); ++ cm_ctx->remote_qpn = qpn; ++ cm_ctx->peer_context = rep_ctx; ++ err = connect_qp( cm_ctx ); ++ if (!err) ++ err = ibscif_send_cm_rtu(cm_ctx); ++ if (err) ++ printk(KERN_ALERT PFX "%s: failed to modify QP into connected state\n", __func__); ++ event_connection_reply(cm_ctx, err); ++ put_cm(cm_ctx); ++ break; ++ ++ case IBSCIF_CM_REJ: ++ cm_ctx = (struct ibscif_cm *)req_ctx; ++ cm_ctx->plen = plen; ++ memcpy(cm_ctx->pdata, pdu->cm.pdata, plen); ++ event_connection_reply(cm_ctx, status); ++ put_cm(cm_ctx); ++ break; ++ ++ case IBSCIF_CM_RTU: ++ cm_ctx = (struct ibscif_cm *)rep_ctx; ++ event_connection_established( cm_ctx ); ++ put_cm(cm_ctx); ++ break; ++ ++ default: ++ printk(KERN_ALERT PFX "%s: invalid CM cmd: %d\n", __func__, pdu->cm.cmd); ++ break; ++ } ++ ++ return 0; ++} ++ +diff -urN a7/drivers/infiniband/hw/scif/ibscif_cq.c a8/drivers/infiniband/hw/scif/ibscif_cq.c +--- a7/drivers/infiniband/hw/scif/ibscif_cq.c 1969-12-31 16:00:00.000000000 -0800 ++++ 
a8/drivers/infiniband/hw/scif/ibscif_cq.c 2015-02-23 10:14:37.483809663 -0800 +@@ -0,0 +1,313 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++static void ibscif_cq_tasklet(unsigned long cq_ptr) ++{ ++ struct ibscif_cq *cq = (struct ibscif_cq *)cq_ptr; ++ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); ++} ++ ++#ifdef MOFED ++struct ib_cq *ibscif_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, ++ struct ib_ucontext *context, struct ib_udata *udata) ++#else ++struct ib_cq *ibscif_create_cq(struct ib_device *ibdev, int entries, int comp_vector, ++ struct ib_ucontext *context, struct ib_udata *udata) ++#endif ++{ ++ struct ibscif_dev *dev = to_dev(ibdev); ++ struct ibscif_cq *cq; ++ int nbytes, npages; ++ int err; ++#ifdef MOFED ++ int entries = attr->cqe; ++#endif ++ ++ if (entries < 1 || entries > MAX_CQ_SIZE) ++ return ERR_PTR(-EINVAL); ++ ++ if (!atomic_add_unless(&dev->cq_cnt, 1, MAX_CQS)) ++ return ERR_PTR(-EAGAIN); ++ ++ cq = kzalloc(sizeof *cq, GFP_KERNEL); ++ if (!cq) { ++ atomic_dec(&dev->cq_cnt); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ spin_lock_init(&cq->lock); ++ tasklet_init(&cq->tasklet, ibscif_cq_tasklet, (unsigned long)cq); ++ cq->state = CQ_READY; ++ ++ nbytes = PAGE_ALIGN(entries * sizeof *cq->wc); ++ npages = nbytes >> PAGE_SHIFT; ++ ++ err = ibscif_reserve_quota(&npages); ++ if (err) ++ goto out; ++ ++ cq->wc = vzalloc(nbytes); /* Consider using vmalloc_user */ ++ if (!cq->wc) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ cq->ibcq.cqe = nbytes / sizeof *cq->wc; ++ ++ return &cq->ibcq; ++out: ++ ibscif_destroy_cq(&cq->ibcq); ++ return ERR_PTR(err); ++} ++ ++int ibscif_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) ++{ ++ struct ibscif_cq *cq = to_cq(ibcq); ++ struct ibscif_wc *old_wc, *new_wc; ++ int nbytes, old_npages, new_npages, i, err; ++ ++ if (cqe < 1 || cqe > MAX_CQ_SIZE) ++ return -EINVAL; ++ ++ nbytes = PAGE_ALIGN(cqe * sizeof *cq->wc); ++ new_npages = nbytes >> PAGE_SHIFT; ++ old_npages = PAGE_ALIGN(ibcq->cqe * sizeof *cq->wc) >> PAGE_SHIFT; ++ new_npages -= old_npages; ++ ++ if (new_npages == 0) ++ return 0; ++ ++ if (new_npages > 0) 
{ ++ err = ibscif_reserve_quota(&new_npages); ++ if (err) ++ return err; ++ } ++ ++ new_wc = vzalloc(nbytes); /* Consider using vmalloc_user */ ++ if (!new_wc) { ++ err = -ENOMEM; ++ goto out1; ++ } ++ cqe = nbytes / sizeof *cq->wc; ++ old_wc = cq->wc; ++ ++ spin_lock_bh(&cq->lock); ++ ++ if (cqe < cq->depth) { ++ err = -EBUSY; ++ goto out2; ++ } ++ ++ for (i = 0; i < cq->depth; i++) { ++ new_wc[i] = old_wc[cq->head]; ++ cq->head = (cq->head + 1) % ibcq->cqe; ++ } ++ ++ cq->wc = new_wc; ++ cq->head = 0; ++ cq->tail = cq->depth; ++ ibcq->cqe = cqe; ++ ++ spin_unlock_bh(&cq->lock); ++ ++ if (old_wc) ++ vfree(old_wc); ++ if (new_npages < 0) ++ ibscif_release_quota(-new_npages); ++ ++ return 0; ++out2: ++ spin_unlock_bh(&cq->lock); ++ vfree(new_wc); ++out1: ++ if (new_npages > 0) ++ ibscif_release_quota(new_npages); ++ return err; ++} ++ ++int ibscif_destroy_cq(struct ib_cq *ibcq) ++{ ++ struct ibscif_dev *dev = to_dev(ibcq->device); ++ struct ibscif_cq *cq = to_cq(ibcq); ++ ++ tasklet_kill(&cq->tasklet); ++ ++ if (cq->wc) ++ vfree(cq->wc); ++ ++ ibscif_release_quota(PAGE_ALIGN(ibcq->cqe * sizeof *cq->wc) >> PAGE_SHIFT); ++ ++ atomic_dec(&dev->cq_cnt); ++ ++ kfree(cq); ++ return 0; ++} ++ ++int ibscif_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) ++{ ++ struct ibscif_cq *cq = to_cq(ibcq); ++ struct ibscif_wq *wq; ++ int i, reap; ++ ++ /* ++ * The protocol layer holds WQ lock while processing a packet and acquires ++ * the CQ lock to append a work completion. To avoid a deadly embrace, do ++ * not hold the CQ lock when adjusting the WQ reap count. ++ */ ++ for (i = 0; (i < num_entries) && cq->depth; i++) { ++ ++ spin_lock_bh(&cq->lock); ++ entry[i] = cq->wc[cq->head].ibwc; ++ reap = cq->wc[cq->head].reap; ++ cq->depth--; ++ wq = cq->wc[cq->head].wq; ++ cq->head = (cq->head + 1) % ibcq->cqe; ++ spin_unlock_bh(&cq->lock); ++ ++ /* WQ may no longer exist or has been flushed. */ ++ if (wq) { ++ spin_lock_bh(&wq->lock); ++ wq->head = (wq->head + reap) % wq->size; ++ wq->depth -= reap; ++ wq->completions -= reap; ++ spin_unlock_bh(&wq->lock); ++ } ++ } ++ ++ return i; ++} ++ ++int ibscif_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify) ++{ ++ struct ibscif_cq *cq = to_cq(ibcq); ++ int ret; ++ ++ spin_lock_bh(&cq->lock); ++ ++ cq->arm |= notify & IB_CQ_SOLICITED_MASK; ++ ++ if (notify & IB_CQ_SOLICITED) ++ cq->solicited = 0; ++ ++ ret = (notify & IB_CQ_REPORT_MISSED_EVENTS) && cq->depth; ++ ++ spin_unlock_bh(&cq->lock); ++ ++ return ret; ++} ++ ++void ibscif_notify_cq(struct ibscif_cq *cq) ++{ ++ if (!cq->arm || !cq->depth) ++ return; ++ ++ spin_lock_bh(&cq->lock); ++ if ((cq->arm & IB_CQ_NEXT_COMP) || ((cq->arm & IB_CQ_SOLICITED) && cq->solicited)) { ++ cq->arm = 0; /* Disarm the CQ */ ++ spin_unlock_bh(&cq->lock); ++ tasklet_hi_schedule(&cq->tasklet); ++ } else ++ spin_unlock_bh(&cq->lock); ++} ++ ++void ibscif_clear_cqes(struct ibscif_cq *cq, struct ibscif_wq *wq) ++{ ++ struct ibscif_wc *wc; ++ int i, j; ++ ++ if (!cq) ++ return; ++ ++ /* ++ * Walk the CQ work completions and clear pointers to the ++ * given WQ to prevent retiring WQEs when CQEs are polled. ++ */ ++ spin_lock_bh(&cq->lock); ++ j = cq->head; ++ for (i = 0; i < cq->depth; i++) { ++ wc = &cq->wc[j]; ++ if (wc->wq == wq) ++ wc->wq = NULL; ++ j = (j + 1) % cq->ibcq.cqe; ++ } ++ spin_unlock_bh(&cq->lock); ++} ++ ++/* ++ * Acquire lock and reserve a completion queue entry. ++ * Note that cq->lock is held upon successful completion of this call. 
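++ * The matching unlock happens in ibscif_append_cqe(), which appends the reserved entry and releases cq->lock.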
++ * On error, WQs affiliated with this CQ should generate an event and ++ * transition to the error state; refer to IB Spec r1.2 C11-39 and C11-40. ++ */ ++int ibscif_reserve_cqe(struct ibscif_cq *cq, struct ibscif_wc **wc) ++{ ++ spin_lock_bh(&cq->lock); ++ ++ if (cq->state != CQ_READY) { ++ spin_unlock_bh(&cq->lock); ++ return -EIO; ++ } ++ if (!cq->ibcq.cqe) { ++ spin_unlock_bh(&cq->lock); ++ return -ENOSPC; ++ } ++ if (cq->depth == cq->ibcq.cqe) { ++ cq->state = CQ_ERROR; ++ spin_unlock_bh(&cq->lock); ++ ++ if (cq->ibcq.event_handler) { ++ struct ib_event record; ++ record.event = IB_EVENT_CQ_ERR; ++ record.device = cq->ibcq.device; ++ record.element.cq = &cq->ibcq; ++ cq->ibcq.event_handler(&record, cq->ibcq.cq_context); ++ } ++ return -ENOBUFS; ++ } ++ ++ *wc = &cq->wc[cq->tail]; ++ ++ return 0; ++} ++ ++/* ++ * Append a completion queue entry and release lock. ++ * Note that this function assumes that the cq->lock is currently held. ++ */ ++void ibscif_append_cqe(struct ibscif_cq *cq, struct ibscif_wc *wc, int solicited) ++{ ++ cq->solicited = !!(solicited || (wc->ibwc.status != IB_WC_SUCCESS)); ++ cq->tail = (cq->tail + 1) % cq->ibcq.cqe; ++ cq->depth++; ++ ++ spin_unlock_bh(&cq->lock); ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_driver.h a8/drivers/infiniband/hw/scif/ibscif_driver.h +--- a7/drivers/infiniband/hw/scif/ibscif_driver.h 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_driver.h 2015-02-23 10:14:37.483809663 -0800 +@@ -0,0 +1,787 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef IBSCIF_DRIVER_H ++#define IBSCIF_DRIVER_H ++ ++#include ++#include /* for idr routines */ ++#include /* for kthread routines */ ++#include /* for kmap_atomic */ ++#include /* for TC_PRIO_CONTROL */ ++#include /* for ARPHRD_ETHER */ ++#include /* for totalram_pages */ ++#include /* for proc_mkdir */ ++#include /* for LINUX_VERSION_CODE */ ++#include ++#include ++#include ++ ++/* these macros are defined in "linux/semaphore.h". ++ * however, they may be missing on older systems. 
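++ * Both fall back to a semaphore initialized to a count of 1, i.e. a binary mutex.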
++ */ ++#ifndef DECLARE_MUTEX ++#define DECLARE_MUTEX(name) \ ++ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) ++#endif ++ ++#ifndef init_MUTEX ++#define init_MUTEX(sem) sema_init(sem, 1) ++#endif ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0) ++ #include ++ ++ #define KMAP_ATOMIC(x,y) kmap_atomic(x) ++ #define KUNMAP_ATOMIC(x,y) kunmap_atomic(x) ++#else ++ #define KMAP_ATOMIC(x,y) kmap_atomic(x, y) ++ #define KUNMAP_ATOMIC(x,y) kunmap_atomic(x, y) ++#endif ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include "ibscif_protocol.h" ++ ++#define IBSCIF_MTU 4096 ++ ++#define IBSCIF_EP_TYPE_LISTEN 0 ++#define IBSCIF_EP_TYPE_COMM 1 ++ ++#define DRV_NAME "ibscif" ++#define PFX DRV_NAME ": " ++#define IBDEV_PFX DRV_NAME "" ++#define DRV_DESC "OpenFabrics IBSCIF Driver" ++#define DRV_VERSION "0.1" ++#define DRV_SIGNON DRV_DESC " v" DRV_VERSION ++#define DRV_BUILD " built " __DATE__ " " __TIME__ ++ ++#define UVERBS_ABI_VER 6 ++#define VENDOR_ID 0x8086 /* Intel Corporation */ ++#define DEVICE_ID 0 ++#define HW_REV 1 ++#define FW_REV IBSCIF_PROTOCOL_VER ++ ++/* ++ * Attribute limits. ++ * These limits are imposed on client requests, however, the actual values ++ * returned may be larger than these limits on some objects due to rounding. ++ * The definitions are intended to show the thinking behind the values. ++ * E.g., MAX_PDS defined as MAX_QPS is intended to allow each QP to be ++ * on a separate PD, although that is not a usage requirement. ++ */ ++#define MAX_QPS (64 * 1024) ++#define MAX_QP_SIZE (16 * 1024) ++#define MAX_CQS (MAX_QPS * 2) /* x2:send queues + recv queues */ ++#define MAX_CQ_SIZE (MAX_QP_SIZE * 4) /* or combined */ ++#define MAX_PDS MAX_QPS /* 1 per QP */ ++#if 0 ++#define MAX_MRS (MAX_QPS * 4) /* x4:local/remote,read/write */ ++#else ++#define MAX_MRS 16383 /* limited by IBSCIF_MR_MAX_KEY */ ++#endif ++#define MAX_MR_SIZE (2U * 1024 * 1024 * 1024) ++#define MAX_SGES (PAGE_SIZE / sizeof(struct ib_sge)) ++#define MAX_OR (MAX_QP_SIZE / 2) /* half outbound reqs */ ++#define MAX_IR MAX_OR /* balance inbound with outbound */ ++ ++extern int window_size; ++#define MIN_WINDOW_SIZE 4 /* Ack every window_size/MIN_WINDOW_SIZE packets */ ++ ++extern int rma_threshold; ++extern int fast_rdma; ++extern int blocking_send; ++extern int blocking_recv; ++extern int scif_loopback; ++extern int host_proxy; ++extern int new_ib_type; ++extern int verbose; ++extern int check_grh; ++ ++extern struct list_head devlist; ++extern struct semaphore devlist_mutex; ++ ++extern struct idr wiremap; ++extern rwlock_t wiremap_lock; ++ ++extern struct ib_dma_mapping_ops ibscif_dma_mapping_ops; ++ ++/* Match IB opcodes for copy in post_send; append driver specific values. 
*/ ++enum ibscif_wr_opcode { ++ WR_SEND = IB_WR_SEND, ++ WR_SEND_WITH_IMM = IB_WR_SEND_WITH_IMM, ++ WR_RDMA_WRITE = IB_WR_RDMA_WRITE, ++ WR_RDMA_WRITE_WITH_IMM = IB_WR_RDMA_WRITE_WITH_IMM, ++ WR_RDMA_READ = IB_WR_RDMA_READ, ++ WR_ATOMIC_CMP_AND_SWP = IB_WR_ATOMIC_CMP_AND_SWP, ++ WR_ATOMIC_FETCH_AND_ADD = IB_WR_ATOMIC_FETCH_AND_ADD, ++ WR_RDMA_READ_RSP, ++ WR_ATOMIC_RSP, ++ WR_RMA_RSP, ++ WR_UD, ++ NR_WR_OPCODES /* Must be last (for stats) */ ++}; ++ ++struct ibscif_stats { ++ unsigned long packets_sent; ++ unsigned long packets_rcvd; ++ unsigned long bytes_sent; ++ unsigned long bytes_rcvd; ++ unsigned long duplicates; ++ unsigned long tx_errors; ++ unsigned long sched_exhaust; ++ unsigned long unavailable; ++ unsigned long loopback; ++ unsigned long recv; ++ unsigned long recv_imm; ++ unsigned long wr_opcode[NR_WR_OPCODES]; ++ unsigned long fast_rdma_write; ++ unsigned long fast_rdma_read; ++ unsigned long fast_rdma_unavailable; ++ unsigned long fast_rdma_fallback; ++ unsigned long fast_rdma_force_ack; ++ unsigned long fast_rdma_tail_write; ++}; ++ ++#define DEV_STAT(dev, counter) dev->stats.counter ++ ++#define IBSCIF_MAX_DEVICES 16 ++#define IBSCIF_NAME_SIZE 12 ++ ++#define IBSCIF_NODE_ID_TO_LID(node_id) (node_id+1000) ++#define IBSCIF_LID_TO_NODE_ID(lid) (lid-1000) ++ ++struct ibscif_conn { ++ struct list_head entry; ++ atomic_t refcnt; ++ scif_epd_t ep; ++ unsigned short remote_node_id; ++ union ib_gid remote_gid; ++ struct ibscif_dev *dev; ++ int local_close; ++ int remote_close; ++}; ++ ++struct ibscif_listen { ++ struct iw_cm_id *cm_id; ++ struct list_head entry; ++ struct kref kref; ++ int port; ++}; ++ ++#define IBSCIF_MAX_PDATA_SIZE 256 ++struct ibscif_cm { ++ struct iw_cm_id *cm_id; ++ struct ibscif_conn *conn; ++ struct ibscif_listen *listen; ++ struct kref kref; ++ spinlock_t lock; ++ struct sockaddr_in local_addr; ++ struct sockaddr_in remote_addr; ++ unsigned short node_id; ++ unsigned short remote_node_id; ++ u32 qpn; ++ u32 remote_qpn; ++ int plen; ++ u8 pdata[IBSCIF_MAX_PDATA_SIZE]; ++ u64 peer_context; ++}; ++ ++struct ibscif_dev { ++ struct ib_device ibdev; ++ struct net_device *netdev; /* for RDMA CM support */ ++ struct list_head entry; ++ ++ char name[IBSCIF_NAME_SIZE]; ++ union ib_gid gid; ++ unsigned short node_id; ++ atomic_t refcnt; ++ scif_epd_t listen_ep; ++ struct list_head conn_list; ++ struct list_head mr_list; ++ struct semaphore mr_list_mutex; ++ ++ struct proc_dir_entry *procfs; ++ struct ibscif_stats stats; ++ ++ atomic_t pd_cnt; ++ atomic_t cq_cnt; ++ atomic_t qp_cnt; ++ atomic_t mr_cnt; ++ ++ atomic_t available; ++ atomic_t was_new; ++ ++ spinlock_t atomic_op; ++ ++ struct semaphore mutex; ++ struct list_head wq_list; /* List of WQ's on this device */ ++}; ++ ++struct ibscif_pd { ++ struct ib_pd ibpd; ++}; ++ ++struct ibscif_ah { ++ struct ib_ah ibah; ++ __be16 dlid; ++}; ++ ++struct ibscif_wc { ++ struct ib_wc ibwc; ++ int reap; ++ struct ibscif_wq *wq; ++}; ++ ++enum ibscif_cq_state { ++ CQ_READY, ++ CQ_ERROR ++}; ++ ++struct ibscif_cq { ++ struct ib_cq ibcq; ++ spinlock_t lock; ++ struct tasklet_struct tasklet; ++ enum ibscif_cq_state state; ++ enum ib_cq_notify_flags arm; ++ int solicited; ++ int head; ++ int tail; ++ int depth; ++ struct ibscif_wc *wc; ++}; ++ ++struct ibscif_ds { ++ struct ibscif_mr *mr; ++ u32 offset; ++ u32 length; ++ u32 lkey; ++ u32 in_use; ++ struct ibscif_mreg_info *current_mreg; ++}; ++ ++struct ibscif_segmentation { ++ struct ibscif_ds *current_ds; ++ u32 current_page_index; ++ u32 current_page_offset; ++ u32 
wr_length_remaining; ++ u32 ds_length_remaining; ++ u32 starting_seq; ++ u32 next_seq; ++ u32 ending_seq; ++}; ++ ++struct ibscif_reassembly { ++ struct ibscif_ds *current_ds; ++ u32 current_ds_offset; ++ u32 last_packet_seq; ++ u32 last_seen_seq; ++ __be32 immediate_data; ++ int final_length; ++ u16 opcode; ++}; ++ ++struct ibscif_sar { ++ struct ibscif_segmentation seg; ++ struct ibscif_reassembly rea; ++}; ++ ++enum ibscif_wr_state { ++ WR_WAITING, ++ WR_STARTED, ++ WR_WAITING_FOR_ACK, ++ WR_WAITING_FOR_RSP, ++ WR_LAST_SEEN, ++ WR_COMPLETED ++}; ++ ++struct ibscif_wr { ++ u64 id; ++ enum ibscif_wr_opcode opcode; ++ int length; ++ enum ib_send_flags flags; ++ ++ u32 msg_id; ++ enum ibscif_wr_state state; ++ struct ibscif_sar sar; ++ u32 use_rma; ++ u32 rma_id; ++ ++ union { ++ struct ibscif_send { ++ u32 immediate_data; ++ } send; ++ ++ struct ibscif_ud { ++ u16 remote_node_id; ++ u32 remote_qpn; ++ } ud; ++ ++ struct ibscif_read { ++ u64 remote_address; ++ int remote_length; ++ u32 rkey; ++ } read; ++ ++ struct ibscif_write { ++ u64 remote_address; ++ u32 rkey; ++ u32 immediate_data; ++ } write; ++ ++ struct ibscif_cmp_swp { ++ u64 cmp_operand; ++ u64 swp_operand; ++ u64 remote_address; ++ u32 rkey; ++ } cmp_swp; ++ ++ struct ibscif_fetch_add { ++ u64 add_operand; ++ u64 remote_address; ++ u32 rkey; ++ } fetch_add; ++ ++ struct ibscif_atomic_rsp { ++ u64 orig_data; ++ u16 opcode; ++ } atomic_rsp; ++ ++ struct ibscif_rma_rsp { ++ u32 xfer_length; ++ u32 error; ++ } rma_rsp; ++ }; ++ ++ u32 num_ds; ++ struct ibscif_ds ds_list[0]; /* Must be last */ ++}; ++ ++struct ibscif_tx_state { ++ u32 next_seq; ++ u32 last_ack_seq_recvd; ++ u32 next_msg_id; ++}; ++ ++struct ibscif_rx_state { ++ u32 last_in_seq; ++ u32 last_seq_acked; ++ int defer_in_process; ++}; ++ ++struct ibscif_wirestate { ++ struct ibscif_tx_state tx; ++ struct ibscif_rx_state rx; ++}; ++ ++struct ibscif_wire { ++ struct ibscif_wirestate sq; ++ struct ibscif_wirestate iq; ++}; ++ ++struct ibscif_wq { ++ struct list_head entry; ++ struct ibscif_qp *qp; ++ spinlock_t lock; ++ struct ibscif_wr *wr; ++ int head; ++ int tail; ++ int depth; ++ int size; ++ int max_sge; ++ int wr_size; ++ int completions; ++ int reap; ++ int next_wr; ++ int next_msg_id; ++ struct ibscif_wirestate *wirestate; ++ int fast_rdma_completions; ++ int ud_msg_id; ++}; ++ ++enum ibscif_qp_state { ++ QP_IDLE, ++ QP_CONNECTED, ++ QP_DISCONNECT, ++ QP_ERROR, ++ QP_RESET, ++ QP_IGNORE, ++ NR_QP_STATES /* Must be last */ ++}; ++ ++enum ibscif_schedule { ++ SCHEDULE_RESUME = 1 << 0, ++ SCHEDULE_RETRY = 1 << 1, ++ SCHEDULE_TIMEOUT = 1 << 2, ++ SCHEDULE_SQ = 1 << 6, ++ SCHEDULE_IQ = 1 << 7 ++}; ++ ++struct ibscif_qp { ++ int magic; /* Must be first */ ++# define QP_MAGIC 0x5b51505d /* "[QP]" */ ++ struct kref ref; ++ struct completion done; ++ struct ib_qp ibqp; ++ struct ibscif_dev *dev; ++ enum ib_access_flags access; ++ enum ib_sig_type sq_policy; ++ enum ibscif_schedule schedule; ++ struct ibscif_wire wire; ++ int mtu; ++ ++ int max_or; ++ atomic_t or_depth; ++ atomic_t or_posted; ++ ++ struct semaphore modify_mutex; ++ spinlock_t lock; ++ enum ibscif_qp_state state; ++ u16 local_node_id; ++ u16 remote_node_id; ++ struct ibscif_conn *conn; ++ u32 remote_qpn; ++ int loopback; ++ struct ibscif_wq sq; ++ struct ibscif_wq rq; ++ struct ibscif_wq iq; ++ int in_scheduler; ++ ++ struct ibscif_conn *ud_conn[IBSCIF_MAX_DEVICES]; ++ struct ibscif_cm *cm_context; ++}; ++ ++#define is_sq(wq) (wq == &wq->qp->sq) ++#define is_rq(wq) (wq == &wq->qp->rq) ++#define is_iq(wq) (wq == 
&wq->qp->iq) ++ ++/* Info about MR registered via SCIF API */ ++struct ibscif_mreg_info { ++ struct list_head entry; ++ struct ibscif_conn *conn; ++ u64 offset; ++ u64 aligned_offset; ++ u32 aligned_length; ++}; ++ ++struct ibscif_mr { ++ int magic; /* Must be first */ ++# define MR_MAGIC 0x5b4d525d /* "[MR]" */ ++ struct list_head entry; ++ struct kref ref; ++ struct completion done; ++ struct ib_mr ibmr; ++ struct ib_umem *umem; ++ enum ib_access_flags access; ++ u64 addr; ++ u32 length; ++ int npages; ++ struct page **page; ++ scif_pinned_pages_t pinned_pages; ++ struct list_head mreg_list; ++}; ++ ++/* Canonical virtual address on X86_64 falls in the range 0x0000000000000000-0x00007fffffffffff ++ * and 0xffff800000000000-0xffffffffffffffff. The range 0x0000800000000000-0xffff7fffffffffff ++ * are unused. This basically means only 48 bits are used and the highest 16 bits are just sign ++ * extensions. We can put rkey into these 16 bits and use the result as the "offset" of SCIF's ++ * registered address space. By doing this, the SCIF_MAP_FIXED flag can be used so that the offset ++ * can be calculated directly from rkey and virtual address w/o using the "remote registration cache" ++ * mechanism. ++ * ++ * SCIF reserve the top 2 bits of the offset for internal uses, leaving 14 bits for rkey. ++ */ ++#define IBSCIF_MR_MAX_KEY (0x3FFF) ++#define IBSCIF_MR_VADDR_MASK (0x0000FFFFFFFFFFFFUL) ++#define IBSCIF_MR_SIGN_MASK (0x0000800000000000UL) ++#define IBSCIF_MR_SIGN_EXT (0xFFFF000000000000UL) ++#define IBSCIF_MR_RKEY_MASK (0x3FFF000000000000UL) ++ ++#define IBSCIF_MR_VADDR_TO_OFFSET(rkey, vaddr) ((((unsigned long)rkey) << 48) | \ ++ (vaddr & IBSCIF_MR_VADDR_MASK)) ++ ++#define IBSCIF_MR_OFFSET_TO_VADDR(offset) ((offset & IBSCIF_MR_SIGN_MASK) ? \ ++ (offset | IBSCIF_MR_SIGN_EXT) : \ ++ (offset & IBSCIF_MR_VADDR_MASK)) ++ ++#define IBSCIF_MR_OFFSET_TO_RKEY(offset) ((offset & IBSCIF_MR_RKEY_MASK) >> 48) ++ ++#define TO_OBJ(name, src, dst, field) \ ++static inline struct dst *name(struct src *field) \ ++{ \ ++ return container_of(field, struct dst, field); \ ++} ++TO_OBJ(to_dev, ib_device, ibscif_dev, ibdev) ++TO_OBJ(to_pd, ib_pd, ibscif_pd, ibpd) ++TO_OBJ(to_cq, ib_cq, ibscif_cq, ibcq) ++TO_OBJ(to_qp, ib_qp, ibscif_qp, ibqp) ++TO_OBJ(to_mr, ib_mr, ibscif_mr, ibmr) ++TO_OBJ(to_ah, ib_ah, ibscif_ah, ibah) ++ ++#define OBJ_GET(obj, type) \ ++static inline struct ibscif_##obj *ibscif_get_##obj(int id) \ ++{ \ ++ struct ibscif_##obj *obj; \ ++ read_lock_bh(&wiremap_lock); \ ++ obj = idr_find(&wiremap, id); \ ++ if (likely(obj)) { \ ++ if (likely(obj->magic == type)) \ ++ kref_get(&obj->ref); \ ++ else \ ++ obj = ERR_PTR(-ENXIO); \ ++ } else \ ++ obj = ERR_PTR(-ENOENT); \ ++ read_unlock_bh(&wiremap_lock); \ ++ return obj; \ ++} ++OBJ_GET(mr, MR_MAGIC) ++OBJ_GET(qp, QP_MAGIC) ++ ++void ibscif_complete_mr(struct kref *kref); ++void ibscif_complete_qp(struct kref *kref); ++ ++#define OBJ_PUT(obj) \ ++static inline void ibscif_put_##obj(struct ibscif_##obj *obj) \ ++{ \ ++ if (likely(obj)) \ ++ kref_put(&obj->ref, ibscif_complete_##obj); \ ++} ++OBJ_PUT(mr) ++OBJ_PUT(qp) ++ ++#define RHEL61_AND_ABOVE 0 ++#if defined(RHEL_MAJOR) && defined(RHEL_MINOR) ++#if (RHEL_MAJOR==6) && (RHEL_MINOR>0) ++#undef RHEL61_AND_ABOVE ++#define RHEL61_AND_ABOVE 1 ++#endif ++#endif ++ ++#if (LINUX_VERSION_CODEwr + (wq->wr_size * index)); ++} ++ ++/* This function assumes the WQ is protected by a lock. 
*/ ++static inline void ibscif_append_wq(struct ibscif_wq *wq) ++{ ++ wq->tail = (wq->tail + 1) % wq->size; ++ wq->depth++; ++ wq->next_msg_id++; ++} ++ ++static inline void ibscif_clear_ds_ref(struct ibscif_ds *ds) ++{ ++ if (ds->in_use) { ++ ds->in_use = 0; ++ ibscif_put_mr(ds->mr); ++ } ++} ++ ++static inline void ibscif_clear_ds_refs(struct ibscif_ds *ds, int num_ds) ++{ ++ while(num_ds--) ++ ibscif_clear_ds_ref(ds++); ++} ++ ++static inline enum ib_wc_opcode to_ib_wc_opcode(enum ib_wr_opcode opcode) ++{ ++ /* SQ only - RQ is either IB_WC_RECV or IB_WC_RECV_RDMA_WITH_IMM. */ ++ switch (opcode) { ++ case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE; ++ case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE; ++ case IB_WR_SEND: return IB_WC_SEND; ++ case IB_WR_SEND_WITH_IMM: return IB_WC_SEND; ++ case IB_WR_RDMA_READ: return IB_WC_RDMA_READ; ++ case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP; ++ case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD; ++ default: return -1; ++ } ++} ++ ++static inline void *ibscif_map_src(struct page *page) ++{ ++ return KMAP_ATOMIC(page, KM_SOFTIRQ0); ++} ++ ++static inline void *ibscif_map_dst(struct page *page) ++{ ++ return KMAP_ATOMIC(page, KM_SOFTIRQ1); ++} ++ ++static inline void ibscif_unmap_src(struct page *page, void *addr) ++{ ++ if (likely(addr)) ++ KUNMAP_ATOMIC(addr, KM_SOFTIRQ0); ++} ++ ++static inline void ibscif_unmap_dst(struct page *page, void *addr) ++{ ++ if (likely(addr)) ++ KUNMAP_ATOMIC(addr, KM_SOFTIRQ1); ++ if (likely(page)) { ++ flush_dcache_page(page); ++ if (!PageReserved(page)) ++ set_page_dirty(page); ++ } ++} ++ ++#ifdef IBSCIF_PERF_TEST ++#define IBSCIF_PERF_SAMPLE(counter,next) ibscif_perf_sample(counter,next) ++#else ++#define IBSCIF_PERF_SAMPLE(counter,next) ++#endif ++ ++int ibscif_atomic_copy(void *dst_addr, void *src_addr, u32 copy_len, int head_copied); ++ ++int ibscif_wiremap_add(void *obj, int *id); ++void ibscif_wiremap_del(int id); ++ ++int ibscif_dev_init(void); ++void ibscif_protocol_init_pre(void); ++void ibscif_protocol_init_post(void); ++ ++void ibscif_dev_cleanup(void); ++void ibscif_protocol_cleanup(void); ++ ++int ibscif_procfs_add_dev(struct ibscif_dev *dev); ++void ibscif_procfs_remove_dev(struct ibscif_dev *dev); ++ ++int ibscif_reserve_quota(int *npages); ++void ibscif_release_quota(int npages); ++ ++void ibscif_scheduler_add_qp(struct ibscif_qp *qp); ++void ibscif_scheduler_remove_qp(struct ibscif_qp *qp); ++void ibscif_schedule(struct ibscif_wq *wq); ++ ++struct ib_ah *ibscif_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); ++int ibscif_destroy_ah(struct ib_ah *ibah); ++ ++struct ib_pd *ibscif_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); ++int ibscif_dealloc_pd(struct ib_pd *ibpd); ++ ++struct ib_qp *ibscif_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attr, struct ib_udata *udata); ++int ibscif_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); ++int ibscif_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); ++int ibscif_destroy_qp(struct ib_qp *ibqp); ++void ibscif_qp_internal_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason); ++void ibscif_qp_remote_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason); ++void ibscif_qp_add_ud_conn(struct ibscif_qp *qp, struct ibscif_conn *conn); ++ ++#ifdef MOFED ++struct ib_cq *ibscif_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, ++ struct ib_ucontext *context, struct 
ib_udata *udata); ++#else ++struct ib_cq *ibscif_create_cq(struct ib_device *ibdev, int entries, int comp_vector, ++ struct ib_ucontext *context, struct ib_udata *udata); ++#endif ++int ibscif_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); ++int ibscif_destroy_cq(struct ib_cq *ibcq); ++int ibscif_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); ++int ibscif_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify); ++void ibscif_notify_cq(struct ibscif_cq *cq); ++void ibscif_clear_cqes(struct ibscif_cq *cq, struct ibscif_wq *wq); ++int ibscif_reserve_cqe(struct ibscif_cq *cq, struct ibscif_wc **wc); ++void ibscif_append_cqe(struct ibscif_cq *cq, struct ibscif_wc *wc, int solicited); ++ ++struct ib_mr *ibscif_get_dma_mr(struct ib_pd *ibpd, int access); ++struct ib_mr *ibscif_reg_phys_mr(struct ib_pd *ibpd, struct ib_phys_buf *phys_buf_array, ++ int num_phys_buf, int access, u64 *iova_start); ++#ifdef MOFED ++struct ib_mr *ibscif_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, ++ u64 virt_addr, int access, struct ib_udata *udata, int mr_id); ++#else ++struct ib_mr *ibscif_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, ++ u64 virt_addr, int access, struct ib_udata *udata); ++#endif ++int ibscif_dereg_mr(struct ib_mr *ibmr); ++struct ibscif_mr *ibscif_validate_mr(u32 key, u64 addr, int length, ++ struct ib_pd *ibpd, enum ib_access_flags access); ++struct ibscif_mreg_info *ibscif_mr_get_mreg(struct ibscif_mr *mr, struct ibscif_conn *conn); ++void ibscif_refresh_mreg( struct ibscif_conn *conn ); ++ ++int ibscif_post_send(struct ib_qp *ibqp, struct ib_send_wr *ibwr, struct ib_send_wr **bad_wr); ++int ibscif_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ibwr, struct ib_recv_wr **bad_wr); ++ ++void ibscif_send_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason); ++void ibscif_send_close(struct ibscif_conn *conn); ++void ibscif_send_reopen(struct ibscif_conn *conn); ++ ++void ibscif_loopback_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason); ++void ibscif_loopback(struct ibscif_wq *sq); ++ ++int ibscif_xmit_wr(struct ibscif_wq *wq, struct ibscif_wr *wr, int tx_limit, int retransmit, ++ u32 from_seq, u32 *posted); ++int ibscif_process_sq_completions(struct ibscif_qp *qp); ++ ++struct ibscif_conn *ibscif_get_conn( int node_id, int remote_node_id, int find_local_peer ); ++void ibscif_put_conn( struct ibscif_conn *conn ); ++void ibscif_do_accept(struct ibscif_dev *dev); ++void ibscif_get_pollep_list(struct scif_pollepd *polleps, struct ibscif_dev **devs, ++ int *types, struct ibscif_conn **conns, int *count); ++void ibscif_refresh_pollep_list(void); ++void ibscif_get_ep_list(scif_epd_t *eps, int *count); ++void ibscif_remove_ep(struct ibscif_dev *dev, scif_epd_t ep); ++void ibscif_free_conn(struct ibscif_conn *conn); ++int ibscif_cleanup_idle_conn( void ); ++void ibscif_perf_sample(int counter, int next); ++ ++int ibscif_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); ++int ibscif_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); ++int ibscif_cm_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); ++int ibscif_cm_create_listen(struct iw_cm_id *cm_id, int backlog); ++int ibscif_cm_destroy_listen(struct iw_cm_id *cm_id); ++struct ib_qp *ibscif_cm_get_qp(struct ib_device *ibdev, int qpn); ++void ibscif_cm_add_ref(struct ib_qp *ibqp); ++void ibscif_cm_rem_ref(struct ib_qp *ibqp); ++void ibscif_cm_async_callback(void *cm_context); ++int ibscif_process_cm_skb(struct sk_buff 
*skb, struct ibscif_conn *conn); ++int ibscif_send_cm_req(struct ibscif_cm *cm_ctx); ++int ibscif_send_cm_rep(struct ibscif_cm *cm_ctx); ++int ibscif_send_cm_rej(struct ibscif_cm *cm_ctx, const void *pdata, u8 plen); ++int ibscif_send_cm_rtu(struct ibscif_cm *cm_ctx); ++ ++#endif /* IBSCIF_DRIVER_H */ +diff -urN a7/drivers/infiniband/hw/scif/ibscif_loopback.c a8/drivers/infiniband/hw/scif/ibscif_loopback.c +--- a7/drivers/infiniband/hw/scif/ibscif_loopback.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_loopback.c 2015-02-23 10:14:37.484809663 -0800 +@@ -0,0 +1,582 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include "ibscif_driver.h" ++ ++struct ibscif_seg { ++ enum ib_access_flags access; ++ struct ibscif_ds *ds; ++ struct ibscif_mr *mr; ++ struct page **page; ++ void *addr; ++ u32 offset; ++ u32 ds_len; ++ u32 pg_len; ++ void *(*map)(struct page *page); ++ void (*unmap)(struct page *page, void *addr); ++}; ++ ++static void ibscif_seg_init(struct ibscif_seg *seg, struct ibscif_ds *ds, ++ void *(*map)(struct page *page), void (*unmap)(struct page *page, void *addr), ++ enum ib_access_flags access) ++{ ++ memset(seg, 0, sizeof *seg); ++ seg->ds = ds; ++ seg->map = map; ++ seg->unmap = unmap; ++ seg->access = access; ++} ++ ++static void ibscif_seg_fini(struct ibscif_seg *seg) ++{ ++ seg->unmap(*seg->page, seg->addr); ++ if (likely(seg->mr)) ++ ibscif_put_mr(seg->mr); ++} ++ ++static int ibscif_seg_set(struct ibscif_seg *seg, u32 length, u32 copy_len) ++{ ++ struct page **prev_page; ++ ++ if (!seg->ds_len) { ++ ++ if (seg->mr) ++ ibscif_put_mr(seg->mr); ++ ++ seg->mr = ibscif_get_mr(seg->ds->lkey); ++ if (unlikely(IS_ERR(seg->mr))) ++ return PTR_ERR(seg->mr); ++ ++ if (unlikely(seg->access && !(seg->mr->access & seg->access))) ++ return -EACCES; ++ ++ prev_page = seg->page; ++ seg->offset = seg->ds->offset + (seg->mr->addr & ~PAGE_MASK); ++ seg->page = &seg->mr->page[seg->offset >> PAGE_SHIFT]; ++ seg->offset &= ~PAGE_MASK; ++ seg->ds_len = seg->ds->length; ++ seg->pg_len = min(seg->ds_len, (u32)PAGE_SIZE - seg->offset); ++ seg->pg_len = min(seg->pg_len, length); ++ ++ if (seg->page != prev_page) ++ seg->addr = seg->map(*seg->page) + seg->offset; ++ ++ seg->ds++; ++ ++ } else if (!seg->pg_len) { ++ ++ seg->unmap(*seg->page, seg->addr); ++ ++ seg->page++; ++ seg->addr = seg->map(*seg->page); ++ seg->pg_len = min(seg->ds_len, (u32)PAGE_SIZE); ++ seg->pg_len = min(seg->pg_len, length); ++ } else ++ seg->addr += copy_len; ++ ++ return 0; ++} ++ ++static inline int ibscif_seg_copy(struct ibscif_seg *dst, struct ibscif_seg *src, u32 length, int head_copied) ++{ ++ src->ds_len -= length; ++ src->pg_len -= length; ++ ++ dst->ds_len -= length; ++ dst->pg_len -= length; ++ ++ return ibscif_atomic_copy(dst->addr, src->addr, length, head_copied); ++} ++ ++/* ++ * Copy data from the source to the destination data segment list. ++ * This is a bit complicated since we must map and copy each page ++ * individually and because each data segment can be split across ++ * multiple pages within the memory region as illustrated below: ++ * ++ * +---page---+ +---page---+ +---page---+ ++ * | .~~mr~~~|~~~|~~~~~~~~~~|~~~|~~~~~~. | ++ * | | | | [==ds===|===|====] | | ++ * | '~~~~~~~|~~~|~~~~~~~~~~|~~~|~~~~~~' | ++ * +----------+ +----------+ +----------+ ++ * ++ * For example, due to different buffer page offsets, copying data ++ * between the following buffers will result in five separate copy ++ * operations as shown by the numeric labels below: ++ * ++ * +----------+ +----------+ ++ * | | | | ++ * |1111111111| | | ++ * |2222222222| |1111111111| ++ * +----------+ +----------+ ++ * ++ * +----------+ +----------+ ++ * |3333333333| |2222222222| ++ * |3333333333| |3333333333| ++ * |4444444444| |3333333333| ++ * +----------+ +----------+ ++ * ++ * +----------+ +----------+ ++ * |5555555555| |4444444444| ++ * | | |5555555555| ++ * | | | | ++ * +----------+ +----------+ ++ * ++ * The source and destination data segment list lengths are ++ * assumed to have been validated outside of this function. 
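++ * (ibscif_wr_ds() checks each posted sg_list entry against its MR when the
++ * ds_list is built, and the loopback RDMA paths build their single remote
++ * segment through ibscif_validate_mr() before calling here.)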
++ */ ++static int ibscif_dscopy(struct ibscif_ds *dst_ds, struct ibscif_ds *src_ds, u32 length) ++{ ++ struct ibscif_seg src, dst; ++ int head_copied; ++ u32 copy_len; ++ int err = 0; ++ ++ ibscif_seg_init(&src, src_ds, ibscif_map_src, ibscif_unmap_src, 0); ++ ibscif_seg_init(&dst, dst_ds, ibscif_map_dst, ibscif_unmap_dst, IB_ACCESS_LOCAL_WRITE); ++ ++ head_copied = 0; ++ for (copy_len = 0; length; length -= copy_len) { ++ ++ err = ibscif_seg_set(&src, length, copy_len); ++ if (unlikely(err)) ++ break; ++ err = ibscif_seg_set(&dst, length, copy_len); ++ if (unlikely(err)) ++ break; ++ ++ copy_len = min(src.pg_len, dst.pg_len); ++ head_copied = ibscif_seg_copy(&dst, &src, copy_len, head_copied); ++ } ++ ++ ibscif_seg_fini(&src); ++ ibscif_seg_fini(&dst); ++ ++ return err; ++} ++ ++/* Hold sq->lock during this call for synchronization. */ ++static int ibscif_complete_sq_wr(struct ibscif_wq *sq, struct ibscif_wr *send_wr, enum ib_wc_status status) ++{ ++ struct ibscif_qp *qp = sq->qp; ++ struct ibscif_wc *wc; ++ int err; ++ ++ ibscif_clear_ds_refs(send_wr->ds_list, send_wr->num_ds); ++ sq->completions++; ++ sq->reap++; ++ ++ if (send_wr->flags & IB_SEND_SIGNALED) { ++ struct ibscif_cq *cq = to_cq(qp->ibqp.send_cq); ++ ++ err = ibscif_reserve_cqe(cq, &wc); ++ if (unlikely(err)) ++ return err; ++ ++ wc->ibwc.qp = &qp->ibqp; ++ wc->ibwc.src_qp = qp->remote_qpn; ++ wc->ibwc.wr_id = send_wr->id; ++ wc->ibwc.opcode = to_ib_wc_opcode(send_wr->opcode); ++ wc->ibwc.status = status; ++ wc->ibwc.ex.imm_data = 0; ++ wc->ibwc.port_num = 1; ++ ++ if ((enum ib_wr_opcode)send_wr->opcode == IB_WR_RDMA_READ) ++ wc->ibwc.byte_len = send_wr->read.remote_length; ++ else if (((enum ib_wr_opcode)send_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) || ++ ((enum ib_wr_opcode)send_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) ++ wc->ibwc.byte_len = sizeof send_wr->atomic_rsp.orig_data; ++ else ++ wc->ibwc.byte_len = send_wr->length; ++ ++ wc->wq = sq; ++ wc->reap = sq->reap; ++ sq->reap = 0; ++ ++ ibscif_append_cqe(cq, wc, 0); ++ } ++ ++ return 0; ++} ++ ++/* Hold rq->lock during this call for synchronization. */ ++static int ibscif_complete_rq_wr(struct ibscif_wq *rq, struct ibscif_wr *recv_wr, ++ struct ibscif_wr *send_wr, enum ib_wc_status status) ++{ ++ struct ibscif_qp *qp = rq->qp; ++ struct ibscif_cq *cq = to_cq(qp->ibqp.recv_cq); ++ struct ibscif_wc *wc; ++ int err; ++ ++ ibscif_clear_ds_refs(recv_wr->ds_list, recv_wr->num_ds); ++ ++ err = ibscif_reserve_cqe(cq, &wc); ++ if (unlikely(err)) ++ return err; ++ ++ wc->ibwc.qp = &qp->ibqp; ++ wc->ibwc.src_qp = qp->remote_qpn; ++ wc->ibwc.wr_id = recv_wr->id; ++ wc->ibwc.status = status; ++ wc->ibwc.byte_len = send_wr->length; ++ wc->ibwc.port_num = 1; ++ ++ if ((enum ib_wr_opcode)send_wr->opcode == IB_WR_SEND_WITH_IMM) { ++ DEV_STAT(qp->dev, recv_imm++); ++ wc->ibwc.opcode = IB_WC_RECV_RDMA_WITH_IMM; ++ wc->ibwc.ex.imm_data = cpu_to_be32(send_wr->send.immediate_data); ++ } else if ((enum ib_wr_opcode)send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { ++ DEV_STAT(qp->dev, recv_imm++); ++ wc->ibwc.opcode = IB_WC_RECV_RDMA_WITH_IMM; ++ wc->ibwc.ex.imm_data = cpu_to_be32(send_wr->write.immediate_data); ++ } else { ++ DEV_STAT(qp->dev, recv++); ++ wc->ibwc.opcode = IB_WC_RECV; ++ wc->ibwc.ex.imm_data = 0; ++ } ++ ++ wc->wq = rq; ++ wc->reap = 1; ++ rq->completions++; ++ ++ ibscif_append_cqe(cq, wc, !!(send_wr->flags & IB_SEND_SOLICITED)); ++ ++ return 0; ++} ++ ++/* Hold wq lock during this call for synchronization. 
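++ * Returns -ENOTCONN unless the QP is connected and -EACCES if a required
++ * access flag is missing; when 'wr' is non-NULL, the next uncompleted entry
++ * at (head + completions) % size is peeked, with -ENOSPC/-ENOBUFS reported
++ * for a zero-sized or exhausted queue.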
*/ ++static int ibscif_validate_wq(struct ibscif_wq *wq, struct ibscif_wr **wr, enum ib_access_flags access) ++{ ++ if (unlikely(wq->qp->state != QP_CONNECTED)) ++ return -ENOTCONN; ++ ++ if (unlikely(access && !(wq->qp->access & access))) ++ return -EACCES; ++ ++ if (wr) { ++ int next; ++ ++ if (unlikely(!wq->size)) ++ return -ENOSPC; ++ ++ next = (wq->head + wq->completions) % wq->size; ++ ++ if (unlikely(next == wq->tail)) ++ return -ENOBUFS; ++ ++ *wr = ibscif_get_wr(wq, next); ++ } ++ ++ return 0; ++} ++ ++static int ibscif_loopback_send(struct ibscif_wq *sq, struct ibscif_wq *rq, struct ibscif_wr *send_wr) ++{ ++ struct ibscif_wr *recv_wr; ++ int err; ++ ++ spin_lock_bh(&rq->lock); ++ ++ err = ibscif_validate_wq(rq, &recv_wr, 0); ++ if (unlikely(err)) ++ goto out; ++ ++ if (likely(send_wr->length)) { ++ if (unlikely(send_wr->length > recv_wr->length)) { ++ err = -EMSGSIZE; ++ goto out; ++ } ++ ++ err = ibscif_dscopy(recv_wr->ds_list, send_wr->ds_list, send_wr->length); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ err = ibscif_complete_rq_wr(rq, recv_wr, send_wr, IB_WC_SUCCESS); ++out: ++ spin_unlock_bh(&rq->lock); ++ ++ return err; ++} ++ ++static int ibscif_loopback_write(struct ibscif_wq *sq, struct ibscif_wq *rq, struct ibscif_wr *write_wr) ++{ ++ struct ibscif_wr *recv_wr = NULL; ++ struct ibscif_mr *dst_mr = ERR_PTR(-ENOENT); ++ int err; ++ ++ spin_lock_bh(&rq->lock); ++ ++ err = ibscif_validate_wq(rq, ((enum ib_wr_opcode)write_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ? ++ &recv_wr : NULL, IB_ACCESS_REMOTE_WRITE); ++ if (unlikely(err)) ++ goto out; ++ ++ if (likely(write_wr->length)) { ++ struct ibscif_ds dst_ds; ++ ++ dst_mr = ibscif_validate_mr(write_wr->write.rkey, write_wr->write.remote_address, ++ write_wr->length, rq->qp->ibqp.pd, IB_ACCESS_REMOTE_WRITE); ++ if (unlikely(IS_ERR(dst_mr))) { ++ err = PTR_ERR(dst_mr); ++ goto out; ++ } ++ ++ dst_ds.mr = dst_mr; ++ dst_ds.offset = write_wr->write.remote_address - dst_mr->addr; ++ dst_ds.length = write_wr->length; ++ dst_ds.lkey = dst_mr->ibmr.lkey; ++ ++ err = ibscif_dscopy(&dst_ds, write_wr->ds_list, dst_ds.length); ++ if (unlikely(err)) ++ goto out; ++ } else ++ err = 0; ++ ++ if (recv_wr) ++ err = ibscif_complete_rq_wr(rq, recv_wr, write_wr, IB_WC_SUCCESS); ++out: ++ if (likely(!IS_ERR(dst_mr))) ++ ibscif_put_mr(dst_mr); ++ ++ spin_unlock_bh(&rq->lock); ++ ++ return err; ++} ++ ++static int ibscif_loopback_read(struct ibscif_wq *sq, struct ibscif_wq *iq, struct ibscif_wr *read_wr) ++{ ++ struct ibscif_mr *src_mr = ERR_PTR(-ENOENT); ++ int err; ++ ++ spin_lock_bh(&iq->lock); ++ ++ err = ibscif_validate_wq(iq, NULL, IB_ACCESS_REMOTE_READ); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!iq->size) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ ++ if (likely(read_wr->read.remote_length)) { ++ struct ibscif_ds src_ds; ++ ++ src_mr = ibscif_validate_mr(read_wr->read.rkey, read_wr->read.remote_address, ++ read_wr->read.remote_length, iq->qp->ibqp.pd, ++ IB_ACCESS_REMOTE_READ); ++ if (unlikely(IS_ERR(src_mr))) { ++ err = PTR_ERR(src_mr); ++ goto out; ++ } ++ ++ src_ds.mr = src_mr; ++ src_ds.offset = read_wr->read.remote_address - src_mr->addr; ++ src_ds.length = read_wr->read.remote_length; ++ src_ds.lkey = src_mr->ibmr.lkey; ++ ++ err = ibscif_dscopy(read_wr->ds_list, &src_ds, src_ds.length); ++ } else ++ err = 0; ++out: ++ if (likely(!IS_ERR(src_mr))) ++ ibscif_put_mr(src_mr); ++ ++ spin_unlock_bh(&iq->lock); ++ ++ atomic_dec(&sq->qp->or_posted); ++ ++ return err; ++} ++ ++static int ibscif_loopback_atomic(struct ibscif_wq *sq, struct 
ibscif_wq *iq, struct ibscif_wr *atomic_wr) ++{ ++ struct ibscif_mr *src_mr = ERR_PTR(-ENOENT); ++ struct ibscif_ds src_ds; ++ struct page *src_page; ++ u64 *src_addr, addr; ++ u32 src_offset, rkey; ++ int err; ++ ++ if ((enum ib_wr_opcode)atomic_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { ++ addr = atomic_wr->cmp_swp.remote_address; ++ rkey = atomic_wr->cmp_swp.rkey; ++ } else { ++ addr = atomic_wr->fetch_add.remote_address; ++ rkey = atomic_wr->fetch_add.rkey; ++ } ++ ++ spin_lock_bh(&iq->lock); ++ ++ err = ibscif_validate_wq(iq, NULL, IB_ACCESS_REMOTE_ATOMIC); ++ if (unlikely(err)) ++ goto out; ++ ++ if (!iq->size) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ ++ src_mr = ibscif_validate_mr(rkey, addr, sizeof atomic_wr->atomic_rsp.orig_data, ++ iq->qp->ibqp.pd, IB_ACCESS_REMOTE_ATOMIC); ++ if (unlikely(IS_ERR(src_mr))) { ++ err = PTR_ERR(src_mr); ++ goto out; ++ } ++ ++ /* Build a source data segment to copy the original data. */ ++ src_ds.mr = src_mr; ++ src_ds.offset = addr - src_mr->addr; ++ src_ds.length = sizeof atomic_wr->atomic_rsp.orig_data; ++ src_ds.lkey = src_mr->ibmr.lkey; ++ ++ /* Determine which page to map. */ ++ src_offset = src_ds.offset + (src_mr->addr & ~PAGE_MASK); ++ src_page = src_mr->page[src_offset >> PAGE_SHIFT]; ++ src_offset &= ~PAGE_MASK; ++ ++ /* Lock to perform the atomic operation atomically. */ ++ spin_lock_bh(&iq->qp->dev->atomic_op); ++ ++ /* Copy the original data; this handles any ds_list crossing. */ ++ err = ibscif_dscopy(atomic_wr->ds_list, &src_ds, sizeof atomic_wr->atomic_rsp.orig_data); ++ if (likely(!err)) { ++ src_addr = ibscif_map_src(src_page) + src_offset; ++ if ((enum ib_wr_opcode)atomic_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ++ *src_addr += atomic_wr->fetch_add.add_operand; ++ else if (*src_addr == atomic_wr->cmp_swp.cmp_operand) ++ *src_addr = atomic_wr->cmp_swp.swp_operand; ++ ibscif_unmap_src(src_page, src_addr); ++ } ++ ++ /* Atomic operation is complete. */ ++ spin_unlock_bh(&iq->qp->dev->atomic_op); ++out: ++ if (likely(!IS_ERR(src_mr))) ++ ibscif_put_mr(src_mr); ++ ++ spin_unlock_bh(&iq->lock); ++ ++ atomic_dec(&sq->qp->or_posted); ++ ++ return err; ++} ++ ++void ibscif_loopback_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason) ++{ ++ struct ibscif_qp *remote_qp; ++ ++ remote_qp = ibscif_get_qp(qp->remote_qpn); ++ if (unlikely(IS_ERR(remote_qp))) ++ return; ++ ++ /* Don't bother if the SQ is connected to the RQ on the same QP. */ ++ if (remote_qp != qp) ++ ibscif_qp_remote_disconnect(remote_qp, reason); ++ ++ ibscif_put_qp(remote_qp); ++} ++ ++/* ++ * Loopback QPs connected through the same MAC address. ++ * This includes an SQ connected to the RQ on the same QP. 
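++ * The SQ is drained in order and each work request is executed synchronously
++ * via ibscif_dscopy() against the remote QP's RQ or IQ; -ENOBUFS triggers a
++ * schedule() and retry, while any other failure disconnects the QP.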
++ */ ++void ibscif_loopback(struct ibscif_wq *sq) ++{ ++ struct ibscif_wq *rq, *iq; ++ struct ibscif_qp *remote_qp; ++ struct ibscif_wr *wr; ++ int status = 0, err = 0; ++ ++ BUG_ON(!is_sq(sq)); ++ ++again: ++ remote_qp = ibscif_get_qp(sq->qp->remote_qpn); ++ if (unlikely(IS_ERR(remote_qp))) { ++ ibscif_qp_remote_disconnect(sq->qp, IBSCIF_REASON_INVALID_QP); ++ return; ++ } ++ rq = &remote_qp->rq; ++ iq = &remote_qp->iq; ++ ++ DEV_STAT(sq->qp->dev, loopback++); ++ ++ spin_lock_bh(&sq->lock); ++ for (wr = ibscif_get_wr(sq, sq->next_wr); ++ (sq->next_wr != sq->tail) && !err; ++ sq->next_wr = (sq->next_wr + 1) % sq->size) { ++ ++ switch (wr->opcode) { ++ ++ case WR_SEND: ++ case WR_SEND_WITH_IMM: ++ status = ibscif_loopback_send(sq, rq, wr); ++ break; ++ case WR_RDMA_WRITE: ++ case WR_RDMA_WRITE_WITH_IMM: ++ status = ibscif_loopback_write(sq, rq, wr); ++ break; ++ case WR_RDMA_READ: ++ status = ibscif_loopback_read(sq, iq, wr); ++ break; ++ case WR_ATOMIC_CMP_AND_SWP: ++ case WR_ATOMIC_FETCH_AND_ADD: ++ status = ibscif_loopback_atomic(sq, iq, wr); ++ break; ++ default: ++ status = -ENOSYS; ++ break; ++ } ++ ++ if (likely(!status)) { ++ err = ibscif_complete_sq_wr(sq, wr, IB_WC_SUCCESS); ++ ++ spin_unlock_bh(&sq->lock); ++ ibscif_notify_cq(to_cq(sq->qp->ibqp.send_cq)); ++ ibscif_notify_cq(to_cq(remote_qp->ibqp.recv_cq)); ++ spin_lock_bh(&sq->lock); ++ } else ++ break; ++ } ++ spin_unlock_bh(&sq->lock); ++ ++ if (unlikely(status) && status != -ENOBUFS) ++ ibscif_qp_remote_disconnect(sq->qp, IBSCIF_REASON_QP_FATAL); ++ else if (unlikely(err)) ++ ibscif_qp_internal_disconnect(sq->qp, IBSCIF_REASON_QP_FATAL); ++ ++ ibscif_put_qp(remote_qp); ++ ++ if (status == -ENOBUFS) { ++ schedule(); ++ goto again; ++ } ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_main.c a8/drivers/infiniband/hw/scif/ibscif_main.c +--- a7/drivers/infiniband/hw/scif/ibscif_main.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_main.c 2015-02-23 10:14:37.484809663 -0800 +@@ -0,0 +1,357 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include "ibscif_driver.h" ++ ++static const char ibscif_signon[] = DRV_SIGNON DRV_BUILD; ++ ++MODULE_AUTHOR("Intel Corporation"); ++MODULE_LICENSE("Dual BSD/GPL"); ++MODULE_DESCRIPTION(DRV_DESC); ++MODULE_VERSION(DRV_VERSION); ++ ++#define MODULE_PARAM(type, name, value, desc) \ ++ type name = value; \ ++ module_param(name, type, 0664); \ ++ MODULE_PARM_DESC(name, desc) ++ ++#define MODULE_ARRAY(name, size, value, desc) \ ++ unsigned int name##_argc; \ ++ char *name[size] = { [0 ... size-1] = value }; \ ++ module_param_array(name, charp, &name##_argc, 0644); \ ++ MODULE_PARM_DESC(name, desc) ++ ++#define DEFAULT_MAX_PINNED 50 ++MODULE_PARAM(int, max_pinned, DEFAULT_MAX_PINNED, ++ "Maximum percent of physical memory that may be pinned"); ++ ++#define DEFAULT_WINDOW_SIZE 40 ++MODULE_PARAM(int, window_size, DEFAULT_WINDOW_SIZE, ++ "Maximum number of outstanding unacknowledged packets"); ++ ++#define DEFAULT_RMA_THRESHOLD 1024 ++MODULE_PARAM(int, rma_threshold, DEFAULT_RMA_THRESHOLD, ++ "Maximum message size sent through scif_send()"); ++ ++MODULE_PARAM(int, fast_rdma, 1, ++ "Use scif_writeto()/scif_readfrom() directly for RDMA write/read"); ++ ++MODULE_PARAM(int, blocking_send, 0, ++ "Use blocking version of scif_send()"); ++ ++MODULE_PARAM(int, blocking_recv, 1, ++ "Use blocking version of scif_recv()"); ++ ++MODULE_PARAM(int, scif_loopback, 1, ++ "Use SCIF lookback instead of kernel copy based loopback"); ++ ++MODULE_PARAM(int, host_proxy, 0, ++ "Proxy card side RDMA operations to host"); ++ ++#if ((LINUX_VERSION_CODE>=KERNEL_VERSION(3,5,0)) || CONFIG_MK1OM || CONFIG_ML1OM) ++#define USE_NEW_IB_TYPE 1 ++#else ++#define USE_NEW_IB_TYPE 0 ++#endif ++MODULE_PARAM(int, new_ib_type, USE_NEW_IB_TYPE, ++ "Use new transport type dedicated to IBSCIF"); ++ ++MODULE_PARAM(int, verbose, 0, ++ "Produce more log info for debugging purpose"); ++ ++MODULE_PARAM(int, check_grh, 1, ++ "Detect outside-box connection by checking the global routing header"); ++ ++static atomic_t avail_pages; /* Calculated from max_pinned and totalram_pages */ ++ ++LIST_HEAD(devlist); ++DECLARE_MUTEX(devlist_mutex); ++ ++DEFINE_IDR(wiremap); ++DEFINE_RWLOCK(wiremap_lock); ++static u32 reserved_0 = 0; ++ ++void ibscif_dump(char *str, unsigned char* buf, int len) ++{ ++ unsigned char *p, tmp[(16*3)+1]; ++ int i; ++ return; ++ len = len > 64 ? 64 : len; ++ while (len) { ++ p = tmp; ++ for (i = len > 16 ? 16 : len; i; i--, len--) ++ p += sprintf(p, "%2x ", *buf++); ++ printk("(%d)%s: %s\n", smp_processor_id(), str, tmp); ++ } ++} ++ ++int ibscif_reserve_quota(int *npages) ++{ ++ int c, old, err; ++ ++ if (!*npages) ++ return 0; ++ ++ err = 0; ++ c = atomic_read(&avail_pages); ++ for (;;) { ++ if (unlikely(c < *npages)) ++ break; ++ old = atomic_cmpxchg(&avail_pages, c, c - *npages); ++ if (likely(old == c)) ++ break; ++ c = old; ++ } ++ ++ if (c < *npages) { ++ *npages = 0; ++ err = -EDQUOT; ++ } ++ ++ return err; ++} ++ ++void ibscif_release_quota(int npages) ++{ ++ if (npages) ++ atomic_add(npages, &avail_pages); ++} ++ ++/* ++ * To work around MPI's assumptions that data is written atomically in their ++ * header structures, write the first 16 integers of a transfer atomically. ++ * ++ * Update: the assumption of MPI's ofa module is different in that the last ++ * four bytes needs to be written last and atomically. The buffers used in ++ * this case is always aligned. 
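++ * The bulk of the buffer is therefore copied with memcpy(), followed by an
++ * smp_wmb() and a single store of the trailing aligned word (or the trailing
++ * byte when the tail is unaligned). For example, a 25-byte transfer with an
++ * aligned head and unaligned tail becomes four int stores, an 8-byte
++ * memcpy(), and one final byte store after the barrier.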
++ */ ++int ibscif_atomic_copy(void *dst_addr, void *src_addr, u32 copy_len, int head_copied) ++{ ++ volatile int *src_x = (int *)src_addr; ++ volatile int *dst_x = (int *)dst_addr; ++ volatile u8 *src_c, *dst_c; ++ int head_aligned, tail_aligned; ++ ++ if (unlikely(!copy_len)) ++ return head_copied; ++ ++ head_aligned = !((unsigned long)src_addr & (sizeof(int)-1)) && ++ !((unsigned long)dst_addr & (sizeof(int)-1)); ++ ++ ++ tail_aligned = !((unsigned long)(src_addr+copy_len) & (sizeof(int)-1)) && ++ !((unsigned long)(dst_addr+copy_len) & (sizeof(int)-1)); ++ ++ if (!head_copied && head_aligned) { ++ ++ switch (copy_len) { ++ case sizeof(int): ++ *dst_x = *src_x; ++ goto done; ++ case sizeof(int)*2: ++ *dst_x++ = *src_x++; ++ *dst_x = *src_x; ++ goto done; ++ case sizeof(int)*3: ++ *dst_x++ = *src_x++; ++ *dst_x++ = *src_x++; ++ *dst_x = *src_x; ++ goto done; ++ default: ++ if (copy_len >= (sizeof(int)*4)) { ++ /* We have at least a whole header to copy. */ ++ head_copied = 1; ++ copy_len -= sizeof(int)*4; ++ ++ *dst_x++ = *src_x++; ++ *dst_x++ = *src_x++; ++ *dst_x++ = *src_x++; ++ ++ if (copy_len == 0) { ++ *dst_x = *src_x; ++ goto done; ++ } ++ *dst_x++ = *src_x++; ++ } ++ break; ++ } ++ } ++ ++ /* The last integer is aligned. Copy all but the last int, then the last int */ ++ if (tail_aligned && copy_len >= sizeof(int)) { ++ copy_len -= sizeof(int); ++ if (copy_len) ++ memcpy((void *)dst_x, (void *)src_x, copy_len); ++ smp_wmb(); ++ src_x = (volatile int *)((char *)src_x + copy_len); ++ dst_x = (volatile int *)((char *)dst_x + copy_len); ++ *dst_x = *src_x; ++ goto done; ++ } ++ ++ /* Bad alignment. Copy all but the last byte, then the last byte */ ++ if (--copy_len) ++ memcpy((void *)dst_x, (void *)src_x, copy_len); ++ ++ src_c = ((volatile u8 *)src_x) + copy_len; ++ dst_c = ((volatile u8 *)dst_x) + copy_len; ++ smp_wmb(); ++ *dst_c = *src_c; ++done: ++ return head_copied; ++} ++ ++/* ++ * Because idr_pre_get acquires the same internal spinlock used by idr_pre_get/idr_remove ++ * calls under a write_lock_bh, we need to call idr_pre_get with bottom half disabled. ++ * We cannot simply take the write_lock_bh(&wiremap_lock) because idr_pre_get does a ++ * blocking memory allocation call. Since bh is disabled, mask must be GFP_ATOMIC. ++ */ ++static inline int ibscif_wiremap_pre_get(void) ++{ ++ int ret; ++ ++ local_bh_disable(); ++ ret = idr_pre_get(&wiremap, GFP_ATOMIC); ++ local_bh_enable(); ++ ++ return ret; ++} ++ ++int ibscif_wiremap_add(void *obj, int *id) ++{ ++ int ret; ++ ++ do { ++ if (!ibscif_wiremap_pre_get()) ++ return -ENOMEM; ++ ++ write_lock_bh(&wiremap_lock); ++ ret = idr_get_new(&wiremap, obj, id); ++ write_unlock_bh(&wiremap_lock); ++ } while (ret == -EAGAIN); ++ ++ return ret; ++} ++ ++void ibscif_wiremap_del(int id) ++{ ++ write_lock_bh(&wiremap_lock); ++ idr_remove(&wiremap, id); ++ write_unlock_bh(&wiremap_lock); ++} ++ ++static int ibscif_init_wiremap(void) ++{ ++ /* ++ * Instead of treating them as opaque, some applications assert that returned key ++ * values are non-zero. As a work-around, reserve the first key from the wiremap. 
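++ * The entry allocated here is held by the driver itself, so id 0 is never
++ * handed out as an lkey/rkey; the BUG_ON below asserts that the idr really
++ * returned 0 for this first allocation.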
++ */ ++ int ret = ibscif_wiremap_add(&reserved_0, &reserved_0); ++ BUG_ON(reserved_0 != 0); ++ return ret; ++} ++ ++static void ibscif_free_wiremap(void) ++{ ++ write_lock_bh(&wiremap_lock); ++ idr_remove_all(&wiremap); ++ idr_destroy(&wiremap); ++ write_unlock_bh(&wiremap_lock); ++} ++ ++static void ibscif_init_params(void) ++{ ++ if ((max_pinned <= 0) || (max_pinned > 100)) { ++ max_pinned = DEFAULT_MAX_PINNED; ++ printk(KERN_WARNING PFX "Corrected max_pinned module parameter to %d.\n", ++ max_pinned); ++ } ++ if (window_size < MIN_WINDOW_SIZE) { ++ window_size = MIN_WINDOW_SIZE; ++ printk(KERN_WARNING PFX "Corrected window_size module parameter to %d.\n", ++ window_size); ++ } ++ if (rma_threshold < 0) { ++ rma_threshold = 0x7FFFFFFF; ++ printk(KERN_WARNING PFX "Corrected rma_threshold module parameter to %d.\n", ++ rma_threshold); ++ } ++ ++ /* ++ * Hardware RDMA devices have built-in limits on the number of registered pages. ++ * The avail_pages variable provides a limit for this software device. ++ */ ++ atomic_set(&avail_pages, max_pinned * (totalram_pages / 100)); ++} ++ ++static int __init ibscif_init(void) ++{ ++ int err; ++ ++ printk(KERN_INFO PFX "%s\n", ibscif_signon); ++ printk(KERN_INFO PFX "max_pinned=%d, window_size=%d, " ++ "blocking_send=%d, blocking_recv=%d, " ++ "fast_rdma=%d, " ++ "host_proxy=%d, " ++ "rma_threshold=%d, scif_loopback=%d, " ++ "new_ib_type=%d, verbose=%d, " ++ "check_grh=%d\n", ++ max_pinned, window_size, ++ blocking_send, blocking_recv, ++ fast_rdma, ++ host_proxy, ++ rma_threshold, scif_loopback, ++ new_ib_type, verbose, ++ check_grh); ++ ++ ibscif_init_params(); ++ ++ err = ibscif_init_wiremap(); ++ if (err) ++ return err; ++ ++ err = ibscif_dev_init(); ++ if (!err) ++ return 0; ++ ++ ibscif_free_wiremap(); ++ return err; ++} ++ ++static void __exit ibscif_exit(void) ++{ ++ ibscif_dev_cleanup(); ++ ibscif_free_wiremap(); ++ printk(KERN_INFO PFX "unloaded\n"); ++} ++ ++module_init(ibscif_init); ++module_exit(ibscif_exit); +diff -urN a7/drivers/infiniband/hw/scif/ibscif_mr.c a8/drivers/infiniband/hw/scif/ibscif_mr.c +--- a7/drivers/infiniband/hw/scif/ibscif_mr.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_mr.c 2015-02-23 10:14:37.484809663 -0800 +@@ -0,0 +1,559 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++static int ibscif_mr_init_mreg(struct ibscif_mr *mr); ++ ++struct ib_mr *ibscif_get_dma_mr(struct ib_pd *ibpd, int access) ++{ ++ struct ibscif_dev *dev = to_dev(ibpd->device); ++ struct ibscif_mr *mr; ++ int err; ++ ++ if (!atomic_add_unless(&dev->mr_cnt, 1, MAX_MRS)) ++ return ERR_PTR(-EAGAIN); ++ ++ mr = kzalloc(sizeof *mr, GFP_KERNEL); ++ if (!mr) { ++ err = -ENOMEM; ++ printk(KERN_ALERT PFX "%s: unable to allocate mr.\n", __func__); ++ goto out1; ++ } ++ ++ kref_init(&mr->ref); ++ init_completion(&mr->done); ++ ++ err = ibscif_wiremap_add(mr, &mr->ibmr.lkey); ++ if (err) { ++ printk(KERN_ALERT PFX "%s: unable to allocate lkey.\n", __func__); ++ goto out2; ++ } ++ ++ if (mr->ibmr.lkey > IBSCIF_MR_MAX_KEY) { ++ err = -ENOSPC; ++ printk(KERN_ALERT PFX "%s: lkey (%x) out of range.\n", __func__, mr->ibmr.lkey); ++ goto out3; ++ } ++ ++ mr->ibmr.device = ibpd->device; /* For ibscif_dereg_mr() calls below. */ ++ mr->ibmr.rkey = mr->ibmr.lkey; ++ mr->access = access; ++ mr->magic = MR_MAGIC; ++ INIT_LIST_HEAD(&mr->mreg_list); ++ ++ return &mr->ibmr; ++ ++out3: ++ ibscif_wiremap_del(mr->ibmr.lkey); ++out2: ++ kfree(mr); ++out1: ++ atomic_dec(&dev->mr_cnt); ++ return ERR_PTR(err); ++} ++ ++struct ib_mr *ibscif_reg_phys_mr(struct ib_pd *ibpd, struct ib_phys_buf *phys_buf_array, ++ int num_phys_buf, int access, u64 *iova_start) ++{ ++ struct ibscif_mr *mr; ++ struct ib_mr *ibmr; ++ int i, j, k, err; ++ u64 mask; ++ ++ ibmr = ibscif_get_dma_mr(ibpd, access); ++ if (IS_ERR(ibmr)) ++ return ibmr; ++ ++ mr = to_mr(ibmr); ++ mr->addr = *iova_start; ++ ++ mask = 0; ++ for (i = 0; i < num_phys_buf; i++) { ++ if (i != 0) ++ mask |= phys_buf_array[i].addr; /* All but 1st are aligned */ ++ if (i != num_phys_buf - 1) ++ mask |= phys_buf_array[i].addr + phys_buf_array[i].size; /* Middle bufs are full pages */ ++ ++ mr->length += phys_buf_array[i].size; ++ } ++ if ((mask & ~PAGE_MASK) || (mr->length > MAX_MR_SIZE)) { ++ err = -EINVAL; ++ goto out; ++ } ++ if (mr->length && ((mr->addr + mr->length - 1) < mr->addr)) { ++ err = -EOVERFLOW; ++ goto out; ++ } ++ ++ phys_buf_array[0].size += phys_buf_array[0].addr & ~PAGE_MASK; /* Adjust 1st buf size by page offset */ ++ phys_buf_array[0].addr &= PAGE_MASK; /* Truncate 1st buf to start of page */ ++ ++ for (i = 0; i < num_phys_buf; i++) ++ mr->npages += PAGE_ALIGN(phys_buf_array[i].size) >> PAGE_SHIFT; ++ ++ if (!mr->npages) ++ return &mr->ibmr; ++ ++ err = ibscif_reserve_quota(&mr->npages); ++ if (err) ++ goto out; ++ ++ mr->page = vzalloc(mr->npages * sizeof *mr->page); ++ if (!mr->page) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ k = 0; ++ for (i = 0; i < num_phys_buf; i++) ++ for (j = 0; j < PAGE_ALIGN(phys_buf_array[i].size) >> PAGE_SHIFT; j++) ++ mr->page[k++] = pfn_to_page((phys_buf_array[i].addr >> PAGE_SHIFT) + j); ++ ++ return &mr->ibmr; ++out: ++ ibscif_dereg_mr(ibmr); ++ return ERR_PTR(err); ++} ++ ++#ifdef MOFED ++struct ib_mr *ibscif_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, ++ u64 virt_addr, int access, struct ib_udata *udata, int mr_id) ++#else ++struct ib_mr *ibscif_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, ++ u64 virt_addr, int access, struct ib_udata *udata) ++#endif ++{ ++ struct ib_mr *ibmr; ++ struct 
ibscif_mr *mr; ++ struct scatterlist *sg; ++ struct ibscif_dev *dev; ++ int i, k, err; ++ ++ if (length && ((start + length - 1) < start)) ++ return ERR_PTR(-EOVERFLOW); ++ ++ ibmr = ibscif_get_dma_mr(ibpd, access); ++ if (IS_ERR(ibmr)) ++ return ibmr; ++ ++ mr = to_mr(ibmr); ++ mr->addr = start; ++ ++ mr->umem = ib_umem_get(ibpd->uobject->context, start, length, access, 0/*dma_sync*/); ++ if (IS_ERR(mr->umem)) { ++ err = PTR_ERR(mr->umem); ++ printk(KERN_ALERT PFX "%s: ib_umem_get returns %d.\n", __func__, err); ++ goto out; ++ } ++ ++ mr->npages = ib_umem_page_count(mr->umem); ++ if (!mr->npages) ++ return &mr->ibmr; ++ ++ mr->length = mr->umem->length; ++ ++ err = ibscif_reserve_quota(&mr->npages); ++ if (err) ++ goto out; ++ ++ mr->page = vzalloc(mr->npages * sizeof *mr->page); ++ if (!mr->page) { ++ err = -ENOMEM; ++ printk(KERN_ALERT PFX "%s: unable to allocate mr->page.\n", __func__); ++ goto out; ++ } ++ ++ k = 0; ++ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, i) ++ mr->page[k++] = sg_page(sg); ++ ++ err = ibscif_mr_init_mreg(mr); ++ if (err) ++ goto out; ++ ++ dev = to_dev(mr->ibmr.device); ++ down(&dev->mr_list_mutex); ++ list_add_tail(&mr->entry, &dev->mr_list); ++ up(&dev->mr_list_mutex); ++ ++ return &mr->ibmr; ++out: ++ ibscif_dereg_mr(ibmr); ++ return ERR_PTR(err); ++} ++ ++void ibscif_complete_mr(struct kref *ref) ++{ ++ struct ibscif_mr *mr = container_of(ref, struct ibscif_mr, ref); ++ complete(&mr->done); ++} ++ ++int ibscif_dereg_mr(struct ib_mr *ibmr) ++{ ++ struct ibscif_dev *dev = to_dev(ibmr->device); ++ struct ibscif_mr *mr = to_mr(ibmr); ++ struct ibscif_mreg_info *mreg, *next; ++ struct ibscif_mr *mr0, *next0; ++ int ret; ++ ++ ibscif_put_mr(mr); ++ wait_for_completion(&mr->done); ++ ++ list_for_each_entry_safe(mreg, next, &mr->mreg_list, entry) { ++ do { ++ ret = scif_unregister(mreg->conn->ep, mreg->aligned_offset, mreg->aligned_length); ++ } ++ while (ret == -ERESTARTSYS); ++ ++ if (ret && ret != -ENOTCONN) ++ printk(KERN_ALERT PFX "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n", ++ __func__, ret, mreg->conn->ep, mreg->aligned_offset, mreg->aligned_length); ++ ++ ibscif_put_conn(mreg->conn); ++ list_del(&mreg->entry); ++ kfree(mreg); ++ } ++ ++ down(&dev->mr_list_mutex); ++ list_for_each_entry_safe(mr0, next0, &dev->mr_list, entry) { ++ if (mr0 == mr) { ++ list_del(&mr0->entry); ++ break; ++ } ++ } ++ up(&dev->mr_list_mutex); ++ ++ if (mr->pinned_pages) ++ scif_unpin_pages(mr->pinned_pages); ++ ++ if (mr->umem && !IS_ERR(mr->umem)) ++ ib_umem_release(mr->umem); ++ if (mr->page) ++ vfree(mr->page); ++ ++ ibscif_release_quota(mr->npages); ++ atomic_dec(&dev->mr_cnt); ++ ++ ibscif_wiremap_del(mr->ibmr.lkey); ++ ++ kfree(mr); ++ return 0; ++} ++ ++/* ++ * Lookup and validate the given memory region access. A reference is held on success. 
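++ * The caller must drop that reference with ibscif_put_mr(). A PD mismatch,
++ * missing access rights, or an out-of-range [addr, addr + length) window is
++ * reported as an ERR_PTR instead.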
++ */ ++struct ibscif_mr *ibscif_validate_mr(u32 key, u64 addr, int length, ++ struct ib_pd *ibpd, enum ib_access_flags access) ++{ ++ struct ibscif_mr *mr; ++ int err; ++ ++ mr = ibscif_get_mr(key); ++ if (unlikely(IS_ERR(mr))) ++ return mr; ++ ++ if (unlikely(mr->ibmr.pd != ibpd)) { ++ err = -EPERM; ++ goto out; ++ } ++ if (unlikely(access && !(mr->access & access))) { ++ err = -EACCES; ++ goto out; ++ } ++ if (unlikely((addr < mr->addr) || ((addr + length) > (mr->addr + mr->length)))) { ++ err = -ERANGE; ++ goto out; ++ } ++ ++ return mr; ++out: ++ ibscif_put_mr(mr); ++ return ERR_PTR(err); ++} ++ ++static void ibscif_dma_nop(struct ib_device *ibdev, u64 addr, size_t size, enum dma_data_direction direction) ++{ ++} ++ ++static int ibscif_mapping_error(struct ib_device *ibdev, u64 dma_addr) ++{ ++ return !dma_addr; ++} ++ ++static u64 ibscif_dma_map_single(struct ib_device *ibdev, void *cpu_addr, size_t size, ++ enum dma_data_direction direction) ++{ ++ return (u64)cpu_addr; ++} ++ ++static u64 ibscif_dma_map_page(struct ib_device *ibdev, struct page *page, unsigned long offset, size_t size, ++ enum dma_data_direction direction) ++{ ++ u64 addr; ++ ++ if (offset + size > PAGE_SIZE) ++ return 0; ++ ++ addr = (u64)page_address(page); ++ if (addr) ++ addr += offset; ++ ++ return addr; ++} ++ ++static int ibscif_map_sg(struct ib_device *ibdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++ u64 addr; ++ int i, ret = nents; ++ ++ for (i = 0; i < nents; i++, sg++) { ++ addr = (u64)page_address(sg_page(sg)); ++ if (!addr) { ++ ret = 0; ++ break; ++ } ++ ++ sg->dma_address = sg->offset + addr; ++ sg->dma_length = sg->length; ++ } ++ return ret; ++} ++ ++static void ibscif_unmap_sg(struct ib_device *ibdev, struct scatterlist *sg, int nents, ++ enum dma_data_direction direction) ++{ ++} ++ ++static u64 ibscif_sg_dma_address(struct ib_device *ibdev, struct scatterlist *sg) ++{ ++ return (u64)sg->dma_address; ++} ++ ++static unsigned int ibscif_sg_dma_len(struct ib_device *ibdev, struct scatterlist *sg) ++{ ++ return sg->dma_length; ++} ++ ++static void *ibscif_dma_alloc_coherent(struct ib_device *ibdev, size_t size, u64 *dma_handle, gfp_t flag) ++{ ++ struct page *p = alloc_pages(flag, get_order(size)); ++ void *addr = p ? 
page_address(p) : NULL; ++ ++ if (dma_handle) ++ *dma_handle = (u64)addr; ++ ++ return addr; ++} ++ ++static void ibscif_dma_free_coherent(struct ib_device *ibdev, size_t size, void *cpu_addr, u64 dma_handle) ++{ ++ free_pages((unsigned long)cpu_addr, get_order(size)); ++} ++ ++struct ib_dma_mapping_ops ibscif_dma_mapping_ops = { ++ ibscif_mapping_error, ++ ibscif_dma_map_single, ++ ibscif_dma_nop, ++ ibscif_dma_map_page, ++ ibscif_dma_nop, ++ ibscif_map_sg, ++ ibscif_unmap_sg, ++ ibscif_sg_dma_address, ++ ibscif_sg_dma_len, ++ ibscif_dma_nop, ++ ibscif_dma_nop, ++ ibscif_dma_alloc_coherent, ++ ibscif_dma_free_coherent ++}; ++ ++static void ibscif_dump_mr_list( struct ibscif_dev *dev ) ++{ ++ struct ibscif_mr *mr; ++ ++ list_for_each_entry(mr, &dev->mr_list, entry){ ++ printk(KERN_ALERT PFX "%s: mr=%p [%llx, %x, %x]\n", __func__, mr, mr->addr, mr->length, mr->ibmr.rkey); ++ } ++} ++ ++static int ibscif_mr_reg_with_conn(struct ibscif_mr *mr, struct ibscif_conn *conn, struct ibscif_mreg_info **new_mreg) ++{ ++ struct ibscif_mreg_info *mreg; ++ off_t offset, aligned_offset; ++ u64 aligned_addr; ++ int aligned_length; ++ int offset_in_page; ++ int err; ++ ++ aligned_addr = mr->addr & PAGE_MASK; ++ offset_in_page = (int)(mr->addr & ~PAGE_MASK); ++ aligned_length = (mr->length + offset_in_page + PAGE_SIZE - 1) & PAGE_MASK; ++ aligned_offset = IBSCIF_MR_VADDR_TO_OFFSET(mr->ibmr.rkey, aligned_addr); ++ ++ offset = scif_register_pinned_pages(conn->ep, mr->pinned_pages, aligned_offset, SCIF_MAP_FIXED); ++ ++ if (IS_ERR_VALUE(offset)) { ++ printk(KERN_ALERT PFX "%s: scif_register_pinned_pages returns %d\n", __func__, (int)offset); ++ printk(KERN_ALERT PFX "%s: conn=%p, ep=%p, mr=%p, addr=%llx, length=%x, rkey=%x, " ++ "aligned_addr=%llx, aligned_length=%x, aligned_offset=%llx\n", ++ __func__, conn, conn->ep, mr, mr->addr, mr->length, mr->ibmr.rkey, ++ aligned_addr, aligned_length, (uint64_t)aligned_offset); ++ ibscif_dump_mr_list(conn->dev); ++ return (int)offset; ++ } ++ ++ BUG_ON(offset != aligned_offset); ++ ++ offset += offset_in_page; ++ ++ mreg = kzalloc(sizeof(struct ibscif_mreg_info), GFP_KERNEL); ++ if (!mreg) { ++ do { ++ err = scif_unregister(conn->ep, aligned_offset, aligned_length); ++ } ++ while (err == -ERESTARTSYS); ++ ++ if (err && err != -ENOTCONN) ++ printk(KERN_ALERT PFX "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n", ++ __func__, err, conn->ep, (uint64_t)aligned_offset, aligned_length); ++ ++ return -ENOMEM; ++ } ++ mreg->conn = conn; ++ mreg->offset = (u64)offset; ++ mreg->aligned_offset = aligned_offset; ++ mreg->aligned_length = aligned_length; ++ list_add_tail(&mreg->entry, &mr->mreg_list); ++ ++ atomic_inc(&conn->refcnt); ++ if (conn->local_close) { ++ conn->local_close = 0; ++ ibscif_send_reopen(conn); ++ } ++ ++ if (new_mreg) ++ *new_mreg = mreg; ++ ++ return 0; ++} ++ ++struct ibscif_mreg_info *ibscif_mr_get_mreg(struct ibscif_mr *mr, struct ibscif_conn *conn) ++{ ++ struct ibscif_mreg_info *mreg; ++ int err; ++ int i; ++ ++ if (unlikely(!conn)) { ++ printk(KERN_ALERT PFX "%s: conn==NULL\n", __func__); ++ return NULL; ++ } ++ ++ list_for_each_entry(mreg, &mr->mreg_list, entry){ ++ if (mreg->conn == conn) ++ return mreg; ++ } ++ ++ mreg = NULL; ++ err = ibscif_mr_reg_with_conn(mr, conn, &mreg); ++ if (err != -EADDRINUSE) ++ return mreg; ++ ++ /* another thread is performing the registration */ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: mr is being registered by another thread. 
mr=%p, conn=%p.\n", __func__, mr, conn); ++ for (i=0; i<10000; i++) { ++ list_for_each_entry(mreg, &mr->mreg_list, entry){ ++ if (mreg->conn == conn) { ++ if (verbose) ++ printk(KERN_INFO PFX "%s: got mreg after %d retries.\n", __func__, i+1); ++ return mreg; ++ } ++ } ++ schedule(); ++ } ++ if (verbose) ++ printk(KERN_INFO PFX "%s: failed to get mreg after %d retries.\n", __func__, i); ++ return NULL; ++} ++ ++static int ibscif_mr_init_mreg(struct ibscif_mr *mr) ++{ ++ struct ibscif_dev *dev = to_dev(mr->ibmr.device); ++ struct ibscif_conn *conn; ++ int prot; ++ u64 aligned_addr; ++ int aligned_length; ++ int offset_in_page; ++ int err; ++ ++ aligned_addr = mr->addr & PAGE_MASK; ++ offset_in_page = (int)(mr->addr & ~PAGE_MASK); ++ aligned_length = (mr->length + offset_in_page + PAGE_SIZE - 1) & PAGE_MASK; ++ ++#if 0 ++ prot = ((mr->access & IB_ACCESS_REMOTE_READ)?SCIF_PROT_READ:0) | ++ ((mr->access & IB_ACCESS_REMOTE_WRITE)?SCIF_PROT_WRITE:0); ++#else ++ // In IB, the same buffer can be registered multiple times with different access rights. ++ // SCIF doesn't have mechanism to support that. So we just turn on all the access rights. ++ // Otherwise we may end up with protection error. ++ prot = SCIF_PROT_READ | SCIF_PROT_WRITE; ++#endif ++ ++ err = scif_pin_pages((void *)aligned_addr, aligned_length, prot, 0/*user addr*/, &mr->pinned_pages); ++ if (err) { ++ printk(KERN_ALERT PFX "%s: scif_pin_pages returns %d\n", __func__, err); ++ return err; ++ } ++ ++ down(&dev->mutex); ++ list_for_each_entry(conn, &dev->conn_list, entry) { ++ err = ibscif_mr_reg_with_conn(mr, conn, NULL); ++ if (err) ++ break; ++ } ++ up(&dev->mutex); ++ ++ return err; ++} ++ ++void ibscif_refresh_mreg( struct ibscif_conn *conn ) ++{ ++ struct ibscif_mr *mr; ++ ++ down(&conn->dev->mr_list_mutex); ++ list_for_each_entry(mr, &conn->dev->mr_list, entry){ ++ ibscif_mr_get_mreg(mr, conn); ++ } ++ up(&conn->dev->mr_list_mutex); ++} ++ +diff -urN a7/drivers/infiniband/hw/scif/ibscif_pd.c a8/drivers/infiniband/hw/scif/ibscif_pd.c +--- a7/drivers/infiniband/hw/scif/ibscif_pd.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_pd.c 2015-02-23 10:14:37.484809663 -0800 +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++struct ib_pd *ibscif_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) ++{ ++ struct ibscif_dev *dev = to_dev(ibdev); ++ struct ibscif_pd *pd; ++ ++ if (!atomic_add_unless(&dev->pd_cnt, 1, MAX_PDS)) ++ return ERR_PTR(-EAGAIN); ++ ++ pd = kzalloc(sizeof *pd, GFP_KERNEL); ++ if (!pd) { ++ atomic_dec(&dev->pd_cnt); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ return &pd->ibpd; ++} ++ ++int ibscif_dealloc_pd(struct ib_pd *ibpd) ++{ ++ struct ibscif_dev *dev = to_dev(ibpd->device); ++ atomic_dec(&dev->pd_cnt); ++ kfree(to_pd(ibpd)); ++ return 0; ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_post.c a8/drivers/infiniband/hw/scif/ibscif_post.c +--- a7/drivers/infiniband/hw/scif/ibscif_post.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_post.c 2015-02-23 10:14:37.485809663 -0800 +@@ -0,0 +1,306 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++void ibscif_dump_sg(char *str, struct ib_sge *sge, int num) ++{ ++ extern void ibscif_dump(char*, void*, int); ++ if (!sge) ++ return; ++ while (num--) { ++ ibscif_dump(str, (void*)sge->addr, sge->length); ++ sge++; ++ } ++} ++ ++/* ++ * Build and validate the wr->ds_list from the given sg_list. ++ * If successful, a reference is held on each mr in the wr->ds_list. 
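++ * On failure the references taken so far are released again through
++ * ibscif_clear_ds_refs(), and overflow of the accumulated *total_length is
++ * rejected with -EOVERFLOW.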
++ */ ++static int ibscif_wr_ds(struct ib_pd *ibpd, struct ib_sge *sg_list, int num_sge, ++ struct ibscif_wr *wr, int *total_length, enum ib_access_flags access) ++{ ++ struct ibscif_ds *ds_list = wr->ds_list; ++ int err; ++ ++ *total_length = 0; ++ for (wr->num_ds = 0; wr->num_ds < num_sge; sg_list++, ds_list++) { ++ ++ ds_list->mr = ibscif_validate_mr(sg_list->lkey, sg_list->addr, sg_list->length, ibpd, access); ++ if (unlikely(IS_ERR(ds_list->mr))) { ++ err = PTR_ERR(ds_list->mr); ++ goto out; ++ } ++ ++ ds_list->in_use = 1; ++ wr->num_ds++; ++ ++ if (unlikely((*total_length + sg_list->length) < *total_length)) { ++ err = -EOVERFLOW; ++ goto out; ++ } ++ ++ ds_list->offset = sg_list->addr - ds_list->mr->addr; ++ ds_list->length = sg_list->length; ++ ds_list->lkey = sg_list->lkey; ++ ds_list->current_mreg = NULL; ++ ++ *total_length += ds_list->length; ++ } ++ ++ return 0; ++out: ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ return err; ++} ++ ++int ibscif_post_send(struct ib_qp *ibqp, struct ib_send_wr *ibwr, struct ib_send_wr **bad_wr) ++{ ++ struct ibscif_qp *qp = to_qp(ibqp); ++ struct ibscif_wq *sq = &qp->sq; ++ struct ibscif_wr *wr; ++ int nreq = 0, err; ++ ++ IBSCIF_PERF_SAMPLE(0, 0); ++ ++ spin_lock_bh(&sq->lock); ++ ++ if (unlikely(ibqp->qp_type != IB_QPT_UD && qp->state != QP_CONNECTED)) { ++ err = -ENOTCONN; ++ goto out; ++ } ++ if (unlikely(!sq->size)) { ++ err = -ENOSPC; ++ goto out; ++ } ++ ++ for (err = 0; ibwr; ibwr = ibwr->next, nreq++) { ++ ++ if (unlikely(sq->depth == sq->size)) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ if (unlikely(ibwr->num_sge > sq->max_sge)) { ++ err = -E2BIG; ++ goto out; ++ } ++ ++ wr = ibscif_get_wr(sq, sq->tail); ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->id = ibwr->wr_id; ++ wr->opcode = ibwr->opcode; ++ wr->flags = ibwr->send_flags | ((qp->sq_policy == IB_SIGNAL_ALL_WR) ? 
IB_SEND_SIGNALED : 0); ++ wr->state = WR_WAITING; ++ wr->use_rma = 0; ++ wr->rma_id = 0; ++ ++ if (ibqp->qp_type == IB_QPT_UD) { ++ wr->opcode = WR_UD; ++ wr->ud.remote_node_id = IBSCIF_LID_TO_NODE_ID(be16_to_cpu(to_ah(ibwr->wr.ud.ah)->dlid)); ++ wr->ud.remote_qpn = ibwr->wr.ud.remote_qpn; ++ ++ /* the remainings are the same as IB_WR_SEND */ ++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, 0); ++ if (unlikely(err)) ++ goto out; ++ wr->msg_id = sq->wirestate->tx.next_msg_id++; ++ } ++ ++ else switch (ibwr->opcode) { ++ ++ case IB_WR_SEND_WITH_IMM: ++ wr->send.immediate_data = ibwr->ex.imm_data; ++ case IB_WR_SEND: ++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, 0); ++ if (unlikely(err)) ++ goto out; ++ wr->msg_id = sq->wirestate->tx.next_msg_id++; ++ if (wr->length > rma_threshold) { ++ wr->use_rma = 1; ++ wr->rma_id = sq->next_msg_id; ++ } ++ break; ++ ++ case IB_WR_RDMA_WRITE_WITH_IMM: ++ wr->msg_id = sq->wirestate->tx.next_msg_id++; ++ wr->write.immediate_data = ibwr->ex.imm_data; ++ case IB_WR_RDMA_WRITE: ++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, 0); ++ if (unlikely(err)) ++ goto out; ++ if (wr->length && ++ ((ibwr->wr.rdma.remote_addr + wr->length - 1) < ibwr->wr.rdma.remote_addr)) { ++ err = -EOVERFLOW; ++ goto out; ++ } ++ wr->write.remote_address = ibwr->wr.rdma.remote_addr; ++ wr->write.rkey = ibwr->wr.rdma.rkey; ++ if (ibwr->opcode == IB_WR_RDMA_WRITE) ++ wr->msg_id = 0; ++ if (wr->length > rma_threshold) { ++ wr->use_rma = 1; ++ wr->rma_id = sq->next_msg_id; ++ } ++ break; ++ ++ case IB_WR_RDMA_READ: ++ if (unlikely(!qp->max_or)) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, IB_ACCESS_LOCAL_WRITE); ++ if (unlikely(err)) ++ goto out; ++ if (wr->length && ++ ((ibwr->wr.rdma.remote_addr + wr->length - 1) < ibwr->wr.rdma.remote_addr)) { ++ err = -EOVERFLOW; ++ goto out; ++ } ++ wr->read.remote_address = ibwr->wr.rdma.remote_addr; ++ wr->read.remote_length = wr->length; ++ wr->read.rkey = ibwr->wr.rdma.rkey; ++ wr->length = 0; /* No tx data with this opcode */ ++ wr->msg_id = sq->next_msg_id; ++ atomic_inc(&qp->or_posted); ++ if (wr->read.remote_length > rma_threshold) { ++ wr->use_rma = 1; ++ wr->rma_id = wr->msg_id; ++ } ++ break; ++ ++ case IB_WR_ATOMIC_CMP_AND_SWP: ++ case IB_WR_ATOMIC_FETCH_AND_ADD: ++ if (unlikely(!qp->max_or)) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ if (unlikely(ibwr->wr.atomic.remote_addr & (sizeof wr->atomic_rsp.orig_data - 1))) { ++ err = -EADDRNOTAVAIL; ++ goto out; ++ } ++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, IB_ACCESS_LOCAL_WRITE); ++ if (unlikely(err)) ++ goto out; ++ if (unlikely(wr->length < sizeof wr->atomic_rsp.orig_data)) { ++ err = -EINVAL; ++ goto out; ++ } ++ if (ibwr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { ++ wr->cmp_swp.cmp_operand = ibwr->wr.atomic.compare_add; ++ wr->cmp_swp.swp_operand = ibwr->wr.atomic.swap; ++ wr->cmp_swp.remote_address = ibwr->wr.atomic.remote_addr; ++ wr->cmp_swp.rkey = ibwr->wr.atomic.rkey; ++ } else { ++ wr->fetch_add.add_operand = ibwr->wr.atomic.compare_add; ++ wr->fetch_add.remote_address = ibwr->wr.atomic.remote_addr; ++ wr->fetch_add.rkey = ibwr->wr.atomic.rkey; ++ } ++ wr->length = 0; /* No tx data with these opcodes */ ++ wr->msg_id = sq->next_msg_id; ++ atomic_inc(&qp->or_posted); ++ break; ++ ++ default: ++ err = -ENOMSG; ++ goto out; ++ } ++ ++ DEV_STAT(qp->dev, wr_opcode[wr->opcode]++); ++ 
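/* The WR is fully built: advance the SQ tail while sq->lock is held; ibscif_schedule() picks it up once the lock is dropped below. */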
ibscif_append_wq(sq); ++ } ++out: ++ spin_unlock_bh(&sq->lock); ++ ++ IBSCIF_PERF_SAMPLE(1, 0); ++ ++ if (err) ++ *bad_wr = ibwr; ++ if (nreq) ++ ibscif_schedule(sq); ++ ++ IBSCIF_PERF_SAMPLE(9, 1); ++ ++ return err; ++} ++ ++int ibscif_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ibwr, struct ib_recv_wr **bad_wr) ++{ ++ struct ibscif_qp *qp = to_qp(ibqp); ++ struct ibscif_wq *rq = &qp->rq; ++ struct ibscif_wr *wr; ++ int err; ++ ++ spin_lock_bh(&rq->lock); ++ ++ if ((qp->state != QP_IDLE) && (qp->state != QP_CONNECTED)) { ++ err = -ENOTCONN; ++ goto out; ++ } ++ if (unlikely(!rq->size)) { ++ err = -ENOSPC; ++ goto out; ++ } ++ ++ for (err = 0; ibwr; ibwr = ibwr->next) { ++ ++ if (unlikely(rq->depth == rq->size)) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ if (unlikely(ibwr->num_sge > rq->max_sge)) { ++ err = -E2BIG; ++ goto out; ++ } ++ ++ wr = ibscif_get_wr(rq, rq->tail); ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->id = ibwr->wr_id; ++ wr->msg_id = rq->next_msg_id; ++ wr->state = WR_WAITING; ++ ++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, IB_ACCESS_LOCAL_WRITE); ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ if (unlikely(err)) ++ goto out; ++ ++ ibscif_append_wq(rq); ++ } ++out: ++ spin_unlock_bh(&rq->lock); ++ if (err) ++ *bad_wr = ibwr; ++ ++ return err; ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_procfs.c a8/drivers/infiniband/hw/scif/ibscif_procfs.c +--- a7/drivers/infiniband/hw/scif/ibscif_procfs.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_procfs.c 2015-02-23 10:14:37.485809663 -0800 +@@ -0,0 +1,180 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include "ibscif_driver.h" ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) ++static int ibscif_stats_show(struct seq_file *m, void *v) ++#else ++static int ibscif_stats_read(char *page, char **start, off_t offset, ++ int count, int *eof, void *data) ++#endif ++{ ++ int l = 0; ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) ++ struct ibscif_dev *dev = m->private; ++#else ++ struct ibscif_dev *dev = data; ++ char *m = page; ++ ++ if (offset) ++ return 0; ++ ++ *eof = 1; ++#endif ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) ++ seq_printf ++#else ++ l += sprintf ++#endif ++ (m, ++ "%s statistics:\n" ++ " tx_bytes %lu rx_bytes %lu\n" ++ " tx_pkts %lu rx_pkts %lu loopback_pkts %lu\n" ++ " sched_exhaust %lu unavailable %lu\n" ++ " tx_errors %lu duplicates %lu\n" ++ " total wr %lu :\n" ++ " send %lu send_imm %lu write %lu write_imm %lu\n" ++ " recv %lu recv_imm %lu read %lu comp %lu fetch %lu\n" ++ " read_rsp %lu atomic_rsp %lu ud %lu\n" ++ " fast_rdma :\n" ++ " write %lu read %lu unavailable %lu fallback %lu force_ack %lu tail_write %lu\n", ++ dev->ibdev.name, ++ DEV_STAT(dev, bytes_sent), ++ DEV_STAT(dev, bytes_rcvd), ++ DEV_STAT(dev, packets_sent), ++ DEV_STAT(dev, packets_rcvd), ++ DEV_STAT(dev, loopback), ++ DEV_STAT(dev, sched_exhaust), ++ DEV_STAT(dev, unavailable), ++ DEV_STAT(dev, tx_errors), ++ DEV_STAT(dev, duplicates), ++ DEV_STAT(dev, wr_opcode[WR_SEND]) + ++ DEV_STAT(dev, wr_opcode[WR_SEND_WITH_IMM]) + ++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE]) + ++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE_WITH_IMM]) + ++ DEV_STAT(dev, recv) + ++ DEV_STAT(dev, recv_imm) + ++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ]) + ++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_CMP_AND_SWP]) + ++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_FETCH_AND_ADD]) + ++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ_RSP]) + ++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_RSP]), ++ DEV_STAT(dev, wr_opcode[WR_SEND]), ++ DEV_STAT(dev, wr_opcode[WR_SEND_WITH_IMM]), ++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE]), ++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE_WITH_IMM]), ++ DEV_STAT(dev, recv), ++ DEV_STAT(dev, recv_imm), ++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ]), ++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_CMP_AND_SWP]), ++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_FETCH_AND_ADD]), ++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ_RSP]), ++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_RSP]), ++ DEV_STAT(dev, wr_opcode[WR_UD]), ++ DEV_STAT(dev, fast_rdma_write), ++ DEV_STAT(dev, fast_rdma_read), ++ DEV_STAT(dev, fast_rdma_unavailable), ++ DEV_STAT(dev, fast_rdma_fallback), ++ DEV_STAT(dev, fast_rdma_force_ack), ++ DEV_STAT(dev, fast_rdma_tail_write) ++ ); ++ ++ return l; ++} ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) ++static ssize_t ibscif_stats_write(struct file *file, const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ struct ibscif_dev *dev = PDE_DATA(file_inode(file)); ++ memset(&dev->stats, 0, sizeof dev->stats); ++ return count; ++} ++ ++static int ibscif_stats_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, ibscif_stats_show, PDE_DATA(inode)); ++} ++ ++struct file_operations ibscif_fops = { ++ .owner = THIS_MODULE, ++ .open = ibscif_stats_open, ++ .read = seq_read, ++ .write = ibscif_stats_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++int ibscif_procfs_add_dev(struct ibscif_dev *dev) ++{ ++ dev->procfs = proc_mkdir(dev->ibdev.name, init_net.proc_net); ++ if (!dev->procfs) ++ return -ENOENT; ++ ++ if (proc_create_data("stats", S_IRUGO | S_IWUGO, dev->procfs, ++ &ibscif_fops ,dev)) ++ return -ENOENT; 
++ ++ return 0; ++} ++#else /* (LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)) */ ++static int ibscif_stats_write(struct file *file, const char __user *buffer, unsigned long count, void *data) ++{ ++ struct ibscif_dev *dev = data; ++ memset(&dev->stats, 0, sizeof dev->stats); ++ return count; ++} ++ ++int ibscif_procfs_add_dev(struct ibscif_dev *dev) ++{ ++ struct proc_dir_entry *entry; ++ ++ dev->procfs = proc_mkdir(dev->ibdev.name, init_net.proc_net); ++ if (!dev->procfs) ++ return -ENOENT; ++ ++ entry = create_proc_read_entry("stats", S_IRUGO | S_IWUGO, dev->procfs, ibscif_stats_read, dev); ++ if (!entry) ++ return -ENOENT; ++ entry->write_proc = ibscif_stats_write; ++ ++ return 0; ++} ++#endif ++ ++void ibscif_procfs_remove_dev(struct ibscif_dev *dev) ++{ ++ if (dev->procfs) ++ remove_proc_entry("stats", dev->procfs); ++ remove_proc_entry(dev->ibdev.name, init_net.proc_net); ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_protocol.c a8/drivers/infiniband/hw/scif/ibscif_protocol.c +--- a7/drivers/infiniband/hw/scif/ibscif_protocol.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_protocol.c 2015-02-23 10:14:37.487809663 -0800 +@@ -0,0 +1,2816 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */
++
++#include "ibscif_driver.h"
++
++#include
++/* dev/wr/qp backpointers overlayed in skb cb[] */
++struct ibscif_skb_cb {
++ struct ibscif_dev *dev;
++ struct ibscif_wr *wr;
++ scif_epd_t scif_ep;
++ struct ibscif_qp *qp; /* for UD only */
++};
++
++#define SET_SKB_DEV(skb,dev0) ((struct ibscif_skb_cb *)&skb->cb)->dev = dev0
++#define SET_SKB_WR(skb,wr0) ((struct ibscif_skb_cb *)&skb->cb)->wr = wr0
++#define SET_SKB_EP(skb,ep0) ((struct ibscif_skb_cb *)&skb->cb)->scif_ep = ep0
++#define SET_SKB_QP(skb,qp0) ((struct ibscif_skb_cb *)&skb->cb)->qp = qp0
++
++#define GET_SKB_DEV(skb) ((struct ibscif_skb_cb *)&skb->cb)->dev
++#define GET_SKB_WR(skb) ((struct ibscif_skb_cb *)&skb->cb)->wr
++#define GET_SKB_EP(skb) ((struct ibscif_skb_cb *)&skb->cb)->scif_ep
++#define GET_SKB_QP(skb) ((struct ibscif_skb_cb *)&skb->cb)->qp
++
++#define hw_addr_equal(h1, h2) (!memcmp(h1, h2, ETH_ALEN))
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0)
++ #define KMAP(x) kmap(x->page)
++ #define KUNMAP(x) kunmap(x->page)
++ #define SET_PAGE(x,y) x->page = y
++ #define GET_PAGE(x) get_page(x->page)
++#else
++ #define KMAP(x) kmap(skb_frag_page(x))
++ #define KUNMAP(x) kunmap(skb_frag_page(x))
++ #define SET_PAGE(x,y) __skb_frag_set_page(x, y)
++ #define GET_PAGE(x) __skb_frag_ref(x)
++#endif
++
++void ibscif_skb_destructor(struct sk_buff *skb)
++{
++ struct ibscif_dev *dev = GET_SKB_DEV(skb);
++
++ /* A sk_buff is now available. */
++ if (atomic_inc_return(&dev->available) == 1)
++ ; /* Could invoke the scheduler here. */
++
++ /* Release the module reference held for this sk_buff. */
++ module_put(THIS_MODULE);
++}
++
++static struct sk_buff *ibscif_alloc_tx_skb(struct ibscif_dev *dev, int hdr_size, int payload_size)
++{
++ struct sk_buff *skb;
++
++ skb = dev_alloc_skb(hdr_size);
++ if (unlikely(!skb))
++ return NULL;
++
++ skb_reset_mac_header(skb);
++ skb_reset_network_header(skb);
++
++ skb->protocol = IBSCIF_PACKET_TYPE;
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ skb->priority = TC_PRIO_CONTROL; /* highest defined priority */
++ skb->dev = (void *) dev;
++ skb->len = hdr_size + payload_size;
++ skb->data_len = payload_size;
++ skb->tail += hdr_size;
++
++ return skb;
++}
++
++static struct sk_buff_head xmit_queue;
++static void ibscif_xmit_work_handler( struct work_struct *context );
++static DECLARE_WORK(ibscif_xmit_work, ibscif_xmit_work_handler);
++static atomic_t xmit_busy = ATOMIC_INIT(0);
++
++static void ibscif_xmit_work_handler( struct work_struct *context )
++{
++ struct sk_buff *skb;
++ scif_epd_t scif_ep;
++ int num_frags;
++ skb_frag_t *frag;
++ void *vaddr;
++ int ret;
++ int hdr_size;
++ int i;
++ struct ibscif_qp *qp;
++
++again:
++ while ((skb = skb_dequeue(&xmit_queue))) {
++ scif_ep = GET_SKB_EP(skb);
++ if (!scif_ep) {
++ printk(KERN_ALERT PFX "%s: NULL scif_ep, skb=%p\n", __func__, skb);
++ goto next;
++ }
++
++ hdr_size = skb->len - skb->data_len;
++ for (i=0; i<hdr_size; ) {
++ ret = scif_send(scif_ep, skb->data+i, hdr_size-i,
++ blocking_send ? SCIF_SEND_BLOCK : 0);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: fail to send header, hdr_size=%d, ret=%d\n", __func__, hdr_size, ret);
++ goto next;
++ }
++ i += ret;
++ }
++
++ num_frags = skb_shinfo(skb)->nr_frags;
++ frag = skb_shinfo(skb)->frags;
++ while (num_frags--) {
++ vaddr = KMAP(frag); /* because scif_send() may cause scheduling */
++ for (i=0; i<frag->size; ) {
++ ret = scif_send(scif_ep, vaddr + frag->page_offset + i,
++ frag->size - i,
++ blocking_send ?
SCIF_SEND_BLOCK : 0); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_send returns %d, frag_size=%d\n", __func__, ret, frag->size); ++ break; ++ } ++ i += ret; ++ } ++ KUNMAP(frag); ++ frag++; ++ } ++next: ++ qp = GET_SKB_QP(skb); ++ if (qp && qp->ibqp.qp_type == IB_QPT_UD) { ++ struct ibscif_full_frame *pdu = (struct ibscif_full_frame*)skb->data; ++ u16 opcode = __be16_to_cpu(pdu->ibscif.hdr.opcode); ++ if (ibscif_pdu_is_last(opcode)) { ++ struct ibscif_wr *wr = GET_SKB_WR(skb); ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ wr->state = WR_COMPLETED; ++ ibscif_process_sq_completions(GET_SKB_QP(skb)); ++ } ++ /* Release the reference held on UD QPs */ ++ ibscif_put_qp(qp); ++ } ++ kfree_skb(skb); ++ } ++ ++ if (!skb_queue_empty(&xmit_queue)) ++ goto again; ++ ++ atomic_set(&xmit_busy, 0); ++} ++ ++static void ibscif_dev_queue_xmit(struct sk_buff *skb) ++{ ++ struct ibscif_dev *dev=NULL; ++ int len = 0; ++ ++ if (skb) { ++ dev = GET_SKB_DEV(skb); ++ len = skb->len; ++ skb_queue_tail(&xmit_queue, skb); ++ } ++ ++ /* only one instance can be enqueued, otherwise there is race condition between scif_send() calls. */ ++ /* notice that the current running worker may miss the newly added item, but it will be picked up in the poll_thread */ ++ if (!atomic_xchg(&xmit_busy, 1)) ++ schedule_work(&ibscif_xmit_work); ++ ++ if (likely(dev)) { ++ DEV_STAT(dev, packets_sent++); ++ DEV_STAT(dev, bytes_sent += len); ++ } ++} ++ ++static int ibscif_create_hdr(struct ibscif_qp *qp, struct ibscif_wr *wr, struct sk_buff *skb, ++ u32 seq_num, u32 wr_len_remaining, int force) ++{ ++ struct ibscif_full_frame *pdu = (struct ibscif_full_frame*)skb->data; ++ u32 sq_seq, iq_seq; ++ u16 opcode; ++ int i; ++ ++ sq_seq = qp->wire.sq.rx.last_in_seq; ++ iq_seq = qp->wire.iq.rx.last_in_seq; ++ qp->wire.sq.rx.last_seq_acked = sq_seq; ++ qp->wire.iq.rx.last_seq_acked = iq_seq; ++ ++ pdu->ibscif.hdr.length = __cpu_to_be16(skb->data_len); ++ if (qp->ibqp.qp_type == IB_QPT_UD) { ++ pdu->ibscif.hdr.dst_qp = __cpu_to_be32(wr->ud.remote_qpn); ++ } ++ else { ++ pdu->ibscif.hdr.dst_qp = __cpu_to_be32(qp->remote_qpn); ++ } ++ pdu->ibscif.hdr.src_qp = __cpu_to_be32(qp->ibqp.qp_num); ++ pdu->ibscif.hdr.seq_num = __cpu_to_be32(seq_num); ++ pdu->ibscif.hdr.sq_ack_num = __cpu_to_be32(sq_seq); ++ pdu->ibscif.hdr.iq_ack_num = __cpu_to_be32(iq_seq); ++ ++ switch (wr->opcode) { ++ case WR_UD: ++ opcode = ibscif_op_ud; ++ if (skb->data_len == wr_len_remaining) { ++ opcode = ibscif_pdu_set_last(opcode); ++ if (wr->flags & IB_SEND_SIGNALED) ++ force = 1; ++ if (wr->flags & IB_SEND_SOLICITED) ++ opcode = ibscif_pdu_set_se(opcode); ++ } ++ pdu->ibscif.ud.msg_length = __cpu_to_be32(wr->length); ++ pdu->ibscif.ud.msg_offset = __cpu_to_be32(wr->length - wr_len_remaining); ++ memset(&pdu->ibscif.ud.grh, 0, 40); ++ break; ++ ++ case WR_SEND: ++ case WR_SEND_WITH_IMM: ++ opcode = ibscif_op_send; ++ if (skb->data_len == wr_len_remaining || opcode == ibscif_op_send_rma) { ++ opcode = ibscif_pdu_set_last(opcode); ++ if (wr->flags & IB_SEND_SIGNALED) ++ force = 1; ++ if (wr->opcode == WR_SEND_WITH_IMM) { ++ opcode = ibscif_pdu_set_immed(opcode); ++ pdu->ibscif.send.immed_data = __cpu_to_be32(wr->send.immediate_data); ++ } else pdu->ibscif.send.immed_data = 0; ++ if (wr->flags & IB_SEND_SOLICITED) ++ opcode = ibscif_pdu_set_se(opcode); ++ } ++ pdu->ibscif.send.msg_id = __cpu_to_be32(wr->msg_id); ++ pdu->ibscif.send.msg_length = __cpu_to_be32(wr->length); ++ pdu->ibscif.send.msg_offset = __cpu_to_be32(wr->length - wr_len_remaining); ++ if 
(wr->use_rma) {
++ opcode = ibscif_op_send_rma;
++ pdu->ibscif.send.rma_id = __cpu_to_be32(wr->rma_id);
++ pdu->ibscif.send.num_rma_addrs = __cpu_to_be32(wr->num_ds);
++ for (i=0; i<wr->num_ds; i++) {
++ pdu->ibscif.send.rma_addrs[i].offset = __cpu_to_be64(wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset);
++ pdu->ibscif.send.rma_addrs[i].length = __cpu_to_be32(wr->ds_list[i].length);
++ }
++ }
++ break;
++
++ case WR_RDMA_READ:
++ opcode = ibscif_op_read;
++ pdu->ibscif.read_req.rdma_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.read_req.rdma_key = __cpu_to_be32(wr->read.rkey);
++ pdu->ibscif.read_req.rdma_length= __cpu_to_be32(wr->read.remote_length);
++ pdu->ibscif.read_req.rdma_address = __cpu_to_be64(wr->read.remote_address);
++ if (wr->use_rma) {
++ opcode = ibscif_op_read_rma;
++ pdu->ibscif.read_req.num_rma_addrs = __cpu_to_be32(wr->num_ds);
++ for (i=0; i<wr->num_ds; i++) {
++ pdu->ibscif.read_req.rma_addrs[i].offset = __cpu_to_be64(wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset);
++ pdu->ibscif.read_req.rma_addrs[i].length = __cpu_to_be32(wr->ds_list[i].length);
++ }
++ }
++ break;
++
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM:
++ opcode = ibscif_op_write;
++ if ((enum ib_wr_opcode)wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
++ opcode = ibscif_pdu_set_immed(opcode);
++ pdu->ibscif.write.immed_data = __cpu_to_be32(wr->write.immediate_data);
++ if (wr->flags & IB_SEND_SOLICITED)
++ opcode = ibscif_pdu_set_se(opcode);
++ } else pdu->ibscif.write.immed_data = 0;
++ if (skb->data_len == wr_len_remaining || opcode == ibscif_op_write_rma) {
++ opcode = ibscif_pdu_set_last(opcode);
++ if (wr->flags & IB_SEND_SIGNALED)
++ force = 1;
++ }
++ pdu->ibscif.write.msg_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.write.rdma_key = __cpu_to_be32(wr->write.rkey);
++ pdu->ibscif.write.rdma_address = __cpu_to_be64(wr->write.remote_address +
++ (wr->length - wr_len_remaining));
++ if (wr->use_rma) {
++ opcode = ibscif_op_write_rma;
++ if (wr->opcode == WR_RDMA_WRITE_WITH_IMM)
++ opcode = ibscif_pdu_set_immed(opcode);
++ pdu->ibscif.write.rma_id = __cpu_to_be32(wr->rma_id);
++ pdu->ibscif.write.rma_length = __cpu_to_be32(wr->length);
++ pdu->ibscif.write.num_rma_addrs = __cpu_to_be32(wr->num_ds);
++ for (i=0; i<wr->num_ds; i++) {
++ pdu->ibscif.write.rma_addrs[i].offset = __cpu_to_be64(wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset);
++ pdu->ibscif.write.rma_addrs[i].length = __cpu_to_be32(wr->ds_list[i].length);
++ }
++ }
++ break;
++
++ case WR_ATOMIC_CMP_AND_SWP:
++ opcode = ibscif_pdu_set_last(ibscif_op_comp_swap);
++ pdu->ibscif.comp_swap.atomic_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.comp_swap.atomic_key = __cpu_to_be32(wr->cmp_swp.rkey);
++ pdu->ibscif.comp_swap.comp_data = __cpu_to_be64(wr->cmp_swp.cmp_operand);
++ pdu->ibscif.comp_swap.swap_data = __cpu_to_be64(wr->cmp_swp.swp_operand);
++ pdu->ibscif.comp_swap.atomic_address = __cpu_to_be64(wr->cmp_swp.remote_address);
++ break;
++
++ case WR_ATOMIC_FETCH_AND_ADD:
++ opcode = ibscif_pdu_set_last(ibscif_op_fetch_add);
++ pdu->ibscif.fetch_add.atomic_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.fetch_add.atomic_key = __cpu_to_be32(wr->fetch_add.rkey);
++ pdu->ibscif.fetch_add.add_data = __cpu_to_be64(wr->fetch_add.add_operand);
++ pdu->ibscif.fetch_add.atomic_address = __cpu_to_be64(wr->fetch_add.remote_address);
++ break;
++
++ case WR_RDMA_READ_RSP:
++ opcode = ibscif_op_read_rsp;
++ if (skb->data_len == wr_len_remaining)
++ opcode = ibscif_pdu_set_last(opcode);
++ pdu->ibscif.read_rsp.rdma_id =
__cpu_to_be32(wr->msg_id); ++ pdu->ibscif.read_rsp.rdma_offset = __cpu_to_be32(wr->length - wr_len_remaining); ++ break; ++ ++ case WR_ATOMIC_RSP: ++ opcode = ibscif_pdu_set_last(wr->atomic_rsp.opcode); ++ pdu->ibscif.atomic_rsp.atomic_id = __cpu_to_be32(wr->msg_id); ++ pdu->ibscif.atomic_rsp.orig_data = __cpu_to_be64(wr->atomic_rsp.orig_data); ++ break; ++ ++ case WR_RMA_RSP: ++ opcode = ibscif_op_rma_rsp; ++ pdu->ibscif.rma_rsp.rma_id = __cpu_to_be32(wr->msg_id); ++ pdu->ibscif.rma_rsp.xfer_length = __cpu_to_be32(wr->rma_rsp.xfer_length); ++ pdu->ibscif.rma_rsp.error = __cpu_to_be32(wr->rma_rsp.error); ++ break; ++ default: ++ printk(KERN_ERR PFX "%s() invalid opcode %d\n", __func__, wr->opcode); ++ return 1; ++ } ++ ++ if (force) ++ opcode = ibscif_pdu_set_force_ack(opcode); ++ ++ pdu->ibscif.hdr.opcode = __cpu_to_be16(opcode); ++ ++ return 0; ++} ++ ++static struct sk_buff* ibscif_alloc_pdu(struct ibscif_dev *dev, struct ibscif_qp *qp, struct ibscif_wr *wr, ++ int hdr_size, u32 seq_num, u32 payload_size, u32 len_remaining, int force) ++{ ++ struct sk_buff *skb; ++ struct ibscif_full_frame *pdu; ++ ++ if (unlikely(!qp->conn && qp->ibqp.qp_type != IB_QPT_UD)) { ++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__); ++ return NULL; ++ } ++ ++ if (!atomic_add_unless(&dev->available, -1, 0)) { ++ printk(KERN_NOTICE PFX "%s throttled by available tx buffer limit\n", dev->ibdev.name); ++ DEV_STAT(dev, unavailable++); ++ return NULL; ++ } ++ ++ /* Get an skb for this protocol packet. */ ++ skb = ibscif_alloc_tx_skb(dev, hdr_size, payload_size); ++ if (unlikely(!skb)) ++ goto bail; ++ ++ /* Hold a reference on the module until skb->destructor is called. */ ++ __module_get(THIS_MODULE); ++ skb->destructor = ibscif_skb_destructor; ++ ++ SET_SKB_DEV(skb, dev); ++ SET_SKB_WR(skb, wr); ++ ++ if (qp->ibqp.qp_type == IB_QPT_UD) { ++ struct ibscif_conn *conn; ++ int flag = qp->ibqp.qp_num > wr->ud.remote_qpn; ++ conn = ibscif_get_conn(qp->local_node_id, wr->ud.remote_node_id, flag); ++ if (unlikely(!conn)) { ++ kfree_skb(skb); ++ goto bail; ++ } ++ ++ ibscif_qp_add_ud_conn(qp, conn); ++ ibscif_put_conn(conn); ++ SET_SKB_EP(skb, conn->ep); ++ SET_SKB_QP(skb, qp); ++ ++ /* Reference UD QPs until the wr is transmitted by ibscif_xmit_work_handler */ ++ kref_get(&qp->ref); ++ } ++ else { ++ SET_SKB_EP(skb, qp->conn->ep); ++ } ++ ++ /* Construct the header and copy it to the skb. */ ++ if (unlikely(ibscif_create_hdr(qp, wr, skb, seq_num, len_remaining, force))) { ++ kfree_skb(skb); ++ goto bail; ++ } ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(hdr_size); ++ ++ return skb; ++bail: ++ atomic_inc(&dev->available); ++ return NULL; ++} ++ ++static int ibscif_send_null_pdu(struct ibscif_dev *dev, struct ibscif_qp *qp, struct ibscif_wr *wr, u32 hdr_size) ++{ ++ struct sk_buff *skb; ++ ++ /* Allocate an initialized skb with a PDU header. 
*/
++ skb = ibscif_alloc_pdu(dev, qp, wr, hdr_size, wr->sar.seg.starting_seq, 0, 0, 0);
++ if (unlikely(!skb))
++ return 0;
++
++ ibscif_dev_queue_xmit(skb);
++ return 1;
++}
++
++static int get_hdr_size_from_wr(struct ibscif_wr *wr)
++{
++ switch (wr->opcode) {
++ case WR_UD: return sizeof(struct ud_hdr);
++ case WR_SEND:
++ case WR_SEND_WITH_IMM: return sizeof(struct send_hdr);
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM: return sizeof(struct write_hdr);
++ case WR_RDMA_READ: return sizeof(struct read_req_hdr);
++ case WR_ATOMIC_CMP_AND_SWP: return sizeof(struct comp_swap_hdr);
++ case WR_ATOMIC_FETCH_AND_ADD: return sizeof(struct fetch_add_hdr);
++ case WR_RDMA_READ_RSP: return sizeof(struct read_rsp_hdr);
++ case WR_ATOMIC_RSP: return sizeof(struct atomic_rsp_hdr);
++ case WR_RMA_RSP: return sizeof(struct rma_rsp_hdr);
++ default: return 0;
++ }
++}
++
++static int get_rma_addr_size_from_wr(struct ibscif_wr *wr)
++{
++ switch (wr->opcode) {
++ case WR_UD: return 0;
++ case WR_SEND:
++ case WR_SEND_WITH_IMM:
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM:
++ case WR_RDMA_READ: return wr->num_ds * sizeof(struct rma_addr);
++ case WR_ATOMIC_CMP_AND_SWP: return 0;
++ case WR_ATOMIC_FETCH_AND_ADD: return 0;
++ case WR_RDMA_READ_RSP: return 0;
++ case WR_ATOMIC_RSP: return 0;
++ case WR_RMA_RSP: return 0;
++ default: return 0;
++ }
++}
++
++static int setup_rma_addrs(struct ibscif_wq *wq, struct ibscif_wr *wr)
++{
++ struct ibscif_ds *ds;
++ int i;
++
++ if (!wr->num_ds)
++ return 1;
++
++ for (i=0; i<wr->num_ds; i++) {
++ ds = &wr->ds_list[i];
++ if (!ds->current_mreg)
++ ds->current_mreg = ibscif_mr_get_mreg(ds->mr, wq->qp->conn);
++
++ if (!ds->current_mreg)
++ return 0;
++ }
++
++ return 1;
++}
++
++/* when necessary SCIF will allocate temp buffer to align up cache line offset.
++ * * so we only need to use roffset to calculate the dma size.
++ * */
++static inline int ibscif_dma_size(u32 len, u64 roffset)
++{
++ u32 head, tail;
++
++ tail = (roffset + len) % 64;
++ head = (64 - roffset % 64) % 64;
++ if (len >= head + tail)
++ return (len - head - tail);
++ else
++ return 0;
++}
++
++static void ibscif_send_ack(struct ibscif_qp *qp); /* defined later in this file */
++
++static int ibscif_try_fast_rdma(struct ibscif_wq *wq, struct ibscif_wr *wr)
++{
++ struct ibscif_qp *qp;
++ int i, err;
++ u64 loffset, roffset;
++ u32 total_length, rdma_length, xfer_len;
++ u64 raddress;
++ u32 rkey;
++ enum ib_access_flags access;
++ u32 dma_size = 0;
++ int rma_flag = 0;
++
++ IBSCIF_PERF_SAMPLE(2, 0);
++
++ switch (wr->opcode) {
++ case WR_RDMA_WRITE:
++ raddress = wr->write.remote_address;
++ rkey = wr->write.rkey;
++ total_length = rdma_length = wr->length;
++ access = IB_ACCESS_REMOTE_WRITE;
++ break;
++
++ case WR_RDMA_READ:
++ raddress = wr->read.remote_address;
++ rkey = wr->read.rkey;
++ total_length = rdma_length = wr->read.remote_length; /* wr->length is 0 */
++ access = IB_ACCESS_REMOTE_READ;
++ break;
++
++ default:
++ return 0;
++ }
++
++ qp = wq->qp;
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return 0;
++ }
++
++ if (!setup_rma_addrs(wq, wr)) {
++ DEV_STAT(qp->dev, fast_rdma_fallback++);
++ return 0;
++ }
++
++ roffset = IBSCIF_MR_VADDR_TO_OFFSET( rkey, raddress );
++
++ for (i=0; i<wr->num_ds; i++) {
++ if (rdma_length == 0)
++ break;
++
++ loffset = wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset;
++ xfer_len = min(wr->ds_list[i].length, rdma_length);
++ if (xfer_len == 0)
++ continue;
++
++ IBSCIF_PERF_SAMPLE(3, 0);
++
++ dma_size = ibscif_dma_size(xfer_len, roffset);
++
++ if (i==wr->num_ds-1)
++ rma_flag = dma_size ? SCIF_RMA_SYNC : 0;
++
++ if (wr->opcode == WR_RDMA_WRITE) {
++ err = scif_writeto(wq->qp->conn->ep, loffset, xfer_len, roffset, rma_flag|SCIF_RMA_ORDERED);
++ if (err)
++ printk(KERN_INFO PFX "%s(): error writing ordered message, size=%d, err=%d.\n", __func__, xfer_len, err);
++ }
++ else {
++ err = scif_readfrom(wq->qp->conn->ep, loffset, xfer_len, roffset, rma_flag);
++ if (err)
++ printk(KERN_INFO PFX "%s(): error reading the message, size=%d, err=%d.\n", __func__, xfer_len, err);
++ }
++
++ IBSCIF_PERF_SAMPLE(4, 0);
++
++ if (err){
++ DEV_STAT(qp->dev, fast_rdma_fallback++);
++ return 0;
++ }
++
++ roffset += xfer_len;
++ rdma_length -= xfer_len;
++ }
++
++ if (rdma_length)
++ printk(KERN_INFO PFX "%s(): remaining rdma_length=%d.\n", __func__, rdma_length);
++
++ IBSCIF_PERF_SAMPLE(5, 0);
++
++ /* complete the wr */
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ wr->state = WR_COMPLETED;
++ wr->sar.rea.final_length = total_length - rdma_length;
++
++ /* we can't call ibscif_process_sq_completions here because we are holding the sq lock.
++ * set the flag and let the upper level make the call */
++ wq->fast_rdma_completions = 1;
++
++ if (wr->opcode == WR_RDMA_WRITE)
++ DEV_STAT(qp->dev, fast_rdma_write++);
++ else
++ DEV_STAT(qp->dev, fast_rdma_read++);
++
++ /* the fast rdma protocol doesn't send any packet, and thus can not piggyback any ack
++ * for the peer. send separate ack packet when necessary. */
++ if (qp->wire.sq.rx.last_seq_acked < qp->wire.sq.rx.last_in_seq ||
++ qp->wire.iq.rx.last_seq_acked < qp->wire.iq.rx.last_in_seq) {
++ ibscif_send_ack(qp);
++ DEV_STAT(qp->dev, fast_rdma_force_ack++);
++ }
++
++ IBSCIF_PERF_SAMPLE(8, 0);
++
++ return 1;
++}
++
++/*
++ * Setup for a fresh data descriptor.
++ */ ++#define DS_SETUP(ds, mr, page_offset, page_index, ds_len_left) \ ++do { \ ++ mr = ds->mr; \ ++ ds_len_left = ds->length; \ ++ page_offset = ds->offset + (mr->addr & ~PAGE_MASK); \ ++ page_index = page_offset >> PAGE_SHIFT; \ ++ page_offset &= ~PAGE_MASK; \ ++} while(0) ++ ++/* ++ * Setup for page crossing within a data descriptor. ++ */ ++#define NEXT_PAGE(ds, mr, page_offset, page_index, ds_len_left) \ ++do { \ ++ if (!ds_len_left) { \ ++ ds++; \ ++ DS_SETUP(ds, mr, page_offset, page_index, ds_len_left); \ ++ } else { \ ++ page_index++; \ ++ BUG_ON(!(mr->npages > page_index)); \ ++ page_offset = 0; \ ++ } \ ++} while(0) ++ ++/* ++ * Setup the data descriptor, page, and offset for specified sequence number ++ */ ++#define SETUP_BY_SEQ(wr, ds, mr, from_seq, wr_length, page_offset, page_index, \ ++ ds_len_left, max_payload) \ ++do { \ ++ u32 i, frag_len_max; \ ++ \ ++ DS_SETUP(ds, mr, page_offset, page_index, ds_len_left); \ ++ for (i = wr->sar.seg.starting_seq; seq_before(i, from_seq); i++) { \ ++ num_frags = 0; \ ++ payload_left = max_payload; \ ++ while (payload_left && (num_frags < MAX_SKB_FRAGS)) { \ ++ frag_len_max = min(ds_len_left, (u32)(PAGE_SIZE - page_offset));\ ++ if (wr_length > payload_left) { \ ++ if (payload_left > frag_len_max) { \ ++ ds_len_left -= frag_len_max; \ ++ NEXT_PAGE(ds, mr, page_offset, \ ++ page_index, ds_len_left); \ ++ } else { \ ++ frag_len_max = payload_left; /* frag->size */ \ ++ ds_len_left -= payload_left; \ ++ page_offset += payload_left; \ ++ } \ ++ } else { \ ++ if (wr_length > frag_len_max) { \ ++ ds_len_left -= frag_len_max; \ ++ NEXT_PAGE(ds, mr, page_offset, \ ++ page_index, ds_len_left); \ ++ } else { \ ++ printk(KERN_ERR PFX \ ++ "from_seq (%d) botch wr %p opcode %d length %d\n", \ ++ from_seq, wr, wr->opcode, wr_length); \ ++ return 0; \ ++ } \ ++ } \ ++ wr_length -= frag_len_max; \ ++ payload_left -= frag_len_max; \ ++ num_frags++; \ ++ } \ ++ } \ ++} while(0) ++ ++int ibscif_xmit_wr(struct ibscif_wq *wq, struct ibscif_wr *wr, int tx_limit, int retransmit, u32 from_seq, u32 *posted) ++{ ++ struct ibscif_dev *dev; ++ struct ibscif_qp *qp; ++ struct ibscif_ds *ds; ++ struct ibscif_mr *mr; ++ int hdr_size, page_index, num_frags, num_xmited; ++ u32 max_payload, wr_length, page_offset, ds_len_left, payload_left; ++ ++ /* Try to process RDMA read/write directly with SCIF functions. ++ * The usual reason for failure is that the remote memory has not yet been ++ * registered with SCIF. The normal packet based path should handle that. ++ */ ++ if (host_proxy && wq->qp->local_node_id>0 && wq->qp->remote_node_id==0) { ++ /* don't try fast rdma becasue we want to let the host do the data transfer */ ++ } ++ else if (fast_rdma) { ++ num_xmited = 0; ++ if (ibscif_try_fast_rdma(wq, wr)) ++ goto finish2; ++ } ++ ++ if (!tx_limit) { ++ printk(KERN_INFO PFX "%s() called with tx_limit of zero\n", __func__); ++ return 0; ++ } ++ ++ qp = wq->qp; ++ dev = qp->dev; ++ hdr_size = get_hdr_size_from_wr(wr); ++ max_payload = qp->mtu - hdr_size; ++ ++ if (wr->use_rma) { ++ struct sk_buff *skb; ++ ++ wr_length = wr->length; ++ wr->sar.seg.starting_seq = from_seq; ++ wr->sar.seg.ending_seq = from_seq; ++ wr->state = WR_STARTED; ++ ++ num_xmited = 0; ++ if (setup_rma_addrs(wq, wr)) { ++ /* Make room in the header for RMA addresses */ ++ hdr_size += get_rma_addr_size_from_wr(wr); ++ ++ /* Allocate an initialized skb with PDU header. 
*/ ++ skb = ibscif_alloc_pdu(dev, qp, wr, hdr_size, from_seq, 0, wr_length, 0); ++ if (likely(skb)) { ++ ibscif_dev_queue_xmit(skb); ++ num_xmited++; ++ from_seq++; ++ } ++ } ++ else ++ printk(KERN_ALERT PFX "%s: fail to set up RMA addresses for the work request.\n", __func__); ++ ++ goto finish; ++ } ++ ++ if (!wr->sar.seg.current_ds) { ++ /* ++ * This is a fresh send so intialize the wr by setting the static ++ * parts of the header and sequence number range for this wr. ++ */ ++ wr_length = wr->length; ++ wr->sar.seg.starting_seq = from_seq; ++ wr->sar.seg.ending_seq = from_seq; ++ if (wr_length > max_payload) { ++ wr->sar.seg.ending_seq += (wr_length / max_payload); ++ if (!(wr_length % max_payload)) ++ wr->sar.seg.ending_seq--; ++ } ++ ++ wr->state = WR_STARTED; ++ ++ /* ++ * If this request has a payload, setup for fragmentation. ++ * Otherwise, send it on its way. ++ */ ++ if (wr_length) { ++ ds = wr->ds_list; ++ DS_SETUP(ds, mr, page_offset, page_index, ds_len_left); ++ } else { ++ num_xmited = ibscif_send_null_pdu(dev, qp, wr, hdr_size); ++ /* from_seq must always advanced even in null PDU cases. */ ++ from_seq++; ++ goto finish; ++ } ++ } else { ++ /* We're picking up from a paritally sent request. */ ++ ds = wr->sar.seg.current_ds; ++ mr = ds->mr; ++ wr_length = wr->sar.seg.wr_length_remaining; ++ ds_len_left = wr->sar.seg.ds_length_remaining; ++ page_index = wr->sar.seg.current_page_index; ++ page_offset = wr->sar.seg.current_page_offset; ++ from_seq = wr->sar.seg.next_seq; ++ } ++ ++ /* Ok, let's break this bad-boy up. */ ++ num_xmited = 0; ++ while (wr_length && (num_xmited < tx_limit) && (qp->state == QP_CONNECTED)) { ++ struct sk_buff *skb; ++ skb_frag_t *frag; ++ ++ /* Allocate an initialized skb with PDU header. */ ++ skb = ibscif_alloc_pdu(dev, qp, wr, hdr_size, from_seq, min(wr_length, max_payload), ++ wr_length, retransmit && (num_xmited == (tx_limit - 1))); ++ if (unlikely(!skb)) ++ break; ++ ++ /* Update sequence number for next pass. */ ++ from_seq++; ++ ++ /* Fill the skb fragment list. */ ++ frag = skb_shinfo(skb)->frags; ++ num_frags = 0; ++ payload_left = max_payload; ++ ++ while (payload_left && (num_frags < MAX_SKB_FRAGS)) { ++ u32 frag_len_max; ++ ++ SET_PAGE(frag, mr->page[page_index]); ++ frag->page_offset = page_offset; ++ ++ /* Take a reference on the page - kfree_skb will release. */ ++ GET_PAGE(frag); ++ ++ frag_len_max = min(ds_len_left, (u32)(PAGE_SIZE - page_offset)); ++ if (wr_length > payload_left) { ++ if (payload_left > frag_len_max) { ++ /* Deal with page boundary crossing. */ ++ frag->size = frag_len_max; ++ ds_len_left -= frag_len_max; ++ NEXT_PAGE(ds, mr, page_offset, page_index, ds_len_left); ++ } else { ++ frag->size = payload_left; ++ ds_len_left -= payload_left; ++ page_offset += payload_left; ++ } ++ } else { ++ if (wr_length > frag_len_max) { ++ /* Deal with page boundary crossing. */ ++ frag->size = frag_len_max; ++ ds_len_left -= frag_len_max; ++ NEXT_PAGE(ds, mr, page_offset, page_index, ds_len_left); ++ } else { ++ frag->size = wr_length; ++ payload_left -= wr_length; ++ wr_length = 0; ++ num_frags++; /* Change from index to number. */ ++ break; ++ } ++ } ++ ++ wr_length -= frag->size; ++ payload_left -= frag->size; ++ num_frags++; ++ frag++; ++ } ++ skb_shinfo(skb)->nr_frags = num_frags; ++ ++ /* Check if we need to do a fixup because we ran out of frags. 
*/ ++ if ((num_frags == MAX_SKB_FRAGS) && wr_length) { ++ struct ibscif_full_frame *pdu = (struct ibscif_full_frame*)skb->data; ++ skb->len = hdr_size + (max_payload - payload_left); ++ skb->data_len = (max_payload - payload_left); ++ pdu->ibscif.hdr.length = __cpu_to_be16(skb->data_len); ++ pdu->ibscif.hdr.opcode = __cpu_to_be16(__be16_to_cpu(pdu->ibscif.hdr.opcode) & ~ibscif_last_flag); ++ } ++ ++ /* ++ * Send it. ++ */ ++ ibscif_dev_queue_xmit(skb); ++ num_xmited++; ++ } ++ ++ /* ++ * Update state. If this is a retransmit, don't update anything. If not and ++ * there's more to do on the wr, save state. Otherwise, setup for next wr. ++ */ ++ if (wr_length && !wr->use_rma) { ++ wr->sar.seg.current_ds = ds; ++ wr->sar.seg.wr_length_remaining = wr_length; ++ wr->sar.seg.ds_length_remaining = ds_len_left; ++ wr->sar.seg.current_page_index = page_index; ++ wr->sar.seg.current_page_offset = page_offset; ++ } else { ++finish: if (wr->opcode != WR_UD) ++ wr->state = WR_WAITING_FOR_ACK; ++finish2: wq->next_wr = (wq->next_wr + 1) % wq->size; ++ } ++ wr->sar.seg.next_seq = from_seq; ++ if (posted) ++ *posted = from_seq; ++ ++ return num_xmited; ++} ++ ++static struct sk_buff *ibscif_create_disconnect_hdr(struct ibscif_dev *dev, u32 src_qpn, ++ u32 dst_qpn, enum ibscif_reason reason) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_alloc_tx_skb(dev, sizeof pdu->ibscif.disconnect, 0); ++ if (unlikely(!skb)) { ++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__); ++ return NULL; ++ } ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ ++ /* The eth_hdr and ack fields are set by the caller. */ ++ pdu->ibscif.disconnect.hdr.opcode = __cpu_to_be16(ibscif_op_disconnect); ++ pdu->ibscif.disconnect.hdr.length = 0; /* Length has no meaning. */ ++ pdu->ibscif.disconnect.hdr.dst_qp = __cpu_to_be32(dst_qpn); ++ pdu->ibscif.disconnect.hdr.src_qp = __cpu_to_be32(src_qpn); ++ pdu->ibscif.disconnect.hdr.seq_num = 0; /* seq_num has no meaning. */ ++ pdu->ibscif.disconnect.hdr.hdr_size = __cpu_to_be16(sizeof(pdu->ibscif.disconnect)); ++ pdu->ibscif.disconnect.reason = __cpu_to_be32(reason); ++ ++ SET_SKB_DEV(skb, dev); ++ SET_SKB_WR(skb, NULL); ++ ++ return skb; ++} ++ ++void ibscif_send_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason) ++{ ++ struct ibscif_dev *dev = qp->dev; ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ if (qp->ibqp.qp_type == IB_QPT_UD) ++ return; ++ ++ if (qp->loopback) { ++ ibscif_loopback_disconnect(qp, reason); ++ return; ++ } ++ ++ if (unlikely(!qp->conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__); ++ return; ++ } ++ ++ skb = ibscif_create_disconnect_hdr(dev, qp->ibqp.qp_num, qp->remote_qpn, reason); ++ if (unlikely(!skb)) ++ return; ++ ++ SET_SKB_EP(skb, qp->conn->ep); ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ ++ pdu->ibscif.disconnect.hdr.sq_ack_num = __cpu_to_be32(qp->wire.sq.rx.last_in_seq); ++ pdu->ibscif.disconnect.hdr.iq_ack_num = __cpu_to_be32(qp->wire.iq.rx.last_in_seq); ++ ++ ibscif_dev_queue_xmit(skb); ++} ++ ++void ibscif_reflect_disconnect(struct ibscif_qp *qp, struct base_hdr *hdr, struct sk_buff *in_skb, enum ibscif_reason reason) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ if (!qp || IS_ERR(qp)) { ++ if (qp != ERR_PTR(-ENOENT) && verbose) ++ printk(KERN_ALERT PFX "%s: qp=%p hdr=%p in_skb=%p reason=%d\n", __func__, qp, hdr, in_skb, reason); ++ return; ++ } ++ ++ /* Don't send a disconnect for a disconnect. 
*/ ++ if (ibscif_pdu_base_type(hdr->opcode) == ibscif_op_disconnect) ++ return; ++ ++ if (!qp->conn || !qp->conn->ep) ++ return; ++ ++ skb = ibscif_create_disconnect_hdr((void *)in_skb->dev, hdr->dst_qp, hdr->src_qp, reason); ++ if (unlikely(!skb)) ++ return; ++ ++ SET_SKB_EP(skb, qp->conn->ep); ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ ++ pdu->ibscif.disconnect.hdr.sq_ack_num = 0; /* sq_ack_num has no meaning. */ ++ pdu->ibscif.disconnect.hdr.iq_ack_num = 0; /* iq_ack_num has no meaning. */ ++ ++ ibscif_dev_queue_xmit(skb); ++} ++ ++static struct sk_buff *ibscif_create_ack_hdr(struct ibscif_qp *qp, int size) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ u32 sq_seq, iq_seq; ++ ++ if (unlikely(!qp->conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__); ++ return NULL; ++ } ++ ++ skb = ibscif_alloc_tx_skb(qp->dev, size, 0); ++ if (unlikely(!skb)) { ++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__); ++ return NULL; ++ } ++ ++ SET_SKB_DEV(skb, qp->dev); ++ SET_SKB_WR(skb, NULL); ++ SET_SKB_EP(skb, qp->conn->ep); ++ ++ sq_seq = qp->wire.sq.rx.last_in_seq; ++ iq_seq = qp->wire.iq.rx.last_in_seq; ++ qp->wire.sq.rx.last_seq_acked = sq_seq; ++ qp->wire.iq.rx.last_seq_acked = iq_seq; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ ++ /* The opcode field set by the caller. */ ++ pdu->ibscif.hdr.length = 0; /* Length has no meaning. */ ++ pdu->ibscif.hdr.dst_qp = __cpu_to_be32(qp->remote_qpn); ++ pdu->ibscif.hdr.src_qp = __cpu_to_be32(qp->ibqp.qp_num); ++ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */ ++ pdu->ibscif.hdr.sq_ack_num = __cpu_to_be32(sq_seq); ++ pdu->ibscif.hdr.iq_ack_num = __cpu_to_be32(iq_seq); ++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size); ++ ++ return skb; ++} ++ ++static void ibscif_send_ack(struct ibscif_qp *qp) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_create_ack_hdr(qp, sizeof pdu->ibscif.ack); ++ if (unlikely(!skb)) ++ return; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.ack.hdr.opcode = __cpu_to_be16(ibscif_op_ack); ++ ++ ibscif_dev_queue_xmit(skb); ++} ++ ++static struct sk_buff *ibscif_create_close_hdr(struct ibscif_conn *conn, int size) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ if (unlikely(!conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: conn == NULL\n", __func__); ++ return NULL; ++ } ++ ++ skb = ibscif_alloc_tx_skb(conn->dev, size, 0); ++ if (unlikely(!skb)) { ++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__); ++ return NULL; ++ } ++ ++ SET_SKB_DEV(skb, conn->dev); ++ SET_SKB_WR(skb, NULL); ++ SET_SKB_EP(skb, conn->ep); ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ ++ /* The opcode field set by the caller. */ ++ pdu->ibscif.hdr.length = 0; /* Length has no meaning. */ ++ pdu->ibscif.hdr.dst_qp = 0; /* unused */ ++ pdu->ibscif.hdr.src_qp = 0; /* unused */ ++ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. 
*/ ++ pdu->ibscif.hdr.sq_ack_num = 0; /* unused */ ++ pdu->ibscif.hdr.iq_ack_num = 0; /* unused */ ++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size); ++ ++ return skb; ++} ++ ++void ibscif_send_close(struct ibscif_conn *conn) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_create_close_hdr(conn, sizeof pdu->ibscif.close); ++ if (unlikely(!skb)) ++ return; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.close.hdr.opcode = __cpu_to_be16(ibscif_op_close); ++ ++ ibscif_dev_queue_xmit(skb); ++} ++ ++void ibscif_send_reopen(struct ibscif_conn *conn) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_create_close_hdr(conn, sizeof pdu->ibscif.close); ++ if (unlikely(!skb)) ++ return; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.close.hdr.opcode = __cpu_to_be16(ibscif_op_reopen); ++ ++ ibscif_dev_queue_xmit(skb); ++} ++ ++static struct sk_buff *ibscif_create_cm_hdr(struct ibscif_conn *conn, int size) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ if (unlikely(!conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: conn == NULL\n", __func__); ++ return NULL; ++ } ++ ++ skb = ibscif_alloc_tx_skb(conn->dev, size, 0); ++ if (unlikely(!skb)) { ++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__); ++ return NULL; ++ } ++ ++ SET_SKB_DEV(skb, conn->dev); ++ SET_SKB_WR(skb, NULL); ++ SET_SKB_EP(skb, conn->ep); ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ ++ pdu->ibscif.hdr.opcode = __cpu_to_be16(ibscif_op_cm); ++ pdu->ibscif.hdr.length = 0; /* Length has no meaning. */ ++ pdu->ibscif.hdr.dst_qp = 0; /* unused */ ++ pdu->ibscif.hdr.src_qp = 0; /* unused */ ++ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */ ++ pdu->ibscif.hdr.sq_ack_num = 0; /* unused */ ++ pdu->ibscif.hdr.iq_ack_num = 0; /* unused */ ++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size); ++ ++ return skb; ++} ++ ++int ibscif_send_cm_req(struct ibscif_cm *cm_ctx) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen); ++ if (unlikely(!skb)) ++ return -ENOMEM; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.cm.req_ctx = __cpu_to_be64((u64)(uintptr_t)cm_ctx); ++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_REQ); ++ pdu->ibscif.cm.port = __cpu_to_be32((u32)cm_ctx->remote_addr.sin_port); ++ pdu->ibscif.cm.qpn = __cpu_to_be32(cm_ctx->qpn); ++ pdu->ibscif.cm.plen = __cpu_to_be32(cm_ctx->plen); ++ memcpy(pdu->ibscif.cm.pdata, cm_ctx->pdata, cm_ctx->plen); ++ ++ ibscif_dev_queue_xmit(skb); ++ ++ return 0; ++} ++ ++int ibscif_send_cm_rep(struct ibscif_cm *cm_ctx) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen); ++ if (unlikely(!skb)) ++ return -ENOMEM; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.cm.req_ctx = __cpu_to_be64(cm_ctx->peer_context); ++ pdu->ibscif.cm.rep_ctx = __cpu_to_be64((__u64)cm_ctx); ++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_REP); ++ pdu->ibscif.cm.qpn = __cpu_to_be32(cm_ctx->qpn); ++ pdu->ibscif.cm.status = __cpu_to_be32(0); ++ pdu->ibscif.cm.plen = __cpu_to_be32(cm_ctx->plen); ++ memcpy(pdu->ibscif.cm.pdata, cm_ctx->pdata, cm_ctx->plen); ++ ++ ibscif_dev_queue_xmit(skb); ++ ++ return 0; ++} ++ ++int ibscif_send_cm_rej(struct ibscif_cm *cm_ctx, const void *pdata, u8 plen) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = 
ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + plen); ++ if (unlikely(!skb)) ++ return -ENOMEM; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.cm.req_ctx = __cpu_to_be64(cm_ctx->peer_context); ++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_REJ); ++ pdu->ibscif.cm.status = __cpu_to_be32(-ECONNREFUSED); ++ pdu->ibscif.cm.plen = __cpu_to_be32((u32)plen); ++ memcpy(pdu->ibscif.cm.pdata, pdata, plen); ++ ++ ibscif_dev_queue_xmit(skb); ++ ++ return 0; ++} ++ ++int ibscif_send_cm_rtu(struct ibscif_cm *cm_ctx) ++{ ++ struct ibscif_full_frame *pdu; ++ struct sk_buff *skb; ++ ++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm); ++ if (unlikely(!skb)) ++ return -ENOMEM; ++ ++ pdu = (struct ibscif_full_frame *)skb->data; ++ pdu->ibscif.cm.rep_ctx = __cpu_to_be64(cm_ctx->peer_context); ++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_RTU); ++ ++ ibscif_dev_queue_xmit(skb); ++ ++ return 0; ++} ++ ++/* ---------------------- tx routines above this line ---------------------- */ ++/* ---------------------- rx routines below this line ---------------------- */ ++ ++static void ibscif_protocol_error(struct ibscif_qp *qp, enum ibscif_reason reason) ++{ ++ printk(KERN_NOTICE PFX "Disconnect due to protocol error %d\n", reason); ++ ibscif_qp_internal_disconnect(qp, reason); ++} ++ ++int ibscif_process_sq_completions(struct ibscif_qp *qp) ++{ ++ struct ibscif_cq *cq = to_cq(qp->ibqp.send_cq); ++ struct ibscif_wq *sq = &qp->sq; ++ struct ibscif_wr *wr; ++ struct ibscif_wc *wc; ++ int index, err = 0, i; ++ ++ spin_lock_bh(&sq->lock); ++ ++ /* Prevent divide by zero traps on wrap math. */ ++ if (!sq->size) ++ goto out; ++ ++ /* Iterate the send queue looking for defered completions. */ ++ for (i=sq->completions; idepth; i++) { ++ index = (sq->head + i) % sq->size; ++ ++ wr = ibscif_get_wr(sq, index); ++ if (wr->state != WR_COMPLETED) ++ break; ++ ++ sq->completions++; ++ sq->reap++; ++ ++ /* An IQ request has been completed; update the throttling variables. */ ++ if ((wr->opcode == WR_RDMA_READ) || ++ (wr->opcode == WR_ATOMIC_CMP_AND_SWP) || ++ (wr->opcode == WR_ATOMIC_FETCH_AND_ADD)) { ++ BUG_ON(!atomic_read(&qp->or_depth)); ++ atomic_dec(&qp->or_depth); ++ atomic_dec(&qp->or_posted); ++ } ++ ++ /* See if we need to generate a completion. */ ++ if (!(wr->flags & IB_SEND_SIGNALED)) ++ continue; ++ ++ err = ibscif_reserve_cqe(cq, &wc); ++ if (unlikely(err)) ++ break; ++ ++ wc->ibwc.qp = &qp->ibqp; ++ wc->ibwc.src_qp = qp->remote_qpn; ++ wc->ibwc.wr_id = wr->id; ++ wc->ibwc.opcode = to_ib_wc_opcode(wr->opcode); ++ wc->ibwc.wc_flags = (((enum ib_wr_opcode)wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) || ++ ((enum ib_wr_opcode)wr->opcode == IB_WR_SEND_WITH_IMM)) ? ++ IB_WC_WITH_IMM : 0; ++ wc->ibwc.status = IB_WC_SUCCESS; ++ wc->ibwc.ex.imm_data = 0; ++ wc->ibwc.port_num = 1; ++ wc->ibwc.byte_len = (((enum ib_wr_opcode)wr->opcode == IB_WR_RDMA_READ) || ++ ((enum ib_wr_opcode)wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) || ++ ((enum ib_wr_opcode)wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) ? 
++ wr->sar.rea.final_length : 0; ++ wc->wq = sq; ++ wc->reap = sq->reap; ++ sq->reap = 0; ++ ++ ibscif_append_cqe(cq, wc, 0); ++ } ++out: ++ spin_unlock_bh(&sq->lock); ++ ++ ibscif_notify_cq(cq); ++ return err; ++} ++ ++static int ibscif_schedule_rx_completions(struct ibscif_qp *qp, int iq_flag, struct ibscif_rx_state *rx) ++{ ++ struct ibscif_cq *cq = to_cq(qp->ibqp.recv_cq); ++ struct ibscif_wq *wq; ++ struct ibscif_wr *wr; ++ struct ibscif_wc *wc; ++ u32 last_in_seq; ++ int index, err, i; ++ ++ wq = iq_flag ? &qp->sq /* yep, the SQ */ : &qp->rq; ++ last_in_seq = rx->last_in_seq; ++ ++ /* Prevent divide by zero traps on wrap math. */ ++ if (!wq->size) ++ return 0; ++ ++ spin_lock_bh(&wq->lock); ++ for (i=wq->completions; idepth; i++) { ++ index = (wq->head + i) % wq->size; ++ ++ wr = ibscif_get_wr(wq, index); ++ ++ /* Skip over non-IQ entries. */ ++ if (iq_flag && ++ ((wr->opcode == WR_UD) || ++ (wr->opcode == WR_SEND) || ++ (wr->opcode == WR_SEND_WITH_IMM) || ++ (wr->opcode == WR_RDMA_WRITE) || ++ (wr->opcode == WR_RDMA_WRITE_WITH_IMM))) ++ continue; ++ ++ /* ++ * If this WR hasn't seen the final segment in sequence then ++ * there is nothing more to process in this queue. We use the ++ * last seen state as a qualifier because last_packet_seq will ++ * be uninitialized until last packet is seen. ++ */ ++ if ((wr->state != WR_LAST_SEEN) || ++ seq_before(last_in_seq, wr->sar.rea.last_packet_seq)) ++ break; ++ ++ /* Clear references on memory regions. */ ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ ++ if (iq_flag) { ++ /* ++ * Completed IQ replies are defered until earlier ++ * non-IQ WR have completed. This is determined ++ * with a second iteration of the WQ below. ++ */ ++ wr->state = WR_COMPLETED; ++ continue; /* Look for more IQ completions. */ ++ } ++ ++ /* All receive queue completions are done here. */ ++ err = ibscif_reserve_cqe(cq, &wc); ++ if (unlikely(err)) { ++ spin_unlock_bh(&wq->lock); ++ return err; ++ } ++ ++ wc->ibwc.qp = &qp->ibqp; ++ wc->ibwc.src_qp = qp->remote_qpn; ++ wc->ibwc.wr_id = wr->id; ++ wc->ibwc.status = IB_WC_SUCCESS; ++ wc->ibwc.byte_len = wr->sar.rea.final_length; ++ wc->ibwc.port_num = 1; ++ ++ if (ibscif_pdu_is_immed(wr->sar.rea.opcode)) { ++ DEV_STAT(qp->dev, recv_imm++); ++ wc->ibwc.opcode = IB_WC_RECV_RDMA_WITH_IMM; ++ wc->ibwc.ex.imm_data = wr->sar.rea.immediate_data; ++ } else { ++ DEV_STAT(qp->dev, recv++); ++ wc->ibwc.opcode = IB_WC_RECV; ++ wc->ibwc.ex.imm_data = 0; ++ } ++ ++ wc->wq = wq; ++ wc->reap = 1; ++ wq->completions++; ++ ++ ibscif_append_cqe(cq, wc, !!ibscif_pdu_is_se(wr->sar.rea.opcode)); ++ } ++ spin_unlock_bh(&wq->lock); ++ ++ /* If this was the recieve queue, there is no more processing to be done. */ ++ if (!iq_flag) { ++ ibscif_notify_cq(cq); ++ return 0; ++ } ++ ++ err = ibscif_process_sq_completions(qp); ++ if (unlikely(err)) ++ return err; ++ ++ /* ++ * If we just created room for a backlogged IQ stream request ++ * and there is a tx window, reschedule to get it sent. ++ */ ++ if ((atomic_read(&qp->or_posted) > atomic_read(&qp->or_depth)) && ++ (atomic_read(&qp->or_depth) < qp->max_or) && ++ ibscif_tx_window(&qp->wire.sq.tx)) ++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_SQ; ++ ++ return 0; ++} ++ ++static enum ibscif_schedule ibscif_process_wq_ack(struct ibscif_wq *wq, u32 seq_num) ++{ ++ struct ibscif_tx_state *tx = &wq->wirestate->tx; ++ enum ibscif_schedule status = 0; ++ int throttled, index, err = 0, i; ++ ++ if (!wq->size || !wq->depth) ++ return 0; ++ ++ /* If this is old news, get out. 
*/ ++ if (!seq_after(seq_num, tx->last_ack_seq_recvd)) ++ return 0; ++ ++ /* Capture if window was closed before updating. */ ++ throttled = !ibscif_tx_window(tx); ++ tx->last_ack_seq_recvd = seq_num; ++ ++ /* ++ * If were were throttled and now have an open window or ++ * simply up to date, resume streaming transfers. This ++ * can be overwritten with other schedule states below. ++ */ ++ if (throttled && ibscif_tx_window(tx)) ++ status = SCHEDULE_RESUME; ++ ++ spin_lock_bh(&wq->lock); ++ for (i=wq->completions; idepth; i++) { ++ struct ibscif_wr *wr; ++ ++ index = (wq->head + i) % wq->size; ++ ++ wr = ibscif_get_wr(wq, index); ++ ++ /* Get out if the WR hasn't been scheduled. */ ++ if (wr->state == WR_WAITING) ++ break; ++ ++ if (seq_after(wr->sar.seg.ending_seq, seq_num)) { ++ ++ if ((wr->state == WR_STARTED) && !ibscif_tx_unacked_window(tx)) ++ status = SCHEDULE_RESUME; ++ ++ break; ++ } ++ ++ /* We seem to have a completed WQ element. */ ++ ++ if (is_iq(wq)) { ++ /* ++ * We have a completed IQ reply. ++ * Clear references to the memory region. ++ */ ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ ++ /* ++ * It's more effecient to retire an IQ wqe manually ++ * here instead of calling ibscif_retire_wqes(). ++ */ ++ wq->head = (wq->head + 1) % wq->size; ++ wq->depth -= 1; ++ ++ } else if ((wr->opcode == WR_RDMA_READ) || ++ (wr->opcode == WR_ATOMIC_CMP_AND_SWP) || ++ (wr->opcode == WR_ATOMIC_FETCH_AND_ADD)|| ++ (wr->opcode == WR_UD && wr->use_rma) || ++ (wr->opcode == WR_SEND && wr->use_rma) || ++ (wr->opcode == WR_SEND_WITH_IMM && wr->use_rma) || ++ (wr->opcode == WR_RDMA_WRITE && wr->use_rma) || ++ (wr->opcode == WR_RDMA_WRITE_WITH_IMM && wr->use_rma)) { ++ /* ++ * We have a request acknowledgment. ++ * Note the state change so it isn't retried. ++ * ++ * BTW, these request types are completed in the ++ * ibscif_schedule_rx_completions() routine when ++ * the data has arrived. ++ */ ++ if (wr->state == WR_WAITING_FOR_ACK) ++ wr->state = WR_WAITING_FOR_RSP; ++ ++ } else if (wr->state != WR_COMPLETED) { ++ /* Request is complete so no need to keep references. */ ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ wr->state = WR_COMPLETED; ++ } ++ } ++ spin_unlock_bh(&wq->lock); ++ ++ if (is_sq(wq)) { ++ err = ibscif_process_sq_completions(wq->qp); ++ if (unlikely(err)) { ++ printk(KERN_ALERT PFX "%s: sq completion error: err=%d \n", __func__, err); ++ ibscif_protocol_error(wq->qp, IBSCIF_REASON_QP_FATAL); ++ status = 0; ++ } ++ } ++ ++ return status; ++} ++ ++static void ibscif_process_ack(struct ibscif_qp *qp, struct base_hdr *hdr) ++{ ++ qp->schedule |= ibscif_process_wq_ack(&qp->sq, hdr->sq_ack_num) | SCHEDULE_SQ; ++ qp->schedule |= ibscif_process_wq_ack(&qp->iq, hdr->iq_ack_num) | SCHEDULE_IQ; ++} ++ ++/* Note that the WQ lock is held on success. */ ++static struct ibscif_wr *ibscif_reserve_wqe(struct ibscif_wq *wq) ++{ ++ int err; ++ ++ spin_lock_bh(&wq->lock); ++ ++ if (unlikely(wq->qp->state != QP_CONNECTED)) { ++ err = -ENOTCONN; ++ goto out; ++ } ++ if (unlikely(!wq->size)) { ++ err = -ENOSPC; ++ goto out; ++ } ++ if (unlikely(wq->depth == wq->size)) { ++ err = -ENOBUFS; ++ goto out; ++ } ++ ++ return ibscif_get_wr(wq, wq->tail); ++out: ++ spin_unlock_bh(&wq->lock); ++ return ERR_PTR(err); ++} ++ ++/* Note that this assumes the WQ lock is currently held. 
*/ ++static void ibscif_append_wqe(struct ibscif_wq *wq) ++{ ++ DEV_STAT(wq->qp->dev, wr_opcode[ibscif_get_wr(wq, wq->tail)->opcode]++); ++ ibscif_append_wq(wq); ++ spin_unlock_bh(&wq->lock); ++} ++ ++static struct ibscif_wr* ibscif_wr_by_msg_id(struct ibscif_wq *wq, u32 msg_id) ++{ ++ struct ibscif_wr *wr; ++ int size = wq->size; ++ ++ if (!size) ++ return NULL; ++ ++ wr = ibscif_get_wr(wq, msg_id % size); ++ if (wr->use_rma) ++ return (wr->rma_id == msg_id) ? wr : NULL; ++ else ++ return (wr->msg_id == msg_id) ? wr : NULL; ++} ++ ++static int ibscif_ds_dma(struct ibscif_qp *qp, struct page **page, u32 page_offset, struct sk_buff *skb, u32 dma_len, int head_copied) ++{ ++ void *dst, *src = skb->data; ++ u32 copy_len; ++ ++ while (dma_len) { ++ copy_len = min(dma_len, (u32)PAGE_SIZE - page_offset); ++ ++ dst = ibscif_map_dst(*page) + page_offset; ++ head_copied = ibscif_atomic_copy(dst, src, copy_len, head_copied); ++ ibscif_unmap_dst(*page, dst); ++ ++ src += copy_len; ++ dma_len -= copy_len; ++ ++ page++; ++ page_offset = 0; ++ } ++ ++ return head_copied; ++} ++ ++static int ibscif_place_data(struct ibscif_qp *qp, struct ibscif_wr *wr, struct sk_buff *skb, ++ u32 length, u32 offset, u32 seq_num) ++{ ++ struct ibscif_ds *ds; ++ struct ibscif_mr *mr; ++ int seg_num, page_index; ++ u32 dma_len, ds_offset, page_offset; ++ int head_copied = 0; ++ ++ if (!length) { ++ ds = NULL; ++ dma_len = 0; ++ ds_offset = 0; ++ goto no_data; ++ } ++ ++ /* See if we can use our ds cache. */ ++ if (likely((wr->sar.rea.current_ds) && (wr->sar.rea.last_seen_seq == seq_num - 1))) { ++ /* Take the cached entires. */ ++ ds = wr->sar.rea.current_ds; ++ mr = ds->mr; ++ ds_offset = wr->sar.rea.current_ds_offset; ++ seg_num = (ds - wr->ds_list) / sizeof *wr->ds_list; ++ } else { ++ ds_offset = offset; ++ ds = wr->ds_list; ++ seg_num = 0; ++ while ((ds_offset >= ds->length) && (seg_num < wr->num_ds)) { ++ ds_offset -= ds->length; ++ ds++; ++ seg_num++; ++ } ++next_ds: ++ if (unlikely(seg_num >= wr->num_ds)) ++ return -EMSGSIZE; ++ /* ++ * A memory region which may have posted receives against it can ++ * still be freed, therefore, we need to burn the cycles here to ++ * make sure it's still valid. We'll take a reference on it now ++ * that data is coming in. ++ */ ++ if (!ds->in_use) { ++ mr = ibscif_get_mr(ds->lkey); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ds->in_use = 1; ++ if (unlikely(mr != ds->mr)) ++ return -ENXIO; ++ if (unlikely(!(mr->access & IB_ACCESS_LOCAL_WRITE))) ++ return -EACCES; ++ } else ++ mr = ds->mr; ++ } ++ ++ /* Place data for this descriptor. Routine will handle page boundary crossings. */ ++ page_offset = ds->offset + ds_offset + (mr->addr & ~PAGE_MASK); ++ page_index = page_offset >> PAGE_SHIFT; ++ page_offset &= ~PAGE_MASK; ++ ++ dma_len = min(ds->length - ds_offset, length); ++ head_copied = ibscif_ds_dma(qp, &mr->page[page_index], page_offset, skb, dma_len, head_copied); ++ length -= dma_len; ++ if (length) { ++ ds++; ++ seg_num++; ++ ds_offset = 0; ++ skb_pull(skb, dma_len); ++ goto next_ds; ++ } ++no_data: ++ wr->sar.rea.last_seen_seq = seq_num; ++ ++ if (ds && ((ds_offset + dma_len) < ds->length)) { ++ wr->sar.rea.current_ds = ds; ++ wr->sar.rea.current_ds_offset = ds_offset + dma_len; ++ } else ++ wr->sar.rea.current_ds = NULL; /* Force a validation of the next ds. 
*/ ++ ++ return 0; ++} ++ ++static int ibscif_process_ud(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ int err; ++ int grh_size = 40; ++ int msg_id; ++ ++ if (unlikely(qp->ibqp.qp_type != IB_QPT_UD)) { ++ printk(KERN_ALERT PFX "%s: UD packet received on non-UD QP\n", __func__); ++ return -EINVAL; ++ } ++ ++ pdu->ud.msg_length = __be32_to_cpu(pdu->ud.msg_length); ++ pdu->ud.msg_offset = __be32_to_cpu(pdu->ud.msg_offset); ++ ++ /* Only one pdu is allowed for one UD packet, otherwise drop the pdu */ ++ if (unlikely(pdu->ud.msg_length != pdu->hdr.length || pdu->ud.msg_offset)) { ++ printk(KERN_INFO PFX "%s: dropping fragmented UD packet. total_length=%d msg_length=%d msg_offset=%d\n", ++ __func__, pdu->hdr.length, pdu->ud.msg_length, pdu->ud.msg_offset); ++ return -EINVAL; ++ } ++ ++ spin_lock_bh(&qp->rq.lock); ++ if (unlikely(qp->rq.ud_msg_id >= qp->rq.next_msg_id)) { ++ spin_unlock_bh(&qp->rq.lock); ++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n", ++ __func__, pdu->send.msg_id, qp->rq.next_msg_id); ++ return -EBADRQC; ++ } ++ msg_id = qp->rq.ud_msg_id++; ++ spin_unlock_bh(&qp->rq.lock); ++ ++ wr = ibscif_wr_by_msg_id(&qp->rq, msg_id); ++ if (unlikely(!wr)) ++ return -EBADR; ++ ++ if (unlikely((pdu->ud.msg_length + grh_size) > wr->length)) ++ return -EMSGSIZE; ++ ++ /* GRH is included as part of the received message */ ++ skb_pull(skb, sizeof(pdu->ud)-grh_size); ++ ++ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length+grh_size, pdu->ud.msg_offset, pdu->hdr.seq_num); ++ if (unlikely(err)) ++ return err; ++ ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = 0; ++ wr->sar.rea.immediate_data = 0; ++ wr->sar.rea.final_length = pdu->ud.msg_length+grh_size; ++ ++ return 0; ++} ++ ++static int ibscif_process_send(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ int err; ++ ++ pdu->send.msg_id = __be32_to_cpu(pdu->send.msg_id); ++ spin_lock_bh(&qp->rq.lock); ++ if (unlikely(pdu->send.msg_id >= qp->rq.next_msg_id)) { ++ spin_unlock_bh(&qp->rq.lock); ++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n", ++ __func__, pdu->send.msg_id, qp->rq.next_msg_id); ++ return -EBADRQC; ++ } ++ spin_unlock_bh(&qp->rq.lock); ++ ++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->send.msg_id); ++ if (unlikely(!wr)) ++ return -EBADR; ++ ++ pdu->send.msg_length = __be32_to_cpu(pdu->send.msg_length); ++ if (unlikely(pdu->send.msg_length > wr->length)) ++ return -EMSGSIZE; ++ ++ pdu->send.msg_offset = __be32_to_cpu(pdu->send.msg_offset); ++ if (unlikely(pdu->send.msg_offset > pdu->send.msg_length)) ++ return -EINVAL; ++ ++ if (unlikely((pdu->hdr.length + pdu->send.msg_offset) > wr->length)) ++ return -ESPIPE; ++ ++ skb_pull(skb, sizeof(pdu->send)); ++ ++ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length, pdu->send.msg_offset, pdu->hdr.seq_num); ++ if (unlikely(err)) ++ return err; ++ ++ if (ibscif_pdu_is_last(pdu->hdr.opcode)) { ++ /* ++ * We've got the last of the message data. ++ * We always assume immediate data; if not needed, no harm, on foul. 
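++		 * The fields latched below (opcode, last_packet_seq,
++		 * immediate_data, final_length) are picked up later by
++		 * ibscif_schedule_rx_completions() once every PDU up to
++		 * last_packet_seq has arrived in sequence.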
++ */ ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->send.immed_data); ++ wr->sar.rea.final_length = pdu->send.msg_length; ++ } ++ ++ return 0; ++} ++ ++static int ibscif_process_write(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ struct ibscif_mr *mr; ++ u64 rdma_addr; ++ u32 rdma_len, page_offset; ++ int page_index; ++ ++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_WRITE))) ++ return -EACCES; ++ ++ /* Writes with immediate data consume an rq wqe. */ ++ if (ibscif_pdu_is_immed(pdu->hdr.opcode)) { ++ pdu->write.msg_id = __be32_to_cpu(pdu->write.msg_id); ++ spin_lock_bh(&qp->rq.lock); ++ if (unlikely(pdu->write.msg_id >= qp->rq.next_msg_id)) { ++ spin_unlock_bh(&qp->rq.lock); ++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n", ++ __func__, pdu->write.msg_id, qp->rq.next_msg_id); ++ return -EBADRQC; ++ } ++ spin_unlock_bh(&qp->rq.lock); ++ ++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->write.msg_id); ++ if (unlikely(!wr)) ++ return -EBADR; ++ } else ++ wr = NULL; ++ ++ skb_pull(skb, sizeof(pdu->write)); ++ ++ rdma_addr = __be64_to_cpu(pdu->write.rdma_address); ++ rdma_len = pdu->hdr.length; ++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr)) ++ return -EOVERFLOW; ++ ++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->write.rdma_key), rdma_addr, ++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_WRITE); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ++ page_offset = rdma_addr & ~PAGE_MASK; ++ page_index = ((rdma_addr - mr->addr) + (mr->addr & ~PAGE_MASK)) >> PAGE_SHIFT; ++ ++ ibscif_ds_dma(qp, &mr->page[page_index], page_offset, skb, rdma_len, 0); ++ ++ ibscif_put_mr(mr); ++ ++ if (wr) { ++ wr->sar.rea.final_length += rdma_len; ++ if (ibscif_pdu_is_last(pdu->hdr.opcode)) { ++ /* We've got the last of the write data. 
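++		   For a plain RDMA write (no immediate data) wr is NULL here:
++		   the payload has already been copied into the target MR
++		   above, no rq wqe is consumed and the responder generates no
++		   completion.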
*/ ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->write.immed_data); ++ } ++ } ++ ++ return 0; ++} ++ ++static int ibscif_process_read(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ struct ibscif_mr *mr; ++ u64 rdma_addr; ++ u32 rdma_len; ++ ++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_READ))) ++ return -EACCES; ++ ++ rdma_addr = __be64_to_cpu(pdu->read_req.rdma_address); ++ rdma_len = __be32_to_cpu(pdu->read_req.rdma_length); ++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr)) ++ return -EOVERFLOW; ++ ++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->read_req.rdma_key), rdma_addr, ++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_READ); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ++ wr = ibscif_reserve_wqe(&qp->iq); ++ if (unlikely(IS_ERR(wr))) { ++ ibscif_put_mr(mr); ++ return PTR_ERR(wr); ++ } ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->opcode = WR_RDMA_READ_RSP; ++ wr->state = WR_WAITING; ++ wr->length = rdma_len; ++ wr->msg_id = __be32_to_cpu(pdu->read_req.rdma_id); ++ wr->num_ds = 1; ++ wr->ds_list[0].mr = mr; ++ wr->ds_list[0].offset = rdma_addr - mr->addr; ++ wr->ds_list[0].length = rdma_len; ++ wr->ds_list[0].in_use = 1; ++ ++ ibscif_append_wqe(&qp->iq); ++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ; ++ ++ return 0; ++} ++ ++static int ibscif_process_read_rsp(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ int err; ++ ++ /* Find the requesting sq wr. */ ++ wr = ibscif_wr_by_msg_id(&qp->sq, __be32_to_cpu(pdu->read_rsp.rdma_id)); ++ if (unlikely(!wr)) ++ return -EBADR; ++ if (unlikely(wr->opcode != WR_RDMA_READ)) ++ return -ENOMSG; ++ ++ skb_pull(skb, sizeof(pdu->read_rsp)); ++ ++ pdu->read_rsp.rdma_offset = __be32_to_cpu(pdu->read_rsp.rdma_offset); ++ ++ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length, pdu->read_rsp.rdma_offset, pdu->hdr.seq_num); ++ if (unlikely(err)) ++ return err; ++ ++ if (ibscif_pdu_is_last(pdu->hdr.opcode)) { ++ /* We've got the last of the read data. 
*/ ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.final_length = pdu->read_rsp.rdma_offset + pdu->hdr.length; ++ } ++ ++ return 0; ++} ++ ++static int ibscif_process_atomic_req(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ struct ibscif_mr *mr; ++ struct page *page; ++ u64 *addr; ++ u32 offset, rkey, msg_id; ++ u16 opcode; ++ ++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_ATOMIC))) ++ return -EACCES; ++ ++ opcode = ibscif_pdu_base_type(pdu->hdr.opcode); ++ if (opcode == ibscif_op_comp_swap) { ++ addr = (u64 *)__be64_to_cpu(pdu->comp_swap.atomic_address); ++ rkey = __be32_to_cpu(pdu->comp_swap.atomic_key); ++ msg_id = __be32_to_cpu(pdu->comp_swap.atomic_id); ++ } else { ++ addr = (u64 *)__be64_to_cpu(pdu->fetch_add.atomic_address); ++ rkey = __be32_to_cpu(pdu->fetch_add.atomic_key); ++ msg_id = __be32_to_cpu(pdu->fetch_add.atomic_id); ++ } ++ ++ if (unlikely((u64)addr & (sizeof *addr - 1))) ++ return -EADDRNOTAVAIL; ++ if (unlikely((addr + (sizeof *addr - 1)) < addr)) ++ return -EOVERFLOW; ++ ++ mr = ibscif_validate_mr(rkey, (u64)addr, sizeof *addr, qp->ibqp.pd, IB_ACCESS_REMOTE_ATOMIC); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ++ wr = ibscif_reserve_wqe(&qp->iq); ++ if (unlikely(IS_ERR(wr))) { ++ ibscif_put_mr(mr); ++ return PTR_ERR(wr); ++ } ++ ++ /* Determine which page to map. */ ++ offset = ((u64)addr - mr->addr) + (mr->addr & ~PAGE_MASK); ++ page = mr->page[offset >> PAGE_SHIFT]; ++ offset &= ~PAGE_MASK; ++ ++ /* Lock to perform the atomic operation atomically. */ ++ spin_lock_bh(&qp->dev->atomic_op); ++ ++ addr = ibscif_map_src(page) + offset; ++ wr->atomic_rsp.orig_data = *addr; ++ if (opcode == ibscif_op_fetch_add) ++ *addr += __be64_to_cpu(pdu->fetch_add.add_data); ++ else if (wr->atomic_rsp.orig_data == __be64_to_cpu(pdu->comp_swap.comp_data)) ++ *addr = __be64_to_cpu(pdu->comp_swap.swap_data); ++ ibscif_unmap_src(page, addr); ++ ++ ibscif_put_mr(mr); ++ ++ /* Atomic operation is complete. */ ++ spin_unlock_bh(&qp->dev->atomic_op); ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->opcode = WR_ATOMIC_RSP; ++ wr->state = WR_WAITING; ++ wr->length = 0; ++ wr->msg_id = msg_id; ++ wr->num_ds = 0; ++ wr->atomic_rsp.opcode = (opcode==ibscif_op_comp_swap)? ibscif_op_comp_swap_rsp : ibscif_op_fetch_add_rsp; ++ /* The wr->atomic_rsp.orig_data field was set above. */ ++ ++ ibscif_append_wqe(&qp->iq); ++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ; ++ ++ return 0; ++} ++ ++static int ibscif_process_atomic_rsp(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ u16 opcode; ++ int err; ++ ++ if (unlikely(!ibscif_pdu_is_last(pdu->atomic_rsp.hdr.opcode))) ++ return -EINVAL; ++ ++ /* Find the requesting sq wr. */ ++ wr = ibscif_wr_by_msg_id(&qp->sq, __be32_to_cpu(pdu->atomic_rsp.atomic_id)); ++ if (unlikely(!wr)) ++ return -EBADR; ++ ++ opcode = ibscif_pdu_base_type(pdu->hdr.opcode); ++ if (unlikely(wr->opcode != ((opcode == ibscif_op_comp_swap_rsp) ? 
++ WR_ATOMIC_CMP_AND_SWP : WR_ATOMIC_FETCH_AND_ADD))) ++ return -ENOMSG; ++ ++ skb_pull(skb, (unsigned long)&pdu->atomic_rsp.orig_data - (unsigned long)pdu); ++ ++ pdu->atomic_rsp.orig_data = __be64_to_cpu(pdu->atomic_rsp.orig_data); ++ err = ibscif_place_data(qp, wr, skb, sizeof pdu->atomic_rsp.orig_data, 0, pdu->hdr.seq_num); ++ if (unlikely(err)) ++ return err; ++ ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.final_length = sizeof pdu->atomic_rsp.orig_data; ++ ++ return 0; ++} ++ ++static int ibscif_process_disconnect(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ ibscif_qp_remote_disconnect(qp, __be32_to_cpu(pdu->disconnect.reason)); ++ return 0; ++} ++ ++static int ibscif_process_send_rma(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_ds *ds; ++ struct ibscif_wr *wr; ++ struct ibscif_mr *mr; ++ struct ibscif_mreg_info *mreg; ++ u32 num_rma_addrs; ++ u64 rma_offset; ++ u32 rma_length; ++ u32 total; ++ int seg_num; ++ int cur_rma_addr; ++ u32 xfer_len, ds_offset; ++ int err; ++ u64 loffset; ++ u32 dma_size = 0; ++ int rma_flag = 0; ++ ++ if (unlikely(!qp->conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__); ++ return -EACCES; ++ } ++ ++ pdu->send.msg_id = __be32_to_cpu(pdu->send.msg_id); ++ spin_lock_bh(&qp->rq.lock); ++ if (unlikely(pdu->send.msg_id >= qp->rq.next_msg_id)) { ++ spin_unlock_bh(&qp->rq.lock); ++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n", ++ __func__, pdu->send.msg_id, qp->rq.next_msg_id); ++ return -EBADRQC; ++ } ++ spin_unlock_bh(&qp->rq.lock); ++ ++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->send.msg_id); ++ if (unlikely(!wr)) ++ return -EBADR; ++ ++ pdu->send.msg_length = __be32_to_cpu(pdu->send.msg_length); ++ if (unlikely(pdu->send.msg_length > wr->length)) ++ return -EMSGSIZE; ++ ++ pdu->send.msg_offset = __be32_to_cpu(pdu->send.msg_offset); ++ if (unlikely(pdu->send.msg_offset > pdu->send.msg_length)) ++ return -EINVAL; ++ ++ if (unlikely((pdu->hdr.length + pdu->send.msg_offset) > wr->length)) ++ return -ESPIPE; ++ ++ total = 0; ++ ++ num_rma_addrs = __be32_to_cpu(pdu->send.num_rma_addrs); ++ cur_rma_addr = 0; ++ rma_offset = __be64_to_cpu(pdu->send.rma_addrs[cur_rma_addr].offset); ++ rma_length = __be32_to_cpu(pdu->send.rma_addrs[cur_rma_addr].length); ++ ++ ds_offset = pdu->send.msg_offset; ++ ds = wr->ds_list; ++ seg_num = 0; ++ while ((ds_offset >= ds->length) && (seg_num < wr->num_ds)) { ++ ds_offset -= ds->length; ++ ds++; ++ seg_num++; ++ } ++ ++ err = 0; ++ while (total < pdu->send.msg_length && !err) { ++ if (unlikely(seg_num >= wr->num_ds)) ++ return -EMSGSIZE; ++ ++ if (!ds->in_use) { ++ mr = ibscif_get_mr(ds->lkey); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ds->in_use = 1; ++ if (unlikely(mr != ds->mr)) ++ return -ENXIO; ++ if (unlikely(!(mr->access & IB_ACCESS_LOCAL_WRITE))) ++ return -EACCES; ++ } else ++ mr = ds->mr; ++ ++ mreg = ibscif_mr_get_mreg(mr, qp->conn); ++ if (!mreg) ++ return -EACCES; ++ ++ while (ds->length > ds_offset) { ++ xfer_len = min( ds->length - ds_offset, rma_length ); ++ if (xfer_len) { ++ loffset = mreg->offset + ds->offset + ds_offset; ++ dma_size += ibscif_dma_size(xfer_len, rma_offset); ++ ++ if ((total + xfer_len >= pdu->send.msg_length) && dma_size) ++ rma_flag = SCIF_RMA_SYNC; ++ ++ err = scif_readfrom(qp->conn->ep, loffset, xfer_len, rma_offset, rma_flag); ++ if (err) { 
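++				/*
++				 * scif_readfrom() returns 0 on success or a
++				 * negative error code; the failure is reported
++				 * back to the initiator via wr->rma_rsp.error
++				 * in the WR_RMA_RSP built further below.
++				 */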
++ printk(KERN_ALERT PFX "%s: scif_readfrom (%d bytes) returns %d\n", __func__, xfer_len, err); ++ break; ++ } ++ ++ ds_offset += xfer_len; ++ rma_offset += xfer_len; ++ rma_length -= xfer_len; ++ total += xfer_len; ++ ++ if (total >= pdu->send.msg_length) ++ break; ++ } ++ if (rma_length == 0) { ++ cur_rma_addr++; ++ if (unlikely(cur_rma_addr >= num_rma_addrs)) ++ return -EMSGSIZE; ++ ++ rma_offset = __be64_to_cpu(pdu->send.rma_addrs[cur_rma_addr].offset); ++ rma_length = __be32_to_cpu(pdu->send.rma_addrs[cur_rma_addr].length); ++ } ++ } ++ ++ seg_num++; ++ ds++; ++ } ++ ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->send.immed_data); ++ wr->sar.rea.final_length = pdu->send.msg_length; ++ ++ /* Respond to the initiator with the result */ ++ wr = ibscif_reserve_wqe(&qp->iq); ++ if (unlikely(IS_ERR(wr))) { ++ return PTR_ERR(wr); ++ } ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->opcode = WR_RMA_RSP; ++ wr->state = WR_WAITING; ++ wr->length = 0; ++ wr->msg_id = __be32_to_cpu(pdu->send.rma_id); ++ wr->num_ds = 0; ++ wr->rma_rsp.xfer_length = total; ++ wr->rma_rsp.error = err; ++ ++ ibscif_append_wqe(&qp->iq); ++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ; ++ ++ return 0; ++} ++ ++static int ibscif_process_write_rma(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ struct ibscif_mr *mr; ++ u64 rdma_addr; ++ u32 rdma_len; ++ struct ibscif_mreg_info *mreg; ++ u32 num_rma_addrs; ++ u64 rma_offset; ++ u32 rma_length; ++ u32 total; ++ int i; ++ int err; ++ u64 loffset; ++ u32 dma_size = 0; ++ int rma_flag = 0; ++ ++ if (unlikely(!qp->conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__); ++ return -EACCES; ++ } ++ ++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_WRITE))) ++ return -EACCES; ++ ++ /* Writes with immediate data consume an rq wqe. 
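++   Unlike ibscif_process_write() above, no payload travels in the skb on
++   this path: the initiator describes its source buffer with rma_addrs[]
++   and the data is pulled with scif_readfrom() below, after which the
++   transferred length and status are returned in a WR_RMA_RSP on the iq.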
*/ ++ if (ibscif_pdu_is_immed(pdu->hdr.opcode)) { ++ pdu->write.msg_id = __be32_to_cpu(pdu->write.msg_id); ++ spin_lock_bh(&qp->rq.lock); ++ if (unlikely(pdu->write.msg_id >= qp->rq.next_msg_id)) { ++ spin_unlock_bh(&qp->rq.lock); ++ return -EBADRQC; ++ } ++ spin_unlock_bh(&qp->rq.lock); ++ ++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->write.msg_id); ++ if (unlikely(!wr)) ++ return -EBADR; ++ } ++ else ++ wr = NULL; ++ ++ rdma_addr = __be64_to_cpu(pdu->write.rdma_address); ++ rdma_len = __be32_to_cpu(pdu->write.rma_length); ++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr)) ++ return -EOVERFLOW; ++ ++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->write.rdma_key), rdma_addr, ++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_WRITE); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ++ mreg = ibscif_mr_get_mreg(mr, qp->conn); ++ if (!mreg) ++ return -EACCES; ++ ++ total = 0; ++ err = 0; ++ num_rma_addrs = __be32_to_cpu(pdu->write.num_rma_addrs); ++ for (i=0; iwrite.rma_addrs[i].offset); ++ rma_length = __be32_to_cpu(pdu->write.rma_addrs[i].length); ++ ++ if (rdma_len < rma_length) ++ rma_length = rdma_len; ++ ++ if (rma_length == 0) ++ continue; ++ ++ loffset = mreg->offset + (rdma_addr - mr->addr) + total; ++ dma_size += ibscif_dma_size(rma_length, rma_offset); ++ ++ if ((i==num_rma_addrs-1) && dma_size) ++ rma_flag = SCIF_RMA_SYNC; ++ ++ err = scif_readfrom(qp->conn->ep, loffset, rma_length, rma_offset, rma_flag); ++ if (err) { ++ printk(KERN_ALERT PFX "%s: scif_readfrom (%d bytes) returns %d\n", __func__, rma_length, err); ++ break; ++ } ++ ++ rdma_len -= rma_length; ++ total += rma_length; ++ } ++ ++ ibscif_put_mr(mr); ++ ++ if (wr) { ++ wr->sar.rea.final_length = total; ++ wr->state = WR_LAST_SEEN; ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->write.immed_data); ++ } ++ ++ /* Respond to the initiator with the result */ ++ wr = ibscif_reserve_wqe(&qp->iq); ++ if (unlikely(IS_ERR(wr))) { ++ return PTR_ERR(wr); ++ } ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->opcode = WR_RMA_RSP; ++ wr->state = WR_WAITING; ++ wr->length = 0; ++ wr->msg_id = __be32_to_cpu(pdu->write.rma_id); ++ wr->num_ds = 0; ++ wr->rma_rsp.xfer_length = total; ++ wr->rma_rsp.error = err; ++ ++ ibscif_append_wqe(&qp->iq); ++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ; ++ ++ return 0; ++} ++ ++static int ibscif_process_read_rma(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ struct ibscif_mr *mr; ++ u64 rdma_addr; ++ u32 rdma_len; ++ struct ibscif_mreg_info *mreg; ++ u32 num_rma_addrs; ++ u64 rma_offset; ++ u32 rma_length; ++ u32 total; ++ int i; ++ int err; ++ u64 loffset; ++ u32 dma_size = 0; ++ int rma_flag = 0; ++ ++ if (unlikely(!qp->conn)) { ++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__); ++ return -EACCES; ++ } ++ ++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_READ))) ++ return -EACCES; ++ ++ rdma_addr = __be64_to_cpu(pdu->read_req.rdma_address); ++ rdma_len = __be32_to_cpu(pdu->read_req.rdma_length); ++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr)) ++ return -EOVERFLOW; ++ ++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->read_req.rdma_key), rdma_addr, ++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_READ); ++ if (unlikely(IS_ERR(mr))) ++ return PTR_ERR(mr); ++ ++ mreg = ibscif_mr_get_mreg(mr, qp->conn); ++ if (!mreg) ++ return -EACCES; ++ ++ total = 0; ++ err = 0; ++ num_rma_addrs = __be32_to_cpu(pdu->read_req.num_rma_addrs); ++ for (i=0; 
iread_req.rma_addrs[i].offset); ++ rma_length = __be32_to_cpu(pdu->read_req.rma_addrs[i].length); ++ ++ if (rdma_len < rma_length) ++ rma_length = rdma_len; ++ ++ if (rma_length == 0) ++ continue; ++ ++ loffset = mreg->offset + (rdma_addr - mr->addr) + total; ++ dma_size += ibscif_dma_size(rma_length, rma_offset); ++ ++ if ((i==num_rma_addrs-1) && dma_size) ++ rma_flag = SCIF_RMA_SYNC; ++ ++ err = scif_writeto(qp->conn->ep, loffset, rma_length, rma_offset, rma_flag); ++ if (err) { ++ printk(KERN_ALERT PFX "%s: scif_writeto (%d bytes) returns %d\n", __func__, rma_length, err); ++ break; ++ } ++ ++ rdma_len -= rma_length; ++ total += rma_length; ++ } ++ ++ ibscif_put_mr(mr); ++ ++ /* Respond to the initiator with the result */ ++ wr = ibscif_reserve_wqe(&qp->iq); ++ if (unlikely(IS_ERR(wr))) { ++ return PTR_ERR(wr); ++ } ++ ++ memset(&wr->sar, 0, sizeof wr->sar); ++ ++ wr->opcode = WR_RMA_RSP; ++ wr->state = WR_WAITING; ++ wr->length = 0; ++ wr->msg_id = __be32_to_cpu(pdu->read_req.rdma_id); ++ wr->num_ds = 0; ++ wr->rma_rsp.xfer_length = total; ++ wr->rma_rsp.error = err; ++ ++ ibscif_append_wqe(&qp->iq); ++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ; ++ ++ return 0; ++} ++ ++static int ibscif_process_rma_rsp(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ struct ibscif_wr *wr; ++ ++ wr = ibscif_wr_by_msg_id(&qp->sq, __be32_to_cpu(pdu->rma_rsp.rma_id)); ++ if (unlikely(!wr)) ++ return -EBADR; ++ if (unlikely(!wr->use_rma)) ++ return -ENOMSG; ++ ++ if (wr->opcode == WR_RDMA_READ) { ++ /* ibscif_clear_ds_refs() is called in ibscif_schedule_rx_completions() */ ++ wr->state = WR_LAST_SEEN; ++ } ++ else { ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ wr->state = WR_COMPLETED; ++ } ++ ++ wr->sar.rea.opcode = pdu->hdr.opcode; ++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num; ++ wr->sar.rea.final_length = pdu->rma_rsp.xfer_length; ++ ++ return 0; ++} ++ ++static int ibscif_process_pdu(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb) ++{ ++ int err; ++ ++ switch (ibscif_pdu_base_type(pdu->hdr.opcode)) { ++ case ibscif_op_ud: ++ err = ibscif_process_ud(qp, pdu, skb); ++ break; ++ case ibscif_op_send: ++ err = ibscif_process_send(qp, pdu, skb); ++ break; ++ case ibscif_op_write: ++ err = ibscif_process_write(qp, pdu, skb); ++ break; ++ case ibscif_op_read: ++ err = ibscif_process_read(qp, pdu, skb); ++ break; ++ case ibscif_op_read_rsp: ++ err = ibscif_process_read_rsp(qp, pdu, skb); ++ break; ++ case ibscif_op_comp_swap_rsp: ++ case ibscif_op_fetch_add_rsp: ++ err = ibscif_process_atomic_rsp(qp, pdu, skb); ++ break; ++ case ibscif_op_comp_swap: ++ case ibscif_op_fetch_add: ++ err = ibscif_process_atomic_req(qp, pdu, skb); ++ break; ++ case ibscif_op_ack: ++ /* Handled in piggyback ack processing. */ ++ err = 0; ++ break; ++ case ibscif_op_disconnect: ++ /* Post send completions before the disconnect flushes the queues. */ ++ ibscif_process_ack(qp, &pdu->hdr); ++ /* Now disconnect the QP. 
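++		   ibscif_process_disconnect() always returns 0, so this case
++		   never triggers the protocol-error handling at the bottom of
++		   this routine.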
*/ ++ err = ibscif_process_disconnect(qp, pdu, skb); ++ break; ++ case ibscif_op_send_rma: ++ err = ibscif_process_send_rma(qp, pdu, skb); ++ break; ++ case ibscif_op_write_rma: ++ err = ibscif_process_write_rma(qp, pdu, skb); ++ break; ++ case ibscif_op_read_rma: ++ err = ibscif_process_read_rma(qp, pdu, skb); ++ break; ++ case ibscif_op_rma_rsp: ++ err = ibscif_process_rma_rsp(qp, pdu, skb); ++ break; ++ default: ++ printk(KERN_INFO PFX "Received invalid opcode (%x)\n", ++ ibscif_pdu_base_type(pdu->hdr.opcode)); ++ err = IBSCIF_REASON_INVALID_OPCODE; ++ break; ++ } ++ ++ if (unlikely(err)) { ++ printk(KERN_ALERT PFX "%s: ERROR: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode)); ++ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL); ++ } ++ ++ return err; ++} ++ ++static int update_rx_seq_numbers(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct ibscif_rx_state *rx) ++{ ++ u32 seq_num = pdu->hdr.seq_num; ++ ++ if (pdu->hdr.opcode == ibscif_op_ack) ++ return 0; ++ ++ if (seq_num != rx->last_in_seq + 1) ++ return 0; ++ ++ rx->last_in_seq = seq_num; ++ ++ return 1; ++} ++ ++static void ibscif_process_qp_skb(struct ibscif_qp *qp, struct sk_buff *skb) ++{ ++ union ibscif_pdu *pdu = (union ibscif_pdu *)skb->data; ++ struct ibscif_rx_state *rx; ++ int err = 0; ++ ++ /* Start with no scheduling. */ ++ qp->schedule = 0; ++ ++ rx = ibscif_pdu_is_iq(pdu->hdr.opcode) ? &qp->wire.iq.rx : &qp->wire.sq.rx; ++ ++ if (ibscif_process_pdu(qp, pdu, skb) == IBSCIF_REASON_INVALID_OPCODE) ++ return; ++ ++ /* skip ack and seq_num for UD QP */ ++ if (qp->ibqp.qp_type == IB_QPT_UD) { ++ err = ibscif_schedule_rx_completions(qp, 0, rx); ++ if (unlikely(err)) { ++ printk(KERN_ALERT PFX "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode)); ++ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL); ++ } ++ goto done; ++ } ++ ++ /* Process piggybacked acks. */ ++ ibscif_process_ack(qp, &pdu->hdr); ++ ++ if (update_rx_seq_numbers(qp, pdu, rx)) { ++ /* PDU is in sequence so schedule/remove completed work requests. */ ++ err = ibscif_schedule_rx_completions(qp, ibscif_pdu_is_iq(pdu->hdr.opcode), rx); ++ if (unlikely(err)) { ++ printk(KERN_ALERT PFX "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode)); ++ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL); ++ goto done; ++ } ++ } ++ ++ /* Generate an ack if forced or if the current window dictates it. */ ++ if (ibscif_pdu_is_force_ack(pdu->hdr.opcode)) { ++ ibscif_send_ack(qp); ++ } else if (pdu->hdr.opcode != ibscif_op_ack) { ++ u32 window = ibscif_rx_window(rx); ++ if (window && (window % (window_size / MIN_WINDOW_SIZE)) == 0) ++ ibscif_send_ack(qp); ++ } ++done: ++ /* Run the scheduler if it was requested. */ ++ if (qp->schedule & SCHEDULE_RESUME) { ++ if (qp->schedule & SCHEDULE_SQ) ++ ibscif_schedule(&qp->sq); ++ if (qp->schedule & SCHEDULE_IQ) ++ ibscif_schedule(&qp->iq); ++ } ++ ++ return; ++} ++ ++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) ++#define skb_mac_header(skb) (skb->mac.raw) ++#endif ++ ++static int ibscif_recv_pkt(struct sk_buff *skb, struct ibscif_dev *dev, scif_epd_t ep, struct ibscif_conn *conn) ++{ ++ union ibscif_pdu *pdu = (union ibscif_pdu *)skb->data; ++ struct ibscif_qp *qp = ERR_PTR(-ENOENT); ++ ++ /* Convert the base header. 
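++   All base-header fields arrive in network byte order and are converted
++   to host order exactly once here; the per-opcode handlers convert only
++   the fields of their own extended headers (msg_id, rdma_address, etc.).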
*/ ++ pdu->hdr.opcode = __be16_to_cpu(pdu->hdr.opcode); ++ pdu->hdr.length = __be16_to_cpu(pdu->hdr.length); ++ pdu->hdr.dst_qp = __be32_to_cpu(pdu->hdr.dst_qp); ++ pdu->hdr.src_qp = __be32_to_cpu(pdu->hdr.src_qp); ++ pdu->hdr.seq_num = __be32_to_cpu(pdu->hdr.seq_num); ++ pdu->hdr.sq_ack_num = __be32_to_cpu(pdu->hdr.sq_ack_num); ++ pdu->hdr.iq_ack_num = __be32_to_cpu(pdu->hdr.iq_ack_num); ++ ++ if (pdu->hdr.opcode == ibscif_op_close) { ++ //printk(KERN_INFO PFX "%s: op_close, conn=%p, local_close=%d\n", __func__, conn, conn->local_close); ++ conn->remote_close = 1; ++ goto done_no_qp; ++ } ++ else if (pdu->hdr.opcode == ibscif_op_reopen) { ++ //printk(KERN_INFO PFX "%s: op_reopen, conn=%p, local_close=%d\n", __func__, conn, conn->local_close); ++ conn->remote_close = 0; ++ goto done_no_qp; ++ } ++ else if (pdu->hdr.opcode == ibscif_op_cm) { ++ ibscif_process_cm_skb(skb, conn); ++ goto done_no_qp; ++ } ++ ++ qp = ibscif_get_qp(pdu->hdr.dst_qp); ++ if (unlikely(IS_ERR(qp) || ++ (qp->state != QP_CONNECTED && qp->ibqp.qp_type != IB_QPT_UD) || ++ (qp->ibqp.qp_num != pdu->hdr.dst_qp) || ++ (qp->remote_qpn != pdu->hdr.src_qp && qp->ibqp.qp_type != IB_QPT_UD) || ++ 0)) { ++ /* Disconnect the rogue. */ ++ ibscif_reflect_disconnect(qp, &pdu->hdr, skb, IBSCIF_REASON_INVALID_QP); ++ goto done; ++ } ++ ++ if (qp->ibqp.qp_type == IB_QPT_UD) ++ ibscif_qp_add_ud_conn(qp, conn); ++ ++ DEV_STAT(qp->dev, packets_rcvd++); ++ DEV_STAT(qp->dev, bytes_rcvd += skb->len); ++ ++ ibscif_process_qp_skb(qp, skb); ++done: ++ if (likely(!IS_ERR(qp))) ++ ibscif_put_qp(qp); ++ ++done_no_qp: ++ kfree_skb(skb); ++ return 0; ++} ++ ++void ibscif_do_recv( struct ibscif_dev *dev, scif_epd_t ep, struct ibscif_conn *conn ) ++{ ++ struct sk_buff *skb; ++ union ibscif_pdu *pdu; ++ int hdr_size, payload_size, recv_size, pdu_size; ++ char *recv_buffer; ++ int ret; ++ ++ skb = dev_alloc_skb( IBSCIF_MTU ); ++ if (unlikely(skb==NULL)) { ++ printk(KERN_ALERT PFX "%s(): fail to allocate skb, exiting\n", __func__); ++ return; ++ } ++ ++ skb->protocol = IBSCIF_PACKET_TYPE; ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ skb->priority = TC_PRIO_CONTROL; /* highest defined priority */ ++ skb->dev = (void *) dev; ++ ++ pdu = (union ibscif_pdu *)skb->data; ++ ++ /* get the base header first so the packet size can be determinied */ ++ recv_size = sizeof(pdu->hdr); ++ recv_buffer = (char *)&pdu->hdr; ++ while (recv_size) { ++ ret = scif_recv(ep, recv_buffer, recv_size, blocking_recv ? SCIF_RECV_BLOCK : 0); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s(): fail to receive hdr, ret=%d, expecting %d\n", __func__, ret, (int)recv_size); ++ if (ret == -ENOTCONN || ret == -ECONNRESET) { ++ if (verbose) ++ printk(KERN_INFO PFX "%s: ep disconnected by peer (%d). 
conn=%p, local_close=%d\n", ++ __func__, ret, conn, conn->local_close); ++ ibscif_remove_ep( dev, ep ); ++ ibscif_refresh_pollep_list(); ++ conn->remote_close = 1; ++ if (conn->local_close) { ++ ibscif_free_conn(conn); ++ } ++ } ++ goto errout; ++ } ++ recv_size -= ret; ++ recv_buffer += ret; ++ } ++ ++ hdr_size = __be16_to_cpu(pdu->hdr.hdr_size); ++ payload_size = __be16_to_cpu(pdu->hdr.length); ++ pdu_size = hdr_size + payload_size; ++ if (unlikely(pdu_size > IBSCIF_MTU)) { ++ printk(KERN_ALERT PFX "%s(): packet size exceed MTU, size=%d\n", __func__, pdu_size); ++ goto errout; ++ } ++ ++ recv_size = pdu_size - sizeof(pdu->hdr); ++ recv_buffer = (char *)pdu + sizeof(pdu->hdr); ++ ++ /* get the remaining of the packet */ ++ //printk(KERN_INFO PFX "%s(): hdr_size=%d payload_size=%d pdu_size=%d recv_size=%d\n", __func__, hdr_size, payload_size, pdu_size, recv_size); ++ ret = 0; ++ while (recv_size) { ++ ret = scif_recv(ep, recv_buffer, recv_size, blocking_recv ? SCIF_RECV_BLOCK : 0); ++ ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s(): fail to receive data, ret=%d, expecting %d\n", __func__, ret, recv_size); ++ break; ++ } ++ ++ recv_size -= ret; ++ recv_buffer += ret; ++ } ++ ++ if (ret < 0) ++ goto errout; ++ ++ skb->len = pdu_size; ++ skb->data_len = payload_size; ++ skb->tail += pdu_size; ++ ++ ibscif_recv_pkt(skb, dev, ep, conn); ++ return; ++ ++errout: ++ kfree_skb(skb); ++} ++ ++#define IBSCIF_MAX_POLL_COUNT (IBSCIF_MAX_DEVICES * 2) ++static struct scif_pollepd poll_eps[IBSCIF_MAX_POLL_COUNT]; ++static struct ibscif_dev *poll_devs[IBSCIF_MAX_POLL_COUNT]; ++static int poll_types[IBSCIF_MAX_POLL_COUNT]; ++static struct ibscif_conn *poll_conns[IBSCIF_MAX_POLL_COUNT]; ++static struct task_struct *poll_thread = NULL; ++static atomic_t poll_eps_changed = ATOMIC_INIT(0); ++static volatile int poll_thread_running = 0; ++ ++void ibscif_refresh_pollep_list( void ) ++{ ++ atomic_set(&poll_eps_changed, 1); ++} ++ ++int ibscif_poll_thread( void *unused ) ++{ ++ int poll_count = 0; ++ int ret; ++ int i; ++ int busy; ++ int idle_count = 0; ++ ++ poll_thread_running = 1; ++ while (!kthread_should_stop()) { ++ if (atomic_xchg(&poll_eps_changed, 0)) { ++ poll_count = IBSCIF_MAX_POLL_COUNT; ++ ibscif_get_pollep_list( poll_eps, poll_devs, poll_types, poll_conns, &poll_count ); ++ } ++ ++ if (poll_count == 0) { ++ schedule(); ++ continue; ++ } ++ ++ ret = scif_poll(poll_eps, poll_count, 1000); /* 1s timeout */ ++ ++ busy = 0; ++ if (ret > 0) { ++ for (i=0; ilocal_close); ++ conn->remote_close = 1; ++ if (conn->local_close) { ++ ibscif_free_conn(conn); ++ } ++ } ++ busy = 1; ++ } ++ } ++ } ++ ++ if (busy) { ++ idle_count = 0; ++ } ++ else { ++ idle_count++; ++ /* close unused endpoint after 60 seconds */ ++ if (idle_count == 60) { ++ if (ibscif_cleanup_idle_conn()) ++ ibscif_refresh_pollep_list(); ++ idle_count = 0; ++ } ++ /* pick up the unprocessed items in the xmit queue */ ++ if (!skb_queue_empty(&xmit_queue)) ++ ibscif_dev_queue_xmit(NULL); ++ schedule(); ++ } ++ } ++ ++ poll_thread_running = 0; ++ return 0; ++} ++ ++void ibscif_protocol_init_pre(void) ++{ ++ skb_queue_head_init(&xmit_queue); ++} ++ ++void ibscif_protocol_init_post(void) ++{ ++ poll_thread = kthread_run( ibscif_poll_thread, NULL, "ibscif_polld" ); ++} ++ ++void ibscif_protocol_cleanup(void) ++{ ++ kthread_stop( poll_thread ); ++ ++ while (poll_thread_running) ++ schedule(); ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_protocol.h a8/drivers/infiniband/hw/scif/ibscif_protocol.h +--- a7/drivers/infiniband/hw/scif/ibscif_protocol.h 
1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_protocol.h 2015-02-23 10:14:37.487809663 -0800 +@@ -0,0 +1,395 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef IBSCIF_PROTOCOL_H ++#define IBSCIF_PROTOCOL_H ++ ++/* ++ * Protocol EtherType ++ */ ++#define IBSCIF_PACKET_TYPE 0x8086 ++ ++/* ++ * Base protocol header version ++ */ ++#define IBSCIF_PROTOCOL_VER_1 1 ++#define IBSCIF_PROTOCOL_VER IBSCIF_PROTOCOL_VER_1 ++ ++/* ++ * Protocol opcode values - All other values are reserved. 
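++ *
++ * An opcode on the wire is a base operation number in the low bits with
++ * zero or more modifier flags or'd in, for example:
++ *
++ *	ibscif_op_send_last_se  = 0 | 0x4000 | 0x1000 = 0x5000
++ *	ibscif_op_read_rsp_last = 2 | 0x0400 | 0x4000 = 0x4402
++ *
++ * ibscif_pdu_base_type() strips the last/se/immed/force_ack flags but not
++ * the iq flag, so a read response (0x0402) stays distinct from a read
++ * request (2) in the receive-side switch.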
++ */ ++#define ibscif_last_flag 0x4000 ++#define ibscif_immed_flag 0x2000 ++#define ibscif_se_flag 0x1000 ++#define ibscif_force_ack_flag 0x0800 ++#define ibscif_iq_flag 0x0400 ++ ++#define ibscif_op_send 0 ++#define ibscif_op_send_last (ibscif_op_send | ibscif_last_flag) ++#define ibscif_op_send_last_se (ibscif_op_send | ibscif_last_flag | ibscif_se_flag) ++#define ibscif_op_send_immed (ibscif_op_send | ibscif_immed_flag) ++#define ibscif_op_send_immed_se (ibscif_op_send | ibscif_immed_flag | ibscif_se_flag) ++ ++#define ibscif_op_write 1 ++#define ibscif_op_write_last (ibscif_op_write | ibscif_last_flag) ++#define ibscif_op_write_immed (ibscif_op_write | ibscif_immed_flag) ++#define ibscif_op_write_immed_se (ibscif_op_write | ibscif_immed_flag | ibscif_se_flag) ++ ++#define ibscif_op_read 2 ++#define ibscif_op_read_rsp (ibscif_op_read | ibscif_iq_flag) ++#define ibscif_op_read_rsp_last (ibscif_op_read_rsp | ibscif_last_flag) ++ ++#define ibscif_op_comp_swap 3 ++#define ibscif_op_comp_swap_rsp (ibscif_op_comp_swap | ibscif_iq_flag) ++ ++#define ibscif_op_fetch_add 4 ++#define ibscif_op_fetch_add_rsp (ibscif_op_fetch_add | ibscif_iq_flag) ++ ++#define ibscif_op_ack 5 ++#define ibscif_op_disconnect 6 ++ ++#define ibscif_op_send_rma 7 ++#define ibscif_op_send_rma_se (ibscif_op_send_rma | ibscif_se_flag) ++#define ibscif_op_send_rma_immed (ibscif_op_send_rma | ibscif_immed_flag) ++#define ibscif_op_send_rma_immed_se (ibscif_op_send_rma | ibscif_immed_flag | ibscif_se_flag) ++ ++#define ibscif_op_write_rma 8 ++#define ibscif_op_write_rma_immed (ibscif_op_write_rma | ibscif_immed_flag) ++#define ibscif_op_write_rma_immed_se (ibscif_op_write_rma | ibscif_immed_flag | ibscif_se_flag) ++ ++#define ibscif_op_read_rma 9 ++#define ibscif_op_rma_rsp (10 | ibscif_iq_flag) ++ ++#define ibscif_op_reg 11 ++#define ibscif_op_dereg 12 ++ ++#define ibscif_op_close 13 ++#define ibscif_op_reopen 14 ++ ++#define ibscif_op_ud 15 ++#define ibscif_op_cm 16 ++ ++#define ibscif_pdu_is_last(op) (op & ibscif_last_flag) ++#define ibscif_pdu_is_immed(op) (op & ibscif_immed_flag) ++#define ibscif_pdu_is_se(op) (op & ibscif_se_flag) ++#define ibscif_pdu_is_force_ack(op) (op & ibscif_force_ack_flag) ++#define ibscif_pdu_is_iq(op) (op & ibscif_iq_flag) ++ ++#define ibscif_pdu_set_last(op) (op | ibscif_last_flag) ++#define ibscif_pdu_set_immed(op) (op | ibscif_immed_flag) ++#define ibscif_pdu_set_se(op) (op | ibscif_se_flag) ++#define ibscif_pdu_set_force_ack(op) (op | ibscif_force_ack_flag) ++#define ibscif_pdu_set_iq(op) (op | ibscif_iq_flag) ++ ++#define ibscif_pdu_base_type(op) \ ++ (op & ~(ibscif_last_flag | \ ++ ibscif_se_flag | \ ++ ibscif_immed_flag | \ ++ ibscif_force_ack_flag)) ++ ++/* ++ * Remote address descriptor for SCIF RMA operations ++ */ ++struct rma_addr { ++ __be64 offset; ++ __be32 length; ++ __be32 reserved; ++} __attribute__ ((packed)); ++ ++/* ++ * Base header present in every packet ++ */ ++struct base_hdr { ++ __be16 opcode; ++ __be16 length; ++ __be32 dst_qp; ++ __be32 src_qp; ++ __be32 seq_num; ++ __be32 sq_ack_num; ++ __be32 iq_ack_num; ++ __be16 hdr_size; ++ __be16 reserved[3]; ++} __attribute__ ((packed)); ++ ++/* ++ * UD Header ++ */ ++struct ud_hdr { ++ struct base_hdr hdr; ++ __be32 msg_id; ++ __be32 msg_length; ++ __be32 msg_offset; ++ u8 grh[40]; ++} __attribute__ ((packed)); ++ ++/* ++ * Send Header ++ */ ++struct send_hdr { ++ struct base_hdr hdr; ++ __be32 msg_id; ++ __be32 msg_length; ++ __be32 msg_offset; ++ __be32 immed_data; ++ __be32 rma_id; /* RMA */ ++ __be32 num_rma_addrs; /* 
RMA */ ++ struct rma_addr rma_addrs[0]; /* RMA */ ++} __attribute__ ((packed)); ++ ++/* ++ * RDMA Write Header ++ */ ++struct write_hdr { ++ struct base_hdr hdr; ++ __be64 rdma_address; ++ __be32 rdma_key; ++ __be32 immed_data; ++ __be32 msg_id; ++ __be32 rma_length; /* RMA */ ++ __be32 rma_id; /* RMA */ ++ __be32 num_rma_addrs; /* RMA */ ++ struct rma_addr rma_addrs[0]; /* RMA */ ++} __attribute__ ((packed)); ++ ++/* ++ * RDMA Read Request Header ++ */ ++struct read_req_hdr { ++ struct base_hdr hdr; ++ __be64 rdma_address; ++ __be32 rdma_key; ++ __be32 rdma_length; /* shared with RMA */ ++ __be32 rdma_id; /* shared with RMA */ ++ __be32 num_rma_addrs; /* RMA */ ++ struct rma_addr rma_addrs[0]; /* RMA */ ++} __attribute__ ((packed)); ++ ++/* ++ * RDMA Read Response Header ++ */ ++struct read_rsp_hdr { ++ struct base_hdr hdr; ++ __be32 rdma_offset; ++ __be32 rdma_id; ++} __attribute__ ((packed)); ++ ++ ++/* ++ * Atomic Compare and Swap Header ++ */ ++struct comp_swap_hdr { ++ struct base_hdr hdr; ++ __be64 atomic_address; ++ __be64 comp_data; ++ __be64 swap_data; ++ __be32 atomic_key; ++ __be32 atomic_id; ++ /* no pad needed */ ++} __attribute__ ((packed)); ++ ++ ++/* ++ * Atomic Fetch/Add Header ++ */ ++struct fetch_add_hdr { ++ struct base_hdr hdr; ++ __be64 atomic_address; ++ __be64 add_data; ++ __be32 atomic_key; ++ __be32 atomic_id; ++ /* no pad needed */ ++} __attribute__ ((packed)); ++ ++/* ++ * Atomic Response Header ++ */ ++struct atomic_rsp_hdr { ++ struct base_hdr hdr; ++ __be64 orig_data; ++ __be32 atomic_id; ++} __attribute__ ((packed)); ++ ++/* ++ * ACK Header ++ */ ++struct ack_hdr { ++ struct base_hdr hdr; ++} __attribute__ ((packed)); ++ ++/* ++ * Disconnect Header ++ */ ++struct disconnect_hdr { ++ struct base_hdr hdr; ++ __be32 reason; ++} __attribute__ ((packed)); ++ ++/* ++ * RMA Response Header ++ */ ++struct rma_rsp_hdr { ++ struct base_hdr hdr; ++ __be32 rma_id; ++ __be32 xfer_length; ++ __be32 error; ++} __attribute__ ((packed)); ++ ++/* ++ * MR Reg/Dereg Info Header ++ */ ++struct reg_hdr { ++ struct base_hdr hdr; ++ __be64 scif_offset; ++ __be64 address; ++ __be32 length; ++ __be32 rkey; ++ __be32 access; ++} __attribute__ ((packed)); ++ ++/* ++ * SCIF endpoint close notiffication ++ */ ++struct close_hdr { ++ struct base_hdr hdr; ++} __attribute__ ((packed)); ++ ++ ++#define IBSCIF_CM_REQ 1 ++#define IBSCIF_CM_REP 2 ++#define IBSCIF_CM_REJ 3 ++#define IBSCIF_CM_RTU 4 ++ ++/* ++ * RDMA CM Header ++ */ ++ ++struct cm_hdr { ++ struct base_hdr hdr; ++ __be64 req_ctx; ++ __be64 rep_ctx; ++ __be32 cmd; ++ __be32 port; ++ __be32 qpn; ++ __be32 status; ++ __be32 plen; ++ u8 pdata[0]; ++} __attribute__ ((packed)); ++ ++enum ibscif_reason { /* Set each value to simplify manual lookup. 
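++			      These codes also cross the wire in the disconnect
++			      header's reason field, so both sides of a
++			      connection must agree on the numbering.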
*/ ++ ++ /* Local Events */ ++ IBSCIF_REASON_USER_GENERATED = 0, ++ IBSCIF_REASON_CQ_COMPLETION = 1, ++ IBSCIF_REASON_NIC_FATAL = 2, ++ IBSCIF_REASON_NIC_REMOVED = 3, ++ ++ /* Disconnect Event */ ++ IBSCIF_REASON_DISCONNECT = 4, ++ ++ /* CQ Error */ ++ IBSCIF_REASON_CQ_OVERRUN = 5, ++ IBSCIF_REASON_CQ_FATAL = 6, ++ ++ /* QP Errors */ ++ IBSCIF_REASON_QP_SQ_ERROR = 7, ++ IBSCIF_REASON_QP_RQ_ERROR = 8, ++ IBSCIF_REASON_QP_DESTROYED = 9, ++ IBSCIF_REASON_QP_ERROR = 10, ++ IBSCIF_REASON_QP_FATAL = 11, ++ ++ /* Operation Errors */ ++ IBSCIF_REASON_INVALID_OPCODE = 12, ++ IBSCIF_REASON_INVALID_LENGTH = 13, ++ IBSCIF_REASON_INVALID_QP = 14, ++ IBSCIF_REASON_INVALID_MSG_ID = 15, ++ IBSCIF_REASON_INVALID_LKEY = 16, ++ IBSCIF_REASON_INVALID_RDMA_RKEY = 17, ++ IBSCIF_REASON_INVALID_RDMA_ID = 18, ++ IBSCIF_REASON_INVALID_ATOMIC_RKEY = 19, ++ IBSCIF_REASON_INVALID_ATOMIC_ID = 20, ++ IBSCIF_REASON_MAX_IR_EXCEEDED = 21, ++ IBSCIF_REASON_ACK_TIMEOUT = 22, ++ ++ /* Protection Errors */ ++ IBSCIF_REASON_PROTECTION_VIOLATION = 23, ++ IBSCIF_REASON_BOUNDS_VIOLATION = 24, ++ IBSCIF_REASON_ACCESS_VIOLATION = 25, ++ IBSCIF_REASON_WRAP_ERROR = 26 ++}; ++ ++union ibscif_pdu { ++ struct base_hdr hdr; ++ struct ud_hdr ud; ++ struct send_hdr send; ++ struct write_hdr write; ++ struct read_req_hdr read_req; ++ struct read_rsp_hdr read_rsp; ++ struct comp_swap_hdr comp_swap; ++ struct fetch_add_hdr fetch_add; ++ struct atomic_rsp_hdr atomic_rsp; ++ struct ack_hdr ack; ++ struct disconnect_hdr disconnect; ++ struct rma_rsp_hdr rma_rsp; ++ struct reg_hdr reg; ++ struct close_hdr close; ++ struct cm_hdr cm; ++}; ++ ++struct ibscif_full_frame { ++ union ibscif_pdu ibscif; ++}; ++ ++static inline int seq_before(u32 seq1, u32 seq2) ++{ ++ return (s32)(seq1 - seq2) < 0; ++} ++ ++static inline int seq_after(u32 seq1, u32 seq2) ++{ ++ return (s32)(seq2 - seq1) < 0; ++} ++ ++static inline int seq_between(u32 seq_target, u32 seq_low, u32 seq_high) ++{ ++ return seq_high - seq_low >= seq_target - seq_low; ++} ++ ++static inline u32 seq_window(u32 earlier, u32 later) ++{ ++ return earlier > later ? ((u32)~0 - earlier) + later : later - earlier; ++} ++ ++#define ibscif_tx_unacked_window(tx) seq_window((tx)->last_ack_seq_recvd, (tx)->next_seq - 1) ++ ++#define ibscif_rx_window(rx) seq_window((rx)->last_seq_acked, (rx)->last_in_seq) ++ ++#define ibscif_tx_window(tx) ((u32)window_size - ibscif_tx_unacked_window(tx)) ++ ++#endif /* IBSCIF_PROTOCOL_H */ +diff -urN a7/drivers/infiniband/hw/scif/ibscif_provider.c a8/drivers/infiniband/hw/scif/ibscif_provider.c +--- a7/drivers/infiniband/hw/scif/ibscif_provider.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_provider.c 2015-02-23 10:14:37.488809663 -0800 +@@ -0,0 +1,406 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. 
++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++static int ibscif_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) ++{ ++ memset(attr, 0, sizeof *attr); ++ ++ attr->vendor_id = VENDOR_ID; ++ attr->vendor_part_id = DEVICE_ID; ++ attr->hw_ver = HW_REV; ++ attr->fw_ver = FW_REV; ++ attr->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT; ++ attr->max_mr_size = MAX_MR_SIZE; ++ attr->page_size_cap = PAGE_SIZE; ++ attr->max_qp = MAX_QPS; ++ attr->max_qp_wr = MAX_QP_SIZE; ++ attr->max_sge = MAX_SGES; ++ attr->max_cq = MAX_CQS; ++ attr->max_cqe = MAX_CQ_SIZE; ++ attr->max_mr = MAX_MRS; ++ attr->max_pd = MAX_PDS; ++ attr->max_qp_rd_atom = MAX_IR>255 ? 255 : MAX_IR; ++ attr->max_qp_init_rd_atom = MAX_OR>255 ? 255 : MAX_OR; ++ attr->max_res_rd_atom = MAX_IR>255 ? 255 : MAX_IR; ++ attr->atomic_cap = IB_ATOMIC_HCA; ++ attr->sys_image_guid = ibdev->node_guid; ++ ++ return 0; ++} ++ ++static int ibscif_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) ++{ ++ struct ibscif_dev *dev = to_dev(ibdev); ++ ++ memset(attr, 0, sizeof *attr); ++ ++ /* See IB Spec r1.2 Table 145 for physical port state values. */ ++ attr->lid = IBSCIF_NODE_ID_TO_LID(dev->node_id); ++ attr->sm_lid = 1; ++ attr->gid_tbl_len = 1; ++ attr->pkey_tbl_len = 1; ++ attr->max_msg_sz = MAX_MR_SIZE; ++ attr->phys_state = 5; /* LinkUp */ ++ attr->state = IB_PORT_ACTIVE; ++ attr->max_mtu = IB_MTU_4096; ++ attr->active_mtu = IB_MTU_4096; ++ attr->active_width = IB_WIDTH_4X; ++ attr->active_speed = 4; ++ attr->max_vl_num = 1; ++ attr->port_cap_flags = IB_PORT_SM_DISABLED; ++ ++ return 0; ++} ++ ++static int ibscif_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) ++{ ++ *pkey = 0xffff; /* IB_DEFAULT_PKEY_FULL */ ++ return 0; ++} ++ ++static int ibscif_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *ibgid) ++{ ++ struct ibscif_dev *dev = to_dev(ibdev); ++ ++ memcpy(ibgid, &dev->gid, sizeof(*ibgid)); ++ return 0; ++} ++ ++static struct ib_ucontext *ibscif_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) ++{ ++ struct ib_ucontext *context = kzalloc(sizeof *context, GFP_KERNEL); ++ return (!context) ? 
ERR_PTR(-ENOMEM) : context; ++} ++ ++static int ibscif_dealloc_ucontext(struct ib_ucontext *context) ++{ ++ kfree(context); ++ return 0; ++} ++ ++static void ibscif_generate_eui64(struct ibscif_dev *dev, u8 *eui64) ++{ ++ memcpy(eui64, dev->netdev->dev_addr, 3); ++ eui64[3] = 0xFF; ++ eui64[4] = 0xFE; ++ memcpy(eui64+5, dev->netdev->dev_addr+3, 3); ++} ++ ++static int ibscif_register_device(struct ibscif_dev *dev) ++{ ++ strncpy(dev->ibdev.node_desc, DRV_SIGNON, sizeof dev->ibdev.node_desc); ++ ibscif_generate_eui64(dev, (u8 *)&dev->ibdev.node_guid); ++ dev->ibdev.owner = THIS_MODULE; ++ dev->ibdev.uverbs_abi_ver = UVERBS_ABI_VER; ++ dev->ibdev.uverbs_cmd_mask = ++ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | ++ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | ++ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | ++ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | ++ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | ++ (1ull << IB_USER_VERBS_CMD_CREATE_AH) | ++ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | ++ (1ull << IB_USER_VERBS_CMD_CREATE_QP) | ++ (1ull << IB_USER_VERBS_CMD_QUERY_QP) | ++ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | ++ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | ++ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | ++ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | ++ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | ++ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | ++ (1ull << IB_USER_VERBS_CMD_POLL_CQ) | ++ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | ++ (1ull << IB_USER_VERBS_CMD_REG_MR) | ++ (1ull << IB_USER_VERBS_CMD_DEREG_MR) | ++ (1ull << IB_USER_VERBS_CMD_POST_SEND) | ++ (1ull << IB_USER_VERBS_CMD_POST_RECV); ++#if defined(MOFED) && !defined(MOFED_2_1) ++ dev->ibdev.node_type = new_ib_type ? RDMA_EXP_NODE_MIC : RDMA_NODE_RNIC; ++#else ++ dev->ibdev.node_type = new_ib_type ? RDMA_NODE_MIC : RDMA_NODE_RNIC; ++#endif ++ dev->ibdev.phys_port_cnt = 1; ++ ++ dev->ibdev.query_device = ibscif_query_device; // Mandatory ++ dev->ibdev.num_comp_vectors = 1; // Mandatory ++ dev->ibdev.query_port = ibscif_query_port; // Mandatory ++ dev->ibdev.query_pkey = ibscif_query_pkey; // Mandatory ++ dev->ibdev.query_gid = ibscif_query_gid; // Mandatory ++ dev->ibdev.alloc_ucontext = ibscif_alloc_ucontext; // Required ++ dev->ibdev.dealloc_ucontext = ibscif_dealloc_ucontext; // Required ++ dev->ibdev.alloc_pd = ibscif_alloc_pd; // Mandatory ++ dev->ibdev.dealloc_pd = ibscif_dealloc_pd; // Mandatory ++ dev->ibdev.create_ah = ibscif_create_ah; // Mandatory ++ dev->ibdev.destroy_ah = ibscif_destroy_ah; // Mandatory ++ dev->ibdev.create_qp = ibscif_create_qp; // Mandatory ++ dev->ibdev.query_qp = ibscif_query_qp; // Optional ++ dev->ibdev.modify_qp = ibscif_modify_qp; // Mandatory ++ dev->ibdev.destroy_qp = ibscif_destroy_qp; // Mandatory ++ dev->ibdev.create_cq = ibscif_create_cq; // Mandatory ++ dev->ibdev.resize_cq = ibscif_resize_cq; // Optional ++ dev->ibdev.destroy_cq = ibscif_destroy_cq; // Mandatory ++ dev->ibdev.poll_cq = ibscif_poll_cq; // Mandatory ++ dev->ibdev.req_notify_cq = ibscif_arm_cq; // Mandatory ++ dev->ibdev.get_dma_mr = ibscif_get_dma_mr; // Mandatory ++ dev->ibdev.reg_phys_mr = ibscif_reg_phys_mr; // Required ++ dev->ibdev.reg_user_mr = ibscif_reg_user_mr; // Required ++ dev->ibdev.dereg_mr = ibscif_dereg_mr; // Mandatory ++ dev->ibdev.post_send = ibscif_post_send; // Mandatory ++ dev->ibdev.post_recv = ibscif_post_receive; // Mandatory ++ dev->ibdev.dma_ops = &ibscif_dma_mapping_ops; // ?? 
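++	/*
++	 * Filling in an iw_cm_verbs table is what lets the kernel iWARP CM
++	 * (and, through it, the RDMA CM) drive connection setup for this
++	 * software device via the ibscif_cm_* handlers below.
++	 */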
++ ++ dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); ++ if (!dev->ibdev.iwcm) ++ return -ENOMEM; ++ ++ dev->ibdev.iwcm->connect = ibscif_cm_connect; ++ dev->ibdev.iwcm->accept = ibscif_cm_accept; ++ dev->ibdev.iwcm->reject = ibscif_cm_reject; ++ dev->ibdev.iwcm->create_listen = ibscif_cm_create_listen; ++ dev->ibdev.iwcm->destroy_listen = ibscif_cm_destroy_listen; ++ dev->ibdev.iwcm->add_ref = ibscif_cm_add_ref; ++ dev->ibdev.iwcm->rem_ref = ibscif_cm_rem_ref; ++ dev->ibdev.iwcm->get_qp = ibscif_cm_get_qp; ++ ++ return ib_register_device(&dev->ibdev, NULL); ++} ++ ++static void ibscif_dev_release(struct device *dev) ++{ ++ kfree(dev); ++} ++ ++/* ++ * Hold devlist_mutex during this call for synchronization as needed. ++ * Upon return, dev is invalid. ++ */ ++static void ibscif_remove_dev(struct ibscif_dev *dev) ++{ ++ struct ibscif_conn *conn, *next; ++ ++ if (dev->ibdev.reg_state == IB_DEV_REGISTERED) ++ ib_unregister_device(&dev->ibdev); ++ ++ WARN_ON(!list_empty(&dev->wq_list)); ++ ++ down(&devlist_mutex); ++ list_del(&dev->entry); ++ up(&devlist_mutex); ++ ++ ibscif_refresh_pollep_list(); ++ ++ down(&dev->mutex); ++ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) { ++ scif_close(conn->ep); ++ list_del(&conn->entry); ++ kfree(conn); ++ } ++ up(&dev->mutex); ++ ++ if (dev->listen_ep) ++ scif_close(dev->listen_ep); ++ ibscif_procfs_remove_dev(dev); ++ ++ dev_put(dev->netdev); ++ device_unregister(dev->ibdev.dma_device); ++ ib_dealloc_device(&dev->ibdev); ++} ++ ++static void ibscif_remove_one(struct net_device *netdev) ++{ ++ struct ibscif_dev *dev, *next; ++ ++ list_for_each_entry_safe(dev, next, &devlist, entry) { ++ if (netdev == dev->netdev) { ++ ibscif_remove_dev(dev); ++ break; ++ } ++ } ++} ++ ++static int node_cnt; ++static uint16_t node_ids[IBSCIF_MAX_DEVICES]; ++static uint16_t my_node_id; ++ ++static void ibscif_add_one(struct net_device *netdev) ++{ ++ static int dev_cnt; ++ static dma_addr_t dma_mask = -1; ++ struct ibscif_dev *dev; ++ int ret; ++ ++ dev = (struct ibscif_dev *)ib_alloc_device(sizeof *dev); ++ if (!dev) { ++ printk(KERN_ALERT PFX "%s: fail to allocate ib_device\n", __func__); ++ return; ++ } ++ ++ INIT_LIST_HEAD(&dev->conn_list); ++ INIT_LIST_HEAD(&dev->mr_list); ++ init_MUTEX(&dev->mr_list_mutex); ++ init_MUTEX(&dev->mutex); ++ spin_lock_init(&dev->atomic_op); ++ INIT_LIST_HEAD(&dev->wq_list); ++ atomic_set(&dev->available, 256); /* FIXME */ ++ ++ dev_hold(netdev); ++ dev->netdev = netdev; ++ ++ /* use the MAC address of the netdev as the GID so that RDMA CM can ++ * find the ibdev from the IP address associated with the netdev. 
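++ * (The CM's address resolution maps the destination IP to a net_device and
++ * then matches that device's MAC against the registered GIDs, which is why
++ * only the first ETH_ALEN bytes of the 16-byte GID are filled in below.)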
++ */ ++ memcpy(&dev->gid, dev->netdev->dev_addr, ETH_ALEN); ++ ++ dev->ibdev.dma_device = kzalloc(sizeof *dev->ibdev.dma_device, GFP_KERNEL); ++ if (!dev->ibdev.dma_device) { ++ printk(KERN_ALERT PFX "%s: fail to allocate dma_device\n", __func__); ++ goto out_free_ibdev; ++ } ++ ++ snprintf(dev->name, IBSCIF_NAME_SIZE, "scif_dma_%d", dev_cnt); ++ snprintf(dev->ibdev.name, IB_DEVICE_NAME_MAX, "scif%d", dev_cnt++); ++ dev->ibdev.dma_device->release = ibscif_dev_release; ++ dev->ibdev.dma_device->init_name = dev->name; ++ dev->ibdev.dma_device->dma_mask = &dma_mask; ++ ret = device_register(dev->ibdev.dma_device); ++ if (ret) { ++ printk(KERN_ALERT PFX "%s: fail to register dma_device, ret=%d\n", __func__, ret); ++ kfree(dev->ibdev.dma_device); ++ goto out_free_ibdev; ++ } ++ ++ /* Notice: set up listen ep before inserting to devlist */ ++ ++ dev->listen_ep = scif_open(); ++ if (!dev->listen_ep || IS_ERR(dev->listen_ep)) { ++ printk(KERN_ALERT PFX "%s: scif_open returns %ld\n", __func__, PTR_ERR(dev->listen_ep)); ++ goto out_unreg_dmadev ; ++ } ++ ++ ret = scif_get_nodeIDs( node_ids, IBSCIF_MAX_DEVICES, &my_node_id); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_get_nodeIDS returns %d\n", ++ __func__, ret); ++ goto out_close_ep; ++ } ++ ++ node_cnt = ret; ++ dev->node_id = my_node_id; ++ printk(KERN_ALERT PFX "%s: my node_id is %d\n", __func__, dev->node_id); ++ ++ ret = scif_bind(dev->listen_ep, SCIF_OFED_PORT_0); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_bind returns %d, port=%d\n", ++ __func__, ret, SCIF_OFED_PORT_0); ++ goto out_close_ep; ++ } ++ ++ ret = scif_listen(dev->listen_ep, IBSCIF_MAX_DEVICES); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_listen returns %d\n", __func__, ret); ++ goto out_close_ep; ++ } ++ ++ down(&devlist_mutex); ++ list_add_tail(&dev->entry, &devlist); ++ up(&devlist_mutex); ++ ++ if (ibscif_register_device(dev)) ++ ibscif_remove_dev(dev); ++ else ++ ibscif_procfs_add_dev(dev); ++ ++ ibscif_refresh_pollep_list(); ++ ++ return; ++ ++out_close_ep: ++ scif_close(dev->listen_ep); ++ ++out_unreg_dmadev: ++ device_unregister(dev->ibdev.dma_device); /* it will free the memory, too */ ++ ++out_free_ibdev: ++ ib_dealloc_device(&dev->ibdev); ++} ++ ++static int ibscif_notifier(struct notifier_block *nb, unsigned long event, void *ptr) ++{ ++ struct net_device *netdev = (struct net_device *)ptr; ++ ++ if (strcmp(netdev->name, "mic0")) ++ return NOTIFY_DONE; ++ ++ switch(event) { ++ case NETDEV_REGISTER: ++ ibscif_add_one(netdev); ++ ibscif_protocol_init_post(); ++ break; ++ ++ case NETDEV_UNREGISTER: ++ ibscif_remove_one(netdev); ++ break; ++ ++ default: ++ /* we only care about the MAC address, ignore other notifications */ ++ break; ++ } ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block ibscif_notifier_block = { ++ .notifier_call = ibscif_notifier, ++}; ++ ++int ibscif_dev_init(void) ++{ ++ int err = 0; ++ ++ ibscif_protocol_init_pre(); ++ ++ err = register_netdevice_notifier(&ibscif_notifier_block); ++ if (err) ++ ibscif_protocol_cleanup(); ++ ++ return err; ++} ++ ++void ibscif_dev_cleanup(void) ++{ ++ struct ibscif_dev *dev, *next; ++ ++ ibscif_protocol_cleanup(); ++ unregister_netdevice_notifier(&ibscif_notifier_block); ++ list_for_each_entry_safe(dev, next, &devlist, entry) ++ ibscif_remove_dev(dev); ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_qp.c a8/drivers/infiniband/hw/scif/ibscif_qp.c +--- a7/drivers/infiniband/hw/scif/ibscif_qp.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_qp.c 
2015-02-23 10:14:37.488809663 -0800 +@@ -0,0 +1,868 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++static struct ibscif_wr *ibscif_alloc_wr(struct ibscif_wq *wq, int new_size, int bytes) ++{ ++ if (new_size && (new_size != wq->size)) { ++ struct ibscif_wr *new_wr = vzalloc(bytes); ++ return new_wr ? new_wr : ERR_PTR(-ENOMEM); ++ } ++ return NULL; ++} ++ ++static void ibscif_move_wr(struct ibscif_wq *wq, struct ibscif_wr *new_wr, int new_size) ++{ ++ int i; ++ ++ if (wq->size == new_size) ++ return; ++ ++ for (i = 0; i < wq->depth; i++) { ++ memcpy(&new_wr[i], &wq->wr[wq->head], wq->wr_size); ++ wq->head = (wq->head + 1) % wq->size; ++ } ++ ++ if (wq->wr) { ++ vfree(wq->wr); ++ } ++ ++ wq->wr = new_wr; ++ wq->head = 0; ++ wq->tail = wq->depth; ++ wq->size = new_size; ++} ++ ++/* Caller must provide proper synchronization. */ ++static int ibscif_resize_qp(struct ibscif_qp *qp, int sq_size, int rq_size, int iq_size) ++{ ++ struct ibscif_wr *new_sq, *new_rq, *new_iq; ++ int sq_bytes, rq_bytes, iq_bytes; ++ int old_npages, new_npages, err; ++ ++ sq_bytes = PAGE_ALIGN(sq_size * qp->sq.wr_size); ++ rq_bytes = PAGE_ALIGN(rq_size * qp->rq.wr_size); ++ iq_bytes = PAGE_ALIGN(iq_size * qp->iq.wr_size); ++ ++ sq_size = sq_bytes / qp->sq.wr_size; ++ rq_size = rq_bytes / qp->rq.wr_size; ++ iq_size = iq_bytes / qp->iq.wr_size; ++ ++ if ((sq_size == qp->sq.size) && ++ (rq_size == qp->rq.size) && ++ (iq_size == qp->iq.size)) ++ return 0; ++ ++ if ((sq_size < qp->sq.depth) || ++ (rq_size < qp->rq.depth) || ++ (iq_size < qp->iq.depth)) ++ return -EINVAL; ++ ++ /* Calculate the number of new pages required for this allocation. 
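++   Only the delta against the pages already backing the three queues is
++   charged to the global quota; if the resize shrinks the queues, the
++   surplus is handed back via ibscif_release_quota() once the existing
++   work requests have been copied across.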
*/ ++ new_npages = (sq_bytes + rq_bytes + iq_bytes) >> PAGE_SHIFT; ++ old_npages = (PAGE_ALIGN(qp->sq.size * qp->sq.wr_size) + ++ PAGE_ALIGN(qp->rq.size * qp->rq.wr_size) + ++ PAGE_ALIGN(qp->iq.size * qp->iq.wr_size)) >> PAGE_SHIFT; ++ new_npages -= old_npages; ++ ++ if (new_npages > 0) { ++ err = ibscif_reserve_quota(&new_npages); ++ if (err) ++ return err; ++ } ++ ++ new_sq = ibscif_alloc_wr(&qp->sq, sq_size, sq_bytes); ++ new_rq = ibscif_alloc_wr(&qp->rq, rq_size, rq_bytes); ++ new_iq = ibscif_alloc_wr(&qp->iq, iq_size, iq_bytes); ++ if (IS_ERR(new_sq) || IS_ERR(new_rq) || IS_ERR(new_iq)) ++ goto out; ++ ++ ibscif_move_wr(&qp->sq, new_sq, sq_size); ++ ibscif_move_wr(&qp->rq, new_rq, rq_size); ++ ibscif_move_wr(&qp->iq, new_iq, iq_size); ++ ++ if (new_npages < 0) ++ ibscif_release_quota(-new_npages); ++ ++ return 0; ++out: ++ if (new_sq && !IS_ERR(new_sq)) ++ vfree(new_sq); ++ if (new_rq && !IS_ERR(new_rq)) ++ vfree(new_rq); ++ if (new_iq && !IS_ERR(new_iq)) ++ vfree(new_iq); ++ ++ return -ENOMEM; ++} ++ ++static int ibscif_init_wqs(struct ibscif_qp *qp, struct ib_qp_init_attr *attr) ++{ ++ spin_lock_init(&qp->sq.lock); ++ spin_lock_init(&qp->rq.lock); ++ spin_lock_init(&qp->iq.lock); ++ ++ qp->sq.qp = qp; ++ qp->rq.qp = qp; ++ qp->iq.qp = qp; ++ ++ qp->sq.wirestate = &qp->wire.sq; ++ qp->iq.wirestate = &qp->wire.iq; ++ ++ qp->sq.max_sge = attr->cap.max_send_sge; ++ qp->rq.max_sge = attr->cap.max_recv_sge; ++ qp->iq.max_sge = 1; ++ ++ qp->sq.wr_size = sizeof *qp->sq.wr + (sizeof *qp->sq.wr->ds_list * qp->sq.max_sge); ++ qp->rq.wr_size = sizeof *qp->rq.wr + (sizeof *qp->rq.wr->ds_list * qp->rq.max_sge); ++ qp->iq.wr_size = sizeof *qp->iq.wr + (sizeof *qp->iq.wr->ds_list * qp->iq.max_sge); ++ ++ return ibscif_resize_qp(qp, attr->cap.max_send_wr, attr->cap.max_recv_wr, (rma_threshold==0x7FFFFFFF)?0:attr->cap.max_send_wr); ++} ++ ++static void ibscif_reset_tx_state(struct ibscif_tx_state *tx) ++{ ++ tx->next_seq = 1; ++ tx->last_ack_seq_recvd = 0; ++ tx->next_msg_id = 0; ++} ++ ++static void ibscif_reset_rx_state(struct ibscif_rx_state *rx) ++{ ++ rx->last_in_seq = 0; ++ rx->last_seq_acked = 0; ++ rx->defer_in_process = 0; ++} ++ ++static void ibscif_reset_wirestate(struct ibscif_wirestate *wirestate) ++{ ++ ibscif_reset_tx_state(&wirestate->tx); ++ ibscif_reset_rx_state(&wirestate->rx); ++} ++ ++static void ibscif_reset_wire(struct ibscif_wire *wire) ++{ ++ ibscif_reset_wirestate(&wire->sq); ++ ibscif_reset_wirestate(&wire->iq); ++} ++ ++static void ibscif_init_wire(struct ibscif_wire *wire) ++{ ++ ibscif_reset_wire(wire); ++} ++ ++static void ibscif_query_qp_cap(struct ibscif_qp *qp, struct ib_qp_cap *cap) ++{ ++ memset(cap, 0, sizeof *cap); ++ cap->max_send_wr = qp->sq.size; ++ cap->max_recv_wr = qp->rq.size; ++ cap->max_send_sge = qp->sq.max_sge; ++ cap->max_recv_sge = qp->rq.max_sge; ++} ++ ++struct ib_qp *ibscif_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attr, struct ib_udata *udata) ++{ ++ struct ibscif_dev *dev = to_dev(ibpd->device); ++ struct ibscif_qp *qp; ++ int err; ++ ++ if ((attr->qp_type != IB_QPT_RC && attr->qp_type != IB_QPT_UD) || ++ (attr->cap.max_send_wr > MAX_QP_SIZE) || ++ (attr->cap.max_recv_wr > MAX_QP_SIZE) || ++ (attr->cap.max_send_sge > MAX_SGES) || ++ (attr->cap.max_recv_sge > MAX_SGES) || ++ (attr->cap.max_send_wr && !attr->send_cq) || ++ (attr->cap.max_recv_wr && !attr->recv_cq)) ++ return ERR_PTR(-EINVAL); ++ ++ if (!atomic_add_unless(&dev->qp_cnt, 1, MAX_QPS)) ++ return ERR_PTR(-EAGAIN); ++ ++ qp = kzalloc(sizeof *qp, GFP_KERNEL); ++ if (!qp) { ++ 
atomic_dec(&dev->qp_cnt); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ qp->local_node_id = dev->node_id; ++ ++ kref_init(&qp->ref); ++ init_completion(&qp->done); ++ init_MUTEX(&qp->modify_mutex); ++ spin_lock_init(&qp->lock); ++ ibscif_init_wire(&qp->wire); ++ qp->sq_policy = attr->sq_sig_type; ++ qp->dev = dev; ++ qp->mtu = IBSCIF_MTU; /* FIXME */ ++ qp->state = QP_IDLE; ++ ++ err = ibscif_init_wqs(qp, attr); ++ if (err) ++ goto out; ++ ++ ibscif_query_qp_cap(qp, &attr->cap); ++ ++ err = ibscif_wiremap_add(qp, &qp->ibqp.qp_num); ++ if (err) ++ goto out; ++ ++ qp->magic = QP_MAGIC; ++ ++ ibscif_scheduler_add_qp(qp); ++ qp->in_scheduler = 1; ++ ++ return &qp->ibqp; ++out: ++ ibscif_destroy_qp(&qp->ibqp); ++ return ERR_PTR(err); ++} ++ ++static inline enum ib_qp_state to_ib_qp_state(enum ibscif_qp_state state) ++{ ++ switch (state) { ++ case QP_IDLE: return IB_QPS_INIT; ++ case QP_CONNECTED: return IB_QPS_RTS; ++ case QP_DISCONNECT: return IB_QPS_SQD; ++ case QP_ERROR: return IB_QPS_ERR; ++ case QP_RESET: return IB_QPS_RESET; ++ default: return -1; ++ } ++} ++ ++static inline enum ibscif_qp_state to_ibscif_qp_state(enum ib_qp_state state) ++{ ++ switch (state) { ++ case IB_QPS_INIT: return QP_IDLE; ++ case IB_QPS_RTS: return QP_CONNECTED; ++ case IB_QPS_SQD: return QP_DISCONNECT; ++ case IB_QPS_ERR: return QP_ERROR; ++ case IB_QPS_RESET: return QP_RESET; ++ case IB_QPS_RTR: return QP_IGNORE; ++ default: return -1; ++ } ++} ++ ++/* Caller must provide proper synchronization. */ ++static void __ibscif_query_qp(struct ibscif_qp *qp, struct ib_qp_attr *attr, struct ib_qp_init_attr *init_attr) ++{ ++ struct ib_qp_cap cap; ++ ++ ibscif_query_qp_cap(qp, &cap); ++ ++ if (attr) { ++ attr->qp_state = to_ib_qp_state(qp->state); ++ attr->cur_qp_state = attr->qp_state; ++ attr->port_num = 1; ++ attr->path_mtu = qp->mtu; ++ attr->dest_qp_num = qp->remote_qpn; ++ attr->qp_access_flags = qp->access; ++ attr->max_rd_atomic = qp->max_or; ++ attr->max_dest_rd_atomic = qp->iq.size; ++ attr->cap = cap; ++ } ++ ++ if (init_attr) { ++ init_attr->qp_type = qp->ibqp.qp_type; ++ init_attr->sq_sig_type = qp->sq_policy; ++ init_attr->cap = cap; ++ } ++} ++ ++int ibscif_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) ++{ ++ struct ibscif_qp *qp = to_qp(ibqp); ++ ++ memset(attr, 0, sizeof *attr); ++ memset(init_attr, 0, sizeof *init_attr); ++ ++ spin_lock_bh(&qp->lock); ++ __ibscif_query_qp(qp, attr, init_attr); ++ spin_unlock_bh(&qp->lock); ++ ++ return 0; ++} ++ ++static int ibscif_flush_wq(struct ibscif_wq *wq, struct ibscif_cq *cq) ++{ ++ struct ibscif_wr *wr; ++ struct ibscif_wc *wc; ++ int i, num_wr, err; ++ ++ /* Prevent divide by zero traps on wrap math. */ ++ if (!wq->size) ++ return 0; ++ ++ spin_lock_bh(&wq->lock); ++ for (i = (wq->head + wq->completions) % wq->size, num_wr = 0; ++ wq->depth && (wq->completions != wq->depth); ++ i = (i + 1) % wq->size, num_wr++) { ++ ++ wr = ibscif_get_wr(wq, i); ++ ++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds); ++ ++ if (!cq) { ++ wq->completions++; ++ continue; ++ } ++ ++ err = ibscif_reserve_cqe(cq, &wc); ++ if (err) { ++ num_wr = err; ++ break; ++ } ++ ++ wc->ibwc.qp = &wq->qp->ibqp; ++ wc->ibwc.src_qp = wq->qp->remote_qpn; ++ wc->ibwc.wr_id = wr->id; ++ wc->ibwc.opcode = is_rq(wq) ? 
IB_WC_RECV : to_ib_wc_opcode(wr->opcode); ++ wc->ibwc.status = IB_WC_WR_FLUSH_ERR; ++ wc->ibwc.ex.imm_data = 0; ++ wc->ibwc.byte_len = 0; ++ wc->ibwc.port_num = 1; ++ ++ wc->wq = wq; ++ wc->reap = wq->reap + 1; ++ wq->reap = 0; ++ wq->completions++; ++ ++ ibscif_append_cqe(cq, wc, 0); ++ } ++ spin_unlock_bh(&wq->lock); ++ ++ if (num_wr && cq) ++ ibscif_notify_cq(cq); ++ ++ return num_wr; ++} ++ ++static void ibscif_flush_wqs(struct ibscif_qp *qp) ++{ ++ int ret; ++ ++ ret = ibscif_flush_wq(&qp->sq, to_cq(qp->ibqp.send_cq)); ++ if (ret) /* A clean SQ flush should have done nothing. */ ++ qp->state = QP_ERROR; ++ ++ ret = ibscif_flush_wq(&qp->rq, to_cq(qp->ibqp.recv_cq)); ++ if (ret < 0) ++ qp->state = QP_ERROR; ++ ++ ibscif_flush_wq(&qp->iq, NULL); ++} ++ ++static void ibscif_reset_wq(struct ibscif_wq *wq, struct ibscif_cq *cq) ++{ ++ ibscif_clear_cqes(cq, wq); ++ ++ wq->head = 0; ++ wq->tail = 0; ++ wq->depth = 0; ++ wq->reap = 0; ++ wq->next_wr = 0; ++ wq->next_msg_id = 0; ++ wq->completions = 0; ++} ++ ++static void ibscif_reset_wqs(struct ibscif_qp *qp) ++{ ++ ibscif_reset_wq(&qp->sq, to_cq(qp->ibqp.send_cq)); ++ ibscif_reset_wq(&qp->rq, to_cq(qp->ibqp.recv_cq)); ++ ibscif_reset_wq(&qp->iq, NULL); ++} ++ ++static void ibscif_qp_event(struct ibscif_qp *qp, enum ib_event_type event) ++{ ++ if (qp->ibqp.event_handler) { ++ struct ib_event record; ++ record.event = event; ++ record.device = qp->ibqp.device; ++ record.element.qp = &qp->ibqp; ++ qp->ibqp.event_handler(&record, qp->ibqp.qp_context); ++ } ++} ++ ++/* Caller must provide proper synchronization. */ ++static void ibscif_qp_error(struct ibscif_qp *qp) ++{ ++ if (qp->state == QP_ERROR) ++ return; ++ ++ if (qp->state == QP_CONNECTED) ++ ibscif_send_disconnect(qp, IBSCIF_REASON_DISCONNECT); ++ ++ qp->state = QP_ERROR; ++ ++ ibscif_flush_wqs(qp); ++ ++ ibscif_cm_async_callback(qp->cm_context); ++ qp->cm_context = NULL; ++ ++ /* don't generate the error event because transitioning to IB_QPS_ERR ++ state is normal when a QP is disconnected */ ++ ++ //ibscif_qp_event(qp, IB_EVENT_QP_FATAL); ++} ++ ++/* Caller must provide proper synchronization. */ ++static void ibscif_qp_reset(struct ibscif_qp *qp) ++{ ++ if (qp->state == QP_RESET) ++ return; ++ ++ if (qp->state == QP_CONNECTED) ++ ibscif_send_disconnect(qp, IBSCIF_REASON_DISCONNECT); ++ ++ ibscif_reset_wqs(qp); ++ ibscif_reset_wire(&qp->wire); ++ ++ ibscif_cm_async_callback(qp->cm_context); ++ qp->cm_context = NULL; ++ ++ qp->state = QP_RESET; ++} ++ ++/* Caller must provide proper synchronization. */ ++void ibscif_qp_idle(struct ibscif_qp *qp) ++{ ++ if (qp->state == QP_IDLE) ++ return; ++ ++ ibscif_reset_wqs(qp); ++ ibscif_reset_wire(&qp->wire); ++ ++ qp->state = QP_IDLE; ++} ++ ++/* Caller must provide proper synchronization. */ ++static void ibscif_qp_connect(struct ibscif_qp *qp, enum ibscif_qp_state cur_state) ++{ ++ if (cur_state == QP_CONNECTED) ++ return; ++ ++ qp->loopback = (qp->ibqp.qp_type != IB_QPT_UD) && !scif_loopback && (qp->local_node_id == qp->remote_node_id); ++ qp->conn = NULL; ++ ++ qp->state = QP_CONNECTED; ++} ++ ++/* Caller must provide proper synchronization. 
*/ ++static void ibscif_qp_local_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason) ++{ ++ if (qp->state != QP_CONNECTED) ++ return; ++ ++ if (reason != IBSCIF_REASON_DISCONNECT) ++ printk(KERN_NOTICE PFX "QP %u sending abnormal disconnect %d\n", ++ qp->ibqp.qp_num, reason); ++ ++ qp->state = QP_DISCONNECT; ++ ibscif_send_disconnect(qp, reason); ++ ++ ibscif_flush_wqs(qp); ++ ++ ibscif_cm_async_callback(qp->cm_context); ++ qp->cm_context = NULL; ++ ++ if (reason != IBSCIF_REASON_DISCONNECT) { ++ qp->state = QP_ERROR; ++ ibscif_qp_event(qp, IB_EVENT_QP_FATAL); ++ } else ++ ibscif_qp_idle(qp); ++} ++ ++void ibscif_qp_internal_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason) ++{ ++ spin_lock_bh(&qp->lock); ++ ibscif_qp_local_disconnect(qp, reason); ++ spin_unlock_bh(&qp->lock); ++} ++ ++void ibscif_qp_remote_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason) ++{ ++ if (reason != IBSCIF_REASON_DISCONNECT) ++ printk(KERN_NOTICE PFX "QP %u received abnormal disconnect %d\n", ++ qp->ibqp.qp_num, reason); ++ ++ if (qp->loopback) { ++ /* ++ * Prevent simultaneous loopback QP disconnect deadlocks. ++ * This is no worse than dropping a disconnect packet. ++ */ ++ if (!spin_trylock_bh(&qp->lock)) ++ return; ++ } else ++ spin_lock_bh(&qp->lock); ++ ++ if (qp->state != QP_CONNECTED) { ++ spin_unlock_bh(&qp->lock); ++ return; ++ } ++ ++ ibscif_flush_wqs(qp); ++ ++ ibscif_cm_async_callback(qp->cm_context); ++ qp->cm_context = NULL; ++ ++ if (reason != IBSCIF_REASON_DISCONNECT) { ++ qp->state = QP_ERROR; ++ ibscif_qp_event(qp, IB_EVENT_QP_FATAL); ++ } else ++ qp->state = QP_IDLE; ++ ++ spin_unlock_bh(&qp->lock); ++} ++ ++#define MODIFY_ALLOWED 1 ++#define MODIFY_INVALID 0 ++#define VALID_TRANSITION(next_state, modify_allowed) { 1, modify_allowed }, ++#define INVAL_TRANSITION(next_state) { 0, MODIFY_INVALID }, ++#define START_STATE(current_state) { ++#define CEASE_STATE(current_state) }, ++ ++static const struct { ++ ++ int valid; ++ int modify_allowed; ++ ++} qp_transition[NR_QP_STATES][NR_QP_STATES] = { ++ ++ START_STATE(QP_IDLE) ++ VALID_TRANSITION( QP_IDLE, MODIFY_ALLOWED ) ++ VALID_TRANSITION( QP_CONNECTED, MODIFY_ALLOWED ) ++ INVAL_TRANSITION( QP_DISCONNECT ) ++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED ) ++ CEASE_STATE(QP_IDLE) ++ ++ START_STATE(QP_CONNECTED) ++ INVAL_TRANSITION( QP_IDLE ) ++ VALID_TRANSITION( QP_CONNECTED, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_DISCONNECT, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED ) ++ CEASE_STATE(QP_CONNECTED) ++ ++ START_STATE(QP_DISCONNECT) /* Automatic transition to IDLE */ ++ INVAL_TRANSITION( QP_IDLE ) ++ INVAL_TRANSITION( QP_CONNECTED ) ++ INVAL_TRANSITION( QP_DISCONNECT ) ++ INVAL_TRANSITION( QP_ERROR ) ++ INVAL_TRANSITION( QP_RESET ) ++ INVAL_TRANSITION( QP_IGNORE ) ++ CEASE_STATE(QP_DISCONNECT) ++ ++ START_STATE(QP_ERROR) ++ VALID_TRANSITION( QP_IDLE, MODIFY_INVALID ) ++ INVAL_TRANSITION( QP_CONNECTED ) ++ INVAL_TRANSITION( QP_DISCONNECT ) ++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED ) ++ CEASE_STATE(QP_ERROR) ++ ++ START_STATE(QP_RESET) ++ VALID_TRANSITION( QP_IDLE, MODIFY_ALLOWED ) ++ INVAL_TRANSITION( QP_CONNECTED ) ++ INVAL_TRANSITION( QP_DISCONNECT ) ++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID ) ++ 
VALID_TRANSITION( QP_RESET, MODIFY_INVALID ) ++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED ) ++ CEASE_STATE(QP_RESET) ++}; ++ ++int ibscif_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) ++{ ++ struct ibscif_qp *qp = to_qp(ibqp); ++ enum ibscif_qp_state cur_state, new_state; ++ int sq_size, rq_size, max_or, max_ir; ++ int err = -EINVAL; ++ ++ /* ++ * Mutex prevents simultaneous user-mode QP modifies. ++ */ ++ down(&qp->modify_mutex); ++ ++ cur_state = qp->state; ++ ++ if ((attr_mask & IB_QP_CUR_STATE) && (to_ibscif_qp_state(attr->cur_qp_state) != cur_state)) ++ goto out; ++ if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > 1)) ++ goto out; ++ ++ /* Validate any state transition. */ ++ if (attr_mask & IB_QP_STATE) { ++ new_state = to_ibscif_qp_state(attr->qp_state); ++ if (new_state < 0 || new_state >= NR_QP_STATES) ++ goto out; ++ ++ if (!qp_transition[cur_state][new_state].valid) ++ goto out; ++ } else ++ new_state = cur_state; ++ ++ /* Validate any attribute modify request. */ ++ if (attr_mask & (IB_QP_AV | ++ IB_QP_CAP | ++ IB_QP_DEST_QPN | ++ IB_QP_ACCESS_FLAGS | ++ IB_QP_MAX_QP_RD_ATOMIC | ++ IB_QP_MAX_DEST_RD_ATOMIC)) { ++ ++ if (!qp_transition[cur_state][new_state].modify_allowed) ++ goto out; ++ ++ if ((attr_mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH) && check_grh) { ++ int remote_node_id = IBSCIF_LID_TO_NODE_ID(attr->ah_attr.dlid); ++ struct ibscif_conn *conn; ++ union ib_gid *dgid; ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: %d-->%d, DGID=%llx:%llx\n", ++ __func__, qp->local_node_id, remote_node_id, ++ __be64_to_cpu(attr->ah_attr.grh.dgid.global.subnet_prefix), ++ __be64_to_cpu(attr->ah_attr.grh.dgid.global.interface_id)); ++ ++ if (remote_node_id == qp->local_node_id) { ++ dgid = &qp->dev->gid; ++ } ++ else { ++ spin_lock(&qp->lock); ++ conn = ibscif_get_conn(qp->local_node_id, remote_node_id, 0); ++ spin_unlock(&qp->lock); ++ if (!conn) { ++ if (verbose) ++ printk(KERN_INFO PFX "%s: failed to make SCIF connection %d-->%d.\n", ++ __func__, qp->local_node_id, remote_node_id); ++ goto out; ++ } ++ dgid = &conn->remote_gid; ++ ibscif_put_conn(conn); ++ } ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: local GID[%d]=%llx:%llx\n", ++ __func__, remote_node_id, ++ __be64_to_cpu(dgid->global.subnet_prefix), ++ __be64_to_cpu(dgid->global.interface_id)); ++ ++ if (memcmp(dgid, &attr->ah_attr.grh.dgid, sizeof(*dgid))) { ++ if (verbose) ++ printk(KERN_INFO PFX "%s: connecting to DGID outside the box is unsupported.\n", ++ __func__); ++ goto out; ++ } ++ } ++ ++ if (attr_mask & IB_QP_CAP) { ++ sq_size = attr->cap.max_send_wr; ++ rq_size = attr->cap.max_recv_wr; ++ if ((sq_size > MAX_QP_SIZE) || (rq_size > MAX_QP_SIZE)) ++ goto out; ++ } else { ++ sq_size = qp->sq.size; ++ rq_size = qp->rq.size; ++ } ++ if ((sq_size && !qp->ibqp.send_cq) || (rq_size && !qp->ibqp.recv_cq)) ++ goto out; ++ ++ max_or = (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) ? ++ attr->max_rd_atomic : qp->max_or; ++ max_ir = (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ? ++ attr->max_dest_rd_atomic : qp->iq.size; ++ ++ if (rma_threshold<0x7FFFFFFF && max_ir>MAX_IR && max_ir>=qp->sq.size) ++ max_ir -= qp->sq.size; ++ ++ if ((max_or > MAX_OR) || (max_ir > MAX_IR)) ++ goto out; ++ ++ /* Validation successful; resize the QP as needed. */ ++ err = ibscif_resize_qp(qp, sq_size, rq_size, max_ir + ((rma_threshold==0x7FFFFFFFF)?0:sq_size)); ++ if (err) ++ goto out; ++ ++ /* No failure paths below the QP resize. 
*/ ++ ++ qp->max_or = max_or; ++ ++ if (attr_mask & IB_QP_ACCESS_FLAGS) ++ qp->access = attr->qp_access_flags; ++ ++ if (attr_mask & IB_QP_DEST_QPN) ++ qp->remote_qpn = attr->dest_qp_num; ++ ++ if (attr_mask & IB_QP_AV) ++ qp->remote_node_id = IBSCIF_LID_TO_NODE_ID(attr->ah_attr.dlid); ++ } ++ ++ err = 0; ++ if (attr_mask & IB_QP_STATE) { ++ ++ /* Perform state change processing. */ ++ spin_lock_bh(&qp->lock); ++ switch (new_state) { ++ case QP_IDLE: ++ ibscif_qp_idle(qp); ++ break; ++ case QP_CONNECTED: ++ ibscif_qp_connect(qp, cur_state); ++ break; ++ case QP_DISCONNECT: ++ ibscif_qp_local_disconnect(qp, IBSCIF_REASON_DISCONNECT); ++ break; ++ case QP_ERROR: ++ ibscif_qp_error(qp); ++ break; ++ case QP_RESET: ++ ibscif_qp_reset(qp); ++ break; ++ default: ++ break; ++ } ++ spin_unlock_bh(&qp->lock); ++ ++ /* scif_connect() can not be called with spin_lock_bh() held */ ++ if (ibqp->qp_type != IB_QPT_UD && new_state == QP_CONNECTED && !qp->loopback) { ++ int flag = (qp->ibqp.qp_num > qp->remote_qpn); ++ spin_lock(&qp->lock); ++ qp->conn = ibscif_get_conn( qp->local_node_id, qp->remote_node_id, flag ); ++ spin_unlock(&qp->lock); ++ } ++ } ++ ++ __ibscif_query_qp(qp, attr, NULL); ++out: ++ up(&qp->modify_mutex); ++ return err; ++} ++ ++void ibscif_complete_qp(struct kref *ref) ++{ ++ struct ibscif_qp *qp = container_of(ref, struct ibscif_qp, ref); ++ complete(&qp->done); ++} ++ ++int ibscif_destroy_qp(struct ib_qp *ibqp) ++{ ++ struct ibscif_qp *qp = to_qp(ibqp); ++ struct ibscif_dev *dev = qp->dev; ++ int i, j; ++ struct ibscif_conn *conn[IBSCIF_MAX_DEVICES]; ++ ++ if (qp->cm_context) { ++ ibscif_cm_async_callback(qp->cm_context); ++ qp->cm_context = NULL; ++ } ++ ++ if (ibqp->qp_num) ++ ibscif_wiremap_del(ibqp->qp_num); ++ ++ if (qp->in_scheduler) ++ ibscif_scheduler_remove_qp(qp); ++ ++ spin_lock_bh(&qp->lock); ++ if (qp->state == QP_CONNECTED) ++ ibscif_send_disconnect(qp, IBSCIF_REASON_DISCONNECT); ++ spin_unlock_bh(&qp->lock); ++ ++ ibscif_put_qp(qp); ++ wait_for_completion(&qp->done); ++ ++ ibscif_flush_wqs(qp); ++ ibscif_reset_wqs(qp); ++ ibscif_reset_wire(&qp->wire); ++ ++ if (qp->sq.wr) ++ vfree(qp->sq.wr); ++ if (qp->rq.wr) ++ vfree(qp->rq.wr); ++ if (qp->iq.wr) ++ vfree(qp->iq.wr); ++ ++ ibscif_release_quota((PAGE_ALIGN(qp->sq.size * qp->sq.wr_size) + ++ PAGE_ALIGN(qp->rq.size * qp->rq.wr_size) + ++ PAGE_ALIGN(qp->iq.size * qp->iq.wr_size)) >> PAGE_SHIFT); ++ ++ atomic_dec(&dev->qp_cnt); ++ ++ ibscif_put_conn(qp->conn); ++ ++ if (qp->ibqp.qp_type == IB_QPT_UD) { ++ spin_lock_bh(&qp->lock); ++ for (i=0, j=0; iud_conn[i]) { ++ conn[j++] = qp->ud_conn[i]; ++ qp->ud_conn[i] = NULL; ++ } ++ } ++ spin_unlock_bh(&qp->lock); ++ ++ /* ibscif_put_conn() may call scif_unregister(), should not hold a lock */ ++ for (i=0; iibqp.qp_type != IB_QPT_UD) ++ return; ++ ++ ++ spin_lock_bh(&qp->lock); ++ ++ for (i=0; iud_conn[i] == conn) ++ goto done; ++ } ++ ++ for (i=0; iud_conn[i] == NULL) { ++ atomic_inc(&conn->refcnt); ++ qp->ud_conn[i] = conn; ++ break; ++ } ++ } ++done: ++ spin_unlock_bh(&qp->lock); ++} ++ +diff -urN a7/drivers/infiniband/hw/scif/ibscif_scheduler.c a8/drivers/infiniband/hw/scif/ibscif_scheduler.c +--- a7/drivers/infiniband/hw/scif/ibscif_scheduler.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_scheduler.c 2015-02-23 10:14:37.488809663 -0800 +@@ -0,0 +1,195 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++static int ibscif_schedule_tx(struct ibscif_wq *wq, int max_send) ++{ ++ struct ibscif_tx_state *tx = &wq->wirestate->tx; ++ struct ibscif_qp *qp = wq->qp; ++ struct ibscif_wr *wr; ++ int index, sent = 0; ++ ++ while ((wq->next_wr != wq->tail) && ibscif_tx_window(tx) && max_send) { ++ ++ index = wq->next_wr; ++ wr = ibscif_get_wr(wq, index); ++ ++ /* ++ * Ack processing can reschedule a WR that is in retry; only process ++ * it if we are all caught up. Also, do not start a fenced WR until ++ * all prior RDMA read and atomic operations have completed. ++ */ ++ if ((wr->flags & IB_SEND_FENCE) && atomic_read(&qp->or_depth) && ++ (wr->state == WR_WAITING)) ++ break; ++ ++ switch (wr->opcode) { ++ case WR_RDMA_READ: ++ case WR_ATOMIC_CMP_AND_SWP: ++ case WR_ATOMIC_FETCH_AND_ADD: ++ /* Throttle IQ stream requests if needed. */ ++ if (wr->state == WR_WAITING) { ++ if (atomic_read(&qp->or_depth) == qp->max_or) ++ return 0; ++ atomic_inc(&qp->or_depth); ++ } ++ /* Fall through. */ ++ case WR_SEND: ++ case WR_SEND_WITH_IMM: ++ case WR_RDMA_WRITE: ++ case WR_RDMA_WRITE_WITH_IMM: ++ case WR_RDMA_READ_RSP: ++ case WR_ATOMIC_RSP: ++ case WR_RMA_RSP: ++ sent = ibscif_xmit_wr(wq, wr, min((u32)max_send, ibscif_tx_window(tx)), ++ 0, tx->next_seq, &tx->next_seq); ++ break; ++ case WR_UD: ++ sent = ibscif_xmit_wr(wq, wr, min((u32)max_send, ibscif_tx_window(tx)), ++ 0, 0, NULL); ++ break; ++ default: ++ printk(KERN_ERR PFX "%s() botch: found opcode %d on work queue\n", ++ __func__, wr->opcode); ++ return -EOPNOTSUPP; ++ } ++ ++ /* If an IQ stream request did not get started we need to back off or_depth. */ ++ if ((wr->state == WR_WAITING) && ++ ((wr->opcode == WR_RDMA_READ) || ++ (wr->opcode == WR_ATOMIC_CMP_AND_SWP) || (wr->opcode == WR_ATOMIC_FETCH_AND_ADD))) ++ atomic_dec(&qp->or_depth); ++ ++ if (sent < 0) ++ return sent; ++ ++ max_send -= sent; ++ ++ /* ++ * The tx engine bumps next_wr when finished sending a whole WR. ++ * Bail if it didn't this time around. ++ */ ++ if (wq->next_wr == index) ++ break; ++ } ++ ++ return 0; ++} ++ ++static int ibscif_schedule_wq(struct ibscif_wq *wq) ++{ ++ int max_send, err = 0; ++ int need_call_sq_completions = 0; ++ ++ /* Ignore loopback QPs that may be scheduled by retry processing. 
*/ ++ if (wq->qp->loopback) ++ return 0; ++ ++ if (!(max_send = atomic_read(&wq->qp->dev->available))) ++ return -EBUSY; ++ ++ spin_lock(&wq->lock); ++ err = ibscif_schedule_tx(wq, max_send); ++ need_call_sq_completions = wq->fast_rdma_completions; ++ wq->fast_rdma_completions = 0; ++ spin_unlock(&wq->lock); ++ ++ if (unlikely(err)) ++ ibscif_qp_internal_disconnect(wq->qp, IBSCIF_REASON_QP_FATAL); ++ ++ if (fast_rdma && need_call_sq_completions) ++ ibscif_process_sq_completions(wq->qp); ++ ++ return err; ++} ++ ++void ibscif_schedule(struct ibscif_wq *wq) ++{ ++ struct ibscif_dev *dev; ++ struct list_head processed; ++ ++ if (wq->qp->loopback) { ++ ibscif_loopback(wq); ++ return; ++ } ++ dev = wq->qp->dev; ++ ++ if (!ibscif_schedule_wq(wq)) ++ goto out; ++ ++ while (atomic_xchg(&dev->was_new, 0)) { ++ /* Bail if the device is busy. */ ++ if (down_trylock(&dev->mutex)) ++ goto out; ++ ++ /* ++ * Schedule each WQ on the device and move it to the processed list. ++ * When complete, append the processed list to the device WQ list. ++ */ ++ INIT_LIST_HEAD(&processed); ++ while (!list_empty(&dev->wq_list)) { ++ wq = list_entry(dev->wq_list.next, typeof(*wq), entry); ++ if (!ibscif_schedule_wq(wq)) { ++ DEV_STAT(dev, sched_exhaust++); ++ list_splice(&processed, dev->wq_list.prev); ++ up(&dev->mutex); ++ goto out; ++ } ++ list_move_tail(&wq->entry, &processed); ++ } ++ list_splice(&processed, dev->wq_list.prev); ++ ++ up(&dev->mutex); ++ } ++ return; ++out: ++ atomic_inc(&dev->was_new); ++} ++ ++void ibscif_scheduler_add_qp(struct ibscif_qp *qp) ++{ ++ struct ibscif_dev *dev = qp->dev; ++ ++ down(&dev->mutex); ++ list_add_tail(&qp->sq.entry, &dev->wq_list); ++ list_add_tail(&qp->iq.entry, &dev->wq_list); ++ up(&dev->mutex); ++} ++ ++void ibscif_scheduler_remove_qp(struct ibscif_qp *qp) ++{ ++ struct ibscif_dev *dev = qp->dev; ++ ++ down(&dev->mutex); ++ list_del(&qp->sq.entry); ++ list_del(&qp->iq.entry); ++ up(&dev->mutex); ++} +diff -urN a7/drivers/infiniband/hw/scif/ibscif_util.c a8/drivers/infiniband/hw/scif/ibscif_util.c +--- a7/drivers/infiniband/hw/scif/ibscif_util.c 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/ibscif_util.c 2015-02-23 10:14:37.488809663 -0800 +@@ -0,0 +1,623 @@ ++/* ++ * Copyright (c) 2008 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the ++ * GNU General Public License (GPL) Version 2, available from the ++ * file COPYING in the main directory of this source tree, or the ++ * OpenFabrics.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "ibscif_driver.h" ++ ++#define IBSCIF_CONN_IDLE 0 ++#define IBSCIF_CONN_REQ_SENT 1 ++#define IBSCIF_CONN_REQ_RCVD 2 ++#define IBSCIF_CONN_ESTABLISHED 3 ++#define IBSCIF_CONN_ACTIVE 4 ++ ++DEFINE_SPINLOCK(conn_state_lock); ++static int conn_state[IBSCIF_MAX_DEVICES][IBSCIF_MAX_DEVICES]; ++ ++#define IBSCIF_CONN_REP 1 ++#define IBSCIF_CONN_REJ 2 ++#define IBSCIF_CONN_ERR 3 ++ ++struct ibscif_conn_resp { ++ int cmd; ++ union ib_gid gid; ++}; ++ ++void ibscif_do_accept(struct ibscif_dev *dev) ++{ ++ struct scif_portID peer; ++ scif_epd_t ep; ++ struct ibscif_conn *conn; ++ int ret; ++ struct ibscif_conn_resp resp; ++ int resp_size; ++ ++ if (check_grh) ++ resp_size = sizeof(resp); ++ else ++ resp_size = sizeof(int); ++ ++ ret = scif_accept(dev->listen_ep, &peer, &ep, SCIF_ACCEPT_SYNC); ++ if (ret) { ++ printk(KERN_ALERT PFX "%s: scif_accept returns %ld\n", __func__, PTR_ERR(ep)); ++ return; ++ } ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: %d<--%d\n", __func__, dev->node_id, peer.node); ++ ++ if (check_grh) ++ memcpy(&resp.gid, &dev->gid, sizeof(resp.gid)); ++ ++ spin_lock(&conn_state_lock); ++ switch (conn_state[dev->node_id][peer.node]) { ++ case IBSCIF_CONN_IDLE: ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD; ++ resp.cmd = IBSCIF_CONN_REP; ++ if (verbose) ++ printk(KERN_INFO PFX "%s: no double connection, accepting\n", __func__); ++ break; ++ ++ case IBSCIF_CONN_REQ_SENT: ++ /* A connection request has been sent, but no response yet. Node id is used to ++ * break the tie when both side send the connection request. One side is allowed ++ * to accept the request and its own request will be rejected by the peer. 
++ */ ++ if (dev->node_id > peer.node) { ++ resp.cmd = IBSCIF_CONN_REJ; ++ if (verbose) ++ printk(KERN_INFO PFX "%s: double connection, rejecting (peer will accept)\n", __func__); ++ } ++ else if (dev->node_id == peer.node) { ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD; ++ resp.cmd = IBSCIF_CONN_REP; ++ if (verbose) ++ printk(KERN_INFO PFX "%s: loopback connection, accepting\n", __func__); ++ } ++ else { ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD; ++ resp.cmd = IBSCIF_CONN_REP; ++ if (verbose) ++ printk(KERN_INFO PFX "%s: double connection, accepting (peer will reject)\n", __func__); ++ } ++ break; ++ ++ case IBSCIF_CONN_REQ_RCVD: ++ if (verbose) ++ printk(KERN_INFO PFX "%s: duplicated connection request, rejecting\n", __func__); ++ resp.cmd = IBSCIF_CONN_REJ; ++ break; ++ ++ case IBSCIF_CONN_ESTABLISHED: ++ case IBSCIF_CONN_ACTIVE: ++ if (verbose) ++ printk(KERN_INFO PFX "%s: already connected, rejecting\n", __func__); ++ resp.cmd = IBSCIF_CONN_REJ; ++ break; ++ ++ default: ++ if (verbose) ++ printk(KERN_INFO PFX "%s: invalid state: %d\n", __func__, conn_state[dev->node_id][peer.node]); ++ resp.cmd = IBSCIF_CONN_ERR; ++ break; ++ } ++ spin_unlock(&conn_state_lock); ++ ++ ret = scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_send returns %d\n", __func__, ret); ++ scif_close(ep); ++ return; ++ } ++ ++ if (resp.cmd != IBSCIF_CONN_REP) { ++ /* one additional hand shaking to prevent the previous send from being trashed by ep closing */ ++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK); ++ scif_close(ep); ++ return; ++ } ++ ++ if (check_grh) { ++ ret = scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_recv returns %d\n", __func__, ret); ++ scif_close(ep); ++ spin_lock(&conn_state_lock); ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_IDLE; ++ spin_unlock(&conn_state_lock); ++ return; ++ } ++ } ++ ++ conn = kzalloc(sizeof (*conn), GFP_KERNEL); ++ if (!conn) { ++ printk(KERN_ALERT PFX "%s: cannot allocate connection context.\n", __func__); ++ scif_close(ep); ++ spin_lock(&conn_state_lock); ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_IDLE; ++ spin_unlock(&conn_state_lock); ++ return; ++ } ++ ++ conn->ep = ep; ++ conn->remote_node_id = peer.node; ++ if (check_grh) ++ memcpy(&conn->remote_gid, &resp.gid, sizeof(conn->remote_gid)); ++ conn->dev = dev; ++ atomic_set(&conn->refcnt, 0); ++ ++ spin_lock(&conn_state_lock); ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_ESTABLISHED; ++ spin_unlock(&conn_state_lock); ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: connection established. 
ep=%p\n", __func__, ep); ++ ++ ibscif_refresh_mreg(conn); ++ ++ /* one addition sync to ensure the MRs are registered with the new ep at both side */ ++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK); ++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK); ++ ++ list_add(&conn->entry, &dev->conn_list); ++ ibscif_refresh_pollep_list(); ++ ++ spin_lock(&conn_state_lock); ++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_ACTIVE; ++ spin_unlock(&conn_state_lock); ++} ++ ++struct ibscif_conn *ibscif_do_connect(struct ibscif_dev *dev, int remote_node_id) ++{ ++ struct scif_portID dest; ++ struct ibscif_conn *conn = NULL; ++ int ret; ++ scif_epd_t ep; ++ struct ibscif_conn_resp resp; ++ union ib_gid peer_gid; ++ int resp_size; ++ ++ if (check_grh) ++ resp_size = sizeof(resp); ++ else ++ resp_size = sizeof(int); ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: %d-->%d\n", __func__, dev->node_id, remote_node_id); ++ ++ /* Validate remote_node_id for conn_state array check */ ++ if ((remote_node_id < 0) || (remote_node_id >= IBSCIF_MAX_DEVICES)) ++ return ERR_PTR(-EINVAL); ++ ++ spin_lock(&conn_state_lock); ++ if (conn_state[dev->node_id][remote_node_id] != IBSCIF_CONN_IDLE) { ++ spin_unlock(&conn_state_lock); ++ if (verbose) ++ printk(KERN_INFO PFX "%s: connection already in progress, retry\n", __func__); ++ return ERR_PTR(-EAGAIN); ++ } ++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_REQ_SENT; ++ spin_unlock(&conn_state_lock); ++ ++ ep = scif_open(); ++ if (!ep) /* SCIF API semantics */ ++ goto out_state; ++ ++ if (IS_ERR(ep)) /* SCIF emulator semantics */ ++ goto out_state; ++ ++ dest.node = remote_node_id; ++ dest.port = SCIF_OFED_PORT_0; ++ ++ ret = scif_connect(ep, &dest); ++ if (ret < 0) ++ goto out_close; ++ ++ /* Now ret is the port number ep is bound to */ ++ ++ ret = scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_recv returns %d\n", __func__, ret); ++ goto out_close; ++ } ++ ++ if (resp.cmd != IBSCIF_CONN_REP) { ++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK); ++ /* the peer has issued the connection request */ ++ if (resp.cmd == IBSCIF_CONN_REJ) { ++ if (verbose) ++ printk(KERN_INFO PFX "%s: rejected by peer due to double connection\n", __func__); ++ scif_close(ep); ++ /* don't reset the state becasue it's used for checking connection state */ ++ return ERR_PTR(-EAGAIN); ++ } ++ else { ++ if (verbose) ++ printk(KERN_INFO PFX "%s: rejected by peer due to invalid state\n", __func__); ++ goto out_close; ++ } ++ } ++ ++ if (check_grh) { ++ memcpy(&peer_gid, &resp.gid, sizeof(peer_gid)); ++ memcpy(&resp.gid, &dev->gid, sizeof(resp.gid)); ++ ret = scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK); ++ if (ret < 0) { ++ printk(KERN_ALERT PFX "%s: scif_send returns %d\n", __func__, ret); ++ goto out_close; ++ } ++ } ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: connection established. 
ep=%p\n", __func__, ep); ++ ++ spin_lock(&conn_state_lock); ++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_ESTABLISHED; ++ spin_unlock(&conn_state_lock); ++ ++ conn = kzalloc(sizeof *conn, GFP_KERNEL); ++ if (!conn) { ++ printk(KERN_ALERT PFX "%s: failed to allocate connection object\n", __func__); ++ goto out_close; ++ } ++ ++ conn->ep = ep; ++ conn->remote_node_id = remote_node_id; ++ if (check_grh) ++ memcpy(&conn->remote_gid, &peer_gid, sizeof(conn->remote_gid)); ++ conn->dev = dev; ++ atomic_set(&conn->refcnt, 0); ++ ++ ibscif_refresh_mreg(conn); ++ ++ /* one addition sync to ensure the MRs are registered with the new ep at both side */ ++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK); ++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK); ++ ++ list_add_tail(&conn->entry, &dev->conn_list); ++ ibscif_refresh_pollep_list(); ++ ++ spin_lock(&conn_state_lock); ++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_ACTIVE; ++ spin_unlock(&conn_state_lock); ++ ++ return conn; ++ ++out_close: ++ scif_close(ep); ++ ++out_state: ++ spin_lock(&conn_state_lock); ++ if (conn_state[dev->node_id][remote_node_id] == IBSCIF_CONN_REQ_SENT) ++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_IDLE; ++ spin_unlock(&conn_state_lock); ++ return conn; ++} ++ ++struct ibscif_conn *ibscif_get_conn(int node_id, int remote_node_id, int find_local_peer) ++{ ++ struct ibscif_dev *cur, *next, *dev = NULL; ++ struct ibscif_conn *conn, *conn1, *conn2; ++ int done=0, err=0, connect_tried=0; ++ ++ down(&devlist_mutex); ++ list_for_each_entry_safe(cur, next, &devlist, entry) { ++ if (cur->node_id == node_id) { ++ dev = cur; ++ break; ++ } ++ } ++ up(&devlist_mutex); ++ ++ if (!dev) ++ return NULL; ++ ++again: ++ conn1 = NULL; ++ conn2 = NULL; ++ down(&dev->mutex); ++ list_for_each_entry(conn, &dev->conn_list, entry) ++ { ++ if (conn->remote_node_id == remote_node_id) { ++ if (node_id == remote_node_id) { ++ if (!conn1) { ++ conn1 = conn; ++ continue; ++ } ++ else { ++ conn2 = conn; ++ break; ++ } ++ } ++ up(&dev->mutex); ++ atomic_inc(&conn->refcnt); ++ if (conn->local_close) { ++ conn->local_close = 0; ++ ibscif_send_reopen(conn); ++ } ++ return conn; ++ } ++ } ++ up(&dev->mutex); ++ ++ /* for loopback connections, we must wait for both endpoints be in the list to ensure that ++ * different endpoints are assigned to the two sides ++ */ ++ if (node_id == remote_node_id) { ++ if (conn1 && conn2) { ++ conn = find_local_peer ? 
conn2 : conn1; ++ atomic_inc(&conn->refcnt); ++ if (conn->local_close) { ++ conn->local_close = 0; ++ ibscif_send_reopen(conn); ++ } ++ return conn; ++ } ++ else if (conn1) { ++ schedule(); ++ goto again; ++ } ++ } ++ ++ if (connect_tried) { ++ printk(KERN_ALERT PFX "%s: ERROR: cannot get connection (%d-->%d) after waiting, state=%d\n", ++ __func__, dev->node_id, remote_node_id, err-1); ++ return NULL; ++ } ++ ++ conn = ibscif_do_connect(dev, remote_node_id); ++ ++ /* If a connection is in progress, wait for its finish */ ++ if (conn == ERR_PTR(-EAGAIN)) { ++ while (!done && !err) { ++ spin_lock(&conn_state_lock); ++ switch (conn_state[node_id][remote_node_id]) { ++ case IBSCIF_CONN_REQ_SENT: ++ case IBSCIF_CONN_REQ_RCVD: ++ case IBSCIF_CONN_ESTABLISHED: ++ break; ++ case IBSCIF_CONN_ACTIVE: ++ done = 1; ++ break; ++ default: ++ err = 1 + conn_state[node_id][remote_node_id]; ++ break; ++ } ++ spin_unlock(&conn_state_lock); ++ schedule(); ++ } ++ } ++ ++ connect_tried = 1; ++ goto again; ++} ++ ++void ibscif_put_conn(struct ibscif_conn *conn) ++{ ++ if (!conn) ++ return; ++ ++ if (atomic_dec_and_test(&conn->refcnt)) { ++ // printk(KERN_INFO PFX "%s: local_close, conn=%p, remote_close=%d\n", __func__, conn, conn->remote_close); ++ ibscif_send_close(conn); ++ conn->local_close = 1; ++ } ++} ++ ++void ibscif_get_pollep_list(struct scif_pollepd *polleps, ++ struct ibscif_dev **devs, int *types, struct ibscif_conn **conns, int *count) ++{ ++ struct ibscif_dev *dev; ++ struct ibscif_conn *conn; ++ int i = 0; ++ int max = *count; ++ ++ down(&devlist_mutex); ++ list_for_each_entry(dev, &devlist, entry) { ++ if (i >= max) ++ break; ++ ++ polleps[i].epd = dev->listen_ep; ++ polleps[i].events = POLLIN; ++ polleps[i].revents = 0; ++ devs[i] = dev; ++ types[i] = IBSCIF_EP_TYPE_LISTEN; ++ conns[i] = NULL; ++ i++; ++ if (verbose) ++ printk(KERN_INFO PFX "%s: ep=%p (%d:listen)\n", __func__, dev->listen_ep, dev->node_id); ++ ++ down(&dev->mutex); ++ list_for_each_entry(conn, &dev->conn_list, entry) ++ { ++ if (i >= max) ++ break; ++ polleps[i].epd = conn->ep; ++ polleps[i].events = POLLIN; ++ polleps[i].revents = 0; ++ devs[i] = dev; ++ types[i] = IBSCIF_EP_TYPE_COMM; ++ conns[i] = conn; ++ i++; ++ if (verbose) ++ printk(KERN_INFO PFX "%s: ep=%p (%d<--->%d)\n", __func__, conn->ep, dev->node_id, conn->remote_node_id); ++ } ++ up(&dev->mutex); ++ } ++ up(&devlist_mutex); ++ ++ if (verbose) ++ printk(KERN_INFO PFX "%s: count=%d\n", __func__, i); ++ *count = i; ++} ++ ++void ibscif_get_ep_list(scif_epd_t *eps, int *count) ++{ ++ struct ibscif_dev *dev; ++ struct ibscif_conn *conn; ++ int i = 0; ++ int max = *count; ++ ++ down(&devlist_mutex); ++ list_for_each_entry(dev, &devlist, entry) { ++ if (i >= max) ++ break; ++ ++ down(&dev->mutex); ++ list_for_each_entry(conn, &dev->conn_list, entry) ++ { ++ if (i >= max) ++ break; ++ eps[i] = conn->ep; ++ i++; ++ } ++ up(&dev->mutex); ++ } ++ up(&devlist_mutex); ++ ++ *count = i; ++} ++ ++void ibscif_remove_ep(struct ibscif_dev *dev, scif_epd_t ep) ++{ ++ struct ibscif_conn *conn, *next; ++ down(&dev->mutex); ++ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) ++ { ++ if (conn->ep == ep) { ++ spin_lock(&conn_state_lock); ++ conn_state[conn->dev->node_id][conn->remote_node_id] = IBSCIF_CONN_IDLE; ++ spin_unlock(&conn_state_lock); ++ list_del(&conn->entry); ++ } ++ } ++ up(&dev->mutex); ++} ++ ++ ++void ibscif_free_conn(struct ibscif_conn *conn) ++{ ++ scif_close(conn->ep); ++ kfree(conn); ++} ++ ++int ibscif_cleanup_idle_conn(void) ++{ ++ struct ibscif_dev 
*dev; ++ struct ibscif_conn *conn, *next; ++ struct ibscif_conn *idle_conns[IBSCIF_MAX_DEVICES]; ++ int i, n=0; ++ ++ down(&devlist_mutex); ++ list_for_each_entry(dev, &devlist, entry) { ++ down(&dev->mutex); ++ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) ++ { ++ if (conn->local_close && conn->remote_close) { ++ spin_lock(&conn_state_lock); ++ conn_state[conn->dev->node_id][conn->remote_node_id] = IBSCIF_CONN_IDLE; ++ spin_unlock(&conn_state_lock); ++ list_del(&conn->entry); ++ idle_conns[n++] = conn; ++ } ++ } ++ up(&dev->mutex); ++ } ++ up(&devlist_mutex); ++ ++ for (i=0; i<n; i++) ++ ibscif_free_conn(idle_conns[i]); ++ ++ return n; ++} ++ ++#include <linux/time.h> ++ ++static uint32_t ibscif_time_passed(void) ++{ ++ static int first = 1; ++ static struct timeval t0; ++ static struct timeval t; ++ uint32_t usec; ++ ++ if (first) { ++ do_gettimeofday(&t0); ++ first = 0; ++ return 0; ++ } ++ ++ do_gettimeofday(&t); ++ usec = (t.tv_sec - t0.tv_sec) * 1000000UL; ++ if (t.tv_usec >= t0.tv_usec) ++ usec += (t.tv_usec - t0.tv_usec); ++ else ++ usec -= (t0.tv_usec - t.tv_usec); ++ ++ t0 = t; ++ return usec; ++} ++ ++#define IBSCIF_PERF_MAX_SAMPLES 100 ++#define IBSCIF_PERF_MAX_COUNTERS 10 ++ ++void ibscif_perf_sample(int counter, int next) ++{ ++ static uint32_t T[IBSCIF_PERF_MAX_SAMPLES][IBSCIF_PERF_MAX_COUNTERS]; ++ static int T_idx=0; ++ int i, j, sum; ++ ++ if (counter>=0 && counter<IBSCIF_PERF_MAX_COUNTERS) ++ T[T_idx][counter] = ibscif_time_passed(); ++ ++ if (next && ++T_idx >= IBSCIF_PERF_MAX_SAMPLES) { ++ T_idx = 0; ++ for (i=0; i<IBSCIF_PERF_MAX_SAMPLES; i++) { ++ sum = 0; ++ for (j=1; j<IBSCIF_PERF_MAX_COUNTERS; j++) { ++ if (T[i][j] > 0) ++ sum += T[i][j]; ++ } ++ printk("SUM(T1..T%d)=%u\n", IBSCIF_PERF_MAX_COUNTERS-1, sum); ++ } ++ } ++} ++ +diff -urN a7/drivers/infiniband/hw/scif/Kconfig a8/drivers/infiniband/hw/scif/Kconfig +--- a7/drivers/infiniband/hw/scif/Kconfig 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/Kconfig 2015-02-23 10:14:37.489809663 -0800 +@@ -0,0 +1,4 @@ ++config INFINIBAND_SCIF ++ tristate "SCIF RDMA driver support" ++ ---help--- ++ RDMA over SCIF driver.
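
The ibscif_do_accept()/ibscif_do_connect() pair in ibscif_util.c above resolves simultaneous connection attempts by comparing SCIF node IDs: when both sides are in IBSCIF_CONN_REQ_SENT, the higher-numbered node rejects the incoming request and waits for its own request to be accepted, the lower-numbered node accepts, and equal IDs (loopback) always accept. The stand-alone sketch below only illustrates that tie-break rule; it is not part of the patch, and the enum and function names are invented for the example.

    #include <stdio.h>

    /* Stands in for IBSCIF_CONN_REP / IBSCIF_CONN_REJ; values are illustrative. */
    enum conn_resp { CONN_REP = 1, CONN_REJ = 2 };

    /*
     * Tie-break used when both peers have a connection request outstanding:
     *   local > peer  -> reject the incoming request, keep our own
     *   local < peer  -> accept the incoming request (the peer will reject ours)
     *   local == peer -> loopback, always accept
     */
    static enum conn_resp resolve_cross_connect(int local_node, int peer_node)
    {
    	if (local_node > peer_node)
    		return CONN_REJ;
    	return CONN_REP;
    }

    int main(void)
    {
    	printf("2 vs 1 -> %s\n", resolve_cross_connect(2, 1) == CONN_REJ ? "reject" : "accept");
    	printf("1 vs 2 -> %s\n", resolve_cross_connect(1, 2) == CONN_REJ ? "reject" : "accept");
    	printf("3 vs 3 -> %s\n", resolve_cross_connect(3, 3) == CONN_REJ ? "reject" : "accept");
    	return 0;
    }

Because either side may initiate, both endpoints could otherwise end up holding two half-open SCIF endpoints for the same node pair; the node-ID comparison guarantees that exactly one connection survives, which is what the conn_state[][] table is tracking.
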
+diff -urN a7/drivers/infiniband/hw/scif/Makefile a8/drivers/infiniband/hw/scif/Makefile +--- a7/drivers/infiniband/hw/scif/Makefile 1969-12-31 16:00:00.000000000 -0800 ++++ a8/drivers/infiniband/hw/scif/Makefile 2015-02-23 10:14:37.489809663 -0800 +@@ -0,0 +1,41 @@ ++ifneq ($(KERNELRELEASE),) ++ ++# Original Make begins ++ ++obj-$(CONFIG_INFINIBAND_SCIF) += ibscif.o ++ ++ibscif-y := ibscif_main.o \ ++ ibscif_ah.o \ ++ ibscif_pd.o \ ++ ibscif_cq.o \ ++ ibscif_qp.o \ ++ ibscif_mr.o \ ++ ibscif_cm.o \ ++ ibscif_post.o \ ++ ibscif_procfs.o \ ++ ibscif_loopback.o \ ++ ibscif_provider.o \ ++ ibscif_protocol.o \ ++ ibscif_scheduler.o \ ++ ibscif_util.o ++ ++# Original Makefile ends ++ ++else ++ ++ifeq ($(KVER),) ++ ifeq ($(KDIR),) ++ KDIR := /lib/modules/$(shell uname -r)/build ++ endif ++else ++ KDIR := /lib/modules/$(KVER)/build ++endif ++ ++all: ++ $(MAKE) -C $(KDIR) SUBDIRS=$(shell pwd) CONFIG_INFINIBAND_SCIF=m ++ ++clean: ++ rm -rf *.o *.ko *.mod.c .*.cmd Module.* .tmp_versions ++ ++endif ++ diff --git a/tech-preview/xeon-phi/0009-update-drivers-infiniband-s-Kconfig-and-Makefile-to-.patch b/tech-preview/xeon-phi/0009-update-drivers-infiniband-s-Kconfig-and-Makefile-to-.patch new file mode 100644 index 0000000..bacf14f --- /dev/null +++ b/tech-preview/xeon-phi/0009-update-drivers-infiniband-s-Kconfig-and-Makefile-to-.patch @@ -0,0 +1,37 @@ +From 4f27d323bd47563f40a663672a331c5b2c95138e Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Tue, 4 Feb 2014 12:25:45 -0800 +Subject: [PATCH 09/12] update drivers/infiniband's Kconfig and Makefile to + allow compilation of CCL-Direct (ibp) + +Signed-off-by: Phil Cayton +--- +diff -urN a8/drivers/infiniband/hw/Makefile a9/drivers/infiniband/hw/Makefile +--- a8/drivers/infiniband/hw/Makefile 2015-01-05 15:04:13.993463721 -0800 ++++ a9/drivers/infiniband/hw/Makefile 2015-01-05 15:09:10.056451249 -0800 +@@ -10,3 +10,4 @@ + obj-$(CONFIG_INFINIBAND_NES) += nes/ + obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ + obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ ++obj-$(CONFIG_INFINIBAND_SCIF) += scif/ +diff -urN a8/drivers/infiniband/Kconfig a9/drivers/infiniband/Kconfig +--- a8/drivers/infiniband/Kconfig 2015-01-05 15:04:14.001463720 -0800 ++++ a9/drivers/infiniband/Kconfig 2015-01-05 15:07:03.176456594 -0800 +@@ -55,6 +55,9 @@ + source "drivers/infiniband/hw/nes/Kconfig" + source "drivers/infiniband/hw/ocrdma/Kconfig" + source "drivers/infiniband/hw/usnic/Kconfig" ++source "drivers/infiniband/hw/scif/Kconfig" ++ ++source "drivers/infiniband/ibp/Kconfig" + + source "drivers/infiniband/ulp/ipoib/Kconfig" + +diff -urN a8/drivers/infiniband/Makefile a9/drivers/infiniband/Makefile +--- a8/drivers/infiniband/Makefile 2015-01-05 15:04:14.001463720 -0800 ++++ a9/drivers/infiniband/Makefile 2015-01-05 15:08:25.112453143 -0800 +@@ -1,3 +1,4 @@ + obj-$(CONFIG_INFINIBAND) += core/ + obj-$(CONFIG_INFINIBAND) += hw/ + obj-$(CONFIG_INFINIBAND) += ulp/ ++obj-$(CONFIG_IBP_SERVER) += ibp/ diff --git a/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch b/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch new file mode 100644 index 0000000..09a8ba0 --- /dev/null +++ b/tech-preview/xeon-phi/0010-Update-qib-for-XEON-PHI-support.patch @@ -0,0 +1,2783 @@ +IB/qib: Update qib for XEON PHI support + +From: Jubin John + +Reviewed-by: Mike Marciniszyn +Signed-off-by: Jubin John +--- +diff -urN a9/drivers/infiniband/hw/qib/Makefile a10/drivers/infiniband/hw/qib/Makefile +--- a9/drivers/infiniband/hw/qib/Makefile 2015-01-05 15:05:04.280461602 -0800 ++++ 
a10/drivers/infiniband/hw/qib/Makefile 2015-01-05 15:10:58.250446692 -0800 +@@ -14,3 +14,8 @@ + ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o + ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o + ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o ++ ++ifeq ($(CONFIG_INFINIBAND_SCIF),m) ++ib_qib-y += qib_knx.o ++ccflags-y += -DQIB_CONFIG_KNX ++endif +diff -urN a9/drivers/infiniband/hw/qib/qib_common.h a10/drivers/infiniband/hw/qib/qib_common.h +--- a9/drivers/infiniband/hw/qib/qib_common.h 2015-01-05 15:05:04.281461602 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_common.h 2015-01-05 15:10:58.250446692 -0800 +@@ -1,4 +1,5 @@ + /* ++ * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. +@@ -337,8 +338,12 @@ + * Should be set to QIB_USER_SWVERSION. + */ + __u32 spu_userversion; +- ++#ifdef QIB_CONFIG_KNX ++ __u16 spu_knx_node_id; ++ __u16 _spu_unused2; ++#else + __u32 _spu_unused2; ++#endif + + /* size of struct base_info to write to */ + __u32 spu_base_info_size; +diff -urN a9/drivers/infiniband/hw/qib/qib_file_ops.c a10/drivers/infiniband/hw/qib/qib_file_ops.c +--- a9/drivers/infiniband/hw/qib/qib_file_ops.c 2015-01-05 15:05:04.280461602 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_file_ops.c 2015-01-05 15:10:58.251446692 -0800 +@@ -48,6 +48,7 @@ + #include "qib.h" + #include "qib_common.h" + #include "qib_user_sdma.h" ++#include "qib_knx.h" + + #undef pr_fmt + #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt +@@ -59,6 +60,9 @@ + unsigned long, loff_t); + static unsigned int qib_poll(struct file *, struct poll_table_struct *); + static int qib_mmapf(struct file *, struct vm_area_struct *); ++static int subctxt_search_ctxts(struct qib_devdata *, struct file *, ++ const struct qib_user_info *); ++ + + static const struct file_operations qib_file_ops = { + .owner = THIS_MODULE, +@@ -89,6 +93,64 @@ + return paddr; + } + ++#ifdef QIB_CONFIG_KNX ++/* ++ * Fills in only a few of the fields in the qib_base_info structure so the ++ * module on the KNX size can allocate all necessary memories locally. ++ */ ++static int qib_get_early_base_info(struct file *fp, void __user *ubase, ++ size_t ubase_size) { ++ struct qib_ctxtdata *rcd = ctxt_fp(fp); ++ int ret = 0; ++ struct qib_devdata *dd = rcd->dd; ++ struct qib_base_info *kinfo = NULL; ++ size_t sz; ++ int local_node = (numa_node_id() == pcibus_to_node(dd->pcidev->bus)); ++ ++ sz = sizeof(*kinfo); ++ if (!rcd->subctxt_cnt) ++ sz -= 7 * sizeof(u64); ++ if (ubase_size < sz) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ ++ kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL); ++ if (kinfo == NULL) { ++ ret = -ENOMEM; ++ goto bail; ++ } ++ ++ ret = dd->f_get_base_info(rcd, kinfo); ++ if (ret < 0) ++ goto bail_free; ++ ++ if (rcd->subctxt_cnt && !subctxt_fp(fp)) ++ kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER; ++ ++ kinfo->spi_unit = dd->unit; ++ kinfo->spi_port = rcd->ppd->port; ++ kinfo->spi_ctxt = rcd->ctxt; ++ kinfo->spi_subctxt = subctxt_fp(fp); ++ kinfo->spi_rcvhdr_cnt = dd->rcvhdrcnt; ++ kinfo->spi_rcvhdrent_size = dd->rcvhdrentsize; ++ kinfo->spi_rcv_egrbufsize = dd->rcvegrbufsize; ++ kinfo->spi_rcv_egrbuftotlen = ++ rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size; ++ kinfo->spi_rcv_egrperchunk = rcd->rcvegrbufs_perchunk; ++ kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / ++ rcd->rcvegrbuf_chunks; ++ ++ sz = (ubase_size < sizeof(*kinfo)) ? 
ubase_size : sizeof(*kinfo); ++ if (copy_to_user(ubase, kinfo, sz)) ++ ret = -EFAULT; ++bail_free: ++ kfree(kinfo); ++bail: ++ return ret; ++} ++#endif ++ + static int qib_get_base_info(struct file *fp, void __user *ubase, + size_t ubase_size) + { +@@ -177,14 +239,43 @@ + */ + kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys; ++ /* ++ * In the case of KNX, qib_do_user_init() would call into the ++ * KNX-specific memory allocation/registration functions. These ++ * functions will write the registered memory offsets in the ++ * qib_base_info structure. Those are the addresses that need to be ++ * handled to user level. ++ */ ++ kinfo->spi_uregbase = knx_node_fp(fp) ? ++ qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_UREG, fp) : ++ (u64) dd->uregbase + dd->ureg_align * rcd->ctxt; ++ ++ if (knx_node_fp(fp)) ++ kinfo->spi_runtime_flags = ++ qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_FLAGS, fp); + kinfo->spi_rhf_offset = dd->rhf_offset; + kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys; +- kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys; ++ ++ /* see comment for spi_uregbase above */ ++ if (knx_node_fp(fp)) ++ kinfo->spi_pioavailaddr = ++ qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_PIOAVAIL, fp); ++ else ++ kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys; ++ + /* setup per-unit (not port) status area for user programs */ +- kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + +- (char *) ppd->statusp - +- (char *) dd->pioavailregs_dma; +- kinfo->spi_uregbase = (u64) dd->uregbase + dd->ureg_align * rcd->ctxt; ++ kinfo->spi_status = (knx_node_fp(fp) ? ++ qib_knx_ctxt_info( ++ rcd, QIB_KNX_CTXTINFO_STATUS, fp) : ++ (u64) dd->pioavailregs_phys) + ++ (char *) ppd->statusp - (char *) dd->pioavailregs_dma; ++ ++ /* ++ * Do not set spi_piobufbase to KNX offset here as it is used in ++ * PIO index calculations below. For KNX contexts, the value of ++ * spi_piobufbase is not the physical address but the offset of ++ * the registered memory. ++ */ + if (!shared) { + kinfo->spi_piocnt = rcd->piocnt; + kinfo->spi_piobufbase = (u64) rcd->piobufs; +@@ -204,7 +295,11 @@ + dd->palign * kinfo->spi_piocnt * slave; + } + +- if (shared) { ++ /* ++ * In the case of KNX contexts, shared context memory is setup and ++ * handled on the the KNX. ++ */ ++ if (shared && !knx_node_fp(fp)) { + kinfo->spi_sendbuf_status = + cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]); + /* only spi_subctxt_* fields should be set in this block! */ +@@ -225,6 +320,11 @@ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) / + dd->palign; + kinfo->spi_pioalign = dd->palign; ++ /* Update spi_piobufbase after all calculations are done. */ ++ if (knx_node_fp(fp)) ++ kinfo->spi_piobufbase = ++ qib_knx_ctxt_info(rcd, QIB_KNX_CTXTINFO_PIOBUFBASE, fp); ++ + kinfo->spi_qpair = QIB_KD_QP; + /* + * user mode PIO buffers are always 2KB, even when 4KB can +@@ -1261,6 +1361,17 @@ + goto bail; + } + ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) ++ /* ++ * When setting up a context for a KNX process, setup of ++ * the subcontexts memory is done on the KNX side and ++ * mapped into user level. Therefore, the host driver never ++ * has to worry about it unless we are setting up a context ++ * on the host. 
++ */ ++ goto no_subctxt_mem; ++#endif + rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts); + if (!rcd->subctxt_uregbase) { + ret = -ENOMEM; +@@ -1283,6 +1394,9 @@ + goto bail_rhdr; + } + ++#ifdef QIB_CONFIG_KNX ++no_subctxt_mem: ++#endif + rcd->subctxt_cnt = uinfo->spu_subctxt_cnt; + rcd->subctxt_id = uinfo->spu_subctxt_id; + rcd->active_slaves = 1; +@@ -1317,6 +1431,14 @@ + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); + ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) ++ /* ++ * Skip allocation of page pointer list for TID ++ * receives. This will be done on the KNX. ++ */ ++ goto no_page_list; ++#endif + /* + * Allocate memory for use in qib_tid_update() at open to + * reduce cost of expected send setup per message segment +@@ -1332,7 +1454,11 @@ + ret = -ENOMEM; + goto bailerr; + } ++#ifdef QIB_CONFIG_KNX ++no_page_list: ++#endif + rcd->userversion = uinfo->spu_userversion; ++ + ret = init_subctxts(dd, rcd, uinfo); + if (ret) + goto bailerr; +@@ -1489,43 +1615,68 @@ + static int find_shared_ctxt(struct file *fp, + const struct qib_user_info *uinfo) + { +- int devmax, ndev, i; ++ int devmax, ndev; + int ret = 0; ++ struct qib_devdata *dd; + ++#ifdef QIB_CONFIG_KNX ++ /* ++ * In the case we are allocating a context for a KNX process, ++ * Don't loop over all devices but use the one assosiated with the ++ * requesting KNX. ++ */ ++ if (uinfo->spu_knx_node_id) { ++ dd = qib_knx_node_to_dd(uinfo->spu_knx_node_id); ++ if (dd && dd->num_knx) ++ ret = subctxt_search_ctxts(dd, fp, uinfo); ++ goto done; ++ } ++#endif + devmax = qib_count_units(NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { +- struct qib_devdata *dd = qib_lookup(ndev); +- ++ dd = qib_lookup(ndev); + /* device portion of usable() */ + if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase)) + continue; +- for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { +- struct qib_ctxtdata *rcd = dd->rcd[i]; ++ ret = subctxt_search_ctxts(dd, fp, uinfo); ++ if (ret) ++ break; ++ } ++#ifdef QIB_CONFIG_KNX ++done: ++#endif ++ return ret; ++} + +- /* Skip ctxts which are not yet open */ +- if (!rcd || !rcd->cnt) +- continue; +- /* Skip ctxt if it doesn't match the requested one */ +- if (rcd->subctxt_id != uinfo->spu_subctxt_id) +- continue; +- /* Verify the sharing process matches the master */ +- if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || +- rcd->userversion != uinfo->spu_userversion || +- rcd->cnt >= rcd->subctxt_cnt) { +- ret = -EINVAL; +- goto done; +- } +- ctxt_fp(fp) = rcd; +- subctxt_fp(fp) = rcd->cnt++; +- rcd->subpid[subctxt_fp(fp)] = current->pid; +- tidcursor_fp(fp) = 0; +- rcd->active_slaves |= 1 << subctxt_fp(fp); +- ret = 1; ++static int subctxt_search_ctxts(struct qib_devdata *dd, struct file *fp, ++ const struct qib_user_info *uinfo) ++{ ++ int ret = 0, i; ++ for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { ++ struct qib_ctxtdata *rcd = dd->rcd[i]; ++ ++ /* Skip ctxts which are not yet open */ ++ if (!rcd || !rcd->cnt) ++ continue; ++ /* Skip ctxt if it doesn't match the requested one */ ++ if (rcd->subctxt_id != uinfo->spu_subctxt_id) ++ continue; ++ /* Verify the sharing process matches the master */ ++ if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || ++ rcd->userversion != uinfo->spu_userversion || ++ rcd->cnt >= rcd->subctxt_cnt) { ++ ret = -EINVAL; + goto done; + } ++ ctxt_fp(fp) = rcd; ++ subctxt_fp(fp) = rcd->cnt++; ++ rcd->subpid[subctxt_fp(fp)] = current->pid; ++ tidcursor_fp(fp) = 0; ++ rcd->active_slaves |= 1 << subctxt_fp(fp); ++ ret = 1; ++ break; + } +- + done: + 
return ret; + } +@@ -1617,6 +1768,13 @@ + + if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT) + alg = uinfo->spu_port_alg; ++#ifdef QIB_CONFIG_KNX ++ /* Make sure we have a connection to the KNX module on the right node */ ++ if (uinfo->spu_knx_node_id && !qib_knx_get(uinfo->spu_knx_node_id)) { ++ ret = -ENODEV; ++ goto done; ++ } ++#endif + + mutex_lock(&qib_mutex); + +@@ -1624,13 +1782,38 @@ + uinfo->spu_subctxt_cnt) { + ret = find_shared_ctxt(fp, uinfo); + if (ret > 0) { +- ret = do_qib_user_sdma_queue_create(fp); ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) { ++ ret = qib_knx_sdma_queue_create(fp); ++ } else ++#endif ++ ret = do_qib_user_sdma_queue_create(fp); + if (!ret) + assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd); + goto done_ok; + } + } + ++#ifdef QIB_CONFIG_KNX ++ /* ++ * If there is a KNX node set, we pick the device that is ++ * associate with that KNX node ++ */ ++ if (uinfo->spu_knx_node_id) { ++ struct qib_devdata *dd = ++ qib_knx_node_to_dd(uinfo->spu_knx_node_id); ++ if (dd) { ++ ret = find_free_ctxt(dd->unit, fp, uinfo); ++ if (!ret) ++ ret = qib_knx_alloc_ctxt( ++ uinfo->spu_knx_node_id, ++ ctxt_fp(fp)->ctxt); ++ } else ++ ret = -ENXIO; ++ goto done_chk_sdma; ++ } ++ ++#endif + i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; + if (i_minor) + ret = find_free_ctxt(i_minor - 1, fp, uinfo); +@@ -1639,7 +1822,6 @@ + const unsigned int cpu = cpumask_first(¤t->cpus_allowed); + const unsigned int weight = + cpumask_weight(¤t->cpus_allowed); +- + if (weight == 1 && !test_bit(cpu, qib_cpulist)) + if (!find_hca(cpu, &unit) && unit >= 0) + if (!find_free_ctxt(unit, fp, uinfo)) { +@@ -1650,9 +1832,21 @@ + } + + done_chk_sdma: +- if (!ret) ++ if (!ret) { ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) { ++ ret = qib_knx_sdma_queue_create(fp); ++ /*if (!ret) ++ ret = qib_knx_setup_tidrcv(fp);*/ ++ goto done_ok; ++ } ++#endif + ret = do_qib_user_sdma_queue_create(fp); ++ } + done_ok: ++#ifdef QIB_CONFIG_KNX ++ knx_node_fp(fp) = uinfo->spu_knx_node_id; ++#endif + mutex_unlock(&qib_mutex); + + done: +@@ -1667,11 +1861,25 @@ + struct qib_ctxtdata *rcd = ctxt_fp(fp); + struct qib_devdata *dd; + unsigned uctxt; ++#ifdef QIB_CONFIG_KNX ++ struct qib_base_info *base_info = NULL; ++ void __user *ubase = (void __user *)(unsigned long) ++ uinfo->spu_base_info; ++#endif + + /* Subctxts don't need to initialize anything since master did it. */ + if (subctxt_fp(fp)) { + ret = wait_event_interruptible(rcd->wait, + !test_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag)); ++#ifdef QIB_CONFIG_KNX ++ /* ++ * Subctxt pio buffers need to be registered after the ++ * master has set everything up. ++ */ ++ if (uinfo->spu_knx_node_id) ++ ret = qib_knx_setup_piobufs(rcd->dd, rcd, ++ subctxt_fp(fp)); ++#endif + goto bail; + } + +@@ -1722,6 +1930,41 @@ + */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + ++#ifdef QIB_CONFIG_KNX ++ if (uinfo->spu_knx_node_id) { ++ /* ++ * When setting up rcvhdr Q and eager buffers for a KNX, the ++ * memory comes from the KNX side encoded in the qib_base_info ++ * structure. 
++ */ ++ if (uinfo->spu_base_info_size < (sizeof(*base_info) - ++ 7 * sizeof(u64))) { ++ ret = -EINVAL; ++ goto bail_pio; ++ } ++ base_info = kzalloc(sizeof(*base_info), GFP_KERNEL); ++ if (!base_info) { ++ ret = -ENOMEM; ++ goto bail_pio; ++ } ++ if (copy_from_user(base_info, ubase, ++ uinfo->spu_base_info_size)) { ++ ret = -EFAULT; ++ goto bail_pio; ++ } ++ ret = qib_knx_setup_piobufs(dd, rcd, subctxt_fp(fp)); ++ if (ret) ++ goto cont_init; ++ ret = qib_knx_setup_pioregs(dd, rcd, base_info); ++ if (ret) ++ goto cont_init; ++ ret = qib_knx_create_rcvhdrq(dd, rcd, base_info); ++ if (ret) ++ goto cont_init; ++ ret = qib_knx_setup_eagerbufs(rcd, base_info); ++ goto cont_init; ++ } ++#endif /* QIB_CONFIG_KNX */ + /* + * Now allocate the rcvhdr Q and eager TIDs; skip the TID + * array for time being. If rcd->ctxt > chip-supported, +@@ -1731,6 +1974,9 @@ + ret = qib_create_rcvhdrq(dd, rcd); + if (!ret) + ret = qib_setup_eagerbufs(rcd); ++#ifdef QIB_CONFIG_KNX ++cont_init: ++#endif + if (ret) + goto bail_pio; + +@@ -1828,6 +2074,13 @@ + + /* drain user sdma queue */ + if (fd->pq) { ++#ifdef QIB_CONFIG_KNX ++ /* ++ * The thread should be stopped first before attempting ++ * to clean the queue. ++ */ ++ qib_knx_sdma_queue_destroy(fd); ++#endif + qib_user_sdma_queue_drain(rcd->ppd, fd->pq); + qib_user_sdma_queue_destroy(fd->pq); + } +@@ -1885,6 +2138,12 @@ + } + + mutex_unlock(&qib_mutex); ++#ifdef QIB_CONFIG_KNX ++ if (fd->knx_node_id) { ++ qib_knx_free_ctxtdata(dd, rcd); ++ goto bail; ++ } ++#endif + qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */ + + bail: +@@ -2170,6 +2429,13 @@ + ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); + if (ret) + goto bail; ++#ifdef QIB_CONFIG_KNX ++ if (cmd.cmd.user_info.spu_knx_node_id) ++ ret = qib_get_early_base_info( ++ fp, (void __user *) (unsigned long) ++ cmd.cmd.user_info.spu_base_info, ++ cmd.cmd.user_info.spu_base_info_size); ++#endif + break; + + case QIB_CMD_USER_INIT: +diff -urN a9/drivers/infiniband/hw/qib/qib.h a10/drivers/infiniband/hw/qib/qib.h +--- a9/drivers/infiniband/hw/qib/qib.h 2015-01-05 15:05:04.280461602 -0800 ++++ a10/drivers/infiniband/hw/qib/qib.h 2015-01-05 15:10:58.250446692 -0800 +@@ -234,6 +234,10 @@ + u32 lookaside_qpn; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; ++#ifdef QIB_CONFIG_KNX ++ /* KNX Receive Context Data */ ++ struct qib_knx_ctxt *krcd; ++#endif + #ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +@@ -1106,6 +1110,11 @@ + struct kthread_worker *worker; + + int assigned_node_id; /* NUMA node closest to HCA */ ++ ++#ifdef QIB_CONFIG_KNX ++ /* number of KNx nodes using this device */ ++ u16 num_knx; ++#endif + }; + + /* hol_state values */ +@@ -1134,6 +1143,9 @@ + unsigned tidcursor; + struct qib_user_sdma_queue *pq; + int rec_cpu_num; /* for cpu affinity; -1 if none */ ++#ifdef QIB_CONFIG_KNX ++ u16 knx_node_id; ++#endif + }; + + extern struct list_head qib_dev_list; +@@ -1211,6 +1223,13 @@ + (((struct qib_filedata *)(fp)->private_data)->tidcursor) + #define user_sdma_queue_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->pq) ++#ifdef QIB_CONFIG_KNX ++#define knx_node_fp(fp) \ ++ (((struct qib_filedata *)(fp)->private_data)->knx_node_id) ++#else ++/* allow the use of knx_node_fp() outside of a #ifdef QIB_CONFIG_KNX */ ++#define knx_node_fp(fp) 0 ++#endif + + static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd) + { +diff -urN a9/drivers/infiniband/hw/qib/qib_init.c a10/drivers/infiniband/hw/qib/qib_init.c +--- 
a9/drivers/infiniband/hw/qib/qib_init.c 2015-01-05 15:05:04.279461602 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_init.c 2015-01-05 15:10:58.251446692 -0800 +@@ -51,6 +51,10 @@ + #include "qib_verbs.h" + #endif + ++#ifdef QIB_CONFIG_KNX ++#include "qib_knx.h" ++#endif ++ + #undef pr_fmt + #define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + +@@ -1301,6 +1305,12 @@ + /* not fatal if it doesn't work */ + if (qib_init_qibfs()) + pr_err("Unable to register ipathfs\n"); ++ ++#ifdef QIB_CONFIG_KNX ++ ret = qib_knx_server_init(); ++ if (ret < 0) ++ pr_err(": Unable to start KNX listen thread\n"); ++#endif + goto bail; /* all OK */ + + bail_dev: +@@ -1325,6 +1335,9 @@ + { + int ret; + ++#ifdef QIB_CONFIG_KNX ++ qib_knx_server_exit(); ++#endif + ret = qib_exit_qibfs(); + if (ret) + pr_err( +@@ -1568,6 +1581,9 @@ + /* unregister from IB core */ + qib_unregister_ib_device(dd); + ++#ifdef QIB_CONFIG_KNX ++ qib_knx_remove_device(dd); ++#endif + /* + * Disable the IB link, disable interrupts on the device, + * clear dma engines, etc. +diff -urN a9/drivers/infiniband/hw/qib/qib_knx.c a10/drivers/infiniband/hw/qib/qib_knx.c +--- a9/drivers/infiniband/hw/qib/qib_knx.c 1969-12-31 16:00:00.000000000 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_knx.c 2015-01-05 15:10:58.252446692 -0800 +@@ -0,0 +1,1532 @@ ++/* ++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "qib.h" ++#include "qib_knx.h" ++#include "qib_user_sdma.h" ++#include "qib_knx_common.h" ++ ++unsigned int qib_knx_nconns = 5; ++module_param_named(num_conns, qib_knx_nconns, uint, S_IRUGO); ++MODULE_PARM_DESC(num_conns, "Max number of pending connections"); ++ ++#define QIB_KNX_SCIF_PORT SCIF_OFED_PORT_9 ++#define CLIENT_THREAD_NAME(x) "qib/mic" __stringify(x) ++ ++#define knx_sdma_next(sdma) \ ++ (sdma->head = ((sdma->head + 1) % sdma->desc_num)) ++#define per_ctxt(ctxt, sub) ((ctxt * QLOGIC_IB_MAX_SUBCTXT) + sub) ++#define QIB_KNX_SDMA_STATUS(sdma, st) \ ++ QIB_KNX_SDMA_SET(sdma->mflags->status, ((u64)st << 32) | 1) ++ ++struct qib_knx_server { ++ struct task_struct *kthread; ++ struct scif_pollepd epd; ++ spinlock_t client_lock; ++ struct list_head clients; ++ unsigned int nclients; ++}; ++ ++struct qib_knx_rma { ++ /* SCIF registered offset */ ++ off_t offset; ++ /* size of mapped memory (in bytes) */ ++ size_t size; ++ /* kernel virtual address of ioremap'ed memory */ ++ void *kvaddr; ++}; ++ ++struct qib_knx_mem_map { ++ /* physical address is DMA range */ ++ dma_addr_t dma_mapped_addr; ++ /* DMA direction */ ++ enum dma_data_direction dir; ++ /* size of remote memory area */ ++ size_t size; ++ /* SCIF array of physical pages */ ++ struct scif_range *pages; ++}; ++ ++struct qib_knx_mem_map_sg { ++ /* list of pages to map */ ++ struct scatterlist *sglist; ++ /* DMA direction */ ++ enum dma_data_direction dir; ++ /* total size of mapped memory */ ++ size_t size; ++ struct scif_range *pages; ++}; ++ ++struct qib_knx_tidrcv { ++ struct qib_knx_rma tidmem; ++ u64 tidbase; ++ u32 tidcnt; ++}; ++ ++struct qib_knx_ctxt { ++ u16 ctxt; ++ struct qib_knx *knx; ++ struct qib_pportdata *ppd; ++ /* local registered memory for PIO buffers */ ++ struct qib_knx_rma piobufs[QLOGIC_IB_MAX_SUBCTXT]; ++ /* local registered memory for user registers */ ++ struct qib_knx_rma uregs; ++ /* local registered memory for PIO avail registers */ ++ struct qib_knx_rma pioavail; ++ /* remote registered memory for RcvHdr Q */ ++ struct qib_knx_mem_map_sg rcvhdrq; ++ /* remote registered memory for SendBuf status */ ++ struct qib_knx_mem_map sbufstatus; ++ /* remote registered memory for RcvHdrTail register */ ++ struct qib_knx_mem_map rcvhdrqtailaddr; ++ /* remote registered memory for Eager buffers */ ++ struct qib_knx_mem_map_sg eagerbufs; ++ ++ /* Saved offsets for shared context processes */ ++ __u64 uregbase; ++ __u64 pioavailaddr; ++ __u64 status; ++ __u64 piobufbase[QLOGIC_IB_MAX_SUBCTXT]; ++ __u32 runtime_flags; ++ ++ struct qib_user_sdma_queue *pq[QLOGIC_IB_MAX_SUBCTXT]; ++}; ++ ++struct qib_knx_sdma { ++ /* KNX flags page */ ++ struct scif_range *mflag_pages; ++ struct qib_knx_sdma_mflags *mflags; ++ /* KNX descriptor queue */ ++ struct scif_range *queue_pages; ++ struct qib_knx_sdma_desc *queue; ++ u32 desc_num; ++ /* host flags (in host memory) */ ++ struct qib_knx_rma hflags_mem; ++ struct qib_knx_sdma_hflags *hflags; ++ u32 head; /* shadow */ ++ u32 complete; ++}; ++ ++struct qib_knx { ++ struct list_head list; ++ struct scif_pollepd epd; ++ struct scif_portID peer; ++ struct scif_pci_info pci_info; ++ int numa_node; ++ struct qib_devdata *dd; ++ struct qib_knx_ctxt **ctxts; ++ spinlock_t ctxt_lock; ++ resource_size_t bar; ++ u64 barlen; ++ struct qib_knx_sdma *sdma; ++ struct task_struct *sdma_poll; ++ atomic_t tref; ++ char tname[64]; ++ struct qib_knx_rma tidmem; ++}; ++ ++static struct qib_knx_server *server; ++ 
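++/*
++ * A single listener thread (qib_knx_server_listen) accepts SCIF
++ * connections from the coprocessors.  Each accepted connection is
++ * tracked as a struct qib_knx on the server's client list and is
++ * bound to an HCA on the card's NUMA node when one is available,
++ * otherwise to a fallback device.
++ */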
++static int qib_knx_init(struct qib_knx_server *); ++static void qib_knx_free(struct qib_knx *, int); ++static int qib_knx_server_listen(void *); ++static off_t qib_knx_register_memory(struct qib_knx *, struct qib_knx_rma *, ++ void *, size_t, int, const char *); ++static int qib_knx_unregister_memory(struct qib_knx *, struct qib_knx_rma *, ++ const char *); ++static __always_inline void qib_knx_memcpy(void *, void __iomem *, size_t); ++static ssize_t qib_show_knx_node(struct device *, struct device_attribute *, ++ char *); ++static int qib_knx_sdma_init(struct qib_knx *); ++static void qib_knx_sdma_teardown(struct qib_knx *); ++static __always_inline struct page * ++qib_knx_phys_to_page(struct qib_knx *, unsigned long); ++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *, ++ struct qib_knx_sdma_desc *, ++ struct qib_user_sdma_queue *, ++ int *, struct list_head *); ++static int qib_knx_sdma_poll(void *); ++static int qib_knx_tidrcv_init(struct qib_knx *); ++static int qib_knx_tidrcv_teardown(struct qib_knx *); ++ ++inline struct qib_knx *qib_knx_get(u16 nodeid) ++{ ++ struct qib_knx *knx = NULL; ++ ++ spin_lock(&server->client_lock); ++ if (!list_empty(&server->clients)) ++ list_for_each_entry(knx, &server->clients, list) ++ if (knx->peer.node == nodeid) ++ break; ++ spin_unlock(&server->client_lock); ++ return knx; ++} ++ ++inline struct qib_devdata *qib_knx_node_to_dd(u16 node) ++{ ++ struct qib_knx *knx = qib_knx_get(node); ++ return knx ? knx->dd : NULL; ++} ++ ++static int qib_knx_init(struct qib_knx_server *server) ++{ ++ int ret = 0, num_devs = 0, i, seen = 0; ++ unsigned fewest = -1U; ++ struct qib_devdata *dd = NULL, *dd_no_numa = NULL; ++ struct qib_knx *knx; ++ struct qib_device_info info = { -1 }; ++ ++ knx = kzalloc(sizeof(*knx), GFP_KERNEL); ++ if (!knx) { ++ ret = -ENOMEM; ++ goto bail; ++ } ++ ret = scif_accept(server->epd.epd, &knx->peer, &knx->epd.epd, 0); ++ if (ret) { ++ kfree(knx); ++ goto bail; ++ } ++ ++ INIT_LIST_HEAD(&knx->list); ++ spin_lock_init(&knx->ctxt_lock); ++ knx->numa_node = -1; ++ ret = scif_pci_info(knx->peer.node, &knx->pci_info); ++ if (!ret) { ++ knx->numa_node = pcibus_to_node(knx->pci_info.pdev->bus); ++ knx->bar = pci_resource_start(knx->pci_info.pdev, 0); ++ knx->barlen = pci_resource_len(knx->pci_info.pdev, 0); ++ } ++ ++ if (knx->numa_node < 0) ++ knx->numa_node = numa_node_id(); ++ ++ num_devs = qib_count_units(NULL, NULL); ++ if (unlikely(!num_devs)) { ++ ret = -ENODEV; ++ /* we have to send this */ ++ scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); ++ goto done; ++ } ++ ++ /* ++ * Attempt to find an HCA on the same NUMA node as the card. Save ++ * the first HCA that hasn't been associated with a card in case ++ * there is no HCA on the same NUMA node. ++ */ ++ for (i = 0; seen < num_devs; i++) { ++ dd = qib_lookup(i); ++ if (dd) { ++ if (dd->assigned_node_id == knx->numa_node) { ++ knx->dd = dd; ++ break; ++ } else if (dd->num_knx < fewest) ++ dd_no_numa = dd; ++ seen++; ++ } ++ } ++ /* ++ * We didn't find a QIB device on the same NUMA node, ++ * use the "backup". ++ */ ++ if (unlikely(!knx->dd)) { ++ if (!dd_no_numa) { ++ ret = -ENODEV; ++ /* we have to send this */ ++ scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); ++ goto done; ++ } ++ knx->dd = dd_no_numa; ++ } ++ knx->dd->num_knx++; ++ ++ knx->ctxts = kzalloc_node(knx->dd->ctxtcnt * sizeof(*knx->ctxts), ++ GFP_KERNEL, knx->numa_node); ++ if (!knx->ctxts) ++ ret = -ENOMEM; ++ /* Give the KNX the associated device information. 
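++ * (On the failure paths above the structure was already sent with
++ * unit left at -1.)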
*/ ++ info.unit = knx->dd->unit; ++ ret = scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); ++ ++ ret = qib_knx_sdma_init(knx); ++ if (ret) ++ goto done; ++ atomic_set(&knx->tref, 0); ++ ret = qib_knx_tidrcv_init(knx); ++done: ++ spin_lock(&server->client_lock); ++ list_add_tail(&knx->list, &server->clients); ++ server->nclients++; ++ spin_unlock(&server->client_lock); ++ try_module_get(THIS_MODULE); ++bail: ++ return ret; ++} ++ ++static void qib_knx_free(struct qib_knx *knx, int unload) ++{ ++ struct qib_devdata *dd = knx->dd; ++ int i; ++ ++ qib_knx_tidrcv_teardown(knx); ++ qib_knx_sdma_teardown(knx); ++ if (dd) ++ dd->num_knx--; ++ /* ++ * If this function is called with unload set, we can ++ * free the context data. Otherwise, we are here ++ * because the connection between the modules has broken. ++ */ ++ if (knx->ctxts && unload && dd) ++ for (i = dd->first_user_ctxt; i < dd->ctxtcnt; i++) ++ qib_knx_free_ctxtdata(dd, dd->rcd[i]); ++ ++ scif_close(knx->epd.epd); ++ module_put(THIS_MODULE); ++ if (unload) ++ kfree(knx->ctxts); ++} ++ ++static int qib_knx_server_listen(void *data) ++{ ++ struct qib_knx_server *server = ++ (struct qib_knx_server *)data; ++ struct qib_knx *client, *ptr; ++ int ret = 0; ++ ++ server->epd.epd = scif_open(); ++ if (!server->epd.epd) { ++ ret = -EIO; ++ goto done; ++ } ++ server->epd.events = POLLIN; ++ ret = scif_bind(server->epd.epd, QIB_KNX_SCIF_PORT); ++ if (ret < 0) ++ goto err_close; ++ ++ ret = scif_listen(server->epd.epd, qib_knx_nconns); ++ if (ret) ++ goto err_close; ++ ++ while (!kthread_should_stop()) { ++ schedule(); ++ ++ /* poll for one millisecond. Is 50ms good? */ ++ ret = scif_poll(&server->epd, 1, 50); ++ if (ret > 0) ++ ret = qib_knx_init(server); ++ ++ /* ++ * Check for any disconnected clients and clean them up. ++ * Since there is nothing anywhere else that can change the ++ * list, we only lock when we are deleting a client so ++ * querying functions operate on "static" list. 
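++ * A client whose endpoint polls back POLLHUP is unlinked under the
++ * lock and its resources are released.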
++ */ ++ list_for_each_entry_safe(client, ptr, &server->clients, list) { ++ client->epd.events = POLLIN; ++ if (scif_poll(&client->epd, 1, 1)) { ++ if (client->epd.revents & POLLHUP) { ++ spin_lock(&server->client_lock); ++ list_del(&client->list); ++ spin_unlock(&server->client_lock); ++ qib_knx_free(client, 0); ++ kfree(client); ++ } ++ } ++ } ++ } ++err_close: ++ scif_close(server->epd.epd); ++done: ++ return ret; ++} ++ ++ ++static off_t qib_knx_register_memory(struct qib_knx *knx, ++ struct qib_knx_rma *rma, void *kvaddr, ++ size_t size, int prot, const char *what) ++{ ++ int ret = 0; ++ off_t regoffset; ++ ++ if (!kvaddr || ((unsigned long)kvaddr & ~PAGE_MASK)) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ rma->kvaddr = kvaddr; ++ rma->size = size; ++ ++ regoffset = scif_register(knx->epd.epd, rma->kvaddr, rma->size, ++ 0, prot, SCIF_MAP_KERNEL); ++ if (IS_ERR_VALUE(regoffset)) { ++ ret = regoffset; ++ goto bail; ++ } ++ rma->offset = regoffset; ++ return regoffset; ++bail: ++ rma->kvaddr = NULL; ++ rma->size = 0; ++ return ret; ++} ++ ++static int qib_knx_unregister_memory(struct qib_knx *knx, ++ struct qib_knx_rma *rma, const char *what) ++{ ++ int ret = 0; ++ ++ if (!rma) { ++ ret = -EINVAL; ++ goto done; ++ } ++ if (rma->offset) ++ ret = scif_unregister(knx->epd.epd, rma->offset, rma->size); ++ rma->kvaddr = NULL; ++ rma->size = 0; ++ rma->offset = 0; ++done: ++ return ret; ++} ++ ++static __always_inline void qib_knx_memcpy(void *dst, void __iomem *src, ++ size_t size) ++{ ++ memcpy_fromio(dst, src, size); ++} ++ ++int qib_knx_alloc_ctxt(u16 node_id, unsigned ctxt) ++{ ++ struct qib_knx *knx = qib_knx_get(node_id); ++ struct qib_devdata *dd = knx->dd; ++ struct qib_knx_ctxt *ptr; ++ int ret = 0; ++ ++ if (ctxt >= dd->ctxtcnt) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ if (unlikely(!knx->ctxts)) { ++ ret = -ENOMEM; ++ goto bail; ++ } ++ ptr = kzalloc_node(sizeof(*ptr), GFP_KERNEL, knx->numa_node); ++ if (unlikely(!ptr)) { ++ ret = -ENOMEM; ++ goto bail; ++ } ++ ptr->knx = knx; ++ ptr->ctxt = ctxt; ++ ptr->ppd = dd->rcd[ctxt]->ppd; ++ ++ spin_lock(&knx->ctxt_lock); ++ knx->ctxts[ctxt] = ptr; ++ dd->rcd[ctxt]->krcd = ptr; ++ spin_unlock(&knx->ctxt_lock); ++bail: ++ return ret; ++} ++ ++__u64 qib_knx_ctxt_info(struct qib_ctxtdata *rcd, ++ enum qib_knx_ctxtinfo_type type, ++ struct file *fp) ++{ ++ struct qib_knx *knx = rcd->krcd->knx; ++ __u16 subctxt; ++ __u64 ret = 0; ++ ++ spin_lock(&knx->ctxt_lock); ++ if (!knx || !knx->ctxts || !knx->ctxts[rcd->ctxt]) ++ goto done; ++ ++ switch (type) { ++ case QIB_KNX_CTXTINFO_UREG: ++ ret = knx->ctxts[rcd->ctxt]->uregbase; ++ break; ++ case QIB_KNX_CTXTINFO_PIOAVAIL: ++ ret = knx->ctxts[rcd->ctxt]->pioavailaddr; ++ break; ++ case QIB_KNX_CTXTINFO_STATUS: ++ ret = knx->ctxts[rcd->ctxt]->status; ++ break; ++ case QIB_KNX_CTXTINFO_PIOBUFBASE: ++ subctxt = fp ? 
subctxt_fp(fp) : 0; ++ ret = knx->ctxts[rcd->ctxt]->piobufbase[subctxt]; ++ break; ++ case QIB_KNX_CTXTINFO_FLAGS: ++ ret = knx->ctxts[rcd->ctxt]->runtime_flags; ++ break; ++ } ++done: ++ spin_unlock(&knx->ctxt_lock); ++ return ret; ++} ++ ++int qib_knx_setup_piobufs(struct qib_devdata *dd, struct qib_ctxtdata *rcd, ++ __u16 subctxt) ++{ ++ unsigned piobufs, piocnt; ++ char buf[16]; ++ off_t offset; ++ int ret = 0; ++ struct qib_knx *knx = rcd->krcd->knx; ++ ++ if (unlikely(!knx)) { ++ ret = -ENODEV; ++ goto bail; ++ } ++ if (unlikely(!knx->ctxts[rcd->ctxt])) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ ++ /* ++ * We don't calculate piobufs based on the rcd->piobufs like ++ * everywhere else in the driver because rcd->piobufs is based ++ * on the 2K PIO buffer virtual address. We just need an offset. ++ */ ++ piobufs = rcd->pio_base * dd->palign; ++ if (!rcd->subctxt_cnt) ++ piocnt = rcd->piocnt; ++ else if (!subctxt) { ++ piocnt = (rcd->piocnt / rcd->subctxt_cnt) + ++ (rcd->piocnt % rcd->subctxt_cnt); ++ piobufs += dd->palign * (rcd->piocnt - piocnt); ++ } else { ++ piocnt = rcd->piocnt / rcd->subctxt_cnt; ++ piobufs += dd->palign * piocnt * (subctxt - 1); ++ } ++ ++ /* register PIO buffers */ ++ snprintf(buf, sizeof(buf), "PIO bufs %u:%u", rcd->ctxt, subctxt); ++ offset = qib_knx_register_memory( ++ knx, &knx->ctxts[rcd->ctxt]->piobufs[subctxt], ++ dd->piobase + piobufs, piocnt * dd->palign, ++ SCIF_PROT_WRITE, buf); ++ if (IS_ERR_VALUE(offset)) { ++ ret = offset; ++ goto bail; ++ } ++ knx->ctxts[rcd->ctxt]->piobufbase[subctxt] = offset; ++bail: ++ return ret; ++} ++ ++int qib_knx_setup_pioregs(struct qib_devdata *dd, struct qib_ctxtdata *rcd, ++ struct qib_base_info *binfo) ++{ ++ int ret = 0; ++ off_t offset; ++ struct qib_knx *knx = rcd->krcd->knx; ++ ++ if (unlikely(!knx)) { ++ ret = -ENODEV; ++ goto bail; ++ } ++ if (unlikely(!knx->ctxts[rcd->ctxt])) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ ++ /* register the user registers to remote mapping */ ++ offset = qib_knx_register_memory(knx, &knx->ctxts[rcd->ctxt]->uregs, ++ (char *)dd->userbase + ++ (dd->ureg_align * rcd->ctxt), ++ dd->flags & QIB_HAS_HDRSUPP ? ++ 2 * PAGE_SIZE : PAGE_SIZE, ++ SCIF_PROT_READ|SCIF_PROT_WRITE, ++ "UserRegs"); ++ if (IS_ERR_VALUE(offset)) { ++ ret = offset; ++ goto bail; ++ } ++ knx->ctxts[rcd->ctxt]->uregbase = offset; ++ ++ /* ++ * register the PIO availability registers. ++ * user status 64bit values are part of the page containing the ++ * pio availability registers. ++ */ ++ offset = qib_knx_register_memory(knx, &knx->ctxts[rcd->ctxt]->pioavail, ++ (void *)dd->pioavailregs_dma, ++ PAGE_SIZE, SCIF_PROT_READ, ++ "pioavail regs"); ++ if (IS_ERR_VALUE(offset)) { ++ ret = offset; ++ goto bail_uregs; ++ } ++ knx->ctxts[rcd->ctxt]->pioavailaddr = offset; ++ /* ++ * User status bitmask is part of the same mapped page as the PIO ++ * availability bits and user level code should know that. Therefore, ++ * we just need to give it the offset into the mapped page where the ++ * status mask is located. ++ */ ++ knx->ctxts[rcd->ctxt]->status = offset; ++ /* Record the run time flags that were passed in by the user. 
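++ * qib_knx_create_rcvhdrq() may later clear QIB_RUNTIME_NODMA_RTAIL
++ * in this saved copy once the tail register has been DMA-mapped.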
*/ ++ knx->ctxts[rcd->ctxt]->runtime_flags = binfo->spi_runtime_flags; ++ goto bail; ++bail_uregs: ++ qib_knx_unregister_memory(knx, &knx->ctxts[rcd->ctxt]->uregs, ++ "UserRegs"); ++bail: ++ return ret; ++} ++ ++int qib_knx_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd, ++ struct qib_base_info *binfo) ++{ ++ struct qib_knx_mem_map_sg *mapsg; ++ struct qib_knx_mem_map *map; ++ struct qib_knx *knx = rcd->krcd->knx; ++ dma_addr_t offset; ++ struct scatterlist *sg; ++ unsigned num_pages; ++ size_t size; ++ int ret = 0, i; ++ ++ if (unlikely(!knx)) { ++ ret = -ENODEV; ++ goto bail; ++ } ++ if (unlikely(!knx->ctxts[rcd->ctxt])) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ if (unlikely(!binfo->spi_rcvhdr_base)) { ++ ret = -EIO; ++ goto bail; ++ } ++ ++ size = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize * ++ sizeof(u32), PAGE_SIZE); ++ mapsg = &knx->ctxts[rcd->ctxt]->rcvhdrq; ++ ret = scif_get_pages(knx->epd.epd, binfo->spi_rcvhdr_base, ++ size, &mapsg->pages); ++ if (ret) ++ goto bail; ++ if (!mapsg->pages->nr_pages) { ++ rcd->rcvhdrq = NULL; ++ ret = -ENOMEM; ++ goto bail_rcvq_pages; ++ } ++ num_pages = mapsg->pages->nr_pages; ++ if (num_pages * PAGE_SIZE != size) { ++ ret = -EINVAL; ++ goto bail_rcvq_pages; ++ } ++ rcd->rcvhdrq_size = size; ++ /* verify that rcvhdr q is contiguous */ ++ offset = mapsg->pages->phys_addr[0]; ++ for (i = 1; i < num_pages; i++) { ++ if (offset + PAGE_SIZE != mapsg->pages->phys_addr[i]) { ++ ret = -EFAULT; ++ goto bail_rcvq_pages; ++ } ++ offset += PAGE_SIZE; ++ } ++ memset(mapsg->pages->va[0], 0, size); ++ mapsg->size = size; ++ mapsg->dir = DMA_FROM_DEVICE; ++ /* ++ * Streaming DMa mappings are supposed to be short-lived. ++ * The mappings here are not exactly short-lived and ++ * technically we might not even need them since SusieQ ++ * can use 64bit addresses for DMA but the CPU might not. ++ * (see pci_set_dma_mask() in qib_pcie.c). ++ */ ++ mapsg->sglist = kzalloc_node(num_pages * sizeof(*mapsg->sglist), ++ GFP_KERNEL, knx->numa_node); ++ if (!mapsg->sglist) { ++ ret = -ENOMEM; ++ goto bail_rcvq_pages; ++ } ++ sg_init_table(mapsg->sglist, num_pages); ++ for_each_sg(mapsg->sglist, sg, num_pages, i) ++ sg_set_page(sg, vmalloc_to_page(mapsg->pages->va[i]), PAGE_SIZE, ++ 0); ++ ret = pci_map_sg(dd->pcidev, mapsg->sglist, num_pages, mapsg->dir); ++ if (!ret) { ++ rcd->rcvhdrq_phys = 0; ++ goto bail_free_sgtable; ++ } ++ /* ++ * pci_map_sg() will remap all 128 pages of the ++ * scatterlist separately (without coalescing them). ++ * However, since the buffer is contiguous, as long ++ * as the base address is mapped correctly, everything ++ * should work. In any case, check that the mapped ++ * addresses are contiguous anyway. ++ */ ++ offset = sg_dma_address(mapsg->sglist); ++ for_each_sg(mapsg->sglist, sg, num_pages, i) { ++ dma_addr_t sgaddr; ++ sgaddr = sg_dma_address(sg); ++ if ((offset == sgaddr && i) || ++ (offset != sgaddr && sgaddr != offset + PAGE_SIZE)) { ++ ret = -EINVAL; ++ goto bail_rcvhdrq; ++ } ++ offset = sgaddr; ++ } ++ rcd->rcvhdrq_phys = sg_dma_address(mapsg->sglist); ++ rcd->rcvhdrq = mapsg->pages->va[0]; ++ ++ map = &knx->ctxts[rcd->ctxt]->sbufstatus; ++ ret = scif_get_pages(knx->epd.epd, binfo->spi_sendbuf_status, ++ PAGE_SIZE, &map->pages); ++ if (ret) ++ goto bail_rcvhdrq; ++ ++ map->size = PAGE_SIZE; ++ if (map->pages->nr_pages > 0) { ++ rcd->user_event_mask = map->pages->va[0]; ++ /* ++ * clear the mapped page - this is important as it will cause ++ * user level to request "invalid" updates on every PIO send. 
++ */ ++ memset(rcd->user_event_mask, 0, PAGE_SIZE); ++ } ++ /* ++ * Map the rcvhdrtailaddr page(s) if we are goign to DMA the tail ++ * register to memory, the chip will be prgrammed when ++ * qib_do_user_init() calls f_rcvctrl(). ++ */ ++ if (!(dd->flags & QIB_NODMA_RTAIL) && binfo->spi_rcvhdr_tailaddr) { ++ map = &knx->ctxts[rcd->ctxt]->rcvhdrqtailaddr; ++ ret = scif_get_pages(knx->epd.epd, binfo->spi_rcvhdr_tailaddr, ++ PAGE_SIZE, &map->pages); ++ if (ret) ++ goto bail_umask; ++ map->size = PAGE_SIZE; ++ map->dir = DMA_FROM_DEVICE; ++ /* don't reuse num_pages in case there is an error */ ++ if (map->pages->nr_pages > 0) { ++ rcd->rcvhdrqtailaddr_phys = ++ pci_map_page(dd->pcidev, ++ vmalloc_to_page(map->pages->va[0]), ++ 0, map->size, map->dir); ++ if (pci_dma_mapping_error(dd->pcidev, ++ rcd->rcvhdrqtailaddr_phys)) { ++ rcd->rcvhdrqtailaddr_phys = 0; ++ ret = -ENOMEM; ++ goto bail_tail; ++ } ++ rcd->rcvhdrtail_kvaddr = map->pages->va[0]; ++ /* clear, just in case... */ ++ memset(rcd->rcvhdrtail_kvaddr, 0, map->size); ++ map->dma_mapped_addr = ++ rcd->rcvhdrqtailaddr_phys; ++ knx->ctxts[rcd->ctxt]->runtime_flags &= ++ ~QIB_RUNTIME_NODMA_RTAIL; ++ } ++ } ++ ret = 0; ++ goto bail; ++bail_tail: ++ scif_put_pages(knx->ctxts[rcd->ctxt]->rcvhdrqtailaddr.pages); ++bail_umask: ++ rcd->user_event_mask = NULL; ++ scif_put_pages(knx->ctxts[rcd->ctxt]->sbufstatus.pages); ++bail_rcvhdrq: ++ rcd->rcvhdrq = NULL; ++ pci_unmap_sg(dd->pcidev, knx->ctxts[rcd->ctxt]->rcvhdrq.sglist, ++ num_pages, knx->ctxts[rcd->ctxt]->rcvhdrq.dir); ++bail_free_sgtable: ++ kfree(knx->ctxts[rcd->ctxt]->rcvhdrq.sglist); ++bail_rcvq_pages: ++ scif_put_pages(knx->ctxts[rcd->ctxt]->rcvhdrq.pages); ++bail: ++ return ret; ++} ++ ++int qib_knx_setup_eagerbufs(struct qib_ctxtdata *rcd, ++ struct qib_base_info *binfo) ++{ ++ struct qib_knx_mem_map_sg *map; ++ struct scatterlist *sg; ++ struct qib_devdata *dd = rcd->dd; ++ struct qib_knx *knx = rcd->krcd->knx; ++ unsigned size, egrsize, egrcnt, num_pages, bufs_ppage, ++ egrbufcnt; ++ dma_addr_t dma_addr, page; ++ int ret = -ENOMEM, i, bufcnt; ++ ++ if (unlikely(!knx)) { ++ ret = -ENODEV; ++ goto bail; ++ } ++ if (unlikely(!knx->ctxts[rcd->ctxt])) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ if (unlikely(!binfo->spi_rcv_egrbufs)) { ++ ret = -ENOBUFS; ++ goto bail; ++ } ++ size = binfo->spi_rcv_egrbuftotlen; ++ egrsize = dd->rcvegrbufsize; ++ egrcnt = rcd->rcvegrcnt; ++ ++ /* ++ * Check whether the total size of the buffer is enough for all ++ * Eager buffers. ++ */ ++ if (size < egrsize * egrcnt) { ++ ret = -EINVAL; ++ goto bail; ++ } ++ ++ /* number of pages required to fit all the eager buffers */ ++ num_pages = (egrsize * egrcnt) / PAGE_SIZE; ++ /* number of buffers per page (depends on MTU) */ ++ bufs_ppage = PAGE_SIZE / egrsize; ++ map = &knx->ctxts[rcd->ctxt]->eagerbufs; ++ ret = scif_get_pages(knx->epd.epd, binfo->spi_rcv_egrbufs, ++ size, &map->pages); ++ if (ret) ++ goto bail; ++ ++ if (map->pages->nr_pages != num_pages) { ++ ret = -EINVAL; ++ goto bail_free_scif; ++ } ++ ++ /* ++ * Allocate pointer to the pages from the KNX memory. ++ * In the case of KNX eager buffers, we are not dealing with ++ * 32K chunks of locally allocated memory. Therefore, we ++ * allocate num_pages pointers instead of rcd->rcvegrbuf_chunks. 
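++ * Each entry ends up pointing at one mapped KNX page, with the
++ * matching DMA address kept in rcd->rcvegrbuf_phys[].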
++ */ ++ if (likely(!rcd->rcvegrbuf)) { ++ rcd->rcvegrbuf = kzalloc_node(num_pages * ++ sizeof(rcd->rcvegrbuf[0]), ++ GFP_KERNEL, rcd->node_id); ++ if (!rcd->rcvegrbuf) { ++ ret = -ENOMEM; ++ goto bail_free_scif; ++ } ++ } ++ ++ /* ++ * Allocate array of DMA addresses for each of the mapped ++ * pages. ++ */ ++ if (likely(!rcd->rcvegrbuf_phys)) { ++ rcd->rcvegrbuf_phys = ++ kzalloc_node(num_pages * sizeof(rcd->rcvegrbuf_phys[0]), ++ GFP_KERNEL, rcd->node_id); ++ if (!rcd->rcvegrbuf_phys) { ++ ret = -ENOMEM; ++ goto bail_free_rcvegr; ++ } ++ } ++ ++ map->size = size; ++ map->dir = DMA_BIDIRECTIONAL; ++ map->sglist = kzalloc_node(num_pages * sizeof(*map->sglist), GFP_KERNEL, ++ knx->numa_node); ++ if (!map->sglist) { ++ ret = -ENOMEM; ++ goto bail_free_rcvegr_phys; ++ } ++ sg_init_table(map->sglist, num_pages); ++ for_each_sg(map->sglist, sg, num_pages, i) { ++ memset(map->pages->va[i], 0, PAGE_SIZE); ++ sg_set_page(sg, vmalloc_to_page(map->pages->va[i]), ++ PAGE_SIZE, 0); ++ } ++ ret = pci_map_sg(dd->pcidev, map->sglist, num_pages, map->dir); ++ if (!ret) { ++ ret = -ENOMEM; ++ goto bail_free_rcvegr_phys; ++ } ++ for_each_sg(map->sglist, sg, num_pages, i) { ++ rcd->rcvegrbuf_phys[i] = sg_dma_address(sg); ++ rcd->rcvegrbuf[i] = map->pages->va[i]; ++ } ++ ++ for (egrbufcnt = i = 0; i < num_pages; i++) { ++ page = rcd->rcvegrbuf_phys[i]; ++ dma_addr = page; ++ for (bufcnt = 0; egrbufcnt < egrcnt && bufcnt < bufs_ppage; ++ egrbufcnt++, bufcnt++) { ++ dd->f_put_tid(dd, rcd->rcvegr_tid_base + ++ egrbufcnt + ++ (u64 __iomem *)((char __iomem *) ++ dd->kregbase + ++ dd->rcvegrbase), ++ RCVHQ_RCV_TYPE_EAGER, dma_addr); ++ dma_addr += egrsize; ++ } ++ } ++ ret = 0; ++ goto bail; ++bail_free_rcvegr_phys: ++ kfree(map->sglist); ++ kfree(rcd->rcvegrbuf_phys); ++ rcd->rcvegrbuf_phys = NULL; ++bail_free_rcvegr: ++ kfree(rcd->rcvegrbuf); ++ rcd->rcvegrbuf = NULL; ++bail_free_scif: ++ scif_put_pages(map->pages); ++bail: ++ return ret; ++} ++ ++void qib_knx_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) ++{ ++ struct qib_knx *knx = rcd->krcd->knx; ++ struct qib_knx_ctxt *ctxt; ++ char buf[16]; ++ int i, ret = 0; ++ ++ if (!rcd || !knx || !knx->ctxts) ++ return; ++ ++ spin_lock(&knx->ctxt_lock); ++ ctxt = knx->ctxts[rcd->ctxt]; ++ knx->ctxts[rcd->ctxt] = NULL; ++ spin_unlock(&knx->ctxt_lock); ++ ++ if (!ctxt) ++ return; ++ ++ if (rcd->rcvhdrq) { ++ /* Unmap the RcvHdr Q */ ++ pci_unmap_sg(dd->pcidev, ctxt->rcvhdrq.sglist, ++ ctxt->rcvhdrq.pages->nr_pages, ++ ctxt->rcvhdrq.dir); ++ /* TODO: do something with return value */ ++ ret = scif_put_pages(ctxt->rcvhdrq.pages); ++ kfree(ctxt->rcvhdrq.sglist); ++ } ++ ++ if (rcd->user_event_mask) ++ /* TODO: do something with return value */ ++ ret = scif_put_pages(ctxt->sbufstatus.pages); ++ ++ if (rcd->rcvhdrtail_kvaddr) { ++ pci_unmap_page(dd->pcidev, ++ ctxt->rcvhdrqtailaddr.dma_mapped_addr, ++ ctxt->rcvhdrqtailaddr.size, ++ ctxt->rcvhdrqtailaddr.dir); ++ /* TODO: do something with return value */ ++ ret = scif_put_pages(ctxt->rcvhdrqtailaddr.pages); ++ } ++ ++ if (rcd->rcvegrbuf) { ++ pci_unmap_sg(dd->pcidev, ctxt->eagerbufs.sglist, ++ ctxt->eagerbufs.pages->nr_pages, ++ ctxt->eagerbufs.dir); ++ /* TODO: do something with return value */ ++ ret = scif_put_pages(ctxt->eagerbufs.pages); ++ kfree(ctxt->eagerbufs.sglist); ++ kfree(rcd->rcvegrbuf); ++ kfree(rcd->rcvegrbuf_phys); ++ } ++ ++ /* We are done with all remote memory, handle local */ ++ qib_knx_unregister_memory(knx, &ctxt->pioavail, "pioavail regs"); ++ qib_knx_unregister_memory(knx, 
&ctxt->uregs, "UserRegs"); ++ for (i = 0; i < QLOGIC_IB_MAX_SUBCTXT; i++) { ++ snprintf(buf, sizeof(buf), "PIO bufs %u:%u", rcd->ctxt, i); ++ qib_knx_unregister_memory(knx, &ctxt->piobufs[i], buf); ++ } ++ ++ kfree(ctxt); ++ kfree(rcd); ++} ++ ++/* ++ * TID management for processes on the MIC happens on the MIC. Therefore, ++ * we only register the HW TID array here. ++ * The MIC will calculate TID array offsets using the same algorithm is ++ * the host. Therefore, it is OK that the entire HW TID array is mapped ++ * since neither side should step on the other. ++ */ ++static int qib_knx_tidrcv_init(struct qib_knx *knx) ++{ ++ struct qib_devdata *dd = knx->dd; ++ struct qib_knx_tid_info info; ++ void *tidbase; ++ int ret = 0; ++ off_t offset = 0; ++ size_t len; ++ char buf[64]; ++ ++ memset(&info, 0, sizeof(info)); ++ ++ info.tidcnt = dd->rcvtidcnt; ++ tidbase = ((char *)dd->kregbase + dd->rcvtidbase); ++ info.tidbase_len = dd->ctxtcnt * dd->rcvtidcnt * sizeof(tidbase); ++ info.tidtemplate = dd->tidtemplate; ++ info.invalidtid = dd->tidinvalid; ++ /* information needed to properly calculate DMA address to MIC pages */ ++ info.bar_addr = knx->bar; ++ info.bar_len = knx->barlen; ++ ++ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node); ++ offset = qib_knx_register_memory(knx, &knx->tidmem, tidbase, ++ info.tidbase_len, SCIF_PROT_WRITE, ++ buf); ++ info.tidbase_offset = offset; ++ if (IS_ERR_VALUE(offset)) ++ ret = offset; ++ len = scif_send(knx->epd.epd, &info, sizeof(info), ++ SCIF_SEND_BLOCK); ++ if (len < sizeof(info)) ++ ret = -EFAULT; ++ return ret; ++} ++ ++static int qib_knx_tidrcv_teardown(struct qib_knx *knx) ++{ ++ char buf[64]; ++ snprintf(buf, sizeof(buf), "TID array KNx%u", knx->peer.node); ++ return qib_knx_unregister_memory(knx, &knx->tidmem, buf); ++} ++ ++static int qib_knx_sdma_init(struct qib_knx *knx) ++{ ++ struct qib_knx_host_mem flags; ++ struct qib_knx_knc_mem mflags; ++ struct qib_knx_sdma *sdma; ++ char buf[64]; ++ int ret = 0; ++ ++ sdma = kzalloc_node(sizeof(*sdma), GFP_KERNEL, knx->numa_node); ++ if (!sdma) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ sdma->hflags = kzalloc_node(PAGE_SIZE, GFP_KERNEL, knx->numa_node); ++ if (!sdma->hflags) { ++ ret = -ENOMEM; ++ goto done_free; ++ } ++ snprintf(buf, sizeof(buf), "Host SDMA flags KNx%u", knx->peer.node); ++ flags.flags_offset = qib_knx_register_memory(knx, &sdma->hflags_mem, ++ sdma->hflags, ++ PAGE_SIZE, ++ SCIF_PROT_WRITE, ++ buf); ++ if (IS_ERR_VALUE(flags.flags_offset)) { ++ ret = flags.flags_offset; ++ goto free_flags; ++ } ++ sdma->desc_num = knx->dd->pport[0].sdma_descq_cnt; ++ flags.desc_num = sdma->desc_num; ++ ret = scif_send(knx->epd.epd, &flags, sizeof(flags), ++ SCIF_SEND_BLOCK); ++ if (ret < sizeof(flags)) ++ goto unregister; ++ ret = scif_recv(knx->epd.epd, &mflags, sizeof(mflags), ++ SCIF_RECV_BLOCK); ++ if (ret < sizeof(mflags)) { ++ ret = -EINVAL; ++ goto unregister; ++ } ++ ret = scif_get_pages(knx->epd.epd, mflags.flags_offset, ++ PAGE_SIZE, &sdma->mflag_pages); ++ if (ret < 0 || !sdma->mflag_pages->nr_pages) { ++ ret = -EFAULT; ++ goto unregister; ++ } ++ sdma->mflags = sdma->mflag_pages->va[0]; ++ ret = scif_get_pages(knx->epd.epd, mflags.queue_offset, ++ mflags.queue_len, &sdma->queue_pages); ++ if (ret < 0) ++ goto put_flags; ++ if ((sdma->queue_pages->nr_pages * PAGE_SIZE) != ++ mflags.queue_len) { ++ ret = -EFAULT; ++ goto put_queue; ++ } ++ sdma->queue = sdma->queue_pages->va[0]; ++ sdma->complete = -1; ++ sdma->head = -1; ++ /* set the initial trigger value */ ++ 
QIB_KNX_SDMA_SET(sdma->hflags->trigger, -1); ++ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete); ++ snprintf(knx->tname, sizeof(knx->tname), "qib/mic%u/poll", ++ knx->peer.node); ++ knx->sdma = sdma; ++ ret = 0; ++ goto done; ++put_queue: ++ scif_put_pages(sdma->queue_pages); ++put_flags: ++ scif_put_pages(sdma->mflag_pages); ++unregister: ++ qib_knx_unregister_memory(knx, &sdma->hflags_mem, buf); ++free_flags: ++ kfree(sdma->hflags); ++done_free: ++ kfree(sdma); ++done: ++ /* ++ * we have to respond to the MIC so it doesn't get stuck ++ * in the scif_recv call ++ */ ++ scif_send(knx->epd.epd, &ret, sizeof(ret), SCIF_SEND_BLOCK); ++ return ret; ++} ++ ++static void qib_knx_sdma_teardown(struct qib_knx *knx) ++{ ++ int ret; ++ if (knx->sdma_poll) ++ ret = kthread_stop(knx->sdma_poll); ++ if (knx->sdma) { ++ if (knx->sdma->queue_pages->nr_pages) { ++ knx->sdma->queue = NULL; ++ scif_put_pages(knx->sdma->queue_pages); ++ } ++ if (knx->sdma->mflag_pages->nr_pages) { ++ knx->sdma->mflags = NULL; ++ scif_put_pages(knx->sdma->mflag_pages); ++ } ++ kfree(knx->sdma->hflags); ++ kfree(knx->sdma); ++ knx->sdma = NULL; ++ } ++} ++ ++int qib_knx_sdma_queue_create(struct file *fd) ++{ ++ struct qib_ctxtdata *rcd = ctxt_fp(fd); ++ struct qib_devdata *dd = rcd->dd; ++ struct qib_knx *knx = rcd->krcd->knx; ++ struct qib_knx_ctxt *ctxt = knx->ctxts[rcd->ctxt]; ++ u8 subctxt = subctxt_fp(fd); ++ int ret = 0; ++ ++ if (!ctxt) { ++ ret = -EINVAL; ++ goto done; ++ } ++ ctxt->pq[subctxt] = qib_user_sdma_queue_create(&dd->pcidev->dev, ++ dd->unit, rcd->ctxt, ++ subctxt); ++ if (!ctxt->pq[subctxt]) ++ ret = -ENOMEM; ++ user_sdma_queue_fp(fd) = ctxt->pq[subctxt]; ++ /* ++ * We start the polling thread the first time a user SDMA ++ * queue is created. There is no reason to take up CPU ++ * cycles before then. ++ */ ++ if (atomic_inc_return(&knx->tref) == 1) { ++ knx->sdma_poll = kthread_run(qib_knx_sdma_poll, knx, ++ knx->tname); ++ if (IS_ERR(knx->sdma_poll)) { ++ ret = -PTR_ERR(knx->sdma_poll); ++ atomic_dec(&knx->tref); ++ goto free_queue; ++ } ++ } ++ goto done; ++free_queue: ++ user_sdma_queue_fp(fd) = NULL; ++ qib_user_sdma_queue_destroy(ctxt->pq[subctxt]); ++ ctxt->pq[subctxt] = NULL; ++done: ++ return ret; ++} ++ ++void qib_knx_sdma_queue_destroy(struct qib_filedata *fd) ++{ ++ struct qib_ctxtdata *rcd = fd->rcd; ++ struct qib_knx *knx; ++ unsigned ctxt = rcd->ctxt, subctxt = fd->subctxt; ++ ++ /* Host processes do not have a KNX rcd pointer. */ ++ if (!rcd->krcd) ++ return; ++ knx = rcd->krcd->knx; ++ /* We still have the memory pointer through fd->pq */ ++ spin_lock(&knx->ctxt_lock); ++ if (knx->ctxts[ctxt]) ++ knx->ctxts[ctxt]->pq[subctxt] = NULL; ++ spin_unlock(&knx->ctxt_lock); ++ if (atomic_dec_and_test(&knx->tref)) { ++ int ret = kthread_stop(knx->sdma_poll); ++ knx->sdma_poll = NULL; ++ } ++} ++ ++/* ++ * Convert a MIC physical address to the corresponding host page. 
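++ * MIC memory is visible to the host through the card's PCI BAR, so the
++ * host physical address is simply bar + addr; addresses that would run
++ * past the end of the BAR yield NULL.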
++ */ ++static __always_inline struct page * ++qib_knx_phys_to_page(struct qib_knx *knx, unsigned long addr) { ++ unsigned long paddr; ++ if ((knx->bar + addr + PAGE_SIZE) > ++ (knx->bar + knx->barlen)) ++ return NULL; ++ paddr = knx->bar + addr; ++ return pfn_to_page(paddr >> PAGE_SHIFT); ++} ++ ++static int qib_knx_sdma_pkts_to_descs(struct qib_knx_ctxt *ctxt, ++ struct qib_knx_sdma_desc *desc, ++ struct qib_user_sdma_queue *pq, ++ int *ndesc, struct list_head *list) ++{ ++ struct qib_knx *knx = ctxt->knx; ++ struct qib_user_sdma_pkt *pkt; ++ dma_addr_t pbc_dma_addr; ++ unsigned pktnw, pbcnw; ++ u32 counter; ++ u16 frag_size; ++ int ret = 0; ++ __le32 *pbc; ++ ++ counter = pq->counter; ++ ++ pbc = qib_user_sdma_alloc_header(pq, desc->pbclen, &pbc_dma_addr); ++ if (!pbc) { ++ ret = -ENOMEM; ++ goto done; ++ } ++ memcpy(pbc, desc->pbc, desc->pbclen); ++ ++ pktnw = (le32_to_cpu(*pbc) & 0xFFFF); ++ /* ++ * This assignment is a bit strange. it's because the ++ * the pbc counts the number of 32 bit words in the full ++ * packet _except_ the first word of the pbc itself... ++ */ ++ pbcnw = (desc->pbclen >> 2) - 1; ++ ++ if (pktnw < pbcnw) { ++ ret = -EINVAL; ++ goto free_pbc; ++ } ++ ++ if (pktnw != ((desc->length >> 2) + pbcnw)) { ++ ret = -EINVAL; ++ goto free_pbc; ++ } ++ ++ frag_size = (le32_to_cpu(*pbc)>>16) & 0xFFFF; ++ if (((frag_size ? frag_size : desc->length) + desc->pbclen) > ++ ctxt->ppd->ibmaxlen) { ++ ret = -EINVAL; ++ goto free_pbc; ++ } ++ if (frag_size) { ++ /* new SDMA "protocol" */ ++ unsigned pktsize, n; ++ ++ n = desc->npages * ((2 * PAGE_SIZE / frag_size) + 1); ++ pktsize = sizeof(*pkt) + sizeof(pkt->addr[0]) * n; ++ ++ pkt = kzalloc(pktsize + desc->tidlen, GFP_KERNEL); ++ if (!pkt) { ++ ret = -ENOMEM; ++ goto free_pbc; ++ } ++ pkt->largepkt = 1; ++ pkt->frag_size = frag_size; ++ pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); ++ ++ if (desc->tidlen) { ++ char *tidsmptr = (char *)pkt + pktsize; ++ memcpy(tidsmptr, desc->tidsm, desc->tidlen); ++ pkt->tidsm = ++ (struct qib_tid_session_member *)tidsmptr; ++ pkt->tidsmcount = desc->tidlen / ++ sizeof(*desc->tidsm); ++ pkt->tidsmidx = 0; ++ } ++ *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); ++ } else { ++ /* old SDMA */ ++ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); ++ if (!pkt) { ++ ret = -ENOMEM; ++ goto free_pbc; ++ } ++ pkt->largepkt = 0; ++ pkt->frag_size = desc->length; ++ pkt->addrlimit = ARRAY_SIZE(pkt->addr); ++ } ++ pkt->bytes_togo = desc->length; ++ pkt->payload_size = 0; ++ pkt->counter = counter; ++ pkt->tiddma = !!desc->tidlen; ++ /* ++ * The generic user SDMA code will use this as a flag to ++ * decide whether to call the KNx-specific pkt free ++ * function. However, it doesn't know what the value ++ * actually means. ++ */ ++ pkt->remote = (u64)knx; ++ ++ qib_user_sdma_init_frag(pkt, 0, ++ 0, desc->pbclen, ++ 1, 0, ++ 0, 0, ++ NULL, pbc, ++ pbc_dma_addr, desc->pbclen); ++ pkt->index = 0; ++ pkt->naddr = 1; ++ ++ if (desc->npages) { ++ /* we have user data */ ++ int i; ++ struct page *page; ++ unsigned plen = 0, len = desc->length; ++ for (i = 0; i < desc->npages; i++) { ++ unsigned long off = (i == 0 ? desc->offset : 0); ++ plen = (len > PAGE_SIZE ? PAGE_SIZE : len); ++ page = qib_knx_phys_to_page(knx, desc->pages[i]); ++ ret = qib_user_sdma_page_to_frags(knx->dd, pq, ++ pkt, page, 0, off, ++ (off + plen > PAGE_SIZE ? 
++ PAGE_SIZE - off : plen), ++ NULL); ++ if (ret < 0) ++ goto free_sdma; ++ len -= plen - off; ++ } ++ } else { ++ pkt->addr[0].last_desc = 1; ++ if (pbc_dma_addr == 0) { ++ pbc_dma_addr = dma_map_single(&knx->dd->pcidev->dev, ++ pbc, desc->pbclen, ++ DMA_TO_DEVICE); ++ if (dma_mapping_error(&knx->dd->pcidev->dev, ++ pbc_dma_addr)) { ++ ret = -ENOMEM; ++ goto free_sdma; ++ } ++ pkt->addr[0].addr = pbc_dma_addr; ++ pkt->addr[0].dma_mapped = 1; ++ } ++ } ++ counter++; ++ pkt->pq = pq; ++ pkt->index = 0; ++ *ndesc = pkt->naddr; ++ ++ list_add_tail(&pkt->list, list); ++ goto done; ++free_sdma: ++ if (pkt->largepkt) ++ kfree(pkt); ++ else ++ kmem_cache_free(pq->pkt_slab, pkt); ++free_pbc: ++ if (pbc_dma_addr) ++ dma_pool_free(pq->header_cache, pbc, pbc_dma_addr); ++ else ++ kfree(pbc); ++done: ++ return ret; ++} ++ ++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt) ++{ ++ struct qib_knx *knx = (struct qib_knx *)pkt->remote; ++ struct qib_knx_sdma *sdma = knx->sdma; ++ sdma_next(sdma, complete); ++ QIB_KNX_SDMA_SET(sdma->mflags->complete, sdma->complete); ++} ++ ++static int qib_knx_sdma_poll(void *data) ++{ ++ struct qib_knx *knx = (struct qib_knx *)data; ++ struct qib_knx_ctxt *ctxt; ++ struct qib_knx_sdma_desc desc; ++ struct qib_knx_sdma *sdma = knx->sdma; ++ struct qib_user_sdma_queue *pq; ++ struct list_head list; ++ u32 new_head; ++ int ret = 0, ndesc = 0, added; ++ ++ if (!sdma) ++ return -EFAULT; ++ ++ while (!kthread_should_stop()) { ++ added = 0; ++ new_head = QIB_KNX_SDMA_VALUE(sdma->hflags->trigger); ++ while (sdma->head != new_head) { ++ knx_sdma_next(sdma); ++ qib_knx_memcpy(&desc, sdma->queue + sdma->head, ++ sizeof(desc)); ++ if (!desc.ctxt) { ++ QIB_KNX_SDMA_STATUS(sdma, -EINVAL); ++ continue; ++ } ++ spin_lock(&knx->ctxt_lock); ++ ctxt = knx->ctxts[desc.ctxt]; ++ if (!ctxt) { ++ /* we should never get here */ ++ QIB_KNX_SDMA_STATUS(sdma, -EINVAL); ++ goto done_unlock; ++ } ++ pq = ctxt->pq[desc.subctxt]; ++ if (!pq) { ++ QIB_KNX_SDMA_STATUS(sdma, -EFAULT); ++ goto done_unlock; ++ } ++ mutex_lock(&pq->lock); ++ if (pq->added > ctxt->ppd->sdma_descq_removed) ++ qib_user_sdma_hwqueue_clean(ctxt->ppd); ++ if (pq->num_sending) ++ qib_user_sdma_queue_clean(ctxt->ppd, pq); ++ ++ INIT_LIST_HEAD(&list); ++ ret = qib_knx_sdma_pkts_to_descs(ctxt, &desc, pq, ++ &ndesc, &list); ++ QIB_KNX_SDMA_STATUS(sdma, ret); ++ if (!list_empty(&list)) { ++ if (qib_sdma_descq_freecnt(ctxt->ppd) < ++ ndesc) { ++ qib_user_sdma_hwqueue_clean( ++ ctxt->ppd); ++ if (pq->num_sending) ++ qib_user_sdma_queue_clean( ++ ctxt->ppd, pq); ++ } ++ ret = qib_user_sdma_push_pkts(ctxt->ppd, ++ pq, &list, 1); ++ if (ret < 0) ++ goto free_pkts; ++ else { ++ pq->counter++; ++ added++; ++ } ++ } ++free_pkts: ++ if (!list_empty(&list)) ++ qib_user_sdma_free_pkt_list( ++ &knx->dd->pcidev->dev, pq, &list); ++ mutex_unlock(&pq->lock); ++done_unlock: ++ spin_unlock(&knx->ctxt_lock); ++ } ++ if (!added) { ++ int i; ++ /* ++ * Push the queues along ++ * The polling thread will enter the inner loop only ++ * if the KNX has posted new descriptors to the queue. ++ * However, any packets that have been completed by ++ * the HW need to be cleaned and that won't happen ++ * unless we explicitly check. 
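++ * This idle path therefore walks every context/subcontext queue
++ * and cleans any packets the hardware has finished with.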
++ */ ++ for (i = 0; ++ i < knx->dd->ctxtcnt * QLOGIC_IB_MAX_SUBCTXT; ++ i++) { ++ int c = i / QLOGIC_IB_MAX_SUBCTXT, ++ s = i % QLOGIC_IB_MAX_SUBCTXT; ++ spin_lock(&knx->ctxt_lock); ++ ctxt = knx->ctxts[c]; ++ if (!ctxt) ++ goto loop_unlock; ++ pq = ctxt->pq[s]; ++ if (!pq) ++ goto loop_unlock; ++ mutex_lock(&pq->lock); ++ if (pq->num_sending) ++ qib_user_sdma_queue_clean(ctxt->ppd, ++ pq); ++ mutex_unlock(&pq->lock); ++loop_unlock: ++ spin_unlock(&knx->ctxt_lock); ++ } ++ might_sleep(); ++ } ++ } ++ return ret; ++} ++ ++void qib_knx_remove_device(struct qib_devdata *dd) ++{ ++ if (server && dd->num_knx) { ++ struct qib_knx *knx, *knxp; ++ list_for_each_entry_safe(knx, knxp, &server->clients, list) { ++ if (knx->dd == dd) { ++ spin_lock(&server->client_lock); ++ list_del(&knx->list); ++ server->nclients--; ++ spin_unlock(&server->client_lock); ++ qib_knx_free(knx, 0); ++ kfree(knx); ++ } ++ } ++ } ++ return; ++} ++ ++int __init qib_knx_server_init(void) ++{ ++ server = kzalloc(sizeof(struct qib_knx_server), GFP_KERNEL); ++ if (!server) ++ return -ENOMEM; ++ INIT_LIST_HEAD(&server->clients); ++ spin_lock_init(&server->client_lock); ++ server->kthread = kthread_run(qib_knx_server_listen, ++ server, CLIENT_THREAD_NAME(0)); ++ if (IS_ERR(server->kthread)) ++ return -PTR_ERR(server->kthread); ++ return 0; ++} ++ ++void __exit qib_knx_server_exit(void) ++{ ++ if (server) { ++ struct qib_knx *t, *tt; ++ /* Stop the thread so we don't accept any new connections. */ ++ kthread_stop(server->kthread); ++ list_for_each_entry_safe(t, tt, &server->clients, list) { ++ spin_lock(&server->client_lock); ++ list_del(&t->list); ++ spin_unlock(&server->client_lock); ++ qib_knx_free(t, 1); ++ kfree(t); ++ } ++ kfree(server); ++ } ++} +diff -urN a9/drivers/infiniband/hw/qib/qib_knx_common.h a10/drivers/infiniband/hw/qib/qib_knx_common.h +--- a9/drivers/infiniband/hw/qib/qib_knx_common.h 1969-12-31 16:00:00.000000000 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_knx_common.h 2015-01-05 15:10:58.252446692 -0800 +@@ -0,0 +1,126 @@ ++/* ++ * Copyright (c) 2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. 
++ */ ++#ifndef _QIB_KNX_COMMON_H ++#define _QIB_KNX_COMMON_H ++ ++struct qib_device_info { ++ u16 unit; ++}; ++ ++#define QIB_SDMA_MAX_NPAGES 33 ++#define QIB_KNX_SDMA_VALUE(fld) ((volatile u64)fld) ++#define QIB_KNX_SDMA_SET(fld, val) \ ++ do { \ ++ fld = (u64)(val); \ ++ smp_mb(); \ ++ } while (0) ++ ++struct qib_knx_host_mem { ++ off_t flags_offset; ++ unsigned desc_num; ++}; ++ ++struct qib_knx_knc_mem { ++ off_t flags_offset; ++ off_t queue_offset; ++ size_t queue_len; ++}; ++ ++struct qib_tid_sm { ++ __u16 tid; ++ __u16 offset; ++ __u16 length; ++}; ++ ++/* ++ * SDMA transfer descriptor. This structure communicates the SDMA ++ * transfers from the MIC to the host. It is very important for ++ * performance reasons that its size is multiple of 64B in order ++ * to guarantee proper alignment in the descriptor array. ++ */ ++struct qib_knx_sdma_desc { ++ u16 ctxt; ++ u16 subctxt; ++ u32 pbclen; ++ __le32 pbc[16]; ++ u64 length; ++ u32 npages; ++ unsigned tidlen; ++ off_t offset; ++ unsigned long pages[QIB_SDMA_MAX_NPAGES]; ++ /* This array is 198B so the compiler will pad ++ * it by 2B to make it multiple of 8B. */ ++ struct qib_tid_sm tidsm[QIB_SDMA_MAX_NPAGES]; ++ /* ++ * The two paddings below are included in order to ++ * make the size of the entire struct 576B (multiple ++ * of 64B). The goal is that all elements in an array ++ * of struct qib_knx_sdma_desc are 64B aligned. ++ */ ++ u16 __padding0; ++ u64 __padding1[2]; ++}; ++ ++/* ++ * trigger, status, and complete fields are by 8 to be ++ * cacheline size. ++ */ ++struct qib_knx_sdma_hflags { ++ u64 trigger; ++ u64 __padding[7]; ++}; ++ ++#define sdma_next(s, fld) \ ++ ((s)->fld = (((s)->fld + 1) == (s)->desc_num) ? 0 : ((s)->fld + 1)) ++ ++struct qib_knx_sdma_mflags { ++ u64 status; ++ u64 __padding1[7]; ++ u64 complete; ++ u64 __padding2[7]; ++}; ++ ++struct qib_knx_tid_info { ++ /* this is the entire set of 512 entries (= 4K) so ++ * we can resgister. subctxt devision will be done ++ * in MIC driver. */ ++ off_t tidbase_offset; ++ size_t tidbase_len; ++ u64 tidbase; ++ unsigned tidcnt; ++ u64 tidtemplate; ++ unsigned long invalidtid; ++ u64 bar_addr; ++ u64 bar_len; ++}; ++ ++#endif /* _QIB_KNX_COMMON_H */ +diff -urN a9/drivers/infiniband/hw/qib/qib_knx.h a10/drivers/infiniband/hw/qib/qib_knx.h +--- a9/drivers/infiniband/hw/qib/qib_knx.h 1969-12-31 16:00:00.000000000 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_knx.h 2015-01-05 15:10:58.252446692 -0800 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ */ ++#ifndef _QIB_KNX_H ++#define _QIB_KNX_H ++ ++#include "qib.h" ++ ++enum qib_knx_ctxtinfo_type { ++ QIB_KNX_CTXTINFO_UREG, ++ QIB_KNX_CTXTINFO_PIOAVAIL, ++ QIB_KNX_CTXTINFO_STATUS, ++ QIB_KNX_CTXTINFO_PIOBUFBASE, ++ QIB_KNX_CTXTINFO_FLAGS ++}; ++ ++#ifdef QIB_CONFIG_KNX ++int __init qib_knx_server_init(void); ++void __exit qib_knx_server_exit(void); ++ ++void qib_knx_remove_device(struct qib_devdata *); ++ ++inline struct qib_knx *qib_knx_get(uint16_t); ++inline struct qib_devdata *qib_knx_node_to_dd(uint16_t); ++int qib_knx_alloc_ctxt(u16, unsigned); ++int qib_knx_setup_piobufs(struct qib_devdata *, struct qib_ctxtdata *, __u16); ++int qib_knx_setup_pioregs(struct qib_devdata *, struct qib_ctxtdata *, ++ struct qib_base_info *); ++int qib_knx_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *, ++ struct qib_base_info *); ++int qib_knx_setup_eagerbufs(struct qib_ctxtdata *, struct qib_base_info *); ++void qib_knx_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); ++__u64 qib_knx_ctxt_info(struct qib_ctxtdata *, enum qib_knx_ctxtinfo_type, ++ struct file *); ++int qib_knx_sdma_queue_create(struct file *); ++void qib_knx_sdma_queue_destroy(struct qib_filedata *); ++#else ++static inline u64 qib_knx_ctxt_info( ++ struct qib_ctxtdata *rcd, ++ enum qib_knx_ctxtinfo_type type, ++ struct file *fp) ++{ ++ return 0; ++} ++#endif ++#endif /* _QIB_KNX_H */ +diff -urN a9/drivers/infiniband/hw/qib/qib_user_sdma.c a10/drivers/infiniband/hw/qib/qib_user_sdma.c +--- a9/drivers/infiniband/hw/qib/qib_user_sdma.c 2015-01-05 15:05:04.279461602 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_user_sdma.c 2015-01-05 15:10:58.252446692 -0800 +@@ -63,80 +63,6 @@ + pid_t pid; + }; + +-struct qib_user_sdma_pkt { +- struct list_head list; /* list element */ +- +- u8 tiddma; /* if this is NEW tid-sdma */ +- u8 largepkt; /* this is large pkt from kmalloc */ +- u16 frag_size; /* frag size used by PSM */ +- u16 index; /* last header index or push index */ +- u16 naddr; /* dimension of addr (1..3) ... */ +- u16 addrlimit; /* addr array size */ +- u16 tidsmidx; /* current tidsm index */ +- u16 tidsmcount; /* tidsm array item count */ +- u16 payload_size; /* payload size so far for header */ +- u32 bytes_togo; /* bytes for processing */ +- u32 counter; /* sdma pkts queued counter for this entry */ +- struct qib_tid_session_member *tidsm; /* tid session member array */ +- struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ +- u64 added; /* global descq number of entries */ +- +- struct { +- u16 offset; /* offset for kvaddr, addr */ +- u16 length; /* length in page */ +- u16 first_desc; /* first desc */ +- u16 last_desc; /* last desc */ +- u16 put_page; /* should we put_page? */ +- u16 dma_mapped; /* is page dma_mapped? 
*/ +- u16 dma_length; /* for dma_unmap_page() */ +- u16 padding; +- struct page *page; /* may be NULL (coherent mem) */ +- void *kvaddr; /* FIXME: only for pio hack */ +- dma_addr_t addr; +- } addr[4]; /* max pages, any more and we coalesce */ +-}; +- +-struct qib_user_sdma_queue { +- /* +- * pkts sent to dma engine are queued on this +- * list head. the type of the elements of this +- * list are struct qib_user_sdma_pkt... +- */ +- struct list_head sent; +- +- /* +- * Because above list will be accessed by both process and +- * signal handler, we need a spinlock for it. +- */ +- spinlock_t sent_lock ____cacheline_aligned_in_smp; +- +- /* headers with expected length are allocated from here... */ +- char header_cache_name[64]; +- struct dma_pool *header_cache; +- +- /* packets are allocated from the slab cache... */ +- char pkt_slab_name[64]; +- struct kmem_cache *pkt_slab; +- +- /* as packets go on the queued queue, they are counted... */ +- u32 counter; +- u32 sent_counter; +- /* pending packets, not sending yet */ +- u32 num_pending; +- /* sending packets, not complete yet */ +- u32 num_sending; +- /* global descq number of entry of last sending packet */ +- u64 added; +- +- /* dma page table */ +- struct rb_root dma_pages_root; +- +- struct qib_user_sdma_rb_node *sdma_rb_node; +- +- /* protect everything above... */ +- struct mutex lock; +-}; +- + static struct qib_user_sdma_rb_node * + qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) + { +@@ -254,12 +180,12 @@ + return pq; + } + +-static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, +- int i, u16 offset, u16 len, +- u16 first_desc, u16 last_desc, +- u16 put_page, u16 dma_mapped, +- struct page *page, void *kvaddr, +- dma_addr_t dma_addr, u16 dma_length) ++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, ++ int i, u16 offset, u16 len, ++ u16 first_desc, u16 last_desc, ++ u16 put_page, u16 dma_mapped, ++ struct page *page, void *kvaddr, ++ dma_addr_t dma_addr, u16 dma_length) + { + pkt->addr[i].offset = offset; + pkt->addr[i].length = len; +@@ -273,7 +199,7 @@ + pkt->addr[i].dma_length = dma_length; + } + +-static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, ++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) + { + void *hdr; +@@ -295,11 +221,11 @@ + return hdr; + } + +-static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, +- struct qib_user_sdma_queue *pq, +- struct qib_user_sdma_pkt *pkt, +- struct page *page, u16 put, +- u16 offset, u16 len, void *kvaddr) ++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, ++ struct qib_user_sdma_queue *pq, ++ struct qib_user_sdma_pkt *pkt, ++ struct page *page, u16 put, ++ u16 offset, u16 len, void *kvaddr) + { + __le16 *pbc16; + void *pbcvaddr; +@@ -314,21 +240,27 @@ + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { +- /* +- * dma mapping error, pkt has not managed +- * this page yet, return the page here so +- * the caller can ignore this page. +- */ +- if (put) { +- put_page(page); +- } else { +- /* coalesce case */ +- kunmap(page); +- __free_page(page); ++#ifdef QIB_CONFIG_KNX ++ if (!pkt->remote) { ++#endif ++ /* ++ * dma mapping error, pkt has not managed ++ * this page yet, return the page here so ++ * the caller can ignore this page. 
++ */ ++ if (put) { ++ put_page(page); ++ } else { ++ /* coalesce case */ ++ kunmap(page); ++ __free_page(page); ++ } ++ ret = -ENOMEM; ++ goto done; + } +- ret = -ENOMEM; +- goto done; ++#ifdef QIB_CONFIG_KNX + } ++#endif + offset = 0; + dma_mapped = 1; + +@@ -630,13 +562,19 @@ + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + +- if (pkt->addr[i].kvaddr) +- kunmap(pkt->addr[i].page); ++#ifdef QIB_CONFIG_KNX ++ if (!pkt->remote) { ++#endif ++ if (pkt->addr[i].kvaddr) ++ kunmap(pkt->addr[i].page); + +- if (pkt->addr[i].put_page) +- put_page(pkt->addr[i].page); +- else +- __free_page(pkt->addr[i].page); ++ if (pkt->addr[i].put_page) ++ put_page(pkt->addr[i].page); ++ else ++ __free_page(pkt->addr[i].page); ++#ifdef QIB_CONFIG_KNX ++ } ++#endif + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { +@@ -775,9 +713,9 @@ + } + + /* free a packet list -- return counter value of last packet */ +-static void qib_user_sdma_free_pkt_list(struct device *dev, +- struct qib_user_sdma_queue *pq, +- struct list_head *list) ++void qib_user_sdma_free_pkt_list(struct device *dev, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *list) + { + struct qib_user_sdma_pkt *pkt, *pkt_next; + +@@ -787,6 +725,10 @@ + for (i = 0; i < pkt->naddr; i++) + qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); + ++#ifdef QIB_CONFIG_KNX ++ if (pkt->remote) ++ qib_knx_sdma_free_pkt(pkt); ++#endif + if (pkt->largepkt) + kfree(pkt); + else +@@ -970,6 +912,9 @@ + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; ++#ifdef QIB_CONFIG_KNX ++ pkt->remote = 0; ++#endif + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ +@@ -1045,8 +990,8 @@ + } + + /* try to clean out queue -- needs pq->lock */ +-static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, +- struct qib_user_sdma_queue *pq) ++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq) + { + struct qib_devdata *dd = ppd->dd; + struct list_head free_list; +@@ -1110,7 +1055,7 @@ + } + + /* clean descriptor queue, returns > 0 if some elements cleaned */ +-static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd) ++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd) + { + int ret; + unsigned long flags; +@@ -1321,9 +1266,9 @@ + } + + /* pq->lock must be held, get packets on the wire... */ +-static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, +- struct qib_user_sdma_queue *pq, +- struct list_head *pktlist, int count) ++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *pktlist, int count) + { + unsigned long flags; + +diff -urN a9/drivers/infiniband/hw/qib/qib_user_sdma.h a10/drivers/infiniband/hw/qib/qib_user_sdma.h +--- a9/drivers/infiniband/hw/qib/qib_user_sdma.h 2015-01-05 15:05:04.280461602 -0800 ++++ a10/drivers/infiniband/hw/qib/qib_user_sdma.h 2015-01-05 15:10:58.253446692 -0800 +@@ -31,12 +31,108 @@ + */ + #include + +-struct qib_user_sdma_queue; ++struct qib_user_sdma_pkt { ++ struct list_head list; /* list element */ ++ ++ u8 tiddma; /* if this is NEW tid-sdma */ ++ u8 largepkt; /* this is large pkt from kmalloc */ ++ u16 frag_size; /* frag size used by PSM */ ++ u16 index; /* last header index or push index */ ++ u16 naddr; /* dimension of addr (1..3) ... 
*/ ++ u16 addrlimit; /* addr array size */ ++ u16 tidsmidx; /* current tidsm index */ ++ u16 tidsmcount; /* tidsm array item count */ ++ u16 payload_size; /* payload size so far for header */ ++ u32 bytes_togo; /* bytes for processing */ ++ u32 counter; /* sdma pkts queued counter for this entry */ ++ struct qib_tid_session_member *tidsm; /* tid session member array */ ++ struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ ++ u64 added; /* global descq number of entries */ ++#ifdef QIB_CONFIG_KNX ++ u64 remote; /* does the packet originate on the host */ ++#endif ++ ++ struct { ++ u16 offset; /* offset for kvaddr, addr */ ++ u16 length; /* length in page */ ++ u16 first_desc; /* first desc */ ++ u16 last_desc; /* last desc */ ++ u16 put_page; /* should we put_page? */ ++ u16 dma_mapped; /* is page dma_mapped? */ ++ u16 dma_length; /* for dma_unmap_page() */ ++ u16 padding; ++ struct page *page; /* may be NULL (coherent mem) */ ++ void *kvaddr; /* FIXME: only for pio hack */ ++ dma_addr_t addr; ++ } addr[4]; /* max pages, any more and we coalesce */ ++}; ++ ++struct qib_user_sdma_queue { ++ /* ++ * pkts sent to dma engine are queued on this ++ * list head. the type of the elements of this ++ * list are struct qib_user_sdma_pkt... ++ */ ++ struct list_head sent; ++ ++ /* ++ * Because above list will be accessed by both process and ++ * signal handler, we need a spinlock for it. ++ */ ++ spinlock_t sent_lock ____cacheline_aligned_in_smp; ++ ++ /* headers with expected length are allocated from here... */ ++ char header_cache_name[64]; ++ struct dma_pool *header_cache; ++ ++ /* packets are allocated from the slab cache... */ ++ char pkt_slab_name[64]; ++ struct kmem_cache *pkt_slab; ++ ++ /* as packets go on the queued queue, they are counted... */ ++ u32 counter; ++ u32 sent_counter; ++ /* pending packets, not sending yet */ ++ u32 num_pending; ++ /* sending packets, not complete yet */ ++ u32 num_sending; ++ /* global descq number of entry of last sending packet */ ++ u64 added; ++ ++ /* dma page table */ ++ struct rb_root dma_pages_root; ++ ++ struct qib_user_sdma_rb_node *sdma_rb_node; ++ ++ /* protect everything above... 
*/ ++ struct mutex lock; ++}; + + struct qib_user_sdma_queue * + qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); + void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq); +- ++void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, ++ size_t len, dma_addr_t *dma_addr); ++void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, ++ int i, u16 offset, u16 len, ++ u16 first_desc, u16 last_desc, ++ u16 put_page, u16 dma_mapped, ++ struct page *page, void *kvaddr, ++ dma_addr_t dma_addr, u16 dma_length); ++int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, ++ struct qib_user_sdma_queue *pq, ++ struct qib_user_sdma_pkt *pkt, ++ struct page *page, u16 put, ++ u16 offset, u16 len, void *kvaddr); ++int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd); ++int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq); ++void qib_user_sdma_free_pkt_list(struct device *dev, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *list); ++int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, ++ struct qib_user_sdma_queue *pq, ++ struct list_head *pktlist, int count); + int qib_user_sdma_writev(struct qib_ctxtdata *pd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, +@@ -50,3 +146,9 @@ + + u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq); + u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq); ++ ++/* ++ * This function prototype somewhat polutes this header file ++ * but I don't want to create a new header file just for it. ++ */ ++void qib_knx_sdma_free_pkt(struct qib_user_sdma_pkt *pkt); diff --git a/tech-preview/xeon-phi/0011-correct-ib_addr.h-for-older-kernels.patch b/tech-preview/xeon-phi/0011-correct-ib_addr.h-for-older-kernels.patch new file mode 100644 index 0000000..bce5ecd --- /dev/null +++ b/tech-preview/xeon-phi/0011-correct-ib_addr.h-for-older-kernels.patch @@ -0,0 +1,46 @@ +From 536a8d5b5c68ecd2ca73446f25443fe8bb234a46 Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Thu, 29 May 2014 14:35:13 -0700 +Subject: [PATCH 11/12] correct ib_addr.h for older kernels + +Signed-off-by: Phil Cayton +--- +diff -urN a10/include/rdma/ib_addr.h a11/include/rdma/ib_addr.h +--- a10/include/rdma/ib_addr.h 2015-01-05 15:10:42.263447365 -0800 ++++ a11/include/rdma/ib_addr.h 2015-01-05 15:12:36.058442572 -0800 +@@ -239,6 +239,27 @@ + return 0; + } + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ++static inline int iboe_get_rate(struct net_device *dev) ++{ ++ struct ethtool_cmd cmd; ++ ++ if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings || ++ dev->ethtool_ops->get_settings(dev, &cmd)) ++ return IB_RATE_PORT_CURRENT; ++ ++ if (cmd.speed >= 40000) ++ return IB_RATE_40_GBPS; ++ else if (cmd.speed >= 30000) ++ return IB_RATE_30_GBPS; ++ else if (cmd.speed >= 20000) ++ return IB_RATE_20_GBPS; ++ else if (cmd.speed >= 10000) ++ return IB_RATE_10_GBPS; ++ else ++ return IB_RATE_PORT_CURRENT; ++} ++#else + static inline int iboe_get_rate(struct net_device *dev) + { + struct ethtool_cmd cmd; +@@ -263,6 +284,7 @@ + else + return IB_RATE_PORT_CURRENT; + } ++#endif + + static inline int rdma_link_local_addr(struct in6_addr *addr) + { diff --git a/tech-preview/xeon-phi/0012-add-mlx4-cq_comp-locking-already-done-in-event-handl.patch b/tech-preview/xeon-phi/0012-add-mlx4-cq_comp-locking-already-done-in-event-handl.patch new file mode 100644 index 0000000..498b1c8 --- /dev/null +++ b/tech-preview/xeon-phi/0012-add-mlx4-cq_comp-locking-already-done-in-event-handl.patch 
@@ -0,0 +1,39 @@ +From 6d88a748ca017a22c08d25e29144dd392c988eb9 Mon Sep 17 00:00:00 2001 +From: Phil Cayton +Date: Thu, 5 Jun 2014 09:44:42 -0700 +Subject: [PATCH 12/12] add mlx4 cq_comp locking already done in event handler + +--- +diff -urN a11/drivers/net/ethernet/mellanox/mlx4/cq.c a12/drivers/net/ethernet/mellanox/mlx4/cq.c +--- a11/drivers/net/ethernet/mellanox/mlx4/cq.c 2015-01-05 15:12:24.028443079 -0800 ++++ a12/drivers/net/ethernet/mellanox/mlx4/cq.c 2015-01-05 15:14:27.994437857 -0800 +@@ -54,10 +54,17 @@ + + void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) + { ++ struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table; + struct mlx4_cq *cq; + +- cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree, +- cqn & (dev->caps.num_cqs - 1)); ++ spin_lock(&cq_table->lock); ++ ++ cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1)); ++ if (cq) ++ atomic_inc(&cq->refcount); ++ ++ spin_unlock(&cq_table->lock); ++ + if (!cq) { + mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn); + return; +@@ -66,6 +73,9 @@ + ++cq->arm_sn; + + cq->comp(cq); ++ ++ if (atomic_dec_and_test(&cq->refcount)) ++ complete(&cq->free); + } + + void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
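
A note on the mlx4_cq_completion() change in patch 12/12 above: it is the usual lookup-plus-reference pattern. The radix-tree lookup and the refcount increment both happen under cq_table->lock, the completion callback then runs with the lock dropped, and the final reference drop (complete(&cq->free)) signals whoever is tearing the CQ down. Below is a minimal userspace C analogue of that pattern, not mlx4 driver code; every identifier in it (cq_get, cq_put, cq_destroy, cq_table, ...) is invented for illustration.

/*
 * Minimal userspace analogue of the pattern above -- NOT mlx4 driver code.
 * All identifiers (cq_get, cq_put, cq_destroy, cq_table, ...) are invented.
 * Build with: cc -std=c11 -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct cq {
	atomic_int refcount;
	void (*comp)(struct cq *cq);	/* completion callback */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static struct cq *cq_table[1];		/* stand-in for the radix tree */

/* Look up a CQ and take a reference while still holding the table lock,
 * so a concurrent cq_destroy() cannot free it between lookup and use. */
static struct cq *cq_get(unsigned int cqn)
{
	struct cq *cq;

	pthread_mutex_lock(&table_lock);
	cq = cq_table[cqn];
	if (cq)
		atomic_fetch_add(&cq->refcount, 1);
	pthread_mutex_unlock(&table_lock);
	return cq;
}

/* Drop a reference; the last put frees the object.  In the kernel patch
 * the last put is complete(&cq->free), which wakes up the destroy path. */
static void cq_put(struct cq *cq)
{
	if (atomic_fetch_sub(&cq->refcount, 1) == 1) {
		printf("cq %p freed\n", (void *)cq);
		free(cq);
	}
}

static void cq_completion(unsigned int cqn)
{
	struct cq *cq = cq_get(cqn);

	if (!cq) {
		fprintf(stderr, "completion event for bogus CQ %u\n", cqn);
		return;
	}
	cq->comp(cq);			/* callback runs without the lock held */
	cq_put(cq);
}

static void cq_destroy(unsigned int cqn)
{
	struct cq *cq;

	pthread_mutex_lock(&table_lock);
	cq = cq_table[cqn];
	cq_table[cqn] = NULL;
	pthread_mutex_unlock(&table_lock);

	if (cq)
		cq_put(cq);		/* drop the table's own reference */
}

static void handler(struct cq *cq)
{
	printf("completion handled for cq %p\n", (void *)cq);
}

int main(void)
{
	struct cq *cq = calloc(1, sizeof(*cq));

	if (!cq)
		return 1;
	atomic_init(&cq->refcount, 1);	/* reference owned by the table */
	cq->comp = handler;
	cq_table[0] = cq;

	cq_completion(0);
	cq_destroy(0);
	return 0;
}

The reason the reference has to be taken inside the locked region is that cq_destroy() may clear the table entry and drop the table's reference at any moment; once cq_get() has bumped the count under the lock, the object is guaranteed to survive the unlocked callback.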
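
Similarly, the qib_knx.h header added earlier in this series uses the common compile-time stub idiom: real prototypes when QIB_CONFIG_KNX is defined, and trivial static inline stand-ins otherwise (for example qib_knx_ctxt_info() returning 0), so callers compile unchanged whether or not the coprocessor support is built in. A standalone sketch of the idiom, with invented names (WITH_FEATURE, feature_query), just to show the shape:

/*
 * Invented names; a standalone sketch of the compile-time stub idiom used
 * by qib_knx.h.  Build with -DWITH_FEATURE for the real implementation,
 * without it for the inline no-op stub.
 */
#include <stdio.h>

#ifdef WITH_FEATURE
int feature_query(int id);			/* real version, defined below */
#else
static inline int feature_query(int id)		/* disabled build: no-op stub */
{
	(void)id;
	return 0;
}
#endif

#ifdef WITH_FEATURE
int feature_query(int id)
{
	return id * 2;				/* pretend hardware answer */
}
#endif

int main(void)
{
	/* Callers never need #ifdef around the call itself. */
	printf("feature_query(21) = %d\n", feature_query(21));
	return 0;
}

In a disabled build the compiler inlines the stub and removes the call entirely, which is presumably what the qib_knx_ctxt_info() stub achieves for its callers in the qib driver.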