From 20c0cf89971d35f7ccbd547204180da906808f48 Mon Sep 17 00:00:00 2001 From: "Jeffrey C. Becker" Date: Fri, 15 Aug 2014 17:10:44 -0700 Subject: [PATCH] NFSoRDMA: fixes for 3.12 and RHEL7, RHEL6.5, SLES11SP3 backports Signed-off-by: Jeff Becker --- ...1-SUNRPC-Fix-large-reads-on-NFS-RDMA.patch | 51 + ...ve-KERN_INFO-from-dprintk-call-sites.patch | 49 + ...-printk-when-memory-allocation-fails.patch | 30 + ...104-Fix-regression-in-NFSRDMA-server.patch | 74 + ...et-calculation-for-non-page-aligned-.patch | 33 + ...a-Backport-RPC_CWNDSHIFT-from-sunrpc.patch | 12 + ...e-device-s-max-fast-register-page-li.patch | 146 ++ .../0108-nfs-rdma-Fix-for-FMR-leaks.patch | 140 ++ ...A-must-invoke-xprt_wake_pending_task.patch | 112 ++ ...BOUNCEBUFFERS-memory-registration-mo.patch | 104 ++ ...Remove-MEMWINDOWS-registration-modes.patch | 455 ++++++ ...ve-REGISTER-memory-registration-mode.patch | 191 +++ ...ck-to-MTHCAFMR-when-FRMR-is-not-supp.patch | 73 + ...eports-Invalid-mount-option-if-memre.patch | 46 + ...y-rpcrdma_deregister_external-synops.patch | 86 ++ ...-Make-rpcrdma_ep_destroy-return-void.patch | 95 ++ ...-xprtrdma-Split-the-completion-queue.patch | 395 ++++++ ...lock-contention-in-completion-handle.patch | 50 + ...calls-to-ib_poll_cq-in-completion-ha.patch | 165 +++ ...imit-work-done-by-completion-handler.patch | 79 ++ ...the-number-of-hardway-buffer-allocat.patch | 128 ++ ...ia-ri_id-qp-is-not-NULL-when-reconne.patch | 94 ++ ...23-xprtrdma-Remove-Tavor-MTU-setting.patch | 55 + ...4-xprtrdma-Allocate-missing-pagelist.patch | 38 + ...ros-for-reconnection-timeout-constan.patch | 61 + ...onnection-timeout-after-successful-r.patch | 33 + ...deadlock-when-credit-window-is-reset.patch | 104 ++ ...28-xprtrdma-Remove-BUG_ON-call-sites.patch | 83 ++ ...a-Disconnect-on-registration-failure.patch | 215 +++ ...0-svcrdma-refactor-marshalling-logic.patch | 1243 +++++++++++++++++ ...vcrdma-Fence-LOCAL_INV-work-requests.patch | 31 + ...svcrdma-send_write-must-not-overflow.patch | 129 ++ .../0133-nfsrdma-backport-fixes.patch | 48 + ...4-SUNRPC-Fix-large_reads-on-NFS-RDMA.patch | 46 - ...DMA-Fix-regression-in-NFSRDMA-server.patch | 66 - .../0023-nfsrdma-Backport-for-rhel6.5.patch | 19 +- .../0026-nfsrdma-Backport-for-sles11sp3.patch | 28 - 37 files changed, 4651 insertions(+), 156 deletions(-) create mode 100644 linux-next-cherry-picks/0101-SUNRPC-Fix-large-reads-on-NFS-RDMA.patch create mode 100644 linux-next-cherry-picks/0102-SUNRPC-remove-KERN_INFO-from-dprintk-call-sites.patch create mode 100644 linux-next-cherry-picks/0103-svcrdma-fix-printk-when-memory-allocation-fails.patch create mode 100644 linux-next-cherry-picks/0104-Fix-regression-in-NFSRDMA-server.patch create mode 100644 linux-next-cherry-picks/0105-svcrdma-fix-offset-calculation-for-non-page-aligned-.patch create mode 100644 linux-next-cherry-picks/0106-xprtrdma-Backport-RPC_CWNDSHIFT-from-sunrpc.patch create mode 100644 linux-next-cherry-picks/0107-xprtrdma-mind-the-device-s-max-fast-register-page-li.patch create mode 100644 linux-next-cherry-picks/0108-nfs-rdma-Fix-for-FMR-leaks.patch create mode 100644 linux-next-cherry-picks/0109-xprtrdma-RPC-RDMA-must-invoke-xprt_wake_pending_task.patch create mode 100644 linux-next-cherry-picks/0110-xprtrdma-Remove-BOUNCEBUFFERS-memory-registration-mo.patch create mode 100644 linux-next-cherry-picks/0111-xprtrdma-Remove-MEMWINDOWS-registration-modes.patch create mode 100644 linux-next-cherry-picks/0112-xprtrdma-Remove-REGISTER-memory-registration-mode.patch create mode 100644 
linux-next-cherry-picks/0113-xprtrdma-Fall-back-to-MTHCAFMR-when-FRMR-is-not-supp.patch create mode 100644 linux-next-cherry-picks/0114-xprtrdma-mount-reports-Invalid-mount-option-if-memre.patch create mode 100644 linux-next-cherry-picks/0115-xprtrdma-Simplify-rpcrdma_deregister_external-synops.patch create mode 100644 linux-next-cherry-picks/0116-xprtrdma-Make-rpcrdma_ep_destroy-return-void.patch create mode 100644 linux-next-cherry-picks/0117-xprtrdma-Split-the-completion-queue.patch create mode 100644 linux-next-cherry-picks/0118-xprtrmda-Reduce-lock-contention-in-completion-handle.patch create mode 100644 linux-next-cherry-picks/0119-xprtrmda-Reduce-calls-to-ib_poll_cq-in-completion-ha.patch create mode 100644 linux-next-cherry-picks/0120-xprtrdma-Limit-work-done-by-completion-handler.patch create mode 100644 linux-next-cherry-picks/0121-xprtrdma-Reduce-the-number-of-hardway-buffer-allocat.patch create mode 100644 linux-next-cherry-picks/0122-xprtrdma-Ensure-ia-ri_id-qp-is-not-NULL-when-reconne.patch create mode 100644 linux-next-cherry-picks/0123-xprtrdma-Remove-Tavor-MTU-setting.patch create mode 100644 linux-next-cherry-picks/0124-xprtrdma-Allocate-missing-pagelist.patch create mode 100644 linux-next-cherry-picks/0125-xprtrdma-Use-macros-for-reconnection-timeout-constan.patch create mode 100644 linux-next-cherry-picks/0126-xprtrdma-Reset-connection-timeout-after-successful-r.patch create mode 100644 linux-next-cherry-picks/0127-xprtrdma-Avoid-deadlock-when-credit-window-is-reset.patch create mode 100644 linux-next-cherry-picks/0128-xprtrdma-Remove-BUG_ON-call-sites.patch create mode 100644 linux-next-cherry-picks/0129-xprtrdma-Disconnect-on-registration-failure.patch create mode 100644 linux-next-cherry-picks/0130-svcrdma-refactor-marshalling-logic.patch create mode 100644 linux-next-cherry-picks/0131-svcrdma-Fence-LOCAL_INV-work-requests.patch create mode 100644 linux-next-cherry-picks/0132-svcrdma-send_write-must-not-overflow.patch create mode 100644 linux-next-cherry-picks/0133-nfsrdma-backport-fixes.patch delete mode 100644 linux-next-pending/0024-SUNRPC-Fix-large_reads-on-NFS-RDMA.patch delete mode 100644 linux-next-pending/0025-NFSRDMA-Fix-regression-in-NFSRDMA-server.patch diff --git a/linux-next-cherry-picks/0101-SUNRPC-Fix-large-reads-on-NFS-RDMA.patch b/linux-next-cherry-picks/0101-SUNRPC-Fix-large-reads-on-NFS-RDMA.patch new file mode 100644 index 0000000..20a476d --- /dev/null +++ b/linux-next-cherry-picks/0101-SUNRPC-Fix-large-reads-on-NFS-RDMA.patch @@ -0,0 +1,51 @@ +From 2b7bbc963da8d076f263574af4138b5df2e1581f Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 12 Mar 2014 12:51:30 -0400 +Subject: [PATCH 101/132] SUNRPC: Fix large reads on NFS/RDMA + +After commit a11a2bf4, "SUNRPC: Optimise away unnecessary data moves +in xdr_align_pages", Thu Aug 2 13:21:43 2012, READs larger than a +few hundred bytes via NFS/RDMA no longer work. This commit exposed +a long-standing bug in rpcrdma_inline_fixup(). + +I reproduce this with an rsize=4096 mount using the cthon04 basic +tests. Test 5 fails with an EIO error. + +For my reproducer, kernel log shows: + + NFS: server cheating in read reply: count 4096 > recvd 0 + +rpcrdma_inline_fixup() is zeroing the xdr_stream::page_len field, +and xdr_align_pages() is now returning that value to the READ XDR +decoder function. + +That field is set up by xdr_inline_pages() by the READ XDR encoder +function. 
As far as I can tell, it is supposed to be left alone +after that, as it describes the dimensions of the reply xdr_stream, +not the contents of that stream. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=68391 +Signed-off-by: Chuck Lever +Signed-off-by: Trond Myklebust +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 4 +--- + 1 files changed, 1 insertions(+), 3 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index e03725b..96ead52 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -649,9 +649,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) + break; + page_base = 0; + } +- rqst->rq_rcv_buf.page_len = olen - copy_len; +- } else +- rqst->rq_rcv_buf.page_len = 0; ++ } + + if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { + curlen = copy_len; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0102-SUNRPC-remove-KERN_INFO-from-dprintk-call-sites.patch b/linux-next-cherry-picks/0102-SUNRPC-remove-KERN_INFO-from-dprintk-call-sites.patch new file mode 100644 index 0000000..54a2345 --- /dev/null +++ b/linux-next-cherry-picks/0102-SUNRPC-remove-KERN_INFO-from-dprintk-call-sites.patch @@ -0,0 +1,49 @@ +From 3a0799a94c0384a3b275a73267aaa10517b1bf7d Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 12 Mar 2014 12:51:39 -0400 +Subject: [PATCH 102/132] SUNRPC: remove KERN_INFO from dprintk() call sites + +The use of KERN_INFO causes garbage characters to appear when +debugging is enabled. + +Signed-off-by: Chuck Lever +Signed-off-by: Trond Myklebust +--- + net/sunrpc/xprtrdma/transport.c | 10 +++++----- + 1 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 285dc08..1eb9c46 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -733,7 +733,7 @@ static void __exit xprt_rdma_cleanup(void) + { + int rc; + +- dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); ++ dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); + #ifdef RPC_DEBUG + if (sunrpc_table_header) { + unregister_sysctl_table(sunrpc_table_header); +@@ -755,14 +755,14 @@ static int __init xprt_rdma_init(void) + if (rc) + return rc; + +- dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); ++ dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); + +- dprintk(KERN_INFO "Defaults:\n"); +- dprintk(KERN_INFO "\tSlots %d\n" ++ dprintk("Defaults:\n"); ++ dprintk("\tSlots %d\n" + "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", + xprt_rdma_slot_table_entries, + xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); +- dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", ++ dprintk("\tPadding %d\n\tMemreg %d\n", + xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + + #ifdef RPC_DEBUG +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0103-svcrdma-fix-printk-when-memory-allocation-fails.patch b/linux-next-cherry-picks/0103-svcrdma-fix-printk-when-memory-allocation-fails.patch new file mode 100644 index 0000000..9efe81c --- /dev/null +++ b/linux-next-cherry-picks/0103-svcrdma-fix-printk-when-memory-allocation-fails.patch @@ -0,0 +1,30 @@ +From c42a01eee74dfd9ba8f8abb7cb81dd9a8839dc7b Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Mon, 10 Mar 2014 11:33:48 -0400 +Subject: [PATCH 103/132] svcrdma: fix printk when memory allocation fails + +It retries in 1s, not 1000 jiffies. + +Signed-off-by: Jeff Layton +Signed-off-by: J. 
Bruce Fields +--- + net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 +-- + 1 files changed, 1 insertions(+), 2 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c +index 62e4f9b..25688fa 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c +@@ -477,8 +477,7 @@ struct page *svc_rdma_get_page(void) + + while ((page = alloc_page(GFP_KERNEL)) == NULL) { + /* If we can't get memory, wait a bit and try again */ +- printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 " +- "jiffies.\n"); ++ printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n"); + schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); + } + return page; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0104-Fix-regression-in-NFSRDMA-server.patch b/linux-next-cherry-picks/0104-Fix-regression-in-NFSRDMA-server.patch new file mode 100644 index 0000000..172f9a2 --- /dev/null +++ b/linux-next-cherry-picks/0104-Fix-regression-in-NFSRDMA-server.patch @@ -0,0 +1,74 @@ +From 7e4359e2611f95a97037e2b6905eab52f28afbeb Mon Sep 17 00:00:00 2001 +From: Tom Tucker +Date: Tue, 25 Mar 2014 15:14:57 -0500 +Subject: [PATCH 104/132] Fix regression in NFSRDMA server + +The server regression was caused by the addition of rq_next_page +(afc59400d6c65bad66d4ad0b2daf879cbff8e23e). There were a few places that +were missed with the update of the rq_respages array. + +Signed-off-by: Tom Tucker +Tested-by: Steve Wise +Signed-off-by: J. Bruce Fields +--- + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 12 ++++-------- + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 1 + + 2 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +index 0ce7552..8d904e4 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +@@ -90,6 +90,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, + sge_no++; + } + rqstp->rq_respages = &rqstp->rq_pages[sge_no]; ++ rqstp->rq_next_page = rqstp->rq_respages + 1; + + /* We should never run out of SGE because the limit is defined to + * support the max allowed RPC data length +@@ -169,6 +170,7 @@ static int map_read_chunks(struct svcxprt_rdma *xprt, + */ + head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; ++ rqstp->rq_next_page = rqstp->rq_respages + 1; + + byte_count -= sge_bytes; + ch_bytes -= sge_bytes; +@@ -276,6 +278,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, + + /* rq_respages points one past arg pages */ + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; ++ rqstp->rq_next_page = rqstp->rq_respages + 1; + + /* Create the reply and chunk maps */ + offset = 0; +@@ -520,13 +523,6 @@ next_sge: + for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) + rqstp->rq_pages[ch_no] = NULL; + +- /* +- * Detach res pages. If svc_release sees any it will attempt to +- * put them. 
+- */ +- while (rqstp->rq_next_page != rqstp->rq_respages) +- *(--rqstp->rq_next_page) = NULL; +- + return err; + } + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +index c1d124d..11e90f8 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +@@ -625,6 +625,7 @@ static int send_reply(struct svcxprt_rdma *rdma, + if (page_no+1 >= sge_no) + ctxt->sge[page_no+1].length = 0; + } ++ rqstp->rq_next_page = rqstp->rq_respages + 1; + BUG_ON(sge_no > rdma->sc_max_sge); + memset(&send_wr, 0, sizeof send_wr); + ctxt->wr_op = IB_WR_SEND; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0105-svcrdma-fix-offset-calculation-for-non-page-aligned-.patch b/linux-next-cherry-picks/0105-svcrdma-fix-offset-calculation-for-non-page-aligned-.patch new file mode 100644 index 0000000..8e3e81f --- /dev/null +++ b/linux-next-cherry-picks/0105-svcrdma-fix-offset-calculation-for-non-page-aligned-.patch @@ -0,0 +1,33 @@ +From 3cbe01a94c7b369f943f8a9d40394198d757cdd4 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Mon, 17 Mar 2014 13:10:05 -0400 +Subject: [PATCH 106/132] svcrdma: fix offset calculation for non-page aligned sge entries + +The xdr_off value in dma_map_xdr gets passed to ib_dma_map_page as the +offset into the page to be mapped. This calculation does not correctly +take into account the case where the data starts at some offset into +the page. Increment the xdr_off by the page_base to ensure that it is +respected. + +Cc: Tom Tucker +Signed-off-by: Jeff Layton +Signed-off-by: J. Bruce Fields +--- + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +index 11e90f8..7e024a5 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +@@ -265,6 +265,7 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, + xdr_off -= xdr->head[0].iov_len; + if (xdr_off < xdr->page_len) { + /* This offset is in the page list */ ++ xdr_off += xdr->page_base; + page = xdr->pages[xdr_off >> PAGE_SHIFT]; + xdr_off &= ~PAGE_MASK; + } else { +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0106-xprtrdma-Backport-RPC_CWNDSHIFT-from-sunrpc.patch b/linux-next-cherry-picks/0106-xprtrdma-Backport-RPC_CWNDSHIFT-from-sunrpc.patch new file mode 100644 index 0000000..11d70f2 --- /dev/null +++ b/linux-next-cherry-picks/0106-xprtrdma-Backport-RPC_CWNDSHIFT-from-sunrpc.patch @@ -0,0 +1,12 @@ +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index cc1445d..f1cd3d3 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -53,6 +53,7 @@ + + #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ + #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ ++#define RPC_CWNDSHIFT (8U) /* backported from linux/sunrpc/xprt.h */ + + /* + * Interface Adapter -- one per transport instance diff --git a/linux-next-cherry-picks/0107-xprtrdma-mind-the-device-s-max-fast-register-page-li.patch b/linux-next-cherry-picks/0107-xprtrdma-mind-the-device-s-max-fast-register-page-li.patch new file mode 100644 index 0000000..935efdb --- /dev/null +++ b/linux-next-cherry-picks/0107-xprtrdma-mind-the-device-s-max-fast-register-page-li.patch @@ -0,0 +1,146 @@ +From 0fc6c4e7bb287148eb5e949efd89327929d4841d Mon Sep 17 00:00:00 2001 +From: Steve Wise +Date: Wed, 28 May 2014 10:32:00 -0400 +Subject: [PATCH 108/132] xprtrdma: mind the 
device's max fast register page list depth + +Some rdma devices don't support a fast register page list depth of +at least RPCRDMA_MAX_DATA_SEGS. So xprtrdma needs to chunk its fast +register regions according to the minimum of the device max supported +depth or RPCRDMA_MAX_DATA_SEGS. + +Signed-off-by: Steve Wise +Reviewed-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 4 --- + net/sunrpc/xprtrdma/verbs.c | 47 +++++++++++++++++++++++++++++---------- + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + + 3 files changed, 36 insertions(+), 16 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index 96ead52..400aa1b 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -248,10 +248,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + /* success. all failures return above */ + req->rl_nchunks = nchunks; + +- BUG_ON(nchunks == 0); +- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) +- && (nchunks > 3)); +- + /* + * finish off header. If write, marshal discrim and nchunks. + */ +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 9372656..55fb09a 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -539,6 +539,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + __func__); + memreg = RPCRDMA_REGISTER; + #endif ++ } else { ++ /* Mind the ia limit on FRMR page list depth */ ++ ia->ri_max_frmr_depth = min_t(unsigned int, ++ RPCRDMA_MAX_DATA_SEGS, ++ devattr.max_fast_reg_page_list_len); + } + break; + } +@@ -659,24 +664,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + ep->rep_attr.srq = NULL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests; + switch (ia->ri_memreg_strategy) { +- case RPCRDMA_FRMR: ++ case RPCRDMA_FRMR: { ++ int depth = 7; ++ + /* Add room for frmr register and invalidate WRs. + * 1. FRMR reg WR for head + * 2. FRMR invalidate WR for head +- * 3. FRMR reg WR for pagelist +- * 4. FRMR invalidate WR for pagelist ++ * 3. N FRMR reg WRs for pagelist ++ * 4. N FRMR invalidate WRs for pagelist + * 5. FRMR reg WR for tail + * 6. FRMR invalidate WR for tail + * 7. The RDMA_SEND WR + */ +- ep->rep_attr.cap.max_send_wr *= 7; ++ ++ /* Calculate N if the device max FRMR depth is smaller than ++ * RPCRDMA_MAX_DATA_SEGS. ++ */ ++ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { ++ int delta = RPCRDMA_MAX_DATA_SEGS - ++ ia->ri_max_frmr_depth; ++ ++ do { ++ depth += 2; /* FRMR reg + invalidate */ ++ delta -= ia->ri_max_frmr_depth; ++ } while (delta > 0); ++ ++ } ++ ep->rep_attr.cap.max_send_wr *= depth; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { +- cdata->max_requests = devattr.max_qp_wr / 7; ++ cdata->max_requests = devattr.max_qp_wr / depth; + if (!cdata->max_requests) + return -EINVAL; +- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; ++ ep->rep_attr.cap.max_send_wr = cdata->max_requests * ++ depth; + } + break; ++ } + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + /* Add room for mw_binds+unbinds - overkill! 
*/ +@@ -1043,16 +1066,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + case RPCRDMA_FRMR: + for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, +- RPCRDMA_MAX_SEGS); ++ ia->ri_max_frmr_depth); + if (IS_ERR(r->r.frmr.fr_mr)) { + rc = PTR_ERR(r->r.frmr.fr_mr); + dprintk("RPC: %s: ib_alloc_fast_reg_mr" + " failed %i\n", __func__, rc); + goto out; + } +- r->r.frmr.fr_pgl = +- ib_alloc_fast_reg_page_list(ia->ri_id->device, +- RPCRDMA_MAX_SEGS); ++ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( ++ ia->ri_id->device, ++ ia->ri_max_frmr_depth); + if (IS_ERR(r->r.frmr.fr_pgl)) { + rc = PTR_ERR(r->r.frmr.fr_pgl); + dprintk("RPC: %s: " +@@ -1498,8 +1521,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; +- if (*nsegs > RPCRDMA_MAX_DATA_SEGS) +- *nsegs = RPCRDMA_MAX_DATA_SEGS; ++ if (*nsegs > ia->ri_max_frmr_depth) ++ *nsegs = ia->ri_max_frmr_depth; + for (page_no = i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + pa = seg->mr_dma; +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index cc1445d..98340a3 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -66,6 +66,7 @@ struct rpcrdma_ia { + struct completion ri_done; + int ri_async_rc; + enum rpcrdma_memreg ri_memreg_strategy; ++ unsigned int ri_max_frmr_depth; + }; + + /* +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0108-nfs-rdma-Fix-for-FMR-leaks.patch b/linux-next-cherry-picks/0108-nfs-rdma-Fix-for-FMR-leaks.patch new file mode 100644 index 0000000..5e01e25 --- /dev/null +++ b/linux-next-cherry-picks/0108-nfs-rdma-Fix-for-FMR-leaks.patch @@ -0,0 +1,140 @@ +From 4034ba04231f554abb97ad8900a4c1af03f8e21d Mon Sep 17 00:00:00 2001 +From: Allen Andrews +Date: Wed, 28 May 2014 10:32:09 -0400 +Subject: [PATCH 109/132] nfs-rdma: Fix for FMR leaks + +Two memory region leaks were found during testing: + +1. rpcrdma_buffer_create: While allocating RPCRDMA_FRMR's +ib_alloc_fast_reg_mr is called and then ib_alloc_fast_reg_page_list is +called. If ib_alloc_fast_reg_page_list returns an error it bails out of +the routine dropping the last ib_alloc_fast_reg_mr frmr region creating a +memory leak. Added code to dereg the last frmr if +ib_alloc_fast_reg_page_list fails. + +2. rpcrdma_buffer_destroy: While cleaning up, the routine will only free +the MR's on the rb_mws list if there are rb_send_bufs present. However, in +rpcrdma_buffer_create while the rb_mws list is being built if one of the MR +allocation requests fail after some MR's have been allocated on the rb_mws +list the routine never gets to create any rb_send_bufs but instead jumps to +the rpcrdma_buffer_destroy routine which will never free the MR's on rb_mws +list because the rb_send_bufs were never created. This leaks all the MR's +on the rb_mws list that were created prior to one of the MR allocations +failing. + +Issue(2) was seen during testing. Our adapter had a finite number of MR's +available and we created enough connections to where we saw an MR +allocation failure on our Nth NFS connection request. After the kernel +cleaned up the resources it had allocated for the Nth connection we noticed +that FMR's had been leaked due to the coding error described above. + +Issue(1) was seen during a code review while debugging issue(2). 
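[Editor's note] Both leaks described above come down to the same cleanup discipline. As a point of reference only — this is a standalone illustration with invented names and plain malloc/free, not the xprtrdma code — a minimal sketch of the error-path rule that fix (1) enforces:

/* Standalone illustration (invented names) of the rule behind fix (1):
 * once the first allocation in a pair has succeeded, a failure of the
 * second must release the first, or that resource is leaked exactly as
 * the FRMR was here. */
#include <stdlib.h>

struct fake_frmr {
	void *mr;        /* stands in for the ib_alloc_fast_reg_mr() result */
	void *page_list; /* stands in for the fast-reg page list */
};

static int fake_frmr_create(struct fake_frmr *f)
{
	f->mr = malloc(64);
	if (!f->mr)
		return -1;

	f->page_list = malloc(64);
	if (!f->page_list) {
		free(f->mr);   /* the step fix (1) adds: undo the earlier allocation */
		f->mr = NULL;
		return -1;
	}
	return 0;
}

Fix (2) is the same idea applied to teardown: the rb_mws cleanup must not be gated on an unrelated structure (rb_send_bufs) having been created.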
+ +Signed-off-by: Allen Andrews +Reviewed-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 73 ++++++++++++++++++++++-------------------- + 1 files changed, 38 insertions(+), 35 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 55fb09a..8f9704e 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -1081,6 +1081,8 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + dprintk("RPC: %s: " + "ib_alloc_fast_reg_page_list " + "failed %i\n", __func__, rc); ++ ++ ib_dereg_mr(r->r.frmr.fr_mr); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); +@@ -1217,41 +1219,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) + kfree(buf->rb_recv_bufs[i]); + } + if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { +- while (!list_empty(&buf->rb_mws)) { +- r = list_entry(buf->rb_mws.next, +- struct rpcrdma_mw, mw_list); +- list_del(&r->mw_list); +- switch (ia->ri_memreg_strategy) { +- case RPCRDMA_FRMR: +- rc = ib_dereg_mr(r->r.frmr.fr_mr); +- if (rc) +- dprintk("RPC: %s:" +- " ib_dereg_mr" +- " failed %i\n", +- __func__, rc); +- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); +- break; +- case RPCRDMA_MTHCAFMR: +- rc = ib_dealloc_fmr(r->r.fmr); +- if (rc) +- dprintk("RPC: %s:" +- " ib_dealloc_fmr" +- " failed %i\n", +- __func__, rc); +- break; +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- rc = ib_dealloc_mw(r->r.mw); +- if (rc) +- dprintk("RPC: %s:" +- " ib_dealloc_mw" +- " failed %i\n", +- __func__, rc); +- break; +- default: +- break; +- } +- } + rpcrdma_deregister_internal(ia, + buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); +@@ -1259,6 +1226,42 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) + } + } + ++ while (!list_empty(&buf->rb_mws)) { ++ r = list_entry(buf->rb_mws.next, ++ struct rpcrdma_mw, mw_list); ++ list_del(&r->mw_list); ++ switch (ia->ri_memreg_strategy) { ++ case RPCRDMA_FRMR: ++ rc = ib_dereg_mr(r->r.frmr.fr_mr); ++ if (rc) ++ dprintk("RPC: %s:" ++ " ib_dereg_mr" ++ " failed %i\n", ++ __func__, rc); ++ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); ++ break; ++ case RPCRDMA_MTHCAFMR: ++ rc = ib_dealloc_fmr(r->r.fmr); ++ if (rc) ++ dprintk("RPC: %s:" ++ " ib_dealloc_fmr" ++ " failed %i\n", ++ __func__, rc); ++ break; ++ case RPCRDMA_MEMWINDOWS_ASYNC: ++ case RPCRDMA_MEMWINDOWS: ++ rc = ib_dealloc_mw(r->r.mw); ++ if (rc) ++ dprintk("RPC: %s:" ++ " ib_dealloc_mw" ++ " failed %i\n", ++ __func__, rc); ++ break; ++ default: ++ break; ++ } ++ } ++ + kfree(buf->rb_pool); + } + +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0109-xprtrdma-RPC-RDMA-must-invoke-xprt_wake_pending_task.patch b/linux-next-cherry-picks/0109-xprtrdma-RPC-RDMA-must-invoke-xprt_wake_pending_task.patch new file mode 100644 index 0000000..53ca2a6 --- /dev/null +++ b/linux-next-cherry-picks/0109-xprtrdma-RPC-RDMA-must-invoke-xprt_wake_pending_task.patch @@ -0,0 +1,112 @@ +From 254f91e2fa1f4cc18fd2eb9d5481888ffe126d5b Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:32:17 -0400 +Subject: [PATCH 110/132] xprtrdma: RPC/RDMA must invoke xprt_wake_pending_tasks() in process context + +An IB provider can invoke rpcrdma_conn_func() in an IRQ context, +thus rpcrdma_conn_func() cannot be allowed to directly invoke +generic RPC functions like xprt_wake_pending_tasks(). 
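[Editor's note] The patch below does this by bouncing the connection-state change to a delayed work item that runs in process context. For orientation only — the names here are placeholders, not the xprtrdma symbols — the general shape of that deferral pattern in kernel C is roughly:

/* Illustrative sketch of the IRQ-to-process-context deferral pattern;
 * my_ep and my_conn_worker are placeholder names, not the real symbols. */
#include <linux/workqueue.h>

struct my_ep {
	struct delayed_work connect_worker;
	/* ...connection state... */
};

/* Runs later in process context, where it is safe to take sleeping
 * locks and wake pending RPC tasks. */
static void my_conn_worker(struct work_struct *work)
{
	struct my_ep *ep = container_of(work, struct my_ep,
					connect_worker.work);
	(void)ep;
}

/* Called from the provider upcall, possibly in IRQ context: do no real
 * work here, only schedule it. */
static void my_conn_func(struct my_ep *ep)
{
	schedule_delayed_work(&ep->connect_worker, 0);
}

/* At setup time: INIT_DELAYED_WORK(&ep->connect_worker, my_conn_worker); */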
+ +Signed-off-by: Chuck Lever +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 22 +++++++++++++++------- + net/sunrpc/xprtrdma/verbs.c | 3 +++ + net/sunrpc/xprtrdma/xprt_rdma.h | 3 +++ + 3 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index 400aa1b..c296468 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -676,15 +676,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) + rqst->rq_private_buf = rqst->rq_rcv_buf; + } + +-/* +- * This function is called when an async event is posted to +- * the connection which changes the connection state. All it +- * does at this point is mark the connection up/down, the rpc +- * timers do the rest. +- */ + void +-rpcrdma_conn_func(struct rpcrdma_ep *ep) ++rpcrdma_connect_worker(struct work_struct *work) + { ++ struct rpcrdma_ep *ep = ++ container_of(work, struct rpcrdma_ep, rep_connect_worker.work); + struct rpc_xprt *xprt = ep->rep_xprt; + + spin_lock_bh(&xprt->transport_lock); +@@ -701,6 +697,18 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) + } + + /* ++ * This function is called when an async event is posted to ++ * the connection which changes the connection state. All it ++ * does at this point is mark the connection up/down, the rpc ++ * timers do the rest. ++ */ ++void ++rpcrdma_conn_func(struct rpcrdma_ep *ep) ++{ ++ schedule_delayed_work(&ep->rep_connect_worker, 0); ++} ++ ++/* + * This function is called when memory window unbind which we are waiting + * for completes. Just use rr_func (zeroed by upcall) to signal completion. + */ +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 8f9704e..9cb88f3 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -742,6 +742,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + INIT_CQCOUNT(ep); + ep->rep_ia = ia; + init_waitqueue_head(&ep->rep_connect_wait); ++ INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); + + /* + * Create a single cq for receive dto and mw_bind (only ever +@@ -817,6 +818,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + dprintk("RPC: %s: entering, connected is %d\n", + __func__, ep->rep_connected); + ++ cancel_delayed_work_sync(&ep->rep_connect_worker); ++ + if (ia->ri_id->qp) { + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc) +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index 98340a3..c620d13 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -43,6 +43,7 @@ + #include /* wait_queue_head_t, etc */ + #include /* spinlock_t, etc */ + #include /* atomic_t, etc */ ++#include /* struct work_struct */ + + #include /* RDMA connection api */ + #include /* RDMA verbs api */ +@@ -87,6 +88,7 @@ struct rpcrdma_ep { + struct rpc_xprt *rep_xprt; /* for rep_func */ + struct rdma_conn_param rep_remote_cma; + struct sockaddr_storage rep_remote_addr; ++ struct delayed_work rep_connect_worker; + }; + + #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) +@@ -336,6 +338,7 @@ int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, + /* + * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c + */ ++void rpcrdma_connect_worker(struct work_struct *); + void rpcrdma_conn_func(struct rpcrdma_ep *); + void rpcrdma_reply_handler(struct rpcrdma_rep *); + +-- +1.7.1 + diff --git 
a/linux-next-cherry-picks/0110-xprtrdma-Remove-BOUNCEBUFFERS-memory-registration-mo.patch b/linux-next-cherry-picks/0110-xprtrdma-Remove-BOUNCEBUFFERS-memory-registration-mo.patch new file mode 100644 index 0000000..30307fd --- /dev/null +++ b/linux-next-cherry-picks/0110-xprtrdma-Remove-BOUNCEBUFFERS-memory-registration-mo.patch @@ -0,0 +1,104 @@ +From 03ff8821eb5ed168792667cfc3ddff903e97af99 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:32:26 -0400 +Subject: [PATCH 111/132] xprtrdma: Remove BOUNCEBUFFERS memory registration mode + +Clean up: This memory registration mode is slow and was never +meant for use in production environments. Remove it to reduce +implementation complexity. + +Signed-off-by: Chuck Lever +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 11 ----------- + net/sunrpc/xprtrdma/transport.c | 13 ------------- + net/sunrpc/xprtrdma/verbs.c | 5 +---- + 3 files changed, 1 insertions(+), 28 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index c296468..02b2941 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -77,9 +77,6 @@ static const char transfertypes[][12] = { + * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk + * elements. Segments are then coalesced when registered, if possible + * within the selected memreg mode. +- * +- * Note, this routine is never called if the connection's memory +- * registration strategy is 0 (bounce buffers). + */ + + static int +@@ -439,14 +436,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) + wtype = rpcrdma_noch; + BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); + +- if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && +- (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { +- /* forced to "pure inline"? */ +- dprintk("RPC: %s: too much data (%d/%d) for inline\n", +- __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); +- return -1; +- } +- + hdrlen = 28; /*sizeof *headerp;*/ + padlen = 0; + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 1eb9c46..8c5035a 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -503,18 +503,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) + * If the allocation or registration fails, the RPC framework + * will (doggedly) retry. + */ +- if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == +- RPCRDMA_BOUNCEBUFFERS) { +- /* forced to "pure inline" */ +- dprintk("RPC: %s: too much data (%zd) for inline " +- "(r/w max %d/%d)\n", __func__, size, +- rpcx_to_rdmad(xprt).inline_rsize, +- rpcx_to_rdmad(xprt).inline_wsize); +- size = req->rl_size; +- rpc_exit(task, -EIO); /* fail the operation */ +- rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; +- goto out; +- } + if (task->tk_flags & RPC_TASK_SWAPPER) + nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); + else +@@ -543,7 +531,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) + req = nreq; + } + dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); +-out: + req->rl_connect_cookie = 0; /* our reserved value */ + return req->rl_xdr_buf; + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 9cb88f3..4a4e4ea 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -557,7 +557,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + * adapter. 
+ */ + switch (memreg) { +- case RPCRDMA_BOUNCEBUFFERS: + case RPCRDMA_REGISTER: + case RPCRDMA_FRMR: + break; +@@ -778,9 +777,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + + /* Client offers RDMA Read but does not initiate */ + ep->rep_remote_cma.initiator_depth = 0; +- if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) +- ep->rep_remote_cma.responder_resources = 0; +- else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ++ if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + ep->rep_remote_cma.responder_resources = 32; + else + ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0111-xprtrdma-Remove-MEMWINDOWS-registration-modes.patch b/linux-next-cherry-picks/0111-xprtrdma-Remove-MEMWINDOWS-registration-modes.patch new file mode 100644 index 0000000..fb7158d --- /dev/null +++ b/linux-next-cherry-picks/0111-xprtrdma-Remove-MEMWINDOWS-registration-modes.patch @@ -0,0 +1,455 @@ +From b45ccfd25d506e83d9ecf93d0ac7edf031d35d2f Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:32:34 -0400 +Subject: [PATCH 112/132] xprtrdma: Remove MEMWINDOWS registration modes + +The MEMWINDOWS and MEMWINDOWS_ASYNC memory registration modes were +intended as stop-gap modes before the introduction of FRMR. They +are now considered obsolete. + +MEMWINDOWS_ASYNC is also considered unsafe because it can leave +client memory registered and exposed for an indeterminant time after +each I/O. + +At this point, the MEMWINDOWS modes add needless complexity, so +remove them. + +Signed-off-by: Chuck Lever +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 34 +-------- + net/sunrpc/xprtrdma/transport.c | 9 +-- + net/sunrpc/xprtrdma/verbs.c | 165 +------------------------------------- + net/sunrpc/xprtrdma/xprt_rdma.h | 2 - + 4 files changed, 7 insertions(+), 203 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index 02b2941..46b5172 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -199,7 +199,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + return 0; + + do { +- /* bind/register the memory, then build chunk from result. */ + int n = rpcrdma_register_external(seg, nsegs, + cur_wchunk != NULL, r_xprt); + if (n <= 0) +@@ -698,16 +697,6 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) + } + + /* +- * This function is called when memory window unbind which we are waiting +- * for completes. Just use rr_func (zeroed by upcall) to signal completion. +- */ +-static void +-rpcrdma_unbind_func(struct rpcrdma_rep *rep) +-{ +- wake_up(&rep->rr_unbind); +-} +- +-/* + * Called as a tasklet to do req/reply match and complete a request + * Errors must result in the RPC task either being awakened, or + * allowed to timeout, to discover the errors at that time. +@@ -721,7 +710,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) + struct rpc_xprt *xprt = rep->rr_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + __be32 *iptr; +- int i, rdmalen, status; ++ int rdmalen, status; + + /* Check status. If bad, signal disconnect and return rep to pool */ + if (rep->rr_len == ~0U) { +@@ -850,27 +839,6 @@ badheader: + break; + } + +- /* If using mw bind, start the deregister process now. 
*/ +- /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ +- if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { +- case RPCRDMA_MEMWINDOWS: +- for (i = 0; req->rl_nchunks-- > 1;) +- i += rpcrdma_deregister_external( +- &req->rl_segments[i], r_xprt, NULL); +- /* Optionally wait (not here) for unbinds to complete */ +- rep->rr_func = rpcrdma_unbind_func; +- (void) rpcrdma_deregister_external(&req->rl_segments[i], +- r_xprt, rep); +- break; +- case RPCRDMA_MEMWINDOWS_ASYNC: +- for (i = 0; req->rl_nchunks--;) +- i += rpcrdma_deregister_external(&req->rl_segments[i], +- r_xprt, NULL); +- break; +- default: +- break; +- } +- + dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", + __func__, xprt, rqst, status); + xprt_complete_rqst(rqst->rq_task, status); +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 8c5035a..c23b0c1 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -566,9 +566,7 @@ xprt_rdma_free(void *buffer) + __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); + + /* +- * Finish the deregistration. When using mw bind, this was +- * begun in rpcrdma_reply_handler(). In all other modes, we +- * do it here, in thread context. The process is considered ++ * Finish the deregistration. The process is considered + * complete when the rr_func vector becomes NULL - this + * was put in place during rpcrdma_reply_handler() - the wait + * call below will not block if the dereg is "done". If +@@ -580,11 +578,6 @@ xprt_rdma_free(void *buffer) + &req->rl_segments[i], r_xprt, NULL); + } + +- if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { +- rep->rr_func = NULL; /* abandon the callback */ +- req->rl_reply = NULL; +- } +- + if (req->rl_iov.length == 0) { /* see allocate above */ + struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; + oreq->rl_reply = req->rl_reply; +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 4a4e4ea..304c7ad 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -152,7 +152,7 @@ void rpcrdma_event_process(struct ib_wc *wc) + dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", + __func__, rep, wc->status, wc->opcode, wc->byte_len); + +- if (!rep) /* send or bind completion that we don't care about */ ++ if (!rep) /* send completion that we don't care about */ + return; + + if (IB_WC_SUCCESS != wc->status) { +@@ -197,8 +197,6 @@ void rpcrdma_event_process(struct ib_wc *wc) + } + atomic_set(&rep->rr_buffer->rb_credits, credits); + } +- /* fall through */ +- case IB_WC_BIND_MW: + rpcrdma_schedule_tasklet(rep); + break; + default: +@@ -233,7 +231,7 @@ rpcrdma_cq_poll(struct ib_cq *cq) + /* + * rpcrdma_cq_event_upcall + * +- * This upcall handles recv, send, bind and unbind events. ++ * This upcall handles recv and send events. + * It is reentrant but processes single events in order to maintain + * ordering of receives to keep server credits. 
+ * +@@ -494,16 +492,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + } + + switch (memreg) { +- case RPCRDMA_MEMWINDOWS: +- case RPCRDMA_MEMWINDOWS_ASYNC: +- if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { +- dprintk("RPC: %s: MEMWINDOWS registration " +- "specified but not supported by adapter, " +- "using slower RPCRDMA_REGISTER\n", +- __func__); +- memreg = RPCRDMA_REGISTER; +- } +- break; + case RPCRDMA_MTHCAFMR: + if (!ia->ri_id->device->alloc_fmr) { + #if RPCRDMA_PERSISTENT_REGISTRATION +@@ -567,16 +555,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + IB_ACCESS_REMOTE_READ; + goto register_setup; + #endif +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- mem_priv = IB_ACCESS_LOCAL_WRITE | +- IB_ACCESS_MW_BIND; +- goto register_setup; + case RPCRDMA_MTHCAFMR: + if (ia->ri_have_dma_lkey) + break; + mem_priv = IB_ACCESS_LOCAL_WRITE; ++#if RPCRDMA_PERSISTENT_REGISTRATION + register_setup: ++#endif + ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); + if (IS_ERR(ia->ri_bind_mem)) { + printk(KERN_ALERT "%s: ib_get_dma_mr for " +@@ -699,14 +684,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + } + break; + } +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- /* Add room for mw_binds+unbinds - overkill! */ +- ep->rep_attr.cap.max_send_wr++; +- ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); +- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) +- return -EINVAL; +- break; + default: + break; + } +@@ -728,14 +705,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + + /* set trigger for requesting send completion */ + ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; +- switch (ia->ri_memreg_strategy) { +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- ep->rep_cqinit -= RPCRDMA_MAX_SEGS; +- break; +- default: +- break; +- } + if (ep->rep_cqinit <= 2) + ep->rep_cqinit = 0; + INIT_CQCOUNT(ep); +@@ -743,11 +712,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + init_waitqueue_head(&ep->rep_connect_wait); + INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); + +- /* +- * Create a single cq for receive dto and mw_bind (only ever +- * care about unbind, really). Send completions are suppressed. +- * Use single threaded tasklet upcalls to maintain ordering. +- */ + ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, + rpcrdma_cq_async_error_upcall, NULL, + ep->rep_attr.cap.max_recv_wr + +@@ -1020,11 +984,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * +- sizeof(struct rpcrdma_mw); +- break; + default: + break; + } +@@ -1055,11 +1014,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + } + p += cdata->padding; + +- /* +- * Allocate the fmr's, or mw's for mw_bind chunk registration. +- * We "cycle" the mw's in order to minimize rkey reuse, +- * and also reduce unbind-to-bind collision. 
+- */ + INIT_LIST_HEAD(&buf->rb_mws); + r = (struct rpcrdma_mw *)p; + switch (ia->ri_memreg_strategy) { +@@ -1107,21 +1061,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + ++r; + } + break; +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- /* Allocate one extra request's worth, for full cycling */ +- for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { +- r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); +- if (IS_ERR(r->r.mw)) { +- rc = PTR_ERR(r->r.mw); +- dprintk("RPC: %s: ib_alloc_mw" +- " failed %i\n", __func__, rc); +- goto out; +- } +- list_add(&r->mw_list, &buf->rb_mws); +- ++r; +- } +- break; + default: + break; + } +@@ -1170,7 +1109,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + memset(rep, 0, sizeof(struct rpcrdma_rep)); + buf->rb_recv_bufs[i] = rep; + buf->rb_recv_bufs[i]->rr_buffer = buf; +- init_waitqueue_head(&rep->rr_unbind); + + rc = rpcrdma_register_internal(ia, rep->rr_base, + len - offsetof(struct rpcrdma_rep, rr_base), +@@ -1204,7 +1142,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) + + /* clean up in reverse order from create + * 1. recv mr memory (mr free, then kfree) +- * 1a. bind mw memory + * 2. send mr memory (mr free, then kfree) + * 3. padding (if any) [moved to rpcrdma_ep_destroy] + * 4. arrays +@@ -1248,15 +1185,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) + " failed %i\n", + __func__, rc); + break; +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- rc = ib_dealloc_mw(r->r.mw); +- if (rc) +- dprintk("RPC: %s:" +- " ib_dealloc_mw" +- " failed %i\n", +- __func__, rc); +- break; + default: + break; + } +@@ -1331,15 +1259,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) + req->rl_niovs = 0; + if (req->rl_reply) { + buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; +- init_waitqueue_head(&req->rl_reply->rr_unbind); + req->rl_reply->rr_func = NULL; + req->rl_reply = NULL; + } + switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + case RPCRDMA_MTHCAFMR: +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: + /* + * Cycle mw's back in reverse order, and "spin" them. + * This delays and scrambles reuse as much as possible. +@@ -1384,8 +1309,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) + + /* + * Put reply buffers back into pool when not attached to +- * request. This happens in error conditions, and when +- * aborting unbinds. Pre-decrement counter/array index. ++ * request. This happens in error conditions. + */ + void + rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) +@@ -1688,74 +1612,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, + } + + static int +-rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, +- int *nsegs, int writing, struct rpcrdma_ia *ia, +- struct rpcrdma_xprt *r_xprt) +-{ +- int mem_priv = (writing ? 
IB_ACCESS_REMOTE_WRITE : +- IB_ACCESS_REMOTE_READ); +- struct ib_mw_bind param; +- int rc; +- +- *nsegs = 1; +- rpcrdma_map_one(ia, seg, writing); +- param.bind_info.mr = ia->ri_bind_mem; +- param.wr_id = 0ULL; /* no send cookie */ +- param.bind_info.addr = seg->mr_dma; +- param.bind_info.length = seg->mr_len; +- param.send_flags = 0; +- param.bind_info.mw_access_flags = mem_priv; +- +- DECR_CQCOUNT(&r_xprt->rx_ep); +- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); +- if (rc) { +- dprintk("RPC: %s: failed ib_bind_mw " +- "%u@0x%llx status %i\n", +- __func__, seg->mr_len, +- (unsigned long long)seg->mr_dma, rc); +- rpcrdma_unmap_one(ia, seg); +- } else { +- seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; +- seg->mr_base = param.bind_info.addr; +- seg->mr_nsegs = 1; +- } +- return rc; +-} +- +-static int +-rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, +- struct rpcrdma_ia *ia, +- struct rpcrdma_xprt *r_xprt, void **r) +-{ +- struct ib_mw_bind param; +- LIST_HEAD(l); +- int rc; +- +- BUG_ON(seg->mr_nsegs != 1); +- param.bind_info.mr = ia->ri_bind_mem; +- param.bind_info.addr = 0ULL; /* unbind */ +- param.bind_info.length = 0; +- param.bind_info.mw_access_flags = 0; +- if (*r) { +- param.wr_id = (u64) (unsigned long) *r; +- param.send_flags = IB_SEND_SIGNALED; +- INIT_CQCOUNT(&r_xprt->rx_ep); +- } else { +- param.wr_id = 0ULL; +- param.send_flags = 0; +- DECR_CQCOUNT(&r_xprt->rx_ep); +- } +- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); +- rpcrdma_unmap_one(ia, seg); +- if (rc) +- dprintk("RPC: %s: failed ib_(un)bind_mw," +- " status %i\n", __func__, rc); +- else +- *r = NULL; /* will upcall on completion */ +- return rc; +-} +- +-static int + rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia) + { +@@ -1845,12 +1701,6 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); + break; + +- /* Registration using memory windows */ +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); +- break; +- + /* Default registration each time */ + default: + rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); +@@ -1887,11 +1737,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + rc = rpcrdma_deregister_fmr_external(seg, ia); + break; + +- case RPCRDMA_MEMWINDOWS_ASYNC: +- case RPCRDMA_MEMWINDOWS: +- rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); +- break; +- + default: + rc = rpcrdma_deregister_default_external(seg, ia); + break; +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index c620d13..bf08ee0 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -127,7 +127,6 @@ struct rpcrdma_rep { + struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ + void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ + struct list_head rr_list; /* tasklet list */ +- wait_queue_head_t rr_unbind; /* optional unbind wait */ + struct ib_sge rr_iov; /* for posting */ + struct ib_mr *rr_handle; /* handle for mem in rr_iov */ + char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ +@@ -162,7 +161,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ + struct ib_mr *rl_mr; /* if registered directly */ + struct rpcrdma_mw { /* if registered from region */ + union { +- struct ib_mw *mw; + struct ib_fmr *fmr; + struct { + struct 
ib_fast_reg_page_list *fr_pgl; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0112-xprtrdma-Remove-REGISTER-memory-registration-mode.patch b/linux-next-cherry-picks/0112-xprtrdma-Remove-REGISTER-memory-registration-mode.patch new file mode 100644 index 0000000..06c8809 --- /dev/null +++ b/linux-next-cherry-picks/0112-xprtrdma-Remove-REGISTER-memory-registration-mode.patch @@ -0,0 +1,191 @@ +From 0ac531c1832318efa3dc3d723e356a7e09330e80 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:32:43 -0400 +Subject: [PATCH 113/132] xprtrdma: Remove REGISTER memory registration mode + +All kernel RDMA providers except amso1100 support either MTHCAFMR +or FRMR, both of which are faster than REGISTER. amso1100 can +continue to use ALLPHYSICAL. + +The only other ULP consumer in the kernel that uses the reg_phys_mr +verb is Lustre. + +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 3 +- + net/sunrpc/xprtrdma/verbs.c | 90 ++-------------------------------------- + 2 files changed, 5 insertions(+), 88 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index 46b5172..aae1726 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -476,8 +476,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) + * on receive. Therefore, we request a reply chunk + * for non-writes wherever feasible and efficient. + */ +- if (wtype == rpcrdma_noch && +- r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) ++ if (wtype == rpcrdma_noch) + wtype = rpcrdma_replych; + } + } +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 304c7ad..6bb9a07 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -494,19 +494,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + switch (memreg) { + case RPCRDMA_MTHCAFMR: + if (!ia->ri_id->device->alloc_fmr) { +-#if RPCRDMA_PERSISTENT_REGISTRATION + dprintk("RPC: %s: MTHCAFMR registration " + "specified but not supported by adapter, " + "using riskier RPCRDMA_ALLPHYSICAL\n", + __func__); + memreg = RPCRDMA_ALLPHYSICAL; +-#else +- dprintk("RPC: %s: MTHCAFMR registration " +- "specified but not supported by adapter, " +- "using slower RPCRDMA_REGISTER\n", +- __func__); +- memreg = RPCRDMA_REGISTER; +-#endif + } + break; + case RPCRDMA_FRMR: +@@ -514,19 +506,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + if ((devattr.device_cap_flags & + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { +-#if RPCRDMA_PERSISTENT_REGISTRATION + dprintk("RPC: %s: FRMR registration " + "specified but not supported by adapter, " + "using riskier RPCRDMA_ALLPHYSICAL\n", + __func__); + memreg = RPCRDMA_ALLPHYSICAL; +-#else +- dprintk("RPC: %s: FRMR registration " +- "specified but not supported by adapter, " +- "using slower RPCRDMA_REGISTER\n", +- __func__); +- memreg = RPCRDMA_REGISTER; +-#endif + } else { + /* Mind the ia limit on FRMR page list depth */ + ia->ri_max_frmr_depth = min_t(unsigned int, +@@ -545,7 +529,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + * adapter. 
+ */ + switch (memreg) { +- case RPCRDMA_REGISTER: + case RPCRDMA_FRMR: + break; + #if RPCRDMA_PERSISTENT_REGISTRATION +@@ -565,11 +548,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); + if (IS_ERR(ia->ri_bind_mem)) { + printk(KERN_ALERT "%s: ib_get_dma_mr for " +- "phys register failed with %lX\n\t" +- "Will continue with degraded performance\n", ++ "phys register failed with %lX\n", + __func__, PTR_ERR(ia->ri_bind_mem)); +- memreg = RPCRDMA_REGISTER; +- ia->ri_bind_mem = NULL; ++ rc = -ENOMEM; ++ goto out2; + } + break; + default: +@@ -1611,67 +1593,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, + return rc; + } + +-static int +-rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, +- int *nsegs, int writing, struct rpcrdma_ia *ia) +-{ +- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : +- IB_ACCESS_REMOTE_READ); +- struct rpcrdma_mr_seg *seg1 = seg; +- struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; +- int len, i, rc = 0; +- +- if (*nsegs > RPCRDMA_MAX_DATA_SEGS) +- *nsegs = RPCRDMA_MAX_DATA_SEGS; +- for (len = 0, i = 0; i < *nsegs;) { +- rpcrdma_map_one(ia, seg, writing); +- ipb[i].addr = seg->mr_dma; +- ipb[i].size = seg->mr_len; +- len += seg->mr_len; +- ++seg; +- ++i; +- /* Check for holes */ +- if ((i < *nsegs && offset_in_page(seg->mr_offset)) || +- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) +- break; +- } +- seg1->mr_base = seg1->mr_dma; +- seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, +- ipb, i, mem_priv, &seg1->mr_base); +- if (IS_ERR(seg1->mr_chunk.rl_mr)) { +- rc = PTR_ERR(seg1->mr_chunk.rl_mr); +- dprintk("RPC: %s: failed ib_reg_phys_mr " +- "%u@0x%llx (%d)... status %i\n", +- __func__, len, +- (unsigned long long)seg1->mr_dma, i, rc); +- while (i--) +- rpcrdma_unmap_one(ia, --seg); +- } else { +- seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; +- seg1->mr_nsegs = i; +- seg1->mr_len = len; +- } +- *nsegs = i; +- return rc; +-} +- +-static int +-rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, +- struct rpcrdma_ia *ia) +-{ +- struct rpcrdma_mr_seg *seg1 = seg; +- int rc; +- +- rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); +- seg1->mr_chunk.rl_mr = NULL; +- while (seg1->mr_nsegs--) +- rpcrdma_unmap_one(ia, seg++); +- if (rc) +- dprintk("RPC: %s: failed ib_dereg_mr," +- " status %i\n", __func__, rc); +- return rc; +-} +- + int + rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + int nsegs, int writing, struct rpcrdma_xprt *r_xprt) +@@ -1701,10 +1622,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); + break; + +- /* Default registration each time */ + default: +- rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); +- break; ++ return -1; + } + if (rc) + return -1; +@@ -1738,7 +1657,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + break; + + default: +- rc = rpcrdma_deregister_default_external(seg, ia); + break; + } + if (r) { +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0113-xprtrdma-Fall-back-to-MTHCAFMR-when-FRMR-is-not-supp.patch b/linux-next-cherry-picks/0113-xprtrdma-Fall-back-to-MTHCAFMR-when-FRMR-is-not-supp.patch new file mode 100644 index 0000000..0743d58 --- /dev/null +++ b/linux-next-cherry-picks/0113-xprtrdma-Fall-back-to-MTHCAFMR-when-FRMR-is-not-supp.patch @@ -0,0 +1,73 @@ +From f10eafd3a6ce9da7e96999c124b643ea6c4921f3 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:32:51 -0400 +Subject: [PATCH 
114/132] xprtrdma: Fall back to MTHCAFMR when FRMR is not supported + +An audit of in-kernel RDMA providers that do not support the FRMR +memory registration shows that several of them support MTHCAFMR. +Prefer MTHCAFMR when FRMR is not supported. + +If MTHCAFMR is not supported, only then choose ALLPHYSICAL. + +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 31 +++++++++++++++---------------- + 1 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 6bb9a07..a352798 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -491,33 +491,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; + } + +- switch (memreg) { +- case RPCRDMA_MTHCAFMR: +- if (!ia->ri_id->device->alloc_fmr) { +- dprintk("RPC: %s: MTHCAFMR registration " +- "specified but not supported by adapter, " +- "using riskier RPCRDMA_ALLPHYSICAL\n", +- __func__); +- memreg = RPCRDMA_ALLPHYSICAL; +- } +- break; +- case RPCRDMA_FRMR: ++ if (memreg == RPCRDMA_FRMR) { + /* Requires both frmr reg and local dma lkey */ + if ((devattr.device_cap_flags & + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { + dprintk("RPC: %s: FRMR registration " +- "specified but not supported by adapter, " +- "using riskier RPCRDMA_ALLPHYSICAL\n", +- __func__); +- memreg = RPCRDMA_ALLPHYSICAL; ++ "not supported by HCA\n", __func__); ++ memreg = RPCRDMA_MTHCAFMR; + } else { + /* Mind the ia limit on FRMR page list depth */ + ia->ri_max_frmr_depth = min_t(unsigned int, + RPCRDMA_MAX_DATA_SEGS, + devattr.max_fast_reg_page_list_len); + } +- break; ++ } ++ if (memreg == RPCRDMA_MTHCAFMR) { ++ if (!ia->ri_id->device->alloc_fmr) { ++ dprintk("RPC: %s: MTHCAFMR registration " ++ "not supported by HCA\n", __func__); ++#if RPCRDMA_PERSISTENT_REGISTRATION ++ memreg = RPCRDMA_ALLPHYSICAL; ++#else ++ rc = -EINVAL; ++ goto out2; ++#endif ++ } + } + + /* +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0114-xprtrdma-mount-reports-Invalid-mount-option-if-memre.patch b/linux-next-cherry-picks/0114-xprtrdma-mount-reports-Invalid-mount-option-if-memre.patch new file mode 100644 index 0000000..8052dfa --- /dev/null +++ b/linux-next-cherry-picks/0114-xprtrdma-mount-reports-Invalid-mount-option-if-memre.patch @@ -0,0 +1,46 @@ +From cdd9ade711599e7672a635add0406080856f8b92 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:00 -0400 +Subject: [PATCH 115/132] xprtrdma: mount reports "Invalid mount option" if memreg mode not supported + +If the selected memory registration mode is not supported by the +underlying provider/HCA, the NFS mount command reports that there was +an invalid mount option, and fails. This is misleading. + +Reporting a problem allocating memory is a lot closer to the truth. 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 8 ++++---- + 1 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index a352798..7c7e9b4 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -513,7 +513,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + #if RPCRDMA_PERSISTENT_REGISTRATION + memreg = RPCRDMA_ALLPHYSICAL; + #else +- rc = -EINVAL; ++ rc = -ENOMEM; + goto out2; + #endif + } +@@ -554,9 +554,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) + } + break; + default: +- printk(KERN_ERR "%s: invalid memory registration mode %d\n", +- __func__, memreg); +- rc = -EINVAL; ++ printk(KERN_ERR "RPC: Unsupported memory " ++ "registration mode: %d\n", memreg); ++ rc = -ENOMEM; + goto out2; + } + dprintk("RPC: %s: memory registration strategy is %d\n", +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0115-xprtrdma-Simplify-rpcrdma_deregister_external-synops.patch b/linux-next-cherry-picks/0115-xprtrdma-Simplify-rpcrdma_deregister_external-synops.patch new file mode 100644 index 0000000..883ab79 --- /dev/null +++ b/linux-next-cherry-picks/0115-xprtrdma-Simplify-rpcrdma_deregister_external-synops.patch @@ -0,0 +1,86 @@ +From 13c9ff8f673862b69e795ea99a237b461c557eb3 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:08 -0400 +Subject: [PATCH 116/132] xprtrdma: Simplify rpcrdma_deregister_external() synopsis + +Clean up: All remaining callers of rpcrdma_deregister_external() +pass NULL as the last argument, so remove that argument. + +Signed-off-by: Chuck Lever +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 2 +- + net/sunrpc/xprtrdma/transport.c | 2 +- + net/sunrpc/xprtrdma/verbs.c | 8 +------- + net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- + 4 files changed, 4 insertions(+), 10 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index aae1726..436d229 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -270,7 +270,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + out: + for (pos = 0; nchunks--;) + pos += rpcrdma_deregister_external( +- &req->rl_segments[pos], r_xprt, NULL); ++ &req->rl_segments[pos], r_xprt); + return 0; + } + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index c23b0c1..430cabb 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -575,7 +575,7 @@ xprt_rdma_free(void *buffer) + for (i = 0; req->rl_nchunks;) { + --req->rl_nchunks; + i += rpcrdma_deregister_external( +- &req->rl_segments[i], r_xprt, NULL); ++ &req->rl_segments[i], r_xprt); + } + + if (req->rl_iov.length == 0) { /* see allocate above */ +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 7c7e9b4..0cbc83c 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -1632,7 +1632,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, + + int + rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, +- struct rpcrdma_xprt *r_xprt, void *r) ++ struct rpcrdma_xprt *r_xprt) + { + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + int nsegs = seg->mr_nsegs, rc; +@@ -1658,12 +1658,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + default: + break; + } +- if (r) { +- struct rpcrdma_rep *rep = r; +- void (*func)(struct 
rpcrdma_rep *) = rep->rr_func; +- rep->rr_func = NULL; +- func(rep); /* dereg done, callback now */ +- } + return nsegs; + } + +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index bf08ee0..3f44d6a 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -331,7 +331,7 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *, + int rpcrdma_register_external(struct rpcrdma_mr_seg *, + int, int, struct rpcrdma_xprt *); + int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, +- struct rpcrdma_xprt *, void *); ++ struct rpcrdma_xprt *); + + /* + * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0116-xprtrdma-Make-rpcrdma_ep_destroy-return-void.patch b/linux-next-cherry-picks/0116-xprtrdma-Make-rpcrdma_ep_destroy-return-void.patch new file mode 100644 index 0000000..bf5a979 --- /dev/null +++ b/linux-next-cherry-picks/0116-xprtrdma-Make-rpcrdma_ep_destroy-return-void.patch @@ -0,0 +1,95 @@ +From 7f1d54191ed6fa0f79f584fe3ebf6519738e817f Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:16 -0400 +Subject: [PATCH 117/132] xprtrdma: Make rpcrdma_ep_destroy() return void + +Clean up: rpcrdma_ep_destroy() returns a value that is used +only to print a debugging message. rpcrdma_ep_destroy() already +prints debugging messages in all error cases. + +Make rpcrdma_ep_destroy() return void instead. + +Signed-off-by: Chuck Lever +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/transport.c | 8 ++------ + net/sunrpc/xprtrdma/verbs.c | 7 +------ + net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- + 3 files changed, 4 insertions(+), 13 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 430cabb..d18b2a3 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -229,7 +229,6 @@ static void + xprt_rdma_destroy(struct rpc_xprt *xprt) + { + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); +- int rc; + + dprintk("RPC: %s: called\n", __func__); + +@@ -238,10 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) + xprt_clear_connected(xprt); + + rpcrdma_buffer_destroy(&r_xprt->rx_buf); +- rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); +- if (rc) +- dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", +- __func__, rc); ++ rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); + rpcrdma_ia_close(&r_xprt->rx_ia); + + xprt_rdma_free_addresses(xprt); +@@ -391,7 +387,7 @@ out4: + xprt_rdma_free_addresses(xprt); + rc = -EINVAL; + out3: +- (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); ++ rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); + out2: + rpcrdma_ia_close(&new_xprt->rx_ia); + out1: +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 0cbc83c..edc951e 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -748,11 +748,8 @@ out1: + * Disconnect and destroy endpoint. After this, the only + * valid operations on the ep are to free it (if dynamically + * allocated) or re-create it. +- * +- * The caller's error handling must be sure to not leak the endpoint +- * if this function fails. 
+ */ +-int ++void + rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + { + int rc; +@@ -782,8 +779,6 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + if (rc) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, rc); +- +- return rc; + } + + /* +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index 3f44d6a..362a19d 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -301,7 +301,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); + */ + int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, + struct rpcrdma_create_data_internal *); +-int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); ++void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); + int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); + int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); + +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0117-xprtrdma-Split-the-completion-queue.patch b/linux-next-cherry-picks/0117-xprtrdma-Split-the-completion-queue.patch new file mode 100644 index 0000000..8c813af --- /dev/null +++ b/linux-next-cherry-picks/0117-xprtrdma-Split-the-completion-queue.patch @@ -0,0 +1,395 @@ +From fc66448549bbb77f2f1a38b270ab2d6b6a22da33 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:25 -0400 +Subject: [PATCH 118/132] xprtrdma: Split the completion queue + +The current CQ handler uses the ib_wc.opcode field to distinguish +between event types. However, the contents of that field are not +reliable if the completion status is not IB_WC_SUCCESS. + +When an error completion occurs on a send event, the CQ handler +schedules a tasklet with something that is not a struct rpcrdma_rep. +This is never correct behavior, and sometimes it results in a panic. + +To resolve this issue, split the completion queue into a send CQ and +a receive CQ. The send CQ handler now handles only struct rpcrdma_mw +wr_id's, and the receive CQ handler now handles only struct +rpcrdma_rep wr_id's. 
+ +Fix suggested by Shirley Ma + +Reported-by: Rafael Reiter +Fixes: 5c635e09cec0feeeb310968e51dad01040244851 +BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=73211 +Signed-off-by: Chuck Lever +Tested-by: Klemens Senn +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 228 +++++++++++++++++++++++---------------- + net/sunrpc/xprtrdma/xprt_rdma.h | 1 - + 2 files changed, 137 insertions(+), 92 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index edc951e..af2d097 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -142,96 +142,115 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) + } + } + +-static inline +-void rpcrdma_event_process(struct ib_wc *wc) ++static void ++rpcrdma_sendcq_process_wc(struct ib_wc *wc) + { +- struct rpcrdma_mw *frmr; +- struct rpcrdma_rep *rep = +- (struct rpcrdma_rep *)(unsigned long) wc->wr_id; ++ struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + +- dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", +- __func__, rep, wc->status, wc->opcode, wc->byte_len); ++ dprintk("RPC: %s: frmr %p status %X opcode %d\n", ++ __func__, frmr, wc->status, wc->opcode); + +- if (!rep) /* send completion that we don't care about */ ++ if (wc->wr_id == 0ULL) + return; +- +- if (IB_WC_SUCCESS != wc->status) { +- dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", +- __func__, wc->opcode, wc->status); +- rep->rr_len = ~0U; +- if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) +- rpcrdma_schedule_tasklet(rep); ++ if (wc->status != IB_WC_SUCCESS) + return; +- } + +- switch (wc->opcode) { +- case IB_WC_FAST_REG_MR: +- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; ++ if (wc->opcode == IB_WC_FAST_REG_MR) + frmr->r.frmr.state = FRMR_IS_VALID; +- break; +- case IB_WC_LOCAL_INV: +- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; ++ else if (wc->opcode == IB_WC_LOCAL_INV) + frmr->r.frmr.state = FRMR_IS_INVALID; +- break; +- case IB_WC_RECV: +- rep->rr_len = wc->byte_len; +- ib_dma_sync_single_for_cpu( +- rdmab_to_ia(rep->rr_buffer)->ri_id->device, +- rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); +- /* Keep (only) the most recent credits, after check validity */ +- if (rep->rr_len >= 16) { +- struct rpcrdma_msg *p = +- (struct rpcrdma_msg *) rep->rr_base; +- unsigned int credits = ntohl(p->rm_credit); +- if (credits == 0) { +- dprintk("RPC: %s: server" +- " dropped credits to 0!\n", __func__); +- /* don't deadlock */ +- credits = 1; +- } else if (credits > rep->rr_buffer->rb_max_requests) { +- dprintk("RPC: %s: server" +- " over-crediting: %d (%d)\n", +- __func__, credits, +- rep->rr_buffer->rb_max_requests); +- credits = rep->rr_buffer->rb_max_requests; +- } +- atomic_set(&rep->rr_buffer->rb_credits, credits); +- } +- rpcrdma_schedule_tasklet(rep); +- break; +- default: +- dprintk("RPC: %s: unexpected WC event %X\n", +- __func__, wc->opcode); +- break; +- } + } + +-static inline int +-rpcrdma_cq_poll(struct ib_cq *cq) ++static int ++rpcrdma_sendcq_poll(struct ib_cq *cq) + { + struct ib_wc wc; + int rc; + +- for (;;) { +- rc = ib_poll_cq(cq, 1, &wc); +- if (rc < 0) { +- dprintk("RPC: %s: ib_poll_cq failed %i\n", +- __func__, rc); +- return rc; +- } +- if (rc == 0) +- break; ++ while ((rc = ib_poll_cq(cq, 1, &wc)) == 1) ++ rpcrdma_sendcq_process_wc(&wc); ++ return rc; ++} + +- rpcrdma_event_process(&wc); ++/* ++ * Handle send, fast_reg_mr, and local_inv completions. 
++ * ++ * Send events are typically suppressed and thus do not result ++ * in an upcall. Occasionally one is signaled, however. This ++ * prevents the provider's completion queue from wrapping and ++ * losing a completion. ++ */ ++static void ++rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) ++{ ++ int rc; ++ ++ rc = rpcrdma_sendcq_poll(cq); ++ if (rc) { ++ dprintk("RPC: %s: ib_poll_cq failed: %i\n", ++ __func__, rc); ++ return; + } + +- return 0; ++ rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); ++ if (rc) { ++ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", ++ __func__, rc); ++ return; ++ } ++ ++ rpcrdma_sendcq_poll(cq); ++} ++ ++static void ++rpcrdma_recvcq_process_wc(struct ib_wc *wc) ++{ ++ struct rpcrdma_rep *rep = ++ (struct rpcrdma_rep *)(unsigned long)wc->wr_id; ++ ++ dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", ++ __func__, rep, wc->status, wc->opcode, wc->byte_len); ++ ++ if (wc->status != IB_WC_SUCCESS) { ++ rep->rr_len = ~0U; ++ goto out_schedule; ++ } ++ if (wc->opcode != IB_WC_RECV) ++ return; ++ ++ rep->rr_len = wc->byte_len; ++ ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, ++ rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); ++ ++ if (rep->rr_len >= 16) { ++ struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; ++ unsigned int credits = ntohl(p->rm_credit); ++ ++ if (credits == 0) ++ credits = 1; /* don't deadlock */ ++ else if (credits > rep->rr_buffer->rb_max_requests) ++ credits = rep->rr_buffer->rb_max_requests; ++ atomic_set(&rep->rr_buffer->rb_credits, credits); ++ } ++ ++out_schedule: ++ rpcrdma_schedule_tasklet(rep); ++} ++ ++static int ++rpcrdma_recvcq_poll(struct ib_cq *cq) ++{ ++ struct ib_wc wc; ++ int rc; ++ ++ while ((rc = ib_poll_cq(cq, 1, &wc)) == 1) ++ rpcrdma_recvcq_process_wc(&wc); ++ return rc; + } + + /* +- * rpcrdma_cq_event_upcall ++ * Handle receive completions. + * +- * This upcall handles recv and send events. + * It is reentrant but processes single events in order to maintain + * ordering of receives to keep server credits. + * +@@ -240,26 +259,27 @@ rpcrdma_cq_poll(struct ib_cq *cq) + * connection shutdown. That is, the structures required for + * the completion of the reply handler must remain intact until + * all memory has been reclaimed. +- * +- * Note that send events are suppressed and do not result in an upcall. 
+ */ + static void +-rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) ++rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) + { + int rc; + +- rc = rpcrdma_cq_poll(cq); +- if (rc) ++ rc = rpcrdma_recvcq_poll(cq); ++ if (rc) { ++ dprintk("RPC: %s: ib_poll_cq failed: %i\n", ++ __func__, rc); + return; ++ } + + rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + if (rc) { +- dprintk("RPC: %s: ib_req_notify_cq failed %i\n", ++ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + return; + } + +- rpcrdma_cq_poll(cq); ++ rpcrdma_recvcq_poll(cq); + } + + #ifdef RPC_DEBUG +@@ -610,6 +630,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + struct rpcrdma_create_data_internal *cdata) + { + struct ib_device_attr devattr; ++ struct ib_cq *sendcq, *recvcq; + int rc, err; + + rc = ib_query_device(ia->ri_id->device, &devattr); +@@ -685,7 +706,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + ep->rep_attr.cap.max_recv_sge); + + /* set trigger for requesting send completion */ +- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; ++ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; + if (ep->rep_cqinit <= 2) + ep->rep_cqinit = 0; + INIT_CQCOUNT(ep); +@@ -693,26 +714,43 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + init_waitqueue_head(&ep->rep_connect_wait); + INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); + +- ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, ++ sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, + rpcrdma_cq_async_error_upcall, NULL, +- ep->rep_attr.cap.max_recv_wr + + ep->rep_attr.cap.max_send_wr + 1, 0); +- if (IS_ERR(ep->rep_cq)) { +- rc = PTR_ERR(ep->rep_cq); +- dprintk("RPC: %s: ib_create_cq failed: %i\n", ++ if (IS_ERR(sendcq)) { ++ rc = PTR_ERR(sendcq); ++ dprintk("RPC: %s: failed to create send CQ: %i\n", + __func__, rc); + goto out1; + } + +- rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); ++ rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); + if (rc) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + goto out2; + } + +- ep->rep_attr.send_cq = ep->rep_cq; +- ep->rep_attr.recv_cq = ep->rep_cq; ++ recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, ++ rpcrdma_cq_async_error_upcall, NULL, ++ ep->rep_attr.cap.max_recv_wr + 1, 0); ++ if (IS_ERR(recvcq)) { ++ rc = PTR_ERR(recvcq); ++ dprintk("RPC: %s: failed to create recv CQ: %i\n", ++ __func__, rc); ++ goto out2; ++ } ++ ++ rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); ++ if (rc) { ++ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", ++ __func__, rc); ++ ib_destroy_cq(recvcq); ++ goto out2; ++ } ++ ++ ep->rep_attr.send_cq = sendcq; ++ ep->rep_attr.recv_cq = recvcq; + + /* Initialize cma parameters */ + +@@ -734,7 +772,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + return 0; + + out2: +- err = ib_destroy_cq(ep->rep_cq); ++ err = ib_destroy_cq(sendcq); + if (err) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, err); +@@ -774,8 +812,14 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + ep->rep_pad_mr = NULL; + } + +- rpcrdma_clean_cq(ep->rep_cq); +- rc = ib_destroy_cq(ep->rep_cq); ++ rpcrdma_clean_cq(ep->rep_attr.recv_cq); ++ rc = ib_destroy_cq(ep->rep_attr.recv_cq); ++ if (rc) ++ dprintk("RPC: %s: ib_destroy_cq returned %i\n", ++ __func__, rc); ++ ++ rpcrdma_clean_cq(ep->rep_attr.send_cq); ++ rc = ib_destroy_cq(ep->rep_attr.send_cq); + if (rc) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + 
__func__, rc); +@@ -798,7 +842,9 @@ retry: + if (rc && rc != -ENOTCONN) + dprintk("RPC: %s: rpcrdma_ep_disconnect" + " status %i\n", __func__, rc); +- rpcrdma_clean_cq(ep->rep_cq); ++ ++ rpcrdma_clean_cq(ep->rep_attr.recv_cq); ++ rpcrdma_clean_cq(ep->rep_attr.send_cq); + + xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); + id = rpcrdma_create_id(xprt, ia, +@@ -907,7 +953,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + { + int rc; + +- rpcrdma_clean_cq(ep->rep_cq); ++ rpcrdma_clean_cq(ep->rep_attr.recv_cq); ++ rpcrdma_clean_cq(ep->rep_attr.send_cq); + rc = rdma_disconnect(ia->ri_id); + if (!rc) { + /* returns without wait if not connected */ +@@ -1727,7 +1774,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, + ib_dma_sync_single_for_cpu(ia->ri_id->device, + rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + +- DECR_CQCOUNT(ep); + rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); + + if (rc) +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index 362a19d..334ab6e 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -79,7 +79,6 @@ struct rpcrdma_ep { + int rep_cqinit; + int rep_connected; + struct rpcrdma_ia *rep_ia; +- struct ib_cq *rep_cq; + struct ib_qp_init_attr rep_attr; + wait_queue_head_t rep_connect_wait; + struct ib_sge rep_pad; /* holds zeroed pad */ +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0118-xprtrmda-Reduce-lock-contention-in-completion-handle.patch b/linux-next-cherry-picks/0118-xprtrmda-Reduce-lock-contention-in-completion-handle.patch new file mode 100644 index 0000000..115ab68 --- /dev/null +++ b/linux-next-cherry-picks/0118-xprtrmda-Reduce-lock-contention-in-completion-handle.patch @@ -0,0 +1,50 @@ +From 7f23f6f6e388d2003c4ecf5d558f3c2191e12530 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:34 -0400 +Subject: [PATCH 119/132] xprtrmda: Reduce lock contention in completion handlers + +Skip the ib_poll_cq() after re-arming, if the provider knows there +are no additional items waiting. (Have a look at commit ed23a727 for +more details). 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 14 ++++++++++---- + 1 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index af2d097..c7d5281 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -192,8 +192,11 @@ rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) + return; + } + +- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +- if (rc) { ++ rc = ib_req_notify_cq(cq, ++ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); ++ if (rc == 0) ++ return; ++ if (rc < 0) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + return; +@@ -272,8 +275,11 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) + return; + } + +- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +- if (rc) { ++ rc = ib_req_notify_cq(cq, ++ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); ++ if (rc == 0) ++ return; ++ if (rc < 0) { + dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", + __func__, rc); + return; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0119-xprtrmda-Reduce-calls-to-ib_poll_cq-in-completion-ha.patch b/linux-next-cherry-picks/0119-xprtrmda-Reduce-calls-to-ib_poll_cq-in-completion-ha.patch new file mode 100644 index 0000000..49703e6 --- /dev/null +++ b/linux-next-cherry-picks/0119-xprtrmda-Reduce-calls-to-ib_poll_cq-in-completion-ha.patch @@ -0,0 +1,165 @@ +From 1c00dd0776543608e13c74a527660cb8cd28a74f Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:42 -0400 +Subject: [PATCH 120/132] xprtrmda: Reduce calls to ib_poll_cq() in completion handlers + +Change the completion handlers to grab up to 16 items per +ib_poll_cq() call. No extra ib_poll_cq() is needed if fewer than 16 +items are returned. 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 56 ++++++++++++++++++++++++++------------ + net/sunrpc/xprtrdma/xprt_rdma.h | 4 +++ + 2 files changed, 42 insertions(+), 18 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index c7d5281..b8caee9 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -162,14 +162,23 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) + } + + static int +-rpcrdma_sendcq_poll(struct ib_cq *cq) ++rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) + { +- struct ib_wc wc; +- int rc; ++ struct ib_wc *wcs; ++ int count, rc; + +- while ((rc = ib_poll_cq(cq, 1, &wc)) == 1) +- rpcrdma_sendcq_process_wc(&wc); +- return rc; ++ do { ++ wcs = ep->rep_send_wcs; ++ ++ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); ++ if (rc <= 0) ++ return rc; ++ ++ count = rc; ++ while (count-- > 0) ++ rpcrdma_sendcq_process_wc(wcs++); ++ } while (rc == RPCRDMA_POLLSIZE); ++ return 0; + } + + /* +@@ -183,9 +192,10 @@ rpcrdma_sendcq_poll(struct ib_cq *cq) + static void + rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) + { ++ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; + int rc; + +- rc = rpcrdma_sendcq_poll(cq); ++ rc = rpcrdma_sendcq_poll(cq, ep); + if (rc) { + dprintk("RPC: %s: ib_poll_cq failed: %i\n", + __func__, rc); +@@ -202,7 +212,7 @@ rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) + return; + } + +- rpcrdma_sendcq_poll(cq); ++ rpcrdma_sendcq_poll(cq, ep); + } + + static void +@@ -241,14 +251,23 @@ out_schedule: + } + + static int +-rpcrdma_recvcq_poll(struct ib_cq *cq) ++rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) + { +- struct ib_wc wc; +- int rc; ++ struct ib_wc *wcs; ++ int count, rc; + +- while ((rc = ib_poll_cq(cq, 1, &wc)) == 1) +- rpcrdma_recvcq_process_wc(&wc); +- return rc; ++ do { ++ wcs = ep->rep_recv_wcs; ++ ++ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); ++ if (rc <= 0) ++ return rc; ++ ++ count = rc; ++ while (count-- > 0) ++ rpcrdma_recvcq_process_wc(wcs++); ++ } while (rc == RPCRDMA_POLLSIZE); ++ return 0; + } + + /* +@@ -266,9 +285,10 @@ rpcrdma_recvcq_poll(struct ib_cq *cq) + static void + rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) + { ++ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; + int rc; + +- rc = rpcrdma_recvcq_poll(cq); ++ rc = rpcrdma_recvcq_poll(cq, ep); + if (rc) { + dprintk("RPC: %s: ib_poll_cq failed: %i\n", + __func__, rc); +@@ -285,7 +305,7 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) + return; + } + +- rpcrdma_recvcq_poll(cq); ++ rpcrdma_recvcq_poll(cq, ep); + } + + #ifdef RPC_DEBUG +@@ -721,7 +741,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); + + sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, +- rpcrdma_cq_async_error_upcall, NULL, ++ rpcrdma_cq_async_error_upcall, ep, + ep->rep_attr.cap.max_send_wr + 1, 0); + if (IS_ERR(sendcq)) { + rc = PTR_ERR(sendcq); +@@ -738,7 +758,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, + } + + recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, +- rpcrdma_cq_async_error_upcall, NULL, ++ rpcrdma_cq_async_error_upcall, ep, + ep->rep_attr.cap.max_recv_wr + 1, 0); + if (IS_ERR(recvcq)) { + rc = PTR_ERR(recvcq); +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index 334ab6e..cb4c882 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ 
b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -74,6 +74,8 @@ struct rpcrdma_ia { + * RDMA Endpoint -- one per transport instance + */ + ++#define RPCRDMA_POLLSIZE (16) ++ + struct rpcrdma_ep { + atomic_t rep_cqcount; + int rep_cqinit; +@@ -88,6 +90,8 @@ struct rpcrdma_ep { + struct rdma_conn_param rep_remote_cma; + struct sockaddr_storage rep_remote_addr; + struct delayed_work rep_connect_worker; ++ struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; ++ struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; + }; + + #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0120-xprtrdma-Limit-work-done-by-completion-handler.patch b/linux-next-cherry-picks/0120-xprtrdma-Limit-work-done-by-completion-handler.patch new file mode 100644 index 0000000..9b7c86e --- /dev/null +++ b/linux-next-cherry-picks/0120-xprtrdma-Limit-work-done-by-completion-handler.patch @@ -0,0 +1,79 @@ +From 8301a2c047cc25dabd645e5590c1db0ead4c5af4 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:51 -0400 +Subject: [PATCH 121/132] xprtrdma: Limit work done by completion handler + +Sagi Grimberg points out that a steady +stream of CQ events could starve other work because of the boundless +loop pooling in rpcrdma_{send,recv}_poll(). + +Instead of a (potentially infinite) while loop, return after +collecting a budgeted number of completions. + +Signed-off-by: Chuck Lever +Acked-by: Sagi Grimberg +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 10 ++++++---- + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index b8caee9..1d08366 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -165,8 +165,9 @@ static int + rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) + { + struct ib_wc *wcs; +- int count, rc; ++ int budget, count, rc; + ++ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; + do { + wcs = ep->rep_send_wcs; + +@@ -177,7 +178,7 @@ rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) + count = rc; + while (count-- > 0) + rpcrdma_sendcq_process_wc(wcs++); +- } while (rc == RPCRDMA_POLLSIZE); ++ } while (rc == RPCRDMA_POLLSIZE && --budget); + return 0; + } + +@@ -254,8 +255,9 @@ static int + rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) + { + struct ib_wc *wcs; +- int count, rc; ++ int budget, count, rc; + ++ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; + do { + wcs = ep->rep_recv_wcs; + +@@ -266,7 +268,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) + count = rc; + while (count-- > 0) + rpcrdma_recvcq_process_wc(wcs++); +- } while (rc == RPCRDMA_POLLSIZE); ++ } while (rc == RPCRDMA_POLLSIZE && --budget); + return 0; + } + +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index cb4c882..0c3b88e 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -74,6 +74,7 @@ struct rpcrdma_ia { + * RDMA Endpoint -- one per transport instance + */ + ++#define RPCRDMA_WC_BUDGET (128) + #define RPCRDMA_POLLSIZE (16) + + struct rpcrdma_ep { +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0121-xprtrdma-Reduce-the-number-of-hardway-buffer-allocat.patch b/linux-next-cherry-picks/0121-xprtrdma-Reduce-the-number-of-hardway-buffer-allocat.patch new file mode 100644 index 0000000..cb74e73 --- /dev/null +++ b/linux-next-cherry-picks/0121-xprtrdma-Reduce-the-number-of-hardway-buffer-allocat.patch @@ -0,0 
+1,128 @@ +From 65866f8259851cea5e356d2fd46fc37a4e26330e Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:33:59 -0400 +Subject: [PATCH 122/132] xprtrdma: Reduce the number of hardway buffer allocations + +While marshaling an RPC/RDMA request, the inline_{rsize,wsize} +settings determine whether an inline request is used, or whether +read or write chunks lists are built. The current default value of +these settings is 1024. Any RPC request smaller than 1024 bytes is +sent to the NFS server completely inline. + +rpcrdma_buffer_create() allocates and pre-registers a set of RPC +buffers for each transport instance, also based on the inline rsize +and wsize settings. + +RPC/RDMA requests and replies are built in these buffers. However, +if an RPC/RDMA request is expected to be larger than 1024, a buffer +has to be allocated and registered for that RPC, and deregistered +and released when the RPC is complete. This is known has a +"hardway allocation." + +Since the introduction of NFSv4, the size of RPC requests has become +larger, and hardway allocations are thus more frequent. Hardway +allocations are significant overhead, and they waste the existing +RPC buffers pre-allocated by rpcrdma_buffer_create(). + +We'd like fewer hardway allocations. + +Increasing the size of the pre-registered buffers is the most direct +way to do this. However, a blanket increase of the inline thresholds +has interoperability consequences. + +On my 64-bit system, rpcrdma_buffer_create() requests roughly 7000 +bytes for each RPC request buffer, using kmalloc(). Due to internal +fragmentation, this wastes nearly 1200 bytes because kmalloc() +already returns an 8192-byte piece of memory for a 7000-byte +allocation request, though the extra space remains unused. + +So let's round up the size of the pre-allocated buffers, and make +use of the unused space in the kmalloc'd memory. + +This change reduces the amount of hardway allocated memory for an +NFSv4 general connectathon run from 1322092 to 9472 bytes (99%). + +Signed-off-by: Chuck Lever +Tested-by: Steve Wise +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 25 +++++++++++++------------ + 1 files changed, 13 insertions(+), 12 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 1d08366..c80995a 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -50,6 +50,7 @@ + #include + #include /* for Tavor hack below */ + #include ++#include + + #include "xprt_rdma.h" + +@@ -1005,7 +1006,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) + { + char *p; +- size_t len; ++ size_t len, rlen, wlen; + int i, rc; + struct rpcrdma_mw *r; + +@@ -1120,16 +1121,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + * Allocate/init the request/reply buffers. Doing this + * using kmalloc for now -- one for each buf. + */ ++ wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); ++ rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); ++ dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", ++ __func__, wlen, rlen); ++ + for (i = 0; i < buf->rb_max_requests; i++) { + struct rpcrdma_req *req; + struct rpcrdma_rep *rep; + +- len = cdata->inline_wsize + sizeof(struct rpcrdma_req); +- /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! 
*/ +- /* Typical ~2400b, so rounding up saves work later */ +- if (len < 4096) +- len = 4096; +- req = kmalloc(len, GFP_KERNEL); ++ req = kmalloc(wlen, GFP_KERNEL); + if (req == NULL) { + dprintk("RPC: %s: request buffer %d alloc" + " failed\n", __func__, i); +@@ -1141,16 +1142,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + buf->rb_send_bufs[i]->rl_buffer = buf; + + rc = rpcrdma_register_internal(ia, req->rl_base, +- len - offsetof(struct rpcrdma_req, rl_base), ++ wlen - offsetof(struct rpcrdma_req, rl_base), + &buf->rb_send_bufs[i]->rl_handle, + &buf->rb_send_bufs[i]->rl_iov); + if (rc) + goto out; + +- buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); ++ buf->rb_send_bufs[i]->rl_size = wlen - ++ sizeof(struct rpcrdma_req); + +- len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); +- rep = kmalloc(len, GFP_KERNEL); ++ rep = kmalloc(rlen, GFP_KERNEL); + if (rep == NULL) { + dprintk("RPC: %s: reply buffer %d alloc failed\n", + __func__, i); +@@ -1162,7 +1163,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, + buf->rb_recv_bufs[i]->rr_buffer = buf; + + rc = rpcrdma_register_internal(ia, rep->rr_base, +- len - offsetof(struct rpcrdma_rep, rr_base), ++ rlen - offsetof(struct rpcrdma_rep, rr_base), + &buf->rb_recv_bufs[i]->rr_handle, + &buf->rb_recv_bufs[i]->rr_iov); + if (rc) +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0122-xprtrdma-Ensure-ia-ri_id-qp-is-not-NULL-when-reconne.patch b/linux-next-cherry-picks/0122-xprtrdma-Ensure-ia-ri_id-qp-is-not-NULL-when-reconne.patch new file mode 100644 index 0000000..3581bee --- /dev/null +++ b/linux-next-cherry-picks/0122-xprtrdma-Ensure-ia-ri_id-qp-is-not-NULL-when-reconne.patch @@ -0,0 +1,94 @@ +From ec62f40d3505a643497d105c297093bb90afd44e Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:34:07 -0400 +Subject: [PATCH 123/132] xprtrdma: Ensure ia->ri_id->qp is not NULL when reconnecting + +Devesh Sharma reports that after a +disconnect, his HCA is failing to create a fresh QP, leaving +ia_ri->ri_id->qp set to NULL. But xprtrdma still allows RPCs to +wake up and post LOCAL_INV as they exit, causing an oops. + +rpcrdma_ep_connect() is allowing the wake-up by leaking the QP +creation error code (-EPERM in this case) to the RPC client's +generic layer. xprt_connect_status() does not recognize -EPERM, so +it kills pending RPC tasks immediately rather than retrying the +connect. + +Re-arrange the QP creation logic so that when it fails on reconnect, +it leaves ->qp with the old QP rather than NULL. If pending RPC +tasks wake and exit, LOCAL_INV work requests will flush rather than +oops. + +On initial connect, leaving ->qp == NULL is OK, since there are no +pending RPCs that might use ->qp. But be sure not to try to destroy +a NULL QP when rpcrdma_ep_connect() is retried. 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 29 ++++++++++++++++++++--------- + 1 files changed, 20 insertions(+), 9 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index c80995a..54edf2a 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -867,6 +867,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) + if (ep->rep_connected != 0) { + struct rpcrdma_xprt *xprt; + retry: ++ dprintk("RPC: %s: reconnecting...\n", __func__); + rc = rpcrdma_ep_disconnect(ep, ia); + if (rc && rc != -ENOTCONN) + dprintk("RPC: %s: rpcrdma_ep_disconnect" +@@ -879,7 +880,7 @@ retry: + id = rpcrdma_create_id(xprt, ia, + (struct sockaddr *)&xprt->rx_data.addr); + if (IS_ERR(id)) { +- rc = PTR_ERR(id); ++ rc = -EHOSTUNREACH; + goto out; + } + /* TEMP TEMP TEMP - fail if new device: +@@ -893,20 +894,30 @@ retry: + printk("RPC: %s: can't reconnect on " + "different device!\n", __func__); + rdma_destroy_id(id); +- rc = -ENETDOWN; ++ rc = -ENETUNREACH; + goto out; + } + /* END TEMP */ ++ rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); ++ if (rc) { ++ dprintk("RPC: %s: rdma_create_qp failed %i\n", ++ __func__, rc); ++ rdma_destroy_id(id); ++ rc = -ENETUNREACH; ++ goto out; ++ } + rdma_destroy_qp(ia->ri_id); + rdma_destroy_id(ia->ri_id); + ia->ri_id = id; +- } +- +- rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); +- if (rc) { +- dprintk("RPC: %s: rdma_create_qp failed %i\n", +- __func__, rc); +- goto out; ++ } else { ++ dprintk("RPC: %s: connecting...\n", __func__); ++ rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); ++ if (rc) { ++ dprintk("RPC: %s: rdma_create_qp failed %i\n", ++ __func__, rc); ++ /* do not update ep->rep_connected */ ++ return -ENETUNREACH; ++ } + } + + /* XXX Tavor device performs badly with 2K MTU! */ +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0123-xprtrdma-Remove-Tavor-MTU-setting.patch b/linux-next-cherry-picks/0123-xprtrdma-Remove-Tavor-MTU-setting.patch new file mode 100644 index 0000000..4fde4bf --- /dev/null +++ b/linux-next-cherry-picks/0123-xprtrdma-Remove-Tavor-MTU-setting.patch @@ -0,0 +1,55 @@ +From 5bc4bc729275a0bfc2bfd04466e8ab7c85af2f6e Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:34:16 -0400 +Subject: [PATCH 124/132] xprtrdma: Remove Tavor MTU setting + +Clean up. Remove HCA-specific clutter in xprtrdma, which is +supposed to be device-independent. + +Hal Rosenstock observes: +> Note that there is OpenSM option (enable_quirks) to return 1K MTU +> in SA PathRecord responses for Tavor so that can be used for this. +> The default setting for enable_quirks is FALSE so that would need +> changing. + +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/verbs.c | 14 -------------- + 1 files changed, 0 insertions(+), 14 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 54edf2a..515dfc1 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -48,7 +48,6 @@ + */ + + #include +-#include /* for Tavor hack below */ + #include + #include + +@@ -920,19 +919,6 @@ retry: + } + } + +-/* XXX Tavor device performs badly with 2K MTU! 
*/ +-if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { +- struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); +- if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && +- (pcid->vendor == PCI_VENDOR_ID_MELLANOX || +- pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { +- struct ib_qp_attr attr = { +- .path_mtu = IB_MTU_1024 +- }; +- rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); +- } +-} +- + ep->rep_connected = 0; + + rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0124-xprtrdma-Allocate-missing-pagelist.patch b/linux-next-cherry-picks/0124-xprtrdma-Allocate-missing-pagelist.patch new file mode 100644 index 0000000..cdf6409 --- /dev/null +++ b/linux-next-cherry-picks/0124-xprtrdma-Allocate-missing-pagelist.patch @@ -0,0 +1,38 @@ +From 196c69989d84ab902bbe545f7bd8ce78ee74dac4 Mon Sep 17 00:00:00 2001 +From: Shirley Ma +Date: Wed, 28 May 2014 10:34:24 -0400 +Subject: [PATCH 125/132] xprtrdma: Allocate missing pagelist + +GETACL relies on transport layer to alloc memory for reply buffer. +However xprtrdma assumes that the reply buffer (pagelist) has been +pre-allocated in upper layer. This problem was reported by IOL OFA lab +test on PPC. + +Signed-off-by: Shirley Ma +Reviewed-by: Chuck Lever +Tested-by: Edward Mossman +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 6 ++++++ + 1 files changed, 6 insertions(+), 0 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index 436d229..dc4a826 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -99,6 +99,12 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, + page_base = xdrbuf->page_base & ~PAGE_MASK; + p = 0; + while (len && n < nsegs) { ++ if (!ppages[p]) { ++ /* alloc the pagelist for receiving buffer */ ++ ppages[p] = alloc_page(GFP_ATOMIC); ++ if (!ppages[p]) ++ return 0; ++ } + seg[n].mr_page = ppages[p]; + seg[n].mr_offset = (void *)(unsigned long) page_base; + seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0125-xprtrdma-Use-macros-for-reconnection-timeout-constan.patch b/linux-next-cherry-picks/0125-xprtrdma-Use-macros-for-reconnection-timeout-constan.patch new file mode 100644 index 0000000..79de6d2 --- /dev/null +++ b/linux-next-cherry-picks/0125-xprtrdma-Use-macros-for-reconnection-timeout-constan.patch @@ -0,0 +1,61 @@ +From bfaee096deaa680195df5491eb650f81051c145d Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:34:32 -0400 +Subject: [PATCH 126/132] xprtrdma: Use macros for reconnection timeout constants + +Clean up: Ensure the same max and min constant values are used +everywhere when setting reconnect timeouts. 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/transport.c | 19 ++++++++++++------- + 1 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index d18b2a3..6b84d7d 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = { + + #endif + ++#define RPCRDMA_BIND_TO (60U * HZ) ++#define RPCRDMA_INIT_REEST_TO (5U * HZ) ++#define RPCRDMA_MAX_REEST_TO (30U * HZ) ++#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) ++ + static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ + + static void +@@ -285,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args) + + /* 60 second timeout, no retries */ + xprt->timeout = &xprt_rdma_default_timeout; +- xprt->bind_timeout = (60U * HZ); +- xprt->reestablish_timeout = (5U * HZ); +- xprt->idle_timeout = (5U * 60 * HZ); ++ xprt->bind_timeout = RPCRDMA_BIND_TO; ++ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; ++ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; + + xprt->resvport = 0; /* privileged port not needed */ + xprt->tsh_size = 0; /* RPC-RDMA handles framing */ +@@ -432,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) + schedule_delayed_work(&r_xprt->rdma_connect, + xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; +- if (xprt->reestablish_timeout > (30 * HZ)) +- xprt->reestablish_timeout = (30 * HZ); +- else if (xprt->reestablish_timeout < (5 * HZ)) +- xprt->reestablish_timeout = (5 * HZ); ++ if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) ++ xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; ++ else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) ++ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; + } else { + schedule_delayed_work(&r_xprt->rdma_connect, 0); + if (!RPC_IS_ASYNC(task)) +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0126-xprtrdma-Reset-connection-timeout-after-successful-r.patch b/linux-next-cherry-picks/0126-xprtrdma-Reset-connection-timeout-after-successful-r.patch new file mode 100644 index 0000000..8a678dc --- /dev/null +++ b/linux-next-cherry-picks/0126-xprtrdma-Reset-connection-timeout-after-successful-r.patch @@ -0,0 +1,33 @@ +From 18906972aa1103c07869c9b43860a52e0e27e8e5 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:34:41 -0400 +Subject: [PATCH 127/132] xprtrdma: Reset connection timeout after successful reconnect + +If the new connection is able to make forward progress, reset the +re-establish timeout. Otherwise it keeps growing even if disconnect +events are rare. + +The same behavior as TCP is adopted: reconnect immediately if the +transport instance has been able to make some forward progress. + +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 1 + + 1 files changed, 1 insertions(+), 0 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index dc4a826..ac65b0c 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -770,6 +770,7 @@ repost: + + /* from here on, the reply is no longer an orphan */ + req->rl_reply = rep; ++ xprt->reestablish_timeout = 0; + + /* check for expected message types */ + /* The order of some of these tests is important. 
*/ +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0127-xprtrdma-Avoid-deadlock-when-credit-window-is-reset.patch b/linux-next-cherry-picks/0127-xprtrdma-Avoid-deadlock-when-credit-window-is-reset.patch new file mode 100644 index 0000000..1578dbb --- /dev/null +++ b/linux-next-cherry-picks/0127-xprtrdma-Avoid-deadlock-when-credit-window-is-reset.patch @@ -0,0 +1,104 @@ +From e7ce710a8802351bd4118c5d6136c1d850f67cf9 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:34:57 -0400 +Subject: [PATCH 128/132] xprtrdma: Avoid deadlock when credit window is reset + +Update the cwnd while processing the server's reply. Otherwise the +next task on the xprt_sending queue is still subject to the old +credit window. Currently, no task is awoken if the old congestion +window is still exceeded, even if the new window is larger, and a +deadlock results. + +This is an issue during a transport reconnect. Servers don't +normally shrink the credit window, but the client does reset it to +1 when reconnecting so the server can safely grow it again. + +As a minor optimization, remove the hack of grabbing the initial +cwnd size (which happens to be RPC_CWNDSCALE) and using that value +as the congestion scaling factor. The scaling value is invariant, +and we are better off without the multiplication operation. + +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 6 ++++++ + net/sunrpc/xprtrdma/transport.c | 19 +------------------ + net/sunrpc/xprtrdma/xprt_rdma.h | 1 - + 3 files changed, 7 insertions(+), 19 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index ac65b0c..77b84cf 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -716,6 +716,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + __be32 *iptr; + int rdmalen, status; ++ unsigned long cwnd; + + /* Check status. 
If bad, signal disconnect and return rep to pool */ + if (rep->rr_len == ~0U) { +@@ -845,6 +846,11 @@ badheader: + break; + } + ++ cwnd = xprt->cwnd; ++ xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; ++ if (xprt->cwnd > cwnd) ++ xprt_release_rqst_cong(rqst->rq_task); ++ + dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", + __func__, xprt, rqst, status); + xprt_complete_rqst(rqst->rq_task, status); +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 6b84d7d..187894b 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -448,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) + } + } + +-static int +-xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) +-{ +- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); +- int credits = atomic_read(&r_xprt->rx_buf.rb_credits); +- +- /* == RPC_CWNDSCALE @ init, but *after* setup */ +- if (r_xprt->rx_buf.rb_cwndscale == 0UL) { +- r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; +- dprintk("RPC: %s: cwndscale %lu\n", __func__, +- r_xprt->rx_buf.rb_cwndscale); +- BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); +- } +- xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; +- return xprt_reserve_xprt_cong(xprt, task); +-} +- + /* + * The RDMA allocate/free functions need the task structure as a place + * to hide the struct rpcrdma_req, which is necessary for the actual send/recv +@@ -686,7 +669,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) + */ + + static struct rpc_xprt_ops xprt_rdma_procs = { +- .reserve_xprt = xprt_rdma_reserve_xprt, ++ .reserve_xprt = xprt_reserve_xprt_cong, + .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ + .alloc_slot = xprt_alloc_slot, + .release_request = xprt_release_rqst_cong, /* ditto */ +diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h +index 0c3b88e..89e7cd4 100644 +--- a/net/sunrpc/xprtrdma/xprt_rdma.h ++++ b/net/sunrpc/xprtrdma/xprt_rdma.h +@@ -212,7 +212,6 @@ struct rpcrdma_req { + struct rpcrdma_buffer { + spinlock_t rb_lock; /* protects indexes */ + atomic_t rb_credits; /* most recent server credits */ +- unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ + int rb_max_requests;/* client max requests */ + struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ + int rb_send_index; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0128-xprtrdma-Remove-BUG_ON-call-sites.patch b/linux-next-cherry-picks/0128-xprtrdma-Remove-BUG_ON-call-sites.patch new file mode 100644 index 0000000..809584e --- /dev/null +++ b/linux-next-cherry-picks/0128-xprtrdma-Remove-BUG_ON-call-sites.patch @@ -0,0 +1,83 @@ +From c977dea22708688eae31774f70126c97aa4dfe83 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:35:06 -0400 +Subject: [PATCH 129/132] xprtrdma: Remove BUG_ON() call sites + +If an error occurs in the marshaling logic, fail the RPC request +being processed, but leave the client running. 
+ +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/transport.c | 3 ++- + net/sunrpc/xprtrdma/verbs.c | 18 ++++++++++-------- + 2 files changed, 12 insertions(+), 9 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 187894b..93fe775 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -463,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) + struct rpcrdma_req *req, *nreq; + + req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); +- BUG_ON(NULL == req); ++ if (req == NULL) ++ return NULL; + + if (size > req->rl_size) { + dprintk("RPC: %s: size %zd too large for buffer[%zd]: " +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index 515dfc1..13dbd1c 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -1302,7 +1302,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) + int i; + unsigned long flags; + +- BUG_ON(req->rl_nchunks != 0); + spin_lock_irqsave(&buffers->rb_lock, flags); + buffers->rb_send_bufs[--buffers->rb_send_index] = req; + req->rl_niovs = 0; +@@ -1535,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, + } else + post_wr = &frmr_wr; + +- /* Bump the key */ +- key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); +- ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); +- + /* Prepare FRMR WR */ + memset(&frmr_wr, 0, sizeof frmr_wr); + frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; +@@ -1549,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, + frmr_wr.wr.fast_reg.page_list_len = page_no; + frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; +- BUG_ON(frmr_wr.wr.fast_reg.length < len); ++ if (frmr_wr.wr.fast_reg.length < len) { ++ while (seg1->mr_nsegs--) ++ rpcrdma_unmap_one(ia, seg++); ++ return -EIO; ++ } ++ ++ /* Bump the key */ ++ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); ++ ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); ++ + frmr_wr.wr.fast_reg.access_flags = (writing ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ); +@@ -1709,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, + + #if RPCRDMA_PERSISTENT_REGISTRATION + case RPCRDMA_ALLPHYSICAL: +- BUG_ON(nsegs != 1); + rpcrdma_unmap_one(ia, seg); +- rc = 0; + break; + #endif + +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0129-xprtrdma-Disconnect-on-registration-failure.patch b/linux-next-cherry-picks/0129-xprtrdma-Disconnect-on-registration-failure.patch new file mode 100644 index 0000000..e943208 --- /dev/null +++ b/linux-next-cherry-picks/0129-xprtrdma-Disconnect-on-registration-failure.patch @@ -0,0 +1,215 @@ +From c93c62231cf55df4a26bd08937efeea97e6fc5e8 Mon Sep 17 00:00:00 2001 +From: Chuck Lever +Date: Wed, 28 May 2014 10:35:14 -0400 +Subject: [PATCH 130/132] xprtrdma: Disconnect on registration failure + +If rpcrdma_register_external() fails during request marshaling, the +current RPC request is killed. Instead, this RPC should be retried +after reconnecting the transport instance. + +The most likely reason for registration failure with FRMR is a +failed post_send, which would be due to a remote transport +disconnect or memory exhaustion. These issues can be recovered +by a retry. + +Problems encountered in the marshaling logic itself will not be +corrected by trying again, so these should still kill a request. 
+ +Now that we've added a clean exit for marshaling errors, take the +opportunity to defang some BUG_ON's. + +Signed-off-by: Chuck Lever +Signed-off-by: Anna Schumaker +--- + net/sunrpc/xprtrdma/rpc_rdma.c | 48 +++++++++++++++++++++++++------------- + net/sunrpc/xprtrdma/transport.c | 17 +++++++++----- + 2 files changed, 42 insertions(+), 23 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c +index 77b84cf..693966d 100644 +--- a/net/sunrpc/xprtrdma/rpc_rdma.c ++++ b/net/sunrpc/xprtrdma/rpc_rdma.c +@@ -77,6 +77,8 @@ static const char transfertypes[][12] = { + * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk + * elements. Segments are then coalesced when registered, if possible + * within the selected memreg mode. ++ * ++ * Returns positive number of segments converted, or a negative errno. + */ + + static int +@@ -103,12 +105,13 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, + /* alloc the pagelist for receiving buffer */ + ppages[p] = alloc_page(GFP_ATOMIC); + if (!ppages[p]) +- return 0; ++ return -ENOMEM; + } + seg[n].mr_page = ppages[p]; + seg[n].mr_offset = (void *)(unsigned long) page_base; + seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); +- BUG_ON(seg[n].mr_len > PAGE_SIZE); ++ if (seg[n].mr_len > PAGE_SIZE) ++ return -EIO; + len -= seg[n].mr_len; + ++n; + ++p; +@@ -117,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, + + /* Message overflows the seg array */ + if (len && n == nsegs) +- return 0; ++ return -EIO; + + if (xdrbuf->tail[0].iov_len) { + /* the rpcrdma protocol allows us to omit any trailing +@@ -126,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, + return n; + if (n == nsegs) + /* Tail remains, but we're out of segments */ +- return 0; ++ return -EIO; + seg[n].mr_page = NULL; + seg[n].mr_offset = xdrbuf->tail[0].iov_base; + seg[n].mr_len = xdrbuf->tail[0].iov_len; +@@ -167,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, + * Reply chunk (a counted array): + * N elements: + * 1 - N - HLOO - HLOO - ... - HLOO ++ * ++ * Returns positive RPC/RDMA header size, or negative errno. + */ + +-static unsigned int ++static ssize_t + rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) + { + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); +- int nsegs, nchunks = 0; ++ int n, nsegs, nchunks = 0; + unsigned int pos; + struct rpcrdma_mr_seg *seg = req->rl_segments; + struct rpcrdma_read_chunk *cur_rchunk = NULL; +@@ -201,11 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, + pos = target->head[0].iov_len; + + nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); +- if (nsegs == 0) +- return 0; ++ if (nsegs < 0) ++ return nsegs; + + do { +- int n = rpcrdma_register_external(seg, nsegs, ++ n = rpcrdma_register_external(seg, nsegs, + cur_wchunk != NULL, r_xprt); + if (n <= 0) + goto out; +@@ -277,7 +282,7 @@ out: + for (pos = 0; nchunks--;) + pos += rpcrdma_deregister_external( + &req->rl_segments[pos], r_xprt); +- return 0; ++ return n; + } + + /* +@@ -359,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) + * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. + * [2] -- optional padding. + * [3] -- if padded, header only in [1] and data here. ++ * ++ * Returns zero on success, otherwise a negative errno. 
+ */ + + int +@@ -368,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + char *base; +- size_t hdrlen, rpclen, padlen; ++ size_t rpclen, padlen; ++ ssize_t hdrlen; + enum rpcrdma_chunktype rtype, wtype; + struct rpcrdma_msg *headerp; + +@@ -439,7 +447,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) + /* The following simplification is not true forever */ + if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) + wtype = rpcrdma_noch; +- BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); ++ if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { ++ dprintk("RPC: %s: cannot marshal multiple chunk lists\n", ++ __func__); ++ return -EIO; ++ } + + hdrlen = 28; /*sizeof *headerp;*/ + padlen = 0; +@@ -464,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) + headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; + headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; + hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ +- BUG_ON(wtype != rpcrdma_noch); +- ++ if (wtype != rpcrdma_noch) { ++ dprintk("RPC: %s: invalid chunk list\n", ++ __func__); ++ return -EIO; ++ } + } else { + headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; + headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; +@@ -500,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) + hdrlen = rpcrdma_create_chunks(rqst, + &rqst->rq_rcv_buf, headerp, wtype); + } +- +- if (hdrlen == 0) +- return -1; ++ if (hdrlen < 0) ++ return hdrlen; + + dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" + " headerp 0x%p base 0x%p lkey 0x%x\n", +diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c +index 93fe775..66f91f0 100644 +--- a/net/sunrpc/xprtrdma/transport.c ++++ b/net/sunrpc/xprtrdma/transport.c +@@ -595,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task) + struct rpc_xprt *xprt = rqst->rq_xprt; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); ++ int rc; + +- /* marshal the send itself */ +- if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { +- r_xprt->rx_stats.failed_marshal_count++; +- dprintk("RPC: %s: rpcrdma_marshal_req failed\n", +- __func__); +- return -EIO; ++ if (req->rl_niovs == 0) { ++ rc = rpcrdma_marshal_req(rqst); ++ if (rc < 0) ++ goto failed_marshal; + } + + if (req->rl_reply == NULL) /* e.g. reconnection */ +@@ -625,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task) + rqst->rq_bytes_sent = 0; + return 0; + ++failed_marshal: ++ r_xprt->rx_stats.failed_marshal_count++; ++ dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", ++ __func__, rc); ++ if (rc == -EIO) ++ return -EIO; + drop_connection: + xprt_disconnect_done(xprt); + return -ENOTCONN; /* implies disconnect */ +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0130-svcrdma-refactor-marshalling-logic.patch b/linux-next-cherry-picks/0130-svcrdma-refactor-marshalling-logic.patch new file mode 100644 index 0000000..aac5be8 --- /dev/null +++ b/linux-next-cherry-picks/0130-svcrdma-refactor-marshalling-logic.patch @@ -0,0 +1,1243 @@ +From 0bf4828983dff062cd502f27ab8644b32774e72e Mon Sep 17 00:00:00 2001 +From: Steve Wise +Date: Wed, 28 May 2014 15:12:01 -0500 +Subject: [PATCH 131/132] svcrdma: refactor marshalling logic + +This patch refactors the NFSRDMA server marshalling logic to +remove the intermediary map structures. It also fixes an existing bug +where the NFSRDMA server was not minding the device fast register page +list length limitations. 
+ +Signed-off-by: Tom Tucker +Signed-off-by: Steve Wise +--- + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 643 +++++++++++++----------------- + net/sunrpc/xprtrdma/svc_rdma_sendto.c | 230 +---------- + net/sunrpc/xprtrdma/svc_rdma_transport.c | 62 ++-- + 3 files changed, 331 insertions(+), 604 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +index 8d904e4..52d9f2c 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two +@@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, + + /* Set up the XDR head */ + rqstp->rq_arg.head[0].iov_base = page_address(page); +- rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); ++ rqstp->rq_arg.head[0].iov_len = ++ min_t(size_t, byte_count, ctxt->sge[0].length); + rqstp->rq_arg.len = byte_count; + rqstp->rq_arg.buflen = byte_count; + +@@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, + page = ctxt->pages[sge_no]; + put_page(rqstp->rq_pages[sge_no]); + rqstp->rq_pages[sge_no] = page; +- bc -= min(bc, ctxt->sge[sge_no].length); ++ bc -= min_t(u32, bc, ctxt->sge[sge_no].length); + rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; + sge_no++; + } +@@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, + rqstp->rq_arg.tail[0].iov_len = 0; + } + +-/* Encode a read-chunk-list as an array of IB SGE +- * +- * Assumptions: +- * - chunk[0]->position points to pages[0] at an offset of 0 +- * - pages[] is not physically or virtually contiguous and consists of +- * PAGE_SIZE elements. +- * +- * Output: +- * - sge array pointing into pages[] array. 
+- * - chunk_sge array specifying sge index and count for each +- * chunk in the read list +- * +- */ +-static int map_read_chunks(struct svcxprt_rdma *xprt, +- struct svc_rqst *rqstp, +- struct svc_rdma_op_ctxt *head, +- struct rpcrdma_msg *rmsgp, +- struct svc_rdma_req_map *rpl_map, +- struct svc_rdma_req_map *chl_map, +- int ch_count, +- int byte_count) ++static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) + { +- int sge_no; +- int sge_bytes; +- int page_off; +- int page_no; +- int ch_bytes; +- int ch_no; +- struct rpcrdma_read_chunk *ch; ++ if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == ++ RDMA_TRANSPORT_IWARP) ++ return 1; ++ else ++ return min_t(int, sge_count, xprt->sc_max_sge); ++} + +- sge_no = 0; +- page_no = 0; +- page_off = 0; +- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; +- ch_no = 0; +- ch_bytes = ntohl(ch->rc_target.rs_length); +- head->arg.head[0] = rqstp->rq_arg.head[0]; +- head->arg.tail[0] = rqstp->rq_arg.tail[0]; +- head->arg.pages = &head->pages[head->count]; +- head->hdr_count = head->count; /* save count of hdr pages */ +- head->arg.page_base = 0; +- head->arg.page_len = ch_bytes; +- head->arg.len = rqstp->rq_arg.len + ch_bytes; +- head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; +- head->count++; +- chl_map->ch[0].start = 0; +- while (byte_count) { +- rpl_map->sge[sge_no].iov_base = +- page_address(rqstp->rq_arg.pages[page_no]) + page_off; +- sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); +- rpl_map->sge[sge_no].iov_len = sge_bytes; +- /* +- * Don't bump head->count here because the same page +- * may be used by multiple SGE. +- */ +- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; +- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; ++typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, ++ struct svc_rqst *rqstp, ++ struct svc_rdma_op_ctxt *head, ++ int *page_no, ++ u32 *page_offset, ++ u32 rs_handle, ++ u32 rs_length, ++ u64 rs_offset, ++ int last); ++ ++/* Issue an RDMA_READ using the local lkey to map the data sink */ ++static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, ++ struct svc_rqst *rqstp, ++ struct svc_rdma_op_ctxt *head, ++ int *page_no, ++ u32 *page_offset, ++ u32 rs_handle, ++ u32 rs_length, ++ u64 rs_offset, ++ int last) ++{ ++ struct ib_send_wr read_wr; ++ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; ++ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); ++ int ret, read, pno; ++ u32 pg_off = *page_offset; ++ u32 pg_no = *page_no; ++ ++ ctxt->direction = DMA_FROM_DEVICE; ++ ctxt->read_hdr = head; ++ pages_needed = ++ min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); ++ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); ++ ++ for (pno = 0; pno < pages_needed; pno++) { ++ int len = min_t(int, rs_length, PAGE_SIZE - pg_off); ++ ++ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; ++ head->arg.page_len += len; ++ head->arg.len += len; ++ if (!pg_off) ++ head->count++; ++ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; + rqstp->rq_next_page = rqstp->rq_respages + 1; ++ ctxt->sge[pno].addr = ++ ib_dma_map_page(xprt->sc_cm_id->device, ++ head->arg.pages[pg_no], pg_off, ++ PAGE_SIZE - pg_off, ++ DMA_FROM_DEVICE); ++ ret = ib_dma_mapping_error(xprt->sc_cm_id->device, ++ ctxt->sge[pno].addr); ++ if (ret) ++ goto err; ++ atomic_inc(&xprt->sc_dma_used); + +- byte_count -= sge_bytes; +- ch_bytes -= sge_bytes; +- sge_no++; +- /* +- * If all bytes for this chunk have been mapped to an +- * SGE, move to the next 
SGE +- */ +- if (ch_bytes == 0) { +- chl_map->ch[ch_no].count = +- sge_no - chl_map->ch[ch_no].start; +- ch_no++; +- ch++; +- chl_map->ch[ch_no].start = sge_no; +- ch_bytes = ntohl(ch->rc_target.rs_length); +- /* If bytes remaining account for next chunk */ +- if (byte_count) { +- head->arg.page_len += ch_bytes; +- head->arg.len += ch_bytes; +- head->arg.buflen += ch_bytes; +- } ++ /* The lkey here is either a local dma lkey or a dma_mr lkey */ ++ ctxt->sge[pno].lkey = xprt->sc_dma_lkey; ++ ctxt->sge[pno].length = len; ++ ctxt->count++; ++ ++ /* adjust offset and wrap to next page if needed */ ++ pg_off += len; ++ if (pg_off == PAGE_SIZE) { ++ pg_off = 0; ++ pg_no++; + } +- /* +- * If this SGE consumed all of the page, move to the +- * next page +- */ +- if ((sge_bytes + page_off) == PAGE_SIZE) { +- page_no++; +- page_off = 0; +- /* +- * If there are still bytes left to map, bump +- * the page count +- */ +- if (byte_count) +- head->count++; +- } else +- page_off += sge_bytes; ++ rs_length -= len; + } +- BUG_ON(byte_count != 0); +- return sge_no; ++ ++ if (last && rs_length == 0) ++ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); ++ else ++ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); ++ ++ memset(&read_wr, 0, sizeof(read_wr)); ++ read_wr.wr_id = (unsigned long)ctxt; ++ read_wr.opcode = IB_WR_RDMA_READ; ++ ctxt->wr_op = read_wr.opcode; ++ read_wr.send_flags = IB_SEND_SIGNALED; ++ read_wr.wr.rdma.rkey = rs_handle; ++ read_wr.wr.rdma.remote_addr = rs_offset; ++ read_wr.sg_list = ctxt->sge; ++ read_wr.num_sge = pages_needed; ++ ++ ret = svc_rdma_send(xprt, &read_wr); ++ if (ret) { ++ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); ++ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); ++ goto err; ++ } ++ ++ /* return current location in page array */ ++ *page_no = pg_no; ++ *page_offset = pg_off; ++ ret = read; ++ atomic_inc(&rdma_stat_read); ++ return ret; ++ err: ++ svc_rdma_unmap_dma(ctxt); ++ svc_rdma_put_context(ctxt, 0); ++ return ret; + } + +-/* Map a read-chunk-list to an XDR and fast register the page-list. +- * +- * Assumptions: +- * - chunk[0] position points to pages[0] at an offset of 0 +- * - pages[] will be made physically contiguous by creating a one-off memory +- * region using the fastreg verb. +- * - byte_count is # of bytes in read-chunk-list +- * - ch_count is # of chunks in read-chunk-list +- * +- * Output: +- * - sge array pointing into pages[] array. 
+- * - chunk_sge array specifying sge index and count for each +- * chunk in the read list +- */ +-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, ++/* Issue an RDMA_READ using an FRMR to map the data sink */ ++static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, +- struct rpcrdma_msg *rmsgp, +- struct svc_rdma_req_map *rpl_map, +- struct svc_rdma_req_map *chl_map, +- int ch_count, +- int byte_count) ++ int *page_no, ++ u32 *page_offset, ++ u32 rs_handle, ++ u32 rs_length, ++ u64 rs_offset, ++ int last) + { +- int page_no; +- int ch_no; +- u32 offset; +- struct rpcrdma_read_chunk *ch; +- struct svc_rdma_fastreg_mr *frmr; +- int ret = 0; ++ struct ib_send_wr read_wr; ++ struct ib_send_wr inv_wr; ++ struct ib_send_wr fastreg_wr; ++ u8 key; ++ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; ++ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); ++ struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); ++ int ret, read, pno; ++ u32 pg_off = *page_offset; ++ u32 pg_no = *page_no; + +- frmr = svc_rdma_get_frmr(xprt); + if (IS_ERR(frmr)) + return -ENOMEM; + +- head->frmr = frmr; +- head->arg.head[0] = rqstp->rq_arg.head[0]; +- head->arg.tail[0] = rqstp->rq_arg.tail[0]; +- head->arg.pages = &head->pages[head->count]; +- head->hdr_count = head->count; /* save count of hdr pages */ +- head->arg.page_base = 0; +- head->arg.page_len = byte_count; +- head->arg.len = rqstp->rq_arg.len + byte_count; +- head->arg.buflen = rqstp->rq_arg.buflen + byte_count; ++ ctxt->direction = DMA_FROM_DEVICE; ++ ctxt->frmr = frmr; ++ pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); ++ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + +- /* Fast register the page list */ +- frmr->kva = page_address(rqstp->rq_arg.pages[0]); ++ frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); + frmr->direction = DMA_FROM_DEVICE; + frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); +- frmr->map_len = byte_count; +- frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; +- for (page_no = 0; page_no < frmr->page_list_len; page_no++) { +- frmr->page_list->page_list[page_no] = ++ frmr->map_len = pages_needed << PAGE_SHIFT; ++ frmr->page_list_len = pages_needed; ++ ++ for (pno = 0; pno < pages_needed; pno++) { ++ int len = min_t(int, rs_length, PAGE_SIZE - pg_off); ++ ++ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; ++ head->arg.page_len += len; ++ head->arg.len += len; ++ if (!pg_off) ++ head->count++; ++ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; ++ rqstp->rq_next_page = rqstp->rq_respages + 1; ++ frmr->page_list->page_list[pno] = + ib_dma_map_page(xprt->sc_cm_id->device, +- rqstp->rq_arg.pages[page_no], 0, ++ head->arg.pages[pg_no], 0, + PAGE_SIZE, DMA_FROM_DEVICE); +- if (ib_dma_mapping_error(xprt->sc_cm_id->device, +- frmr->page_list->page_list[page_no])) +- goto fatal_err; ++ ret = ib_dma_mapping_error(xprt->sc_cm_id->device, ++ frmr->page_list->page_list[pno]); ++ if (ret) ++ goto err; + atomic_inc(&xprt->sc_dma_used); +- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; +- } +- head->count += page_no; +- +- /* rq_respages points one past arg pages */ +- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; +- rqstp->rq_next_page = rqstp->rq_respages + 1; + +- /* Create the reply and chunk maps */ +- offset = 0; +- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; +- for (ch_no = 0; ch_no < ch_count; ch_no++) { +- int len = 
ntohl(ch->rc_target.rs_length); +- rpl_map->sge[ch_no].iov_base = frmr->kva + offset; +- rpl_map->sge[ch_no].iov_len = len; +- chl_map->ch[ch_no].count = 1; +- chl_map->ch[ch_no].start = ch_no; +- offset += len; +- ch++; ++ /* adjust offset and wrap to next page if needed */ ++ pg_off += len; ++ if (pg_off == PAGE_SIZE) { ++ pg_off = 0; ++ pg_no++; ++ } ++ rs_length -= len; + } + +- ret = svc_rdma_fastreg(xprt, frmr); +- if (ret) +- goto fatal_err; +- +- return ch_no; +- +- fatal_err: +- printk("svcrdma: error fast registering xdr for xprt %p", xprt); +- svc_rdma_put_frmr(xprt, frmr); +- return -EIO; +-} +- +-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, +- struct svc_rdma_op_ctxt *ctxt, +- struct svc_rdma_fastreg_mr *frmr, +- struct kvec *vec, +- u64 *sgl_offset, +- int count) +-{ +- int i; +- unsigned long off; ++ if (last && rs_length == 0) ++ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); ++ else ++ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + +- ctxt->count = count; +- ctxt->direction = DMA_FROM_DEVICE; +- for (i = 0; i < count; i++) { +- ctxt->sge[i].length = 0; /* in case map fails */ +- if (!frmr) { +- BUG_ON(!virt_to_page(vec[i].iov_base)); +- off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; +- ctxt->sge[i].addr = +- ib_dma_map_page(xprt->sc_cm_id->device, +- virt_to_page(vec[i].iov_base), +- off, +- vec[i].iov_len, +- DMA_FROM_DEVICE); +- if (ib_dma_mapping_error(xprt->sc_cm_id->device, +- ctxt->sge[i].addr)) +- return -EINVAL; +- ctxt->sge[i].lkey = xprt->sc_dma_lkey; +- atomic_inc(&xprt->sc_dma_used); +- } else { +- ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; +- ctxt->sge[i].lkey = frmr->mr->lkey; +- } +- ctxt->sge[i].length = vec[i].iov_len; +- *sgl_offset = *sgl_offset + vec[i].iov_len; ++ /* Bump the key */ ++ key = (u8)(frmr->mr->lkey & 0x000000FF); ++ ib_update_fast_reg_key(frmr->mr, ++key); ++ ++ ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; ++ ctxt->sge[0].lkey = frmr->mr->lkey; ++ ctxt->sge[0].length = read; ++ ctxt->count = 1; ++ ctxt->read_hdr = head; ++ ++ /* Prepare FASTREG WR */ ++ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); ++ fastreg_wr.opcode = IB_WR_FAST_REG_MR; ++ fastreg_wr.send_flags = IB_SEND_SIGNALED; ++ fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; ++ fastreg_wr.wr.fast_reg.page_list = frmr->page_list; ++ fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; ++ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; ++ fastreg_wr.wr.fast_reg.length = frmr->map_len; ++ fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; ++ fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; ++ fastreg_wr.next = &read_wr; ++ ++ /* Prepare RDMA_READ */ ++ memset(&read_wr, 0, sizeof(read_wr)); ++ read_wr.send_flags = IB_SEND_SIGNALED; ++ read_wr.wr.rdma.rkey = rs_handle; ++ read_wr.wr.rdma.remote_addr = rs_offset; ++ read_wr.sg_list = ctxt->sge; ++ read_wr.num_sge = 1; ++ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { ++ read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; ++ read_wr.wr_id = (unsigned long)ctxt; ++ read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; ++ } else { ++ read_wr.opcode = IB_WR_RDMA_READ; ++ read_wr.next = &inv_wr; ++ /* Prepare invalidate */ ++ memset(&inv_wr, 0, sizeof(inv_wr)); ++ inv_wr.wr_id = (unsigned long)ctxt; ++ inv_wr.opcode = IB_WR_LOCAL_INV; ++ inv_wr.send_flags = IB_SEND_SIGNALED; ++ inv_wr.ex.invalidate_rkey = frmr->mr->lkey; ++ } ++ ctxt->wr_op = read_wr.opcode; ++ ++ /* Post the chain */ ++ ret = svc_rdma_send(xprt, &fastreg_wr); ++ if (ret) { ++ pr_err("svcrdma: Error %d posting 
RDMA_READ\n", ret); ++ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); ++ goto err; + } +- return 0; +-} + +-static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) +-{ +- if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == +- RDMA_TRANSPORT_IWARP) && +- sge_count > 1) +- return 1; +- else +- return min_t(int, sge_count, xprt->sc_max_sge); ++ /* return current location in page array */ ++ *page_no = pg_no; ++ *page_offset = pg_off; ++ ret = read; ++ atomic_inc(&rdma_stat_read); ++ return ret; ++ err: ++ svc_rdma_unmap_dma(ctxt); ++ svc_rdma_put_context(ctxt, 0); ++ svc_rdma_put_frmr(xprt, frmr); ++ return ret; + } + +-/* +- * Use RDMA_READ to read data from the advertised client buffer into the +- * XDR stream starting at rq_arg.head[0].iov_base. +- * Each chunk in the array +- * contains the following fields: +- * discrim - '1', This isn't used for data placement +- * position - The xdr stream offset (the same for every chunk) +- * handle - RMR for client memory region +- * length - data transfer length +- * offset - 64 bit tagged offset in remote memory region +- * +- * On our side, we need to read into a pagelist. The first page immediately +- * follows the RPC header. +- * +- * This function returns: +- * 0 - No error and no read-list found. +- * +- * 1 - Successful read-list processing. The data is not yet in +- * the pagelist and therefore the RPC request must be deferred. The +- * I/O completion will enqueue the transport again and +- * svc_rdma_recvfrom will complete the request. +- * +- * <0 - Error processing/posting read-list. +- * +- * NOTE: The ctxt must not be touched after the last WR has been posted +- * because the I/O completion processing may occur on another +- * processor and free / modify the context. Ne touche pas! +- */ +-static int rdma_read_xdr(struct svcxprt_rdma *xprt, +- struct rpcrdma_msg *rmsgp, +- struct svc_rqst *rqstp, +- struct svc_rdma_op_ctxt *hdr_ctxt) ++static int rdma_read_chunks(struct svcxprt_rdma *xprt, ++ struct rpcrdma_msg *rmsgp, ++ struct svc_rqst *rqstp, ++ struct svc_rdma_op_ctxt *head) + { +- struct ib_send_wr read_wr; +- struct ib_send_wr inv_wr; +- int err = 0; +- int ch_no; +- int ch_count; +- int byte_count; +- int sge_count; +- u64 sgl_offset; ++ int page_no, ch_count, ret; + struct rpcrdma_read_chunk *ch; +- struct svc_rdma_op_ctxt *ctxt = NULL; +- struct svc_rdma_req_map *rpl_map; +- struct svc_rdma_req_map *chl_map; ++ u32 page_offset, byte_count; ++ u64 rs_offset; ++ rdma_reader_fn reader; + + /* If no read list is present, return 0 */ + ch = svc_rdma_get_read_chunk(rmsgp); +@@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, + if (ch_count > RPCSVC_MAXPAGES) + return -EINVAL; + +- /* Allocate temporary reply and chunk maps */ +- rpl_map = svc_rdma_get_req_map(); +- chl_map = svc_rdma_get_req_map(); ++ /* The request is completed when the RDMA_READs complete. The ++ * head context keeps all the pages that comprise the ++ * request. 
++ */ ++ head->arg.head[0] = rqstp->rq_arg.head[0]; ++ head->arg.tail[0] = rqstp->rq_arg.tail[0]; ++ head->arg.pages = &head->pages[head->count]; ++ head->hdr_count = head->count; ++ head->arg.page_base = 0; ++ head->arg.page_len = 0; ++ head->arg.len = rqstp->rq_arg.len; ++ head->arg.buflen = rqstp->rq_arg.buflen; + +- if (!xprt->sc_frmr_pg_list_len) +- sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, +- rpl_map, chl_map, ch_count, +- byte_count); ++ /* Use FRMR if supported */ ++ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ++ reader = rdma_read_chunk_frmr; + else +- sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, +- rpl_map, chl_map, ch_count, +- byte_count); +- if (sge_count < 0) { +- err = -EIO; +- goto out; +- } +- +- sgl_offset = 0; +- ch_no = 0; ++ reader = rdma_read_chunk_lcl; + ++ page_no = 0; page_offset = 0; + for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; +- ch->rc_discrim != 0; ch++, ch_no++) { +- u64 rs_offset; +-next_sge: +- ctxt = svc_rdma_get_context(xprt); +- ctxt->direction = DMA_FROM_DEVICE; +- ctxt->frmr = hdr_ctxt->frmr; +- ctxt->read_hdr = NULL; +- clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); +- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); ++ ch->rc_discrim != 0; ch++) { + +- /* Prepare READ WR */ +- memset(&read_wr, 0, sizeof read_wr); +- read_wr.wr_id = (unsigned long)ctxt; +- read_wr.opcode = IB_WR_RDMA_READ; +- ctxt->wr_op = read_wr.opcode; +- read_wr.send_flags = IB_SEND_SIGNALED; +- read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle); + xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, + &rs_offset); +- read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset; +- read_wr.sg_list = ctxt->sge; +- read_wr.num_sge = +- rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); +- err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, +- &rpl_map->sge[chl_map->ch[ch_no].start], +- &sgl_offset, +- read_wr.num_sge); +- if (err) { +- svc_rdma_unmap_dma(ctxt); +- svc_rdma_put_context(ctxt, 0); +- goto out; +- } +- if (((ch+1)->rc_discrim == 0) && +- (read_wr.num_sge == chl_map->ch[ch_no].count)) { +- /* +- * Mark the last RDMA_READ with a bit to +- * indicate all RPC data has been fetched from +- * the client and the RPC needs to be enqueued. +- */ +- set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); +- if (hdr_ctxt->frmr) { +- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); +- /* +- * Invalidate the local MR used to map the data +- * sink. 
+- */ +- if (xprt->sc_dev_caps & +- SVCRDMA_DEVCAP_READ_W_INV) { +- read_wr.opcode = +- IB_WR_RDMA_READ_WITH_INV; +- ctxt->wr_op = read_wr.opcode; +- read_wr.ex.invalidate_rkey = +- ctxt->frmr->mr->lkey; +- } else { +- /* Prepare INVALIDATE WR */ +- memset(&inv_wr, 0, sizeof inv_wr); +- inv_wr.opcode = IB_WR_LOCAL_INV; +- inv_wr.send_flags = IB_SEND_SIGNALED; +- inv_wr.ex.invalidate_rkey = +- hdr_ctxt->frmr->mr->lkey; +- read_wr.next = &inv_wr; +- } +- } +- ctxt->read_hdr = hdr_ctxt; +- } +- /* Post the read */ +- err = svc_rdma_send(xprt, &read_wr); +- if (err) { +- printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", +- err); +- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); +- svc_rdma_unmap_dma(ctxt); +- svc_rdma_put_context(ctxt, 0); +- goto out; ++ byte_count = ntohl(ch->rc_target.rs_length); ++ ++ while (byte_count > 0) { ++ ret = reader(xprt, rqstp, head, ++ &page_no, &page_offset, ++ ntohl(ch->rc_target.rs_handle), ++ byte_count, rs_offset, ++ ((ch+1)->rc_discrim == 0) /* last */ ++ ); ++ if (ret < 0) ++ goto err; ++ byte_count -= ret; ++ rs_offset += ret; ++ head->arg.buflen += ret; + } +- atomic_inc(&rdma_stat_read); +- +- if (read_wr.num_sge < chl_map->ch[ch_no].count) { +- chl_map->ch[ch_no].count -= read_wr.num_sge; +- chl_map->ch[ch_no].start += read_wr.num_sge; +- goto next_sge; +- } +- sgl_offset = 0; +- err = 1; + } +- +- out: +- svc_rdma_put_req_map(rpl_map); +- svc_rdma_put_req_map(chl_map); +- ++ ret = 1; ++ err: + /* Detach arg pages. svc_recv will replenish them */ +- for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) +- rqstp->rq_pages[ch_no] = NULL; ++ for (page_no = 0; ++ &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) ++ rqstp->rq_pages[page_no] = NULL; + +- return err; ++ return ret; + } + + static int rdma_read_complete(struct svc_rqst *rqstp, +@@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + struct svc_rdma_op_ctxt, + dto_q); + list_del_init(&ctxt->dto_q); +- } +- if (ctxt) { + spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); + return rdma_read_complete(rqstp, ctxt); +- } +- +- if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { ++ } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { + ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, + struct svc_rdma_op_ctxt, + dto_q); +@@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) + goto close_out; + +- BUG_ON(ret); + goto out; + } + dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", +@@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) + } + + /* Read read-list data. */ +- ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); ++ ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); + if (ret > 0) { + /* read-list posted, defer until data received from client. */ + goto defer; +- } +- if (ret < 0) { ++ } else if (ret < 0) { + /* Post of read-list failed, free context. */ + svc_rdma_put_context(ctxt, 1); + return 0; +diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +index 7e024a5..49fd21a 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two +@@ -49,152 +50,6 @@ + + #define RPCDBG_FACILITY RPCDBG_SVCXPRT + +-/* Encode an XDR as an array of IB SGE +- * +- * Assumptions: +- * - head[0] is physically contiguous. +- * - tail[0] is physically contiguous. +- * - pages[] is not physically or virtually contiguous and consists of +- * PAGE_SIZE elements. +- * +- * Output: +- * SGE[0] reserved for RCPRDMA header +- * SGE[1] data from xdr->head[] +- * SGE[2..sge_count-2] data from xdr->pages[] +- * SGE[sge_count-1] data from xdr->tail. +- * +- * The max SGE we need is the length of the XDR / pagesize + one for +- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES +- * reserves a page for both the request and the reply header, and this +- * array is only concerned with the reply we are assured that we have +- * on extra page for the RPCRMDA header. +- */ +-static int fast_reg_xdr(struct svcxprt_rdma *xprt, +- struct xdr_buf *xdr, +- struct svc_rdma_req_map *vec) +-{ +- int sge_no; +- u32 sge_bytes; +- u32 page_bytes; +- u32 page_off; +- int page_no = 0; +- u8 *frva; +- struct svc_rdma_fastreg_mr *frmr; +- +- frmr = svc_rdma_get_frmr(xprt); +- if (IS_ERR(frmr)) +- return -ENOMEM; +- vec->frmr = frmr; +- +- /* Skip the RPCRDMA header */ +- sge_no = 1; +- +- /* Map the head. */ +- frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); +- vec->sge[sge_no].iov_base = xdr->head[0].iov_base; +- vec->sge[sge_no].iov_len = xdr->head[0].iov_len; +- vec->count = 2; +- sge_no++; +- +- /* Map the XDR head */ +- frmr->kva = frva; +- frmr->direction = DMA_TO_DEVICE; +- frmr->access_flags = 0; +- frmr->map_len = PAGE_SIZE; +- frmr->page_list_len = 1; +- page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK; +- frmr->page_list->page_list[page_no] = +- ib_dma_map_page(xprt->sc_cm_id->device, +- virt_to_page(xdr->head[0].iov_base), +- page_off, +- PAGE_SIZE - page_off, +- DMA_TO_DEVICE); +- if (ib_dma_mapping_error(xprt->sc_cm_id->device, +- frmr->page_list->page_list[page_no])) +- goto fatal_err; +- atomic_inc(&xprt->sc_dma_used); +- +- /* Map the XDR page list */ +- page_off = xdr->page_base; +- page_bytes = xdr->page_len + page_off; +- if (!page_bytes) +- goto encode_tail; +- +- /* Map the pages */ +- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; +- vec->sge[sge_no].iov_len = page_bytes; +- sge_no++; +- while (page_bytes) { +- struct page *page; +- +- page = xdr->pages[page_no++]; +- sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); +- page_bytes -= sge_bytes; +- +- frmr->page_list->page_list[page_no] = +- ib_dma_map_page(xprt->sc_cm_id->device, +- page, page_off, +- sge_bytes, DMA_TO_DEVICE); +- if (ib_dma_mapping_error(xprt->sc_cm_id->device, +- frmr->page_list->page_list[page_no])) +- goto fatal_err; +- +- atomic_inc(&xprt->sc_dma_used); +- page_off = 0; /* reset for next time through loop */ +- frmr->map_len += PAGE_SIZE; +- frmr->page_list_len++; +- } +- vec->count++; +- +- encode_tail: +- /* Map tail */ +- if (0 == xdr->tail[0].iov_len) +- goto done; +- +- vec->count++; +- vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; +- +- if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == +- ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { +- /* +- * If head and tail use the same page, we don't need +- * to map it again. 
+- */ +- vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; +- } else { +- void *va; +- +- /* Map another page for the tail */ +- page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; +- va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); +- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; +- +- frmr->page_list->page_list[page_no] = +- ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va), +- page_off, +- PAGE_SIZE, +- DMA_TO_DEVICE); +- if (ib_dma_mapping_error(xprt->sc_cm_id->device, +- frmr->page_list->page_list[page_no])) +- goto fatal_err; +- atomic_inc(&xprt->sc_dma_used); +- frmr->map_len += PAGE_SIZE; +- frmr->page_list_len++; +- } +- +- done: +- if (svc_rdma_fastreg(xprt, frmr)) +- goto fatal_err; +- +- return 0; +- +- fatal_err: +- printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); +- vec->frmr = NULL; +- svc_rdma_put_frmr(xprt, frmr); +- return -EIO; +-} +- + static int map_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) +@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, + BUG_ON(xdr->len != + (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + +- if (xprt->sc_frmr_pg_list_len) +- return fast_reg_xdr(xprt, xdr, vec); +- + /* Skip the first sge, this is for the RPCRDMA header */ + sge_no = 1; + +@@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, + } + + /* Assumptions: +- * - We are using FRMR +- * - or - + * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE + */ + static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, +@@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, + sge_bytes = min_t(size_t, + bc, vec->sge[xdr_sge_no].iov_len-sge_off); + sge[sge_no].length = sge_bytes; +- if (!vec->frmr) { +- sge[sge_no].addr = +- dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, +- sge_bytes, DMA_TO_DEVICE); +- xdr_off += sge_bytes; +- if (ib_dma_mapping_error(xprt->sc_cm_id->device, +- sge[sge_no].addr)) +- goto err; +- atomic_inc(&xprt->sc_dma_used); +- sge[sge_no].lkey = xprt->sc_dma_lkey; +- } else { +- sge[sge_no].addr = (unsigned long) +- vec->sge[xdr_sge_no].iov_base + sge_off; +- sge[sge_no].lkey = vec->frmr->mr->lkey; +- } ++ sge[sge_no].addr = ++ dma_map_xdr(xprt, &rqstp->rq_res, xdr_off, ++ sge_bytes, DMA_TO_DEVICE); ++ xdr_off += sge_bytes; ++ if (ib_dma_mapping_error(xprt->sc_cm_id->device, ++ sge[sge_no].addr)) ++ goto err; ++ atomic_inc(&xprt->sc_dma_used); ++ sge[sge_no].lkey = xprt->sc_dma_lkey; + ctxt->count++; +- ctxt->frmr = vec->frmr; + sge_off = 0; + sge_no++; + xdr_sge_no++; +@@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, + return 0; + err: + svc_rdma_unmap_dma(ctxt); +- svc_rdma_put_frmr(xprt, vec->frmr); + svc_rdma_put_context(ctxt, 0); + /* Fatal error, close transport */ + return -EIO; +@@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, + res_ary = (struct rpcrdma_write_array *) + &rdma_resp->rm_body.rm_chunks[1]; + +- if (vec->frmr) +- max_write = vec->frmr->map_len; +- else +- max_write = xprt->sc_max_sge * PAGE_SIZE; ++ max_write = xprt->sc_max_sge * PAGE_SIZE; + + /* Write chunks start at the pagelist */ + for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; +@@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, + res_ary = (struct rpcrdma_write_array *) + &rdma_resp->rm_body.rm_chunks[2]; + +- if (vec->frmr) +- max_write = 
vec->frmr->map_len; +- else +- max_write = xprt->sc_max_sge * PAGE_SIZE; ++ max_write = xprt->sc_max_sge * PAGE_SIZE; + + /* xdr offset starts at RPC message */ + nchunks = ntohl(arg_ary->wc_nchunks); +@@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma, + int byte_count) + { + struct ib_send_wr send_wr; +- struct ib_send_wr inv_wr; + int sge_no; + int sge_bytes; + int page_no; +@@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma, + "svcrdma: could not post a receive buffer, err=%d." + "Closing transport %p.\n", ret, rdma); + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); +- svc_rdma_put_frmr(rdma, vec->frmr); + svc_rdma_put_context(ctxt, 0); + return -ENOTCONN; + } +@@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma, + /* Prepare the context */ + ctxt->pages[0] = page; + ctxt->count = 1; +- ctxt->frmr = vec->frmr; +- if (vec->frmr) +- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); +- else +- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + + /* Prepare the SGE for the RPCRDMA Header */ + ctxt->sge[0].lkey = rdma->sc_dma_lkey; +@@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma, + int xdr_off = 0; + sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); + byte_count -= sge_bytes; +- if (!vec->frmr) { +- ctxt->sge[sge_no].addr = +- dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, +- sge_bytes, DMA_TO_DEVICE); +- xdr_off += sge_bytes; +- if (ib_dma_mapping_error(rdma->sc_cm_id->device, +- ctxt->sge[sge_no].addr)) +- goto err; +- atomic_inc(&rdma->sc_dma_used); +- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; +- } else { +- ctxt->sge[sge_no].addr = (unsigned long) +- vec->sge[sge_no].iov_base; +- ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; +- } ++ ctxt->sge[sge_no].addr = ++ dma_map_xdr(rdma, &rqstp->rq_res, xdr_off, ++ sge_bytes, DMA_TO_DEVICE); ++ xdr_off += sge_bytes; ++ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ++ ctxt->sge[sge_no].addr)) ++ goto err; ++ atomic_inc(&rdma->sc_dma_used); ++ ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; + ctxt->sge[sge_no].length = sge_bytes; + } + BUG_ON(byte_count != 0); +@@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma, + ctxt->sge[page_no+1].length = 0; + } + rqstp->rq_next_page = rqstp->rq_respages + 1; ++ + BUG_ON(sge_no > rdma->sc_max_sge); + memset(&send_wr, 0, sizeof send_wr); + ctxt->wr_op = IB_WR_SEND; +@@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma, + send_wr.num_sge = sge_no; + send_wr.opcode = IB_WR_SEND; + send_wr.send_flags = IB_SEND_SIGNALED; +- if (vec->frmr) { +- /* Prepare INVALIDATE WR */ +- memset(&inv_wr, 0, sizeof inv_wr); +- inv_wr.opcode = IB_WR_LOCAL_INV; +- inv_wr.send_flags = IB_SEND_SIGNALED; +- inv_wr.ex.invalidate_rkey = +- vec->frmr->mr->lkey; +- send_wr.next = &inv_wr; +- } + + ret = svc_rdma_send(rdma, &send_wr); + if (ret) +@@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma, + + err: + svc_rdma_unmap_dma(ctxt); +- svc_rdma_put_frmr(rdma, vec->frmr); + svc_rdma_put_context(ctxt, 1); + return -EIO; + } +diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c +index 02db8d9..e7323fb 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c +@@ -1,4 +1,5 @@ + /* ++ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two +@@ -162,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); + } + map->count = 0; +- map->frmr = NULL; + return map; + } + +@@ -338,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt, + + switch (ctxt->wr_op) { + case IB_WR_SEND: +- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) +- svc_rdma_put_frmr(xprt, ctxt->frmr); ++ BUG_ON(ctxt->frmr); + svc_rdma_put_context(ctxt, 1); + break; + + case IB_WR_RDMA_WRITE: ++ BUG_ON(ctxt->frmr); + svc_rdma_put_context(ctxt, 0); + break; + + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: ++ svc_rdma_put_frmr(xprt, ctxt->frmr); + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); +- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) +- svc_rdma_put_frmr(xprt, ctxt->frmr); + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, +@@ -365,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt, + break; + + default: ++ BUG_ON(1); + printk(KERN_ERR "svcrdma: unexpected completion type, " + "opcode=%d\n", + ctxt->wr_op); +@@ -380,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt, + static void sq_cq_reap(struct svcxprt_rdma *xprt) + { + struct svc_rdma_op_ctxt *ctxt = NULL; +- struct ib_wc wc; ++ struct ib_wc wc_a[6]; ++ struct ib_wc *wc; + struct ib_cq *cq = xprt->sc_sq_cq; + int ret; + ++ memset(wc_a, 0, sizeof(wc_a)); ++ + if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) + return; + + ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); + atomic_inc(&rdma_stat_sq_poll); +- while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { +- if (wc.status != IB_WC_SUCCESS) +- /* Close the transport */ +- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); ++ while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) { ++ int i; + +- /* Decrement used SQ WR count */ +- atomic_dec(&xprt->sc_sq_count); +- wake_up(&xprt->sc_send_wait); ++ for (i = 0; i < ret; i++) { ++ wc = &wc_a[i]; ++ if (wc->status != IB_WC_SUCCESS) { ++ dprintk("svcrdma: sq wc err status %d\n", ++ wc->status); + +- ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; +- if (ctxt) +- process_context(xprt, ctxt); ++ /* Close the transport */ ++ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); ++ } + +- svc_xprt_put(&xprt->sc_xprt); ++ /* Decrement used SQ WR count */ ++ atomic_dec(&xprt->sc_sq_count); ++ wake_up(&xprt->sc_send_wait); ++ ++ ctxt = (struct svc_rdma_op_ctxt *) ++ (unsigned long)wc->wr_id; ++ if (ctxt) ++ process_context(xprt, ctxt); ++ ++ svc_xprt_put(&xprt->sc_xprt); ++ } + } + + if (ctxt) +@@ -995,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) + need_dma_mr = 0; + break; + case RDMA_TRANSPORT_IB: +- if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { ++ if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { ++ need_dma_mr = 1; ++ dma_mr_acc = IB_ACCESS_LOCAL_WRITE; ++ } else if (!(devattr.device_cap_flags & ++ IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else +@@ -1192,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) + container_of(xprt, struct svcxprt_rdma, sc_xprt); + + /* +- * If there are fewer SQ WR available than required to send a +- * simple response, return false. 
+- */ +- if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3)) +- return 0; +- +- /* +- * ...or there are already waiters on the SQ, ++ * If there are already waiters on the SQ, + * return false. + */ + if (waitqueue_active(&rdma->sc_send_wait)) +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0131-svcrdma-Fence-LOCAL_INV-work-requests.patch b/linux-next-cherry-picks/0131-svcrdma-Fence-LOCAL_INV-work-requests.patch new file mode 100644 index 0000000..2ecff31 --- /dev/null +++ b/linux-next-cherry-picks/0131-svcrdma-Fence-LOCAL_INV-work-requests.patch @@ -0,0 +1,31 @@ +From 83710fc753d2ae158aa3cb7a7966d9c1bd05b792 Mon Sep 17 00:00:00 2001 +From: Steve Wise +Date: Thu, 5 Jun 2014 09:54:31 -0500 +Subject: [PATCH 132/132] svcrdma: Fence LOCAL_INV work requests + +Fencing forces the invalidate to only happen after all prior send +work requests have been completed. + +Signed-off-by: Steve Wise +Reported by : Devesh Sharma +Signed-off-by: J. Bruce Fields +--- + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +index 52d9f2c..8f92a61 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +@@ -338,7 +338,7 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = (unsigned long)ctxt; + inv_wr.opcode = IB_WR_LOCAL_INV; +- inv_wr.send_flags = IB_SEND_SIGNALED; ++ inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; + inv_wr.ex.invalidate_rkey = frmr->mr->lkey; + } + ctxt->wr_op = read_wr.opcode; +-- +1.7.1 + diff --git a/linux-next-cherry-picks/0132-svcrdma-send_write-must-not-overflow.patch b/linux-next-cherry-picks/0132-svcrdma-send_write-must-not-overflow.patch new file mode 100644 index 0000000..387f601 --- /dev/null +++ b/linux-next-cherry-picks/0132-svcrdma-send_write-must-not-overflow.patch @@ -0,0 +1,129 @@ +commit 255942907e7ff498ab1545b5edce5690833ff640 +Author: Steve Wise +Date: Wed Jul 9 13:49:15 2014 -0500 + + svcrdma: send_write() must not overflow the device's max sge + + Function send_write() must stop creating sges when it reaches the device + max and return the amount sent in the RDMA Write to the caller. + + Signed-off-by: Steve Wise + Signed-off-by: J. 
Bruce Fields + +diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +index 49fd21a..9f1b506 100644 +--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c ++++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c +@@ -192,6 +192,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, + xdr_sge_no++; + BUG_ON(xdr_sge_no > vec->count); + bc -= sge_bytes; ++ if (sge_no == xprt->sc_max_sge) ++ break; + } + + /* Prepare WRITE WR */ +@@ -209,7 +211,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, + atomic_inc(&rdma_stat_write); + if (svc_rdma_send(xprt, &write_wr)) + goto err; +- return 0; ++ return write_len - bc; + err: + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); +@@ -225,7 +227,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, + { + u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; + int write_len; +- int max_write; + u32 xdr_off; + int chunk_off; + int chunk_no; +@@ -239,8 +240,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, + res_ary = (struct rpcrdma_write_array *) + &rdma_resp->rm_body.rm_chunks[1]; + +- max_write = xprt->sc_max_sge * PAGE_SIZE; +- + /* Write chunks start at the pagelist */ + for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; + xfer_len && chunk_no < arg_ary->wc_nchunks; +@@ -260,23 +259,21 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, + write_len); + chunk_off = 0; + while (write_len) { +- int this_write; +- this_write = min(write_len, max_write); + ret = send_write(xprt, rqstp, + ntohl(arg_ch->rs_handle), + rs_offset + chunk_off, + xdr_off, +- this_write, ++ write_len, + vec); +- if (ret) { ++ if (ret <= 0) { + dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", + ret); + return -EIO; + } +- chunk_off += this_write; +- xdr_off += this_write; +- xfer_len -= this_write; +- write_len -= this_write; ++ chunk_off += ret; ++ xdr_off += ret; ++ xfer_len -= ret; ++ write_len -= ret; + } + } + /* Update the req with the number of chunks actually used */ +@@ -293,7 +290,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, + { + u32 xfer_len = rqstp->rq_res.len; + int write_len; +- int max_write; + u32 xdr_off; + int chunk_no; + int chunk_off; +@@ -311,8 +307,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, + res_ary = (struct rpcrdma_write_array *) + &rdma_resp->rm_body.rm_chunks[2]; + +- max_write = xprt->sc_max_sge * PAGE_SIZE; +- + /* xdr offset starts at RPC message */ + nchunks = ntohl(arg_ary->wc_nchunks); + for (xdr_off = 0, chunk_no = 0; +@@ -330,24 +324,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, + write_len); + chunk_off = 0; + while (write_len) { +- int this_write; +- +- this_write = min(write_len, max_write); + ret = send_write(xprt, rqstp, + ntohl(ch->rs_handle), + rs_offset + chunk_off, + xdr_off, +- this_write, ++ write_len, + vec); +- if (ret) { ++ if (ret <= 0) { + dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", + ret); + return -EIO; + } +- chunk_off += this_write; +- xdr_off += this_write; +- xfer_len -= this_write; +- write_len -= this_write; ++ chunk_off += ret; ++ xdr_off += ret; ++ xfer_len -= ret; ++ write_len -= ret; + } + } + /* Update the req with the number of chunks actually used */ diff --git a/linux-next-cherry-picks/0133-nfsrdma-backport-fixes.patch b/linux-next-cherry-picks/0133-nfsrdma-backport-fixes.patch new file mode 100644 index 0000000..d492471 --- /dev/null +++ b/linux-next-cherry-picks/0133-nfsrdma-backport-fixes.patch @@ -0,0 +1,48 @@ +Index: 
compat-rdma/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +=================================================================== +--- compat-rdma.orig/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c ++++ compat-rdma/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +@@ -92,7 +92,9 @@ static void rdma_build_arg_xdr(struct sv + sge_no++; + } + rqstp->rq_respages = &rqstp->rq_pages[sge_no]; ++#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,7,0)) + rqstp->rq_next_page = rqstp->rq_respages + 1; ++#endif + + /* We should never run out of SGE because the limit is defined to + * support the max allowed RPC data length +@@ -167,7 +169,9 @@ static int rdma_read_chunk_lcl(struct sv + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; ++#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,7,0)) + rqstp->rq_next_page = rqstp->rq_respages + 1; ++#endif + ctxt->sge[pno].addr = + ib_dma_map_page(xprt->sc_cm_id->device, + head->arg.pages[pg_no], pg_off, +@@ -272,7 +276,9 @@ static int rdma_read_chunk_frmr(struct s + if (!pg_off) + head->count++; + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; ++#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,7,0)) + rqstp->rq_next_page = rqstp->rq_respages + 1; ++#endif + frmr->page_list->page_list[pno] = + ib_dma_map_page(xprt->sc_cm_id->device, + head->arg.pages[pg_no], 0, +Index: compat-rdma/net/sunrpc/xprtrdma/svc_rdma_sendto.c +=================================================================== +--- compat-rdma.orig/net/sunrpc/xprtrdma/svc_rdma_sendto.c ++++ compat-rdma/net/sunrpc/xprtrdma/svc_rdma_sendto.c +@@ -446,7 +446,9 @@ static int send_reply(struct svcxprt_rdm + if (page_no+1 >= sge_no) + ctxt->sge[page_no+1].length = 0; + } ++#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,7,0)) + rqstp->rq_next_page = rqstp->rq_respages + 1; ++#endif + + BUG_ON(sge_no > rdma->sc_max_sge); + memset(&send_wr, 0, sizeof send_wr); diff --git a/linux-next-pending/0024-SUNRPC-Fix-large_reads-on-NFS-RDMA.patch b/linux-next-pending/0024-SUNRPC-Fix-large_reads-on-NFS-RDMA.patch deleted file mode 100644 index d3ad959..0000000 --- a/linux-next-pending/0024-SUNRPC-Fix-large_reads-on-NFS-RDMA.patch +++ /dev/null @@ -1,46 +0,0 @@ -commit 2b7bbc963da8d076f263574af4138b5df2e1581f -Author: Chuck Lever -Date: Wed Mar 12 12:51:30 2014 -0400 - - SUNRPC: Fix large reads on NFS/RDMA - - After commit a11a2bf4, "SUNRPC: Optimise away unnecessary data moves - in xdr_align_pages", Thu Aug 2 13:21:43 2012, READs larger than a - few hundred bytes via NFS/RDMA no longer work. This commit exposed - a long-standing bug in rpcrdma_inline_fixup(). - - I reproduce this with an rsize=4096 mount using the cthon04 basic - tests. Test 5 fails with an EIO error. - - For my reproducer, kernel log shows: - - NFS: server cheating in read reply: count 4096 > recvd 0 - - rpcrdma_inline_fixup() is zeroing the xdr_stream::page_len field, - and xdr_align_pages() is now returning that value to the READ XDR - decoder function. - - That field is set up by xdr_inline_pages() by the READ XDR encoder - function. As far as I can tell, it is supposed to be left alone - after that, as it describes the dimensions of the reply xdr_stream, - not the contents of that stream. 
- - Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=68391 - Signed-off-by: Chuck Lever - Signed-off-by: Trond Myklebust - -diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c -index e03725b..96ead52 100644 ---- a/net/sunrpc/xprtrdma/rpc_rdma.c -+++ b/net/sunrpc/xprtrdma/rpc_rdma.c -@@ -649,9 +649,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) - break; - page_base = 0; - } -- rqst->rq_rcv_buf.page_len = olen - copy_len; -- } else -- rqst->rq_rcv_buf.page_len = 0; -+ } - - if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { - curlen = copy_len; diff --git a/linux-next-pending/0025-NFSRDMA-Fix-regression-in-NFSRDMA-server.patch b/linux-next-pending/0025-NFSRDMA-Fix-regression-in-NFSRDMA-server.patch deleted file mode 100644 index abd5a0a..0000000 --- a/linux-next-pending/0025-NFSRDMA-Fix-regression-in-NFSRDMA-server.patch +++ /dev/null @@ -1,66 +0,0 @@ -Fix regression in NFSRDMA server - -From: Tom Tucker - -The server regression was caused by the addition of rq_next_page -(afc59400d6c65bad66d4ad0b2daf879cbff8e23e). There were a few places that -were missed with the update of the rq_respages array. - -NOTE: Patch modified to apply against OFED. - -Signed-off-by: Tom Tucker -Tested-by: Steve Wise - ---- - ---- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 2014-03-31 15:31:05.214903226 -0500 -+++ a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 2014-03-31 15:34:40.042047141 -0500 -@@ -90,6 +90,9 @@ static void rdma_build_arg_xdr(struct sv - sge_no++; - } - rqstp->rq_respages = &rqstp->rq_pages[sge_no]; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)) -+ rqstp->rq_next_page = rqstp->rq_respages + 1; -+#endif - - /* We should never run out of SGE because the limit is defined to - * support the max allowed RPC data length -@@ -169,6 +172,9 @@ static int map_read_chunks(struct svcxpr - */ - head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)) -+ rqstp->rq_next_page = rqstp->rq_respages + 1; -+#endif - - byte_count -= sge_bytes; - ch_bytes -= sge_bytes; -@@ -276,6 +282,9 @@ static int fast_reg_read_chunks(struct s - - /* rq_respages points one past arg pages */ - rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)) -+ rqstp->rq_next_page = rqstp->rq_respages + 1; -+#endif - - /* Create the reply and chunk maps */ - offset = 0; -@@ -527,9 +536,6 @@ next_sge: - #if (LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)) - while (rqstp->rq_resused) - rqstp->rq_respages[--rqstp->rq_resused] = NULL; --#else -- while (rqstp->rq_next_page != rqstp->rq_respages) -- *(--rqstp->rq_next_page) = NULL; - #endif - - return err; -@@ -558,7 +564,7 @@ static int rdma_read_complete(struct svc - #if (LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)) - rqstp->rq_resused = 0; - #else -- rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; -+ rqstp->rq_next_page = rqstp->rq_respages + 1; - #endif - - /* Rebuild rq_arg head and tail. 
*/ diff --git a/patches/0023-nfsrdma-Backport-for-rhel6.5.patch b/patches/0023-nfsrdma-Backport-for-rhel6.5.patch index 6d184fb..c9a0bd6 100644 --- a/patches/0023-nfsrdma-Backport-for-rhel6.5.patch +++ b/patches/0023-nfsrdma-Backport-for-rhel6.5.patch @@ -55,28 +55,15 @@ diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_r index xxxxxxx..xxxxxxx xxxxxx --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c -@@ -524,8 +524,13 @@ next_sge: - * Detach res pages. If svc_release sees any it will attempt to - * put them. - */ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)) -+ while (rqstp->rq_resused) -+ rqstp->rq_respages[--rqstp->rq_resused] = NULL; -+#else - while (rqstp->rq_next_page != rqstp->rq_respages) - *(--rqstp->rq_next_page) = NULL; -+#endif - - return err; - } -@@ -550,7 +555,11 @@ static int rdma_read_complete(struct svc_rqst *rqstp, +@@ -550,7 +556,11 @@ static int rdma_read_complete(struct svc_rqst *rqstp, /* rq_respages starts after the last arg page */ rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; +- rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)) + rqstp->rq_resused = 0; +#else - rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; ++ rqstp->rq_next_page = rqstp->rq_respages + 1; +#endif /* Rebuild rq_arg head and tail. */ diff --git a/patches/0026-nfsrdma-Backport-for-sles11sp3.patch b/patches/0026-nfsrdma-Backport-for-sles11sp3.patch index 84bb97e..ec2fc34 100644 --- a/patches/0026-nfsrdma-Backport-for-sles11sp3.patch +++ b/patches/0026-nfsrdma-Backport-for-sles11sp3.patch @@ -20,31 +20,3 @@ index xxxxxxx..xxxxxxx xxxxxx xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", -@@ -450,8 +452,15 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) - } - - static int -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)) || defined (CONFIG_COMPAT_XPRT_RESERVE_XPRT_CONG_2PARAMS) - xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) -+#else -+xprt_rdma_reserve_xprt(struct rpc_task *task) -+#endif - { -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0)) && !defined (CONFIG_COMPAT_XPRT_RESERVE_XPRT_CONG_2PARAMS) -+ struct rpc_xprt *xprt = task->tk_xprt; -+#endif - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int credits = atomic_read(&r_xprt->rx_buf.rb_credits); - -@@ -463,7 +472,11 @@ xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) - BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); - } - xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; -+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)) || defined (CONFIG_COMPAT_XPRT_RESERVE_XPRT_CONG_2PARAMS) - return xprt_reserve_xprt_cong(xprt, task); -+#else -+ return xprt_reserve_xprt_cong(task); -+#endif - } - - /* -- 2.41.0