--- /dev/null
+From 9f6bb49327fb7179785b10a845cf5070494bd988 Mon Sep 17 00:00:00 2001
+From: Jerrie Coffman <jerrie.l.coffman@intel.com>
+Date: Fri, 20 May 2016 16:17:43 -0700
+Subject: [PATCH 3/7] Add the ibp client and server drivers
+
+Add the CCL-Direct (ibp) client and server drivers to infiniband.
+This includes the base ib proxy driver, the mlx4 and mlx5 client
+drivers, and the sa and cm proxy drivers.  The client drivers run on
+the Intel Xeon Phi coprocessor kernel and forward InfiniBand core
+calls over SCIF to the server drivers on the host.
+
+Signed-off-by: Jerrie Coffman <jerrie.l.coffman@intel.com>
+---
+ drivers/infiniband/Kconfig | 2 +
+ drivers/infiniband/Makefile | 2 +
+ drivers/infiniband/ibp/Kconfig | 31 +
+ drivers/infiniband/ibp/Makefile | 19 +
+ drivers/infiniband/ibp/cm/Makefile | 13 +
+ drivers/infiniband/ibp/cm/client.c | 130 ++
+ drivers/infiniband/ibp/cm/client.h | 87 +
+ drivers/infiniband/ibp/cm/client_msg.c | 232 +++
+ drivers/infiniband/ibp/cm/cm_client.h | 98 +
+ drivers/infiniband/ibp/cm/cm_client_msg.c | 785 ++++++++
+ drivers/infiniband/ibp/cm/cm_ibp_abi.h | 405 ++++
+ drivers/infiniband/ibp/cm/cm_proxy.c | 752 +++++++
+ drivers/infiniband/ibp/cm/cm_server_msg.c | 1044 ++++++++++
+ drivers/infiniband/ibp/cm/common.h | 55 +
+ drivers/infiniband/ibp/cm/ibp-abi.h | 94 +
+ drivers/infiniband/ibp/cm/ibp_exports.h | 50 +
+ drivers/infiniband/ibp/cm/server.c | 221 +++
+ drivers/infiniband/ibp/cm/server.h | 129 ++
+ drivers/infiniband/ibp/cm/server_msg.c | 176 ++
+ drivers/infiniband/ibp/compat.h | 101 +
+ drivers/infiniband/ibp/drv/Makefile | 10 +
+ drivers/infiniband/ibp/drv/client.c | 502 +++++
+ drivers/infiniband/ibp/drv/client.h | 126 ++
+ drivers/infiniband/ibp/drv/client_msg.c | 1892 ++++++++++++++++++
+ drivers/infiniband/ibp/drv/common.h | 51 +
+ drivers/infiniband/ibp/drv/hw/mlx4/Kconfig | 9 +
+ drivers/infiniband/ibp/drv/hw/mlx4/Makefile | 9 +
+ drivers/infiniband/ibp/drv/hw/mlx4/cq.c | 538 +++++
+ drivers/infiniband/ibp/drv/hw/mlx4/dbrec.c | 138 ++
+ drivers/infiniband/ibp/drv/hw/mlx4/main.c | 1572 +++++++++++++++
+ drivers/infiniband/ibp/drv/hw/mlx4/main.h | 312 +++
+ drivers/infiniband/ibp/drv/hw/mlx4/qp.c | 771 ++++++++
+ drivers/infiniband/ibp/drv/hw/mlx4/srq.c | 160 ++
+ drivers/infiniband/ibp/drv/hw/mlx5/Kconfig | 9 +
+ drivers/infiniband/ibp/drv/hw/mlx5/Makefile | 9 +
+ drivers/infiniband/ibp/drv/hw/mlx5/cq.c | 638 ++++++
+ drivers/infiniband/ibp/drv/hw/mlx5/dbrec.c | 126 ++
+ drivers/infiniband/ibp/drv/hw/mlx5/main.c | 1460 ++++++++++++++
+ drivers/infiniband/ibp/drv/hw/mlx5/main.h | 327 ++++
+ drivers/infiniband/ibp/drv/hw/mlx5/qp.c | 714 +++++++
+ drivers/infiniband/ibp/drv/hw/mlx5/srq.c | 185 ++
+ drivers/infiniband/ibp/drv/ibp-abi.h | 649 ++++++
+ drivers/infiniband/ibp/drv/ibp.h | 260 +++
+ drivers/infiniband/ibp/drv/server.c | 503 +++++
+ drivers/infiniband/ibp/drv/server.h | 182 ++
+ drivers/infiniband/ibp/drv/server_msg.c | 2813 +++++++++++++++++++++++++++
+ drivers/infiniband/ibp/sa/Makefile | 13 +
+ drivers/infiniband/ibp/sa/client.c | 134 ++
+ drivers/infiniband/ibp/sa/client.h | 90 +
+ drivers/infiniband/ibp/sa/client_msg.c | 231 +++
+ drivers/infiniband/ibp/sa/common.h | 55 +
+ drivers/infiniband/ibp/sa/ibp-abi.h | 101 +
+ drivers/infiniband/ibp/sa/ibp_exports.h | 49 +
+ drivers/infiniband/ibp/sa/sa_client.h | 123 ++
+ drivers/infiniband/ibp/sa/sa_client_msg.c | 435 +++++
+ drivers/infiniband/ibp/sa/sa_ibp_abi.h | 251 +++
+ drivers/infiniband/ibp/sa/sa_proxy.c | 773 ++++++++
+ drivers/infiniband/ibp/sa/sa_server_msg.c | 962 +++++++++
+ drivers/infiniband/ibp/sa/sa_table.h | 131 ++
+ drivers/infiniband/ibp/sa/server.c | 218 +++
+ drivers/infiniband/ibp/sa/server.h | 173 ++
+ drivers/infiniband/ibp/sa/server_msg.c | 185 ++
+ 62 files changed, 22315 insertions(+)
+ create mode 100644 drivers/infiniband/ibp/Kconfig
+ create mode 100644 drivers/infiniband/ibp/Makefile
+ create mode 100644 drivers/infiniband/ibp/cm/Makefile
+ create mode 100644 drivers/infiniband/ibp/cm/client.c
+ create mode 100644 drivers/infiniband/ibp/cm/client.h
+ create mode 100644 drivers/infiniband/ibp/cm/client_msg.c
+ create mode 100644 drivers/infiniband/ibp/cm/cm_client.h
+ create mode 100644 drivers/infiniband/ibp/cm/cm_client_msg.c
+ create mode 100644 drivers/infiniband/ibp/cm/cm_ibp_abi.h
+ create mode 100644 drivers/infiniband/ibp/cm/cm_proxy.c
+ create mode 100644 drivers/infiniband/ibp/cm/cm_server_msg.c
+ create mode 100644 drivers/infiniband/ibp/cm/common.h
+ create mode 100644 drivers/infiniband/ibp/cm/ibp-abi.h
+ create mode 100644 drivers/infiniband/ibp/cm/ibp_exports.h
+ create mode 100644 drivers/infiniband/ibp/cm/server.c
+ create mode 100644 drivers/infiniband/ibp/cm/server.h
+ create mode 100644 drivers/infiniband/ibp/cm/server_msg.c
+ create mode 100644 drivers/infiniband/ibp/compat.h
+ create mode 100644 drivers/infiniband/ibp/drv/Makefile
+ create mode 100644 drivers/infiniband/ibp/drv/client.c
+ create mode 100644 drivers/infiniband/ibp/drv/client.h
+ create mode 100644 drivers/infiniband/ibp/drv/client_msg.c
+ create mode 100644 drivers/infiniband/ibp/drv/common.h
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/Kconfig
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/Makefile
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/cq.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/dbrec.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/main.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/main.h
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/qp.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx4/srq.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/Kconfig
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/Makefile
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/cq.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/dbrec.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/main.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/main.h
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/qp.c
+ create mode 100644 drivers/infiniband/ibp/drv/hw/mlx5/srq.c
+ create mode 100644 drivers/infiniband/ibp/drv/ibp-abi.h
+ create mode 100644 drivers/infiniband/ibp/drv/ibp.h
+ create mode 100644 drivers/infiniband/ibp/drv/server.c
+ create mode 100644 drivers/infiniband/ibp/drv/server.h
+ create mode 100644 drivers/infiniband/ibp/drv/server_msg.c
+ create mode 100644 drivers/infiniband/ibp/sa/Makefile
+ create mode 100644 drivers/infiniband/ibp/sa/client.c
+ create mode 100644 drivers/infiniband/ibp/sa/client.h
+ create mode 100644 drivers/infiniband/ibp/sa/client_msg.c
+ create mode 100644 drivers/infiniband/ibp/sa/common.h
+ create mode 100644 drivers/infiniband/ibp/sa/ibp-abi.h
+ create mode 100644 drivers/infiniband/ibp/sa/ibp_exports.h
+ create mode 100644 drivers/infiniband/ibp/sa/sa_client.h
+ create mode 100644 drivers/infiniband/ibp/sa/sa_client_msg.c
+ create mode 100644 drivers/infiniband/ibp/sa/sa_ibp_abi.h
+ create mode 100644 drivers/infiniband/ibp/sa/sa_proxy.c
+ create mode 100644 drivers/infiniband/ibp/sa/sa_server_msg.c
+ create mode 100644 drivers/infiniband/ibp/sa/sa_table.h
+ create mode 100644 drivers/infiniband/ibp/sa/server.c
+ create mode 100644 drivers/infiniband/ibp/sa/server.h
+ create mode 100644 drivers/infiniband/ibp/sa/server_msg.c
+
+diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
+index e9b7dc0..c466f25 100644
+--- a/drivers/infiniband/Kconfig
++++ b/drivers/infiniband/Kconfig
+@@ -75,6 +75,8 @@ source "drivers/infiniband/hw/nes/Kconfig"
+ source "drivers/infiniband/hw/ocrdma/Kconfig"
+ source "drivers/infiniband/hw/usnic/Kconfig"
+
++source "drivers/infiniband/ibp/Kconfig"
++
+ source "drivers/infiniband/ulp/ipoib/Kconfig"
+
+ source "drivers/infiniband/ulp/srp/Kconfig"
+diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
+index fad0b44..3bef222 100644
+--- a/drivers/infiniband/Makefile
++++ b/drivers/infiniband/Makefile
+@@ -2,3 +2,5 @@ obj-$(CONFIG_INFINIBAND) += core/
+ obj-$(CONFIG_INFINIBAND) += hw/
+ obj-$(CONFIG_INFINIBAND) += ulp/
+ obj-$(CONFIG_INFINIBAND) += sw/
++obj-$(CONFIG_IBP_CLIENT) += ibp/
++obj-$(CONFIG_IBP_SERVER) += ibp/
+diff --git a/drivers/infiniband/ibp/Kconfig b/drivers/infiniband/ibp/Kconfig
+new file mode 100644
+index 0000000..d0c3999
+--- /dev/null
++++ b/drivers/infiniband/ibp/Kconfig
+@@ -0,0 +1,31 @@
++config IBP_CLIENT
++ tristate "CCL Direct IB Client drivers"
++ depends on SCIF
++ ---help---
++ Client drivers for CCL Direct, including client proxies for the
++ hw drivers and the ib_sa and ib_cm core drivers. These are for
++ the Intel Xeon Phi coprocessor kernel, not the host.
++
++ To compile this driver as a module, choose M here.
++ If unsure, say N.
++
++config IBP_SERVER
++ tristate "CCL Direct IB Server drivers"
++ depends on SCIF
++ ---help---
++ Server drivers for CCL Direct, including server proxies for the
++ hw drivers and the ib_sa and ib_cm core drivers.
++ Also included is a kernel-mode test module.
++
++ To compile this driver as a module, choose M here.
++ If unsure, say N.
++
++config IBP_DEBUG
++ bool "CCL Direct debugging"
++ depends on IBP_CLIENT || IBP_SERVER
++ default y
++ ---help---
++ This option causes debug code to be compiled into the CCL Direct drivers.
++
++source "drivers/infiniband/ibp/drv/hw/mlx4/Kconfig"
++source "drivers/infiniband/ibp/drv/hw/mlx5/Kconfig"
+diff --git a/drivers/infiniband/ibp/Makefile b/drivers/infiniband/ibp/Makefile
+new file mode 100644
+index 0000000..dce1e53
+--- /dev/null
++++ b/drivers/infiniband/ibp/Makefile
+@@ -0,0 +1,19 @@
++KDIR := /lib/modules/$(KERNELRELEASE)/
++
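++# Probe the installed scif.h: define HAVE_UPSTREAM_SCIF when it exports
++# scif_get_node_ids(), and HAVE_EXTERNAL_SCIF whenever the header is
++# present at all.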
++SCIF_INCL := $(KDIR)/source/include/modules/
++HAVE_UPSTREAM := $(shell if grep -q scif_get_node_ids $(SCIF_INCL)/scif.h 2>/dev/null; then echo -n -DHAVE_UPSTREAM_SCIF; fi)
++HAVE_EXTERNAL := $(shell if test -f $(SCIF_INCL)/scif.h; then echo -n -DHAVE_EXTERNAL_SCIF; fi)
++
++# needed for out-of-tree building
++ABSPATH := $(abspath $(lastword $(MAKEFILE_LIST)))
++ABSDIR := $(dir $(ABSPATH))
++
++subdir-ccflags-y := -I$(ABSDIR) -I$(SCIF_INCL) $(HAVE_UPSTREAM) $(HAVE_EXTERNAL)
++subdir-ccflags-$(CONFIG_IBP_DEBUG) += -DIBP_DEBUG
++
++obj-$(CONFIG_IBP_CLIENT) += drv/
++obj-$(CONFIG_IBP_SERVER) += drv/
++obj-$(CONFIG_IBP_CLIENT) += cm/
++obj-$(CONFIG_IBP_SERVER) += cm/
++obj-$(CONFIG_IBP_CLIENT) += sa/
++obj-$(CONFIG_IBP_SERVER) += sa/
+diff --git a/drivers/infiniband/ibp/cm/Makefile b/drivers/infiniband/ibp/cm/Makefile
+new file mode 100644
+index 0000000..1994539
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/Makefile
+@@ -0,0 +1,13 @@
++obj-$(CONFIG_IBP_CLIENT) += ibp_cm_client.o
++obj-$(CONFIG_IBP_SERVER) += ibp_cm_server.o
++
++ccflags-y := -Idrivers/infiniband/
++
++ibp_cm_client-y := client.o \
++ client_msg.o \
++ cm_client_msg.o \
++ cm_proxy.o
++
++ibp_cm_server-y := server.o \
++ server_msg.o \
++ cm_server_msg.o
+diff --git a/drivers/infiniband/ibp/cm/client.c b/drivers/infiniband/ibp/cm/client.c
+new file mode 100644
+index 0000000..89d8e02
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/client.c
+@@ -0,0 +1,130 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Sean Hefty");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++MODULE_PARAM(port, port, int, IBP_CM_PORT, "Connection port");
++MODULE_PARAM(timeout, timeout, int, 1000, "Connect/Poll timeout (in ms)");
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++scif_epd_t ep;
++struct workqueue_struct *workqueue;
++static struct task_struct *ibp_cm_client_thread;
++
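++/*
++ * Connection thread: open a SCIF endpoint and retry the connect to the
++ * host until it succeeds, then service messages until the connection
++ * drops.  Reconnect on failure; exit only on kthread_should_stop().
++ */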
++static int ibp_cm_connect(void *unused)
++{
++ struct scif_port_id dst;
++ unsigned long delay;
++ int ret = 0;
++
++ dst.node = IBP_HOST_NODE;
++ dst.port = port;
++
++ delay = msecs_to_jiffies(timeout);
++
++ while (!kthread_should_stop()) {
++
++ ep = scif_open();
++ if (!ep) {
++ print_err("scif_open failed\n");
++ schedule_timeout_interruptible(delay);
++ continue;
++ }
++
++ while (scif_connect(ep, &dst) != 0) {
++ schedule_timeout_interruptible(delay);
++ if (kthread_should_stop())
++ break;
++ }
++
++ if (!kthread_should_stop()) {
++ print_dbg("connected node %d port %d\n",
++ dst.node, dst.port);
++
++ ibp_process_recvs();
++ }
++
++ scif_close(ep);
++ }
++
++ return ret;
++}
++
++static int __init ibp_cm_client_init(void)
++{
++ int ret;
++
++ printk(DRV_SIGNON);
++
++ init_rwsem(&list_rwsem);
++
++ workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!workqueue) {
++ print_err("create_singlethread_workqueue failed\n");
++ return -ENOMEM;
++ }
++
++ /* Start a thread to establish a connection. */
++ ibp_cm_client_thread = kthread_run(ibp_cm_connect, NULL, DRV_NAME);
++ if (IS_ERR(ibp_cm_client_thread)) {
++ ret = PTR_ERR(ibp_cm_client_thread);
++ print_err("kthread_run returned %d\n", ret);
++ destroy_workqueue(workqueue);
++ return ret;
++ }
++
++ return 0;
++}
++
++static void __exit ibp_cm_client_exit(void)
++{
++ kthread_stop(ibp_cm_client_thread);
++
++ flush_workqueue(workqueue);
++ destroy_workqueue(workqueue);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_cm_client_init);
++module_exit(ibp_cm_client_exit);
+diff --git a/drivers/infiniband/ibp/cm/client.h b/drivers/infiniband/ibp/cm/client.h
+new file mode 100644
+index 0000000..86f0132
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/client.h
+@@ -0,0 +1,87 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef CLIENT_H
++#define CLIENT_H
++
++#include "common.h"
++#include <rdma/ib_verbs.h>
++#include "ibp-abi.h"
++#include "cm_ibp_abi.h"
++#include "core/cm_msgs.h"
++
++#define DRV_ROLE "CM Client"
++#define DRV_NAME "ibp_cm_client"
++#include "compat.h"
++
++#ifndef IBP_HOST_NODE
++#define IBP_HOST_NODE 0
++#endif
++
++extern int timeout;
++extern scif_epd_t ep;
++extern struct workqueue_struct *workqueue;
++extern struct rw_semaphore list_rwsem;
++
++int ibp_process_recvs(void);
++void ibp_event_work(struct work_struct *work);
++
++struct ibp_request {
++ struct completion done;
++ void *data;
++ size_t length;
++ int status;
++};
++
++struct ibp_event_work {
++ struct work_struct work;
++ struct ibp_cm_event event;
++};
++
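++/*
++ * Prepare a request to receive a response of at most 'size' bytes into
++ * 'buf'; the completion is signaled once the response has arrived.
++ */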
++#define IBP_INIT_REQ(request, buf, size) \
++ do { \
++ (request)->data = (buf); \
++ (request)->length = (size); \
++ (request)->status = 0; \
++ init_completion(&(request)->done); \
++ } while (0)
++
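++/*
++ * Fill in the common message header.  The request pointer travels in
++ * the header and is echoed back by the server so the response can be
++ * matched to the waiting request.
++ */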
++#define IBP_INIT_CMD(cmd, size, op, req) \
++ do { \
++ (cmd)->header.opcode = IBP_##op; \
++ (cmd)->header.length = (size); \
++ (cmd)->header.status = 0; \
++ (cmd)->header.reserved = 0; \
++ (cmd)->header.request = (uintptr_t)(req); \
++ } while (0)
++
++#endif /* CLIENT_H */
+diff --git a/drivers/infiniband/ibp/cm/client_msg.c b/drivers/infiniband/ibp/cm/client_msg.c
+new file mode 100644
+index 0000000..2c8710d
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/client_msg.c
+@@ -0,0 +1,232 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++
++static DEFINE_MUTEX(ibp_send_mutex);
++
++int ibp_send(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
++ /*
++ * Because user-context threads can be signaled, a mutex and an
++ * ERESTARTSYS check are required to complete the send atomically.
++ */
++ mutex_lock(&ibp_send_mutex);
++ while (len) {
++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_send returned %d\n", ret);
++ if (ret == -ERESTARTSYS) {
++ ret = 0;
++ } else {
++ mutex_unlock(&ibp_send_mutex);
++ return ret;
++ }
++ }
++ buf += ret;
++ len -= ret;
++ }
++ mutex_unlock(&ibp_send_mutex);
++
++ return 0;
++}
++
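++/*
++ * Receive exactly 'len' bytes.  scif_poll() with a timeout is used so
++ * the loop can notice kthread_should_stop() while waiting for data.
++ */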
++int ibp_recv(scif_epd_t ep, void *buf, size_t len)
++{
++ struct scif_pollepd pollep;
++ int ret;
++
++ pollep.epd = ep;
++ pollep.events = POLLIN;
++
++ while (len) {
++ schedule();
++ if (kthread_should_stop())
++ return -EINTR;
++
++ ret = scif_poll(&pollep, 1, timeout);
++ if (ret == 0) /* timeout */
++ continue;
++ if (ret < 0) {
++ print_dbg("scif_poll revents 0x%x returned %d\n",
++ pollep.revents, ret);
++ return ret;
++ }
++
++ ret = scif_recv(ep, buf, (uint32_t)len, 0);
++ if (ret < 0) {
++ print_dbg("scif_recv returned %d\n", ret);
++ return ret;
++ }
++
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
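++/* Read and discard 'len' bytes of an oversized message. */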
++static int ibp_recv_bitbucket(scif_epd_t ep, size_t len)
++{
++ u8 bitbucket[64];
++ size_t bytes;
++ int ret = 0;
++
++ while (len) {
++ bytes = min(len, sizeof(bitbucket));
++ ret = ibp_recv(ep, &bitbucket, bytes);
++ if (ret)
++ break;
++ len -= bytes;
++ }
++
++ return ret;
++}
++
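++/*
++ * Copy a response into the waiting request's buffer.  Bytes beyond the
++ * buffer are drained and the request is failed with -EMSGSIZE.
++ */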
++static int ibp_recv_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ size_t len;
++ int ret;
++
++ req = (struct ibp_request *) hdr->request;
++ len = hdr->length - sizeof(*hdr);
++
++ ret = ibp_recv(ep, (void *)req->data, min(req->length, len));
++ if (ret)
++ return ret;
++
++ if (req->length < len) {
++ print_dbg("req->data overrun, expected %ld actual %ld\n",
++ req->length, len);
++ ret = ibp_recv_bitbucket(ep, len - req->length);
++ if (ret)
++ print_err("ibp_recv_bitbucket returned %d\n", ret);
++ req->status = -EMSGSIZE;
++ } else {
++ req->status = hdr->status;
++ }
++ req->length = len;
++
++ return ret;
++}
++
++static int ibp_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ int ret;
++
++ req = (struct ibp_request *) hdr->request;
++ if (!req)
++ return -EINVAL;
++
++ ret = ibp_recv_response(ep, hdr);
++ if (!ret)
++ complete(&req->done);
++
++ return ret;
++}
++
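++/*
++ * Receive an asynchronous CM event and queue it on the workqueue so it
++ * is delivered outside the receive thread.
++ */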
++static int ibp_event(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_event_work *event;
++ u64 length;
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_recv(ep, &length, sizeof(u64));
++ if (ret)
++ goto err1;
++
++ if (length < sizeof(struct ibp_cm_event)) {
++ print_err("Invalid event size %llu\n", length);
++ ret = -EINVAL;
++ goto err1;
++ }
++
++ event = kzalloc(offsetof(struct ibp_event_work, event) + length,
++ GFP_KERNEL);
++ if (!event) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto err1;
++ }
++
++ ret = ibp_recv(ep, &event->event, length);
++ if (ret)
++ goto err2;
++
++ INIT_WORK(&event->work, ibp_event_work);
++ queue_work(workqueue, &event->work);
++
++ return 0;
++err2:
++ kfree(event);
++err1:
++ return ret;
++}
++
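++/* Message dispatch table, indexed by opcode; validated before use. */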
++static int (*ibp_msg_table[])(scif_epd_t ep, struct ibp_msg_header *hdr) = {
++ [IBP_IBP_EVENT] = ibp_event,
++ [IBP_IBP_RESPONSE] = ibp_response,
++};
++
++int ibp_process_recvs(void)
++{
++ struct ibp_msg_header hdr;
++ int ret = 0;
++
++ while (!kthread_should_stop()) {
++
++ ret = ibp_recv(ep, &hdr, sizeof(hdr));
++ if (ret)
++ break;
++
++ if ((hdr.opcode < 0) ||
++ (hdr.opcode >= ARRAY_SIZE(ibp_msg_table)) ||
++ !ibp_msg_table[hdr.opcode]) {
++ print_err("Invalid command 0x%x\n", hdr.opcode);
++ ret = -EBADRQC;
++ break;
++ }
++
++ ret = ibp_msg_table[hdr.opcode](ep, &hdr);
++ if (ret)
++ break;
++
++ schedule();
++ }
++
++ return ret;
++}
+diff --git a/drivers/infiniband/ibp/cm/cm_client.h b/drivers/infiniband/ibp/cm/cm_client.h
+new file mode 100644
+index 0000000..add1780
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/cm_client.h
+@@ -0,0 +1,98 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef CM_CLIENT_H
++#define CM_CLIENT_H
++
++#include "common.h"
++#include <rdma/ib_verbs.h>
++#include "ibp-abi.h"
++#include "cm_ibp_abi.h"
++#include "core/cm_msgs.h"
++
++struct ibp_cm_entry {
++ u64 ibp_cm_id;
++ u64 device;
++ struct ib_cm_id ib_cm_id;
++ struct ibp_cm_entry *next;
++};
++
++u64 ibp_create_cm_id(struct ibp_cm_entry *entry, ib_cm_handler cm_handler,
++ void *context);
++
++void ibp_destroy_cm_id(struct ibp_cm_entry *entry);
++
++int ibp_cm_listen(struct ibp_cm_entry *entry,
++ __be64 service_id, __be64 service_mask);
++
++int ibp_send_cm_req(struct ibp_cm_entry *entry, struct ib_cm_req_param *param,
++ __be64 *service_id, __be64 *service_mask);
++
++int ibp_send_cm_rep(struct ibp_cm_entry *entry, struct ib_cm_rep_param *param);
++
++int ibp_send_cm_rtu(struct ibp_cm_entry *entry,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_dreq(struct ibp_cm_entry *entry,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_drep(struct ibp_cm_entry *entry,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_rej(struct ibp_cm_entry *entry, enum ib_cm_rej_reason reason,
++ void *ari, u8 ari_length,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_mra(struct ibp_cm_entry *entry, u8 service_timeout,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_lap(struct ibp_cm_entry *entry,
++ struct ib_sa_path_rec *alternate_path,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_apr(struct ibp_cm_entry *entry, enum ib_cm_apr_status status,
++ void *info, u8 info_length,
++ const void *private_data, u8 private_data_len);
++
++int ibp_send_cm_sidr_req(struct ibp_cm_entry *entry,
++ struct ib_cm_sidr_req_param *param,
++ __be64 *service_id, __be64 *service_mask);
++
++int ibp_send_cm_sidr_rep(struct ibp_cm_entry *entry,
++ struct ib_cm_sidr_rep_param *param);
++
++int ibp_cm_notify(struct ibp_cm_entry *entry, enum ib_event_type event);
++
++int ibp_cm_init_qp_attr(struct ibp_cm_entry *entry, struct ib_qp_attr *qp_attr,
++ int *qp_attr_mask);
++
++#endif /* CM_CLIENT_H */
+diff --git a/drivers/infiniband/ibp/cm/cm_client_msg.c b/drivers/infiniband/ibp/cm/cm_client_msg.c
+new file mode 100644
+index 0000000..555e49a
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/cm_client_msg.c
+@@ -0,0 +1,785 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++#include "cm_client.h"
++
++void ib_copy_sa_path_rec(struct ibp_sa_path_rec *a, struct ib_sa_path_rec *b)
++{
++ /* Copy ib_sa_path_rec to ibp_sa_path_rec */
++ a->service_id = b->service_id;
++ a->dgid_prefix = b->dgid.global.subnet_prefix;
++ a->dgid_id = b->dgid.global.interface_id;
++ a->sgid_prefix = b->sgid.global.subnet_prefix;
++ a->sgid_id = b->sgid.global.interface_id;
++ a->dlid = b->dlid;
++ a->slid = b->slid;
++ a->raw_traffic = b->raw_traffic;
++ a->flow_label = b->flow_label;
++ a->hop_limit = b->hop_limit;
++ a->traffic_class = b->traffic_class;
++ a->reversible = b->reversible;
++ a->numb_path = b->numb_path;
++ a->pkey = b->pkey;
++ a->qos_class = b->qos_class;
++ a->sl = b->sl;
++ a->mtu_selector = b->mtu_selector;
++ a->mtu = b->mtu;
++ a->rate_selector = b->rate_selector;
++ a->rate = b->rate;
++ a->packet_life_time_selector = b->packet_life_time_selector;
++ a->packet_life_time = b->packet_life_time;
++ a->preference = b->preference;
++}
++
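++/*
++ * Create the real cm_id on the host and mirror the returned identifiers
++ * in the local shadow ib_cm_id.  Returns the host-side handle on
++ * success.
++ */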
++u64 ibp_create_cm_id(struct ibp_cm_entry *entry, ib_cm_handler cm_handler,
++ void *context)
++{
++ struct ibp_create_cm_id_resp resp;
++ struct ibp_create_cm_id_cmd cmd;
++ struct ib_cm_id *ib_cm_id;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++
++ ib_cm_id = &(entry->ib_cm_id);
++ cmd.device = entry->device;
++
++ IBP_INIT_CMD(&cmd, sizeof(cmd), CREATE_CM_ID, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_create_cm_id returned %d\n", req.status);
++ return req.status;
++ }
++
++ ib_cm_id->cm_handler = cm_handler;
++ ib_cm_id->context = context;
++ ib_cm_id->service_id = resp.service_id;
++ ib_cm_id->service_mask = resp.service_mask;
++ ib_cm_id->local_id = resp.local_id;
++ ib_cm_id->remote_id = resp.remote_id;
++ ib_cm_id->remote_cm_qpn = resp.remote_cm_qpn;
++
++ return resp.ibp_cm_id;
++}
++
++void ibp_destroy_cm_id(struct ibp_cm_entry *entry)
++{
++ struct ibp_destroy_cm_id_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd.ibp_cm_id = entry->ibp_cm_id;
++
++ IBP_INIT_CMD(&cmd, sizeof(cmd), DESTROY_CM_ID, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return;
++
++ wait_for_completion(&req.done);
++}
++
++int ibp_cm_listen(struct ibp_cm_entry *entry,
++ __be64 service_id, __be64 service_mask)
++{
++ struct ibp_cm_listen_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd.ibp_cm_id = entry->ibp_cm_id;
++ cmd.service_id = service_id;
++ cmd.service_mask = service_mask;
++ cmd.null_comp_data = 1; /* no compare_data */
++
++ IBP_INIT_CMD(&cmd, sizeof(cmd), CM_LISTEN, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_cm_listen returned %d\n", req.status);
++ return req.status;
++ }
++
++ return 0;
++}
++
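++/*
++ * Marshal the parameters into a variable-length command: the fixed
++ * fields are followed by private_data_len bytes of private data.  The
++ * same pattern is used by the other ibp_send_cm_* calls below.
++ */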
++int ibp_send_cm_req(struct ibp_cm_entry *entry, struct ib_cm_req_param *param,
++ __be64 *service_id, __be64 *service_mask)
++{
++ struct ibp_send_cm_req_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_req_cmd) + param->private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ ib_copy_sa_path_rec(&cmd->primary_path, param->primary_path);
++ if (param->alternate_path)
++ ib_copy_sa_path_rec(&cmd->alternate_path,
++ param->alternate_path);
++ cmd->service_id = param->service_id;
++ cmd->qp_num = param->qp_num;
++ cmd->qp_type = param->qp_type;
++ cmd->starting_psn = param->starting_psn;
++ cmd->peer_to_peer = param->peer_to_peer;
++ cmd->responder_resources = param->responder_resources;
++ cmd->initiator_depth = param->initiator_depth;
++ cmd->remote_cm_response_timeout = param->remote_cm_response_timeout;
++ cmd->flow_control = param->flow_control;
++ cmd->local_cm_response_timeout = param->local_cm_response_timeout;
++ cmd->retry_count = param->retry_count;
++ cmd->rnr_retry_count = param->rnr_retry_count;
++ cmd->max_cm_retries = param->max_cm_retries;
++ cmd->srq = param->srq;
++ cmd->private_data_len = param->private_data_len;
++
++ memcpy(cmd->private_data, param->private_data, param->private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_REQ, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_req returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++
++ *service_id = param->service_id;
++ *service_mask = ~cpu_to_be64(0);
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_rep(struct ibp_cm_entry *entry, struct ib_cm_rep_param *param)
++{
++ struct ibp_send_cm_rep_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_rep_cmd) + param->private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->qp_num = param->qp_num;
++ cmd->starting_psn = param->starting_psn;
++ cmd->responder_resources = param->responder_resources;
++ cmd->initiator_depth = param->initiator_depth;
++ cmd->failover_accepted = param->failover_accepted;
++ cmd->flow_control = param->flow_control;
++ cmd->rnr_retry_count = param->rnr_retry_count;
++ cmd->srq = param->srq;
++ cmd->private_data_len = param->private_data_len;
++
++ memcpy(cmd->private_data, param->private_data, param->private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_REP, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_rep returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_rtu(struct ibp_cm_entry *entry,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_rtu_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_rtu_cmd) + private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->private_data_len = private_data_len;
++
++ memcpy(cmd->private_data, private_data, private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_RTU, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_rtu returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_dreq(struct ibp_cm_entry *entry,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_dreq_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_dreq_cmd) + private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->private_data_len = private_data_len;
++
++ memcpy(cmd->private_data, private_data, private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_DREQ, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_dbg("ib_send_cm_dreq returned %d\n", req.status);
++ ret = req.status;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_drep(struct ibp_cm_entry *entry,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_drep_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_drep_cmd) + private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->private_data_len = private_data_len;
++
++ memcpy(cmd->private_data, private_data, private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_DREP, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_dbg("ib_send_cm_drep returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_rej(struct ibp_cm_entry *entry, enum ib_cm_rej_reason reason,
++ void *ari, u8 ari_length,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_rej_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_rej_cmd) +
++ private_data_len + ari_length;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->reason = reason;
++ cmd->private_data_len = private_data_len;
++ cmd->ari_length = ari_length;
++
++ memcpy(cmd->data, private_data, private_data_len);
++ memcpy(cmd->data + private_data_len, ari, ari_length);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_REJ, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_rej returned %d\n", req.status);
++ ret = req.status;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_mra(struct ibp_cm_entry *entry, u8 service_timeout,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_mra_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_mra_cmd) + private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->service_timeout = service_timeout;
++ cmd->private_data_len = private_data_len;
++
++ memcpy(cmd->private_data, private_data, private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_MRA, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_mra returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_lap(struct ibp_cm_entry *entry,
++ struct ib_sa_path_rec *alternate_path,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_lap_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_lap_cmd) + private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ if (alternate_path)
++ ib_copy_sa_path_rec(&cmd->alternate_path, alternate_path);
++ cmd->private_data_len = private_data_len;
++
++ memcpy(cmd->private_data, private_data, private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_LAP, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_lap returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_apr(struct ibp_cm_entry *entry, enum ib_cm_apr_status status,
++ void *info, u8 info_length,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_send_cm_apr_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_apr_cmd) +
++ private_data_len +
++ info_length;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->status = status;
++ cmd->private_data_len = private_data_len;
++ cmd->info_length = info_length;
++
++ memcpy(cmd->data, private_data, private_data_len);
++ memcpy(cmd->data + private_data_len, info, info_length);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_APR, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_apr returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_sidr_req(struct ibp_cm_entry *entry,
++ struct ib_cm_sidr_req_param *param,
++ __be64 *service_id, __be64 *service_mask)
++{
++ struct ibp_send_cm_sidr_req_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_sidr_req_cmd) +
++ param->private_data_len;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ ib_copy_sa_path_rec(&cmd->path, param->path);
++ cmd->service_id = param->service_id;
++ cmd->timeout_ms = param->timeout_ms;
++ cmd->max_cm_retries = param->max_cm_retries;
++ cmd->private_data_len = param->private_data_len;
++
++ memcpy(cmd->private_data, param->private_data, param->private_data_len);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_SIDR_REQ, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_sidr_req returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++
++ *service_id = param->service_id;
++ *service_mask = ~cpu_to_be64(0);
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_send_cm_sidr_rep(struct ibp_cm_entry *entry,
++ struct ib_cm_sidr_rep_param *param)
++{
++ struct ibp_send_cm_sidr_rep_cmd *cmd;
++ struct ibp_request req;
++ int cmd_size;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd_size = sizeof(struct ibp_send_cm_sidr_rep_cmd) +
++ param->private_data_len +
++ param->info_length;
++
++ cmd = kzalloc(cmd_size, GFP_KERNEL);
++ if (!cmd) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ cmd->ibp_cm_id = entry->ibp_cm_id;
++ cmd->qp_num = param->qp_num;
++ cmd->qkey = param->qkey;
++ cmd->status = param->status;
++ cmd->private_data_len = param->private_data_len;
++ cmd->info_length = param->info_length;
++
++ memcpy(cmd->data, param->private_data, param->private_data_len);
++ memcpy(cmd->data + param->private_data_len,
++ param->info, param->info_length);
++
++ IBP_INIT_CMD(cmd, cmd_size, SEND_CM_SIDR_REP, &req);
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_send_cm_sidr_rep returned %d\n", req.status);
++ ret = req.status;
++ goto err;
++ }
++err:
++ kfree(cmd);
++ return ret;
++}
++
++int ibp_cm_notify(struct ibp_cm_entry *entry, enum ib_event_type event)
++{
++ struct ibp_cm_notify_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++
++ cmd.ibp_cm_id = entry->ibp_cm_id;
++ cmd.event = event;
++
++ IBP_INIT_CMD(&cmd, sizeof(cmd), CM_NOTIFY, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ print_err("ib_cm_notify returned %d\n", req.status);
++
++ return req.status;
++}
++
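++/*
++ * Ask the host to initialize QP attributes for the given QP state and
++ * unpack the flattened response back into the ib_qp_attr.
++ */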
++int ibp_cm_init_qp_attr(struct ibp_cm_entry *entry, struct ib_qp_attr *qp_attr,
++ int *qp_attr_mask)
++{
++ struct ibp_cm_init_qp_attr_resp resp;
++ struct ibp_cm_init_qp_attr_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++
++ cmd.ibp_cm_id = entry->ibp_cm_id;
++
++ cmd.qp_attr_state = qp_attr->qp_state;
++
++ IBP_INIT_CMD(&cmd, sizeof(cmd), CM_INIT_QP_ATTR, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ print_err("ib_cm_init_qp_attr returned %d\n", req.status);
++ return req.status;
++ }
++
++ *qp_attr_mask = resp.qp_attr_mask;
++ qp_attr->qp_access_flags = resp.qp_access_flags;
++ qp_attr->qp_state = resp.qp_state;
++ qp_attr->cur_qp_state = resp.cur_qp_state;
++ qp_attr->path_mtu = resp.path_mtu;
++ qp_attr->path_mig_state = resp.path_mig_state;
++ qp_attr->qkey = resp.qkey;
++ qp_attr->rq_psn = resp.rq_psn;
++ qp_attr->sq_psn = resp.sq_psn;
++ qp_attr->dest_qp_num = resp.dest_qp_num;
++
++ qp_attr->cap.max_send_wr = resp.cap_max_send_wr;
++ qp_attr->cap.max_recv_wr = resp.cap_max_recv_wr;
++ qp_attr->cap.max_send_sge = resp.cap_max_send_sge;
++ qp_attr->cap.max_recv_sge = resp.cap_max_recv_sge;
++ qp_attr->cap.max_inline_data = resp.cap_max_inline_data;
++
++ qp_attr->ah_attr.grh.dgid.global.subnet_prefix =
++ resp.ah_attr_grh_dgid_subnet_prefix;
++ qp_attr->ah_attr.grh.dgid.global.interface_id =
++ resp.ah_attr_grh_dgid_interface_id;
++ qp_attr->ah_attr.grh.flow_label = resp.ah_attr_grh_flow_label;
++ qp_attr->ah_attr.grh.sgid_index = resp.ah_attr_grh_sgid_index;
++ qp_attr->ah_attr.grh.hop_limit = resp.ah_attr_grh_hop_limit;
++ qp_attr->ah_attr.grh.traffic_class = resp.ah_attr_grh_traffic_class;
++ qp_attr->ah_attr.dlid = resp.ah_attr_dlid;
++ qp_attr->ah_attr.sl = resp.ah_attr_sl;
++ qp_attr->ah_attr.src_path_bits = resp.ah_attr_src_path_bits;
++ qp_attr->ah_attr.static_rate = resp.ah_attr_static_rate;
++ qp_attr->ah_attr.ah_flags = resp.ah_attr_ah_flags;
++ qp_attr->ah_attr.port_num = resp.ah_attr_port_num;
++
++ qp_attr->alt_ah_attr.grh.dgid.global.subnet_prefix =
++ resp.alt_attr_grh_dgid_subnet_prefix;
++ qp_attr->alt_ah_attr.grh.dgid.global.interface_id =
++ resp.alt_attr_grh_dgid_interface_id;
++ qp_attr->alt_ah_attr.grh.flow_label = resp.alt_attr_grh_flow_label;
++ qp_attr->alt_ah_attr.grh.sgid_index = resp.alt_attr_grh_sgid_index;
++ qp_attr->alt_ah_attr.grh.hop_limit = resp.alt_attr_grh_hop_limit;
++ qp_attr->alt_ah_attr.grh.traffic_class =
++ resp.alt_attr_grh_traffic_class;
++ qp_attr->alt_ah_attr.dlid = resp.alt_attr_dlid;
++ qp_attr->alt_ah_attr.sl = resp.alt_attr_sl;
++ qp_attr->alt_ah_attr.src_path_bits = resp.alt_attr_src_path_bits;
++ qp_attr->alt_ah_attr.static_rate = resp.alt_attr_static_rate;
++ qp_attr->alt_ah_attr.ah_flags = resp.alt_attr_ah_flags;
++ qp_attr->alt_ah_attr.port_num = resp.alt_attr_port_num;
++
++ qp_attr->pkey_index = resp.pkey_index;
++ qp_attr->alt_pkey_index = resp.alt_pkey_index;
++ qp_attr->en_sqd_async_notify = resp.en_sqd_async_notify;
++ qp_attr->sq_draining = resp.sq_draining;
++ qp_attr->max_rd_atomic = resp.max_rd_atomic;
++ qp_attr->max_dest_rd_atomic = resp.max_dest_rd_atomic;
++ qp_attr->min_rnr_timer = resp.min_rnr_timer;
++ qp_attr->port_num = resp.port_num;
++ qp_attr->timeout = resp.timeout;
++ qp_attr->retry_cnt = resp.retry_cnt;
++ qp_attr->rnr_retry = resp.rnr_retry;
++ qp_attr->alt_port_num = resp.alt_port_num;
++ qp_attr->alt_timeout = resp.alt_timeout;
++
++ return 0;
++}
+diff --git a/drivers/infiniband/ibp/cm/cm_ibp_abi.h b/drivers/infiniband/ibp/cm/cm_ibp_abi.h
+new file mode 100644
+index 0000000..650e159
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/cm_ibp_abi.h
+@@ -0,0 +1,405 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef CM_IBP_ABI_H
++#define CM_IBP_ABI_H
++
++#include <linux/types.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_cm.h>
++
++/* Increment this value if any changes break compatibility. */
++#define IBP_CM_ABI_VERSION 1
++
++/*
++ * Make sure that all structs defined in this file are laid out to pack
++ * the same way on different architectures to avoid incompatibility.
++ *
++ * Specifically:
++ * - Do not use pointer types -- pass pointers in a u64 instead.
++ * - Make sure that any structure larger than 4 bytes is padded
++ * to a multiple of 8 bytes; otherwise the structure size may
++ * be different between architectures.
++ */
++
++struct ibp_event_msg {
++ struct ibp_msg_header header;
++ u64 length;
++ u8 event[0];
++};
++
++struct ibp_sa_path_rec {
++ __be64 service_id;
++ u64 dgid_prefix;
++ u64 dgid_id;
++ u64 sgid_prefix;
++ u64 sgid_id;
++ __be16 dlid;
++ __be16 slid;
++ u32 raw_traffic;
++ __be32 flow_label;
++ u8 hop_limit;
++ u8 traffic_class;
++ u32 reversible;
++ u8 numb_path;
++ __be16 pkey;
++ __be16 qos_class;
++ u8 sl;
++ u8 mtu_selector;
++ u8 mtu;
++ u8 rate_selector;
++ u8 rate;
++ u8 packet_life_time_selector;
++ u8 packet_life_time;
++ u8 preference;
++};
++
++struct ibp_create_cm_id_cmd {
++ struct ibp_msg_header header;
++ u64 device;
++};
++
++struct ibp_create_cm_id_resp {
++ u64 ibp_cm_id;
++ __be64 service_id;
++ __be64 service_mask;
++ __be32 local_id;
++ __be32 remote_id;
++ u32 remote_cm_qpn;
++ u32 filler;
++};
++
++struct ibp_destroy_cm_id_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++};
++
++struct ibp_cm_compare_data {
++# define IBP_CM_COMPARE_SIZE (64 / sizeof(u32))
++ u32 data[IBP_CM_COMPARE_SIZE];
++ u32 mask[IBP_CM_COMPARE_SIZE];
++};
++
++struct ibp_cm_listen_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ __be64 service_id;
++ __be64 service_mask;
++ u64 null_comp_data;
++ struct ibp_cm_compare_data compare_data; /* backward compatibility */
++};
++
++struct ibp_send_cm_req_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ struct ibp_sa_path_rec primary_path;
++ struct ibp_sa_path_rec alternate_path;
++ __be64 service_id;
++ u32 qp_num;
++ enum ib_qp_type qp_type;
++ u32 starting_psn;
++ u8 peer_to_peer;
++ u8 responder_resources;
++ u8 initiator_depth;
++ u8 remote_cm_response_timeout;
++ u8 flow_control;
++ u8 local_cm_response_timeout;
++ u8 retry_count;
++ u8 rnr_retry_count;
++ u8 max_cm_retries;
++ u8 srq;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_rep_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u32 qp_num;
++ u32 starting_psn;
++ u8 responder_resources;
++ u8 initiator_depth;
++ u8 failover_accepted;
++ u8 flow_control;
++ u8 rnr_retry_count;
++ u8 srq;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_rtu_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_dreq_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_drep_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_rej_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u64 reason;
++ u8 private_data_len;
++ u8 ari_length;
++ char data[0];
++};
++
++struct ibp_send_cm_mra_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u8 service_timeout;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_lap_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ struct ibp_sa_path_rec alternate_path;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_apr_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u64 status;
++ u8 private_data_len;
++ u8 info_length;
++ char data[0];
++};
++
++struct ibp_send_cm_sidr_req_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ struct ibp_sa_path_rec path;
++ __be64 service_id;
++ int timeout_ms;
++ u8 max_cm_retries;
++ u8 private_data_len;
++ char private_data[0];
++};
++
++struct ibp_send_cm_sidr_rep_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u32 qp_num;
++ u32 qkey;
++ u64 status;
++ u8 info_length;
++ u8 private_data_len;
++ char data[0];
++};
++
++struct ibp_cm_notify_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u64 event;
++};
++
++struct ibp_cm_init_qp_attr_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_cm_id;
++ u64 qp_attr_state;
++};
++
++struct ibp_cm_init_qp_attr_resp {
++ u64 qp_attr_mask;
++ u64 qp_access_flags;
++ u64 qp_state;
++ u64 cur_qp_state;
++ u64 path_mtu;
++ u64 path_mig_state;
++ u32 qkey;
++ u32 rq_psn;
++ u32 sq_psn;
++ u64 dest_qp_num;
++
++ u32 cap_max_send_wr;
++ u32 cap_max_recv_wr;
++ u32 cap_max_send_sge;
++ u32 cap_max_recv_sge;
++ u32 cap_max_inline_data;
++
++ u64 ah_attr_grh_dgid_subnet_prefix;
++ u64 ah_attr_grh_dgid_interface_id;
++ u32 ah_attr_grh_flow_label;
++ u8 ah_attr_grh_sgid_index;
++ u8 ah_attr_grh_hop_limit;
++ u8 ah_attr_grh_traffic_class;
++ u16 ah_attr_dlid;
++ u8 ah_attr_sl;
++ u8 ah_attr_src_path_bits;
++ u8 ah_attr_static_rate;
++ u8 ah_attr_ah_flags;
++ u8 ah_attr_port_num;
++
++ u64 alt_attr_grh_dgid_subnet_prefix;
++ u64 alt_attr_grh_dgid_interface_id;
++ u32 alt_attr_grh_flow_label;
++ u8 alt_attr_grh_sgid_index;
++ u8 alt_attr_grh_hop_limit;
++ u8 alt_attr_grh_traffic_class;
++ u16 alt_attr_dlid;
++ u8 alt_attr_sl;
++ u8 alt_attr_src_path_bits;
++ u8 alt_attr_static_rate;
++ u8 alt_attr_ah_flags;
++ u8 alt_attr_port_num;
++
++ u16 pkey_index;
++ u16 alt_pkey_index;
++ u8 en_sqd_async_notify;
++ u8 sq_draining;
++ u8 max_rd_atomic;
++ u8 max_dest_rd_atomic;
++ u8 min_rnr_timer;
++ u8 port_num;
++ u8 timeout;
++ u8 retry_cnt;
++ u8 rnr_retry;
++ u8 alt_port_num;
++ u8 alt_timeout;
++};
++
++struct ibp_cm_req_event_resp {
++ struct ibp_sa_path_rec primary_path;
++ struct ibp_sa_path_rec alternate_path;
++ u64 listen_id;
++ __be64 remote_ca_guid;
++ __u32 remote_qkey;
++ __u32 remote_qpn;
++ __u32 qp_type;
++ __u32 starting_psn;
++ __u8 responder_resources;
++ __u8 initiator_depth;
++ __u8 local_cm_response_timeout;
++ __u8 flow_control;
++ __u8 remote_cm_response_timeout;
++ __u8 retry_count;
++ __u8 rnr_retry_count;
++ __u8 srq;
++ __u8 port;
++ __u8 reserved[7];
++};
++
++struct ibp_cm_rep_event_resp {
++ __be64 remote_ca_guid;
++ __u32 remote_qkey;
++ __u32 remote_qpn;
++ __u32 starting_psn;
++ __u8 responder_resources;
++ __u8 initiator_depth;
++ __u8 target_ack_delay;
++ __u8 failover_accepted;
++ __u8 flow_control;
++ __u8 rnr_retry_count;
++ __u8 srq;
++ __u8 reserved[5];
++};
++
++struct ibp_cm_rej_event_resp {
++ __u32 reason;
++};
++
++struct ibp_cm_mra_event_resp {
++ __u8 timeout;
++ __u8 reserved[3];
++};
++
++struct ibp_cm_lap_event_resp {
++ struct ibp_sa_path_rec path;
++};
++
++struct ibp_cm_rtu_event_resp {
++ __u32 status;
++ __be32 local_id;
++ __be32 remote_id;
++};
++
++struct ibp_cm_apr_event_resp {
++ __u32 status;
++};
++
++struct ibp_cm_sidr_req_event_resp {
++ u64 listen_id;
++ __u16 pkey;
++ __u8 port;
++ __u8 reserved;
++};
++
++struct ibp_cm_sidr_rep_event_resp {
++ __u32 status;
++ __u32 qkey;
++ __u32 qpn;
++};
++
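++/*
++ * Proxied CM event. The trailing data[] carries data_length bytes of
++ * private data followed by info_length bytes of event info (e.g. the
++ * ARI for a REJ, or the APR/SIDR REP info), per the event type.
++ */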
++struct ibp_cm_event {
++ enum ib_cm_event_type event_type;
++ union {
++ struct ibp_cm_req_event_resp req_resp;
++ struct ibp_cm_rep_event_resp rep_resp;
++ struct ibp_cm_rej_event_resp rej_resp;
++ struct ibp_cm_rtu_event_resp rtu_resp;
++ struct ibp_cm_mra_event_resp mra_resp;
++ struct ibp_cm_lap_event_resp lap_resp;
++ struct ibp_cm_apr_event_resp apr_resp;
++ struct ibp_cm_sidr_req_event_resp sidr_req_resp;
++ struct ibp_cm_sidr_rep_event_resp sidr_rep_resp;
++
++ __u32 send_status;
++ } u;
++
++ u64 event_cm_id;
++ u64 ibp_cm_id;
++ u64 data_length;
++ u64 info_length;
++
++ u8 data[0];
++};
++
++#endif /* CM_IBP_ABI_H */
+diff --git a/drivers/infiniband/ibp/cm/cm_proxy.c b/drivers/infiniband/ibp/cm/cm_proxy.c
+new file mode 100644
+index 0000000..f08608e
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/cm_proxy.c
+@@ -0,0 +1,752 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++#include "cm_client.h"
++#include "ibp_exports.h"
++
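++/*
++ * Client-side list of proxy entries, guarded by list_rwsem. Each
++ * entry pairs a local ib_cm_id wrapper with its host-side cm_id
++ * handle (ibp_cm_id).
++ */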
++static struct ibp_cm_entry *gbl_list;
++struct rw_semaphore list_rwsem;
++
++/* Look up the proxy entry for a client-side CM ID; the entry holds
++ * the "true" CM ID handle on the host.
++ */
++static struct ibp_cm_entry *find_cm_id(void *context)
++{
++ struct ibp_cm_entry *entry;
++
++ down_read(&list_rwsem);
++
++ for (entry = gbl_list; entry; entry = entry->next)
++ if (entry == context)
++ goto out;
++
++ print_dbg("Could not find cm_entry %p\n", context);
++out:
++ up_read(&list_rwsem);
++
++ return entry;
++}
++
++/* Translate from server side "true" CM ID to proxied CM ID on the client */
++static struct ibp_cm_entry *find_proxied_cm_id(u64 ibp_cm_id)
++{
++ struct ibp_cm_entry *entry;
++
++ down_read(&list_rwsem);
++
++ for (entry = gbl_list; entry; entry = entry->next)
++ if (entry->ibp_cm_id == ibp_cm_id)
++ goto out;
++
++ print_dbg("Could not find proxied id %llx\n", ibp_cm_id);
++out:
++ up_read(&list_rwsem);
++
++ return entry;
++}
++
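++/*
++ * A REQ arrived on a listen id: clone the listen entry into a new
++ * proxy entry bound to the connection cm_id the host created.
++ */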
++struct ibp_cm_entry *create_event_cm_id(struct ibp_cm_event *event)
++{
++ struct ibp_cm_entry *old_entry, *new_entry;
++
++ print_trace("in\n");
++
++ old_entry = find_proxied_cm_id(event->ibp_cm_id);
++ if (!old_entry)
++ return NULL;
++
++ new_entry = kzalloc(sizeof(struct ibp_cm_entry), GFP_KERNEL);
++ if (!new_entry) {
++ print_err("kzalloc failed\n");
++ return NULL;
++ }
++
++ memcpy(new_entry, old_entry, sizeof(struct ibp_cm_entry));
++
++ new_entry->ib_cm_id.service_mask = ~cpu_to_be64(0);
++ new_entry->ib_cm_id.remote_cm_qpn = event->u.req_resp.remote_qpn;
++
++ new_entry->ibp_cm_id = event->event_cm_id;
++
++ down_write(&list_rwsem);
++ new_entry->next = gbl_list;
++ gbl_list = new_entry;
++ up_write(&list_rwsem);
++
++ return new_entry;
++}
++
++static int copy_cm_req_parm(struct ib_cm_req_event_param *parm,
++ struct ibp_cm_req_event_resp *resp)
++{
++ struct ibp_cm_entry *entry;
++
++ print_trace("in\n");
++
++ parm->primary_path = kmemdup(&resp->primary_path,
++ sizeof(struct ib_sa_path_rec),
++ GFP_KERNEL);
++ if (!parm->primary_path) {
++ print_err("kmemdup failed\n");
++ return -ENOMEM;
++ }
++
++ if (resp->alternate_path.pkey) {
++ parm->alternate_path = kmemdup(&resp->alternate_path,
++ sizeof(struct ib_sa_path_rec),
++ GFP_KERNEL);
++ if (!parm->alternate_path) {
++ print_err("kmemdup failed\n");
++ kfree(parm->primary_path);
++ return -ENOMEM;
++ }
++ }
++
++ entry = find_proxied_cm_id(resp->listen_id);
++ if (!entry) {
++ kfree(parm->primary_path);
++ kfree(parm->alternate_path);
++ return -EINVAL;
++ }
++
++ parm->listen_id = &entry->ib_cm_id;
++
++ parm->remote_ca_guid = resp->remote_ca_guid;
++ parm->remote_qkey = resp->remote_qkey;
++ parm->remote_qpn = resp->remote_qpn;
++ parm->qp_type = resp->qp_type;
++ parm->starting_psn = resp->starting_psn;
++ parm->responder_resources = resp->responder_resources;
++ parm->initiator_depth = resp->initiator_depth;
++ parm->local_cm_response_timeout = resp->local_cm_response_timeout;
++ parm->flow_control = resp->flow_control;
++ parm->remote_cm_response_timeout
++ = resp->remote_cm_response_timeout;
++ parm->retry_count = resp->retry_count;
++ parm->rnr_retry_count = resp->rnr_retry_count;
++ parm->srq = resp->srq;
++ parm->port = resp->port;
++
++ return 0;
++}
++
++static void copy_cm_rep_parm(struct ib_cm_rep_event_param *parm,
++ struct ibp_cm_rep_event_resp *resp)
++{
++ print_trace("in\n");
++
++ parm->remote_ca_guid = resp->remote_ca_guid;
++ parm->remote_qkey = resp->remote_qkey;
++ parm->remote_qpn = resp->remote_qpn;
++ parm->starting_psn = resp->starting_psn;
++ parm->responder_resources = resp->responder_resources;
++ parm->initiator_depth = resp->initiator_depth;
++ parm->target_ack_delay = resp->target_ack_delay;
++ parm->failover_accepted = resp->failover_accepted;
++ parm->flow_control = resp->flow_control;
++ parm->rnr_retry_count = resp->rnr_retry_count;
++ parm->srq = resp->srq;
++}
++
++static int copy_cm_sidr_req_parm(struct ib_cm_sidr_req_event_param *parm,
++ struct ibp_cm_sidr_req_event_resp *resp)
++{
++ struct ibp_cm_entry *entry;
++
++ print_trace("in\n");
++
++ entry = find_proxied_cm_id(resp->listen_id);
++ if (!entry)
++ return -EINVAL;
++
++ parm->listen_id = &entry->ib_cm_id;
++ parm->pkey = resp->pkey;
++ parm->port = resp->port;
++
++ return 0;
++}
++
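++/*
++ * Deferred event delivery on the client: rebuild an ib_cm_event from
++ * the proxied ibp_cm_event and invoke the registered cm_handler.
++ */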
++void ibp_event_work(struct work_struct *work)
++{
++ struct ibp_event_work *cm_event_work;
++ struct ibp_cm_event *pevent;
++ struct ib_cm_event *event;
++ struct ibp_cm_entry *entry;
++ struct ib_cm_id *ib_cm_id;
++ void **info = NULL;
++ void *data;
++ int len;
++ int ret;
++
++ print_trace("in\n");
++
++ cm_event_work = container_of(work, struct ibp_event_work, work);
++ pevent = &cm_event_work->event;
++
++ if (pevent->event_type == IB_CM_REQ_RECEIVED)
++ entry = create_event_cm_id(pevent);
++ else
++ entry = find_proxied_cm_id(pevent->event_cm_id);
++
++ if (!entry) {
++ print_err("Could not find appropriate proxied cm_id\n");
++ goto err1;
++ }
++
++ ib_cm_id = &(entry->ib_cm_id);
++
++ if (!ib_cm_id->cm_handler) {
++ print_err("cm_id has not handler\n");
++ goto err1;
++ }
++
++ event = kzalloc(sizeof(struct ib_cm_event), GFP_KERNEL);
++ if (!event) {
++ print_err("kzalloc failed\n");
++ goto err1;
++ }
++
++ len = pevent->data_length;
++ if (len) {
++ event->private_data = kmemdup(pevent->data, len,
++ GFP_KERNEL);
++ if (!event->private_data) {
++ print_err("kmemdup failed\n");
++ goto err2;
++ }
++ }
++
++ len = pevent->info_length;
++
++ event->event = pevent->event_type;
++
++ switch (pevent->event_type) {
++ case IB_CM_REQ_RECEIVED:
++ print_dbg("IB_CM_REQ_RECEIVED (%d)\n", pevent->event_type);
++ if (copy_cm_req_parm(&event->param.req_rcvd,
++ &pevent->u.req_resp)) {
++ print_err("Error copying cm_req_parm\n");
++ goto err3;
++ }
++ break;
++
++ case IB_CM_REP_RECEIVED:
++ print_dbg("IB_CM_REP_RECEIVED (%d)\n", pevent->event_type);
++ copy_cm_rep_parm(&event->param.rep_rcvd, &pevent->u.rep_resp);
++ break;
++
++ case IB_CM_RTU_RECEIVED:
++ print_dbg("IB_CM_RTU_RECEIVED (%d)\n", pevent->event_type);
++ event->param.send_status = pevent->u.rtu_resp.status;
++ entry->ib_cm_id.remote_id = pevent->u.rtu_resp.remote_id;
++ /* not overwriting the local id:
++ * entry->ib_cm_id.local_id = pevent->u.rtu_resp.local_id;
++ */
++ break;
++
++ case IB_CM_USER_ESTABLISHED:
++ case IB_CM_DREQ_RECEIVED:
++ case IB_CM_DREP_RECEIVED:
++ case IB_CM_TIMEWAIT_EXIT:
++ print_dbg("IB_CM_(USER_EST/DREQ_REC/DREP_REC/TW_EXIT) (%d)\n",
++ pevent->event_type);
++ event->param.send_status = pevent->u.send_status;
++ break;
++
++ case IB_CM_MRA_RECEIVED:
++ print_dbg("IB_CM_MRA_RECEIVED (%d)\n", pevent->event_type);
++ event->param.mra_rcvd.service_timeout =
++ pevent->u.mra_resp.timeout;
++ break;
++
++ case IB_CM_REJ_RECEIVED:
++ print_dbg("IB_CM_REJ_RECEIVED (%d)\n", pevent->event_type);
++ if (len) {
++ event->param.rej_rcvd.ari_length = len;
++ info = &(event->param.rej_rcvd.ari);
++ }
++ break;
++
++ case IB_CM_LAP_RECEIVED:
++ print_dbg("IB_CM_LAP_RECEIVED (%d)\n", pevent->event_type);
++ if (pevent->u.lap_resp.path.pkey) {
++ event->param.lap_rcvd.alternate_path =
++ kmemdup(&pevent->u.lap_resp.path,
++ sizeof(struct ib_sa_path_rec),
++ GFP_KERNEL);
++ if (!event->param.lap_rcvd.alternate_path) {
++ print_err("kmemdup failed\n");
++ goto err3;
++ }
++ }
++ break;
++
++ case IB_CM_APR_RECEIVED:
++ print_dbg("IB_CM_APR_RECEIVED (%d)\n", pevent->event_type);
++ event->param.apr_rcvd.ap_status = pevent->u.apr_resp.status;
++
++ if (len) {
++ event->param.apr_rcvd.info_len = len;
++ info = &(event->param.apr_rcvd.apr_info);
++ }
++ break;
++
++ case IB_CM_SIDR_REQ_RECEIVED:
++ print_dbg("IB_CM_SIDR_REQ_RECEIVED (%d)\n", pevent->event_type);
++ if (copy_cm_sidr_req_parm(&event->param.sidr_req_rcvd,
++ &pevent->u.sidr_req_resp)) {
++ print_err("Error copying cm_sidr_req_param\n");
++ goto err3;
++ }
++ break;
++
++ case IB_CM_SIDR_REP_RECEIVED:
++ print_dbg("IB_CM_SIDR_REP_RECEIVED (%d)\n", pevent->event_type);
++ event->param.sidr_rep_rcvd.status =
++ pevent->u.sidr_rep_resp.status;
++ event->param.sidr_rep_rcvd.qkey =
++ pevent->u.sidr_rep_resp.qkey;
++ event->param.sidr_rep_rcvd.qpn =
++ pevent->u.sidr_rep_resp.qpn;
++
++ if (len) {
++ event->param.sidr_rep_rcvd.info_len = len;
++ info = &(event->param.sidr_rep_rcvd.info);
++ }
++ break;
++
++ case IB_CM_REQ_ERROR:
++ case IB_CM_REP_ERROR:
++ case IB_CM_DREQ_ERROR:
++ case IB_CM_LAP_ERROR:
++ case IB_CM_SIDR_REQ_ERROR:
++ print_dbg("IB_CM_<>_ERROR (%d)\n", pevent->event_type);
++ event->param.send_status = pevent->u.send_status;
++ break;
++
++ default:
++ print_err("Event %d not handled\n", pevent->event_type);
++ goto err3;
++ }
++
++ if (len) {
++ data = pevent->data + pevent->data_length;
++ *info = kmemdup(data, len, GFP_KERNEL);
++ if (!(*info)) {
++ print_err("kmemdup failed\n");
++ goto err3;
++ }
++ }
++
++ ret = ib_cm_id->cm_handler(ib_cm_id, event);
++ if (ret)
++ ib_destroy_cm_id(ib_cm_id);
++
++ goto out;
++err3:
++ kfree(event->private_data);
++out:
++err2:
++ kfree(event);
++err1:
++ kfree(cm_event_work);
++}
++
++/*
++ * The client requests a new CM ID; the host allocates it and returns
++ * a reference for the client side to use.
++ */
++struct ib_cm_id *ib_create_cm_id(struct ib_device *ib_device,
++ ib_cm_handler cm_handler, void *context)
++{
++ struct ibp_cm_entry *entry;
++ struct ib_cm_id *cm_id;
++ u64 ibp_cm_id;
++ u64 device;
++
++ print_trace("in\n");
++
++ /* Resolve the MIC ib_device to the corresponding host ib_device handle. */
++ device = ibp_resolve_ib_device(ib_device);
++ if (!device) {
++ print_err("Could not find a valid IBP Device\n");
++ return ERR_PTR(-ENODEV);
++ }
++
++ entry = kzalloc(sizeof(struct ibp_cm_entry), GFP_KERNEL);
++ if (!entry) {
++ print_err("kzmalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ /* store (linux) device and ib_device */
++ entry->device = device;
++
++ /* Ask the host cm to create a new cm id; the entry->ib_cm_id
++ * structure is filled out by ibp_create_cm_id().
++ */
++ ibp_cm_id = ibp_create_cm_id(entry, cm_handler, context);
++ if (IS_ERR_VALUE(ibp_cm_id)) {
++ print_err("create_cm_id returned %d\n", (int) ibp_cm_id);
++ kfree(entry);
++ return ERR_PTR(-EINVAL);
++ }
++
++ /* current cm proxy entry is top of cm proxy entry list */
++ down_write(&list_rwsem);
++ entry->next = gbl_list;
++ gbl_list = entry;
++ up_write(&list_rwsem);
++
++ /* Store the cm_id handle supplied by the host cm in its response. */
++ entry->ibp_cm_id = ibp_cm_id;
++
++ cm_id = &(entry->ib_cm_id);
++ cm_id->device = ib_device;
++
++ return cm_id;
++}
++EXPORT_SYMBOL(ib_create_cm_id);
++
++void ib_destroy_cm_id(struct ib_cm_id *cm_id)
++{
++ struct ibp_cm_entry *entry, *p;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return;
++
++ down_write(&list_rwsem);
++ if (entry == gbl_list)
++ gbl_list = entry->next;
++ else {
++ for (p = gbl_list; p; p = p->next)
++ if (p->next == entry)
++ break;
++ if (p)
++ p->next = entry->next;
++ }
++ up_write(&list_rwsem);
++
++ ibp_destroy_cm_id(entry);
++ kfree(entry);
++}
++EXPORT_SYMBOL(ib_destroy_cm_id);
++
++int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_cm_listen(entry, service_id, service_mask);
++ if (ret)
++ print_err("ibp_cm_listen returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_cm_listen);
++
++int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_req(entry, param,
++ &cm_id->service_id, &cm_id->service_mask);
++ if (ret)
++ print_err("ibp_send_cm_req returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_req);
++
++int ib_send_cm_rep(struct ib_cm_id *cm_id, struct ib_cm_rep_param *param)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_rep(entry, param);
++ if (ret)
++ print_err("ibp_send_cm_rep returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_rep);
++
++int ib_send_cm_rtu(struct ib_cm_id *cm_id, const void *private_data,
++ u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_rtu(entry, private_data, private_data_len);
++ if (ret)
++ print_err("ibp_send_cm_rtu returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_rtu);
++
++int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
++ u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_dreq(entry, private_data, private_data_len);
++ if (ret)
++ print_dbg("ibp_send_cm_dreq returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_dreq);
++
++int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
++ u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_drep(entry, private_data, private_data_len);
++ if (ret)
++ print_dbg("ibp_send_cm_drep returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_drep);
++
++int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
++ void *ari, u8 ari_length,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_rej(entry, reason,
++ ari, ari_length,
++ private_data, private_data_len);
++ if (ret)
++ print_err("ibp_send_cm_rej returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_rej);
++
++int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 service_timeout,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_mra(entry, service_timeout,
++ private_data, private_data_len);
++ if (ret)
++ print_err("ibp_send_cm_mra returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_mra);
++
++int ib_send_cm_lap(struct ib_cm_id *cm_id,
++ struct ib_sa_path_rec *alternate_path,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_lap(entry, alternate_path,
++ private_data, private_data_len);
++ if (ret)
++ print_err("ibp_send_cm_lap returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_lap);
++
++int ib_send_cm_apr(struct ib_cm_id *cm_id, enum ib_cm_apr_status status,
++ void *info, u8 info_length,
++ const void *private_data, u8 private_data_len)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_apr(entry, status, info, info_length,
++ private_data, private_data_len);
++ if (ret)
++ print_err("ibp_send_cm_apr returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_apr);
++
++int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
++ struct ib_cm_sidr_req_param *param)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_sidr_req(entry, param,
++ &cm_id->service_id, &cm_id->service_mask);
++ if (ret)
++ print_err("ibp_send_cm_sidr_req returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_sidr_req);
++
++int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
++ struct ib_cm_sidr_rep_param *param)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_send_cm_sidr_rep(entry, param);
++ if (ret)
++ print_err("ibp_send_cm_sidr_rep returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_send_cm_sidr_rep);
++
++int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_cm_notify(entry, event);
++ if (ret)
++ print_err("ibp_cm_notify returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_cm_notify);
++
++int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr,
++ int *qp_attr_mask)
++{
++ struct ibp_cm_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_cm_id(container_of(cm_id, struct ibp_cm_entry, ib_cm_id));
++ if (!entry)
++ return -EINVAL;
++
++ ret = ibp_cm_init_qp_attr(entry, qp_attr, qp_attr_mask);
++ if (ret)
++ print_err("ibp_cm_init_qp_attr returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_cm_init_qp_attr);
+diff --git a/drivers/infiniband/ibp/cm/cm_server_msg.c b/drivers/infiniband/ibp/cm/cm_server_msg.c
+new file mode 100644
+index 0000000..884be47
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/cm_server_msg.c
+@@ -0,0 +1,1044 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "server.h"
++
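++/* Server-side registry pairing each host cm_id with its owning client. */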
++LIST_HEAD(cm_entry_list);
++
++void ibp_copy_sa_path_rec(struct ibp_sa_path_rec *a, struct ib_sa_path_rec *b)
++{
++ /* Copy ibp_sa_path_rec to ib_sa_path_rec. */
++ b->service_id = a->service_id;
++ b->dgid.global.subnet_prefix = a->dgid_prefix;
++ b->dgid.global.interface_id = a->dgid_id;
++ b->sgid.global.subnet_prefix = a->sgid_prefix;
++ b->sgid.global.interface_id = a->sgid_id;
++ b->dlid = a->dlid;
++ b->slid = a->slid;
++ b->raw_traffic = a->raw_traffic;
++ b->flow_label = a->flow_label;
++ b->hop_limit = a->hop_limit;
++ b->traffic_class = a->traffic_class;
++ b->reversible = a->reversible;
++ b->numb_path = a->numb_path;
++ b->pkey = a->pkey;
++ b->qos_class = a->qos_class;
++ b->sl = a->sl;
++ b->mtu_selector = a->mtu_selector;
++ b->mtu = a->mtu;
++ b->rate_selector = a->rate_selector;
++ b->rate = a->rate;
++ b->packet_life_time_selector = a->packet_life_time_selector;
++ b->packet_life_time = a->packet_life_time;
++ b->preference = a->preference;
++}
++
++void ib_copy_sa_path_rec(struct ibp_sa_path_rec *a, struct ib_sa_path_rec *b)
++{
++ /* Copy ib_sa_path_rec to ibp_sa_path_rec. */
++ a->service_id = b->service_id;
++ a->dgid_prefix = b->dgid.global.subnet_prefix;
++ a->dgid_id = b->dgid.global.interface_id;
++ a->sgid_prefix = b->sgid.global.subnet_prefix;
++ a->sgid_id = b->sgid.global.interface_id;
++ a->dlid = b->dlid;
++ a->slid = b->slid;
++ a->raw_traffic = b->raw_traffic;
++ a->flow_label = b->flow_label;
++ a->hop_limit = b->hop_limit;
++ a->traffic_class = b->traffic_class;
++ a->reversible = b->reversible;
++ a->numb_path = b->numb_path;
++ a->pkey = b->pkey;
++ a->qos_class = b->qos_class;
++ a->sl = b->sl;
++ a->mtu_selector = b->mtu_selector;
++ a->mtu = b->mtu;
++ a->rate_selector = b->rate_selector;
++ a->rate = b->rate;
++ a->packet_life_time_selector = b->packet_life_time_selector;
++ a->packet_life_time = b->packet_life_time;
++ a->preference = b->preference;
++}
++
++void cleanup_cm_entry_list(void)
++{
++ struct cm_entry *entry, *next;
++
++ down_write(&list_rwsem);
++ list_for_each_entry_safe(entry, next, &cm_entry_list, list)
++ kfree(entry);
++ up_write(&list_rwsem);
++}
++
++static struct cm_entry *find_cm_entry(struct ib_cm_id *cm_id)
++{
++ struct cm_entry *entry;
++
++ down_read(&list_rwsem);
++
++ list_for_each_entry(entry, &cm_entry_list, list)
++ if (entry->cm_id == cm_id)
++ goto out;
++
++ print_err("Could not find cm id %p\n", cm_id);
++ entry = NULL;
++out:
++ up_read(&list_rwsem);
++
++ return entry;
++}
++
++/* Find the entry for the listen cm_id and add the new cm_id being
++ * accepted to the list, so it can be found on future events.
++ */
++static struct cm_entry *find_cm_entry_and_add(struct ib_cm_id *listen_id,
++ struct ib_cm_id *cm_id)
++{
++ struct cm_entry *listen_entry, *entry;
++
++ listen_entry = find_cm_entry(listen_id);
++ if (!listen_entry) {
++ print_err("Could not find listen id %p\n", listen_id);
++ return NULL;
++ }
++
++ entry = kzalloc(sizeof(struct cm_entry), GFP_KERNEL);
++ if (!entry) {
++ print_err("kzalloc failed\n");
++ return NULL;
++ }
++
++ entry->client = listen_entry->client;
++ entry->cm_id = cm_id;
++
++ down_write(&list_rwsem);
++ list_add(&entry->list, &cm_entry_list);
++ up_write(&list_rwsem);
++
++ return listen_entry;
++}
++
++static void ibp_event_req_get(struct ibp_cm_req_event_resp *proxy_req,
++ struct ib_cm_req_event_param *req)
++{
++ proxy_req->listen_id = (u64) req->listen_id;
++ proxy_req->remote_ca_guid = req->remote_ca_guid;
++ proxy_req->remote_qkey = req->remote_qkey;
++ proxy_req->remote_qpn = req->remote_qpn;
++ proxy_req->qp_type = req->qp_type;
++ proxy_req->starting_psn = req->starting_psn;
++ proxy_req->responder_resources = req->responder_resources;
++ proxy_req->initiator_depth = req->initiator_depth;
++ proxy_req->local_cm_response_timeout = req->local_cm_response_timeout;
++ proxy_req->flow_control = req->flow_control;
++ proxy_req->remote_cm_response_timeout = req->remote_cm_response_timeout;
++ proxy_req->retry_count = req->retry_count;
++ proxy_req->rnr_retry_count = req->rnr_retry_count;
++ proxy_req->srq = req->srq;
++ proxy_req->port = req->port;
++ ib_copy_sa_path_rec(&proxy_req->primary_path, req->primary_path);
++ if (req->alternate_path)
++ ib_copy_sa_path_rec(&proxy_req->alternate_path,
++ req->alternate_path);
++}
++
++static void ibp_event_rep_get(struct ibp_cm_rep_event_resp *proxy_rep,
++ struct ib_cm_rep_event_param *rep)
++{
++ proxy_rep->remote_ca_guid = rep->remote_ca_guid;
++ proxy_rep->remote_qkey = rep->remote_qkey;
++ proxy_rep->remote_qpn = rep->remote_qpn;
++ proxy_rep->starting_psn = rep->starting_psn;
++ proxy_rep->responder_resources = rep->responder_resources;
++ proxy_rep->initiator_depth = rep->initiator_depth;
++ proxy_rep->target_ack_delay = rep->target_ack_delay;
++ proxy_rep->failover_accepted = rep->failover_accepted;
++ proxy_rep->flow_control = rep->flow_control;
++ proxy_rep->rnr_retry_count = rep->rnr_retry_count;
++ proxy_rep->srq = rep->srq;
++}
++
++static
++void ibp_event_sidr_rep_get(struct ibp_cm_sidr_rep_event_resp *proxy_resp,
++ struct ib_cm_sidr_rep_event_param *rep)
++{
++ proxy_resp->status = rep->status;
++ proxy_resp->qkey = rep->qkey;
++ proxy_resp->qpn = rep->qpn;
++}
++
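++/*
++ * Work handler: serialize the queued event into an IBP_EVENT message
++ * and send it to the client over the SCIF endpoint.
++ */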
++static void ibp_event(struct work_struct *work)
++{
++ struct ibp_event *event_work;
++ struct ibp_event_msg *msg;
++ int msg_len;
++ int event_len;
++
++ print_trace("in\n");
++
++ event_work = container_of(work, struct ibp_event, work);
++
++ event_len = event_work->event.data_length +
++ event_work->event.info_length +
++ sizeof(struct ibp_cm_event);
++
++ msg_len = sizeof(struct ibp_event_msg) + event_len;
++
++ msg = kzalloc(msg_len, GFP_KERNEL);
++ if (!msg) {
++ print_err("kzmalloc failed\n");
++ goto err;
++ }
++
++ memcpy(msg->event, &(event_work->event), event_len);
++ msg->length = event_len;
++
++ IBP_INIT_MSG(NULL, msg, msg_len, IBP_EVENT);
++
++ ibp_send(event_work->client->ep, msg, msg_len);
++err:
++ kfree(event_work);
++}
++
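++/*
++ * CM callback on the host: size and copy the event into an ibp_event
++ * work item and queue it for delivery to the owning client.
++ */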
++static int ibp_event_handler(struct ib_cm_id *cm_id,
++ struct ib_cm_event *ib_cm_event)
++{
++ struct ibp_event *event_work;
++ struct ibp_client *client;
++ struct cm_entry *entry;
++ void *info = NULL;
++ int info_length = 0;
++ int data_length = 0;
++
++ print_trace("in\n");
++
++ switch (ib_cm_event->event) {
++ case IB_CM_REQ_RECEIVED:
++ data_length = IB_CM_REQ_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_REP_RECEIVED:
++ data_length = IB_CM_REP_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_RTU_RECEIVED:
++ data_length = IB_CM_RTU_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_DREQ_RECEIVED:
++ data_length = IB_CM_DREQ_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_DREP_RECEIVED:
++ data_length = IB_CM_DREP_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_MRA_RECEIVED:
++ data_length = IB_CM_MRA_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_REJ_RECEIVED:
++ data_length = IB_CM_REJ_PRIVATE_DATA_SIZE;
++ info_length = ib_cm_event->param.rej_rcvd.ari_length;
++ break;
++ case IB_CM_LAP_RECEIVED:
++ data_length = IB_CM_LAP_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_APR_RECEIVED:
++ data_length = IB_CM_APR_PRIVATE_DATA_SIZE;
++ info_length = ib_cm_event->param.apr_rcvd.info_len;
++ break;
++ case IB_CM_SIDR_REQ_RECEIVED:
++ data_length = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
++ break;
++ case IB_CM_SIDR_REP_RECEIVED:
++ data_length = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
++ info_length = ib_cm_event->param.sidr_rep_rcvd.info_len;
++ break;
++ default:
++ break;
++ }
++ event_work = kzalloc(sizeof(struct ibp_event) +
++ data_length + info_length, GFP_KERNEL);
++ if (!event_work) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ if (ib_cm_event->event == IB_CM_REQ_RECEIVED) {
++ struct ib_cm_req_event_param *param;
++ param = &ib_cm_event->param.req_rcvd;
++ entry = find_cm_entry_and_add(param->listen_id, cm_id);
++ } else if (ib_cm_event->event == IB_CM_SIDR_REQ_RECEIVED) {
++ struct ib_cm_sidr_req_event_param *param;
++ param = &ib_cm_event->param.sidr_req_rcvd;
++ entry = find_cm_entry_and_add(param->listen_id, cm_id);
++ } else {
++ entry = find_cm_entry(cm_id);
++ }
++
++ if (!entry) {
++ kfree(event_work);
++ return -EINVAL;
++ }
++
++ client = entry->client;
++
++ event_work->client = client;
++ event_work->event.ibp_cm_id = (u64) entry->cm_id;
++ event_work->event.event_cm_id = (u64) cm_id;
++ event_work->event.event_type = ib_cm_event->event;
++ event_work->event.data_length = data_length;
++ event_work->event.info_length = info_length;
++
++ /* parse and copy the proper event */
++ switch (ib_cm_event->event) {
++ case IB_CM_REQ_RECEIVED:
++ print_dbg("IB_CM_REQ_RECEIVED (%d)\n", ib_cm_event->event);
++ ibp_event_req_get(&event_work->event.u.req_resp,
++ &ib_cm_event->param.req_rcvd);
++ break;
++ case IB_CM_REP_RECEIVED:
++ print_dbg("IB_CM_REP_RECEIVED (%d)\n", ib_cm_event->event);
++ ibp_event_rep_get(&event_work->event.u.rep_resp,
++ &ib_cm_event->param.rep_rcvd);
++ break;
++ case IB_CM_MRA_RECEIVED:
++ print_dbg("IB_CM_MRA_RECEIVED (%d)\n", ib_cm_event->event);
++ event_work->event.u.mra_resp.timeout =
++ ib_cm_event->param.mra_rcvd.service_timeout;
++ break;
++ case IB_CM_REJ_RECEIVED:
++ print_dbg("IB_CM_REJ_RECEIVED (%d)\n", ib_cm_event->event);
++ event_work->event.u.rej_resp.reason =
++ ib_cm_event->param.rej_rcvd.reason;
++ info = ib_cm_event->param.rej_rcvd.ari;
++ break;
++ case IB_CM_RTU_RECEIVED:
++ print_dbg("IB_CM_RTU_RECEIVED (%d)\n", ib_cm_event->event);
++ event_work->event.u.rtu_resp.status =
++ ib_cm_event->param.send_status;
++ event_work->event.u.rtu_resp.local_id = cm_id->local_id;
++ event_work->event.u.rtu_resp.remote_id = cm_id->remote_id;
++ break;
++ case IB_CM_LAP_RECEIVED:
++ print_dbg("IB_CM_LAP_RECEIVED (%d)\n", ib_cm_event->event);
++ ib_copy_sa_path_rec(&event_work->event.u.lap_resp.path,
++ ib_cm_event->param.lap_rcvd.alternate_path);
++ break;
++ case IB_CM_APR_RECEIVED:
++ print_dbg("IB_CM_APR_RECEIVED (%d)\n", ib_cm_event->event);
++ event_work->event.u.apr_resp.status =
++ ib_cm_event->param.apr_rcvd.ap_status;
++ info = ib_cm_event->param.apr_rcvd.apr_info;
++ break;
++ case IB_CM_SIDR_REQ_RECEIVED:
++ print_dbg("IB_CM_SIDR_REQ_RECEIVED (%d)\n",
++ ib_cm_event->event);
++ event_work->event.u.sidr_req_resp.listen_id =
++ (u64) ib_cm_event->param.sidr_req_rcvd.listen_id;
++ event_work->event.u.sidr_req_resp.pkey =
++ ib_cm_event->param.sidr_req_rcvd.pkey;
++ event_work->event.u.sidr_req_resp.port =
++ ib_cm_event->param.sidr_req_rcvd.port;
++ break;
++ case IB_CM_SIDR_REP_RECEIVED:
++ print_dbg("IB_CM_SIDR_REP_RECEIVED (%d)\n",
++ ib_cm_event->event);
++ ibp_event_sidr_rep_get(&event_work->event.u.sidr_rep_resp,
++ &ib_cm_event->param.sidr_rep_rcvd);
++ info = ib_cm_event->param.sidr_rep_rcvd.info;
++ break;
++ case IB_CM_TIMEWAIT_EXIT:
++ case IB_CM_REQ_ERROR:
++ case IB_CM_REP_ERROR:
++ case IB_CM_DREQ_ERROR:
++ case IB_CM_LAP_ERROR:
++ case IB_CM_SIDR_REQ_ERROR:
++ print_dbg("IB_CM_..._ERROR (%d)\n", ib_cm_event->event);
++ event_work->event.u.send_status =
++ ib_cm_event->param.send_status;
++ break;
++
++ case IB_CM_USER_ESTABLISHED:
++ print_dbg("IB_CM_USER_ESTABLISHED (%d)\n",
++ ib_cm_event->event);
++ event_work->event.u.send_status =
++ ib_cm_event->param.send_status;
++ break;
++ case IB_CM_DREQ_RECEIVED:
++ print_dbg("IB_CM_DREQ_RECEIVED (%d)\n", ib_cm_event->event);
++ event_work->event.u.send_status =
++ ib_cm_event->param.send_status;
++ break;
++ case IB_CM_DREP_RECEIVED:
++ print_dbg("IB_CM_DREP_RECEIVED (%d)\n", ib_cm_event->event);
++ event_work->event.u.send_status =
++ ib_cm_event->param.send_status;
++ break;
++ default:
++ print_dbg("event not handled %d\n", ib_cm_event->event);
++ break;
++ }
++
++ if (data_length)
++ memcpy(event_work->event.data, ib_cm_event->private_data,
++ data_length);
++
++ if (info_length)
++ memcpy(event_work->event.data + data_length, info, info_length);
++
++ INIT_WORK(&event_work->work, ibp_event);
++ queue_work(client->workqueue, &event_work->work);
++
++ return 0;
++}
++
++int ibp_cmd_create_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_create_cm_id_cmd *cmd;
++ struct ibp_create_cm_id_resp *resp;
++ struct ib_device *ib_device;
++ struct ib_cm_id *cm_id = NULL;
++ struct cm_entry *entry;
++ size_t len;
++ int status = 0;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_create_cm_id_cmd *) hdr;
++ ib_device = (struct ib_device *) cmd->device;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ entry = kzalloc(sizeof(struct cm_entry), GFP_KERNEL);
++ if (!entry) {
++ print_err("kzalloc failed\n");
++ status = -ENOMEM;
++ goto send_resp;
++ }
++
++ cm_id = ib_create_cm_id(ib_device,
++ (ib_cm_handler) ibp_event_handler,
++ NULL);
++ if (IS_ERR(cm_id)) {
++ status = PTR_ERR(cm_id);
++ print_err("ib_create_cm_id returned %d\n", status);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++
++ resp = (struct ibp_create_cm_id_resp *) msg->data;
++
++ resp->ibp_cm_id = (u64) cm_id;
++ resp->service_id = cm_id->service_id;
++ resp->service_mask = cm_id->service_mask;
++ resp->local_id = cm_id->local_id;
++ resp->remote_id = cm_id->remote_id;
++ resp->remote_cm_qpn = cm_id->remote_cm_qpn;
++
++send_resp:
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, status);
++
++ ret = ibp_send(client->ep, msg, len);
++ if (ret) {
++ kfree(entry);
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++ if (status) {
++ kfree(entry);
++ return status;
++ }
++
++ entry->client = client;
++ entry->cm_id = cm_id;
++
++ down_write(&list_rwsem);
++ list_add(&entry->list, &cm_entry_list);
++ up_write(&list_rwsem);
++
++ return 0;
++}
++
++int ibp_cmd_destroy_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_destroy_cm_id_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ struct cm_entry *entry;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_destroy_cm_id_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ entry = find_cm_entry(cm_id);
++ if (!entry)
++ goto send_resp;
++
++ down_write(&list_rwsem);
++ list_del(&entry->list);
++ up_write(&list_rwsem);
++
++ kfree(entry);
++
++ ib_destroy_cm_id(cm_id);
++
++send_resp:
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_cm_listen(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_cm_listen_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_cm_listen_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_cm_listen(cm_id, cmd->service_id, cmd->service_mask);
++ if (ret)
++ print_err("ib_cm_listen returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_req(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_req_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ struct ib_cm_req_param param = {0};
++ struct ib_sa_path_rec primary_path;
++ struct ib_sa_path_rec alternate_path;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_req_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->alternate_path.pkey) {
++ param.alternate_path = &alternate_path;
++ ibp_copy_sa_path_rec(&cmd->alternate_path, &alternate_path);
++ }
++
++ param.primary_path = &primary_path;
++ ibp_copy_sa_path_rec(&cmd->primary_path, &primary_path);
++
++ param.service_id = cmd->service_id;
++ param.qp_num = cmd->qp_num;
++ param.qp_type = cmd->qp_type;
++ param.starting_psn = cmd->starting_psn;
++ param.peer_to_peer = cmd->peer_to_peer;
++ param.responder_resources = cmd->responder_resources;
++ param.initiator_depth = cmd->initiator_depth;
++ param.remote_cm_response_timeout = cmd->remote_cm_response_timeout;
++ param.flow_control = cmd->flow_control;
++ param.local_cm_response_timeout = cmd->local_cm_response_timeout;
++ param.retry_count = cmd->retry_count;
++ param.rnr_retry_count = cmd->rnr_retry_count;
++ param.max_cm_retries = cmd->max_cm_retries;
++ param.srq = cmd->srq;
++ param.private_data_len = cmd->private_data_len;
++
++ if (cmd->private_data_len)
++ param.private_data = cmd->private_data;
++
++ ret = ib_send_cm_req(cm_id, &param);
++ if (ret)
++ print_err("send_cm_req returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_rep(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_rep_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ struct ib_cm_rep_param param = {0};
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_rep_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ param.qp_num = cmd->qp_num;
++ param.starting_psn = cmd->starting_psn;
++ param.responder_resources = cmd->responder_resources;
++ param.initiator_depth = cmd->initiator_depth;
++ param.failover_accepted = cmd->failover_accepted;
++ param.rnr_retry_count = cmd->rnr_retry_count;
++ param.srq = cmd->srq;
++ param.private_data_len = cmd->private_data_len;
++
++ if (cmd->private_data_len)
++ param.private_data = cmd->private_data;
++
++ ret = ib_send_cm_rep(cm_id, &param);
++ if (ret)
++ print_err("send_cm_rep returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_rtu(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_send_cm_rtu_cmd *cmd;
++ struct ibp_response_msg *msg;
++ struct ib_cm_id *cm_id;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_rtu_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->private_data;
++
++ ret = ib_send_cm_rtu(cm_id, private_data, cmd->private_data_len);
++ if (ret)
++ print_err("send_cm_rtu returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_dreq(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_dreq_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_dreq_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->private_data;
++
++ ret = ib_send_cm_dreq(cm_id, private_data, cmd->private_data_len);
++ if (ret)
++ print_dbg("send_cm_dreq returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_drep(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_drep_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_drep_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->private_data;
++
++ ret = ib_send_cm_drep(cm_id, private_data, cmd->private_data_len);
++ if (ret)
++ print_dbg("send_cm_drep returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_rej(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_rej_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ void *ari;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_rej_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->data;
++
++ ari = &(cmd->data[cmd->private_data_len]);
++
++ ret = ib_send_cm_rej(cm_id, cmd->reason, ari, cmd->ari_length,
++ private_data, cmd->private_data_len);
++ if (ret)
++ print_err("send_cm_rej returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_mra(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_mra_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_mra_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->private_data;
++
++ ret = ib_send_cm_mra(cm_id, cmd->service_timeout,
++ private_data, cmd->private_data_len);
++ if (ret)
++ print_err("send_cm_mra returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_lap(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_lap_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ struct ib_sa_path_rec alt_path;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_lap_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->private_data;
++
++ ibp_copy_sa_path_rec(&cmd->alternate_path, &alt_path);
++
++ ret = ib_send_cm_lap(cm_id, &alt_path,
++ private_data, cmd->private_data_len);
++ if (ret)
++ print_err("send_cm_lap returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_send_cm_apr(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_apr_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ void *info = NULL;
++ void *private_data = NULL;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_apr_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ if (cmd->private_data_len)
++ private_data = cmd->data;
++ if (cmd->info_length)
++ info = &(cmd->data[cmd->private_data_len]);
++
++ ret = ib_send_cm_apr(cm_id, cmd->status, info, cmd->info_length,
++ private_data, cmd->private_data_len);
++ if (ret)
++ print_err("send_cm_apr returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int
++ibp_cmd_send_cm_sidr_req(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_sidr_req_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ struct ib_cm_sidr_req_param param = {0};
++ struct ib_sa_path_rec path;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_sidr_req_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ param.path = &path;
++ ibp_copy_sa_path_rec(&cmd->path, &path);
++
++ param.service_id = cmd->service_id;
++ param.timeout_ms = cmd->timeout_ms;
++ param.max_cm_retries = cmd->max_cm_retries;
++ param.private_data_len = cmd->private_data_len;
++
++ if (cmd->private_data_len)
++ param.private_data = cmd->private_data;
++
++ ret = ib_send_cm_sidr_req(cm_id, &param);
++ if (ret)
++ print_err("send_cm_sidr_req returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int
++ibp_cmd_send_cm_sidr_rep(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_send_cm_sidr_rep_cmd *cmd;
++ struct ib_cm_sidr_rep_param param = {0};
++ struct ib_cm_id *cm_id;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_send_cm_sidr_rep_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ param.qp_num = cmd->qp_num;
++ param.qkey = cmd->qkey;
++ param.status = cmd->status;
++ param.info_length = cmd->info_length;
++ param.private_data_len = cmd->private_data_len;
++
++ if (cmd->private_data_len)
++ param.private_data = cmd->data;
++ if (cmd->info_length)
++ param.info = &(cmd->data[cmd->private_data_len]);
++
++ ret = ib_send_cm_sidr_rep(cm_id, &param);
++ if (ret)
++ print_err("send_cm_sidr_rep returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int ibp_cmd_cm_notify(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_cm_notify_cmd *cmd;
++ struct ib_cm_id *cm_id;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_cm_notify_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_cm_notify(cm_id, cmd->event);
++ if (ret)
++ print_err("cm_notify returned %d\n", ret);
++
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
++
++int
++ibp_cmd_cm_init_qp_attr(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ struct ibp_cm_init_qp_attr_cmd *cmd;
++ struct ibp_cm_init_qp_attr_resp *resp;
++ struct ib_cm_id *cm_id;
++ struct ib_qp_attr qp_attr;
++ int qp_attr_mask;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_cm_init_qp_attr_cmd *) hdr;
++ cm_id = (struct ib_cm_id *) cmd->ibp_cm_id;
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ qp_attr.qp_state = cmd->qp_attr_state;
++
++ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
++ if (ret) {
++ print_err("init_qp_attr returned %d\n", ret);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++ resp = (struct ibp_cm_init_qp_attr_resp *) msg->data;
++
++ resp->qp_attr_mask = qp_attr_mask;
++ resp->qp_access_flags = qp_attr.qp_access_flags;
++ resp->qp_state = qp_attr.qp_state;
++ resp->cur_qp_state = qp_attr.cur_qp_state;
++ resp->path_mtu = qp_attr.path_mtu;
++ resp->path_mig_state = qp_attr.path_mig_state;
++ resp->qkey = qp_attr.qkey;
++ resp->rq_psn = qp_attr.rq_psn;
++ resp->sq_psn = qp_attr.sq_psn;
++ resp->dest_qp_num = qp_attr.dest_qp_num;
++
++ resp->cap_max_send_wr = qp_attr.cap.max_send_wr;
++ resp->cap_max_recv_wr = qp_attr.cap.max_recv_wr;
++ resp->cap_max_send_sge = qp_attr.cap.max_send_sge;
++ resp->cap_max_recv_sge = qp_attr.cap.max_recv_sge;
++ resp->cap_max_inline_data = qp_attr.cap.max_inline_data;
++
++ resp->ah_attr_grh_dgid_subnet_prefix =
++ qp_attr.ah_attr.grh.dgid.global.subnet_prefix;
++ resp->ah_attr_grh_dgid_interface_id =
++ qp_attr.ah_attr.grh.dgid.global.interface_id;
++ resp->ah_attr_grh_flow_label = qp_attr.ah_attr.grh.flow_label;
++ resp->ah_attr_grh_sgid_index = qp_attr.ah_attr.grh.sgid_index;
++ resp->ah_attr_grh_hop_limit = qp_attr.ah_attr.grh.hop_limit;
++ resp->ah_attr_grh_traffic_class = qp_attr.ah_attr.grh.traffic_class;
++ resp->ah_attr_dlid = qp_attr.ah_attr.dlid;
++ resp->ah_attr_sl = qp_attr.ah_attr.sl;
++ resp->ah_attr_src_path_bits = qp_attr.ah_attr.src_path_bits;
++ resp->ah_attr_static_rate = qp_attr.ah_attr.static_rate;
++ resp->ah_attr_ah_flags = qp_attr.ah_attr.ah_flags;
++ resp->ah_attr_port_num = qp_attr.ah_attr.port_num;
++
++ resp->alt_attr_grh_dgid_subnet_prefix =
++ qp_attr.alt_ah_attr.grh.dgid.global.subnet_prefix;
++ resp->alt_attr_grh_dgid_interface_id =
++ qp_attr.alt_ah_attr.grh.dgid.global.interface_id;
++ resp->alt_attr_grh_flow_label = qp_attr.alt_ah_attr.grh.flow_label;
++ resp->alt_attr_grh_sgid_index = qp_attr.alt_ah_attr.grh.sgid_index;
++ resp->alt_attr_grh_hop_limit = qp_attr.alt_ah_attr.grh.hop_limit;
++ resp->alt_attr_grh_traffic_class
++ = qp_attr.alt_ah_attr.grh.traffic_class;
++ resp->alt_attr_dlid = qp_attr.alt_ah_attr.dlid;
++ resp->alt_attr_sl = qp_attr.alt_ah_attr.sl;
++ resp->alt_attr_src_path_bits = qp_attr.alt_ah_attr.src_path_bits;
++ resp->alt_attr_static_rate = qp_attr.alt_ah_attr.static_rate;
++ resp->alt_attr_ah_flags = qp_attr.alt_ah_attr.ah_flags;
++ resp->alt_attr_port_num = qp_attr.alt_ah_attr.port_num;
++
++ resp->pkey_index = qp_attr.pkey_index;
++ resp->alt_pkey_index = qp_attr.alt_pkey_index;
++ resp->en_sqd_async_notify = qp_attr.en_sqd_async_notify;
++ resp->sq_draining = qp_attr.sq_draining;
++ resp->max_rd_atomic = qp_attr.max_rd_atomic;
++ resp->max_dest_rd_atomic = qp_attr.max_dest_rd_atomic;
++ resp->min_rnr_timer = qp_attr.min_rnr_timer;
++ resp->port_num = qp_attr.port_num;
++ resp->timeout = qp_attr.timeout;
++ resp->retry_cnt = qp_attr.retry_cnt;
++ resp->rnr_retry = qp_attr.rnr_retry;
++ resp->alt_port_num = qp_attr.alt_port_num;
++ resp->alt_timeout = qp_attr.alt_timeout;
++
++send_resp:
++ IBP_INIT_RESP(cm_id, msg, len, IBP_RESPONSE, hdr->request, ret);
++
++ return ibp_send(client->ep, msg, len);
++}
+diff --git a/drivers/infiniband/ibp/cm/common.h b/drivers/infiniband/ibp/cm/common.h
+new file mode 100644
+index 0000000..7c1c074
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/common.h
+@@ -0,0 +1,55 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef COMMON_H
++#define COMMON_H
++
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/poll.h>
++#include <linux/pci.h>
++#include <linux/net.h>
++#include <rdma/ib_verbs.h>
++#include "compat.h"
++
++#define SCIF_OFED_PORT_3 63 /* reserved for cm proxy */
++
++#ifndef IBP_CM_PORT /* unique scif port for this service */
++#define IBP_CM_PORT SCIF_OFED_PORT_3
++#endif
++
++int ibp_send(scif_epd_t ep, void *buf, size_t len);
++int ibp_recv(scif_epd_t ep, void *buf, size_t len);
++
++#endif /* COMMON_H */
+diff --git a/drivers/infiniband/ibp/cm/ibp-abi.h b/drivers/infiniband/ibp/cm/ibp-abi.h
+new file mode 100644
+index 0000000..383e8cb
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/ibp-abi.h
+@@ -0,0 +1,94 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_ABI_H
++#define IBP_ABI_H
++
++#include <linux/types.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_cm.h>
++
++/* Increment this value if any changes break compatibility. */
++#define IBP_CM_ABI_VERSION 1
++
++/* Client to server message enums. */
++enum {
++ IBP_CREATE_CM_ID,
++ IBP_DESTROY_CM_ID,
++ IBP_CM_LISTEN,
++ IBP_CM_NOTIFY,
++ IBP_SEND_CM_REQ,
++ IBP_SEND_CM_REP,
++ IBP_SEND_CM_RTU,
++ IBP_SEND_CM_DREQ,
++ IBP_SEND_CM_DREP,
++ IBP_SEND_CM_REJ,
++ IBP_SEND_CM_MRA,
++ IBP_SEND_CM_LAP,
++ IBP_SEND_CM_APR,
++ IBP_SEND_CM_SIDR_REQ,
++ IBP_SEND_CM_SIDR_REP,
++ IBP_CM_INIT_QP_ATTR,
++};
++
++/* Server to client message enums. */
++enum {
++ IBP_EVENT,
++ IBP_RESPONSE,
++};
++
++/*
++ * Make sure that all structs defined in this file are laid out to pack
++ * the same way on different architectures to avoid incompatibility.
++ *
++ * Specifically:
++ * - Do not use pointer types -- pass pointers in a u64 instead.
++ * - Make sure that any structure larger than 4 bytes is padded
++ * to a multiple of 8 bytes; otherwise the structure size may
++ * be different between architectures.
++ */
++
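++/*
++ * For example (illustrative only, not part of the ABI), a message that
++ * carries a single 32-bit field would pad itself out to a multiple of
++ * 8 bytes with an explicit reserved field:
++ *
++ *     struct ibp_example_msg {
++ *             struct ibp_msg_header header;
++ *             u32 id;
++ *             u32 reserved;
++ *     };
++ */
++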
++struct ibp_msg_header { /* present in all messages */
++ u32 opcode;
++ u32 length;
++ u32 status;
++ u32 reserved;
++ u64 request;
++ u64 data[0];
++};
++
++struct ibp_response_msg {
++ struct ibp_msg_header header;
++ u64 data[0];
++};
++
++#endif /* IBP_ABI_H */
+diff --git a/drivers/infiniband/ibp/cm/ibp_exports.h b/drivers/infiniband/ibp/cm/ibp_exports.h
+new file mode 100644
+index 0000000..09c5dfe
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/ibp_exports.h
+@@ -0,0 +1,50 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_EXPORTS_H
++#define IBP_EXPORTS_H
++
++#include <rdma/ib_verbs.h>
++
++/**
++ * ibp_resolve_ib_device - Return the host ib_device handle
++ * @ibdev: Card IB device
++ *
++ * Upper level drivers may require the host ib_device handle associated
++ * with the card ib_device. This routine resolves the card ib_device to
++ * the corresponding host ib_device handle. A value of 0 is returned if
++ * no match was found.
++ */
++u64 ibp_resolve_ib_device(struct ib_device *ibdev);
++
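++/*
++ * For example (sketch; the calling context is hypothetical), a proxy
++ * driver translates a card-side device before issuing a host request:
++ *
++ *     u64 handle = ibp_resolve_ib_device(ibdev);
++ *     if (!handle)
++ *             return -ENODEV;
++ */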
++
++#endif /* IBP_EXPORTS_H */
+diff --git a/drivers/infiniband/ibp/cm/server.c b/drivers/infiniband/ibp/cm/server.c
+new file mode 100644
+index 0000000..08fe284
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/server.c
+@@ -0,0 +1,221 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "server.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++MODULE_PARAM(port, port, int, IBP_CM_PORT, "Connection port");
++MODULE_PARAM(backlog, backlog, int, 8, "Connection backlog");
++MODULE_PARAM(timeout, timeout, int, 1000, "Listen/Poll time in milliseconds");
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++struct rw_semaphore list_rwsem;
++
++LIST_HEAD(client_list);
++
++static struct task_struct *listen_thread;
++
++static struct ibp_client *ibp_create_client(scif_epd_t ep, uint16_t node)
++{
++ struct ibp_client *client;
++ int ret = -ENOMEM;
++
++ client = kzalloc(sizeof(*client), GFP_KERNEL);
++ if (!client) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(ret);
++ }
++
++ client->ep = ep;
++
++ client->rx_buf = (void *)__get_free_page(GFP_KERNEL);
++ if (!client->rx_buf) {
++ print_err("__get_free_page rx_buf failed\n");
++ goto err0;
++ }
++
++ client->tx_buf = (void *)__get_free_page(GFP_KERNEL);
++ if (!client->tx_buf) {
++ print_err("__get_free_page tx_buf failed\n");
++ goto err1;
++ }
++
++ client->workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!client->workqueue) {
++ print_err("create_singlethread_workqueue failed\n");
++ goto err2;
++ }
++
++ down_write(&list_rwsem);
++ list_add(&client->list, &client_list);
++ up_write(&list_rwsem);
++
++ client->ibp_cm_client_thread = kthread_run(ibp_process_recvs,
++ client, DRV_NAME);
++ if (IS_ERR(client->ibp_cm_client_thread)) {
++ ret = PTR_ERR(client->ibp_cm_client_thread);
++ print_err("create client thread failed\n");
++ goto err3;
++ }
++
++ return client;
++err3:
++ down_write(&list_rwsem);
++ list_del(&client->list);
++ up_write(&list_rwsem);
++
++ destroy_workqueue(client->workqueue);
++err2:
++ free_page((uintptr_t)client->tx_buf);
++err1:
++ free_page((uintptr_t)client->rx_buf);
++err0:
++ kfree(client);
++ return ERR_PTR(ret);
++}
++
++static int ibp_cm_listen(void *data)
++{
++ struct ibp_client *client;
++ struct scif_pollepd listen;
++ struct scif_port_id peer;
++ scif_epd_t ep;
++ int ret;
++
++ listen.epd = scif_open();
++ if (!listen.epd) {
++ print_err("scif_open failed\n");
++ ret = -EIO;
++ goto err0;
++ }
++ listen.events = POLLIN;
++
++ ret = scif_bind(listen.epd, port);
++ if (ret < 0) {
++ print_err("scif_bind returned %d\n", ret);
++ goto err1;
++ }
++
++ ret = scif_listen(listen.epd, backlog);
++ if (ret) {
++ print_err("scif_listen returned %d\n", ret);
++ goto err1;
++ }
++
++ while (!kthread_should_stop()) {
++
++ schedule();
++
++ ret = scif_poll(&listen, 1, timeout);
++ if (ret == 0) /* timeout */
++ continue;
++ if (ret < 0) {
++ print_err("scif_poll revents 0x%x\n", listen.revents);
++ continue;
++ }
++
++ ret = scif_accept(listen.epd, &peer, &ep, 0);
++ if (ret) {
++ print_err("scif_accept returned %d\n", ret);
++ continue;
++ }
++
++ print_dbg("accepted node %d port %d\n", peer.node, peer.port);
++
++ client = ibp_create_client(ep, peer.node);
++ if (IS_ERR(client)) {
++ ret = PTR_ERR(client);
++ print_err("ibp_create_client returned %d\n", ret);
++ scif_close(ep);
++ }
++ }
++err1:
++ scif_close(listen.epd);
++err0:
++ return ret;
++}
++
++static int __init ibp_cm_server_init(void)
++{
++ int ret = 0;
++
++ print_info(DRV_SIGNON);
++
++ init_rwsem(&list_rwsem);
++
++ /* Start a thread for inbound connections. */
++ listen_thread = kthread_run(ibp_cm_listen, NULL, DRV_NAME);
++ if (IS_ERR(listen_thread)) {
++ ret = PTR_ERR(listen_thread);
++ print_err("kthread_run returned %d\n", ret);
++ }
++
++ return ret;
++}
++
++static void __exit ibp_cm_server_exit(void)
++{
++ struct ibp_client *client, *next;
++ struct completion done;
++
++ kthread_stop(listen_thread);
++
++ down_write(&list_rwsem);
++ list_for_each_entry_safe(client, next, &client_list, list) {
++ init_completion(&done);
++ client->done = &done;
++
++ /* Close scif ep to unblock the client thread scif_recv */
++ scif_close(client->ep);
++
++ up_write(&list_rwsem);
++
++ /* Wait for client thread to finish */
++ wait_for_completion(&done);
++
++ down_write(&list_rwsem);
++ }
++ up_write(&list_rwsem);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_cm_server_init);
++module_exit(ibp_cm_server_exit);
+diff --git a/drivers/infiniband/ibp/cm/server.h b/drivers/infiniband/ibp/cm/server.h
+new file mode 100644
+index 0000000..cd71a90
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/server.h
+@@ -0,0 +1,129 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef SERVER_H
++#define SERVER_H
++
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <linux/anon_inodes.h>
++#include <rdma/ib_umem.h>
++#include "ibp-abi.h"
++#include "cm_ibp_abi.h"
++#include "common.h"
++
++#define DRV_ROLE "CM Server"
++#define DRV_NAME "ibp_cm_server"
++#include "compat.h"
++
++#define MAX_MSG_SIZE PAGE_SIZE
++
++extern int timeout;
++extern struct rw_semaphore list_rwsem;
++extern struct list_head client_list;
++extern struct list_head cm_entry_list;
++
++struct ibp_client {
++ struct list_head list;
++ scif_epd_t ep;
++ void *rx_buf;
++ void *tx_buf;
++ struct completion *done;
++ struct workqueue_struct *workqueue;
++ struct task_struct *ibp_cm_client_thread;
++};
++
++struct cm_entry {
++ struct list_head list;
++ struct ib_cm_id *cm_id;
++ struct ibp_client *client;
++};
++
++struct ibp_event_get {
++ __u64 response;
++ __u64 data;
++ __u64 info;
++ __u8 data_len;
++ __u8 info_len;
++ __u8 reserved[6];
++};
++
++struct ibp_event {
++ struct work_struct work;
++ struct ibp_client *client;
++ struct ibp_cm_event event;
++};
++
++#define IBP_INIT_MSG(device, msg, size, op) \
++ do { \
++ (msg)->header.opcode = IBP_##op; \
++ (msg)->header.length = (size); \
++ (msg)->header.status = 0; \
++ (msg)->header.reserved = 0; \
++ (msg)->header.request = 0; \
++ } while (0)
++
++#define IBP_INIT_RESP(device, resp, size, op, req, stat) \
++ do { \
++ (resp)->header.opcode = IBP_##op; \
++ (resp)->header.length = (size); \
++ (resp)->header.status = (stat); \
++ (resp)->header.reserved = 0; \
++ (resp)->header.request = (req); \
++ } while (0)
++
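++/*
++ * A typical command handler builds its reply in the per-client tx_buf,
++ * e.g. (sketch mirroring ibp_cmd_bad_request in server_msg.c):
++ *
++ *     struct ibp_response_msg *msg = client->tx_buf;
++ *     size_t len = sizeof(*msg);
++ *
++ *     IBP_INIT_RESP(NULL, msg, len, IBP_RESPONSE, hdr->request, status);
++ *     return ibp_send(client->ep, msg, len);
++ */
++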
++int ibp_process_recvs(void *p);
++void cleanup_cm_entry_list(void);
++
++int ibp_cmd_create_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_destroy_cm_id(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_cm_listen(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_cm_notify(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_req(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_rep(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_rtu(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_dreq(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_drep(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_rej(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_mra(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_lap(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_apr(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_sidr_req(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_send_cm_sidr_rep(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_cm_event(struct ibp_client *client, struct ibp_msg_header *hdr);
++int ibp_cmd_cm_init_qp_attr(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++
++#endif /* SERVER_H */
+diff --git a/drivers/infiniband/ibp/cm/server_msg.c b/drivers/infiniband/ibp/cm/server_msg.c
+new file mode 100644
+index 0000000..bc3f009
+--- /dev/null
++++ b/drivers/infiniband/ibp/cm/server_msg.c
+@@ -0,0 +1,176 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "server.h"
++#include "cm_ibp_abi.h"
++
++int ibp_send(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
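++ /* scif_send() may transfer fewer than len bytes; loop until all are sent. */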
++ while (len) {
++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_send returned %d\n", ret);
++ return ret;
++ }
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++int ibp_recv(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
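++ /* Likewise, scif_recv() may return a partial read; loop until len is consumed. */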
++ while (len) {
++ ret = scif_recv(ep, buf, (uint32_t)len, SCIF_RECV_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_recv returned %d\n", ret);
++ return ret;
++ }
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++static int
++ibp_cmd_bad_request(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_response_msg *msg;
++ size_t len;
++ int status = -EBADRQC;
++
++ print_dbg("opcode 0x%x\n", hdr->opcode);
++
++ msg = (struct ibp_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ IBP_INIT_RESP(NULL, msg, len, IBP_RESPONSE, hdr->request, status);
++ return ibp_send(client->ep, msg, len);
++}
++
++static void
++ibp_cm_destroy_client(struct ibp_client *client)
++{
++ struct cm_entry *cm, *tmp;
++
++ down_write(&list_rwsem);
++ list_del(&client->list);
++ list_for_each_entry_safe(cm, tmp, &cm_entry_list, list)
++ if (cm->client == client) {
++ ib_destroy_cm_id(cm->cm_id);
++ list_del(&cm->list);
++ kfree(cm);
++ }
++ up_write(&list_rwsem);
++
++ destroy_workqueue(client->workqueue);
++
++ free_page((uintptr_t)client->tx_buf);
++ free_page((uintptr_t)client->rx_buf);
++
++ if (client->done)
++ complete(client->done);
++ else
++ scif_close(client->ep);
++
++ kfree(client);
++}
++
++static int
++(*ibp_msg_table[])(struct ibp_client *c, struct ibp_msg_header *h) = {
++ [IBP_CREATE_CM_ID] = ibp_cmd_create_cm_id,
++ [IBP_DESTROY_CM_ID] = ibp_cmd_destroy_cm_id,
++ [IBP_CM_LISTEN] = ibp_cmd_cm_listen,
++ [IBP_CM_NOTIFY] = ibp_cmd_cm_notify,
++ [IBP_SEND_CM_REQ] = ibp_cmd_send_cm_req,
++ [IBP_SEND_CM_REP] = ibp_cmd_send_cm_rep,
++ [IBP_SEND_CM_RTU] = ibp_cmd_send_cm_rtu,
++ [IBP_SEND_CM_DREQ] = ibp_cmd_send_cm_dreq,
++ [IBP_SEND_CM_DREP] = ibp_cmd_send_cm_drep,
++ [IBP_SEND_CM_REJ] = ibp_cmd_send_cm_rej,
++ [IBP_SEND_CM_MRA] = ibp_cmd_send_cm_mra,
++ [IBP_SEND_CM_LAP] = ibp_cmd_send_cm_lap,
++ [IBP_SEND_CM_APR] = ibp_cmd_send_cm_apr,
++ [IBP_SEND_CM_SIDR_REQ] = ibp_cmd_send_cm_sidr_req,
++ [IBP_SEND_CM_SIDR_REP] = ibp_cmd_send_cm_sidr_rep,
++ [IBP_CM_INIT_QP_ATTR] = ibp_cmd_cm_init_qp_attr,
++};
++
++int ibp_process_recvs(void *p)
++{
++ struct ibp_client *client;
++ struct ibp_msg_header *hdr;
++ int ret;
++
++ client = (struct ibp_client *) p;
++ hdr = (struct ibp_msg_header *) client->rx_buf;
++
++ for (;;) {
++ ret = ibp_recv(client->ep, hdr, sizeof(*hdr));
++ if (ret)
++ break;
++
++ if (hdr->length > MAX_MSG_SIZE) {
++ print_err("message too large, len %u max %lu\n",
++ hdr->length, MAX_MSG_SIZE);
++ ret = -EMSGSIZE;
++ break;
++ }
++
++ if (hdr->length > sizeof(*hdr)) {
++ ret = ibp_recv(client->ep, hdr->data,
++ hdr->length - sizeof(*hdr));
++ if (ret)
++ break;
++ }
++
++ if ((hdr->opcode >= ARRAY_SIZE(ibp_msg_table)) ||
++ !ibp_msg_table[hdr->opcode]) {
++ ibp_cmd_bad_request(client, hdr);
++ continue;
++ }
++
++ ret = ibp_msg_table[hdr->opcode](client, hdr);
++ if (ret)
++ break;
++ }
++
++ ibp_cm_destroy_client(client);
++
++ return ret;
++}
+diff --git a/drivers/infiniband/ibp/compat.h b/drivers/infiniband/ibp/compat.h
+new file mode 100644
+index 0000000..7eb128c
+--- /dev/null
++++ b/drivers/infiniband/ibp/compat.h
+@@ -0,0 +1,101 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef COMPAT_H
++#define COMPAT_H
++
++/* scif compatibility hacks.
++ * Vanilla kernel scif is always "upstream", i.e. it has the new function
++ * names. Out-of-tree scif.h from MPSS may have the older names (MPSS 3.x)
++ * or pretty much match the "upstream" version.
++ */
++
++#ifdef HAVE_EXTERNAL_SCIF
++ #include <scif.h>
++ #ifndef HAVE_UPSTREAM_SCIF
++ #define scif_get_node_ids scif_get_nodeIDs
++ #define scif_port_id scif_portID
++ #endif
++#else
++ #include <linux/scif.h>
++#endif
++
++
++#define DRV_DESC "CCL Direct " DRV_ROLE
++#define DRV_VERSION "1.1"
++#define DRV_PFX DRV_NAME ": "
++
++#define DRV_COPYRIGHT "Copyright (c) 2011-2016 Intel Corporation"
++#define DRV_SIGNON DRV_DESC " v" DRV_VERSION "\n" DRV_COPYRIGHT "\n"
++
++#define MODULE_PARAM(name, var, type, value, desc) \
++ type var = value; \
++ module_param_named(name, var, type, 0644); \
++ MODULE_PARM_DESC(name, desc)
++
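++/*
++ * For example, MODULE_PARAM(port, port, int, IBP_CM_PORT, "Connection port")
++ * expands to:
++ *
++ *     int port = IBP_CM_PORT;
++ *     module_param_named(port, port, int, 0644);
++ *     MODULE_PARM_DESC(port, "Connection port");
++ */
++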
++#ifdef IBP_DEBUG
++extern int debug_level;
++#endif
++
++enum {
++ IBP_DEBUG_NONE,
++ IBP_DEBUG_TARGETED,
++ IBP_DEBUG_VERBOSE,
++};
++
++#define _PRINTK(l, f, arg...) \
++ printk(l DRV_PFX "%s " f, __func__, ##arg)
++
++#ifdef IBP_DEBUG
++#define PRINTK(dbg, l, f, arg...) \
++ do { \
++ if (debug_level >= dbg) \
++ printk(l DRV_PFX "%s " f, \
++ __func__, ##arg); \
++ } while (0)
++#else
++#define PRINTK(dbg, l, f, arg...) do { } while (0)
++#endif
++
++#define print_dbg(f, arg...) PRINTK(IBP_DEBUG_TARGETED, KERN_DEBUG, f, ##arg)
++#define print_err(f, arg...) _PRINTK(KERN_ERR, f, ##arg)
++#define print_info(f, arg...) pr_info(f, ##arg)
++
++#ifdef FORCED_FUNCTION_TRACING
++#define print_trace(f, arg...) PRINTK(IBP_DEBUG_VERBOSE, KERN_ERR, f, ##arg)
++#else
++#define print_trace(f, arg...) do { } while (0)
++#endif
++
++#define IS_NULL_OR_ERR(p) (!(p) || IS_ERR_VALUE((unsigned long)(p)))
++
++#endif /* COMPAT_H */
+diff --git a/drivers/infiniband/ibp/drv/Makefile b/drivers/infiniband/ibp/drv/Makefile
+new file mode 100644
+index 0000000..8c806b5
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/Makefile
+@@ -0,0 +1,10 @@
++obj-$(CONFIG_IBP_CLIENT) += ibp_client.o
++obj-$(CONFIG_IBP_SERVER) += ibp_server.o
++obj-$(CONFIG_IBP_MLX4) += hw/mlx4/
++obj-$(CONFIG_IBP_MLX5) += hw/mlx5/
++
++ibp_client-y := client.o \
++ client_msg.o
++
++ibp_server-y := server.o \
++ server_msg.o
+diff --git a/drivers/infiniband/ibp/drv/client.c b/drivers/infiniband/ibp/drv/client.c
+new file mode 100644
+index 0000000..b129950
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/client.c
+@@ -0,0 +1,502 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++MODULE_PARAM(port, port, int, IBP_PORT, "Connection port");
++MODULE_PARAM(timeout, timeout, int, 1000, "Connect/Poll timeout (in ms)");
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++static struct kref ref;
++static DECLARE_COMPLETION(done);
++
++scif_epd_t ep;
++struct workqueue_struct *workqueue;
++struct rw_semaphore list_rwsem;
++
++static struct task_struct *ibp_client_thread;
++
++static LIST_HEAD(device_list);
++static LIST_HEAD(driver_list);
++
++static struct ibp_device *__ibp_find_device_by_name(const char *name)
++{
++ struct ibp_device *device;
++
++ list_for_each_entry(device, &device_list, list)
++ if (!strncmp(device->name, name, IB_DEVICE_NAME_MAX))
++ return device;
++
++ return ERR_PTR(-ENODEV);
++}
++
++static struct ibp_device *__ibp_find_device(u32 vendor_id, u32 device_id,
++ struct ibp_device *from)
++{
++ from = list_prepare_entry(from, &device_list, list);
++
++ list_for_each_entry_continue(from, &device_list, list)
++ if ((from->vendor_id == vendor_id) &&
++ (from->device_id == device_id))
++ return from;
++
++ return ERR_PTR(-ENODEV);
++}
++
++static struct ibp_driver *__ibp_find_driver(u32 vendor_id, u32 device_id)
++{
++ const struct ibp_id_table *tbl;
++ struct ibp_driver *driver;
++
++ list_for_each_entry(driver, &driver_list, list)
++ for (tbl = driver->id_table; tbl->vendor_id; tbl++)
++ if ((tbl->vendor_id == vendor_id) &&
++ (tbl->device_id == device_id))
++ return driver;
++
++ return ERR_PTR(-ENODEV);
++}
++
++static void __ibp_register_device(struct ibp_device *device)
++{
++ struct ibp_driver *driver;
++
++ list_add_tail(&device->list, &device_list);
++
++ driver = __ibp_find_driver(device->vendor_id, device->device_id);
++ if (!IS_ERR(driver))
++ driver->add(device);
++}
++
++static void __ibp_unregister_device(struct ibp_device *device)
++{
++ struct ibp_driver *driver;
++
++ driver = __ibp_find_driver(device->vendor_id, device->device_id);
++ if (!IS_ERR(driver))
++ driver->remove(device);
++
++ list_del(&device->list);
++}
++
++static ssize_t ibp_dev_show_vendor(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct ibp_device *device;
++
++ device = dev_get_drvdata(dev);
++ if (!device)
++ return 0;
++
++ return snprintf(buf, PAGE_SIZE, "0x%04x\n", device->vendor_id);
++}
++
++static ssize_t ibp_dev_show_device(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct ibp_device *device;
++
++ device = dev_get_drvdata(dev);
++ if (!device)
++ return 0;
++
++ return snprintf(buf, PAGE_SIZE, "0x%04x\n", device->device_id);
++}
++
++static DEVICE_ATTR(vendor, S_IRUGO, ibp_dev_show_vendor, NULL);
++static DEVICE_ATTR(device, S_IRUGO, ibp_dev_show_device, NULL);
++
++static struct device_attribute *ibp_dev_attrs[] = {
++ &dev_attr_vendor,
++ &dev_attr_device
++};
++
++static void ibp_done(struct kref *unused)
++{
++ complete(&done);
++}
++
++static void ibp_release_linux_dev(struct device *dev)
++{
++ kfree(dev);
++ kref_put(&ref, ibp_done);
++}
++
++static struct device *ibp_create_linux_dev(struct ibp_device *device)
++{
++ struct device *dev;
++ int i, ret;
++
++ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
++ if (!dev) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ dev->init_name = device->name;
++ dev->release = ibp_release_linux_dev;
++
++ /* Always use reference counts to free dev. */
++ kref_get(&ref);
++ ret = device_register(dev);
++ if (ret) {
++ print_err("device_register returned %d\n", ret);
++ put_device(dev);
++ return ERR_PTR(ret);
++ }
++
++ dev_set_drvdata(dev, device);
++
++ for (i = 0; i < ARRAY_SIZE(ibp_dev_attrs); i++) {
++ ret = device_create_file(dev, ibp_dev_attrs[i]);
++ if (ret) {
++ print_err("device_create_file returned %d\n", ret);
++ while (--i >= 0)
++ device_remove_file(dev, ibp_dev_attrs[i]);
++ device_unregister(dev);
++ return ERR_PTR(ret);
++ }
++ }
++
++ return dev;
++}
++
++static void ibp_destroy_linux_dev(struct device *dev)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(ibp_dev_attrs); i++)
++ device_remove_file(dev, ibp_dev_attrs[i]);
++
++ device_unregister(dev);
++}
++
++static struct ibp_device *ibp_create_device(struct ibp_add_device *msg)
++{
++ struct ibp_device *device;
++ struct device *linux_dev;
++
++ device = kzalloc(sizeof(*device), GFP_KERNEL);
++ if (!device) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ strlcat(device->name, msg->name, sizeof(device->name));
++ device->vendor_id = msg->vendor_id;
++ device->device_id = msg->device_id;
++ device->ib_device = msg->ib_device;
++ device->device = msg->device;
++ device->node_guid = msg->node_guid;
++ device->uverbs_cmd_mask = msg->uverbs_cmd_mask;
++ device->uverbs_abi_ver = msg->uverbs_abi_ver;
++ device->ibp_abi_ver = msg->ibp_abi_ver;
++ device->num_comp_vectors = msg->num_comp_vectors;
++ device->phys_port_cnt = msg->phys_port_cnt;
++
++ linux_dev = ibp_create_linux_dev(device);
++ if (IS_ERR(linux_dev)) {
++ kfree(device);
++ device = ERR_CAST(linux_dev);
++ } else {
++ device->linux_dev = linux_dev;
++ }
++
++ return device;
++}
++
++static void ibp_destroy_device(struct ibp_device *device)
++{
++ ibp_destroy_linux_dev(device->linux_dev);
++ kfree(device);
++}
++
++void ibp_add_one(struct ibp_add_device *msg)
++{
++ struct ibp_device *device;
++
++ if (msg->ibp_abi_ver > IBP_ABI_VERSION) {
++ print_err("message IBP ABI version %d "
++ "exceeds %d; ignoring %s\n",
++ msg->ibp_abi_ver, IBP_ABI_VERSION, msg->name);
++ return;
++ }
++
++ device = ibp_create_device(msg);
++ if (!IS_ERR(device)) {
++ down_write(&list_rwsem);
++ __ibp_register_device(device);
++ up_write(&list_rwsem);
++ }
++}
++
++void ibp_remove_one(u64 dev)
++{
++ struct ibp_device *device, *next;
++
++ down_write(&list_rwsem);
++ list_for_each_entry_safe(device, next, &device_list, list)
++ if (device->device == dev) {
++ __ibp_unregister_device(device);
++ ibp_destroy_device(device);
++ }
++ up_write(&list_rwsem);
++}
++
++static int __ibp_check_for_duplicates(const struct ibp_driver *driver)
++{
++ const struct ibp_id_table *tbl;
++ struct ibp_driver *drv;
++
++ for (tbl = driver->id_table; tbl->vendor_id; tbl++) {
++ drv = __ibp_find_driver(tbl->vendor_id, tbl->device_id);
++ if (!IS_ERR(drv))
++ return -EEXIST;
++ }
++
++ return 0;
++}
++
++/**
++ * ibp_resolve_ib_device - Return the host ib_device handle
++ * @ibdev: Card IB device
++ *
++ * Upper level proxy drivers may require the host ib_device handle
++ * associated with the card ib_device. This routine resolves the
++ * card ib_device to the corresponding host ib_device handle. A
++ * value of 0 is returned if no match was found.
++ */
++u64 ibp_resolve_ib_device(struct ib_device *ibdev)
++{
++ struct ibp_device *device;
++ struct ibp_driver *driver;
++ u64 ib_device = 0;
++
++ down_read(&list_rwsem);
++
++ device = __ibp_find_device_by_name(ibdev->name);
++ if (IS_ERR(device))
++ goto err;
++
++ driver = __ibp_find_driver(device->vendor_id, device->device_id);
++ if (IS_ERR(driver))
++ goto err;
++
++ ib_device = driver->resolve(ibdev);
++err:
++ up_read(&list_rwsem);
++
++ return ib_device;
++}
++EXPORT_SYMBOL(ibp_resolve_ib_device);
++
++/**
++ * ibp_register_driver - Register this driver
++ * @driver:Driver to register
++ *
++ * Lower level drivers use ibp_register_driver to register for callbacks
++ * on IB device addition and removal. Only one low level driver registration
++ * is allowed for each vendor/device id pair. When an IB device is added,
++ * it is compared with each registered driver vendor and device id. The add
++ * callback routine for the matching driver will be called.
++ */
++int ibp_register_driver(struct ibp_driver *driver)
++{
++ const struct ibp_id_table *tbl;
++ struct ibp_device *device;
++ int ret;
++
++ if (!driver->add || !driver->remove || !driver->resolve) {
++ print_err("missing add, remove, or resolve callback\n");
++ return -EINVAL;
++ }
++
++ down_write(&list_rwsem);
++
++ ret = __ibp_check_for_duplicates(driver);
++ if (ret) {
++ print_err("__ibp_check_for_duplicates failed\n");
++ goto err;
++ }
++ list_add_tail(&driver->list, &driver_list);
++ for (tbl = driver->id_table; tbl->vendor_id; tbl++) {
++ for (device = NULL;
++ !IS_ERR((device = __ibp_find_device(tbl->vendor_id,
++ tbl->device_id,
++ device)));
++ driver->add(device))
++ continue;
++ }
++err:
++ up_write(&list_rwsem);
++
++ return ret;
++}
++EXPORT_SYMBOL(ibp_register_driver);
++
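++/*
++ * For example (sketch; the vendor/device ids and callbacks are
++ * hypothetical), a low level driver registers itself at module init:
++ *
++ *     static const struct ibp_id_table my_ids[] = {
++ *             { .vendor_id = 0x8086, .device_id = 0x1003 },
++ *             { 0 }
++ *     };
++ *
++ *     static struct ibp_driver my_driver = {
++ *             .id_table = my_ids,
++ *             .add      = my_add,
++ *             .remove   = my_remove,
++ *             .resolve  = my_resolve,
++ *     };
++ *
++ *     ret = ibp_register_driver(&my_driver);
++ */
++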
++/**
++ * ibp_unregister_driver - Unregister this driver
++ * @client:Driver to unregister
++ *
++ * Lower level drivers use ibp_unregister_driver() to remove their
++ * registration. When ibp_unregister_driver() is called, the driver
++ * will receive a remove callback for each IB device with matching vendor
++ * and device ids.
++ */
++void ibp_unregister_driver(struct ibp_driver *driver)
++{
++ const struct ibp_id_table *tbl;
++ struct ibp_device *device;
++
++ down_write(&list_rwsem);
++ for (tbl = driver->id_table; tbl->vendor_id; tbl++) {
++ for (device = NULL;
++ !IS_ERR((device = __ibp_find_device(tbl->vendor_id,
++ tbl->device_id,
++ device)));
++ driver->remove(device))
++ continue;
++ }
++ list_del(&driver->list);
++ up_write(&list_rwsem);
++}
++EXPORT_SYMBOL(ibp_unregister_driver);
++
++static int ibp_connect(void *unused)
++{
++ struct scif_port_id dst;
++ unsigned long delay;
++ int ret = 0;
++
++ dst.node = IBP_HOST_NODE;
++ dst.port = port;
++
++ delay = msecs_to_jiffies(timeout);
++
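++ /* Retry until the host-side server accepts or the module is unloaded. */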
++ while (!kthread_should_stop()) {
++ ep = scif_open();
++ if (!ep) {
++ print_err("scif_open failed\n");
++ schedule_timeout_interruptible(delay);
++ continue;
++ }
++
++ while (scif_connect(ep, &dst) != 0) {
++ schedule_timeout_interruptible(delay);
++ if (kthread_should_stop())
++ break;
++ }
++
++ if (!kthread_should_stop()) {
++ print_dbg("connected node %d port %d\n",
++ dst.node, dst.port);
++
++ ibp_process_recvs();
++ }
++
++ scif_close(ep);
++ }
++
++ return ret;
++}
++
++static int __init ibp_client_init(void)
++{
++ int nid, ret;
++
++ print_info(DRV_SIGNON);
++
++ kref_init(&ref);
++
++ init_rwsem(&list_rwsem);
++
++ /* Calculate num_pfn */
++ num_pfn = 0;
++ for_each_node_state(nid, N_MEMORY)
++ num_pfn += node_spanned_pages(nid);
++
++ workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!workqueue) {
++ print_err("create_singlethread_workqueue failed\n");
++ ret = -ENOMEM;
++ goto err0;
++ }
++
++ /* Start a thread to establish a connection. */
++ ibp_client_thread = kthread_run(ibp_connect, NULL, DRV_NAME);
++ if (IS_ERR(ibp_client_thread)) {
++ ret = PTR_ERR(ibp_client_thread);
++ print_err("kthread_run returned %d\n", ret);
++ goto err1;
++ }
++
++ return 0;
++
++err1:
++ destroy_workqueue(workqueue);
++err0:
++ return ret;
++}
++
++static void __exit ibp_client_exit(void)
++{
++ struct ibp_device *device, *next;
++
++ kthread_stop(ibp_client_thread);
++
++ flush_workqueue(workqueue);
++ destroy_workqueue(workqueue);
++
++ down_write(&list_rwsem);
++ list_for_each_entry_safe(device, next, &device_list, list) {
++ __ibp_unregister_device(device);
++ ibp_destroy_device(device);
++ }
++ up_write(&list_rwsem);
++
++ kref_put(&ref, ibp_done);
++ wait_for_completion(&done);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_client_init);
++module_exit(ibp_client_exit);
+diff --git a/drivers/infiniband/ibp/drv/client.h b/drivers/infiniband/ibp/drv/client.h
+new file mode 100644
+index 0000000..3cd1d26
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/client.h
+@@ -0,0 +1,126 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef CLIENT_H
++#define CLIENT_H
++
++#include "common.h"
++#include "ibp.h"
++
++#define DRV_ROLE "Client"
++#define DRV_NAME "ibp_client"
++#include "compat.h"
++
++#ifndef IBP_HOST_NODE
++#define IBP_HOST_NODE 0
++#endif
++
++extern int timeout;
++extern scif_epd_t ep;
++extern struct workqueue_struct *workqueue;
++extern unsigned long num_pfn;
++
++int ibp_process_recvs(void);
++
++void ibp_add_one(struct ibp_add_device *msg);
++void ibp_remove_one(u64 handle);
++
++struct ibp_request {
++ struct completion done;
++ void *data;
++ size_t length;
++ int status;
++};
++
++struct ibp_mmap {
++ struct ibp_device *device;
++ struct scif_range *range;
++ u64 mmap;
++};
++
++struct ibp_reg {
++ struct ibp_device *device;
++ struct ibp_rb rb;
++ off_t scif_addr;
++ size_t length;
++};
++
++struct ibp_add_work {
++ struct work_struct work;
++ struct ibp_add_device msg;
++};
++
++struct ibp_destroy_ah_work {
++ struct work_struct work;
++ u64 ah;
++ struct ibp_device *device;
++};
++
++struct ibp_remove_work {
++ struct work_struct work;
++ u64 device;
++};
++
++struct ibp_queued_response_work {
++ struct work_struct work;
++ struct completion *done;
++};
++
++struct ibp_async_event_work {
++ struct work_struct work;
++ struct ibp_async_event event;
++};
++
++struct ibp_cq_comp_work {
++ struct work_struct work;
++ struct ib_cq *ibcq;
++};
++
++#define IBP_INIT_REQ(request, buf, size) \
++ do { \
++ (request)->data = (buf); \
++ (request)->length = (size); \
++ (request)->status = 0; \
++ init_completion(&(request)->done); \
++ } while (0)
++
++#define IBP_INIT_CMD(handle, cmd, size, op, req) \
++ do { \
++ (cmd)->header.opcode = IBP_VERB_##op; \
++ (cmd)->header.length = (size); \
++ (cmd)->header.status = 0; \
++ (cmd)->header.reserved = 0; \
++ (cmd)->header.device = (handle)->device; \
++ (cmd)->header.request = (uintptr_t)(req); \
++ } while (0)
++
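++/*
++ * These macros pair up as in the following sketch (see client_msg.c for
++ * the real callers):
++ *
++ *     struct ibp_request req;
++ *
++ *     IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ *     IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_PKEY, &req);
++ *
++ *     ret = ibp_send(ep, &cmd, sizeof(cmd));
++ *     if (ret)
++ *             return ret;
++ *
++ *     wait_for_completion(&req.done);
++ *     return req.status;
++ */
++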
++#endif /* CLIENT_H */
+diff --git a/drivers/infiniband/ibp/drv/client_msg.c b/drivers/infiniband/ibp/drv/client_msg.c
+new file mode 100644
+index 0000000..962d9aa
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/client_msg.c
+@@ -0,0 +1,1892 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++
++unsigned long num_pfn;
++
++static DEFINE_MUTEX(ibp_send_mutex);
++
++static int ibp_send(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
++ /*
++ * Because user-context threads can be signaled, a mutex
++ * and ERESTARTSYS check are required to complete atomically.
++ */
++ mutex_lock(&ibp_send_mutex);
++ while (len) {
++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_send returned %d\n", ret);
++ if (ret == -ERESTARTSYS) {
++ ret = 0;
++ } else {
++ mutex_unlock(&ibp_send_mutex);
++ return ret;
++ }
++ }
++ buf += ret;
++ len -= ret;
++ }
++ mutex_unlock(&ibp_send_mutex);
++
++ return 0;
++}
++
++static int ibp_recv(scif_epd_t ep, void *buf, size_t len)
++{
++ struct scif_pollepd pollep;
++ int ret;
++
++ pollep.epd = ep;
++ pollep.events = POLLIN;
++
++ while (len) {
++ schedule();
++ if (kthread_should_stop())
++ return -EINTR;
++
++ ret = scif_poll(&pollep, 1, timeout);
++ if (ret == 0) /* timeout */
++ continue;
++ if (ret < 0) {
++ print_dbg("scif_poll revents 0x%x returned %d\n",
++ pollep.revents, ret);
++ return ret;
++ }
++
++ ret = scif_recv(ep, buf, (uint32_t)len, 0);
++ if (ret < 0) {
++ print_dbg("scif_recv returned %d\n", ret);
++ return ret;
++ }
++
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
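++/* Read and discard len bytes from the endpoint, 64 bytes at a time. */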
++static int ibp_recv_bitbucket(scif_epd_t ep, size_t len)
++{
++ u8 bitbucket[64];
++ size_t bytes;
++ int ret = 0;
++
++ while (len) {
++ bytes = min(len, sizeof(bitbucket));
++ ret = ibp_recv(ep, &bitbucket, bytes);
++ if (ret)
++ break;
++
++ len -= bytes;
++ }
++
++ return ret;
++}
++
++int ibp_cmd_alloc_ucontext(struct ibp_device *device,
++ struct ib_device *ibdev,
++ u64 *ucontext,
++ struct ibp_alloc_ucontext_cmd *cmd,
++ size_t cmd_size,
++ struct ibp_alloc_ucontext_resp *resp,
++ size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, ALLOC_UCONTEXT, &req);
++
++ cmd->ibdev = (uintptr_t)ibdev;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *ucontext = resp->ucontext;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_alloc_ucontext);
++
++int ibp_cmd_dealloc_ucontext(struct ibp_device *device, u64 ucontext)
++{
++ struct ibp_dealloc_ucontext_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DEALLOC_UCONTEXT, &req);
++
++ cmd.ucontext = ucontext;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_dealloc_ucontext);
++
++static int ibp_get_scif_map_flags(unsigned long addr)
++{
++ /* See Documentation/x86/x86_64/mm.txt for a description of mem map */
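++ /* User addresses lie below the kernel direct map; kernel VAs need SCIF_MAP_KERNEL. */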
++ return addr < (unsigned long)page_address(pfn_to_page(0)) ?
++ 0 : SCIF_MAP_KERNEL;
++}
++
++static int ibp_convert_access_flags(int access)
++{
++ int prot_flags;
++
++ prot_flags = SCIF_PROT_READ;
++
++ if (access & ~IB_ACCESS_REMOTE_READ)
++ prot_flags |= SCIF_PROT_WRITE;
++
++ return prot_flags;
++}
++
++static int ibp_scif_unregister(scif_epd_t epd, off_t offset, size_t len)
++{
++ int ret;
++
++ do {
++ ret = scif_unregister(epd, offset, len);
++ } while (ret == -ERESTARTSYS);
++
++ return ret;
++}
++
++struct ibp_rb *ibp_reg_buf(struct ibp_device *device, u64 ucontext,
++ unsigned long vaddr, size_t length, int access)
++{
++ struct ibp_reg_buf_resp resp;
++ struct ibp_reg_buf_cmd cmd;
++ struct ibp_request req;
++ struct ibp_reg *reg;
++ int ret;
++
++ print_trace("in\n");
++
++ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
++ if (!reg) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ reg->device = device;
++ reg->length = PAGE_ALIGN(length + (vaddr & ~PAGE_MASK));
++
++ reg->scif_addr = scif_register(ep, (void *)(vaddr & PAGE_MASK),
++ reg->length, 0,
++ ibp_convert_access_flags(access),
++ ibp_get_scif_map_flags(vaddr));
++ if (IS_ERR_VALUE(reg->scif_addr)) {
++ ret = reg->scif_addr;
++ goto err0;
++ }
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), REG_BUF, &req);
++
++ cmd.ucontext = ucontext;
++ cmd.virt_addr = vaddr;
++ cmd.scif_addr = reg->scif_addr;
++ cmd.offset = vaddr & ~PAGE_MASK;
++ cmd.length = length;
++ cmd.access = access;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ goto err1;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ ret = req.status;
++ goto err1;
++ }
++
++ reg->rb.handle = resp.reg;
++
++ return &reg->rb;
++err1:
++ ibp_scif_unregister(ep, reg->scif_addr, reg->length);
++err0:
++ kfree(reg);
++ return ERR_PTR(ret);
++}
++EXPORT_SYMBOL(ibp_reg_buf);
++
++int ibp_dereg_buf(struct ibp_device *device, struct ibp_rb *rb)
++{
++ struct ibp_dereg_buf_cmd cmd;
++ struct ibp_request req;
++ struct ibp_reg *reg;
++ int ret;
++
++ print_trace("in\n");
++
++ if (IS_NULL_OR_ERR(rb))
++ return 0;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DEREG_BUF, &req);
++
++ cmd.reg = rb->handle;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ ret = req.status;
++err:
++ reg = container_of(rb, struct ibp_reg, rb);
++ ibp_scif_unregister(ep, reg->scif_addr, reg->length);
++ kfree(reg);
++
++ return ret;
++}
++EXPORT_SYMBOL(ibp_dereg_buf);
++
++static void ibp_cmd_unmmap(struct ibp_mmap *mmap, int free)
++{
++ struct ibp_device *device;
++ struct ibp_unmmap_cmd cmd;
++ struct ibp_request req;
++
++ device = mmap->device;
++
++ if (mmap->range)
++ scif_put_pages(mmap->range);
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), UNMMAP, &req);
++
++ cmd.mmap = mmap->mmap;
++
++ ibp_send(ep, &cmd, sizeof(cmd));
++
++ wait_for_completion(&req.done);
++
++ if (free)
++ kfree(mmap);
++}
++
++static void ibp_vma_close(struct vm_area_struct *vma)
++{
++ ibp_cmd_unmmap(vma->vm_private_data, 1);
++}
++
++static const struct vm_operations_struct ibp_vm_ops = {
++ .close = ibp_vma_close,
++};
++
++static unsigned long ibp_get_vma_prot(struct vm_area_struct *vma)
++{
++ unsigned long prot;
++
++ prot = PROT_NONE;
++
++ if (vma->vm_flags & VM_READ)
++ prot |= PROT_READ;
++
++ if (vma->vm_flags & VM_WRITE)
++ prot |= PROT_WRITE;
++
++ if (vma->vm_flags & VM_EXEC)
++ prot |= PROT_EXEC;
++
++ return prot;
++}
++
++static unsigned long ibp_get_vma_flags(struct vm_area_struct *vma)
++{
++ unsigned long flags = 0;
++
++ if (vma->vm_flags & VM_SHARED)
++ flags |= MAP_SHARED;
++ else
++ flags |= MAP_PRIVATE;
++
++ if (vma->vm_flags & VM_DENYWRITE)
++ flags |= MAP_DENYWRITE;
++
++ if (vma->vm_flags & VM_GROWSDOWN)
++ flags |= MAP_GROWSDOWN;
++
++ if (vma->vm_flags & VM_LOCKED)
++ flags |= MAP_LOCKED;
++
++ return flags;
++}
++
++int ibp_cmd_mmap(struct ibp_device *device, u64 ucontext,
++ struct vm_area_struct *vma)
++{
++ struct ibp_mmap_resp resp;
++ struct ibp_mmap_cmd cmd;
++ struct ibp_request req;
++ struct ibp_mmap *mmap;
++ off_t offset;
++ int i, ret;
++
++ print_trace("in\n");
++
++ mmap = kzalloc(sizeof(*mmap), GFP_KERNEL);
++ if (!mmap) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++ mmap->device = device;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), MMAP, &req);
++
++ cmd.len = vma->vm_end - vma->vm_start;
++ cmd.prot = ibp_get_vma_prot(vma);
++ cmd.flags = ibp_get_vma_flags(vma);
++ cmd.pgoff = vma->vm_pgoff;
++ cmd.ucontext = ucontext;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ goto err0;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ ret = req.status;
++ goto err0;
++ }
++
++ mmap->mmap = resp.mmap;
++
++ ret = scif_get_pages(ep, resp.scif_addr, cmd.len, &mmap->range);
++ if (ret)
++ goto err1;
++
++ vma->vm_ops = &ibp_vm_ops;
++ vma->vm_private_data = mmap;
++ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
++ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
++
++ offset = 0;
++ for (i = 0; i < mmap->range->nr_pages; i++) {
++ ret = io_remap_pfn_range(vma, vma->vm_start + offset,
++ mmap->range->phys_addr[i] >>
++ PAGE_SHIFT,
++ PAGE_SIZE, vma->vm_page_prot);
++ if (ret) {
++ print_err("io_remap_pfn_range returned %d\n", ret);
++ goto err1;
++ }
++ offset += PAGE_SIZE;
++ }
++
++ return 0;
++err1:
++ ibp_cmd_unmmap(mmap, 0);
++err0:
++ kfree(mmap);
++ return ret;
++}
++EXPORT_SYMBOL(ibp_cmd_mmap);
++
++struct ibp_iomem *ibp_cmd_ioremap(struct ibp_device *device, u64 ucontext,
++ phys_addr_t offset, unsigned long size)
++{
++ struct ibp_mmap_resp resp;
++ struct ibp_mmap_cmd cmd;
++ struct ibp_request req;
++ struct ibp_iomem *iomem;
++ struct ibp_mmap *mmap;
++ dma_addr_t paddr;
++ int i, ret;
++
++ print_trace("in\n");
++
++ iomem = kzalloc(sizeof(*iomem), GFP_KERNEL);
++ if (!iomem) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ mmap = kzalloc(sizeof(*mmap), GFP_KERNEL);
++ if (!mmap) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto err0;
++ }
++
++ iomem->cookie = mmap;
++ mmap->device = device;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), MMAP, &req);
++
++ cmd.len = size;
++ cmd.prot = PROT_READ | PROT_WRITE;
++ cmd.flags = MAP_SHARED;
++ cmd.pgoff = offset;
++ cmd.ucontext = ucontext;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ goto err1;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ ret = req.status;
++ goto err1;
++ }
++
++ mmap->mmap = resp.mmap;
++
++ ret = scif_get_pages(ep, resp.scif_addr, cmd.len, &mmap->range);
++ if (ret) {
++ print_err("scif_get_pages returned %d\n", ret);
++ goto err2;
++ }
++
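++ /* ioremap() below requires one physically contiguous range; verify it. */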
++ paddr = mmap->range->phys_addr[0];
++ for (i = 1; i < mmap->range->nr_pages; i++) {
++ if (paddr + (i * PAGE_SIZE) != mmap->range->phys_addr[i]) {
++ print_err("scif_get_pages returned non-contiguous "
++ "phys_addr\n");
++ ret = -EFAULT;
++ goto err2;
++ }
++ }
++
++ iomem->addr = ioremap(mmap->range->phys_addr[0], size);
++ if (!iomem->addr) {
++ print_err("ioremap failed\n");
++ ret = -ENXIO;
++ goto err2;
++ }
++
++ return iomem;
++err2:
++ ibp_cmd_unmmap(mmap, 0);
++err1:
++ kfree(mmap);
++err0:
++ kfree(iomem);
++ return ERR_PTR(ret);
++}
++EXPORT_SYMBOL(ibp_cmd_ioremap);
++
++int ibp_cmd_iounmap(struct ibp_iomem *iomem)
++{
++ print_trace("in\n");
++
++ iounmap(iomem->addr);
++ ibp_cmd_unmmap(iomem->cookie, 1);
++ kfree(iomem);
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_iounmap);
++
++int ibp_cmd_query_device(struct ibp_device *device,
++ struct ib_device_attr *dev_attr)
++{
++ struct ibp_query_device_resp resp;
++ struct ibp_query_device_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_DEVICE, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ dev_attr->fw_ver = resp.fw_ver;
++ dev_attr->sys_image_guid = resp.sys_image_guid;
++ dev_attr->max_mr_size = resp.max_mr_size;
++ dev_attr->page_size_cap = resp.page_size_cap;
++ dev_attr->vendor_id = resp.vendor_id;
++ dev_attr->vendor_part_id = resp.vendor_part_id;
++ dev_attr->hw_ver = resp.hw_ver;
++ dev_attr->max_qp = resp.max_qp;
++ dev_attr->max_qp_wr = resp.max_qp_wr;
++ dev_attr->device_cap_flags = resp.device_cap_flags;
++ dev_attr->max_sge = resp.max_sge;
++ dev_attr->max_sge_rd = resp.max_sge_rd;
++ dev_attr->max_cq = resp.max_cq;
++ dev_attr->max_cqe = resp.max_cqe;
++ dev_attr->max_mr = resp.max_mr;
++ dev_attr->max_pd = resp.max_pd;
++ dev_attr->max_qp_rd_atom = resp.max_qp_rd_atom;
++ dev_attr->max_ee_rd_atom = resp.max_ee_rd_atom;
++ dev_attr->max_res_rd_atom = resp.max_res_rd_atom;
++ dev_attr->max_qp_init_rd_atom = resp.max_qp_init_rd_atom;
++ dev_attr->max_ee_init_rd_atom = resp.max_ee_init_rd_atom;
++ dev_attr->atomic_cap = resp.atomic_cap;
++ dev_attr->masked_atomic_cap = resp.masked_atomic_cap;
++ dev_attr->max_ee = resp.max_ee;
++ dev_attr->max_rdd = resp.max_rdd;
++ dev_attr->max_mw = resp.max_mw;
++ dev_attr->max_raw_ipv6_qp = resp.max_raw_ipv6_qp;
++ dev_attr->max_raw_ethy_qp = resp.max_raw_ethy_qp;
++ dev_attr->max_mcast_grp = resp.max_mcast_grp;
++ dev_attr->max_mcast_qp_attach = resp.max_mcast_qp_attach;
++ dev_attr->max_total_mcast_qp_attach = resp.max_total_mcast_qp_attach;
++ dev_attr->max_ah = resp.max_ah;
++ dev_attr->max_fmr = resp.max_fmr;
++ dev_attr->max_map_per_fmr = resp.max_map_per_fmr;
++ dev_attr->max_srq = resp.max_srq;
++ dev_attr->max_srq_wr = resp.max_srq_wr;
++ dev_attr->max_srq_sge = resp.max_srq_sge;
++ dev_attr->max_fast_reg_page_list_len = resp.max_fast_reg_page_list_len;
++ dev_attr->max_pkeys = resp.max_pkeys;
++ dev_attr->local_ca_ack_delay = resp.local_ca_ack_delay;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_device);
++
++int ibp_cmd_query_port(struct ibp_device *device, u8 port_num,
++ struct ib_port_attr *port_attr)
++{
++ struct ibp_query_port_resp resp;
++ struct ibp_query_port_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_PORT, &req);
++
++ cmd.port_num = port_num;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ port_attr->state = resp.state;
++ port_attr->max_mtu = resp.max_mtu;
++ port_attr->active_mtu = resp.active_mtu;
++ port_attr->gid_tbl_len = resp.gid_tbl_len;
++ port_attr->port_cap_flags = resp.port_cap_flags;
++ port_attr->max_msg_sz = resp.max_msg_sz;
++ port_attr->bad_pkey_cntr = resp.bad_pkey_cntr;
++ port_attr->qkey_viol_cntr = resp.qkey_viol_cntr;
++ port_attr->pkey_tbl_len = resp.pkey_tbl_len;
++ port_attr->lid = resp.lid;
++ port_attr->sm_lid = resp.sm_lid;
++ port_attr->lmc = resp.lmc;
++ port_attr->max_vl_num = resp.max_vl_num;
++ port_attr->sm_sl = resp.sm_sl;
++ port_attr->subnet_timeout = resp.subnet_timeout;
++ port_attr->init_type_reply = resp.init_type_reply;
++ port_attr->active_width = resp.active_width;
++ port_attr->active_speed = resp.active_speed;
++ port_attr->phys_state = resp.phys_state;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_port);
++
++enum rdma_link_layer ibp_cmd_get_link_layer(struct ibp_device *device,
++ u8 port_num)
++{
++ struct ibp_query_port_resp resp;
++ struct ibp_query_port_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ /* Use QUERY_PORT for backward compatibility. */
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_PORT, &req);
++
++ cmd.port_num = port_num;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return IB_LINK_LAYER_UNSPECIFIED;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return IB_LINK_LAYER_UNSPECIFIED;
++
++ return resp.link_layer;
++}
++EXPORT_SYMBOL(ibp_cmd_get_link_layer);
++
++int ibp_cmd_query_gid(struct ibp_device *device, u8 port_num, int index,
++ union ib_gid *gid)
++{
++ struct ibp_query_gid_resp resp;
++ struct ibp_query_gid_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_GID, &req);
++
++ cmd.index = index;
++ cmd.port_num = port_num;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ gid->global.subnet_prefix = resp.subnet_prefix;
++ gid->global.interface_id = resp.interface_id;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_gid);
++
++int ibp_cmd_query_pkey(struct ibp_device *dev, u8 port, int index, u16 *pkey)
++{
++ struct ibp_query_pkey_resp resp;
++ struct ibp_query_pkey_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(dev, &cmd, sizeof(cmd), QUERY_PKEY, &req);
++
++ cmd.index = index;
++ cmd.port_num = port;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *pkey = resp.pkey;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_pkey);
++
++int ibp_cmd_alloc_pd(struct ibp_device *device, u64 ucontext, u64 *pd,
++ struct ibp_alloc_pd_cmd *cmd, size_t cmd_size,
++ struct ibp_alloc_pd_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, ALLOC_PD, &req);
++
++ cmd->ucontext = ucontext;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *pd = resp->pd;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_alloc_pd);
++
++int ibp_cmd_dealloc_pd(struct ibp_device *device, u64 pd)
++{
++ struct ibp_dealloc_pd_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DEALLOC_PD, &req);
++
++ cmd.pd = pd;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_dealloc_pd);
++
++int ibp_cmd_create_ah(struct ibp_device *device, u64 pd,
++ struct ib_ah_attr *ah_attr, u64 *ah)
++{
++ struct ibp_create_ah_resp resp;
++ struct ibp_create_ah_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), CREATE_AH, &req);
++
++ cmd.pd = pd;
++ cmd.ah_attr.grh.dgid_subnet_prefix =
++ ah_attr->grh.dgid.global.subnet_prefix;
++ cmd.ah_attr.grh.dgid_interface_id =
++ ah_attr->grh.dgid.global.interface_id;
++ cmd.ah_attr.grh.flow_label = ah_attr->grh.flow_label;
++ cmd.ah_attr.grh.sgid_index = ah_attr->grh.sgid_index;
++ cmd.ah_attr.grh.hop_limit = ah_attr->grh.hop_limit;
++ cmd.ah_attr.grh.traffic_class = ah_attr->grh.traffic_class;
++ cmd.ah_attr.dlid = ah_attr->dlid;
++ cmd.ah_attr.sl = ah_attr->sl;
++ cmd.ah_attr.src_path_bits = ah_attr->src_path_bits;
++ cmd.ah_attr.static_rate = ah_attr->static_rate;
++ cmd.ah_attr.ah_flags = ah_attr->ah_flags;
++ cmd.ah_attr.port_num = ah_attr->port_num;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *ah = resp.ah;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_create_ah);
++
++int ibp_cmd_query_ah(struct ibp_device *dev, u64 ah, struct ib_ah_attr *attr)
++{
++ struct ibp_query_ah_resp resp;
++ struct ibp_query_ah_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(dev, &cmd, sizeof(cmd), QUERY_AH, &req);
++
++ cmd.ah = ah;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ attr->grh.dgid.global.subnet_prefix = resp.attr.grh.dgid_subnet_prefix;
++ attr->grh.dgid.global.interface_id = resp.attr.grh.dgid_interface_id;
++ attr->grh.flow_label = resp.attr.grh.flow_label;
++ attr->grh.sgid_index = resp.attr.grh.sgid_index;
++ attr->grh.hop_limit = resp.attr.grh.hop_limit;
++ attr->grh.traffic_class = resp.attr.grh.traffic_class;
++ attr->dlid = resp.attr.dlid;
++ attr->sl = resp.attr.sl;
++ attr->src_path_bits = resp.attr.src_path_bits;
++ attr->static_rate = resp.attr.static_rate;
++ attr->ah_flags = resp.attr.ah_flags;
++ attr->port_num = resp.attr.port_num;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_ah);
++
++static void ibp_destroy_ah_work(struct work_struct *work)
++{
++ struct ibp_destroy_ah_work *destroy;
++ struct ibp_destroy_ah_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ destroy = container_of(work, struct ibp_destroy_ah_work, work);
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(destroy->device, &cmd, sizeof(cmd), DESTROY_AH, &req);
++
++ cmd.ah = destroy->ah;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (!ret)
++ wait_for_completion(&req.done);
++
++ kfree(destroy);
++}
++
++int ibp_cmd_destroy_ah(struct ibp_device *device, u64 ah)
++{
++ struct ibp_destroy_ah_work *destroy;
++
++ print_trace("in\n");
++
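++ /*
++ * The send/wait pair blocks, so it is deferred to the
++ * workqueue and the caller returns immediately.
++ */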
++ destroy = kzalloc(sizeof(*destroy), GFP_KERNEL);
++ if (!destroy) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ destroy->ah = ah;
++ destroy->device = device;
++
++ INIT_WORK(&destroy->work, ibp_destroy_ah_work);
++ queue_work(workqueue, &destroy->work);
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_destroy_ah);
++
++int ibp_cmd_create_srq(struct ibp_device *device, u64 pd,
++ struct ib_srq_init_attr *init_attr,
++ u64 *srq, struct ib_srq *ibsrq,
++ struct ibp_create_srq_cmd *cmd, size_t cmd_size,
++ struct ibp_create_srq_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, CREATE_SRQ, &req);
++
++ cmd->pd = pd;
++ cmd->srq_context = (uintptr_t)ibsrq;
++ cmd->attr.max_wr = init_attr->attr.max_wr;
++ cmd->attr.max_sge = init_attr->attr.max_sge;
++ cmd->attr.srq_limit = init_attr->attr.srq_limit;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *srq = resp->srq;
++ init_attr->attr.max_wr = resp->attr.max_wr;
++ init_attr->attr.max_sge = resp->attr.max_sge;
++ init_attr->attr.srq_limit = resp->attr.srq_limit;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_create_srq);
++
++int ibp_cmd_query_srq(struct ibp_device *device, u64 srq,
++ struct ib_srq_attr *attr)
++{
++ struct ibp_query_srq_resp resp;
++ struct ibp_query_srq_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_SRQ, &req);
++
++ cmd.srq = srq;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ attr->max_wr = resp.attr.max_wr;
++ attr->max_sge = resp.attr.max_sge;
++ attr->srq_limit = resp.attr.srq_limit;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_srq);
++
++int ibp_cmd_modify_srq(struct ibp_device *device, u64 srq,
++ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
++ struct ibp_modify_srq_cmd *cmd, size_t cmd_size,
++ struct ibp_modify_srq_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, MODIFY_SRQ, &req);
++
++ cmd->srq = srq;
++ cmd->srq_attr_mask = mask;
++ cmd->attr.max_wr = attr->max_wr;
++ cmd->attr.max_sge = attr->max_sge;
++ cmd->attr.srq_limit = attr->srq_limit;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ attr->max_wr = resp->attr.max_wr;
++ attr->max_sge = resp->attr.max_sge;
++ attr->srq_limit = resp->attr.srq_limit;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_modify_srq);
++
++int ibp_cmd_destroy_srq(struct ibp_device *device, u64 srq)
++{
++ struct ibp_destroy_srq_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DESTROY_SRQ, &req);
++
++ cmd.srq = srq;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_destroy_srq);
++
++int ibp_cmd_create_qp(struct ibp_device *device, u64 pd,
++ u64 send_cq, u64 recv_cq, u64 srq,
++ struct ib_qp_init_attr *init_attr,
++ u64 *qp, struct ib_qp *ibqp,
++ struct ibp_create_qp_cmd *cmd, size_t cmd_size,
++ struct ibp_create_qp_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, CREATE_QP, &req);
++
++ cmd->pd = pd;
++ cmd->send_cq = send_cq;
++ cmd->recv_cq = recv_cq;
++ cmd->srq = srq;
++ cmd->qp_context = (uintptr_t)ibqp;
++ cmd->cap.max_send_wr = init_attr->cap.max_send_wr;
++ cmd->cap.max_recv_wr = init_attr->cap.max_recv_wr;
++ cmd->cap.max_send_sge = init_attr->cap.max_send_sge;
++ cmd->cap.max_recv_sge = init_attr->cap.max_recv_sge;
++ cmd->cap.max_inline_data = init_attr->cap.max_inline_data;
++ cmd->sq_sig_type = init_attr->sq_sig_type;
++ cmd->qp_type = init_attr->qp_type;
++ cmd->create_flags = init_attr->create_flags;
++ cmd->xrc_domain = (uintptr_t)init_attr->xrcd;
++ cmd->port_num = init_attr->port_num;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *qp = resp->qp;
++ ibqp->qp_num = resp->qpn;
++ init_attr->cap.max_recv_sge = resp->cap.max_recv_sge;
++ init_attr->cap.max_send_sge = resp->cap.max_send_sge;
++ init_attr->cap.max_recv_wr = resp->cap.max_recv_wr;
++ init_attr->cap.max_send_wr = resp->cap.max_send_wr;
++ init_attr->cap.max_inline_data = resp->cap.max_inline_data;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_create_qp);
++
++int ibp_cmd_query_qp(struct ibp_device *device, u64 qp,
++ struct ib_qp_attr *attr, int qp_attr_mask,
++ struct ib_qp_init_attr *init_attr)
++{
++ struct ibp_query_qp_resp resp;
++ struct ibp_query_qp_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), QUERY_QP, &req);
++
++ cmd.qp = qp;
++ cmd.qp_attr_mask = qp_attr_mask;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ attr->qp_state = resp.qp_state;
++ attr->cur_qp_state = resp.cur_qp_state;
++ attr->path_mtu = resp.path_mtu;
++ attr->path_mig_state = resp.path_mig_state;
++ attr->qkey = resp.qkey;
++ attr->rq_psn = resp.rq_psn;
++ attr->sq_psn = resp.sq_psn;
++ attr->dest_qp_num = resp.dest_qp_num;
++ attr->qp_access_flags = resp.qp_access_flags;
++ init_attr->cap.max_send_wr = resp.init_cap.max_send_wr;
++ init_attr->cap.max_recv_wr = resp.init_cap.max_recv_wr;
++ init_attr->cap.max_send_sge = resp.init_cap.max_send_sge;
++ init_attr->cap.max_recv_sge = resp.init_cap.max_recv_sge;
++ init_attr->cap.max_inline_data = resp.init_cap.max_inline_data;
++ init_attr->create_flags = resp.init_create_flags;
++ init_attr->sq_sig_type = resp.init_sq_sig_type;
++ attr->cap.max_send_wr = resp.cap.max_send_wr;
++ attr->cap.max_recv_wr = resp.cap.max_recv_wr;
++ attr->cap.max_send_sge = resp.cap.max_send_sge;
++ attr->cap.max_recv_sge = resp.cap.max_recv_sge;
++ attr->cap.max_inline_data = resp.cap.max_inline_data;
++ attr->ah_attr.grh.dgid.global.subnet_prefix =
++ resp.ah.grh.dgid_subnet_prefix;
++ attr->ah_attr.grh.dgid.global.interface_id =
++ resp.ah.grh.dgid_interface_id;
++ attr->ah_attr.grh.flow_label = resp.ah.grh.flow_label;
++ attr->ah_attr.grh.sgid_index = resp.ah.grh.sgid_index;
++ attr->ah_attr.grh.hop_limit = resp.ah.grh.hop_limit;
++ attr->ah_attr.grh.traffic_class = resp.ah.grh.traffic_class;
++ attr->ah_attr.dlid = resp.ah.dlid;
++ attr->ah_attr.sl = resp.ah.sl;
++ attr->ah_attr.src_path_bits = resp.ah.src_path_bits;
++ attr->ah_attr.static_rate = resp.ah.static_rate;
++ attr->ah_attr.ah_flags = resp.ah.ah_flags;
++ attr->ah_attr.port_num = resp.ah.port_num;
++ attr->alt_ah_attr.grh.dgid.global.subnet_prefix =
++ resp.alt_ah.grh.dgid_subnet_prefix;
++ attr->alt_ah_attr.grh.dgid.global.interface_id =
++ resp.alt_ah.grh.dgid_interface_id;
++ attr->alt_ah_attr.grh.flow_label = resp.alt_ah.grh.flow_label;
++ attr->alt_ah_attr.grh.sgid_index = resp.alt_ah.grh.sgid_index;
++ attr->alt_ah_attr.grh.hop_limit = resp.alt_ah.grh.hop_limit;
++ attr->alt_ah_attr.grh.traffic_class = resp.alt_ah.grh.traffic_class;
++ attr->alt_ah_attr.dlid = resp.alt_ah.dlid;
++ attr->alt_ah_attr.sl = resp.alt_ah.sl;
++ attr->alt_ah_attr.src_path_bits = resp.alt_ah.src_path_bits;
++ attr->alt_ah_attr.static_rate = resp.alt_ah.static_rate;
++ attr->alt_ah_attr.ah_flags = resp.alt_ah.ah_flags;
++ attr->alt_ah_attr.port_num = resp.alt_ah.port_num;
++ attr->pkey_index = resp.pkey_index;
++ attr->alt_pkey_index = resp.alt_pkey_index;
++ attr->en_sqd_async_notify = resp.en_sqd_async_notify;
++ attr->sq_draining = resp.sq_draining;
++ attr->max_rd_atomic = resp.max_rd_atomic;
++ attr->max_dest_rd_atomic = resp.max_dest_rd_atomic;
++ attr->min_rnr_timer = resp.min_rnr_timer;
++ attr->port_num = resp.port_num;
++ attr->timeout = resp.timeout;
++ attr->retry_cnt = resp.retry_cnt;
++ attr->rnr_retry = resp.rnr_retry;
++ attr->alt_port_num = resp.alt_port_num;
++ attr->alt_timeout = resp.alt_timeout;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_query_qp);
++
++int ibp_cmd_modify_qp(struct ibp_device *device, u64 qp,
++ struct ib_qp_attr *attr, int qp_attr_mask,
++ struct ibp_modify_qp_cmd *cmd, size_t cmd_size,
++ struct ibp_modify_qp_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, MODIFY_QP, &req);
++
++ cmd->qp = qp;
++ cmd->qp_attr_mask = qp_attr_mask;
++ cmd->qp_state = attr->qp_state;
++ cmd->cur_qp_state = attr->cur_qp_state;
++ cmd->path_mtu = attr->path_mtu;
++ cmd->path_mig_state = attr->path_mig_state;
++ cmd->qkey = attr->qkey;
++ cmd->rq_psn = attr->rq_psn;
++ cmd->sq_psn = attr->sq_psn;
++ cmd->dest_qp_num = attr->dest_qp_num;
++ cmd->qp_access_flags = attr->qp_access_flags;
++ cmd->cap.max_send_wr = attr->cap.max_send_wr;
++ cmd->cap.max_recv_wr = attr->cap.max_recv_wr;
++ cmd->cap.max_send_sge = attr->cap.max_send_sge;
++ cmd->cap.max_recv_sge = attr->cap.max_recv_sge;
++ cmd->cap.max_inline_data = attr->cap.max_inline_data;
++ cmd->ah.grh.dgid_subnet_prefix =
++ attr->ah_attr.grh.dgid.global.subnet_prefix;
++ cmd->ah.grh.dgid_interface_id =
++ attr->ah_attr.grh.dgid.global.interface_id;
++ cmd->ah.grh.flow_label = attr->ah_attr.grh.flow_label;
++ cmd->ah.grh.sgid_index = attr->ah_attr.grh.sgid_index;
++ cmd->ah.grh.hop_limit = attr->ah_attr.grh.hop_limit;
++ cmd->ah.grh.traffic_class = attr->ah_attr.grh.traffic_class;
++ cmd->ah.dlid = attr->ah_attr.dlid;
++ cmd->ah.sl = attr->ah_attr.sl;
++ cmd->ah.src_path_bits = attr->ah_attr.src_path_bits;
++ cmd->ah.static_rate = attr->ah_attr.static_rate;
++ cmd->ah.ah_flags = attr->ah_attr.ah_flags;
++ cmd->ah.port_num = attr->ah_attr.port_num;
++ cmd->alt_ah.grh.dgid_subnet_prefix =
++ attr->alt_ah_attr.grh.dgid.global.subnet_prefix;
++ cmd->alt_ah.grh.dgid_interface_id =
++ attr->alt_ah_attr.grh.dgid.global.interface_id;
++ cmd->alt_ah.grh.flow_label = attr->alt_ah_attr.grh.flow_label;
++ cmd->alt_ah.grh.sgid_index = attr->alt_ah_attr.grh.sgid_index;
++ cmd->alt_ah.grh.hop_limit = attr->alt_ah_attr.grh.hop_limit;
++ cmd->alt_ah.grh.traffic_class = attr->alt_ah_attr.grh.traffic_class;
++ cmd->alt_ah.dlid = attr->alt_ah_attr.dlid;
++ cmd->alt_ah.sl = attr->alt_ah_attr.sl;
++ cmd->alt_ah.src_path_bits = attr->alt_ah_attr.src_path_bits;
++ cmd->alt_ah.static_rate = attr->alt_ah_attr.static_rate;
++ cmd->alt_ah.ah_flags = attr->alt_ah_attr.ah_flags;
++ cmd->alt_ah.port_num = attr->alt_ah_attr.port_num;
++ cmd->pkey_index = attr->pkey_index;
++ cmd->alt_pkey_index = attr->alt_pkey_index;
++ cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
++ cmd->sq_draining = attr->sq_draining;
++ cmd->max_rd_atomic = attr->max_rd_atomic;
++ cmd->max_dest_rd_atomic = attr->max_dest_rd_atomic;
++ cmd->min_rnr_timer = attr->min_rnr_timer;
++ cmd->port_num = attr->port_num;
++ cmd->timeout = attr->timeout;
++ cmd->retry_cnt = attr->retry_cnt;
++ cmd->rnr_retry = attr->rnr_retry;
++ cmd->alt_port_num = attr->alt_port_num;
++ cmd->alt_timeout = attr->alt_timeout;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ attr->cap.max_recv_sge = resp->cap.max_recv_sge;
++ attr->cap.max_send_sge = resp->cap.max_send_sge;
++ attr->cap.max_recv_wr = resp->cap.max_recv_wr;
++ attr->cap.max_send_wr = resp->cap.max_send_wr;
++ attr->cap.max_inline_data = resp->cap.max_inline_data;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_modify_qp);
++
++int ibp_cmd_destroy_qp(struct ibp_device *device, u64 qp)
++{
++ struct ibp_destroy_qp_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DESTROY_QP, &req);
++
++ cmd.qp = qp;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_destroy_qp);
++
++int ibp_cmd_create_cq(struct ibp_device *device, u64 ucontext,
++ int entries, int vector, u64 *cq, struct ib_cq *ibcq,
++ struct ibp_create_cq_cmd *cmd, size_t cmd_size,
++ struct ibp_create_cq_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, CREATE_CQ, &req);
++
++ cmd->ucontext = ucontext;
++ cmd->cq_context = (uintptr_t)ibcq;
++ cmd->cqe = entries;
++ cmd->vector = vector;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ *cq = resp->cq;
++ ibcq->cqe = resp->cqe;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_create_cq);
++
++int ibp_cmd_resize_cq(struct ibp_device *device, u64 cq,
++ int entries, struct ib_cq *ibcq,
++ struct ibp_resize_cq_cmd *cmd, size_t cmd_size,
++ struct ibp_resize_cq_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, RESIZE_CQ, &req);
++
++ cmd->cq = cq;
++ cmd->cqe = entries;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ ibcq->cqe = resp->cqe;
++
++ return 0;
++}
++EXPORT_SYMBOL(ibp_cmd_resize_cq);
++
++int ibp_cmd_destroy_cq(struct ibp_device *device, u64 cq)
++{
++ struct ibp_destroy_cq_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DESTROY_CQ, &req);
++
++ cmd.cq = cq;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_destroy_cq);
++
++int ibp_cmd_reg_user_mr(struct ibp_device *device, u64 pd, u64 start,
++ u64 length, u64 virt_addr, int access, u64 *mr,
++ u32 *lkey, u32 *rkey,
++ struct ibp_reg_user_mr_cmd *cmd, size_t cmd_size,
++ struct ibp_reg_user_mr_resp *resp, size_t resp_size)
++{
++ struct ibp_request req;
++ struct ibp_reg *reg;
++ int ret;
++
++ print_trace("in\n");
++
++ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
++ if (!reg) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ reg->device = device;
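++ /*
++ * SCIF registration covers whole pages; round the window up
++ * and pass the sub-page offset in the command below.
++ */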
++ reg->length = PAGE_ALIGN(length + (start & ~PAGE_MASK));
++
++ reg->scif_addr = scif_register(ep, (void *)(start & PAGE_MASK),
++ reg->length, 0,
++ ibp_convert_access_flags(access),
++ ibp_get_scif_map_flags(start));
++ if (IS_ERR_VALUE(reg->scif_addr)) {
++ ret = reg->scif_addr;
++ kfree(reg);
++ return ret;
++ }
++
++ IBP_INIT_REQ(&req, resp, resp_size);
++ IBP_INIT_CMD(device, cmd, cmd_size, REG_USER_MR, &req);
++
++ cmd->pd = pd;
++ cmd->hca_va = virt_addr;
++ cmd->scif_addr = reg->scif_addr;
++ cmd->offset = start & ~PAGE_MASK;
++ cmd->length = length;
++ cmd->access = access;
++
++ ret = ibp_send(ep, cmd, cmd_size);
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ if (req.status) {
++ ret = req.status;
++ goto err;
++ }
++
++ reg->rb.handle = resp->mr;
++ *mr = (uintptr_t)reg;
++ *lkey = resp->lkey;
++ *rkey = resp->rkey;
++
++ return 0;
++err:
++ ibp_scif_unregister(ep, reg->scif_addr, reg->length);
++ kfree(reg);
++ return ret;
++}
++EXPORT_SYMBOL(ibp_cmd_reg_user_mr);
++
++int ibp_cmd_dereg_mr(struct ibp_device *device, u64 mr)
++{
++ struct ibp_dereg_mr_cmd cmd;
++ struct ibp_request req;
++ struct ibp_reg *reg;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DEREG_MR, &req);
++
++ reg = (struct ibp_reg *) mr;
++ cmd.mr = reg->rb.handle;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ goto err;
++
++ wait_for_completion(&req.done);
++
++ ret = req.status;
++err:
++ ibp_scif_unregister(ep, reg->scif_addr, reg->length);
++ kfree(reg);
++ return ret;
++}
++EXPORT_SYMBOL(ibp_cmd_dereg_mr);
++
++int ibp_cmd_get_dma_mr(struct ibp_device *device, u64 pd, int access,
++ u64 *mr, u32 *lkey, u32 *rkey)
++{
++ struct ibp_reg_user_mr_resp resp;
++ struct ibp_reg_user_mr_cmd cmd;
++ unsigned long len;
++ u64 addr;
++
++ print_trace("in\n");
++
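++ /*
++ * Proxy the DMA MR by registering all local physical memory
++ * (num_pfn pages starting at pfn 0) as one user MR on the server.
++ */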
++ addr = (u64) page_address(pfn_to_page(0));
++ len = num_pfn << PAGE_SHIFT;
++
++ return ibp_cmd_reg_user_mr(device, pd, addr, len, addr, access, mr,
++ lkey, rkey, &cmd, sizeof(cmd),
++ &resp, sizeof(resp));
++}
++EXPORT_SYMBOL(ibp_cmd_get_dma_mr);
++
++int ibp_cmd_attach_mcast(struct ibp_device *device, u64 qp,
++ union ib_gid *gid, u16 lid)
++{
++ struct ibp_attach_mcast_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), ATTACH_MCAST, &req);
++
++ cmd.qp = qp;
++ cmd.subnet_prefix = gid->global.subnet_prefix;
++ cmd.interface_id = gid->global.interface_id;
++ cmd.lid = lid;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_attach_mcast);
++
++int ibp_cmd_detach_mcast(struct ibp_device *device, u64 qp,
++ union ib_gid *gid, u16 lid)
++{
++ struct ibp_detach_mcast_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(device, &cmd, sizeof(cmd), DETACH_MCAST, &req);
++
++ cmd.qp = qp;
++ cmd.subnet_prefix = gid->global.subnet_prefix;
++ cmd.interface_id = gid->global.interface_id;
++ cmd.lid = lid;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret)
++ return ret;
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++EXPORT_SYMBOL(ibp_cmd_detach_mcast);
++
++static void ibp_add_device_work(struct work_struct *work)
++{
++ struct ibp_add_work *add;
++
++ add = container_of(work, struct ibp_add_work, work);
++ ibp_add_one(&add->msg);
++
++ kfree(add);
++}
++
++static int ibp_msg_add_device(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_add_work *add;
++ int ret;
++
++ add = kmalloc(sizeof(*add), GFP_ATOMIC);
++ if (!add) {
++ print_err("kmalloc failed\n");
++ return -ENOMEM;
++ }
++
++ ret = ibp_recv(ep, &add->msg, sizeof(add->msg));
++ if (ret) {
++ kfree(add);
++ return ret;
++ }
++
++ INIT_WORK(&add->work, ibp_add_device_work);
++ queue_work(workqueue, &add->work);
++
++ return 0;
++}
++
++static void ibp_remove_device_work(struct work_struct *work)
++{
++ struct ibp_remove_work *remove;
++
++ remove = container_of(work, struct ibp_remove_work, work);
++ ibp_remove_one(remove->device);
++
++ kfree(remove);
++}
++
++static int ibp_msg_remove_device(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_remove_work *remove;
++
++ remove = kmalloc(sizeof(*remove), GFP_ATOMIC);
++ if (!remove) {
++ print_err("kmalloc failed\n");
++ return -ENOMEM;
++ }
++
++ remove->device = hdr->device;
++
++ INIT_WORK(&remove->work, ibp_remove_device_work);
++ queue_work(workqueue, &remove->work);
++
++ return 0;
++}
++
++static int ibp_recv_verb_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ size_t len;
++ int ret;
++
++ req = (struct ibp_request *) hdr->request;
++ len = hdr->length - sizeof(*hdr);
++
++ ret = ibp_recv(ep, (void *) req->data, min(req->length, len));
++ if (ret)
++ return ret;
++
++ if (req->length < len) {
++ print_dbg("req->data overrun, expected %zu actual %zu\n",
++ req->length, len);
++
++ ret = ibp_recv_bitbucket(ep, len - req->length);
++ if (ret)
++ print_err("ibp_recv_bitbucket returned %d\n", ret);
++
++ req->status = -EMSGSIZE;
++ } else {
++ req->status = hdr->status;
++ }
++
++ req->length = len;
++
++ return ret;
++}
++
++static int ibp_verb_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ int ret;
++
++ req = (struct ibp_request *) hdr->request;
++
++ ret = ibp_recv_verb_response(ep, hdr);
++ if (!ret)
++ complete(&req->done);
++
++ return ret;
++}
++
++static void ibp_queued_response_work(struct work_struct *work)
++{
++ struct ibp_queued_response_work *resp;
++
++ resp = container_of(work, struct ibp_queued_response_work, work);
++ complete(resp->done);
++
++ kfree(resp);
++}
++
++static int ibp_queued_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ struct ibp_queued_response_work *resp;
++ int ret;
++
++ req = (struct ibp_request *) hdr->request;
++
++ ret = ibp_recv_verb_response(ep, hdr);
++ if (ret)
++ return ret;
++
++ resp = kmalloc(sizeof(*resp), GFP_ATOMIC);
++ if (!resp) {
++ print_err("kmalloc failed\n");
++ return -ENOMEM;
++ }
++
++ resp->done = &req->done;
++
++ /*
++ * Queue the completion to serialize behind any events.
++ * Useful when destroying resources that generate events.
++ */
++ INIT_WORK(&resp->work, ibp_queued_response_work);
++ queue_work(workqueue, &resp->work);
++
++ return 0;
++}
++
++static void ibp_async_event_work(struct work_struct *work)
++{
++ struct ibp_async_event_work *async;
++ struct ibp_async_event *event;
++ struct ib_event ibevent;
++
++ async = container_of(work, struct ibp_async_event_work, work);
++ event = &async->event;
++
++ switch (event->type) {
++ case IB_EVENT_CQ_ERR:
++ {
++ struct ib_cq *ibcq = (struct ib_cq *) event->context;
++
++ if (ibcq->event_handler) {
++ ibevent.event = event->type;
++ ibevent.device = ibcq->device;
++ ibevent.element.cq = ibcq;
++ ibcq->event_handler(&ibevent, ibcq->cq_context);
++ }
++ break;
++ }
++
++ case IB_EVENT_QP_FATAL:
++ case IB_EVENT_QP_REQ_ERR:
++ case IB_EVENT_QP_ACCESS_ERR:
++ case IB_EVENT_QP_LAST_WQE_REACHED:
++ case IB_EVENT_SQ_DRAINED:
++ case IB_EVENT_COMM_EST:
++ case IB_EVENT_PATH_MIG:
++ case IB_EVENT_PATH_MIG_ERR:
++ {
++ struct ib_qp *ibqp = (struct ib_qp *) event->context;
++
++ if (ibqp->event_handler) {
++ ibevent.event = event->type;
++ ibevent.device = ibqp->device;
++ ibevent.element.qp = ibqp;
++ ibqp->event_handler(&ibevent, ibqp->qp_context);
++ }
++ break;
++ }
++
++ case IB_EVENT_SRQ_ERR:
++ case IB_EVENT_SRQ_LIMIT_REACHED:
++ {
++ struct ib_srq *ibsrq = (struct ib_srq *) event->context;
++
++ if (ibsrq->event_handler) {
++ ibevent.event = event->type;
++ ibevent.device = ibsrq->device;
++ ibevent.element.srq = ibsrq;
++ ibsrq->event_handler(&ibevent, ibsrq->srq_context);
++ }
++ break;
++ }
++
++ case IB_EVENT_DEVICE_FATAL:
++ case IB_EVENT_PORT_ACTIVE:
++ case IB_EVENT_PORT_ERR:
++ case IB_EVENT_LID_CHANGE:
++ case IB_EVENT_PKEY_CHANGE:
++ case IB_EVENT_SM_CHANGE:
++ case IB_EVENT_CLIENT_REREGISTER:
++ if (event->ibdev) {
++ ibevent.event = event->type;
++ ibevent.device = (struct ib_device *) event->ibdev;
++ ibevent.element.port_num = event->context;
++ ib_dispatch_event(&ibevent);
++ }
++ break;
++
++ default:
++ print_err("unknown event type 0x%x\n", event->type);
++ break;
++ }
++
++ kfree(async);
++}
++
++static int ibp_async_event(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_async_event_work *async;
++ int ret;
++
++ async = kmalloc(sizeof(*async), GFP_ATOMIC);
++ if (!async) {
++ print_err("kmalloc failed\n");
++ return -ENOMEM;
++ }
++
++ ret = ibp_recv(ep, &async->event, sizeof(async->event));
++ if (ret) {
++ kfree(async);
++ return ret;
++ }
++
++ INIT_WORK(&async->work, ibp_async_event_work);
++ queue_work(workqueue, &async->work);
++
++ return 0;
++}
++
++static void ibp_cq_comp_work(struct work_struct *work)
++{
++ struct ibp_cq_comp_work *comp;
++ struct ib_cq *ibcq;
++
++ comp = container_of(work, struct ibp_cq_comp_work, work);
++
++ ibcq = comp->ibcq;
++
++ if (IS_NULL_OR_ERR(ibcq)) {
++ kfree(comp);
++ return;
++ }
++
++ ibcq->comp_handler(ibcq, ibcq->cq_context);
++
++ kfree(comp);
++}
++
++static int ibp_cq_comp(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_cq_comp_work *comp;
++ struct ibp_cq_comp msg;
++ int ret;
++
++ comp = kmalloc(sizeof(*comp), GFP_ATOMIC);
++ if (!comp) {
++ print_err("kmalloc failed\n");
++ return -ENOMEM;
++ }
++
++ ret = ibp_recv(ep, &msg, sizeof(msg));
++ if (ret) {
++ kfree(comp);
++ return ret;
++ }
++
++ comp->ibcq = (struct ib_cq *) msg.cq_context;
++
++ if (IS_NULL_OR_ERR(comp->ibcq)) {
++ print_err("ibcq is invalid\n");
++ kfree(comp);
++ return -EINVAL;
++ }
++
++ INIT_WORK(&comp->work, ibp_cq_comp_work);
++ queue_work(workqueue, &comp->work);
++
++ return 0;
++}
++
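++/* Dispatch table indexed by ibp message opcode. */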
++static int (*ibp_msg_table[])(scif_epd_t ep, struct ibp_msg_header *hdr) = {
++ [IBP_ADD_DEVICE] = ibp_msg_add_device,
++ [IBP_REMOVE_DEVICE] = ibp_msg_remove_device,
++ [IBP_VERB_RESPONSE] = ibp_verb_response,
++ [IBP_QUEUED_RESPONSE] = ibp_queued_response,
++ [IBP_ASYNC_EVENT] = ibp_async_event,
++ [IBP_CQ_COMP] = ibp_cq_comp,
++};
++
++int ibp_process_recvs(void)
++{
++ struct ibp_msg_header hdr;
++ int ret = 0;
++
++ while (!kthread_should_stop()) {
++
++ ret = ibp_recv(ep, &hdr, sizeof(hdr));
++ if (ret)
++ break;
++
++ if ((hdr.opcode < 0) ||
++ (hdr.opcode >= ARRAY_SIZE(ibp_msg_table)) ||
++ !ibp_msg_table[hdr.opcode]) {
++ print_err("Invalid command 0x%x\n", hdr.opcode);
++ ret = -EBADRQC;
++ break;
++ }
++
++ ret = ibp_msg_table[hdr.opcode](ep, &hdr);
++ if (ret)
++ break;
++
++ schedule();
++ }
++
++ return ret;
++}
+diff --git a/drivers/infiniband/ibp/drv/common.h b/drivers/infiniband/ibp/drv/common.h
+new file mode 100644
+index 0000000..8ca7625
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/common.h
+@@ -0,0 +1,51 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef COMMON_H
++#define COMMON_H
++
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/poll.h>
++#include <linux/mman.h>
++#include <linux/pci.h>
++#include <rdma/ib_verbs.h>
++
++#define SCIF_OFED_PORT_2 62 /* reserved for ccl direct */
++
++#ifndef IBP_PORT /* unique scif port for this service */
++#define IBP_PORT SCIF_OFED_PORT_2
++#endif
++
++#endif /* COMMON_H */
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/Kconfig b/drivers/infiniband/ibp/drv/hw/mlx4/Kconfig
+new file mode 100644
+index 0000000..7cabc67
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/Kconfig
+@@ -0,0 +1,9 @@
++config IBP_MLX4
++ tristate "CCL Direct Mellanox ConnectX HCA client support"
++ depends on IBP_CLIENT
++ ---help---
++ This driver provides low-level InfiniBand support for
++ Mellanox ConnectX PCI Express host channel adapters (HCAs).
++
++ To compile this driver as a module, choose M here.
++ If unsure, say N.
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/Makefile b/drivers/infiniband/ibp/drv/hw/mlx4/Makefile
+new file mode 100644
+index 0000000..9715b223
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/Makefile
+@@ -0,0 +1,9 @@
++obj-$(CONFIG_IBP_MLX4) += ibp_mlx4.o
++
++ccflags-y := -Idrivers/infiniband/ibp/drv/ -Idrivers/infiniband/
++
++ibp_mlx4-y := main.o \
++ dbrec.o \
++ srq.o \
++ cq.o \
++ qp.o
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/cq.c b/drivers/infiniband/ibp/drv/hw/mlx4/cq.c
+new file mode 100644
+index 0000000..a733eed
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/cq.c
+@@ -0,0 +1,538 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++int ibp_mlx4_alloc_cq_buf(void **buf, int nent, int entry_size)
++{
++ int size = ALIGN(nent * entry_size, PAGE_SIZE);
++
++ *buf = kzalloc(size, GFP_KERNEL);
++ if (!*buf)
++ return -ENOMEM;
++
++ return 0;
++}
++
++static void ibp_mlx4_cq_comp_handler(struct ib_cq *ibcq, void *cq_context)
++{
++ struct ibp_mlx4_cq *cq = to_cq(ibcq);
++
++ ++cq->arm_sn;
++ cq->comp(ibcq, cq_context);
++}
++
++void ibp_mlx4_route_cq_comp(struct ib_cq *ibcq)
++{
++ struct ibp_mlx4_cq *cq = to_cq(ibcq);
++
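++ /*
++ * Interpose a local completion handler so arm_sn advances on
++ * every completion before the consumer's handler runs.
++ */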
++ spin_lock(&cq->lock);
++ if (ibcq->comp_handler != ibp_mlx4_cq_comp_handler) {
++ cq->comp = ibcq->comp_handler;
++ ibcq->comp_handler = ibp_mlx4_cq_comp_handler;
++ }
++ spin_unlock(&cq->lock);
++}
++
++static void dump_cqe(void *cqe)
++{
++ __be32 *buf;
++
++ buf = cqe;
++
++ print_dbg("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
++ be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
++ be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
++ be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
++}
++
++static void *get_cqe(struct ibp_mlx4_cq *cq, int entry)
++{
++ return cq->cqe_buf + entry * cq->cqe_size;
++}
++
++static void *get_sw_cqe(struct ibp_mlx4_cq *cq, int n)
++{
++ struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
++ struct mlx4_cqe *tcqe = cq->cqe_size == 64 ? cqe + 1 : cqe;
++
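++ /*
++ * The hardware toggles the ownership bit on each pass through
++ * the ring; the entry belongs to software when the bit matches
++ * the parity of the requested index.
++ */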
++ return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
++ !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
++}
++
++static struct mlx4_cqe *next_cqe_sw(struct ibp_mlx4_cq *cq)
++{
++ return get_sw_cqe(cq, cq->cons_index);
++}
++
++int ibp_mlx4_get_outstanding_cqes(struct ibp_mlx4_cq *cq)
++{
++ u32 i = cq->cons_index;
++
++ while (get_sw_cqe(cq, i & cq->ibcq.cqe))
++ i++;
++
++ return i - cq->cons_index;
++}
++
++void ibp_mlx4_cq_resize_copy_cqes(struct ibp_mlx4_cq *cq, void *buf)
++{
++ struct mlx4_cqe *cqe;
++ int i;
++ int cqe_inc = cq->cqe_size == 64 ? 1 : 0;
++
++ i = cq->cons_index;
++ cqe = get_cqe(cq, i & cq->ibcq.cqe);
++ cqe += cqe_inc;
++
++ while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) !=
++ MLX4_CQE_OPCODE_RESIZE) {
++ cqe->owner_sr_opcode = (cqe->owner_sr_opcode
++ & ~MLX4_CQE_OWNER_MASK) |
++ (((i + 1) & (cq->ibcq.cqe + 1))
++ ? MLX4_CQE_OWNER_MASK : 0);
++ memcpy(buf + ((i + 1) & cq->ibcq.cqe) * cq->cqe_size,
++ cqe - cqe_inc, cq->cqe_size);
++ ++i;
++ cqe = get_cqe(cq, i & cq->ibcq.cqe);
++ cqe += cqe_inc;
++ }
++ ++cq->cons_index;
++}
++
++static inline void ibp_mlx4_cq_set_ci(struct ibp_mlx4_cq *cq)
++{
++ *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
++}
++
++void __ibp_mlx4_cq_clean(struct ibp_mlx4_cq *cq, u32 qpn,
++ struct ibp_mlx4_srq *srq)
++{
++ u32 index;
++ int nfreed = 0;
++ struct mlx4_cqe *cqe, *dest;
++ u8 owner_bit;
++ int cqe_inc = cq->cqe_size == 64 ? 1 : 0;
++
++ /*
++ * First we need to find the current producer index, so we
++ * know where to start cleaning from. It doesn't matter if HW
++ * adds new entries after this loop -- the QP we're worried
++ * about is already in RESET, so the new entries won't come
++ * from our QP and therefore don't need to be checked.
++ */
++ for (index = cq->cons_index; get_sw_cqe(cq, index); ++index)
++ if (index == cq->cons_index + cq->ibcq.cqe)
++ break;
++
++ /*
++ * Now sweep backwards through the CQ, removing CQ entries
++ * that match our QP by copying older entries on top of them.
++ */
++ while ((int) --index - (int) cq->cons_index >= 0) {
++ cqe = get_cqe(cq, index & cq->ibcq.cqe);
++ cqe += cqe_inc;
++ if ((be32_to_cpu(cqe->vlan_my_qpn)
++ & MLX4_CQE_QPN_MASK) == qpn) {
++ if (srq && !(cqe->owner_sr_opcode
++ & MLX4_CQE_IS_SEND_MASK))
++ ibp_mlx4_free_srq_wqe(srq,
++ be16_to_cpu(
++ cqe->wqe_index));
++ ++nfreed;
++ } else if (nfreed) {
++ dest = get_cqe(cq, (index + nfreed)
++ & cq->ibcq.cqe);
++ dest += cqe_inc;
++ owner_bit = dest->owner_sr_opcode
++ & MLX4_CQE_OWNER_MASK;
++ memcpy(dest, cqe, sizeof(*cqe));
++ dest->owner_sr_opcode = owner_bit |
++ (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
++ }
++ }
++
++ if (nfreed) {
++ cq->cons_index += nfreed;
++ /*
++ * Make sure update of buffer contents is done before
++ * updating consumer index.
++ */
++ wmb();
++ ibp_mlx4_cq_set_ci(cq);
++ }
++}
++
++void ibp_mlx4_cq_clean(struct ibp_mlx4_cq *cq, u32 qpn,
++ struct ibp_mlx4_srq *srq)
++{
++ spin_lock_irq(&cq->lock);
++ __ibp_mlx4_cq_clean(cq, qpn, srq);
++ spin_unlock_irq(&cq->lock);
++}
++
++void ibp_mlx4_lock_cqs(struct ibp_mlx4_cq *send_cq,
++ struct ibp_mlx4_cq *recv_cq)
++ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
++{
++ if (send_cq == recv_cq) {
++ spin_lock_irq(&send_cq->lock);
++ __acquire(&recv_cq->lock);
++ } else if (send_cq->cqn < recv_cq->cqn) {
++ spin_lock_irq(&send_cq->lock);
++ spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
++ } else {
++ spin_lock_irq(&recv_cq->lock);
++ spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
++ }
++}
++
++void ibp_mlx4_unlock_cqs(struct ibp_mlx4_cq *send_cq,
++ struct ibp_mlx4_cq *recv_cq)
++ __releases(&send_cq->lock) __releases(&recv_cq->lock)
++{
++ if (send_cq == recv_cq) {
++ __release(&recv_cq->lock);
++ spin_unlock_irq(&send_cq->lock);
++ } else if (send_cq->cqn < recv_cq->cqn) {
++ spin_unlock(&recv_cq->lock);
++ spin_unlock_irq(&send_cq->lock);
++ } else {
++ spin_unlock(&send_cq->lock);
++ spin_unlock_irq(&recv_cq->lock);
++ }
++}
++
++static void ibp_mlx4_handle_error_cqe(struct mlx4_err_cqe *cqe,
++ struct ib_wc *wc)
++{
++ if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
++ print_dbg("local QP operation err (QPN %06x, WQE index %x, "
++ "vendor syndrome %02x, opcode = %02x)\n",
++ be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
++ cqe->vendor_err_syndrome,
++ cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
++ dump_cqe(cqe);
++ }
++
++ switch (cqe->syndrome) {
++ case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
++ wc->status = IB_WC_LOC_LEN_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
++ wc->status = IB_WC_LOC_QP_OP_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
++ wc->status = IB_WC_LOC_PROT_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
++ wc->status = IB_WC_WR_FLUSH_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_MW_BIND_ERR:
++ wc->status = IB_WC_MW_BIND_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
++ wc->status = IB_WC_BAD_RESP_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
++ wc->status = IB_WC_LOC_ACCESS_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
++ wc->status = IB_WC_REM_INV_REQ_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
++ wc->status = IB_WC_REM_ACCESS_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
++ wc->status = IB_WC_REM_OP_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
++ wc->status = IB_WC_RETRY_EXC_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
++ wc->status = IB_WC_RNR_RETRY_EXC_ERR;
++ break;
++ case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
++ wc->status = IB_WC_REM_ABORT_ERR;
++ break;
++ default:
++ wc->status = IB_WC_GENERAL_ERR;
++ break;
++ }
++
++ wc->vendor_err = cqe->vendor_err_syndrome;
++}
++
++static int ibp_mlx4_ipoib_csum_ok(__be16 status, __be16 checksum)
++{
++ return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
++ MLX4_CQE_STATUS_IPV4F |
++ MLX4_CQE_STATUS_IPV4OPT |
++ MLX4_CQE_STATUS_IPV6 |
++ MLX4_CQE_STATUS_IPOK)) ==
++ cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
++ MLX4_CQE_STATUS_IPOK)) &&
++ (status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
++ MLX4_CQE_STATUS_TCP)) &&
++ checksum == cpu_to_be16(0xffff);
++}
++
++static int ibp_mlx4_poll_one(struct ibp_mlx4_cq *cq,
++ struct ibp_mlx4_qp **cur_qp,
++ struct ib_wc *wc)
++{
++ struct ibp_mlx4_device *dev = to_device(cq->ibcq.device);
++ struct ibp_mlx4_srq *srq;
++ struct ibp_mlx4_wq *wq;
++ struct ibp_mlx4_qp *qp;
++ struct mlx4_cqe *cqe;
++ int is_send;
++ int is_error;
++ u32 g_mlpath_rqpn;
++ u16 wqe_ctr;
++
++ cqe = next_cqe_sw(cq);
++ if (!cqe)
++ return -EAGAIN;
++
++ if (cq->cqe_size == 64)
++ ++cqe;
++
++ ++cq->cons_index;
++
++ /*
++ * Make sure we read CQ entry contents after we've checked the
++ * ownership bit.
++ */
++ rmb();
++
++ is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
++ is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
++ MLX4_CQE_OPCODE_ERROR;
++
++ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
++ MLX4_OPCODE_NOP && is_send)) {
++ print_dbg("Completion for NOP opcode detected!\n");
++ return -EINVAL;
++ }
++
++ if (!*cur_qp ||
++ (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) !=
++ (*cur_qp)->ibqp.qp_num) {
++ /*
++ * We do not have to take the QP table lock here, because CQs
++ * will be locked while QPs are removed from the table.
++ */
++ qp = ibp_mlx4_qp_lookup(dev, be32_to_cpu(cqe->vlan_my_qpn));
++ if (unlikely(!qp)) {
++ print_dbg("CQ %06x with entry for unknown QPN %06x\n",
++ cq->cqn,
++ be32_to_cpu(cqe->vlan_my_qpn)
++ & MLX4_CQE_QPN_MASK);
++ return -EINVAL;
++ }
++
++ *cur_qp = qp;
++ }
++
++ wc->qp = &(*cur_qp)->ibqp;
++
++ if (is_send) {
++ wq = &(*cur_qp)->sq;
++ if (!(*cur_qp)->sq_signal_bits) {
++ wqe_ctr = be16_to_cpu(cqe->wqe_index);
++ wq->tail += (u16) (wqe_ctr - (u16) wq->tail);
++ }
++ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++ ++wq->tail;
++ } else if ((*cur_qp)->ibqp.srq) {
++ srq = to_srq((*cur_qp)->ibqp.srq);
++ wqe_ctr = be16_to_cpu(cqe->wqe_index);
++ wc->wr_id = srq->wrid[wqe_ctr];
++
++ ibp_mlx4_free_srq_wqe(srq, wqe_ctr);
++ } else {
++ wq = &(*cur_qp)->rq;
++ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++ ++wq->tail;
++ }
++
++ if (unlikely(is_error)) {
++ ibp_mlx4_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
++ return 0;
++ }
++
++ wc->status = IB_WC_SUCCESS;
++
++ if (is_send) {
++ wc->wc_flags = 0;
++ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
++ case MLX4_OPCODE_RDMA_WRITE_IMM:
++ wc->wc_flags |= IB_WC_WITH_IMM;
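++ /* fall through */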
++ case MLX4_OPCODE_RDMA_WRITE:
++ wc->opcode = IB_WC_RDMA_WRITE;
++ break;
++ case MLX4_OPCODE_SEND_IMM:
++ wc->wc_flags |= IB_WC_WITH_IMM;
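++ /* fall through */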
++ case MLX4_OPCODE_SEND:
++ case MLX4_OPCODE_SEND_INVAL:
++ wc->opcode = IB_WC_SEND;
++ break;
++ case MLX4_OPCODE_RDMA_READ:
++ wc->opcode = IB_WC_RDMA_READ;
++ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
++ break;
++ case MLX4_OPCODE_ATOMIC_CS:
++ wc->opcode = IB_WC_COMP_SWAP;
++ wc->byte_len = 8;
++ break;
++ case MLX4_OPCODE_ATOMIC_FA:
++ wc->opcode = IB_WC_FETCH_ADD;
++ wc->byte_len = 8;
++ break;
++ case MLX4_OPCODE_LSO:
++ wc->opcode = IB_WC_LSO;
++ break;
++ case MLX4_OPCODE_FMR:
++ wc->opcode = IB_WC_REG_MR;
++ break;
++ case MLX4_OPCODE_LOCAL_INVAL:
++ wc->opcode = IB_WC_LOCAL_INV;
++ break;
++ }
++ } else {
++ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
++
++ switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
++ case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
++ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
++ wc->wc_flags = IB_WC_WITH_IMM;
++ wc->ex.imm_data = cqe->immed_rss_invalid;
++ break;
++ case MLX4_RECV_OPCODE_SEND_INVAL:
++ wc->opcode = IB_WC_RECV;
++ wc->wc_flags = IB_WC_WITH_INVALIDATE;
++ wc->ex.invalidate_rkey
++ = be32_to_cpu(cqe->immed_rss_invalid);
++ break;
++ case MLX4_RECV_OPCODE_SEND:
++ wc->opcode = IB_WC_RECV;
++ wc->wc_flags = 0;
++ break;
++ case MLX4_RECV_OPCODE_SEND_IMM:
++ wc->opcode = IB_WC_RECV;
++ wc->wc_flags = IB_WC_WITH_IMM;
++ wc->ex.imm_data = cqe->immed_rss_invalid;
++ break;
++ }
++
++ wc->slid = be16_to_cpu(cqe->rlid);
++ wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
++ g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
++ wc->src_qp = g_mlpath_rqpn & 0xffffff;
++ wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
++ wc->wc_flags |= g_mlpath_rqpn & 0x80000000
++ ? IB_WC_GRH : 0;
++ wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid)
++ & 0x7f;
++ wc->wc_flags |= ibp_mlx4_ipoib_csum_ok(cqe->status,
++ cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
++ }
++
++ return 0;
++}
++
++int ibp_mlx4_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
++{
++ struct ibp_mlx4_cq *cq = to_cq(ibcq);
++ struct ibp_mlx4_qp *cur_qp = NULL;
++ unsigned long flags;
++ int npolled;
++ int err = 0;
++
++ spin_lock_irqsave(&cq->lock, flags);
++
++ for (npolled = 0; npolled < num_entries; ++npolled) {
++ err = ibp_mlx4_poll_one(cq, &cur_qp, wc + npolled);
++ if (err)
++ break;
++ }
++
++ if (npolled)
++ ibp_mlx4_cq_set_ci(cq);
++
++ spin_unlock_irqrestore(&cq->lock, flags);
++
++ if (err == 0 || err == -EAGAIN)
++ return npolled;
++ else
++ return err;
++}
++
++static inline void ibp_mlx4_cq_arm(struct ibp_mlx4_cq *cq, u32 cmd,
++ void __iomem *uar_page,
++ spinlock_t *doorbell_lock)
++{
++ __be32 doorbell[2];
++ u32 sn;
++ u32 ci;
++
++ sn = cq->arm_sn & 3;
++ ci = cq->cons_index & 0xffffff;
++
++ *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
++
++ /*
++ * Make sure that the doorbell record in host memory is
++ * written before ringing the doorbell via PCI MMIO.
++ */
++ wmb();
++
++ doorbell[0] = cpu_to_be32(sn << 28 | cmd | cq->cqn);
++ doorbell[1] = cpu_to_be32(ci);
++
++ mlx4_write64(doorbell, uar_page + MLX4_CQ_DOORBELL, doorbell_lock);
++}
++
++int ibp_mlx4_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
++{
++ struct ibp_mlx4_device *dev = to_device(ibcq->device);
++ struct ibp_mlx4_cq *cq = to_cq(ibcq);
++
++ ibp_mlx4_cq_arm(cq,
++ (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
++ MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
++ dev->uar_map->addr,
++ MLX4_GET_DOORBELL_LOCK(&dev->uar_lock));
++
++ return 0;
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/dbrec.c b/drivers/infiniband/ibp/drv/hw/mlx4/dbrec.c
+new file mode 100644
+index 0000000..85e9c06
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/dbrec.c
+@@ -0,0 +1,138 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++static struct ibp_mlx4_db_pgdir *ibp_mlx4_alloc_db_pgdir(void)
++{
++ struct ibp_mlx4_db_pgdir *pgdir;
++
++ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
++ if (!pgdir)
++ return NULL;
++
++ bitmap_fill(pgdir->order1, MLX4_DB_PER_PAGE / 2);
++ pgdir->bits[0] = pgdir->order0;
++ pgdir->bits[1] = pgdir->order1;
++ pgdir->db_page = (void *)__get_free_page(GFP_KERNEL);
++
++ if (!pgdir->db_page) {
++ kfree(pgdir);
++ return NULL;
++ }
++
++ return pgdir;
++}
++
++static int ibp_mlx4_alloc_db_from_pgdir(struct ibp_mlx4_db_pgdir *pgdir,
++ struct ibp_mlx4_dbrec *db, int order)
++{
++ int i, o;
++
++ for (o = order; o <= 1; ++o) {
++ i = find_first_bit(pgdir->bits[o], MLX4_DB_PER_PAGE >> o);
++ if (i < MLX4_DB_PER_PAGE >> o)
++ goto found;
++ }
++
++ return -ENOMEM;
++found:
++ clear_bit(i, pgdir->bits[o]);
++
++ i <<= o;
++
++ if (o > order)
++ set_bit(i ^ 1, pgdir->bits[order]);
++
++ db->pgdir = pgdir;
++ db->index = i;
++ db->order = order;
++
++ /* each doorbell record is 4 bytes */
++ db->db = pgdir->db_page + db->index * 4;
++
++ return 0;
++}
++
++int ibp_mlx4_alloc_dbrec(struct ibp_mlx4_device *dev,
++ struct ibp_mlx4_dbrec *db, int order)
++{
++ struct ibp_mlx4_db_pgdir *pgdir;
++ int ret = 0;
++
++ mutex_lock(&dev->pgdir_mutex);
++
++ list_for_each_entry(pgdir, &dev->pgdir_list, list)
++ if (!ibp_mlx4_alloc_db_from_pgdir(pgdir, db, order))
++ goto out;
++
++ pgdir = ibp_mlx4_alloc_db_pgdir();
++ if (!pgdir) {
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ list_add(&pgdir->list, &dev->pgdir_list);
++
++ /* This should never fail -- we just allocated an empty page: */
++ WARN_ON(ibp_mlx4_alloc_db_from_pgdir(pgdir, db, order));
++out:
++ mutex_unlock(&dev->pgdir_mutex);
++
++ return ret;
++}
++
++void ibp_mlx4_free_dbrec(struct ibp_mlx4_device *dev, struct ibp_mlx4_dbrec *db)
++{
++ int i, o;
++
++ o = db->order;
++ i = db->index;
++
++ mutex_lock(&dev->pgdir_mutex);
++
++ if (db->order == 0 && test_bit(i ^ 1, db->pgdir->order0)) {
++ clear_bit(i ^ 1, db->pgdir->order0);
++ ++o;
++ }
++
++ i >>= o;
++ set_bit(i, db->pgdir->bits[o]);
++
++ if (bitmap_full(db->pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
++ free_page((unsigned long) db->pgdir->db_page);
++ list_del(&db->pgdir->list);
++ kfree(db->pgdir);
++ }
++
++ mutex_unlock(&dev->pgdir_mutex);
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/main.c b/drivers/infiniband/ibp/drv/hw/mlx4/main.c
+new file mode 100644
+index 0000000..1c59739
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/main.c
+@@ -0,0 +1,1572 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++int ibp_mlx4_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ int ret;
++
++ print_trace("in\n");
++
++ if (udata->inlen || udata->outlen)
++ return -EINVAL;
++
++ ret = ibp_cmd_query_device(dev->ibpdev, attr);
++ if (ret)
++ print_err("ibp_cmd_query_device returned %d\n", ret);
++
++ /* Mask attributes that are not supported. */
++ if (!(ibdev->uverbs_cmd_mask
++ & (1ull << IB_USER_VERBS_CMD_CREATE_SRQ))) {
++ attr->device_cap_flags &= ~IB_DEVICE_SRQ_RESIZE;
++ attr->max_srq = 0;
++ attr->max_srq_wr = 0;
++ attr->max_srq_sge = 0;
++ }
++
++ return ret;
++}
++
++int ibp_mlx4_query_port(struct ib_device *ibdev, u8 port_num,
++ struct ib_port_attr *attr)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ int ret;
++
++ ret = ibp_cmd_query_port(dev->ibpdev, port_num, attr);
++ if (ret)
++ print_err("ibp_cmd_query_port returned %d\n", ret);
++
++ return ret;
++}
++
++enum rdma_link_layer ibp_mlx4_get_link_layer(struct ib_device *ibdev,
++ u8 port_num)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++
++ return ibp_cmd_get_link_layer(dev->ibpdev, port_num);
++}
++
++int ibp_mlx4_modify_port(struct ib_device *device,
++ u8 port_num, int port_modify_mask,
++ struct ib_port_modify *port_modify)
++{
++ print_trace("in\n");
++
++ return -ENOSYS;
++}
++
++int ibp_mlx4_query_gid(struct ib_device *ibdev, u8 port_num, int index,
++ union ib_gid *gid)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ int ret;
++
++ ret = ibp_cmd_query_gid(dev->ibpdev, port_num, index, gid);
++ if (ret)
++ print_err("ibp_cmd_query_gid returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx4_query_pkey(struct ib_device *ibdev, u8 port_num,
++ u16 index, u16 *pkey)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ int ret;
++
++ ret = ibp_cmd_query_pkey(dev->ibpdev, port_num, index, pkey);
++ if (ret)
++ print_err("ibp_cmd_query_pkey returned %d\n", ret);
++
++ return ret;
++}
++
++struct ib_ucontext *ibp_mlx4_alloc_ucontext(struct ib_device *ibdev,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ struct ibp_mlx4_alloc_ucontext_resp_v4 resp_v4;
++ struct ibp_mlx4_alloc_ucontext_resp resp;
++ struct ibp_mlx4_ucontext *ucontext;
++ struct ibp_alloc_ucontext_cmd cmd;
++ int ret, ret2;
++
++ print_trace("in\n");
++
++ ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
++ if (!ucontext)
++ return ERR_PTR(-ENOMEM);
++
++ if (dev->ibpdev->uverbs_abi_ver > 3)
++ ret = ibp_cmd_alloc_ucontext(dev->ibpdev, NULL, &ucontext->ucontext,
++ &cmd, sizeof(cmd), &resp_v4.msg, sizeof(resp_v4));
++ else
++ ret = ibp_cmd_alloc_ucontext(dev->ibpdev, NULL, &ucontext->ucontext,
++ &cmd, sizeof(cmd), &resp.msg, sizeof(resp));
++
++ if (ret) {
++ print_err("ibp_cmd_alloc_ucontext returned %d\n", ret);
++ goto err0;
++ }
++
++ INIT_LIST_HEAD(&ucontext->db_list);
++ mutex_init(&ucontext->db_mutex);
++
++ if (dev->ibpdev->uverbs_abi_ver > 3) {
++ ucontext->cqe_size = resp_v4.data.cqe_size;
++ ret = ib_copy_to_udata(udata, &resp_v4.data, sizeof(resp_v4.data));
++ } else {
++ ucontext->cqe_size = sizeof(struct mlx4_cqe);
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ }
++ if (ret) {
++ print_err("ib_copy_to_udata returned %d\n", ret);
++ goto err1;
++ }
++
++ return &ucontext->ibucontext;
++err1:
++ ret2 = ibp_cmd_dealloc_ucontext(dev->ibpdev, ucontext->ucontext);
++ if (ret2)
++ print_err("ibp_cmd_dealloc_ucontext returned %d\n", ret2);
++err0:
++ kfree(ucontext);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx4_dealloc_ucontext(struct ib_ucontext *ibucontext)
++{
++ struct ibp_mlx4_ucontext *ucontext = to_ucontext(ibucontext);
++ struct ibp_mlx4_device *dev = to_device(ibucontext->device);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_dealloc_ucontext(dev->ibpdev, ucontext->ucontext);
++ if (ret)
++ print_err("ibp_cmd_dealloc_ucontext returned %d\n", ret);
++
++ kfree(ucontext);
++ return 0;
++}
++
++int ibp_mlx4_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma)
++{
++ struct ibp_mlx4_ucontext *ucontext = to_ucontext(ibucontext);
++ struct ibp_mlx4_device *dev = to_device(ibucontext->device);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_mmap(dev->ibpdev, ucontext->ucontext, vma);
++ if (ret)
++ print_err("ibp_cmd_mmap returned %d\n", ret);
++
++ return ret;
++}
++
++struct ib_pd *ibp_mlx4_alloc_pd(struct ib_device *ibdev,
++ struct ib_ucontext *ibucontext,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ struct ibp_mlx4_alloc_pd_resp resp;
++ struct ibp_alloc_pd_cmd cmd;
++ struct ibp_mlx4_pd *pd;
++ u64 ucontext;
++ int ret, ret2;
++
++ print_trace("in\n");
++
++ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
++ if (!pd)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibucontext ? to_ucontext(ibucontext)->ucontext
++ : dev->kcontext->ucontext;
++
++ ret = ibp_cmd_alloc_pd(dev->ibpdev, ucontext, &pd->pd,
++ &cmd, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_alloc_pd returned %d\n", ret);
++ goto err0;
++ }
++
++ pd->pdn = resp.data.pdn;
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata returned %d\n", ret);
++ goto err1;
++ }
++ }
++
++ return &pd->ibpd;
++err1:
++ ret2 = ibp_cmd_dealloc_pd(dev->ibpdev, pd->pd);
++ if (ret2)
++ print_err("ibp_cmd_dealloc_pd returned %d\n", ret2);
++err0:
++ kfree(pd);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx4_dealloc_pd(struct ib_pd *ibpd)
++{
++ struct ibp_mlx4_device *dev = to_device(ibpd->device);
++ struct ibp_mlx4_pd *pd = to_pd(ibpd);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_dealloc_pd(dev->ibpdev, pd->pd);
++ if (ret)
++ print_err("ibp_cmd_dealloc_pd returned %d\n", ret);
++
++ kfree(pd);
++ return 0;
++}
++
++struct ib_ah *ibp_mlx4_create_ah(struct ib_pd *ibpd,
++ struct ib_ah_attr *ah_attr)
++{
++ struct ibp_mlx4_device *dev = to_device(ibpd->device);
++ struct ibp_mlx4_pd *pd = to_pd(ibpd);
++ struct ibp_mlx4_ah *ah;
++ int ret;
++
++ print_trace("in\n");
++
++ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
++ if (!ah)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ibp_cmd_create_ah(dev->ibpdev, pd->pd, ah_attr, &ah->ah);
++ if (ret) {
++ print_err("ibp_cmd_create_ah returned %d\n", ret);
++ kfree(ah);
++ return ERR_PTR(ret);
++ }
++
++ if (!ibpd->uobject) {
++ ah->av.port_pd = cpu_to_be32(pd->pdn |
++ (ah_attr->port_num << 24));
++ ah->av.g_slid = ah_attr->src_path_bits;
++ ah->av.dlid = cpu_to_be16(ah_attr->dlid);
++ ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
++
++ if (ah_attr->ah_flags & IB_AH_GRH) {
++ ah->av.g_slid |= 0x80;
++ ah->av.gid_index = ah_attr->grh.sgid_index;
++ ah->av.hop_limit = ah_attr->grh.hop_limit;
++ ah->av.sl_tclass_flowlabel |=
++ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
++ ah_attr->grh.flow_label);
++ memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
++ }
++
++ if (ah_attr->static_rate) {
++ /* Query to determine the assigned rate value. */
++ ret = ibp_cmd_query_ah(dev->ibpdev, ah->ah, ah_attr);
++ if (ret)
++ print_err("ibp_cmd_query_ah returned %d\n",
++ ret);
++
++ ah->av.stat_rate = ah_attr->static_rate
++ + MLX4_STAT_RATE_OFFSET;
++ }
++ }
++
++ return &ah->ibah;
++}
++
++int ibp_mlx4_destroy_ah(struct ib_ah *ibah)
++{
++ struct ibp_mlx4_device *dev = to_device(ibah->device);
++ struct ibp_mlx4_ah *ah = to_ah(ibah);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_destroy_ah(dev->ibpdev, ah->ah);
++ if (ret)
++ print_err("ibp_cmd_destroy_ah returned %d\n", ret);
++
++ kfree(ah);
++ return 0;
++}
++
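++/*
++ * Doorbell records from several objects can land on the same page.
++ * Track registered pages per ucontext and refcount them so each page
++ * is registered with the host exactly once.
++ */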
++static
++struct ibp_mlx4_db *ibp_mlx4_db_map_user(struct ibp_mlx4_ucontext *ucontext,
++ unsigned long virt)
++{
++ struct ibp_mlx4_device *dev = to_device(ucontext->ibucontext.device);
++ struct ibp_mlx4_db *db;
++ unsigned long user_virt = virt & PAGE_MASK;
++ int ret;
++
++ mutex_lock(&ucontext->db_mutex);
++
++ list_for_each_entry(db, &ucontext->db_list, list)
++ if (db->user_virt == user_virt)
++ goto found;
++
++ db = kzalloc(sizeof(*db), GFP_KERNEL);
++ if (!db) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto err0;
++ }
++
++ db->page = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ user_virt, PAGE_SIZE, 0);
++ if (IS_ERR(db->page)) {
++ ret = PTR_ERR(db->page);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto err1;
++ }
++
++ db->user_virt = user_virt;
++ list_add(&db->list, &ucontext->db_list);
++found:
++ ++db->refcnt;
++ mutex_unlock(&ucontext->db_mutex);
++
++ return db;
++err1:
++ kfree(db);
++err0:
++ mutex_unlock(&ucontext->db_mutex);
++ return ERR_PTR(ret);
++}
++
++static void ibp_mlx4_db_unmap_user(struct ibp_mlx4_ucontext *ucontext,
++ struct ibp_mlx4_db *db)
++{
++ struct ibp_mlx4_device *dev;
++
++ if (!ucontext || !db)
++ return;
++
++ dev = to_device(ucontext->ibucontext.device);
++
++ mutex_lock(&ucontext->db_mutex);
++ if (!--db->refcnt) {
++ list_del(&db->list);
++ ibp_dereg_buf(dev->ibpdev, db->page);
++ kfree(db);
++ }
++ mutex_unlock(&ucontext->db_mutex);
++}
++
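++/*
++ * User SRQs supply their WQE buffer and doorbell through udata; for
++ * kernel SRQs both are allocated here.  Either way the buffer is
++ * registered with the host before the SRQ is created there.
++ */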
++struct ib_srq *ibp_mlx4_create_srq(struct ib_pd *ibpd,
++ struct ib_srq_init_attr *attr,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibpd->device);
++ struct ibp_mlx4_pd *pd = to_pd(ibpd);
++ struct ibp_mlx4_create_srq_resp resp;
++ struct ibp_mlx4_create_srq_cmd cmd;
++ struct ibp_mlx4_ucontext *ucontext;
++ struct ibp_mlx4_srq *srq;
++ int desc_size, buf_size;
++ int ret;
++
++ print_trace("in\n");
++
++ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
++ if (!srq)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibpd->uobject ? to_ucontext(ibpd->uobject->context)
++ : dev->kcontext;
++
++ spin_lock_init(&srq->lock);
++
++ srq->max = roundup_pow_of_two(attr->attr.max_wr + 1);
++ srq->max_gs = attr->attr.max_sge;
++ desc_size = roundup_pow_of_two(sizeof(struct mlx4_wqe_srq_next_seg) +
++ srq->max_gs *
++ sizeof(struct mlx4_wqe_data_seg));
++ desc_size = max_t(int, 32, desc_size);
++ srq->wqe_shift = ilog2(desc_size);
++ buf_size = srq->max * desc_size;
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata returned %d\n", ret);
++ goto err0;
++ }
++ } else {
++ ret = ibp_mlx4_alloc_dbrec(dev, &srq->dbrec, 0);
++ if (ret) {
++			print_err("ibp_mlx4_alloc_dbrec returned %d\n", ret);
++ goto err0;
++ }
++ cmd.data.db_addr = (uintptr_t)srq->dbrec.db;
++
++ *srq->dbrec.db = 0;
++
++ ret = ibp_mlx4_alloc_srq_buf(srq, desc_size);
++ if (ret) {
++ print_err("ibp_mlx4_alloc_srq_buf returned %d\n", ret);
++ goto err1;
++ }
++ cmd.data.buf_addr = (uintptr_t)srq->wqe_buf;
++ }
++
++ srq->buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr, buf_size, 0);
++ if (IS_ERR(srq->buf)) {
++ ret = PTR_ERR(srq->buf);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto err2;
++ }
++
++ srq->db = ibp_mlx4_db_map_user(ucontext, cmd.data.db_addr);
++ if (IS_ERR(srq->db)) {
++ ret = PTR_ERR(srq->db);
++ goto err3;
++ }
++
++ ret = ibp_cmd_create_srq(dev->ibpdev, pd->pd,
++ attr, &srq->srq, &srq->ibsrq,
++ &cmd.msg, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_create_srq returned %d\n", ret);
++ goto err4;
++ }
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata returned %d\n", ret);
++ goto err5;
++ }
++ } else
++ srq->srqn = resp.data.srqn;
++
++ return &srq->ibsrq;
++err5:
++ ibp_cmd_destroy_srq(dev->ibpdev, srq->srq);
++err4:
++ ibp_mlx4_db_unmap_user(ucontext, srq->db);
++err3:
++ ibp_dereg_buf(dev->ibpdev, srq->buf);
++err2:
++ if (!udata)
++ ibp_mlx4_free_srq_buf(srq);
++err1:
++ if (!udata)
++ ibp_mlx4_free_dbrec(dev, &srq->dbrec);
++err0:
++ kfree(srq);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx4_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
++{
++ struct ibp_mlx4_device *dev = to_device(ibsrq->device);
++ struct ibp_mlx4_srq *srq = to_srq(ibsrq);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_query_srq(dev->ibpdev, srq->srq, attr);
++ if (ret)
++ print_err("ibp_cmd_query_srq returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx4_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
++ enum ib_srq_attr_mask mask, struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibsrq->device);
++ struct ibp_mlx4_srq *srq = to_srq(ibsrq);
++ struct ibp_modify_srq_resp resp;
++ struct ibp_modify_srq_cmd cmd;
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_modify_srq(dev->ibpdev, srq->srq, attr, mask,
++ &cmd, sizeof(cmd), &resp, sizeof(resp));
++ if (ret)
++ print_err("ibp_cmd_modify_srq returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx4_destroy_srq(struct ib_srq *ibsrq)
++{
++ struct ibp_mlx4_device *dev = to_device(ibsrq->device);
++ struct ibp_mlx4_srq *srq = to_srq(ibsrq);
++ struct ibp_mlx4_ucontext *ucontext;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = ibsrq->uobject ? to_ucontext(ibsrq->uobject->context)
++ : dev->kcontext;
++
++ ret = ibp_cmd_destroy_srq(dev->ibpdev, srq->srq);
++ if (ret)
++ print_err("ibp_cmd_destroy_srq returned %d\n", ret);
++
++ ibp_mlx4_db_unmap_user(ucontext, srq->db);
++ ibp_dereg_buf(dev->ibpdev, srq->buf);
++
++ if (!ibsrq->uobject) {
++ ibp_mlx4_free_srq_buf(srq);
++ ibp_mlx4_free_dbrec(dev, &srq->dbrec);
++ }
++
++ kfree(srq);
++ return 0;
++}
++
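++/*
++ * User QPs take their SQ geometry from udata; kernel QPs size the SQ
++ * locally (ibp_mlx4_set_kernel_sq_size) and allocate their own WQE
++ * buffer and doorbell before the QP is created on the host.
++ */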
++struct ib_qp *ibp_mlx4_create_qp(struct ib_pd *ibpd,
++ struct ib_qp_init_attr *attr,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibpd->device);
++ struct ibp_mlx4_pd *pd = to_pd(ibpd);
++ struct ibp_mlx4_ucontext *ucontext;
++ struct ibp_mlx4_create_qp_cmd cmd;
++ struct ibp_create_qp_resp resp;
++ struct ibp_mlx4_qp *qp;
++ u64 send_cq, recv_cq, srq;
++ int size, ret;
++
++ print_trace("in\n");
++
++ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
++ if (!qp)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibpd->uobject ? to_ucontext(ibpd->uobject->context)
++ : dev->kcontext;
++
++ mutex_init(&qp->mutex);
++ spin_lock_init(&qp->sq.lock);
++ spin_lock_init(&qp->rq.lock);
++
++	if (attr->srq || attr->xrcd) {
++ srq = to_srq(attr->srq)->srq;
++ } else {
++ srq = 0;
++ qp->rq.wqe_cnt =
++ roundup_pow_of_two(max(1U, attr->cap.max_recv_wr));
++ qp->rq.max_gs =
++ roundup_pow_of_two(max(1U, attr->cap.max_recv_sge));
++ qp->rq.wqe_shift =
++ ilog2(qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg));
++ }
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata returned %d\n", ret);
++ goto err0;
++ }
++
++ qp->sq.wqe_cnt = 1 << cmd.data.log_sq_bb_count;
++ qp->sq.wqe_shift = cmd.data.log_sq_stride;
++ } else {
++ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
++ qp->sq_signal_bits =
++ cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
++
++ if (attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
++ qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
++
++ if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
++ qp->flags |= MLX4_IB_QP_LSO;
++
++ ret = ibp_mlx4_set_kernel_sq_size(dev, &attr->cap,
++ attr->qp_type, qp);
++ if (ret) {
++ print_err("ibp_mlx4_set_kernel_sq_size returned %d\n",
++ ret);
++ goto err0;
++ }
++
++ ret = ibp_mlx4_alloc_qp_buf(qp);
++ if (ret) {
++ print_err("ibp_mlx4_alloc_qp_buf returned %d\n", ret);
++ goto err0;
++ }
++
++ attr->cap.max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
++ attr->cap.max_recv_sge = qp->rq.max_gs;
++
++ cmd.data.buf_addr = (uintptr_t)qp->wqe_buf;
++ cmd.data.log_sq_stride = qp->sq.wqe_shift;
++
++ for (cmd.data.log_sq_bb_count = 0;
++ qp->sq.wqe_cnt > 1 << cmd.data.log_sq_bb_count;
++ ++cmd.data.log_sq_bb_count)
++ ; /* nothing */
++
++ cmd.data.sq_no_prefetch = qp->sq_no_prefetch = 0;
++ memset(cmd.data.reserved, 0, sizeof(cmd.data.reserved));
++
++ ibp_mlx4_route_cq_comp(attr->send_cq);
++ ibp_mlx4_route_cq_comp(attr->recv_cq);
++ }
++
++ size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
++ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
++
++ qp->buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr, size, 0);
++ if (IS_ERR(qp->buf)) {
++ ret = PTR_ERR(qp->buf);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto err1;
++ }
++
++ if (!attr->srq &&
++ !(attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)) {
++ if (!udata) {
++ ret = ibp_mlx4_alloc_dbrec(dev, &qp->dbrec, 0);
++ if (ret) {
++				print_err("ibp_mlx4_alloc_dbrec returned %d\n",
++					  ret);
++ goto err2;
++ }
++ cmd.data.db_addr = (uintptr_t)qp->dbrec.db;
++ *qp->dbrec.db = 0;
++ }
++
++ qp->db = ibp_mlx4_db_map_user(ucontext, cmd.data.db_addr);
++ if (IS_ERR(qp->db)) {
++ ret = PTR_ERR(qp->db);
++ print_err("ibp_mlx4_db_map_user returned %d\n", ret);
++ goto err3;
++ }
++ }
++
++ send_cq = to_cq(attr->send_cq)->cq;
++ recv_cq = to_cq(attr->recv_cq)->cq;
++
++ ret = ibp_cmd_create_qp(dev->ibpdev, pd->pd, send_cq, recv_cq,
++ srq, attr, &qp->qp, &qp->ibqp,
++ &cmd.msg, sizeof(cmd), &resp, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_create_qp returned %d\n", ret);
++ goto err4;
++ }
++
++	/* HW wants the QPN written in big-endian order */
++ qp->doorbell_qpn = swab32(qp->ibqp.qp_num << 8);
++
++ ret = ibp_mlx4_insert_qp(dev, qp);
++ if (ret) {
++ print_err("ibp_mlx4_insert_qp returned %d\n", ret);
++ goto err5;
++ }
++
++ return &qp->ibqp;
++err5:
++ ibp_cmd_destroy_qp(dev->ibpdev, qp->qp);
++err4:
++ ibp_mlx4_db_unmap_user(ucontext, qp->db);
++err3:
++ if (!udata && !attr->srq &&
++ !(attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT))
++ ibp_mlx4_free_dbrec(dev, &qp->dbrec);
++err2:
++ ibp_dereg_buf(dev->ibpdev, qp->buf);
++err1:
++ if (!udata)
++ ibp_mlx4_free_qp_buf(qp);
++err0:
++ kfree(qp);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx4_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
++ int qp_attr_mask, struct ib_qp_init_attr *init_attr)
++{
++ struct ibp_mlx4_device *dev = to_device(ibqp->device);
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_query_qp(dev->ibpdev, qp->qp, attr, qp_attr_mask,
++ init_attr);
++ if (ret)
++ print_err("ibp_cmd_query_qp returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx4_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
++ int qp_attr_mask, struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibqp->device);
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ struct ibp_modify_qp_resp resp;
++ struct ibp_modify_qp_cmd cmd;
++ int ret;
++
++ print_trace("in\n");
++
++ mutex_lock(&qp->mutex);
++
++ if (!ibqp->uobject &&
++ qp->state == IB_QPS_RESET &&
++ qp_attr_mask & IB_QP_STATE &&
++ attr->qp_state == IB_QPS_INIT) {
++ ibp_mlx4_qp_init_sq_ownership(qp);
++ }
++
++ ret = ibp_cmd_modify_qp(dev->ibpdev, qp->qp, attr, qp_attr_mask,
++ &cmd, sizeof(cmd), &resp, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_modify_qp returned %d\n", ret);
++		goto out;
++ }
++
++ if (!ibqp->uobject) {
++ if (qp_attr_mask & IB_QP_STATE) {
++ qp->state = attr->qp_state;
++ if (qp->state == IB_QPS_RESET) {
++ ibp_mlx4_cq_clean(to_cq(ibqp->recv_cq),
++ qp->ibqp.qp_num,
++ ibqp->srq ?
++ to_srq(ibqp->srq) : NULL);
++ if (ibqp->send_cq != ibqp->recv_cq)
++ ibp_mlx4_cq_clean(to_cq(ibqp->send_cq),
++ qp->ibqp.qp_num,
++ NULL);
++ qp->rq.head = 0;
++ qp->rq.tail = 0;
++ qp->sq.head = 0;
++ qp->sq.tail = 0;
++ qp->sq_next_wqe = 0;
++ if (!ibqp->srq)
++ *qp->dbrec.db = 0;
++ }
++ }
++
++ if (qp_attr_mask & IB_QP_PORT)
++ qp->port = attr->port_num;
++ }
++out:
++ mutex_unlock(&qp->mutex);
++ return ret;
++}
++
++int ibp_mlx4_destroy_qp(struct ib_qp *ibqp)
++{
++ struct ibp_mlx4_device *dev = to_device(ibqp->device);
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ struct ibp_mlx4_ucontext *ucontext;
++ struct ibp_mlx4_cq *send_cq, *recv_cq;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = ibqp->uobject ? to_ucontext(ibqp->uobject->context)
++ : dev->kcontext;
++
++ ibp_mlx4_remove_qp(dev, qp);
++
++ ret = ibp_cmd_destroy_qp(dev->ibpdev, qp->qp);
++ if (ret)
++ print_err("ibp_cmd_destroy_qp returned %d\n", ret);
++
++ if (ibqp->uobject) {
++ ibp_mlx4_db_unmap_user(ucontext, qp->db);
++ ibp_dereg_buf(dev->ibpdev, qp->buf);
++ } else {
++ send_cq = to_cq(qp->ibqp.send_cq);
++ recv_cq = to_cq(qp->ibqp.recv_cq);
++
++ ibp_mlx4_lock_cqs(send_cq, recv_cq);
++
++ __ibp_mlx4_cq_clean(recv_cq, qp->ibqp.qp_num,
++ ibqp->srq ? to_srq(ibqp->srq) : NULL);
++ if (ibqp->send_cq != ibqp->recv_cq)
++ __ibp_mlx4_cq_clean(send_cq, qp->ibqp.qp_num, NULL);
++
++ ibp_mlx4_unlock_cqs(send_cq, recv_cq);
++
++ ibp_mlx4_db_unmap_user(ucontext, qp->db);
++ if (!ibqp->srq &&
++ !(ibqp->qp_type == IB_QPT_XRC_INI || ibqp->qp_type == IB_QPT_XRC_TGT))
++ ibp_mlx4_free_dbrec(dev, &qp->dbrec);
++
++ ibp_dereg_buf(dev->ibpdev, qp->buf);
++ ibp_mlx4_free_qp_buf(qp);
++ }
++
++ kfree(qp);
++ return 0;
++}
++
++struct ib_cq *ibp_mlx4_create_cq(struct ib_device *ibdev,
++ const struct ib_cq_init_attr *attr,
++ struct ib_ucontext *ibucontext,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibdev);
++ struct ibp_mlx4_create_cq_resp resp;
++ struct ibp_mlx4_create_cq_cmd cmd;
++ struct ibp_mlx4_ucontext *ucontext;
++ struct ibp_mlx4_cq *cq;
++ int entries, comp_vector;
++ int size, ret;
++
++ print_trace("in\n");
++
++ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
++ if (!cq)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibucontext ? to_ucontext(ibucontext) : dev->kcontext;
++
++ mutex_init(&cq->resize_mutex);
++ spin_lock_init(&cq->lock);
++
++ entries = attr->cqe;
++ comp_vector = attr->comp_vector;
++
++ entries = roundup_pow_of_two(entries + 1);
++ cq->ibcq.cqe = entries - 1;
++ cq->cqe_size = ucontext->cqe_size;
++ size = entries * cq->cqe_size;
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata returned %d\n", ret);
++ goto err0;
++ }
++ } else {
++ ret = ibp_mlx4_alloc_dbrec(dev, &cq->dbrec, 1);
++ if (ret) {
++			print_err("ibp_mlx4_alloc_dbrec returned %d\n", ret);
++ goto err0;
++ }
++ cmd.data.db_addr = (uintptr_t)cq->dbrec.db;
++
++ cq->set_ci_db = cq->dbrec.db;
++ cq->arm_db = cq->dbrec.db + 1;
++ *cq->set_ci_db = 0;
++ *cq->arm_db = 0;
++ cq->arm_sn = 1;
++
++ ret = ibp_mlx4_alloc_cq_buf(&cq->cqe_buf, entries, cq->cqe_size);
++ if (ret) {
++ print_err("ibp_mlx4_alloc_cq_buf returned %d\n", ret);
++ goto err1;
++ }
++ cmd.data.buf_addr = (uintptr_t)cq->cqe_buf;
++ }
++
++ cq->buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr, size, IB_ACCESS_LOCAL_WRITE);
++ if (IS_ERR(cq->buf)) {
++ ret = PTR_ERR(cq->buf);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto err2;
++ }
++
++ cq->db = ibp_mlx4_db_map_user(ucontext, cmd.data.db_addr);
++ if (IS_ERR(cq->db)) {
++ ret = PTR_ERR(cq->db);
++ goto err3;
++ }
++
++ ret = ibp_cmd_create_cq(dev->ibpdev, ucontext->ucontext,
++ cq->ibcq.cqe, comp_vector, &cq->cq, &cq->ibcq,
++ &cmd.msg, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_create_cq returned %d\n", ret);
++ goto err4;
++ }
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata returned %d\n", ret);
++ goto err5;
++ }
++ } else
++ cq->cqn = resp.data.cqn;
++
++ return &cq->ibcq;
++err5:
++ ibp_cmd_destroy_cq(dev->ibpdev, cq->cq);
++err4:
++ ibp_mlx4_db_unmap_user(ucontext, cq->db);
++err3:
++ ibp_dereg_buf(dev->ibpdev, cq->buf);
++err2:
++ if (!udata)
++		ibp_mlx4_free_cq_buf(dev->ibpdev, cq->cqe_buf);
++err1:
++ if (!udata)
++ ibp_mlx4_free_dbrec(dev, &cq->dbrec);
++err0:
++ kfree(cq);
++ return ERR_PTR(ret);
++}
++
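++/*
++ * Resize registers the new CQE buffer first, asks the host to resize,
++ * then (for kernel CQs) copies unpolled CQEs into the new buffer
++ * before releasing the old one.
++ */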
++int ibp_mlx4_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibcq->device);
++ struct ibp_mlx4_cq *cq = to_cq(ibcq);
++ struct ibp_mlx4_ucontext *ucontext;
++ struct ibp_mlx4_resize_cq_cmd cmd;
++ struct ibp_resize_cq_resp resp;
++ struct ibp_rb *buf = NULL;
++ void *old_buf, *new_buf;
++ int outst_cqe, ret, num;
++
++ print_trace("in\n");
++
++ ucontext = ibcq->uobject ? to_ucontext(ibcq->uobject->context)
++ : dev->kcontext;
++
++ num = roundup_pow_of_two(entries + 1);
++
++ mutex_lock(&cq->resize_mutex);
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata returned %d\n", ret);
++ goto err0;
++ }
++ } else {
++ /* Can't be smaller than the number of outstanding CQEs */
++ outst_cqe = ibp_mlx4_get_outstanding_cqes(cq);
++ if (entries < outst_cqe + 1)
++ goto out;
++
++ ret = ibp_mlx4_alloc_cq_buf(&new_buf, num, cq->cqe_size);
++ if (ret) {
++ print_err("ibp_mlx4_alloc_cq_buf returned %d\n", ret);
++ goto err0;
++ }
++ cmd.data.buf_addr = (uintptr_t)new_buf;
++ }
++
++ buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr,
++ num * ucontext->cqe_size,
++ IB_ACCESS_LOCAL_WRITE);
++ if (IS_ERR(buf)) {
++ ret = PTR_ERR(buf);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto err1;
++ }
++
++ ret = ibp_cmd_resize_cq(dev->ibpdev, cq->cq, entries, &cq->ibcq,
++ &cmd.msg, sizeof(cmd), &resp, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_resize_cq returned %d\n", ret);
++ goto err2;
++ }
++
++ if (!udata) {
++ spin_lock_irq(&cq->lock);
++
++ ibp_mlx4_cq_resize_copy_cqes(cq, new_buf);
++ old_buf = cq->cqe_buf;
++ cq->cqe_buf = new_buf;
++
++ spin_unlock_irq(&cq->lock);
++ }
++
++ ibcq->cqe = cq->ibcq.cqe;
++
++ ibp_dereg_buf(dev->ibpdev, cq->buf);
++ cq->buf = buf;
++
++ if (!udata) /* free after dereg_buf */
++ ibp_mlx4_free_cq_buf(dev->ibpdev, old_buf);
++out:
++ mutex_unlock(&cq->resize_mutex);
++ return 0;
++err2:
++ ibp_dereg_buf(dev->ibpdev, buf);
++err1:
++ if (!udata)
++ ibp_mlx4_free_cq_buf(dev->ibpdev, new_buf);
++err0:
++ mutex_unlock(&cq->resize_mutex);
++ return ret;
++}
++
++int ibp_mlx4_destroy_cq(struct ib_cq *ibcq)
++{
++ struct ibp_mlx4_device *dev = to_device(ibcq->device);
++ struct ibp_mlx4_cq *cq = to_cq(ibcq);
++ struct ibp_mlx4_ucontext *ucontext;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = ibcq->uobject ? to_ucontext(ibcq->uobject->context)
++ : dev->kcontext;
++
++ ret = ibp_cmd_destroy_cq(dev->ibpdev, cq->cq);
++ if (ret)
++ print_err("ibp_cmd_destroy_cq returned %d\n", ret);
++
++ ibp_mlx4_db_unmap_user(ucontext, cq->db);
++ ibp_dereg_buf(dev->ibpdev, cq->buf);
++
++ if (!ibcq->uobject) {
++ ibp_mlx4_free_cq_buf(dev->ibpdev, cq->cqe_buf);
++ ibp_mlx4_free_dbrec(dev, &cq->dbrec);
++ }
++
++ kfree(cq);
++ return 0;
++}
++
++struct ib_mr *ibp_mlx4_get_dma_mr(struct ib_pd *ibpd, int access)
++{
++ struct ibp_mlx4_device *dev = to_device(ibpd->device);
++ struct ibp_mlx4_pd *pd = to_pd(ibpd);
++ struct ibp_mlx4_mr *mr;
++ int ret;
++
++ print_trace("in\n");
++
++ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
++ if (!mr)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ibp_cmd_get_dma_mr(dev->ibpdev, pd->pd, access,
++ &mr->mr, &mr->ibmr.lkey, &mr->ibmr.rkey);
++ if (ret) {
++ print_err("ibp_cmd_get_dma_mr returned %d\n", ret);
++ goto err;
++ }
++
++ return &mr->ibmr;
++err:
++ kfree(mr);
++ return ERR_PTR(ret);
++}
++
++struct ib_mr *ibp_mlx4_reg_user_mr(struct ib_pd *ibpd,
++ u64 start, u64 length,
++ u64 virt_addr, int access,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx4_device *dev = to_device(ibpd->device);
++ struct ibp_mlx4_pd *pd = to_pd(ibpd);
++ struct ibp_reg_user_mr_resp resp;
++ struct ibp_reg_user_mr_cmd cmd;
++ struct ibp_mlx4_mr *mr;
++ int ret;
++
++ print_trace("in\n");
++
++ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
++ if (!mr)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ibp_cmd_reg_user_mr(dev->ibpdev, pd->pd,
++ start, length, virt_addr, access,
++ &mr->mr, &mr->ibmr.lkey, &mr->ibmr.rkey,
++ &cmd, sizeof(cmd), &resp, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_reg_user_mr returned %d\n", ret);
++ goto err;
++ }
++
++ return &mr->ibmr;
++err:
++ kfree(mr);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx4_dereg_mr(struct ib_mr *ibmr)
++{
++ struct ibp_mlx4_device *dev = to_device(ibmr->device);
++ struct ibp_mlx4_mr *mr = to_mr(ibmr);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_dereg_mr(dev->ibpdev, mr->mr);
++ if (ret)
++ print_err("ibp_cmd_dereg_mr returned %d\n", ret);
++
++ kfree(mr);
++ return 0;
++}
++
++static int ibp_mlx4_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
++{
++ struct ibp_mlx4_device *dev = to_device(ibqp->device);
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_attach_mcast(dev->ibpdev, qp->qp, gid, lid);
++ if (ret)
++ print_err("ibp_cmd_attach_mcast returned %d\n", ret);
++
++ return ret;
++}
++
++static int ibp_mlx4_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
++{
++ struct ibp_mlx4_device *dev = to_device(ibqp->device);
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_detach_mcast(dev->ibpdev, qp->qp, gid, lid);
++ if (ret)
++ print_err("ibp_cmd_detach_mcast returned %d\n", ret);
++
++ return ret;
++}
++
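++/*
++ * The proxied device performs no local DMA; these stubs hand back
++ * kernel virtual addresses so the core IB stack can use them as DMA
++ * addresses unchanged.
++ */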
++static void ibp_mlx4_dma_nop(struct ib_device *ibdev, u64 addr, size_t size,
++ enum dma_data_direction direction)
++{
++}
++
++static int ibp_mlx4_mapping_error(struct ib_device *ibdev, u64 dma_addr)
++{
++ return !dma_addr;
++}
++
++static u64 ibp_mlx4_dma_map_single(struct ib_device *ibdev,
++ void *cpu_addr, size_t size,
++ enum dma_data_direction direction)
++{
++ return (u64)cpu_addr;
++}
++
++static u64 ibp_mlx4_dma_map_page(struct ib_device *ibdev, struct page *page,
++ unsigned long offset, size_t size,
++ enum dma_data_direction direction)
++{
++ u64 addr;
++
++ if (offset + size > PAGE_SIZE)
++ return 0;
++
++ addr = (u64)page_address(page);
++ if (addr)
++ addr += offset;
++
++ return addr;
++}
++
++static int ibp_mlx4_map_sg(struct ib_device *ibdev, struct scatterlist *sg,
++ int nents, enum dma_data_direction direction)
++{
++ u64 addr;
++ int i, ret = nents;
++
++ for (i = 0; i < nents; i++, sg++) {
++ addr = (u64)page_address(sg_page(sg));
++ if (!addr) {
++ ret = 0;
++ break;
++ }
++
++ sg->dma_address = sg->offset + addr;
++ sg->dma_length = sg->length;
++ }
++
++ return ret;
++}
++
++static void ibp_mlx4_unmap_sg(struct ib_device *ibdev, struct scatterlist *sg,
++ int nents, enum dma_data_direction direction)
++{
++ print_trace("in\n");
++}
++
++static void ibp_mlx4_sync_single(struct ib_device *ibdev, u64 dma, size_t size,
++ enum dma_data_direction direction)
++{
++ print_trace("in\n");
++}
++
++static void *ibp_mlx4_dma_alloc_coherent(struct ib_device *ibdev, size_t size,
++ u64 *dma_handle, gfp_t flag)
++{
++ struct page *p = alloc_pages(flag, get_order(size));
++ void *addr = p ? page_address(p) : NULL;
++
++ print_trace("in\n");
++
++ if (dma_handle)
++ *dma_handle = (u64)addr;
++
++ return addr;
++}
++
++static void ibp_mlx4_dma_free_coherent(struct ib_device *ibdev, size_t size,
++ void *cpu_addr, u64 dma_handle)
++{
++ print_trace("in\n");
++
++ free_pages((unsigned long)cpu_addr, get_order(size));
++}
++
++struct ib_dma_mapping_ops ibp_mlx4_dma_ops = {
++	.mapping_error		= ibp_mlx4_mapping_error,
++	.map_single		= ibp_mlx4_dma_map_single,
++	.unmap_single		= ibp_mlx4_dma_nop,
++	.map_page		= ibp_mlx4_dma_map_page,
++	.unmap_page		= ibp_mlx4_dma_nop,
++	.map_sg			= ibp_mlx4_map_sg,
++	.unmap_sg		= ibp_mlx4_unmap_sg,
++	.sync_single_for_cpu	= ibp_mlx4_sync_single,
++	.sync_single_for_device	= ibp_mlx4_sync_single,
++	.alloc_coherent		= ibp_mlx4_dma_alloc_coherent,
++	.free_coherent		= ibp_mlx4_dma_free_coherent
++};
++
++int ibp_mlx4_register_device(struct ibp_mlx4_device *dev)
++{
++	strlcpy(dev->ibdevice.name, dev->ibpdev->name, IB_DEVICE_NAME_MAX);
++
++ dev->ibdevice.owner = THIS_MODULE;
++ dev->ibdevice.node_type = RDMA_NODE_IB_CA;
++ dev->ibdevice.node_guid = dev->ibpdev->node_guid;
++ dev->ibdevice.dma_device = dev->ibpdev->linux_dev;
++ dev->ibdevice.phys_port_cnt = dev->ibpdev->phys_port_cnt;
++ dev->ibdevice.num_comp_vectors = dev->ibpdev->num_comp_vectors;
++
++ dev->ibdevice.uverbs_abi_ver = dev->ibpdev->uverbs_abi_ver;
++ dev->ibdevice.uverbs_cmd_mask =
++ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
++ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
++ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
++ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
++ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
++ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
++ (1ull << IB_USER_VERBS_CMD_REG_MR) |
++ (1ull << IB_USER_VERBS_CMD_DEREG_MR);
++
++ /* Set mask to the intersection of this driver and the native driver. */
++ dev->ibdevice.uverbs_cmd_mask &= dev->ibpdev->uverbs_cmd_mask;
++
++ dev->ibdevice.query_device = ibp_mlx4_query_device;
++ dev->ibdevice.query_port = ibp_mlx4_query_port;
++ dev->ibdevice.get_link_layer = ibp_mlx4_get_link_layer;
++ dev->ibdevice.query_pkey = ibp_mlx4_query_pkey;
++ dev->ibdevice.modify_port = ibp_mlx4_modify_port;
++ dev->ibdevice.query_gid = ibp_mlx4_query_gid;
++ dev->ibdevice.alloc_ucontext = ibp_mlx4_alloc_ucontext;
++ dev->ibdevice.dealloc_ucontext = ibp_mlx4_dealloc_ucontext;
++ dev->ibdevice.mmap = ibp_mlx4_mmap;
++ dev->ibdevice.alloc_pd = ibp_mlx4_alloc_pd;
++ dev->ibdevice.dealloc_pd = ibp_mlx4_dealloc_pd;
++ dev->ibdevice.create_ah = ibp_mlx4_create_ah;
++ dev->ibdevice.destroy_ah = ibp_mlx4_destroy_ah;
++ dev->ibdevice.create_srq = ibp_mlx4_create_srq;
++ dev->ibdevice.query_srq = ibp_mlx4_query_srq;
++ dev->ibdevice.modify_srq = ibp_mlx4_modify_srq;
++ dev->ibdevice.destroy_srq = ibp_mlx4_destroy_srq;
++ dev->ibdevice.create_qp = ibp_mlx4_create_qp;
++ dev->ibdevice.query_qp = ibp_mlx4_query_qp;
++ dev->ibdevice.modify_qp = ibp_mlx4_modify_qp;
++ dev->ibdevice.destroy_qp = ibp_mlx4_destroy_qp;
++ dev->ibdevice.create_cq = ibp_mlx4_create_cq;
++ dev->ibdevice.resize_cq = ibp_mlx4_resize_cq;
++ dev->ibdevice.destroy_cq = ibp_mlx4_destroy_cq;
++ dev->ibdevice.poll_cq = ibp_mlx4_poll_cq;
++ dev->ibdevice.req_notify_cq = ibp_mlx4_arm_cq;
++ dev->ibdevice.get_dma_mr = ibp_mlx4_get_dma_mr;
++ dev->ibdevice.reg_user_mr = ibp_mlx4_reg_user_mr;
++ dev->ibdevice.dereg_mr = ibp_mlx4_dereg_mr;
++ dev->ibdevice.post_send = ibp_mlx4_post_send;
++ dev->ibdevice.post_recv = ibp_mlx4_post_recv;
++ dev->ibdevice.post_srq_recv = ibp_mlx4_post_srq_recv;
++ dev->ibdevice.attach_mcast = ibp_mlx4_attach_mcast;
++ dev->ibdevice.detach_mcast = ibp_mlx4_detach_mcast;
++ dev->ibdevice.dma_ops = &ibp_mlx4_dma_ops;
++
++ return ib_register_device(&dev->ibdevice, NULL);
++}
++
++void ibp_mlx4_unregister_device(struct ibp_mlx4_device *dev)
++{
++ ib_unregister_device(&dev->ibdevice);
++}
++
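++/*
++ * Kernel consumers have no ib_ucontext of their own, so allocate one
++ * host-side context (the "kcontext") on their behalf and map the UAR
++ * doorbell page through it.
++ */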
++static int ibp_mlx4_create_kcontext(struct ibp_mlx4_device *dev)
++{
++ struct ibp_mlx4_alloc_ucontext_resp_v4 resp_v4;
++ struct ibp_mlx4_alloc_ucontext_resp resp;
++ struct ibp_mlx4_ucontext *kcontext;
++ struct ibp_alloc_ucontext_cmd cmd;
++ struct ib_device_attr attr;
++ int ret;
++
++ kcontext = kzalloc(sizeof(*kcontext), GFP_KERNEL);
++ if (!kcontext)
++ return -ENOMEM;
++
++ ret = ibp_cmd_query_device(dev->ibpdev, &attr);
++ if (ret) {
++ print_err("ibp_cmd_query_device returned %d\n", ret);
++ goto err0;
++ }
++
++ mutex_init(&kcontext->db_mutex);
++ INIT_LIST_HEAD(&kcontext->db_list);
++ kcontext->ibucontext.device = &dev->ibdevice;
++	/* Inserts occur under qp_table_lock, so node allocs cannot sleep. */
++	INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
++ spin_lock_init(&dev->qp_table_lock);
++
++ if (dev->ibpdev->uverbs_abi_ver > 3) {
++ ret = ibp_cmd_alloc_ucontext(dev->ibpdev, NULL, &kcontext->ucontext,
++ &cmd, sizeof(cmd), &resp_v4.msg, sizeof(resp_v4));
++ kcontext->cqe_size = resp_v4.data.cqe_size;
++ } else {
++ ret = ibp_cmd_alloc_ucontext(dev->ibpdev, NULL, &kcontext->ucontext,
++ &cmd, sizeof(cmd), &resp.msg, sizeof(resp));
++ kcontext->cqe_size = sizeof(struct mlx4_cqe);
++ }
++ if (ret) {
++ print_err("ibp_cmd_alloc_ucontext returned %d\n", ret);
++ goto err0;
++ }
++
++ dev->kcontext = kcontext;
++ mutex_init(&dev->pgdir_mutex);
++ INIT_LIST_HEAD(&dev->pgdir_list);
++ MLX4_INIT_DOORBELL_LOCK(&dev->uar_lock);
++
++ /*
++ * The mlx4 driver allocates num resources in powers of 2.
++ * Round up num_qps; the difference is reserved_qps.
++ */
++ dev->num_qps = roundup_pow_of_two(attr.max_qp);
++ dev->max_wqes = attr.max_qp_wr;
++ dev->max_sge = attr.max_sge;
++
++ dev->uar_map = ibp_cmd_ioremap(dev->ibpdev, kcontext->ucontext, 0,
++ PAGE_SIZE);
++ if (IS_ERR(dev->uar_map)) {
++ ret = PTR_ERR(dev->uar_map);
++ print_err("ibp_cmd_ioremap returned %d\n", ret);
++ goto err1;
++ }
++
++ return 0;
++err1:
++ ibp_cmd_dealloc_ucontext(dev->ibpdev, kcontext->ucontext);
++err0:
++ kfree(kcontext);
++ return ret;
++}
++
++static void ibp_mlx4_destroy_kcontext(struct ibp_mlx4_device *dev)
++{
++ ibp_cmd_iounmap(dev->uar_map);
++ ibp_cmd_dealloc_ucontext(dev->ibpdev, dev->kcontext->ucontext);
++ kfree(dev->kcontext);
++}
++
++int ibp_mlx4_add_one(struct ibp_device *ibpdev)
++{
++ struct ibp_mlx4_device *dev;
++ int ret;
++
++ if (ibpdev->uverbs_abi_ver < MLX4_UVERBS_MIN_ABI_VERSION ||
++ ibpdev->uverbs_abi_ver > MLX4_UVERBS_MAX_ABI_VERSION) {
++ print_err("host %s uverbs ABI version %d is not supported "
++ "(min supported %d, max supported %d)\n",
++ ibpdev->name, ibpdev->uverbs_abi_ver,
++ MLX4_UVERBS_MIN_ABI_VERSION,
++ MLX4_UVERBS_MAX_ABI_VERSION);
++ return -EINVAL;
++ }
++
++ dev = (struct ibp_mlx4_device *)ib_alloc_device(sizeof(*dev));
++ if (!dev) {
++ print_err("ib_alloc_device failed\n");
++ return -ENOMEM;
++ }
++ dev->ibpdev = ibpdev;
++
++ ret = ibp_mlx4_create_kcontext(dev);
++ if (ret) {
++ print_err("ibp_mlx4_create_kcontext returned %d\n", ret);
++ goto err0;
++ }
++
++ ret = ibp_mlx4_register_device(dev);
++ if (ret) {
++ print_err("ibp_mlx4_register_device returned %d\n", ret);
++ goto err1;
++ }
++
++ ibp_set_driver_data(ibpdev, (uintptr_t)dev);
++ return 0;
++err1:
++ ibp_mlx4_destroy_kcontext(dev);
++err0:
++ ib_dealloc_device(&dev->ibdevice);
++ return ret;
++}
++
++void ibp_mlx4_remove_one(struct ibp_device *ibpdev)
++{
++ struct ibp_mlx4_device *dev;
++
++ dev = (struct ibp_mlx4_device *)ibp_get_driver_data(ibpdev);
++ if (!dev)
++ return;
++
++ ibp_mlx4_unregister_device(dev);
++ ibp_set_driver_data(ibpdev, (uintptr_t)NULL);
++ ibp_mlx4_destroy_kcontext(dev);
++ ib_dealloc_device(&dev->ibdevice);
++}
++
++static u64 ibp_mlx4_resolve_one(struct ib_device *ibdev)
++{
++ return to_device(ibdev)->ibpdev->ib_device;
++}
++
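++/* ConnectX-family device IDs served by the host mlx4 driver. */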
++static const struct ibp_id_table ibp_mlx4_id_table[] = {
++ { PCI_VENDOR_ID_MELLANOX, 0x6340 },
++ { PCI_VENDOR_ID_MELLANOX, 0x634a },
++ { PCI_VENDOR_ID_MELLANOX, 0x6354 },
++ { PCI_VENDOR_ID_MELLANOX, 0x6732 },
++ { PCI_VENDOR_ID_MELLANOX, 0x673c },
++ { PCI_VENDOR_ID_MELLANOX, 0x6368 },
++ { PCI_VENDOR_ID_MELLANOX, 0x6750 },
++ { PCI_VENDOR_ID_MELLANOX, 0x6372 },
++ { PCI_VENDOR_ID_MELLANOX, 0x675a },
++ { PCI_VENDOR_ID_MELLANOX, 0x6764 },
++ { PCI_VENDOR_ID_MELLANOX, 0x6746 },
++ { PCI_VENDOR_ID_MELLANOX, 0x676e },
++ { PCI_VENDOR_ID_MELLANOX, 0x6778 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1000 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1001 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1002 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1003 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1004 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1005 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1006 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1007 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1008 },
++ { PCI_VENDOR_ID_MELLANOX, 0x1009 },
++ { PCI_VENDOR_ID_MELLANOX, 0x100a },
++ { PCI_VENDOR_ID_MELLANOX, 0x100b },
++ { PCI_VENDOR_ID_MELLANOX, 0x100c },
++ { PCI_VENDOR_ID_MELLANOX, 0x100d },
++ { PCI_VENDOR_ID_MELLANOX, 0x100e },
++ { PCI_VENDOR_ID_MELLANOX, 0x100f },
++ { PCI_VENDOR_ID_MELLANOX, 0x1010 },
++ { 0, }
++};
++
++struct ibp_driver ibp_mlx4_driver = {
++ .name = DRV_NAME,
++ .id_table = ibp_mlx4_id_table,
++ .add = ibp_mlx4_add_one,
++ .remove = ibp_mlx4_remove_one,
++ .resolve = ibp_mlx4_resolve_one
++};
++
++static int __init ibp_mlx4_init(void)
++{
++ int ret;
++
++ print_info(DRV_SIGNON);
++
++ ret = ibp_register_driver(&ibp_mlx4_driver);
++ if (ret)
++ print_err("ibp_register_driver returned %d\n", ret);
++
++ return ret;
++}
++
++static void __exit ibp_mlx4_exit(void)
++{
++ ibp_unregister_driver(&ibp_mlx4_driver);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_mlx4_init);
++module_exit(ibp_mlx4_exit);
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/main.h b/drivers/infiniband/ibp/drv/hw/mlx4/main.h
+new file mode 100644
+index 0000000..11255f9
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/main.h
+@@ -0,0 +1,312 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef MAIN_H
++#define MAIN_H
++
++#include <linux/module.h>
++#include <linux/pci_ids.h>
++#include <linux/slab.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_user_verbs.h>
++#include "ibp.h"
++
++#include <linux/mlx4/cq.h>
++#include <linux/mlx4/qp.h>
++#include <linux/mlx4/srq.h>
++#include "hw/mlx4/user.h"
++
++#define DRV_ROLE "Mellanox ConnectX HCA driver"
++#define DRV_NAME "ibp_mlx4"
++#include "compat.h"
++
++static inline void ibp_mlx4_free_cq_buf(struct ibp_device *dev, void *buf)
++{
++	kfree(buf);
++}
++
++enum mlx4_ib_qp_flags {
++ MLX4_IB_QP_LSO = 1 << 0,
++ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
++};
++
++struct ibp_mlx4_db_pgdir {
++ struct list_head list;
++ DECLARE_BITMAP(order0, MLX4_DB_PER_PAGE);
++ DECLARE_BITMAP(order1, MLX4_DB_PER_PAGE / 2);
++ unsigned long *bits[2];
++ void *db_page;
++ dma_addr_t db_dma;
++};
++
++struct ibp_mlx4_dbrec {
++ __be32 *db;
++ struct ibp_mlx4_db_pgdir *pgdir;
++ int index;
++ int order;
++};
++
++struct ibp_mlx4_device {
++ struct ib_device ibdevice;
++ struct ibp_device *ibpdev;
++
++ struct ibp_mlx4_ucontext *kcontext;
++ struct ibp_iomem *uar_map;
++ MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
++ struct list_head pgdir_list;
++ struct mutex pgdir_mutex;
++ struct radix_tree_root qp_table_tree;
++ spinlock_t qp_table_lock;
++ int num_qps;
++ int max_wqes;
++ int max_sge;
++};
++
++struct ibp_mlx4_db {
++ struct list_head list;
++ unsigned long user_virt;
++ struct ibp_rb *page;
++ int refcnt;
++};
++
++struct ibp_mlx4_ucontext {
++ struct ib_ucontext ibucontext;
++ u64 ucontext;
++ struct list_head db_list;
++ struct mutex db_mutex;
++ int cqe_size;
++};
++
++struct ibp_mlx4_pd {
++ struct ib_pd ibpd;
++ u64 pd;
++ u32 pdn;
++};
++
++struct ibp_mlx4_ah {
++ struct ib_ah ibah;
++ u64 ah;
++ struct mlx4_av av;
++};
++
++struct ibp_mlx4_mr {
++ struct ib_mr ibmr;
++ u64 mr;
++};
++
++struct ibp_mlx4_cq {
++ struct ib_cq ibcq;
++ struct ibp_rb *buf;
++ struct ibp_mlx4_db *db;
++ struct mutex resize_mutex;
++ u64 cq;
++ ib_comp_handler comp;
++ struct ibp_mlx4_dbrec dbrec;
++ spinlock_t lock;
++ void *cqe_buf;
++ u32 cons_index;
++ __be32 *set_ci_db;
++ __be32 *arm_db;
++ int arm_sn;
++ int cqn;
++ int cqe_size;
++};
++
++struct ibp_mlx4_wq {
++ u64 *wrid;
++ int wqe_cnt;
++ int max_post;
++ int max_gs;
++ int offset;
++ int wqe_shift;
++ unsigned head;
++ unsigned tail;
++ spinlock_t lock;
++};
++
++struct ibp_mlx4_qp {
++ struct ib_qp ibqp;
++ struct ibp_rb *buf;
++ struct ibp_mlx4_db *db;
++ u64 qp;
++ struct ibp_mlx4_dbrec dbrec;
++ struct ibp_mlx4_wq rq;
++ struct ibp_mlx4_wq sq;
++ struct mutex mutex;
++ void *wqe_buf;
++ u32 doorbell_qpn;
++ __be32 sq_signal_bits;
++ unsigned sq_next_wqe;
++ int sq_max_wqes_per_wr;
++ int sq_spare_wqes;
++ u32 max_inline_data;
++ u32 flags;
++ enum ib_qp_state state;
++ u8 port;
++ u8 sq_no_prefetch;
++};
++
++struct ibp_mlx4_srq {
++ struct ib_srq ibsrq;
++ struct ibp_rb *buf;
++ struct ibp_mlx4_db *db;
++ u64 srq;
++ struct ibp_mlx4_dbrec dbrec;
++ spinlock_t lock;
++ int head;
++ int tail;
++ u64 *wrid;
++ void *wqe_buf;
++ int wqe_shift;
++ u16 wqe_ctr;
++ int max;
++ int max_gs;
++ int srqn;
++};
++
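++/*
++ * Generate container_of() helpers for each proxied object type, e.g.
++ * TO_OBJ(pd) expands to:
++ *
++ *	static inline struct ibp_mlx4_pd *to_pd(struct ib_pd *ibpd)
++ *	{
++ *		return container_of(ibpd, struct ibp_mlx4_pd, ibpd);
++ *	}
++ */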
++#define TO_OBJ(x) \
++static inline struct ibp_mlx4_##x *to_##x(struct ib_##x *ib##x) \
++{ \
++ return container_of(ib##x, struct ibp_mlx4_##x, ib##x); \
++}
++TO_OBJ(ucontext)
++TO_OBJ(device)
++TO_OBJ(pd)
++TO_OBJ(ah)
++TO_OBJ(mr)
++TO_OBJ(cq)
++TO_OBJ(qp)
++TO_OBJ(srq)
++
++#define MLX4_UVERBS_MIN_ABI_VERSION 3
++#define MLX4_UVERBS_MAX_ABI_VERSION 4
++
++#if ((MLX4_IB_UVERBS_ABI_VERSION < MLX4_UVERBS_MIN_ABI_VERSION) || \
++ (MLX4_IB_UVERBS_ABI_VERSION > MLX4_UVERBS_MAX_ABI_VERSION))
++#error MLX4_IB_UVERBS_ABI_VERSION is not a supported value
++#endif
++
++struct mlx4_ib_alloc_ucontext_resp_v4 {
++ __u32 dev_caps;
++ __u32 qp_tab_size;
++ __u16 bf_reg_size;
++ __u16 bf_regs_per_page;
++ __u32 cqe_size;
++};
++
++struct ibp_mlx4_alloc_ucontext_resp_v4 {
++ struct ibp_alloc_ucontext_resp msg;
++ struct mlx4_ib_alloc_ucontext_resp_v4 data;
++};
++
++struct ibp_mlx4_alloc_ucontext_resp {
++ struct ibp_alloc_ucontext_resp msg;
++ struct mlx4_ib_alloc_ucontext_resp data;
++};
++
++struct ibp_mlx4_alloc_pd_resp {
++ struct ibp_alloc_pd_resp msg;
++ struct mlx4_ib_alloc_pd_resp data;
++};
++
++struct ibp_mlx4_create_srq_cmd {
++ struct ibp_create_srq_cmd msg;
++ struct mlx4_ib_create_srq data;
++};
++
++struct ibp_mlx4_create_srq_resp {
++ struct ibp_create_srq_resp msg;
++ struct mlx4_ib_create_srq_resp data;
++};
++
++struct ibp_mlx4_create_qp_cmd {
++ struct ibp_create_qp_cmd msg;
++ struct mlx4_ib_create_qp data;
++};
++
++struct ibp_mlx4_create_cq_cmd {
++ struct ibp_create_cq_cmd msg;
++ struct mlx4_ib_create_cq data;
++};
++
++struct ibp_mlx4_create_cq_resp {
++ struct ibp_create_cq_resp msg;
++ struct mlx4_ib_create_cq_resp data;
++};
++
++struct ibp_mlx4_resize_cq_cmd {
++ struct ibp_resize_cq_cmd msg;
++ struct mlx4_ib_resize_cq data;
++};
++
++int ibp_mlx4_alloc_dbrec(struct ibp_mlx4_device *dev,
++ struct ibp_mlx4_dbrec *db, int order);
++void ibp_mlx4_free_dbrec(struct ibp_mlx4_device *dev,
++ struct ibp_mlx4_dbrec *db);
++
++int ibp_mlx4_alloc_srq_buf(struct ibp_mlx4_srq *srq, int desc_size);
++void ibp_mlx4_free_srq_buf(struct ibp_mlx4_srq *srq);
++void ibp_mlx4_free_srq_wqe(struct ibp_mlx4_srq *srq, int wqe_index);
++int ibp_mlx4_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr);
++
++int ibp_mlx4_alloc_cq_buf(void **buf, int nent, int entry_size);
++void ibp_mlx4_route_cq_comp(struct ib_cq *ibcq);
++int ibp_mlx4_get_outstanding_cqes(struct ibp_mlx4_cq *cq);
++void ibp_mlx4_cq_resize_copy_cqes(struct ibp_mlx4_cq *cq, void *new_buf);
++void __ibp_mlx4_cq_clean(struct ibp_mlx4_cq *cq, u32 qpn,
++ struct ibp_mlx4_srq *srq);
++void ibp_mlx4_cq_clean(struct ibp_mlx4_cq *cq, u32 qpn,
++ struct ibp_mlx4_srq *srq);
++void ibp_mlx4_lock_cqs(struct ibp_mlx4_cq *send_cq,
++ struct ibp_mlx4_cq *recv_cq);
++void ibp_mlx4_unlock_cqs(struct ibp_mlx4_cq *send_cq,
++ struct ibp_mlx4_cq *recv_cq);
++int ibp_mlx4_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
++int ibp_mlx4_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
++
++int ibp_mlx4_set_kernel_sq_size(struct ibp_mlx4_device *dev,
++ struct ib_qp_cap *cap, enum ib_qp_type type,
++ struct ibp_mlx4_qp *qp);
++void ibp_mlx4_qp_init_sq_ownership(struct ibp_mlx4_qp *qp);
++
++int ibp_mlx4_alloc_qp_buf(struct ibp_mlx4_qp *qp);
++void ibp_mlx4_free_qp_buf(struct ibp_mlx4_qp *qp);
++int ibp_mlx4_insert_qp(struct ibp_mlx4_device *dev, struct ibp_mlx4_qp *qp);
++void ibp_mlx4_remove_qp(struct ibp_mlx4_device *dev, struct ibp_mlx4_qp *qp);
++struct ibp_mlx4_qp *ibp_mlx4_qp_lookup(struct ibp_mlx4_device *dev, u32 qpn);
++
++int ibp_mlx4_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
++ struct ib_send_wr **bad_wr);
++int ibp_mlx4_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr);
++#endif /* MAIN_H */
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/qp.c b/drivers/infiniband/ibp/drv/hw/mlx4/qp.c
+new file mode 100644
+index 0000000..72bb88a
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/qp.c
+@@ -0,0 +1,771 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++enum {
++ /*
++ * Largest possible UD header: send with GRH and immediate data.
++ */
++ MLX4_IB_UD_HEADER_SIZE = 72,
++ MLX4_IB_MAX_RAW_ETY_HDR_SIZE = 12
++};
++
++enum {
++ MLX4_IB_CACHE_LINE_SIZE = 64,
++};
++
++static const __be32 mlx4_ib_opcode[] = {
++ [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
++ [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
++ [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
++ [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
++ [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
++ [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
++ [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
++ [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
++ [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
++ [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
++ [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
++};
++
++static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
++{
++ /*
++ * UD WQEs must have a datagram segment.
++ * RC and UC WQEs might have a remote address segment.
++ * MLX WQEs need two extra inline data segments (for the UD
++ * header and space for the ICRC).
++ */
++ switch (type) {
++ case IB_QPT_UD:
++ return sizeof(struct mlx4_wqe_ctrl_seg) +
++ sizeof(struct mlx4_wqe_datagram_seg) +
++ ((flags & MLX4_IB_QP_LSO) ? 128 : 0);
++ case IB_QPT_UC:
++ return sizeof(struct mlx4_wqe_ctrl_seg) +
++ sizeof(struct mlx4_wqe_raddr_seg);
++ case IB_QPT_RC:
++ return sizeof(struct mlx4_wqe_ctrl_seg) +
++ sizeof(struct mlx4_wqe_atomic_seg) +
++ sizeof(struct mlx4_wqe_raddr_seg);
++ case IB_QPT_SMI:
++ case IB_QPT_GSI:
++ return sizeof(struct mlx4_wqe_ctrl_seg) +
++ ALIGN(MLX4_IB_UD_HEADER_SIZE +
++ DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
++ MLX4_INLINE_ALIGN) *
++ sizeof(struct mlx4_wqe_inline_seg),
++ sizeof(struct mlx4_wqe_data_seg)) +
++ ALIGN(4 +
++ sizeof(struct mlx4_wqe_inline_seg),
++ sizeof(struct mlx4_wqe_data_seg));
++ case IB_QPT_RAW_ETHERTYPE:
++ return sizeof(struct mlx4_wqe_ctrl_seg) +
++ ALIGN(MLX4_IB_MAX_RAW_ETY_HDR_SIZE +
++ sizeof(struct mlx4_wqe_inline_seg),
++ sizeof(struct mlx4_wqe_data_seg));
++
++ default:
++ return sizeof(struct mlx4_wqe_ctrl_seg);
++ }
++}
++
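++/*
++ * Choose the SQ geometry: with stride 1 << wqe_shift, a WR of size s
++ * spans DIV_ROUND_UP(s, stride) basic blocks, so
++ *
++ *	wqe_cnt = roundup_pow_of_two(max_send_wr * wqes_per_wr + spare)
++ *
++ * where spare covers the 2KB-plus-one-WR prefetch headroom.  The
++ * stride is doubled until wqe_cnt fits the device limit.
++ */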
++int ibp_mlx4_set_kernel_sq_size(struct ibp_mlx4_device *dev,
++ struct ib_qp_cap *cap, enum ib_qp_type type,
++ struct ibp_mlx4_qp *qp)
++{
++ int s;
++
++ s = max(cap->max_send_sge * sizeof(struct mlx4_wqe_data_seg),
++ cap->max_inline_data + sizeof(struct mlx4_wqe_inline_seg)) +
++ send_wqe_overhead(type, qp->flags);
++
++ qp->sq.wqe_shift = ilog2(64);
++
++ for (;;) {
++ qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s,
++ 1U << qp->sq.wqe_shift);
++
++ /*
++ * We need to leave 2 KB + 1 WR of headroom in the SQ to
++ * allow HW to prefetch.
++ */
++ qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) +
++ qp->sq_max_wqes_per_wr;
++ qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
++ qp->sq_max_wqes_per_wr +
++ qp->sq_spare_wqes);
++
++ if (qp->sq.wqe_cnt <= dev->max_wqes)
++ break;
++
++ if (qp->sq_max_wqes_per_wr <= 1)
++ return -EINVAL;
++
++ ++qp->sq.wqe_shift;
++ }
++
++ qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
++ send_wqe_overhead(type, qp->flags)) /
++ sizeof(struct mlx4_wqe_data_seg);
++
++ cap->max_send_wr = qp->sq.max_post =
++ (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
++ cap->max_send_sge = min(qp->sq.max_gs, dev->max_sge);
++ qp->max_inline_data = cap->max_inline_data;
++
++ return 0;
++}
++
++int ibp_mlx4_alloc_qp_buf(struct ibp_mlx4_qp *qp)
++{
++ int size;
++
++ size = PAGE_ALIGN((qp->rq.wqe_cnt << qp->rq.wqe_shift) +
++ (qp->sq.wqe_cnt << qp->sq.wqe_shift));
++
++ qp->wqe_buf = kzalloc(size, GFP_KERNEL);
++ if (!qp->wqe_buf)
++ return -ENOMEM;
++
++ if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
++ qp->rq.offset = 0;
++ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
++ } else {
++ qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift;
++ qp->sq.offset = 0;
++ }
++
++	qp->sq.wrid = kcalloc(qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
++ if (!qp->sq.wrid)
++ goto err0;
++
++	qp->rq.wrid = kcalloc(qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
++ if (!qp->rq.wrid)
++ goto err1;
++
++ return 0;
++err1:
++ kfree(qp->sq.wrid);
++err0:
++ kfree(qp->wqe_buf);
++ return -ENOMEM;
++}
++
++void ibp_mlx4_free_qp_buf(struct ibp_mlx4_qp *qp)
++{
++ kfree(qp->wqe_buf);
++ kfree(qp->sq.wrid);
++ kfree(qp->rq.wrid);
++}
++
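++/*
++ * Kernel QPs are tracked in a radix tree keyed by the QPN masked to
++ * num_qps so completion handling can find the owning QP.
++ */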
++int ibp_mlx4_insert_qp(struct ibp_mlx4_device *dev, struct ibp_mlx4_qp *qp)
++{
++ int ret;
++
++ spin_lock_irq(&dev->qp_table_lock);
++ ret = radix_tree_insert(&dev->qp_table_tree,
++ qp->ibqp.qp_num & (dev->num_qps - 1), qp);
++ spin_unlock_irq(&dev->qp_table_lock);
++
++ return ret;
++}
++
++void ibp_mlx4_remove_qp(struct ibp_mlx4_device *dev, struct ibp_mlx4_qp *qp)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev->qp_table_lock, flags);
++ radix_tree_delete(&dev->qp_table_tree,
++ qp->ibqp.qp_num & (dev->num_qps - 1));
++ spin_unlock_irqrestore(&dev->qp_table_lock, flags);
++}
++
++struct ibp_mlx4_qp *ibp_mlx4_qp_lookup(struct ibp_mlx4_device *dev, u32 qpn)
++{
++ unsigned long flags;
++ struct ibp_mlx4_qp *qp;
++
++ spin_lock_irqsave(&dev->qp_table_lock, flags);
++ qp = radix_tree_lookup(&dev->qp_table_tree,
++ qpn & (dev->num_qps - 1));
++ spin_unlock_irqrestore(&dev->qp_table_lock, flags);
++
++ return qp;
++}
++
++static inline void *ibp_mlx4_buf_offset(void *buf, int offset)
++{
++ return buf + offset;
++}
++
++static void *get_wqe(struct ibp_mlx4_qp *qp, int offset)
++{
++ return ibp_mlx4_buf_offset(qp->wqe_buf, offset);
++}
++
++static void *get_recv_wqe(struct ibp_mlx4_qp *qp, int n)
++{
++ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
++}
++
++static void *get_send_wqe(struct ibp_mlx4_qp *qp, int n)
++{
++ return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
++}
++
++/*
++ * Stamp a SQ WQE so that it is invalid if prefetched by marking the
++ * first four bytes of every 64 byte chunk with
++ * 0x7FFFFFFF | (invalid_ownership_value << 31).
++ *
++ * When the max work request size is less than or equal to the WQE
++ * basic block size, as an optimization, we can stamp all WQEs with
++ * 0xffffffff, and skip the very first chunk of each WQE.
++ */
++static void stamp_send_wqe(struct ibp_mlx4_qp *qp, int n, int size)
++{
++ __be32 *wqe;
++ int i;
++ int s;
++ int ind;
++ void *buf;
++ __be32 stamp;
++ struct mlx4_wqe_ctrl_seg *ctrl;
++
++ if (qp->sq_max_wqes_per_wr > 1) {
++ s = roundup(size, 1U << qp->sq.wqe_shift);
++ for (i = 0; i < s; i += 64) {
++ ind = (i >> qp->sq.wqe_shift) + n;
++ stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
++ cpu_to_be32(0xffffffff);
++ buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
++ wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
++ *wqe = stamp;
++ }
++ } else {
++ ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
++ s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
++ for (i = 64; i < s; i += 64) {
++ wqe = buf + i;
++ *wqe = cpu_to_be32(0xffffffff);
++ }
++ }
++}
++
++void ibp_mlx4_qp_init_sq_ownership(struct ibp_mlx4_qp *qp)
++{
++ struct mlx4_wqe_ctrl_seg *ctrl;
++ int i;
++
++ for (i = 0; i < qp->sq.wqe_cnt; ++i) {
++ ctrl = get_send_wqe(qp, i);
++ ctrl->owner_opcode = cpu_to_be32(1 << 31);
++ if (qp->sq_max_wqes_per_wr == 1)
++ ctrl->qpn_vlan.fence_size = 1 << (qp->sq.wqe_shift - 4);
++
++ stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
++ }
++}
++
++static void post_nop_wqe(struct ibp_mlx4_qp *qp, int n, int size)
++{
++ struct mlx4_wqe_ctrl_seg *ctrl;
++ struct mlx4_wqe_inline_seg *inl;
++ void *wqe;
++ int s;
++
++ ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
++ s = sizeof(struct mlx4_wqe_ctrl_seg);
++
++ if (qp->ibqp.qp_type == IB_QPT_UD) {
++ struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof(*ctrl);
++ struct mlx4_av *av = (struct mlx4_av *)dgram->av;
++ memset(dgram, 0, sizeof(*dgram));
++ av->port_pd = cpu_to_be32((qp->port << 24) |
++ to_pd(qp->ibqp.pd)->pdn);
++ s += sizeof(struct mlx4_wqe_datagram_seg);
++ }
++
++ /* Pad the remainder of the WQE with an inline data segment. */
++ if (size > s) {
++ inl = wqe + s;
++ inl->byte_count = cpu_to_be32(1 << 31 |
++ (size - s - sizeof(*inl)));
++ }
++ ctrl->srcrb_flags = 0;
++ ctrl->qpn_vlan.fence_size = size / 16;
++ /*
++ * Make sure descriptor is fully written before setting ownership bit
++ * (because HW can start executing as soon as we do).
++ */
++ wmb();
++
++ ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
++ (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
++
++ stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
++}
++
++/* Post NOP WQE to prevent wrap-around in the middle of WR */
++static inline unsigned pad_wraparound(struct ibp_mlx4_qp *qp, int ind)
++{
++ unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
++ if (unlikely(s < qp->sq_max_wqes_per_wr)) {
++ post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
++ ind += s;
++ }
++ return ind;
++}
++
++static int ibp_mlx4_wq_overflow(struct ibp_mlx4_wq *wq, int nreq,
++ struct ib_cq *ibcq)
++{
++ unsigned cur;
++ struct ibp_mlx4_cq *cq;
++
++ cur = wq->head - wq->tail;
++ if (likely(cur + nreq < wq->max_post))
++ return 0;
++
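++ /*
++ * Re-check under the CQ lock: completion processing advances
++ * wq->tail while holding that lock.
++ */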
++ cq = to_cq(ibcq);
++ spin_lock(&cq->lock);
++ cur = wq->head - wq->tail;
++ spin_unlock(&cq->lock);
++
++ return cur + nreq >= wq->max_post;
++}
++
++static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
++{
++ memset(iseg, 0, sizeof(*iseg));
++ iseg->mem_key = cpu_to_be32(rkey);
++}
++
++static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
++ u64 remote_addr, u32 rkey)
++{
++ rseg->raddr = cpu_to_be64(remote_addr);
++ rseg->rkey = cpu_to_be32(rkey);
++ rseg->reserved = 0;
++}
++
++static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
++ struct ib_atomic_wr *wr)
++{
++ if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
++ aseg->swap_add = cpu_to_be64(wr->swap);
++ aseg->compare = cpu_to_be64(wr->compare_add);
++ } else {
++ aseg->swap_add = cpu_to_be64(wr->compare_add);
++ aseg->compare = 0;
++ }
++}
++
++static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
++ struct ib_ud_wr *wr)
++{
++ memcpy(dseg->av, &to_ah(wr->ah)->av, sizeof(struct mlx4_av));
++ dseg->dqpn = cpu_to_be32(wr->remote_qpn);
++ dseg->qkey = cpu_to_be32(wr->remote_qkey);
++}
++
++static void set_mlx_icrc_seg(void *dseg)
++{
++ u32 *t = dseg;
++ struct mlx4_wqe_inline_seg *iseg = dseg;
++
++ t[1] = 0;
++
++ /*
++ * Need a barrier here before writing the byte_count field to
++ * make sure that all the data is visible before the
++ * byte_count field is set. Otherwise, if the segment begins
++ * a new cacheline, the HCA prefetcher could grab the 64-byte
++ * chunk and get a valid (!= 0xffffffff) byte count but
++ * stale data, and end up sending the wrong data.
++ */
++ wmb();
++
++ iseg->byte_count = cpu_to_be32((1 << 31) | 4);
++}
++
++static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
++{
++ dseg->lkey = cpu_to_be32(sg->lkey);
++ dseg->addr = cpu_to_be64(sg->addr);
++
++ /*
++ * Need a barrier here before writing the byte_count field to
++ * make sure that all the data is visible before the
++ * byte_count field is set. Otherwise, if the segment begins
++ * a new cacheline, the HCA prefetcher could grab the 64-byte
++ * chunk and get a valid (!= 0xffffffff) byte count but
++ * stale data, and end up sending the wrong data.
++ */
++ wmb();
++
++ dseg->byte_count = cpu_to_be32(sg->length);
++}
++
++static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
++{
++ dseg->byte_count = cpu_to_be32(sg->length);
++ dseg->lkey = cpu_to_be32(sg->lkey);
++ dseg->addr = cpu_to_be64(sg->addr);
++}
++
++static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
++ struct ibp_mlx4_qp *qp, unsigned *lso_seg_len,
++ __be32 *lso_hdr_sz, __be32 *blh)
++{
++ unsigned halign = ALIGN(sizeof(*wqe) + wr->hlen, 16);
++
++ if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
++ *blh = cpu_to_be32(1 << 6);
++
++ if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
++ wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
++ return -EINVAL;
++
++ memcpy(wqe->header, wr->header, wr->hlen);
++
++ *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen);
++ *lso_seg_len = halign;
++ return 0;
++}
++
++static __be32 send_ieth(struct ib_send_wr *wr)
++{
++ switch (wr->opcode) {
++ case IB_WR_SEND_WITH_IMM:
++ case IB_WR_RDMA_WRITE_WITH_IMM:
++ return wr->ex.imm_data;
++
++ case IB_WR_SEND_WITH_INV:
++ return cpu_to_be32(wr->ex.invalidate_rkey);
++
++ default:
++ return 0;
++ }
++}
++
++int ibp_mlx4_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
++ struct ib_send_wr **bad_wr)
++{
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ void *wqe;
++ struct mlx4_wqe_ctrl_seg *ctrl;
++ struct mlx4_wqe_data_seg *dseg = NULL;
++ unsigned long flags;
++ int nreq;
++ int err = 0;
++ unsigned ind;
++ int uninitialized_var(stamp);
++ int uninitialized_var(size);
++ unsigned uninitialized_var(seglen);
++ __be32 dummy;
++ __be32 *lso_wqe;
++ __be32 uninitialized_var(lso_hdr_sz);
++ __be32 blh;
++ int i;
++
++ spin_lock_irqsave(&qp->sq.lock, flags);
++
++ ind = qp->sq_next_wqe;
++
++ for (nreq = 0; wr; ++nreq, wr = wr->next) {
++ lso_wqe = &dummy;
++ blh = 0;
++
++ if (ibp_mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
++ err = -ENOMEM;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
++ if (IS_ERR(ctrl)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)]
++ = wr->wr_id;
++
++ ctrl->srcrb_flags =
++ (wr->send_flags & IB_SEND_SIGNALED ?
++ cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
++ (wr->send_flags & IB_SEND_SOLICITED ?
++ cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
++ ((wr->send_flags & IB_SEND_IP_CSUM) ?
++ cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
++ MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
++ qp->sq_signal_bits;
++
++ ctrl->imm = send_ieth(wr);
++
++ wqe += sizeof(*ctrl);
++ size = sizeof(*ctrl) / 16;
++
++ switch (ibqp->qp_type) {
++ case IB_QPT_RC:
++ case IB_QPT_UC:
++ switch (wr->opcode) {
++ case IB_WR_ATOMIC_CMP_AND_SWP:
++ case IB_WR_ATOMIC_FETCH_AND_ADD:
++ set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
++ atomic_wr(wr)->rkey);
++ wqe += sizeof(struct mlx4_wqe_raddr_seg);
++
++ set_atomic_seg(wqe, atomic_wr(wr));
++ wqe += sizeof(struct mlx4_wqe_atomic_seg);
++
++ size +=
++ (sizeof(struct mlx4_wqe_raddr_seg) +
++ sizeof(struct mlx4_wqe_atomic_seg)) / 16;
++
++ break;
++
++ case IB_WR_RDMA_READ:
++ case IB_WR_RDMA_WRITE:
++ case IB_WR_RDMA_WRITE_WITH_IMM:
++ set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
++ rdma_wr(wr)->rkey);
++ wqe += sizeof(struct mlx4_wqe_raddr_seg);
++ size += sizeof(struct mlx4_wqe_raddr_seg) / 16;
++ break;
++
++ case IB_WR_LOCAL_INV:
++ ctrl->srcrb_flags |=
++ cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
++ set_local_inv_seg(wqe, wr->ex.invalidate_rkey);
++ wqe += sizeof(struct mlx4_wqe_local_inval_seg);
++ size += sizeof(struct mlx4_wqe_local_inval_seg)
++ / 16;
++ break;
++
++ case IB_WR_REG_MR:
++ /* FMR is not yet supported on MIC */
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++
++ default:
++ /* No extra segments required for sends */
++ break;
++ }
++ break;
++
++ case IB_QPT_UD:
++ set_datagram_seg(wqe, ud_wr(wr));
++ wqe += sizeof(struct mlx4_wqe_datagram_seg);
++ size += sizeof(struct mlx4_wqe_datagram_seg) / 16;
++
++ if (wr->opcode == IB_WR_LSO) {
++ err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
++ &lso_hdr_sz, &blh);
++ if (unlikely(err)) {
++ *bad_wr = wr;
++ goto out;
++ }
++ lso_wqe = (__be32 *) wqe;
++ wqe += seglen;
++ size += seglen / 16;
++ }
++ break;
++
++ default:
++ break;
++ }
++
++ /*
++ * Write data segments in reverse order, so as to overwrite
++ * cacheline stamp last within each cacheline. This avoids
++ * issues with WQE prefetching.
++ */
++
++ dseg = wqe;
++ dseg += wr->num_sge - 1;
++ size += wr->num_sge * (sizeof(struct mlx4_wqe_data_seg) / 16);
++
++ /* Add one more inline data segment for ICRC for MLX sends */
++ if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
++ qp->ibqp.qp_type == IB_QPT_GSI)) {
++ set_mlx_icrc_seg(dseg + 1);
++ size += sizeof(struct mlx4_wqe_data_seg) / 16;
++ }
++
++ for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
++ set_data_seg(dseg, wr->sg_list + i);
++
++ /*
++ * Possibly overwrite stamping in cacheline with LSO segment
++ * only after making sure all data segments are written.
++ */
++ wmb();
++ *lso_wqe = lso_hdr_sz;
++
++ ctrl->qpn_vlan.fence_size = (wr->send_flags & IB_SEND_FENCE ?
++ MLX4_WQE_CTRL_FENCE : 0) | size;
++
++ /*
++ * Make sure descriptor is fully written before setting
++ * ownership bit (as HW can start executing as soon as we do).
++ */
++ wmb();
++
++ if (wr->opcode < 0 ||
++ wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
++ (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0)
++ | blh;
++
++ stamp = ind + qp->sq_spare_wqes;
++ ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
++
++ /*
++ * We can improve latency by not stamping the last send queue
++ * WQE until after ringing the doorbell, so only stamp here if
++ * there are still more WQEs to post.
++ *
++ * Same optimization applies to padding with NOP wqe in case of
++ * WQE shrinking (to prevent wrap-around in the middle of WR).
++ */
++ if (wr->next) {
++ stamp_send_wqe(qp, stamp, size * 16);
++ ind = pad_wraparound(qp, ind);
++ }
++ }
++
++out:
++ if (likely(nreq)) {
++ qp->sq.head += nreq;
++
++ /*
++ * Make sure that descriptors are written before db record.
++ */
++ wmb();
++
++ writel(qp->doorbell_qpn,
++ to_device(ibqp->device)->uar_map->addr
++ + MLX4_SEND_DOORBELL);
++
++ /*
++ * Make sure doorbells don't leak out of SQ spinlock
++ * and reach the HCA out of order.
++ */
++ mmiowb();
++
++ stamp_send_wqe(qp, stamp, size * 16);
++
++ ind = pad_wraparound(qp, ind);
++ qp->sq_next_wqe = ind;
++ }
++
++ spin_unlock_irqrestore(&qp->sq.lock, flags);
++
++ return err;
++}
++
++int ibp_mlx4_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr)
++{
++ struct ibp_mlx4_qp *qp = to_qp(ibqp);
++ struct mlx4_wqe_data_seg *scat;
++ unsigned long flags;
++ int err = 0;
++ int nreq;
++ int ind;
++ int i;
++
++ spin_lock_irqsave(&qp->rq.lock, flags);
++
++ ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
++
++ for (nreq = 0; wr; ++nreq, wr = wr->next) {
++ if (ibp_mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
++ err = -ENOMEM;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ scat = get_recv_wqe(qp, ind);
++
++ for (i = 0; i < wr->num_sge; ++i)
++ __set_data_seg(scat + i, wr->sg_list + i);
++
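++ /* Terminate a short scatter list with an invalid-lkey sentinel. */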
++ if (i < qp->rq.max_gs) {
++ scat[i].byte_count = 0;
++ scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
++ scat[i].addr = 0;
++ }
++
++ qp->rq.wrid[ind] = wr->wr_id;
++
++ ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
++ }
++
++out:
++ if (likely(nreq)) {
++ qp->rq.head += nreq;
++
++ /*
++ * Make sure that descriptors are written before db record.
++ */
++ wmb();
++
++ *(qp->dbrec.db) = cpu_to_be32(qp->rq.head & 0xffff);
++ }
++
++ spin_unlock_irqrestore(&qp->rq.lock, flags);
++
++ return err;
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx4/srq.c b/drivers/infiniband/ibp/drv/hw/mlx4/srq.c
+new file mode 100644
+index 0000000..926bfe2
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx4/srq.c
+@@ -0,0 +1,160 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++static void *ibp_mlx4_get_srq_wqe(struct ibp_mlx4_srq *srq, int n)
++{
++ return srq->wqe_buf + (n << srq->wqe_shift);
++}
++
++int ibp_mlx4_alloc_srq_buf(struct ibp_mlx4_srq *srq, int desc_size)
++{
++ struct mlx4_wqe_srq_next_seg *next;
++ struct mlx4_wqe_data_seg *scatter;
++ int size, i;
++
++ srq->wrid = kzalloc(srq->max * sizeof(u64), GFP_KERNEL);
++ if (!srq->wrid)
++ return -ENOMEM;
++
++ size = ALIGN(srq->max * desc_size, PAGE_SIZE);
++ srq->wqe_buf = kzalloc(size, GFP_KERNEL);
++ if (!srq->wqe_buf) {
++ kfree(srq->wrid);
++ return -ENOMEM;
++ }
++
++ srq->head = 0;
++ srq->tail = srq->max - 1;
++ srq->wqe_ctr = 0;
++
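++ /*
++ * Link every WQE into a circular free list and mark all scatter
++ * entries invalid so unused entries are ignored by the HCA.
++ */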
++ for (i = 0; i < srq->max; ++i) {
++ next = ibp_mlx4_get_srq_wqe(srq, i);
++ next->next_wqe_index = cpu_to_be16((i + 1) & (srq->max - 1));
++
++ for (scatter = (void *)(next + 1);
++ (void *)scatter < (void *)next + desc_size;
++ ++scatter)
++ scatter->lkey = cpu_to_be32(MLX4_INVALID_LKEY);
++ }
++
++ return 0;
++}
++
++void ibp_mlx4_free_srq_buf(struct ibp_mlx4_srq *srq)
++{
++ kfree(srq->wrid);
++ kfree(srq->wqe_buf);
++}
++
++static void *get_wqe(struct ibp_mlx4_srq *srq, int n)
++{
++ return srq->wqe_buf + (n << srq->wqe_shift);
++}
++
++void ibp_mlx4_free_srq_wqe(struct ibp_mlx4_srq *srq, int wqe_index)
++{
++ struct mlx4_wqe_srq_next_seg *next;
++
++ /* always called with interrupts disabled. */
++ spin_lock(&srq->lock);
++
++ next = get_wqe(srq, srq->tail);
++ next->next_wqe_index = cpu_to_be16(wqe_index);
++ srq->tail = wqe_index;
++
++ spin_unlock(&srq->lock);
++}
++
++int ibp_mlx4_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr)
++{
++ struct ibp_mlx4_srq *srq = to_srq(ibsrq);
++ struct mlx4_wqe_srq_next_seg *next;
++ struct mlx4_wqe_data_seg *scat;
++ unsigned long flags;
++ int err = 0;
++ int nreq;
++ int i;
++
++ spin_lock_irqsave(&srq->lock, flags);
++
++ for (nreq = 0; wr; ++nreq, wr = wr->next) {
++ if (unlikely(wr->num_sge > srq->max_gs)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ break;
++ }
++
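++ /* head catching tail means the WQE free list is exhausted. */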
++ if (unlikely(srq->head == srq->tail)) {
++ err = -ENOMEM;
++ *bad_wr = wr;
++ break;
++ }
++
++ srq->wrid[srq->head] = wr->wr_id;
++
++ next = ibp_mlx4_get_srq_wqe(srq, srq->head);
++ srq->head = be16_to_cpu(next->next_wqe_index);
++ scat = (struct mlx4_wqe_data_seg *) (next + 1);
++
++ for (i = 0; i < wr->num_sge; ++i) {
++ scat[i].byte_count =
++ cpu_to_be32(wr->sg_list[i].length);
++ scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
++ scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
++ }
++
++ if (i < srq->max_gs) {
++ scat[i].byte_count = 0;
++ scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
++ scat[i].addr = 0;
++ }
++ }
++
++ if (likely(nreq)) {
++ srq->wqe_ctr += nreq;
++
++ /*
++ * Make sure that descriptors are written before
++ * doorbell record.
++ */
++ wmb();
++
++ *srq->dbrec.db = cpu_to_be32(srq->wqe_ctr);
++ }
++
++ spin_unlock_irqrestore(&srq->lock, flags);
++
++ return err;
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/Kconfig b/drivers/infiniband/ibp/drv/hw/mlx5/Kconfig
+new file mode 100644
+index 0000000..25b180f
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/Kconfig
+@@ -0,0 +1,9 @@
++config IBP_MLX5
++ tristate "CCL Direct Mellanox Connect-IB HCA client support"
++ depends on IBP_CLIENT
++ ---help---
++ This driver provides low-level InfiniBand support for
++ Mellanox Connect-IB PCI Express host channel adapters (HCAs).
++
++ To compile this driver as a module, choose M here.
++ If unsure, say N.
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/Makefile b/drivers/infiniband/ibp/drv/hw/mlx5/Makefile
+new file mode 100644
+index 0000000..aaf01fd
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/Makefile
+@@ -0,0 +1,9 @@
++obj-$(CONFIG_IBP_MLX5) += ibp_mlx5.o
++
++ccflags-y := -Idrivers/infiniband/ibp/drv/ -Idrivers/infiniband/
++
++ibp_mlx5-y := main.o \
++ dbrec.o \
++ srq.o \
++ cq.o \
++ qp.o
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/cq.c b/drivers/infiniband/ibp/drv/hw/mlx5/cq.c
+new file mode 100644
+index 0000000..7f49cde
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/cq.c
+@@ -0,0 +1,638 @@
++/*
++ * Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++static void ibp_mlx5_cq_comp_handler(struct ib_cq *ibcq, void *cq_context)
++{
++ struct ibp_mlx5_cq *cq = to_cq(ibcq);
++
++ ++cq->arm_sn;
++ cq->comp(ibcq, cq_context);
++}
++
++void ibp_mlx5_route_cq_comp(struct ib_cq *ibcq)
++{
++ struct ibp_mlx5_cq *cq = to_cq(ibcq);
++
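++ /*
++ * Interpose on the completion handler so the arm sequence number
++ * is bumped on every completion event before the consumer runs.
++ */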
++ spin_lock(&cq->lock);
++ if (ibcq->comp_handler != ibp_mlx5_cq_comp_handler) {
++ cq->comp = ibcq->comp_handler;
++ ibcq->comp_handler = ibp_mlx5_cq_comp_handler;
++ }
++ spin_unlock(&cq->lock);
++}
++
++static void *get_cqe(struct ibp_mlx5_cq *cq, int entry)
++{
++ return cq->cqe_buf + entry * cq->cqe_sz;
++}
++
++static void *get_sw_cqe(struct ibp_mlx5_cq *cq, int n)
++{
++ void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
++ struct mlx5_cqe64 *cqe64;
++
++ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
++
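++ /*
++ * A CQE is owned by software when it is valid and its ownership
++ * bit matches the parity of the current pass over the CQ ring.
++ */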
++ if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
++ !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
++ return cqe;
++ } else {
++ return NULL;
++ }
++}
++
++static void *next_cqe_sw(struct ibp_mlx5_cq *cq)
++{
++ return get_sw_cqe(cq, cq->cons_index);
++}
++
++static void ibp_init_cq_buf(struct ibp_mlx5_cq *cq, int nent)
++{
++ int i;
++ void *cqe;
++ struct mlx5_cqe64 *cqe64;
++
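++ /* Mark every CQE invalid so no stale entries are reported. */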
++ for (i = 0; i < nent; ++i) {
++ cqe = get_cqe(cq, i);
++ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
++ cqe64->op_own = MLX5_CQE_INVALID << 4;
++ }
++}
++
++int ibp_mlx5_alloc_cq_buf(struct ibp_mlx5_cq *cq, int nent)
++{
++ int size;
++
++ size = ALIGN(nent * cq->cqe_sz, PAGE_SIZE * 2);
++
++ cq->cqe_buf = kzalloc(size, GFP_KERNEL);
++ if (!cq->cqe_buf)
++ return -ENOMEM;
++
++ ibp_init_cq_buf(cq, nent);
++
++ return 0;
++}
++
++void ibp_mlx5_free_cq_buf(struct ibp_mlx5_cq *cq)
++{
++ kfree(cq->cqe_buf);
++}
++
++static inline void ibp_mlx5_cq_set_ci(struct ibp_mlx5_cq *cq)
++{
++ *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
++}
++
++static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
++{
++ return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
++}
++
++void __ibp_mlx5_cq_clean(struct ibp_mlx5_cq *cq, u32 rsn, struct ibp_mlx5_srq *srq)
++{
++ struct mlx5_cqe64 *cqe64, *dest64;
++ void *cqe, *dest;
++ u32 prod_index;
++ int nfreed = 0;
++ u8 owner_bit;
++
++ if (!cq)
++ return;
++
++ /*
++ * First we need to find the current producer index, so we
++ * know where to start cleaning from. It doesn't matter if HW
++ * adds new entries after this loop -- the QP we're worried
++ * about is already in RESET, so the new entries won't come
++ * from our QP and therefore don't need to be checked.
++ */
++ for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); prod_index++)
++ if (prod_index == cq->cons_index + cq->ibcq.cqe)
++ break;
++
++ /*
++ * Now sweep backwards through the CQ, removing CQ entries
++ * that match our QP by copying older entries on top of them.
++ */
++ while ((int) --prod_index - (int) cq->cons_index >= 0) {
++ cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
++ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
++ if (is_equal_rsn(cqe64, rsn)) {
++ if (srq && (ntohl(cqe64->srqn) & 0xffffff))
++ ibp_mlx5_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
++ ++nfreed;
++ } else if (nfreed) {
++ dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
++ dest64 = (cq->cqe_sz == 64) ? dest : dest + 64;
++ owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
++ memcpy(dest, cqe, cq->cqe_sz);
++ dest64->op_own = owner_bit |
++ (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
++ }
++ }
++
++ if (nfreed) {
++ cq->cons_index += nfreed;
++ /*
++ * Make sure update of buffer contents is done before
++ * updating consumer index.
++ */
++ wmb();
++ ibp_mlx5_cq_set_ci(cq);
++ }
++}
++
++void ibp_mlx5_cq_clean(struct ibp_mlx5_cq *cq, u32 qpn,
++ struct ibp_mlx5_srq *srq)
++{
++ if (!cq)
++ return;
++
++ spin_lock_irq(&cq->lock);
++ __ibp_mlx5_cq_clean(cq, qpn, srq);
++ spin_unlock_irq(&cq->lock);
++}
++
++void ibp_mlx5_lock_cqs(struct ibp_mlx5_cq *send_cq, struct ibp_mlx5_cq *recv_cq)
++ __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
++{
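++ /* Always lock in CQN order to avoid an AB-BA deadlock. */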
++ if (send_cq) {
++ if (recv_cq) {
++ if (send_cq->cqn < recv_cq->cqn) {
++ spin_lock_irq(&send_cq->lock);
++ spin_lock_nested(&recv_cq->lock,
++ SINGLE_DEPTH_NESTING);
++ } else if (send_cq->cqn == recv_cq->cqn) {
++ spin_lock_irq(&send_cq->lock);
++ __acquire(&recv_cq->lock);
++ } else {
++ spin_lock_irq(&recv_cq->lock);
++ spin_lock_nested(&send_cq->lock,
++ SINGLE_DEPTH_NESTING);
++ }
++ } else {
++ spin_lock_irq(&send_cq->lock);
++ }
++ } else if (recv_cq) {
++ spin_lock_irq(&recv_cq->lock);
++ }
++}
++
++void ibp_mlx5_unlock_cqs(struct ibp_mlx5_cq *send_cq, struct ibp_mlx5_cq *recv_cq)
++ __releases(&send_cq->lock) __releases(&recv_cq->lock)
++{
++ if (send_cq) {
++ if (recv_cq) {
++ if (send_cq->cqn < recv_cq->cqn) {
++ spin_unlock(&recv_cq->lock);
++ spin_unlock_irq(&send_cq->lock);
++ } else if (send_cq->cqn == recv_cq->cqn) {
++ __release(&recv_cq->lock);
++ spin_unlock_irq(&send_cq->lock);
++ } else {
++ spin_unlock(&send_cq->lock);
++ spin_unlock_irq(&recv_cq->lock);
++ }
++ } else {
++ spin_unlock_irq(&send_cq->lock);
++ }
++ } else if (recv_cq) {
++ spin_unlock_irq(&recv_cq->lock);
++ }
++}
++
++static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
++ struct ibp_mlx5_wq *wq, int idx)
++{
++ wc->wc_flags = 0;
++ switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
++ case MLX5_OPCODE_RDMA_WRITE_IMM:
++ wc->wc_flags |= IB_WC_WITH_IMM;
++ case MLX5_OPCODE_RDMA_WRITE:
++ wc->opcode = IB_WC_RDMA_WRITE;
++ break;
++ case MLX5_OPCODE_SEND_IMM:
++ wc->wc_flags |= IB_WC_WITH_IMM;
++ case MLX5_OPCODE_SEND:
++ case MLX5_OPCODE_SEND_INVAL:
++ wc->opcode = IB_WC_SEND;
++ break;
++ case MLX5_OPCODE_RDMA_READ:
++ wc->opcode = IB_WC_RDMA_READ;
++ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
++ break;
++ case MLX5_OPCODE_ATOMIC_CS:
++ wc->opcode = IB_WC_COMP_SWAP;
++ wc->byte_len = 8;
++ break;
++ case MLX5_OPCODE_ATOMIC_FA:
++ wc->opcode = IB_WC_FETCH_ADD;
++ wc->byte_len = 8;
++ break;
++ case MLX5_OPCODE_ATOMIC_MASKED_CS:
++ wc->opcode = IB_WC_MASKED_COMP_SWAP;
++ wc->byte_len = 8;
++ break;
++ case MLX5_OPCODE_ATOMIC_MASKED_FA:
++ wc->opcode = IB_WC_MASKED_FETCH_ADD;
++ wc->byte_len = 8;
++ break;
++ /*
++ * MLX5_OPCODE_UMR is not supported:
++ * case MLX5_OPCODE_UMR:
++ * wc->opcode = get_umr_comp(wq, idx);
++ * break;
++ */
++ }
++}
++
++static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
++ struct ibp_mlx5_qp *qp)
++{
++ struct ibp_mlx5_device *dev = to_device(qp->ibqp.device);
++ struct ibp_mlx5_wq *wq;
++ u16 wqe_ctr;
++ u8 g;
++
++ if (qp->ibqp.srq || qp->ibqp.xrcd) {
++ struct ibp_mlx5_srq *srq = NULL;
++
++ if (qp->ibqp.xrcd) {
++ srq = ibp_mlx5_srq_lookup(dev, be32_to_cpu(cqe->srqn));
++ } else {
++ srq = to_srq(qp->ibqp.srq);
++ }
++ if (srq) {
++ wqe_ctr = be16_to_cpu(cqe->wqe_counter);
++ wc->wr_id = srq->wrid[wqe_ctr];
++ ibp_mlx5_free_srq_wqe(srq, wqe_ctr);
++ }
++ } else {
++ wq = &qp->rq;
++ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++ ++wq->tail;
++ }
++ wc->byte_len = be32_to_cpu(cqe->byte_cnt);
++
++ switch (cqe->op_own >> 4) {
++ case MLX5_CQE_RESP_WR_IMM:
++ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
++ wc->wc_flags = IB_WC_WITH_IMM;
++ wc->ex.imm_data = cqe->imm_inval_pkey;
++ break;
++ case MLX5_CQE_RESP_SEND:
++ wc->opcode = IB_WC_RECV;
++ wc->wc_flags = 0;
++ break;
++ case MLX5_CQE_RESP_SEND_IMM:
++ wc->opcode = IB_WC_RECV;
++ wc->wc_flags = IB_WC_WITH_IMM;
++ wc->ex.imm_data = cqe->imm_inval_pkey;
++ break;
++ case MLX5_CQE_RESP_SEND_INV:
++ wc->opcode = IB_WC_RECV;
++ wc->wc_flags = IB_WC_WITH_INVALIDATE;
++ wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
++ break;
++ }
++ wc->slid = be16_to_cpu(cqe->slid);
++ wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
++ wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
++ wc->dlid_path_bits = cqe->ml_path;
++ g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
++ wc->wc_flags |= g ? IB_WC_GRH : 0;
++ wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
++}
++
++static void dump_cqe(struct ibp_mlx5_device *dev, struct mlx5_err_cqe *cqe)
++{
++ __be32 *p = (__be32 *)cqe;
++ int i;
++
++ print_info("dump error cqe\n");
++ for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
++ pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
++ be32_to_cpu(p[1]),
++ be32_to_cpu(p[2]),
++ be32_to_cpu(p[3]));
++}
++
++static void mlx5_handle_error_cqe(struct ibp_mlx5_device *dev,
++ struct mlx5_err_cqe *cqe,
++ struct ib_wc *wc)
++{
++ int dump = 1;
++
++ switch (cqe->syndrome) {
++ case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
++ wc->status = IB_WC_LOC_LEN_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
++ wc->status = IB_WC_LOC_QP_OP_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
++ wc->status = IB_WC_LOC_PROT_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
++ dump = 0;
++ wc->status = IB_WC_WR_FLUSH_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_MW_BIND_ERR:
++ wc->status = IB_WC_MW_BIND_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
++ wc->status = IB_WC_BAD_RESP_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
++ wc->status = IB_WC_LOC_ACCESS_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
++ wc->status = IB_WC_REM_INV_REQ_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
++ wc->status = IB_WC_REM_ACCESS_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
++ wc->status = IB_WC_REM_OP_ERR;
++ break;
++ case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
++ wc->status = IB_WC_RETRY_EXC_ERR;
++ dump = 0;
++ break;
++ case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
++ wc->status = IB_WC_RNR_RETRY_EXC_ERR;
++ dump = 0;
++ break;
++ case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
++ wc->status = IB_WC_REM_ABORT_ERR;
++ break;
++ default:
++ wc->status = IB_WC_GENERAL_ERR;
++ break;
++ }
++
++ wc->vendor_err = cqe->vendor_err_synd;
++ if (dump)
++ dump_cqe(dev, cqe);
++}
++
++static int is_atomic_response(struct ibp_mlx5_qp *qp, uint16_t idx)
++{
++ /* TBD: awaiting a decision */
++ return 0;
++}
++
++static void *mlx5_get_atomic_laddr(struct ibp_mlx5_qp *qp, uint16_t idx)
++{
++ struct mlx5_wqe_data_seg *dpseg;
++ void *addr;
++
++ dpseg = ibp_mlx5_get_send_wqe(qp, idx)
++ + sizeof(struct mlx5_wqe_ctrl_seg)
++ + sizeof(struct mlx5_wqe_raddr_seg)
++ + sizeof(struct mlx5_wqe_atomic_seg);
++ addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
++ return addr;
++}
++
++static void handle_atomic(struct ibp_mlx5_qp *qp, struct mlx5_cqe64 *cqe64,
++ uint16_t idx)
++{
++ void *addr;
++ int byte_count;
++ int i;
++
++ if (!is_atomic_response(qp, idx))
++ return;
++
++ byte_count = be32_to_cpu(cqe64->byte_cnt);
++ addr = mlx5_get_atomic_laddr(qp, idx);
++
++ if (byte_count == 4) {
++ *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
++ } else {
++ for (i = 0; i < byte_count; i += 8) {
++ *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
++ addr += 8;
++ }
++ }
++}
++
++static void handle_atomics(struct ibp_mlx5_qp *qp, struct mlx5_cqe64 *cqe64,
++ u16 tail, u16 head)
++{
++ int idx;
++
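++ /*
++ * Walk the send work list from tail to head, converting atomic
++ * response data to host byte order in place (currently a no-op
++ * while is_atomic_response() is stubbed out).
++ */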
++ do {
++ idx = tail & (qp->sq.wqe_cnt - 1);
++ handle_atomic(qp, cqe64, idx);
++ if (idx == head)
++ break;
++
++ tail = qp->sq.w_list[idx].next;
++ } while (1);
++ tail = qp->sq.w_list[idx].next;
++ qp->sq.last_poll = tail;
++}
++
++static int mlx5_poll_one(struct ibp_mlx5_cq *cq,
++ struct ibp_mlx5_qp **cur_qp,
++ struct ib_wc *wc)
++{
++ struct ibp_mlx5_device *dev = to_device(cq->ibcq.device);
++ struct mlx5_err_cqe *err_cqe;
++ struct mlx5_cqe64 *cqe64;
++ struct ibp_mlx5_qp *qp;
++ struct ibp_mlx5_wq *wq;
++ u16 wqe_ctr;
++ uint8_t opcode;
++ uint32_t qpn;
++ void *cqe;
++ int idx;
++
++ cqe = next_cqe_sw(cq);
++ if (!cqe)
++ return -EAGAIN;
++
++ cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
++
++ ++cq->cons_index;
++
++ /*
++ * Make sure we read CQ entry contents after we've checked the
++ * ownership bit.
++ */
++ rmb();
++
++ /* TBD: resize CQ */
++
++ qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
++ if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
++ /*
++ * We do not have to take the QP table lock here,
++ * because CQs will be locked while QPs are removed
++ * from the table.
++ */
++ qp = ibp_mlx5_qp_lookup(dev, qpn);
++ if (unlikely(!qp)) {
++ print_err("CQE@CQ %06x for unknown QPN %6x\n",
++ cq->cqn, qpn);
++ return -EINVAL;
++ }
++
++ *cur_qp = qp;
++ }
++
++ wc->qp = &(*cur_qp)->ibqp;
++ opcode = cqe64->op_own >> 4;
++ switch (opcode) {
++ case MLX5_CQE_REQ:
++ wq = &(*cur_qp)->sq;
++ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
++ idx = wqe_ctr & (wq->wqe_cnt - 1);
++ handle_good_req(wc, cqe64, wq, idx);
++ handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
++ wc->wr_id = wq->wrid[idx];
++ wq->tail = wq->wqe_head[idx] + 1;
++ wc->status = IB_WC_SUCCESS;
++ break;
++ case MLX5_CQE_RESP_WR_IMM:
++ case MLX5_CQE_RESP_SEND:
++ case MLX5_CQE_RESP_SEND_IMM:
++ case MLX5_CQE_RESP_SEND_INV:
++ handle_responder(wc, cqe64, *cur_qp);
++ wc->status = IB_WC_SUCCESS;
++ break;
++ case MLX5_CQE_RESIZE_CQ:
++ break;
++ case MLX5_CQE_REQ_ERR:
++ case MLX5_CQE_RESP_ERR:
++ err_cqe = (struct mlx5_err_cqe *)cqe64;
++ mlx5_handle_error_cqe(dev, err_cqe, wc);
++ print_dbg("%s error cqe on cqn 0x%x:\n",
++ opcode == MLX5_CQE_REQ_ERR ?
++ "Requestor" : "Responder", cq->cqn);
++ print_dbg("syndrome 0x%x, vendor syndrome 0x%x\n",
++ err_cqe->syndrome, err_cqe->vendor_err_synd);
++ if (opcode == MLX5_CQE_REQ_ERR) {
++ wq = &(*cur_qp)->sq;
++ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
++ idx = wqe_ctr & (wq->wqe_cnt - 1);
++ wc->wr_id = wq->wrid[idx];
++ wq->tail = wq->wqe_head[idx] + 1;
++ } else {
++ struct ibp_mlx5_srq *srq;
++
++ if ((*cur_qp)->ibqp.srq) {
++ srq = to_srq((*cur_qp)->ibqp.srq);
++ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
++ wc->wr_id = srq->wrid[wqe_ctr];
++ ibp_mlx5_free_srq_wqe(srq, wqe_ctr);
++ } else {
++ wq = &(*cur_qp)->rq;
++ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++ ++wq->tail;
++ }
++ }
++ break;
++ }
++
++ return 0;
++}
++
++int ibp_mlx5_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
++{
++ struct ibp_mlx5_cq *cq = to_cq(ibcq);
++ struct ibp_mlx5_qp *cur_qp = NULL;
++ unsigned long flags;
++ int npolled;
++ int err = 0;
++
++ spin_lock_irqsave(&cq->lock, flags);
++
++ for (npolled = 0; npolled < num_entries; npolled++) {
++ err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
++ if (err)
++ break;
++ }
++
++ if (npolled)
++ ibp_mlx5_cq_set_ci(cq);
++
++ spin_unlock_irqrestore(&cq->lock, flags);
++
++ if (err == 0 || err == -EAGAIN)
++ return npolled;
++ else
++ return err;
++}
++
++static inline void ibp_mlx5_cq_arm(struct ibp_mlx5_cq *cq, u32 cmd,
++ void __iomem *uar_page,
++ spinlock_t *doorbell_lock)
++{
++ __be32 doorbell[2];
++ u32 sn;
++ u32 ci;
++
++ sn = cq->arm_sn & 3;
++ ci = cq->cons_index & 0xffffff;
++
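++ /*
++ * The doorbell value encodes the 2-bit arm sequence number, the
++ * notification command, and the 24-bit consumer index.
++ */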
++ *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
++
++ /*
++ * Make sure that the doorbell record in host memory is
++ * written before ringing the doorbell via PCI MMIO.
++ */
++ wmb();
++
++ doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
++ doorbell[1] = cpu_to_be32(cq->cqn);
++
++ mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock);
++}
++
++int ibp_mlx5_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
++{
++ struct ibp_mlx5_device *dev = to_device(ibcq->device);
++ u32 cmd = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
++ MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;
++
++ ibp_mlx5_cq_arm(to_cq(ibcq), cmd, dev->uars[0]->addr,
++ MLX5_GET_DOORBELL_LOCK(&dev->uar_lock));
++
++ return 0;
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/dbrec.c b/drivers/infiniband/ibp/drv/hw/mlx5/dbrec.c
+new file mode 100644
+index 0000000..23aaeac
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/dbrec.c
+@@ -0,0 +1,126 @@
++/*
++ * Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++static struct ibp_mlx5_db_pgdir *
++ibp_mlx5_alloc_db_pgdir(struct device *dma_device)
++{
++ struct ibp_mlx5_db_pgdir *pgdir;
++
++ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
++ if (!pgdir) {
++ print_err("kzalloc failed\n");
++ return NULL;
++ }
++
++ bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE);
++ pgdir->db_page = (void *)__get_free_page(GFP_KERNEL);
++ if (!pgdir->db_page) {
++ print_err("dma_alloc_coherent failed\n");
++ kfree(pgdir);
++ return NULL;
++ }
++
++ return pgdir;
++}
++
++static int ibp_mlx5_alloc_db_from_pgdir(struct ibp_mlx5_db_pgdir *pgdir,
++ struct ibp_mlx5_dbrec *db)
++{
++ int i;
++ int offset;
++
++ i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE);
++ if (i >= MLX5_DB_PER_PAGE) {
++ print_err("find_first_bit returned %d > %d\n",
++ i, MLX5_DB_PER_PAGE);
++ return -ENOMEM;
++ }
++
++ clear_bit(i, pgdir->bitmap);
++
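++ /* One doorbell record per cache line to avoid false sharing. */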
++ db->u.pgdir = pgdir;
++ db->index = i;
++ offset = db->index * L1_CACHE_BYTES;
++ db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page);
++ db->dma = pgdir->db_dma + offset;
++
++ db->db[0] = 0;
++ db->db[1] = 0;
++
++ return 0;
++}
++
++int ibp_mlx5_db_alloc(struct ibp_mlx5_device *dev, struct ibp_mlx5_dbrec *db)
++{
++ struct device *device = dev->ibdevice.dma_device;
++ struct ibp_mlx5_db_pgdir *pgdir;
++ int ret = 0;
++
++ mutex_lock(&dev->pgdir_mutex);
++
++ list_for_each_entry(pgdir, &dev->pgdir_list, list)
++ if (!ibp_mlx5_alloc_db_from_pgdir(pgdir, db))
++ goto out;
++
++ pgdir = ibp_mlx5_alloc_db_pgdir(device);
++ if (!pgdir) {
++ print_err("ibp_mlx5_alloc_db_pgdir failed\n");
++ ret = -ENOMEM;
++ goto out;
++ }
++
++ list_add(&pgdir->list, &dev->pgdir_list);
++
++ /* This should never fail -- we just allocated an empty page: */
++ WARN_ON(ibp_mlx5_alloc_db_from_pgdir(pgdir, db));
++out:
++ mutex_unlock(&dev->pgdir_mutex);
++
++ return ret;
++}
++
++void ibp_mlx5_db_free(struct ibp_mlx5_device *dev, struct ibp_mlx5_dbrec *db)
++{
++ mutex_lock(&dev->pgdir_mutex);
++
++ set_bit(db->index, db->u.pgdir->bitmap);
++
++ if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) {
++ free_page((unsigned long) db->u.pgdir->db_page);
++ list_del(&db->u.pgdir->list);
++ kfree(db->u.pgdir);
++ }
++
++ mutex_unlock(&dev->pgdir_mutex);
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/main.c b/drivers/infiniband/ibp/drv/hw/mlx5/main.c
+new file mode 100644
+index 0000000..0ae6927
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/main.c
+@@ -0,0 +1,1460 @@
++/*
++ * Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++int ibp_mlx5_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ int ret;
++
++ print_trace("in\n");
++
++ if (udata->inlen || udata->outlen)
++ return -EINVAL;
++
++ ret = ibp_cmd_query_device(dev->ibpdev, attr);
++ if (ret)
++ print_err("ibp_cmd_query_device error %d\n", ret);
++
++ /* A local_dma_lkey has not been created. */
++ attr->device_cap_flags &= ~IB_DEVICE_LOCAL_DMA_LKEY;
++
++ return ret;
++}
++
++int ibp_mlx5_query_port(struct ib_device *ibdev, u8 port_num,
++ struct ib_port_attr *attr)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ int ret;
++
++ ret = ibp_cmd_query_port(dev->ibpdev, port_num, attr);
++ if (ret)
++ print_err("ibp_cmd_query_port error %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx5_modify_port(struct ib_device *device,
++ u8 port_num, int port_modify_mask,
++ struct ib_port_modify *port_modify)
++{
++ print_trace("in\n");
++
++ return -ENOSYS;
++}
++
++int ibp_mlx5_query_gid(struct ib_device *ibdev, u8 port_num, int index,
++ union ib_gid *gid)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ int ret;
++
++ ret = ibp_cmd_query_gid(dev->ibpdev, port_num, index, gid);
++ if (ret)
++ print_err("ibp_cmd_query_gid error %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx5_query_pkey(struct ib_device *ibdev, u8 port_num,
++ u16 index, u16 *pkey)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ int ret;
++
++ ret = ibp_cmd_query_pkey(dev->ibpdev, port_num, index, pkey);
++ if (ret)
++ print_err("ibp_cmd_query_pkey error %d\n", ret);
++
++ return ret;
++}
++
++struct ib_ucontext *ibp_mlx5_alloc_ucontext(struct ib_device *ibdev,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ struct ibp_mlx5_alloc_ucontext_resp resp;
++ struct ibp_mlx5_alloc_ucontext_cmd cmd;
++ struct ibp_mlx5_ucontext *ucontext;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
++ if (!ucontext)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata error %d\n", ret);
++ goto err0;
++ }
++
++ ret = ibp_cmd_alloc_ucontext(dev->ibpdev, NULL, &ucontext->ucontext,
++ &cmd.msg, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_alloc_ucontext error %d\n", ret);
++ goto err0;
++ }
++
++ INIT_LIST_HEAD(&ucontext->db_list);
++ mutex_init(&ucontext->db_mutex);
++
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata error %d\n", ret);
++ goto err1;
++ }
++
++ return &ucontext->ibucontext;
++err1:
++ ibp_cmd_dealloc_ucontext(dev->ibpdev, ucontext->ucontext);
++err0:
++ kfree(ucontext);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx5_dealloc_ucontext(struct ib_ucontext *ibucontext)
++{
++ struct ibp_mlx5_ucontext *ucontext = to_ucontext(ibucontext);
++ struct ibp_mlx5_device *dev = to_device(ibucontext->device);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_dealloc_ucontext(dev->ibpdev, ucontext->ucontext);
++ if (ret)
++ print_err("ibp_cmd_dealloc_ucontext error %d\n", ret);
++
++ kfree(ucontext);
++ return 0;
++}
++
++int ibp_mlx5_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma)
++{
++ struct ibp_mlx5_ucontext *ucontext = to_ucontext(ibucontext);
++ struct ibp_mlx5_device *dev = to_device(ibucontext->device);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_mmap(dev->ibpdev, ucontext->ucontext, vma);
++ if (ret)
++ print_err("ibp_cmd_mmap error %d\n", ret);
++
++ return ret;
++}
++
++struct ib_pd *ibp_mlx5_alloc_pd(struct ib_device *ibdev,
++ struct ib_ucontext *ibucontext,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ struct ibp_mlx5_alloc_pd_resp resp;
++ struct ibp_alloc_pd_cmd cmd;
++ struct ibp_mlx5_pd *pd;
++ u64 ucontext;
++ int ret;
++
++ print_trace("in\n");
++
++ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
++ if (!pd)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibucontext ? to_ucontext(ibucontext)->ucontext
++ : dev->kcontext->ucontext;
++
++ ret = ibp_cmd_alloc_pd(dev->ibpdev, ucontext, &pd->pd,
++ &cmd, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_alloc_pd error %d\n", ret);
++ goto err0;
++ }
++
++ pd->pdn = resp.data.pdn;
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata error %d\n", ret);
++ goto err1;
++ }
++ }
++
++ return &pd->ibpd;
++err1:
++ ibp_cmd_dealloc_pd(dev->ibpdev, pd->pd);
++err0:
++ kfree(pd);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx5_dealloc_pd(struct ib_pd *ibpd)
++{
++ struct ibp_mlx5_device *dev = to_device(ibpd->device);
++ struct ibp_mlx5_pd *pd = to_pd(ibpd);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_dealloc_pd(dev->ibpdev, pd->pd);
++ if (ret)
++ print_err("ibp_cmd_dealloc_pd error %d\n", ret);
++
++ kfree(pd);
++ return 0;
++}
++
++struct ib_ah *ibp_mlx5_create_ah(struct ib_pd *ibpd,
++ struct ib_ah_attr *ah_attr)
++{
++ struct ibp_mlx5_device *dev = to_device(ibpd->device);
++ struct ibp_mlx5_pd *pd = to_pd(ibpd);
++ struct ibp_mlx5_ah *ah;
++ u32 sgi;
++ int ret;
++
++ print_trace("in\n");
++
++ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
++ if (!ah)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ibp_cmd_create_ah(dev->ibpdev, pd->pd, ah_attr, &ah->ah);
++ if (ret) {
++ print_err("ibp_cmd_create_ah error %d\n", ret);
++ kfree(ah);
++ return ERR_PTR(ret);
++ }
++
++ if (!ibpd->uobject) {
++ if (ah_attr->ah_flags & IB_AH_GRH) {
++ sgi = ah_attr->grh.sgid_index << 20;
++
++ memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
++ ah->av.grh_gid_fl =
++ cpu_to_be32(ah_attr->grh.flow_label |
++ (1 << 30) | sgi);
++ ah->av.hop_limit = ah_attr->grh.hop_limit;
++ ah->av.tclass = ah_attr->grh.traffic_class;
++ }
++
++ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
++ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
++ ah->av.stat_rate_sl = (ah_attr->static_rate << 4) |
++ (ah_attr->sl & 0xf);
++ }
++
++ return &ah->ibah;
++}
++
++int ibp_mlx5_destroy_ah(struct ib_ah *ibah)
++{
++ struct ibp_mlx5_device *dev = to_device(ibah->device);
++ struct ibp_mlx5_ah *ah = to_ah(ibah);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_destroy_ah(dev->ibpdev, ah->ah);
++ if (ret)
++ print_err("ibp_cmd_destroy_ah error %d\n", ret);
++
++ kfree(ah);
++ return ret;
++}
++
++static
++struct ibp_mlx5_db *ibp_mlx5_db_map_user(struct ibp_mlx5_ucontext *ucontext,
++ unsigned long virt)
++{
++ struct ibp_mlx5_device *dev = to_device(ucontext->ibucontext.device);
++ struct ibp_mlx5_db *db;
++ int ret;
++
++ virt &= PAGE_MASK;
++
++ mutex_lock(&ucontext->db_mutex);
++
++ list_for_each_entry(db, &ucontext->db_list, list)
++ if (db->user_virt == virt)
++ goto found;
++
++ db = kzalloc(sizeof(*db), GFP_KERNEL);
++ if (!db) {
++ print_err("kzalloc %ld bytes failed\n", sizeof(*db));
++ ret = -ENOMEM;
++ goto err0;
++ }
++
++ db->page = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ virt, PAGE_SIZE, 0);
++ if (IS_ERR(db->page)) {
++ ret = PTR_ERR(db->page);
++ print_err("ibp_reg_buf error %d\n", ret);
++ goto err1;
++ }
++
++ db->user_virt = virt;
++ list_add(&db->list, &ucontext->db_list);
++found:
++ ++db->refcnt;
++ mutex_unlock(&ucontext->db_mutex);
++
++ return db;
++err1:
++ kfree(db);
++err0:
++ mutex_unlock(&ucontext->db_mutex);
++ return ERR_PTR(ret);
++}
++
++static void ibp_mlx5_db_unmap_user(struct ibp_mlx5_ucontext *ucontext,
++ struct ibp_mlx5_db *db)
++{
++ struct ibp_mlx5_device *dev;
++
++ if (!ucontext || !db)
++ return;
++
++ dev = to_device(ucontext->ibucontext.device);
++
++ mutex_lock(&ucontext->db_mutex);
++ if (!--db->refcnt) {
++ list_del(&db->list);
++ ibp_dereg_buf(dev->ibpdev, db->page);
++ kfree(db);
++ }
++ mutex_unlock(&ucontext->db_mutex);
++}
++
++struct ib_srq *ibp_mlx5_create_srq(struct ib_pd *ibpd,
++ struct ib_srq_init_attr *attr,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibpd->device);
++ struct ibp_mlx5_pd *pd = to_pd(ibpd);
++ struct ibp_mlx5_create_srq_resp resp;
++ struct ibp_mlx5_create_srq_cmd cmd;
++ struct ibp_mlx5_ucontext *ucontext;
++ struct ibp_mlx5_srq *srq;
++ int desc_size, buf_size;
++ int ret;
++
++ print_trace("in\n");
++
++ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
++ if (!srq)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibpd->uobject ? to_ucontext(ibpd->uobject->context)
++ : dev->kcontext;
++
++ spin_lock_init(&srq->lock);
++
++ srq->max = roundup_pow_of_two(attr->attr.max_wr + 1);
++ srq->max_gs = attr->attr.max_sge;
++ desc_size = roundup_pow_of_two(sizeof(struct mlx5_wqe_srq_next_seg) +
++ srq->max_gs *
++ sizeof(struct mlx5_wqe_data_seg));
++ desc_size = max_t(int, 32, desc_size);
++ srq->max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
++ sizeof(struct mlx5_wqe_data_seg);
++ srq->wqe_shift = ilog2(desc_size);
++ buf_size = srq->max * desc_size;
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata returned %d\n", ret);
++ goto err0;
++ }
++ } else {
++ ret = ibp_mlx5_db_alloc(dev, &srq->dbrec);
++ if (ret) {
++ print_err("ibp_mlx5_db_alloc returned %d\n", ret);
++ goto err0;
++ }
++ cmd.data.db_addr = (uintptr_t)srq->dbrec.db;
++
++ *srq->dbrec.db = 0;
++
++ ret = ibp_mlx5_alloc_srq_buf(srq, desc_size);
++ if (ret) {
++ print_err("ibp_mlx5_alloc_srq_buf returned %d\n", ret);
++ goto err1;
++ }
++ cmd.data.buf_addr = (uintptr_t)srq->wqe_buf;
++
++ cmd.data.flags = 0;
++ cmd.data.reserved0 = 0;
++ cmd.data.uidx = 0;
++ cmd.data.reserved1 = 0;
++ }
++
++ srq->buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr, buf_size, 0);
++ if (IS_ERR(srq->buf)) {
++ ret = PTR_ERR(srq->buf);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto err2;
++ }
++
++ srq->db = ibp_mlx5_db_map_user(ucontext, cmd.data.db_addr);
++ if (IS_ERR(srq->db)) {
++ ret = PTR_ERR(srq->db);
++ goto err3;
++ }
++
++ ret = ibp_cmd_create_srq(dev->ibpdev, pd->pd,
++ attr, &srq->srq, &srq->ibsrq,
++ &cmd.msg, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_create_srq returned %d\n", ret);
++ goto err4;
++ }
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata returned %d\n", ret);
++ goto err5;
++ }
++ }
++
++ /*
++ * TODO: this is needed unconditionally because we always store
++ * the srq in the radix tree; mlx4 does things differently, and
++ * it is not clear why.
++ */
++ srq->srqn = resp.data.srqn;
++
++ ret = ibp_mlx5_insert_srq(dev, srq);
++ if (ret) {
++ print_err("ibp_mlx5_insert_srq returned %d\n", ret);
++ goto err5;
++ }
++
++ return &srq->ibsrq;
++err5:
++ ibp_cmd_destroy_srq(dev->ibpdev, srq->srq);
++err4:
++ ibp_mlx5_db_unmap_user(ucontext, srq->db);
++err3:
++ ibp_dereg_buf(dev->ibpdev, srq->buf);
++err2:
++ if (!udata)
++ ibp_mlx5_free_srq_buf(srq);
++err1:
++ if (!udata)
++ ibp_mlx5_db_free(dev, &srq->dbrec);
++err0:
++ kfree(srq);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx5_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
++{
++ struct ibp_mlx5_device *dev = to_device(ibsrq->device);
++ struct ibp_mlx5_srq *srq = to_srq(ibsrq);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_query_srq(dev->ibpdev, srq->srq, attr);
++ if (ret)
++ print_err("ibp_cmd_query_srq error %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx5_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
++ enum ib_srq_attr_mask mask, struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibsrq->device);
++ struct ibp_mlx5_srq *srq = to_srq(ibsrq);
++ struct ibp_modify_srq_resp resp;
++ struct ibp_modify_srq_cmd cmd;
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_modify_srq(dev->ibpdev, srq->srq, attr, mask,
++ &cmd, sizeof(cmd), &resp, sizeof(resp));
++ if (ret)
++ print_err("ibp_cmd_modify_srq error %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx5_destroy_srq(struct ib_srq *ibsrq)
++{
++ struct ibp_mlx5_device *dev = to_device(ibsrq->device);
++ struct ibp_mlx5_srq *srq = to_srq(ibsrq);
++ struct ibp_mlx5_ucontext *ucontext;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = ibsrq->uobject ? to_ucontext(ibsrq->uobject->context)
++ : dev->kcontext;
++
++ ibp_mlx5_remove_srq(dev, srq);
++
++ ret = ibp_cmd_destroy_srq(dev->ibpdev, srq->srq);
++ if (ret)
++ print_err("ibp_cmd_destroy_srq error %d\n", ret);
++
++ ibp_mlx5_db_unmap_user(ucontext, srq->db);
++ ibp_dereg_buf(dev->ibpdev, srq->buf);
++
++ if (!ibsrq->uobject) {
++ ibp_mlx5_free_srq_buf(srq);
++ ibp_mlx5_db_free(dev, &srq->dbrec);
++ }
++
++ kfree(srq);
++ return 0;
++}
++
++struct ib_qp *ibp_mlx5_create_qp(struct ib_pd *ibpd,
++ struct ib_qp_init_attr *attr,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibpd->device);
++ struct ibp_mlx5_pd *pd = to_pd(ibpd);
++ struct ibp_mlx5_ucontext *ucontext;
++ struct ibp_mlx5_create_qp_cmd cmd;
++ struct ibp_mlx5_create_qp_resp resp;
++ struct ibp_mlx5_qp *qp;
++ u64 send_cq, recv_cq, srq;
++ int buf_size, ret;
++
++ print_trace("in\n");
++
++ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
++ if (!qp)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibpd->uobject ? to_ucontext(ibpd->uobject->context)
++ : dev->kcontext;
++
++ mutex_init(&qp->mutex);
++ spin_lock_init(&qp->sq.lock);
++ spin_lock_init(&qp->rq.lock);
++
++ srq = (!!attr->srq || !!attr->xrcd) ? to_srq(attr->srq)->srq : 0;
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata error %d\n", ret);
++ goto err0;
++ }
++
++ buf_size = (cmd.data.rq_wqe_count << cmd.data.rq_wqe_shift)
++ + (cmd.data.sq_wqe_count << 6);
++ } else {
++ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
++ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
++
++ if (attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
++ qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
++
++ ret = ibp_mlx5_alloc_qp_buf(qp, attr, srq);
++ if (ret) {
++ print_err("ibp_mlx5_alloc_qp_buf returned %d\n", ret);
++ goto err0;
++ }
++
++ buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift)
++ + (qp->sq.wqe_cnt << qp->sq.wqe_shift);
++
++ attr->cap.max_send_wr = qp->sq.max_post;
++ attr->cap.max_recv_wr = qp->rq.max_post;
++ attr->cap.max_send_sge = qp->sq.max_gs;
++ attr->cap.max_recv_sge = qp->rq.max_gs;
++ attr->cap.max_inline_data = qp->max_inline_data;
++
++ cmd.data.buf_addr = (uintptr_t)qp->wqe_buf;
++ cmd.data.rq_wqe_shift = qp->rq.wqe_shift;
++ cmd.data.rq_wqe_count = qp->rq.wqe_cnt;
++ cmd.data.sq_wqe_count = qp->sq.wqe_cnt;
++
++ cmd.data.flags = 0;
++
++ ret = ibp_mlx5_db_alloc(dev, &qp->dbrec);
++ if (ret) {
++ print_err("ibp_mlx5_db_alloc returned %d\n", ret);
++ goto err1;
++ }
++ cmd.data.db_addr = (uintptr_t)qp->dbrec.db;
++
++ qp->dbrec.db[MLX5_RCV_DBR] = 0;
++ qp->dbrec.db[MLX5_SND_DBR] = 0;
++
++ ibp_mlx5_route_cq_comp(attr->send_cq);
++ ibp_mlx5_route_cq_comp(attr->recv_cq);
++ }
++
++ qp->buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr, buf_size, 0);
++ if (IS_ERR(qp->buf)) {
++ ret = PTR_ERR(qp->buf);
++ print_err("ibp_reg_buf error %d\n", ret);
++ goto err2;
++ }
++
++ qp->db = ibp_mlx5_db_map_user(ucontext, cmd.data.db_addr);
++ if (IS_ERR(qp->db)) {
++ ret = PTR_ERR(qp->db);
++ print_err("ibp_mlx5_db_map_user returned %d\n", ret);
++ goto err3;
++ }
++
++ send_cq = to_cq(attr->send_cq)->cq;
++ recv_cq = to_cq(attr->recv_cq)->cq;
++
++ ret = ibp_cmd_create_qp(dev->ibpdev, pd->pd, send_cq, recv_cq,
++ srq, attr, &qp->qp, &qp->ibqp,
++ &cmd.msg, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_create_qp error %d\n", ret);
++ goto err4;
++ }
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata error %d\n", ret);
++ goto err5;
++ }
++ } else {
++ qp->bf = &dev->bfs[resp.data.uuar_index]; /* Map uuar */
++ }
++
++ ret = ibp_mlx5_insert_qp(dev, qp);
++ if (ret) {
++ print_err("ibp_mlx5_insert_qp returned %d\n", ret);
++ goto err5;
++ }
++
++ return &qp->ibqp;
++err5:
++ ibp_cmd_destroy_qp(dev->ibpdev, qp->qp);
++err4:
++ ibp_mlx5_db_unmap_user(ucontext, qp->db);
++err3:
++ ibp_dereg_buf(dev->ibpdev, qp->buf);
++err2:
++ if (!udata)
++ ibp_mlx5_db_free(dev, &qp->dbrec);
++err1:
++ if (!udata)
++ ibp_mlx5_free_qp_buf(qp);
++err0:
++ kfree(qp);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx5_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
++ int qp_attr_mask, struct ib_qp_init_attr *init_attr)
++{
++ struct ibp_mlx5_device *dev = to_device(ibqp->device);
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_query_qp(dev->ibpdev, qp->qp, attr, qp_attr_mask,
++ init_attr);
++ if (ret)
++ print_err("ibp_cmd_query_qp error %d\n", ret);
++
++ return ret;
++}
++
++int ibp_mlx5_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
++ int qp_attr_mask, struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibqp->device);
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ struct ibp_modify_qp_resp resp;
++ struct ibp_modify_qp_cmd cmd;
++ int ret;
++
++ print_trace("in\n");
++
++ mutex_lock(&qp->mutex);
++
++ ret = ibp_cmd_modify_qp(dev->ibpdev, qp->qp, attr, qp_attr_mask,
++ &cmd, sizeof(cmd), &resp, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_modify_qp error %d\n", ret);
++ goto err;
++ }
++
++ if (!ibqp->uobject &&
++ (qp_attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RESET) {
++ ibp_mlx5_cq_clean(to_cq(ibqp->recv_cq), qp->ibqp.qp_num,
++ ibqp->srq ? to_srq(ibqp->srq) : NULL);
++ if (ibqp->send_cq != ibqp->recv_cq)
++ ibp_mlx5_cq_clean(to_cq(ibqp->send_cq),
++ qp->ibqp.qp_num, NULL);
++
++ qp->rq.head = 0;
++ qp->rq.tail = 0;
++ qp->sq.head = 0;
++ qp->sq.tail = 0;
++ qp->sq.cur_post = 0;
++ qp->sq.last_poll = 0;
++
++ if (qp->dbrec.db) {
++ qp->dbrec.db[MLX5_RCV_DBR] = 0;
++ qp->dbrec.db[MLX5_SND_DBR] = 0;
++ }
++ }
++err:
++ mutex_unlock(&qp->mutex);
++ return ret;
++}
++
++int ibp_mlx5_destroy_qp(struct ib_qp *ibqp)
++{
++ struct ibp_mlx5_device *dev = to_device(ibqp->device);
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ struct ibp_mlx5_ucontext *ucontext;
++ struct ibp_mlx5_cq *send_cq, *recv_cq;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = ibqp->uobject ? to_ucontext(ibqp->uobject->context)
++ : dev->kcontext;
++
++ ibp_mlx5_remove_qp(dev, qp);
++
++ ret = ibp_cmd_destroy_qp(dev->ibpdev, qp->qp);
++ if (ret)
++ print_err("ibp_cmd_destroy_qp returned %d\n", ret);
++
++ ibp_mlx5_db_unmap_user(ucontext, qp->db);
++ ibp_dereg_buf(dev->ibpdev, qp->buf);
++
++ if (!ibqp->uobject) {
++ send_cq = to_cq(qp->ibqp.send_cq);
++ recv_cq = to_cq(qp->ibqp.recv_cq);
++
++ ibp_mlx5_lock_cqs(send_cq, recv_cq);
++
++ __ibp_mlx5_cq_clean(recv_cq, qp->ibqp.qp_num,
++ ibqp->srq ? to_srq(ibqp->srq) : NULL);
++ if (ibqp->send_cq != ibqp->recv_cq)
++ __ibp_mlx5_cq_clean(send_cq, qp->ibqp.qp_num, NULL);
++
++ ibp_mlx5_unlock_cqs(send_cq, recv_cq);
++
++ ibp_mlx5_db_free(dev, &qp->dbrec);
++ ibp_mlx5_free_qp_buf(qp);
++ }
++
++ kfree(qp);
++ return 0;
++}
++
++struct ib_cq *ibp_mlx5_create_cq(struct ib_device *ibdev,
++ const struct ib_cq_init_attr *attr,
++ struct ib_ucontext *ibucontext,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibdev);
++ struct ibp_mlx5_create_cq_resp resp;
++ struct ibp_mlx5_create_cq_cmd cmd;
++ struct ibp_mlx5_ucontext *ucontext;
++ struct ibp_mlx5_cq *cq;
++ int comp_vector;
++ int nent, ret;
++
++ print_trace("in\n");
++
++ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
++ if (!cq)
++ return ERR_PTR(-ENOMEM);
++
++ ucontext = ibucontext ? to_ucontext(ibucontext) : dev->kcontext;
++
++ spin_lock_init(&cq->lock);
++
++ nent = attr->cqe;
++ comp_vector = attr->comp_vector;
++
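++	/*
++	 * The hardware needs a power-of-two CQE count; one entry is
++	 * reserved, so nent - 1 is reported back through ibcq.cqe
++	 * (the same convention as the native mlx5 driver).
++	 */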
++ nent = roundup_pow_of_two(nent + 1);
++ cq->ibcq.cqe = nent - 1;
++
++ if (udata) {
++ ret = ib_copy_from_udata(&cmd.data, udata, sizeof(cmd.data));
++ if (ret) {
++ print_err("ib_copy_from_udata error %d\n", ret);
++ goto err0;
++ }
++ } else {
++ ret = ibp_mlx5_db_alloc(dev, &cq->dbrec);
++ if (ret) {
++ print_err("ibp_mlx5_db_alloc returned %d\n", ret);
++ goto err0;
++ }
++ cmd.data.db_addr = (uintptr_t)cq->dbrec.db;
++
++ cq->set_ci_db = cq->dbrec.db;
++ cq->arm_db = cq->dbrec.db + 1;
++
++ *cq->set_ci_db = 0;
++ *cq->arm_db = 0;
++
++ /* Set to 64 bytes until MLX5 has a "proper interface" */
++ cq->cqe_sz = 64; /* See mlx5 driver */
++
++ ret = ibp_mlx5_alloc_cq_buf(cq, nent);
++ if (ret) {
++ print_err("ibp_mlx5_alloc_cq_buf returned %d\n", ret);
++ goto err1;
++ }
++
++ cmd.data.buf_addr = (uintptr_t)cq->cqe_buf;
++ cmd.data.cqe_size = cq->cqe_sz;
++ cmd.data.reserved = 0;
++ }
++
++ cq->buf = ibp_reg_buf(dev->ibpdev, ucontext->ucontext,
++ cmd.data.buf_addr,
++ nent * cmd.data.cqe_size,
++ IB_ACCESS_LOCAL_WRITE);
++ if (IS_ERR(cq->buf)) {
++ ret = PTR_ERR(cq->buf);
++ print_err("ibp_reg_buf error %d\n", ret);
++ goto err2;
++ }
++
++ cq->db = ibp_mlx5_db_map_user(ucontext, cmd.data.db_addr);
++ if (IS_ERR(cq->db)) {
++ ret = PTR_ERR(cq->db);
++ print_err("ibp_mlx5_db_map_user returned %d\n", ret);
++ goto err3;
++ }
++
++ ret = ibp_cmd_create_cq(dev->ibpdev, ucontext->ucontext,
++ cq->ibcq.cqe, comp_vector, &cq->cq, &cq->ibcq,
++ &cmd.msg, sizeof(cmd), &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_create_cq error %d\n", ret);
++ goto err4;
++ }
++
++ if (udata) {
++ ret = ib_copy_to_udata(udata, &resp.data, sizeof(resp.data));
++ if (ret) {
++ print_err("ib_copy_to_udata error %d\n", ret);
++ goto err5;
++ }
++ } else {
++ cq->cqn = resp.data.cqn;
++ }
++
++ return &cq->ibcq;
++err5:
++ ibp_cmd_destroy_cq(dev->ibpdev, cq->cq);
++err4:
++ ibp_mlx5_db_unmap_user(ucontext, cq->db);
++err3:
++ ibp_dereg_buf(dev->ibpdev, cq->buf);
++err2:
++ if (!udata)
++ ibp_mlx5_free_cq_buf(cq);
++err1:
++ if (!udata)
++ ibp_mlx5_db_free(dev, &cq->dbrec);
++err0:
++ kfree(cq);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx5_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
++{
++ print_trace("in\n");
++
++ return -ENOSYS;
++}
++
++int ibp_mlx5_destroy_cq(struct ib_cq *ibcq)
++{
++ struct ibp_mlx5_device *dev = to_device(ibcq->device);
++ struct ibp_mlx5_cq *cq = to_cq(ibcq);
++ struct ibp_mlx5_ucontext *ucontext;
++ int ret;
++
++ print_trace("in\n");
++
++ ucontext = ibcq->uobject ? to_ucontext(ibcq->uobject->context)
++ : dev->kcontext;
++
++ ret = ibp_cmd_destroy_cq(dev->ibpdev, cq->cq);
++ if (ret)
++ print_err("ibp_cmd_destroy_cq error %d\n", ret);
++
++ ibp_mlx5_db_unmap_user(ucontext, cq->db);
++ ibp_dereg_buf(dev->ibpdev, cq->buf);
++
++ if (!ibcq->uobject) {
++ ibp_mlx5_free_cq_buf(cq);
++ ibp_mlx5_db_free(dev, &cq->dbrec);
++ }
++
++ kfree(cq);
++ return 0;
++}
++
++struct ib_mr *ibp_mlx5_get_dma_mr(struct ib_pd *ibpd, int access)
++{
++ struct ibp_mlx5_device *dev = to_device(ibpd->device);
++ struct ibp_mlx5_pd *pd = to_pd(ibpd);
++ struct ibp_mlx5_mr *mr;
++ int ret;
++
++ print_trace("in\n");
++
++ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
++ if (!mr)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ibp_cmd_get_dma_mr(dev->ibpdev, pd->pd, access,
++ &mr->mr, &mr->ibmr.lkey, &mr->ibmr.rkey);
++ if (ret) {
++ print_err("ibp_cmd_get_dma_mr returned %d\n", ret);
++ goto err;
++ }
++
++ return &mr->ibmr;
++err:
++ kfree(mr);
++ return ERR_PTR(ret);
++}
++
++struct ib_mr *ibp_mlx5_reg_user_mr(struct ib_pd *ibpd,
++ u64 start, u64 length,
++ u64 virt_addr, int access,
++ struct ib_udata *udata)
++{
++ struct ibp_mlx5_device *dev = to_device(ibpd->device);
++ struct ibp_mlx5_pd *pd = to_pd(ibpd);
++ struct ibp_reg_user_mr_resp resp;
++ struct ibp_reg_user_mr_cmd cmd;
++ struct ibp_mlx5_mr *mr;
++ int ret;
++
++ print_trace("in\n");
++
++ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
++ if (!mr)
++ return ERR_PTR(-ENOMEM);
++
++ ret = ibp_cmd_reg_user_mr(dev->ibpdev, pd->pd,
++ start, length, virt_addr, access,
++ &mr->mr, &mr->ibmr.lkey, &mr->ibmr.rkey,
++ &cmd, sizeof(cmd), &resp, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_reg_user_mr error %d\n", ret);
++ goto err;
++ }
++
++ return &mr->ibmr;
++err:
++ kfree(mr);
++ return ERR_PTR(ret);
++}
++
++int ibp_mlx5_dereg_mr(struct ib_mr *ibmr)
++{
++ struct ibp_mlx5_device *dev = to_device(ibmr->device);
++ struct ibp_mlx5_mr *mr = to_mr(ibmr);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_dereg_mr(dev->ibpdev, mr->mr);
++ if (ret)
++ print_err("ibp_cmd_dereg_mr error %d\n", ret);
++
++ kfree(mr);
++ return 0;
++}
++
++static int ibp_mlx5_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
++{
++ struct ibp_mlx5_device *dev = to_device(ibqp->device);
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_attach_mcast(dev->ibpdev, qp->qp, gid, lid);
++ if (ret)
++ print_err("ibp_cmd_attach_mcast returned %d\n", ret);
++
++ return ret;
++}
++
++static int ibp_mlx5_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
++{
++ struct ibp_mlx5_device *dev = to_device(ibqp->device);
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ int ret;
++
++ print_trace("in\n");
++
++ ret = ibp_cmd_detach_mcast(dev->ibpdev, qp->qp, gid, lid);
++ if (ret)
++ print_err("ibp_cmd_detach_mcast returned %d\n", ret);
++
++ return ret;
++}
++
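++/*
++ * These DMA mapping ops are identity/no-op stubs: "DMA" addresses are
++ * plain kernel virtual addresses, and the sync/unmap hooks do nothing.
++ * The ibp proxy moves the data itself, so no device-side mapping is
++ * needed here.
++ */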
++static void ibp_mlx5_dma_nop(struct ib_device *ibdev, u64 addr, size_t size,
++ enum dma_data_direction direction)
++{
++}
++
++static int ibp_mlx5_mapping_error(struct ib_device *ibdev, u64 dma_addr)
++{
++ return !dma_addr;
++}
++
++static u64 ibp_mlx5_dma_map_single(struct ib_device *ibdev,
++ void *cpu_addr, size_t size,
++ enum dma_data_direction direction)
++{
++ return (u64)cpu_addr;
++}
++
++static u64 ibp_mlx5_dma_map_page(struct ib_device *ibdev, struct page *page,
++ unsigned long offset, size_t size,
++ enum dma_data_direction direction)
++{
++ u64 addr;
++
++ if (offset + size > PAGE_SIZE)
++ return 0;
++
++ addr = (u64)page_address(page);
++ if (addr)
++ addr += offset;
++
++ return addr;
++}
++
++static int ibp_mlx5_map_sg(struct ib_device *ibdev, struct scatterlist *sg,
++ int nents, enum dma_data_direction direction)
++{
++ u64 addr;
++ int i, ret = nents;
++
++ for (i = 0; i < nents; i++, sg++) {
++ addr = (u64)page_address(sg_page(sg));
++ if (!addr) {
++ ret = 0;
++ break;
++ }
++
++ sg->dma_address = sg->offset + addr;
++ sg->dma_length = sg->length;
++ }
++
++ return ret;
++}
++
++static void ibp_mlx5_unmap_sg(struct ib_device *ibdev, struct scatterlist *sg,
++ int nents, enum dma_data_direction direction)
++{
++ print_trace("in\n");
++}
++
++static void ibp_mlx5_sync_single(struct ib_device *ibdev, u64 dma, size_t size,
++ enum dma_data_direction direction)
++{
++ print_trace("in\n");
++}
++
++static void *ibp_mlx5_dma_alloc_coherent(struct ib_device *ibdev, size_t size,
++ u64 *dma_handle, gfp_t flag)
++{
++ struct page *p = alloc_pages(flag, get_order(size));
++ void *addr = p ? page_address(p) : NULL;
++
++ print_trace("in\n");
++
++ if (dma_handle)
++ *dma_handle = (u64)addr;
++
++ return addr;
++}
++
++static void ibp_mlx5_dma_free_coherent(struct ib_device *ibdev, size_t size,
++ void *cpu_addr, u64 dma_handle)
++{
++ print_trace("in\n");
++
++ free_pages((unsigned long)cpu_addr, get_order(size));
++}
++
++struct ib_dma_mapping_ops ibp_mlx5_dma_ops = {
++ .mapping_error = ibp_mlx5_mapping_error,
++ .map_single = ibp_mlx5_dma_map_single,
++ .unmap_single = ibp_mlx5_dma_nop,
++ .map_page = ibp_mlx5_dma_map_page,
++ .unmap_page = ibp_mlx5_dma_nop,
++ .map_sg = ibp_mlx5_map_sg,
++ .unmap_sg = ibp_mlx5_unmap_sg,
++ .sync_single_for_cpu = ibp_mlx5_sync_single,
++ .sync_single_for_device = ibp_mlx5_sync_single,
++ .alloc_coherent = ibp_mlx5_dma_alloc_coherent,
++ .free_coherent = ibp_mlx5_dma_free_coherent,
++};
++
++int ibp_mlx5_register_device(struct ibp_mlx5_device *dev)
++{
++ strlcpy(dev->ibdevice.name, dev->ibpdev->name, IB_DEVICE_NAME_MAX);
++
++ dev->ibdevice.owner = THIS_MODULE;
++ dev->ibdevice.node_type = RDMA_NODE_IB_CA;
++ dev->ibdevice.node_guid = dev->ibpdev->node_guid;
++ dev->ibdevice.dma_device = dev->ibpdev->linux_dev;
++ dev->ibdevice.phys_port_cnt = dev->ibpdev->phys_port_cnt;
++ dev->ibdevice.num_comp_vectors = dev->ibpdev->num_comp_vectors;
++
++ dev->ibdevice.uverbs_abi_ver = dev->ibpdev->uverbs_abi_ver;
++ dev->ibdevice.uverbs_cmd_mask =
++ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
++ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
++ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
++ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
++ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
++ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
++ (1ull << IB_USER_VERBS_CMD_REG_MR) |
++ (1ull << IB_USER_VERBS_CMD_DEREG_MR);
++
++ /* Set mask to the intersection of this driver and the native driver. */
++ dev->ibdevice.uverbs_cmd_mask &= dev->ibpdev->uverbs_cmd_mask;
++
++ dev->ibdevice.query_device = ibp_mlx5_query_device;
++ dev->ibdevice.query_port = ibp_mlx5_query_port;
++ dev->ibdevice.query_pkey = ibp_mlx5_query_pkey;
++ dev->ibdevice.modify_port = ibp_mlx5_modify_port;
++ dev->ibdevice.query_gid = ibp_mlx5_query_gid;
++ dev->ibdevice.alloc_ucontext = ibp_mlx5_alloc_ucontext;
++ dev->ibdevice.dealloc_ucontext = ibp_mlx5_dealloc_ucontext;
++ dev->ibdevice.mmap = ibp_mlx5_mmap;
++ dev->ibdevice.alloc_pd = ibp_mlx5_alloc_pd;
++ dev->ibdevice.dealloc_pd = ibp_mlx5_dealloc_pd;
++ dev->ibdevice.create_ah = ibp_mlx5_create_ah;
++ dev->ibdevice.destroy_ah = ibp_mlx5_destroy_ah;
++ dev->ibdevice.create_srq = ibp_mlx5_create_srq;
++ dev->ibdevice.query_srq = ibp_mlx5_query_srq;
++ dev->ibdevice.modify_srq = ibp_mlx5_modify_srq;
++ dev->ibdevice.destroy_srq = ibp_mlx5_destroy_srq;
++ dev->ibdevice.create_qp = ibp_mlx5_create_qp;
++ dev->ibdevice.query_qp = ibp_mlx5_query_qp;
++ dev->ibdevice.modify_qp = ibp_mlx5_modify_qp;
++ dev->ibdevice.destroy_qp = ibp_mlx5_destroy_qp;
++ dev->ibdevice.create_cq = ibp_mlx5_create_cq;
++ dev->ibdevice.resize_cq = ibp_mlx5_resize_cq;
++ dev->ibdevice.destroy_cq = ibp_mlx5_destroy_cq;
++ dev->ibdevice.poll_cq = ibp_mlx5_poll_cq;
++ dev->ibdevice.req_notify_cq = ibp_mlx5_arm_cq;
++ dev->ibdevice.get_dma_mr = ibp_mlx5_get_dma_mr;
++ dev->ibdevice.reg_user_mr = ibp_mlx5_reg_user_mr;
++ dev->ibdevice.dereg_mr = ibp_mlx5_dereg_mr;
++ dev->ibdevice.post_send = ibp_mlx5_post_send;
++ dev->ibdevice.post_recv = ibp_mlx5_post_recv;
++ dev->ibdevice.post_srq_recv = ibp_mlx5_post_srq_recv;
++ dev->ibdevice.attach_mcast = ibp_mlx5_attach_mcast;
++ dev->ibdevice.detach_mcast = ibp_mlx5_detach_mcast;
++ dev->ibdevice.dma_ops = &ibp_mlx5_dma_ops;
++
++ return ib_register_device(&dev->ibdevice, NULL);
++}
++
++void ibp_mlx5_unregister_device(struct ibp_mlx5_device *dev)
++{
++ ib_unregister_device(&dev->ibdevice);
++}
++
++static int ibp_mlx5_create_kcontext(struct ibp_mlx5_device *dev)
++{
++ struct ibp_mlx5_alloc_ucontext_resp resp;
++ struct ibp_mlx5_alloc_ucontext_cmd cmd;
++ struct ibp_mlx5_ucontext *kcontext;
++ struct ibp_mlx5_bf *bf;
++ phys_addr_t offset;
++ int gross_uuars;
++ int ret;
++ int i;
++
++ kcontext = kzalloc(sizeof(*kcontext), GFP_KERNEL);
++ if (!kcontext)
++ return -ENOMEM;
++
++ mutex_init(&kcontext->db_mutex);
++ INIT_LIST_HEAD(&kcontext->db_list);
++ kcontext->ibucontext.device = &dev->ibdevice;
++
++ cmd.data.total_num_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
++ cmd.data.num_low_latency_uuars = NUM_LOW_LAT_UUARS;
++
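++	/*
++	 * Each UAR page holds 4 blue-flame register slots, 2 of them
++	 * non-fast-path; scale the non-fast-path count up to the gross
++	 * slot count, as in the native mlx5 driver.
++	 */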
++ gross_uuars = cmd.data.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE * 4;
++ dev->bfs = kcalloc(gross_uuars, sizeof(*dev->bfs), GFP_KERNEL);
++ if (!dev->bfs) {
++ ret = -ENOMEM;
++ print_err("kcalloc bfs failed\n");
++ goto err0;
++ }
++
++ ret = ibp_cmd_alloc_ucontext(dev->ibpdev, NULL, &kcontext->ucontext,
++ &cmd.msg, sizeof(cmd),
++ &resp.msg, sizeof(resp));
++ if (ret) {
++ print_err("ibp_cmd_alloc_ucontext returned %d\n", ret);
++ goto err1;
++ }
++
++ dev->kcontext = kcontext;
++ dev->tot_uuars = resp.data.tot_uuars;
++
++ INIT_RADIX_TREE(&dev->qp_table_tree, GFP_KERNEL);
++ spin_lock_init(&dev->qp_table_lock);
++
++ INIT_RADIX_TREE(&dev->srq_table_tree, GFP_KERNEL);
++ spin_lock_init(&dev->srq_table_lock);
++
++ mutex_init(&dev->pgdir_mutex);
++ INIT_LIST_HEAD(&dev->pgdir_list);
++
++ MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
++
++ dev->uars = kcalloc(dev->tot_uuars, sizeof(*dev->uars), GFP_KERNEL);
++ if (!dev->uars) {
++ ret = -ENOMEM;
++ print_err("kcalloc uars failed\n");
++ goto err2;
++ }
++
++ for (i = 0; i < dev->tot_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; ++i) {
++ offset = 0;
++ set_command(MLX5_IB_MMAP_REGULAR_PAGE, &offset);
++ set_index(i, &offset);
++ dev->uars[i] = ibp_cmd_ioremap(dev->ibpdev, kcontext->ucontext,
++ offset, PAGE_SIZE);
++ if (IS_ERR(dev->uars[i])) {
++ ret = PTR_ERR(dev->uars[i]);
++ print_err("ibp_cmd_ioremap returned %d\n", ret);
++ goto err3;
++ }
++ }
++
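++	/*
++	 * Slot 0 and the trailing low-latency slots have a single owner
++	 * and can skip the blue-flame lock; all others must serialize.
++	 */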
++ for (i = 0; i < gross_uuars; i++) {
++ bf = &dev->bfs[i];
++
++ if (i)
++ bf->buf_size = resp.data.bf_reg_size / 2;
++ bf->regreg = dev->uars[i / 4]->addr +
++ MLX5_BF_OFFSET + (i % 4) * resp.data.bf_reg_size;
++ bf->reg = NULL; /* TODO: add write-combining (WC) support */
++ bf->offset = 0;
++ bf->need_lock =
++ !((i == 0) || i >= (dev->tot_uuars - NUM_LOW_LAT_UUARS) * 2);
++ spin_lock_init(&bf->lock);
++ spin_lock_init(&bf->lock32);
++ bf->uuarn = i;
++ }
++
++ return 0;
++err3:
++ for (--i; i >= 0; --i)
++ ibp_cmd_iounmap(dev->uars[i]);
++
++ kfree(dev->uars);
++err2:
++ ibp_cmd_dealloc_ucontext(dev->ibpdev, kcontext->ucontext);
++err1:
++ kfree(dev->bfs);
++err0:
++ kfree(kcontext);
++ return ret;
++}
++
++static void ibp_mlx5_destroy_kcontext(struct ibp_mlx5_device *dev)
++{
++ int i;
++
++ for (i = 0; i < dev->tot_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; ++i)
++ ibp_cmd_iounmap(dev->uars[i]);
++
++ kfree(dev->uars);
++ ibp_cmd_dealloc_ucontext(dev->ibpdev, dev->kcontext->ucontext);
++ kfree(dev->bfs);
++ kfree(dev->kcontext);
++}
++
++int ibp_mlx5_add_one(struct ibp_device *ibpdev)
++{
++ struct ibp_mlx5_device *dev;
++ int ret;
++
++ if (ibpdev->uverbs_abi_ver != MLX5_IB_UVERBS_ABI_VERSION) {
++ print_err("ignoring %s: uverbs ABI version %d, expected %d\n",
++ ibpdev->name, ibpdev->uverbs_abi_ver,
++ MLX5_IB_UVERBS_ABI_VERSION);
++ return -EINVAL;
++ }
++
++ dev = (struct ibp_mlx5_device *)ib_alloc_device(sizeof(*dev));
++ if (!dev) {
++ print_err("ib_alloc_device failed\n");
++ return -ENOMEM;
++ }
++ dev->ibpdev = ibpdev;
++
++ ret = ibp_mlx5_create_kcontext(dev);
++ if (ret) {
++ print_err("ibp_mlx5_create_kcontext returned %d\n", ret);
++ goto err0;
++ }
++
++ ret = ibp_mlx5_register_device(dev);
++ if (ret) {
++ print_err("ibp_mlx5_register_device error %d\n", ret);
++ goto err1;
++ }
++
++ ibp_set_driver_data(ibpdev, (uintptr_t)dev);
++
++ return 0;
++err1:
++ ibp_mlx5_destroy_kcontext(dev);
++err0:
++ ib_dealloc_device(&dev->ibdevice);
++ return ret;
++}
++
++void ibp_mlx5_remove_one(struct ibp_device *ibpdev)
++{
++ struct ibp_mlx5_device *dev;
++
++ dev = (struct ibp_mlx5_device *)ibp_get_driver_data(ibpdev);
++ if (!dev)
++ return;
++
++ ibp_mlx5_unregister_device(dev);
++ ibp_set_driver_data(ibpdev, (uintptr_t)NULL);
++ ibp_mlx5_destroy_kcontext(dev);
++ ib_dealloc_device(&dev->ibdevice);
++}
++
++static u64 ibp_mlx5_resolve_one(struct ib_device *ibdev)
++{
++ return to_device(ibdev)->ibpdev->ib_device;
++}
++
++static const struct ibp_id_table ibp_mlx5_id_table[] = {
++ { PCI_VENDOR_ID_MELLANOX, 4113 }, /* Connect-IB */
++ { PCI_VENDOR_ID_MELLANOX, 4114 }, /* Connect-IB VF */
++ { PCI_VENDOR_ID_MELLANOX, 4115 }, /* ConnectX-4 */
++ { PCI_VENDOR_ID_MELLANOX, 4116 }, /* ConnectX-4 VF */
++ { PCI_VENDOR_ID_MELLANOX, 4117 }, /* ConnectX-4LX */
++ { PCI_VENDOR_ID_MELLANOX, 4118 }, /* ConnectX-4LX VF */
++ { 0, }
++};
++
++struct ibp_driver ibp_mlx5_driver = {
++ .name = DRV_NAME,
++ .id_table = ibp_mlx5_id_table,
++ .add = ibp_mlx5_add_one,
++ .remove = ibp_mlx5_remove_one,
++ .resolve = ibp_mlx5_resolve_one,
++};
++
++static int __init ibp_mlx5_init(void)
++{
++ int ret;
++
++ print_info(DRV_SIGNON);
++
++ ret = ibp_register_driver(&ibp_mlx5_driver);
++ if (ret)
++ print_err("ibp_register_driver error %d\n", ret);
++
++ return ret;
++}
++
++static void __exit ibp_mlx5_exit(void)
++{
++ ibp_unregister_driver(&ibp_mlx5_driver);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_mlx5_init);
++module_exit(ibp_mlx5_exit);
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/main.h b/drivers/infiniband/ibp/drv/hw/mlx5/main.h
+new file mode 100644
+index 0000000..26c37b9
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/main.h
+@@ -0,0 +1,327 @@
++/*
++ * Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef MAIN_H
++#define MAIN_H
++
++#include <linux/module.h>
++#include <linux/pci_ids.h>
++#include <linux/slab.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_user_verbs.h>
++#include "ibp.h"
++
++#include <linux/mlx5/cq.h>
++#include <linux/mlx5/qp.h>
++#include "hw/mlx5/user.h"
++
++#define DRV_ROLE "Mellanox ConnectX-IB HCA driver"
++#define DRV_NAME "ibp_mlx5"
++#include "compat.h"
++
++/* enum from drivers/net/ethernet/mellanox/mlx5/core/uar.c */
++enum {
++ NUM_DRIVER_UARS = 4,
++ NUM_LOW_LAT_UUARS = 4,
++};
++
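++/*
++ * mmap offsets encode a command in the bits above MLX5_IB_MMAP_CMD_SHIFT
++ * and a page index in the low bits (the mlx5 uverbs mmap convention).
++ */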
++static inline void set_command(int command, phys_addr_t *offset)
++{
++ *offset |= (command << MLX5_IB_MMAP_CMD_SHIFT);
++}
++
++static inline void set_index(int index, phys_addr_t *offset)
++{
++ *offset |= index;
++}
++
++struct ibp_mlx5_db_pgdir {
++ struct list_head list;
++ DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE);
++ void *db_page;
++ dma_addr_t db_dma;
++};
++
++struct ibp_mlx5_dbrec {
++ __be32 *db;
++ union {
++ struct ibp_mlx5_db_pgdir *pgdir;
++ struct ibp_ib_user_db_page *user_page;
++ } u;
++ dma_addr_t dma;
++ int index;
++};
++
++struct ibp_mlx5_bf {
++ void __iomem *reg;
++ void __iomem *regreg;
++ int buf_size;
++ unsigned long offset;
++ int need_lock;
++ /*
++ * protect blue flame buffer selection when needed
++ */
++ spinlock_t lock;
++
++ /*
++ * serialize 64 bit writes when done as two 32 bit accesses
++ */
++ spinlock_t lock32;
++ int uuarn;
++};
++
++struct ibp_mlx5_device {
++ struct ib_device ibdevice;
++ struct ibp_device *ibpdev;
++
++ struct ibp_mlx5_ucontext *kcontext;
++ struct ibp_mlx5_bf *bfs;
++ struct ibp_iomem **uars;
++ MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
++ struct list_head pgdir_list;
++ struct mutex pgdir_mutex;
++ struct radix_tree_root qp_table_tree;
++ spinlock_t qp_table_lock;
++ struct radix_tree_root srq_table_tree;
++ spinlock_t srq_table_lock;
++ int tot_uuars;
++};
++
++struct ibp_mlx5_db {
++ struct list_head list;
++ unsigned long user_virt;
++ struct ibp_rb *page;
++ int refcnt;
++};
++
++struct ibp_mlx5_ucontext {
++ struct ib_ucontext ibucontext;
++ u64 ucontext;
++ struct list_head db_list;
++ struct mutex db_mutex;
++};
++
++struct ibp_mlx5_pd {
++ struct ib_pd ibpd;
++ u64 pd;
++ u32 pdn;
++};
++
++struct ibp_mlx5_ah {
++ struct ib_ah ibah;
++ u64 ah;
++ struct mlx5_av av;
++};
++
++struct ibp_mlx5_mr {
++ struct ib_mr ibmr;
++ u64 mr;
++};
++
++struct ibp_mlx5_cq {
++ struct ib_cq ibcq;
++ struct ibp_rb *buf;
++ struct ibp_mlx5_db *db;
++ u64 cq;
++
++ ib_comp_handler comp;
++ struct ibp_mlx5_dbrec dbrec;
++ spinlock_t lock;
++ void *cqe_buf;
++ u32 cons_index;
++ __be32 *set_ci_db;
++ __be32 *arm_db;
++ unsigned arm_sn;
++ int cqe_sz;
++ u32 cqn;
++};
++
++struct ibp_mlx5_wq {
++ u64 *wrid;
++ u32 *wr_data;
++ struct wr_list *w_list;
++ unsigned *wqe_head;
++
++ spinlock_t lock;
++ int wqe_cnt;
++ int max_post;
++ int max_gs;
++ int offset;
++ int wqe_shift;
++ unsigned head;
++ unsigned tail;
++ u16 cur_post;
++ u16 last_poll;
++ void *qend;
++};
++
++struct ibp_mlx5_qp {
++ struct ib_qp ibqp;
++ struct ibp_rb *buf;
++ struct ibp_mlx5_db *db;
++ struct ibp_mlx5_bf *bf;
++ u64 qp;
++
++ struct ibp_mlx5_dbrec dbrec;
++ struct ibp_mlx5_wq rq;
++ struct ibp_mlx5_wq sq;
++ struct mutex mutex;
++ void *wqe_buf;
++ u32 flags;
++ u32 max_inline_data;
++ u8 sq_signal_bits;
++ u8 fm_cache;
++};
++
++struct ibp_mlx5_srq {
++ struct ib_srq ibsrq;
++ struct ibp_rb *buf;
++ struct ibp_mlx5_db *db;
++ u64 srq;
++
++ struct ibp_mlx5_dbrec dbrec;
++ spinlock_t lock;
++ int head;
++ int tail;
++ u64 *wrid;
++ void *wqe_buf;
++ int wqe_shift;
++ u16 wqe_ctr;
++ int max_avail_gather;
++ int max_gs;
++ int max;
++ int srqn;
++};
++
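++/*
++ * TO_OBJ() generates container_of() helpers that downcast the ib_* core
++ * structures to this driver's ibp_mlx5_* wrappers.
++ */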
++#define TO_OBJ(x) \
++static inline struct ibp_mlx5_##x *to_##x(struct ib_##x *ib##x) \
++{ \
++ return container_of(ib##x, struct ibp_mlx5_##x, ib##x); \
++}
++TO_OBJ(ucontext)
++TO_OBJ(device)
++TO_OBJ(pd)
++TO_OBJ(ah)
++TO_OBJ(mr)
++TO_OBJ(cq)
++TO_OBJ(qp)
++TO_OBJ(srq)
++
++struct ibp_mlx5_alloc_ucontext_cmd {
++ struct ibp_alloc_ucontext_cmd msg;
++ struct mlx5_ib_alloc_ucontext_req data;
++};
++
++struct ibp_mlx5_alloc_ucontext_resp {
++ struct ibp_alloc_ucontext_resp msg;
++ struct mlx5_ib_alloc_ucontext_resp data;
++};
++
++struct ibp_mlx5_alloc_pd_resp {
++ struct ibp_alloc_pd_resp msg;
++ struct mlx5_ib_alloc_pd_resp data;
++};
++
++struct ibp_mlx5_create_srq_cmd {
++ struct ibp_create_srq_cmd msg;
++ struct mlx5_ib_create_srq data;
++};
++
++struct ibp_mlx5_create_srq_resp {
++ struct ibp_create_srq_resp msg;
++ struct mlx5_ib_create_srq_resp data;
++};
++
++struct ibp_mlx5_create_qp_cmd {
++ struct ibp_create_qp_cmd msg;
++ struct mlx5_ib_create_qp data;
++};
++
++struct ibp_mlx5_create_qp_resp {
++ struct ibp_create_qp_resp msg;
++ struct mlx5_ib_create_qp_resp data;
++};
++
++struct ibp_mlx5_create_cq_cmd {
++ struct ibp_create_cq_cmd msg;
++ struct mlx5_ib_create_cq data;
++};
++
++struct ibp_mlx5_create_cq_resp {
++ struct ibp_create_cq_resp msg;
++ struct mlx5_ib_create_cq_resp data;
++};
++
++int ibp_mlx5_db_alloc(struct ibp_mlx5_device *dev,
++ struct ibp_mlx5_dbrec *db);
++void ibp_mlx5_db_free(struct ibp_mlx5_device *dev,
++ struct ibp_mlx5_dbrec *db);
++
++int ibp_mlx5_alloc_srq_buf(struct ibp_mlx5_srq *srq, int desc_size);
++void ibp_mlx5_free_srq_buf(struct ibp_mlx5_srq *srq);
++int ibp_mlx5_insert_srq(struct ibp_mlx5_device *dev, struct ibp_mlx5_srq *srq);
++void ibp_mlx5_remove_srq(struct ibp_mlx5_device *dev, struct ibp_mlx5_srq *srq);
++struct ibp_mlx5_srq *ibp_mlx5_srq_lookup(struct ibp_mlx5_device *dev, u32 srqn);
++void ibp_mlx5_free_srq_wqe(struct ibp_mlx5_srq *srq, int wqe_index);
++int ibp_mlx5_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr);
++
++int ibp_mlx5_alloc_cq_buf(struct ibp_mlx5_cq *cq, int nent);
++void ibp_mlx5_free_cq_buf(struct ibp_mlx5_cq *cq);
++void ibp_mlx5_lock_cqs(struct ibp_mlx5_cq *send_cq,
++ struct ibp_mlx5_cq *recv_cq);
++void ibp_mlx5_unlock_cqs(struct ibp_mlx5_cq *send_cq,
++ struct ibp_mlx5_cq *recv_cq);
++void ibp_mlx5_cq_clean(struct ibp_mlx5_cq *cq, u32 qpn,
++ struct ibp_mlx5_srq *srq);
++void __ibp_mlx5_cq_clean(struct ibp_mlx5_cq *cq, u32 rsn,
++ struct ibp_mlx5_srq *srq);
++int ibp_mlx5_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
++int ibp_mlx5_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
++void ibp_mlx5_route_cq_comp(struct ib_cq *ibcq);
++
++int ibp_mlx5_alloc_qp_buf(struct ibp_mlx5_qp *qp, struct ib_qp_init_attr *attr,
++ u64 srq);
++void ibp_mlx5_free_qp_buf(struct ibp_mlx5_qp *qp);
++int ibp_mlx5_insert_qp(struct ibp_mlx5_device *dev, struct ibp_mlx5_qp *qp);
++void ibp_mlx5_remove_qp(struct ibp_mlx5_device *dev, struct ibp_mlx5_qp *qp);
++struct ibp_mlx5_qp *ibp_mlx5_qp_lookup(struct ibp_mlx5_device *dev, u32 qpn);
++void *ibp_mlx5_get_send_wqe(struct ibp_mlx5_qp *qp, int n);
++int ibp_mlx5_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
++ struct ib_send_wr **bad_wr);
++int ibp_mlx5_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr);
++
++#endif /* MAIN_H */
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/qp.c b/drivers/infiniband/ibp/drv/hw/mlx5/qp.c
+new file mode 100644
+index 0000000..e44c0ea
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/qp.c
+@@ -0,0 +1,714 @@
++/*
++ * Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++enum {
++ MLX5_IB_SQ_STRIDE = 6,
++ MLX5_IB_CACHE_LINE_SIZE = 64,
++};
++
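++/*
++ * IB work-request opcode to MLX5 hardware opcode. The UMR-based entries
++ * only keep the table dense; post_send rejects those opcodes.
++ */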
++static const u32 mlx5_ib_opcode[] = {
++ [IB_WR_SEND] = MLX5_OPCODE_SEND,
++ [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
++ [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
++ [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
++ [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
++ [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
++ [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
++ [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
++ [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, /* not supported */
++ [IB_WR_REG_MR] = MLX5_OPCODE_UMR, /* not supported */
++ [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
++ [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
++ [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, /* not supported */
++};
++
++static void *get_wqe(struct ibp_mlx5_qp *qp, int offset)
++{
++ return qp->wqe_buf + offset;
++}
++
++static void *get_recv_wqe(struct ibp_mlx5_qp *qp, int n)
++{
++ return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
++}
++
++void *ibp_mlx5_get_send_wqe(struct ibp_mlx5_qp *qp, int n)
++{
++ return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
++}
++
++static int sq_overhead(enum ib_qp_type qp_type)
++{
++ int size = 0;
++
++ switch (qp_type) {
++ case IB_QPT_XRC_INI:
++ size += sizeof(struct mlx5_wqe_xrc_seg);
++ /* fall through */
++ case IB_QPT_RC:
++ size += sizeof(struct mlx5_wqe_ctrl_seg) +
++ sizeof(struct mlx5_wqe_atomic_seg) +
++ sizeof(struct mlx5_wqe_raddr_seg);
++ break;
++
++ case IB_QPT_XRC_TGT:
++ return 0;
++
++ case IB_QPT_UC:
++ size += sizeof(struct mlx5_wqe_ctrl_seg) +
++ sizeof(struct mlx5_wqe_raddr_seg) +
++ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
++ sizeof(struct mlx5_mkey_seg);
++ break;
++
++ case IB_QPT_UD:
++ case IB_QPT_SMI:
++ case IB_QPT_GSI:
++ size += sizeof(struct mlx5_wqe_ctrl_seg) +
++ sizeof(struct mlx5_wqe_datagram_seg);
++ break;
++
++ default:
++ print_err("MLX5 sq_overhead bad type %d\n", qp_type);
++ return -EINVAL;
++ }
++
++ return size;
++}
++
++static int calc_send_wqe(struct ib_qp_init_attr *attr)
++{
++ int size;
++ int inl_size = 0;
++
++ size = sq_overhead(attr->qp_type);
++ if (size < 0)
++ return size;
++
++ if (attr->cap.max_inline_data) {
++ inl_size = size + sizeof(struct mlx5_wqe_inline_seg)
++ + attr->cap.max_inline_data;
++ }
++
++ size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
++
++ return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
++}
++
++static int ibp_mlx5_set_sq_size(struct ibp_mlx5_qp *qp,
++ struct ib_qp_init_attr *attr)
++{
++ int wqe_size, wq_size;
++
++ if (!attr->cap.max_send_wr)
++ return 0;
++
++ wqe_size = calc_send_wqe(attr);
++ if (wqe_size < 0)
++ return wqe_size;
++
++ qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type)
++ - sizeof(struct mlx5_wqe_inline_seg);
++
++ wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
++ qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
++ qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
++ qp->sq.max_gs = attr->cap.max_send_sge;
++ qp->sq.max_post = wq_size / wqe_size;
++
++ return 0;
++}
++
++static int ibp_mlx5_set_rq_size(struct ibp_mlx5_qp *qp,
++ struct ib_qp_init_attr *attr, u64 srq)
++{
++ int wqe_size, wq_size;
++
++ if (srq || !attr->cap.max_recv_wr)
++ return 0;
++
++ wqe_size = attr->cap.max_recv_sge
++ * sizeof(struct mlx5_wqe_data_seg);
++ wqe_size = roundup_pow_of_two(wqe_size);
++
++ wq_size = roundup_pow_of_two(attr->cap.max_recv_wr) * wqe_size;
++ wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
++
++ qp->rq.wqe_cnt = wq_size / wqe_size;
++ qp->rq.wqe_shift = ilog2(wqe_size);
++ qp->rq.max_gs = (1 << qp->rq.wqe_shift)
++ / sizeof(struct mlx5_wqe_data_seg);
++ qp->rq.max_post = qp->rq.wqe_cnt;
++
++ return 0;
++}
++
++static int ibp_mlx5_alloc_qp(struct ibp_mlx5_qp *qp)
++{
++ int size;
++
++ size = PAGE_ALIGN((qp->rq.wqe_cnt << qp->rq.wqe_shift)
++ + (qp->sq.wqe_cnt << qp->sq.wqe_shift));
++
++ qp->wqe_buf = kzalloc(size, GFP_KERNEL);
++ if (!qp->wqe_buf)
++ goto err0;
++
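++	/*
++	 * The receive queue sits at the start of the buffer; the send
++	 * queue follows immediately after it.
++	 */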
++ qp->rq.offset = 0;
++ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
++
++ qp->sq.qend = ibp_mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
++
++ qp->sq.wrid = kzalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid),
++ GFP_KERNEL);
++ if (!qp->sq.wrid)
++ goto err1;
++
++ qp->rq.wrid = kzalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid),
++ GFP_KERNEL);
++ if (!qp->rq.wrid)
++ goto err2;
++
++ qp->sq.w_list = kzalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list),
++ GFP_KERNEL);
++ if (!qp->sq.w_list)
++ goto err3;
++
++ qp->sq.wr_data = kzalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data),
++ GFP_KERNEL);
++ if (!qp->sq.wr_data)
++ goto err4;
++
++ qp->sq.wqe_head = kzalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head),
++ GFP_KERNEL);
++ if (!qp->sq.wqe_head)
++ goto err5;
++
++ return 0;
++err5:
++ kfree(qp->sq.wr_data);
++err4:
++ kfree(qp->sq.w_list);
++err3:
++ kfree(qp->rq.wrid);
++err2:
++ kfree(qp->sq.wrid);
++err1:
++ kfree(qp->wqe_buf);
++err0:
++ return -ENOMEM;
++}
++
++int ibp_mlx5_alloc_qp_buf(struct ibp_mlx5_qp *qp, struct ib_qp_init_attr *attr,
++ u64 srq)
++{
++ int ret;
++
++ ret = ibp_mlx5_set_rq_size(qp, attr, srq);
++ if (ret) {
++ print_err("ibp_mlx5_set_rq_size returned %d\n", ret);
++ goto err;
++ }
++
++ ret = ibp_mlx5_set_sq_size(qp, attr);
++ if (ret) {
++ print_err("ibp_mlx5_set_sq_size returned %d\n", ret);
++ goto err;
++ }
++
++ ret = ibp_mlx5_alloc_qp(qp);
++ if (ret)
++ print_err("ibp_mlx5_alloc_qp returned %d\n", ret);
++err:
++ return ret;
++}
++
++void ibp_mlx5_free_qp_buf(struct ibp_mlx5_qp *qp)
++{
++ kfree(qp->wqe_buf);
++ kfree(qp->sq.wrid);
++ kfree(qp->rq.wrid);
++ kfree(qp->sq.w_list);
++ kfree(qp->sq.wr_data);
++ kfree(qp->sq.wqe_head);
++}
++
++int ibp_mlx5_insert_qp(struct ibp_mlx5_device *dev, struct ibp_mlx5_qp *qp)
++{
++ int ret;
++
++ spin_lock_irq(&dev->qp_table_lock);
++ ret = radix_tree_insert(&dev->qp_table_tree, qp->ibqp.qp_num, qp);
++ spin_unlock_irq(&dev->qp_table_lock);
++
++ return ret;
++}
++
++void ibp_mlx5_remove_qp(struct ibp_mlx5_device *dev, struct ibp_mlx5_qp *qp)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev->qp_table_lock, flags);
++ radix_tree_delete(&dev->qp_table_tree, qp->ibqp.qp_num);
++ spin_unlock_irqrestore(&dev->qp_table_lock, flags);
++}
++
++struct ibp_mlx5_qp *ibp_mlx5_qp_lookup(struct ibp_mlx5_device *dev, u32 qpn)
++{
++ struct ibp_mlx5_qp *qp;
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev->qp_table_lock, flags);
++ qp = radix_tree_lookup(&dev->qp_table_tree, qpn);
++ spin_unlock_irqrestore(&dev->qp_table_lock, flags);
++
++ return qp;
++}
++
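++/*
++ * Fast-path overflow test first; on apparent overflow, re-read head and
++ * tail under the CQ lock for a consistent view, since the tail advances
++ * as completions are polled.
++ */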
++static int mlx5_wq_overflow(struct ibp_mlx5_wq *wq, int nreq, struct ib_cq *ib_cq)
++{
++ struct ibp_mlx5_cq *cq;
++ unsigned cur;
++
++ cur = wq->head - wq->tail;
++ if (likely(cur + nreq < wq->max_post))
++ return 0;
++
++ cq = to_cq(ib_cq);
++ spin_lock(&cq->lock);
++ cur = wq->head - wq->tail;
++ spin_unlock(&cq->lock);
++
++ return cur + nreq >= wq->max_post;
++}
++
++static __be32 send_ieth(struct ib_send_wr *wr)
++{
++ switch (wr->opcode) {
++ case IB_WR_SEND_WITH_IMM:
++ case IB_WR_RDMA_WRITE_WITH_IMM:
++ return wr->ex.imm_data;
++
++ case IB_WR_SEND_WITH_INV:
++ return cpu_to_be32(wr->ex.invalidate_rkey);
++
++ default:
++ return 0;
++ }
++}
++
++static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
++ u64 remote_addr, u32 rkey)
++{
++ rseg->raddr = cpu_to_be64(remote_addr);
++ rseg->rkey = cpu_to_be32(rkey);
++ rseg->reserved = 0;
++}
++
++static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
++ struct ib_send_wr *wr)
++{
++ memcpy(&dseg->av, &to_ah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
++ dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
++ dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
++}
++
++static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
++{
++ dseg->byte_count = cpu_to_be32(sg->length);
++ dseg->lkey = cpu_to_be32(sg->lkey);
++ dseg->addr = cpu_to_be64(sg->addr);
++}
++
++static int set_data_inl_seg(struct ibp_mlx5_qp *qp, struct ib_send_wr *wr,
++ void *wqe, int *sz)
++{
++ struct mlx5_wqe_inline_seg *seg;
++ void *qend = qp->sq.qend;
++ void *addr;
++ int inl = 0;
++ int copy;
++ int len;
++ int i;
++
++ seg = wqe;
++ wqe += sizeof(*seg);
++ for (i = 0; i < wr->num_sge; i++) {
++ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
++ len = wr->sg_list[i].length;
++ inl += len;
++
++ if (unlikely(inl > qp->max_inline_data))
++ return -ENOMEM;
++
++ if (unlikely(wqe + len > qend)) {
++ copy = qend - wqe;
++ memcpy(wqe, addr, copy);
++ addr += copy;
++ len -= copy;
++ wqe = ibp_mlx5_get_send_wqe(qp, 0);
++ }
++ memcpy(wqe, addr, len);
++ wqe += len;
++ }
++
++ seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
++
++ *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
++
++ return 0;
++}
++
++static u8 get_fence(u8 fence, struct ib_send_wr *wr)
++{
++ if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
++ wr->send_flags & IB_SEND_FENCE))
++ return MLX5_FENCE_MODE_STRONG_ORDERING;
++
++ if (unlikely(fence)) {
++ if (wr->send_flags & IB_SEND_FENCE)
++ return MLX5_FENCE_MODE_SMALL_AND_FENCE;
++ else
++ return fence;
++ } else {
++ return 0;
++ }
++}
++
++
++static int begin_wqe(struct ibp_mlx5_qp *qp, void **seg,
++ struct mlx5_wqe_ctrl_seg **ctrl,
++ struct ib_send_wr *wr, int *idx,
++ int *size, int nreq)
++{
++ int err = 0;
++
++ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
++ print_err("wq overflow\n");
++ err = -ENOMEM;
++ return err;
++ }
++
++ *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
++ *seg = ibp_mlx5_get_send_wqe(qp, *idx);
++ *ctrl = *seg;
++ *(u32 *)(*seg + 8) = 0;
++ (*ctrl)->imm = send_ieth(wr);
++ (*ctrl)->fm_ce_se = qp->sq_signal_bits |
++ (wr->send_flags & IB_SEND_SIGNALED ?
++ MLX5_WQE_CTRL_CQ_UPDATE : 0) |
++ (wr->send_flags & IB_SEND_SOLICITED ?
++ MLX5_WQE_CTRL_SOLICITED : 0);
++
++ *seg += sizeof(**ctrl);
++ *size = sizeof(**ctrl) / 16;
++
++ return err;
++}
++
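++/*
++ * 'size' counts 16-byte units; cur_post advances in MLX5_SEND_WQE_BB
++ * (64-byte) basic blocks, hence the DIV_ROUND_UP in finish_wqe().
++ */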
++static void finish_wqe(struct ibp_mlx5_qp *qp,
++ struct mlx5_wqe_ctrl_seg *ctrl,
++ u8 size, unsigned idx, u64 wr_id,
++ int nreq, u8 fence, u8 next_fence,
++ u32 mlx5_opcode)
++{
++ u8 opmod = 0;
++
++ ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
++ mlx5_opcode | ((u32)opmod << 24));
++ ctrl->qpn_ds = cpu_to_be32(size | (qp->ibqp.qp_num << 8));
++ ctrl->fm_ce_se |= fence;
++ qp->fm_cache = next_fence;
++
++ qp->sq.wrid[idx] = wr_id;
++ qp->sq.w_list[idx].opcode = mlx5_opcode;
++ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
++ qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
++ qp->sq.w_list[idx].next = qp->sq.cur_post;
++}
++
++int ibp_mlx5_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
++ struct ib_send_wr **bad_wr)
++{
++ struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* silence compiler warning */
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ struct mlx5_wqe_data_seg *dpseg;
++ struct mlx5_wqe_xrc_seg *xrc;
++ struct ibp_mlx5_bf *bf = qp->bf;
++ int uninitialized_var(size);
++ void *qend = qp->sq.qend;
++ unsigned long flags;
++ unsigned idx;
++ int err = 0;
++ int inl = 0;
++ int num_sge;
++ void *seg;
++ int nreq;
++ int i;
++ u8 next_fence = 0;
++ u8 fence;
++
++ spin_lock_irqsave(&qp->sq.lock, flags);
++
++ for (nreq = 0; wr; ++nreq, wr = wr->next) {
++ if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
++ print_err("invalid opcode\n");
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ fence = qp->fm_cache;
++ num_sge = wr->num_sge;
++ if (unlikely(num_sge > qp->sq.max_gs)) {
++ print_err("num_sge > max_gs\n");
++ err = -ENOMEM;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
++ if (err) {
++ print_err("begin_wqe error %d\n", err);
++ err = -ENOMEM;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ switch (ibqp->qp_type) {
++ case IB_QPT_XRC_INI:
++ xrc = seg;
++ seg += sizeof(*xrc);
++ size += sizeof(*xrc) / 16;
++ /* fall through */
++ case IB_QPT_RC:
++ switch (wr->opcode) {
++ case IB_WR_RDMA_READ:
++ case IB_WR_RDMA_WRITE:
++ case IB_WR_RDMA_WRITE_WITH_IMM:
++ set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
++ rdma_wr(wr)->rkey);
++ seg += sizeof(struct mlx5_wqe_raddr_seg);
++ size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
++ break;
++
++ case IB_WR_ATOMIC_CMP_AND_SWP:
++ case IB_WR_ATOMIC_FETCH_AND_ADD:
++ case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
++ print_err("Atomic ops are not supported\n");
++ err = -ENOSYS;
++ *bad_wr = wr;
++ goto out;
++
++ /*
++ * It does not appear that these can be supported.
++ * They require a pd->pa_lkey which is not created.
++ */
++ case IB_WR_LOCAL_INV:
++ case IB_WR_REG_MR:
++ print_err("opcode %d is not supported\n",
++ wr->opcode);
++ err = -ENOSYS;
++ *bad_wr = wr;
++ goto out;
++
++ default:
++ break;
++ }
++ break;
++
++ case IB_QPT_UC:
++ switch (wr->opcode) {
++ case IB_WR_RDMA_WRITE:
++ case IB_WR_RDMA_WRITE_WITH_IMM:
++ set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
++ rdma_wr(wr)->rkey);
++ seg += sizeof(struct mlx5_wqe_raddr_seg);
++ size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
++ break;
++
++ default:
++ break;
++ }
++ break;
++
++ case IB_QPT_UD:
++ case IB_QPT_SMI:
++ case IB_QPT_GSI:
++ set_datagram_seg(seg, wr);
++ seg += sizeof(struct mlx5_wqe_datagram_seg);
++ size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
++ if (unlikely(seg == qend))
++ seg = ibp_mlx5_get_send_wqe(qp, 0);
++ break;
++
++ /*
++ * IB_QPT_REG_UMR QPs cannot be created thru ib_verbs
++ */
++
++ default:
++ break;
++ }
++
++ if (wr->send_flags & IB_SEND_INLINE && num_sge) {
++ int uninitialized_var(sz);
++
++ err = set_data_inl_seg(qp, wr, seg, &sz);
++ if (unlikely(err)) {
++ print_err("set_data_inl_seg err %d\n", err);
++ *bad_wr = wr;
++ goto out;
++ }
++ inl = 1;
++ size += sz;
++ } else {
++ dpseg = seg;
++ for (i = 0; i < num_sge; i++) {
++ if (unlikely(dpseg == qend)) {
++ seg = ibp_mlx5_get_send_wqe(qp, 0);
++ dpseg = seg;
++ }
++ if (likely(wr->sg_list[i].length)) {
++ set_data_ptr_seg(dpseg, wr->sg_list + i);
++ size += sizeof(struct mlx5_wqe_data_seg) / 16;
++ dpseg++;
++ }
++ }
++ }
++
++ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
++ get_fence(fence, wr), next_fence,
++ mlx5_ib_opcode[wr->opcode]);
++ }
++
++out:
++ if (likely(nreq)) {
++ qp->sq.head += nreq;
++
++ /*
++ * Make sure that descriptors are written before
++ * updating doorbell record and ringing the doorbell
++ */
++ wmb();
++
++ qp->dbrec.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
++
++ /*
++ * Make sure doorbell record is visible to the HCA before
++ * we hit doorbell
++ */
++ wmb();
++
++ if (bf->need_lock)
++ spin_lock(&bf->lock);
++
++ mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
++ MLX5_GET_DOORBELL_LOCK(&bf->lock32));
++ /*
++ * Make sure doorbells don't leak out of SQ spinlock
++ * and reach the HCA out of order.
++ */
++ mmiowb();
++
++ bf->offset ^= bf->buf_size;
++ if (bf->need_lock)
++ spin_unlock(&bf->lock);
++ }
++
++ spin_unlock_irqrestore(&qp->sq.lock, flags);
++
++ return err;
++}
++
++int ibp_mlx5_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr)
++{
++ struct ibp_mlx5_qp *qp = to_qp(ibqp);
++ struct mlx5_wqe_data_seg *scat;
++ unsigned long flags;
++ int err = 0;
++ int nreq;
++ int ind;
++ int i;
++
++ spin_lock_irqsave(&qp->rq.lock, flags);
++
++ ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
++
++ for (nreq = 0; wr; ++nreq, wr = wr->next) {
++ if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
++ err = -ENOMEM;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ goto out;
++ }
++
++ scat = get_recv_wqe(qp, ind);
++
++ for (i = 0; i < wr->num_sge; ++i)
++ set_data_ptr_seg(scat + i, wr->sg_list + i);
++
++ if (i < qp->rq.max_gs) {
++ scat[i].byte_count = 0;
++ scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
++ scat[i].addr = 0;
++ }
++
++ qp->rq.wrid[ind] = wr->wr_id;
++
++ ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
++ }
++
++out:
++ if (likely(nreq)) {
++ qp->rq.head += nreq;
++
++ /*
++ * Make sure that descriptors are written before
++ * doorbell record.
++ */
++ wmb();
++
++ *qp->dbrec.db = cpu_to_be32(qp->rq.head & 0xffff);
++ }
++
++ spin_unlock_irqrestore(&qp->rq.lock, flags);
++
++ return err;
++}
+diff --git a/drivers/infiniband/ibp/drv/hw/mlx5/srq.c b/drivers/infiniband/ibp/drv/hw/mlx5/srq.c
+new file mode 100644
+index 0000000..c9a5524
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/hw/mlx5/srq.c
+@@ -0,0 +1,185 @@
++/*
++ * Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "main.h"
++
++static void *ibp_mlx5_get_srq_wqe(struct ibp_mlx5_srq *srq, int n)
++{
++ return srq->wqe_buf + (n << srq->wqe_shift);
++}
++
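++/*
++ * Free SRQ WQEs form a singly linked list threaded through
++ * next_wqe_index; post_srq_recv treats head == tail as a full queue.
++ */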
++int ibp_mlx5_alloc_srq_buf(struct ibp_mlx5_srq *srq, int desc_size)
++{
++ struct mlx5_wqe_srq_next_seg *next;
++ int size, i;
++
++ srq->wrid = kzalloc(srq->max * sizeof(*srq->wrid), GFP_KERNEL);
++ if (!srq->wrid)
++ return -ENOMEM;
++
++ size = ALIGN(srq->max * desc_size, PAGE_SIZE);
++ srq->wqe_buf = kzalloc(size, GFP_KERNEL);
++ if (!srq->wqe_buf) {
++ kfree(srq->wrid);
++ return -ENOMEM;
++ }
++
++ srq->head = 0;
++ srq->tail = srq->max - 1;
++ srq->wqe_ctr = 0;
++
++ for (i = 0; i < srq->max; i++) {
++ next = ibp_mlx5_get_srq_wqe(srq, i);
++ next->next_wqe_index = cpu_to_be16((i + 1) & (srq->max - 1));
++ }
++
++ return 0;
++}
++
++void ibp_mlx5_free_srq_buf(struct ibp_mlx5_srq *srq)
++{
++ kfree(srq->wrid);
++ kfree(srq->wqe_buf);
++}
++
++void ibp_mlx5_free_srq_wqe(struct ibp_mlx5_srq *srq, int wqe_index)
++{
++ struct mlx5_wqe_srq_next_seg *next;
++
++ /* always called with interrupts disabled. */
++ spin_lock(&srq->lock);
++
++ next = ibp_mlx5_get_srq_wqe(srq, srq->tail);
++ next->next_wqe_index = cpu_to_be16(wqe_index);
++ srq->tail = wqe_index;
++
++ spin_unlock(&srq->lock);
++}
++
++int ibp_mlx5_insert_srq(struct ibp_mlx5_device *dev, struct ibp_mlx5_srq *srq)
++{
++ int ret;
++
++ spin_lock_irq(&dev->srq_table_lock);
++ ret = radix_tree_insert(&dev->srq_table_tree, srq->srqn, srq);
++ spin_unlock_irq(&dev->srq_table_lock);
++
++ return ret;
++}
++
++void ibp_mlx5_remove_srq(struct ibp_mlx5_device *dev, struct ibp_mlx5_srq *srq)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&dev->srq_table_lock, flags);
++ radix_tree_delete(&dev->srq_table_tree, srq->srqn);
++ spin_unlock_irqrestore(&dev->srq_table_lock, flags);
++}
++
++struct ibp_mlx5_srq *ibp_mlx5_srq_lookup(struct ibp_mlx5_device *dev, u32 srqn)
++{
++ unsigned long flags;
++ struct ibp_mlx5_srq *srq;
++
++ spin_lock_irqsave(&dev->srq_table_lock, flags);
++ srq = radix_tree_lookup(&dev->srq_table_tree, srqn);
++ spin_unlock_irqrestore(&dev->srq_table_lock, flags);
++
++ return srq;
++}
++
++int ibp_mlx5_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
++ struct ib_recv_wr **bad_wr)
++{
++ struct ibp_mlx5_srq *srq = to_srq(ibsrq);
++ struct mlx5_wqe_srq_next_seg *next;
++ struct mlx5_wqe_data_seg *scat;
++ unsigned long flags;
++ int err = 0;
++ int nreq;
++ int i;
++
++ spin_lock_irqsave(&srq->lock, flags);
++
++ for (nreq = 0; wr; nreq++, wr = wr->next) {
++ if (unlikely(wr->num_sge > srq->max_gs)) {
++ err = -EINVAL;
++ *bad_wr = wr;
++ break;
++ }
++
++ if (unlikely(srq->head == srq->tail)) {
++ err = -ENOMEM;
++ *bad_wr = wr;
++ break;
++ }
++
++ srq->wrid[srq->head] = wr->wr_id;
++
++ next = ibp_mlx5_get_srq_wqe(srq, srq->head);
++ srq->head = be16_to_cpu(next->next_wqe_index);
++ scat = (struct mlx5_wqe_data_seg *) (next + 1);
++
++ for (i = 0; i < wr->num_sge; i++) {
++ scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
++ scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
++ scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
++ }
++
++ if (i < srq->max_avail_gather) {
++ scat[i].byte_count = 0;
++ scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
++ scat[i].addr = 0;
++ }
++ }
++
++ if (likely(nreq)) {
++ srq->wqe_ctr += nreq;
++
++ /*
++ * Make sure that descriptors are written before
++ * doorbell record.
++ */
++ wmb();
++
++ *srq->dbrec.db = cpu_to_be32(srq->wqe_ctr);
++ }
++
++ spin_unlock_irqrestore(&srq->lock, flags);
++
++ return err;
++}
+diff --git a/drivers/infiniband/ibp/drv/ibp-abi.h b/drivers/infiniband/ibp/drv/ibp-abi.h
+new file mode 100644
+index 0000000..fa8a1a9
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/ibp-abi.h
+@@ -0,0 +1,649 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_ABI_H
++#define IBP_ABI_H
++
++#include <linux/types.h>
++
++/* Increment this value if any changes break compatibility. */
++#define IBP_ABI_VERSION 2
++
++/* Client to server message enums. */
++enum {
++ IBP_VERB_GET_PROTOCOL_STATS,
++ IBP_VERB_QUERY_DEVICE,
++ IBP_VERB_QUERY_PORT,
++ IBP_VERB_GET_LINK_LAYER,
++ IBP_VERB_QUERY_GID,
++ IBP_VERB_QUERY_PKEY,
++ IBP_VERB_MODIFY_DEVICE,
++ IBP_VERB_MODIFY_PORT,
++ IBP_VERB_ALLOC_UCONTEXT,
++ IBP_VERB_DEALLOC_UCONTEXT,
++ IBP_VERB_REG_BUF,
++ IBP_VERB_DEREG_BUF,
++ IBP_VERB_MMAP,
++ IBP_VERB_UNMMAP,
++ IBP_VERB_ALLOC_PD,
++ IBP_VERB_DEALLOC_PD,
++ IBP_VERB_CREATE_AH,
++ IBP_VERB_MODIFY_AH,
++ IBP_VERB_QUERY_AH,
++ IBP_VERB_DESTROY_AH,
++ IBP_VERB_CREATE_SRQ,
++ IBP_VERB_MODIFY_SRQ,
++ IBP_VERB_QUERY_SRQ,
++ IBP_VERB_DESTROY_SRQ,
++ IBP_VERB_POST_SRQ_RECV,
++ IBP_VERB_CREATE_QP,
++ IBP_VERB_MODIFY_QP,
++ IBP_VERB_QUERY_QP,
++ IBP_VERB_DESTROY_QP,
++ IBP_VERB_POST_SEND,
++ IBP_VERB_POST_RECV,
++ IBP_VERB_CREATE_CQ,
++ IBP_VERB_MODIFY_CQ,
++ IBP_VERB_DESTROY_CQ,
++ IBP_VERB_RESIZE_CQ,
++ IBP_VERB_POLL_CQ,
++ IBP_VERB_PEEK_CQ,
++ IBP_VERB_REQ_NOTIFY_CQ,
++ IBP_VERB_REQ_NCOMP_NOTIF,
++ IBP_VERB_GET_DMA_MR,
++ IBP_VERB_REG_PHYS_MR,
++ IBP_VERB_REG_USER_MR,
++ IBP_VERB_QUERY_MR,
++ IBP_VERB_DEREG_MR,
++ IBP_VERB_ALLOC_FAST_REG_MR,
++ IBP_VERB_ALLOC_FAST_REG_PAGE_LIST,
++ IBP_VERB_FREE_FAST_REG_PAGE_LIST,
++ IBP_VERB_REREG_PHYS_MR,
++ IBP_VERB_ALLOC_MW,
++ IBP_VERB_BIND_MW,
++ IBP_VERB_DEALLOC_MW,
++ IBP_VERB_ALLOC_FMR,
++ IBP_VERB_MAP_PHYS_FMR,
++ IBP_VERB_UNMAP_FMR,
++ IBP_VERB_DEALLOC_FMR,
++ IBP_VERB_ATTACH_MCAST,
++ IBP_VERB_DETACH_MCAST,
++ IBP_VERB_PROCESS_MAD,
++ IBP_VERB_ALLOC_XRCD,
++ IBP_VERB_DEALLOC_XRCD,
++};
++
++/* Server to client message enums. */
++enum {
++ IBP_ADD_DEVICE,
++ IBP_REMOVE_DEVICE,
++ IBP_VERB_RESPONSE,
++ IBP_QUEUED_RESPONSE,
++ IBP_ASYNC_EVENT,
++ IBP_CQ_COMP,
++};
++
++/*
++ * Make sure that all structs defined in this file are laid out to pack
++ * the same way on different architectures to avoid incompatibility.
++ *
++ * Specifically:
++ * - Do not use pointer types -- pass pointers in a u64 instead.
++ * - Make sure that any structure larger than 4 bytes is padded
++ * to a multiple of 8 bytes; otherwise the structure size may
++ * be different between architectures.
++ */
++
++struct ibp_msg_header { /* present in all messages */
++ u32 opcode;
++ u32 length;
++ u32 status;
++ u32 reserved;
++ u64 device;
++ u64 request;
++ u64 data[0];
++};
++
++#define IBP_DEVICE_NAME_MAX 64
++
++struct ibp_add_device {
++ u8 name[IBP_DEVICE_NAME_MAX];
++ u32 vendor_id;
++ u32 device_id;
++ u64 ib_device;
++ u64 device;
++ __be64 node_guid;
++ u64 uverbs_cmd_mask;
++ u32 uverbs_abi_ver;
++ u32 ibp_abi_ver;
++ u32 num_comp_vectors;
++ u8 phys_port_cnt;
++ u8 reserved[7];
++};
++
++struct ibp_add_device_msg {
++ struct ibp_msg_header header;
++ struct ibp_add_device data;
++};
++
++struct ibp_remove_device_msg {
++ struct ibp_msg_header header;
++};
++
++struct ibp_verb_response_msg {
++ struct ibp_msg_header header;
++ u64 data[0];
++};
++
++struct ibp_queued_response_msg {
++ struct ibp_msg_header header;
++ u64 data[0];
++};
++
++struct ibp_async_event {
++ u64 ibdev;
++ u64 context;
++ u32 type;
++ u8 reserved[4];
++};
++
++struct ibp_async_event_msg {
++ struct ibp_msg_header header;
++ struct ibp_async_event data;
++};
++
++struct ibp_cq_comp {
++ u64 cq_context;
++};
++
++struct ibp_cq_comp_msg {
++ struct ibp_msg_header header;
++ struct ibp_cq_comp data;
++};
++
++struct ibp_alloc_ucontext_cmd {
++ struct ibp_msg_header header;
++ u64 ibdev;
++ u64 data[0];
++};
++
++struct ibp_alloc_ucontext_resp {
++ u64 ucontext;
++ u64 data[0];
++};
++
++struct ibp_dealloc_ucontext_cmd {
++ struct ibp_msg_header header;
++ u64 ucontext;
++};
++
++struct ibp_mmap_cmd {
++ struct ibp_msg_header header;
++ u64 len;
++ u64 prot;
++ u64 flags;
++ u64 pgoff;
++ u64 ucontext;
++};
++
++struct ibp_mmap_resp {
++ u64 mmap;
++ u64 scif_addr;
++};
++
++struct ibp_unmmap_cmd {
++ struct ibp_msg_header header;
++ u64 mmap;
++};
++
++struct ibp_reg_buf_cmd {
++ struct ibp_msg_header header;
++ u64 ucontext;
++ u64 virt_addr;
++ u64 scif_addr;
++ u64 length;
++ u32 offset;
++ u32 access;
++};
++
++struct ibp_reg_buf_resp {
++ u64 reg;
++};
++
++struct ibp_dereg_buf_cmd {
++ struct ibp_msg_header header;
++ u64 reg;
++};
++
++struct ibp_query_device_cmd {
++ struct ibp_msg_header header;
++};
++
++struct ibp_query_device_resp {
++ u64 fw_ver;
++ __be64 sys_image_guid;
++ u64 max_mr_size;
++ u64 page_size_cap;
++ u32 vendor_id;
++ u32 vendor_part_id;
++ u32 hw_ver;
++ u32 max_qp;
++ u32 max_qp_wr;
++ u32 device_cap_flags;
++ u32 max_sge;
++ u32 max_sge_rd;
++ u32 max_cq;
++ u32 max_cqe;
++ u32 max_mr;
++ u32 max_pd;
++ u32 max_qp_rd_atom;
++ u32 max_ee_rd_atom;
++ u32 max_res_rd_atom;
++ u32 max_qp_init_rd_atom;
++ u32 max_ee_init_rd_atom;
++ u32 atomic_cap;
++ u32 masked_atomic_cap;
++ u32 max_ee;
++ u32 max_rdd;
++ u32 max_mw;
++ u32 max_raw_ipv6_qp;
++ u32 max_raw_ethy_qp;
++ u32 max_mcast_grp;
++ u32 max_mcast_qp_attach;
++ u32 max_total_mcast_qp_attach;
++ u32 max_ah;
++ u32 max_fmr;
++ u32 max_map_per_fmr;
++ u32 max_srq;
++ u32 max_srq_wr;
++ u32 max_srq_sge;
++ u32 max_fast_reg_page_list_len;
++ u16 max_pkeys;
++ u8 local_ca_ack_delay;
++ u8 reserved[5];
++};
++
++struct ibp_query_port_cmd {
++ struct ibp_msg_header header;
++ u8 port_num;
++ u8 reserved[7];
++};
++
++struct ibp_query_port_resp {
++ u32 port_cap_flags;
++ u32 max_msg_sz;
++ u32 bad_pkey_cntr;
++ u32 qkey_viol_cntr;
++ u32 gid_tbl_len;
++ u16 pkey_tbl_len;
++ u16 lid;
++ u16 sm_lid;
++ u8 state;
++ u8 max_mtu;
++ u8 active_mtu;
++ u8 lmc;
++ u8 max_vl_num;
++ u8 sm_sl;
++ u8 subnet_timeout;
++ u8 init_type_reply;
++ u8 active_width;
++ u8 active_speed;
++ u8 phys_state;
++ u8 link_layer;
++ u8 reserved[2];
++};
++
++struct ibp_query_gid_cmd {
++ struct ibp_msg_header header;
++ u32 index;
++ u8 port_num;
++ u8 reserved[3];
++};
++
++struct ibp_query_gid_resp {
++ __be64 subnet_prefix;
++ __be64 interface_id;
++};
++
++struct ibp_query_pkey_cmd {
++ struct ibp_msg_header header;
++ u32 index;
++ u8 port_num;
++ u8 reserved[3];
++};
++
++struct ibp_query_pkey_resp {
++ u16 pkey;
++ u8 reserved[6];
++};
++
++struct ibp_alloc_pd_cmd {
++ struct ibp_msg_header header;
++ u64 ucontext;
++ u64 data[0];
++};
++
++struct ibp_alloc_pd_resp {
++ u64 pd;
++ u64 data[0];
++};
++
++struct ibp_dealloc_pd_cmd {
++ struct ibp_msg_header header;
++ u64 pd;
++};
++
++struct ibp_global_route {
++ __be64 dgid_subnet_prefix;
++ __be64 dgid_interface_id;
++ u32 flow_label;
++ u8 sgid_index;
++ u8 hop_limit;
++ u8 traffic_class;
++ u8 reserved[1];
++};
++
++struct ibp_ah_attr {
++ struct ibp_global_route grh;
++ u16 dlid;
++ u8 sl;
++ u8 src_path_bits;
++ u8 static_rate;
++ u8 ah_flags;
++ u8 port_num;
++ u8 reserved[1];
++};
++
++struct ibp_create_ah_cmd {
++ struct ibp_msg_header header;
++ u64 pd;
++ struct ibp_ah_attr ah_attr;
++};
++
++struct ibp_create_ah_resp {
++ u64 ah;
++};
++
++struct ibp_query_ah_cmd {
++ struct ibp_msg_header header;
++ u64 ah;
++};
++
++struct ibp_query_ah_resp {
++ struct ibp_ah_attr attr;
++};
++
++struct ibp_destroy_ah_cmd {
++ struct ibp_msg_header header;
++ u64 ah;
++};
++
++struct ibp_srq_attr {
++ u32 max_wr;
++ u32 max_sge;
++ u32 srq_limit;
++ u8 reserved[4];
++};
++
++struct ibp_create_srq_cmd {
++ struct ibp_msg_header header;
++ u64 pd;
++ u64 srq_context;
++ struct ibp_srq_attr attr;
++ u64 data[0];
++};
++
++struct ibp_create_srq_resp {
++ u64 srq;
++ struct ibp_srq_attr attr;
++ u64 data[0];
++};
++
++struct ibp_query_srq_cmd {
++ struct ibp_msg_header header;
++ u64 srq;
++};
++
++struct ibp_query_srq_resp {
++ struct ibp_srq_attr attr;
++};
++
++struct ibp_modify_srq_cmd {
++ struct ibp_msg_header header;
++ u64 srq;
++ struct ibp_srq_attr attr;
++ u32 srq_attr_mask;
++ u8 reserved[4];
++ u64 data[0];
++};
++
++struct ibp_modify_srq_resp {
++ struct ibp_srq_attr attr;
++ u64 data[0];
++};
++
++struct ibp_destroy_srq_cmd {
++ struct ibp_msg_header header;
++ u64 srq;
++};
++
++struct ibp_qp_cap {
++ u32 max_send_wr;
++ u32 max_recv_wr;
++ u32 max_send_sge;
++ u32 max_recv_sge;
++ u32 max_inline_data;
++ u8 reserved[4];
++};
++
++struct ibp_create_qp_cmd {
++ struct ibp_msg_header header;
++ u64 pd;
++ u64 send_cq;
++ u64 recv_cq;
++ u64 srq;
++ u64 xrc_domain;
++ u64 qp_context;
++ struct ibp_qp_cap cap;
++ u8 sq_sig_type;
++ u8 qp_type;
++ u8 create_flags;
++ u8 port_num;
++ u64 data[0];
++};
++
++struct ibp_create_qp_resp {
++ u64 qp;
++ struct ibp_qp_cap cap;
++ u32 qpn;
++ u8 reserved[4];
++ u64 data[0];
++};
++
++struct ibp_query_qp_cmd {
++ struct ibp_msg_header header;
++ u64 qp;
++ u32 qp_attr_mask;
++ u8 reserved[4];
++};
++
++struct ibp_query_qp_resp {
++ u32 qp_state;
++ u32 cur_qp_state;
++ u32 path_mtu;
++ u32 path_mig_state;
++ u32 qkey;
++ u32 rq_psn;
++ u32 sq_psn;
++ u32 dest_qp_num;
++ u32 qp_access_flags;
++ u32 init_create_flags;
++ struct ibp_qp_cap init_cap;
++ struct ibp_qp_cap cap;
++ struct ibp_ah_attr ah;
++ struct ibp_ah_attr alt_ah;
++ u16 pkey_index;
++ u16 alt_pkey_index;
++ u8 en_sqd_async_notify;
++ u8 sq_draining;
++ u8 max_rd_atomic;
++ u8 max_dest_rd_atomic;
++ u8 min_rnr_timer;
++ u8 port_num;
++ u8 timeout;
++ u8 retry_cnt;
++ u8 rnr_retry;
++ u8 alt_port_num;
++ u8 alt_timeout;
++ u8 init_sq_sig_type;
++};
++
++struct ibp_modify_qp_cmd {
++ struct ibp_msg_header header;
++ u64 qp;
++ u32 qp_attr_mask;
++ u32 qp_state;
++ u32 cur_qp_state;
++ u32 path_mtu;
++ u32 path_mig_state;
++ u32 qkey;
++ u32 rq_psn;
++ u32 sq_psn;
++ u32 dest_qp_num;
++ u32 qp_access_flags;
++ struct ibp_qp_cap cap;
++ struct ibp_ah_attr ah;
++ struct ibp_ah_attr alt_ah;
++ u16 pkey_index;
++ u16 alt_pkey_index;
++ u8 en_sqd_async_notify;
++ u8 sq_draining;
++ u8 max_rd_atomic;
++ u8 max_dest_rd_atomic;
++ u8 min_rnr_timer;
++ u8 port_num;
++ u8 timeout;
++ u8 retry_cnt;
++ u8 rnr_retry;
++ u8 alt_port_num;
++ u8 alt_timeout;
++ u8 reserved[1];
++ u64 data[0];
++};
++
++struct ibp_modify_qp_resp {
++ struct ibp_qp_cap cap;
++ u64 data[0];
++};
++
++struct ibp_destroy_qp_cmd {
++ struct ibp_msg_header header;
++ u64 qp;
++};
++
++struct ibp_create_cq_cmd {
++ struct ibp_msg_header header;
++ u64 ucontext;
++ u64 cq_context;
++ u32 cqe;
++ u32 vector;
++ u64 data[0];
++};
++
++struct ibp_create_cq_resp {
++ u64 cq;
++ u32 cqe;
++ u8 reserved[4];
++ u64 data[0];
++};
++
++struct ibp_resize_cq_cmd {
++ struct ibp_msg_header header;
++ u64 cq;
++ u32 cqe;
++ u8 reserved[4];
++ u64 data[0];
++};
++
++struct ibp_resize_cq_resp {
++ u32 cqe;
++ u8 reserved[4];
++ u64 data[0];
++};
++
++struct ibp_destroy_cq_cmd {
++ struct ibp_msg_header header;
++ u64 cq;
++};
++
++struct ibp_reg_user_mr_cmd {
++ struct ibp_msg_header header;
++ u64 pd;
++ u64 hca_va;
++ u64 scif_addr;
++ u64 length;
++ u32 offset;
++ u32 access;
++ u64 data[0];
++};
++
++struct ibp_reg_user_mr_resp {
++ u64 mr;
++ u32 lkey;
++ u32 rkey;
++ u64 data[0];
++};
++
++struct ibp_dereg_mr_cmd {
++ struct ibp_msg_header header;
++ u64 mr;
++};
++
++struct ibp_attach_mcast_cmd {
++ struct ibp_msg_header header;
++ u64 qp;
++ __be64 subnet_prefix;
++ __be64 interface_id;
++ u16 lid;
++ u8 data[6];
++};
++
++struct ibp_detach_mcast_cmd {
++ struct ibp_msg_header header;
++ u64 qp;
++ __be64 subnet_prefix;
++ __be64 interface_id;
++ u16 lid;
++ u8 data[6];
++};
++
++#endif /* IBP_ABI_H */
+diff --git a/drivers/infiniband/ibp/drv/ibp.h b/drivers/infiniband/ibp/drv/ibp.h
+new file mode 100644
+index 0000000..193433f
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/ibp.h
+@@ -0,0 +1,260 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_H
++#define IBP_H
++
++#include <rdma/ib_verbs.h>
++#include "ibp-abi.h"
++
++struct ibp_device {
++ char name[IBP_DEVICE_NAME_MAX];
++ u32 vendor_id;
++ u32 device_id;
++ u64 ib_device;
++ u64 device;
++ __be64 node_guid;
++ u64 uverbs_cmd_mask;
++ u32 uverbs_abi_ver;
++ u32 ibp_abi_ver;
++ struct device *linux_dev;
++ struct list_head list;
++ u64 driver_data;
++ int abi_version;
++ int num_comp_vectors;
++ u8 phys_port_cnt;
++};
++
++struct ibp_id_table {
++ u32 vendor_id;
++ u32 device_id;
++};
++
++struct ibp_driver {
++ const char *name;
++ const struct ibp_id_table *id_table;
++ int (*add)(struct ibp_device *device);
++ void (*remove)(struct ibp_device *device);
++ u64 (*resolve)(struct ib_device *ibdev);
++
++ struct list_head list;
++};
++
++struct ibp_rb {
++ u64 handle;
++};
++
++struct ibp_iomem {
++ void *cookie;
++ void __iomem *addr;
++};
++
++/**
++ * ibp_resolve_ib_device - Return the host ib_device handle
++ * @ibdev:Card IB device
++ *
++ * Upper level drivers may require the host ib_device handle associated
++ * with the card ib_device. This routine resolves the card ib_device to
++ * the corresponding host ib_device handle. A value of 0 is returned if
++ * no match was found.
++ */
++u64 ibp_resolve_ib_device(struct ib_device *ibdev);
++
++/**
++ * ibp_register_driver - Register this driver
++ * @driver:Driver to register
++ *
++ * Lower level drivers use ibp_register_driver to register for callbacks
++ * on IB device addition and removal. Only one low level driver registration
++ * is allowed for a each vendor/device id pair. When an IB device is added,
++ * it is compared with each registered driver vendor and device id. The add
++ * callback routine for the matching driver will be called.
++ */
++int ibp_register_driver(struct ibp_driver *driver);
++
++/**
++ * ibp_unregister_driver - Unregister this driver
++ * @driver:Driver to unregister
++ *
++ * Lower level drivers use ibp_unregister_driver() to remove their
++ * registration. When ibp_unregister_driver() is called, the driver
++ * will receive a remove callback for each IB device with matching vendor
++ * and device ids.
++ */
++void ibp_unregister_driver(struct ibp_driver *driver);
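++
++/*
++ * A lower level driver might register as follows. Illustrative sketch
++ * only; the names, the ids, and the zero-terminated id table are
++ * assumptions, not part of this interface:
++ *
++ *	static const struct ibp_id_table my_ids[] = {
++ *		{ .vendor_id = 0x8086, .device_id = 0x1234 },
++ *		{ }
++ *	};
++ *
++ *	static struct ibp_driver my_driver = {
++ *		.name = "my_driver",
++ *		.id_table = my_ids,
++ *		.add = my_add,
++ *		.remove = my_remove,
++ *	};
++ *
++ *	ret = ibp_register_driver(&my_driver);
++ *	...
++ *	ibp_unregister_driver(&my_driver);
++ */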
++
++static inline void ibp_set_driver_data(struct ibp_device *device, u64 data)
++{
++ device->driver_data = data;
++}
++
++static inline u64 ibp_get_driver_data(struct ibp_device *device)
++{
++ return device->driver_data;
++}
++
++int ibp_cmd_alloc_ucontext(struct ibp_device *device, struct ib_device *ibdev,
++ u64 *ucontext, struct ibp_alloc_ucontext_cmd *cmd,
++ size_t cmd_size,
++ struct ibp_alloc_ucontext_resp *resp,
++ size_t resp_size);
++
++int ibp_cmd_dealloc_ucontext(struct ibp_device *device, u64 ucontext);
++
++/**
++ * ibp_reg_buf - Register a private buffer with this driver
++ * @device: the device on which to register
++ * @ucontext: peer driver ucontext handle
++ * @vaddr: starting virtual address of the buffer
++ * @length: length of the buffer
++ * @access: IB_ACCESS_xxx flags for buffer
++ *
++ * Lower level drivers use ibp_reg_buf() to register private buffers.
++ * Upon success, a pointer to a registered buffer structure is returned
++ * which contains an addr handle. The addr handle can be shared with
++ * a peer driver on the host server for its use with ib_umem_get().
++ * This routine should not be used to register IB memory regions.
++ */
++struct ibp_rb *ibp_reg_buf(struct ibp_device *device, u64 ucontext,
++ unsigned long vaddr, size_t length, int access);
++
++/**
++ * ibp_dereg_buf - Deregister a private buffer through this driver
++ * @device: the device on which to deregister
++ * @rb: pointer to the registered buffer structure; may be ERR or NULL
++ *
++ * Lower level drivers use ibp_dereg_buf() to deregister a private buffer.
++ */
++int ibp_dereg_buf(struct ibp_device *device, struct ibp_rb *rb);
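++
++/*
++ * Registration and deregistration are paired; a minimal sketch (the
++ * local variables here are assumptions):
++ *
++ *	struct ibp_rb *rb;
++ *
++ *	rb = ibp_reg_buf(device, ucontext, vaddr, len, IB_ACCESS_LOCAL_WRITE);
++ *	if (IS_ERR(rb))
++ *		return PTR_ERR(rb);
++ *
++ *	... share rb->handle with the host peer for ib_umem_get() ...
++ *
++ *	ibp_dereg_buf(device, rb);
++ */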
++
++int ibp_cmd_mmap(struct ibp_device *device, u64 ucontext,
++ struct vm_area_struct *vma);
++
++struct ibp_iomem *ibp_cmd_ioremap(struct ibp_device *device, u64 ucontext,
++ phys_addr_t offset, unsigned long size);
++
++int ibp_cmd_iounmap(struct ibp_iomem *iomem);
++
++int ibp_cmd_query_device(struct ibp_device *device,
++ struct ib_device_attr *device_attr);
++
++int ibp_cmd_query_port(struct ibp_device *device, u8 port_num,
++ struct ib_port_attr *port_attr);
++
++enum rdma_link_layer ibp_cmd_get_link_layer(struct ibp_device *device,
++ u8 port_num);
++
++int ibp_cmd_query_gid(struct ibp_device *device, u8 port_num, int index,
++ union ib_gid *gid);
++
++int ibp_cmd_query_pkey(struct ibp_device *device, u8 port_num, int index,
++ u16 *pkey);
++
++int ibp_cmd_alloc_pd(struct ibp_device *device, u64 ucontext, u64 *pd,
++ struct ibp_alloc_pd_cmd *cmd, size_t cmd_size,
++ struct ibp_alloc_pd_resp *resp, size_t resp_size);
++
++int ibp_cmd_dealloc_pd(struct ibp_device *device, u64 pd);
++
++int ibp_cmd_create_ah(struct ibp_device *device, u64 pd,
++ struct ib_ah_attr *ah_attr,
++ u64 *ah);
++
++int ibp_cmd_query_ah(struct ibp_device *device, u64 ah,
++ struct ib_ah_attr *ah_attr);
++
++int ibp_cmd_destroy_ah(struct ibp_device *device, u64 ah);
++
++int ibp_cmd_create_srq(struct ibp_device *device, u64 pd,
++ struct ib_srq_init_attr *init_attr,
++ u64 *srq, struct ib_srq *ibsrq,
++ struct ibp_create_srq_cmd *cmd, size_t cmd_size,
++ struct ibp_create_srq_resp *resp, size_t resp_size);
++
++int ibp_cmd_query_srq(struct ibp_device *device, u64 srq,
++ struct ib_srq_attr *attr);
++
++int ibp_cmd_modify_srq(struct ibp_device *device, u64 srq,
++ struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
++ struct ibp_modify_srq_cmd *cmd, size_t cmd_size,
++ struct ibp_modify_srq_resp *resp, size_t resp_size);
++
++int ibp_cmd_destroy_srq(struct ibp_device *device, u64 srq);
++
++int ibp_cmd_create_qp(struct ibp_device *device, u64 pd,
++ u64 send_cq, u64 recv_cq, u64 srq,
++ struct ib_qp_init_attr *init_attr,
++ u64 *qp, struct ib_qp *ibqp,
++ struct ibp_create_qp_cmd *cmd, size_t cmd_size,
++ struct ibp_create_qp_resp *resp, size_t resp_size);
++
++int ibp_cmd_query_qp(struct ibp_device *device, u64 qp,
++ struct ib_qp_attr *attr, int qp_attr_mask,
++ struct ib_qp_init_attr *init_attr);
++
++int ibp_cmd_modify_qp(struct ibp_device *device, u64 qp,
++ struct ib_qp_attr *attr, int qp_attr_mask,
++ struct ibp_modify_qp_cmd *cmd, size_t cmd_size,
++ struct ibp_modify_qp_resp *resp, size_t resp_size);
++
++int ibp_cmd_destroy_qp(struct ibp_device *device, u64 qp);
++
++int ibp_cmd_create_cq(struct ibp_device *device, u64 ucontext,
++ int entries, int vector, u64 *cq, struct ib_cq *ibcq,
++ struct ibp_create_cq_cmd *cmd, size_t cmd_size,
++ struct ibp_create_cq_resp *resp, size_t resp_size);
++
++int ibp_cmd_resize_cq(struct ibp_device *device, u64 cq,
++ int entries, struct ib_cq *ibcq,
++ struct ibp_resize_cq_cmd *cmd, size_t cmd_size,
++ struct ibp_resize_cq_resp *resp, size_t resp_size);
++
++int ibp_cmd_destroy_cq(struct ibp_device *device, u64 cq);
++
++int ibp_cmd_reg_user_mr(struct ibp_device *device, u64 pd, u64 start,
++ u64 length, u64 virt_addr, int access, u64 *mr,
++ u32 *lkey, u32 *rkey,
++ struct ibp_reg_user_mr_cmd *cmd, size_t cmd_size,
++ struct ibp_reg_user_mr_resp *resp, size_t resp_size);
++
++int ibp_cmd_dereg_mr(struct ibp_device *device, u64 mr);
++
++int ibp_cmd_get_dma_mr(struct ibp_device *device, u64 pd, int access,
++ u64 *mr, u32 *lkey, u32 *rkey);
++
++int ibp_cmd_attach_mcast(struct ibp_device *device, u64 qp,
++ union ib_gid *gid, u16 lid);
++
++int ibp_cmd_detach_mcast(struct ibp_device *device, u64 qp,
++ union ib_gid *gid, u16 lid);
++
++#endif /* IBP_H */
+diff --git a/drivers/infiniband/ibp/drv/server.c b/drivers/infiniband/ibp/drv/server.c
+new file mode 100644
+index 0000000..4bbe35f
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/server.c
+@@ -0,0 +1,503 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "server.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++MODULE_PARAM(port, port, int, IBP_PORT, "Connection port");
++MODULE_PARAM(backlog, backlog, int, 8, "Connection backlog");
++MODULE_PARAM(timeout, timeout, int, 1000, "Listen/Poll time in milliseconds");
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++static struct rw_semaphore list_rwsem;
++
++static struct class *ibp_class;
++static struct task_struct *listen_thread;
++
++static LIST_HEAD(device_list);
++static LIST_HEAD(client_list);
++static LIST_HEAD(cdev_list);
++
++static void ibp_add_one(struct ib_device *ib_dev);
++static void ibp_remove_one(struct ib_device *ib_dev, void *data);
++
++static struct ib_client ib_client = {
++ .name = DRV_NAME,
++ .add = ibp_add_one,
++ .remove = ibp_remove_one
++};
++
++static int ibp_open(struct inode *inode, struct file *filp);
++static ssize_t ibp_write(struct file *filp, const char __user *buf,
++ size_t count, loff_t *pos);
++static int ibp_close(struct inode *inode, struct file *filp);
++
++static const struct file_operations ibp_fops = {
++ .owner = THIS_MODULE,
++ .open = ibp_open,
++ .write = ibp_write,
++ .release = ibp_close,
++};
++
++static int ibp_create_cdev(struct ibp_client *client, uint16_t node)
++{
++ struct device *device;
++ dev_t devt;
++ int ret;
++
++ ret = alloc_chrdev_region(&devt, 0, 1, DRV_BASE);
++ if (ret) {
++ print_err("alloc_chrdev_region returned %d\n", ret);
++ return ret;
++ }
++
++ cdev_init(&client->cdev, &ibp_fops);
++ client->cdev.owner = THIS_MODULE;
++
++ ret = cdev_add(&client->cdev, devt, 1);
++ if (ret) {
++ print_err("cdev_add returned %d\n", ret);
++ goto err0;
++ }
++
++ device = device_create(ibp_class, NULL, devt,
++ NULL, DRV_BASE "%u", node);
++ if (IS_ERR(device)) {
++ ret = PTR_ERR(device);
++ goto err1;
++ }
++
++ /* Start on the cdev_list (until ibp_register_client). */
++ down_write(&list_rwsem);
++ list_add_tail(&client->list, &cdev_list);
++ up_write(&list_rwsem);
++
++ return 0;
++err1:
++ cdev_del(&client->cdev);
++err0:
++ unregister_chrdev_region(devt, 1);
++ return ret;
++}
++
++static void ibp_destroy_cdev(struct ibp_client *client)
++{
++ device_destroy(ibp_class, client->cdev.dev);
++ cdev_del(&client->cdev);
++ unregister_chrdev_region(client->cdev.dev, 1);
++}
++
++static struct ibp_client *ibp_create_client(scif_epd_t ep, uint16_t node)
++{
++ struct ibp_client *client;
++ int ret = -ENOMEM;
++
++ /* If a reconnect occurs while on the cdev_list, just update the ep. */
++ down_read(&list_rwsem);
++ list_for_each_entry(client, &cdev_list, list) {
++ if (client->node == node) {
++ up_read(&list_rwsem);
++ scif_close(client->ep);
++ client->ep = ep;
++ return client;
++ }
++ }
++ up_read(&list_rwsem);
++
++ client = kzalloc(sizeof(*client), GFP_KERNEL);
++ if (!client) {
++ print_err("kzalloc failed\n");
++ goto err0;
++ }
++
++ client->ep = ep;
++ client->node = node;
++ atomic_set(&client->busy, 0);
++ atomic_set(&client->rx_in_process, 0);
++ init_waitqueue_head(&client->rx_wait_queue);
++ mutex_init(&client->ucontext_mutex);
++ INIT_LIST_HEAD(&client->ucontext_list);
++
++ client->workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!client->workqueue) {
++ print_err("create_singlethread_workqueue failed\n");
++ goto err1;
++ }
++
++ ret = ibp_create_cdev(client, node);
++ if (ret)
++ goto err2;
++
++ return client;
++err2:
++ destroy_workqueue(client->workqueue);
++err1:
++ kfree(client);
++err0:
++ return ERR_PTR(ret);
++}
++
++static void ibp_destroy_client(struct ibp_client *client)
++{
++ ibp_cleanup_ucontext(&client->ucontext_list);
++ scif_close(client->ep);
++ flush_workqueue(client->workqueue);
++ destroy_workqueue(client->workqueue);
++ ibp_destroy_cdev(client);
++ kfree(client);
++}
++
++static void ibp_register_client(struct ibp_client *client)
++{
++ struct ibp_device *device;
++
++ down_write(&list_rwsem);
++ list_move(&client->list, &client_list);
++ list_for_each_entry(device, &device_list, list)
++ ibp_send_add(client, device);
++ up_write(&list_rwsem);
++}
++
++static void ibp_unregister_client(struct ibp_client *client)
++{
++ struct ibp_device *device;
++
++ flush_workqueue(client->workqueue);
++
++ down_write(&list_rwsem);
++ list_del(&client->list);
++ list_for_each_entry(device, &device_list, list)
++ ibp_send_remove(client, device);
++ up_write(&list_rwsem);
++}
++
++static int ibp_open(struct inode *inode, struct file *filp)
++{
++ struct ibp_client *client;
++
++ client = container_of(inode->i_cdev, struct ibp_client, cdev);
++
++ filp->private_data = client;
++
++ if (atomic_add_return(1, &client->busy) == 1)
++ ibp_register_client(client);
++
++ return 0;
++}
++
++static ssize_t ibp_write(struct file *filp, const char __user *buf,
++ size_t count, loff_t *pos)
++{
++ struct ibp_client *client;
++ void *rx_buf, *tx_buf;
++ int ret = -ENOMEM;
++
++ client = filp->private_data;
++
++ rx_buf = (void *) __get_free_page(GFP_KERNEL);
++ if (!rx_buf) {
++ print_err("__get_free_page rx_buf failed\n");
++ goto err0;
++ }
++
++ tx_buf = (void *) __get_free_page(GFP_KERNEL);
++ if (!tx_buf) {
++ print_err("__get_free_page tx_buf failed\n");
++ goto err1;
++ }
++
++ ret = ibp_process_recvs(client, rx_buf, tx_buf);
++
++ free_page((uintptr_t) tx_buf);
++err1:
++ free_page((uintptr_t) rx_buf);
++err0:
++ return ret;
++}
++
++static int ibp_close(struct inode *inode, struct file *filp)
++{
++ struct ibp_client *client;
++
++ client = filp->private_data;
++
++ if (atomic_sub_and_test(1, &client->busy)) {
++ ibp_unregister_client(client);
++ device_destroy(ibp_class, client->cdev.dev);
++ ibp_destroy_client(client);
++ }
++
++ return 0;
++}
++
++int ibp_get_device(struct ibp_device *device)
++{
++ struct ibp_device *entry;
++
++ down_read(&list_rwsem);
++ list_for_each_entry(entry, &device_list, list) {
++ if (entry == device) {
++ kref_get(&device->ref);
++ break;
++ }
++ }
++ up_read(&list_rwsem);
++
++ return (entry == device) ? 0 : -ENODEV;
++}
++
++static void ibp_complete_device(struct kref *ref)
++{
++ struct ibp_device *device;
++
++ device = container_of(ref, struct ibp_device, ref);
++ complete(&device->done);
++}
++
++void ibp_put_device(struct ibp_device *device)
++{
++ kref_put(&device->ref, ibp_complete_device);
++}
++
++static struct ibp_device *ibp_create_device(struct ib_device *ib_dev)
++{
++ struct ibp_device *device;
++
++ device = kzalloc(sizeof(*device), GFP_KERNEL);
++ if (!device) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++ device->ib_dev = ib_dev;
++ kref_init(&device->ref);
++ init_completion(&device->done);
++
++ ib_set_client_data(ib_dev, &ib_client, device);
++
++ return device;
++}
++
++static void ibp_destroy_device(struct ibp_device *device)
++{
++ ibp_put_device(device);
++ wait_for_completion(&device->done);
++
++ ib_set_client_data(device->ib_dev, &ib_client, NULL);
++ kfree(device);
++}
++
++static void ibp_register_device(struct ibp_device *device)
++{
++ struct ibp_client *client;
++
++ down_write(&list_rwsem);
++ list_add_tail(&device->list, &device_list);
++ list_for_each_entry(client, &client_list, list)
++ ibp_send_add(client, device);
++ up_write(&list_rwsem);
++}
++
++static void ibp_unregister_device(struct ibp_device *device)
++{
++ struct ibp_client *client;
++
++ down_write(&list_rwsem);
++ list_for_each_entry(client, &client_list, list)
++ ibp_send_remove(client, device);
++ list_del(&device->list);
++ up_write(&list_rwsem);
++}
++
++static int ibp_ignore_ib_dev(struct ib_device *ib_dev)
++{
++ /*
++ * Only allow PCI-based channel adapters and RNICs.
++ * PCI is required in order to read the vendor id.
++ */
++ return (!ib_dev->dma_device->bus ||
++ !ib_dev->dma_device->bus->name ||
++ strncasecmp(ib_dev->dma_device->bus->name, "pci", 3) ||
++ ((ib_dev->node_type != RDMA_NODE_IB_CA) &&
++ (ib_dev->node_type != RDMA_NODE_RNIC))) ? 1 : 0;
++}
++
++static void ibp_add_one(struct ib_device *ib_dev)
++{
++ struct ibp_device *device;
++
++ if (ibp_ignore_ib_dev(ib_dev))
++ return;
++
++ device = ibp_create_device(ib_dev);
++ if (IS_ERR(device))
++ return;
++
++ ibp_register_device(device);
++}
++
++static void ibp_remove_one(struct ib_device *ib_dev, void *data)
++{
++ struct ibp_device *device;
++
++ device = ib_get_client_data(ib_dev, &ib_client);
++ if (!device)
++ return;
++
++ ibp_unregister_device(device);
++ ibp_destroy_device(device);
++}
++
++static int ibp_listen(void *data)
++{
++ struct ibp_client *client;
++ struct scif_pollepd listen;
++ struct scif_port_id peer;
++ scif_epd_t ep;
++ int ret;
++
++ listen.epd = scif_open();
++ if (!listen.epd) {
++ print_err("scif_open failed\n");
++ ret = -EIO;
++ goto err0;
++ }
++ listen.events = POLLIN;
++
++ ret = scif_bind(listen.epd, port);
++ if (ret < 0) {
++ print_err("scif_bind returned %d\n", ret);
++ goto err1;
++ }
++
++ ret = scif_listen(listen.epd, backlog);
++ if (ret) {
++ print_err("scif_listen returned %d\n", ret);
++ goto err1;
++ }
++
++ while (!kthread_should_stop()) {
++
++ schedule();
++
++ ret = scif_poll(&listen, 1, timeout);
++ if (ret == 0) /* timeout */
++ continue;
++ if (ret < 0) {
++ print_err("scif_poll revents 0x%x\n", listen.revents);
++ continue;
++ }
++
++ ret = scif_accept(listen.epd, &peer, &ep, 0);
++ if (ret) {
++ print_err("scif_accept returned %d\n", ret);
++ continue;
++ }
++
++ print_dbg("accepted node %d port %d\n", peer.node, peer.port);
++
++ client = ibp_create_client(ep, peer.node);
++ if (IS_ERR(client)) {
++ ret = PTR_ERR(client);
++ print_err("ibp_create_client returned %d\n", ret);
++ scif_close(ep);
++ }
++ }
++err1:
++ scif_close(listen.epd);
++err0:
++ return ret;
++}
++
++static int __init ibp_server_init(void)
++{
++ int ret;
++
++ print_info(DRV_SIGNON);
++
++ init_rwsem(&list_rwsem);
++
++ ibp_class = class_create(THIS_MODULE, "infiniband_proxy");
++ if (IS_ERR(ibp_class)) {
++ ret = PTR_ERR(ibp_class);
++ print_err("class_create returned %d\n", ret);
++ goto err0;
++ }
++
++ ret = ib_register_client(&ib_client);
++ if (ret) {
++ print_err("ib_register_client returned %d\n", ret);
++ goto err1;
++ }
++
++ /* Start a thread for inbound connections. */
++ listen_thread = kthread_run(ibp_listen, NULL, DRV_NAME);
++ if (IS_ERR(listen_thread)) {
++ ret = PTR_ERR(listen_thread);
++ print_err("kthread_run returned %d\n", ret);
++ goto err2;
++ }
++
++ return 0;
++err2:
++ ib_unregister_client(&ib_client);
++err1:
++ class_destroy(ibp_class);
++err0:
++ return ret;
++}
++
++static void __exit ibp_server_exit(void)
++{
++ struct ibp_client *client, *next;
++
++ kthread_stop(listen_thread);
++
++ list_for_each_entry_safe(client, next, &cdev_list, list)
++ ibp_destroy_client(client);
++
++ ib_unregister_client(&ib_client);
++ class_destroy(ibp_class);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_server_init);
++module_exit(ibp_server_exit);
+diff --git a/drivers/infiniband/ibp/drv/server.h b/drivers/infiniband/ibp/drv/server.h
+new file mode 100644
+index 0000000..faf8dae
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/server.h
+@@ -0,0 +1,182 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef SERVER_H
++#define SERVER_H
++
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <linux/anon_inodes.h>
++#include <linux/file.h>
++#include <rdma/ib_user_verbs.h>
++#include <rdma/ib_umem.h>
++#include "ibp-abi.h"
++#include "common.h"
++
++#define DRV_ROLE "Server"
++#define DRV_NAME "ibp_server"
++#include "compat.h"
++
++#define DRV_BASE "ibp"
++
++#define MAX_MSG_SIZE PAGE_SIZE
++
++extern int timeout;
++
++struct ibp_device {
++ struct list_head list;
++ struct ib_device *ib_dev;
++ struct kref ref;
++ struct completion done;
++};
++
++struct ibp_client {
++ struct list_head list;
++ scif_epd_t ep;
++ struct workqueue_struct *workqueue;
++ struct mutex ucontext_mutex;
++ struct list_head ucontext_list;
++ wait_queue_head_t rx_wait_queue;
++ atomic_t rx_in_process;
++ struct cdev cdev;
++ atomic_t busy;
++ uint16_t node;
++};
++
++struct ibp_queued_response {
++ struct ibp_client *client;
++ struct work_struct work;
++ u64 msg[0];
++};
++
++struct ibp_event {
++ struct ibp_client *client;
++ struct work_struct work;
++ u64 context;
++ u64 ibdev;
++ enum ib_event_type type;
++};
++
++struct ibp_comp {
++ struct ibp_client *client;
++ struct work_struct work;
++ void *cq_context;
++};
++
++struct ibp_ucontext {
++ struct ib_ucontext *ibucontext;
++ struct ibp_client *client;
++ struct ibp_device *device;
++ struct file *filp;
++ struct ib_event_handler event_handler;
++ u64 ibdev;
++ struct mutex mutex;
++ struct list_head list;
++ struct list_head mmap_list;
++ struct rb_root reg_tree;
++};
++
++struct ibp_qp {
++ struct ib_qp *ibqp;
++ struct list_head mcast;
++};
++
++struct ibp_mcast_entry {
++ struct list_head list;
++ union ib_gid gid;
++ u16 lid;
++};
++
++struct ibp_mmap {
++ struct list_head list;
++ struct ibp_ucontext *ucontext;
++ u64 len;
++ u64 prot;
++ u64 vaddr;
++ void __iomem *io_addr;
++ off_t scif_addr;
++};
++
++struct ibp_reg {
++ struct rb_node node;
++ struct scif_range *range;
++ struct ibp_ucontext *ucontext;
++ struct kref ref;
++ u64 virt_addr;
++ u64 length;
++ off_t offset;
++ u32 access;
++};
++
++struct ibp_mr {
++ struct ib_mr *ibmr;
++ struct ibp_reg *reg;
++};
++
++#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
++ do { \
++ (udata)->ops = &ibp_copy; \
++ (udata)->inbuf = (void *)(ibuf); \
++ (udata)->outbuf = (void *)(obuf); \
++ (udata)->inlen = (ilen) + \
++ sizeof(struct ib_uverbs_cmd_hdr); \
++ (udata)->outlen = (olen); \
++ } while (0)
++
++#define IBP_INIT_MSG(handle, msg, size, op) \
++ do { \
++ (msg)->header.opcode = IBP_##op; \
++ (msg)->header.length = (size); \
++ (msg)->header.status = 0; \
++ (msg)->header.reserved = 0; \
++ (msg)->header.device = (uintptr_t)(handle); \
++ (msg)->header.request = 0; \
++ } while (0)
++
++#define IBP_INIT_RESP(handle, resp, size, op, req, stat) \
++ do { \
++ (resp)->header.opcode = IBP_##op; \
++ (resp)->header.length = (size); \
++ (resp)->header.status = (stat); \
++ (resp)->header.reserved = 0; \
++ (resp)->header.device = (uintptr_t)(handle); \
++ (resp)->header.request = (req); \
++ } while (0)
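++
++/*
++ * Handlers in server_msg.c use these helpers to stamp each reply header
++ * before sending it back over the SCIF endpoint, e.g.:
++ *
++ *	IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ *	ibp_send(client->ep, msg, len);
++ */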
++
++int ibp_process_recvs(struct ibp_client *client, void *rx_buf, void *tx_buf);
++void ibp_cleanup_ucontext(struct list_head *ucontext_list);
++int ibp_send_add(struct ibp_client *client, struct ibp_device *device);
++int ibp_send_remove(struct ibp_client *client, struct ibp_device *device);
++int ibp_get_device(struct ibp_device *device);
++void ibp_put_device(struct ibp_device *device);
++
++#endif /* SERVER_H */
+diff --git a/drivers/infiniband/ibp/drv/server_msg.c b/drivers/infiniband/ibp/drv/server_msg.c
+new file mode 100644
+index 0000000..ed5a697
+--- /dev/null
++++ b/drivers/infiniband/ibp/drv/server_msg.c
+@@ -0,0 +1,2813 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include <linux/version.h>
++
++#include "server.h"
++
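++/*
++ * Kernel 4.8 replaced struct dma_attrs with a plain unsigned long of
++ * DMA_ATTR_* flags; these wrappers hide the difference for the umem
++ * mapping code below.
++ */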
++#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0)
++ #define IBP_DMA_ATTR(attrp, ...) DEFINE_DMA_ATTRS(_attrp); typeof(_attrp) *attrp = &_attrp;
++ #define IBP_SET_DMA_ATTR(attrp, val, ...) dma_set_attr(val, attrp);
++#else
++ #define IBP_DMA_ATTR(attrp, ...) unsigned long attrp = 0;
++ #define IBP_SET_DMA_ATTR(attrp, val, ...) attrp |= val;
++#endif
++
++/*
++ * umem functions
++ */
++static int ibp_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
++{
++ size_t bytes;
++
++ bytes = min(len, udata->inlen);
++
++ memcpy(dest, udata->inbuf, bytes);
++ if (bytes < len) {
++ memset(dest + bytes, 0, len - bytes);
++ return -EFAULT;
++ }
++ return 0;
++}
++
++static int ibp_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
++{
++ size_t bytes;
++
++ bytes = min(len, udata->outlen);
++
++ memcpy(udata->outbuf, src, bytes);
++ udata->outlen -= bytes;
++
++ return (bytes < len) ? -EFAULT : 0;
++}
++
++static struct ib_udata_ops ibp_copy = {
++ .copy_from = ibp_copy_from_udata,
++ .copy_to = ibp_copy_to_udata
++};
++
++static struct ibp_reg *__ibp_find_reg(struct ibp_ucontext *ucontext,
++ unsigned long virt, size_t size,
++ int access)
++{
++ struct rb_node *node;
++ struct ibp_reg *reg;
++
++ node = ucontext->reg_tree.rb_node;
++
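++ /* Nodes are ordered lexicographically by (virt_addr, length, access). */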
++ while (node) {
++ reg = rb_entry(node, struct ibp_reg, node);
++
++ if ((virt == reg->virt_addr) &&
++ (size == reg->length) &&
++ (access == reg->access))
++ return reg;
++
++ if (virt < reg->virt_addr)
++ node = node->rb_left;
++ else if (virt > reg->virt_addr)
++ node = node->rb_right;
++ else if (size < reg->length)
++ node = node->rb_left;
++ else if (size > reg->length)
++ node = node->rb_right;
++ else if (access < reg->access)
++ node = node->rb_left;
++ else
++ node = node->rb_right;
++ }
++
++ return ERR_PTR(-EFAULT);
++}
++
++static struct ibp_reg *ibp_find_reg(struct ibp_ucontext *ucontext,
++ unsigned long virt, size_t size,
++ int access)
++{
++ struct ibp_reg *reg;
++
++ mutex_lock(&ucontext->mutex);
++ reg = __ibp_find_reg(ucontext, virt, size, access);
++ mutex_unlock(&ucontext->mutex);
++
++ return reg;
++}
++
++static void __ibp_umem_release(struct ib_device *dev, struct ib_umem *umem,
++ int dirty)
++{
++ struct scatterlist *sg;
++ int i;
++
++ if (umem->nmap > 0)
++ ib_dma_unmap_sg(dev, umem->sg_head.sgl,
++ umem->nmap, DMA_BIDIRECTIONAL);
++
++ if (umem->writable && dirty)
++ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i)
++ set_page_dirty_lock(sg_page(sg));
++
++ sg_free_table(&umem->sg_head);
++}
++
++static struct ib_umem *ibp_umem_get(struct ib_ucontext *ibucontext,
++ unsigned long addr, size_t size,
++ int access, int dmasync)
++{
++ struct ibp_reg *reg;
++ struct ib_umem *umem;
++ struct device *dma_device;
++ struct page *page;
++ struct scatterlist *sg;
++ void **va;
++ dma_addr_t *pa;
++ dma_addr_t daddr;
++ unsigned int dsize;
++ int npages;
++ int off;
++ int i;
++ int ret = 0;
++ IBP_DMA_ATTR(attrs);
++
++ reg = ibp_find_reg(ibucontext->umem_private_data, addr, size, access);
++ if (IS_ERR(reg))
++ return ERR_CAST(reg);
++
++ if (dmasync)
++ IBP_SET_DMA_ATTR(attrs, DMA_ATTR_WRITE_BARRIER);
++
++ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
++ if (!umem) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ umem->length = size;
++ umem->address = addr;
++ umem->page_size = PAGE_SIZE;
++ umem->pid = get_task_pid(current, PIDTYPE_PID);
++ umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
++
++ dsize = 0;
++ daddr = 0;
++ va = reg->range->va;
++ pa = reg->range->phys_addr;
++ dma_device = ibucontext->device->dma_device;
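++ /*
++ * off is the byte offset of addr within the registered SCIF range;
++ * npages spans the request plus any leading sub-page offset. off is
++ * then converted to a page index into the range's va/phys arrays.
++ */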
++ off = (addr - reg->virt_addr) + reg->offset;
++ npages = PAGE_ALIGN(size + (off & ~PAGE_MASK)) >> PAGE_SHIFT;
++ off >>= PAGE_SHIFT;
++
++ ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
++ if (ret) {
++ print_err("sg_alloc_table failed\n");
++ goto err1;
++ }
++
++ /* Assume hugetlb unless proven otherwise. */
++ umem->hugetlb = 1;
++ for (i = 0; i < npages && umem->hugetlb; i++) {
++ if (!dsize) {
++ dsize = PAGE_SIZE;
++ daddr = pa[i + off];
++ /* Page must start on a huge page boundary. */
++ if ((daddr & ~HPAGE_MASK) >= PAGE_SIZE)
++ umem->hugetlb = 0;
++ } else if (daddr + dsize != pa[i + off])
++ /* Pages must be contiguous. */
++ umem->hugetlb = 0;
++ else {
++ dsize += PAGE_SIZE;
++ if (dsize == HPAGE_SIZE)
++ dsize = 0;
++ }
++ }
++ /* Page must end on a huge page boundary. */
++ if (umem->hugetlb && ((daddr + dsize) & ~HPAGE_MASK))
++ umem->hugetlb = 0;
++
++ for_each_sg(umem->sg_head.sgl, sg, npages, i) {
++ page = vmalloc_to_page(va[i + off]);
++ if (!page) {
++ print_err("vmalloc_to_page failed\n");
++ ret = -EINVAL;
++ goto err2;
++ }
++ sg_set_page(sg, page, PAGE_SIZE, 0);
++ }
++
++ umem->npages = npages;
++
++ umem->nmap = ib_dma_map_sg_attrs(ibucontext->device,
++ umem->sg_head.sgl,
++ umem->npages,
++ DMA_BIDIRECTIONAL,
++ attrs);
++ if (umem->nmap <= 0) {
++ print_err("map_sg_attrs failed\n");
++ ret = -ENOMEM;
++ goto err2;
++ }
++
++ return umem;
++err2:
++ __ibp_umem_release(ibucontext->device, umem, 0);
++err1:
++ put_pid(umem->pid);
++ kfree(umem);
++ return ERR_PTR(ret);
++}
++
++static void ibp_umem_release(struct ib_umem *umem)
++{
++ struct ib_ucontext *ibucontext;
++
++ ibucontext = umem->context;
++
++ __ibp_umem_release(ibucontext->device, umem, 0);
++
++ put_pid(umem->pid);
++ kfree(umem);
++}
++
++static struct ib_umem_ops ibp_umem = {
++ .get = &ibp_umem_get,
++ .release = &ibp_umem_release,
++};
++
++static int ibp_send(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
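++ /* scif_send() may accept fewer than len bytes; loop until complete. */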
++ while (len) {
++ ret = scif_send(ep, buf, (uint32_t) len, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_send returned %d\n", ret);
++ return ret;
++ }
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++static int ibp_recv(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
++ while (len) {
++ ret = scif_recv(ep, buf, (uint32_t) len, SCIF_RECV_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_recv returned %d\n", ret);
++ return ret;
++ }
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++int ibp_send_add(struct ibp_client *client, struct ibp_device *device)
++{
++ struct ibp_add_device_msg msg;
++ struct pci_dev *pdev;
++
++ print_trace("in\n");
++
++ pdev = to_pci_dev(device->ib_dev->dma_device);
++
++ IBP_INIT_MSG(device, &msg, sizeof(msg), ADD_DEVICE);
++
++ strncpy(msg.data.name, device->ib_dev->name, sizeof(msg.data.name));
++ msg.data.vendor_id = pdev->vendor;
++ msg.data.device_id = pdev->device;
++
++ msg.data.ib_device = (uintptr_t) device->ib_dev;
++ msg.data.device = (uintptr_t) device;
++ msg.data.node_guid = device->ib_dev->node_guid;
++ msg.data.uverbs_cmd_mask = device->ib_dev->uverbs_cmd_mask;
++ msg.data.uverbs_abi_ver = device->ib_dev->uverbs_abi_ver;
++ msg.data.ibp_abi_ver = IBP_ABI_VERSION;
++ msg.data.num_comp_vectors = device->ib_dev->num_comp_vectors;
++ msg.data.phys_port_cnt = device->ib_dev->phys_port_cnt;
++
++ return ibp_send(client->ep, &msg, sizeof(msg));
++}
++
++int ibp_send_remove(struct ibp_client *client, struct ibp_device *device)
++{
++ struct ibp_remove_device_msg msg;
++
++ print_trace("in\n");
++
++ IBP_INIT_MSG(device, &msg, sizeof(msg), REMOVE_DEVICE);
++ return ibp_send(client->ep, &msg, sizeof(msg));
++}
++
++static void ibp_send_queued_response(struct work_struct *work)
++{
++ struct ibp_queued_response_msg *msg;
++ struct ibp_queued_response *resp;
++
++ resp = container_of(work, struct ibp_queued_response, work);
++ msg = (struct ibp_queued_response_msg *) resp->msg;
++
++ ibp_send(resp->client->ep, msg, msg->header.length);
++ kfree(resp);
++}
++
++static int ibp_queue_response(struct ibp_client *client,
++ struct ibp_queued_response_msg *msg)
++{
++ struct ibp_queued_response *resp;
++ size_t len;
++
++ len = sizeof(*resp) + msg->header.length;
++
++ resp = kmalloc(len, GFP_ATOMIC);
++ if (!resp) {
++ print_err("kmalloc failed\n");
++ return -ENOMEM;
++ }
++
++ resp->client = client;
++ memcpy(&resp->msg, msg, msg->header.length);
++
++ /* Queue to serialize behind any associated events. */
++ INIT_WORK(&resp->work, ibp_send_queued_response);
++ queue_work(client->workqueue, &resp->work);
++
++ return 0;
++}
++
++static int ibp_cmd_error(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf, int ret)
++{
++ struct ibp_verb_response_msg *msg;
++ size_t len;
++
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ IBP_INIT_RESP(hdr->device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_bad_request(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ print_dbg("opcode 0x%x\n", hdr->opcode);
++ return ibp_cmd_error(client, hdr, tx_buf, -EBADRQC);
++}
++
++static int ibp_cmd_not_supported(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ print_dbg("opcode 0x%x\n", hdr->opcode);
++ return ibp_cmd_error(client, hdr, tx_buf, -ENOSYS);
++}
++
++static int ibp_cmd_query_device(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_query_device_resp *resp;
++ struct ib_device_attr *attr;
++ size_t len;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ attr = &device->ib_dev->attrs;
++
++ resp = (struct ibp_query_device_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->fw_ver = attr->fw_ver;
++ resp->sys_image_guid = attr->sys_image_guid;
++ resp->max_mr_size = attr->max_mr_size;
++ resp->page_size_cap = attr->page_size_cap;
++ resp->vendor_id = attr->vendor_id;
++ resp->vendor_part_id = attr->vendor_part_id;
++ resp->hw_ver = attr->hw_ver;
++ resp->max_qp = attr->max_qp;
++ resp->max_qp_wr = attr->max_qp_wr;
++ resp->device_cap_flags = attr->device_cap_flags;
++ resp->max_sge = attr->max_sge;
++ resp->max_sge_rd = attr->max_sge_rd;
++ resp->max_cq = attr->max_cq;
++ resp->max_cqe = attr->max_cqe;
++ resp->max_mr = attr->max_mr;
++ resp->max_pd = attr->max_pd;
++ resp->max_qp_rd_atom = attr->max_qp_rd_atom;
++ resp->max_ee_rd_atom = attr->max_ee_rd_atom;
++ resp->max_res_rd_atom = attr->max_res_rd_atom;
++ resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom;
++ resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom;
++ resp->atomic_cap = attr->atomic_cap;
++ resp->masked_atomic_cap = attr->masked_atomic_cap;
++ resp->max_ee = attr->max_ee;
++ resp->max_rdd = attr->max_rdd;
++ resp->max_mw = attr->max_mw;
++ resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp;
++ resp->max_raw_ethy_qp = attr->max_raw_ethy_qp;
++ resp->max_mcast_grp = attr->max_mcast_grp;
++ resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
++ resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
++ resp->max_ah = attr->max_ah;
++ resp->max_fmr = attr->max_fmr;
++ resp->max_map_per_fmr = attr->max_map_per_fmr;
++ resp->max_srq = attr->max_srq;
++ resp->max_srq_wr = attr->max_srq_wr;
++ resp->max_srq_sge = attr->max_srq_sge;
++ resp->max_fast_reg_page_list_len = attr->max_fast_reg_page_list_len;
++ resp->max_pkeys = attr->max_pkeys;
++ resp->local_ca_ack_delay = attr->local_ca_ack_delay;
++
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, 0);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_query_port(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_query_port_cmd *cmd;
++ struct ibp_query_port_resp *resp;
++ struct ib_port_attr attr;
++ size_t len;
++ int ret;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_query_port_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_query_port(device->ib_dev, cmd->port_num, &attr);
++ if (ret) {
++ print_err("ib_query_port returned %d\n", ret);
++ goto send_resp;
++ }
++
++ resp = (struct ibp_query_port_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->state = attr.state;
++ resp->max_mtu = attr.max_mtu;
++ resp->active_mtu = attr.active_mtu;
++ resp->gid_tbl_len = attr.gid_tbl_len;
++ resp->port_cap_flags = attr.port_cap_flags;
++ resp->max_msg_sz = attr.max_msg_sz;
++ resp->bad_pkey_cntr = attr.bad_pkey_cntr;
++ resp->qkey_viol_cntr = attr.qkey_viol_cntr;
++ resp->pkey_tbl_len = attr.pkey_tbl_len;
++ resp->lid = attr.lid;
++ resp->sm_lid = attr.sm_lid;
++ resp->lmc = attr.lmc;
++ resp->max_vl_num = attr.max_vl_num;
++ resp->sm_sl = attr.sm_sl;
++ resp->subnet_timeout = attr.subnet_timeout;
++ resp->init_type_reply = attr.init_type_reply;
++ resp->active_width = attr.active_width;
++ resp->active_speed = attr.active_speed;
++ resp->phys_state = attr.phys_state;
++ resp->link_layer = rdma_port_get_link_layer(device->ib_dev,
++ cmd->port_num);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_query_gid(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_query_gid_cmd *cmd;
++ struct ibp_query_gid_resp *resp;
++ size_t len;
++ union ib_gid gid;
++ int ret;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_query_gid_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_query_gid(device->ib_dev, cmd->port_num, cmd->index, &gid, NULL);
++ if (ret) {
++ print_err("ib_query_gid returned %d\n", ret);
++ goto send_resp;
++ }
++
++ resp = (struct ibp_query_gid_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->subnet_prefix = gid.global.subnet_prefix;
++ resp->interface_id = gid.global.interface_id;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_query_pkey(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_query_pkey_cmd *cmd;
++ struct ibp_query_pkey_resp *resp;
++ size_t len;
++ u16 pkey;
++ int ret;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_query_pkey_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_query_pkey(device->ib_dev, cmd->port_num, cmd->index, &pkey);
++ if (ret) {
++ print_err("ib_query_pkey returned %d\n", ret);
++ goto send_resp;
++ }
++ resp = (struct ibp_query_pkey_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->pkey = pkey;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static void ibp_async_event(struct work_struct *work)
++{
++ struct ibp_event *event;
++ struct ibp_async_event_msg msg;
++
++ event = container_of(work, struct ibp_event, work);
++
++ IBP_INIT_MSG(NULL, &msg, sizeof(msg), ASYNC_EVENT);
++
++ msg.data.ibdev = event->ibdev;
++ msg.data.context = (uintptr_t) event->context;
++ msg.data.type = event->type;
++
++ ibp_send(event->client->ep, &msg, sizeof(msg));
++
++ kfree(event);
++}
++
++static void ibp_event_handler(struct ib_event_handler *handler,
++ struct ib_event *ibevent)
++{
++ struct ibp_ucontext *ucontext;
++ struct ibp_client *client;
++ struct ibp_event *event;
++
++ ucontext = container_of(handler, struct ibp_ucontext, event_handler);
++
++ if (ucontext->ibucontext->closing) {
++ print_dbg("ignoring event, connection closing\n");
++ return;
++ }
++
++ event = kmalloc(sizeof(*event), GFP_ATOMIC);
++ if (!event) {
++ print_err("kmalloc failed\n");
++ return;
++ }
++
++ client = ucontext->client;
++
++ event->client = client;
++ event->context = ibevent->element.port_num;
++ event->type = ibevent->event;
++ event->ibdev = ucontext->ibdev;
++
++ INIT_WORK(&event->work, ibp_async_event);
++ queue_work(client->workqueue, &event->work);
++}
++
++static int ibp_mmap(struct file *filp, struct vm_area_struct *vma)
++{
++ struct ibp_ucontext *ucontext;
++ struct ib_ucontext *ibucontext;
++
++ ucontext = filp->private_data;
++ ibucontext = ucontext->ibucontext;
++
++ return (ibucontext->device->mmap) ?
++ ibucontext->device->mmap(ibucontext, vma) : -ENOSYS;
++}
++
++static const struct file_operations ibp_fops = {
++ .mmap = ibp_mmap,
++};
++
++static int ibp_cmd_alloc_ucontext(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_alloc_ucontext_cmd *cmd;
++ struct ibp_alloc_ucontext_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ib_ucontext *ibucontext;
++ struct ib_udata udata;
++ size_t len;
++ size_t outlen;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_alloc_ucontext_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_alloc_ucontext_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ ret = ibp_get_device(device);
++ if (ret) {
++ print_err("ibp_get_device returned %d\n", ret);
++ goto send_resp;
++ }
++
++ ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
++ if (!ucontext) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto err1;
++ }
++ ucontext->device = device;
++
++ ibucontext = device->ib_dev->alloc_ucontext(device->ib_dev, &udata);
++ if (IS_ERR(ibucontext)) {
++ ret = PTR_ERR(ibucontext);
++ print_err("Invalid ibucontext %p\n", ibucontext);
++ goto err2;
++ }
++
++ ibucontext->umem_ops = &ibp_umem;
++ ibucontext->umem_private_data = ucontext;
++ ibucontext->device = device->ib_dev;
++ ibucontext->closing = 0;
++
++ INIT_LIST_HEAD(&ibucontext->pd_list);
++ INIT_LIST_HEAD(&ibucontext->mr_list);
++ INIT_LIST_HEAD(&ibucontext->mw_list);
++ INIT_LIST_HEAD(&ibucontext->cq_list);
++ INIT_LIST_HEAD(&ibucontext->qp_list);
++ INIT_LIST_HEAD(&ibucontext->srq_list);
++ INIT_LIST_HEAD(&ibucontext->ah_list);
++ INIT_LIST_HEAD(&ibucontext->xrcd_list);
++
++ ucontext->filp = anon_inode_getfile("["DRV_NAME"]", &ibp_fops,
++ ucontext, O_RDWR);
++ if (IS_ERR(ucontext->filp)) {
++ ret = PTR_ERR(ucontext->filp);
++ print_err("anon_inode_getfile returned %d\n", ret);
++ goto err3;
++ }
++
++ if (cmd->ibdev) {
++ ucontext->ibdev = cmd->ibdev;
++ INIT_IB_EVENT_HANDLER(&ucontext->event_handler, device->ib_dev,
++ ibp_event_handler);
++ ret = ib_register_event_handler(&ucontext->event_handler);
++ if (ret) {
++ print_err("event_handler returned %d\n", ret);
++ goto err4;
++ }
++ }
++
++ ucontext->client = client;
++ ucontext->ibucontext = ibucontext;
++ mutex_init(&ucontext->mutex);
++ INIT_LIST_HEAD(&ucontext->mmap_list);
++ ucontext->reg_tree = RB_ROOT;
++
++ mutex_lock(&client->ucontext_mutex);
++ list_add_tail(&ucontext->list, &client->ucontext_list);
++ mutex_unlock(&client->ucontext_mutex);
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->ucontext = (uintptr_t)ucontext;
++
++ goto send_resp;
++
++err4:
++ fput(ucontext->filp);
++err3:
++ device->ib_dev->dealloc_ucontext(ibucontext);
++err2:
++ kfree(ucontext);
++err1:
++ ibp_put_device(device);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_dealloc_ucontext(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_dealloc_ucontext_cmd *cmd;
++ struct ibp_queued_response_msg *msg;
++ struct ibp_ucontext *ucontext;
++ struct ib_ucontext *ibucontext;
++ size_t len;
++ int ret = -EINVAL;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_dealloc_ucontext_cmd *) hdr;
++ ucontext = (struct ibp_ucontext *) cmd->ucontext;
++ msg = (struct ibp_queued_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ if (IS_NULL_OR_ERR(ucontext)) {
++ print_err("Invalid ucontext %p\n", ucontext);
++ goto send_resp;
++ }
++
++ ibucontext = ucontext->ibucontext;
++
++ if (ucontext->ibdev)
++ ib_unregister_event_handler(&ucontext->event_handler);
++
++ fput(ucontext->filp);
++
++ if (device && device->ib_dev) {
++ ret = device->ib_dev->dealloc_ucontext(ibucontext);
++ if (ret) {
++ print_err("ib_dealloc_ucontext returned %d\n", ret);
++ goto send_resp;
++ }
++ }
++
++ mutex_lock(&client->ucontext_mutex);
++ list_del(&ucontext->list);
++ mutex_unlock(&client->ucontext_mutex);
++
++ ibp_put_device(device);
++ kfree(ucontext);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret);
++ return ibp_queue_response(client, msg);
++}
++
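++ /*
++  * kref release callback for a buffer registration: unlink it from the
++  * ucontext's rb-tree (if linked) and drop the pinned SCIF pages.
++  */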
++static void ibp_dereg_buf(struct kref *ref)
++{
++ struct ibp_reg *reg;
++ struct ibp_ucontext *ucontext;
++
++ reg = container_of(ref, struct ibp_reg, ref);
++ ucontext = reg->ucontext;
++
++ if (!RB_EMPTY_NODE(&reg->node)) {
++ mutex_lock(&ucontext->mutex);
++ rb_erase(&reg->node, &ucontext->reg_tree);
++ mutex_unlock(&ucontext->mutex);
++ }
++
++ if (reg->range)
++ scif_put_pages(reg->range);
++
++ kfree(reg);
++}
++
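++ /*
++  * Insert a registration into the rb-tree, ordered by virt_addr, then
++  * length, then access flags. Returns the existing entry on an exact
++  * match, or NULL after linking the new node. The caller holds
++  * ucontext->mutex.
++  */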
++static struct ibp_reg *__ibp_insert_reg_buf(struct ibp_ucontext *ucontext,
++ struct ibp_reg *reg)
++{
++ struct rb_node **link;
++ struct rb_node *parent;
++ struct ibp_reg *cur_reg;
++
++ link = &ucontext->reg_tree.rb_node;
++ parent = NULL;
++
++ while (*link) {
++ parent = *link;
++ cur_reg = rb_entry(parent, struct ibp_reg, node);
++
++ if ((reg->virt_addr == cur_reg->virt_addr) &&
++ (reg->length == cur_reg->length) &&
++ (reg->access == cur_reg->access))
++ return cur_reg;
++
++ if (reg->virt_addr < cur_reg->virt_addr)
++ link = &(*link)->rb_left;
++ else if (reg->virt_addr > cur_reg->virt_addr)
++ link = &(*link)->rb_right;
++ else if (reg->length < cur_reg->length)
++ link = &(*link)->rb_left;
++ else if (reg->length > cur_reg->length)
++ link = &(*link)->rb_right;
++ else if (reg->access < cur_reg->access)
++ link = &(*link)->rb_left;
++ else
++ link = &(*link)->rb_right;
++ }
++
++ rb_link_node(&reg->node, parent, link);
++ rb_insert_color(&reg->node, &ucontext->reg_tree);
++
++ return NULL;
++}
++
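++ /*
++  * Get-or-create a buffer registration: pin the peer's pages with
++  * scif_get_pages(), then either insert the new entry or take a
++  * reference on an identical existing one.
++  */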
++static struct ibp_reg *ibp_reg_buf(struct ibp_ucontext *ucontext,
++ u64 virt_addr, u64 scif_addr, u64 length,
++ u64 offset, u32 access)
++{
++ struct ibp_reg *reg, *cur_reg;
++ int ret;
++
++ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
++ if (!reg) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ kref_init(&reg->ref);
++ RB_CLEAR_NODE(&reg->node);
++ reg->ucontext = ucontext;
++ reg->virt_addr = virt_addr;
++ reg->length = length;
++ reg->offset = offset;
++ reg->access = access;
++
++ ret = scif_get_pages(ucontext->client->ep, scif_addr,
++ PAGE_ALIGN(reg->length +
++ (reg->virt_addr & ~PAGE_MASK)),
++ &reg->range);
++ if (ret) {
++ print_err("scif_get_pages returned %d\n", ret);
++ kref_put(&reg->ref, ibp_dereg_buf);
++ return ERR_PTR(ret);
++ }
++
++ mutex_lock(&ucontext->mutex);
++
++ cur_reg = __ibp_insert_reg_buf(ucontext, reg);
++ if (cur_reg) {
++ print_dbg("__ibp_insert_reg_buf duplicate entry\n");
++ kref_get(&cur_reg->ref);
++ }
++
++ mutex_unlock(&ucontext->mutex);
++
++ if (cur_reg) {
++ kref_put(&reg->ref, ibp_dereg_buf);
++ reg = cur_reg;
++ }
++
++ return reg;
++}
++
++static int ibp_cmd_reg_buf(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_reg_buf_cmd *cmd;
++ struct ibp_reg_buf_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_reg *reg;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_reg_buf_cmd *) hdr;
++ ucontext = (struct ibp_ucontext *) cmd->ucontext;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ reg = ibp_reg_buf(ucontext, cmd->virt_addr, cmd->scif_addr,
++ cmd->length, cmd->offset, cmd->access);
++ if (IS_ERR(reg)) {
++ ret = PTR_ERR(reg);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto send_resp;
++ }
++
++ resp = (struct ibp_reg_buf_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->reg = (uintptr_t)reg;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_dereg_buf(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_dereg_buf_cmd *cmd;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_reg *reg;
++ size_t len;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_dereg_buf_cmd *) hdr;
++ reg = (struct ibp_reg *) cmd->reg;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ kref_put(&reg->ref, ibp_dereg_buf);
++
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, 0);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_convert_prot_flags(unsigned long prot)
++{
++ int prot_flags;
++
++ prot_flags = 0;
++
++ if (prot & PROT_READ)
++ prot_flags |= SCIF_PROT_READ;
++
++ if (prot & PROT_WRITE)
++ prot_flags |= SCIF_PROT_WRITE;
++
++ return prot_flags;
++}
++
++static int ibp_convert_map_flags(unsigned long flags)
++{
++ int map_flags;
++
++ map_flags = SCIF_MAP_KERNEL;
++
++ if (flags & MAP_FIXED)
++ map_flags |= SCIF_MAP_FIXED;
++
++ return map_flags;
++}
++
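++ /*
++  * Register the (single-page) mmap region with SCIF: resolve the pfn
++  * backing the vma, ioremap it, and expose it at a SCIF offset the
++  * peer can map.
++  */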
++static int ibp_scif_register(struct ibp_client *client, struct ibp_mmap *mmap,
++ unsigned long flags)
++{
++ struct vm_area_struct *vma;
++ unsigned long npages;
++ unsigned long pfn;
++ int offset;
++ int ret;
++
++ print_trace("in\n");
++
++ offset = mmap->vaddr & ~PAGE_MASK;
++ npages = PAGE_ALIGN(mmap->len + offset) >> PAGE_SHIFT;
++ if (npages != 1) {
++ print_err("request %lu but only one page supported\n", npages);
++ return -EINVAL;
++ }
++
++ down_write(&current->mm->mmap_sem);
++ vma = find_vma(current->mm, mmap->vaddr);
++ if (!vma) {
++ up_write(&current->mm->mmap_sem);
++ print_err("find_vma failed\n");
++ return -EFAULT;
++ }
++
++ ret = follow_pfn(vma, mmap->vaddr, &pfn);
++
++ up_write(&current->mm->mmap_sem);
++ if (ret) {
++ print_err("follow_pfn returned %d\n", ret);
++ return ret;
++ }
++
++ mmap->io_addr = ioremap(page_to_phys(pfn_to_page(pfn)), mmap->len);
++ if (!mmap->io_addr) {
++ print_err("ioremap failed\n");
++ return -ENOMEM;
++ }
++
++ mmap->scif_addr = scif_register(client->ep, (void *) mmap->io_addr,
++ mmap->len, (off_t) mmap->io_addr,
++ ibp_convert_prot_flags(mmap->prot),
++ ibp_convert_map_flags(flags));
++ if (IS_ERR_VALUE(mmap->scif_addr)) {
++ ret = mmap->scif_addr;
++ print_err("scif_register returned %d\n", ret);
++ goto err0;
++ }
++
++ return 0;
++err0:
++ iounmap(mmap->io_addr);
++ return ret;
++}
++
++static
++void ibp_scif_unregister(struct ibp_client *client, struct ibp_mmap *mmap)
++{
++ int ret;
++
++ print_trace("in\n");
++
++ ret = scif_unregister(client->ep, mmap->scif_addr, mmap->len);
++ if (ret) {
++ if (ret == -ECONNRESET)
++ print_dbg("scif connection reset\n");
++ else
++ print_err("scif_unregister returned %d\n", ret);
++ }
++
++ iounmap(mmap->io_addr);
++}
++
++static int ibp_cmd_mmap(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_mmap_cmd *cmd;
++ struct ibp_mmap_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_mmap *mmap;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_mmap_cmd *) hdr;
++ ucontext = (struct ibp_ucontext *) cmd->ucontext;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ mmap = kzalloc(sizeof(*mmap), GFP_KERNEL);
++ if (!mmap) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto send_resp;
++ }
++ mmap->ucontext = ucontext;
++ mmap->len = cmd->len;
++ mmap->prot = cmd->prot;
++
++ /* The mmap syscall ignores these bits; do the same here. */
++ cmd->flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
++
++ mmap->vaddr = vm_mmap(ucontext->filp, 0, cmd->len, cmd->prot,
++ cmd->flags, cmd->pgoff << PAGE_SHIFT);
++ if (mmap->vaddr & ~PAGE_MASK) {
++ ret = mmap->vaddr;
++ print_err("mmap returned %d\n", ret);
++ goto err1;
++ }
++
++ ret = ibp_scif_register(client, mmap, cmd->flags);
++ if (ret) {
++ print_err("ibp_scif_register returned %d\n", ret);
++ goto err2;
++ }
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&mmap->list, &ucontext->mmap_list);
++ mutex_unlock(&ucontext->mutex);
++
++ resp = (struct ibp_mmap_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->scif_addr = mmap->scif_addr;
++ resp->mmap = (uintptr_t)mmap;
++
++ goto send_resp;
++err2:
++ vm_munmap(mmap->vaddr, cmd->len);
++err1:
++ kfree(mmap);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_unmmap(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_unmmap_cmd *cmd;
++ struct ibp_mmap *mmap;
++ struct ibp_verb_response_msg *msg;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_unmmap_cmd *) hdr;
++ mmap = (struct ibp_mmap *) cmd->mmap;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ if (IS_NULL_OR_ERR(mmap)) {
++ print_err("Invalid mmap %p\n", mmap);
++ ret = -EINVAL;
++ goto send_resp;
++ }
++
++ ibp_scif_unregister(client, mmap);
++
++ if (IS_NULL_OR_ERR(current) || IS_NULL_OR_ERR(current->mm)) {
++ print_err("Invalid current mm pointer\n");
++ ret = -EINVAL;
++ goto send_resp;
++ }
++
++ vm_munmap(mmap->vaddr, mmap->len);
++
++ if (mmap->ucontext) {
++ mutex_lock(&mmap->ucontext->mutex);
++ list_del(&mmap->list);
++ mutex_unlock(&mmap->ucontext->mutex);
++ }
++
++ kfree(mmap);
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
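++ /*
++  * Allocate a minimal ib_uobject for proxied verbs; user_handle
++  * stores the owning ibp_ucontext so the event handlers can find it.
++  */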
++static struct ib_uobject *ibp_create_uobj(struct ibp_ucontext *ucontext)
++{
++ static struct lock_class_key __key;
++ struct ib_uobject *uobj;
++
++ if (IS_NULL_OR_ERR(ucontext))
++ return ERR_PTR(-EINVAL);
++
++ uobj = kzalloc(sizeof(*uobj), GFP_KERNEL);
++ if (!uobj)
++ return ERR_PTR(-ENOMEM);
++
++ /*
++  * This is a kernel-to-kernel path, so the uobject is only partially
++  * initialized here, not fully set up as in ib_uverbs.
++  */
++ uobj->context = ucontext->ibucontext;
++ uobj->user_handle = (uintptr_t)ucontext;
++ kref_init(&uobj->ref);
++ init_rwsem(&uobj->mutex);
++ INIT_LIST_HEAD(&uobj->list);
++ lockdep_set_class(&uobj->mutex, &__key);
++ uobj->live = 1;
++
++ return uobj;
++}
++
++static void ibp_destroy_uobj(struct ib_uobject *uobj)
++{
++ struct ibp_ucontext *ucontext;
++
++ if (!IS_NULL_OR_ERR(uobj)) {
++ ucontext = (struct ibp_ucontext *) uobj->user_handle;
++
++ mutex_lock(&ucontext->mutex);
++ list_del(&uobj->list);
++ mutex_unlock(&ucontext->mutex);
++
++ kfree(uobj);
++ }
++}
++
++static int ibp_cmd_alloc_pd(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_alloc_pd_cmd *cmd;
++ struct ibp_alloc_pd_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ib_uobject *uobj;
++ struct ib_udata udata;
++ struct ib_pd *pd;
++ size_t len;
++ size_t outlen;
++ int ret = 0;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_alloc_pd_cmd *) hdr;
++ ucontext = (struct ibp_ucontext *) cmd->ucontext;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_alloc_pd_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ uobj = ibp_create_uobj(ucontext);
++ if (IS_ERR(uobj)) {
++ ret = PTR_ERR(uobj);
++ print_err("ibp_create_uobj returned %d\n", ret);
++ goto send_resp;
++ }
++
++ pd = device->ib_dev->alloc_pd(device->ib_dev, ucontext->ibucontext, &udata);
++ if (IS_ERR(pd)) {
++ ret = PTR_ERR(pd);
++ print_err("ib_alloc_pd returned %d\n", ret);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ pd->device = device->ib_dev;
++ pd->local_mr = NULL;
++ atomic_set(&pd->usecnt, 0);
++
++ pd->uobject = uobj;
++ uobj->object = pd;
++
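++ /* Use the device-wide local DMA lkey when available; otherwise
++  * fall back to allocating a DMA MR for local access.
++  */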
++ if (device->ib_dev->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
++ pd->local_dma_lkey = device->ib_dev->local_dma_lkey;
++ else {
++ struct ib_mr *mr;
++
++ mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
++ if (IS_ERR(mr)) {
++ ret = PTR_ERR(mr);
++ print_err("ib_get_dma_mr returned %d\n", ret);
++ ib_dealloc_pd(pd);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ pd->local_mr = mr;
++ pd->local_dma_lkey = pd->local_mr->lkey;
++ }
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&uobj->list, &ucontext->ibucontext->pd_list);
++ mutex_unlock(&ucontext->mutex);
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->pd = (uintptr_t)pd;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_dealloc_pd(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_dealloc_pd_cmd *cmd;
++ struct ibp_verb_response_msg *msg;
++ struct ib_uobject *uobj;
++ struct ib_pd *pd;
++ size_t len;
++ int ret = 0;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_dealloc_pd_cmd *) hdr;
++ pd = (struct ib_pd *) cmd->pd;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ if (IS_NULL_OR_ERR(pd)) {
++ print_err("Invalid pd %p\n", pd);
++ ret = -EINVAL;
++ goto send_resp;
++ }
++ print_trace("pd=%p, pd.device=%p\n", pd, pd->device);
++
++ uobj = pd->uobject;
++
++ ib_dealloc_pd(pd);
++
++ ibp_destroy_uobj(uobj);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_create_ah(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_create_ah_cmd *cmd;
++ struct ibp_create_ah_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ib_uobject *uobj;
++ struct ib_pd *pd;
++ struct ib_ah *ah;
++ struct ib_ah_attr attr;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_create_ah_cmd *) hdr;
++ pd = (struct ib_pd *) cmd->pd;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle;
++
++ uobj = ibp_create_uobj(ucontext);
++ if (IS_ERR(uobj)) {
++ ret = PTR_ERR(uobj);
++ print_err("ibp_create_uobj returned %d\n", ret);
++ goto send_resp;
++ }
++
++ memset(&attr, 0, sizeof(attr));
++
++ attr.dlid = cmd->ah_attr.dlid;
++ attr.sl = cmd->ah_attr.sl;
++ attr.src_path_bits = cmd->ah_attr.src_path_bits;
++ attr.static_rate = cmd->ah_attr.static_rate;
++ attr.ah_flags = cmd->ah_attr.ah_flags;
++ attr.port_num = cmd->ah_attr.port_num;
++ attr.grh.dgid.global.subnet_prefix =
++ cmd->ah_attr.grh.dgid_subnet_prefix;
++ attr.grh.dgid.global.interface_id = cmd->ah_attr.grh.dgid_interface_id;
++ attr.grh.flow_label = cmd->ah_attr.grh.flow_label;
++ attr.grh.sgid_index = cmd->ah_attr.grh.sgid_index;
++ attr.grh.hop_limit = cmd->ah_attr.grh.hop_limit;
++ attr.grh.traffic_class = cmd->ah_attr.grh.traffic_class;
++
++ ah = ib_create_ah(pd, &attr);
++ if (IS_ERR(ah)) {
++ ret = PTR_ERR(ah);
++ print_err("ib_create_ah returned %d\n", ret);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ ah->uobject = uobj;
++ uobj->object = ah;
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&uobj->list, &ucontext->ibucontext->ah_list);
++ mutex_unlock(&ucontext->mutex);
++
++ resp = (struct ibp_create_ah_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->ah = (uintptr_t) ah;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_query_ah(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_query_ah_cmd *cmd;
++ struct ibp_query_ah_resp *resp;
++ struct ibp_verb_response_msg *msg;
++ struct ib_ah *ah;
++ struct ib_ah_attr attr;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_query_ah_cmd *) hdr;
++ ah = (struct ib_ah *) cmd->ah;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_query_ah(ah, &attr);
++ if (ret) {
++ print_err("ib_query_ah returned %d\n", ret);
++ goto send_resp;
++ }
++
++ resp = (struct ibp_query_ah_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->attr.dlid = attr.dlid;
++ resp->attr.sl = attr.sl;
++ resp->attr.src_path_bits = attr.src_path_bits;
++ resp->attr.static_rate = attr.static_rate;
++ resp->attr.ah_flags = attr.ah_flags;
++ resp->attr.port_num = attr.port_num;
++ resp->attr.grh.dgid_subnet_prefix = attr.grh.dgid.global.subnet_prefix;
++ resp->attr.grh.dgid_interface_id = attr.grh.dgid.global.interface_id;
++ resp->attr.grh.flow_label = attr.grh.flow_label;
++ resp->attr.grh.sgid_index = attr.grh.sgid_index;
++ resp->attr.grh.hop_limit = attr.grh.hop_limit;
++ resp->attr.grh.traffic_class = attr.grh.traffic_class;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_destroy_ah(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_destroy_ah_cmd *cmd;
++ struct ib_uobject *uobj;
++ struct ib_ah *ah;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_destroy_ah_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ ah = (struct ib_ah *) cmd->ah;
++ len = sizeof(*msg);
++
++ uobj = ah->uobject;
++
++ ret = ib_destroy_ah(ah);
++ if (ret) {
++ print_err("ib_destroy_ah returned %d\n", ret);
++ goto send_resp;
++ }
++
++ ibp_destroy_uobj(uobj);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
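++ /*
++  * SRQ async event handler: repackage the ib_event and queue it to
++  * the client workqueue for delivery to the peer.
++  */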
++static void ibp_ibsrq_event(struct ib_event *ibevent, void *srq_context)
++{
++ struct ibp_ucontext *ucontext;
++ struct ibp_client *client;
++ struct ibp_event *event;
++ struct ib_uobject *uobj;
++
++ print_trace("in\n");
++
++ event = kmalloc(sizeof(*event), GFP_ATOMIC);
++ if (!event) {
++ print_err("kmalloc failed\n");
++ return;
++ }
++
++ uobj = ibevent->element.srq->uobject;
++ ucontext = (struct ibp_ucontext *) uobj->user_handle;
++ client = ucontext->client;
++
++ event->client = client;
++ event->context = (uintptr_t) srq_context;
++ event->type = ibevent->event;
++ event->ibdev = ucontext->ibdev;
++
++ INIT_WORK(&event->work, ibp_async_event);
++ queue_work(client->workqueue, &event->work);
++}
++
++static int ibp_cmd_create_srq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_create_srq_cmd *cmd;
++ struct ibp_create_srq_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ib_uobject *uobj;
++ struct ib_pd *pd;
++ struct ib_srq *srq;
++ struct ib_srq_init_attr init_attr;
++ struct ib_udata udata;
++ size_t len;
++ size_t outlen;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_create_srq_cmd *) hdr;
++ pd = (struct ib_pd *) cmd->pd;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_create_srq_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle;
++
++ uobj = ibp_create_uobj(ucontext);
++ if (IS_ERR(uobj)) {
++ ret = PTR_ERR(uobj);
++ print_err("ibp_create_uobj returned %d\n", ret);
++ goto send_resp;
++ }
++
++ memset(&init_attr, 0, sizeof(init_attr));
++
++ init_attr.event_handler = ibp_ibsrq_event;
++ init_attr.srq_context = (void *) cmd->srq_context;
++ init_attr.attr.max_wr = cmd->attr.max_wr;
++ init_attr.attr.max_sge = cmd->attr.max_sge;
++ init_attr.attr.srq_limit = cmd->attr.srq_limit;
++
++ srq = device->ib_dev->create_srq(pd, &init_attr, &udata);
++ if (IS_ERR(srq)) {
++ ret = PTR_ERR(srq);
++ print_err("ib_create_srq returned %d\n", ret);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ srq->device = device->ib_dev;
++ srq->pd = pd;
++ srq->event_handler = init_attr.event_handler;
++ srq->srq_context = init_attr.srq_context;
++ srq->srq_type = 0;
++ srq->ext.xrc.cq = NULL;
++ srq->ext.xrc.xrcd = NULL;
++
++ atomic_inc(&pd->usecnt);
++ atomic_set(&srq->usecnt, 0);
++
++ srq->uobject = uobj;
++ uobj->object = srq;
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&uobj->list, &ucontext->ibucontext->srq_list);
++ mutex_unlock(&ucontext->mutex);
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->srq = (uintptr_t)srq;
++ resp->attr.max_wr = init_attr.attr.max_wr;
++ resp->attr.max_sge = init_attr.attr.max_sge;
++ resp->attr.srq_limit = init_attr.attr.srq_limit;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_modify_srq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_modify_srq_cmd *cmd;
++ struct ibp_modify_srq_resp *resp;
++ struct ib_srq *srq;
++ struct ib_srq_attr attr;
++ struct ib_udata udata;
++ size_t len;
++ size_t outlen;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_modify_srq_cmd *) hdr;
++ srq = (struct ib_srq *) cmd->srq;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_modify_srq_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ memset(&attr, 0, sizeof(attr));
++
++ attr.max_wr = cmd->attr.max_wr;
++ attr.max_sge = cmd->attr.max_sge;
++ attr.srq_limit = cmd->attr.srq_limit;
++
++ ret = device->ib_dev->modify_srq(srq, &attr, cmd->srq_attr_mask,
++ &udata);
++ if (ret) {
++ print_err("ib_modify_srq returned %d\n", ret);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->attr.max_wr = attr.max_wr;
++ resp->attr.max_sge = attr.max_sge;
++ resp->attr.srq_limit = attr.srq_limit;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_query_srq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_query_srq_cmd *cmd;
++ struct ibp_query_srq_resp *resp;
++ struct ib_srq *srq;
++ struct ib_srq_attr attr;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_query_srq_cmd *) hdr;
++ srq = (struct ib_srq *) cmd->srq;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_query_srq(srq, &attr);
++ if (ret) {
++ print_err("ib_query_srq returned %d\n", ret);
++ goto send_resp;
++ }
++
++ resp = (struct ibp_query_srq_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->attr.max_wr = attr.max_wr;
++ resp->attr.max_sge = attr.max_sge;
++ resp->attr.srq_limit = attr.srq_limit;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_destroy_srq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_queued_response_msg *msg;
++ struct ibp_destroy_srq_cmd *cmd;
++ struct ib_uobject *uobj;
++ struct ib_srq *srq;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_destroy_srq_cmd *) hdr;
++ srq = (struct ib_srq *) cmd->srq;
++ msg = (struct ibp_queued_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ uobj = srq->uobject;
++
++ ret = ib_destroy_srq(srq);
++ if (ret) {
++ print_err("ib_destroy_srq returned %d\n", ret);
++ goto send_resp;
++ }
++
++ ibp_destroy_uobj(uobj);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret);
++ return ibp_queue_response(client, msg);
++}
++
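++ /* QP async event handler: queue the event to the client workqueue. */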
++static void ibp_ibqp_event(struct ib_event *ibevent, void *qp_context)
++{
++ struct ibp_ucontext *ucontext;
++ struct ibp_client *client;
++ struct ibp_event *event;
++ struct ib_uobject *uobj;
++
++ event = kmalloc(sizeof(*event), GFP_ATOMIC);
++ if (!event) {
++ print_err("kmalloc failed\n");
++ return;
++ }
++
++ uobj = ibevent->element.qp->uobject;
++ ucontext = (struct ibp_ucontext *) uobj->user_handle;
++ client = ucontext->client;
++
++ event->client = client;
++ event->context = (uintptr_t) qp_context;
++ event->type = ibevent->event;
++ event->ibdev = ucontext->ibdev;
++
++ INIT_WORK(&event->work, ibp_async_event);
++ queue_work(client->workqueue, &event->work);
++}
++
++static int ibp_cmd_create_qp(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_create_qp_cmd *cmd;
++ struct ibp_create_qp_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ib_uobject *uobj;
++ struct ib_pd *pd;
++ struct ibp_qp *qp;
++ struct ib_qp_init_attr init_attr;
++ struct ib_udata udata;
++ size_t len;
++ size_t outlen;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_create_qp_cmd *) hdr;
++ pd = (struct ib_pd *) cmd->pd;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_create_qp_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
++ if (!qp) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto send_resp;
++ }
++ INIT_LIST_HEAD(&qp->mcast);
++
++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle;
++
++ uobj = ibp_create_uobj(ucontext);
++ if (IS_ERR(uobj)) {
++ ret = PTR_ERR(uobj);
++ print_err("ibp_create_uobj returned %d\n", ret);
++ goto send_resp;
++ }
++
++ memset(&init_attr, 0, sizeof(init_attr));
++
++ init_attr.send_cq = (struct ib_cq *) cmd->send_cq;
++ init_attr.recv_cq = (struct ib_cq *) cmd->recv_cq;
++ init_attr.srq = (struct ib_srq *) cmd->srq;
++ init_attr.xrcd = (struct ib_xrcd *) cmd->xrc_domain;
++ init_attr.cap.max_send_wr = cmd->cap.max_send_wr;
++ init_attr.cap.max_recv_wr = cmd->cap.max_recv_wr;
++ init_attr.cap.max_send_sge = cmd->cap.max_send_sge;
++ init_attr.cap.max_recv_sge = cmd->cap.max_recv_sge;
++ init_attr.cap.max_inline_data = cmd->cap.max_inline_data;
++ init_attr.sq_sig_type = cmd->sq_sig_type;
++ init_attr.qp_type = cmd->qp_type;
++ init_attr.create_flags = cmd->create_flags;
++ init_attr.port_num = cmd->port_num;
++
++ qp->ibqp = device->ib_dev->create_qp(pd, &init_attr, &udata);
++ if (IS_ERR(qp->ibqp)) {
++ ret = PTR_ERR(qp->ibqp);
++ print_err("ib_create_qp returned %d\n", ret);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ qp->ibqp->device = device->ib_dev;
++ qp->ibqp->pd = pd;
++ qp->ibqp->send_cq = init_attr.send_cq;
++ qp->ibqp->recv_cq = init_attr.recv_cq;
++ qp->ibqp->srq = init_attr.srq;
++ qp->ibqp->event_handler = ibp_ibqp_event;
++ qp->ibqp->qp_context = (void *) cmd->qp_context;
++ qp->ibqp->qp_type = init_attr.qp_type;
++
++ if (qp->ibqp->qp_type == IB_QPT_XRC_TGT) {
++ qp->ibqp->xrcd = init_attr.xrcd;
++ atomic_inc(&qp->ibqp->xrcd->usecnt);
++ } else {
++ qp->ibqp->xrcd = NULL;
++ qp->ibqp->real_qp = qp->ibqp;
++ }
++ atomic_set(&qp->ibqp->usecnt, 0);
++
++ atomic_inc(&pd->usecnt);
++ atomic_inc(&init_attr.send_cq->usecnt);
++ atomic_inc(&init_attr.recv_cq->usecnt);
++
++ if (init_attr.srq)
++ atomic_inc(&init_attr.srq->usecnt);
++
++ qp->ibqp->uobject = uobj;
++ uobj->object = qp;
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&uobj->list, &ucontext->ibucontext->qp_list);
++ mutex_unlock(&ucontext->mutex);
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->qp = (uintptr_t) qp;
++ resp->qpn = qp->ibqp->qp_num;
++ resp->cap.max_send_wr = init_attr.cap.max_send_wr;
++ resp->cap.max_recv_wr = init_attr.cap.max_recv_wr;
++ resp->cap.max_send_sge = init_attr.cap.max_send_sge;
++ resp->cap.max_recv_sge = init_attr.cap.max_recv_sge;
++ resp->cap.max_inline_data = init_attr.cap.max_inline_data;
++
++send_resp:
++ if (ret)
++ kfree(qp);
++
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_modify_qp(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_modify_qp_cmd *cmd;
++ struct ibp_modify_qp_resp *resp;
++ struct ibp_qp *qp;
++ struct ib_qp_attr attr;
++ struct ib_udata udata;
++ size_t len;
++ size_t outlen;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_modify_qp_cmd *) hdr;
++ qp = (struct ibp_qp *) cmd->qp;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_modify_qp_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ memset(&attr, 0, sizeof(attr));
++
++ attr.qp_state = cmd->qp_state;
++ attr.cur_qp_state = cmd->cur_qp_state;
++ attr.path_mtu = cmd->path_mtu;
++ attr.path_mig_state = cmd->path_mig_state;
++ attr.qkey = cmd->qkey;
++ attr.rq_psn = cmd->rq_psn;
++ attr.sq_psn = cmd->sq_psn;
++ attr.dest_qp_num = cmd->dest_qp_num;
++ attr.qp_access_flags = cmd->qp_access_flags;
++ attr.cap.max_send_wr = cmd->cap.max_send_wr;
++ attr.cap.max_recv_wr = cmd->cap.max_recv_wr;
++ attr.cap.max_send_sge = cmd->cap.max_send_sge;
++ attr.cap.max_recv_sge = cmd->cap.max_recv_sge;
++ attr.cap.max_inline_data = cmd->cap.max_inline_data;
++ attr.ah_attr.grh.dgid.global.subnet_prefix =
++ cmd->ah.grh.dgid_subnet_prefix;
++ attr.ah_attr.grh.dgid.global.interface_id =
++ cmd->ah.grh.dgid_interface_id;
++ attr.ah_attr.grh.flow_label = cmd->ah.grh.flow_label;
++ attr.ah_attr.grh.sgid_index = cmd->ah.grh.sgid_index;
++ attr.ah_attr.grh.hop_limit = cmd->ah.grh.hop_limit;
++ attr.ah_attr.grh.traffic_class = cmd->ah.grh.traffic_class;
++ attr.ah_attr.dlid = cmd->ah.dlid;
++ attr.ah_attr.sl = cmd->ah.sl;
++ attr.ah_attr.src_path_bits = cmd->ah.src_path_bits;
++ attr.ah_attr.static_rate = cmd->ah.static_rate;
++ attr.ah_attr.ah_flags = cmd->ah.ah_flags;
++ attr.ah_attr.port_num = cmd->ah.port_num;
++ attr.alt_ah_attr.grh.dgid.global.subnet_prefix =
++ cmd->alt_ah.grh.dgid_subnet_prefix;
++ attr.alt_ah_attr.grh.dgid.global.interface_id =
++ cmd->alt_ah.grh.dgid_interface_id;
++ attr.alt_ah_attr.grh.flow_label = cmd->alt_ah.grh.flow_label;
++ attr.alt_ah_attr.grh.sgid_index = cmd->alt_ah.grh.sgid_index;
++ attr.alt_ah_attr.grh.hop_limit = cmd->alt_ah.grh.hop_limit;
++ attr.alt_ah_attr.grh.traffic_class = cmd->alt_ah.grh.traffic_class;
++ attr.alt_ah_attr.dlid = cmd->alt_ah.dlid;
++ attr.alt_ah_attr.sl = cmd->alt_ah.sl;
++ attr.alt_ah_attr.src_path_bits = cmd->alt_ah.src_path_bits;
++ attr.alt_ah_attr.static_rate = cmd->alt_ah.static_rate;
++ attr.alt_ah_attr.ah_flags = cmd->alt_ah.ah_flags;
++ attr.alt_ah_attr.port_num = cmd->alt_ah.port_num;
++ attr.pkey_index = cmd->pkey_index;
++ attr.alt_pkey_index = cmd->alt_pkey_index;
++ attr.en_sqd_async_notify = cmd->en_sqd_async_notify;
++ attr.sq_draining = cmd->sq_draining;
++ attr.max_rd_atomic = cmd->max_rd_atomic;
++ attr.max_dest_rd_atomic = cmd->max_dest_rd_atomic;
++ attr.min_rnr_timer = cmd->min_rnr_timer;
++ attr.port_num = cmd->port_num;
++ attr.timeout = cmd->timeout;
++ attr.retry_cnt = cmd->retry_cnt;
++ attr.rnr_retry = cmd->rnr_retry;
++ attr.alt_port_num = cmd->alt_port_num;
++ attr.alt_timeout = cmd->alt_timeout;
++
++ ret = device->ib_dev->modify_qp(qp->ibqp, &attr, cmd->qp_attr_mask, &udata);
++ if (ret) {
++ print_err("ib_modify_qp returned %d\n", ret);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->cap.max_send_wr = attr.cap.max_send_wr;
++ resp->cap.max_recv_wr = attr.cap.max_recv_wr;
++ resp->cap.max_send_sge = attr.cap.max_send_sge;
++ resp->cap.max_recv_sge = attr.cap.max_recv_sge;
++ resp->cap.max_inline_data = attr.cap.max_inline_data;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_query_qp(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_query_qp_cmd *cmd;
++ struct ibp_query_qp_resp *resp;
++ struct ibp_qp *qp;
++ struct ib_qp_attr qp_attr;
++ struct ib_qp_init_attr qp_init_attr;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_query_qp_cmd *) hdr;
++ qp = (struct ibp_qp *) cmd->qp;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ret = ib_query_qp(qp->ibqp, &qp_attr, cmd->qp_attr_mask, &qp_init_attr);
++ if (ret) {
++ print_err("ib_query_qp returned %d\n", ret);
++ goto send_resp;
++ }
++
++ resp = (struct ibp_query_qp_resp *) msg->data;
++ len += sizeof(*resp);
++
++ resp->qp_state = qp_attr.qp_state;
++ resp->cur_qp_state = qp_attr.cur_qp_state;
++ resp->path_mtu = qp_attr.path_mtu;
++ resp->path_mig_state = qp_attr.path_mig_state;
++ resp->qkey = qp_attr.qkey;
++ resp->rq_psn = qp_attr.rq_psn;
++ resp->sq_psn = qp_attr.sq_psn;
++ resp->dest_qp_num = qp_attr.dest_qp_num;
++ resp->qp_access_flags = qp_attr.qp_access_flags;
++
++ resp->init_cap.max_send_wr = qp_init_attr.cap.max_send_wr;
++ resp->init_cap.max_recv_wr = qp_init_attr.cap.max_recv_wr;
++ resp->init_cap.max_send_sge = qp_init_attr.cap.max_send_sge;
++ resp->init_cap.max_recv_sge = qp_init_attr.cap.max_recv_sge;
++ resp->init_cap.max_inline_data = qp_init_attr.cap.max_inline_data;
++ resp->init_create_flags = qp_init_attr.create_flags;
++ resp->init_sq_sig_type = qp_init_attr.sq_sig_type;
++
++ resp->cap.max_send_wr = qp_attr.cap.max_send_wr;
++ resp->cap.max_recv_wr = qp_attr.cap.max_recv_wr;
++ resp->cap.max_send_sge = qp_attr.cap.max_send_sge;
++ resp->cap.max_recv_sge = qp_attr.cap.max_recv_sge;
++ resp->cap.max_inline_data = qp_attr.cap.max_inline_data;
++
++ resp->ah.grh.dgid_subnet_prefix =
++ qp_attr.ah_attr.grh.dgid.global.subnet_prefix;
++ resp->ah.grh.dgid_interface_id =
++ qp_attr.ah_attr.grh.dgid.global.interface_id;
++ resp->ah.grh.flow_label = qp_attr.ah_attr.grh.flow_label;
++ resp->ah.grh.sgid_index = qp_attr.ah_attr.grh.sgid_index;
++ resp->ah.grh.hop_limit = qp_attr.ah_attr.grh.hop_limit;
++ resp->ah.grh.traffic_class = qp_attr.ah_attr.grh.traffic_class;
++ resp->ah.dlid = qp_attr.ah_attr.dlid;
++ resp->ah.sl = qp_attr.ah_attr.sl;
++ resp->ah.src_path_bits = qp_attr.ah_attr.src_path_bits;
++ resp->ah.static_rate = qp_attr.ah_attr.static_rate;
++ resp->ah.ah_flags = qp_attr.ah_attr.ah_flags;
++ resp->ah.port_num = qp_attr.ah_attr.port_num;
++
++ resp->alt_ah.grh.dgid_subnet_prefix =
++ qp_attr.alt_ah_attr.grh.dgid.global.subnet_prefix;
++ resp->alt_ah.grh.dgid_interface_id =
++ qp_attr.alt_ah_attr.grh.dgid.global.interface_id;
++ resp->alt_ah.grh.flow_label = qp_attr.alt_ah_attr.grh.flow_label;
++ resp->alt_ah.grh.sgid_index = qp_attr.alt_ah_attr.grh.sgid_index;
++ resp->alt_ah.grh.hop_limit = qp_attr.alt_ah_attr.grh.hop_limit;
++ resp->alt_ah.grh.traffic_class = qp_attr.alt_ah_attr.grh.traffic_class;
++ resp->alt_ah.dlid = qp_attr.alt_ah_attr.dlid;
++ resp->alt_ah.sl = qp_attr.alt_ah_attr.sl;
++ resp->alt_ah.src_path_bits = qp_attr.alt_ah_attr.src_path_bits;
++ resp->alt_ah.static_rate = qp_attr.alt_ah_attr.static_rate;
++ resp->alt_ah.ah_flags = qp_attr.alt_ah_attr.ah_flags;
++ resp->alt_ah.port_num = qp_attr.alt_ah_attr.port_num;
++
++ resp->pkey_index = qp_attr.pkey_index;
++ resp->alt_pkey_index = qp_attr.alt_pkey_index;
++ resp->en_sqd_async_notify = qp_attr.en_sqd_async_notify;
++ resp->sq_draining = qp_attr.sq_draining;
++ resp->max_rd_atomic = qp_attr.max_rd_atomic;
++ resp->max_dest_rd_atomic = qp_attr.max_dest_rd_atomic;
++ resp->min_rnr_timer = qp_attr.min_rnr_timer;
++ resp->port_num = qp_attr.port_num;
++ resp->timeout = qp_attr.timeout;
++ resp->retry_cnt = qp_attr.retry_cnt;
++ resp->rnr_retry = qp_attr.rnr_retry;
++ resp->alt_port_num = qp_attr.alt_port_num;
++ resp->alt_timeout = qp_attr.alt_timeout;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_destroy_qp(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_queued_response_msg *msg;
++ struct ibp_destroy_qp_cmd *cmd;
++ struct ib_uobject *uobj;
++ struct ibp_qp *qp;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_destroy_qp_cmd *) hdr;
++ qp = (struct ibp_qp *) cmd->qp;
++ msg = (struct ibp_queued_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ uobj = qp->ibqp->uobject;
++
++ ret = ib_destroy_qp(qp->ibqp);
++ if (ret) {
++ print_err("ib_destroy_qp returned %d\n", ret);
++ goto send_resp;
++ }
++
++ ibp_destroy_uobj(uobj);
++
++ kfree(qp);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret);
++ return ibp_queue_response(client, msg);
++}
++
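++ /* CQ async event handler: queue the event to the client workqueue. */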
++static void ibp_ibcq_event(struct ib_event *ibevent, void *cq_context)
++{
++ struct ibp_ucontext *ucontext;
++ struct ibp_client *client;
++ struct ibp_event *event;
++ struct ib_uobject *uobj;
++
++ event = kmalloc(sizeof(*event), GFP_ATOMIC);
++ if (!event) {
++ print_err("kmalloc failed\n");
++ return;
++ }
++
++ uobj = ibevent->element.cq->uobject;
++ ucontext = (struct ibp_ucontext *) uobj->user_handle;
++ client = ucontext->client;
++
++ event->client = client;
++ event->context = (uintptr_t) cq_context;
++ event->type = ibevent->event;
++ event->ibdev = ucontext->ibdev;
++
++ INIT_WORK(&event->work, ibp_async_event);
++ queue_work(client->workqueue, &event->work);
++}
++
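++ /*
++  * Deferred CQ completion: forward a CQ_COMP message carrying the
++  * peer's cq_context.
++  */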
++static void ibp_cq_comp(struct work_struct *work)
++{
++ struct ibp_comp *comp;
++ struct ibp_cq_comp_msg msg;
++
++ comp = container_of(work, struct ibp_comp, work);
++
++ IBP_INIT_MSG(NULL, &msg, sizeof(msg), CQ_COMP);
++
++ msg.data.cq_context = (uintptr_t) comp->cq_context;
++
++ ibp_send(comp->client->ep, &msg, sizeof(msg));
++
++ kfree(comp);
++}
++
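++ /* CQ completion upcall: drop completions while the context is
++  * closing, otherwise queue deferred delivery to the peer.
++  */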
++static void ibp_ibcq_comp(struct ib_cq *ibcq, void *cq_context)
++{
++ struct ibp_ucontext *ucontext;
++ struct ibp_client *client;
++ struct ibp_comp *comp;
++
++ ucontext = (struct ibp_ucontext *) ibcq->uobject->user_handle;
++
++ if (ucontext->ibucontext->closing) {
++ print_dbg("ignoring cq completion, connection closing\n");
++ return;
++ }
++
++ comp = kmalloc(sizeof(*comp), GFP_ATOMIC);
++ if (!comp) {
++ print_err("kmalloc failed\n");
++ return;
++ }
++
++ client = ucontext->client;
++
++ comp->client = client;
++ comp->cq_context = cq_context;
++
++ INIT_WORK(&comp->work, ibp_cq_comp);
++ queue_work(client->workqueue, &comp->work);
++}
++
++static int ibp_cmd_create_cq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_create_cq_cmd *cmd;
++ struct ibp_create_cq_resp *resp;
++ struct ibp_ucontext *ucontext;
++ struct ib_uobject *uobj;
++ struct ib_udata udata;
++ struct ib_cq *cq;
++ size_t len;
++ size_t outlen;
++ int ret = 0;
++ struct ib_cq_init_attr attr;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_create_cq_cmd *) hdr;
++ ucontext = (struct ibp_ucontext *) cmd->ucontext;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_create_cq_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ uobj = ibp_create_uobj(ucontext);
++ if (IS_ERR(uobj)) {
++ ret = PTR_ERR(uobj);
++ print_err("ibp_create_uobj returned %d\n", ret);
++ goto send_resp;
++ }
++
++ memset(&attr, 0, sizeof(attr));
++ attr.cqe = cmd->cqe;
++ attr.comp_vector = cmd->vector;
++
++ cq = device->ib_dev->create_cq(device->ib_dev, &attr,
++ ucontext->ibucontext, &udata);
++ if (IS_ERR(cq)) {
++ ret = PTR_ERR(cq);
++ print_err("ib_create_cq returned %d\n", ret);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ cq->device = device->ib_dev;
++ cq->event_handler = ibp_ibcq_event;
++ cq->comp_handler = ibp_ibcq_comp;
++ cq->cq_context = (void *) cmd->cq_context;
++ atomic_set(&cq->usecnt, 0);
++
++ cq->uobject = uobj;
++ uobj->object = cq;
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&uobj->list, &ucontext->ibucontext->cq_list);
++ mutex_unlock(&ucontext->mutex);
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->cq = (uintptr_t)cq;
++ resp->cqe = cq->cqe;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_destroy_cq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_queued_response_msg *msg;
++ struct ibp_destroy_cq_cmd *cmd;
++ struct ib_uobject *uobj;
++ struct ib_cq *cq;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_destroy_cq_cmd *) hdr;
++ cq = (struct ib_cq *) cmd->cq;
++ msg = (struct ibp_queued_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ uobj = cq->uobject;
++
++ ret = ib_destroy_cq(cq);
++ if (ret) {
++ print_err("ib_destroy_cq returned %d\n", ret);
++ goto send_resp;
++ }
++
++ ibp_destroy_uobj(uobj);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, QUEUED_RESPONSE, hdr->request, ret);
++ return ibp_queue_response(client, msg);
++}
++
++static int ibp_cmd_resize_cq(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_resize_cq_cmd *cmd;
++ struct ibp_resize_cq_resp *resp;
++ struct ib_cq *cq;
++ struct ib_udata udata;
++ size_t len;
++ size_t outlen;
++ int ret;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_resize_cq_cmd *) hdr;
++ cq = (struct ib_cq *) cmd->cq;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_resize_cq_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ ret = device->ib_dev->resize_cq ?
++ device->ib_dev->resize_cq(cq, (int) cmd->cqe, &udata) : -ENOSYS;
++ if (ret) {
++ print_err("ib_resize_cq returned %d\n", ret);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->cqe = cq->cqe;
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_reg_user_mr(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_reg_user_mr_cmd *cmd;
++ struct ibp_reg_user_mr_resp *resp;
++ struct ibp_mr *mr;
++ struct ibp_ucontext *ucontext;
++ struct ib_uobject *uobj;
++ struct ib_udata udata;
++ struct ib_pd *pd;
++ size_t len;
++ size_t outlen;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_reg_user_mr_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ resp = (struct ibp_reg_user_mr_resp *) msg->data;
++ len = hdr->length - sizeof(*cmd);
++ outlen = MAX_MSG_SIZE - sizeof(*msg) - sizeof(*resp);
++
++ INIT_UDATA(&udata, cmd->data, resp->data, len, outlen);
++
++ len = sizeof(*msg);
++
++ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
++ if (!mr) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto send_resp;
++ }
++
++ pd = (struct ib_pd *) cmd->pd;
++
++ ucontext = (struct ibp_ucontext *) pd->uobject->user_handle;
++
++ mr->reg = ibp_reg_buf(ucontext, cmd->hca_va, cmd->scif_addr,
++ cmd->length, cmd->offset, cmd->access);
++ if (IS_ERR(mr->reg)) {
++ ret = PTR_ERR(mr->reg);
++ print_err("ibp_reg_buf returned %d\n", ret);
++ goto send_resp;
++ }
++
++ uobj = ibp_create_uobj(ucontext);
++ if (IS_ERR(uobj)) {
++ ret = PTR_ERR(uobj);
++ print_err("ibp_create_uobj returned %d\n", ret);
++ kref_put(&mr->reg->ref, ibp_dereg_buf);
++ goto send_resp;
++ }
++
++ mr->ibmr = pd->device->reg_user_mr(pd, cmd->hca_va, cmd->length,
++ cmd->hca_va, cmd->access, &udata);
++ if (IS_ERR(mr->ibmr)) {
++ ret = PTR_ERR(mr->ibmr);
++ print_err("ib_reg_user_mr returned %d\n", ret);
++ kref_put(&mr->reg->ref, ibp_dereg_buf);
++ ibp_destroy_uobj(uobj);
++ goto send_resp;
++ }
++
++ mr->ibmr->pd = pd;
++ mr->ibmr->device = pd->device;
++ atomic_inc(&pd->usecnt);
++
++ mr->ibmr->uobject = uobj;
++ uobj->object = mr;
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&uobj->list, &ucontext->ibucontext->mr_list);
++ mutex_unlock(&ucontext->mutex);
++
++ len += sizeof(*resp);
++ len += outlen - udata.outlen; /* add driver private data */
++
++ resp->mr = (uintptr_t) mr;
++ resp->lkey = mr->ibmr->lkey;
++ resp->rkey = mr->ibmr->rkey;
++
++send_resp:
++ if (ret)
++ kfree(mr);
++
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_dereg_mr(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_dereg_mr_cmd *cmd;
++ struct ibp_mr *mr;
++ struct ib_uobject *uobj;
++ size_t len;
++ int ret;
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_dereg_mr_cmd *) hdr;
++ mr = (struct ibp_mr *) cmd->mr;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ if (IS_NULL_OR_ERR(mr)) {
++ print_err("Invalid mr %p\n", mr);
++ ret = -EINVAL;
++ goto send_resp;
++ }
++
++ uobj = mr->ibmr->uobject;
++
++ ret = ib_dereg_mr(mr->ibmr);
++ if (ret) {
++ print_err("ib_dereg_mr returned %d\n", ret);
++ goto send_resp;
++ }
++
++ ibp_destroy_uobj(uobj);
++
++ if (mr->reg)
++ kref_put(&mr->reg->ref, ibp_dereg_buf);
++
++ kfree(mr);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_attach_mcast(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_attach_mcast_cmd *cmd;
++ struct ibp_mcast_entry *mcast;
++ struct ibp_ucontext *ucontext;
++ struct ibp_qp *qp;
++ union ib_gid gid;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_attach_mcast_cmd *) hdr;
++ qp = (struct ibp_qp *) cmd->qp;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ucontext = (struct ibp_ucontext *) qp->ibqp->uobject->user_handle;
++
++ mcast = kzalloc(sizeof(*mcast), GFP_KERNEL);
++ if (!mcast) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto send_resp;
++ }
++
++ gid.global.subnet_prefix = cmd->subnet_prefix;
++ gid.global.interface_id = cmd->interface_id;
++
++ ret = ib_attach_mcast(qp->ibqp, &gid, cmd->lid);
++ if (ret) {
++ print_err("ib_attach_mcast returned %d\n", ret);
++ kfree(mcast);
++ goto send_resp;
++ }
++
++ mcast->lid = cmd->lid;
++ mcast->gid.global.subnet_prefix = cmd->subnet_prefix;
++ mcast->gid.global.interface_id = cmd->interface_id;
++
++ mutex_lock(&ucontext->mutex);
++ list_add_tail(&mcast->list, &qp->mcast);
++ mutex_unlock(&ucontext->mutex);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
++static int ibp_cmd_detach_mcast(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf)
++{
++ struct ibp_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_detach_mcast_cmd *cmd;
++ struct ibp_mcast_entry *mcast;
++ struct ibp_ucontext *ucontext;
++ struct ibp_qp *qp;
++ union ib_gid gid;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ device = (struct ibp_device *) hdr->device;
++ cmd = (struct ibp_detach_mcast_cmd *) hdr;
++ qp = (struct ibp_qp *) cmd->qp;
++ msg = (struct ibp_verb_response_msg *) tx_buf;
++ len = sizeof(*msg);
++
++ ucontext = (struct ibp_ucontext *) qp->ibqp->uobject->user_handle;
++
++ gid.global.subnet_prefix = cmd->subnet_prefix;
++ gid.global.interface_id = cmd->interface_id;
++
++ ret = ib_detach_mcast(qp->ibqp, &gid, cmd->lid);
++ if (ret) {
++ print_err("ib_detach_mcast returned %d\n", ret);
++ goto send_resp;
++ }
++
++ mutex_lock(&ucontext->mutex);
++ list_for_each_entry(mcast, &qp->mcast, list)
++ if (cmd->lid == mcast->lid &&
++ !memcmp(&gid, mcast->gid.raw, sizeof(mcast->gid.raw))) {
++ list_del(&mcast->list);
++ kfree(mcast);
++ break;
++ }
++ mutex_unlock(&ucontext->mutex);
++
++send_resp:
++ IBP_INIT_RESP(device, msg, len, VERB_RESPONSE, hdr->request, ret);
++ return ibp_send(client->ep, msg, len);
++}
++
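++ /* Detach and free any multicast groups still attached to the QP. */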
++static void ibp_detach_mcast(struct ibp_qp *qp)
++{
++ struct ibp_mcast_entry *mcast, *tmp;
++
++ list_for_each_entry_safe(mcast, tmp, &qp->mcast, list) {
++ ib_detach_mcast(qp->ibqp, &mcast->gid, mcast->lid);
++ list_del(&mcast->list);
++ kfree(mcast);
++ }
++}
++
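++ /*
++  * Tear down everything still attached to a ucontext, in dependency
++  * order (AHs, QPs, CQs, SRQs, MRs, XRCDs, PDs), then release mmaps,
++  * buffer registrations, and the context itself.
++  */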
++static void ibp_destroy_ucontext(struct ibp_ucontext *ucontext)
++{
++ struct ib_ucontext *ibuctx;
++ struct ib_uobject *uobj, *tmp;
++ struct ibp_mmap *mmap, *tmp_mmap;
++
++ ibuctx = ucontext->ibucontext;
++ if (!ibuctx)
++ goto out;
++
++ ibuctx->closing = 1;
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->ah_list, list) {
++ struct ib_ah *ibah = uobj->object;
++ ib_destroy_ah(ibah);
++ ibp_destroy_uobj(uobj);
++ }
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->qp_list, list) {
++ struct ibp_qp *qp = uobj->object;
++ ibp_detach_mcast(qp);
++ ib_destroy_qp(qp->ibqp);
++ ibp_destroy_uobj(uobj);
++ kfree(qp);
++ }
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->cq_list, list) {
++ struct ib_cq *ibcq = uobj->object;
++ ib_destroy_cq(ibcq);
++ ibp_destroy_uobj(uobj);
++ }
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->srq_list, list) {
++ struct ib_srq *ibsrq = uobj->object;
++ ib_destroy_srq(ibsrq);
++ ibp_destroy_uobj(uobj);
++ }
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->mr_list, list) {
++ struct ibp_mr *mr = uobj->object;
++ ib_dereg_mr(mr->ibmr);
++ ibp_destroy_uobj(uobj);
++ kref_put(&mr->reg->ref, ibp_dereg_buf);
++ kfree(mr);
++ }
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->xrcd_list, list) {
++ struct ib_xrcd *ibxrcd = uobj->object;
++ ib_dealloc_xrcd(ibxrcd);
++ ibp_destroy_uobj(uobj);
++ }
++
++ list_for_each_entry_safe(uobj, tmp, &ibuctx->pd_list, list) {
++ struct ib_pd *ibpd = uobj->object;
++ ib_dealloc_pd(ibpd);
++ ibp_destroy_uobj(uobj);
++ }
++
++ ibuctx->device->dealloc_ucontext(ibuctx);
++out:
++ if (ucontext->ibdev)
++ ib_unregister_event_handler(&ucontext->event_handler);
++
++ list_for_each_entry_safe(mmap, tmp_mmap, &ucontext->mmap_list, list) {
++ ibp_scif_unregister(ucontext->client, mmap);
++
++ if (!IS_NULL_OR_ERR(current) && !IS_NULL_OR_ERR(current->mm)) {
++ vm_munmap(mmap->vaddr, mmap->len);
++ }
++ kfree(mmap);
++ }
++
++ while (!RB_EMPTY_ROOT(&ucontext->reg_tree)) {
++ struct ibp_reg *reg;
++ reg = rb_entry(ucontext->reg_tree.rb_node, struct ibp_reg,
++ node);
++ kref_put(&reg->ref, ibp_dereg_buf);
++ }
++
++ ibp_put_device(ucontext->device);
++ fput(ucontext->filp);
++ kfree(ucontext);
++}
++
++void ibp_cleanup_ucontext(struct list_head *ucontext_list)
++{
++ struct ibp_ucontext *ucontext, *next;
++
++ list_for_each_entry_safe(ucontext, next, ucontext_list, list)
++ ibp_destroy_ucontext(ucontext);
++}
++
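++ /*
++  * Verb dispatch table, indexed by message opcode. Verbs the proxy
++  * does not forward are mapped to ibp_cmd_not_supported.
++  */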
++static int (*ibp_msg_table[])(struct ibp_client *client,
++ struct ibp_msg_header *hdr, void *tx_buf) = {
++ [IBP_VERB_GET_PROTOCOL_STATS] = ibp_cmd_not_supported,
++ [IBP_VERB_QUERY_DEVICE] = ibp_cmd_query_device,
++ [IBP_VERB_QUERY_PORT] = ibp_cmd_query_port,
++ [IBP_VERB_GET_LINK_LAYER] = ibp_cmd_not_supported,
++ [IBP_VERB_QUERY_GID] = ibp_cmd_query_gid,
++ [IBP_VERB_QUERY_PKEY] = ibp_cmd_query_pkey,
++ [IBP_VERB_MODIFY_DEVICE] = ibp_cmd_not_supported,
++ [IBP_VERB_MODIFY_PORT] = ibp_cmd_not_supported,
++ [IBP_VERB_ALLOC_UCONTEXT] = ibp_cmd_alloc_ucontext,
++ [IBP_VERB_DEALLOC_UCONTEXT] = ibp_cmd_dealloc_ucontext,
++ [IBP_VERB_REG_BUF] = ibp_cmd_reg_buf,
++ [IBP_VERB_DEREG_BUF] = ibp_cmd_dereg_buf,
++ [IBP_VERB_MMAP] = ibp_cmd_mmap,
++ [IBP_VERB_UNMMAP] = ibp_cmd_unmmap,
++ [IBP_VERB_ALLOC_PD] = ibp_cmd_alloc_pd,
++ [IBP_VERB_DEALLOC_PD] = ibp_cmd_dealloc_pd,
++ [IBP_VERB_CREATE_AH] = ibp_cmd_create_ah,
++ [IBP_VERB_MODIFY_AH] = ibp_cmd_not_supported,
++ [IBP_VERB_QUERY_AH] = ibp_cmd_query_ah,
++ [IBP_VERB_DESTROY_AH] = ibp_cmd_destroy_ah,
++ [IBP_VERB_CREATE_SRQ] = ibp_cmd_create_srq,
++ [IBP_VERB_MODIFY_SRQ] = ibp_cmd_modify_srq,
++ [IBP_VERB_QUERY_SRQ] = ibp_cmd_query_srq,
++ [IBP_VERB_DESTROY_SRQ] = ibp_cmd_destroy_srq,
++ [IBP_VERB_POST_SRQ_RECV] = ibp_cmd_not_supported,
++ [IBP_VERB_CREATE_QP] = ibp_cmd_create_qp,
++ [IBP_VERB_MODIFY_QP] = ibp_cmd_modify_qp,
++ [IBP_VERB_QUERY_QP] = ibp_cmd_query_qp,
++ [IBP_VERB_DESTROY_QP] = ibp_cmd_destroy_qp,
++ [IBP_VERB_POST_SEND] = ibp_cmd_not_supported,
++ [IBP_VERB_POST_RECV] = ibp_cmd_not_supported,
++ [IBP_VERB_CREATE_CQ] = ibp_cmd_create_cq,
++ [IBP_VERB_MODIFY_CQ] = ibp_cmd_not_supported,
++ [IBP_VERB_DESTROY_CQ] = ibp_cmd_destroy_cq,
++ [IBP_VERB_RESIZE_CQ] = ibp_cmd_resize_cq,
++ [IBP_VERB_POLL_CQ] = ibp_cmd_not_supported,
++ [IBP_VERB_PEEK_CQ] = ibp_cmd_not_supported,
++ [IBP_VERB_REQ_NOTIFY_CQ] = ibp_cmd_not_supported,
++ [IBP_VERB_REQ_NCOMP_NOTIF] = ibp_cmd_not_supported,
++ [IBP_VERB_GET_DMA_MR] = ibp_cmd_not_supported,
++ [IBP_VERB_REG_PHYS_MR] = ibp_cmd_not_supported,
++ [IBP_VERB_REG_USER_MR] = ibp_cmd_reg_user_mr,
++ [IBP_VERB_QUERY_MR] = ibp_cmd_not_supported,
++ [IBP_VERB_DEREG_MR] = ibp_cmd_dereg_mr,
++ [IBP_VERB_ALLOC_FAST_REG_MR] = ibp_cmd_not_supported,
++ [IBP_VERB_ALLOC_FAST_REG_PAGE_LIST] = ibp_cmd_not_supported,
++ [IBP_VERB_FREE_FAST_REG_PAGE_LIST] = ibp_cmd_not_supported,
++ [IBP_VERB_REREG_PHYS_MR] = ibp_cmd_not_supported,
++ [IBP_VERB_ALLOC_MW] = ibp_cmd_not_supported,
++ [IBP_VERB_BIND_MW] = ibp_cmd_not_supported,
++ [IBP_VERB_DEALLOC_MW] = ibp_cmd_not_supported,
++ [IBP_VERB_ALLOC_FMR] = ibp_cmd_not_supported,
++ [IBP_VERB_MAP_PHYS_FMR] = ibp_cmd_not_supported,
++ [IBP_VERB_UNMAP_FMR] = ibp_cmd_not_supported,
++ [IBP_VERB_DEALLOC_FMR] = ibp_cmd_not_supported,
++ [IBP_VERB_ATTACH_MCAST] = ibp_cmd_attach_mcast,
++ [IBP_VERB_DETACH_MCAST] = ibp_cmd_detach_mcast,
++ [IBP_VERB_PROCESS_MAD] = ibp_cmd_not_supported,
++ [IBP_VERB_ALLOC_XRCD] = ibp_cmd_not_supported,
++ [IBP_VERB_DEALLOC_XRCD] = ibp_cmd_not_supported,
++};
++
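++ /*
++  * Main receive loop: serialize receivers on rx_in_process, read and
++  * validate the header, pull in the payload, then dispatch by opcode.
++  * Unknown opcodes get a bad-request response.
++  */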
++int ibp_process_recvs(struct ibp_client *client, void *rx_buf, void *tx_buf)
++{
++ struct ibp_msg_header *hdr;
++ int ret;
++
++ hdr = (struct ibp_msg_header *) rx_buf;
++
++ for (;;) {
++ wait_event_interruptible(client->rx_wait_queue,
++ !atomic_xchg(&client->rx_in_process,
++ 1));
++
++ ret = ibp_recv(client->ep, hdr, sizeof(*hdr));
++ if (ret)
++ goto err;
++
++ if (hdr->length > MAX_MSG_SIZE) {
++ print_err("message too large, len %u max %lu\n",
++ hdr->length, MAX_MSG_SIZE);
++ ret = -EMSGSIZE;
++ goto err;
++ }
++
++ ret = ibp_recv(client->ep, hdr->data,
++ hdr->length - sizeof(*hdr));
++ if (ret)
++ goto err;
++
++ atomic_set(&client->rx_in_process, 0);
++ wake_up_interruptible(&client->rx_wait_queue);
++
++ if ((hdr->opcode >= ARRAY_SIZE(ibp_msg_table)) ||
++ !ibp_msg_table[hdr->opcode]) {
++ ibp_cmd_bad_request(client, hdr, tx_buf);
++ continue;
++ }
++
++ ret = ibp_msg_table[hdr->opcode](client, hdr, tx_buf);
++ if (ret)
++ goto err;
++ }
++
++err:
++ atomic_set(&client->rx_in_process, 0);
++ wake_up_interruptible(&client->rx_wait_queue);
++
++ return ret;
++}
+diff --git a/drivers/infiniband/ibp/sa/Makefile b/drivers/infiniband/ibp/sa/Makefile
+new file mode 100644
+index 0000000..9435a98
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/Makefile
+@@ -0,0 +1,13 @@
++obj-$(CONFIG_IBP_CLIENT) += ibp_sa_client.o
++obj-$(CONFIG_IBP_SERVER) += ibp_sa_server.o
++
++ccflags-y := -Idrivers/infiniband/
++
++ibp_sa_client-y := client.o \
++ client_msg.o \
++ sa_client_msg.o \
++ sa_proxy.o
++
++ibp_sa_server-y := server.o \
++ server_msg.o \
++ sa_server_msg.o
+diff --git a/drivers/infiniband/ibp/sa/client.c b/drivers/infiniband/ibp/sa/client.c
+new file mode 100644
+index 0000000..e7a2a9b
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/client.c
+@@ -0,0 +1,134 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Sean Hefty");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++MODULE_PARAM(port, port, int, IBP_SA_PORT, "Connection port");
++MODULE_PARAM(timeout, timeout, int, 1000, "Connect/Poll timeout (in ms)");
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++static struct kref ref;
++static struct task_struct *ibp_sa_client_thread;
++
++scif_epd_t ep;
++struct workqueue_struct *workqueue;
++struct rw_semaphore list_rwsem;
++
++static int ibp_sa_connect(void *unused)
++{
++ struct scif_port_id dst;
++ unsigned long delay;
++ int ret = 0;
++
++ dst.node = IBP_SA_HOST_NODE;
++ dst.port = port;
++
++ delay = msecs_to_jiffies(timeout);
++
++ while (!kthread_should_stop()) {
++
++ ep = scif_open();
++ if (IS_NULL_OR_ERR(ep)) {
++ print_err("scif_open failed\n");
++ schedule_timeout_interruptible(delay);
++ continue;
++ }
++
++ while (scif_connect(ep, &dst) != 0) {
++ schedule_timeout_interruptible(delay);
++ if (kthread_should_stop())
++ break;
++ }
++
++ if (!kthread_should_stop()) {
++ print_dbg("connected node %d port %d\n",
++ dst.node, dst.port);
++
++ ibp_process_recvs();
++ }
++
++ scif_close(ep);
++ }
++
++ return ret;
++}
++
++static int __init ibp_sa_client_init(void)
++{
++ int ret;
++
++ print_info(DRV_SIGNON);
++
++ init_rwsem(&list_rwsem);
++ kref_init(&ref);
++
++ workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!workqueue) {
++ print_err("create_singlethread_workqueue failed\n");
++ return -ENOMEM;
++ }
++
++ /* Start a thread to establish a connection. */
++ ibp_sa_client_thread = kthread_run(ibp_sa_connect, NULL, DRV_NAME);
++ if (IS_ERR(ibp_sa_client_thread)) {
++ ret = PTR_ERR(ibp_sa_client_thread);
++ print_err("kthread_run returned %d\n", ret);
++ destroy_workqueue(workqueue);
++ return ret;
++ }
++
++ return 0;
++}
++
++static void __exit ibp_sa_client_exit(void)
++{
++ kthread_stop(ibp_sa_client_thread);
++
++ flush_workqueue(workqueue);
++ destroy_workqueue(workqueue);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_sa_client_init);
++module_exit(ibp_sa_client_exit);
+diff --git a/drivers/infiniband/ibp/sa/client.h b/drivers/infiniband/ibp/sa/client.h
+new file mode 100644
+index 0000000..cb1c8bb
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/client.h
+@@ -0,0 +1,90 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef CLIENT_H
++#define CLIENT_H
++
++#include <rdma/ib_verbs.h>
++#include "common.h"
++#include "ibp-abi.h"
++#include "sa_ibp_abi.h"
++#include "core/sa.h"
++
++#define DRV_ROLE "SA Client"
++#define DRV_NAME "ibp_sa_client"
++#include "compat.h"
++
++#ifndef IBP_SA_HOST_NODE
++#define IBP_SA_HOST_NODE 0
++#endif
++
++extern int timeout;
++extern scif_epd_t ep;
++extern struct workqueue_struct *workqueue;
++extern struct rw_semaphore list_rwsem;
++
++int ibp_process_recvs(void);
++void init_proxy(void);
++void cleanup_proxy(void);
++void callback_work(struct work_struct *work);
++
++struct ibp_request {
++ struct completion done;
++ void *data;
++ size_t length;
++ int status;
++};
++
++struct callback_work {
++ struct work_struct work;
++ int length;
++ struct callback_msg data;
++};
++
++#define IBP_INIT_REQ(request, buf, size) \
++ do { \
++ (request)->data = (buf); \
++ (request)->length = (size); \
++ (request)->status = 0; \
++ init_completion(&(request)->done); \
++ } while (0)
++
++#define IBP_INIT_CMD(cmd, size, op, req) \
++ do { \
++ (cmd)->header.opcode = IBP_##op; \
++ (cmd)->header.length = (size); \
++ (cmd)->header.status = 0; \
++ (cmd)->header.reserved = 0; \
++ (cmd)->header.request = (uintptr_t)(req); \
++ } while (0)
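++
++/*
++ * Typical request/command pairing (sketch; see sa_client_msg.c): the
++ * caller stacks a request and command, initializes both, sends the
++ * command, and sleeps until the response handler completes the request:
++ *
++ *	struct ibp_sa_register_client_resp resp;
++ *	struct ibp_sa_register_client_cmd cmd;
++ *	struct ibp_request req;
++ *
++ *	IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ *	IBP_INIT_CMD(&cmd, sizeof(cmd), SA_REGISTER_CLIENT, &req);
++ *	ibp_send(ep, &cmd, sizeof(cmd));
++ *	wait_for_completion(&req.done);
++ */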
++
++#endif /* CLIENT_H */
+diff --git a/drivers/infiniband/ibp/sa/client_msg.c b/drivers/infiniband/ibp/sa/client_msg.c
+new file mode 100644
+index 0000000..b4d28c3
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/client_msg.c
+@@ -0,0 +1,231 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++
++static DEFINE_MUTEX(ibp_send_mutex);
++
++int ibp_send(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
++ /*
++ * Because user-context threads can be signaled, a mutex
++ * and ERESTARTSYS check are required to complete atomically.
++ */
++ mutex_lock(&ibp_send_mutex);
++ while (len) {
++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_send returned %d\n", ret);
++ if (ret == -ERESTARTSYS) {
++ ret = 0;
++ } else {
++ mutex_unlock(&ibp_send_mutex);
++ return ret;
++ }
++ }
++ buf += ret;
++ len -= ret;
++ }
++ mutex_unlock(&ibp_send_mutex);
++
++ return 0;
++}
++
++int ibp_recv(scif_epd_t ep, void *buf, size_t len)
++{
++ struct scif_pollepd pollep;
++ int ret;
++
++ pollep.epd = ep;
++ pollep.events = POLLIN;
++
++ while (len) {
++ schedule();
++ if (kthread_should_stop())
++ return -EINTR;
++
++ ret = scif_poll(&pollep, 1, timeout);
++ if (ret == 0) /* timeout */
++ continue;
++ if (ret < 0) {
++ print_dbg("scif_poll revents 0x%x returned %d\n",
++ pollep.revents, ret);
++ return ret;
++ }
++
++ ret = scif_recv(ep, buf, (uint32_t)len, 0);
++ if (ret < 0) {
++ print_dbg("scif_recv returned %d\n", ret);
++ return ret;
++ }
++
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++static int ibp_recv_bitbucket(scif_epd_t ep, size_t len)
++{
++ u8 bitbucket[64];
++ size_t bytes;
++ int ret = 0;
++
++ while (len) {
++ bytes = min(len, sizeof(bitbucket));
++ ret = ibp_recv(ep, &bitbucket, bytes);
++ if (ret)
++ break;
++ len -= bytes;
++ }
++
++ return ret;
++}
++
++static int ibp_recv_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ size_t len;
++ int ret;
++
++ req = (struct ibp_request *) hdr->request;
++ len = hdr->length - sizeof(*hdr);
++
++ ret = ibp_recv(ep, (void *)req->data, min(req->length, len));
++ if (ret)
++ return ret;
++
++ if (req->length < len) {
++ print_dbg("req->data overrun, expected %ld actual %ld\n",
++ req->length, len);
++ ret = ibp_recv_bitbucket(ep, len - req->length);
++ if (ret)
++ print_err("ibp_recv_bitbucket returned %d\n", ret);
++ req->status = -EMSGSIZE;
++ } else {
++ req->status = hdr->status;
++ }
++ req->length = len;
++
++ return ret;
++}
++
++static int ibp_response(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct ibp_request *req;
++ int ret;
++
++ req = (struct ibp_request *)hdr->request;
++
++ ret = ibp_recv_response(ep, hdr);
++ if (!ret)
++ complete(&req->done);
++
++ return ret;
++}
++
++static int ibp_callback(scif_epd_t ep, struct ibp_msg_header *hdr)
++{
++ struct callback_work *cb_work;
++ size_t len;
++ int ret;
++
++ len = hdr->length - sizeof(*hdr);
++
++ if (len > sizeof(struct callback_msg)) {
++ print_err("callback data too large, recv %zu buffer %zu\n",
++ len, sizeof(struct callback_msg));
++ ret = -EINVAL;
++ goto err1;
++ }
++
++ cb_work = kzalloc(sizeof(struct callback_work), GFP_KERNEL);
++ if (!cb_work) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto err1;
++ }
++
++ if (len) {
++ ret = ibp_recv(ep, &cb_work->data, len);
++ if (ret)
++ goto err2;
++ }
++
++ cb_work->length = len;
++
++ INIT_WORK(&cb_work->work, callback_work);
++ queue_work(workqueue, &cb_work->work);
++
++ return 0;
++err2:
++ kfree(cb_work);
++err1:
++ return ret;
++}
++
++static int (*ibp_msg_table[])(scif_epd_t ep, struct ibp_msg_header *hdr) = {
++ [IBP_CALLBACK] = ibp_callback,
++ [IBP_RESPONSE] = ibp_response,
++};
++
++int ibp_process_recvs(void)
++{
++ struct ibp_msg_header hdr;
++ int ret = 0;
++
++ while (!kthread_should_stop()) {
++
++ ret = ibp_recv(ep, &hdr, sizeof(hdr));
++ if (ret)
++ break;
++
++ if ((hdr.opcode >= ARRAY_SIZE(ibp_msg_table)) ||
++ !ibp_msg_table[hdr.opcode]) {
++ print_err("Invalid command 0x%x\n", hdr.opcode);
++ ret = -EBADRQC;
++ break;
++ }
++
++ ret = ibp_msg_table[hdr.opcode](ep, &hdr);
++ if (ret)
++ break;
++
++ schedule();
++ }
++
++ return ret;
++}
+diff --git a/drivers/infiniband/ibp/sa/common.h b/drivers/infiniband/ibp/sa/common.h
+new file mode 100644
+index 0000000..11aead1
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/common.h
+@@ -0,0 +1,55 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef COMMON_H
++#define COMMON_H
++
++#include <linux/module.h>
++#include <linux/kthread.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/poll.h>
++#include <linux/pci.h>
++#include <linux/net.h>
++#include <rdma/ib_verbs.h>
++#include "compat.h"
++
++#define SCIF_OFED_PORT_4 64 /* reserved for SA proxy */
++
++#ifndef IBP_SA_PORT /* unique scif port for this service */
++#define IBP_SA_PORT SCIF_OFED_PORT_4
++#endif
++
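++/*
++ * Both helpers transfer exactly @len bytes or fail. ibp_send() retries
++ * -ERESTARTSYS under a mutex so one message is never interleaved with
++ * another; ibp_recv() polls the endpoint so the receive thread can
++ * honor kthread_should_stop(). Each returns 0 on success or a
++ * negative errno.
++ */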
++int ibp_send(scif_epd_t ep, void *buf, size_t len);
++int ibp_recv(scif_epd_t ep, void *buf, size_t len);
++
++#endif /* COMMON_H */
+diff --git a/drivers/infiniband/ibp/sa/ibp-abi.h b/drivers/infiniband/ibp/sa/ibp-abi.h
+new file mode 100644
+index 0000000..1d37cd7
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/ibp-abi.h
+@@ -0,0 +1,101 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_ABI_H
++#define IBP_ABI_H
++
++#include <linux/types.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_sa.h>
++
++/* Increment this value if any changes break compatibility. */
++#define IBP_CM_ABI_VERSION 1
++#define MAX_MSG_SIZE PAGE_SIZE
++
++/* Client to server message enums. */
++enum {
++ /* have callback */
++ IBP_SA_PATH_REC_GET,
++ IBP_SA_JOIN_MCAST,
++
++ /* no callback */
++ IBP_SA_FREE_MCAST,
++ IBP_SA_GET_MCMEMBER_REC,
++ IBP_SA_REGISTER_CLIENT,
++ IBP_SA_UNREGISTER_CLIENT,
++ IBP_SA_CANCEL_QUERY,
++ IBP_INIT_AH_FROM_PATH,
++ IBP_INIT_AH_FROM_MCMEMBER,
++#if 0
++ /* not used or local to client */
++ IBP_SA_SERVICE_REC_QUERY,
++ IBP_SA_UNPACK_PATH,
++#endif
++};
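++
++/*
++ * IBP_INIT_CMD() builds the wire opcode as IBP_<op>, so callers pass
++ * e.g. SA_PATH_REC_GET to select IBP_SA_PATH_REC_GET above.
++ */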
++
++/* Server to client message enums. */
++enum {
++ IBP_CALLBACK,
++ IBP_RESPONSE,
++};
++
++enum {
++ PATH_REC_GET_CB,
++ JOIN_MCAST_CB,
++};
++
++/*
++ * Make sure that all structs defined in this file are laid out to pack
++ * the same way on different architectures to avoid incompatibility.
++ *
++ * Specifically:
++ * - Do not use pointer types -- pass pointers in a u64 instead.
++ * - Make sure that any structure larger than 4 bytes is padded
++ * to a multiple of 8 bytes; otherwise the structure size may
++ * be different between architectures.
++ */
++
++struct ibp_msg_header { /* present in all messages */
++ u32 opcode;
++ u32 length;
++ u32 status;
++ u32 reserved;
++ u64 request;
++ u64 data[0];
++};
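++
++/*
++ * header.length is the total message size including the header itself;
++ * the payload carried in data[] is therefore
++ * (length - sizeof(struct ibp_msg_header)) bytes, as the receive
++ * paths compute.
++ */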
++
++struct ibp_verb_response_msg {
++ struct ibp_msg_header header;
++ u64 data[0];
++};
++
++#endif /* IBP_ABI_H */
+diff --git a/drivers/infiniband/ibp/sa/ibp_exports.h b/drivers/infiniband/ibp/sa/ibp_exports.h
+new file mode 100644
+index 0000000..feb13a1
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/ibp_exports.h
+@@ -0,0 +1,49 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef IBP_EXPORTS_H
++#define IBP_EXPORTS_H
++
++#include <rdma/ib_verbs.h>
++
++/**
++ * ibp_resolve_ib_device - Return the host ib_device handle
++ * @ibdev: Card IB device
++ *
++ * Upper level drivers may require the host ib_device handle associated
++ * with the card ib_device. This routine resolves the card ib_device to
++ * the corresponding host ib_device handle. A value of 0 is returned if
++ * no match was found.
++ */
++u64 ibp_resolve_ib_device(struct ib_device *ibdev);
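++
++/*
++ * Example use (sketch):
++ *
++ *	device = ibp_resolve_ib_device(ibdev);
++ *	if (!device)
++ *		return -ENODEV;
++ */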
++
++#endif /* IBP_EXPORTS_H */
+diff --git a/drivers/infiniband/ibp/sa/sa_client.h b/drivers/infiniband/ibp/sa/sa_client.h
+new file mode 100644
+index 0000000..7c4fa58
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/sa_client.h
+@@ -0,0 +1,123 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef SA_CLIENT_H
++#define SA_CLIENT_H
++
++#include "common.h"
++#include <rdma/ib_verbs.h>
++#include "ibp-abi.h"
++#include "sa_ibp_abi.h"
++#include <rdma/ib_sa.h>
++
++struct ib_sa_query {
++ void (*callback)(struct ib_sa_query *, int,
++ struct ib_sa_mad *);
++ void (*release)(struct ib_sa_query *);
++ struct ib_sa_client *client;
++ struct ib_sa_port *port;
++ struct ib_mad_send_buf *mad_buf;
++ struct ib_sa_sm_ah *sm_ah;
++ int id;
++};
++
++struct ibp_sa_query {
++ void (*callback)(int, void *, void *);
++ void *context;
++ struct ib_sa_query sa_query;
++ struct mutex lock;
++ int tmp_id;
++ struct ibp_sa_query *next;
++};
++
++struct ibp_sa_mcentry {
++ struct ib_sa_multicast multicast;
++ struct ib_sa_client *client;
++ int state;
++ u64 ibp_mcast;
++ struct mutex lock;
++ struct ibp_sa_mcentry *next;
++};
++
++struct ibp_sa_entry {
++ u64 ibp_client;
++ struct ib_sa_client *ib_client;
++ struct ibp_sa_entry *next;
++ struct ibp_sa_query *query_list;
++ struct ibp_sa_mcentry *mc_list;
++};
++
++struct multicast_work {
++ struct work_struct work;
++ struct ibp_sa_mcentry *mcentry;
++ struct ib_device *ib_device;
++ struct ib_sa_client *ib_client;
++ u8 port;
++};
++
++struct path_rec_work {
++ struct work_struct work;
++ struct ibp_sa_query *query;
++ struct ib_device *ib_device;
++ u8 port;
++ struct ib_sa_path_rec rec;
++ ib_sa_comp_mask comp_mask;
++ int timeout_ms;
++};
++
++struct cancel_query_work {
++ struct work_struct work;
++ int id;
++ struct ibp_sa_entry *entry;
++};
++
++int ibp_sa_path_rec_get(struct ibp_sa_entry *entry, u64 device, u8 port_num,
++ struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask,
++ int timeout_ms, gfp_t gfp_mask,
++ struct ibp_sa_query *query);
++int ibp_sa_register_client(struct ibp_sa_entry *entry);
++int ibp_sa_unregister_client(struct ibp_sa_entry *entry);
++int ibp_sa_cancel_query(struct ibp_sa_entry *entry, int id);
++int ibp_init_ah_from_path(u64 device, u8 port_num, struct ib_sa_path_rec *rec,
++ struct ib_ah_attr *ah_attr);
++int ibp_init_ah_from_mcmember(u64 device, u8 port,
++ struct ib_sa_mcmember_rec *rec,
++ struct ib_ah_attr *ah_attr);
++u64 ibp_sa_join_multicast(struct ibp_sa_entry *entry, u64 device,
++ u8 port_num, struct ib_sa_mcmember_rec *rec,
++ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
++ struct ibp_sa_mcentry *mcentry);
++int ibp_sa_free_multicast(u64 mcast);
++int ibp_sa_get_mcmember_rec(u64 device, u8 port_num, union ib_gid *mgid,
++ struct ib_sa_mcmember_rec *rec);
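++
++/*
++ * Typical proxy call flow (sketch): ibp_sa_register_client() obtains a
++ * host-side client handle in entry->ibp_client; queries and multicast
++ * joins then pass that handle, and ibp_sa_unregister_client() releases
++ * it when the local ib_sa_client is unregistered (see sa_proxy.c).
++ */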
++
++#endif /* SA_CLIENT_H */
+diff --git a/drivers/infiniband/ibp/sa/sa_client_msg.c b/drivers/infiniband/ibp/sa/sa_client_msg.c
+new file mode 100644
+index 0000000..5e8f70c
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/sa_client_msg.c
+@@ -0,0 +1,435 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++#include "sa_client.h"
++
++int ibp_sa_register_client(struct ibp_sa_entry *entry)
++{
++ struct ibp_sa_register_client_resp resp;
++ struct ibp_sa_register_client_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_REGISTER_CLIENT, &req);
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_dbg("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ entry->ibp_client = resp.ibp_client;
++
++ return 0;
++}
++
++int ibp_sa_unregister_client(struct ibp_sa_entry *entry)
++{
++ struct ibp_sa_unregister_client_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_UNREGISTER_CLIENT, &req);
++
++ cmd.ibp_client = entry->ibp_client;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++
++int ibp_sa_cancel_query(struct ibp_sa_entry *entry, int id)
++{
++ struct ibp_sa_cancel_query_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ print_trace("in\n");
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_CANCEL_QUERY, &req);
++
++ cmd.id = id;
++ cmd.client = entry->ibp_client;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (!ret)
++ wait_for_completion(&req.done);
++
++ return ret;
++}
++
++int ibp_init_ah_from_path(u64 device, u8 port_num, struct ib_sa_path_rec *rec,
++ struct ib_ah_attr *ah_attr)
++{
++ struct ibp_init_ah_from_path_resp resp;
++ struct ibp_init_ah_from_path_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(&cmd, sizeof(cmd), INIT_AH_FROM_PATH, &req);
++
++ cmd.device = device;
++ cmd.port_num = port_num;
++
++ cmd.rec.service_id = rec->service_id;
++ cmd.rec.dgid_prefix = rec->dgid.global.subnet_prefix;
++ cmd.rec.dgid_id = rec->dgid.global.interface_id;
++ cmd.rec.sgid_prefix = rec->sgid.global.subnet_prefix;
++ cmd.rec.sgid_id = rec->sgid.global.interface_id;
++ cmd.rec.dlid = rec->dlid;
++ cmd.rec.slid = rec->slid;
++ cmd.rec.raw_traffic = rec->raw_traffic;
++ cmd.rec.flow_label = rec->flow_label;
++ cmd.rec.hop_limit = rec->hop_limit;
++ cmd.rec.traffic_class = rec->traffic_class;
++ cmd.rec.reversible = rec->reversible;
++ cmd.rec.numb_path = rec->numb_path;
++ cmd.rec.pkey = rec->pkey;
++ cmd.rec.qos_class = rec->qos_class;
++ cmd.rec.sl = rec->sl;
++ cmd.rec.mtu_selector = rec->mtu_selector;
++ cmd.rec.mtu = rec->mtu;
++ cmd.rec.rate_selector = rec->rate_selector;
++ cmd.rec.rate = rec->rate;
++ cmd.rec.packet_life_time_selector = rec->packet_life_time_selector;
++ cmd.rec.packet_life_time = rec->packet_life_time;
++ cmd.rec.preference = rec->preference;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ ah_attr->grh.dgid.global.subnet_prefix
++ = resp.attr.dgid_prefix;
++ ah_attr->grh.dgid.global.interface_id
++ = resp.attr.dgid_id;
++ ah_attr->grh.flow_label = resp.attr.flow_label;
++ ah_attr->grh.sgid_index = resp.attr.sgid_index;
++ ah_attr->grh.hop_limit = resp.attr.hop_limit;
++ ah_attr->grh.traffic_class = resp.attr.traffic_class;
++ ah_attr->dlid = resp.attr.dlid;
++ ah_attr->sl = resp.attr.sl;
++ ah_attr->src_path_bits = resp.attr.src_path_bits;
++ ah_attr->static_rate = resp.attr.static_rate;
++ ah_attr->ah_flags = resp.attr.ah_flags;
++ ah_attr->port_num = resp.attr.port_num;
++
++ return 0;
++}
++
++int ibp_sa_path_rec_get(struct ibp_sa_entry *entry, u64 device, u8 port_num,
++ struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask,
++ int timeout_ms, gfp_t gfp_mask,
++ struct ibp_sa_query *query)
++{
++ struct ibp_sa_path_rec_get_resp resp;
++ struct ibp_sa_path_rec_get_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_PATH_REC_GET, &req);
++
++ cmd.ibp_client = entry->ibp_client;
++ cmd.entry = (u64) entry;
++ cmd.query = (u64) query;
++ cmd.device = device;
++ cmd.port_num = port_num;
++ cmd.comp_mask = comp_mask;
++ cmd.timeout_ms = timeout_ms;
++ cmd.gfp_mask = gfp_mask;
++
++ cmd.rec.service_id = rec->service_id;
++ cmd.rec.dgid_prefix = rec->dgid.global.subnet_prefix;
++ cmd.rec.dgid_id = rec->dgid.global.interface_id;
++ cmd.rec.sgid_prefix = rec->sgid.global.subnet_prefix;
++ cmd.rec.sgid_id = rec->sgid.global.interface_id;
++ cmd.rec.dlid = rec->dlid;
++ cmd.rec.slid = rec->slid;
++ cmd.rec.raw_traffic = rec->raw_traffic;
++ cmd.rec.flow_label = rec->flow_label;
++ cmd.rec.hop_limit = rec->hop_limit;
++ cmd.rec.traffic_class = rec->traffic_class;
++ cmd.rec.reversible = rec->reversible;
++ cmd.rec.numb_path = rec->numb_path;
++ cmd.rec.pkey = rec->pkey;
++ cmd.rec.qos_class = rec->qos_class;
++ cmd.rec.sl = rec->sl;
++ cmd.rec.mtu_selector = rec->mtu_selector;
++ cmd.rec.mtu = rec->mtu;
++ cmd.rec.rate_selector = rec->rate_selector;
++ cmd.rec.rate = rec->rate;
++ cmd.rec.packet_life_time_selector = rec->packet_life_time_selector;
++ cmd.rec.packet_life_time = rec->packet_life_time;
++ cmd.rec.preference = rec->preference;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ return resp.query_id;
++}
++
++u64 ibp_sa_join_multicast(struct ibp_sa_entry *entry, u64 device, u8 port,
++ struct ib_sa_mcmember_rec *rec,
++ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
++ struct ibp_sa_mcentry *mcentry)
++{
++ struct ibp_sa_join_multicast_resp resp;
++ struct ibp_sa_join_multicast_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_JOIN_MCAST, &req);
++
++ cmd.ibp_client = entry->ibp_client;
++ cmd.mcentry = (u64) mcentry;
++ cmd.device = device;
++ cmd.port_num = port;
++ cmd.comp_mask = comp_mask;
++ cmd.gfp_mask = gfp_mask;
++
++ cmd.rec.mgid.global.subnet_prefix
++ = rec->mgid.global.subnet_prefix;
++ cmd.rec.mgid.global.interface_id
++ = rec->mgid.global.interface_id;
++ cmd.rec.port_gid.global.subnet_prefix
++ = rec->port_gid.global.subnet_prefix;
++ cmd.rec.port_gid.global.interface_id
++ = rec->port_gid.global.interface_id;
++ cmd.rec.qkey = rec->qkey;
++ cmd.rec.mlid = rec->mlid;
++ cmd.rec.mtu_selector = rec->mtu_selector;
++ cmd.rec.mtu = rec->mtu;
++ cmd.rec.traffic_class = rec->traffic_class;
++ cmd.rec.pkey = rec->pkey;
++ cmd.rec.rate_selector = rec->rate_selector;
++ cmd.rec.rate = rec->rate;
++ cmd.rec.packet_life_time_selector
++ = rec->packet_life_time_selector;
++ cmd.rec.packet_life_time = rec->packet_life_time;
++ cmd.rec.sl = rec->sl;
++ cmd.rec.flow_label = rec->flow_label;
++ cmd.rec.hop_limit = rec->hop_limit;
++ cmd.rec.scope = rec->scope;
++ cmd.rec.join_state = rec->join_state;
++ cmd.rec.proxy_join = rec->proxy_join;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ return resp.ibp_mcast;
++}
++
++int ibp_sa_free_multicast(u64 mcast)
++{
++ struct ibp_sa_free_multicast_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, NULL, 0);
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_FREE_MCAST, &req);
++
++ cmd.ibp_mcast = mcast;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ return req.status;
++}
++
++int ibp_sa_get_mcmember_rec(u64 device, u8 port, union ib_gid *mgid,
++ struct ib_sa_mcmember_rec *rec)
++{
++ struct ibp_sa_get_mcmember_rec_resp resp;
++ struct ibp_sa_get_mcmember_rec_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(&cmd, sizeof(cmd), SA_GET_MCMEMBER_REC, &req);
++
++ cmd.device = device;
++ cmd.subnet_prefix = mgid->global.subnet_prefix;
++ cmd.interface_id = mgid->global.interface_id;
++ cmd.port_num = port;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ rec->mgid.global.subnet_prefix = resp.rec.mgid.global.subnet_prefix;
++ rec->mgid.global.interface_id = resp.rec.mgid.global.interface_id;
++ rec->port_gid.global.subnet_prefix
++ = resp.rec.port_gid.global.subnet_prefix;
++ rec->port_gid.global.interface_id
++ = resp.rec.port_gid.global.interface_id;
++ rec->qkey = resp.rec.qkey;
++ rec->mlid = resp.rec.mlid;
++ rec->mtu_selector = resp.rec.mtu_selector;
++ rec->mtu = resp.rec.mtu;
++ rec->traffic_class = resp.rec.traffic_class;
++ rec->pkey = resp.rec.pkey;
++ rec->rate_selector = resp.rec.rate_selector;
++ rec->rate = resp.rec.rate;
++ rec->packet_life_time_selector = resp.rec.packet_life_time_selector;
++ rec->packet_life_time = resp.rec.packet_life_time;
++ rec->sl = resp.rec.sl;
++ rec->flow_label = resp.rec.flow_label;
++ rec->hop_limit = resp.rec.hop_limit;
++ rec->scope = resp.rec.scope;
++ rec->join_state = resp.rec.join_state;
++ rec->proxy_join = resp.rec.proxy_join;
++
++ return 0;
++}
++
++int ibp_init_ah_from_mcmember(u64 device, u8 port,
++ struct ib_sa_mcmember_rec *rec,
++ struct ib_ah_attr *ah_attr)
++{
++ struct ibp_init_ah_from_mcmember_resp resp;
++ struct ibp_init_ah_from_mcmember_cmd cmd;
++ struct ibp_request req;
++ int ret;
++
++ IBP_INIT_REQ(&req, &resp, sizeof(resp));
++ IBP_INIT_CMD(&cmd, sizeof(cmd), INIT_AH_FROM_MCMEMBER, &req);
++
++ cmd.device = device;
++ cmd.port_num = port;
++
++ cmd.rec.mgid.global.subnet_prefix
++ = rec->mgid.global.subnet_prefix;
++ cmd.rec.mgid.global.interface_id
++ = rec->mgid.global.interface_id;
++ cmd.rec.port_gid.global.subnet_prefix
++ = rec->port_gid.global.subnet_prefix;
++ cmd.rec.port_gid.global.interface_id
++ = rec->port_gid.global.interface_id;
++ cmd.rec.qkey = rec->qkey;
++ cmd.rec.mlid = rec->mlid;
++ cmd.rec.mtu_selector = rec->mtu_selector;
++ cmd.rec.mtu = rec->mtu;
++ cmd.rec.traffic_class = rec->traffic_class;
++ cmd.rec.pkey = rec->pkey;
++ cmd.rec.rate_selector = rec->rate_selector;
++ cmd.rec.rate = rec->rate;
++ cmd.rec.packet_life_time_selector
++ = rec->packet_life_time_selector;
++ cmd.rec.packet_life_time = rec->packet_life_time;
++ cmd.rec.sl = rec->sl;
++ cmd.rec.flow_label = rec->flow_label;
++ cmd.rec.hop_limit = rec->hop_limit;
++ cmd.rec.scope = rec->scope;
++ cmd.rec.join_state = rec->join_state;
++ cmd.rec.proxy_join = rec->proxy_join;
++
++ ret = ibp_send(ep, &cmd, sizeof(cmd));
++ if (ret) {
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++
++ wait_for_completion(&req.done);
++
++ if (req.status)
++ return req.status;
++
++ ah_attr->grh.dgid.global.subnet_prefix
++ = resp.attr.dgid_prefix;
++ ah_attr->grh.dgid.global.interface_id
++ = resp.attr.dgid_id;
++ ah_attr->grh.flow_label = resp.attr.flow_label;
++ ah_attr->grh.sgid_index = resp.attr.sgid_index;
++ ah_attr->grh.hop_limit = resp.attr.hop_limit;
++ ah_attr->grh.traffic_class = resp.attr.traffic_class;
++ ah_attr->dlid = resp.attr.dlid;
++ ah_attr->sl = resp.attr.sl;
++ ah_attr->src_path_bits = resp.attr.src_path_bits;
++ ah_attr->static_rate = resp.attr.static_rate;
++ ah_attr->ah_flags = resp.attr.ah_flags;
++ ah_attr->port_num = resp.attr.port_num;
++
++ return 0;
++}
+diff --git a/drivers/infiniband/ibp/sa/sa_ibp_abi.h b/drivers/infiniband/ibp/sa/sa_ibp_abi.h
+new file mode 100644
+index 0000000..09bc840
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/sa_ibp_abi.h
+@@ -0,0 +1,251 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef SA_IBP_ABI_H
++#define SA_IBP_ABI_H
++
++#include <linux/types.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_sa.h>
++
++/* Increment this value if any changes break compatibility. */
++#define IBP_SA_ABI_VERSION 1
++
++/*
++ * Make sure that all structs defined in this file are laid out to pack
++ * the same way on different architectures to avoid incompatibility.
++ *
++ * Specifically:
++ * - Do not use pointer types -- pass pointers in a u64 instead.
++ * - Make sure that any structure larger than 4 bytes is padded
++ * to a multiple of 8 bytes; otherwise the structure size may
++ * be different between architectures.
++ */
++
++struct cb_header {
++ u64 cb_type;
++ u64 status;
++ u64 ibp_client;
++};
++
++struct ibp_sa_path_rec {
++ __be64 service_id;
++ u64 dgid_prefix;
++ u64 dgid_id;
++ u64 sgid_prefix;
++ u64 sgid_id;
++ __be16 dlid;
++ __be16 slid;
++ u32 raw_traffic;
++ __be32 flow_label;
++ u8 hop_limit;
++ u8 traffic_class;
++ u32 reversible;
++ u8 numb_path;
++ __be16 pkey;
++ __be16 qos_class;
++ u8 sl;
++ u8 mtu_selector;
++ u8 mtu;
++ u8 rate_selector;
++ u8 rate;
++ u8 packet_life_time_selector;
++ u8 packet_life_time;
++ u8 preference;
++};
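++
++/*
++ * Flat wire form of struct ib_sa_path_rec: each GID is carried as its
++ * subnet_prefix/interface_id halves so no unions or pointers cross the
++ * link (see the field-by-field marshaling in sa_client_msg.c).
++ */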
++
++struct path_rec_data {
++ u64 entry;
++ u64 query;
++ struct ibp_sa_path_rec resp;
++ u8 reserved[1];
++};
++
++struct ibp_sa_mcmember_rec {
++ u64 mgid_prefix;
++ u64 mgid_id;
++ u64 port_gid_prefix;
++ u64 port_gid_id;
++ __be32 qkey;
++ __be16 mlid;
++ u8 mtu_selector;
++ u8 mtu;
++ u8 traffic_class;
++ __be16 pkey;
++ u8 rate_selector;
++ u8 rate;
++ u8 packet_life_time_selector;
++ u8 packet_life_time;
++ u8 sl;
++ __be32 flow_label;
++ u8 hop_limit;
++ u8 scope;
++ u8 join_state;
++ u64 proxy_join;
++ u8 reserved[1];
++};
++
++struct mc_join_data {
++ u64 mcentry;
++ u64 ibp_mcast;
++ struct ibp_sa_mcmember_rec rec;
++};
++
++struct callback_msg {
++ struct cb_header header;
++ union {
++ struct path_rec_data path_rec;
++ struct mc_join_data mc_join;
++ } u;
++};
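++
++/*
++ * A server-to-client IBP_CALLBACK message carries one callback_msg as
++ * its payload: header.cb_type selects the union member (PATH_REC_GET_CB
++ * uses u.path_rec, JOIN_MCAST_CB uses u.mc_join) and header.status
++ * carries the completion status handed to the user callback.
++ */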
++
++struct ibp_callback_msg {
++ struct ibp_msg_header header;
++ u8 data[0];
++};
++
++struct ibp_sa_path_rec_get_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_client;
++ u64 entry;
++ u64 query;
++ u64 device;
++ u64 port_num;
++ u64 comp_mask;
++ u64 timeout_ms;
++ u64 gfp_mask;
++ struct ibp_sa_path_rec rec;
++};
++
++struct ibp_sa_path_rec_get_resp {
++ u64 sa_query;
++ u64 query_id;
++};
++
++struct ibp_sa_register_client_cmd {
++ struct ibp_msg_header header;
++};
++
++struct ibp_sa_register_client_resp {
++ u64 ibp_client;
++};
++
++struct ibp_sa_unregister_client_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_client;
++};
++
++struct ibp_sa_cancel_query_cmd {
++ struct ibp_msg_header header;
++ u64 id;
++ u64 client;
++};
++
++struct ibp_init_ah_from_path_cmd {
++ struct ibp_msg_header header;
++ u64 device;
++ u8 port_num;
++ struct ibp_sa_path_rec rec;
++};
++
++struct ibp_ah_attr {
++ u64 dgid_prefix;
++ u64 dgid_id;
++ u32 flow_label;
++ u8 sgid_index;
++ u8 hop_limit;
++ u8 traffic_class;
++ u16 dlid;
++ u8 sl;
++ u8 src_path_bits;
++ u8 static_rate;
++ u8 ah_flags;
++ u8 port_num;
++};
++struct ibp_init_ah_from_path_resp {
++ struct ibp_ah_attr attr;
++};
++
++struct ibp_init_ah_from_mcmember_cmd {
++ struct ibp_msg_header header;
++ u64 device;
++ u8 port_num;
++ struct ib_sa_mcmember_rec rec;
++};
++
++struct ibp_init_ah_from_mcmember_resp {
++ struct ibp_ah_attr attr;
++};
++
++struct ibp_sa_join_multicast_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_client;
++ u64 mcentry;
++ u64 device;
++ u8 port_num;
++ u64 comp_mask;
++ u64 gfp_mask;
++ struct ib_sa_mcmember_rec rec;
++};
++
++struct ibp_sa_join_multicast_resp {
++ u64 ibp_mcast;
++};
++
++struct ibp_sa_free_multicast_cmd {
++ struct ibp_msg_header header;
++ u64 ibp_mcast;
++};
++
++struct ibp_sa_get_mcmember_rec_cmd {
++ struct ibp_msg_header header;
++ u64 device;
++ u8 port_num;
++ u64 subnet_prefix;
++ u64 interface_id;
++};
++
++struct ibp_sa_get_mcmember_rec_resp {
++ struct ib_sa_mcmember_rec rec;
++};
++
++struct ibp_sa_event {
++ enum ib_event_type event_type;
++ u64 ibp_client;
++ union {
++ __u32 send_status;
++ } u;
++ u64 data_length;
++ u8 data[0];
++};
++
++#endif /* SA_IBP_ABI_H */
+diff --git a/drivers/infiniband/ibp/sa/sa_proxy.c b/drivers/infiniband/ibp/sa/sa_proxy.c
+new file mode 100644
+index 0000000..b20dde8
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/sa_proxy.c
+@@ -0,0 +1,773 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "client.h"
++#include "sa_client.h"
++#include "ibp_exports.h"
++#include <rdma/ib_pack.h>
++#include "sa_table.h"
++
++static struct ibp_sa_entry *gbl_list;
++
++/* Translate from client side SA Client to "true" SA Client on the host */
++static struct ibp_sa_entry *find_sa_mcast(struct ib_sa_multicast *multicast,
++ u64 *ibp_mcast)
++{
++ struct ibp_sa_entry *entry;
++ struct ibp_sa_mcentry *mcentry;
++
++ down_read(&list_rwsem);
++
++ for (entry = gbl_list; entry; entry = entry->next)
++ for (mcentry = entry->mc_list; mcentry;
++ mcentry = mcentry->next)
++ if (&mcentry->multicast == multicast) {
++ *ibp_mcast = mcentry->ibp_mcast;
++ goto found;
++ }
++
++ print_err("Could not find multicast entry\n");
++found:
++ up_read(&list_rwsem);
++
++ return entry;
++}
++
++static void free_sa_mcast(struct ibp_sa_entry *entry,
++ struct ib_sa_multicast *multicast)
++{
++ struct ibp_sa_mcentry *mcentry, *last = NULL;
++
++ down_write(&list_rwsem);
++
++ for (mcentry = entry->mc_list; mcentry; mcentry = mcentry->next) {
++ if (&mcentry->multicast == multicast) {
++ if (!last)
++ entry->mc_list = mcentry->next;
++ else
++ last->next = mcentry->next;
++ goto out;
++ }
++ last = mcentry;
++ }
++
++ print_err("Could not find mcentry\n");
++out:
++ up_write(&list_rwsem);
++
++ kfree(mcentry);
++}
++
++/* XXX avoid defined-but-not-used warnings */
++#if 0
++static struct ibp_sa_query *find_sa_query(struct ibp_sa_entry *entry, int id)
++{
++ struct ibp_sa_query *query;
++
++ down_read(&list_rwsem);
++
++ for (query = entry->query_list; query; query = query->next)
++ if (query->tmp_id == id)
++ goto out;
++
++ print_err("Could not find query ID\n");
++out:
++ up_read(&list_rwsem);
++
++ return query;
++}
++
++static void free_sa_query(struct ibp_sa_entry *entry, int id)
++{
++ struct ibp_sa_query *query, *last = NULL;
++
++ down_write(&list_rwsem);
++
++ for (query = entry->query_list; query; query = query->next) {
++ if (query->sa_query.id == id) {
++ if (!last)
++ entry->query_list = query->next;
++ else
++ last->next = query->next;
++ goto out;
++ }
++ last = query;
++ }
++
++ print_err("Could not find query ID\n");
++out:
++ up_write(&list_rwsem);
++
++ kfree(query);
++}
++#endif
++
++static void free_sa_client(struct ibp_sa_entry *entry)
++{
++ struct ibp_sa_entry *cur, *last = NULL;
++
++ down_write(&list_rwsem);
++
++ for (cur = gbl_list; cur; cur = cur->next) {
++ if (cur == entry) {
++ if (!last)
++ gbl_list = cur->next;
++ else
++ last->next = cur->next;
++ goto out;
++ }
++ last = cur;
++ }
++
++ print_err("Could not find client to free\n");
++
++out:
++ up_write(&list_rwsem);
++
++ kfree(cur);
++}
++
++/* Translate from client side SA Client to "true" SA Client on the host */
++static struct ibp_sa_entry *find_ib_client(struct ib_sa_client *ib_client)
++{
++ struct ibp_sa_entry *entry;
++
++ down_read(&list_rwsem);
++
++ for (entry = gbl_list; entry; entry = entry->next)
++ if (entry->ib_client == ib_client)
++ goto out;
++
++ print_err("Could not find local client\n");
++out:
++ up_read(&list_rwsem);
++
++ return entry;
++}
++
++void callback_path_rec(struct callback_work *cb_work, u64 status)
++{
++ struct ibp_sa_query *query;
++ struct path_rec_data *path_rec;
++ struct ibp_sa_entry *entry;
++ int len;
++
++ print_trace("in\n");
++
++ len = sizeof(struct path_rec_data) + sizeof(struct cb_header);
++ if (len != cb_work->length) {
++ print_err("Invalid data length %d, expecting %d\n",
++ cb_work->length, len);
++ return;
++ }
++
++ path_rec = (void *) &cb_work->data.u.path_rec;
++
++ query = (void *) path_rec->query;
++ entry = (void *) path_rec->entry;
++
++ if (IS_NULL_OR_ERR(query) || IS_NULL_OR_ERR(entry)) {
++ print_err("Invalid callback data\n");
++ return;
++ }
++
++ if (query->sa_query.id < 0) {
++ print_err("callback called even though error occurred\n");
++ return;
++ }
++
++ mutex_lock(&query->lock);
++ query->callback(status, &path_rec->resp, query->context);
++ mutex_unlock(&query->lock);
++}
++
++void callback_mcjoin(struct callback_work *cb_work, u64 status)
++{
++ struct mc_join_data *mc_join;
++ struct ibp_sa_mcentry *mcentry;
++ struct ib_sa_mcmember_rec *ib_rec;
++ struct ibp_sa_mcmember_rec *ibp_rec;
++ int len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ len = sizeof(struct mc_join_data) + sizeof(struct cb_header);
++ if (len != cb_work->length) {
++ print_err("Invalid data length %d, expecting %d\n",
++ cb_work->length, len);
++ return;
++ }
++
++ mc_join = (void *) &cb_work->data.u.mc_join;
++ mcentry = (void *) mc_join->mcentry;
++
++ if (IS_NULL_OR_ERR(mcentry)) {
++ print_err("Invalid callback data\n");
++ return;
++ }
++ if ((s64) mcentry->ibp_mcast < 0) { /* u64 may hold a negative errno */
++ print_err("callback called even though error occurred\n");
++ return;
++ }
++
++ mutex_lock(&mcentry->lock);
++ if (mcentry->multicast.callback) {
++ ibp_rec = (void *) &mc_join->rec;
++ ib_rec = &mcentry->multicast.rec;
++
++ ib_rec->mgid.global.subnet_prefix = ibp_rec->mgid_prefix;
++ ib_rec->mgid.global.interface_id = ibp_rec->mgid_id;
++ ib_rec->port_gid.global.subnet_prefix =
++ ibp_rec->port_gid_prefix;
++ ib_rec->port_gid.global.interface_id = ibp_rec->port_gid_id;
++ ib_rec->qkey = ibp_rec->qkey;
++ ib_rec->mlid = ibp_rec->mlid;
++ ib_rec->mtu_selector = ibp_rec->mtu_selector;
++ ib_rec->mtu = ibp_rec->mtu;
++ ib_rec->traffic_class = ibp_rec->traffic_class;
++ ib_rec->pkey = ibp_rec->pkey;
++ ib_rec->rate_selector = ibp_rec->rate_selector;
++ ib_rec->rate = ibp_rec->rate;
++ ib_rec->packet_life_time_selector =
++ ibp_rec->packet_life_time_selector;
++ ib_rec->packet_life_time = ibp_rec->packet_life_time;
++ ib_rec->sl = ibp_rec->sl;
++ ib_rec->flow_label = ibp_rec->flow_label;
++ ib_rec->hop_limit = ibp_rec->hop_limit;
++ ib_rec->scope = ibp_rec->scope;
++ ib_rec->join_state = ibp_rec->join_state;
++ ib_rec->proxy_join = (int) ibp_rec->proxy_join;
++ ret = mcentry->multicast.callback(status, &mcentry->multicast);
++ }
++ mutex_unlock(&mcentry->lock);
++
++ if (ret)
++ ib_sa_free_multicast(&mcentry->multicast);
++}
++
++void callback_work(struct work_struct *work)
++{
++ struct callback_work *cb_work = (void *) work;
++ struct cb_header *header;
++ u64 cb_type, status;
++
++ print_trace("in\n");
++
++ if (!cb_work) {
++ print_err("Invalid callback work_struct\n");
++ return;
++ }
++
++ header = &cb_work->data.header;
++
++ status = header->status;
++ cb_type = header->cb_type;
++
++ switch (cb_type) {
++ case PATH_REC_GET_CB:
++ callback_path_rec(cb_work, status);
++ break;
++
++ case JOIN_MCAST_CB:
++ callback_mcjoin(cb_work, status);
++ break;
++
++ default:
++ print_err("unsupported callback: %lld\n", cb_type);
++ }
++
++ kfree(cb_work);
++}
++
++void ib_sa_register_client(struct ib_sa_client *ib_client)
++{
++ struct ibp_sa_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = kzalloc(sizeof(struct ibp_sa_entry), GFP_KERNEL);
++ if (!entry) {
++ print_err("kzalloc failed\n");
++ return;
++ }
++
++ /* Ask the host SA to register a new ib_client. */
++ ret = ibp_sa_register_client(entry);
++ if (ret) {
++ print_dbg("ibp_sa_register_client returned %d\n", ret);
++ kfree(entry);
++ return;
++ }
++
++ /*
++ * Bind the local ib_client before the entry becomes visible to
++ * find_ib_client().
++ */
++ entry->ib_client = ib_client;
++
++ /* Insert the new entry at the head of the sa proxy entry list. */
++ down_write(&list_rwsem);
++
++ entry->next = gbl_list;
++ gbl_list = entry;
++
++ up_write(&list_rwsem);
++}
++EXPORT_SYMBOL(ib_sa_register_client);
++
++void ib_sa_unregister_client(struct ib_sa_client *ib_client)
++{
++ struct ibp_sa_entry *entry;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_ib_client(ib_client);
++ if (!entry)
++ return;
++
++ ret = ibp_sa_unregister_client(entry);
++ if (ret)
++ print_err("ibp_sa_unregister_client returned %d\n", ret);
++
++ free_sa_client(entry);
++}
++EXPORT_SYMBOL(ib_sa_unregister_client);
++
++/* XXX avoid defined-but-not-used warnings */
++#if 0
++static void cancel_query_work(struct work_struct *work)
++{
++ struct ibp_sa_entry *entry;
++ struct cancel_query_work *cancel;
++ int id;
++ int ret;
++
++ print_trace("in\n");
++
++ cancel = container_of(work, struct cancel_query_work, work);
++
++ entry = cancel->entry;
++ id = cancel->id;
++
++ ret = ibp_sa_cancel_query(entry, id);
++ if (ret)
++ print_err("ibp_sa_cancel_query returned %d\n", ret);
++
++ free_sa_query(entry, id);
++ kfree(cancel);
++}
++#endif
++
++void ib_sa_cancel_query(int id, struct ib_sa_query *ib_query)
++{
++ print_trace("in\n");
++/*
++ * XXX Fix me: cancel needs to take references on its structures to
++ * handle the race with the query callback.
++ */
++#if 0
++ struct ibp_sa_entry *entry;
++ struct ibp_sa_query *query;
++ struct cancel_query_work *cancel;
++
++ print_trace("in\n");
++
++ entry = find_ib_client(ib_query->client);
++ if (!entry) {
++ print_err("Could not find entry to cancel query.");
++ return;
++ }
++
++ query = find_sa_query(entry, id);
++ if (!query) {
++ print_err("Could not find id to cancel query.");
++ return;
++ }
++
++ cancel = kzalloc(sizeof(*cancel), GFP_KERNEL);
++ if (!cancel) {
++ print_err("kzalloc failed\n");
++ return;
++ }
++
++ cancel->entry = entry;
++ cancel->id = query->sa_query.id;
++
++ INIT_WORK(&cancel->work, cancel_query_work);
++ queue_work(workqueue, &cancel->work);
++#endif
++}
++EXPORT_SYMBOL(ib_sa_cancel_query);
++
++int ib_init_ah_from_path(struct ib_device *ib_device, u8 port,
++ struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
++{
++ u64 device;
++ int ret;
++
++ print_trace("in\n");
++
++ device = ibp_resolve_ib_device(ib_device);
++ if (!device) {
++ print_err("Could not find a valid ib_device\n");
++ return -ENODEV;
++ }
++
++ ret = ibp_init_ah_from_path(device, port, rec, ah_attr);
++ if (ret)
++ print_err("ibp_init_ah_from_path returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_init_ah_from_path);
++
++void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
++{
++ print_trace("in\n");
++
++ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
++}
++EXPORT_SYMBOL(ib_sa_unpack_path);
++
++void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
++{
++ print_trace("in\n");
++
++ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
++}
++EXPORT_SYMBOL(ib_sa_pack_path);
++
++int ib_sa_service_rec_query(struct ib_sa_client *ib_client,
++ struct ib_device *ib_device,
++ u8 port, u8 method,
++ struct ib_sa_service_rec *rec,
++ ib_sa_comp_mask comp_mask,
++ int timeout_ms, gfp_t gfp_mask,
++ void (*callback)(int status,
++ struct ib_sa_service_rec *resp,
++ void *context),
++ void *context, struct ib_sa_query **sa_query)
++{
++ print_trace("in\n");
++
++ return -ENOSYS; /* Cannot find any caller of this interface. */
++}
++EXPORT_SYMBOL(ib_sa_service_rec_query);
++
++static void dummy_callback(struct ib_sa_query *sa_query, int status,
++ struct ib_sa_mad *sa_mad)
++{
++ print_trace("in\n");
++}
++
++static void dummy_release(struct ib_sa_query *sa_query)
++{
++ print_trace("in\n");
++}
++
++static void sa_path_rec_work(struct work_struct *work)
++{
++ struct path_rec_work *pathrec;
++ struct ibp_sa_query *query;
++ struct ibp_sa_entry *entry;
++ u64 device;
++ int query_id;
++
++ print_trace("in\n");
++
++ pathrec = container_of(work, struct path_rec_work, work);
++
++ query = pathrec->query;
++
++ entry = find_ib_client(query->sa_query.client);
++ if (!entry) {
++ print_err("Could not find a valid ib_client\n");
++ query->callback(-EINVAL, &pathrec->rec, query->context);
++ goto out;
++ }
++
++ down_write(&list_rwsem);
++ query->next = entry->query_list;
++ entry->query_list = query;
++ up_write(&list_rwsem);
++
++ device = ibp_resolve_ib_device(pathrec->ib_device);
++ if (!device) {
++ print_err("Could not find a valid ib_device\n");
++ query->callback(-ENODEV, &pathrec->rec, query->context);
++ goto out;
++ }
++
++ mutex_init(&query->lock);
++ mutex_lock(&query->lock);
++
++ query_id = ibp_sa_path_rec_get(entry, device, pathrec->port,
++ &pathrec->rec, pathrec->comp_mask,
++ pathrec->timeout_ms, GFP_KERNEL,
++ pathrec->query);
++
++ query->sa_query.id = query_id;
++
++ /* A negative return value from ibp_sa_path_rec_get() is an error
++ * code. Otherwise it is a valid query ID. */
++ if (query_id < 0) {
++ print_err("ibp_sa_path_rec_get returned %d\n", query_id);
++ query->callback(query_id, &pathrec->rec, query->context);
++ }
++out:
++ mutex_unlock(&query->lock);
++
++ kfree(pathrec);
++}
++
++int ib_sa_path_rec_get(struct ib_sa_client *ib_client,
++ struct ib_device *ib_device, u8 port,
++ struct ib_sa_path_rec *rec,
++ ib_sa_comp_mask comp_mask,
++ int timeout_ms, gfp_t gfp_mask,
++ void (*callback)(int status,
++ struct ib_sa_path_rec *resp,
++ void *context),
++ void *context, struct ib_sa_query **sa_query)
++{
++ struct ibp_sa_query *query;
++ struct path_rec_work *pathrec;
++ static int query_id = 1000;
++
++ print_trace("in\n");
++
++ query = kzalloc(sizeof(*query), gfp_mask);
++ if (!query) {
++ print_err("kzalloc failed\n");
++ return -ENOMEM;
++ }
++
++ pathrec = kzalloc(sizeof(*pathrec), gfp_mask);
++ if (!pathrec) {
++ print_err("kzalloc failed\n");
++ kfree(query);
++ return -ENOMEM;
++ }
++
++ pathrec->query = query;
++ pathrec->ib_device = ib_device;
++ pathrec->port = port;
++ pathrec->rec = *rec;
++ pathrec->comp_mask = comp_mask;
++ pathrec->timeout_ms = timeout_ms;
++
++ query->context = context;
++ query->callback = (void *) callback;
++
++ query->sa_query.callback = callback ? dummy_callback : NULL;
++ query->sa_query.release = dummy_release;
++ query->sa_query.client = ib_client;
++ query->sa_query.id = -EINVAL;
++
++ *sa_query = &query->sa_query;
++
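++ /* Return a locally generated placeholder ID; the real SA query ID
++ * is assigned asynchronously in sa_path_rec_work() once the proxied
++ * ibp_sa_path_rec_get() call completes. */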
++ query->tmp_id = query_id++;
++
++ INIT_WORK(&pathrec->work, sa_path_rec_work);
++ queue_work(workqueue, &pathrec->work);
++
++ return query->tmp_id;
++}
++EXPORT_SYMBOL(ib_sa_path_rec_get);
++
++static void sa_mcjoin_work(struct work_struct *work)
++{
++ struct multicast_work *mcjoin;
++ struct ibp_sa_mcentry *mcentry;
++ struct ibp_sa_entry *entry;
++ u64 device;
++ u64 mcast;
++
++ print_trace("in\n");
++
++ mcjoin = container_of(work, struct multicast_work, work);
++
++ mcentry = mcjoin->mcentry;
++
++ entry = find_ib_client(mcentry->client);
++ if (!entry) {
++ print_err("Could not find a valid ib_client\n");
++ mcentry->ibp_mcast = -EINVAL;
++ goto out;
++ }
++
++ down_write(&list_rwsem);
++ mcentry->next = entry->mc_list;
++ entry->mc_list = mcentry;
++ up_write(&list_rwsem);
++
++ device = ibp_resolve_ib_device(mcjoin->ib_device);
++ if (!device) {
++ print_err("Could not find a valid ib_device\n");
++ mcentry->ibp_mcast = -ENODEV;
++ goto out;
++ }
++
++ mutex_init(&mcentry->lock);
++ mutex_lock(&mcentry->lock);
++
++ mcast = ibp_sa_join_multicast(entry, device, mcjoin->port,
++ &mcentry->multicast.rec,
++ mcentry->multicast.comp_mask,
++ GFP_KERNEL, mcentry);
++ if (mcast < 0)
++ print_err("ibp_sa_join_multicast returned %d\n", (int) mcast);
++
++ mcentry->ibp_mcast = mcast;
++
++ mutex_unlock(&mcentry->lock);
++out:
++ kfree(mcjoin);
++}
++
++struct ib_sa_multicast *
++ib_sa_join_multicast(struct ib_sa_client *ib_client,
++ struct ib_device *ib_device, u8 port,
++ struct ib_sa_mcmember_rec *rec,
++ ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
++ int (*callback)(int status,
++ struct ib_sa_multicast *multicast),
++ void *context)
++{
++ struct ibp_sa_mcentry *mcentry;
++ struct multicast_work *mcjoin;
++
++ print_trace("in\n");
++
++ mcentry = kzalloc(sizeof(*mcentry), gfp_mask);
++ if (!mcentry) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ mcjoin = kzalloc(sizeof(*mcjoin), gfp_mask);
++ if (!mcjoin) {
++ kfree(mcentry);
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ mcjoin->ib_device = ib_device;
++ mcjoin->port = port;
++ mcjoin->mcentry = mcentry;
++
++ mcentry->ibp_mcast = -EINVAL;
++ mcentry->client = ib_client;
++ mcentry->multicast.rec = *rec;
++ mcentry->multicast.comp_mask = comp_mask;
++ mcentry->multicast.callback = callback;
++ mcentry->multicast.context = context;
++
++ INIT_WORK(&mcjoin->work, sa_mcjoin_work);
++ queue_work(workqueue, &mcjoin->work);
++
++ return &mcentry->multicast;
++}
++EXPORT_SYMBOL(ib_sa_join_multicast);
++
++void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
++{
++ struct ibp_sa_entry *entry;
++ u64 ibp_mcast;
++ int ret;
++
++ print_trace("in\n");
++
++ entry = find_sa_mcast(multicast, &ibp_mcast);
++ if (!entry) {
++ print_err("Could not find a valid entry\n");
++ return;
++ }
++
++ if (ibp_mcast == -EINVAL)
++ return;
++
++ ret = ibp_sa_free_multicast(ibp_mcast);
++ if (ret)
++ print_err("ibp_sa_free_multicast returned %d\n", ret);
++
++ free_sa_mcast(entry, multicast);
++}
++EXPORT_SYMBOL(ib_sa_free_multicast);
++
++int ib_sa_get_mcmember_rec(struct ib_device *ib_device, u8 port,
++ union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
++{
++ u64 device;
++ int ret;
++
++ print_trace("in\n");
++
++ device = ibp_resolve_ib_device(ib_device);
++ if (!device) {
++ print_err("Could not find a valid ib_device\n");
++ return -ENODEV;
++ }
++
++ ret = ibp_sa_get_mcmember_rec(device, port, mgid, rec);
++ if (ret)
++ print_err("ibp_sa_get_mcmember returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
++
++int ib_init_ah_from_mcmember(struct ib_device *ib_device, u8 port,
++ struct ib_sa_mcmember_rec *rec,
++ struct net_device *ndev,
++ enum ib_gid_type gid_type,
++ struct ib_ah_attr *ah_attr)
++{
++ u64 device;
++ int ret;
++
++ print_trace("in\n");
++
++ device = ibp_resolve_ib_device(ib_device);
++ if (!device) {
++ print_err("Could not find a valid ib_device\n");
++ return -ENODEV;
++ }
++
++ ret = ibp_init_ah_from_mcmember(device, port, rec, ah_attr);
++ if (ret)
++ print_err("ibp_init_ah_from_path returned %d\n", ret);
++
++ return ret;
++}
++EXPORT_SYMBOL(ib_init_ah_from_mcmember);
+diff --git a/drivers/infiniband/ibp/sa/sa_server_msg.c b/drivers/infiniband/ibp/sa/sa_server_msg.c
+new file mode 100644
+index 0000000..2f10f49
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/sa_server_msg.c
+@@ -0,0 +1,962 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "server.h"
++
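++/*
++ * sa_entry_list tracks proxied SA client registrations, query_list
++ * tracks outstanding path record queries, and mcast_list tracks
++ * multicast joins; all three are protected by list_rwsem.
++ */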
++LIST_HEAD(sa_entry_list);
++LIST_HEAD(query_list);
++LIST_HEAD(mcast_list);
++
++static void free_query_list(struct sa_query_entry *entry)
++{
++ if (entry) {
++ down_write(&list_rwsem);
++ list_del(&entry->list);
++ up_write(&list_rwsem);
++ kfree(entry);
++ }
++}
++
++static struct sa_query_entry *add_query_list(struct ibp_client *client)
++{
++ struct sa_query_entry *entry;
++
++ print_trace("in\n");
++
++ entry = kzalloc(sizeof(struct sa_query_entry), GFP_KERNEL);
++ if (!entry) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(-ENOMEM);
++ }
++
++ entry->ibp_client = client;
++
++ down_write(&list_rwsem);
++ list_add(&entry->list, &query_list);
++ up_write(&list_rwsem);
++
++ return entry;
++}
++
++static struct sa_query_entry *find_query_entry(struct ib_sa_client *client)
++{
++ struct sa_query_entry *query;
++
++ down_read(&list_rwsem);
++
++ list_for_each_entry(query, &query_list, list)
++ if (query->sa_client == client)
++ goto out;
++
++ print_err("Could not find sa_query_entry\n");
++ query = NULL;
++out:
++ up_read(&list_rwsem);
++
++ return query;
++}
++
++static struct sa_entry *find_sa_entry(struct ib_sa_client *ib_client)
++{
++ struct sa_entry *entry;
++
++ down_read(&list_rwsem);
++
++ list_for_each_entry(entry, &sa_entry_list, list)
++ if (&entry->ib_client == ib_client)
++ goto out;
++
++ print_err("Could not find sa_entry\n");
++ entry = NULL;
++out:
++ up_read(&list_rwsem);
++
++ return entry;
++}
++
++/* Look up the server-side ib_sa_client registered for this connection.
++ * Its address is the handle the client uses to identify the proxied
++ * SA client in callback messages.
++ */
++static struct ib_sa_client *find_ibp_client(struct ibp_client *ibp_client)
++{
++ struct sa_entry *entry;
++ struct ib_sa_client *client = NULL;
++
++ down_read(&list_rwsem);
++
++ list_for_each_entry(entry, &sa_entry_list, list)
++ if (entry->client == ibp_client) {
++ client = &entry->ib_client;
++ goto out;
++ }
++
++ print_err("Could not find proxied sa_client %p\n", ibp_client);
++out:
++ up_read(&list_rwsem);
++
++ return client;
++}
++
++int ibp_cmd_sa_register_client(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct sa_entry *entry;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_sa_register_client_resp *resp;
++ size_t len;
++ int status = 0;
++ int ret;
++
++ print_trace("in\n");
++
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ entry = kzalloc(sizeof(struct sa_entry), GFP_KERNEL);
++ if (!entry) {
++ print_err("kzalloc failed\n");
++ status = -ENOMEM;
++ goto send_resp;
++ }
++
++ entry->client = ibp_client;
++
++ len += sizeof(*resp);
++
++ resp = (struct ibp_sa_register_client_resp *) msg->data;
++
++ resp->ibp_client = (u64) &entry->ib_client;
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, status);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret) {
++ kfree(entry);
++ print_err("ibp_send returned %d\n", ret);
++ return ret;
++ }
++ if (status)
++ return status;
++
++ ib_sa_register_client(&entry->ib_client);
++
++ down_write(&list_rwsem);
++ list_add(&entry->list, &sa_entry_list);
++ up_write(&list_rwsem);
++
++ return 0;
++}
++
++int ibp_cmd_sa_unregister_client(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct sa_entry *entry;
++ struct ibp_sa_unregister_client_cmd *cmd;
++ struct ibp_verb_response_msg *msg;
++ struct ib_sa_client *client;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_sa_unregister_client_cmd *) hdr;
++ client = (struct ib_sa_client *) cmd->ibp_client;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ entry = find_sa_entry(client);
++ if (!entry) {
++ ret = -EINVAL;
++ goto send_resp;
++ }
++
++ down_write(&list_rwsem);
++ list_del(&entry->list);
++ up_write(&list_rwsem);
++
++ ib_sa_unregister_client(&entry->ib_client);
++
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_cmd_sa_cancel_query(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct sa_query_entry *entry;
++ struct ibp_sa_cancel_query_cmd *cmd;
++ struct ibp_verb_response_msg *msg;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_sa_cancel_query_cmd *) hdr;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ entry = find_query_entry((struct ib_sa_client *) cmd->client);
++ if (!entry) {
++ ret = -EINVAL;
++ goto send_resp;
++ }
++
++ ib_sa_cancel_query(cmd->id, entry->query);
++
++ free_query_list(entry);
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_cmd_init_ah_from_path(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct ib_device *device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_init_ah_from_path_cmd *cmd;
++ struct ibp_init_ah_from_path_resp *resp;
++ struct ib_sa_path_rec rec;
++ struct ib_ah_attr attr;
++ size_t len;
++ u8 port_num;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_init_ah_from_path_cmd *) hdr;
++ device = (struct ib_device *) cmd->device;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ port_num = cmd->port_num;
++
++ rec.service_id = cmd->rec.service_id;
++ rec.dgid.global.interface_id
++ = cmd->rec.dgid_id;
++ rec.dgid.global.subnet_prefix
++ = cmd->rec.dgid_prefix;
++ rec.sgid.global.interface_id
++ = cmd->rec.sgid_id;
++ rec.sgid.global.subnet_prefix
++ = cmd->rec.sgid_prefix;
++ rec.dlid = cmd->rec.dlid;
++ rec.slid = cmd->rec.slid;
++ rec.raw_traffic = cmd->rec.raw_traffic;
++ rec.flow_label = cmd->rec.flow_label;
++ rec.hop_limit = cmd->rec.hop_limit;
++ rec.traffic_class = cmd->rec.traffic_class;
++ rec.reversible = cmd->rec.reversible;
++ rec.numb_path = cmd->rec.numb_path;
++ rec.pkey = cmd->rec.pkey;
++ rec.qos_class = cmd->rec.qos_class;
++ rec.sl = cmd->rec.sl;
++ rec.mtu_selector = cmd->rec.mtu_selector;
++ rec.mtu = cmd->rec.mtu;
++ rec.rate_selector = cmd->rec.rate_selector;
++ rec.rate = cmd->rec.rate;
++ rec.packet_life_time_selector
++ = cmd->rec.packet_life_time_selector;
++ rec.packet_life_time = cmd->rec.packet_life_time;
++ rec.preference = cmd->rec.preference;
++
++ ret = ib_init_ah_from_path(device, port_num, &rec, &attr);
++ if (ret) {
++ print_err("ib_init_ah_from_path returned %d\n", ret);
++ goto send_resp;
++ }
++ len += sizeof(*resp);
++ resp = (struct ibp_init_ah_from_path_resp *) msg->data;
++
++ resp->attr.dgid_prefix = attr.grh.dgid.global.subnet_prefix;
++ resp->attr.dgid_id = attr.grh.dgid.global.interface_id;
++ resp->attr.flow_label = attr.grh.flow_label;
++ resp->attr.sgid_index = attr.grh.sgid_index;
++ resp->attr.hop_limit = attr.grh.hop_limit;
++ resp->attr.traffic_class
++ = attr.grh.traffic_class;
++ resp->attr.dlid = attr.dlid;
++ resp->attr.sl = attr.sl;
++ resp->attr.src_path_bits
++ = attr.src_path_bits;
++ resp->attr.static_rate = attr.static_rate;
++ resp->attr.ah_flags = attr.ah_flags;
++ resp->attr.port_num = attr.port_num;
++
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_cmd_init_ah_from_mcmember(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct ib_device *device;
++ struct ibp_init_ah_from_mcmember_cmd *cmd;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_init_ah_from_mcmember_resp *resp;
++ struct ib_sa_mcmember_rec rec;
++ struct ib_ah_attr attr;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_init_ah_from_mcmember_cmd *) hdr;
++ device = (struct ib_device *) cmd->device;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ rec.mgid.global.subnet_prefix = cmd->rec.mgid.global.subnet_prefix;
++ rec.mgid.global.interface_id = cmd->rec.mgid.global.interface_id;
++ rec.port_gid.global.subnet_prefix
++ = cmd->rec.port_gid.global.subnet_prefix;
++ rec.port_gid.global.interface_id
++ = cmd->rec.port_gid.global.interface_id;
++ rec.qkey = cmd->rec.qkey;
++ rec.mlid = cmd->rec.mlid;
++ rec.mtu_selector = cmd->rec.mtu_selector;
++ rec.mtu = cmd->rec.mtu;
++ rec.traffic_class = cmd->rec.traffic_class;
++ rec.pkey = cmd->rec.pkey;
++ rec.rate_selector = cmd->rec.rate_selector;
++ rec.rate = cmd->rec.rate;
++ rec.packet_life_time_selector
++ = cmd->rec.packet_life_time_selector;
++ rec.packet_life_time = cmd->rec.packet_life_time;
++ rec.sl = cmd->rec.sl;
++ rec.flow_label = cmd->rec.flow_label;
++ rec.hop_limit = cmd->rec.hop_limit;
++ rec.scope = cmd->rec.scope;
++ rec.join_state = cmd->rec.join_state;
++ rec.proxy_join = cmd->rec.proxy_join;
++
++ ret = ib_init_ah_from_mcmember(device, cmd->port_num, &rec,
++ NULL, IB_GID_TYPE_IB, &attr);
++ if (ret) {
++ print_err("ib_init_ah_from_mcmember returned %d\n", ret);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++ resp = (struct ibp_init_ah_from_mcmember_resp *) msg->data;
++
++ resp->attr.dgid_prefix = attr.grh.dgid.global.subnet_prefix;
++ resp->attr.dgid_id = attr.grh.dgid.global.interface_id;
++ resp->attr.flow_label = attr.grh.flow_label;
++ resp->attr.sgid_index = attr.grh.sgid_index;
++ resp->attr.hop_limit = attr.grh.hop_limit;
++ resp->attr.traffic_class
++ = attr.grh.traffic_class;
++ resp->attr.dlid = attr.dlid;
++ resp->attr.sl = attr.sl;
++ resp->attr.src_path_bits
++ = attr.src_path_bits;
++ resp->attr.static_rate = attr.static_rate;
++ resp->attr.ah_flags = attr.ah_flags;
++ resp->attr.port_num = attr.port_num;
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++static void ibp_send_callback(struct work_struct *work)
++{
++ struct callback_work *cb_work;
++ struct ibp_callback_msg *msg;
++ struct cb_header *header;
++ struct ibp_client *client;
++ size_t len;
++ int data_length;
++ int cb_type;
++ int ret;
++
++ print_trace("in\n");
++
++ cb_work = (struct callback_work *) work;
++ len = sizeof(*msg);
++
++ if (!cb_work) {
++ print_err("Invalid callback work_struct\n");
++ return;
++ }
++
++ header = &cb_work->msg.header;
++ cb_type = header->cb_type;
++
++ client = cb_work->client;
++ if (!client) {
++ print_err("Invalid callback client\n");
++ goto err;
++ }
++ if (!client->ep) {
++ print_err("Invalid callback client ep\n");
++ goto err;
++ }
++ if (cb_work->data->ret) {
++ print_err("caller failed to send msg to card\n");
++ goto err;
++ }
++
++ data_length = cb_work->length;
++
++ if (cb_type == PATH_REC_GET_CB) {
++ ret = sizeof(struct path_rec_data) + sizeof(struct cb_header);
++ if (data_length != ret) {
++ print_err("Invalid data length %d, expecting %d\n",
++ data_length, ret);
++ goto err;
++ }
++ } else if (cb_type == JOIN_MCAST_CB) {
++ ret = sizeof(struct mc_join_data) + sizeof(struct cb_header);
++ if (data_length != ret) {
++ print_err("Invalid data length %d, expecting %d\n",
++ data_length, ret);
++ goto err;
++ }
++ } else {
++ print_err("Invalid callback type %d\n", cb_type);
++ goto err;
++ }
++
++ len += data_length;
++
++ msg = kzalloc(len, GFP_KERNEL);
++ if (!msg) {
++ print_err("kzmalloc failed\n");
++ goto err;
++ }
++ IBP_INIT_MSG(msg, len, CALLBACK);
++
++ memcpy(msg->data, &cb_work->msg, data_length);
++
++ /* wait for host to send message to card before processing cb */
++ mutex_lock(&cb_work->data->lock);
++
++ ret = ibp_send(client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ mutex_unlock(&cb_work->data->lock);
++
++ kfree(msg);
++err:
++ if (cb_type == PATH_REC_GET_CB)
++ kfree(cb_work->data);
++
++ kfree(cb_work);
++}
++
++static void path_rec_get_callback(int status, struct ib_sa_path_rec *resp,
++ void *context)
++{
++ struct path_rec_cb_data *data;
++ struct sa_query_entry *entry;
++ struct ibp_client *client;
++ struct ib_sa_client *ib_client;
++ struct callback_work *cb_work;
++ struct cb_header *header;
++ struct path_rec_data *path_rec;
++
++ print_trace("in\n");
++
++ data = (struct path_rec_cb_data *) context;
++ entry = data->entry;
++ client = entry->ibp_client;
++
++ cb_work = kzalloc(sizeof(struct callback_work), GFP_KERNEL);
++ if (!cb_work) {
++ print_err("kzalloc failed\n");
++ goto err1;
++ }
++
++ ib_client = find_ibp_client(client);
++ if (!ib_client) {
++ print_err("Could not find client for event handler\n");
++ goto err2;
++ }
++
++ if (!entry->query) {
++ print_err("Callback occurred before call returned\n");
++ goto err2;
++ }
++
++ cb_work->data = (struct generic_cb_data *) data;
++ cb_work->client = client;
++ cb_work->length = sizeof(*header) + sizeof(*path_rec);
++
++ header = &cb_work->msg.header;
++ header->cb_type = PATH_REC_GET_CB;
++ header->status = status;
++ header->ibp_client = (u64) ib_client;
++
++ path_rec = &cb_work->msg.u.path_rec;
++ path_rec->entry = data->ibp_entry;
++ path_rec->query = data->ibp_query;
++
++ if (status) {
++ print_err("callback status %d\n", status);
++ /* XXX: how is data deallocated in error cases? */
++ goto queue_work;
++ }
++
++ path_rec->resp.service_id = resp->service_id;
++ path_rec->resp.dgid_prefix = resp->dgid.global.subnet_prefix;
++ path_rec->resp.dgid_id = resp->dgid.global.interface_id;
++ path_rec->resp.sgid_prefix = resp->sgid.global.subnet_prefix;
++ path_rec->resp.sgid_id = resp->sgid.global.interface_id;
++ path_rec->resp.dlid = resp->dlid;
++ path_rec->resp.slid = resp->slid;
++ path_rec->resp.raw_traffic = resp->raw_traffic;
++ path_rec->resp.flow_label = resp->flow_label;
++ path_rec->resp.hop_limit = resp->hop_limit;
++ path_rec->resp.traffic_class = resp->traffic_class;
++ path_rec->resp.reversible = resp->reversible;
++ path_rec->resp.numb_path = resp->numb_path;
++ path_rec->resp.pkey = resp->pkey;
++ path_rec->resp.qos_class = resp->qos_class;
++ path_rec->resp.sl = resp->sl;
++ path_rec->resp.mtu_selector = resp->mtu_selector;
++ path_rec->resp.mtu = resp->mtu;
++ path_rec->resp.rate_selector = resp->rate_selector;
++ path_rec->resp.rate = resp->rate;
++ path_rec->resp.packet_life_time_selector
++ = resp->packet_life_time_selector;
++ path_rec->resp.packet_life_time = resp->packet_life_time;
++ path_rec->resp.preference = resp->preference;
++
++queue_work:
++ free_query_list(entry);
++
++ INIT_WORK(&cb_work->work, ibp_send_callback);
++ queue_work(client->workqueue, &cb_work->work);
++ return;
++err2:
++ kfree(cb_work);
++err1:
++ kfree(data);
++ return;
++}
++
++int ibp_cmd_sa_path_rec_get(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct ib_device *ib_device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_sa_path_rec_get_cmd *cmd;
++ struct ibp_sa_path_rec_get_resp *resp;
++ struct ib_sa_client *client;
++ struct ib_sa_query *sa_query;
++ struct sa_query_entry *entry;
++ struct path_rec_cb_data *data = NULL;
++ struct ib_sa_path_rec rec;
++ size_t len;
++ int query_id;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_sa_path_rec_get_cmd *) hdr;
++ ib_device = (struct ib_device *) cmd->device;
++ client = (struct ib_sa_client *) cmd->ibp_client;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ entry = add_query_list(ibp_client);
++ if (IS_ERR(entry)) {
++ ret = PTR_ERR(entry);
++ goto send_resp;
++ }
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
++ if (!data) {
++ free_query_list(entry);
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto send_resp;
++ }
++
++ data->entry = entry;
++ data->ibp_entry = cmd->entry;
++ data->ibp_query = cmd->query;
++
++ rec.service_id = cmd->rec.service_id;
++ rec.dgid.global.interface_id
++ = cmd->rec.dgid_id;
++ rec.dgid.global.subnet_prefix
++ = cmd->rec.dgid_prefix;
++ rec.sgid.global.interface_id
++ = cmd->rec.sgid_id;
++ rec.sgid.global.subnet_prefix
++ = cmd->rec.sgid_prefix;
++ rec.dlid = cmd->rec.dlid;
++ rec.slid = cmd->rec.slid;
++ rec.raw_traffic = cmd->rec.raw_traffic;
++ rec.flow_label = cmd->rec.flow_label;
++ rec.hop_limit = cmd->rec.hop_limit;
++ rec.traffic_class = cmd->rec.traffic_class;
++ rec.reversible = cmd->rec.reversible;
++ rec.numb_path = cmd->rec.numb_path;
++ rec.pkey = cmd->rec.pkey;
++ rec.qos_class = cmd->rec.qos_class;
++ rec.sl = cmd->rec.sl;
++ rec.mtu_selector = cmd->rec.mtu_selector;
++ rec.mtu = cmd->rec.mtu;
++ rec.rate_selector = cmd->rec.rate_selector;
++ rec.rate = cmd->rec.rate;
++ rec.packet_life_time_selector
++ = cmd->rec.packet_life_time_selector;
++ rec.packet_life_time = cmd->rec.packet_life_time;
++ rec.preference = cmd->rec.preference;
++
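++ /* Take data->lock before issuing the query; the callback work item
++ * blocks on it in ibp_send_callback(), so the callback cannot reach
++ * the card before the response below has been sent. */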
++ mutex_init(&data->lock);
++ mutex_lock(&data->lock);
++
++ query_id = ib_sa_path_rec_get(client, ib_device, cmd->port_num, &rec,
++ cmd->comp_mask, cmd->timeout_ms,
++ GFP_KERNEL, path_rec_get_callback, data,
++ &sa_query);
++ if (query_id < 0) {
++ ret = query_id;
++ print_err("ib_sa_path_rec_get returned %d\n", ret);
++ free_query_list(entry);
++ mutex_unlock(&data->lock);
++ kfree(data);
++ data = NULL;
++ goto send_resp;
++ }
++ entry->query = sa_query;
++ entry->sa_client = client;
++ entry->id = query_id;
++
++ len += sizeof(*resp);
++ resp = (struct ibp_sa_path_rec_get_resp *) msg->data;
++ resp->query_id = query_id;
++ resp->sa_query = (u64)sa_query;
++
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++
++ if (data) {
++ data->ret = ret;
++ mutex_unlock(&data->lock);
++ }
++
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++static int sa_join_callback(int status, struct ib_sa_multicast *multicast)
++{
++ struct join_mcast_cb_data *data;
++ struct ibp_client *client;
++ struct ib_sa_client *ib_client;
++ struct callback_work *cb_work;
++ struct cb_header *header;
++ struct mc_join_data *mc_join;
++ struct ib_sa_mcmember_rec *ib_rec;
++ struct ibp_sa_mcmember_rec *ibp_rec;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ data = (struct join_mcast_cb_data *) multicast->context;
++
++ if (status == -ENETRESET)
++ goto err1;
++
++ cb_work = kzalloc(sizeof(struct callback_work), GFP_KERNEL);
++ if (!cb_work) {
++ print_err("kzalloc failed\n");
++ ret = -ENOMEM;
++ goto err1;
++ }
++
++ client = data->client;
++
++ ib_client = find_ibp_client(client);
++ if (!ib_client) {
++ print_err("Could not find client for event handler\n");
++ ret = -EINVAL;
++ goto err2;
++ }
++
++ cb_work->data = (struct generic_cb_data *) data;
++ cb_work->client = client;
++ cb_work->length = sizeof(*header) + sizeof(*mc_join);
++
++ header = &cb_work->msg.header;
++ header->cb_type = JOIN_MCAST_CB;
++ header->status = status;
++ header->ibp_client = (u64) ib_client;
++
++ mc_join = &cb_work->msg.u.mc_join;
++ mc_join->ibp_mcast = (u64) multicast;
++ mc_join->mcentry = data->mcentry;
++
++ if (status) {
++ print_dbg("callback status %d\n", status);
++ goto queue_work;
++ }
++
++ ib_rec = &multicast->rec;
++ ibp_rec = &mc_join->rec;
++
++ ibp_rec->mgid_prefix = ib_rec->mgid.global.subnet_prefix;
++ ibp_rec->mgid_id = ib_rec->mgid.global.interface_id;
++ ibp_rec->port_gid_prefix = ib_rec->port_gid.global.subnet_prefix;
++ ibp_rec->port_gid_id = ib_rec->port_gid.global.interface_id;
++ ibp_rec->qkey = ib_rec->qkey;
++ ibp_rec->mlid = ib_rec->mlid;
++ ibp_rec->mtu_selector = ib_rec->mtu_selector;
++ ibp_rec->mtu = ib_rec->mtu;
++ ibp_rec->traffic_class = ib_rec->traffic_class;
++ ibp_rec->pkey = ib_rec->pkey;
++ ibp_rec->rate_selector = ib_rec->rate_selector;
++ ibp_rec->rate = ib_rec->rate;
++ ibp_rec->packet_life_time_selector
++ = ib_rec->packet_life_time_selector;
++ ibp_rec->packet_life_time = ib_rec->packet_life_time;
++ ibp_rec->sl = ib_rec->sl;
++ ibp_rec->flow_label = ib_rec->flow_label;
++ ibp_rec->hop_limit = ib_rec->hop_limit;
++ ibp_rec->join_state = ib_rec->join_state;
++ ibp_rec->proxy_join = ib_rec->proxy_join;
++
++queue_work:
++ INIT_WORK(&cb_work->work, ibp_send_callback);
++ queue_work(client->workqueue, &cb_work->work);
++ return 0;
++err2:
++ kfree(cb_work);
++err1:
++ return ret;
++}
++
++int ibp_cmd_sa_join_multicast(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct ib_device *ib_device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_sa_join_multicast_cmd *cmd;
++ struct ibp_sa_join_multicast_resp *resp;
++ struct ib_sa_client *client;
++ struct ib_sa_multicast *multicast;
++ struct join_mcast_cb_data *data;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_sa_join_multicast_cmd *) hdr;
++ ib_device = (struct ib_device *) cmd->device;
++ client = (struct ib_sa_client *) cmd->ibp_client;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ data = kzalloc(sizeof(*data), GFP_KERNEL);
++ if (!data) {
++ ret = -ENOMEM;
++ goto send_resp;
++ }
++
++ data->client = ibp_client;
++ data->mcentry = cmd->mcentry;
++
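++ /* As in ibp_cmd_sa_path_rec_get(), hold data->lock so the join
++ * callback cannot be sent to the card before our response. */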
++ mutex_init(&data->lock);
++ mutex_lock(&data->lock);
++
++ down_write(&list_rwsem);
++ list_add(&data->list, &mcast_list);
++ up_write(&list_rwsem);
++
++ multicast = ib_sa_join_multicast(client, ib_device,
++ cmd->port_num, &cmd->rec,
++ cmd->comp_mask, GFP_KERNEL,
++ sa_join_callback, data);
++
++ if (IS_ERR(multicast)) {
++ ret = PTR_ERR(multicast);
++ print_err("ib_sa_join_multicast returned %d\n", ret);
++ mutex_unlock(&data->lock);
++ down_write(&list_rwsem);
++ list_del(&data->list);
++ up_write(&list_rwsem);
++ kfree(data);
++ data = NULL;
++ goto send_resp;
++ }
++ data->mcast = multicast;
++
++ len += sizeof(*resp);
++ resp = (struct ibp_sa_join_multicast_resp *) msg->data;
++
++ resp->ibp_mcast = (u64) multicast;
++
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++
++ if (data) {
++ data->ret = ret;
++ mutex_unlock(&data->lock);
++ }
++
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_cmd_sa_free_multicast(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct ibp_verb_response_msg *msg;
++ struct ibp_sa_free_multicast_cmd *cmd;
++ struct ib_sa_multicast *multicast;
++ struct join_mcast_cb_data *data;
++ size_t len;
++ int ret = 0;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_sa_free_multicast_cmd *) hdr;
++ multicast = (struct ib_sa_multicast *) cmd->ibp_mcast;
++ data = (struct join_mcast_cb_data *) multicast->context;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ ib_sa_free_multicast(multicast);
++
++ down_write(&list_rwsem);
++ list_del(&data->list);
++ up_write(&list_rwsem);
++
++ kfree(data);
++
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
++
++int ibp_cmd_sa_get_mcmember_rec(struct ibp_client *ibp_client,
++ struct ibp_msg_header *hdr)
++{
++ struct ib_device *ib_device;
++ struct ibp_verb_response_msg *msg;
++ struct ibp_sa_get_mcmember_rec_cmd *cmd;
++ struct ibp_sa_get_mcmember_rec_resp *resp;
++ struct ib_sa_mcmember_rec rec;
++ union ib_gid mgid;
++ size_t len;
++ int ret;
++
++ print_trace("in\n");
++
++ cmd = (struct ibp_sa_get_mcmember_rec_cmd *) hdr;
++ ib_device = (struct ib_device *) cmd->device;
++ msg = (struct ibp_verb_response_msg *) ibp_client->tx_buf;
++ len = sizeof(*msg);
++
++ mgid.global.subnet_prefix = cmd->subnet_prefix;
++ mgid.global.interface_id = cmd->interface_id;
++
++ ret = ib_sa_get_mcmember_rec(ib_device, cmd->port_num, &mgid, &rec);
++ if (ret) {
++ print_err("ib_sa_get_mcmember_rec returned %d\n", ret);
++ goto send_resp;
++ }
++
++ len += sizeof(*resp);
++ resp = (struct ibp_sa_get_mcmember_rec_resp *) msg->data;
++
++ resp->rec.mgid.global.subnet_prefix
++ = rec.mgid.global.subnet_prefix;
++ resp->rec.mgid.global.interface_id
++ = rec.mgid.global.interface_id;
++ resp->rec.port_gid.global.subnet_prefix
++ = rec.port_gid.global.subnet_prefix;
++ resp->rec.port_gid.global.interface_id
++ = rec.port_gid.global.interface_id;
++ resp->rec.qkey = rec.qkey;
++ resp->rec.mlid = rec.mlid;
++ resp->rec.mtu_selector = rec.mtu_selector;
++ resp->rec.mtu = rec.mtu;
++ resp->rec.traffic_class = rec.traffic_class;
++ resp->rec.pkey = rec.pkey;
++ resp->rec.rate_selector = rec.rate_selector;
++ resp->rec.rate = rec.rate;
++ resp->rec.packet_life_time_selector
++ = rec.packet_life_time_selector;
++ resp->rec.packet_life_time
++ = rec.packet_life_time;
++ resp->rec.sl = rec.sl;
++ resp->rec.flow_label = rec.flow_label;
++ resp->rec.hop_limit = rec.hop_limit;
++ resp->rec.scope = rec.scope;
++ resp->rec.join_state = rec.join_state;
++ resp->rec.proxy_join = rec.proxy_join;
++send_resp:
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, ret);
++
++ ret = ibp_send(ibp_client->ep, msg, len);
++ if (ret)
++ print_err("ibp_send returned %d\n", ret);
++
++ return ret;
++}
+diff --git a/drivers/infiniband/ibp/sa/sa_table.h b/drivers/infiniband/ibp/sa/sa_table.h
+new file mode 100644
+index 0000000..b0844cd
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/sa_table.h
+@@ -0,0 +1,131 @@
++/*"
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
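++/*
++ * Wire layout of the SA PathRecord attribute, consumed by ib_pack()
++ * and ib_unpack() via ib_sa_pack_path() and ib_sa_unpack_path().
++ */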
++#define PATH_REC_FIELD(field) \
++ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
++ .struct_size_bytes = sizeof((struct ib_sa_path_rec *) 0)->field, \
++ .field_name = "sa_path_rec:" #field
++
++static const struct ib_field path_rec_table[] = {
++ { PATH_REC_FIELD(service_id),
++ .offset_words = 0,
++ .offset_bits = 0,
++ .size_bits = 64 },
++ { PATH_REC_FIELD(dgid),
++ .offset_words = 2,
++ .offset_bits = 0,
++ .size_bits = 128 },
++ { PATH_REC_FIELD(sgid),
++ .offset_words = 6,
++ .offset_bits = 0,
++ .size_bits = 128 },
++ { PATH_REC_FIELD(dlid),
++ .offset_words = 10,
++ .offset_bits = 0,
++ .size_bits = 16 },
++ { PATH_REC_FIELD(slid),
++ .offset_words = 10,
++ .offset_bits = 16,
++ .size_bits = 16 },
++ { PATH_REC_FIELD(raw_traffic),
++ .offset_words = 11,
++ .offset_bits = 0,
++ .size_bits = 1 },
++ { RESERVED,
++ .offset_words = 11,
++ .offset_bits = 1,
++ .size_bits = 3 },
++ { PATH_REC_FIELD(flow_label),
++ .offset_words = 11,
++ .offset_bits = 4,
++ .size_bits = 20 },
++ { PATH_REC_FIELD(hop_limit),
++ .offset_words = 11,
++ .offset_bits = 24,
++ .size_bits = 8 },
++ { PATH_REC_FIELD(traffic_class),
++ .offset_words = 12,
++ .offset_bits = 0,
++ .size_bits = 8 },
++ { PATH_REC_FIELD(reversible),
++ .offset_words = 12,
++ .offset_bits = 8,
++ .size_bits = 1 },
++ { PATH_REC_FIELD(numb_path),
++ .offset_words = 12,
++ .offset_bits = 9,
++ .size_bits = 7 },
++ { PATH_REC_FIELD(pkey),
++ .offset_words = 12,
++ .offset_bits = 16,
++ .size_bits = 16 },
++ { PATH_REC_FIELD(qos_class),
++ .offset_words = 13,
++ .offset_bits = 0,
++ .size_bits = 12 },
++ { PATH_REC_FIELD(sl),
++ .offset_words = 13,
++ .offset_bits = 12,
++ .size_bits = 4 },
++ { PATH_REC_FIELD(mtu_selector),
++ .offset_words = 13,
++ .offset_bits = 16,
++ .size_bits = 2 },
++ { PATH_REC_FIELD(mtu),
++ .offset_words = 13,
++ .offset_bits = 18,
++ .size_bits = 6 },
++ { PATH_REC_FIELD(rate_selector),
++ .offset_words = 13,
++ .offset_bits = 24,
++ .size_bits = 2 },
++ { PATH_REC_FIELD(rate),
++ .offset_words = 13,
++ .offset_bits = 26,
++ .size_bits = 6 },
++ { PATH_REC_FIELD(packet_life_time_selector),
++ .offset_words = 14,
++ .offset_bits = 0,
++ .size_bits = 2 },
++ { PATH_REC_FIELD(packet_life_time),
++ .offset_words = 14,
++ .offset_bits = 2,
++ .size_bits = 6 },
++ { PATH_REC_FIELD(preference),
++ .offset_words = 14,
++ .offset_bits = 8,
++ .size_bits = 8 },
++ { RESERVED,
++ .offset_words = 14,
++ .offset_bits = 16,
++ .size_bits = 48 },
++};
+diff --git a/drivers/infiniband/ibp/sa/server.c b/drivers/infiniband/ibp/sa/server.c
+new file mode 100644
+index 0000000..ad9a4b1
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/server.c
+@@ -0,0 +1,218 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include "server.h"
++
++MODULE_AUTHOR("Jerrie Coffman");
++MODULE_AUTHOR("Phil Cayton");
++MODULE_AUTHOR("Jay Sternberg");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++MODULE_PARAM(port, port, int, IBP_SA_PORT, "Connection port");
++MODULE_PARAM(backlog, backlog, int, 8, "Connection backlog");
++MODULE_PARAM(timeout, timeout, int, 1000, "Listen/Poll time in milliseconds");
++
++#ifdef IBP_DEBUG
++MODULE_PARAM(debug_level, debug_level, int, 0, "Debug: 0-none, 1-some, 2-all");
++#endif
++
++struct rw_semaphore list_rwsem;
++LIST_HEAD(client_list);
++
++static struct task_struct *listen_thread;
++
++static struct ibp_client *ibp_create_client(scif_epd_t ep, uint16_t node)
++{
++ struct ibp_client *client;
++ int ret = -ENOMEM;
++
++ client = kzalloc(sizeof(*client), GFP_KERNEL);
++ if (!client) {
++ print_err("kzalloc failed\n");
++ return ERR_PTR(ret);
++ }
++
++ client->ep = ep;
++
++ client->rx_buf = (void *)__get_free_page(GFP_KERNEL);
++ if (!client->rx_buf) {
++ print_err("__get_free_page rx_buf failed\n");
++ goto err0;
++ }
++
++ client->tx_buf = (void *)__get_free_page(GFP_KERNEL);
++ if (!client->tx_buf) {
++ print_err("__get_free_page tx_buf failed\n");
++ goto err1;
++ }
++
++ client->workqueue = create_singlethread_workqueue(DRV_NAME);
++ if (!client->workqueue) {
++ print_err("create_singlethread_workqueue failed\n");
++ goto err2;
++ }
++
++ down_write(&list_rwsem);
++ list_add(&client->list, &client_list);
++ up_write(&list_rwsem);
++
++ client->ibp_sa_client_thread = kthread_run(ibp_process_recvs,
++ client, DRV_NAME);
++ if (!client->ibp_sa_client_thread) {
++ print_err("create client thread failed\n");
++ goto err3;
++ }
++
++ return client;
++err3:
++ down_write(&list_rwsem);
++ list_del(&client->list);
++ up_write(&list_rwsem);
++
++ destroy_workqueue(client->workqueue);
++err2:
++ free_page((uintptr_t)client->tx_buf);
++err1:
++ free_page((uintptr_t)client->rx_buf);
++err0:
++ kfree(client);
++ return ERR_PTR(ret);
++}
++
++static int ibp_sa_listen(void *data)
++{
++ struct ibp_client *client;
++ struct scif_pollepd listen;
++ struct scif_port_id peer;
++ scif_epd_t ep;
++ int ret;
++
++ listen.epd = scif_open();
++ if (IS_NULL_OR_ERR(listen.epd)) {
++ print_err("scif_open failed\n");
++ ret = -EIO;
++ goto err0;
++ }
++ listen.events = POLLIN;
++
++ ret = scif_bind(listen.epd, port);
++ if (ret < 0) {
++ print_err("scif_bind returned %d\n", ret);
++ goto err1;
++ }
++
++ ret = scif_listen(listen.epd, backlog);
++ if (ret) {
++ print_err("scif_listen returned %d\n", ret);
++ goto err1;
++ }
++
++ while (!kthread_should_stop()) {
++
++ schedule();
++
++ ret = scif_poll(&listen, 1, timeout);
++ if (ret == 0) /* timeout */
++ continue;
++ if (ret < 0) {
++ print_err("scif_poll revents 0x%x\n", listen.revents);
++ continue;
++ }
++
++ ret = scif_accept(listen.epd, &peer, &ep, 0);
++ if (ret) {
++ print_err("scif_accept returned %d\n", ret);
++ continue;
++ }
++
++ print_dbg("accepted node %d port %d\n", peer.node, peer.port);
++
++ client = ibp_create_client(ep, peer.node);
++ if (IS_ERR(client)) {
++ ret = PTR_ERR(client);
++ print_err("ibp_create_client returned %d\n", ret);
++ scif_close(ep);
++ }
++ }
++err1:
++ scif_close(listen.epd);
++err0:
++ return ret;
++}
++
++static int __init ibp_sa_server_init(void)
++{
++ int ret = 0;
++
++ print_info(DRV_SIGNON);
++
++ init_rwsem(&list_rwsem);
++
++ /* Start a thread for inbound connections. */
++ listen_thread = kthread_run(ibp_sa_listen, NULL, DRV_NAME);
++ if (IS_NULL_OR_ERR(listen_thread)) {
++ ret = PTR_ERR(listen_thread);
++ print_err("kthread_run returned %d\n", ret);
++ }
++
++ return ret;
++}
++
++static void __exit ibp_sa_server_exit(void)
++{
++ struct ibp_client *client, *next;
++ struct completion done;
++
++ kthread_stop(listen_thread);
++
++ down_write(&list_rwsem);
++ list_for_each_entry_safe(client, next, &client_list, list) {
++ init_completion(&done);
++ client->done = &done;
++
++ /* Close scif ep to unblock the client thread scif_recv */
++ scif_close(client->ep);
++
++ up_write(&list_rwsem);
++ /* Wait for client thread to finish */
++ wait_for_completion(&done);
++ down_write(&list_rwsem);
++ }
++ up_write(&list_rwsem);
++
++ print_info(DRV_DESC " unloaded\n");
++}
++
++module_init(ibp_sa_server_init);
++module_exit(ibp_sa_server_exit);
+diff --git a/drivers/infiniband/ibp/sa/server.h b/drivers/infiniband/ibp/sa/server.h
+new file mode 100644
+index 0000000..be088e6
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/server.h
+@@ -0,0 +1,173 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#ifndef SERVER_H
++#define SERVER_H
++
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <linux/anon_inodes.h>
++#include <rdma/ib_umem.h>
++#include <rdma/ib_cache.h>
++#include "ibp-abi.h"
++#include "sa_ibp_abi.h"
++#include "common.h"
++
++#define DRV_ROLE "SA Server"
++#define DRV_NAME "ibp_sa_server"
++#include "compat.h"
++
++extern int timeout;
++extern struct rw_semaphore list_rwsem;
++extern struct list_head client_list;
++extern struct list_head sa_entry_list;
++extern struct list_head query_list;
++extern struct list_head mcast_list;
++
++struct ib_sa_sm_ah {
++ struct ib_ah *ah;
++ struct kref ref;
++ u16 pkey_index;
++ u8 src_path_mask;
++};
++
++struct ib_sa_port {
++ struct ib_mad_agent *agent;
++ struct ib_mad_agent *notice_agent;
++ struct ib_sa_sm_ah *sm_ah;
++ struct work_struct update_task;
++ spinlock_t ah_lock;
++ u8 port_num;
++ struct ib_device *device;
++};
++
++struct ib_sa_device {
++ int start_port, end_port;
++ struct ib_event_handler event_handler;
++ struct ib_sa_port port[0];
++};
++
++struct ibp_client {
++ struct list_head list;
++ scif_epd_t ep;
++ void *rx_buf;
++ void *tx_buf;
++ struct completion *done;
++ struct workqueue_struct *workqueue;
++ struct task_struct *ibp_sa_client_thread;
++};
++
++struct sa_entry {
++ struct list_head list;
++ struct ib_sa_client ib_client;
++ struct ibp_client *client;
++};
++
++struct sa_query_entry {
++ struct list_head list;
++ int id;
++ struct ibp_client *ibp_client;
++ struct ib_sa_client *sa_client;
++ struct ib_sa_query *query;
++};
++
++struct path_rec_cb_data {
++ struct mutex lock;
++ int ret;
++ struct sa_query_entry *entry;
++ u64 ibp_entry;
++ u64 ibp_query;
++};
++
++struct join_mcast_cb_data {
++ struct mutex lock;
++ int ret;
++ struct ibp_client *client;
++ struct ib_sa_multicast *mcast;
++ struct list_head list;
++ u64 entry;
++ u64 mcentry;
++};
++
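++/*
++ * Common prefix of path_rec_cb_data and join_mcast_cb_data; the
++ * callback paths cast either one to this type, so the lock and ret
++ * members must remain first in both structures.
++ */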
++struct generic_cb_data {
++ struct mutex lock;
++ int ret;
++};
++
++struct callback_work {
++ struct work_struct work;
++ struct ibp_client *client;
++ struct generic_cb_data *data;
++ int length;
++ struct callback_msg msg;
++};
++
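++/* Fill in the common ibp message header for requests and responses. */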
++#define IBP_INIT_MSG(msg, size, op) \
++ do { \
++ (msg)->header.opcode = IBP_##op; \
++ (msg)->header.length = (size); \
++ (msg)->header.status = 0; \
++ (msg)->header.reserved = 0; \
++ (msg)->header.request = 0; \
++ } while (0)
++
++#define IBP_INIT_RESP(resp, size, op, req, stat) \
++ do { \
++ (resp)->header.opcode = IBP_##op; \
++ (resp)->header.length = (size); \
++ (resp)->header.status = (stat); \
++ (resp)->header.reserved = 0; \
++ (resp)->header.request = (req); \
++ } while (0)
++
++int ibp_process_recvs(void *p);
++
++int ibp_cmd_sa_path_rec_get(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_sa_register_client(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_sa_unregister_client(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_sa_cancel_query(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_init_ah_from_path(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_init_ah_from_mcmember(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_sa_join_multicast(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_sa_free_multicast(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++int ibp_cmd_sa_get_mcmember_rec(struct ibp_client *client,
++ struct ibp_msg_header *hdr);
++
++#endif /* SERVER_H */
+diff --git a/drivers/infiniband/ibp/sa/server_msg.c b/drivers/infiniband/ibp/sa/server_msg.c
+new file mode 100644
+index 0000000..2d396d8
+--- /dev/null
++++ b/drivers/infiniband/ibp/sa/server_msg.c
+@@ -0,0 +1,185 @@
++/*
++ * Copyright (c) 2011-2013 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the GNU
++ * General Public License (GPL) Version 2, available from the file
++ * COPYING in the main directory of this source tree, or the
++ * OpenIB.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#include <linux/delay.h>
++
++#include "server.h"
++#include "sa_ibp_abi.h"
++
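++/* scif_send()/scif_recv() may move fewer bytes than requested, so loop
++ * until the full message has been transferred.
++ */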
++int ibp_send(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
++ while (len) {
++ ret = scif_send(ep, buf, (uint32_t)len, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_send returned %d\n", ret);
++ return ret;
++ }
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++int ibp_recv(scif_epd_t ep, void *buf, size_t len)
++{
++ int ret;
++
++ while (len) {
++ ret = scif_recv(ep, buf, (uint32_t)len, SCIF_RECV_BLOCK);
++ if (ret < 0) {
++ print_dbg("scif_recv returned %d\n", ret);
++ return ret;
++ }
++ buf += ret;
++ len -= ret;
++ }
++
++ return 0;
++}
++
++static int
++ibp_cmd_bad_request(struct ibp_client *client, struct ibp_msg_header *hdr)
++{
++ struct ibp_verb_response_msg *msg;
++ size_t len;
++ int status = -EBADRQC;
++
++ msg = (struct ibp_verb_response_msg *) client->tx_buf;
++ len = sizeof(*msg);
++
++ print_dbg("opcode 0x%x\n", hdr->opcode);
++
++ IBP_INIT_RESP(msg, len, RESPONSE, hdr->request, status);
++ return ibp_send(client->ep, msg, len);
++}
++
++static void
++ibp_sa_destroy_client(struct ibp_client *client)
++{
++ struct join_mcast_cb_data *mcast, *next_mcast;
++ struct sa_query_entry *query, *next_query;
++ struct sa_entry *sa, *next_sa;
++
++ down_write(&list_rwsem);
++ list_del(&client->list);
++ list_for_each_entry_safe(mcast, next_mcast, &mcast_list, list)
++ if (mcast->client == client) {
++ ib_sa_free_multicast(mcast->mcast);
++ list_del(&mcast->list);
++ kfree(mcast);
++ }
++ list_for_each_entry_safe(query, next_query, &query_list, list)
++ if (query->ibp_client == client) {
++ ib_sa_cancel_query(query->id, query->query);
++ list_del(&query->list);
++ kfree(query);
++ }
++ list_for_each_entry_safe(sa, next_sa, &sa_entry_list, list)
++ if (sa->client == client) {
++ ib_sa_unregister_client(&sa->ib_client);
++ list_del(&sa->list);
++ kfree(sa);
++ }
++ up_write(&list_rwsem);
++
++ destroy_workqueue(client->workqueue);
++
++ free_page((uintptr_t)client->tx_buf);
++ free_page((uintptr_t)client->rx_buf);
++
++ if (client->done)
++ complete(client->done);
++ else
++ scif_close(client->ep);
++
++ kfree(client);
++}
++
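++/*
++ * Dispatch table indexed by message opcode; ibp_process_recvs() routes
++ * opcodes outside this table to ibp_cmd_bad_request().
++ */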
++static int
++(*ibp_msg_table[])(struct ibp_client *c, struct ibp_msg_header *h) = {
++ [IBP_SA_PATH_REC_GET] = ibp_cmd_sa_path_rec_get,
++ [IBP_SA_REGISTER_CLIENT] = ibp_cmd_sa_register_client,
++ [IBP_SA_UNREGISTER_CLIENT] = ibp_cmd_sa_unregister_client,
++ [IBP_SA_CANCEL_QUERY] = ibp_cmd_sa_cancel_query,
++ [IBP_INIT_AH_FROM_PATH] = ibp_cmd_init_ah_from_path,
++ [IBP_INIT_AH_FROM_MCMEMBER] = ibp_cmd_init_ah_from_mcmember,
++ [IBP_SA_JOIN_MCAST] = ibp_cmd_sa_join_multicast,
++ [IBP_SA_FREE_MCAST] = ibp_cmd_sa_free_multicast,
++ [IBP_SA_GET_MCMEMBER_REC] = ibp_cmd_sa_get_mcmember_rec,
++};
++
++int ibp_process_recvs(void *p)
++{
++ struct ibp_client *client;
++ struct ibp_msg_header *hdr;
++ int ret;
++
++ client = (struct ibp_client *) p;
++ hdr = (struct ibp_msg_header *) client->rx_buf;
++
++ for (;;) {
++ ret = ibp_recv(client->ep, hdr, sizeof(*hdr));
++ if (ret)
++ break;
++
++ if (hdr->length > MAX_MSG_SIZE) {
++ print_err("message too large, len %u max %lu\n",
++ hdr->length, MAX_MSG_SIZE);
++ ret = -EMSGSIZE;
++ break;
++ }
++
++ if (hdr->length > sizeof(*hdr)) {
++ ret = ibp_recv(client->ep, hdr->data,
++ hdr->length - sizeof(*hdr));
++ if (ret)
++ break;
++ }
++
++ if ((hdr->opcode >= ARRAY_SIZE(ibp_msg_table)) ||
++ !ibp_msg_table[hdr->opcode]) {
++ ibp_cmd_bad_request(client, hdr);
++ continue;
++ }
++
++ ret = ibp_msg_table[hdr->opcode](client, hdr);
++ if (ret)
++ break;
++ }
++
++ ibp_sa_destroy_client(client);
++
++ return ret;
++}
+--
+2.7.0
+
--- /dev/null
+From c25781519bc040e9c3b298c6f43144c74a329559 Mon Sep 17 00:00:00 2001
+From: Jerrie Coffman <jerrie.l.coffman@intel.com>
+Date: Thu, 1 Sep 2016 14:39:08 -0700
+Subject: [PATCH 6/7] Add ibscif driver
+
+---
+ drivers/infiniband/Kconfig | 1 +
+ drivers/infiniband/hw/Makefile | 1 +
+ drivers/infiniband/hw/scif/Kconfig | 4 +
+ drivers/infiniband/hw/scif/Makefile | 24 +
+ drivers/infiniband/hw/scif/ibscif_ah.c | 50 +
+ drivers/infiniband/hw/scif/ibscif_cm.c | 515 +++++
+ drivers/infiniband/hw/scif/ibscif_cq.c | 308 +++
+ drivers/infiniband/hw/scif/ibscif_driver.h | 774 +++++++
+ drivers/infiniband/hw/scif/ibscif_loopback.c | 582 +++++
+ drivers/infiniband/hw/scif/ibscif_main.c | 332 +++
+ drivers/infiniband/hw/scif/ibscif_mr.c | 486 +++++
+ drivers/infiniband/hw/scif/ibscif_pd.c | 56 +
+ drivers/infiniband/hw/scif/ibscif_post.c | 320 +++
+ drivers/infiniband/hw/scif/ibscif_procfs.c | 135 ++
+ drivers/infiniband/hw/scif/ibscif_protocol.c | 2810 +++++++++++++++++++++++++
+ drivers/infiniband/hw/scif/ibscif_protocol.h | 395 ++++
+ drivers/infiniband/hw/scif/ibscif_provider.c | 424 ++++
+ drivers/infiniband/hw/scif/ibscif_qp.c | 872 ++++++++
+ drivers/infiniband/hw/scif/ibscif_scheduler.c | 195 ++
+ drivers/infiniband/hw/scif/ibscif_util.c | 629 ++++++
+ 20 files changed, 8913 insertions(+)
+ create mode 100644 drivers/infiniband/hw/scif/Kconfig
+ create mode 100644 drivers/infiniband/hw/scif/Makefile
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_ah.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_cm.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_cq.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_driver.h
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_loopback.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_main.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_mr.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_pd.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_post.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_procfs.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_protocol.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_protocol.h
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_provider.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_qp.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_scheduler.c
+ create mode 100644 drivers/infiniband/hw/scif/ibscif_util.c
+
+diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
+index c466f25..bbbc55e 100644
+--- a/drivers/infiniband/Kconfig
++++ b/drivers/infiniband/Kconfig
+@@ -74,6 +74,7 @@ source "drivers/infiniband/hw/mlx5/Kconfig"
+ source "drivers/infiniband/hw/nes/Kconfig"
+ source "drivers/infiniband/hw/ocrdma/Kconfig"
+ source "drivers/infiniband/hw/usnic/Kconfig"
++source "drivers/infiniband/hw/scif/Kconfig"
+
+ source "drivers/infiniband/ibp/Kconfig"
+
+diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
+index c0c7cf8..bcdaa3a 100644
+--- a/drivers/infiniband/hw/Makefile
++++ b/drivers/infiniband/hw/Makefile
+@@ -9,3 +9,4 @@ obj-$(CONFIG_INFINIBAND_NES) += nes/
+ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
+ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
++obj-$(CONFIG_INFINIBAND_SCIF) += scif/
+diff --git a/drivers/infiniband/hw/scif/Kconfig b/drivers/infiniband/hw/scif/Kconfig
+new file mode 100644
+index 0000000..cda125f
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/Kconfig
+@@ -0,0 +1,4 @@
++config INFINIBAND_SCIF
++ tristate "SCIF RDMA driver support"
++ ---help---
++ RDMA over SCIF driver.
+diff --git a/drivers/infiniband/hw/scif/Makefile b/drivers/infiniband/hw/scif/Makefile
+new file mode 100644
+index 0000000..036117f
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/Makefile
+@@ -0,0 +1,24 @@
++KDIR := /lib/modules/$(KERNELRELEASE)/
++
++SCIF_INCL := $(KDIR)/source/include/modules/
++HAVE_UPSTREAM := $(shell if grep -q scif_get_node_ids $(SCIF_INCL)/scif.h 2>/dev/null; then echo -n -DHAVE_UPSTREAM_SCIF; fi)
++HAVE_EXTERNAL := $(shell if test -f $(SCIF_INCL)/scif.h; then echo -n -DHAVE_EXTERNAL_SCIF; fi)
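++# HAVE_UPSTREAM is non-empty when the exported scif.h already uses the
++# upstream names (e.g. scif_get_node_ids); HAVE_EXTERNAL is non-empty
++# whenever an out-of-tree (MPSS) scif.h is installed at all. Both
++# flags feed the compatibility #ifdefs in ibscif_driver.h.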
++
++subdir-ccflags-y := -I$(SCIF_INCL) $(HAVE_UPSTREAM) $(HAVE_EXTERNAL)
++
++obj-$(CONFIG_INFINIBAND_SCIF) += ibscif.o
++
++ibscif-y := ibscif_main.o \
++ ibscif_ah.o \
++ ibscif_pd.o \
++ ibscif_cq.o \
++ ibscif_qp.o \
++ ibscif_mr.o \
++ ibscif_cm.o \
++ ibscif_post.o \
++ ibscif_procfs.o \
++ ibscif_loopback.o \
++ ibscif_provider.o \
++ ibscif_protocol.o \
++ ibscif_scheduler.o \
++ ibscif_util.o
+diff --git a/drivers/infiniband/hw/scif/ibscif_ah.c b/drivers/infiniband/hw/scif/ibscif_ah.c
+new file mode 100644
+index 0000000..cb045b7
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_ah.c
+@@ -0,0 +1,50 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++struct ib_ah *ibscif_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
++{
++ struct ibscif_ah *ah;
++
++ ah = kzalloc(sizeof *ah, GFP_KERNEL);
++ if (!ah)
++ return ERR_PTR(-ENOMEM);
++
++ ah->dlid = cpu_to_be16(attr->dlid);
++
++ return &ah->ibah;
++}
++
++int ibscif_destroy_ah(struct ib_ah *ibah)
++{
++ kfree(to_ah(ibah));
++ return 0;
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_cm.c b/drivers/infiniband/hw/scif/ibscif_cm.c
+new file mode 100644
+index 0000000..5013a5c
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_cm.c
+@@ -0,0 +1,515 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++static LIST_HEAD(listen_list);
++DEFINE_SPINLOCK(listen_list_lock);
++
++static int sockaddr_in_to_node_id(struct sockaddr_in addr)
++{
++ u8 *p = (u8 *)&addr.sin_addr.s_addr;
++
++ if (p[0] == 192 && p[1] == 0 && p[2] == 2 &&
++ p[3] >= 100 && p[3] < 100 + IBSCIF_MAX_DEVICES)
++ return (int)(p[3] - 100);
++ else
++ return -EINVAL;
++}
++
++static struct sockaddr_in node_id_to_sockaddr_in(int node_id)
++{
++ struct sockaddr_in addr;
++ u8 *p = (u8 *)&addr.sin_addr.s_addr;
++
++ addr.sin_family = AF_INET;
++ addr.sin_addr.s_addr = 0;
++ addr.sin_port = 0;
++
++ p[0] = 192;
++ p[1] = 0;
++ p[2] = 2;
++ p[3] = 100 + node_id;
++
++ return addr;
++}
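++
++/*
++ * Illustration: ibscif addresses live in 192.0.2.0/24 (the TEST-NET-1
++ * documentation range) offset by 100, so node_id 0 maps to
++ * 192.0.2.100 and node_id 3 to 192.0.2.103; anything outside
++ * 192.0.2.100 .. 192.0.2.(99 + IBSCIF_MAX_DEVICES) decodes to
++ * -EINVAL.
++ */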
++
++void free_cm(struct kref *kref)
++{
++ struct ibscif_cm *cm_ctx;
++ cm_ctx = container_of(kref, struct ibscif_cm, kref);
++ if (cm_ctx->conn)
++ ibscif_put_conn(cm_ctx->conn);
++ kfree(cm_ctx);
++}
++
++static inline void get_cm(struct ibscif_cm *cm_ctx)
++{
++ kref_get(&cm_ctx->kref);
++}
++
++static inline void put_cm(struct ibscif_cm *cm_ctx)
++{
++ kref_put(&cm_ctx->kref, free_cm);
++}
++
++void free_listen(struct kref *kref)
++{
++ struct ibscif_listen *listen;
++ listen = container_of(kref, struct ibscif_listen, kref);
++ kfree(listen);
++}
++
++static inline void get_listen(struct ibscif_listen *listen)
++{
++ kref_get(&listen->kref);
++}
++
++static inline void put_listen(struct ibscif_listen *listen)
++{
++ kref_put(&listen->kref, free_listen);
++}
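++
++/*
++ * Lifecycle note: cm_ctx and listen objects are kref counted, with
++ * kref_init() starting the count at 1; the creator's reference is
++ * dropped through put_cm()/put_listen(), and free_cm() also releases
++ * the attached connection, so no caller frees these directly.
++ */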
++
++static int connect_qp(struct ibscif_cm *cm_ctx)
++{
++ struct ibscif_qp *qp;
++ struct ib_qp_attr qp_attr;
++ int qp_attr_mask;
++ int err;
++
++ qp = ibscif_get_qp(cm_ctx->qpn);
++ if (IS_ERR(qp)) {
++ printk(KERN_ERR PFX "%s: invalid QP number: %d\n", __func__, cm_ctx->qpn);
++ return -EINVAL;
++ }
++
++ qp_attr_mask = IB_QP_STATE |
++ IB_QP_AV |
++ IB_QP_DEST_QPN |
++ IB_QP_ACCESS_FLAGS |
++ IB_QP_MAX_QP_RD_ATOMIC |
++ IB_QP_MAX_DEST_RD_ATOMIC;
++
++ qp_attr.ah_attr.ah_flags = 0;
++ qp_attr.ah_attr.dlid = IBSCIF_NODE_ID_TO_LID(cm_ctx->remote_node_id);
++ qp_attr.dest_qp_num = cm_ctx->remote_qpn;
++ qp_attr.qp_state = IB_QPS_RTS;
++ qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE |
++ IB_ACCESS_REMOTE_WRITE |
++ IB_ACCESS_REMOTE_READ |
++ IB_ACCESS_REMOTE_ATOMIC;
++ qp_attr.max_rd_atomic = 16; /* 8-bit value, don't use MAX_OR */
++ qp_attr.max_dest_rd_atomic = 16;/* 8-bit value, don't use MAX_IR */
++
++ err = ib_modify_qp(&qp->ibqp, &qp_attr, qp_attr_mask);
++
++ if (!err) {
++ qp->cm_context = cm_ctx;
++ get_cm(cm_ctx);
++ }
++
++ ibscif_put_qp(qp);
++
++ return err;
++}
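++
++/*
++ * Note: connect_qp() drives the QP straight to IB_QPS_RTS with a
++ * single ib_modify_qp() call; the usual INIT/RTR/RTS sequence is
++ * skipped here since the address and QPN exchange has already been
++ * done by the REQ/REP/RTU messages handled in ibscif_process_cm_skb().
++ */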
++
++static void event_connection_close(struct ibscif_cm *cm_ctx)
++{
++ struct iw_cm_event event;
++
++ memset(&event, 0, sizeof(event));
++ event.event = IW_CM_EVENT_CLOSE;
++ event.status = -ECONNRESET;
++ if (cm_ctx->cm_id) {
++ cm_ctx->cm_id->event_handler(cm_ctx->cm_id, &event);
++ cm_ctx->cm_id->rem_ref(cm_ctx->cm_id);
++ cm_ctx->cm_id = NULL;
++ }
++}
++
++static void event_connection_reply(struct ibscif_cm *cm_ctx, int status)
++{
++ struct iw_cm_event event;
++
++ memset(&event, 0, sizeof(event));
++ event.event = IW_CM_EVENT_CONNECT_REPLY;
++ event.status = status;
++ /* copy only sizeof(sockaddr_in); a sockaddr_storage-sized read
++ * from the smaller source struct would run past its end */
++ memcpy(&event.local_addr, &cm_ctx->local_addr, sizeof(cm_ctx->local_addr));
++ memcpy(&event.remote_addr, &cm_ctx->remote_addr, sizeof(cm_ctx->remote_addr));
++
++ if ((status == 0) || (status == -ECONNREFUSED)) {
++ event.private_data_len = cm_ctx->plen;
++ event.private_data = cm_ctx->pdata;
++ }
++ if (cm_ctx->cm_id) {
++ cm_ctx->cm_id->event_handler(cm_ctx->cm_id, &event);
++ if (status == -ECONNREFUSED) {
++ cm_ctx->cm_id->rem_ref(cm_ctx->cm_id);
++ cm_ctx->cm_id = NULL;
++ }
++ }
++}
++
++static void event_connection_request(struct ibscif_cm *cm_ctx)
++{
++ struct iw_cm_event event;
++
++ memset(&event, 0, sizeof(event));
++ event.event = IW_CM_EVENT_CONNECT_REQUEST;
++ memcpy(&event.local_addr, &cm_ctx->local_addr, sizeof(cm_ctx->local_addr));
++ memcpy(&event.remote_addr, &cm_ctx->remote_addr, sizeof(cm_ctx->remote_addr));
++ event.private_data_len = cm_ctx->plen;
++ event.private_data = cm_ctx->pdata;
++ event.provider_data = cm_ctx;
++ event.ird = 16;
++ event.ord = 16;
++
++ if (cm_ctx->listen) {
++ cm_ctx->listen->cm_id->event_handler(cm_ctx->listen->cm_id, &event);
++ put_listen(cm_ctx->listen);
++ cm_ctx->listen = NULL;
++ }
++}
++
++static void event_connection_established(struct ibscif_cm *cm_ctx)
++{
++ struct iw_cm_event event;
++
++ memset(&event, 0, sizeof(event));
++ event.event = IW_CM_EVENT_ESTABLISHED;
++ event.ird = 16;
++ event.ord = 16;
++ if (cm_ctx->cm_id) {
++ cm_ctx->cm_id->event_handler(cm_ctx->cm_id, &event);
++ }
++}
++
++void ibscif_cm_async_callback(void *cm_context)
++{
++ struct ibscif_cm *cm_ctx = cm_context;
++
++ if (cm_ctx) {
++ event_connection_close(cm_ctx);
++ put_cm(cm_ctx);
++ }
++}
++
++int ibscif_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
++{
++ struct ibscif_cm *cm_ctx;
++ struct sockaddr_in *local_addr = (struct sockaddr_in *) &cm_id->local_addr;
++ struct sockaddr_in *remote_addr = (struct sockaddr_in *) &cm_id->remote_addr;
++ int node_id;
++ int remote_node_id;
++ int err = 0;
++
++ cm_ctx = kzalloc(sizeof *cm_ctx, GFP_KERNEL);
++ if (!cm_ctx) {
++ printk(KERN_ALERT PFX "%s: cannot allocate cm_ctx\n", __func__);
++ return -ENOMEM;
++ }
++
++ kref_init(&cm_ctx->kref); /* refcnt <- 1 */
++ spin_lock_init(&cm_ctx->lock);
++
++ node_id = sockaddr_in_to_node_id(*local_addr);
++ remote_node_id = sockaddr_in_to_node_id(*remote_addr);
++ if (node_id < 0 || remote_node_id < 0) {
++ printk(KERN_ALERT PFX "%s: invalid address, local_addr=%8x, remote_addr=%8x, node_id=%d, remote_node_id=%d\n",
++ __func__, local_addr->sin_addr.s_addr, remote_addr->sin_addr.s_addr,
++ node_id, remote_node_id);
++ err = -EINVAL;
++ goto out_free;
++ }
++
++ cm_ctx->conn = ibscif_get_conn(node_id, remote_node_id, 0);
++ if (!cm_ctx->conn) {
++ printk(KERN_ALERT PFX "%s: failed to get connection %d-->%d\n", __func__, node_id, remote_node_id);
++ err = -EINVAL;
++ goto out_free;
++ }
++
++ cm_id->add_ref(cm_id);
++ cm_id->provider_data = cm_ctx;
++
++ cm_ctx->cm_id = cm_id;
++ cm_ctx->node_id = node_id;
++ cm_ctx->remote_node_id = remote_node_id;
++ cm_ctx->local_addr = *local_addr;
++ cm_ctx->remote_addr = *remote_addr;
++ cm_ctx->qpn = conn_param->qpn;
++ cm_ctx->plen = conn_param->private_data_len;
++ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) {
++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n",
++ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE);
++ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE;
++ }
++ if (cm_ctx->plen)
++ memcpy(cm_ctx->pdata, conn_param->private_data, cm_ctx->plen);
++
++ err = ibscif_send_cm_req(cm_ctx);
++ if (err) {
++ /* drop the references taken above so the context is not leaked */
++ cm_id->rem_ref(cm_id);
++ cm_ctx->cm_id = NULL;
++ put_cm(cm_ctx);
++ }
++
++ return err;
++
++out_free:
++ kfree(cm_ctx);
++ return err;
++}
++
++int ibscif_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
++{
++ struct ibscif_cm *cm_ctx = cm_id->provider_data;
++ int err = 0;
++
++ cm_id->add_ref(cm_id);
++ cm_ctx->cm_id = cm_id;
++ cm_ctx->qpn = conn_param->qpn;
++ cm_ctx->plen = conn_param->private_data_len;
++ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) {
++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n",
++ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE);
++ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE;
++ }
++ if (cm_ctx->plen)
++ memcpy(cm_ctx->pdata, conn_param->private_data, cm_ctx->plen);
++
++ err = connect_qp(cm_ctx);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: failed to modify QP into connected state\n", __func__);
++ goto err_out;
++ }
++
++ err = ibscif_send_cm_rep(cm_ctx);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: failed to send REP\n", __func__);
++ goto err_out;
++ }
++
++ return 0;
++
++err_out:
++ cm_id->rem_ref(cm_id);
++ cm_ctx->cm_id = NULL;
++ put_cm(cm_ctx);
++ return err;
++}
++
++int ibscif_cm_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
++{
++ struct ibscif_cm *cm_ctx = cm_id->provider_data;
++ int err = 0;
++
++ err = ibscif_send_cm_rej(cm_ctx, pdata, pdata_len);
++
++ put_cm(cm_ctx);
++ return err;
++}
++
++int ibscif_cm_create_listen(struct iw_cm_id *cm_id, int backlog)
++{
++ struct ibscif_listen *listen;
++ struct sockaddr_in *local_addr = (struct sockaddr_in *) &cm_id->local_addr;
++
++ listen = kzalloc(sizeof *listen, GFP_KERNEL);
++ if (!listen) {
++ printk(KERN_ALERT PFX "%s: cannot allocate listen object\n", __func__);
++ return -ENOMEM;
++ }
++
++ kref_init(&listen->kref); /* refcnt <- 1 */
++
++ listen->cm_id = cm_id;
++ listen->port = local_addr->sin_port;
++ cm_id->provider_data = listen;
++ cm_id->add_ref(cm_id);
++
++ spin_lock_bh(&listen_list_lock);
++ list_add(&listen->entry, &listen_list);
++ spin_unlock_bh(&listen_list_lock);
++
++ return 0;
++}
++
++int ibscif_cm_destroy_listen(struct iw_cm_id *cm_id)
++{
++ struct ibscif_listen *listen = cm_id->provider_data;
++
++ spin_lock_bh(&listen_list_lock);
++ list_del(&listen->entry);
++ spin_unlock_bh(&listen_list_lock);
++ cm_id->rem_ref(cm_id);
++ put_listen(listen);
++
++ return 0;
++}
++
++/* Similar to ibscif_get_qp(), but differs in that it:
++ * (1) uses the "irqsave" variants of the lock functions to avoid
++ * kernel warnings about "local_bh_enable_ip";
++ * (2) does not hold a reference on success;
++ * (3) returns NULL instead of an error pointer on failure.
++ */
++struct ib_qp *ibscif_cm_get_qp(struct ib_device *ibdev, int qpn)
++{
++ struct ibscif_qp *qp;
++ unsigned long flags;
++
++ read_lock_irqsave(&wiremap_lock, flags);
++ qp = idr_find(&wiremap, qpn);
++ if (likely(qp) && unlikely(qp->magic != QP_MAGIC))
++ qp = NULL;
++ read_unlock_irqrestore(&wiremap_lock, flags);
++
++ return qp ? &qp->ibqp : NULL;
++}
++
++void ibscif_cm_add_ref(struct ib_qp *ibqp)
++{
++ struct ibscif_qp *qp;
++
++ if (likely(ibqp)) {
++ qp = to_qp(ibqp);
++ kref_get(&qp->ref);
++ }
++}
++
++void ibscif_cm_rem_ref(struct ib_qp *ibqp)
++{
++ struct ibscif_qp *qp;
++
++ if (likely(ibqp)) {
++ qp = to_qp(ibqp);
++ ibscif_put_qp(qp);
++ }
++}
++
++int ibscif_process_cm_skb(struct sk_buff *skb, struct ibscif_conn *conn)
++{
++ union ibscif_pdu *pdu = (union ibscif_pdu *)skb->data;
++ struct ibscif_cm *cm_ctx;
++ struct ibscif_listen *listen;
++ int cmd, qpn, status, plen, err, port;
++ u64 req_ctx, rep_ctx;
++
++ req_ctx = __be64_to_cpu(pdu->cm.req_ctx);
++ rep_ctx = __be64_to_cpu(pdu->cm.rep_ctx);
++ cmd = __be32_to_cpu(pdu->cm.cmd);
++ port = __be32_to_cpu(pdu->cm.port);
++ qpn = __be32_to_cpu(pdu->cm.qpn);
++ status = __be32_to_cpu(pdu->cm.status);
++ plen = __be32_to_cpu(pdu->cm.plen);
++
++ switch (cmd) {
++ case IBSCIF_CM_REQ:
++ cm_ctx = kzalloc(sizeof *cm_ctx, GFP_KERNEL);
++ if (!cm_ctx) {
++ printk(KERN_ALERT PFX "%s: cannot allocate cm_ctx\n", __func__);
++ return -ENOMEM;
++ }
++ kref_init(&cm_ctx->kref); /* refcnt <- 1 */
++ spin_lock_init(&cm_ctx->lock);
++
++ spin_lock_bh(&listen_list_lock);
++ list_for_each_entry(listen, &listen_list, entry) {
++ if (listen->port == port) {
++ cm_ctx->listen = listen;
++ get_listen(listen);
++ }
++ }
++ spin_unlock_bh(&listen_list_lock);
++
++ if (!cm_ctx->listen) {
++ printk(KERN_ALERT PFX "%s: no matching listener for connection request, port=%d\n", __func__, port);
++ put_cm(cm_ctx);
++ /* fix me: send CM_REJ */
++ return -EINVAL;
++ }
++
++ cm_ctx->cm_id = NULL;
++ cm_ctx->node_id = conn->dev->node_id;
++ cm_ctx->remote_node_id = conn->remote_node_id;
++ cm_ctx->local_addr = node_id_to_sockaddr_in(cm_ctx->node_id);
++ if (cm_ctx->listen)
++ cm_ctx->local_addr.sin_port = cm_ctx->listen->port;
++ cm_ctx->remote_addr = node_id_to_sockaddr_in(cm_ctx->remote_node_id);
++ cm_ctx->remote_qpn = qpn;
++ cm_ctx->plen = plen;
++ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) {
++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n",
++ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE);
++ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE;
++ }
++ if (cm_ctx->plen)
++ memcpy(cm_ctx->pdata, pdu->cm.pdata, cm_ctx->plen);
++
++ cm_ctx->peer_context = req_ctx;
++ cm_ctx->conn = conn;
++ atomic_inc(&conn->refcnt);
++
++ event_connection_request(cm_ctx);
++ break;
++
++ case IBSCIF_CM_REP:
++ cm_ctx = (struct ibscif_cm *)(unsigned long)req_ctx;
++ /* clamp plen as the REQ path does; pdata is only
++ * IBSCIF_MAX_PDATA_SIZE bytes */
++ if (plen > IBSCIF_MAX_PDATA_SIZE)
++ plen = IBSCIF_MAX_PDATA_SIZE;
++ cm_ctx->plen = plen;
++ memcpy(cm_ctx->pdata, pdu->cm.pdata, plen);
++ cm_ctx->remote_qpn = qpn;
++ cm_ctx->peer_context = rep_ctx;
++ err = connect_qp(cm_ctx);
++ if (!err)
++ err = ibscif_send_cm_rtu(cm_ctx);
++ if (err)
++ printk(KERN_ALERT PFX "%s: failed to modify QP into connected state\n", __func__);
++ event_connection_reply(cm_ctx, err);
++ put_cm(cm_ctx);
++ break;
++
++ case IBSCIF_CM_REJ:
++ cm_ctx = (struct ibscif_cm *)(unsigned long)req_ctx;
++ if (plen > IBSCIF_MAX_PDATA_SIZE)
++ plen = IBSCIF_MAX_PDATA_SIZE;
++ cm_ctx->plen = plen;
++ memcpy(cm_ctx->pdata, pdu->cm.pdata, plen);
++ event_connection_reply(cm_ctx, status);
++ put_cm(cm_ctx);
++ break;
++
++ case IBSCIF_CM_RTU:
++ cm_ctx = (struct ibscif_cm *)(unsigned long)rep_ctx;
++ event_connection_established(cm_ctx);
++ put_cm(cm_ctx);
++ break;
++
++ default:
++ /* report the decoded command, not the raw big-endian word */
++ printk(KERN_ALERT PFX "%s: invalid CM cmd: %d\n", __func__, cmd);
++ break;
++ }
++
++ return 0;
++}
++
+diff --git a/drivers/infiniband/hw/scif/ibscif_cq.c b/drivers/infiniband/hw/scif/ibscif_cq.c
+new file mode 100644
+index 0000000..85a3442
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_cq.c
+@@ -0,0 +1,308 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <linux/vmalloc.h>
++
++#include "ibscif_driver.h"
++
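++/*
++ * Completion notification runs in tasklet (softirq) context: the CQ
++ * is armed via ibscif_arm_cq(), and ibscif_notify_cq() disarms it and
++ * schedules this tasklet, which calls the consumer's comp_handler
++ * with no CQ lock held.
++ */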
++static void ibscif_cq_tasklet(unsigned long cq_ptr)
++{
++ struct ibscif_cq *cq = (struct ibscif_cq *)cq_ptr;
++ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
++}
++
++struct ib_cq *ibscif_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr,
++ struct ib_ucontext *context, struct ib_udata *udata)
++{
++ struct ibscif_dev *dev = to_dev(ibdev);
++ struct ibscif_cq *cq;
++ int nbytes, npages;
++ int err;
++ int entries = attr->cqe;
++
++ if (entries < 1 || entries > MAX_CQ_SIZE)
++ return ERR_PTR(-EINVAL);
++
++ if (!atomic_add_unless(&dev->cq_cnt, 1, MAX_CQS))
++ return ERR_PTR(-EAGAIN);
++
++ cq = kzalloc(sizeof *cq, GFP_KERNEL);
++ if (!cq) {
++ atomic_dec(&dev->cq_cnt);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ spin_lock_init(&cq->lock);
++ tasklet_init(&cq->tasklet, ibscif_cq_tasklet, (unsigned long)cq);
++ cq->state = CQ_READY;
++
++ nbytes = PAGE_ALIGN(entries * sizeof *cq->wc);
++ npages = nbytes >> PAGE_SHIFT;
++
++ err = ibscif_reserve_quota(&npages);
++ if (err)
++ goto out;
++
++ cq->wc = vzalloc(nbytes); /* Consider using vmalloc_user */
++ if (!cq->wc) {
++ err = -ENOMEM;
++ goto out;
++ }
++
++ cq->ibcq.cqe = nbytes / sizeof *cq->wc;
++
++ return &cq->ibcq;
++out:
++ ibscif_destroy_cq(&cq->ibcq);
++ return ERR_PTR(err);
++}
++
++int ibscif_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
++{
++ struct ibscif_cq *cq = to_cq(ibcq);
++ struct ibscif_wc *old_wc, *new_wc;
++ int nbytes, old_npages, new_npages, i, err;
++
++ if (cqe < 1 || cqe > MAX_CQ_SIZE)
++ return -EINVAL;
++
++ nbytes = PAGE_ALIGN(cqe * sizeof *cq->wc);
++ new_npages = nbytes >> PAGE_SHIFT;
++ old_npages = PAGE_ALIGN(ibcq->cqe * sizeof *cq->wc) >> PAGE_SHIFT;
++ new_npages -= old_npages;
++
++ if (new_npages == 0)
++ return 0;
++
++ if (new_npages > 0) {
++ err = ibscif_reserve_quota(&new_npages);
++ if (err)
++ return err;
++ }
++
++ new_wc = vzalloc(nbytes); /* Consider using vmalloc_user */
++ if (!new_wc) {
++ err = -ENOMEM;
++ goto out1;
++ }
++ cqe = nbytes / sizeof *cq->wc;
++ old_wc = cq->wc;
++
++ spin_lock_bh(&cq->lock);
++
++ if (cqe < cq->depth) {
++ err = -EBUSY;
++ goto out2;
++ }
++
++ for (i = 0; i < cq->depth; i++) {
++ new_wc[i] = old_wc[cq->head];
++ cq->head = (cq->head + 1) % ibcq->cqe;
++ }
++
++ cq->wc = new_wc;
++ cq->head = 0;
++ cq->tail = cq->depth;
++ ibcq->cqe = cqe;
++
++ spin_unlock_bh(&cq->lock);
++
++ if (old_wc)
++ vfree(old_wc);
++ if (new_npages < 0)
++ ibscif_release_quota(-new_npages);
++
++ return 0;
++out2:
++ spin_unlock_bh(&cq->lock);
++ vfree(new_wc);
++out1:
++ if (new_npages > 0)
++ ibscif_release_quota(new_npages);
++ return err;
++}
++
++int ibscif_destroy_cq(struct ib_cq *ibcq)
++{
++ struct ibscif_dev *dev = to_dev(ibcq->device);
++ struct ibscif_cq *cq = to_cq(ibcq);
++
++ tasklet_kill(&cq->tasklet);
++
++ if (cq->wc)
++ vfree(cq->wc);
++
++ ibscif_release_quota(PAGE_ALIGN(ibcq->cqe * sizeof *cq->wc) >> PAGE_SHIFT);
++
++ atomic_dec(&dev->cq_cnt);
++
++ kfree(cq);
++ return 0;
++}
++
++int ibscif_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
++{
++ struct ibscif_cq *cq = to_cq(ibcq);
++ struct ibscif_wq *wq;
++ int i, reap;
++
++ /*
++ * The protocol layer holds WQ lock while processing a packet and acquires
++ * the CQ lock to append a work completion. To avoid a deadly embrace, do
++ * not hold the CQ lock when adjusting the WQ reap count.
++ */
++ for (i = 0; (i < num_entries) && cq->depth; i++) {
++
++ spin_lock_bh(&cq->lock);
++ entry[i] = cq->wc[cq->head].ibwc;
++ reap = cq->wc[cq->head].reap;
++ cq->depth--;
++ wq = cq->wc[cq->head].wq;
++ cq->head = (cq->head + 1) % ibcq->cqe;
++ spin_unlock_bh(&cq->lock);
++
++ /* WQ may no longer exist or has been flushed. */
++ if (wq) {
++ spin_lock_bh(&wq->lock);
++ wq->head = (wq->head + reap) % wq->size;
++ wq->depth -= reap;
++ wq->completions -= reap;
++ spin_unlock_bh(&wq->lock);
++ }
++ }
++
++ return i;
++}
++
++int ibscif_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify)
++{
++ struct ibscif_cq *cq = to_cq(ibcq);
++ int ret;
++
++ spin_lock_bh(&cq->lock);
++
++ cq->arm |= notify & IB_CQ_SOLICITED_MASK;
++
++ if (notify & IB_CQ_SOLICITED)
++ cq->solicited = 0;
++
++ ret = (notify & IB_CQ_REPORT_MISSED_EVENTS) && cq->depth;
++
++ spin_unlock_bh(&cq->lock);
++
++ return ret;
++}
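++
++/*
++ * Note: with IB_CQ_REPORT_MISSED_EVENTS the return value above is
++ * positive when completions are already queued, hinting that the
++ * consumer should poll again rather than wait for the next event.
++ */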
++
++void ibscif_notify_cq(struct ibscif_cq *cq)
++{
++ if (!cq->arm || !cq->depth)
++ return;
++
++ spin_lock_bh(&cq->lock);
++ if ((cq->arm & IB_CQ_NEXT_COMP) || ((cq->arm & IB_CQ_SOLICITED) && cq->solicited)) {
++ cq->arm = 0; /* Disarm the CQ */
++ spin_unlock_bh(&cq->lock);
++ tasklet_hi_schedule(&cq->tasklet);
++ } else
++ spin_unlock_bh(&cq->lock);
++}
++
++void ibscif_clear_cqes(struct ibscif_cq *cq, struct ibscif_wq *wq)
++{
++ struct ibscif_wc *wc;
++ int i, j;
++
++ if (!cq)
++ return;
++
++ /*
++ * Walk the CQ work completions and clear pointers to the
++ * given WQ to prevent retiring WQEs when CQEs are polled.
++ */
++ spin_lock_bh(&cq->lock);
++ j = cq->head;
++ for (i = 0; i < cq->depth; i++) {
++ wc = &cq->wc[j];
++ if (wc->wq == wq)
++ wc->wq = NULL;
++ j = (j + 1) % cq->ibcq.cqe;
++ }
++ spin_unlock_bh(&cq->lock);
++}
++
++/*
++ * Acquire lock and reserve a completion queue entry.
++ * Note that cq->lock is held upon successful completion of this call.
++ * On error, WQs affiliated with this CQ should generate an event and
++ * transition to the error state; refer to IB Spec r1.2 C11-39 and C11-40.
++ */
++int ibscif_reserve_cqe(struct ibscif_cq *cq, struct ibscif_wc **wc)
++{
++ spin_lock_bh(&cq->lock);
++
++ if (cq->state != CQ_READY) {
++ spin_unlock_bh(&cq->lock);
++ return -EIO;
++ }
++ if (!cq->ibcq.cqe) {
++ spin_unlock_bh(&cq->lock);
++ return -ENOSPC;
++ }
++ if (cq->depth == cq->ibcq.cqe) {
++ cq->state = CQ_ERROR;
++ spin_unlock_bh(&cq->lock);
++
++ if (cq->ibcq.event_handler) {
++ struct ib_event record;
++ record.event = IB_EVENT_CQ_ERR;
++ record.device = cq->ibcq.device;
++ record.element.cq = &cq->ibcq;
++ cq->ibcq.event_handler(&record, cq->ibcq.cq_context);
++ }
++ return -ENOBUFS;
++ }
++
++ *wc = &cq->wc[cq->tail];
++
++ return 0;
++}
++
++/*
++ * Append a completion queue entry and release lock.
++ * Note that this function assumes that the cq->lock is currently held.
++ */
++void ibscif_append_cqe(struct ibscif_cq *cq, struct ibscif_wc *wc, int solicited)
++{
++ cq->solicited = !!(solicited || (wc->ibwc.status != IB_WC_SUCCESS));
++ cq->tail = (cq->tail + 1) % cq->ibcq.cqe;
++ cq->depth++;
++
++ spin_unlock_bh(&cq->lock);
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_driver.h b/drivers/infiniband/hw/scif/ibscif_driver.h
+new file mode 100644
+index 0000000..ecca3b7
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_driver.h
+@@ -0,0 +1,774 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef IBSCIF_DRIVER_H
++#define IBSCIF_DRIVER_H
++
++#include <linux/module.h>
++#include <linux/idr.h> /* for idr routines */
++#include <linux/kthread.h> /* for kthread routines */
++#include <linux/highmem.h> /* for kmap_atomic */
++#include <linux/pkt_sched.h> /* for TC_PRIO_CONTROL */
++#include <linux/if_arp.h> /* for ARPHRD_ETHER */
++#include <linux/swap.h> /* for totalram_pages */
++#include <linux/proc_fs.h> /* for proc_mkdir */
++#include <linux/poll.h>
++#include <linux/workqueue.h>
++#include <linux/semaphore.h>
++
++/* these macros were provided by "linux/semaphore.h" on older kernels
++ * but have since been removed, so supply fallbacks here.
++ */
++#ifndef DECLARE_MUTEX
++#define DECLARE_MUTEX(name) \
++ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
++#endif
++
++#ifndef init_MUTEX
++#define init_MUTEX(sem) sema_init(sem, 1)
++#endif
++
++#include <rdma/ib_umem.h>
++#include <rdma/ib_verbs.h>
++#include <rdma/ib_user_verbs.h>
++#include <rdma/iw_cm.h>
++
++/* SCIF compatibility hacks.
++ * The vanilla-kernel scif.h is always "upstream", i.e. it has the new
++ * function names. An out-of-tree scif.h from MPSS may have the older
++ * names (MPSS 3.x) or may largely match the "upstream" version.
++ */
++
++#ifdef HAVE_EXTERNAL_SCIF
++ #include <scif.h>
++ #ifndef HAVE_UPSTREAM_SCIF
++ #define scif_get_node_ids scif_get_nodeIDs
++ #define scif_port_id scif_portID
++ #endif
++#else
++ #include <linux/scif.h>
++#endif
++
++#include "ibscif_protocol.h"
++
++#define SCIF_OFED_PORT_0 60 /* reserved for ibscif */
++#define SCIF_OFED_PORT_1 61 /* reserved for ibscif */
++
++#define IBSCIF_MTU 4096
++
++#define IBSCIF_EP_TYPE_LISTEN 0
++#define IBSCIF_EP_TYPE_COMM 1
++
++#define DRV_NAME "ibscif"
++#define PFX DRV_NAME ": "
++#define IBDEV_PFX DRV_NAME ""
++#define DRV_DESC "OpenFabrics IBSCIF Driver"
++#define DRV_VERSION "1.0"
++#define DRV_SIGNON DRV_DESC " v" DRV_VERSION
++
++#define UVERBS_ABI_VER 6
++#define VENDOR_ID 0x8086 /* Intel Corporation */
++#define DEVICE_ID 0
++#define HW_REV 1
++#define FW_REV IBSCIF_PROTOCOL_VER
++
++#define ibscif_db(fmt, arg...) \
++ do { if (verbose) printk(KERN_INFO PFX fmt, ##arg); } while (0)
++
++/*
++ * Attribute limits.
++ * These limits are imposed on client requests, however, the actual values
++ * returned may be larger than these limits on some objects due to rounding.
++ * The definitions are intended to show the thinking behind the values.
++ * E.g., MAX_PDS defined as MAX_QPS is intended to allow each QP to be
++ * on a separate PD, although that is not a usage requirement.
++ */
++#define MAX_QPS (64 * 1024)
++#define MAX_QP_SIZE (16 * 1024)
++#define MAX_CQS (MAX_QPS * 2) /* x2:send queues + recv queues */
++#define MAX_CQ_SIZE (MAX_QP_SIZE * 4) /* or combined */
++#define MAX_PDS MAX_QPS /* 1 per QP */
++#if 0
++#define MAX_MRS (MAX_QPS * 4) /* x4:local/remote,read/write */
++#else
++#define MAX_MRS 16383 /* limited by IBSCIF_MR_MAX_KEY */
++#endif
++#define MAX_MR_SIZE (2U * 1024 * 1024 * 1024)
++#define MAX_SGES (PAGE_SIZE / sizeof(struct ib_sge))
++#define MAX_OR (MAX_QP_SIZE / 2) /* half outbound reqs */
++#define MAX_IR MAX_OR /* balance inbound with outbound */
++
++extern int window_size;
++#define MIN_WINDOW_SIZE 4 /* Ack every window_size/MIN_WINDOW_SIZE packets */
++
++extern int rma_threshold;
++extern int fast_rdma;
++extern int blocking_send;
++extern int blocking_recv;
++extern int scif_loopback;
++extern int host_proxy;
++extern int new_ib_type;
++extern int verbose;
++extern int check_grh;
++
++extern struct list_head devlist;
++extern struct semaphore devlist_mutex;
++
++extern struct idr wiremap;
++extern rwlock_t wiremap_lock;
++
++extern struct ib_dma_mapping_ops ibscif_dma_mapping_ops;
++
++/* Match IB opcodes for copy in post_send; append driver specific values. */
++enum ibscif_wr_opcode {
++ WR_SEND = IB_WR_SEND,
++ WR_SEND_WITH_IMM = IB_WR_SEND_WITH_IMM,
++ WR_RDMA_WRITE = IB_WR_RDMA_WRITE,
++ WR_RDMA_WRITE_WITH_IMM = IB_WR_RDMA_WRITE_WITH_IMM,
++ WR_RDMA_READ = IB_WR_RDMA_READ,
++ WR_ATOMIC_CMP_AND_SWP = IB_WR_ATOMIC_CMP_AND_SWP,
++ WR_ATOMIC_FETCH_AND_ADD = IB_WR_ATOMIC_FETCH_AND_ADD,
++ WR_RDMA_READ_RSP,
++ WR_ATOMIC_RSP,
++ WR_RMA_RSP,
++ WR_UD,
++ NR_WR_OPCODES /* Must be last (for stats) */
++};
++
++struct ibscif_stats {
++ unsigned long packets_sent;
++ unsigned long packets_rcvd;
++ unsigned long bytes_sent;
++ unsigned long bytes_rcvd;
++ unsigned long duplicates;
++ unsigned long tx_errors;
++ unsigned long sched_exhaust;
++ unsigned long unavailable;
++ unsigned long loopback;
++ unsigned long recv;
++ unsigned long recv_imm;
++ unsigned long wr_opcode[NR_WR_OPCODES];
++ unsigned long fast_rdma_write;
++ unsigned long fast_rdma_read;
++ unsigned long fast_rdma_unavailable;
++ unsigned long fast_rdma_fallback;
++ unsigned long fast_rdma_force_ack;
++ unsigned long fast_rdma_tail_write;
++};
++
++#define DEV_STAT(dev, counter) dev->stats.counter
++
++#define IBSCIF_MAX_DEVICES 16
++#define IBSCIF_NAME_SIZE 12
++
++#define IBSCIF_NODE_ID_TO_LID(node_id) ((node_id) + 1000)
++#define IBSCIF_LID_TO_NODE_ID(lid) ((lid) - 1000)
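++/* e.g. SCIF node 0 is presented as LID 1000 and node 5 as LID 1005;
++ * the +1000 bias presumably keeps ibscif LIDs clear of small,
++ * conventionally assigned values.
++ */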
++
++struct ibscif_conn {
++ struct list_head entry;
++ atomic_t refcnt;
++ scif_epd_t ep;
++ unsigned short remote_node_id;
++ union ib_gid remote_gid;
++ struct ibscif_dev *dev;
++ int local_close;
++ int remote_close;
++};
++
++struct ibscif_listen {
++ struct iw_cm_id *cm_id;
++ struct list_head entry;
++ struct kref kref;
++ int port;
++};
++
++#define IBSCIF_MAX_PDATA_SIZE 256
++struct ibscif_cm {
++ struct iw_cm_id *cm_id;
++ struct ibscif_conn *conn;
++ struct ibscif_listen *listen;
++ struct kref kref;
++ spinlock_t lock;
++ struct sockaddr_in local_addr;
++ struct sockaddr_in remote_addr;
++ unsigned short node_id;
++ unsigned short remote_node_id;
++ u32 qpn;
++ u32 remote_qpn;
++ int plen;
++ u8 pdata[IBSCIF_MAX_PDATA_SIZE];
++ u64 peer_context;
++};
++
++struct ibscif_dev {
++ struct ib_device ibdev;
++ struct net_device *netdev; /* for RDMA CM support */
++ struct list_head entry;
++
++ char name[IBSCIF_NAME_SIZE];
++ union ib_gid gid;
++ unsigned short node_id;
++ atomic_t refcnt;
++ scif_epd_t listen_ep;
++ struct list_head conn_list;
++ struct list_head mr_list;
++ struct semaphore mr_list_mutex;
++
++ struct proc_dir_entry *procfs;
++ struct ibscif_stats stats;
++
++ atomic_t pd_cnt;
++ atomic_t cq_cnt;
++ atomic_t qp_cnt;
++ atomic_t mr_cnt;
++
++ atomic_t available;
++ atomic_t was_new;
++
++ spinlock_t atomic_op;
++
++ struct semaphore mutex;
++ struct list_head wq_list; /* List of WQ's on this device */
++};
++
++struct ibscif_pd {
++ struct ib_pd ibpd;
++};
++
++struct ibscif_ah {
++ struct ib_ah ibah;
++ __be16 dlid;
++};
++
++struct ibscif_wc {
++ struct ib_wc ibwc;
++ int reap;
++ struct ibscif_wq *wq;
++};
++
++enum ibscif_cq_state {
++ CQ_READY,
++ CQ_ERROR
++};
++
++struct ibscif_cq {
++ struct ib_cq ibcq;
++ spinlock_t lock;
++ struct tasklet_struct tasklet;
++ enum ibscif_cq_state state;
++ enum ib_cq_notify_flags arm;
++ int solicited;
++ int head;
++ int tail;
++ int depth;
++ struct ibscif_wc *wc;
++};
++
++struct ibscif_ds {
++ struct ibscif_mr *mr;
++ u32 offset;
++ u32 length;
++ u32 lkey;
++ u32 in_use;
++ struct ibscif_mreg_info *current_mreg;
++};
++
++struct ibscif_segmentation {
++ struct ibscif_ds *current_ds;
++ u32 current_page_index;
++ u32 current_page_offset;
++ u32 wr_length_remaining;
++ u32 ds_length_remaining;
++ u32 starting_seq;
++ u32 next_seq;
++ u32 ending_seq;
++};
++
++struct ibscif_reassembly {
++ struct ibscif_ds *current_ds;
++ u32 current_ds_offset;
++ u32 last_packet_seq;
++ u32 last_seen_seq;
++ __be32 immediate_data;
++ int final_length;
++ u16 opcode;
++};
++
++struct ibscif_sar {
++ struct ibscif_segmentation seg;
++ struct ibscif_reassembly rea;
++};
++
++enum ibscif_wr_state {
++ WR_WAITING,
++ WR_STARTED,
++ WR_WAITING_FOR_ACK,
++ WR_WAITING_FOR_RSP,
++ WR_LAST_SEEN,
++ WR_COMPLETED
++};
++
++struct ibscif_wr {
++ u64 id;
++ enum ibscif_wr_opcode opcode;
++ int length;
++ enum ib_send_flags flags;
++
++ u32 msg_id;
++ enum ibscif_wr_state state;
++ struct ibscif_sar sar;
++ u32 use_rma;
++ u32 rma_id;
++
++ union {
++ struct ibscif_send {
++ u32 immediate_data;
++ } send;
++
++ struct ibscif_ud {
++ u16 remote_node_id;
++ u32 remote_qpn;
++ } ud;
++
++ struct ibscif_read {
++ u64 remote_address;
++ int remote_length;
++ u32 rkey;
++ } read;
++
++ struct ibscif_write {
++ u64 remote_address;
++ u32 rkey;
++ u32 immediate_data;
++ } write;
++
++ struct ibscif_cmp_swp {
++ u64 cmp_operand;
++ u64 swp_operand;
++ u64 remote_address;
++ u32 rkey;
++ } cmp_swp;
++
++ struct ibscif_fetch_add {
++ u64 add_operand;
++ u64 remote_address;
++ u32 rkey;
++ } fetch_add;
++
++ struct ibscif_atomic_rsp {
++ u64 orig_data;
++ u16 opcode;
++ } atomic_rsp;
++
++ struct ibscif_rma_rsp {
++ u32 xfer_length;
++ u32 error;
++ } rma_rsp;
++ };
++
++ u32 num_ds;
++ struct ibscif_ds ds_list[0]; /* Must be last */
++};
++
++struct ibscif_tx_state {
++ u32 next_seq;
++ u32 last_ack_seq_recvd;
++ u32 next_msg_id;
++};
++
++struct ibscif_rx_state {
++ u32 last_in_seq;
++ u32 last_seq_acked;
++ int defer_in_process;
++};
++
++struct ibscif_wirestate {
++ struct ibscif_tx_state tx;
++ struct ibscif_rx_state rx;
++};
++
++struct ibscif_wire {
++ struct ibscif_wirestate sq;
++ struct ibscif_wirestate iq;
++};
++
++struct ibscif_wq {
++ struct list_head entry;
++ struct ibscif_qp *qp;
++ spinlock_t lock;
++ struct ibscif_wr *wr;
++ int head;
++ int tail;
++ int depth;
++ int size;
++ int max_sge;
++ int wr_size;
++ int completions;
++ int reap;
++ int next_wr;
++ int next_msg_id;
++ struct ibscif_wirestate *wirestate;
++ int fast_rdma_completions;
++ int ud_msg_id;
++};
++
++enum ibscif_qp_state {
++ QP_IDLE,
++ QP_CONNECTED,
++ QP_DISCONNECT,
++ QP_ERROR,
++ QP_RESET,
++ QP_IGNORE,
++ NR_QP_STATES /* Must be last */
++};
++
++enum ibscif_schedule {
++ SCHEDULE_RESUME = 1 << 0,
++ SCHEDULE_RETRY = 1 << 1,
++ SCHEDULE_TIMEOUT = 1 << 2,
++ SCHEDULE_SQ = 1 << 6,
++ SCHEDULE_IQ = 1 << 7
++};
++
++struct ibscif_qp {
++ int magic; /* Must be first */
++# define QP_MAGIC 0x5b51505d /* "[QP]" */
++ struct kref ref;
++ struct completion done;
++ struct ib_qp ibqp;
++ struct ibscif_dev *dev;
++ enum ib_access_flags access;
++ enum ib_sig_type sq_policy;
++ enum ibscif_schedule schedule;
++ struct ibscif_wire wire;
++ int mtu;
++
++ int max_or;
++ atomic_t or_depth;
++ atomic_t or_posted;
++
++ struct semaphore modify_mutex;
++ spinlock_t lock;
++ enum ibscif_qp_state state;
++ u16 local_node_id;
++ u16 remote_node_id;
++ struct ibscif_conn *conn;
++ u32 remote_qpn;
++ int loopback;
++ struct ibscif_wq sq;
++ struct ibscif_wq rq;
++ struct ibscif_wq iq;
++ int in_scheduler;
++
++ struct ibscif_conn *ud_conn[IBSCIF_MAX_DEVICES];
++ struct ibscif_cm *cm_context;
++};
++
++#define is_sq(wq) (wq == &wq->qp->sq)
++#define is_rq(wq) (wq == &wq->qp->rq)
++#define is_iq(wq) (wq == &wq->qp->iq)
++
++/* Info about MR registered via SCIF API */
++struct ibscif_mreg_info {
++ struct list_head entry;
++ struct ibscif_conn *conn;
++ u64 offset;
++ u64 aligned_offset;
++ u32 aligned_length;
++};
++
++struct ibscif_mr {
++ int magic; /* Must be first */
++# define MR_MAGIC 0x5b4d525d /* "[MR]" */
++ struct list_head entry;
++ struct kref ref;
++ struct completion done;
++ struct ib_mr ibmr;
++ struct ib_umem *umem;
++ enum ib_access_flags access;
++ u64 addr;
++ u32 length;
++ int npages;
++ struct page **page;
++ scif_pinned_pages_t pinned_pages;
++ struct list_head mreg_list;
++};
++
++/* emulate old ib_verbs.h ib_phys_buf */
++struct ibscif_phys_buf {
++ u64 addr;
++ u64 size;
++};
++
++/* A canonical virtual address on x86_64 falls in the range
++ * 0x0000000000000000-0x00007fffffffffff or
++ * 0xffff800000000000-0xffffffffffffffff; the range in between,
++ * 0x0000800000000000-0xffff7fffffffffff, is unused. In other words,
++ * only 48 bits are significant and the highest 16 bits are just a
++ * sign extension. We can therefore pack the rkey into those 16 bits
++ * and use the result as the "offset" into SCIF's registered address
++ * space. With SCIF_MAP_FIXED, the offset can then be computed
++ * directly from the rkey and virtual address, without a "remote
++ * registration cache" mechanism.
++ *
++ * SCIF reserves the top 2 bits of the offset for internal use,
++ * leaving 14 bits for the rkey.
++ */
++#define IBSCIF_MR_MAX_KEY (0x3FFF)
++#define IBSCIF_MR_VADDR_MASK (0x0000FFFFFFFFFFFFUL)
++#define IBSCIF_MR_SIGN_MASK (0x0000800000000000UL)
++#define IBSCIF_MR_SIGN_EXT (0xFFFF000000000000UL)
++#define IBSCIF_MR_RKEY_MASK (0x3FFF000000000000UL)
++
++#define IBSCIF_MR_VADDR_TO_OFFSET(rkey, vaddr) ((((unsigned long)rkey) << 48) | \
++ (vaddr & IBSCIF_MR_VADDR_MASK))
++
++#define IBSCIF_MR_OFFSET_TO_VADDR(offset) ((offset & IBSCIF_MR_SIGN_MASK) ? \
++ (offset | IBSCIF_MR_SIGN_EXT) : \
++ (offset & IBSCIF_MR_VADDR_MASK))
++
++#define IBSCIF_MR_OFFSET_TO_RKEY(offset) ((offset & IBSCIF_MR_RKEY_MASK) >> 48)
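++
++/*
++ * Worked example (illustrative values): with rkey 0x0abc and user
++ * vaddr 0x00007f0012345000, IBSCIF_MR_VADDR_TO_OFFSET() yields
++ * 0x0abc7f0012345000. IBSCIF_MR_OFFSET_TO_RKEY() recovers 0x0abc
++ * from bits 48-61, and since bit 47 is clear,
++ * IBSCIF_MR_OFFSET_TO_VADDR() returns 0x00007f0012345000. For a
++ * kernel vaddr like 0xffff880012345000, bit 47 is set, so the sign
++ * extension is ORed back in and the decode again round-trips.
++ */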
++
++#define TO_OBJ(name, src, dst, field) \
++static inline struct dst *name(struct src *field) \
++{ \
++ return container_of(field, struct dst, field); \
++}
++TO_OBJ(to_dev, ib_device, ibscif_dev, ibdev)
++TO_OBJ(to_pd, ib_pd, ibscif_pd, ibpd)
++TO_OBJ(to_cq, ib_cq, ibscif_cq, ibcq)
++TO_OBJ(to_qp, ib_qp, ibscif_qp, ibqp)
++TO_OBJ(to_mr, ib_mr, ibscif_mr, ibmr)
++TO_OBJ(to_ah, ib_ah, ibscif_ah, ibah)
++
++#define OBJ_GET(obj, type) \
++static inline struct ibscif_##obj *ibscif_get_##obj(int id) \
++{ \
++ struct ibscif_##obj *obj; \
++ read_lock_bh(&wiremap_lock); \
++ obj = idr_find(&wiremap, id); \
++ if (likely(obj)) { \
++ if (likely(obj->magic == type)) \
++ kref_get(&obj->ref); \
++ else \
++ obj = ERR_PTR(-ENXIO); \
++ } else \
++ obj = ERR_PTR(-ENOENT); \
++ read_unlock_bh(&wiremap_lock); \
++ return obj; \
++}
++OBJ_GET(mr, MR_MAGIC)
++OBJ_GET(qp, QP_MAGIC)
++
++void ibscif_complete_mr(struct kref *kref);
++void ibscif_complete_qp(struct kref *kref);
++
++#define OBJ_PUT(obj) \
++static inline void ibscif_put_##obj(struct ibscif_##obj *obj) \
++{ \
++ if (likely(obj)) \
++ kref_put(&obj->ref, ibscif_complete_##obj); \
++}
++OBJ_PUT(mr)
++OBJ_PUT(qp)
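++
++/*
++ * Usage sketch: a successful ibscif_get_mr()/ibscif_get_qp() looks
++ * the id up in the wiremap and takes a kref, so it must be paired
++ * with ibscif_put_mr()/ibscif_put_qp(); the magic-number check keeps
++ * a stale or reused id from resolving to the wrong object type.
++ */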
++
++/* This function assumes the WQ is protected by a lock. */
++static inline struct ibscif_wr *ibscif_get_wr(struct ibscif_wq *wq, int index)
++{
++ /* Must calculate because WQ array elements are variable sized. */
++ return (struct ibscif_wr *)((void *)wq->wr + (wq->wr_size * index));
++}
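++
++/*
++ * The WQ array stride is wr_size, not sizeof(struct ibscif_wr),
++ * because each entry is followed inline by its ds_list[]; wr_size is
++ * presumably sizeof(struct ibscif_wr) plus max_sge ds entries, set
++ * when the WQ is allocated.
++ */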
++
++/* This function assumes the WQ is protected by a lock. */
++static inline void ibscif_append_wq(struct ibscif_wq *wq)
++{
++ wq->tail = (wq->tail + 1) % wq->size;
++ wq->depth++;
++ wq->next_msg_id++;
++}
++
++static inline void ibscif_clear_ds_ref(struct ibscif_ds *ds)
++{
++ if (ds->in_use) {
++ ds->in_use = 0;
++ ibscif_put_mr(ds->mr);
++ }
++}
++
++static inline void ibscif_clear_ds_refs(struct ibscif_ds *ds, int num_ds)
++{
++ while (num_ds--)
++ ibscif_clear_ds_ref(ds++);
++}
++
++static inline enum ib_wc_opcode to_ib_wc_opcode(enum ib_wr_opcode opcode)
++{
++ /* SQ only - RQ is either IB_WC_RECV or IB_WC_RECV_RDMA_WITH_IMM. */
++ switch (opcode) {
++ case IB_WR_RDMA_WRITE: return IB_WC_RDMA_WRITE;
++ case IB_WR_RDMA_WRITE_WITH_IMM: return IB_WC_RDMA_WRITE;
++ case IB_WR_SEND: return IB_WC_SEND;
++ case IB_WR_SEND_WITH_IMM: return IB_WC_SEND;
++ case IB_WR_RDMA_READ: return IB_WC_RDMA_READ;
++ case IB_WR_ATOMIC_CMP_AND_SWP: return IB_WC_COMP_SWAP;
++ case IB_WR_ATOMIC_FETCH_AND_ADD: return IB_WC_FETCH_ADD;
++ default: return -1;
++ }
++}
++
++static inline void *ibscif_map_src(struct page *page)
++{
++ return kmap_atomic(page);
++}
++
++static inline void *ibscif_map_dst(struct page *page)
++{
++ return kmap_atomic(page);
++}
++
++static inline void ibscif_unmap_src(struct page *page, void *addr)
++{
++ /* pages are mapped with kmap_atomic() above, so the atomic
++ * unmap variant (which takes the address) must be used */
++ if (likely(addr))
++ kunmap_atomic(addr);
++}
++
++static inline void ibscif_unmap_dst(struct page *page, void *addr)
++{
++ if (likely(addr))
++ kunmap_atomic(addr);
++ if (likely(page)) {
++ flush_dcache_page(page);
++ if (!PageReserved(page))
++ set_page_dirty(page);
++ }
++}
++
++#ifdef IBSCIF_PERF_TEST
++#define IBSCIF_PERF_SAMPLE(counter,next) ibscif_perf_sample(counter,next)
++#else
++#define IBSCIF_PERF_SAMPLE(counter,next)
++#endif
++
++int ibscif_atomic_copy(void *dst_addr, void *src_addr, u32 copy_len, int head_copied);
++
++int ibscif_wiremap_add(void *obj, int *id);
++void ibscif_wiremap_del(int id);
++
++int ibscif_dev_init(void);
++void ibscif_protocol_init_pre(void);
++void ibscif_protocol_init_post(void);
++
++void ibscif_dev_cleanup(void);
++void ibscif_protocol_cleanup(void);
++
++int ibscif_procfs_add_dev(struct ibscif_dev *dev);
++void ibscif_procfs_remove_dev(struct ibscif_dev *dev);
++
++int ibscif_reserve_quota(int *npages);
++void ibscif_release_quota(int npages);
++
++void ibscif_scheduler_add_qp(struct ibscif_qp *qp);
++void ibscif_scheduler_remove_qp(struct ibscif_qp *qp);
++void ibscif_schedule(struct ibscif_wq *wq);
++
++struct ib_ah *ibscif_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr);
++int ibscif_destroy_ah(struct ib_ah *ibah);
++
++struct ib_pd *ibscif_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata);
++int ibscif_dealloc_pd(struct ib_pd *ibpd);
++
++struct ib_qp *ibscif_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attr, struct ib_udata *udata);
++int ibscif_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr);
++int ibscif_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata);
++int ibscif_destroy_qp(struct ib_qp *ibqp);
++void ibscif_qp_internal_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason);
++void ibscif_qp_remote_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason);
++void ibscif_qp_add_ud_conn(struct ibscif_qp *qp, struct ibscif_conn *conn);
++
++struct ib_cq *ibscif_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr,
++ struct ib_ucontext *context, struct ib_udata *udata);
++
++int ibscif_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
++int ibscif_destroy_cq(struct ib_cq *ibcq);
++int ibscif_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
++int ibscif_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify);
++void ibscif_notify_cq(struct ibscif_cq *cq);
++void ibscif_clear_cqes(struct ibscif_cq *cq, struct ibscif_wq *wq);
++int ibscif_reserve_cqe(struct ibscif_cq *cq, struct ibscif_wc **wc);
++void ibscif_append_cqe(struct ibscif_cq *cq, struct ibscif_wc *wc, int solicited);
++
++struct ib_mr *ibscif_get_dma_mr(struct ib_pd *ibpd, int access);
++
++struct ib_mr *ibscif_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
++ u64 virt_addr, int access, struct ib_udata *udata);
++int ibscif_dereg_mr(struct ib_mr *ibmr);
++struct ibscif_mr *ibscif_validate_mr(u32 key, u64 addr, int length,
++ struct ib_pd *ibpd, enum ib_access_flags access);
++struct ibscif_mreg_info *ibscif_mr_get_mreg(struct ibscif_mr *mr, struct ibscif_conn *conn);
++void ibscif_refresh_mreg(struct ibscif_conn *conn);
++
++int ibscif_post_send(struct ib_qp *ibqp, struct ib_send_wr *ibwr, struct ib_send_wr **bad_wr);
++int ibscif_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ibwr, struct ib_recv_wr **bad_wr);
++
++void ibscif_send_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason);
++void ibscif_send_close(struct ibscif_conn *conn);
++void ibscif_send_reopen(struct ibscif_conn *conn);
++
++void ibscif_loopback_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason);
++void ibscif_loopback(struct ibscif_wq *sq);
++
++int ibscif_xmit_wr(struct ibscif_wq *wq, struct ibscif_wr *wr, int tx_limit, int retransmit,
++ u32 from_seq, u32 *posted);
++int ibscif_process_sq_completions(struct ibscif_qp *qp);
++
++struct ibscif_conn *ibscif_get_conn(int node_id, int remote_node_id, int find_local_peer);
++void ibscif_put_conn(struct ibscif_conn *conn);
++void ibscif_do_accept(struct ibscif_dev *dev);
++void ibscif_get_pollep_list(struct scif_pollepd *polleps, struct ibscif_dev **devs,
++ int *types, struct ibscif_conn **conns, int *count);
++void ibscif_refresh_pollep_list(void);
++void ibscif_get_ep_list(scif_epd_t *eps, int *count);
++void ibscif_remove_ep(struct ibscif_dev *dev, scif_epd_t ep);
++void ibscif_free_conn(struct ibscif_conn *conn);
++int ibscif_cleanup_idle_conn(void);
++void ibscif_perf_sample(int counter, int next);
++
++int ibscif_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
++int ibscif_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
++int ibscif_cm_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
++int ibscif_cm_create_listen(struct iw_cm_id *cm_id, int backlog);
++int ibscif_cm_destroy_listen(struct iw_cm_id *cm_id);
++struct ib_qp *ibscif_cm_get_qp(struct ib_device *ibdev, int qpn);
++void ibscif_cm_add_ref(struct ib_qp *ibqp);
++void ibscif_cm_rem_ref(struct ib_qp *ibqp);
++void ibscif_cm_async_callback(void *cm_context);
++int ibscif_process_cm_skb(struct sk_buff *skb, struct ibscif_conn *conn);
++int ibscif_send_cm_req(struct ibscif_cm *cm_ctx);
++int ibscif_send_cm_rep(struct ibscif_cm *cm_ctx);
++int ibscif_send_cm_rej(struct ibscif_cm *cm_ctx, const void *pdata, u8 plen);
++int ibscif_send_cm_rtu(struct ibscif_cm *cm_ctx);
++
++#endif /* IBSCIF_DRIVER_H */
+diff --git a/drivers/infiniband/hw/scif/ibscif_loopback.c b/drivers/infiniband/hw/scif/ibscif_loopback.c
+new file mode 100644
+index 0000000..9f45259
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_loopback.c
+@@ -0,0 +1,582 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++struct ibscif_seg {
++ enum ib_access_flags access;
++ struct ibscif_ds *ds;
++ struct ibscif_mr *mr;
++ struct page **page;
++ void *addr;
++ u32 offset;
++ u32 ds_len;
++ u32 pg_len;
++ void *(*map)(struct page *page);
++ void (*unmap)(struct page *page, void *addr);
++};
++
++static void ibscif_seg_init(struct ibscif_seg *seg, struct ibscif_ds *ds,
++ void *(*map)(struct page *page), void (*unmap)(struct page *page, void *addr),
++ enum ib_access_flags access)
++{
++ memset(seg, 0, sizeof *seg);
++ seg->ds = ds;
++ seg->map = map;
++ seg->unmap = unmap;
++ seg->access = access;
++}
++
++static void ibscif_seg_fini(struct ibscif_seg *seg)
++{
++ /* seg->page is still NULL if ibscif_seg_set() failed before
++ * mapping anything */
++ if (seg->page)
++ seg->unmap(*seg->page, seg->addr);
++ if (likely(seg->mr))
++ ibscif_put_mr(seg->mr);
++}
++
++static int ibscif_seg_set(struct ibscif_seg *seg, u32 length, u32 copy_len)
++{
++ struct page **prev_page;
++
++ if (!seg->ds_len) {
++
++ if (seg->mr)
++ ibscif_put_mr(seg->mr);
++
++ seg->mr = ibscif_get_mr(seg->ds->lkey);
++ if (unlikely(IS_ERR(seg->mr)))
++ return PTR_ERR(seg->mr);
++
++ if (unlikely(seg->access && !(seg->mr->access & seg->access)))
++ return -EACCES;
++
++ prev_page = seg->page;
++ seg->offset = seg->ds->offset + (seg->mr->addr & ~PAGE_MASK);
++ seg->page = &seg->mr->page[seg->offset >> PAGE_SHIFT];
++ seg->offset &= ~PAGE_MASK;
++ seg->ds_len = seg->ds->length;
++ seg->pg_len = min(seg->ds_len, (u32)PAGE_SIZE - seg->offset);
++ seg->pg_len = min(seg->pg_len, length);
++
++ if (seg->page != prev_page)
++ seg->addr = seg->map(*seg->page) + seg->offset;
++
++ seg->ds++;
++
++ } else if (!seg->pg_len) {
++
++ seg->unmap(*seg->page, seg->addr);
++
++ seg->page++;
++ seg->addr = seg->map(*seg->page);
++ seg->pg_len = min(seg->ds_len, (u32)PAGE_SIZE);
++ seg->pg_len = min(seg->pg_len, length);
++ } else
++ seg->addr += copy_len;
++
++ return 0;
++}
++
++static inline int ibscif_seg_copy(struct ibscif_seg *dst, struct ibscif_seg *src, u32 length, int head_copied)
++{
++ src->ds_len -= length;
++ src->pg_len -= length;
++
++ dst->ds_len -= length;
++ dst->pg_len -= length;
++
++ return ibscif_atomic_copy(dst->addr, src->addr, length, head_copied);
++}
++
++/*
++ * Copy data from the source to the destination data segment list.
++ * This is a bit complicated since we must map and copy each page
++ * individually and because each data segment can be split across
++ * multiple pages within the memory region as illustrated below:
++ *
++ * +---page---+ +---page---+ +---page---+
++ * | .~~mr~~~|~~~|~~~~~~~~~~|~~~|~~~~~~. |
++ * | | | | [==ds===|===|====] | |
++ * | '~~~~~~~|~~~|~~~~~~~~~~|~~~|~~~~~~' |
++ * +----------+ +----------+ +----------+
++ *
++ * For example, due to different buffer page offsets, copying data
++ * between the following buffers will result in five separate copy
++ * operations as shown by the numeric labels below:
++ *
++ * +----------+ +----------+
++ * | | | |
++ * |1111111111| | |
++ * |2222222222| |1111111111|
++ * +----------+ +----------+
++ *
++ * +----------+ +----------+
++ * |3333333333| |2222222222|
++ * |3333333333| |3333333333|
++ * |4444444444| |3333333333|
++ * +----------+ +----------+
++ *
++ * +----------+ +----------+
++ * |5555555555| |4444444444|
++ * | | |5555555555|
++ * | | | |
++ * +----------+ +----------+
++ *
++ * The source and destination data segment list lengths are
++ * assumed to have been validated outside of this function.
++ */
++static int ibscif_dscopy(struct ibscif_ds *dst_ds, struct ibscif_ds *src_ds, u32 length)
++{
++ struct ibscif_seg src, dst;
++ int head_copied;
++ u32 copy_len;
++ int err = 0;
++
++ ibscif_seg_init(&src, src_ds, ibscif_map_src, ibscif_unmap_src, 0);
++ ibscif_seg_init(&dst, dst_ds, ibscif_map_dst, ibscif_unmap_dst, IB_ACCESS_LOCAL_WRITE);
++
++ head_copied = 0;
++ for (copy_len = 0; length; length -= copy_len) {
++
++ err = ibscif_seg_set(&src, length, copy_len);
++ if (unlikely(err))
++ break;
++ err = ibscif_seg_set(&dst, length, copy_len);
++ if (unlikely(err))
++ break;
++
++ copy_len = min(src.pg_len, dst.pg_len);
++ head_copied = ibscif_seg_copy(&dst, &src, copy_len, head_copied);
++ }
++
++ ibscif_seg_fini(&src);
++ ibscif_seg_fini(&dst);
++
++ return err;
++}
++
++/* Hold sq->lock during this call for synchronization. */
++static int ibscif_complete_sq_wr(struct ibscif_wq *sq, struct ibscif_wr *send_wr, enum ib_wc_status status)
++{
++ struct ibscif_qp *qp = sq->qp;
++ struct ibscif_wc *wc;
++ int err;
++
++ ibscif_clear_ds_refs(send_wr->ds_list, send_wr->num_ds);
++ sq->completions++;
++ sq->reap++;
++
++ if (send_wr->flags & IB_SEND_SIGNALED) {
++ struct ibscif_cq *cq = to_cq(qp->ibqp.send_cq);
++
++ err = ibscif_reserve_cqe(cq, &wc);
++ if (unlikely(err))
++ return err;
++
++ wc->ibwc.qp = &qp->ibqp;
++ wc->ibwc.src_qp = qp->remote_qpn;
++ wc->ibwc.wr_id = send_wr->id;
++ wc->ibwc.opcode = to_ib_wc_opcode(send_wr->opcode);
++ wc->ibwc.status = status;
++ wc->ibwc.ex.imm_data = 0;
++ wc->ibwc.port_num = 1;
++
++ if ((enum ib_wr_opcode)send_wr->opcode == IB_WR_RDMA_READ)
++ wc->ibwc.byte_len = send_wr->read.remote_length;
++ else if (((enum ib_wr_opcode)send_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) ||
++ ((enum ib_wr_opcode)send_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD))
++ wc->ibwc.byte_len = sizeof send_wr->atomic_rsp.orig_data;
++ else
++ wc->ibwc.byte_len = send_wr->length;
++
++ wc->wq = sq;
++ wc->reap = sq->reap;
++ sq->reap = 0;
++
++ ibscif_append_cqe(cq, wc, 0);
++ }
++
++ return 0;
++}
++
++/* Hold rq->lock during this call for synchronization. */
++static int ibscif_complete_rq_wr(struct ibscif_wq *rq, struct ibscif_wr *recv_wr,
++ struct ibscif_wr *send_wr, enum ib_wc_status status)
++{
++ struct ibscif_qp *qp = rq->qp;
++ struct ibscif_cq *cq = to_cq(qp->ibqp.recv_cq);
++ struct ibscif_wc *wc;
++ int err;
++
++ ibscif_clear_ds_refs(recv_wr->ds_list, recv_wr->num_ds);
++
++ err = ibscif_reserve_cqe(cq, &wc);
++ if (unlikely(err))
++ return err;
++
++ wc->ibwc.qp = &qp->ibqp;
++ wc->ibwc.src_qp = qp->remote_qpn;
++ wc->ibwc.wr_id = recv_wr->id;
++ wc->ibwc.status = status;
++ wc->ibwc.byte_len = send_wr->length;
++ wc->ibwc.port_num = 1;
++
++ if ((enum ib_wr_opcode)send_wr->opcode == IB_WR_SEND_WITH_IMM) {
++ DEV_STAT(qp->dev, recv_imm++);
++ wc->ibwc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
++ wc->ibwc.ex.imm_data = cpu_to_be32(send_wr->send.immediate_data);
++ } else if ((enum ib_wr_opcode)send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
++ DEV_STAT(qp->dev, recv_imm++);
++ wc->ibwc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
++ wc->ibwc.ex.imm_data = cpu_to_be32(send_wr->write.immediate_data);
++ } else {
++ DEV_STAT(qp->dev, recv++);
++ wc->ibwc.opcode = IB_WC_RECV;
++ wc->ibwc.ex.imm_data = 0;
++ }
++
++ wc->wq = rq;
++ wc->reap = 1;
++ rq->completions++;
++
++ ibscif_append_cqe(cq, wc, !!(send_wr->flags & IB_SEND_SOLICITED));
++
++ return 0;
++}
++
++/* Hold wq lock during this call for synchronization. */
++static int ibscif_validate_wq(struct ibscif_wq *wq, struct ibscif_wr **wr, enum ib_access_flags access)
++{
++ if (unlikely(wq->qp->state != QP_CONNECTED))
++ return -ENOTCONN;
++
++ if (unlikely(access && !(wq->qp->access & access)))
++ return -EACCES;
++
++ if (wr) {
++ int next;
++
++ if (unlikely(!wq->size))
++ return -ENOSPC;
++
++ next = (wq->head + wq->completions) % wq->size;
++
++ if (unlikely(next == wq->tail))
++ return -ENOBUFS;
++
++ *wr = ibscif_get_wr(wq, next);
++ }
++
++ return 0;
++}
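
The ring arithmetic in ibscif_validate_wq is easier to see with concrete
numbers (a standalone sketch, not driver code): the next work request to
complete sits at (head + completions) modulo size, and catching up to
tail means nothing is outstanding.

    /* A 4-slot ring: 3 WRs posted (tail == 3), 1 already completed. */
    unsigned int head = 0, completions = 1, tail = 3, size = 4;
    unsigned int next = (head + completions) % size;    /* == 1 */

    /* next != tail, so slot 1 holds the oldest uncompleted WR. Once
     * completions reaches 3, next == tail and there is nothing left
     * to complete, which the function reports as -ENOBUFS. */
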
++
++static int ibscif_loopback_send(struct ibscif_wq *sq, struct ibscif_wq *rq, struct ibscif_wr *send_wr)
++{
++ struct ibscif_wr *recv_wr;
++ int err;
++
++ spin_lock_bh(&rq->lock);
++
++ err = ibscif_validate_wq(rq, &recv_wr, 0);
++ if (unlikely(err))
++ goto out;
++
++ if (likely(send_wr->length)) {
++ if (unlikely(send_wr->length > recv_wr->length)) {
++ err = -EMSGSIZE;
++ goto out;
++ }
++
++ err = ibscif_dscopy(recv_wr->ds_list, send_wr->ds_list, send_wr->length);
++ if (unlikely(err))
++ goto out;
++ }
++
++ err = ibscif_complete_rq_wr(rq, recv_wr, send_wr, IB_WC_SUCCESS);
++out:
++ spin_unlock_bh(&rq->lock);
++
++ return err;
++}
++
++static int ibscif_loopback_write(struct ibscif_wq *sq, struct ibscif_wq *rq, struct ibscif_wr *write_wr)
++{
++ struct ibscif_wr *recv_wr = NULL;
++ struct ibscif_mr *dst_mr = ERR_PTR(-ENOENT);
++ int err;
++
++ spin_lock_bh(&rq->lock);
++
++ err = ibscif_validate_wq(rq, ((enum ib_wr_opcode)write_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
++ &recv_wr : NULL, IB_ACCESS_REMOTE_WRITE);
++ if (unlikely(err))
++ goto out;
++
++ if (likely(write_wr->length)) {
++ struct ibscif_ds dst_ds;
++
++ dst_mr = ibscif_validate_mr(write_wr->write.rkey, write_wr->write.remote_address,
++ write_wr->length, rq->qp->ibqp.pd, IB_ACCESS_REMOTE_WRITE);
++ if (unlikely(IS_ERR(dst_mr))) {
++ err = PTR_ERR(dst_mr);
++ goto out;
++ }
++
++ dst_ds.mr = dst_mr;
++ dst_ds.offset = write_wr->write.remote_address - dst_mr->addr;
++ dst_ds.length = write_wr->length;
++ dst_ds.lkey = dst_mr->ibmr.lkey;
++
++ err = ibscif_dscopy(&dst_ds, write_wr->ds_list, dst_ds.length);
++ if (unlikely(err))
++ goto out;
++ } else
++ err = 0;
++
++ if (recv_wr)
++ err = ibscif_complete_rq_wr(rq, recv_wr, write_wr, IB_WC_SUCCESS);
++out:
++ if (likely(!IS_ERR(dst_mr)))
++ ibscif_put_mr(dst_mr);
++
++ spin_unlock_bh(&rq->lock);
++
++ return err;
++}
++
++static int ibscif_loopback_read(struct ibscif_wq *sq, struct ibscif_wq *iq, struct ibscif_wr *read_wr)
++{
++ struct ibscif_mr *src_mr = ERR_PTR(-ENOENT);
++ int err;
++
++ spin_lock_bh(&iq->lock);
++
++ err = ibscif_validate_wq(iq, NULL, IB_ACCESS_REMOTE_READ);
++ if (unlikely(err))
++ goto out;
++
++ if (!iq->size) {
++ err = -ENOBUFS;
++ goto out;
++ }
++
++ if (likely(read_wr->read.remote_length)) {
++ struct ibscif_ds src_ds;
++
++ src_mr = ibscif_validate_mr(read_wr->read.rkey, read_wr->read.remote_address,
++ read_wr->read.remote_length, iq->qp->ibqp.pd,
++ IB_ACCESS_REMOTE_READ);
++ if (unlikely(IS_ERR(src_mr))) {
++ err = PTR_ERR(src_mr);
++ goto out;
++ }
++
++ src_ds.mr = src_mr;
++ src_ds.offset = read_wr->read.remote_address - src_mr->addr;
++ src_ds.length = read_wr->read.remote_length;
++ src_ds.lkey = src_mr->ibmr.lkey;
++
++ err = ibscif_dscopy(read_wr->ds_list, &src_ds, src_ds.length);
++ } else
++ err = 0;
++out:
++ if (likely(!IS_ERR(src_mr)))
++ ibscif_put_mr(src_mr);
++
++ spin_unlock_bh(&iq->lock);
++
++ atomic_dec(&sq->qp->or_posted);
++
++ return err;
++}
++
++static int ibscif_loopback_atomic(struct ibscif_wq *sq, struct ibscif_wq *iq, struct ibscif_wr *atomic_wr)
++{
++ struct ibscif_mr *src_mr = ERR_PTR(-ENOENT);
++ struct ibscif_ds src_ds;
++ struct page *src_page;
++ u64 *src_addr, addr;
++ u32 src_offset, rkey;
++ int err;
++
++ if ((enum ib_wr_opcode)atomic_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
++ addr = atomic_wr->cmp_swp.remote_address;
++ rkey = atomic_wr->cmp_swp.rkey;
++ } else {
++ addr = atomic_wr->fetch_add.remote_address;
++ rkey = atomic_wr->fetch_add.rkey;
++ }
++
++ spin_lock_bh(&iq->lock);
++
++ err = ibscif_validate_wq(iq, NULL, IB_ACCESS_REMOTE_ATOMIC);
++ if (unlikely(err))
++ goto out;
++
++ if (!iq->size) {
++ err = -ENOBUFS;
++ goto out;
++ }
++
++ src_mr = ibscif_validate_mr(rkey, addr, sizeof atomic_wr->atomic_rsp.orig_data,
++ iq->qp->ibqp.pd, IB_ACCESS_REMOTE_ATOMIC);
++ if (unlikely(IS_ERR(src_mr))) {
++ err = PTR_ERR(src_mr);
++ goto out;
++ }
++
++ /* Build a source data segment to copy the original data. */
++ src_ds.mr = src_mr;
++ src_ds.offset = addr - src_mr->addr;
++ src_ds.length = sizeof atomic_wr->atomic_rsp.orig_data;
++ src_ds.lkey = src_mr->ibmr.lkey;
++
++ /* Determine which page to map. */
++ src_offset = src_ds.offset + (src_mr->addr & ~PAGE_MASK);
++ src_page = src_mr->page[src_offset >> PAGE_SHIFT];
++ src_offset &= ~PAGE_MASK;
++
++ /* Lock to perform the atomic operation atomically. */
++ spin_lock_bh(&iq->qp->dev->atomic_op);
++
++ /* Copy the original data; this handles any ds_list crossing. */
++ err = ibscif_dscopy(atomic_wr->ds_list, &src_ds, sizeof atomic_wr->atomic_rsp.orig_data);
++ if (likely(!err)) {
++ src_addr = ibscif_map_src(src_page) + src_offset;
++ if ((enum ib_wr_opcode)atomic_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
++ *src_addr += atomic_wr->fetch_add.add_operand;
++ else if (*src_addr == atomic_wr->cmp_swp.cmp_operand)
++ *src_addr = atomic_wr->cmp_swp.swp_operand;
++ ibscif_unmap_src(src_page, src_addr);
++ }
++
++ /* Atomic operation is complete. */
++ spin_unlock_bh(&iq->qp->dev->atomic_op);
++out:
++ if (likely(!IS_ERR(src_mr)))
++ ibscif_put_mr(src_mr);
++
++ spin_unlock_bh(&iq->lock);
++
++ atomic_dec(&sq->qp->or_posted);
++
++ return err;
++}
++
++void ibscif_loopback_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason)
++{
++ struct ibscif_qp *remote_qp;
++
++ remote_qp = ibscif_get_qp(qp->remote_qpn);
++ if (unlikely(IS_ERR(remote_qp)))
++ return;
++
++ /* Don't bother if the SQ is connected to the RQ on the same QP. */
++ if (remote_qp != qp)
++ ibscif_qp_remote_disconnect(remote_qp, reason);
++
++ ibscif_put_qp(remote_qp);
++}
++
++/*
++ * Loopback QPs connected through the same MAC address.
++ * This includes an SQ connected to the RQ on the same QP.
++ */
++void ibscif_loopback(struct ibscif_wq *sq)
++{
++ struct ibscif_wq *rq, *iq;
++ struct ibscif_qp *remote_qp;
++ struct ibscif_wr *wr;
++ int status = 0, err = 0;
++
++ BUG_ON(!is_sq(sq));
++
++again:
++ remote_qp = ibscif_get_qp(sq->qp->remote_qpn);
++ if (unlikely(IS_ERR(remote_qp))) {
++ ibscif_qp_remote_disconnect(sq->qp, IBSCIF_REASON_INVALID_QP);
++ return;
++ }
++ rq = &remote_qp->rq;
++ iq = &remote_qp->iq;
++
++ DEV_STAT(sq->qp->dev, loopback++);
++
++ spin_lock_bh(&sq->lock);
++ for (wr = ibscif_get_wr(sq, sq->next_wr);
++ (sq->next_wr != sq->tail) && !err;
++ sq->next_wr = (sq->next_wr + 1) % sq->size) {
++
++ switch (wr->opcode) {
++
++ case WR_SEND:
++ case WR_SEND_WITH_IMM:
++ status = ibscif_loopback_send(sq, rq, wr);
++ break;
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM:
++ status = ibscif_loopback_write(sq, rq, wr);
++ break;
++ case WR_RDMA_READ:
++ status = ibscif_loopback_read(sq, iq, wr);
++ break;
++ case WR_ATOMIC_CMP_AND_SWP:
++ case WR_ATOMIC_FETCH_AND_ADD:
++ status = ibscif_loopback_atomic(sq, iq, wr);
++ break;
++ default:
++ status = -ENOSYS;
++ break;
++ }
++
++ if (likely(!status)) {
++ err = ibscif_complete_sq_wr(sq, wr, IB_WC_SUCCESS);
++
++ spin_unlock_bh(&sq->lock);
++ ibscif_notify_cq(to_cq(sq->qp->ibqp.send_cq));
++ ibscif_notify_cq(to_cq(remote_qp->ibqp.recv_cq));
++ spin_lock_bh(&sq->lock);
++ } else
++ break;
++ }
++ spin_unlock_bh(&sq->lock);
++
++ if (unlikely(status) && status != -ENOBUFS)
++ ibscif_qp_remote_disconnect(sq->qp, IBSCIF_REASON_QP_FATAL);
++ else if (unlikely(err))
++ ibscif_qp_internal_disconnect(sq->qp, IBSCIF_REASON_QP_FATAL);
++
++ ibscif_put_qp(remote_qp);
++
++ if (status == -ENOBUFS) {
++ schedule();
++ goto again;
++ }
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_main.c b/drivers/infiniband/hw/scif/ibscif_main.c
+new file mode 100644
+index 0000000..96d4808
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_main.c
+@@ -0,0 +1,332 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++static const char ibscif_signon[] = DRV_SIGNON;
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_LICENSE("Dual BSD/GPL");
++MODULE_DESCRIPTION(DRV_DESC);
++MODULE_VERSION(DRV_VERSION);
++
++#define MODULE_PARAM(type, name, value, desc) \
++ type name = value; \
++ module_param(name, type, 0664); \
++ MODULE_PARM_DESC(name, desc)
++
++#define MODULE_ARRAY(name, size, value, desc) \
++ unsigned int name##_argc; \
++ char *name[size] = { [0 ... size-1] = value }; \
++ module_param_array(name, charp, &name##_argc, 0644); \
++ MODULE_PARM_DESC(name, desc)
++
++#define DEFAULT_MAX_PINNED 50
++MODULE_PARAM(int, max_pinned, DEFAULT_MAX_PINNED,
++ "Maximum percent of physical memory that may be pinned");
++
++#define DEFAULT_WINDOW_SIZE 40
++MODULE_PARAM(int, window_size, DEFAULT_WINDOW_SIZE,
++ "Maximum number of outstanding unacknowledged packets");
++
++#define DEFAULT_RMA_THRESHOLD 1024
++MODULE_PARAM(int, rma_threshold, DEFAULT_RMA_THRESHOLD,
++ "Maximum message size sent through scif_send()");
++
++MODULE_PARAM(int, fast_rdma, 1,
++ "Use scif_writeto()/scif_readfrom() directly for RDMA write/read");
++
++MODULE_PARAM(int, blocking_send, 0,
++ "Use blocking version of scif_send()");
++
++MODULE_PARAM(int, blocking_recv, 1,
++ "Use blocking version of scif_recv()");
++
++MODULE_PARAM(int, scif_loopback, 1,
++ "Use SCIF lookback instead of kernel copy based loopback");
++
++MODULE_PARAM(int, host_proxy, 0,
++ "Proxy card side RDMA operations to host");
++
++MODULE_PARAM(int, new_ib_type, 1,
++ "Use new transport type dedicated to IBSCIF");
++
++MODULE_PARAM(int, verbose, 0,
++ "Produce more log info for debugging purpose");
++
++MODULE_PARAM(int, check_grh, 1,
++ "Detect outside-box connection by checking the global routing header");
++
++static atomic_t avail_pages; /* Calculated from max_pinned and totalram_pages */
++
++LIST_HEAD(devlist);
++DEFINE_SEMAPHORE(devlist_mutex);
++
++DEFINE_IDR(wiremap);
++DEFINE_RWLOCK(wiremap_lock);
++static u32 reserved_0 = 0;
++
++void ibscif_dump(char *str, unsigned char *buf, int len)
++{
++	char *p, tmp[(16*3)+1];
++	int i;
++
++	return;	/* debugging aid; remove this return to enable the hex dump */
++
++	len = len > 64 ? 64 : len;
++	while (len) {
++		p = tmp;
++		for (i = len > 16 ? 16 : len; i; i--, len--)
++			p += sprintf(p, "%2x ", *buf++);
++		printk("(%d)%s: %s\n", smp_processor_id(), str, tmp);
++	}
++}
++
++int ibscif_reserve_quota(int *npages)
++{
++ int c, old, err;
++
++ if (!*npages)
++ return 0;
++
++ err = 0;
++ c = atomic_read(&avail_pages);
++ for (;;) {
++ if (unlikely(c < *npages))
++ break;
++ old = atomic_cmpxchg(&avail_pages, c, c - *npages);
++ if (likely(old == c))
++ break;
++ c = old;
++ }
++
++ if (c < *npages) {
++ *npages = 0;
++ err = -EDQUOT;
++ }
++
++ return err;
++}
++
++void ibscif_release_quota(int npages)
++{
++ if (npages)
++ atomic_add(npages, &avail_pages);
++}
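
Callers bracket a pinning operation with this reserve/release pair and
treat -EDQUOT as a soft failure; the memory-region code later in this
patch follows the pattern sketched here (npages is a hypothetical local
count):

    int npages = requested_pages;

    err = ibscif_reserve_quota(&npages);
    if (err)
        return err;    /* -EDQUOT: the pinned-page budget is exhausted */

    /* ... pin and register npages pages ... */

    ibscif_release_quota(npages);    /* on teardown, or on a later error */

Note that ibscif_reserve_quota() zeroes *npages on failure, so releasing
the saved count on an error path is harmless.
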
++
++/*
++ * To work around MPI's assumption that its header structures are written
++ * atomically, write the first 16 integers of a transfer atomically.
++ *
++ * Update: MPI's ofa module makes a different assumption: the last four
++ * bytes must be written last and atomically. The buffers used in this
++ * case are always aligned.
++ */
++int ibscif_atomic_copy(void *dst_addr, void *src_addr, u32 copy_len, int head_copied)
++{
++ volatile int *src_x = (int *)src_addr;
++ volatile int *dst_x = (int *)dst_addr;
++ volatile u8 *src_c, *dst_c;
++ int head_aligned, tail_aligned;
++
++ if (unlikely(!copy_len))
++ return head_copied;
++
++ head_aligned = !((unsigned long)src_addr & (sizeof(int)-1)) &&
++ !((unsigned long)dst_addr & (sizeof(int)-1));
++
++
++ tail_aligned = !((unsigned long)(src_addr+copy_len) & (sizeof(int)-1)) &&
++ !((unsigned long)(dst_addr+copy_len) & (sizeof(int)-1));
++
++ if (!head_copied && head_aligned) {
++
++ switch (copy_len) {
++ case sizeof(int):
++ *dst_x = *src_x;
++ goto done;
++ case sizeof(int)*2:
++ *dst_x++ = *src_x++;
++ *dst_x = *src_x;
++ goto done;
++ case sizeof(int)*3:
++ *dst_x++ = *src_x++;
++ *dst_x++ = *src_x++;
++ *dst_x = *src_x;
++ goto done;
++ default:
++ if (copy_len >= (sizeof(int)*4)) {
++ /* We have at least a whole header to copy. */
++ head_copied = 1;
++ copy_len -= sizeof(int)*4;
++
++ *dst_x++ = *src_x++;
++ *dst_x++ = *src_x++;
++ *dst_x++ = *src_x++;
++
++ if (copy_len == 0) {
++ *dst_x = *src_x;
++ goto done;
++ }
++ *dst_x++ = *src_x++;
++ }
++ break;
++ }
++ }
++
++ /* The last integer is aligned. Copy all but the last int, then the last int */
++ if (tail_aligned && copy_len >= sizeof(int)) {
++ copy_len -= sizeof(int);
++ if (copy_len)
++ memcpy((void *)dst_x, (void *)src_x, copy_len);
++ smp_wmb();
++ src_x = (volatile int *)((char *)src_x + copy_len);
++ dst_x = (volatile int *)((char *)dst_x + copy_len);
++ *dst_x = *src_x;
++ goto done;
++ }
++
++ /* Bad alignment. Copy all but the last byte, then the last byte */
++ if (--copy_len)
++ memcpy((void *)dst_x, (void *)src_x, copy_len);
++
++ src_c = ((volatile u8 *)src_x) + copy_len;
++ dst_c = ((volatile u8 *)dst_x) + copy_len;
++ smp_wmb();
++ *dst_c = *src_c;
++done:
++ return head_copied;
++}
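
The consumer-side assumption being protected can be sketched as a reader
polling the final word of a header. The names hdr and EXPECTED_FLAG are
hypothetical, but the ordering argument is exactly the one the smp_wmb()
above enforces:

    /* Reader polls the last int of a 16-int MPI-style header. Once it
     * changes, every earlier byte is guaranteed to be visible, because
     * the writer issued smp_wmb() before the final aligned store. */
    volatile int *tail = (volatile int *)(hdr + 15 * sizeof(int));

    while (*tail != EXPECTED_FLAG)
        cpu_relax();
    /* hdr[0..14] may now be read safely */
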
++
++int ibscif_wiremap_add(void *obj, int *id)
++{
++ int ret;
++
++ write_lock_bh(&wiremap_lock);
++ ret = idr_alloc(&wiremap, obj, 0, 0, GFP_ATOMIC);
++ write_unlock_bh(&wiremap_lock);
++
++ if (ret < 0)
++ return ret;
++
++ *id = ret;
++
++ return 0;
++}
++
++void ibscif_wiremap_del(int id)
++{
++ write_lock_bh(&wiremap_lock);
++ idr_remove(&wiremap, id);
++ write_unlock_bh(&wiremap_lock);
++}
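
The read side of the map is symmetric. A lookup under the reader half of
wiremap_lock would look like the sketch below; the driver's actual
ibscif_get_mr/ibscif_get_qp helpers (defined elsewhere) presumably also
take a reference on the object before dropping the lock:

    void *obj;

    read_lock_bh(&wiremap_lock);
    obj = idr_find(&wiremap, id);    /* NULL if id was never allocated */
    read_unlock_bh(&wiremap_lock);
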
++
++static int ibscif_init_wiremap(void)
++{
++ /*
++ * Instead of treating them as opaque, some applications assert that returned key
++ * values are non-zero. As a work-around, reserve the first key from the wiremap.
++ */
++	int ret = idr_alloc(&wiremap, &reserved_0, 0, 1, GFP_KERNEL);
++	if (ret < 0)
++		return ret;
++	BUG_ON(ret != 0);	/* the reserved entry must get key 0 */
++	return 0;
++}
++
++static void ibscif_free_wiremap(void)
++{
++ idr_destroy(&wiremap);
++}
++
++static void ibscif_init_params(void)
++{
++ if ((max_pinned <= 0) || (max_pinned > 100)) {
++ max_pinned = DEFAULT_MAX_PINNED;
++ printk(KERN_WARNING PFX "Corrected max_pinned module parameter to %d.\n",
++ max_pinned);
++ }
++ if (window_size < MIN_WINDOW_SIZE) {
++ window_size = MIN_WINDOW_SIZE;
++ printk(KERN_WARNING PFX "Corrected window_size module parameter to %d.\n",
++ window_size);
++ }
++ if (rma_threshold < 0) {
++ rma_threshold = 0x7FFFFFFF;
++ printk(KERN_WARNING PFX "Corrected rma_threshold module parameter to %d.\n",
++ rma_threshold);
++ }
++
++ /*
++ * Hardware RDMA devices have built-in limits on the number of registered pages.
++ * The avail_pages variable provides a limit for this software device.
++ */
++ atomic_set(&avail_pages, max_pinned * (totalram_pages / 100));
++}
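
As a worked example with the defaults: on a host with 16 GiB of RAM and
4 KiB pages, totalram_pages is 4194304, so the budget is
50 * (4194304 / 100), roughly 2.1 million pages, i.e. at most half of
physical memory may be pinned by this driver.
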
++
++static int __init ibscif_init(void)
++{
++ int err;
++
++ printk(KERN_INFO PFX "%s\n", ibscif_signon);
++ printk(KERN_INFO PFX "max_pinned=%d, window_size=%d, "
++ "blocking_send=%d, blocking_recv=%d, "
++ "fast_rdma=%d, "
++ "host_proxy=%d, "
++ "rma_threshold=%d, scif_loopback=%d, "
++ "new_ib_type=%d, verbose=%d, "
++ "check_grh=%d\n",
++ max_pinned, window_size,
++ blocking_send, blocking_recv,
++ fast_rdma,
++ host_proxy,
++ rma_threshold, scif_loopback,
++ new_ib_type, verbose,
++ check_grh);
++
++ ibscif_init_params();
++
++ err = ibscif_init_wiremap();
++ if (err)
++ return err;
++
++ err = ibscif_dev_init();
++ if (!err)
++ return 0;
++
++ ibscif_free_wiremap();
++ return err;
++}
++
++static void __exit ibscif_exit(void)
++{
++ ibscif_dev_cleanup();
++ ibscif_free_wiremap();
++ printk(KERN_INFO PFX "unloaded\n");
++}
++
++module_init(ibscif_init);
++module_exit(ibscif_exit);
+diff --git a/drivers/infiniband/hw/scif/ibscif_mr.c b/drivers/infiniband/hw/scif/ibscif_mr.c
+new file mode 100644
+index 0000000..172b368
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_mr.c
+@@ -0,0 +1,486 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <linux/vmalloc.h>
++
++#include "ibscif_driver.h"
++
++static int ibscif_mr_init_mreg(struct ibscif_mr *mr);
++
++struct ib_mr *ibscif_get_dma_mr(struct ib_pd *ibpd, int access)
++{
++ struct ibscif_dev *dev = to_dev(ibpd->device);
++ struct ibscif_mr *mr;
++ int err;
++
++ if (!atomic_add_unless(&dev->mr_cnt, 1, MAX_MRS))
++ return ERR_PTR(-EAGAIN);
++
++ mr = kzalloc(sizeof *mr, GFP_KERNEL);
++ if (!mr) {
++ err = -ENOMEM;
++ printk(KERN_ALERT PFX "%s: unable to allocate mr.\n", __func__);
++ goto out1;
++ }
++
++ kref_init(&mr->ref);
++ init_completion(&mr->done);
++
++ err = ibscif_wiremap_add(mr, &mr->ibmr.lkey);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: unable to allocate lkey.\n", __func__);
++ goto out2;
++ }
++
++ if (mr->ibmr.lkey > IBSCIF_MR_MAX_KEY) {
++ err = -ENOSPC;
++ printk(KERN_ALERT PFX "%s: lkey (%x) out of range.\n", __func__, mr->ibmr.lkey);
++ goto out3;
++ }
++
++ mr->ibmr.device = ibpd->device; /* For ibscif_dereg_mr() calls below. */
++ mr->ibmr.rkey = mr->ibmr.lkey;
++ mr->access = access;
++ mr->magic = MR_MAGIC;
++ INIT_LIST_HEAD(&mr->mreg_list);
++
++ return &mr->ibmr;
++
++out3:
++ ibscif_wiremap_del(mr->ibmr.lkey);
++out2:
++ kfree(mr);
++out1:
++ atomic_dec(&dev->mr_cnt);
++ return ERR_PTR(err);
++}
++
++struct ib_mr *ibscif_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
++ u64 virt_addr, int access, struct ib_udata *udata)
++{
++ struct ib_mr *ibmr;
++ struct ibscif_mr *mr;
++ struct scatterlist *sg;
++ struct ibscif_dev *dev;
++ int i, k, err;
++
++ if (length && ((start + length - 1) < start))
++ return ERR_PTR(-EOVERFLOW);
++
++ ibmr = ibscif_get_dma_mr(ibpd, access);
++ if (IS_ERR(ibmr))
++ return ibmr;
++
++ mr = to_mr(ibmr);
++ mr->addr = start;
++
++ mr->umem = ib_umem_get(ibpd->uobject->context, start, length, access, 0/*dma_sync*/);
++ if (IS_ERR(mr->umem)) {
++ err = PTR_ERR(mr->umem);
++ printk(KERN_ALERT PFX "%s: ib_umem_get returns %d.\n", __func__, err);
++ goto out;
++ }
++
++ mr->npages = ib_umem_page_count(mr->umem);
++ if (!mr->npages)
++ return &mr->ibmr;
++
++ mr->length = mr->umem->length;
++
++ err = ibscif_reserve_quota(&mr->npages);
++ if (err)
++ goto out;
++
++ mr->page = vzalloc(mr->npages * sizeof *mr->page);
++ if (!mr->page) {
++ err = -ENOMEM;
++ printk(KERN_ALERT PFX "%s: unable to allocate mr->page.\n", __func__);
++ goto out;
++ }
++
++ k = 0;
++ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, i)
++ mr->page[k++] = sg_page(sg);
++
++ err = ibscif_mr_init_mreg(mr);
++ if (err)
++ goto out;
++
++ dev = to_dev(mr->ibmr.device);
++ down(&dev->mr_list_mutex);
++ list_add_tail(&mr->entry, &dev->mr_list);
++ up(&dev->mr_list_mutex);
++
++ return &mr->ibmr;
++out:
++ ibscif_dereg_mr(ibmr);
++ return ERR_PTR(err);
++}
++
++void ibscif_complete_mr(struct kref *ref)
++{
++ struct ibscif_mr *mr = container_of(ref, struct ibscif_mr, ref);
++ complete(&mr->done);
++}
++
++int ibscif_dereg_mr(struct ib_mr *ibmr)
++{
++ struct ibscif_dev *dev = to_dev(ibmr->device);
++ struct ibscif_mr *mr = to_mr(ibmr);
++ struct ibscif_mreg_info *mreg, *next;
++ struct ibscif_mr *mr0, *next0;
++ int ret;
++
++ ibscif_put_mr(mr);
++ wait_for_completion(&mr->done);
++
++ list_for_each_entry_safe(mreg, next, &mr->mreg_list, entry) {
++ do {
++ ret = scif_unregister(mreg->conn->ep, mreg->aligned_offset, mreg->aligned_length);
++ }
++ while (ret == -ERESTARTSYS);
++
++ if (ret && ret != -ENOTCONN)
++ printk(KERN_ALERT PFX "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n",
++ __func__, ret, mreg->conn->ep, mreg->aligned_offset, mreg->aligned_length);
++
++ ibscif_put_conn(mreg->conn);
++ list_del(&mreg->entry);
++ kfree(mreg);
++ }
++
++ down(&dev->mr_list_mutex);
++ list_for_each_entry_safe(mr0, next0, &dev->mr_list, entry) {
++ if (mr0 == mr) {
++ list_del(&mr0->entry);
++ break;
++ }
++ }
++ up(&dev->mr_list_mutex);
++
++ if (mr->pinned_pages)
++ scif_unpin_pages(mr->pinned_pages);
++
++ if (mr->umem && !IS_ERR(mr->umem))
++ ib_umem_release(mr->umem);
++ if (mr->page)
++ vfree(mr->page);
++
++ ibscif_release_quota(mr->npages);
++ atomic_dec(&dev->mr_cnt);
++
++ ibscif_wiremap_del(mr->ibmr.lkey);
++
++ kfree(mr);
++ return 0;
++}
++
++/*
++ * Lookup and validate the given memory region access. A reference is held on success.
++ */
++struct ibscif_mr *ibscif_validate_mr(u32 key, u64 addr, int length,
++ struct ib_pd *ibpd, enum ib_access_flags access)
++{
++ struct ibscif_mr *mr;
++ int err;
++
++ mr = ibscif_get_mr(key);
++ if (unlikely(IS_ERR(mr)))
++ return mr;
++
++ if (unlikely(mr->ibmr.pd != ibpd)) {
++ err = -EPERM;
++ goto out;
++ }
++ if (unlikely(access && !(mr->access & access))) {
++ err = -EACCES;
++ goto out;
++ }
++ if (unlikely((addr < mr->addr) || ((addr + length) > (mr->addr + mr->length)))) {
++ err = -ERANGE;
++ goto out;
++ }
++
++ return mr;
++out:
++ ibscif_put_mr(mr);
++ return ERR_PTR(err);
++}
++
++static void ibscif_dma_nop(struct ib_device *ibdev, u64 addr, size_t size, enum dma_data_direction direction)
++{
++}
++
++static int ibscif_mapping_error(struct ib_device *ibdev, u64 dma_addr)
++{
++ return !dma_addr;
++}
++
++static u64 ibscif_dma_map_single(struct ib_device *ibdev, void *cpu_addr, size_t size,
++ enum dma_data_direction direction)
++{
++ return (u64)cpu_addr;
++}
++
++static u64 ibscif_dma_map_page(struct ib_device *ibdev, struct page *page, unsigned long offset, size_t size,
++ enum dma_data_direction direction)
++{
++ u64 addr;
++
++ if (offset + size > PAGE_SIZE)
++ return 0;
++
++ addr = (u64)page_address(page);
++ if (addr)
++ addr += offset;
++
++ return addr;
++}
++
++static int ibscif_map_sg(struct ib_device *ibdev, struct scatterlist *sg, int nents,
++ enum dma_data_direction direction)
++{
++ u64 addr;
++ int i, ret = nents;
++
++ for (i = 0; i < nents; i++, sg++) {
++ addr = (u64)page_address(sg_page(sg));
++ if (!addr) {
++ ret = 0;
++ break;
++ }
++
++ sg->dma_address = sg->offset + addr;
++ sg->dma_length = sg->length;
++ }
++ return ret;
++}
++
++static void ibscif_unmap_sg(struct ib_device *ibdev, struct scatterlist *sg, int nents,
++ enum dma_data_direction direction)
++{
++}
++
++static void ibscif_sync_single(struct ib_device *ibdev, u64 dma, size_t size,
++ enum dma_data_direction direction)
++{
++}
++
++static void *ibscif_dma_alloc_coherent(struct ib_device *ibdev, size_t size, u64 *dma_handle, gfp_t flag)
++{
++ struct page *p = alloc_pages(flag, get_order(size));
++ void *addr = p ? page_address(p) : NULL;
++
++ if (dma_handle)
++ *dma_handle = (u64)addr;
++
++ return addr;
++}
++
++static void ibscif_dma_free_coherent(struct ib_device *ibdev, size_t size, void *cpu_addr, u64 dma_handle)
++{
++ free_pages((unsigned long)cpu_addr, get_order(size));
++}
++
++struct ib_dma_mapping_ops ibscif_dma_mapping_ops = {
++	.mapping_error		= ibscif_mapping_error,
++	.map_single		= ibscif_dma_map_single,
++	.unmap_single		= ibscif_dma_nop,
++	.map_page		= ibscif_dma_map_page,
++	.unmap_page		= ibscif_dma_nop,
++	.map_sg			= ibscif_map_sg,
++	.unmap_sg		= ibscif_unmap_sg,
++	.sync_single_for_cpu	= ibscif_sync_single,
++	.sync_single_for_device	= ibscif_sync_single,
++	.alloc_coherent		= ibscif_dma_alloc_coherent,
++	.free_coherent		= ibscif_dma_free_coherent,
++};
++
++static void ibscif_dump_mr_list(struct ibscif_dev *dev)
++{
++	struct ibscif_mr *mr;
++
++	list_for_each_entry(mr, &dev->mr_list, entry) {
++		printk(KERN_ALERT PFX "%s: mr=%p [%llx, %x, %x]\n",
++		       __func__, mr, mr->addr, mr->length, mr->ibmr.rkey);
++	}
++}
++
++static int ibscif_mr_reg_with_conn(struct ibscif_mr *mr, struct ibscif_conn *conn, struct ibscif_mreg_info **new_mreg)
++{
++ struct ibscif_mreg_info *mreg;
++ off_t offset, aligned_offset;
++ u64 aligned_addr;
++ int aligned_length;
++ int offset_in_page;
++ int err;
++
++ aligned_addr = mr->addr & PAGE_MASK;
++ offset_in_page = (int)(mr->addr & ~PAGE_MASK);
++ aligned_length = (mr->length + offset_in_page + PAGE_SIZE - 1) & PAGE_MASK;
++ aligned_offset = IBSCIF_MR_VADDR_TO_OFFSET(mr->ibmr.rkey, aligned_addr);
++
++ offset = scif_register_pinned_pages(conn->ep, mr->pinned_pages, aligned_offset, SCIF_MAP_FIXED);
++
++ if (IS_ERR_VALUE(offset)) {
++ printk(KERN_ALERT PFX "%s: scif_register_pinned_pages returns %d\n", __func__, (int)offset);
++ printk(KERN_ALERT PFX "%s: conn=%p, ep=%p, mr=%p, addr=%llx, length=%x, rkey=%x, "
++ "aligned_addr=%llx, aligned_length=%x, aligned_offset=%llx\n",
++ __func__, conn, conn->ep, mr, mr->addr, mr->length, mr->ibmr.rkey,
++ aligned_addr, aligned_length, (uint64_t)aligned_offset);
++ ibscif_dump_mr_list(conn->dev);
++ return (int)offset;
++ }
++
++ BUG_ON(offset != aligned_offset);
++
++ offset += offset_in_page;
++
++ mreg = kzalloc(sizeof(struct ibscif_mreg_info), GFP_KERNEL);
++ if (!mreg) {
++ do {
++ err = scif_unregister(conn->ep, aligned_offset, aligned_length);
++ }
++ while (err == -ERESTARTSYS);
++
++ if (err && err != -ENOTCONN)
++ printk(KERN_ALERT PFX "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n",
++ __func__, err, conn->ep, (uint64_t)aligned_offset, aligned_length);
++
++ return -ENOMEM;
++ }
++ mreg->conn = conn;
++ mreg->offset = (u64)offset;
++ mreg->aligned_offset = aligned_offset;
++ mreg->aligned_length = aligned_length;
++ list_add_tail(&mreg->entry, &mr->mreg_list);
++
++ atomic_inc(&conn->refcnt);
++ if (conn->local_close) {
++ conn->local_close = 0;
++ ibscif_send_reopen(conn);
++ }
++
++ if (new_mreg)
++ *new_mreg = mreg;
++
++ return 0;
++}
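
Plugging hypothetical numbers into the alignment math at the top of this
function makes the page accounting concrete:

    /* mr->addr = 0x12345678, mr->length = 0x2000, 4 KiB pages */
    aligned_addr   = 0x12345678 & PAGE_MASK;                /* 0x12345000 */
    offset_in_page = 0x12345678 & ~PAGE_MASK;               /* 0x678 */
    aligned_length = (0x2000 + 0x678 + 0xfff) & PAGE_MASK;  /* 0x3000 */

    /* An 8 KiB range that starts 0x678 bytes into a page spans three
     * pages, so three full pages are registered with SCIF. */
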
++
++struct ibscif_mreg_info *ibscif_mr_get_mreg(struct ibscif_mr *mr, struct ibscif_conn *conn)
++{
++ struct ibscif_mreg_info *mreg;
++ int err;
++ int i;
++
++ if (unlikely(!conn)) {
++ printk(KERN_ALERT PFX "%s: conn==NULL\n", __func__);
++ return NULL;
++ }
++
++ list_for_each_entry(mreg, &mr->mreg_list, entry){
++ if (mreg->conn == conn)
++ return mreg;
++ }
++
++ mreg = NULL;
++ err = ibscif_mr_reg_with_conn(mr, conn, &mreg);
++ if (err != -EADDRINUSE)
++ return mreg;
++
++ /* another thread is performing the registration */
++ if (verbose)
++ printk(KERN_INFO PFX "%s: mr is being registered by another thread. mr=%p, conn=%p.\n", __func__, mr, conn);
++ for (i=0; i<10000; i++) {
++ list_for_each_entry(mreg, &mr->mreg_list, entry){
++ if (mreg->conn == conn) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: got mreg after %d retries.\n", __func__, i+1);
++ return mreg;
++ }
++ }
++ schedule();
++ }
++ if (verbose)
++ printk(KERN_INFO PFX "%s: failed to get mreg after %d retries.\n", __func__, i);
++ return NULL;
++}
++
++static int ibscif_mr_init_mreg(struct ibscif_mr *mr)
++{
++ struct ibscif_dev *dev = to_dev(mr->ibmr.device);
++ struct ibscif_conn *conn;
++ int prot;
++ u64 aligned_addr;
++ int aligned_length;
++ int offset_in_page;
++ int err;
++
++ aligned_addr = mr->addr & PAGE_MASK;
++ offset_in_page = (int)(mr->addr & ~PAGE_MASK);
++ aligned_length = (mr->length + offset_in_page + PAGE_SIZE - 1) & PAGE_MASK;
++
++#if 0
++	prot = ((mr->access & IB_ACCESS_REMOTE_READ)  ? SCIF_PROT_READ  : 0) |
++	       ((mr->access & IB_ACCESS_REMOTE_WRITE) ? SCIF_PROT_WRITE : 0);
++#else
++	/*
++	 * In IB, the same buffer can be registered multiple times with
++	 * different access rights. SCIF has no mechanism to support that,
++	 * so turn on all access rights to avoid spurious protection errors.
++	 */
++	prot = SCIF_PROT_READ | SCIF_PROT_WRITE;
++#endif
++
++ err = scif_pin_pages((void *)aligned_addr, aligned_length, prot, 0/*user addr*/, &mr->pinned_pages);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: scif_pin_pages returns %d\n", __func__, err);
++ return err;
++ }
++
++ down(&dev->mutex);
++ list_for_each_entry(conn, &dev->conn_list, entry) {
++ err = ibscif_mr_reg_with_conn(mr, conn, NULL);
++ if (err)
++ break;
++ }
++ up(&dev->mutex);
++
++ return err;
++}
++
++void ibscif_refresh_mreg(struct ibscif_conn *conn)
++{
++	struct ibscif_mr *mr;
++
++	down(&conn->dev->mr_list_mutex);
++	list_for_each_entry(mr, &conn->dev->mr_list, entry) {
++		ibscif_mr_get_mreg(mr, conn);
++	}
++	up(&conn->dev->mr_list_mutex);
++}
++
+diff --git a/drivers/infiniband/hw/scif/ibscif_pd.c b/drivers/infiniband/hw/scif/ibscif_pd.c
+new file mode 100644
+index 0000000..a5682cf
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_pd.c
+@@ -0,0 +1,56 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++struct ib_pd *ibscif_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata)
++{
++ struct ibscif_dev *dev = to_dev(ibdev);
++ struct ibscif_pd *pd;
++
++ if (!atomic_add_unless(&dev->pd_cnt, 1, MAX_PDS))
++ return ERR_PTR(-EAGAIN);
++
++ pd = kzalloc(sizeof *pd, GFP_KERNEL);
++ if (!pd) {
++ atomic_dec(&dev->pd_cnt);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ return &pd->ibpd;
++}
++
++int ibscif_dealloc_pd(struct ib_pd *ibpd)
++{
++ struct ibscif_dev *dev = to_dev(ibpd->device);
++ atomic_dec(&dev->pd_cnt);
++ kfree(to_pd(ibpd));
++ return 0;
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_post.c b/drivers/infiniband/hw/scif/ibscif_post.c
+new file mode 100644
+index 0000000..900f75f
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_post.c
+@@ -0,0 +1,320 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++void ibscif_dump_sg(char *str, struct ib_sge *sge, int num)
++{
++	extern void ibscif_dump(char *, unsigned char *, int);
++ if (!sge)
++ return;
++ while (num--) {
++ ibscif_dump(str, (void*)sge->addr, sge->length);
++ sge++;
++ }
++}
++
++/*
++ * Build and validate the wr->ds_list from the given sg_list.
++ * If successful, a reference is held on each mr in the wr->ds_list.
++ */
++static int ibscif_wr_ds(struct ib_pd *ibpd, struct ib_sge *sg_list, int num_sge,
++ struct ibscif_wr *wr, int *total_length, enum ib_access_flags access)
++{
++ struct ibscif_ds *ds_list = wr->ds_list;
++ int err;
++
++ *total_length = 0;
++ for (wr->num_ds = 0; wr->num_ds < num_sge; sg_list++, ds_list++) {
++
++ ds_list->mr = ibscif_validate_mr(sg_list->lkey, sg_list->addr, sg_list->length, ibpd, access);
++ if (unlikely(IS_ERR(ds_list->mr))) {
++ err = PTR_ERR(ds_list->mr);
++ goto out;
++ }
++
++ ds_list->in_use = 1;
++ wr->num_ds++;
++
++ if (unlikely((*total_length + sg_list->length) < *total_length)) {
++ err = -EOVERFLOW;
++ goto out;
++ }
++
++ ds_list->offset = sg_list->addr - ds_list->mr->addr;
++ ds_list->length = sg_list->length;
++ ds_list->lkey = sg_list->lkey;
++ ds_list->current_mreg = NULL;
++
++ *total_length += ds_list->length;
++ }
++
++ return 0;
++out:
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ return err;
++}
++
++int ibscif_post_send(struct ib_qp *ibqp, struct ib_send_wr *ibwr, struct ib_send_wr **bad_wr)
++{
++ struct ibscif_qp *qp = to_qp(ibqp);
++ struct ibscif_wq *sq = &qp->sq;
++ struct ibscif_wr *wr;
++ int nreq = 0, err;
++
++ IBSCIF_PERF_SAMPLE(0, 0);
++
++ spin_lock_bh(&sq->lock);
++
++ if (unlikely(ibqp->qp_type != IB_QPT_UD && qp->state != QP_CONNECTED)) {
++ err = -ENOTCONN;
++ goto out;
++ }
++ if (unlikely(!sq->size)) {
++ err = -ENOSPC;
++ goto out;
++ }
++
++ for (err = 0; ibwr; ibwr = ibwr->next, nreq++) {
++
++ if (unlikely(sq->depth == sq->size)) {
++ err = -ENOBUFS;
++ goto out;
++ }
++ if (unlikely(ibwr->num_sge > sq->max_sge)) {
++ err = -E2BIG;
++ goto out;
++ }
++
++ wr = ibscif_get_wr(sq, sq->tail);
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->id = ibwr->wr_id;
++ wr->opcode = ibwr->opcode;
++ wr->flags = ibwr->send_flags | ((qp->sq_policy == IB_SIGNAL_ALL_WR) ? IB_SEND_SIGNALED : 0);
++ wr->state = WR_WAITING;
++ wr->use_rma = 0;
++ wr->rma_id = 0;
++
++ if (ibqp->qp_type == IB_QPT_UD) {
++ struct ib_ud_wr *udwr = ud_wr(ibwr);
++ wr->opcode = WR_UD;
++ wr->ud.remote_node_id = IBSCIF_LID_TO_NODE_ID(be16_to_cpu(to_ah(udwr->ah)->dlid));
++ wr->ud.remote_qpn = udwr->remote_qpn;
++
++			/* the remainder is handled exactly like IB_WR_SEND */
++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, 0);
++ if (unlikely(err))
++ goto out;
++
++ if (wr->length > IBSCIF_MTU) {
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ err = -EMSGSIZE;
++ goto out;
++ }
++
++ wr->msg_id = sq->wirestate->tx.next_msg_id++;
++ }
++
++ else switch (ibwr->opcode) {
++
++		case IB_WR_SEND_WITH_IMM:
++			wr->send.immediate_data = ibwr->ex.imm_data;
++			/* fall through */
++ case IB_WR_SEND:
++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, 0);
++ if (unlikely(err))
++ goto out;
++ wr->msg_id = sq->wirestate->tx.next_msg_id++;
++ if (wr->length > rma_threshold) {
++ wr->use_rma = 1;
++ wr->rma_id = sq->next_msg_id;
++ }
++ break;
++
++		case IB_WR_RDMA_WRITE_WITH_IMM:
++			wr->msg_id = sq->wirestate->tx.next_msg_id++;
++			wr->write.immediate_data = ibwr->ex.imm_data;
++			/* fall through */
++ case IB_WR_RDMA_WRITE:
++ {
++ struct ib_rdma_wr *rdmawr = rdma_wr(ibwr);
++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, 0);
++ if (unlikely(err))
++ goto out;
++ if (wr->length &&
++ ((rdmawr->remote_addr + wr->length - 1) < rdmawr->remote_addr)) {
++ err = -EOVERFLOW;
++ goto out;
++ }
++ wr->write.remote_address = rdmawr->remote_addr;
++ wr->write.rkey = rdmawr->rkey;
++ if (ibwr->opcode == IB_WR_RDMA_WRITE)
++ wr->msg_id = 0;
++ if (wr->length > rma_threshold) {
++ wr->use_rma = 1;
++ wr->rma_id = sq->next_msg_id;
++ }
++ break;
++ }
++ case IB_WR_RDMA_READ:
++ {
++ struct ib_rdma_wr *rdmawr = rdma_wr(ibwr);
++ if (unlikely(!qp->max_or)) {
++ err = -ENOBUFS;
++ goto out;
++ }
++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, IB_ACCESS_LOCAL_WRITE);
++ if (unlikely(err))
++ goto out;
++ if (wr->length &&
++ ((rdmawr->remote_addr + wr->length - 1) < rdmawr->remote_addr)) {
++ err = -EOVERFLOW;
++ goto out;
++ }
++ wr->read.remote_address = rdmawr->remote_addr;
++ wr->read.remote_length = wr->length;
++ wr->read.rkey = rdmawr->rkey;
++ wr->length = 0; /* No tx data with this opcode */
++ wr->msg_id = sq->next_msg_id;
++ atomic_inc(&qp->or_posted);
++ if (wr->read.remote_length > rma_threshold) {
++ wr->use_rma = 1;
++ wr->rma_id = wr->msg_id;
++ }
++ break;
++ }
++ case IB_WR_ATOMIC_CMP_AND_SWP:
++ case IB_WR_ATOMIC_FETCH_AND_ADD:
++ {
++ struct ib_atomic_wr *atomicwr = atomic_wr(ibwr);
++ if (unlikely(!qp->max_or)) {
++ err = -ENOBUFS;
++ goto out;
++ }
++ if (unlikely(atomicwr->remote_addr & (sizeof wr->atomic_rsp.orig_data - 1))) {
++ err = -EADDRNOTAVAIL;
++ goto out;
++ }
++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, IB_ACCESS_LOCAL_WRITE);
++ if (unlikely(err))
++ goto out;
++ if (unlikely(wr->length < sizeof wr->atomic_rsp.orig_data)) {
++ err = -EINVAL;
++ goto out;
++ }
++ if (ibwr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
++ wr->cmp_swp.cmp_operand = atomicwr->compare_add;
++ wr->cmp_swp.swp_operand = atomicwr->swap;
++ wr->cmp_swp.remote_address = atomicwr->remote_addr;
++ wr->cmp_swp.rkey = atomicwr->rkey;
++ } else {
++ wr->fetch_add.add_operand = atomicwr->compare_add;
++ wr->fetch_add.remote_address = atomicwr->remote_addr;
++ wr->fetch_add.rkey = atomicwr->rkey;
++ }
++ wr->length = 0; /* No tx data with these opcodes */
++ wr->msg_id = sq->next_msg_id;
++ atomic_inc(&qp->or_posted);
++ break;
++ }
++ default:
++ err = -ENOMSG;
++ goto out;
++ }
++
++ DEV_STAT(qp->dev, wr_opcode[wr->opcode]++);
++ ibscif_append_wq(sq);
++ }
++out:
++ spin_unlock_bh(&sq->lock);
++
++ IBSCIF_PERF_SAMPLE(1, 0);
++
++ if (err)
++ *bad_wr = ibwr;
++ if (nreq)
++ ibscif_schedule(sq);
++
++ IBSCIF_PERF_SAMPLE(9, 1);
++
++ return err;
++}
++
++int ibscif_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ibwr, struct ib_recv_wr **bad_wr)
++{
++ struct ibscif_qp *qp = to_qp(ibqp);
++ struct ibscif_wq *rq = &qp->rq;
++ struct ibscif_wr *wr;
++ int err;
++
++ spin_lock_bh(&rq->lock);
++
++ if ((qp->state != QP_IDLE) && (qp->state != QP_CONNECTED)) {
++ err = -ENOTCONN;
++ goto out;
++ }
++ if (unlikely(!rq->size)) {
++ err = -ENOSPC;
++ goto out;
++ }
++
++ for (err = 0; ibwr; ibwr = ibwr->next) {
++
++ if (unlikely(rq->depth == rq->size)) {
++ err = -ENOBUFS;
++ goto out;
++ }
++ if (unlikely(ibwr->num_sge > rq->max_sge)) {
++ err = -E2BIG;
++ goto out;
++ }
++
++ wr = ibscif_get_wr(rq, rq->tail);
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->id = ibwr->wr_id;
++ wr->msg_id = rq->next_msg_id;
++ wr->state = WR_WAITING;
++
++ err = ibscif_wr_ds(ibqp->pd, ibwr->sg_list, ibwr->num_sge, wr, &wr->length, IB_ACCESS_LOCAL_WRITE);
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ if (unlikely(err))
++ goto out;
++
++ ibscif_append_wq(rq);
++ }
++out:
++ spin_unlock_bh(&rq->lock);
++ if (err)
++ *bad_wr = ibwr;
++
++ return err;
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_procfs.c b/drivers/infiniband/hw/scif/ibscif_procfs.c
+new file mode 100644
+index 0000000..60c1473
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_procfs.c
+@@ -0,0 +1,135 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++static int ibscif_stats_show(struct seq_file *m, void *v)
++{
++	struct ibscif_dev *dev = m->private;
++
++	seq_printf(m,
++ "%s statistics:\n"
++ " tx_bytes %lu rx_bytes %lu\n"
++ " tx_pkts %lu rx_pkts %lu loopback_pkts %lu\n"
++ " sched_exhaust %lu unavailable %lu\n"
++ " tx_errors %lu duplicates %lu\n"
++ " total wr %lu :\n"
++ " send %lu send_imm %lu write %lu write_imm %lu\n"
++ " recv %lu recv_imm %lu read %lu comp %lu fetch %lu\n"
++ " read_rsp %lu atomic_rsp %lu ud %lu\n"
++ " fast_rdma :\n"
++ " write %lu read %lu unavailable %lu fallback %lu force_ack %lu tail_write %lu\n",
++ dev->ibdev.name,
++ DEV_STAT(dev, bytes_sent),
++ DEV_STAT(dev, bytes_rcvd),
++ DEV_STAT(dev, packets_sent),
++ DEV_STAT(dev, packets_rcvd),
++ DEV_STAT(dev, loopback),
++ DEV_STAT(dev, sched_exhaust),
++ DEV_STAT(dev, unavailable),
++ DEV_STAT(dev, tx_errors),
++ DEV_STAT(dev, duplicates),
++ DEV_STAT(dev, wr_opcode[WR_SEND]) +
++ DEV_STAT(dev, wr_opcode[WR_SEND_WITH_IMM]) +
++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE]) +
++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE_WITH_IMM]) +
++ DEV_STAT(dev, recv) +
++ DEV_STAT(dev, recv_imm) +
++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ]) +
++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_CMP_AND_SWP]) +
++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_FETCH_AND_ADD]) +
++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ_RSP]) +
++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_RSP]),
++ DEV_STAT(dev, wr_opcode[WR_SEND]),
++ DEV_STAT(dev, wr_opcode[WR_SEND_WITH_IMM]),
++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE]),
++ DEV_STAT(dev, wr_opcode[WR_RDMA_WRITE_WITH_IMM]),
++ DEV_STAT(dev, recv),
++ DEV_STAT(dev, recv_imm),
++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ]),
++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_CMP_AND_SWP]),
++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_FETCH_AND_ADD]),
++ DEV_STAT(dev, wr_opcode[WR_RDMA_READ_RSP]),
++ DEV_STAT(dev, wr_opcode[WR_ATOMIC_RSP]),
++ DEV_STAT(dev, wr_opcode[WR_UD]),
++ DEV_STAT(dev, fast_rdma_write),
++ DEV_STAT(dev, fast_rdma_read),
++ DEV_STAT(dev, fast_rdma_unavailable),
++ DEV_STAT(dev, fast_rdma_fallback),
++ DEV_STAT(dev, fast_rdma_force_ack),
++ DEV_STAT(dev, fast_rdma_tail_write)
++ );
++
++	return 0;
++}
++
++static ssize_t ibscif_stats_write(struct file *file, const char __user *buffer,
++ size_t count, loff_t *ppos)
++{
++ struct ibscif_dev *dev = PDE_DATA(file_inode(file));
++ memset(&dev->stats, 0, sizeof dev->stats);
++ return count;
++}
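
In practice the counters live at /proc/net/<device>/stats (registered in
ibscif_procfs_add_dev below). Reading the file prints them, and any
write, for example "echo 0 > /proc/net/<device>/stats", zeroes them.
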
++
++static int ibscif_stats_open(struct inode *inode, struct file *file)
++{
++ return single_open(file, ibscif_stats_show, PDE_DATA(inode));
++}
++
++struct file_operations ibscif_fops = {
++ .owner = THIS_MODULE,
++ .open = ibscif_stats_open,
++ .read = seq_read,
++ .write = ibscif_stats_write,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++int ibscif_procfs_add_dev(struct ibscif_dev *dev)
++{
++ dev->procfs = proc_mkdir(dev->ibdev.name, init_net.proc_net);
++ if (!dev->procfs)
++ return -ENOENT;
++
++	if (!proc_create_data("stats", S_IRUGO | S_IWUGO, dev->procfs,
++			      &ibscif_fops, dev))
++		return -ENOENT;
++
++ return 0;
++}
++
++void ibscif_procfs_remove_dev(struct ibscif_dev *dev)
++{
++ if (dev->procfs)
++ remove_proc_entry("stats", dev->procfs);
++ remove_proc_entry(dev->ibdev.name, init_net.proc_net);
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_protocol.c b/drivers/infiniband/hw/scif/ibscif_protocol.c
+new file mode 100644
+index 0000000..1c91934
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_protocol.c
+@@ -0,0 +1,2810 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++#include <linux/sched.h>
++/* dev/wr/qp backpointers overlayed in skb cb[] */
++struct ibscif_skb_cb {
++ struct ibscif_dev *dev;
++ struct ibscif_wr *wr;
++ scif_epd_t scif_ep;
++ struct ibscif_qp *qp; /* for UD only */
++};
++
++#define SET_SKB_DEV(skb,dev0) ((struct ibscif_skb_cb *)&skb->cb)->dev = dev0
++#define SET_SKB_WR(skb,wr0) ((struct ibscif_skb_cb *)&skb->cb)->wr = wr0
++#define SET_SKB_EP(skb,ep0) ((struct ibscif_skb_cb *)&skb->cb)->scif_ep = ep0
++#define SET_SKB_QP(skb,qp0) ((struct ibscif_skb_cb *)&skb->cb)->qp = qp0
++
++#define GET_SKB_DEV(skb) ((struct ibscif_skb_cb *)&skb->cb)->dev
++#define GET_SKB_WR(skb) ((struct ibscif_skb_cb *)&skb->cb)->wr
++#define GET_SKB_EP(skb) ((struct ibscif_skb_cb *)&skb->cb)->scif_ep
++#define GET_SKB_QP(skb) ((struct ibscif_skb_cb *)&skb->cb)->qp
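
The overlay is only safe while the struct fits inside sk_buff's 48-byte
cb[] scratch area. A compile-time guard (not present in the original,
suggested here) would catch any future growth:

    /* e.g. in the module init path: */
    BUILD_BUG_ON(sizeof(struct ibscif_skb_cb) >
                 sizeof(((struct sk_buff *)0)->cb));
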
++
++#define hw_addr_equal(h1, h2) (!memcmp(h1, h2, ETH_ALEN))
++
++#define KMAP(x) kmap(skb_frag_page(x))
++#define KUNMAP(x) kunmap(skb_frag_page(x))
++#define SET_PAGE(x,y) __skb_frag_set_page(x, y)
++#define GET_PAGE(x) __skb_frag_ref(x)
++
++void ibscif_skb_destructor(struct sk_buff *skb)
++{
++ struct ibscif_dev *dev = GET_SKB_DEV(skb);
++
++ /* A sk_buff is now available. */
++ if (atomic_inc_return(&dev->available) == 1)
++ ; /* Could invoke the scheduler here. */
++
++ /* Release the module reference held for this sk_buff. */
++ module_put(THIS_MODULE);
++}
++
++static struct sk_buff *ibscif_alloc_tx_skb(struct ibscif_dev *dev, int hdr_size, int payload_size)
++{
++ struct sk_buff *skb;
++
++ skb = dev_alloc_skb(hdr_size);
++ if (unlikely(!skb))
++ return NULL;
++
++ skb_reset_mac_header(skb);
++ skb_reset_network_header(skb);
++
++ skb->protocol = IBSCIF_PACKET_TYPE;
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ skb->priority = TC_PRIO_CONTROL; /* highest defined priority */
++ skb->dev = (void *) dev;
++ skb->len = hdr_size + payload_size;
++ skb->data_len = payload_size;
++ skb->tail += hdr_size;
++
++ return skb;
++}
++
++static struct sk_buff_head xmit_queue;
++static void ibscif_xmit_work_handler(struct work_struct *context);
++static DECLARE_WORK(ibscif_xmit_work, ibscif_xmit_work_handler);
++static atomic_t xmit_busy = ATOMIC_INIT(0);
++
++static void ibscif_xmit_work_handler(struct work_struct *context)
++{
++ struct sk_buff *skb;
++ scif_epd_t scif_ep;
++ int num_frags;
++ skb_frag_t *frag;
++ void *vaddr;
++ int ret;
++ int hdr_size;
++ int i;
++ struct ibscif_qp *qp;
++
++again:
++ while ((skb = skb_dequeue(&xmit_queue))) {
++ scif_ep = GET_SKB_EP(skb);
++ if (!scif_ep) {
++ printk(KERN_ALERT PFX "%s: NULL scif_ep, skb=%p\n", __func__, skb);
++ goto next;
++ }
++
++ hdr_size = skb->len - skb->data_len;
++ for (i=0; i<hdr_size; ) {
++ ret = scif_send(scif_ep, skb->data+i, hdr_size-i,
++ blocking_send ? SCIF_SEND_BLOCK : 0);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: fail to send header, hdr_size=%d, ret=%d\n", __func__, hdr_size, ret);
++ goto next;
++ }
++ i += ret;
++ }
++
++ num_frags = skb_shinfo(skb)->nr_frags;
++ frag = skb_shinfo(skb)->frags;
++ while (num_frags--) {
++ vaddr = KMAP(frag); /* because scif_send() may cause scheduling */
++ for (i=0; i<frag->size; ) {
++ ret = scif_send(scif_ep, vaddr + frag->page_offset + i,
++ frag->size - i,
++ blocking_send ? SCIF_SEND_BLOCK : 0);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_send returns %d, frag_size=%d\n", __func__, ret, frag->size);
++ break;
++ }
++ i += ret;
++ }
++ KUNMAP(frag);
++ frag++;
++ }
++next:
++ qp = GET_SKB_QP(skb);
++ if (qp && qp->ibqp.qp_type == IB_QPT_UD) {
++ struct ibscif_full_frame *pdu = (struct ibscif_full_frame*)skb->data;
++ u16 opcode = __be16_to_cpu(pdu->ibscif.hdr.opcode);
++ if (ibscif_pdu_is_last(opcode)) {
++ struct ibscif_wr *wr = GET_SKB_WR(skb);
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ wr->state = WR_COMPLETED;
++ ibscif_process_sq_completions(GET_SKB_QP(skb));
++ }
++ /* Release the reference held on UD QPs */
++ ibscif_put_qp(qp);
++ }
++ kfree_skb(skb);
++ }
++
++ if (!skb_queue_empty(&xmit_queue))
++ goto again;
++
++ atomic_set(&xmit_busy, 0);
++}
++
++static void ibscif_dev_queue_xmit(struct sk_buff *skb)
++{
++ struct ibscif_dev *dev=NULL;
++ int len = 0;
++
++ if (skb) {
++ dev = GET_SKB_DEV(skb);
++ len = skb->len;
++ skb_queue_tail(&xmit_queue, skb);
++ }
++
++	/*
++	 * Only one instance may be enqueued at a time; otherwise there would
++	 * be a race between concurrent scif_send() calls. The running worker
++	 * may miss a newly added item, but the poll thread will pick it up.
++	 */
++ if (!atomic_xchg(&xmit_busy, 1))
++ schedule_work(&ibscif_xmit_work);
++
++ if (likely(dev)) {
++ DEV_STAT(dev, packets_sent++);
++ DEV_STAT(dev, bytes_sent += len);
++ }
++}
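
The xmit_busy handshake is the usual single-flight pattern. Stripped of
driver detail it looks like the sketch below, where busy, dequeue and
process are placeholders:

    /* Producer: only the thread that flips 0 -> 1 schedules the worker. */
    if (!atomic_xchg(&busy, 1))
        schedule_work(&worker);

    /* Worker: drain, then clear the flag. An item enqueued between the
     * final dequeue and atomic_set() can be missed, which is why the
     * comment above defers such stragglers to the poll thread. */
    while ((item = dequeue()))
        process(item);
    atomic_set(&busy, 0);
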
++
++static int ibscif_create_hdr(struct ibscif_qp *qp, struct ibscif_wr *wr, struct sk_buff *skb,
++ u32 seq_num, u32 wr_len_remaining, int force)
++{
++ struct ibscif_full_frame *pdu = (struct ibscif_full_frame*)skb->data;
++ u32 sq_seq, iq_seq;
++ u16 opcode;
++ int i;
++
++ sq_seq = qp->wire.sq.rx.last_in_seq;
++ iq_seq = qp->wire.iq.rx.last_in_seq;
++ qp->wire.sq.rx.last_seq_acked = sq_seq;
++ qp->wire.iq.rx.last_seq_acked = iq_seq;
++
++ pdu->ibscif.hdr.length = __cpu_to_be16(skb->data_len);
++ if (qp->ibqp.qp_type == IB_QPT_UD) {
++ pdu->ibscif.hdr.dst_qp = __cpu_to_be32(wr->ud.remote_qpn);
++ }
++ else {
++ pdu->ibscif.hdr.dst_qp = __cpu_to_be32(qp->remote_qpn);
++ }
++ pdu->ibscif.hdr.src_qp = __cpu_to_be32(qp->ibqp.qp_num);
++ pdu->ibscif.hdr.seq_num = __cpu_to_be32(seq_num);
++ pdu->ibscif.hdr.sq_ack_num = __cpu_to_be32(sq_seq);
++ pdu->ibscif.hdr.iq_ack_num = __cpu_to_be32(iq_seq);
++
++ switch (wr->opcode) {
++ case WR_UD:
++ opcode = ibscif_op_ud;
++ if (skb->data_len == wr_len_remaining) {
++ opcode = ibscif_pdu_set_last(opcode);
++ if (wr->flags & IB_SEND_SIGNALED)
++ force = 1;
++ if (wr->flags & IB_SEND_SOLICITED)
++ opcode = ibscif_pdu_set_se(opcode);
++ }
++ pdu->ibscif.ud.msg_length = __cpu_to_be32(wr->length);
++ pdu->ibscif.ud.msg_offset = __cpu_to_be32(wr->length - wr_len_remaining);
++ memset(&pdu->ibscif.ud.grh, 0, 40);
++ break;
++
++ case WR_SEND:
++ case WR_SEND_WITH_IMM:
++ opcode = ibscif_op_send;
++ if (skb->data_len == wr_len_remaining || opcode == ibscif_op_send_rma) {
++ opcode = ibscif_pdu_set_last(opcode);
++ if (wr->flags & IB_SEND_SIGNALED)
++ force = 1;
++ if (wr->opcode == WR_SEND_WITH_IMM) {
++ opcode = ibscif_pdu_set_immed(opcode);
++ pdu->ibscif.send.immed_data = __cpu_to_be32(wr->send.immediate_data);
++ } else pdu->ibscif.send.immed_data = 0;
++ if (wr->flags & IB_SEND_SOLICITED)
++ opcode = ibscif_pdu_set_se(opcode);
++ }
++ pdu->ibscif.send.msg_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.send.msg_length = __cpu_to_be32(wr->length);
++ pdu->ibscif.send.msg_offset = __cpu_to_be32(wr->length - wr_len_remaining);
++ if (wr->use_rma) {
++ opcode = ibscif_op_send_rma;
++ pdu->ibscif.send.rma_id = __cpu_to_be32(wr->rma_id);
++ pdu->ibscif.send.num_rma_addrs = __cpu_to_be32(wr->num_ds);
++ for (i=0; i<wr->num_ds; i++) {
++ pdu->ibscif.send.rma_addrs[i].offset = __cpu_to_be64(wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset);
++ pdu->ibscif.send.rma_addrs[i].length = __cpu_to_be32(wr->ds_list[i].length);
++ }
++ }
++ break;
++
++ case WR_RDMA_READ:
++ opcode = ibscif_op_read;
++ pdu->ibscif.read_req.rdma_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.read_req.rdma_key = __cpu_to_be32(wr->read.rkey);
++ pdu->ibscif.read_req.rdma_length= __cpu_to_be32(wr->read.remote_length);
++ pdu->ibscif.read_req.rdma_address = __cpu_to_be64(wr->read.remote_address);
++ if (wr->use_rma) {
++ opcode = ibscif_op_read_rma;
++ pdu->ibscif.read_req.num_rma_addrs = __cpu_to_be32(wr->num_ds);
++ for (i=0; i<wr->num_ds; i++) {
++ pdu->ibscif.read_req.rma_addrs[i].offset = __cpu_to_be64(wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset);
++ pdu->ibscif.read_req.rma_addrs[i].length = __cpu_to_be32(wr->ds_list[i].length);
++ }
++ }
++ break;
++
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM:
++ opcode = ibscif_op_write;
++ if ((enum ib_wr_opcode)wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
++ opcode = ibscif_pdu_set_immed(opcode);
++ pdu->ibscif.write.immed_data = __cpu_to_be32(wr->write.immediate_data);
++ if (wr->flags & IB_SEND_SOLICITED)
++ opcode = ibscif_pdu_set_se(opcode);
++ } else pdu->ibscif.write.immed_data = 0;
++ if (skb->data_len == wr_len_remaining || opcode == ibscif_op_write_rma) {
++ opcode = ibscif_pdu_set_last(opcode);
++ if (wr->flags & IB_SEND_SIGNALED)
++ force = 1;
++ }
++ pdu->ibscif.write.msg_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.write.rdma_key = __cpu_to_be32(wr->write.rkey);
++ pdu->ibscif.write.rdma_address = __cpu_to_be64(wr->write.remote_address +
++ (wr->length - wr_len_remaining));
++ if (wr->use_rma) {
++ opcode = ibscif_op_write_rma;
++ if (wr->opcode == WR_RDMA_WRITE_WITH_IMM)
++ opcode = ibscif_pdu_set_immed(opcode);
++ pdu->ibscif.write.rma_id = __cpu_to_be32(wr->rma_id);
++ pdu->ibscif.write.rma_length = __cpu_to_be32(wr->length);
++ pdu->ibscif.write.num_rma_addrs = __cpu_to_be32(wr->num_ds);
++ for (i=0; i<wr->num_ds; i++) {
++ pdu->ibscif.write.rma_addrs[i].offset = __cpu_to_be64(wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset);
++ pdu->ibscif.write.rma_addrs[i].length = __cpu_to_be32(wr->ds_list[i].length);
++ }
++ }
++ break;
++
++ case WR_ATOMIC_CMP_AND_SWP:
++ opcode = ibscif_pdu_set_last(ibscif_op_comp_swap);
++ pdu->ibscif.comp_swap.atomic_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.comp_swap.atomic_key = __cpu_to_be32(wr->cmp_swp.rkey);
++ pdu->ibscif.comp_swap.comp_data = __cpu_to_be64(wr->cmp_swp.cmp_operand);
++ pdu->ibscif.comp_swap.swap_data = __cpu_to_be64(wr->cmp_swp.swp_operand);
++ pdu->ibscif.comp_swap.atomic_address = __cpu_to_be64(wr->cmp_swp.remote_address);
++ break;
++
++ case WR_ATOMIC_FETCH_AND_ADD:
++ opcode = ibscif_pdu_set_last(ibscif_op_fetch_add);
++ pdu->ibscif.fetch_add.atomic_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.fetch_add.atomic_key = __cpu_to_be32(wr->fetch_add.rkey);
++ pdu->ibscif.fetch_add.add_data = __cpu_to_be64(wr->fetch_add.add_operand);
++ pdu->ibscif.fetch_add.atomic_address = __cpu_to_be64(wr->fetch_add.remote_address);
++ break;
++
++ case WR_RDMA_READ_RSP:
++ opcode = ibscif_op_read_rsp;
++ if (skb->data_len == wr_len_remaining)
++ opcode = ibscif_pdu_set_last(opcode);
++ pdu->ibscif.read_rsp.rdma_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.read_rsp.rdma_offset = __cpu_to_be32(wr->length - wr_len_remaining);
++ break;
++
++ case WR_ATOMIC_RSP:
++ opcode = ibscif_pdu_set_last(wr->atomic_rsp.opcode);
++ pdu->ibscif.atomic_rsp.atomic_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.atomic_rsp.orig_data = __cpu_to_be64(wr->atomic_rsp.orig_data);
++ break;
++
++ case WR_RMA_RSP:
++ opcode = ibscif_op_rma_rsp;
++ pdu->ibscif.rma_rsp.rma_id = __cpu_to_be32(wr->msg_id);
++ pdu->ibscif.rma_rsp.xfer_length = __cpu_to_be32(wr->rma_rsp.xfer_length);
++ pdu->ibscif.rma_rsp.error = __cpu_to_be32(wr->rma_rsp.error);
++ break;
++ default:
++ printk(KERN_ERR PFX "%s() invalid opcode %d\n", __func__, wr->opcode);
++ return 1;
++ }
++
++ if (force)
++ opcode = ibscif_pdu_set_force_ack(opcode);
++
++ pdu->ibscif.hdr.opcode = __cpu_to_be16(opcode);
++
++ return 0;
++}
++
++static struct sk_buff *ibscif_alloc_pdu(struct ibscif_dev *dev, struct ibscif_qp *qp, struct ibscif_wr *wr,
++ int hdr_size, u32 seq_num, u32 payload_size, u32 len_remaining, int force)
++{
++ struct sk_buff *skb;
++ struct ibscif_full_frame *pdu;
++
++ if (unlikely(!qp->conn && qp->ibqp.qp_type != IB_QPT_UD)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return NULL;
++ }
++
++ if (!atomic_add_unless(&dev->available, -1, 0)) {
++ printk(KERN_NOTICE PFX "%s throttled by available tx buffer limit\n", dev->ibdev.name);
++ DEV_STAT(dev, unavailable++);
++ return NULL;
++ }
++
++ /* Get an skb for this protocol packet. */
++ skb = ibscif_alloc_tx_skb(dev, hdr_size, payload_size);
++ if (unlikely(!skb))
++ goto bail;
++
++ /* Hold a reference on the module until skb->destructor is called. */
++ __module_get(THIS_MODULE);
++ skb->destructor = ibscif_skb_destructor;
++
++ SET_SKB_DEV(skb, dev);
++ SET_SKB_WR(skb, wr);
++
++ if (qp->ibqp.qp_type == IB_QPT_UD) {
++ struct ibscif_conn *conn;
++ int flag = qp->ibqp.qp_num > wr->ud.remote_qpn;
++ conn = ibscif_get_conn(qp->local_node_id, wr->ud.remote_node_id, flag);
++ if (unlikely(!conn)) {
++ kfree_skb(skb);
++ goto bail;
++ }
++
++ ibscif_qp_add_ud_conn(qp, conn);
++ ibscif_put_conn(conn);
++ SET_SKB_EP(skb, conn->ep);
++ SET_SKB_QP(skb, qp);
++
++ /* Reference UD QPs until the wr is transmitted by ibscif_xmit_work_handler */
++ kref_get(&qp->ref);
++ }
++ else {
++ SET_SKB_EP(skb, qp->conn->ep);
++ }
++
++ /* Construct the header and copy it to the skb. */
++ if (unlikely(ibscif_create_hdr(qp, wr, skb, seq_num, len_remaining, force))) {
++ kfree_skb(skb);
++ goto bail;
++ }
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(hdr_size);
++
++ return skb;
++bail:
++ atomic_inc(&dev->available);
++ return NULL;
++}
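++
++/*
++ * dev->available is a tx-buffer credit count: it is debited for every
++ * allocation above and credited back on the bail path; the success-path
++ * credit is presumably returned by the skb destructor once the skb is
++ * freed, keeping the throttle in balance.
++ */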
++
++static int ibscif_send_null_pdu(struct ibscif_dev *dev, struct ibscif_qp *qp, struct ibscif_wr *wr, u32 hdr_size)
++{
++ struct sk_buff *skb;
++
++ /* Allocate an initialized skb with a PDU header. */
++ skb = ibscif_alloc_pdu(dev, qp, wr, hdr_size, wr->sar.seg.starting_seq, 0, 0, 0);
++ if (unlikely(!skb))
++ return 0;
++
++ ibscif_dev_queue_xmit(skb);
++ return 1;
++}
++
++static int get_hdr_size_from_wr(struct ibscif_wr *wr)
++{
++ switch (wr->opcode) {
++ case WR_UD: return sizeof(struct ud_hdr);
++ case WR_SEND:
++ case WR_SEND_WITH_IMM: return sizeof(struct send_hdr);
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM: return sizeof(struct write_hdr);
++ case WR_RDMA_READ: return sizeof(struct read_req_hdr);
++ case WR_ATOMIC_CMP_AND_SWP: return sizeof(struct comp_swap_hdr);
++ case WR_ATOMIC_FETCH_AND_ADD: return sizeof(struct fetch_add_hdr);
++ case WR_RDMA_READ_RSP: return sizeof(struct read_rsp_hdr);
++ case WR_ATOMIC_RSP: return sizeof(struct atomic_rsp_hdr);
++ case WR_RMA_RSP: return sizeof(struct rma_rsp_hdr);
++ default: return 0;
++ }
++}
++
++static int get_rma_addr_size_from_wr(struct ibscif_wr *wr)
++{
++ switch (wr->opcode) {
++ case WR_UD: return 0;
++ case WR_SEND:
++ case WR_SEND_WITH_IMM:
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM:
++ case WR_RDMA_READ: return wr->num_ds * sizeof(struct rma_addr);
++ case WR_ATOMIC_CMP_AND_SWP: return 0;
++ case WR_ATOMIC_FETCH_AND_ADD: return 0;
++ case WR_RDMA_READ_RSP: return 0;
++ case WR_ATOMIC_RSP: return 0;
++ case WR_RMA_RSP: return 0;
++ default: return 0;
++ }
++}
++
++static int setup_rma_addrs(struct ibscif_wq *wq, struct ibscif_wr *wr)
++{
++ struct ibscif_ds *ds;
++ int i;
++
++ if (!wr->num_ds)
++ return 1;
++
++ for (i=0; i<wr->num_ds; i++) {
++ ds = &wr->ds_list[i];
++ if (!ds->current_mreg)
++ ds->current_mreg = ibscif_mr_get_mreg(ds->mr, wq->qp->conn);
++
++ if (!ds->current_mreg)
++ return 0;
++ }
++
++ return 1;
++}
++
++/* When necessary, SCIF allocates a temporary buffer to fix up the cache line offset,
++ * so we only need to use roffset to calculate the dma size.
++ */
++static inline int ibscif_dma_size(u32 len, u64 roffset)
++{
++ u32 head, tail;
++
++ tail = (roffset + len) % 64;
++ head = (64 - roffset % 64) % 64;
++ if (len >= head + tail)
++ return (len - head - tail);
++ else
++ return 0;
++}
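++
++/*
++ * Worked example: len = 200, roffset = 10 gives
++ *   head = (64 - 10 % 64) % 64 = 54   (bytes up to the next cache line)
++ *   tail = (10 + 200) % 64     = 18   (bytes past the last full line)
++ *   dma  = 200 - 54 - 18       = 128  (the cache-aligned middle)
++ * A transfer shorter than head + tail has no aligned middle: result 0.
++ */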
++
++static void ibscif_send_ack(struct ibscif_qp *qp); /* defined later in this file */
++
++static int ibscif_try_fast_rdma(struct ibscif_wq *wq, struct ibscif_wr *wr)
++{
++ struct ibscif_qp *qp;
++ int i, err;
++ u64 loffset, roffset;
++ u32 total_length, rdma_length, xfer_len;
++ u64 raddress;
++ u32 rkey;
++ enum ib_access_flags access;
++ u32 dma_size = 0;
++ int rma_flag = 0;
++
++ IBSCIF_PERF_SAMPLE(2, 0);
++
++ switch (wr->opcode) {
++ case WR_RDMA_WRITE:
++ raddress = wr->write.remote_address;
++ rkey = wr->write.rkey;
++ total_length = rdma_length = wr->length;
++ access = IB_ACCESS_REMOTE_WRITE;
++ break;
++
++ case WR_RDMA_READ:
++ raddress = wr->read.remote_address;
++ rkey = wr->read.rkey;
++ total_length = rdma_length = wr->read.remote_length; /* wr->length is 0 */
++ access = IB_ACCESS_REMOTE_READ;
++ break;
++
++ default:
++ return 0;
++ }
++
++ qp = wq->qp;
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return 0;
++ }
++
++ if (!setup_rma_addrs(wq, wr)) {
++ DEV_STAT(qp->dev, fast_rdma_fallback++);
++ return 0;
++ }
++
++ roffset = IBSCIF_MR_VADDR_TO_OFFSET(rkey, raddress);
++
++ for (i=0; i<wr->num_ds; i++) {
++ if (rdma_length == 0)
++ break;
++
++ loffset = wr->ds_list[i].current_mreg->offset + wr->ds_list[i].offset;
++ xfer_len = min(wr->ds_list[i].length, rdma_length);
++ if (xfer_len == 0)
++ continue;
++
++ IBSCIF_PERF_SAMPLE(3, 0);
++
++ dma_size += ibscif_dma_size(xfer_len, roffset);
++
++ if (i==wr->num_ds-1)
++ rma_flag = dma_size ? SCIF_RMA_SYNC : 0;
++
++ if (wr->opcode == WR_RDMA_WRITE) {
++ err = scif_writeto(wq->qp->conn->ep, loffset, xfer_len, roffset, rma_flag|SCIF_RMA_ORDERED);
++ if (err)
++ printk(KERN_INFO PFX "%s(): error writing ordered messgage, size=%d, err=%d.\n",
++ __func__, xfer_len, err);
++ }
++ else {
++ err = scif_readfrom(wq->qp->conn->ep, loffset, xfer_len, roffset, rma_flag);
++ if (err)
++ printk(KERN_INFO PFX "%s(): error reading the messgage, size=%d, err=%d.\n",
++ __func__, xfer_len, err);
++ }
++
++ IBSCIF_PERF_SAMPLE(4, 0);
++
++ if (err) {
++ DEV_STAT(qp->dev, fast_rdma_fallback++);
++ return 0;
++ }
++
++ roffset += xfer_len;
++ rdma_length -= xfer_len;
++ }
++
++ if (rdma_length)
++ printk(KERN_INFO PFX "%s(): remaining rdma_length=%d.\n", __func__, rdma_length);
++
++ IBSCIF_PERF_SAMPLE(5, 0);
++
++ /* complete the wr */
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ wr->state = WR_COMPLETED;
++ wr->sar.rea.final_length = total_length - rdma_length;
++
++ /* We can't call ibscif_process_sq_completions() here because we are holding the sq lock.
++ * Set the flag and let the upper level make the call. */
++ wq->fast_rdma_completions = 1;
++
++ if (wr->opcode == WR_RDMA_WRITE)
++ DEV_STAT(qp->dev, fast_rdma_write++);
++ else
++ DEV_STAT(qp->dev, fast_rdma_read++);
++
++ /* The fast rdma path doesn't send any packet, and thus cannot piggyback any ack
++ * for the peer. Send a separate ack packet when necessary. */
++ if (seq_before(qp->wire.sq.rx.last_seq_acked, qp->wire.sq.rx.last_in_seq) ||
++ seq_before(qp->wire.iq.rx.last_seq_acked, qp->wire.iq.rx.last_in_seq)) {
++ ibscif_send_ack(qp);
++ DEV_STAT(qp->dev, fast_rdma_force_ack++);
++ }
++
++ IBSCIF_PERF_SAMPLE(8, 0);
++
++ return 1;
++}
++
++/*
++ * Setup for a fresh data descriptor.
++ */
++#define DS_SETUP(ds, mr, page_offset, page_index, ds_len_left) \
++do { \
++ mr = ds->mr; \
++ ds_len_left = ds->length; \
++ page_offset = ds->offset + (mr->addr & ~PAGE_MASK); \
++ page_index = page_offset >> PAGE_SHIFT; \
++ page_offset &= ~PAGE_MASK; \
++} while(0)
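++
++/*
++ * Example (4K pages): mr->addr = 0x1010, ds->offset = 0x2100 gives
++ *   page_offset = 0x2100 + (0x1010 & 0xFFF)       = 0x2110
++ *   page_index  = 0x2110 >> PAGE_SHIFT            = 2
++ *   page_offset &= ~PAGE_MASK -> 0x2110 & 0xFFF   = 0x110
++ * i.e. the data starts 0x110 bytes into the third page of the region.
++ */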
++
++/*
++ * Setup for page crossing within a data descriptor.
++ */
++#define NEXT_PAGE(ds, mr, page_offset, page_index, ds_len_left) \
++do { \
++ if (!ds_len_left) { \
++ ds++; \
++ DS_SETUP(ds, mr, page_offset, page_index, ds_len_left); \
++ } else { \
++ page_index++; \
++ BUG_ON(!(mr->npages > page_index)); \
++ page_offset = 0; \
++ } \
++} while(0)
++
++/*
++ * Setup the data descriptor, page, and offset for specified sequence number
++ */
++#define SETUP_BY_SEQ(wr, ds, mr, from_seq, wr_length, page_offset, page_index, \
++ ds_len_left, max_payload) \
++do { \
++ u32 i, frag_len_max; \
++ \
++ DS_SETUP(ds, mr, page_offset, page_index, ds_len_left); \
++ for (i = wr->sar.seg.starting_seq; seq_before(i, from_seq); i++) { \
++ num_frags = 0; \
++ payload_left = max_payload; \
++ while (payload_left && (num_frags < MAX_SKB_FRAGS)) { \
++ frag_len_max = min(ds_len_left, (u32)(PAGE_SIZE - page_offset));\
++ if (wr_length > payload_left) { \
++ if (payload_left > frag_len_max) { \
++ ds_len_left -= frag_len_max; \
++ NEXT_PAGE(ds, mr, page_offset, \
++ page_index, ds_len_left); \
++ } else { \
++ frag_len_max = payload_left; /* frag->size */ \
++ ds_len_left -= payload_left; \
++ page_offset += payload_left; \
++ } \
++ } else { \
++ if (wr_length > frag_len_max) { \
++ ds_len_left -= frag_len_max; \
++ NEXT_PAGE(ds, mr, page_offset, \
++ page_index, ds_len_left); \
++ } else { \
++ printk(KERN_ERR PFX \
++ "from_seq (%d) botch wr %p opcode %d length %d\n", \
++ from_seq, wr, wr->opcode, wr_length); \
++ return 0; \
++ } \
++ } \
++ wr_length -= frag_len_max; \
++ payload_left -= frag_len_max; \
++ num_frags++; \
++ } \
++ } \
++} while(0)
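++
++/*
++ * SETUP_BY_SEQ replays the fragmentation arithmetic of the transmit loop
++ * in ibscif_xmit_wr(), one sequence number at a time, until it reaches
++ * from_seq. This recovers the ds/page/offset position for a retransmit
++ * without having to record per-sequence state.
++ */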
++
++int ibscif_xmit_wr(struct ibscif_wq *wq, struct ibscif_wr *wr, int tx_limit, int retransmit, u32 from_seq, u32 *posted)
++{
++ struct ibscif_dev *dev;
++ struct ibscif_qp *qp;
++ struct ibscif_ds *ds;
++ struct ibscif_mr *mr;
++ int hdr_size, page_index, num_frags, num_xmited;
++ u32 max_payload, wr_length, page_offset, ds_len_left, payload_left;
++
++ /* Try to process RDMA read/write directly with SCIF functions.
++ * The usual reason for failure is that the remote memory has not yet been
++ * registered with SCIF. The normal packet based path should handle that.
++ */
++ if (host_proxy && wq->qp->local_node_id > 0 && wq->qp->remote_node_id == 0) {
++ /* don't try fast rdma because we want to let the host do the data transfer */
++ }
++ else if (fast_rdma) {
++ num_xmited = 0;
++ if (ibscif_try_fast_rdma(wq, wr))
++ goto finish2;
++ }
++
++ if (!tx_limit) {
++ printk(KERN_INFO PFX "%s() called with tx_limit of zero\n", __func__);
++ return 0;
++ }
++
++ qp = wq->qp;
++ dev = qp->dev;
++ hdr_size = get_hdr_size_from_wr(wr);
++ max_payload = qp->mtu - hdr_size;
++
++ if (wr->use_rma) {
++ struct sk_buff *skb;
++
++ wr_length = wr->length;
++ wr->sar.seg.starting_seq = from_seq;
++ wr->sar.seg.ending_seq = from_seq;
++ wr->state = WR_STARTED;
++
++ num_xmited = 0;
++ if (setup_rma_addrs(wq, wr)) {
++ /* Make room in the header for RMA addresses */
++ hdr_size += get_rma_addr_size_from_wr(wr);
++
++ /* Allocate an initialized skb with PDU header. */
++ skb = ibscif_alloc_pdu(dev, qp, wr, hdr_size, from_seq, 0, wr_length, 0);
++ if (likely(skb)) {
++ ibscif_dev_queue_xmit(skb);
++ num_xmited++;
++ from_seq++;
++ }
++ }
++ else
++ printk(KERN_ALERT PFX "%s: fail to set up RMA addresses for the work request.\n", __func__);
++
++ goto finish;
++ }
++
++ if (!wr->sar.seg.current_ds) {
++ /*
++ * This is a fresh send, so initialize the wr by setting the static
++ * parts of the header and the sequence number range for this wr.
++ */
++ wr_length = wr->length;
++ wr->sar.seg.starting_seq = from_seq;
++ wr->sar.seg.ending_seq = from_seq;
++ if (wr->opcode == WR_UD)
++ max_payload = wr_length;
++ else if (wr_length > max_payload) {
++ wr->sar.seg.ending_seq += (wr_length / max_payload);
++ if (!(wr_length % max_payload))
++ wr->sar.seg.ending_seq--;
++ }
++
++ wr->state = WR_STARTED;
++
++ /*
++ * If this request has a payload, setup for fragmentation.
++ * Otherwise, send it on its way.
++ */
++ if (wr_length) {
++ ds = wr->ds_list;
++ DS_SETUP(ds, mr, page_offset, page_index, ds_len_left);
++ } else {
++ num_xmited = ibscif_send_null_pdu(dev, qp, wr, hdr_size);
++ /* from_seq must always advance, even in the null PDU case. */
++ from_seq++;
++ goto finish;
++ }
++ } else {
++ /* We're picking up from a partially sent request. */
++ ds = wr->sar.seg.current_ds;
++ mr = ds->mr;
++ wr_length = wr->sar.seg.wr_length_remaining;
++ ds_len_left = wr->sar.seg.ds_length_remaining;
++ page_index = wr->sar.seg.current_page_index;
++ page_offset = wr->sar.seg.current_page_offset;
++ from_seq = wr->sar.seg.next_seq;
++ }
++
++ /* Ok, let's break this bad-boy up. */
++ num_xmited = 0;
++ while (wr_length && (num_xmited < tx_limit) && (qp->state == QP_CONNECTED)) {
++ struct sk_buff *skb;
++ skb_frag_t *frag;
++
++ /* Allocate an initialized skb with PDU header. */
++ skb = ibscif_alloc_pdu(dev, qp, wr, hdr_size, from_seq, min(wr_length, max_payload),
++ wr_length, retransmit && (num_xmited == (tx_limit - 1)));
++ if (unlikely(!skb))
++ break;
++
++ /* Update sequence number for next pass. */
++ from_seq++;
++
++ /* Fill the skb fragment list. */
++ frag = skb_shinfo(skb)->frags;
++ num_frags = 0;
++ payload_left = max_payload;
++
++ while (payload_left && (num_frags < MAX_SKB_FRAGS)) {
++ u32 frag_len_max;
++
++ SET_PAGE(frag, mr->page[page_index]);
++ frag->page_offset = page_offset;
++
++ /* Take a reference on the page - kfree_skb will release. */
++ GET_PAGE(frag);
++
++ frag_len_max = min(ds_len_left, (u32)(PAGE_SIZE - page_offset));
++ if (wr_length > payload_left) {
++ if (payload_left > frag_len_max) {
++ /* Deal with page boundary crossing. */
++ frag->size = frag_len_max;
++ ds_len_left -= frag_len_max;
++ NEXT_PAGE(ds, mr, page_offset, page_index, ds_len_left);
++ } else {
++ frag->size = payload_left;
++ ds_len_left -= payload_left;
++ page_offset += payload_left;
++ }
++ } else {
++ if (wr_length > frag_len_max) {
++ /* Deal with page boundary crossing. */
++ frag->size = frag_len_max;
++ ds_len_left -= frag_len_max;
++ NEXT_PAGE(ds, mr, page_offset, page_index, ds_len_left);
++ } else {
++ frag->size = wr_length;
++ payload_left -= wr_length;
++ wr_length = 0;
++ num_frags++; /* Change from index to number. */
++ break;
++ }
++ }
++
++ wr_length -= frag->size;
++ payload_left -= frag->size;
++ num_frags++;
++ frag++;
++ }
++ skb_shinfo(skb)->nr_frags = num_frags;
++
++ /* Check if we need to do a fixup because we ran out of frags. */
++ if ((num_frags == MAX_SKB_FRAGS) && wr_length) {
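++ /* The header was built assuming this skb carried the tail of the
++ * message; the frag slots ran out first, so shrink the length and
++ * clear the 'last' flag because more data follows in the next skb. */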
++ struct ibscif_full_frame *pdu = (struct ibscif_full_frame *)skb->data;
++ skb->len = hdr_size + (max_payload - payload_left);
++ skb->data_len = (max_payload - payload_left);
++ pdu->ibscif.hdr.length = __cpu_to_be16(skb->data_len);
++ pdu->ibscif.hdr.opcode = __cpu_to_be16(__be16_to_cpu(pdu->ibscif.hdr.opcode) & ~ibscif_last_flag);
++ }
++
++ /*
++ * Send it.
++ */
++ ibscif_dev_queue_xmit(skb);
++ num_xmited++;
++ }
++
++ /*
++ * Update state. If this is a retransmit, don't update anything. If not and
++ * there's more to do on the wr, save state. Otherwise, setup for next wr.
++ */
++ if (wr_length && !wr->use_rma) {
++ wr->sar.seg.current_ds = ds;
++ wr->sar.seg.wr_length_remaining = wr_length;
++ wr->sar.seg.ds_length_remaining = ds_len_left;
++ wr->sar.seg.current_page_index = page_index;
++ wr->sar.seg.current_page_offset = page_offset;
++ } else {
++finish: if (wr->opcode != WR_UD)
++ wr->state = WR_WAITING_FOR_ACK;
++finish2: wq->next_wr = (wq->next_wr + 1) % wq->size;
++ }
++ wr->sar.seg.next_seq = from_seq;
++ if (posted)
++ *posted = from_seq;
++
++ return num_xmited;
++}
++
++static struct sk_buff *ibscif_create_disconnect_hdr(struct ibscif_dev *dev, u32 src_qpn,
++ u32 dst_qpn, enum ibscif_reason reason)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_alloc_tx_skb(dev, sizeof pdu->ibscif.disconnect, 0);
++ if (unlikely(!skb)) {
++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__);
++ return NULL;
++ }
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++
++ /* The eth_hdr and ack fields are set by the caller. */
++ pdu->ibscif.disconnect.hdr.opcode = __cpu_to_be16(ibscif_op_disconnect);
++ pdu->ibscif.disconnect.hdr.length = 0; /* Length has no meaning. */
++ pdu->ibscif.disconnect.hdr.dst_qp = __cpu_to_be32(dst_qpn);
++ pdu->ibscif.disconnect.hdr.src_qp = __cpu_to_be32(src_qpn);
++ pdu->ibscif.disconnect.hdr.seq_num = 0; /* seq_num has no meaning. */
++ pdu->ibscif.disconnect.hdr.hdr_size = __cpu_to_be16(sizeof(pdu->ibscif.disconnect));
++ pdu->ibscif.disconnect.reason = __cpu_to_be32(reason);
++
++ SET_SKB_DEV(skb, dev);
++ SET_SKB_WR(skb, NULL);
++
++ return skb;
++}
++
++void ibscif_send_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason)
++{
++ struct ibscif_dev *dev = qp->dev;
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ if (qp->ibqp.qp_type == IB_QPT_UD)
++ return;
++
++ if (qp->loopback) {
++ ibscif_loopback_disconnect(qp, reason);
++ return;
++ }
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return;
++ }
++
++ skb = ibscif_create_disconnect_hdr(dev, qp->ibqp.qp_num, qp->remote_qpn, reason);
++ if (unlikely(!skb))
++ return;
++
++ SET_SKB_EP(skb, qp->conn->ep);
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++
++ pdu->ibscif.disconnect.hdr.sq_ack_num = __cpu_to_be32(qp->wire.sq.rx.last_in_seq);
++ pdu->ibscif.disconnect.hdr.iq_ack_num = __cpu_to_be32(qp->wire.iq.rx.last_in_seq);
++
++ ibscif_dev_queue_xmit(skb);
++}
++
++void ibscif_reflect_disconnect(struct ibscif_qp *qp, struct base_hdr *hdr, struct sk_buff *in_skb, enum ibscif_reason reason)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ if (!qp || IS_ERR(qp)) {
++ if (qp != ERR_PTR(-ENOENT) && verbose)
++ printk(KERN_ALERT PFX "%s: qp=%p hdr=%p in_skb=%p reason=%d\n", __func__, qp, hdr, in_skb, reason);
++ return;
++ }
++
++ /* Don't send a disconnect for a disconnect. */
++ if (ibscif_pdu_base_type(hdr->opcode) == ibscif_op_disconnect)
++ return;
++
++ if (!qp->conn || !qp->conn->ep)
++ return;
++
++ skb = ibscif_create_disconnect_hdr((void *)in_skb->dev, hdr->dst_qp, hdr->src_qp, reason);
++ if (unlikely(!skb))
++ return;
++
++ SET_SKB_EP(skb, qp->conn->ep);
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++
++ pdu->ibscif.disconnect.hdr.sq_ack_num = 0; /* sq_ack_num has no meaning. */
++ pdu->ibscif.disconnect.hdr.iq_ack_num = 0; /* iq_ack_num has no meaning. */
++
++ ibscif_dev_queue_xmit(skb);
++}
++
++static struct sk_buff *ibscif_create_ack_hdr(struct ibscif_qp *qp, int size)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++ u32 sq_seq, iq_seq;
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return NULL;
++ }
++
++ skb = ibscif_alloc_tx_skb(qp->dev, size, 0);
++ if (unlikely(!skb)) {
++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__);
++ return NULL;
++ }
++
++ SET_SKB_DEV(skb, qp->dev);
++ SET_SKB_WR(skb, NULL);
++ SET_SKB_EP(skb, qp->conn->ep);
++
++ sq_seq = qp->wire.sq.rx.last_in_seq;
++ iq_seq = qp->wire.iq.rx.last_in_seq;
++ qp->wire.sq.rx.last_seq_acked = sq_seq;
++ qp->wire.iq.rx.last_seq_acked = iq_seq;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++
++ /* The opcode field is set by the caller. */
++ pdu->ibscif.hdr.length = 0; /* Length has no meaning. */
++ pdu->ibscif.hdr.dst_qp = __cpu_to_be32(qp->remote_qpn);
++ pdu->ibscif.hdr.src_qp = __cpu_to_be32(qp->ibqp.qp_num);
++ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */
++ pdu->ibscif.hdr.sq_ack_num = __cpu_to_be32(sq_seq);
++ pdu->ibscif.hdr.iq_ack_num = __cpu_to_be32(iq_seq);
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
++
++ return skb;
++}
++
++static void ibscif_send_ack(struct ibscif_qp *qp)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_ack_hdr(qp, sizeof pdu->ibscif.ack);
++ if (unlikely(!skb))
++ return;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.ack.hdr.opcode = __cpu_to_be16(ibscif_op_ack);
++
++ ibscif_dev_queue_xmit(skb);
++}
++
++static struct sk_buff *ibscif_create_close_hdr(struct ibscif_conn *conn, int size)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ if (unlikely(!conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: conn == NULL\n", __func__);
++ return NULL;
++ }
++
++ skb = ibscif_alloc_tx_skb(conn->dev, size, 0);
++ if (unlikely(!skb)) {
++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__);
++ return NULL;
++ }
++
++ SET_SKB_DEV(skb, conn->dev);
++ SET_SKB_WR(skb, NULL);
++ SET_SKB_EP(skb, conn->ep);
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++
++ /* The opcode field is set by the caller. */
++ pdu->ibscif.hdr.length = 0; /* Length has no meaning. */
++ pdu->ibscif.hdr.dst_qp = 0; /* unused */
++ pdu->ibscif.hdr.src_qp = 0; /* unused */
++ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */
++ pdu->ibscif.hdr.sq_ack_num = 0; /* unused */
++ pdu->ibscif.hdr.iq_ack_num = 0; /* unused */
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
++
++ return skb;
++}
++
++void ibscif_send_close(struct ibscif_conn *conn)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_close_hdr(conn, sizeof pdu->ibscif.close);
++ if (unlikely(!skb))
++ return;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.close.hdr.opcode = __cpu_to_be16(ibscif_op_close);
++
++ ibscif_dev_queue_xmit(skb);
++}
++
++void ibscif_send_reopen(struct ibscif_conn *conn)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_close_hdr(conn, sizeof pdu->ibscif.close);
++ if (unlikely(!skb))
++ return;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.close.hdr.opcode = __cpu_to_be16(ibscif_op_reopen);
++
++ ibscif_dev_queue_xmit(skb);
++}
++
++static struct sk_buff *ibscif_create_cm_hdr(struct ibscif_conn *conn, int size)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ if (unlikely(!conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: conn == NULL\n", __func__);
++ return NULL;
++ }
++
++ skb = ibscif_alloc_tx_skb(conn->dev, size, 0);
++ if (unlikely(!skb)) {
++ printk(KERN_ERR PFX "%s() can't allocate skb\n", __func__);
++ return NULL;
++ }
++
++ SET_SKB_DEV(skb, conn->dev);
++ SET_SKB_WR(skb, NULL);
++ SET_SKB_EP(skb, conn->ep);
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++
++ pdu->ibscif.hdr.opcode = __cpu_to_be16(ibscif_op_cm);
++ pdu->ibscif.hdr.length = 0; /* Length has no meaning. */
++ pdu->ibscif.hdr.dst_qp = 0; /* unused */
++ pdu->ibscif.hdr.src_qp = 0; /* unused */
++ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */
++ pdu->ibscif.hdr.sq_ack_num = 0; /* unused */
++ pdu->ibscif.hdr.iq_ack_num = 0; /* unused */
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
++
++ return skb;
++}
++
++int ibscif_send_cm_req(struct ibscif_cm *cm_ctx)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen);
++ if (unlikely(!skb))
++ return -ENOMEM;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.cm.req_ctx = __cpu_to_be64((u64)(uintptr_t)cm_ctx);
++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_REQ);
++ pdu->ibscif.cm.port = __cpu_to_be32((u32)cm_ctx->remote_addr.sin_port);
++ pdu->ibscif.cm.qpn = __cpu_to_be32(cm_ctx->qpn);
++ pdu->ibscif.cm.plen = __cpu_to_be32(cm_ctx->plen);
++ memcpy(pdu->ibscif.cm.pdata, cm_ctx->pdata, cm_ctx->plen);
++
++ ibscif_dev_queue_xmit(skb);
++
++ return 0;
++}
++
++int ibscif_send_cm_rep(struct ibscif_cm *cm_ctx)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen);
++ if (unlikely(!skb))
++ return -ENOMEM;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.cm.req_ctx = __cpu_to_be64(cm_ctx->peer_context);
++ pdu->ibscif.cm.rep_ctx = __cpu_to_be64((u64)(uintptr_t)cm_ctx);
++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_REP);
++ pdu->ibscif.cm.qpn = __cpu_to_be32(cm_ctx->qpn);
++ pdu->ibscif.cm.status = __cpu_to_be32(0);
++ pdu->ibscif.cm.plen = __cpu_to_be32(cm_ctx->plen);
++ memcpy(pdu->ibscif.cm.pdata, cm_ctx->pdata, cm_ctx->plen);
++
++ ibscif_dev_queue_xmit(skb);
++
++ return 0;
++}
++
++int ibscif_send_cm_rej(struct ibscif_cm *cm_ctx, const void *pdata, u8 plen)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + plen);
++ if (unlikely(!skb))
++ return -ENOMEM;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.cm.req_ctx = __cpu_to_be64(cm_ctx->peer_context);
++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_REJ);
++ pdu->ibscif.cm.status = __cpu_to_be32(-ECONNREFUSED);
++ pdu->ibscif.cm.plen = __cpu_to_be32((u32)plen);
++ memcpy(pdu->ibscif.cm.pdata, pdata, plen);
++
++ ibscif_dev_queue_xmit(skb);
++
++ return 0;
++}
++
++int ibscif_send_cm_rtu(struct ibscif_cm *cm_ctx)
++{
++ struct ibscif_full_frame *pdu;
++ struct sk_buff *skb;
++
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm);
++ if (unlikely(!skb))
++ return -ENOMEM;
++
++ pdu = (struct ibscif_full_frame *)skb->data;
++ pdu->ibscif.cm.rep_ctx = __cpu_to_be64(cm_ctx->peer_context);
++ pdu->ibscif.cm.cmd = __cpu_to_be32(IBSCIF_CM_RTU);
++
++ ibscif_dev_queue_xmit(skb);
++
++ return 0;
++}
++
++/* ---------------------- tx routines above this line ---------------------- */
++/* ---------------------- rx routines below this line ---------------------- */
++
++static void ibscif_protocol_error(struct ibscif_qp *qp, enum ibscif_reason reason)
++{
++ printk(KERN_NOTICE PFX "Disconnect due to protocol error %d\n", reason);
++ ibscif_qp_internal_disconnect(qp, reason);
++}
++
++int ibscif_process_sq_completions(struct ibscif_qp *qp)
++{
++ struct ibscif_cq *cq = to_cq(qp->ibqp.send_cq);
++ struct ibscif_wq *sq = &qp->sq;
++ struct ibscif_wr *wr;
++ struct ibscif_wc *wc;
++ int index, err = 0, i;
++
++ spin_lock_bh(&sq->lock);
++
++ /* Prevent divide by zero traps on wrap math. */
++ if (!sq->size)
++ goto out;
++
++ /* Iterate the send queue looking for deferred completions. */
++ for (i=sq->completions; i<sq->depth; i++) {
++ index = (sq->head + i) % sq->size;
++
++ wr = ibscif_get_wr(sq, index);
++ if (wr->state != WR_COMPLETED)
++ break;
++
++ sq->completions++;
++ sq->reap++;
++
++ /* An IQ request has been completed; update the throttling variables. */
++ if ((wr->opcode == WR_RDMA_READ) ||
++ (wr->opcode == WR_ATOMIC_CMP_AND_SWP) ||
++ (wr->opcode == WR_ATOMIC_FETCH_AND_ADD)) {
++ BUG_ON(!atomic_read(&qp->or_depth));
++ atomic_dec(&qp->or_depth);
++ atomic_dec(&qp->or_posted);
++ }
++
++ /* See if we need to generate a completion. */
++ if (!(wr->flags & IB_SEND_SIGNALED))
++ continue;
++
++ err = ibscif_reserve_cqe(cq, &wc);
++ if (unlikely(err))
++ break;
++
++ wc->ibwc.qp = &qp->ibqp;
++ wc->ibwc.src_qp = qp->remote_qpn;
++ wc->ibwc.wr_id = wr->id;
++ wc->ibwc.opcode = to_ib_wc_opcode(wr->opcode);
++ wc->ibwc.wc_flags = (((enum ib_wr_opcode)wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ||
++ ((enum ib_wr_opcode)wr->opcode == IB_WR_SEND_WITH_IMM)) ?
++ IB_WC_WITH_IMM : 0;
++ wc->ibwc.status = IB_WC_SUCCESS;
++ wc->ibwc.ex.imm_data = 0;
++ wc->ibwc.port_num = 1;
++ wc->ibwc.byte_len = (((enum ib_wr_opcode)wr->opcode == IB_WR_RDMA_READ) ||
++ ((enum ib_wr_opcode)wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) ||
++ ((enum ib_wr_opcode)wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) ?
++ wr->sar.rea.final_length : 0;
++ wc->wq = sq;
++ wc->reap = sq->reap;
++ sq->reap = 0;
++
++ ibscif_append_cqe(cq, wc, 0);
++ }
++out:
++ spin_unlock_bh(&sq->lock);
++
++ ibscif_notify_cq(cq);
++ return err;
++}
++
++static int ibscif_schedule_rx_completions(struct ibscif_qp *qp, int iq_flag, struct ibscif_rx_state *rx)
++{
++ struct ibscif_cq *cq = to_cq(qp->ibqp.recv_cq);
++ struct ibscif_wq *wq;
++ struct ibscif_wr *wr;
++ struct ibscif_wc *wc;
++ u32 last_in_seq;
++ int index, err, i;
++
++ wq = iq_flag ? &qp->sq /* yep, the SQ */ : &qp->rq;
++ last_in_seq = rx->last_in_seq;
++
++ /* Prevent divide by zero traps on wrap math. */
++ if (!wq->size)
++ return 0;
++
++ spin_lock_bh(&wq->lock);
++ for (i=wq->completions; i<wq->depth; i++) {
++ index = (wq->head + i) % wq->size;
++
++ wr = ibscif_get_wr(wq, index);
++
++ /* Skip over non-IQ entries. */
++ if (iq_flag &&
++ ((wr->opcode == WR_UD) ||
++ (wr->opcode == WR_SEND) ||
++ (wr->opcode == WR_SEND_WITH_IMM) ||
++ (wr->opcode == WR_RDMA_WRITE) ||
++ (wr->opcode == WR_RDMA_WRITE_WITH_IMM)))
++ continue;
++
++ /*
++ * If this WR hasn't seen the final segment in sequence then
++ * there is nothing more to process in this queue. We use the
++ * last-seen state as a qualifier because last_packet_seq will
++ * be uninitialized until the last packet is seen.
++ */
++ if ((wr->state != WR_LAST_SEEN) ||
++ seq_before(last_in_seq, wr->sar.rea.last_packet_seq))
++ break;
++
++ /* Clear references on memory regions. */
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++
++ if (iq_flag) {
++ /*
++ * Completed IQ replies are deferred until earlier
++ * non-IQ WRs have completed. This is determined
++ * with a second iteration of the WQ below.
++ */
++ wr->state = WR_COMPLETED;
++ continue; /* Look for more IQ completions. */
++ }
++
++ /* All receive queue completions are done here. */
++ err = ibscif_reserve_cqe(cq, &wc);
++ if (unlikely(err)) {
++ spin_unlock_bh(&wq->lock);
++ return err;
++ }
++
++ wc->ibwc.qp = &qp->ibqp;
++ wc->ibwc.src_qp = qp->remote_qpn;
++ wc->ibwc.wr_id = wr->id;
++ wc->ibwc.status = IB_WC_SUCCESS;
++ wc->ibwc.byte_len = wr->sar.rea.final_length;
++ wc->ibwc.port_num = 1;
++
++ if (ibscif_pdu_is_immed(wr->sar.rea.opcode)) {
++ DEV_STAT(qp->dev, recv_imm++);
++ wc->ibwc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
++ wc->ibwc.ex.imm_data = wr->sar.rea.immediate_data;
++ } else {
++ DEV_STAT(qp->dev, recv++);
++ wc->ibwc.opcode = IB_WC_RECV;
++ wc->ibwc.ex.imm_data = 0;
++ }
++
++ wc->wq = wq;
++ wc->reap = 1;
++ wq->completions++;
++
++ ibscif_append_cqe(cq, wc, !!ibscif_pdu_is_se(wr->sar.rea.opcode));
++ }
++ spin_unlock_bh(&wq->lock);
++
++ /* If this was the receive queue, there is no more processing to be done. */
++ if (!iq_flag) {
++ ibscif_notify_cq(cq);
++ return 0;
++ }
++
++ err = ibscif_process_sq_completions(qp);
++ if (unlikely(err))
++ return err;
++
++ /*
++ * If we just created room for a backlogged IQ stream request
++ * and there is a tx window, reschedule to get it sent.
++ */
++ if ((atomic_read(&qp->or_posted) > atomic_read(&qp->or_depth)) &&
++ (atomic_read(&qp->or_depth) < qp->max_or) &&
++ ibscif_tx_window(&qp->wire.sq.tx))
++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_SQ;
++
++ return 0;
++}
++
++static enum ibscif_schedule ibscif_process_wq_ack(struct ibscif_wq *wq, u32 seq_num)
++{
++ struct ibscif_tx_state *tx = &wq->wirestate->tx;
++ enum ibscif_schedule status = 0;
++ int throttled, index, err = 0, i;
++
++ if (!wq->size || !wq->depth)
++ return 0;
++
++ /* If this is old news, get out. */
++ if (!seq_after(seq_num, tx->last_ack_seq_recvd))
++ return 0;
++
++ /* Capture if window was closed before updating. */
++ throttled = !ibscif_tx_window(tx);
++ tx->last_ack_seq_recvd = seq_num;
++
++ /*
++ * If we were throttled and now have an open window, or are
++ * simply up to date, resume streaming transfers. This
++ * can be overwritten with other schedule states below.
++ */
++ if (throttled && ibscif_tx_window(tx))
++ status = SCHEDULE_RESUME;
++
++ spin_lock_bh(&wq->lock);
++ for (i=wq->completions; i<wq->depth; i++) {
++ struct ibscif_wr *wr;
++
++ index = (wq->head + i) % wq->size;
++
++ wr = ibscif_get_wr(wq, index);
++
++ /* Get out if the WR hasn't been scheduled. */
++ if (wr->state == WR_WAITING)
++ break;
++
++ if (seq_after(wr->sar.seg.ending_seq, seq_num)) {
++
++ if ((wr->state == WR_STARTED) && !ibscif_tx_unacked_window(tx))
++ status = SCHEDULE_RESUME;
++
++ break;
++ }
++
++ /* We seem to have a completed WQ element. */
++
++ if (is_iq(wq)) {
++ /*
++ * We have a completed IQ reply.
++ * Clear references to the memory region.
++ */
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++
++ /*
++ * It's more efficient to retire an IQ wqe manually
++ * here instead of calling ibscif_retire_wqes().
++ */
++ wq->head = (wq->head + 1) % wq->size;
++ wq->depth -= 1;
++
++ } else if ((wr->opcode == WR_RDMA_READ) ||
++ (wr->opcode == WR_ATOMIC_CMP_AND_SWP) ||
++ (wr->opcode == WR_ATOMIC_FETCH_AND_ADD)||
++ (wr->opcode == WR_UD && wr->use_rma) ||
++ (wr->opcode == WR_SEND && wr->use_rma) ||
++ (wr->opcode == WR_SEND_WITH_IMM && wr->use_rma) ||
++ (wr->opcode == WR_RDMA_WRITE && wr->use_rma) ||
++ (wr->opcode == WR_RDMA_WRITE_WITH_IMM && wr->use_rma)) {
++ /*
++ * We have a request acknowledgment.
++ * Note the state change so it isn't retried.
++ *
++ * BTW, these request types are completed in the
++ * ibscif_schedule_rx_completions() routine when
++ * the data has arrived.
++ */
++ if (wr->state == WR_WAITING_FOR_ACK)
++ wr->state = WR_WAITING_FOR_RSP;
++
++ } else if (wr->state != WR_COMPLETED) {
++ /* Request is complete so no need to keep references. */
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ wr->state = WR_COMPLETED;
++ }
++ }
++ spin_unlock_bh(&wq->lock);
++
++ if (is_sq(wq)) {
++ err = ibscif_process_sq_completions(wq->qp);
++ if (unlikely(err)) {
++ printk(KERN_ALERT PFX "%s: sq completion error: err=%d \n", __func__, err);
++ ibscif_protocol_error(wq->qp, IBSCIF_REASON_QP_FATAL);
++ status = 0;
++ }
++ }
++
++ return status;
++}
++
++static void ibscif_process_ack(struct ibscif_qp *qp, struct base_hdr *hdr)
++{
++ qp->schedule |= ibscif_process_wq_ack(&qp->sq, hdr->sq_ack_num) | SCHEDULE_SQ;
++ qp->schedule |= ibscif_process_wq_ack(&qp->iq, hdr->iq_ack_num) | SCHEDULE_IQ;
++}
++
++/* Note that the WQ lock is held on success. */
++static struct ibscif_wr *ibscif_reserve_wqe(struct ibscif_wq *wq)
++{
++ int err;
++
++ spin_lock_bh(&wq->lock);
++
++ if (unlikely(wq->qp->state != QP_CONNECTED)) {
++ err = -ENOTCONN;
++ goto out;
++ }
++ if (unlikely(!wq->size)) {
++ err = -ENOSPC;
++ goto out;
++ }
++ if (unlikely(wq->depth == wq->size)) {
++ err = -ENOBUFS;
++ goto out;
++ }
++
++ return ibscif_get_wr(wq, wq->tail);
++out:
++ spin_unlock_bh(&wq->lock);
++ return ERR_PTR(err);
++}
++
++/* Note that this assumes the WQ lock is currently held. */
++static void ibscif_append_wqe(struct ibscif_wq *wq)
++{
++ DEV_STAT(wq->qp->dev, wr_opcode[ibscif_get_wr(wq, wq->tail)->opcode]++);
++ ibscif_append_wq(wq);
++ spin_unlock_bh(&wq->lock);
++}
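++
++/*
++ * ibscif_reserve_wqe() and ibscif_append_wqe() form a two-phase post:
++ * reserve returns with wq->lock held so the caller can fill in the WR,
++ * and append publishes the entry and drops the lock.
++ */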
++
++static struct ibscif_wr *ibscif_wr_by_msg_id(struct ibscif_wq *wq, u32 msg_id)
++{
++ struct ibscif_wr *wr;
++ int size = wq->size;
++
++ if (!size)
++ return NULL;
++
++ wr = ibscif_get_wr(wq, msg_id % size);
++ if (wr->use_rma)
++ return (wr->rma_id == msg_id) ? wr : NULL;
++ else
++ return (wr->msg_id == msg_id) ? wr : NULL;
++}
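++
++/*
++ * msg_ids are assigned sequentially as WRs are posted, so a WR lives in
++ * slot (msg_id % size). Example: size = 8, msg_id = 11 maps to slot 3;
++ * the id comparison above rejects the lookup if slot 3 has since been
++ * reused by a newer WR (e.g. msg_id 19).
++ */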
++
++static int ibscif_ds_dma(struct ibscif_qp *qp, struct page **page, u32 page_offset, struct sk_buff *skb, u32 dma_len, int head_copied)
++{
++ void *dst, *src = skb->data;
++ u32 copy_len;
++
++ while (dma_len) {
++ copy_len = min(dma_len, (u32)PAGE_SIZE - page_offset);
++
++ dst = ibscif_map_dst(*page) + page_offset;
++ head_copied = ibscif_atomic_copy(dst, src, copy_len, head_copied);
++ ibscif_unmap_dst(*page, dst);
++
++ src += copy_len;
++ dma_len -= copy_len;
++
++ page++;
++ page_offset = 0;
++ }
++
++ return head_copied;
++}
++
++static int ibscif_place_data(struct ibscif_qp *qp, struct ibscif_wr *wr, struct sk_buff *skb,
++ u32 length, u32 offset, u32 seq_num)
++{
++ struct ibscif_ds *ds;
++ struct ibscif_mr *mr;
++ int seg_num, page_index;
++ u32 dma_len, ds_offset, page_offset;
++ int head_copied = 0;
++
++ if (!length) {
++ ds = NULL;
++ dma_len = 0;
++ ds_offset = 0;
++ goto no_data;
++ }
++
++ /* See if we can use our ds cache. */
++ if (likely((wr->sar.rea.current_ds) && (wr->sar.rea.last_seen_seq == seq_num - 1))) {
++ /* Take the cached entries. */
++ ds = wr->sar.rea.current_ds;
++ mr = ds->mr;
++ ds_offset = wr->sar.rea.current_ds_offset;
++ seg_num = ds - wr->ds_list; /* pointer difference is already in elements */
++ } else {
++ ds_offset = offset;
++ ds = wr->ds_list;
++ seg_num = 0;
++ while ((ds_offset >= ds->length) && (seg_num < wr->num_ds)) {
++ ds_offset -= ds->length;
++ ds++;
++ seg_num++;
++ }
++next_ds:
++ if (unlikely(seg_num >= wr->num_ds))
++ return -EMSGSIZE;
++ /*
++ * A memory region which has posted receives against it can
++ * still be freed; therefore, we need to burn the cycles here to
++ * make sure it's still valid. We'll take a reference on it now
++ * that data is coming in.
++ */
++ if (!ds->in_use) {
++ mr = ibscif_get_mr(ds->lkey);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++ ds->in_use = 1;
++ if (unlikely(mr != ds->mr))
++ return -ENXIO;
++ if (unlikely(!(mr->access & IB_ACCESS_LOCAL_WRITE)))
++ return -EACCES;
++ } else
++ mr = ds->mr;
++ }
++
++ /* Place data for this descriptor. Routine will handle page boundary crossings. */
++ page_offset = ds->offset + ds_offset + (mr->addr & ~PAGE_MASK);
++ page_index = page_offset >> PAGE_SHIFT;
++ page_offset &= ~PAGE_MASK;
++
++ dma_len = min(ds->length - ds_offset, length);
++ head_copied = ibscif_ds_dma(qp, &mr->page[page_index], page_offset, skb, dma_len, head_copied);
++ length -= dma_len;
++ if (length) {
++ ds++;
++ seg_num++;
++ ds_offset = 0;
++ skb_pull(skb, dma_len);
++ goto next_ds;
++ }
++no_data:
++ wr->sar.rea.last_seen_seq = seq_num;
++
++ if (ds && ((ds_offset + dma_len) < ds->length)) {
++ wr->sar.rea.current_ds = ds;
++ wr->sar.rea.current_ds_offset = ds_offset + dma_len;
++ } else
++ wr->sar.rea.current_ds = NULL; /* Force a validation of the next ds. */
++
++ return 0;
++}
++
++static int ibscif_process_ud(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ int err;
++ int grh_size = 40;
++ int msg_id;
++
++ if (unlikely(qp->ibqp.qp_type != IB_QPT_UD)) {
++ printk(KERN_ALERT PFX "%s: UD packet received on non-UD QP\n", __func__);
++ return -EINVAL;
++ }
++
++ pdu->ud.msg_length = __be32_to_cpu(pdu->ud.msg_length);
++ pdu->ud.msg_offset = __be32_to_cpu(pdu->ud.msg_offset);
++
++ /* Only one pdu is allowed per UD packet; otherwise drop the pdu */
++ if (unlikely(pdu->ud.msg_length != pdu->hdr.length || pdu->ud.msg_offset)) {
++ printk(KERN_INFO PFX "%s: dropping fragmented UD packet. total_length=%d msg_length=%d msg_offset=%d\n",
++ __func__, pdu->hdr.length, pdu->ud.msg_length, pdu->ud.msg_offset);
++ return -EINVAL;
++ }
++
++ spin_lock_bh(&qp->rq.lock);
++ if (unlikely(qp->rq.ud_msg_id >= qp->rq.next_msg_id)) {
++ spin_unlock_bh(&qp->rq.lock);
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ __func__, pdu->send.msg_id, qp->rq.next_msg_id);
++ return -EBADRQC;
++ }
++ msg_id = qp->rq.ud_msg_id++;
++ spin_unlock_bh(&qp->rq.lock);
++
++ wr = ibscif_wr_by_msg_id(&qp->rq, msg_id);
++ if (unlikely(!wr))
++ return -EBADR;
++
++ if (unlikely((pdu->ud.msg_length + grh_size) > wr->length))
++ return -EMSGSIZE;
++
++ /* GRH is included as part of the received message */
++ skb_pull(skb, sizeof(pdu->ud)-grh_size);
++
++ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length+grh_size, pdu->ud.msg_offset, pdu->hdr.seq_num);
++ if (unlikely(err))
++ return err;
++
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = 0;
++ wr->sar.rea.immediate_data = 0;
++ wr->sar.rea.final_length = pdu->ud.msg_length+grh_size;
++
++ return 0;
++}
++
++static int ibscif_process_send(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ int err;
++
++ pdu->send.msg_id = __be32_to_cpu(pdu->send.msg_id);
++ spin_lock_bh(&qp->rq.lock);
++ if (unlikely(pdu->send.msg_id >= qp->rq.next_msg_id)) {
++ spin_unlock_bh(&qp->rq.lock);
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ __func__, pdu->send.msg_id, qp->rq.next_msg_id);
++ return -EBADRQC;
++ }
++ spin_unlock_bh(&qp->rq.lock);
++
++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->send.msg_id);
++ if (unlikely(!wr))
++ return -EBADR;
++
++ pdu->send.msg_length = __be32_to_cpu(pdu->send.msg_length);
++ if (unlikely(pdu->send.msg_length > wr->length))
++ return -EMSGSIZE;
++
++ pdu->send.msg_offset = __be32_to_cpu(pdu->send.msg_offset);
++ if (unlikely(pdu->send.msg_offset > pdu->send.msg_length))
++ return -EINVAL;
++
++ if (unlikely((pdu->hdr.length + pdu->send.msg_offset) > wr->length))
++ return -ESPIPE;
++
++ skb_pull(skb, sizeof(pdu->send));
++
++ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length, pdu->send.msg_offset, pdu->hdr.seq_num);
++ if (unlikely(err))
++ return err;
++
++ if (ibscif_pdu_is_last(pdu->hdr.opcode)) {
++ /*
++ * We've got the last of the message data.
++ * We always assume immediate data; if not needed, no harm, no foul.
++ */
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->send.immed_data);
++ wr->sar.rea.final_length = pdu->send.msg_length;
++ }
++
++ return 0;
++}
++
++static int ibscif_process_write(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ struct ibscif_mr *mr;
++ u64 rdma_addr;
++ u32 rdma_len, page_offset;
++ int page_index;
++
++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_WRITE)))
++ return -EACCES;
++
++ /* Writes with immediate data consume an rq wqe. */
++ if (ibscif_pdu_is_immed(pdu->hdr.opcode)) {
++ pdu->write.msg_id = __be32_to_cpu(pdu->write.msg_id);
++ spin_lock_bh(&qp->rq.lock);
++ if (unlikely(pdu->write.msg_id >= qp->rq.next_msg_id)) {
++ spin_unlock_bh(&qp->rq.lock);
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ __func__, pdu->write.msg_id, qp->rq.next_msg_id);
++ return -EBADRQC;
++ }
++ spin_unlock_bh(&qp->rq.lock);
++
++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->write.msg_id);
++ if (unlikely(!wr))
++ return -EBADR;
++ } else
++ wr = NULL;
++
++ skb_pull(skb, sizeof(pdu->write));
++
++ rdma_addr = __be64_to_cpu(pdu->write.rdma_address);
++ rdma_len = pdu->hdr.length;
++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr))
++ return -EOVERFLOW;
++
++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->write.rdma_key), rdma_addr,
++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_WRITE);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++
++ page_offset = rdma_addr & ~PAGE_MASK;
++ page_index = ((rdma_addr - mr->addr) + (mr->addr & ~PAGE_MASK)) >> PAGE_SHIFT;
++
++ ibscif_ds_dma(qp, &mr->page[page_index], page_offset, skb, rdma_len, 0);
++
++ ibscif_put_mr(mr);
++
++ if (wr) {
++ wr->sar.rea.final_length += rdma_len;
++ if (ibscif_pdu_is_last(pdu->hdr.opcode)) {
++ /* We've got the last of the write data. */
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->write.immed_data);
++ }
++ }
++
++ return 0;
++}
++
++static int ibscif_process_read(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ struct ibscif_mr *mr;
++ u64 rdma_addr;
++ u32 rdma_len;
++
++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_READ)))
++ return -EACCES;
++
++ rdma_addr = __be64_to_cpu(pdu->read_req.rdma_address);
++ rdma_len = __be32_to_cpu(pdu->read_req.rdma_length);
++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr))
++ return -EOVERFLOW;
++
++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->read_req.rdma_key), rdma_addr,
++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_READ);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++
++ wr = ibscif_reserve_wqe(&qp->iq);
++ if (unlikely(IS_ERR(wr))) {
++ ibscif_put_mr(mr);
++ return PTR_ERR(wr);
++ }
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->opcode = WR_RDMA_READ_RSP;
++ wr->state = WR_WAITING;
++ wr->length = rdma_len;
++ wr->msg_id = __be32_to_cpu(pdu->read_req.rdma_id);
++ wr->num_ds = 1;
++ wr->ds_list[0].mr = mr;
++ wr->ds_list[0].offset = rdma_addr - mr->addr;
++ wr->ds_list[0].length = rdma_len;
++ wr->ds_list[0].in_use = 1;
++
++ ibscif_append_wqe(&qp->iq);
++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ;
++
++ return 0;
++}
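++
++/*
++ * Note that ibscif_process_read() never consumes an rq wqe: the inbound
++ * read request is converted into a WR_RDMA_READ_RSP entry on the IQ and
++ * the scheduler streams the payload back from the local MR.
++ */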
++
++static int ibscif_process_read_rsp(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ int err;
++
++ /* Find the requesting sq wr. */
++ wr = ibscif_wr_by_msg_id(&qp->sq, __be32_to_cpu(pdu->read_rsp.rdma_id));
++ if (unlikely(!wr))
++ return -EBADR;
++ if (unlikely(wr->opcode != WR_RDMA_READ))
++ return -ENOMSG;
++
++ skb_pull(skb, sizeof(pdu->read_rsp));
++
++ pdu->read_rsp.rdma_offset = __be32_to_cpu(pdu->read_rsp.rdma_offset);
++
++ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length, pdu->read_rsp.rdma_offset, pdu->hdr.seq_num);
++ if (unlikely(err))
++ return err;
++
++ if (ibscif_pdu_is_last(pdu->hdr.opcode)) {
++ /* We've got the last of the read data. */
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.final_length = pdu->read_rsp.rdma_offset + pdu->hdr.length;
++ }
++
++ return 0;
++}
++
++static int ibscif_process_atomic_req(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ struct ibscif_mr *mr;
++ struct page *page;
++ u64 *addr;
++ u32 offset, rkey, msg_id;
++ u16 opcode;
++
++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_ATOMIC)))
++ return -EACCES;
++
++ opcode = ibscif_pdu_base_type(pdu->hdr.opcode);
++ if (opcode == ibscif_op_comp_swap) {
++ addr = (u64 *)__be64_to_cpu(pdu->comp_swap.atomic_address);
++ rkey = __be32_to_cpu(pdu->comp_swap.atomic_key);
++ msg_id = __be32_to_cpu(pdu->comp_swap.atomic_id);
++ } else {
++ addr = (u64 *)__be64_to_cpu(pdu->fetch_add.atomic_address);
++ rkey = __be32_to_cpu(pdu->fetch_add.atomic_key);
++ msg_id = __be32_to_cpu(pdu->fetch_add.atomic_id);
++ }
++
++ if (unlikely((u64)addr & (sizeof *addr - 1)))
++ return -EADDRNOTAVAIL;
++ if (unlikely(((u64)addr + sizeof *addr - 1) < (u64)addr))
++ return -EOVERFLOW;
++
++ mr = ibscif_validate_mr(rkey, (u64)addr, sizeof *addr, qp->ibqp.pd, IB_ACCESS_REMOTE_ATOMIC);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++
++ wr = ibscif_reserve_wqe(&qp->iq);
++ if (unlikely(IS_ERR(wr))) {
++ ibscif_put_mr(mr);
++ return PTR_ERR(wr);
++ }
++
++ /* Determine which page to map. */
++ offset = ((u64)addr - mr->addr) + (mr->addr & ~PAGE_MASK);
++ page = mr->page[offset >> PAGE_SHIFT];
++ offset &= ~PAGE_MASK;
++
++ /* Lock to perform the atomic operation atomically. */
++ spin_lock_bh(&qp->dev->atomic_op);
++
++ addr = ibscif_map_src(page) + offset;
++ wr->atomic_rsp.orig_data = *addr;
++ if (opcode == ibscif_op_fetch_add)
++ *addr += __be64_to_cpu(pdu->fetch_add.add_data);
++ else if (wr->atomic_rsp.orig_data == __be64_to_cpu(pdu->comp_swap.comp_data))
++ *addr = __be64_to_cpu(pdu->comp_swap.swap_data);
++ ibscif_unmap_src(page, addr);
++
++ ibscif_put_mr(mr);
++
++ /* Atomic operation is complete. */
++ spin_unlock_bh(&qp->dev->atomic_op);
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->opcode = WR_ATOMIC_RSP;
++ wr->state = WR_WAITING;
++ wr->length = 0;
++ wr->msg_id = msg_id;
++ wr->num_ds = 0;
++ wr->atomic_rsp.opcode = (opcode == ibscif_op_comp_swap) ? ibscif_op_comp_swap_rsp : ibscif_op_fetch_add_rsp;
++ /* The wr->atomic_rsp.orig_data field was set above. */
++
++ ibscif_append_wqe(&qp->iq);
++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ;
++
++ return 0;
++}
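++
++/*
++ * The qp->dev->atomic_op lock used above is per-device, so atomics are
++ * serialized only against other ibscif atomics on the same device, not
++ * against arbitrary CPU stores to the target page.
++ */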
++
++static int ibscif_process_atomic_rsp(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ u16 opcode;
++ int err;
++
++ if (unlikely(!ibscif_pdu_is_last(pdu->atomic_rsp.hdr.opcode)))
++ return -EINVAL;
++
++ /* Find the requesting sq wr. */
++ wr = ibscif_wr_by_msg_id(&qp->sq, __be32_to_cpu(pdu->atomic_rsp.atomic_id));
++ if (unlikely(!wr))
++ return -EBADR;
++
++ opcode = ibscif_pdu_base_type(pdu->hdr.opcode);
++ if (unlikely(wr->opcode != ((opcode == ibscif_op_comp_swap_rsp) ?
++ WR_ATOMIC_CMP_AND_SWP : WR_ATOMIC_FETCH_AND_ADD)))
++ return -ENOMSG;
++
++ skb_pull(skb, (unsigned long)&pdu->atomic_rsp.orig_data - (unsigned long)pdu);
++
++ pdu->atomic_rsp.orig_data = __be64_to_cpu(pdu->atomic_rsp.orig_data);
++ err = ibscif_place_data(qp, wr, skb, sizeof pdu->atomic_rsp.orig_data, 0, pdu->hdr.seq_num);
++ if (unlikely(err))
++ return err;
++
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.final_length = sizeof pdu->atomic_rsp.orig_data;
++
++ return 0;
++}
++
++static int ibscif_process_disconnect(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ ibscif_qp_remote_disconnect(qp, __be32_to_cpu(pdu->disconnect.reason));
++ return 0;
++}
++
++static int ibscif_process_send_rma(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_ds *ds;
++ struct ibscif_wr *wr;
++ struct ibscif_mr *mr;
++ struct ibscif_mreg_info *mreg;
++ u32 num_rma_addrs;
++ u64 rma_offset;
++ u32 rma_length;
++ u32 total;
++ int seg_num;
++ int cur_rma_addr;
++ u32 xfer_len, ds_offset;
++ int err;
++ u64 loffset;
++ u32 dma_size = 0;
++ int rma_flag = 0;
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return -EACCES;
++ }
++
++ pdu->send.msg_id = __be32_to_cpu(pdu->send.msg_id);
++ spin_lock_bh(&qp->rq.lock);
++ if (unlikely(pdu->send.msg_id >= qp->rq.next_msg_id)) {
++ spin_unlock_bh(&qp->rq.lock);
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ __func__, pdu->send.msg_id, qp->rq.next_msg_id);
++ return -EBADRQC;
++ }
++ spin_unlock_bh(&qp->rq.lock);
++
++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->send.msg_id);
++ if (unlikely(!wr))
++ return -EBADR;
++
++ pdu->send.msg_length = __be32_to_cpu(pdu->send.msg_length);
++ if (unlikely(pdu->send.msg_length > wr->length))
++ return -EMSGSIZE;
++
++ pdu->send.msg_offset = __be32_to_cpu(pdu->send.msg_offset);
++ if (unlikely(pdu->send.msg_offset > pdu->send.msg_length))
++ return -EINVAL;
++
++ if (unlikely((pdu->hdr.length + pdu->send.msg_offset) > wr->length))
++ return -ESPIPE;
++
++ total = 0;
++
++ num_rma_addrs = __be32_to_cpu(pdu->send.num_rma_addrs);
++ cur_rma_addr = 0;
++ rma_offset = __be64_to_cpu(pdu->send.rma_addrs[cur_rma_addr].offset);
++ rma_length = __be32_to_cpu(pdu->send.rma_addrs[cur_rma_addr].length);
++
++ ds_offset = pdu->send.msg_offset;
++ ds = wr->ds_list;
++ seg_num = 0;
++ while ((ds_offset >= ds->length) && (seg_num < wr->num_ds)) {
++ ds_offset -= ds->length;
++ ds++;
++ seg_num++;
++ }
++
++ err = 0;
++ while (total < pdu->send.msg_length && !err) {
++ if (unlikely(seg_num >= wr->num_ds))
++ return -EMSGSIZE;
++
++ if (!ds->in_use) {
++ mr = ibscif_get_mr(ds->lkey);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++ ds->in_use = 1;
++ if (unlikely(mr != ds->mr))
++ return -ENXIO;
++ if (unlikely(!(mr->access & IB_ACCESS_LOCAL_WRITE)))
++ return -EACCES;
++ } else
++ mr = ds->mr;
++
++ mreg = ibscif_mr_get_mreg(mr, qp->conn);
++ if (!mreg)
++ return -EACCES;
++
++ while (ds->length > ds_offset) {
++ xfer_len = min( ds->length - ds_offset, rma_length );
++ if (xfer_len) {
++ loffset = mreg->offset + ds->offset + ds_offset;
++ dma_size += ibscif_dma_size(xfer_len, rma_offset);
++
++ if ((total + xfer_len >= pdu->send.msg_length) && dma_size)
++ rma_flag = SCIF_RMA_SYNC;
++
++ err = scif_readfrom(qp->conn->ep, loffset, xfer_len, rma_offset, rma_flag);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: scif_readfrom (%d bytes) returns %d\n", __func__, xfer_len, err);
++ break;
++ }
++
++ ds_offset += xfer_len;
++ rma_offset += xfer_len;
++ rma_length -= xfer_len;
++ total += xfer_len;
++
++ if (total >= pdu->send.msg_length)
++ break;
++ }
++ if (rma_length == 0) {
++ cur_rma_addr++;
++ if (unlikely(cur_rma_addr >= num_rma_addrs))
++ return -EMSGSIZE;
++
++ rma_offset = __be64_to_cpu(pdu->send.rma_addrs[cur_rma_addr].offset);
++ rma_length = __be32_to_cpu(pdu->send.rma_addrs[cur_rma_addr].length);
++ }
++ }
++
++ seg_num++;
++ ds++;
++ }
++
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->send.immed_data);
++ wr->sar.rea.final_length = pdu->send.msg_length;
++
++ /* Respond to the initiator with the result */
++ wr = ibscif_reserve_wqe(&qp->iq);
++ if (unlikely(IS_ERR(wr))) {
++ return PTR_ERR(wr);
++ }
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->opcode = WR_RMA_RSP;
++ wr->state = WR_WAITING;
++ wr->length = 0;
++ wr->msg_id = __be32_to_cpu(pdu->send.rma_id);
++ wr->num_ds = 0;
++ wr->rma_rsp.xfer_length = total;
++ wr->rma_rsp.error = err;
++
++ ibscif_append_wqe(&qp->iq);
++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ;
++
++ return 0;
++}
++
++static int ibscif_process_write_rma(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ struct ibscif_mr *mr;
++ u64 rdma_addr;
++ u32 rdma_len;
++ struct ibscif_mreg_info *mreg;
++ u32 num_rma_addrs;
++ u64 rma_offset;
++ u32 rma_length;
++ u32 total;
++ int i;
++ int err;
++ u64 loffset;
++ u32 dma_size = 0;
++ int rma_flag = 0;
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return -EACCES;
++ }
++
++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_WRITE)))
++ return -EACCES;
++
++ /* Writes with immediate data consume an rq wqe. */
++ if (ibscif_pdu_is_immed(pdu->hdr.opcode)) {
++ pdu->write.msg_id = __be32_to_cpu(pdu->write.msg_id);
++ spin_lock_bh(&qp->rq.lock);
++ if (unlikely(pdu->write.msg_id >= qp->rq.next_msg_id)) {
++ spin_unlock_bh(&qp->rq.lock);
++ return -EBADRQC;
++ }
++ spin_unlock_bh(&qp->rq.lock);
++
++ wr = ibscif_wr_by_msg_id(&qp->rq, pdu->write.msg_id);
++ if (unlikely(!wr))
++ return -EBADR;
++ } else
++ wr = NULL;
++
++ rdma_addr = __be64_to_cpu(pdu->write.rdma_address);
++ rdma_len = __be32_to_cpu(pdu->write.rma_length);
++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr))
++ return -EOVERFLOW;
++
++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->write.rdma_key), rdma_addr,
++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_WRITE);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++
++ mreg = ibscif_mr_get_mreg(mr, qp->conn);
++ if (!mreg)
++ return -EACCES;
++
++ total = 0;
++ err = 0;
++ num_rma_addrs = __be32_to_cpu(pdu->write.num_rma_addrs);
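++ /* Pull each remote RMA segment into the target MR; as in the send
++ * path, only the final scif_readfrom() is made synchronous. */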
++ for (i = 0; i < num_rma_addrs; i++) {
++ rma_offset = __be64_to_cpu(pdu->write.rma_addrs[i].offset);
++ rma_length = __be32_to_cpu(pdu->write.rma_addrs[i].length);
++
++ if (rdma_len < rma_length)
++ rma_length = rdma_len;
++
++ if (rma_length == 0)
++ continue;
++
++ loffset = mreg->offset + (rdma_addr - mr->addr) + total;
++ dma_size += ibscif_dma_size(rma_length, rma_offset);
++
++ if ((i == num_rma_addrs - 1) && dma_size)
++ rma_flag = SCIF_RMA_SYNC;
++
++ err = scif_readfrom(qp->conn->ep, loffset, rma_length, rma_offset, rma_flag);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: scif_readfrom (%d bytes) returns %d\n", __func__, rma_length, err);
++ break;
++ }
++
++ rdma_len -= rma_length;
++ total += rma_length;
++ }
++
++ ibscif_put_mr(mr);
++
++ if (wr) {
++ wr->sar.rea.final_length = total;
++ wr->state = WR_LAST_SEEN;
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->write.immed_data);
++ }
++
++ /* Respond to the initiator with the result */
++ wr = ibscif_reserve_wqe(&qp->iq);
++ if (unlikely(IS_ERR(wr))) {
++ return PTR_ERR(wr);
++ }
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->opcode = WR_RMA_RSP;
++ wr->state = WR_WAITING;
++ wr->length = 0;
++ wr->msg_id = __be32_to_cpu(pdu->write.rma_id);
++ wr->num_ds = 0;
++ wr->rma_rsp.xfer_length = total;
++ wr->rma_rsp.error = err;
++
++ ibscif_append_wqe(&qp->iq);
++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ;
++
++ return 0;
++}
++
++static int ibscif_process_read_rma(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++ struct ibscif_mr *mr;
++ u64 rdma_addr;
++ u32 rdma_len;
++ struct ibscif_mreg_info *mreg;
++ u32 num_rma_addrs;
++ u64 rma_offset;
++ u32 rma_length;
++ u32 total;
++ int i;
++ int err;
++ u64 loffset;
++ u32 dma_size = 0;
++ int rma_flag = 0;
++
++ if (unlikely(!qp->conn)) {
++ printk(KERN_ALERT PFX "%s: ERROR: qp->conn == NULL\n", __func__);
++ return -EACCES;
++ }
++
++ if (unlikely(!(qp->access & IB_ACCESS_REMOTE_READ)))
++ return -EACCES;
++
++ rdma_addr = __be64_to_cpu(pdu->read_req.rdma_address);
++ rdma_len = __be32_to_cpu(pdu->read_req.rdma_length);
++ if (unlikely((rdma_addr + (rdma_len - 1)) < rdma_addr))
++ return -EOVERFLOW;
++
++ mr = ibscif_validate_mr(__be32_to_cpu(pdu->read_req.rdma_key), rdma_addr,
++ rdma_len, qp->ibqp.pd, IB_ACCESS_REMOTE_READ);
++ if (unlikely(IS_ERR(mr)))
++ return PTR_ERR(mr);
++
++ mreg = ibscif_mr_get_mreg(mr, qp->conn);
++ if (!mreg)
++ return -EACCES;
++
++ total = 0;
++ err = 0;
++ num_rma_addrs = __be32_to_cpu(pdu->read_req.num_rma_addrs);
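++ /* Reads reverse the direction: push local MR contents out to the
++ * initiator with scif_writeto(), syncing on the final segment. */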
++ for (i = 0; i < num_rma_addrs; i++) {
++ rma_offset = __be64_to_cpu(pdu->read_req.rma_addrs[i].offset);
++ rma_length = __be32_to_cpu(pdu->read_req.rma_addrs[i].length);
++
++ if (rdma_len < rma_length)
++ rma_length = rdma_len;
++
++ if (rma_length == 0)
++ continue;
++
++ loffset = mreg->offset + (rdma_addr - mr->addr) + total;
++ dma_size += ibscif_dma_size(rma_length, rma_offset);
++
++ if ((i == num_rma_addrs - 1) && dma_size)
++ rma_flag = SCIF_RMA_SYNC;
++
++ err = scif_writeto(qp->conn->ep, loffset, rma_length, rma_offset, rma_flag);
++ if (err) {
++ printk(KERN_ALERT PFX "%s: scif_writeto (%d bytes) returns %d\n", __func__, rma_length, err);
++ break;
++ }
++
++ rdma_len -= rma_length;
++ total += rma_length;
++ }
++
++ ibscif_put_mr(mr);
++
++ /* Respond to the initiator with the result */
++ wr = ibscif_reserve_wqe(&qp->iq);
++ if (unlikely(IS_ERR(wr))) {
++ return PTR_ERR(wr);
++ }
++
++ memset(&wr->sar, 0, sizeof wr->sar);
++
++ wr->opcode = WR_RMA_RSP;
++ wr->state = WR_WAITING;
++ wr->length = 0;
++ wr->msg_id = __be32_to_cpu(pdu->read_req.rdma_id);
++ wr->num_ds = 0;
++ wr->rma_rsp.xfer_length = total;
++ wr->rma_rsp.error = err;
++
++ ibscif_append_wqe(&qp->iq);
++ qp->schedule |= SCHEDULE_RESUME | SCHEDULE_IQ;
++
++ return 0;
++}
++
++static int ibscif_process_rma_rsp(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ struct ibscif_wr *wr;
++
++ wr = ibscif_wr_by_msg_id(&qp->sq, __be32_to_cpu(pdu->rma_rsp.rma_id));
++ if (unlikely(!wr))
++ return -EBADR;
++ if (unlikely(!wr->use_rma))
++ return -ENOMSG;
++
++ if (wr->opcode == WR_RDMA_READ) {
++ /* ibscif_clear_ds_refs() is called in ibscif_schedule_rx_completions() */
++ wr->state = WR_LAST_SEEN;
++ } else {
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++ wr->state = WR_COMPLETED;
++ }
++
++ wr->sar.rea.opcode = pdu->hdr.opcode;
++ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
++ wr->sar.rea.final_length = pdu->rma_rsp.xfer_length;
++
++ return 0;
++}
++
++static int ibscif_process_pdu(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct sk_buff *skb)
++{
++ int err;
++
++ switch (ibscif_pdu_base_type(pdu->hdr.opcode)) {
++ case ibscif_op_ud:
++ err = ibscif_process_ud(qp, pdu, skb);
++ break;
++ case ibscif_op_send:
++ err = ibscif_process_send(qp, pdu, skb);
++ break;
++ case ibscif_op_write:
++ err = ibscif_process_write(qp, pdu, skb);
++ break;
++ case ibscif_op_read:
++ err = ibscif_process_read(qp, pdu, skb);
++ break;
++ case ibscif_op_read_rsp:
++ err = ibscif_process_read_rsp(qp, pdu, skb);
++ break;
++ case ibscif_op_comp_swap_rsp:
++ case ibscif_op_fetch_add_rsp:
++ err = ibscif_process_atomic_rsp(qp, pdu, skb);
++ break;
++ case ibscif_op_comp_swap:
++ case ibscif_op_fetch_add:
++ err = ibscif_process_atomic_req(qp, pdu, skb);
++ break;
++ case ibscif_op_ack:
++ /* Handled in piggyback ack processing. */
++ err = 0;
++ break;
++ case ibscif_op_disconnect:
++ /* Post send completions before the disconnect flushes the queues. */
++ ibscif_process_ack(qp, &pdu->hdr);
++ /* Now disconnect the QP. */
++ err = ibscif_process_disconnect(qp, pdu, skb);
++ break;
++ case ibscif_op_send_rma:
++ err = ibscif_process_send_rma(qp, pdu, skb);
++ break;
++ case ibscif_op_write_rma:
++ err = ibscif_process_write_rma(qp, pdu, skb);
++ break;
++ case ibscif_op_read_rma:
++ err = ibscif_process_read_rma(qp, pdu, skb);
++ break;
++ case ibscif_op_rma_rsp:
++ err = ibscif_process_rma_rsp(qp, pdu, skb);
++ break;
++ default:
++ printk(KERN_INFO PFX "Received invalid opcode (%x)\n",
++ ibscif_pdu_base_type(pdu->hdr.opcode));
++ err = IBSCIF_REASON_INVALID_OPCODE;
++ break;
++ }
++
++ if (unlikely(err)) {
++ printk(KERN_ALERT PFX "%s: ERROR: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
++ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL);
++ }
++
++ return err;
++}
++
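++/*
++ * Advance the in-sequence marker only when this PDU carries the next
++ * expected sequence number (acks have no sequence of their own). The
++ * non-zero return gates completion scheduling in ibscif_process_qp_skb().
++ */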
++static int update_rx_seq_numbers(struct ibscif_qp *qp, union ibscif_pdu *pdu, struct ibscif_rx_state *rx)
++{
++ u32 seq_num = pdu->hdr.seq_num;
++
++ if (pdu->hdr.opcode == ibscif_op_ack)
++ return 0;
++
++ if (seq_num != rx->last_in_seq + 1)
++ return 0;
++
++ rx->last_in_seq = seq_num;
++
++ return 1;
++}
++
++static void ibscif_process_qp_skb(struct ibscif_qp *qp, struct sk_buff *skb)
++{
++ union ibscif_pdu *pdu = (union ibscif_pdu *)skb->data;
++ struct ibscif_rx_state *rx;
++ int err = 0;
++
++ /* Start with no scheduling. */
++ qp->schedule = 0;
++
++ rx = ibscif_pdu_is_iq(pdu->hdr.opcode) ? &qp->wire.iq.rx : &qp->wire.sq.rx;
++
++ if (ibscif_process_pdu(qp, pdu, skb) == IBSCIF_REASON_INVALID_OPCODE)
++ return;
++
++ /* skip ack and seq_num for UD QP */
++ if (qp->ibqp.qp_type == IB_QPT_UD) {
++ err = ibscif_schedule_rx_completions(qp, 0, rx);
++ if (unlikely(err)) {
++ printk(KERN_ALERT PFX "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
++ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL);
++ }
++ goto done;
++ }
++
++ /* Process piggybacked acks. */
++ ibscif_process_ack(qp, &pdu->hdr);
++
++ if (update_rx_seq_numbers(qp, pdu, rx)) {
++ /* PDU is in sequence so schedule/remove completed work requests. */
++ err = ibscif_schedule_rx_completions(qp, ibscif_pdu_is_iq(pdu->hdr.opcode), rx);
++ if (unlikely(err)) {
++ printk(KERN_ALERT PFX "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
++ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL);
++ goto done;
++ }
++ }
++
++ /* Generate an ack if forced or if the current window dictates it. */
++ if (ibscif_pdu_is_force_ack(pdu->hdr.opcode)) {
++ ibscif_send_ack(qp);
++ } else if (pdu->hdr.opcode != ibscif_op_ack) {
++ u32 window = ibscif_rx_window(rx);
++ if (window && (window % (window_size / MIN_WINDOW_SIZE)) == 0)
++ ibscif_send_ack(qp);
++ }
++done:
++ /* Run the scheduler if it was requested. */
++ if (qp->schedule & SCHEDULE_RESUME) {
++ if (qp->schedule & SCHEDULE_SQ)
++ ibscif_schedule(&qp->sq);
++ if (qp->schedule & SCHEDULE_IQ)
++ ibscif_schedule(&qp->iq);
++ }
++
++ return;
++}
++
++static int ibscif_recv_pkt(struct sk_buff *skb, struct ibscif_dev *dev, scif_epd_t ep, struct ibscif_conn *conn)
++{
++ union ibscif_pdu *pdu = (union ibscif_pdu *)skb->data;
++ struct ibscif_qp *qp = ERR_PTR(-ENOENT);
++
++ /* Convert the base header. */
++ pdu->hdr.opcode = __be16_to_cpu(pdu->hdr.opcode);
++ pdu->hdr.length = __be16_to_cpu(pdu->hdr.length);
++ pdu->hdr.dst_qp = __be32_to_cpu(pdu->hdr.dst_qp);
++ pdu->hdr.src_qp = __be32_to_cpu(pdu->hdr.src_qp);
++ pdu->hdr.seq_num = __be32_to_cpu(pdu->hdr.seq_num);
++ pdu->hdr.sq_ack_num = __be32_to_cpu(pdu->hdr.sq_ack_num);
++ pdu->hdr.iq_ack_num = __be32_to_cpu(pdu->hdr.iq_ack_num);
++
++ if (pdu->hdr.opcode == ibscif_op_close) {
++ //printk(KERN_INFO PFX "%s: op_close, conn=%p, local_close=%d\n", __func__, conn, conn->local_close);
++ conn->remote_close = 1;
++ goto done_no_qp;
++ } else if (pdu->hdr.opcode == ibscif_op_reopen) {
++ //printk(KERN_INFO PFX "%s: op_reopen, conn=%p, local_close=%d\n", __func__, conn, conn->local_close);
++ conn->remote_close = 0;
++ goto done_no_qp;
++ } else if (pdu->hdr.opcode == ibscif_op_cm) {
++ ibscif_process_cm_skb(skb, conn);
++ goto done_no_qp;
++ }
++
++ qp = ibscif_get_qp(pdu->hdr.dst_qp);
++ if (unlikely(IS_ERR(qp) ||
++ (qp->state != QP_CONNECTED && qp->ibqp.qp_type != IB_QPT_UD) ||
++ (qp->ibqp.qp_num != pdu->hdr.dst_qp) ||
++ (qp->remote_qpn != pdu->hdr.src_qp && qp->ibqp.qp_type != IB_QPT_UD))) {
++ /* Disconnect the rogue. */
++ ibscif_reflect_disconnect(qp, &pdu->hdr, skb, IBSCIF_REASON_INVALID_QP);
++ goto done;
++ }
++
++ if (qp->ibqp.qp_type == IB_QPT_UD)
++ ibscif_qp_add_ud_conn(qp, conn);
++
++ DEV_STAT(qp->dev, packets_rcvd++);
++ DEV_STAT(qp->dev, bytes_rcvd += skb->len);
++
++ ibscif_process_qp_skb(qp, skb);
++done:
++ if (likely(!IS_ERR(qp)))
++ ibscif_put_qp(qp);
++
++done_no_qp:
++ kfree_skb(skb);
++ return 0;
++}
++
++void ibscif_do_recv(struct ibscif_dev *dev, scif_epd_t ep, struct ibscif_conn *conn)
++{
++ struct sk_buff *skb;
++ union ibscif_pdu *pdu;
++ int hdr_size, payload_size, recv_size, pdu_size;
++ char *recv_buffer;
++ int ret;
++
++ skb = dev_alloc_skb(IBSCIF_MTU + sizeof(struct ud_hdr)); /* allow full UD payload */
++ if (unlikely(!skb)) {
++ printk(KERN_ALERT PFX "%s(): failed to allocate skb\n", __func__);
++ return;
++ }
++
++ skb->protocol = IBSCIF_PACKET_TYPE;
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ skb->priority = TC_PRIO_CONTROL; /* highest defined priority */
++ skb->dev = (void *) dev;
++
++ pdu = (union ibscif_pdu *)skb->data;
++
++ /* get the base header first so the packet size can be determined */
++ recv_size = sizeof(pdu->hdr);
++ recv_buffer = (char *)&pdu->hdr;
++ while (recv_size) {
++ ret = scif_recv(ep, recv_buffer, recv_size, blocking_recv ? SCIF_RECV_BLOCK : 0);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s(): fail to receive hdr, ret=%d, expecting %d\n", __func__, ret, (int)recv_size);
++ if (ret == -ENOTCONN || ret == -ECONNRESET) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: ep disconnected by peer (%d). conn=%p, local_close=%d\n",
++ __func__, ret, conn, conn->local_close);
++ ibscif_remove_ep(dev, ep);
++ ibscif_refresh_pollep_list();
++ conn->remote_close = 1;
++ if (conn->local_close) {
++ ibscif_free_conn(conn);
++ }
++ }
++ goto errout;
++ }
++ recv_size -= ret;
++ recv_buffer += ret;
++ }
++
++ hdr_size = __be16_to_cpu(pdu->hdr.hdr_size);
++ payload_size = __be16_to_cpu(pdu->hdr.length);
++ pdu_size = hdr_size + payload_size;
++ if (unlikely(payload_size > IBSCIF_MTU)) {
++ printk(KERN_ALERT PFX "%s(): payload exceeds MTU, size=%d\n",
++ __func__, payload_size);
++ goto errout;
++ }
++
++ recv_size = pdu_size - sizeof(pdu->hdr);
++ recv_buffer = (char *)pdu + sizeof(pdu->hdr);
++
++ /* get the remainder of the packet */
++ //printk(KERN_INFO PFX "%s(): hdr_size=%d payload_size=%d pdu_size=%d recv_size=%d\n", __func__, hdr_size, payload_size, pdu_size, recv_size);
++ ret = 0;
++ while (recv_size) {
++ ret = scif_recv(ep, recv_buffer, recv_size, blocking_recv ? SCIF_RECV_BLOCK : 0);
++
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s(): fail to receive data, ret=%d, expecting %d\n", __func__, ret, recv_size);
++ break;
++ }
++
++ recv_size -= ret;
++ recv_buffer += ret;
++ }
++
++ if (ret < 0)
++ goto errout;
++
++ skb->len = pdu_size;
++ skb->data_len = payload_size;
++ skb->tail += pdu_size;
++
++ ibscif_recv_pkt(skb, dev, ep, conn);
++ return;
++
++errout:
++ kfree_skb(skb);
++}
++
++#define IBSCIF_MAX_POLL_COUNT (IBSCIF_MAX_DEVICES * 2)
++static struct scif_pollepd poll_eps[IBSCIF_MAX_POLL_COUNT];
++static struct ibscif_dev *poll_devs[IBSCIF_MAX_POLL_COUNT];
++static int poll_types[IBSCIF_MAX_POLL_COUNT];
++static struct ibscif_conn *poll_conns[IBSCIF_MAX_POLL_COUNT];
++static struct task_struct *poll_thread = NULL;
++static atomic_t poll_eps_changed = ATOMIC_INIT(0);
++static volatile int poll_thread_running = 0;
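++
++/*
++ * A single kernel thread polls all SCIF endpoints (listen endpoints and
++ * established connections). ibscif_refresh_pollep_list() merely marks the
++ * cached endpoint set stale; the thread re-fetches it on its next pass.
++ */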
++
++void ibscif_refresh_pollep_list(void)
++{
++ atomic_set(&poll_eps_changed, 1);
++}
++
++int ibscif_poll_thread(void *unused)
++{
++ int poll_count = 0;
++ int ret;
++ int i;
++ int busy;
++ int idle_count = 0;
++
++ poll_thread_running = 1;
++ while (!kthread_should_stop()) {
++ if (atomic_xchg(&poll_eps_changed, 0)) {
++ poll_count = IBSCIF_MAX_POLL_COUNT;
++ ibscif_get_pollep_list(poll_eps, poll_devs, poll_types, poll_conns, &poll_count);
++ }
++
++ if (poll_count == 0) {
++ schedule();
++ continue;
++ }
++
++ ret = scif_poll(poll_eps, poll_count, 1000); /* 1s timeout */
++
++ busy = 0;
++ if (ret > 0) {
++ for (i=0; i<poll_count; i++) {
++ if (poll_eps[i].revents & POLLIN) {
++ if (poll_types[i] == IBSCIF_EP_TYPE_LISTEN) {
++ ibscif_do_accept( poll_devs[i] );
++ busy = 1;
++ }
++ else {
++ ibscif_do_recv( poll_devs[i], poll_eps[i].epd, poll_conns[i] );
++ busy = 1;
++ }
++ }
++ else if (poll_eps[i].revents & POLLERR) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: ep error, conn=%p.\n", __func__, poll_conns[i]);
++ ibscif_remove_ep( poll_devs[i], poll_eps[i].epd );
++ ibscif_refresh_pollep_list();
++ /* in most the case, the error is caused by ep being already closed */
++ busy = 1;
++ }
++ else if (poll_eps[i].revents & POLLHUP) {
++ struct ibscif_conn *conn = poll_conns[i];
++ if (verbose)
++ printk(KERN_INFO PFX "%s: ep disconnected by peer.\n", __func__);
++ ibscif_remove_ep( poll_devs[i], poll_eps[i].epd );
++ ibscif_refresh_pollep_list();
++ if (conn) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: conn=%p, local_close=%d.\n", __func__, conn, conn->local_close);
++ conn->remote_close = 1;
++ if (conn->local_close) {
++ ibscif_free_conn(conn);
++ }
++ }
++ busy = 1;
++ }
++ }
++ }
++
++ if (busy) {
++ idle_count = 0;
++ }
++ else {
++ idle_count++;
++ /* close unused endpoints after ~60 seconds of idle polling */
++ if (idle_count == 60) {
++ if (ibscif_cleanup_idle_conn())
++ ibscif_refresh_pollep_list();
++ idle_count = 0;
++ }
++ /* pick up the unprocessed items in the xmit queue */
++ if (!skb_queue_empty(&xmit_queue))
++ ibscif_dev_queue_xmit(NULL);
++ schedule();
++ }
++ }
++
++ poll_thread_running = 0;
++ return 0;
++}
++
++void ibscif_protocol_init_pre(void)
++{
++ skb_queue_head_init(&xmit_queue);
++}
++
++void ibscif_protocol_init_post(void)
++{
++ poll_thread = kthread_run(ibscif_poll_thread, NULL, "ibscif_polld");
++}
++
++void ibscif_protocol_cleanup(void)
++{
++ kthread_stop(poll_thread);
++
++ while (poll_thread_running)
++ schedule();
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_protocol.h b/drivers/infiniband/hw/scif/ibscif_protocol.h
+new file mode 100644
+index 0000000..3ce5763
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_protocol.h
+@@ -0,0 +1,395 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef IBSCIF_PROTOCOL_H
++#define IBSCIF_PROTOCOL_H
++
++/*
++ * Protocol EtherType
++ */
++#define IBSCIF_PACKET_TYPE 0x8086
++
++/*
++ * Base protocol header version
++ */
++#define IBSCIF_PROTOCOL_VER_1 1
++#define IBSCIF_PROTOCOL_VER IBSCIF_PROTOCOL_VER_1
++
++/*
++ * Protocol opcode values - All other values are reserved.
++ */
++#define ibscif_last_flag 0x4000
++#define ibscif_immed_flag 0x2000
++#define ibscif_se_flag 0x1000
++#define ibscif_force_ack_flag 0x0800
++#define ibscif_iq_flag 0x0400
++
++#define ibscif_op_send 0
++#define ibscif_op_send_last (ibscif_op_send | ibscif_last_flag)
++#define ibscif_op_send_last_se (ibscif_op_send | ibscif_last_flag | ibscif_se_flag)
++#define ibscif_op_send_immed (ibscif_op_send | ibscif_immed_flag)
++#define ibscif_op_send_immed_se (ibscif_op_send | ibscif_immed_flag | ibscif_se_flag)
++
++#define ibscif_op_write 1
++#define ibscif_op_write_last (ibscif_op_write | ibscif_last_flag)
++#define ibscif_op_write_immed (ibscif_op_write | ibscif_immed_flag)
++#define ibscif_op_write_immed_se (ibscif_op_write | ibscif_immed_flag | ibscif_se_flag)
++
++#define ibscif_op_read 2
++#define ibscif_op_read_rsp (ibscif_op_read | ibscif_iq_flag)
++#define ibscif_op_read_rsp_last (ibscif_op_read_rsp | ibscif_last_flag)
++
++#define ibscif_op_comp_swap 3
++#define ibscif_op_comp_swap_rsp (ibscif_op_comp_swap | ibscif_iq_flag)
++
++#define ibscif_op_fetch_add 4
++#define ibscif_op_fetch_add_rsp (ibscif_op_fetch_add | ibscif_iq_flag)
++
++#define ibscif_op_ack 5
++#define ibscif_op_disconnect 6
++
++#define ibscif_op_send_rma 7
++#define ibscif_op_send_rma_se (ibscif_op_send_rma | ibscif_se_flag)
++#define ibscif_op_send_rma_immed (ibscif_op_send_rma | ibscif_immed_flag)
++#define ibscif_op_send_rma_immed_se (ibscif_op_send_rma | ibscif_immed_flag | ibscif_se_flag)
++
++#define ibscif_op_write_rma 8
++#define ibscif_op_write_rma_immed (ibscif_op_write_rma | ibscif_immed_flag)
++#define ibscif_op_write_rma_immed_se (ibscif_op_write_rma | ibscif_immed_flag | ibscif_se_flag)
++
++#define ibscif_op_read_rma 9
++#define ibscif_op_rma_rsp (10 | ibscif_iq_flag)
++
++#define ibscif_op_reg 11
++#define ibscif_op_dereg 12
++
++#define ibscif_op_close 13
++#define ibscif_op_reopen 14
++
++#define ibscif_op_ud 15
++#define ibscif_op_cm 16
++
++#define ibscif_pdu_is_last(op) (op & ibscif_last_flag)
++#define ibscif_pdu_is_immed(op) (op & ibscif_immed_flag)
++#define ibscif_pdu_is_se(op) (op & ibscif_se_flag)
++#define ibscif_pdu_is_force_ack(op) (op & ibscif_force_ack_flag)
++#define ibscif_pdu_is_iq(op) (op & ibscif_iq_flag)
++
++#define ibscif_pdu_set_last(op) (op | ibscif_last_flag)
++#define ibscif_pdu_set_immed(op) (op | ibscif_immed_flag)
++#define ibscif_pdu_set_se(op) (op | ibscif_se_flag)
++#define ibscif_pdu_set_force_ack(op) (op | ibscif_force_ack_flag)
++#define ibscif_pdu_set_iq(op) (op | ibscif_iq_flag)
++
++#define ibscif_pdu_base_type(op) \
++ (op & ~(ibscif_last_flag | \
++ ibscif_se_flag | \
++ ibscif_immed_flag | \
++ ibscif_force_ack_flag))
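++
++/*
++ * Illustrative example: a send PDU that carries immediate data and is the
++ * last fragment of its message would be composed and decoded as
++ *
++ *   op = ibscif_pdu_set_immed(ibscif_pdu_set_last(ibscif_op_send));
++ *   ibscif_pdu_is_last(op);                      -> non-zero
++ *   ibscif_pdu_base_type(op) == ibscif_op_send;  -> true
++ *
++ * Note that ibscif_pdu_base_type() does not mask ibscif_iq_flag, so IQ
++ * opcodes such as ibscif_op_read_rsp remain distinct base types.
++ */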
++
++/*
++ * Remote address descriptor for SCIF RMA operations
++ */
++struct rma_addr {
++ __be64 offset;
++ __be32 length;
++ __be32 reserved;
++} __attribute__ ((packed));
++
++/*
++ * Base header present in every packet
++ */
++struct base_hdr {
++ __be16 opcode;
++ __be16 length;
++ __be32 dst_qp;
++ __be32 src_qp;
++ __be32 seq_num;
++ __be32 sq_ack_num;
++ __be32 iq_ack_num;
++ __be16 hdr_size;
++ __be16 reserved[3];
++} __attribute__ ((packed));
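++
++/*
++ * All multi-byte fields are big-endian on the wire; the receive path
++ * converts them to host order before dispatching the PDU. hdr_size gives
++ * the size of the opcode-specific header so the receiver can split the
++ * header from the payload.
++ */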
++
++/*
++ * UD Header
++ */
++struct ud_hdr {
++ struct base_hdr hdr;
++ __be32 msg_id;
++ __be32 msg_length;
++ __be32 msg_offset;
++ u8 grh[40];
++} __attribute__ ((packed));
++
++/*
++ * Send Header
++ */
++struct send_hdr {
++ struct base_hdr hdr;
++ __be32 msg_id;
++ __be32 msg_length;
++ __be32 msg_offset;
++ __be32 immed_data;
++ __be32 rma_id; /* RMA */
++ __be32 num_rma_addrs; /* RMA */
++ struct rma_addr rma_addrs[0]; /* RMA */
++} __attribute__ ((packed));
++
++/*
++ * RDMA Write Header
++ */
++struct write_hdr {
++ struct base_hdr hdr;
++ __be64 rdma_address;
++ __be32 rdma_key;
++ __be32 immed_data;
++ __be32 msg_id;
++ __be32 rma_length; /* RMA */
++ __be32 rma_id; /* RMA */
++ __be32 num_rma_addrs; /* RMA */
++ struct rma_addr rma_addrs[0]; /* RMA */
++} __attribute__ ((packed));
++
++/*
++ * RDMA Read Request Header
++ */
++struct read_req_hdr {
++ struct base_hdr hdr;
++ __be64 rdma_address;
++ __be32 rdma_key;
++ __be32 rdma_length; /* shared with RMA */
++ __be32 rdma_id; /* shared with RMA */
++ __be32 num_rma_addrs; /* RMA */
++ struct rma_addr rma_addrs[0]; /* RMA */
++} __attribute__ ((packed));
++
++/*
++ * RDMA Read Response Header
++ */
++struct read_rsp_hdr {
++ struct base_hdr hdr;
++ __be32 rdma_offset;
++ __be32 rdma_id;
++} __attribute__ ((packed));
++
++
++/*
++ * Atomic Compare and Swap Header
++ */
++struct comp_swap_hdr {
++ struct base_hdr hdr;
++ __be64 atomic_address;
++ __be64 comp_data;
++ __be64 swap_data;
++ __be32 atomic_key;
++ __be32 atomic_id;
++ /* no pad needed */
++} __attribute__ ((packed));
++
++
++/*
++ * Atomic Fetch/Add Header
++ */
++struct fetch_add_hdr {
++ struct base_hdr hdr;
++ __be64 atomic_address;
++ __be64 add_data;
++ __be32 atomic_key;
++ __be32 atomic_id;
++ /* no pad needed */
++} __attribute__ ((packed));
++
++/*
++ * Atomic Response Header
++ */
++struct atomic_rsp_hdr {
++ struct base_hdr hdr;
++ __be64 orig_data;
++ __be32 atomic_id;
++} __attribute__ ((packed));
++
++/*
++ * ACK Header
++ */
++struct ack_hdr {
++ struct base_hdr hdr;
++} __attribute__ ((packed));
++
++/*
++ * Disconnect Header
++ */
++struct disconnect_hdr {
++ struct base_hdr hdr;
++ __be32 reason;
++} __attribute__ ((packed));
++
++/*
++ * RMA Response Header
++ */
++struct rma_rsp_hdr {
++ struct base_hdr hdr;
++ __be32 rma_id;
++ __be32 xfer_length;
++ __be32 error;
++} __attribute__ ((packed));
++
++/*
++ * MR Reg/Dereg Info Header
++ */
++struct reg_hdr {
++ struct base_hdr hdr;
++ __be64 scif_offset;
++ __be64 address;
++ __be32 length;
++ __be32 rkey;
++ __be32 access;
++} __attribute__ ((packed));
++
++/*
++ * SCIF endpoint close notification
++ */
++struct close_hdr {
++ struct base_hdr hdr;
++} __attribute__ ((packed));
++
++
++#define IBSCIF_CM_REQ 1
++#define IBSCIF_CM_REP 2
++#define IBSCIF_CM_REJ 3
++#define IBSCIF_CM_RTU 4
++
++/*
++ * RDMA CM Header
++ */
++
++struct cm_hdr {
++ struct base_hdr hdr;
++ __be64 req_ctx;
++ __be64 rep_ctx;
++ __be32 cmd;
++ __be32 port;
++ __be32 qpn;
++ __be32 status;
++ __be32 plen;
++ u8 pdata[0];
++} __attribute__ ((packed));
++
++enum ibscif_reason { /* Set each value to simplify manual lookup. */
++
++ /* Local Events */
++ IBSCIF_REASON_USER_GENERATED = 0,
++ IBSCIF_REASON_CQ_COMPLETION = 1,
++ IBSCIF_REASON_NIC_FATAL = 2,
++ IBSCIF_REASON_NIC_REMOVED = 3,
++
++ /* Disconnect Event */
++ IBSCIF_REASON_DISCONNECT = 4,
++
++ /* CQ Error */
++ IBSCIF_REASON_CQ_OVERRUN = 5,
++ IBSCIF_REASON_CQ_FATAL = 6,
++
++ /* QP Errors */
++ IBSCIF_REASON_QP_SQ_ERROR = 7,
++ IBSCIF_REASON_QP_RQ_ERROR = 8,
++ IBSCIF_REASON_QP_DESTROYED = 9,
++ IBSCIF_REASON_QP_ERROR = 10,
++ IBSCIF_REASON_QP_FATAL = 11,
++
++ /* Operation Errors */
++ IBSCIF_REASON_INVALID_OPCODE = 12,
++ IBSCIF_REASON_INVALID_LENGTH = 13,
++ IBSCIF_REASON_INVALID_QP = 14,
++ IBSCIF_REASON_INVALID_MSG_ID = 15,
++ IBSCIF_REASON_INVALID_LKEY = 16,
++ IBSCIF_REASON_INVALID_RDMA_RKEY = 17,
++ IBSCIF_REASON_INVALID_RDMA_ID = 18,
++ IBSCIF_REASON_INVALID_ATOMIC_RKEY = 19,
++ IBSCIF_REASON_INVALID_ATOMIC_ID = 20,
++ IBSCIF_REASON_MAX_IR_EXCEEDED = 21,
++ IBSCIF_REASON_ACK_TIMEOUT = 22,
++
++ /* Protection Errors */
++ IBSCIF_REASON_PROTECTION_VIOLATION = 23,
++ IBSCIF_REASON_BOUNDS_VIOLATION = 24,
++ IBSCIF_REASON_ACCESS_VIOLATION = 25,
++ IBSCIF_REASON_WRAP_ERROR = 26
++};
++
++union ibscif_pdu {
++ struct base_hdr hdr;
++ struct ud_hdr ud;
++ struct send_hdr send;
++ struct write_hdr write;
++ struct read_req_hdr read_req;
++ struct read_rsp_hdr read_rsp;
++ struct comp_swap_hdr comp_swap;
++ struct fetch_add_hdr fetch_add;
++ struct atomic_rsp_hdr atomic_rsp;
++ struct ack_hdr ack;
++ struct disconnect_hdr disconnect;
++ struct rma_rsp_hdr rma_rsp;
++ struct reg_hdr reg;
++ struct close_hdr close;
++ struct cm_hdr cm;
++};
++
++struct ibscif_full_frame {
++ union ibscif_pdu ibscif;
++};
++
++static inline int seq_before(u32 seq1, u32 seq2)
++{
++ return (s32)(seq1 - seq2) < 0;
++}
++
++static inline int seq_after(u32 seq1, u32 seq2)
++{
++ return (s32)(seq2 - seq1) < 0;
++}
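++
++/*
++ * For reference, these comparisons use signed 32-bit wrap-around math, so
++ * they remain correct as seq_num wraps past 2^32 - 1, provided the two
++ * values are within 2^31 of each other, e.g.
++ *
++ *   seq_before(0xffffffffU, 1) -> true  (1 is "after" the wrap)
++ *   seq_after(2, 0xfffffffeU)  -> true
++ */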
++
++static inline int seq_between(u32 seq_target, u32 seq_low, u32 seq_high)
++{
++ return seq_high - seq_low >= seq_target - seq_low;
++}
++
++static inline u32 seq_window(u32 earlier, u32 later)
++{
++ return earlier > later ? ((u32)~0 - earlier) + later : later - earlier;
++}
++
++#define ibscif_tx_unacked_window(tx) seq_window((tx)->last_ack_seq_recvd, (tx)->next_seq - 1)
++
++#define ibscif_rx_window(rx) seq_window((rx)->last_seq_acked, (rx)->last_in_seq)
++
++#define ibscif_tx_window(tx) ((u32)window_size - ibscif_tx_unacked_window(tx))
++
++#endif /* IBSCIF_PROTOCOL_H */
+diff --git a/drivers/infiniband/hw/scif/ibscif_provider.c b/drivers/infiniband/hw/scif/ibscif_provider.c
+new file mode 100644
+index 0000000..9954532
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_provider.c
+@@ -0,0 +1,424 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++static int ibscif_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
++ struct ib_udata *udata)
++{
++ memset(attr, 0, sizeof *attr);
++
++ attr->vendor_id = VENDOR_ID;
++ attr->vendor_part_id = DEVICE_ID;
++ attr->hw_ver = HW_REV;
++ attr->fw_ver = FW_REV;
++ attr->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT;
++ attr->max_mr_size = MAX_MR_SIZE;
++ attr->page_size_cap = PAGE_SIZE;
++ attr->max_qp = MAX_QPS;
++ attr->max_qp_wr = MAX_QP_SIZE;
++ attr->max_sge = MAX_SGES;
++ attr->max_cq = MAX_CQS;
++ attr->max_cqe = MAX_CQ_SIZE;
++ attr->max_mr = MAX_MRS;
++ attr->max_pd = MAX_PDS;
++ attr->max_qp_rd_atom = MAX_IR > 255 ? 255 : MAX_IR;
++ attr->max_qp_init_rd_atom = MAX_OR > 255 ? 255 : MAX_OR;
++ attr->max_res_rd_atom = MAX_IR > 255 ? 255 : MAX_IR;
++ attr->atomic_cap = IB_ATOMIC_HCA;
++ attr->sys_image_guid = ibdev->node_guid;
++
++ return 0;
++}
++
++static int ibscif_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
++{
++ struct ibscif_dev *dev = to_dev(ibdev);
++
++ memset(attr, 0, sizeof *attr);
++
++ /* See IB Spec r1.2 Table 145 for physical port state values. */
++ attr->lid = IBSCIF_NODE_ID_TO_LID(dev->node_id);
++ attr->sm_lid = 1;
++ attr->gid_tbl_len = 1;
++ attr->pkey_tbl_len = 1;
++ attr->max_msg_sz = MAX_MR_SIZE;
++ attr->phys_state = 5; /* LinkUp */
++ attr->state = IB_PORT_ACTIVE;
++ attr->max_mtu = IB_MTU_4096;
++ attr->active_mtu = IB_MTU_4096;
++ attr->active_width = IB_WIDTH_4X;
++ attr->active_speed = 4;
++ attr->max_vl_num = 1;
++ attr->port_cap_flags = IB_PORT_SM_DISABLED;
++
++ return 0;
++}
++
++static int ibscif_port_immutable(struct ib_device *ibdev, u8 port_num,
++ struct ib_port_immutable *immutable)
++{
++ struct ib_port_attr attr;
++ int err;
++
++ err = ibscif_query_port(ibdev, port_num, &attr);
++ if (err)
++ return err;
++
++ immutable->pkey_tbl_len = attr.pkey_tbl_len;
++ immutable->gid_tbl_len = attr.gid_tbl_len;
++ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
++
++ return 0;
++}
++
++
++static int ibscif_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
++{
++ *pkey = 0xffff; /* IB_DEFAULT_PKEY_FULL */
++ return 0;
++}
++
++static int ibscif_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *ibgid)
++{
++ struct ibscif_dev *dev = to_dev(ibdev);
++
++ memcpy(ibgid, &dev->gid, sizeof(*ibgid));
++ return 0;
++}
++
++static struct ib_ucontext *ibscif_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata)
++{
++ struct ib_ucontext *context = kzalloc(sizeof *context, GFP_KERNEL);
++ return (!context) ? ERR_PTR(-ENOMEM) : context;
++}
++
++static int ibscif_dealloc_ucontext(struct ib_ucontext *context)
++{
++ kfree(context);
++ return 0;
++}
++
++static void ibscif_generate_eui64(struct ibscif_dev *dev, u8 *eui64)
++{
++ memcpy(eui64, dev->netdev->dev_addr, 3);
++ eui64[3] = 0xFF;
++ eui64[4] = 0xFE;
++ memcpy(eui64+5, dev->netdev->dev_addr+3, 3);
++}
++
++static int ibscif_register_device(struct ibscif_dev *dev)
++{
++ strncpy(dev->ibdev.node_desc, DRV_SIGNON, sizeof dev->ibdev.node_desc);
++ ibscif_generate_eui64(dev, (u8 *)&dev->ibdev.node_guid);
++ dev->ibdev.owner = THIS_MODULE;
++ dev->ibdev.uverbs_abi_ver = UVERBS_ABI_VER;
++ dev->ibdev.uverbs_cmd_mask =
++ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
++ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
++ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
++ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
++ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
++ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
++ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
++ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
++ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
++ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
++ (1ull << IB_USER_VERBS_CMD_REG_MR) |
++ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
++ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
++ (1ull << IB_USER_VERBS_CMD_POST_RECV);
++
++ dev->ibdev.node_type = new_ib_type ? RDMA_NODE_MIC : RDMA_NODE_RNIC;
++ dev->ibdev.phys_port_cnt = 1;
++ dev->ibdev.query_device = ibscif_query_device; // Mandatory
++ dev->ibdev.num_comp_vectors = 1; // Mandatory
++ dev->ibdev.query_port = ibscif_query_port; // Mandatory
++ dev->ibdev.query_pkey = ibscif_query_pkey; // Mandatory
++ dev->ibdev.query_gid = ibscif_query_gid; // Mandatory
++ dev->ibdev.alloc_ucontext = ibscif_alloc_ucontext; // Required
++ dev->ibdev.dealloc_ucontext = ibscif_dealloc_ucontext; // Required
++ dev->ibdev.alloc_pd = ibscif_alloc_pd; // Mandatory
++ dev->ibdev.dealloc_pd = ibscif_dealloc_pd; // Mandatory
++ dev->ibdev.create_ah = ibscif_create_ah; // Mandatory
++ dev->ibdev.destroy_ah = ibscif_destroy_ah; // Mandatory
++ dev->ibdev.create_qp = ibscif_create_qp; // Mandatory
++ dev->ibdev.query_qp = ibscif_query_qp; // Optional
++ dev->ibdev.modify_qp = ibscif_modify_qp; // Mandatory
++ dev->ibdev.destroy_qp = ibscif_destroy_qp; // Mandatory
++ dev->ibdev.create_cq = ibscif_create_cq; // Mandatory
++ dev->ibdev.resize_cq = ibscif_resize_cq; // Optional
++ dev->ibdev.destroy_cq = ibscif_destroy_cq; // Mandatory
++ dev->ibdev.poll_cq = ibscif_poll_cq; // Mandatory
++ dev->ibdev.req_notify_cq = ibscif_arm_cq; // Mandatory
++ dev->ibdev.get_dma_mr = ibscif_get_dma_mr; // Mandatory
++ dev->ibdev.reg_user_mr = ibscif_reg_user_mr; // Required
++ dev->ibdev.dereg_mr = ibscif_dereg_mr; // Mandatory
++ dev->ibdev.post_send = ibscif_post_send; // Mandatory
++ dev->ibdev.post_recv = ibscif_post_receive; // Mandatory
++ dev->ibdev.dma_ops = &ibscif_dma_mapping_ops; // Software DMA mapping ops
++ dev->ibdev.get_port_immutable = &ibscif_port_immutable; // Mandatory
++
++ dev->ibdev.iwcm = kzalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
++ if (!dev->ibdev.iwcm)
++ return -ENOMEM;
++
++ dev->ibdev.iwcm->connect = ibscif_cm_connect;
++ dev->ibdev.iwcm->accept = ibscif_cm_accept;
++ dev->ibdev.iwcm->reject = ibscif_cm_reject;
++ dev->ibdev.iwcm->create_listen = ibscif_cm_create_listen;
++ dev->ibdev.iwcm->destroy_listen = ibscif_cm_destroy_listen;
++ dev->ibdev.iwcm->add_ref = ibscif_cm_add_ref;
++ dev->ibdev.iwcm->rem_ref = ibscif_cm_rem_ref;
++ dev->ibdev.iwcm->get_qp = ibscif_cm_get_qp;
++
++ return ib_register_device(&dev->ibdev, NULL);
++}
++
++static void ibscif_dev_release(struct device *dev)
++{
++ kfree(dev);
++}
++
++/*
++ * Hold devlist_mutex during this call for synchronization as needed.
++ * Upon return, dev is invalid.
++ */
++static void ibscif_remove_dev(struct ibscif_dev *dev)
++{
++ struct ibscif_conn *conn, *next;
++
++ if (dev->ibdev.reg_state == IB_DEV_REGISTERED)
++ ib_unregister_device(&dev->ibdev);
++
++ WARN_ON(!list_empty(&dev->wq_list));
++
++ down(&devlist_mutex);
++ list_del(&dev->entry);
++ up(&devlist_mutex);
++
++ ibscif_refresh_pollep_list();
++
++ down(&dev->mutex);
++ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) {
++ scif_close(conn->ep);
++ list_del(&conn->entry);
++ kfree(conn);
++ }
++ up(&dev->mutex);
++
++ if (dev->listen_ep)
++ scif_close(dev->listen_ep);
++ ibscif_procfs_remove_dev(dev);
++
++ dev_put(dev->netdev);
++ device_unregister(dev->ibdev.dma_device);
++ ib_dealloc_device(&dev->ibdev);
++}
++
++static void ibscif_remove_one(struct net_device *netdev)
++{
++ struct ibscif_dev *dev, *next;
++
++ list_for_each_entry_safe(dev, next, &devlist, entry) {
++ if (netdev == dev->netdev) {
++ ibscif_remove_dev(dev);
++ break;
++ }
++ }
++}
++
++static int node_cnt;
++static uint16_t node_ids[IBSCIF_MAX_DEVICES];
++static uint16_t my_node_id;
++
++static void ibscif_add_one(struct net_device *netdev)
++{
++ static int dev_cnt;
++ static dma_addr_t dma_mask = -1;
++ struct ibscif_dev *dev;
++ int ret;
++
++ dev = (struct ibscif_dev *)ib_alloc_device(sizeof *dev);
++ if (!dev) {
++ printk(KERN_ALERT PFX "%s: fail to allocate ib_device\n", __func__);
++ return;
++ }
++
++ INIT_LIST_HEAD(&dev->conn_list);
++ INIT_LIST_HEAD(&dev->mr_list);
++ init_MUTEX(&dev->mr_list_mutex);
++ init_MUTEX(&dev->mutex);
++ spin_lock_init(&dev->atomic_op);
++ INIT_LIST_HEAD(&dev->wq_list);
++ atomic_set(&dev->available, 256); /* FIXME */
++
++ dev_hold(netdev);
++ dev->netdev = netdev;
++
++ /* use the MAC address of the netdev as the GID so that RDMA CM can
++ * find the ibdev from the IP address associated with the netdev.
++ */
++ memcpy(&dev->gid, dev->netdev->dev_addr, ETH_ALEN);
++
++ dev->ibdev.dma_device = kzalloc(sizeof *dev->ibdev.dma_device, GFP_KERNEL);
++ if (!dev->ibdev.dma_device) {
++ printk(KERN_ALERT PFX "%s: fail to allocate dma_device\n", __func__);
++ goto out_free_ibdev;
++ }
++
++ snprintf(dev->name, IBSCIF_NAME_SIZE, "scif_dma_%d", dev_cnt);
++ snprintf(dev->ibdev.name, IB_DEVICE_NAME_MAX, "scif%d", dev_cnt++);
++ dev->ibdev.dma_device->release = ibscif_dev_release;
++ dev->ibdev.dma_device->init_name = dev->name;
++ dev->ibdev.dma_device->dma_mask = &dma_mask;
++ ret = device_register(dev->ibdev.dma_device);
++ if (ret) {
++ printk(KERN_ALERT PFX "%s: fail to register dma_device, ret=%d\n", __func__, ret);
++ kfree(dev->ibdev.dma_device);
++ goto out_free_ibdev;
++ }
++
++ /* Notice: set up listen ep before inserting to devlist */
++
++ dev->listen_ep = scif_open();
++ if (!dev->listen_ep || IS_ERR(dev->listen_ep)) {
++ printk(KERN_ALERT PFX "%s: scif_open returns %ld\n", __func__, PTR_ERR(dev->listen_ep));
++ goto out_unreg_dmadev;
++ }
++
++ ret = scif_get_node_ids(node_ids, IBSCIF_MAX_DEVICES, &my_node_id);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_get_node_ids returns %d\n",
++ __func__, ret);
++ goto out_close_ep;
++ }
++
++ node_cnt = ret;
++ dev->node_id = my_node_id;
++ printk(KERN_ALERT PFX "%s: my node_id is %d\n", __func__, dev->node_id);
++
++ ret = scif_bind(dev->listen_ep, SCIF_OFED_PORT_0);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_bind returns %d, port=%d\n",
++ __func__, ret, SCIF_OFED_PORT_0);
++ goto out_close_ep;
++ }
++
++ ret = scif_listen(dev->listen_ep, IBSCIF_MAX_DEVICES);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_listen returns %d\n", __func__, ret);
++ goto out_close_ep;
++ }
++
++ down(&devlist_mutex);
++ list_add_tail(&dev->entry, &devlist);
++ up(&devlist_mutex);
++
++ if (ibscif_register_device(dev))
++ ibscif_remove_dev(dev);
++ else
++ ibscif_procfs_add_dev(dev);
++
++ ibscif_refresh_pollep_list();
++
++ return;
++
++out_close_ep:
++ scif_close(dev->listen_ep);
++
++out_unreg_dmadev:
++ device_unregister(dev->ibdev.dma_device); /* it will free the memory, too */
++
++out_free_ibdev:
++ ib_dealloc_device(&dev->ibdev);
++}
++
++static int ibscif_notifier(struct notifier_block *nb, unsigned long event, void *ptr)
++{
++ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
++ u16 self;
++
++ /* Check for "eth0" on knl-lb, "mic0" on host */
++ scif_get_node_ids(NULL, 0, &self);
++ if (strcmp(netdev->name, self ? "eth0" : "mic0"))
++ return NOTIFY_DONE;
++
++ switch (event) {
++ case NETDEV_REGISTER:
++ ibscif_add_one(netdev);
++ ibscif_protocol_init_post();
++ break;
++
++ case NETDEV_UNREGISTER:
++ ibscif_remove_one(netdev);
++ break;
++
++ default:
++ /* we only care about the MAC address, ignore other notifications */
++ break;
++ }
++
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block ibscif_notifier_block = {
++ .notifier_call = ibscif_notifier,
++};
++
++int ibscif_dev_init(void)
++{
++ int err = 0;
++
++ ibscif_protocol_init_pre();
++
++ err = register_netdevice_notifier(&ibscif_notifier_block);
++ if (err)
++ ibscif_protocol_cleanup();
++
++ return err;
++}
++
++void ibscif_dev_cleanup(void)
++{
++ struct ibscif_dev *dev, *next;
++
++ ibscif_protocol_cleanup();
++ unregister_netdevice_notifier(&ibscif_notifier_block);
++ list_for_each_entry_safe(dev, next, &devlist, entry)
++ ibscif_remove_dev(dev);
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_qp.c b/drivers/infiniband/hw/scif/ibscif_qp.c
+new file mode 100644
+index 0000000..aeb8937
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_qp.c
+@@ -0,0 +1,872 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <linux/vmalloc.h>
++
++#include "ibscif_driver.h"
++
++static struct ibscif_wr *ibscif_alloc_wr(struct ibscif_wq *wq, int new_size, int bytes)
++{
++ if (new_size && (new_size != wq->size)) {
++ struct ibscif_wr *new_wr = vzalloc(bytes);
++ return new_wr ? new_wr : ERR_PTR(-ENOMEM);
++ }
++ return NULL;
++}
++
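++/*
++ * Copy the live entries of a (possibly wrapped) circular work queue into
++ * a freshly allocated array, then swap it in with head reset to 0 and
++ * tail set to the current depth.
++ */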
++static void ibscif_move_wr(struct ibscif_wq *wq, struct ibscif_wr *new_wr, int new_size)
++{
++ int i;
++
++ if (wq->size == new_size)
++ return;
++
++ for (i = 0; i < wq->depth; i++) {
++ memcpy(&new_wr[i], &wq->wr[wq->head], wq->wr_size);
++ wq->head = (wq->head + 1) % wq->size;
++ }
++
++ if (wq->wr) {
++ vfree(wq->wr);
++ }
++
++ wq->wr = new_wr;
++ wq->head = 0;
++ wq->tail = wq->depth;
++ wq->size = new_size;
++}
++
++/* Caller must provide proper synchronization. */
++static int ibscif_resize_qp(struct ibscif_qp *qp, int sq_size, int rq_size, int iq_size)
++{
++ struct ibscif_wr *new_sq, *new_rq, *new_iq;
++ int sq_bytes, rq_bytes, iq_bytes;
++ int old_npages, new_npages, err;
++
++ sq_bytes = PAGE_ALIGN(sq_size * qp->sq.wr_size);
++ rq_bytes = PAGE_ALIGN(rq_size * qp->rq.wr_size);
++ iq_bytes = PAGE_ALIGN(iq_size * qp->iq.wr_size);
++
++ sq_size = sq_bytes / qp->sq.wr_size;
++ rq_size = rq_bytes / qp->rq.wr_size;
++ iq_size = iq_bytes / qp->iq.wr_size;
++
++ if ((sq_size == qp->sq.size) &&
++ (rq_size == qp->rq.size) &&
++ (iq_size == qp->iq.size))
++ return 0;
++
++ if ((sq_size < qp->sq.depth) ||
++ (rq_size < qp->rq.depth) ||
++ (iq_size < qp->iq.depth))
++ return -EINVAL;
++
++ /* Calculate the number of new pages required for this allocation. */
++ new_npages = (sq_bytes + rq_bytes + iq_bytes) >> PAGE_SHIFT;
++ old_npages = (PAGE_ALIGN(qp->sq.size * qp->sq.wr_size) +
++ PAGE_ALIGN(qp->rq.size * qp->rq.wr_size) +
++ PAGE_ALIGN(qp->iq.size * qp->iq.wr_size)) >> PAGE_SHIFT;
++ new_npages -= old_npages;
++
++ if (new_npages > 0) {
++ err = ibscif_reserve_quota(&new_npages);
++ if (err)
++ return err;
++ }
++
++ new_sq = ibscif_alloc_wr(&qp->sq, sq_size, sq_bytes);
++ new_rq = ibscif_alloc_wr(&qp->rq, rq_size, rq_bytes);
++ new_iq = ibscif_alloc_wr(&qp->iq, iq_size, iq_bytes);
++ if (IS_ERR(new_sq) || IS_ERR(new_rq) || IS_ERR(new_iq))
++ goto out;
++
++ ibscif_move_wr(&qp->sq, new_sq, sq_size);
++ ibscif_move_wr(&qp->rq, new_rq, rq_size);
++ ibscif_move_wr(&qp->iq, new_iq, iq_size);
++
++ if (new_npages < 0)
++ ibscif_release_quota(-new_npages);
++
++ return 0;
++out:
++ if (new_sq && !IS_ERR(new_sq))
++ vfree(new_sq);
++ if (new_rq && !IS_ERR(new_rq))
++ vfree(new_rq);
++ if (new_iq && !IS_ERR(new_iq))
++ vfree(new_iq);
++
++ return -ENOMEM;
++}
++
++static int ibscif_init_wqs(struct ibscif_qp *qp, struct ib_qp_init_attr *attr)
++{
++ spin_lock_init(&qp->sq.lock);
++ spin_lock_init(&qp->rq.lock);
++ spin_lock_init(&qp->iq.lock);
++
++ qp->sq.qp = qp;
++ qp->rq.qp = qp;
++ qp->iq.qp = qp;
++
++ qp->sq.wirestate = &qp->wire.sq;
++ qp->iq.wirestate = &qp->wire.iq;
++
++ qp->sq.max_sge = attr->cap.max_send_sge;
++ qp->rq.max_sge = attr->cap.max_recv_sge;
++ qp->iq.max_sge = 1;
++
++ qp->sq.wr_size = sizeof *qp->sq.wr + (sizeof *qp->sq.wr->ds_list * qp->sq.max_sge);
++ qp->rq.wr_size = sizeof *qp->rq.wr + (sizeof *qp->rq.wr->ds_list * qp->rq.max_sge);
++ qp->iq.wr_size = sizeof *qp->iq.wr + (sizeof *qp->iq.wr->ds_list * qp->iq.max_sge);
++
++ return ibscif_resize_qp(qp, attr->cap.max_send_wr, attr->cap.max_recv_wr,
++ (rma_threshold == 0x7FFFFFFF) ? 0 : attr->cap.max_send_wr);
++}
++
++static void ibscif_reset_tx_state(struct ibscif_tx_state *tx)
++{
++ tx->next_seq = 1;
++ tx->last_ack_seq_recvd = 0;
++ tx->next_msg_id = 0;
++}
++
++static void ibscif_reset_rx_state(struct ibscif_rx_state *rx)
++{
++ rx->last_in_seq = 0;
++ rx->last_seq_acked = 0;
++ rx->defer_in_process = 0;
++}
++
++static void ibscif_reset_wirestate(struct ibscif_wirestate *wirestate)
++{
++ ibscif_reset_tx_state(&wirestate->tx);
++ ibscif_reset_rx_state(&wirestate->rx);
++}
++
++static void ibscif_reset_wire(struct ibscif_wire *wire)
++{
++ ibscif_reset_wirestate(&wire->sq);
++ ibscif_reset_wirestate(&wire->iq);
++}
++
++static void ibscif_init_wire(struct ibscif_wire *wire)
++{
++ ibscif_reset_wire(wire);
++}
++
++static void ibscif_query_qp_cap(struct ibscif_qp *qp, struct ib_qp_cap *cap)
++{
++ memset(cap, 0, sizeof *cap);
++ cap->max_send_wr = qp->sq.size;
++ cap->max_recv_wr = qp->rq.size;
++ cap->max_send_sge = qp->sq.max_sge;
++ cap->max_recv_sge = qp->rq.max_sge;
++}
++
++struct ib_qp *ibscif_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attr, struct ib_udata *udata)
++{
++ struct ibscif_dev *dev = to_dev(ibpd->device);
++ struct ibscif_qp *qp;
++ int err;
++
++ if ((attr->qp_type != IB_QPT_RC && attr->qp_type != IB_QPT_UD) ||
++ (attr->cap.max_send_wr > MAX_QP_SIZE) ||
++ (attr->cap.max_recv_wr > MAX_QP_SIZE) ||
++ (attr->cap.max_send_sge > MAX_SGES) ||
++ (attr->cap.max_recv_sge > MAX_SGES) ||
++ (attr->cap.max_send_wr && !attr->send_cq) ||
++ (attr->cap.max_recv_wr && !attr->recv_cq))
++ return ERR_PTR(-EINVAL);
++
++ if (!atomic_add_unless(&dev->qp_cnt, 1, MAX_QPS))
++ return ERR_PTR(-EAGAIN);
++
++ qp = kzalloc(sizeof *qp, GFP_KERNEL);
++ if (!qp) {
++ atomic_dec(&dev->qp_cnt);
++ return ERR_PTR(-ENOMEM);
++ }
++
++ qp->local_node_id = dev->node_id;
++
++ kref_init(&qp->ref);
++ init_completion(&qp->done);
++ init_MUTEX(&qp->modify_mutex);
++ spin_lock_init(&qp->lock);
++ ibscif_init_wire(&qp->wire);
++ qp->sq_policy = attr->sq_sig_type;
++ qp->dev = dev;
++ qp->mtu = IBSCIF_MTU; /* FIXME */
++ qp->state = QP_IDLE;
++
++ err = ibscif_init_wqs(qp, attr);
++ if (err)
++ goto out;
++
++ ibscif_query_qp_cap(qp, &attr->cap);
++
++ err = ibscif_wiremap_add(qp, &qp->ibqp.qp_num);
++ if (err)
++ goto out;
++
++ qp->magic = QP_MAGIC;
++
++ ibscif_scheduler_add_qp(qp);
++ qp->in_scheduler = 1;
++
++ return &qp->ibqp;
++out:
++ ibscif_destroy_qp(&qp->ibqp);
++ return ERR_PTR(err);
++}
++
++static inline enum ib_qp_state to_ib_qp_state(enum ibscif_qp_state state)
++{
++ switch (state) {
++ case QP_IDLE: return IB_QPS_INIT;
++ case QP_CONNECTED: return IB_QPS_RTS;
++ case QP_DISCONNECT: return IB_QPS_SQD;
++ case QP_ERROR: return IB_QPS_ERR;
++ case QP_RESET: return IB_QPS_RESET;
++ default: return -1;
++ }
++}
++
++static inline enum ibscif_qp_state to_ibscif_qp_state(enum ib_qp_state state)
++{
++ switch (state) {
++ case IB_QPS_INIT: return QP_IDLE;
++ case IB_QPS_RTS: return QP_CONNECTED;
++ case IB_QPS_SQD: return QP_DISCONNECT;
++ case IB_QPS_ERR: return QP_ERROR;
++ case IB_QPS_RESET: return QP_RESET;
++ case IB_QPS_RTR: return QP_CONNECTED;
++ default: return -1;
++ }
++}
++
++/* Caller must provide proper synchronization. */
++static void __ibscif_query_qp(struct ibscif_qp *qp, struct ib_qp_attr *attr, struct ib_qp_init_attr *init_attr)
++{
++ struct ib_qp_cap cap;
++
++ ibscif_query_qp_cap(qp, &cap);
++
++ if (attr) {
++ attr->qp_state = to_ib_qp_state(qp->state);
++ attr->cur_qp_state = attr->qp_state;
++ attr->port_num = 1;
++ attr->path_mtu = qp->mtu;
++ attr->dest_qp_num = qp->remote_qpn;
++ attr->qp_access_flags = qp->access;
++ attr->max_rd_atomic = qp->max_or;
++ attr->max_dest_rd_atomic = qp->iq.size;
++ attr->cap = cap;
++ }
++
++ if (init_attr) {
++ init_attr->qp_type = qp->ibqp.qp_type;
++ init_attr->sq_sig_type = qp->sq_policy;
++ init_attr->cap = cap;
++ }
++}
++
++int ibscif_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr)
++{
++ struct ibscif_qp *qp = to_qp(ibqp);
++
++ memset(attr, 0, sizeof *attr);
++ memset(init_attr, 0, sizeof *init_attr);
++
++ spin_lock_bh(&qp->lock);
++ __ibscif_query_qp(qp, attr, init_attr);
++ spin_unlock_bh(&qp->lock);
++
++ return 0;
++}
++
++static int ibscif_flush_wq(struct ibscif_wq *wq, struct ibscif_cq *cq)
++{
++ struct ibscif_wr *wr;
++ struct ibscif_wc *wc;
++ int i, num_wr, err;
++
++ /* Prevent divide by zero traps on wrap math. */
++ if (!wq->size)
++ return 0;
++
++ spin_lock_bh(&wq->lock);
++ for (i = (wq->head + wq->completions) % wq->size, num_wr = 0;
++ wq->depth && (wq->completions != wq->depth);
++ i = (i + 1) % wq->size, num_wr++) {
++
++ wr = ibscif_get_wr(wq, i);
++
++ ibscif_clear_ds_refs(wr->ds_list, wr->num_ds);
++
++ if (!cq) {
++ wq->completions++;
++ continue;
++ }
++
++ err = ibscif_reserve_cqe(cq, &wc);
++ if (err) {
++ num_wr = err;
++ break;
++ }
++
++ wc->ibwc.qp = &wq->qp->ibqp;
++ wc->ibwc.src_qp = wq->qp->remote_qpn;
++ wc->ibwc.wr_id = wr->id;
++ wc->ibwc.opcode = is_rq(wq) ? IB_WC_RECV : to_ib_wc_opcode(wr->opcode);
++ wc->ibwc.status = IB_WC_WR_FLUSH_ERR;
++ wc->ibwc.ex.imm_data = 0;
++ wc->ibwc.byte_len = 0;
++ wc->ibwc.port_num = 1;
++
++ wc->wq = wq;
++ wc->reap = wq->reap + 1;
++ wq->reap = 0;
++ wq->completions++;
++
++ ibscif_append_cqe(cq, wc, 0);
++ }
++ spin_unlock_bh(&wq->lock);
++
++ if (num_wr && cq)
++ ibscif_notify_cq(cq);
++
++ return num_wr;
++}
++
++static void ibscif_flush_wqs(struct ibscif_qp *qp)
++{
++ int ret;
++
++ ret = ibscif_flush_wq(&qp->sq, to_cq(qp->ibqp.send_cq));
++ if (ret) /* A clean SQ flush should have done nothing. */
++ qp->state = QP_ERROR;
++
++ ret = ibscif_flush_wq(&qp->rq, to_cq(qp->ibqp.recv_cq));
++ if (ret < 0)
++ qp->state = QP_ERROR;
++
++ ibscif_flush_wq(&qp->iq, NULL);
++}
++
++static void ibscif_reset_wq(struct ibscif_wq *wq, struct ibscif_cq *cq)
++{
++ ibscif_clear_cqes(cq, wq);
++
++ wq->head = 0;
++ wq->tail = 0;
++ wq->depth = 0;
++ wq->reap = 0;
++ wq->next_wr = 0;
++ wq->next_msg_id = 0;
++ wq->completions = 0;
++}
++
++static void ibscif_reset_wqs(struct ibscif_qp *qp)
++{
++ ibscif_reset_wq(&qp->sq, to_cq(qp->ibqp.send_cq));
++ ibscif_reset_wq(&qp->rq, to_cq(qp->ibqp.recv_cq));
++ ibscif_reset_wq(&qp->iq, NULL);
++}
++
++static void ibscif_qp_event(struct ibscif_qp *qp, enum ib_event_type event)
++{
++ if (qp->ibqp.event_handler) {
++ struct ib_event record;
++ record.event = event;
++ record.device = qp->ibqp.device;
++ record.element.qp = &qp->ibqp;
++ qp->ibqp.event_handler(&record, qp->ibqp.qp_context);
++ }
++}
++
++/* Caller must provide proper synchronization. */
++static void ibscif_qp_error(struct ibscif_qp *qp)
++{
++ if (qp->state == QP_ERROR)
++ return;
++
++ if (qp->state == QP_CONNECTED)
++ ibscif_send_disconnect(qp, IBSCIF_REASON_DISCONNECT);
++
++ qp->state = QP_ERROR;
++
++ ibscif_flush_wqs(qp);
++
++ ibscif_cm_async_callback(qp->cm_context);
++ qp->cm_context = NULL;
++
++ /* don't generate the error event because transitioning to IB_QPS_ERR
++ state is normal when a QP is disconnected */
++
++ //ibscif_qp_event(qp, IB_EVENT_QP_FATAL);
++}
++
++/* Caller must provide proper synchronization. */
++static void ibscif_qp_reset(struct ibscif_qp *qp)
++{
++ if (qp->state == QP_RESET)
++ return;
++
++ if (qp->state == QP_CONNECTED)
++ ibscif_send_disconnect(qp, IBSCIF_REASON_DISCONNECT);
++
++ ibscif_reset_wqs(qp);
++ ibscif_reset_wire(&qp->wire);
++
++ ibscif_cm_async_callback(qp->cm_context);
++ qp->cm_context = NULL;
++
++ qp->state = QP_RESET;
++}
++
++/* Caller must provide proper synchronization. */
++void ibscif_qp_idle(struct ibscif_qp *qp)
++{
++ if (qp->state == QP_IDLE)
++ return;
++
++ ibscif_reset_wqs(qp);
++ ibscif_reset_wire(&qp->wire);
++
++ qp->state = QP_IDLE;
++}
++
++/* Caller must provide proper synchronization. */
++static void ibscif_qp_connect(struct ibscif_qp *qp, enum ibscif_qp_state cur_state)
++{
++ if (cur_state == QP_CONNECTED)
++ return;
++
++ qp->loopback = (qp->ibqp.qp_type != IB_QPT_UD) && !scif_loopback && (qp->local_node_id == qp->remote_node_id);
++ qp->conn = NULL;
++
++ qp->state = QP_CONNECTED;
++}
++
++/* Caller must provide proper synchronization. */
++static void ibscif_qp_local_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason)
++{
++ if (qp->state != QP_CONNECTED)
++ return;
++
++ if (reason != IBSCIF_REASON_DISCONNECT)
++ printk(KERN_NOTICE PFX "QP %u sending abnormal disconnect %d\n",
++ qp->ibqp.qp_num, reason);
++
++ qp->state = QP_DISCONNECT;
++ ibscif_send_disconnect(qp, reason);
++
++ ibscif_flush_wqs(qp);
++
++ ibscif_cm_async_callback(qp->cm_context);
++ qp->cm_context = NULL;
++
++ if (reason != IBSCIF_REASON_DISCONNECT) {
++ qp->state = QP_ERROR;
++ ibscif_qp_event(qp, IB_EVENT_QP_FATAL);
++ } else
++ ibscif_qp_idle(qp);
++}
++
++void ibscif_qp_internal_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason)
++{
++ spin_lock_bh(&qp->lock);
++ ibscif_qp_local_disconnect(qp, reason);
++ spin_unlock_bh(&qp->lock);
++}
++
++void ibscif_qp_remote_disconnect(struct ibscif_qp *qp, enum ibscif_reason reason)
++{
++ if (reason != IBSCIF_REASON_DISCONNECT)
++ printk(KERN_NOTICE PFX "QP %u received abnormal disconnect %d\n",
++ qp->ibqp.qp_num, reason);
++
++ if (qp->loopback) {
++ /*
++ * Prevent simultaneous loopback QP disconnect deadlocks.
++ * This is no worse than dropping a disconnect packet.
++ */
++ if (!spin_trylock_bh(&qp->lock))
++ return;
++ } else {
++ spin_lock_bh(&qp->lock);
++ }
++
++ if (qp->state != QP_CONNECTED) {
++ spin_unlock_bh(&qp->lock);
++ return;
++ }
++
++ ibscif_flush_wqs(qp);
++
++ ibscif_cm_async_callback(qp->cm_context);
++ qp->cm_context = NULL;
++
++ if (reason != IBSCIF_REASON_DISCONNECT) {
++ qp->state = QP_ERROR;
++ ibscif_qp_event(qp, IB_EVENT_QP_FATAL);
++ } else {
++ qp->state = QP_IDLE;
++ }
++
++ spin_unlock_bh(&qp->lock);
++}
++
++#define MODIFY_ALLOWED 1
++#define MODIFY_INVALID 0
++#define VALID_TRANSITION(next_state, modify_allowed) { 1, modify_allowed },
++#define INVAL_TRANSITION(next_state) { 0, MODIFY_INVALID },
++#define START_STATE(current_state) {
++#define CEASE_STATE(current_state) },
++
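++/*
++ * Legal QP state transitions: qp_transition[cur][next].valid is non-zero
++ * when the transition from cur to next is allowed, and .modify_allowed
++ * indicates whether attribute modifications may accompany it. For
++ * example, QP_CONNECTED -> QP_DISCONNECT is valid but disallows
++ * attribute changes, while QP_IDLE -> QP_CONNECTED allows them.
++ */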
++static const struct {
++
++ int valid;
++ int modify_allowed;
++
++} qp_transition[NR_QP_STATES][NR_QP_STATES] = {
++
++ START_STATE(QP_IDLE)
++ VALID_TRANSITION( QP_IDLE, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_CONNECTED, MODIFY_ALLOWED )
++ INVAL_TRANSITION( QP_DISCONNECT )
++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ CEASE_STATE(QP_IDLE)
++
++ START_STATE(QP_CONNECTED)
++ INVAL_TRANSITION( QP_IDLE )
++ VALID_TRANSITION( QP_CONNECTED, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_DISCONNECT, MODIFY_INVALID )
++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ CEASE_STATE(QP_CONNECTED)
++
++ START_STATE(QP_DISCONNECT) /* Automatic transition to IDLE */
++ INVAL_TRANSITION( QP_IDLE )
++ INVAL_TRANSITION( QP_CONNECTED )
++ INVAL_TRANSITION( QP_DISCONNECT )
++ INVAL_TRANSITION( QP_ERROR )
++ INVAL_TRANSITION( QP_RESET )
++ INVAL_TRANSITION( QP_IGNORE )
++ CEASE_STATE(QP_DISCONNECT)
++
++ START_STATE(QP_ERROR)
++ VALID_TRANSITION( QP_IDLE, MODIFY_INVALID )
++ INVAL_TRANSITION( QP_CONNECTED )
++ INVAL_TRANSITION( QP_DISCONNECT )
++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ CEASE_STATE(QP_ERROR)
++
++ START_STATE(QP_RESET)
++ VALID_TRANSITION( QP_IDLE, MODIFY_ALLOWED )
++ INVAL_TRANSITION( QP_CONNECTED )
++ INVAL_TRANSITION( QP_DISCONNECT )
++ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ CEASE_STATE(QP_RESET)
++};
++
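++/*
++ * ib_modify_qp() entry point: validate the requested state transition
++ * and attribute changes against the table above, resize the work
++ * queues as needed, then perform the state change processing.
++ */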
++int ibscif_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata)
++{
++ struct ibscif_qp *qp = to_qp(ibqp);
++ enum ibscif_qp_state cur_state, new_state;
++ int sq_size, rq_size, max_or, max_ir;
++ int err = -EINVAL;
++
++ /*
++ * Mutex prevents simultaneous user-mode QP modifies.
++ */
++ down(&qp->modify_mutex);
++
++ cur_state = qp->state;
++
++ if ((attr_mask & IB_QP_CUR_STATE) && (to_ibscif_qp_state(attr->cur_qp_state) != cur_state))
++ goto out;
++ if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > 1))
++ goto out;
++
++ /* Validate any state transition. */
++ if (attr_mask & IB_QP_STATE) {
++ new_state = to_ibscif_qp_state(attr->qp_state);
++ if (new_state < 0 || new_state >= NR_QP_STATES)
++ goto out;
++
++ if (!qp_transition[cur_state][new_state].valid)
++ goto out;
++ } else {
++ new_state = cur_state;
++ }
++
++ /* Validate any attribute modify request. */
++ if (attr_mask & (IB_QP_AV |
++ IB_QP_CAP |
++ IB_QP_DEST_QPN |
++ IB_QP_ACCESS_FLAGS |
++ IB_QP_MAX_QP_RD_ATOMIC |
++ IB_QP_MAX_DEST_RD_ATOMIC)) {
++
++ if (!qp_transition[cur_state][new_state].modify_allowed)
++ goto out;
++
++ if ((attr_mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH) && check_grh) {
++ int remote_node_id = IBSCIF_LID_TO_NODE_ID(attr->ah_attr.dlid);
++ struct ibscif_conn *conn;
++ union ib_gid *dgid;
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: %d-->%d, DGID=%llx:%llx\n",
++ __func__, qp->local_node_id, remote_node_id,
++ __be64_to_cpu(attr->ah_attr.grh.dgid.global.subnet_prefix),
++ __be64_to_cpu(attr->ah_attr.grh.dgid.global.interface_id));
++
++ if (remote_node_id == qp->local_node_id) {
++ dgid = &qp->dev->gid;
++ } else {
++ spin_lock(&qp->lock);
++ conn = ibscif_get_conn(qp->local_node_id, remote_node_id, 0);
++ spin_unlock(&qp->lock);
++ if (!conn) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: failed to make SCIF connection %d-->%d.\n",
++ __func__, qp->local_node_id, remote_node_id);
++ goto out;
++ }
++ dgid = &conn->remote_gid;
++ ibscif_put_conn(conn);
++ }
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: local GID[%d]=%llx:%llx\n",
++ __func__, remote_node_id,
++ __be64_to_cpu(dgid->global.subnet_prefix),
++ __be64_to_cpu(dgid->global.interface_id));
++
++ if (memcmp(dgid, &attr->ah_attr.grh.dgid, sizeof(*dgid))) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: connecting to DGID outside the box is unsupported.\n",
++ __func__);
++ goto out;
++ }
++ }
++
++ if (attr_mask & IB_QP_CAP) {
++ sq_size = attr->cap.max_send_wr;
++ rq_size = attr->cap.max_recv_wr;
++ if ((sq_size > MAX_QP_SIZE) || (rq_size > MAX_QP_SIZE))
++ goto out;
++ } else {
++ sq_size = qp->sq.size;
++ rq_size = qp->rq.size;
++ }
++ if ((sq_size && !qp->ibqp.send_cq) || (rq_size && !qp->ibqp.recv_cq))
++ goto out;
++
++ max_or = (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) ?
++ attr->max_rd_atomic : qp->max_or;
++ max_ir = (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ?
++ attr->max_dest_rd_atomic : qp->iq.size;
++
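++ /*
++ * When rma_threshold is below INT_MAX, the IQ sizing below appears
++ * to reserve sq_size entries for internal RMA requests; exclude
++ * that share from the consumer-visible limit before range checking
++ * (interpretation inferred from the resize call below).
++ */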
++ if (rma_threshold < 0x7FFFFFFF && max_ir > MAX_IR && max_ir >= qp->sq.size)
++ max_ir -= qp->sq.size;
++
++ if ((max_or > MAX_OR) || (max_ir > MAX_IR))
++ goto out;
++
++ /* Validation successful; resize the QP as needed. */
++ err = ibscif_resize_qp(qp, sq_size, rq_size,
++ max_ir + ((rma_threshold == 0x7FFFFFFF) ? 0 : sq_size));
++ if (err)
++ goto out;
++
++ /* No failure paths below the QP resize. */
++
++ qp->max_or = max_or;
++
++ if (attr_mask & IB_QP_ACCESS_FLAGS)
++ qp->access = attr->qp_access_flags;
++
++ if (attr_mask & IB_QP_DEST_QPN)
++ qp->remote_qpn = attr->dest_qp_num;
++
++ if (attr_mask & IB_QP_AV)
++ qp->remote_node_id = IBSCIF_LID_TO_NODE_ID(attr->ah_attr.dlid);
++ }
++
++ err = 0;
++ if (attr_mask & IB_QP_STATE) {
++
++ /* Perform state change processing. */
++ spin_lock_bh(&qp->lock);
++ switch (new_state) {
++ case QP_IDLE:
++ ibscif_qp_idle(qp);
++ break;
++ case QP_CONNECTED:
++ ibscif_qp_connect(qp, cur_state);
++ break;
++ case QP_DISCONNECT:
++ ibscif_qp_local_disconnect(qp, IBSCIF_REASON_DISCONNECT);
++ break;
++ case QP_ERROR:
++ ibscif_qp_error(qp);
++ break;
++ case QP_RESET:
++ ibscif_qp_reset(qp);
++ break;
++ default:
++ break;
++ }
++ spin_unlock_bh(&qp->lock);
++
++ /* scif_connect() cannot be called with spin_lock_bh() held */
++ if (ibqp->qp_type != IB_QPT_UD &&
++ (new_state == QP_CONNECTED) &&
++ !qp->loopback) {
++ int flag = (qp->ibqp.qp_num > qp->remote_qpn);
++ spin_lock(&qp->lock);
++ qp->conn = ibscif_get_conn(qp->local_node_id, qp->remote_node_id, flag);
++ spin_unlock(&qp->lock);
++ }
++ }
++
++ __ibscif_query_qp(qp, attr, NULL);
++out:
++ up(&qp->modify_mutex);
++ return err;
++}
++
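++/* kref release callback: wake the thread waiting in ibscif_destroy_qp(). */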
++void ibscif_complete_qp(struct kref *ref)
++{
++ struct ibscif_qp *qp = container_of(ref, struct ibscif_qp, ref);
++ complete(&qp->done);
++}
++
++int ibscif_destroy_qp(struct ib_qp *ibqp)
++{
++ struct ibscif_qp *qp = to_qp(ibqp);
++ struct ibscif_dev *dev = qp->dev;
++ int i, j;
++ struct ibscif_conn *conn[IBSCIF_MAX_DEVICES];
++
++ if (qp->cm_context) {
++ ibscif_cm_async_callback(qp->cm_context);
++ qp->cm_context = NULL;
++ }
++
++ if (ibqp->qp_num)
++ ibscif_wiremap_del(ibqp->qp_num);
++
++ if (qp->in_scheduler)
++ ibscif_scheduler_remove_qp(qp);
++
++ spin_lock_bh(&qp->lock);
++ if (qp->state == QP_CONNECTED)
++ ibscif_send_disconnect(qp, IBSCIF_REASON_DISCONNECT);
++ spin_unlock_bh(&qp->lock);
++
++ ibscif_put_qp(qp);
++ wait_for_completion(&qp->done);
++
++ ibscif_flush_wqs(qp);
++ ibscif_reset_wqs(qp);
++ ibscif_reset_wire(&qp->wire);
++
++ vfree(qp->sq.wr);
++ vfree(qp->rq.wr);
++ vfree(qp->iq.wr);
++
++ ibscif_release_quota((PAGE_ALIGN(qp->sq.size * qp->sq.wr_size) +
++ PAGE_ALIGN(qp->rq.size * qp->rq.wr_size) +
++ PAGE_ALIGN(qp->iq.size * qp->iq.wr_size)) >> PAGE_SHIFT);
++
++ atomic_dec(&dev->qp_cnt);
++
++ ibscif_put_conn(qp->conn);
++
++ if (qp->ibqp.qp_type == IB_QPT_UD) {
++ spin_lock_bh(&qp->lock);
++ for (i = 0, j = 0; i < IBSCIF_MAX_DEVICES; i++) {
++ if (qp->ud_conn[i]) {
++ conn[j++] = qp->ud_conn[i];
++ qp->ud_conn[i] = NULL;
++ }
++ }
++ spin_unlock_bh(&qp->lock);
++
++ /* ibscif_put_conn() may call scif_unregister(); it must not be
++ * called with a lock held. */
++ for (i = 0; i < j; i++)
++ ibscif_put_conn(conn[i]);
++ }
++
++ kfree(qp);
++ return 0;
++}
++
++void ibscif_qp_add_ud_conn(struct ibscif_qp *qp, struct ibscif_conn *conn)
++{
++ int i;
++
++ if (!qp || !conn)
++ return;
++
++ if (qp->ibqp.qp_type != IB_QPT_UD)
++ return;
++
++ spin_lock_bh(&qp->lock);
++
++ for (i = 0; i < IBSCIF_MAX_DEVICES; i++) {
++ if (qp->ud_conn[i] == conn)
++ goto done;
++ }
++
++ for (i = 0; i < IBSCIF_MAX_DEVICES; i++) {
++ if (qp->ud_conn[i] == NULL) {
++ atomic_inc(&conn->refcnt);
++ qp->ud_conn[i] = conn;
++ break;
++ }
++ }
++done:
++ spin_unlock_bh(&qp->lock);
++}
++
+diff --git a/drivers/infiniband/hw/scif/ibscif_scheduler.c b/drivers/infiniband/hw/scif/ibscif_scheduler.c
+new file mode 100644
+index 0000000..4b3a62d
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_scheduler.c
+@@ -0,0 +1,195 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
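++/*
++ * Transmit work requests from this WQ until the tail is reached, the
++ * transmit window closes, or the max_send quota is consumed. Returns
++ * 0 on success or a negative error from the tx engine.
++ */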
++static int ibscif_schedule_tx(struct ibscif_wq *wq, int max_send)
++{
++ struct ibscif_tx_state *tx = &wq->wirestate->tx;
++ struct ibscif_qp *qp = wq->qp;
++ struct ibscif_wr *wr;
++ int index, sent = 0;
++
++ while ((wq->next_wr != wq->tail) && ibscif_tx_window(tx) && max_send) {
++
++ index = wq->next_wr;
++ wr = ibscif_get_wr(wq, index);
++
++ /*
++ * Ack processing can reschedule a WR that is in retry; only process
++ * it if we are all caught up. Also, do not start a fenced WR until
++ * all prior RDMA read and atomic operations have completed.
++ */
++ if ((wr->flags & IB_SEND_FENCE) && atomic_read(&qp->or_depth) &&
++ (wr->state == WR_WAITING))
++ break;
++
++ switch (wr->opcode) {
++ case WR_RDMA_READ:
++ case WR_ATOMIC_CMP_AND_SWP:
++ case WR_ATOMIC_FETCH_AND_ADD:
++ /* Throttle IQ stream requests if needed. */
++ if (wr->state == WR_WAITING) {
++ if (atomic_read(&qp->or_depth) == qp->max_or)
++ return 0;
++ atomic_inc(&qp->or_depth);
++ }
++ /* Fall through. */
++ case WR_SEND:
++ case WR_SEND_WITH_IMM:
++ case WR_RDMA_WRITE:
++ case WR_RDMA_WRITE_WITH_IMM:
++ case WR_RDMA_READ_RSP:
++ case WR_ATOMIC_RSP:
++ case WR_RMA_RSP:
++ sent = ibscif_xmit_wr(wq, wr, min((u32)max_send, ibscif_tx_window(tx)),
++ 0, tx->next_seq, &tx->next_seq);
++ break;
++ case WR_UD:
++ sent = ibscif_xmit_wr(wq, wr, min((u32)max_send, ibscif_tx_window(tx)),
++ 0, 0, NULL);
++ break;
++ default:
++ printk(KERN_ERR PFX "%s() botch: found opcode %d on work queue\n",
++ __func__, wr->opcode);
++ return -EOPNOTSUPP;
++ }
++
++ /* If an IQ stream request did not get started we need to back off or_depth. */
++ if ((wr->state == WR_WAITING) &&
++ ((wr->opcode == WR_RDMA_READ) ||
++ (wr->opcode == WR_ATOMIC_CMP_AND_SWP) || (wr->opcode == WR_ATOMIC_FETCH_AND_ADD)))
++ atomic_dec(&qp->or_depth);
++
++ if (sent < 0)
++ return sent;
++
++ max_send -= sent;
++
++ /*
++ * The tx engine bumps next_wr when finished sending a whole WR.
++ * Bail if it didn't this time around.
++ */
++ if (wq->next_wr == index)
++ break;
++ }
++
++ return 0;
++}
++
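++/* Schedule one WQ within the device's currently available send quota. */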
++static int ibscif_schedule_wq(struct ibscif_wq *wq)
++{
++ int max_send, err = 0;
++ int need_call_sq_completions = 0;
++
++ /* Ignore loopback QPs that may be scheduled by retry processing. */
++ if (wq->qp->loopback)
++ return 0;
++
++ max_send = atomic_read(&wq->qp->dev->available);
++ if (!max_send)
++ return -EBUSY;
++
++ spin_lock(&wq->lock);
++ err = ibscif_schedule_tx(wq, max_send);
++ need_call_sq_completions = wq->fast_rdma_completions;
++ wq->fast_rdma_completions = 0;
++ spin_unlock(&wq->lock);
++
++ if (unlikely(err))
++ ibscif_qp_internal_disconnect(wq->qp, IBSCIF_REASON_QP_FATAL);
++
++ if (fast_rdma && need_call_sq_completions)
++ ibscif_process_sq_completions(wq->qp);
++
++ return err;
++}
++
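++/*
++ * Top-level scheduler. Loopback QPs are serviced directly via
++ * ibscif_loopback(); otherwise this WQ is scheduled and, while the
++ * device's was_new flag stays set, every WQ on the device is given
++ * a turn.
++ */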
++void ibscif_schedule(struct ibscif_wq *wq)
++{
++ struct ibscif_dev *dev;
++ struct list_head processed;
++
++ if (wq->qp->loopback) {
++ ibscif_loopback(wq);
++ return;
++ }
++ dev = wq->qp->dev;
++
++ if (!ibscif_schedule_wq(wq))
++ goto out;
++
++ while (atomic_xchg(&dev->was_new, 0)) {
++ /* Bail if the device is busy. */
++ if (down_trylock(&dev->mutex))
++ goto out;
++
++ /*
++ * Schedule each WQ on the device and move it to the processed list.
++ * When complete, append the processed list to the device WQ list.
++ */
++ INIT_LIST_HEAD(&processed);
++ while (!list_empty(&dev->wq_list)) {
++ wq = list_entry(dev->wq_list.next, typeof(*wq), entry);
++ if (!ibscif_schedule_wq(wq)) {
++ DEV_STAT(dev, sched_exhaust++);
++ list_splice(&processed, dev->wq_list.prev);
++ up(&dev->mutex);
++ goto out;
++ }
++ list_move_tail(&wq->entry, &processed);
++ }
++ list_splice(&processed, dev->wq_list.prev);
++
++ up(&dev->mutex);
++ }
++ return;
++out:
++ atomic_inc(&dev->was_new);
++}
++
++void ibscif_scheduler_add_qp(struct ibscif_qp *qp)
++{
++ struct ibscif_dev *dev = qp->dev;
++
++ down(&dev->mutex);
++ list_add_tail(&qp->sq.entry, &dev->wq_list);
++ list_add_tail(&qp->iq.entry, &dev->wq_list);
++ up(&dev->mutex);
++}
++
++void ibscif_scheduler_remove_qp(struct ibscif_qp *qp)
++{
++ struct ibscif_dev *dev = qp->dev;
++
++ down(&dev->mutex);
++ list_del(&qp->sq.entry);
++ list_del(&qp->iq.entry);
++ up(&dev->mutex);
++}
+diff --git a/drivers/infiniband/hw/scif/ibscif_util.c b/drivers/infiniband/hw/scif/ibscif_util.c
+new file mode 100644
+index 0000000..b395f2f
+--- /dev/null
++++ b/drivers/infiniband/hw/scif/ibscif_util.c
+@@ -0,0 +1,629 @@
++/*
++ * Copyright (c) 2008 Intel Corporation. All rights reserved.
++ *
++ * This software is available to you under a choice of one of two
++ * licenses. You may choose to be licensed under the terms of the
++ * GNU General Public License (GPL) Version 2, available from the
++ * file COPYING in the main directory of this source tree, or the
++ * OpenFabrics.org BSD license below:
++ *
++ * Redistribution and use in source and binary forms, with or
++ * without modification, are permitted provided that the following
++ * conditions are met:
++ *
++ * - Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * - Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials
++ * provided with the distribution.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
++ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
++ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include "ibscif_driver.h"
++
++#define IBSCIF_CONN_IDLE 0
++#define IBSCIF_CONN_REQ_SENT 1
++#define IBSCIF_CONN_REQ_RCVD 2
++#define IBSCIF_CONN_ESTABLISHED 3
++#define IBSCIF_CONN_ACTIVE 4
++
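++/*
++ * Pairwise connection setup state, indexed by [local node][remote node]
++ * and protected by conn_state_lock.
++ */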
++DEFINE_SPINLOCK(conn_state_lock);
++static int conn_state[IBSCIF_MAX_DEVICES][IBSCIF_MAX_DEVICES];
++
++#define IBSCIF_CONN_REP 1
++#define IBSCIF_CONN_REJ 2
++#define IBSCIF_CONN_ERR 3
++
++struct ibscif_conn_resp {
++ int cmd;
++ union ib_gid gid;
++};
++
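++/*
++ * Passive side of SCIF connection setup: accept an incoming endpoint,
++ * resolve simultaneous-connect races via the conn_state table, exchange
++ * GIDs when GRH checking is enabled, and publish the new connection.
++ */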
++void ibscif_do_accept(struct ibscif_dev *dev)
++{
++ struct scif_port_id peer;
++ scif_epd_t ep;
++ struct ibscif_conn *conn;
++ int ret;
++ struct ibscif_conn_resp resp;
++ int resp_size;
++
++ if (check_grh)
++ resp_size = sizeof(resp);
++ else
++ resp_size = sizeof(int);
++
++ ret = scif_accept(dev->listen_ep, &peer, &ep, SCIF_ACCEPT_SYNC);
++ if (ret) {
++ printk(KERN_ALERT PFX "%s: scif_accept returns %d\n", __func__, ret);
++ return;
++ }
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: %d<--%d\n", __func__, dev->node_id, peer.node);
++
++ if (check_grh)
++ memcpy(&resp.gid, &dev->gid, sizeof(resp.gid));
++
++ spin_lock(&conn_state_lock);
++ switch (conn_state[dev->node_id][peer.node]) {
++ case IBSCIF_CONN_IDLE:
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD;
++ resp.cmd = IBSCIF_CONN_REP;
++ if (verbose)
++ printk(KERN_INFO PFX "%s: no double connection, accepting\n", __func__);
++ break;
++
++ case IBSCIF_CONN_REQ_SENT:
++ /* A connection request has been sent but not yet answered. The
++ * node id breaks the tie when both sides send a connection
++ * request: one side accepts the peer's request and its own
++ * request is rejected by the peer.
++ */
++ if (dev->node_id > peer.node) {
++ resp.cmd = IBSCIF_CONN_REJ;
++ if (verbose)
++ printk(KERN_INFO PFX "%s: double connection, rejecting (peer will accept)\n", __func__);
++ } else if (dev->node_id == peer.node) {
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD;
++ resp.cmd = IBSCIF_CONN_REP;
++ if (verbose)
++ printk(KERN_INFO PFX "%s: loopback connection, accepting\n", __func__);
++ } else {
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD;
++ resp.cmd = IBSCIF_CONN_REP;
++ if (verbose)
++ printk(KERN_INFO PFX "%s: double connection, accepting (peer will reject)\n", __func__);
++ }
++ break;
++
++ case IBSCIF_CONN_REQ_RCVD:
++ if (verbose)
++ printk(KERN_INFO PFX "%s: duplicated connection request, rejecting\n", __func__);
++ resp.cmd = IBSCIF_CONN_REJ;
++ break;
++
++ case IBSCIF_CONN_ESTABLISHED:
++ case IBSCIF_CONN_ACTIVE:
++ if (verbose)
++ printk(KERN_INFO PFX "%s: already connected, rejecting\n", __func__);
++ resp.cmd = IBSCIF_CONN_REJ;
++ break;
++
++ default:
++ if (verbose)
++ printk(KERN_INFO PFX "%s: invalid state: %d\n", __func__, conn_state[dev->node_id][peer.node]);
++ resp.cmd = IBSCIF_CONN_ERR;
++ break;
++ }
++ spin_unlock(&conn_state_lock);
++
++ ret = scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_send returns %d\n", __func__, ret);
++ scif_close(ep);
++ return;
++ }
++
++ if (resp.cmd != IBSCIF_CONN_REP) {
++ /* one additional handshake prevents the previous send from being lost when the ep is closed */
++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
++ scif_close(ep);
++ return;
++ }
++
++ if (check_grh) {
++ ret = scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_recv returns %d\n", __func__, ret);
++ scif_close(ep);
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_IDLE;
++ spin_unlock(&conn_state_lock);
++ return;
++ }
++ }
++
++ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
++ if (!conn) {
++ printk(KERN_ALERT PFX "%s: cannot allocate connection context.\n", __func__);
++ scif_close(ep);
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_IDLE;
++ spin_unlock(&conn_state_lock);
++ return;
++ }
++
++ conn->ep = ep;
++ conn->remote_node_id = peer.node;
++ if (check_grh)
++ memcpy(&conn->remote_gid, &resp.gid, sizeof(conn->remote_gid));
++ conn->dev = dev;
++ atomic_set(&conn->refcnt, 0);
++
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_ESTABLISHED;
++ spin_unlock(&conn_state_lock);
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: connection established. ep=%p\n", __func__, ep);
++
++ ibscif_refresh_mreg(conn);
++
++ /* one additional sync ensures the MRs are registered with the new ep on both sides */
++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
++
++ list_add(&conn->entry, &dev->conn_list);
++ ibscif_refresh_pollep_list();
++
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_ACTIVE;
++ spin_unlock(&conn_state_lock);
++}
++
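++/*
++ * Active side of SCIF connection setup, the counterpart of
++ * ibscif_do_accept(). Returns the new connection, ERR_PTR(-EAGAIN)
++ * when the peer is connecting simultaneously, or NULL on other
++ * failures.
++ */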
++struct ibscif_conn *ibscif_do_connect(struct ibscif_dev *dev, int remote_node_id)
++{
++ struct scif_port_id dest;
++ struct ibscif_conn *conn = NULL;
++ int ret;
++ scif_epd_t ep;
++ struct ibscif_conn_resp resp;
++ union ib_gid peer_gid;
++ int resp_size;
++
++ if (check_grh)
++ resp_size = sizeof(resp);
++ else
++ resp_size = sizeof(int);
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: %d-->%d\n", __func__, dev->node_id, remote_node_id);
++
++ /* Validate remote_node_id for conn_state array check */
++ if ((remote_node_id < 0) || (remote_node_id >= IBSCIF_MAX_DEVICES))
++ return ERR_PTR(-EINVAL);
++
++ spin_lock(&conn_state_lock);
++ if (conn_state[dev->node_id][remote_node_id] != IBSCIF_CONN_IDLE) {
++ spin_unlock(&conn_state_lock);
++ if (verbose)
++ printk(KERN_INFO PFX "%s: connection already in progress, retry\n", __func__);
++ return ERR_PTR(-EAGAIN);
++ }
++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_REQ_SENT;
++ spin_unlock(&conn_state_lock);
++
++ ep = scif_open();
++ if (!ep) { /* SCIF API semantics */
++ ibscif_db("scif_open failed\n");
++ goto out_state;
++ }
++
++ if (IS_ERR(ep)) { /* SCIF emulator semantics */
++ ibscif_db("ep is ERR\n");
++ goto out_state;
++ }
++
++ dest.node = remote_node_id;
++ dest.port = SCIF_OFED_PORT_0;
++
++ ret = scif_connect(ep, &dest);
++ if (ret < 0) {
++ ibscif_db("scif_connect failed\n");
++ goto out_close;
++ }
++
++ /* Now ret is the port number ep is bound to */
++
++ ret = scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_recv returns %d\n", __func__, ret);
++ goto out_close;
++ }
++
++ if (resp.cmd != IBSCIF_CONN_REP) {
++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ /* the peer has issued the connection request */
++ if (resp.cmd == IBSCIF_CONN_REJ) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: rejected by peer due to double connection\n", __func__);
++ scif_close(ep);
++ /* don't reset the state because it is used for checking the connection state */
++ return ERR_PTR(-EAGAIN);
++ } else {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: rejected by peer due to invalid state\n", __func__);
++ goto out_close;
++ }
++ }
++
++ if (check_grh) {
++ memcpy(&peer_gid, &resp.gid, sizeof(peer_gid));
++ memcpy(&resp.gid, &dev->gid, sizeof(resp.gid));
++ ret = scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_send returns %d\n", __func__, ret);
++ goto out_close;
++ }
++ }
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: connection established. ep=%p\n", __func__, ep);
++
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_ESTABLISHED;
++ spin_unlock(&conn_state_lock);
++
++ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
++ if (!conn) {
++ printk(KERN_ALERT PFX "%s: failed to allocate connection object\n", __func__);
++ goto out_close;
++ }
++
++ conn->ep = ep;
++ conn->remote_node_id = remote_node_id;
++ if (check_grh)
++ memcpy(&conn->remote_gid, &peer_gid, sizeof(conn->remote_gid));
++ conn->dev = dev;
++ atomic_set(&conn->refcnt, 0);
++
++ ibscif_refresh_mreg(conn);
++
++ /* one additional sync ensures the MRs are registered with the new ep on both sides */
++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
++
++ list_add_tail(&conn->entry, &dev->conn_list);
++ ibscif_refresh_pollep_list();
++
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_ACTIVE;
++ spin_unlock(&conn_state_lock);
++
++ return conn;
++
++out_close:
++ scif_close(ep);
++
++out_state:
++ spin_lock(&conn_state_lock);
++ if (conn_state[dev->node_id][remote_node_id] == IBSCIF_CONN_REQ_SENT)
++ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_IDLE;
++ spin_unlock(&conn_state_lock);
++ return conn;
++}
++
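++/*
++ * Look up (or establish) a reference-counted connection from node_id
++ * to remote_node_id. For loopback, find_local_peer selects which of
++ * the two endpoint halves is returned.
++ */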
++struct ibscif_conn *ibscif_get_conn(int node_id, int remote_node_id, int find_local_peer)
++{
++ struct ibscif_dev *cur, *next, *dev = NULL;
++ struct ibscif_conn *conn, *conn1, *conn2;
++ int done = 0, err = 0, connect_tried = 0;
++
++ down(&devlist_mutex);
++ list_for_each_entry_safe(cur, next, &devlist, entry) {
++ if (cur->node_id == node_id) {
++ dev = cur;
++ break;
++ }
++ }
++ up(&devlist_mutex);
++
++ if (!dev)
++ return NULL;
++
++again:
++ conn1 = NULL;
++ conn2 = NULL;
++ down(&dev->mutex);
++ list_for_each_entry(conn, &dev->conn_list, entry) {
++ if (conn->remote_node_id == remote_node_id) {
++ if (node_id == remote_node_id) {
++ if (!conn1) {
++ conn1 = conn;
++ continue;
++ } else {
++ conn2 = conn;
++ break;
++ }
++ }
++ up(&dev->mutex);
++ atomic_inc(&conn->refcnt);
++ if (conn->local_close) {
++ conn->local_close = 0;
++ ibscif_send_reopen(conn);
++ }
++ return conn;
++ }
++ }
++ up(&dev->mutex);
++
++ /* For loopback connections, wait until both endpoints are in the
++ * list to ensure that different endpoints are assigned to the two
++ * sides.
++ */
++ if (node_id == remote_node_id) {
++ if (conn1 && conn2) {
++ conn = find_local_peer ? conn2 : conn1;
++ atomic_inc(&conn->refcnt);
++ if (conn->local_close) {
++ conn->local_close = 0;
++ ibscif_send_reopen(conn);
++ }
++ return conn;
++ } else if (conn1) {
++ schedule();
++ goto again;
++ }
++ }
++
++ if (connect_tried) {
++ printk(KERN_ALERT PFX "%s: ERROR: cannot get connection (%d-->%d) after waiting, state=%d\n",
++ __func__, dev->node_id, remote_node_id, err - 1);
++ return NULL;
++ }
++
++ conn = ibscif_do_connect(dev, remote_node_id);
++
++ /* If a connection is in progress, wait for it to finish */
++ if (conn == ERR_PTR(-EAGAIN)) {
++ while (!done && !err) {
++ spin_lock(&conn_state_lock);
++ switch (conn_state[node_id][remote_node_id]) {
++ case IBSCIF_CONN_REQ_SENT:
++ case IBSCIF_CONN_REQ_RCVD:
++ case IBSCIF_CONN_ESTABLISHED:
++ break;
++ case IBSCIF_CONN_ACTIVE:
++ done = 1;
++ break;
++ default:
++ err = 1 + conn_state[node_id][remote_node_id];
++ break;
++ }
++ spin_unlock(&conn_state_lock);
++ schedule();
++ }
++ }
++
++ connect_tried = 1;
++ goto again;
++}
++
++void ibscif_put_conn(struct ibscif_conn *conn)
++{
++ if (!conn)
++ return;
++
++ if (atomic_dec_and_test(&conn->refcnt)) {
++ ibscif_send_close(conn);
++ conn->local_close = 1;
++ }
++}
++
++void ibscif_get_pollep_list(struct scif_pollepd *polleps,
++ struct ibscif_dev **devs, int *types, struct ibscif_conn **conns, int *count)
++{
++ struct ibscif_dev *dev;
++ struct ibscif_conn *conn;
++ int i = 0;
++ int max = *count;
++
++ down(&devlist_mutex);
++ list_for_each_entry(dev, &devlist, entry) {
++ if (i >= max)
++ break;
++
++ polleps[i].epd = dev->listen_ep;
++ polleps[i].events = POLLIN;
++ polleps[i].revents = 0;
++ devs[i] = dev;
++ types[i] = IBSCIF_EP_TYPE_LISTEN;
++ conns[i] = NULL;
++ i++;
++ if (verbose)
++ printk(KERN_INFO PFX "%s: ep=%p (%d:listen)\n", __func__, dev->listen_ep, dev->node_id);
++
++ down(&dev->mutex);
++ list_for_each_entry(conn, &dev->conn_list, entry) {
++ if (i >= max)
++ break;
++ polleps[i].epd = conn->ep;
++ polleps[i].events = POLLIN;
++ polleps[i].revents = 0;
++ devs[i] = dev;
++ types[i] = IBSCIF_EP_TYPE_COMM;
++ conns[i] = conn;
++ i++;
++ if (verbose)
++ printk(KERN_INFO PFX "%s: ep=%p (%d<--->%d)\n", __func__, conn->ep, dev->node_id, conn->remote_node_id);
++ }
++ up(&dev->mutex);
++ }
++ up(&devlist_mutex);
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: count=%d\n", __func__, i);
++ *count = i;
++}
++
++void ibscif_get_ep_list(scif_epd_t *eps, int *count)
++{
++ struct ibscif_dev *dev;
++ struct ibscif_conn *conn;
++ int i = 0;
++ int max = *count;
++
++ down(&devlist_mutex);
++ list_for_each_entry(dev, &devlist, entry) {
++ if (i >= max)
++ break;
++
++ down(&dev->mutex);
++ list_for_each_entry(conn, &dev->conn_list, entry) {
++ if (i >= max)
++ break;
++ eps[i] = conn->ep;
++ i++;
++ }
++ up(&dev->mutex);
++ }
++ up(&devlist_mutex);
++
++ *count = i;
++}
++
++void ibscif_remove_ep(struct ibscif_dev *dev, scif_epd_t ep)
++{
++ struct ibscif_conn *conn, *next;
++
++ down(&dev->mutex);
++ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) {
++ if (conn->ep == ep) {
++ spin_lock(&conn_state_lock);
++ conn_state[conn->dev->node_id][conn->remote_node_id] = IBSCIF_CONN_IDLE;
++ spin_unlock(&conn_state_lock);
++ list_del(&conn->entry);
++ }
++ }
++ up(&dev->mutex);
++}
++
++void ibscif_free_conn(struct ibscif_conn *conn)
++{
++ scif_close(conn->ep);
++ kfree(conn);
++}
++
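++/*
++ * Reap connections that both sides have closed; returns the number
++ * of connections freed.
++ */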
++int ibscif_cleanup_idle_conn(void)
++{
++ struct ibscif_dev *dev;
++ struct ibscif_conn *conn, *next;
++ struct ibscif_conn *idle_conns[IBSCIF_MAX_DEVICES];
++ int i, n = 0;
++
++ down(&devlist_mutex);
++ list_for_each_entry(dev, &devlist, entry) {
++ down(&dev->mutex);
++ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) {
++ if (conn->local_close && conn->remote_close) {
++ spin_lock(&conn_state_lock);
++ conn_state[conn->dev->node_id][conn->remote_node_id] = IBSCIF_CONN_IDLE;
++ spin_unlock(&conn_state_lock);
++ list_del(&conn->entry);
++ idle_conns[n++] = conn;
++ }
++ }
++ up(&dev->mutex);
++ }
++ up(&devlist_mutex);
++
++ for (i = 0; i < n; i++)
++ ibscif_free_conn(idle_conns[i]);
++
++ if (n && verbose)
++ printk(KERN_ALERT PFX "%s: n=%d\n", __func__, n);
++
++ return n;
++}
++
++/*
++ * Simple routines to support performance profiling
++ */
++
++#include <linux/time.h>
++
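++/*
++ * Return the number of microseconds elapsed since the previous call
++ * (zero on the first call).
++ */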
++static uint32_t ibscif_time_passed(void)
++{
++ static int first = 1;
++ static struct timeval t0;
++ static struct timeval t;
++ uint32_t usec;
++
++ if (first) {
++ do_gettimeofday(&t0);
++ first = 0;
++ return 0;
++ }
++
++ do_gettimeofday(&t);
++ usec = (t.tv_sec - t0.tv_sec) * 1000000UL;
++ if (t.tv_usec >= t0.tv_usec)
++ usec += (t.tv_usec - t0.tv_usec);
++ else
++ usec -= (t0.tv_usec - t.tv_usec);
++
++ t0 = t;
++ return usec;
++}
++
++#define IBSCIF_PERF_MAX_SAMPLES 100
++#define IBSCIF_PERF_MAX_COUNTERS 10
++
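++/*
++ * Record the microseconds elapsed since the last call in slot
++ * 'counter' of the current sample row; when 'next' is set, advance
++ * the row and dump all samples once IBSCIF_PERF_MAX_SAMPLES rows
++ * have been collected.
++ */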
++void ibscif_perf_sample(int counter, int next)
++{
++ static uint32_t T[IBSCIF_PERF_MAX_SAMPLES][IBSCIF_PERF_MAX_COUNTERS];
++ static int T_idx = 0;
++ int i, j, sum;
++
++ if (counter >= 0 && counter < IBSCIF_PERF_MAX_COUNTERS)
++ T[T_idx][counter] = ibscif_time_passed();
++
++ if (next) {
++ if (++T_idx < IBSCIF_PERF_MAX_SAMPLES)
++ return;
++
++ T_idx = 0;
++
++ /* batch output to minimize the impact on higher level timing */
++ for (i = 0; i < IBSCIF_PERF_MAX_SAMPLES; i++) {
++ sum = 0;
++ printk(KERN_INFO PFX "%d: ", i);
++ for (j = 0; j < IBSCIF_PERF_MAX_COUNTERS; j++) {
++ printk("T%d=%u ", j, T[i][j]);
++ if (j > 0)
++ sum += T[i][j];
++ }
++ printk("SUM(T1..T%d)=%u\n", IBSCIF_PERF_MAX_COUNTERS - 1, sum);
++ }
++ }
++}
++
+--
+2.7.0
+