-From 7f029de4573b51bc7a77409cfa667b3d43d31f81 Mon Sep 17 00:00:00 2001
-From: Phil Cayton <phil.cayton@intel.com>
-Date: Fri, 11 Jul 2014 12:17:24 -0700
-Subject: [PATCH 08/12] Add ibscif to the Infiniband HW directory
-
-Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
-Signed-off-by: Phil Cayton <phil.cayton@intel.com>
-Signed-off-by: Jay Sternberg <jay.e.sternberg@intel.com>
----
- drivers/infiniband/hw/scif/Kconfig | 4 +
- drivers/infiniband/hw/scif/Makefile | 40 +
- drivers/infiniband/hw/scif/ibscif_ah.c | 50 +
- drivers/infiniband/hw/scif/ibscif_cm.c | 514 +++++
- drivers/infiniband/hw/scif/ibscif_cq.c | 313 +++
- drivers/infiniband/hw/scif/ibscif_driver.h | 774 +++++++
- drivers/infiniband/hw/scif/ibscif_loopback.c | 583 +++++
- drivers/infiniband/hw/scif/ibscif_main.c | 351 +++
- drivers/infiniband/hw/scif/ibscif_mr.c | 566 +++++
- drivers/infiniband/hw/scif/ibscif_pd.c | 56 +
- drivers/infiniband/hw/scif/ibscif_post.c | 306 +++
- drivers/infiniband/hw/scif/ibscif_procfs.c | 185 ++
- drivers/infiniband/hw/scif/ibscif_protocol.c | 2832 +++++++++++++++++++++++++
- drivers/infiniband/hw/scif/ibscif_protocol.h | 395 ++++
- drivers/infiniband/hw/scif/ibscif_provider.c | 409 ++++
- drivers/infiniband/hw/scif/ibscif_qp.c | 825 +++++++
- drivers/infiniband/hw/scif/ibscif_scheduler.c | 195 ++
- drivers/infiniband/hw/scif/ibscif_util.c | 574 +++++
- 18 files changed, 8972 insertions(+)
- create mode 100644 drivers/infiniband/hw/scif/Kconfig
- create mode 100644 drivers/infiniband/hw/scif/Makefile
- create mode 100644 drivers/infiniband/hw/scif/ibscif_ah.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_cm.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_cq.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_driver.h
- create mode 100644 drivers/infiniband/hw/scif/ibscif_loopback.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_main.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_mr.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_pd.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_post.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_procfs.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_protocol.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_protocol.h
- create mode 100644 drivers/infiniband/hw/scif/ibscif_provider.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_qp.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_scheduler.c
- create mode 100644 drivers/infiniband/hw/scif/ibscif_util.c
-
-diff --git a/drivers/infiniband/hw/scif/Kconfig b/drivers/infiniband/hw/scif/Kconfig
-new file mode 100644
-index 0000000..cda125f
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/Kconfig
-@@ -0,0 +1,4 @@
-+config INFINIBAND_SCIF
-+ tristate "SCIF RDMA driver support"
-+ ---help---
-+ RDMA over SCIF driver.
-diff --git a/drivers/infiniband/hw/scif/Makefile b/drivers/infiniband/hw/scif/Makefile
-new file mode 100644
-index 0000000..eb74366
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/Makefile
-@@ -0,0 +1,40 @@
-+ifneq ($(KERNELRELEASE),)
-+
-+# Original Make begins
-+
-+obj-$(CONFIG_INFINIBAND_SCIF) += ibscif.o
-+
-+ibscif-y := ibscif_main.o \
-+ ibscif_ah.o \
-+ ibscif_pd.o \
-+ ibscif_cq.o \
-+ ibscif_qp.o \
-+ ibscif_mr.o \
-+ ibscif_cm.o \
-+ ibscif_post.o \
-+ ibscif_procfs.o \
-+ ibscif_loopback.o \
-+ ibscif_provider.o \
-+ ibscif_protocol.o \
-+ ibscif_scheduler.o \
-+ ibscif_util.o
-+
-+# Original Makefile ends
-+
-+else
-+
-+ifeq ($(KVER),)
-+ ifeq ($(KDIR),)
-+ KDIR := /lib/modules/$(shell uname -r)/build
-+ endif
-+else
-+ KDIR := /lib/modules/$(KVER)/build
-+endif
-+
-+all:
-+ $(MAKE) -C $(KDIR) SUBDIRS=$(shell pwd) CONFIG_INFINIBAND_SCIF=m
-+
-+clean:
-+ rm -rf *.o *.ko *.mod.c .*.cmd Module.* .tmp_versions
-+
-+endif
-diff --git a/drivers/infiniband/hw/scif/ibscif_ah.c b/drivers/infiniband/hw/scif/ibscif_ah.c
-new file mode 100644
-index 0000000..fb24486
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_ah.c
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_ah.c b/drivers/infiniband/hw/scif/ibscif_ah.c
+--- a/drivers/infiniband/hw/scif/ibscif_ah.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_ah.c 2014-09-08 13:57:08.000000000 -0700
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ struct ibscif_ah *ah;
+
+ ah = kzalloc(sizeof *ah, GFP_KERNEL);
-+ if (!ah)
++ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ ah->dlid = cpu_to_be16(attr->dlid);
+ kfree(to_ah(ibah));
+ return 0;
+}
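
For background on the pattern used in ibscif_create_ah() above: allocation failures travel back through the pointer itself via the kernel's ERR_PTR convention, and the caller unpacks them with IS_ERR()/PTR_ERR(). A minimal sketch (demo_ah and both helpers are illustrative names, not part of the patch):

    #include <linux/err.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct demo_ah { u16 dlid; };       /* stand-in for struct ibscif_ah */

    static struct demo_ah *demo_ah_alloc(void)
    {
            struct demo_ah *ah = kzalloc(sizeof(*ah), GFP_KERNEL);

            /* encode the errno in the returned pointer rather than NULL */
            return ah ? ah : ERR_PTR(-ENOMEM);
    }

    static int demo_ah_caller(struct demo_ah **out)
    {
            struct demo_ah *ah = demo_ah_alloc();

            if (IS_ERR(ah))
                    return PTR_ERR(ah);     /* recovers -ENOMEM */
            *out = ah;
            return 0;
    }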
-diff --git a/drivers/infiniband/hw/scif/ibscif_cm.c b/drivers/infiniband/hw/scif/ibscif_cm.c
-new file mode 100644
-index 0000000..18c07c1
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_cm.c
-@@ -0,0 +1,514 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_cm.c b/drivers/infiniband/hw/scif/ibscif_cm.c
+--- a/drivers/infiniband/hw/scif/ibscif_cm.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_cm.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,515 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+}
+
+static inline void get_cm(struct ibscif_cm *cm_ctx)
-+{
++{
+ kref_get(&cm_ctx->kref);
+}
-+
++
+static inline void put_cm(struct ibscif_cm *cm_ctx)
-+{
++{
+ kref_put(&cm_ctx->kref, free_cm);
+}
+
+}
+
+static inline void get_listen(struct ibscif_listen *listen)
-+{
++{
+ kref_get(&listen->kref);
+}
-+
++
+static inline void put_listen(struct ibscif_listen *listen)
-+{
++{
+ kref_put(&listen->kref, free_listen);
+}
+
+
+ qp = ibscif_get_qp(cm_ctx->qpn);
+ if (IS_ERR(qp)) {
-+ printk(KERN_ERR "%s: invalid QP number: %d\n", __func__, cm_ctx->qpn);
-+ return -EINVAL;
++ printk(KERN_ERR PFX "%s: invalid QP number: %d\n", __func__, cm_ctx->qpn);
++ return -EINVAL;
+ }
-+
++
+ qp_attr_mask = IB_QP_STATE |
-+ IB_QP_AV |
++ IB_QP_AV |
+ IB_QP_DEST_QPN |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_MAX_QP_RD_ATOMIC |
+ IB_QP_MAX_DEST_RD_ATOMIC;
-+
++
+ qp_attr.ah_attr.ah_flags = 0;
+ qp_attr.ah_attr.dlid = IBSCIF_NODE_ID_TO_LID(cm_ctx->remote_node_id);
+ qp_attr.dest_qp_num = cm_ctx->remote_qpn;
+ IB_ACCESS_REMOTE_ATOMIC;
+ qp_attr.max_rd_atomic = 16; /* 8-bit value, don't use MAX_OR */
+ qp_attr.max_dest_rd_atomic = 16;/* 8-bit value, don't use MAX_IR */
-+
++
+ err = ib_modify_qp(&qp->ibqp, &qp_attr, qp_attr_mask);
-+
++
+ if (!err) {
+ qp->cm_context = cm_ctx;
+ get_cm(cm_ctx);
+ }
-+
++
+ ibscif_put_qp(qp);
-+
++
+ return err;
+}
+
+static void event_connection_close(struct ibscif_cm *cm_ctx)
+{
+ struct iw_cm_event event;
-+
++
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CLOSE;
+ event.status = -ECONNRESET;
+static void event_connection_reply(struct ibscif_cm *cm_ctx, int status)
+{
+ struct iw_cm_event event;
-+
++
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REPLY;
+ event.status = status;
+static void event_connection_request(struct ibscif_cm *cm_ctx)
+{
+ struct iw_cm_event event;
-+
++
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ event.local_addr = *(struct sockaddr_storage *) &cm_ctx->local_addr;
+static void event_connection_established( struct ibscif_cm *cm_ctx )
+{
+ struct iw_cm_event event;
-+
++
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_ESTABLISHED;
+ event.ird = 16;
+void ibscif_cm_async_callback(void *cm_context)
+{
+ struct ibscif_cm *cm_ctx = cm_context;
-+
++
+ if (cm_ctx) {
+ event_connection_close(cm_ctx);
+ put_cm(cm_ctx);
+
+ cm_ctx = kzalloc(sizeof *cm_ctx, GFP_KERNEL);
+ if (!cm_ctx) {
-+ printk(KERN_ALERT "%s: cannot allocate cm_ctx\n", __func__);
++ printk(KERN_ALERT PFX "%s: cannot allocate cm_ctx\n", __func__);
+ return -ENOMEM;
+ }
+
+ node_id = sockaddr_in_to_node_id(*local_addr);
+ remote_node_id = sockaddr_in_to_node_id(*remote_addr);
+ if (node_id<0 || remote_node_id<0) {
-+ printk(KERN_ALERT "%s: invalid address, local_addr=%8x, remote_addr=%8x, node_id=%d, remote_node_id=%d\n",
++ printk(KERN_ALERT PFX "%s: invalid address, local_addr=%8x, remote_addr=%8x, node_id=%d, remote_node_id=%d\n",
+ __func__, local_addr->sin_addr.s_addr, remote_addr->sin_addr.s_addr,
+ node_id, remote_node_id);
+ err = -EINVAL;
+
+ cm_ctx->conn = ibscif_get_conn( node_id, remote_node_id, 0 );
+ if (!cm_ctx->conn) {
-+ printk(KERN_ALERT "%s: failed to get connection %d-->%d\n", __func__, node_id, remote_node_id);
++ printk(KERN_ALERT PFX "%s: failed to get connection %d-->%d\n", __func__, node_id, remote_node_id);
+ err = -EINVAL;
+ goto out_free;
+ }
+ cm_ctx->qpn = conn_param->qpn;
+ cm_ctx->plen = conn_param->private_data_len;
+ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) {
-+ printk(KERN_ALERT "%s: plen (%d) exceeds the limit (%d), truncated.\n",
++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n",
+ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE);
+ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE;
+ }
+ cm_ctx->qpn = conn_param->qpn;
+ cm_ctx->plen = conn_param->private_data_len;
+ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) {
-+ printk(KERN_ALERT "%s: plen (%d) exceeds the limit (%d), truncated.\n",
++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n",
+ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE);
+ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE;
+ }
+
+ err = connect_qp( cm_ctx );
+ if (err) {
-+ printk(KERN_ALERT "%s: failed to modify QP into connected state\n", __func__);
++ printk(KERN_ALERT PFX "%s: failed to modify QP into connected state\n", __func__);
+ goto err_out;
+ }
+
+ err = ibscif_send_cm_rep( cm_ctx );
+ if (err) {
-+ printk(KERN_ALERT "%s: failed to send REP\n", __func__);
++ printk(KERN_ALERT PFX "%s: failed to send REP\n", __func__);
+ goto err_out;
+ }
+
+
+ listen = kzalloc(sizeof *listen, GFP_KERNEL);
+ if (!listen) {
-+ printk(KERN_ALERT "%s: cannot allocate listen object\n", __func__);
++ printk(KERN_ALERT PFX "%s: cannot allocate listen object\n", __func__);
+ return -ENOMEM;
+ }
-+
++
+ kref_init(&listen->kref); /* refcnt <- 1 */
+
+ listen->cm_id = cm_id;
+ case IBSCIF_CM_REQ:
+ cm_ctx = kzalloc(sizeof *cm_ctx, GFP_KERNEL);
+ if (!cm_ctx) {
-+ printk(KERN_ALERT "%s: cannot allocate cm_ctx\n", __func__);
++ printk(KERN_ALERT PFX "%s: cannot allocate cm_ctx\n", __func__);
+ return -ENOMEM;
+ }
+ kref_init(&cm_ctx->kref); /* refcnt <- 1 */
+ spin_unlock_bh(&listen_list_lock);
+
+ if (!cm_ctx->listen) {
-+ printk(KERN_ALERT "%s: no matching listener for connection request, port=%d\n", __func__, port);
++ printk(KERN_ALERT PFX "%s: no matching listener for connection request, port=%d\n", __func__, port);
+ put_cm(cm_ctx);
+ /* fix me: send CM_REJ */
+ return -EINVAL;
+ cm_ctx->remote_qpn = qpn;
+ cm_ctx->plen = plen;
+ if (cm_ctx->plen > IBSCIF_MAX_PDATA_SIZE) {
-+ printk(KERN_ALERT "%s: plen (%d) exceeds the limit (%d), truncated.\n",
++ printk(KERN_ALERT PFX "%s: plen (%d) exceeds the limit (%d), truncated.\n",
+ __func__, cm_ctx->plen, IBSCIF_MAX_PDATA_SIZE);
+ cm_ctx->plen = IBSCIF_MAX_PDATA_SIZE;
+ }
+ cm_ctx->remote_qpn = qpn;
+ cm_ctx->peer_context = rep_ctx;
+ err = connect_qp( cm_ctx );
-+ if (!err)
++ if (!err)
+ err = ibscif_send_cm_rtu(cm_ctx);
+ if (err)
-+ printk(KERN_ALERT "%s: failed to modify QP into connected state\n", __func__);
++ printk(KERN_ALERT PFX "%s: failed to modify QP into connected state\n", __func__);
+ event_connection_reply(cm_ctx, err);
+ put_cm(cm_ctx);
+ break;
+ break;
+
+ default:
-+ printk(KERN_ALERT "%s: invalid CM cmd: %d\n", __func__, pdu->cm.cmd);
++ printk(KERN_ALERT PFX "%s: invalid CM cmd: %d\n", __func__, pdu->cm.cmd);
+ break;
+ }
+
+ return 0;
+}
-diff --git a/drivers/infiniband/hw/scif/ibscif_cq.c b/drivers/infiniband/hw/scif/ibscif_cq.c
-new file mode 100644
-index 0000000..3b1c402
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_cq.c
++
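
The cm_ctx and listen objects in ibscif_cm.c are reference-counted with kref: kref_init() starts the count at one, each get/put pair brackets a user, and the release callback runs exactly once when the count reaches zero. A minimal sketch of the same lifecycle (struct demo_cm and its helpers are illustrative):

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct demo_cm {
            struct kref kref;
            /* ... connection state ... */
    };

    static void demo_free_cm(struct kref *kref)
    {
            /* runs exactly once, when the count hits zero */
            kfree(container_of(kref, struct demo_cm, kref));
    }

    static struct demo_cm *demo_cm_alloc(void)
    {
            struct demo_cm *cm = kzalloc(sizeof(*cm), GFP_KERNEL);

            if (cm)
                    kref_init(&cm->kref);   /* refcnt <- 1 */
            return cm;
    }

    static inline void demo_get_cm(struct demo_cm *cm)
    {
            kref_get(&cm->kref);
    }

    static inline void demo_put_cm(struct demo_cm *cm)
    {
            kref_put(&cm->kref, demo_free_cm);
    }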
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_cq.c b/drivers/infiniband/hw/scif/ibscif_cq.c
+--- a/drivers/infiniband/hw/scif/ibscif_cq.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_cq.c 2014-09-08 13:57:08.000000000 -0700
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ cq->head = (cq->head + 1) % ibcq->cqe;
+ spin_unlock_bh(&cq->lock);
+
-+ /* WQ may no longer exist or has been flushed. */
++ /* WQ may no longer exist or has been flushed. */
+ if (wq) {
+ spin_lock_bh(&wq->lock);
+ wq->head = (wq->head + reap) % wq->size;
+
+ spin_unlock_bh(&cq->lock);
+}
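
The CQ and WQ above are fixed-size rings: the head index advances modulo the queue size under the lock, and reaping N work-queue entries moves the WQ head by N in one step. The index arithmetic, in outline (a sketch inferred from the head/size updates shown, not literal driver code):

    #include <linux/errno.h>

    struct demo_ring {
            unsigned int head;      /* next entry to consume */
            unsigned int tail;      /* next entry to produce */
            unsigned int depth;     /* entries currently queued */
            unsigned int size;      /* ring capacity */
    };

    static int demo_ring_consume(struct demo_ring *r, unsigned int n)
    {
            if (n > r->depth)
                    return -EINVAL; /* can't reap more than is queued */
            r->head = (r->head + n) % r->size;
            r->depth -= n;
            return 0;
    }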
-diff --git a/drivers/infiniband/hw/scif/ibscif_driver.h b/drivers/infiniband/hw/scif/ibscif_driver.h
-new file mode 100644
-index 0000000..f6cb10f
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_driver.h
-@@ -0,0 +1,774 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_driver.h b/drivers/infiniband/hw/scif/ibscif_driver.h
+--- a/drivers/infiniband/hw/scif/ibscif_driver.h 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_driver.h 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,787 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+#include <linux/version.h> /* for LINUX_VERSION_CODE */
+#include <linux/poll.h>
+#include <linux/workqueue.h>
++#include <linux/semaphore.h>
++
++/* These macros are defined in <linux/semaphore.h>;
++ * however, they may be missing on older kernels.
++ */
++#ifndef DECLARE_MUTEX
++#define DECLARE_MUTEX(name) \
++ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1)
++#endif
++
++#ifndef init_MUTEX
++#define init_MUTEX(sem) sema_init(sem, 1)
++#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0)
+ #include <linux/interrupt.h>
+#define DRV_DESC "OpenFabrics IBSCIF Driver"
+#define DRV_VERSION "0.1"
+#define DRV_SIGNON DRV_DESC " v" DRV_VERSION
++#define DRV_BUILD " built " __DATE__ " " __TIME__
+
+#define UVERBS_ABI_VER 6
+#define VENDOR_ID 0x8086 /* Intel Corporation */
+#define MAX_QPS (64 * 1024)
+#define MAX_QP_SIZE (16 * 1024)
+#define MAX_CQS (MAX_QPS * 2) /* x2:send queues + recv queues */
-+#define MAX_CQ_SIZE (MAX_QP_SIZE * 4) /* or combined */
++#define MAX_CQ_SIZE (MAX_QP_SIZE * 4) /* or combined */
+#define MAX_PDS MAX_QPS /* 1 per QP */
+#if 0
+#define MAX_MRS (MAX_QPS * 4) /* x4:local/remote,read/write */
+#else
-+#define MAX_MRS 16383 /* limited by IBSCIF_MR_MAX_KEY */
++#define MAX_MRS 16383 /* limited by IBSCIF_MR_MAX_KEY */
+#endif
+#define MAX_MR_SIZE (2U * 1024 * 1024 * 1024)
+#define MAX_SGES (PAGE_SIZE / sizeof(struct ib_sge))
+extern int host_proxy;
+extern int new_ib_type;
+extern int verbose;
++extern int check_grh;
+
+extern struct list_head devlist;
-+extern spinlock_t devlist_lock;
++extern struct semaphore devlist_mutex;
+
+extern struct idr wiremap;
+extern rwlock_t wiremap_lock;
+struct ibscif_conn {
+ struct list_head entry;
+ atomic_t refcnt;
-+ scif_epd_t ep;
++ scif_epd_t ep;
+ unsigned short remote_node_id;
++ union ib_gid remote_gid;
+ struct ibscif_dev *dev;
+ int local_close;
+ int remote_close;
+
+struct ibscif_dev {
+ struct ib_device ibdev;
-+ struct net_device *netdev; /* for RDMA CM support */
++ struct net_device *netdev; /* for RDMA CM support */
+ struct list_head entry;
+
+ char name[IBSCIF_NAME_SIZE];
++ union ib_gid gid;
+ unsigned short node_id;
+ atomic_t refcnt;
-+ scif_epd_t listen_ep;
-+ struct list_head conn_list;
++ scif_epd_t listen_ep;
++ struct list_head conn_list;
+ struct list_head mr_list;
-+ spinlock_t mr_list_lock;
++ struct semaphore mr_list_mutex;
+
+ struct proc_dir_entry *procfs;
+ struct ibscif_stats stats;
+
+ spinlock_t atomic_op;
+
-+ spinlock_t lock;
++ struct semaphore mutex;
+ struct list_head wq_list; /* List of WQ's on this device */
+};
+
+ int ud_msg_id;
+};
+
-+struct ibscif_mac {
-+ u8 addr[ETH_ALEN];
-+};
-+
+enum ibscif_qp_state {
+ QP_IDLE,
+ QP_CONNECTED,
+ atomic_t or_depth;
+ atomic_t or_posted;
+
-+ struct mutex mutex;
++ struct semaphore modify_mutex;
+ spinlock_t lock;
+ enum ibscif_qp_state state;
+ u16 local_node_id;
+ u32 length;
+ int npages;
+ struct page **page;
-+ scif_pinned_pages_t pinned_pages;
++ scif_pinned_pages_t pinned_pages;
+ struct list_head mreg_list;
+};
+
 +/* Canonical virtual addresses on X86_64 fall in the ranges 0x0000000000000000-0x00007fffffffffff
 + * and 0xffff800000000000-0xffffffffffffffff. The range 0x0000800000000000-0xffff7fffffffffff
 + * is unused. This basically means only 48 bits are used and the highest 16 bits are just sign
-+ * extensions. We can put rkey into these 16 bits and use the result as the "offset" of SCIF's
++ * extensions. We can put rkey into these 16 bits and use the result as the "offset" of SCIF's
+ * registered address space. By doing this, the SCIF_MAP_FIXED flag can be used so that the offset
-+ * can be calculated directly from rkey and virtual address w/o using the "remote registration cache"
++ * can be calculated directly from rkey and virtual address w/o using the "remote registration cache"
+ * mechanism.
+ *
-+ * SCIF reserve the top 2 bits of the offset for internal uses, leaving 14 bits for rkey.
++ * SCIF reserves the top 2 bits of the offset for internal uses, leaving 14 bits for rkey.
+ */
+#define IBSCIF_MR_MAX_KEY (0x3FFF)
+#define IBSCIF_MR_VADDR_MASK (0x0000FFFFFFFFFFFFUL)
+int ibscif_send_cm_rtu(struct ibscif_cm *cm_ctx);
+
+#endif /* IBSCIF_DRIVER_H */
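
The comment block above implies a fixed mapping from (rkey, virtual address) to a SCIF registered offset: the low 48 bits carry the canonical address and the rkey sits in the sign-extension bits above them, with the top two bits left clear for SCIF (hence the 0x3FFF key limit). A hedged sketch of that arithmetic, assuming a 48-bit shift:

    #include <linux/types.h>
    #include <linux/bug.h>

    #define DEMO_MR_MAX_KEY         0x3FFFUL
    #define DEMO_MR_VADDR_MASK      0x0000FFFFFFFFFFFFUL

    /* Fold the rkey into the sign-extension bits of a canonical x86_64
     * address to form a unique SCIF_MAP_FIXED offset. */
    static inline u64 demo_rkey_to_offset(u32 rkey, u64 vaddr)
    {
            BUG_ON(rkey > DEMO_MR_MAX_KEY); /* top 2 bits stay clear for SCIF */
            return ((u64)rkey << 48) | (vaddr & DEMO_MR_VADDR_MASK);
    }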
-diff --git a/drivers/infiniband/hw/scif/ibscif_loopback.c b/drivers/infiniband/hw/scif/ibscif_loopback.c
-new file mode 100644
-index 0000000..d9193e6
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_loopback.c
-@@ -0,0 +1,583 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_loopback.c b/drivers/infiniband/hw/scif/ibscif_loopback.c
+--- a/drivers/infiniband/hw/scif/ibscif_loopback.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_loopback.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+
+ spin_lock_bh(&rq->lock);
+
-+ err = ibscif_validate_wq(rq,
-+ ((enum ib_wr_opcode)write_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
-+ &recv_wr : NULL, IB_ACCESS_REMOTE_WRITE);
++ err = ibscif_validate_wq(rq, ((enum ib_wr_opcode)write_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
++ &recv_wr : NULL, IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(err))
+ goto out;
+
+ err = ibscif_dscopy(atomic_wr->ds_list, &src_ds, sizeof atomic_wr->atomic_rsp.orig_data);
+ if (likely(!err)) {
+ src_addr = ibscif_map_src(src_page) + src_offset;
-+ if ((enum ib_wr_opcode) atomic_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
++ if ((enum ib_wr_opcode)atomic_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ *src_addr += atomic_wr->fetch_add.add_operand;
+ else if (*src_addr == atomic_wr->cmp_swp.cmp_operand)
+ *src_addr = atomic_wr->cmp_swp.swp_operand;
+ goto again;
+ }
+}
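
The loopback atomic path above implements the IB semantics for fetch-and-add and compare-and-swap against a mapped page: the original 64-bit value is always returned to the initiator, and the swap lands only when the comparison matches (the driver serializes this under the per-device atomic_op spinlock). Stripped of the mapping and locking, the semantics are:

    #include <linux/types.h>

    static u64 demo_fetch_add(u64 *target, u64 add)
    {
            u64 orig = *target;

            *target = orig + add;
            return orig;            /* initiator receives the old value */
    }

    static u64 demo_cmp_swap(u64 *target, u64 cmp, u64 swp)
    {
            u64 orig = *target;

            if (orig == cmp)
                    *target = swp;  /* store only on a match */
            return orig;
    }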
-diff --git a/drivers/infiniband/hw/scif/ibscif_main.c b/drivers/infiniband/hw/scif/ibscif_main.c
-new file mode 100644
-index 0000000..e10d954
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_main.c
-@@ -0,0 +1,351 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_main.c b/drivers/infiniband/hw/scif/ibscif_main.c
+--- a/drivers/infiniband/hw/scif/ibscif_main.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_main.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+
+#include "ibscif_driver.h"
+
-+static const char ibscif_signon[] = DRV_SIGNON;
++static const char ibscif_signon[] = DRV_SIGNON DRV_BUILD;
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_PARAM(int, host_proxy, 0,
+ "Proxy card side RDMA operations to host");
+
-+#if (LINUX_VERSION_CODE>=KERNEL_VERSION(3,5,0))
++#if ((LINUX_VERSION_CODE>=KERNEL_VERSION(3,5,0)) || CONFIG_MK1OM || CONFIG_ML1OM)
+#define USE_NEW_IB_TYPE 1
+#else
+#define USE_NEW_IB_TYPE 0
+MODULE_PARAM(int, verbose, 0,
+ "Produce more log info for debugging purpose");
+
++MODULE_PARAM(int, check_grh, 1,
++ "Detect outside-box connection by checking the global routing header");
++
+static atomic_t avail_pages; /* Calculated from max_pinned and totalram_pages */
+
+LIST_HEAD(devlist);
-+DEFINE_SPINLOCK(devlist_lock);
++DECLARE_MUTEX(devlist_mutex);
+
+DEFINE_IDR(wiremap);
+DEFINE_RWLOCK(wiremap_lock);
+ len = len > 64 ? 64 : len;
+ while (len) {
+ p = tmp;
-+ for (i = len > 16 ? 16 : len; i; i--, len--)
++ for (i = len > 16 ? 16 : len; i; i--, len--)
+ p += sprintf(p, "%2x ", *buf++);
+ printk("(%d)%s: %s\n", smp_processor_id(), str, tmp);
+ }
 + * To work around MPI's assumption that data is written atomically into its
 + * header structures, write the first 16 integers of a transfer atomically.
+ *
-+ * Update: the assumption of MPI's ofa module is different in that the last
++ * Update: the assumption of MPI's ofa module is different in that the last
 + * four bytes need to be written last and atomically. The buffers used in
 + * this case are always aligned.
+ */
+ return head_copied;
+
+ head_aligned = !((unsigned long)src_addr & (sizeof(int)-1)) &&
-+ !((unsigned long)dst_addr & (sizeof(int)-1));
++ !((unsigned long)dst_addr & (sizeof(int)-1));
+
+
+ tail_aligned = !((unsigned long)(src_addr+copy_len) & (sizeof(int)-1)) &&
-+ !((unsigned long)(dst_addr+copy_len) & (sizeof(int)-1));
++ !((unsigned long)(dst_addr+copy_len) & (sizeof(int)-1));
+
+ if (!head_copied && head_aligned) {
+
+ *dst_x = *src_x;
+ goto done;
+ }
-+
++
+ /* Bad alignment. Copy all but the last byte, then the last byte */
+ if (--copy_len)
+ memcpy((void *)dst_x, (void *)src_x, copy_len);
+ "fast_rdma=%d, "
+ "host_proxy=%d, "
+ "rma_threshold=%d, scif_loopback=%d, "
-+ "new_ib_type=%d, verbose=%d\n",
++ "new_ib_type=%d, verbose=%d, "
++ "check_grh=%d\n",
+ max_pinned, window_size,
+ blocking_send, blocking_recv,
+ fast_rdma,
+ host_proxy,
+ rma_threshold, scif_loopback,
-+ new_ib_type, verbose);
++ new_ib_type, verbose,
++ check_grh);
+
+ ibscif_init_params();
+
+{
+ ibscif_dev_cleanup();
+ ibscif_free_wiremap();
++ printk(KERN_INFO PFX "unloaded\n");
+}
+
+module_init(ibscif_init);
+module_exit(ibscif_exit);
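
The copy helper discussed above special-cases alignment so that the final word of a transfer is written last, in a single aligned store, matching the MPI assumption in the comment. A sketch of the tail-write idea, assuming int-aligned endpoints (demo code, not the driver's exact helper):

    #include <linux/types.h>
    #include <linux/string.h>

    /* Copy n ints, publishing the last word after the rest of the body. */
    static void demo_copy_tail_last(int *dst, const int *src, size_t n)
    {
            if (!n)
                    return;
            memcpy(dst, src, (n - 1) * sizeof(int));
            /* single aligned store, so readers never see a torn tail */
            *(volatile int *)&dst[n - 1] = src[n - 1];
    }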
-diff --git a/drivers/infiniband/hw/scif/ibscif_mr.c b/drivers/infiniband/hw/scif/ibscif_mr.c
-new file mode 100644
-index 0000000..3df1d12
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_mr.c
-@@ -0,0 +1,566 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_mr.c b/drivers/infiniband/hw/scif/ibscif_mr.c
+--- a/drivers/infiniband/hw/scif/ibscif_mr.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_mr.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,569 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+ mr = kzalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr) {
+ err = -ENOMEM;
-+ printk(KERN_ALERT "%s: unable to allocate mr.\n", __func__);
++ printk(KERN_ALERT PFX "%s: unable to allocate mr.\n", __func__);
+ goto out1;
+ }
+
+
+ err = ibscif_wiremap_add(mr, &mr->ibmr.lkey);
+ if (err) {
-+ printk(KERN_ALERT "%s: unable to allocate lkey.\n", __func__);
++ printk(KERN_ALERT PFX "%s: unable to allocate lkey.\n", __func__);
+ goto out2;
+ }
+
+ if (mr->ibmr.lkey > IBSCIF_MR_MAX_KEY) {
+ err = -ENOSPC;
-+ printk(KERN_ALERT "%s: lkey (%x) out of range.\n", __func__, mr->ibmr.lkey);
++ printk(KERN_ALERT PFX "%s: lkey (%x) out of range.\n", __func__, mr->ibmr.lkey);
+ goto out3;
+ }
+
+ mr->umem = ib_umem_get(ibpd->uobject->context, start, length, access, 0/*dma_sync*/);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
-+ printk(KERN_ALERT "%s: ib_umem_get returns %d.\n", __func__, err);
++ printk(KERN_ALERT PFX "%s: ib_umem_get returns %d.\n", __func__, err);
+ goto out;
+ }
+
+ mr->page = vzalloc(mr->npages * sizeof *mr->page);
+ if (!mr->page) {
+ err = -ENOMEM;
-+ printk(KERN_ALERT "%s: unable to allocate mr->page.\n", __func__);
++ printk(KERN_ALERT PFX "%s: unable to allocate mr->page.\n", __func__);
+ goto out;
+ }
+
+#endif
+
+ err = ibscif_mr_init_mreg(mr);
-+ if (err)
++ if (err)
+ goto out;
+
-+ dev = to_dev(mr->ibmr.device);
-+ spin_lock(&dev->mr_list_lock);
++ dev = to_dev(mr->ibmr.device);
++ down(&dev->mr_list_mutex);
+ list_add_tail(&mr->entry, &dev->mr_list);
-+ spin_unlock(&dev->mr_list_lock);
++ up(&dev->mr_list_mutex);
+
+ return &mr->ibmr;
+out:
+ }
+ while (ret == -ERESTARTSYS);
+
-+ if (ret && ret != -ENOTCONN)
-+ printk(KERN_ALERT "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n",
++ if (ret && ret != -ENOTCONN)
++ printk(KERN_ALERT PFX "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n",
+ __func__, ret, mreg->conn->ep, mreg->aligned_offset, mreg->aligned_length);
+
+ ibscif_put_conn(mreg->conn);
+ kfree(mreg);
+ }
+
-+ spin_lock(&dev->mr_list_lock);
++ down(&dev->mr_list_mutex);
+ list_for_each_entry_safe(mr0, next0, &dev->mr_list, entry) {
+ if (mr0 == mr) {
+ list_del(&mr0->entry);
+ break;
+ }
+ }
-+ spin_unlock(&dev->mr_list_lock);
++ up(&dev->mr_list_mutex);
+
+ if (mr->pinned_pages)
+ scif_unpin_pages(mr->pinned_pages);
+ struct ibscif_mr *mr;
+
+ list_for_each_entry(mr, &dev->mr_list, entry){
-+ printk(KERN_ALERT "%s: mr=%p [%llx, %x, %x]\n", __func__, mr, mr->addr, mr->length, mr->ibmr.rkey);
++ printk(KERN_ALERT PFX "%s: mr=%p [%llx, %x, %x]\n", __func__, mr, mr->addr, mr->length, mr->ibmr.rkey);
+ }
+}
+
+ offset = scif_register_pinned_pages(conn->ep, mr->pinned_pages, aligned_offset, SCIF_MAP_FIXED);
+
+ if (IS_ERR_VALUE(offset)) {
-+ printk(KERN_ALERT "%s: scif_register_pinned_pages returns %d\n", __func__, (int)offset);
-+ printk(KERN_ALERT "%s: conn=%p, ep=%p, mr=%p, addr=%llx, length=%x, rkey=%x, aligned_addr=%llx, aligned_length=%x, aligned_offset=%llx\n",
-+ __func__, conn, conn->ep, mr, mr->addr, mr->length, mr->ibmr.rkey, aligned_addr, aligned_length, (uint64_t)aligned_offset);
++ printk(KERN_ALERT PFX "%s: scif_register_pinned_pages returns %d\n", __func__, (int)offset);
++ printk(KERN_ALERT PFX "%s: conn=%p, ep=%p, mr=%p, addr=%llx, length=%x, rkey=%x, "
++ "aligned_addr=%llx, aligned_length=%x, aligned_offset=%llx\n",
++ __func__, conn, conn->ep, mr, mr->addr, mr->length, mr->ibmr.rkey,
++ aligned_addr, aligned_length, (uint64_t)aligned_offset);
+ ibscif_dump_mr_list(conn->dev);
+ return (int)offset;
+ }
+ }
+ while (err == -ERESTARTSYS);
+
-+ if (err && err != -ENOTCONN)
-+ printk(KERN_ALERT "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n",
++ if (err && err != -ENOTCONN)
++ printk(KERN_ALERT PFX "%s: scif_unregister returns %d. ep=%p, offset=%llx, length=%x\n",
+ __func__, err, conn->ep, (uint64_t)aligned_offset, aligned_length);
+
+ return -ENOMEM;
+ ibscif_send_reopen(conn);
+ }
+
-+ if (new_mreg)
++ if (new_mreg)
+ *new_mreg = mreg;
+
+ return 0;
+ int i;
+
+ if (unlikely(!conn)) {
-+ printk(KERN_ALERT "%s: conn==NULL\n", __func__);
++ printk(KERN_ALERT PFX "%s: conn==NULL\n", __func__);
+ return NULL;
+ }
+
+ err = ibscif_mr_reg_with_conn(mr, conn, &mreg);
+ if (err != -EADDRINUSE)
+ return mreg;
-+
++
+ /* another thread is performing the registration */
+ if (verbose)
-+ printk(KERN_INFO "%s: mr is being registered by another thread. mr=%p, conn=%p.\n", __func__, mr, conn);
++ printk(KERN_INFO PFX "%s: mr is being registered by another thread. mr=%p, conn=%p.\n", __func__, mr, conn);
+ for (i=0; i<10000; i++) {
+ list_for_each_entry(mreg, &mr->mreg_list, entry){
+ if (mreg->conn == conn) {
+ if (verbose)
-+ printk(KERN_INFO "%s: got mreg after %d retries.\n", __func__, i+1);
++ printk(KERN_INFO PFX "%s: got mreg after %d retries.\n", __func__, i+1);
+ return mreg;
+ }
+ }
+ schedule();
+ }
+ if (verbose)
-+ printk(KERN_INFO "%s: failed to get mreg after %d retries.\n", __func__, i);
++ printk(KERN_INFO PFX "%s: failed to get mreg after %d retries.\n", __func__, i);
+ return NULL;
+}
+
+ prot = ((mr->access & IB_ACCESS_REMOTE_READ)?SCIF_PROT_READ:0) |
+ ((mr->access & IB_ACCESS_REMOTE_WRITE)?SCIF_PROT_WRITE:0);
+#else
-+ // In IB, the same buffer can be registered multiple times with different access rights.
-+ // SCIF doesn't have mechanism to support that. So we just turn on all the access rights.
-+ // Otherwise we may end up with protection error.
++ // In IB, the same buffer can be registered multiple times with different access rights.
++ // SCIF doesn't have a mechanism to support that, so we just turn on all the access rights.
++ // Otherwise we may end up with a protection error.
+ prot = SCIF_PROT_READ | SCIF_PROT_WRITE;
+#endif
+
+ err = scif_pin_pages((void *)aligned_addr, aligned_length, prot, 0/*user addr*/, &mr->pinned_pages);
+ if (err) {
-+ printk(KERN_ALERT "%s: scif_pin_pages returns %d\n", __func__, err);
++ printk(KERN_ALERT PFX "%s: scif_pin_pages returns %d\n", __func__, err);
+ return err;
+ }
+
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry(conn, &dev->conn_list, entry) {
+ err = ibscif_mr_reg_with_conn(mr, conn, NULL);
+ if (err)
+ break;
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+
+ return err;
+}
+{
+ struct ibscif_mr *mr;
+
-+ spin_lock(&conn->dev->mr_list_lock);
++ down(&conn->dev->mr_list_mutex);
+ list_for_each_entry(mr, &conn->dev->mr_list, entry){
+ ibscif_mr_get_mreg(mr, conn);
+ }
-+ spin_unlock(&conn->dev->mr_list_lock);
++ up(&conn->dev->mr_list_mutex);
+}
-diff --git a/drivers/infiniband/hw/scif/ibscif_pd.c b/drivers/infiniband/hw/scif/ibscif_pd.c
-new file mode 100644
-index 0000000..a5682cf
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_pd.c
++
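
scif_pin_pages() in the registration path above works on whole pages, so the driver pins a page-aligned superset of the user's range; that is where the aligned_addr/aligned_length values come from. The usual rounding, assumed here since the hunk does not show it:

    #include <linux/mm.h>
    #include <linux/types.h>

    /* Round a byte range out to page boundaries before pinning. */
    static void demo_page_bounds(u64 addr, u32 length,
                                 u64 *aligned_addr, u32 *aligned_length)
    {
            *aligned_addr = addr & PAGE_MASK;
            *aligned_length = PAGE_ALIGN(addr + length) - *aligned_addr;
    }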
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_pd.c b/drivers/infiniband/hw/scif/ibscif_pd.c
+--- a/drivers/infiniband/hw/scif/ibscif_pd.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_pd.c 2014-09-08 13:57:08.000000000 -0700
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ kfree(to_pd(ibpd));
+ return 0;
+}
-diff --git a/drivers/infiniband/hw/scif/ibscif_post.c b/drivers/infiniband/hw/scif/ibscif_post.c
-new file mode 100644
-index 0000000..c3bc588
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_post.c
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_post.c b/drivers/infiniband/hw/scif/ibscif_post.c
+--- a/drivers/infiniband/hw/scif/ibscif_post.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_post.c 2014-09-08 13:57:08.000000000 -0700
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ }
+
+ for (err = 0; ibwr; ibwr = ibwr->next) {
-+
++
+ if (unlikely(rq->depth == rq->size)) {
+ err = -ENOBUFS;
+ goto out;
+
+ return err;
+}
-diff --git a/drivers/infiniband/hw/scif/ibscif_procfs.c b/drivers/infiniband/hw/scif/ibscif_procfs.c
-new file mode 100644
-index 0000000..0e14e9c
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_procfs.c
-@@ -0,0 +1,185 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_procfs.c b/drivers/infiniband/hw/scif/ibscif_procfs.c
+--- a/drivers/infiniband/hw/scif/ibscif_procfs.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_procfs.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+ " recv %lu recv_imm %lu read %lu comp %lu fetch %lu\n"
+ " read_rsp %lu atomic_rsp %lu ud %lu\n"
+ " fast_rdma :\n"
-+ " write %lu read %lu unavailable %lu fallback %lu "
-+ "force_ack %lu tail_write %lu\n",
++ " write %lu read %lu unavailable %lu fallback %lu force_ack %lu tail_write %lu\n",
+ dev->ibdev.name,
+ DEV_STAT(dev, bytes_sent),
+ DEV_STAT(dev, bytes_rcvd),
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0))
+static ssize_t ibscif_stats_write(struct file *file, const char __user *buffer,
-+ size_t count, loff_t *ppos)
++ size_t count, loff_t *ppos)
+{
-+ struct ibscif_dev *dev = PDE_DATA(file_inode(file));
-+ memset(&dev->stats, 0, sizeof dev->stats);
-+ return count;
++ struct ibscif_dev *dev = PDE_DATA(file_inode(file));
++ memset(&dev->stats, 0, sizeof dev->stats);
++ return count;
+}
+
+static int ibscif_stats_open(struct inode *inode, struct file *file)
+{
-+ return single_open(file, ibscif_stats_show, PDE_DATA(inode));
++ return single_open(file, ibscif_stats_show, PDE_DATA(inode));
+}
+
+struct file_operations ibscif_fops = {
-+ .owner = THIS_MODULE,
-+ .open = ibscif_stats_open,
-+ .read = seq_read,
-+ .write = ibscif_stats_write,
-+ .llseek = seq_lseek,
-+ .release = seq_release,
++ .owner = THIS_MODULE,
++ .open = ibscif_stats_open,
++ .read = seq_read,
++ .write = ibscif_stats_write,
++ .llseek = seq_lseek,
++ .release = single_release,
+};
+
+int ibscif_procfs_add_dev(struct ibscif_dev *dev)
+{
-+ dev->procfs = proc_mkdir(dev->ibdev.name, init_net.proc_net);
-+ if (!dev->procfs)
-+ return -ENOENT;
++ dev->procfs = proc_mkdir(dev->ibdev.name, init_net.proc_net);
++ if (!dev->procfs)
++ return -ENOENT;
+
-+ if (proc_create_data("stats", S_IRUGO | S_IWUGO, dev->procfs,
-+ &ibscif_fops ,dev))
-+ return -ENOENT;
++ if (proc_create_data("stats", S_IRUGO | S_IWUGO, dev->procfs,
++ &ibscif_fops ,dev))
++ return -ENOENT;
+
-+ return 0;
++ return 0;
+}
+#else /* (LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)) */
-+static int ibscif_stats_write(struct file *file, const char __user *buffer,
-+ unsigned long count, void *data)
++static int ibscif_stats_write(struct file *file, const char __user *buffer, unsigned long count, void *data)
+{
+ struct ibscif_dev *dev = data;
+ memset(&dev->stats, 0, sizeof dev->stats);
+ if (!dev->procfs)
+ return -ENOENT;
+
-+ entry = create_proc_read_entry("stats", S_IRUGO | S_IWUGO,
-+ dev->procfs, ibscif_stats_read, dev);
++ entry = create_proc_read_entry("stats", S_IRUGO | S_IWUGO, dev->procfs, ibscif_stats_read, dev);
+ if (!entry)
+ return -ENOENT;
-+
+ entry->write_proc = ibscif_stats_write;
+
+ return 0;
+{
+ if (dev->procfs)
+ remove_proc_entry("stats", dev->procfs);
-+
+ remove_proc_entry(dev->ibdev.name, init_net.proc_net);
+}
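
On kernels >= 3.10 the legacy create_proc_read_entry() interface is gone, which is what the upper branch above handles: the stats file becomes a seq_file, and PDE_DATA() recovers the per-device pointer that was passed to proc_create_data(). A stripped-down version of the same wiring (demo names, error handling elided):

    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_stats_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "device %p\n", m->private);   /* the data pointer */
            return 0;
    }

    static int demo_stats_open(struct inode *inode, struct file *file)
    {
            /* PDE_DATA() returns the 'data' given to proc_create_data() */
            return single_open(file, demo_stats_show, PDE_DATA(inode));
    }

    static const struct file_operations demo_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_stats_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,  /* pairs with single_open() */
    };

    /* usage: proc_create_data("stats", 0444, parent_dir, &demo_fops, dev); */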
-diff --git a/drivers/infiniband/hw/scif/ibscif_protocol.c b/drivers/infiniband/hw/scif/ibscif_protocol.c
-new file mode 100644
-index 0000000..58f3080
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_protocol.c
-@@ -0,0 +1,2832 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_protocol.c b/drivers/infiniband/hw/scif/ibscif_protocol.c
+--- a/drivers/infiniband/hw/scif/ibscif_protocol.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_protocol.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,2838 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+struct ibscif_skb_cb {
+ struct ibscif_dev *dev;
+ struct ibscif_wr *wr;
-+ scif_epd_t scif_ep;
++ scif_epd_t scif_ep;
+ struct ibscif_qp *qp; /* for UD only */
+};
+
+#else
+ #define KMAP(x) kmap(skb_frag_page(x))
+ #define KUNMAP(x) kunmap(skb_frag_page(x))
-+ #define SET_PAGE(x,y) __skb_frag_set_page(x, y)
++ #define SET_PAGE(x,y) __skb_frag_set_page(x, y)
+ #define GET_PAGE(x) __skb_frag_ref(x)
+#endif
+
+ hdr_size = skb->len - skb->data_len;
+ for (i=0; i<hdr_size; ) {
+ ret = scif_send(scif_ep, skb->data+i, hdr_size-i,
-+ blocking_send ? SCIF_SEND_BLOCK : 0);
++ blocking_send ? SCIF_SEND_BLOCK : 0);
+ if (ret < 0) {
+ printk(KERN_ALERT PFX "%s: fail to send header, hdr_size=%d, ret=%d\n", __func__, hdr_size, ret);
+ goto next;
+ while (num_frags--) {
+ vaddr = KMAP(frag); /* because scif_send() may cause scheduling */
+ for (i=0; i<frag->size; ) {
-+ ret = scif_send(scif_ep, vaddr + frag->page_offset + i,
-+ frag->size - i,
-+ blocking_send ? SCIF_SEND_BLOCK : 0);
++ ret = scif_send(scif_ep, vaddr + frag->page_offset + i,
++ frag->size - i,
++ blocking_send ? SCIF_SEND_BLOCK : 0);
+ if (ret < 0) {
+ printk(KERN_ALERT PFX "%s: scif_send returns %d, frag_size=%d\n", __func__, ret, frag->size);
+ break;
+ }
+
 + /* only one instance can be enqueued, otherwise there is a race condition between scif_send() calls. */
-+ /* notice that the current running worker may miss the newly added item, but it will be picked up in the poll_thread */
++ /* notice that the currently running worker may miss a newly added item, but it will be picked up by the poll_thread */
+ if (!atomic_xchg(&xmit_busy, 1))
+ schedule_work(&ibscif_xmit_work);
+
+ }
+ pdu->ibscif.write.msg_id = __cpu_to_be32(wr->msg_id);
+ pdu->ibscif.write.rdma_key = __cpu_to_be32(wr->write.rkey);
-+ pdu->ibscif.write.rdma_address = __cpu_to_be64(wr->write.remote_address +
++ pdu->ibscif.write.rdma_address = __cpu_to_be64(wr->write.remote_address +
+ (wr->length - wr_len_remaining));
+ if (wr->use_rma) {
+ opcode = ibscif_op_write_rma;
+
+ pdu = (struct ibscif_full_frame *)skb->data;
+ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(hdr_size);
-+
++
+ return skb;
+bail:
+ atomic_inc(&dev->available);
+
+ for (i=0; i<wr->num_ds; i++) {
+ ds = &wr->ds_list[i];
-+ if (!ds->current_mreg)
++ if (!ds->current_mreg)
+ ds->current_mreg = ibscif_mr_get_mreg(ds->mr, wq->qp->conn);
+
-+ if (!ds->current_mreg)
++ if (!ds->current_mreg)
+ return 0;
+ }
+
+ wr->sar.rea.final_length = total_length - rdma_length;
+
+ /* we can't call ibscif_process_sq_completions here because we are holding the sq lock.
-+ * set the flag and let the upper level make the call */
++ * set the flag and let the upper level make the call */
+ wq->fast_rdma_completions = 1;
+
+ if (wr->opcode == WR_RDMA_WRITE)
+ DEV_STAT(qp->dev, fast_rdma_read++);
+
 + /* the fast rdma protocol doesn't send any packet, and thus cannot piggyback an ack
-+ * for the peer. send separate ack packet when necessary. */
++ * for the peer. Send a separate ack packet when necessary. */
+ if (qp->wire.sq.rx.last_seq_acked < qp->wire.sq.rx.last_in_seq ||
+ qp->wire.iq.rx.last_seq_acked < qp->wire.iq.rx.last_in_seq) {
+ ibscif_send_ack(qp);
+ u32 max_payload, wr_length, page_offset, ds_len_left, payload_left;
+
+ /* Try to process RDMA read/write directly with SCIF functions.
-+ * The usual reason for failure is that the remote memory has not yet been
-+ * registered with SCIF. The normal packet based path should handle that.
++ * The usual reason for failure is that the remote memory has not yet been
++ * registered with SCIF. The normal packet-based path should handle that.
+ */
+ if (host_proxy && wq->qp->local_node_id>0 && wq->qp->remote_node_id==0) {
 + /* don't try fast rdma because we want to let the host do the data transfer */
+ }
-+ else if (fast_rdma) {
++ else if (fast_rdma) {
+ num_xmited = 0;
+ if (ibscif_try_fast_rdma(wq, wr))
+ goto finish2;
+ from_seq++;
+ }
+ }
-+ else
++ else
+ printk(KERN_ALERT PFX "%s: fail to set up RMA addresses for the work request.\n", __func__);
+
+ goto finish;
+ struct sk_buff *skb;
+
+ if (!qp || IS_ERR(qp)) {
-+ if (qp != ERR_PTR(-ENOENT))
-+ printk(KERN_ALERT "%s: qp=%p hdr=%p in_skb=%p reason=%d\n", __func__, qp, hdr, in_skb, reason);
++ if (qp != ERR_PTR(-ENOENT) && verbose)
++ printk(KERN_ALERT PFX "%s: qp=%p hdr=%p in_skb=%p reason=%d\n", __func__, qp, hdr, in_skb, reason);
+ return;
+ }
+
+ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */
+ pdu->ibscif.hdr.sq_ack_num = __cpu_to_be32(sq_seq);
+ pdu->ibscif.hdr.iq_ack_num = __cpu_to_be32(iq_seq);
-+ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
+
+ return skb;
+}
+ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */
+ pdu->ibscif.hdr.sq_ack_num = 0; /* unused */
+ pdu->ibscif.hdr.iq_ack_num = 0; /* unused */
-+ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
+
+ return skb;
+}
+ pdu->ibscif.hdr.seq_num = 0; /* seq_num has no meaning. */
+ pdu->ibscif.hdr.sq_ack_num = 0; /* unused */
+ pdu->ibscif.hdr.iq_ack_num = 0; /* unused */
-+ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
++ pdu->ibscif.hdr.hdr_size = __cpu_to_be16(size);
+
+ return skb;
+}
+ struct ibscif_full_frame *pdu;
+ struct sk_buff *skb;
+
-+ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen);
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ struct ibscif_full_frame *pdu;
+ struct sk_buff *skb;
+
-+ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen);
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + cm_ctx->plen);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ struct ibscif_full_frame *pdu;
+ struct sk_buff *skb;
+
-+ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + plen);
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm + plen);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ struct ibscif_full_frame *pdu;
+ struct sk_buff *skb;
+
-+ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm);
++ skb = ibscif_create_cm_hdr(cm_ctx->conn, sizeof pdu->ibscif.cm);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ wr = ibscif_get_wr(wq, index);
+
+ /* Skip over non-IQ entries. */
-+ if (iq_flag &&
++ if (iq_flag &&
+ ((wr->opcode == WR_UD) ||
+ (wr->opcode == WR_SEND) ||
+ (wr->opcode == WR_SEND_WITH_IMM) ||
+ if (iq_flag) {
+ /*
 + * Completed IQ replies are deferred until earlier
-+ * non-IQ WR have completed. This is determined
++ * non-IQ WRs have completed. This is determined
+ * with a second iteration of the WQ below.
+ */
+ wr->state = WR_COMPLETED;
+ if (is_sq(wq)) {
+ err = ibscif_process_sq_completions(wq->qp);
+ if (unlikely(err)) {
-+ printk(KERN_ALERT "%s: sq completion error: err=%d \n", __func__, err);
++ printk(KERN_ALERT PFX "%s: sq completion error: err=%d \n", __func__, err);
+ ibscif_protocol_error(wq->qp, IBSCIF_REASON_QP_FATAL);
+ status = 0;
+ }
+ int msg_id;
+
+ if (unlikely(qp->ibqp.qp_type != IB_QPT_UD)) {
-+ printk(KERN_ALERT "%s: UD packet received on non-UD QP\n", __func__);
++ printk(KERN_ALERT PFX "%s: UD packet received on non-UD QP\n", __func__);
+ return -EINVAL;
+ }
+
+
+ /* Only one pdu is allowed for one UD packet, otherwise drop the pdu */
+ if (unlikely(pdu->ud.msg_length != pdu->hdr.length || pdu->ud.msg_offset)) {
-+ printk(KERN_INFO "%s: dropping fragmented UD packet. total_length=%d msg_length=%d msg_offset=%d\n",
++ printk(KERN_INFO PFX "%s: dropping fragmented UD packet. total_length=%d msg_length=%d msg_offset=%d\n",
+ __func__, pdu->hdr.length, pdu->ud.msg_length, pdu->ud.msg_offset);
+ return -EINVAL;
+ }
+ spin_lock_bh(&qp->rq.lock);
+ if (unlikely(qp->rq.ud_msg_id >= qp->rq.next_msg_id)) {
+ spin_unlock_bh(&qp->rq.lock);
-+ printk(KERN_ALERT "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
+ __func__, pdu->send.msg_id, qp->rq.next_msg_id);
+ return -EBADRQC;
+ }
+ return -EMSGSIZE;
+
+ /* GRH is included as part of the received message */
-+ skb_pull(skb, sizeof(pdu->ud)-grh_size);
++ skb_pull(skb, sizeof(pdu->ud)-grh_size);
+
+ err = ibscif_place_data(qp, wr, skb, pdu->hdr.length+grh_size, pdu->ud.msg_offset, pdu->hdr.seq_num);
+ if (unlikely(err))
+ spin_lock_bh(&qp->rq.lock);
+ if (unlikely(pdu->send.msg_id >= qp->rq.next_msg_id)) {
+ spin_unlock_bh(&qp->rq.lock);
-+ printk(KERN_ALERT "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
+ __func__, pdu->send.msg_id, qp->rq.next_msg_id);
+ return -EBADRQC;
+ }
+ spin_lock_bh(&qp->rq.lock);
+ if (unlikely(pdu->write.msg_id >= qp->rq.next_msg_id)) {
+ spin_unlock_bh(&qp->rq.lock);
-+ printk(KERN_ALERT "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
+ __func__, pdu->write.msg_id, qp->rq.next_msg_id);
+ return -EBADRQC;
+ }
+ wr->length = 0;
+ wr->msg_id = msg_id;
+ wr->num_ds = 0;
-+ wr->atomic_rsp.opcode = (opcode==ibscif_op_comp_swap)? ibscif_op_comp_swap_rsp : ibscif_op_fetch_add_rsp;
++ wr->atomic_rsp.opcode = (opcode==ibscif_op_comp_swap)? ibscif_op_comp_swap_rsp : ibscif_op_fetch_add_rsp;
+ /* The wr->atomic_rsp.orig_data field was set above. */
+
+ ibscif_append_wqe(&qp->iq);
+ spin_lock_bh(&qp->rq.lock);
+ if (unlikely(pdu->send.msg_id >= qp->rq.next_msg_id)) {
+ spin_unlock_bh(&qp->rq.lock);
-+ printk(KERN_ALERT "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
++ printk(KERN_ALERT PFX "%s: ERROR: message arrives before recv is posted. msg_id=%d, rq.next_msg_id=%d\n",
+ __func__, pdu->send.msg_id, qp->rq.next_msg_id);
+ return -EBADRQC;
+ }
+
+ err = scif_readfrom(qp->conn->ep, loffset, xfer_len, rma_offset, rma_flag);
+ if (err) {
-+ printk(KERN_ALERT "%s: scif_readfrom (%d bytes) returns %d\n", __func__, xfer_len, err);
++ printk(KERN_ALERT PFX "%s: scif_readfrom (%d bytes) returns %d\n", __func__, xfer_len, err);
+ break;
+ }
+
+ rma_length = __be32_to_cpu(pdu->send.rma_addrs[cur_rma_addr].length);
+ }
+ }
-+
++
+ seg_num++;
+ ds++;
+ }
+ if (rdma_len < rma_length)
+ rma_length = rdma_len;
+
-+ if (rma_length == 0)
++ if (rma_length == 0)
+ continue;
+
+ loffset = mreg->offset + (rdma_addr - mr->addr) + total;
+
+ err = scif_readfrom(qp->conn->ep, loffset, rma_length, rma_offset, rma_flag);
+ if (err) {
-+ printk(KERN_ALERT "%s: scif_readfrom (%d bytes) returns %d\n", __func__, rma_length, err);
++ printk(KERN_ALERT PFX "%s: scif_readfrom (%d bytes) returns %d\n", __func__, rma_length, err);
+ break;
+ }
+
+
+ if (wr) {
+ wr->sar.rea.final_length = total;
-+ wr->state = WR_LAST_SEEN;
++ wr->state = WR_LAST_SEEN;
+ wr->sar.rea.opcode = pdu->hdr.opcode;
+ wr->sar.rea.last_packet_seq = pdu->hdr.seq_num;
+ wr->sar.rea.immediate_data = __be32_to_cpu(pdu->write.immed_data);
-+ }
++ }
+
+ /* Respond to the initiator with the result */
+ wr = ibscif_reserve_wqe(&qp->iq);
+ if (rdma_len < rma_length)
+ rma_length = rdma_len;
+
-+ if (rma_length == 0)
++ if (rma_length == 0)
+ continue;
+
+ loffset = mreg->offset + (rdma_addr - mr->addr) + total;
+
+ err = scif_writeto(qp->conn->ep, loffset, rma_length, rma_offset, rma_flag);
+ if (err) {
-+ printk(KERN_ALERT "%s: scif_writeto (%d bytes) returns %d\n", __func__, rma_length, err);
++ printk(KERN_ALERT PFX "%s: scif_writeto (%d bytes) returns %d\n", __func__, rma_length, err);
+ break;
+ }
+
+ }
+
+ if (unlikely(err)) {
-+ printk(KERN_ALERT "%s: ERROR: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
++ printk(KERN_ALERT PFX "%s: ERROR: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
+ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL);
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_UD) {
+ err = ibscif_schedule_rx_completions(qp, 0, rx);
+ if (unlikely(err)) {
-+ printk(KERN_ALERT "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
++ printk(KERN_ALERT PFX "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
+ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL);
+ }
+ goto done;
+ /* PDU is in sequence so schedule/remove completed work requests. */
+ err = ibscif_schedule_rx_completions(qp, ibscif_pdu_is_iq(pdu->hdr.opcode), rx);
+ if (unlikely(err)) {
-+ printk(KERN_ALERT "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
++ printk(KERN_ALERT PFX "%s: rx completion error: err=%d, opcode=%d\n", __func__, err, ibscif_pdu_base_type(pdu->hdr.opcode));
+ ibscif_protocol_error(qp, IBSCIF_REASON_QP_FATAL);
+ goto done;
+ }
+}
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21)
-+#define skb_mac_header(skb) (skb->mac.raw)
++#define skb_mac_header(skb) (skb->mac.raw)
+#endif
+
+static int ibscif_recv_pkt(struct sk_buff *skb, struct ibscif_dev *dev, scif_epd_t ep, struct ibscif_conn *conn)
+ pdu->hdr.iq_ack_num = __be32_to_cpu(pdu->hdr.iq_ack_num);
+
+ if (pdu->hdr.opcode == ibscif_op_close) {
-+ //printk(KERN_INFO "%s: op_close, conn=%p, local_close=%d\n", __func__, conn, conn->local_close);
++ //printk(KERN_INFO PFX "%s: op_close, conn=%p, local_close=%d\n", __func__, conn, conn->local_close);
+ conn->remote_close = 1;
+ goto done_no_qp;
+ }
+ else if (pdu->hdr.opcode == ibscif_op_reopen) {
-+ //printk(KERN_INFO "%s: op_reopen, conn=%p, local_close=%d\n", __func__, conn, conn->local_close);
++ //printk(KERN_INFO PFX "%s: op_reopen, conn=%p, local_close=%d\n", __func__, conn, conn->local_close);
+ conn->remote_close = 0;
+ goto done_no_qp;
+ }
+ printk(KERN_ALERT PFX "%s(): fail to receive hdr, ret=%d, expecting %d\n", __func__, ret, (int)recv_size);
+ if (ret == -ENOTCONN || ret == -ECONNRESET) {
+ if (verbose)
-+ printk(KERN_INFO "%s: ep disconnected by peer (%d). conn=%p, local_close=%d\n",
++ printk(KERN_INFO PFX "%s: ep disconnected by peer (%d). conn=%p, local_close=%d\n",
+ __func__, ret, conn, conn->local_close);
+ ibscif_remove_ep( dev, ep );
+ ibscif_refresh_pollep_list();
+ recv_buffer += ret;
+ }
+
-+ if (ret < 0)
++ if (ret < 0)
+ goto errout;
+
+ skb->len = pdu_size;
+static struct ibscif_conn *poll_conns[IBSCIF_MAX_POLL_COUNT];
+static struct task_struct *poll_thread = NULL;
+static atomic_t poll_eps_changed = ATOMIC_INIT(0);
++static volatile int poll_thread_running = 0;
+
+void ibscif_refresh_pollep_list( void )
+{
+ int busy;
+ int idle_count = 0;
+
++ poll_thread_running = 1;
+ while (!kthread_should_stop()) {
+ if (atomic_xchg(&poll_eps_changed, 0)) {
+ poll_count = IBSCIF_MAX_POLL_COUNT;
+ if (ret > 0) {
+ for (i=0; i<poll_count; i++) {
+ if (poll_eps[i].revents & POLLIN) {
-+ if (poll_types[i] == IBSCIF_EP_TYPE_LISTEN) {
++ if (poll_types[i] == IBSCIF_EP_TYPE_LISTEN) {
+ ibscif_do_accept( poll_devs[i] );
+ busy = 1;
+ }
+ }
+ else if (poll_eps[i].revents & POLLERR) {
+ if (verbose)
-+ printk(KERN_INFO "%s: ep error, conn=%p.\n", __func__, poll_conns[i]);
++ printk(KERN_INFO PFX "%s: ep error, conn=%p.\n", __func__, poll_conns[i]);
+ ibscif_remove_ep( poll_devs[i], poll_eps[i].epd );
+ ibscif_refresh_pollep_list();
 + /* in most cases, the error is caused by the ep being already closed */
+ else if (poll_eps[i].revents & POLLHUP) {
+ struct ibscif_conn *conn = poll_conns[i];
+ if (verbose)
-+ printk(KERN_INFO "%s: ep disconnected by peer.\n", __func__);
++ printk(KERN_INFO PFX "%s: ep disconnected by peer.\n", __func__);
+ ibscif_remove_ep( poll_devs[i], poll_eps[i].epd );
+ ibscif_refresh_pollep_list();
+ if (conn) {
+ if (verbose)
-+ printk(KERN_INFO "%s: conn=%p, local_close=%d.\n", __func__, conn, conn->local_close);
++ printk(KERN_INFO PFX "%s: conn=%p, local_close=%d.\n", __func__, conn, conn->local_close);
+ conn->remote_close = 1;
+ if (conn->local_close) {
-+ ibscif_free_conn(conn);
++ ibscif_free_conn(conn);
+ }
+ }
+ busy = 1;
+ }
+ }
+
++ poll_thread_running = 0;
+ return 0;
+}
+
+void ibscif_protocol_cleanup(void)
+{
+ kthread_stop( poll_thread );
++
++ while (poll_thread_running)
++ schedule();
+}
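
The poll thread above is a standard kthread: the loop spins until kthread_should_stop() turns true, and kthread_stop() itself blocks until the thread function returns, so the added poll_thread_running wait is belt-and-braces on top of that guarantee. The skeleton, for reference (demo names):

    #include <linux/kthread.h>
    #include <linux/delay.h>
    #include <linux/err.h>

    static struct task_struct *demo_thread;

    static int demo_poll_fn(void *data)
    {
            while (!kthread_should_stop()) {
                    /* ... poll endpoints and dispatch work ... */
                    msleep_interruptible(1);        /* back off when idle */
            }
            return 0;       /* handed back to kthread_stop() */
    }

    static int demo_start(void)
    {
            demo_thread = kthread_run(demo_poll_fn, NULL, "demo_poll");
            return IS_ERR(demo_thread) ? PTR_ERR(demo_thread) : 0;
    }

    static void demo_stop(void)
    {
            kthread_stop(demo_thread);      /* waits for demo_poll_fn to exit */
    }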
-diff --git a/drivers/infiniband/hw/scif/ibscif_protocol.h b/drivers/infiniband/hw/scif/ibscif_protocol.h
-new file mode 100644
-index 0000000..66b55f7
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_protocol.h
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_protocol.h b/drivers/infiniband/hw/scif/ibscif_protocol.h
+--- a/drivers/infiniband/hw/scif/ibscif_protocol.h 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_protocol.h 2014-09-08 13:57:08.000000000 -0700
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+union ibscif_pdu {
+ struct base_hdr hdr;
+ struct ud_hdr ud;
-+ struct send_hdr send;
++ struct send_hdr send;
+ struct write_hdr write;
+ struct read_req_hdr read_req;
+ struct read_rsp_hdr read_rsp;
+#define ibscif_tx_window(tx) ((u32)window_size - ibscif_tx_unacked_window(tx))
+
+#endif /* IBSCIF_PROTOCOL_H */
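
The tx window macros at the end of the header implement a classic sliding window in sequence-number space: the unacked span is the distance between the next sequence number to send and the last one acked, and new PDUs go out only while that span stays under window_size. In outline (field names assumed from the macro text):

    #include <linux/types.h>

    struct demo_tx {
            u32 next_seq;   /* next sequence number to transmit */
            u32 last_ack;   /* highest sequence number acked by the peer */
    };

    /* u32 arithmetic keeps the subtraction correct across wraparound. */
    static inline u32 demo_unacked(const struct demo_tx *tx)
    {
            return tx->next_seq - tx->last_ack;
    }

    static inline bool demo_can_send(const struct demo_tx *tx, u32 window_size)
    {
            return demo_unacked(tx) < window_size;
    }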
-diff --git a/drivers/infiniband/hw/scif/ibscif_provider.c b/drivers/infiniband/hw/scif/ibscif_provider.c
-new file mode 100644
-index 0000000..c42ce6d
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_provider.c
-@@ -0,0 +1,409 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_provider.c b/drivers/infiniband/hw/scif/ibscif_provider.c
+--- a/drivers/infiniband/hw/scif/ibscif_provider.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_provider.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+ attr->gid_tbl_len = 1;
+ attr->pkey_tbl_len = 1;
+ attr->max_msg_sz = MAX_MR_SIZE;
-+ attr->phys_state = 5; /* LinkUp */
++ attr->phys_state = 5; /* LinkUp */
+ attr->state = IB_PORT_ACTIVE;
+ attr->max_mtu = IB_MTU_4096;
+ attr->active_mtu = IB_MTU_4096;
+{
+ struct ibscif_dev *dev = to_dev(ibdev);
+
-+ /*
-+ * Form a link local unicast address.
-+ * See http://www.faqs.org/rfcs/rfc2373.html.
-+ */
-+ ibgid->global.subnet_prefix = cpu_to_be64(0xFE80000000000000);
-+ memcpy(&ibgid->global.interface_id, &dev->ibdev.node_guid, 8);
-+ ((u8 *)&ibgid->global.interface_id)[0] ^= 2;
-+
++ memcpy(ibgid, &dev->gid, sizeof(*ibgid));
+ return 0;
+}
+
+
+static void ibscif_generate_eui64(struct ibscif_dev *dev, u8 *eui64)
+{
-+ /* use the MAC address of the netdev as the node GUID so RDMA CM
-+ * can find the ibdev from IP address associated with the netdev.
-+ */
+ memcpy(eui64, dev->netdev->dev_addr, 3);
+ eui64[3] = 0xFF;
+ eui64[4] = 0xFE;
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-+ (1ull << IB_USER_VERBS_CMD_POST_RECV);
++ (1ull << IB_USER_VERBS_CMD_POST_RECV);
++#if defined(MOFED) && !defined(MOFED_2_1)
++ dev->ibdev.node_type = new_ib_type ? RDMA_EXP_NODE_MIC : RDMA_NODE_RNIC;
++#else
+ dev->ibdev.node_type = new_ib_type ? RDMA_NODE_MIC : RDMA_NODE_RNIC;
++#endif
+ dev->ibdev.phys_port_cnt = 1;
+
+ dev->ibdev.query_device = ibscif_query_device; // Mandatory
+}
+
+/*
-+ * Hold devlist_lock during this call for synchronization as needed.
++ * Hold devlist_mutex during this call for synchronization as needed.
+ * Upon return, dev is invalid.
+ */
+static void ibscif_remove_dev(struct ibscif_dev *dev)
+
+ WARN_ON(!list_empty(&dev->wq_list));
+
-+ spin_lock_bh(&devlist_lock);
++ down(&devlist_mutex);
+ list_del(&dev->entry);
-+ spin_unlock_bh(&devlist_lock);
++ up(&devlist_mutex);
+
+ ibscif_refresh_pollep_list();
+
-+ /* spin_lock_bh() cause in_interrupt() be true, which leads
-+ * to kernel panic inside SCIF API functions */
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry_safe(conn, next, &dev->conn_list, entry) {
+ scif_close(conn->ep);
+ list_del(&conn->entry);
+ kfree(conn);
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+
+ if (dev->listen_ep)
+ scif_close(dev->listen_ep);
+static void ibscif_remove_one(struct net_device *netdev)
+{
+ struct ibscif_dev *dev, *next;
-+
++
+ list_for_each_entry_safe(dev, next, &devlist, entry) {
+ if (netdev == dev->netdev) {
-+ ibscif_remove_dev(dev);
++ ibscif_remove_dev(dev);
+ break;
+ }
+ }
+
+ INIT_LIST_HEAD(&dev->conn_list);
+ INIT_LIST_HEAD(&dev->mr_list);
-+ spin_lock_init(&dev->mr_list_lock);
-+ spin_lock_init(&dev->lock);
++ init_MUTEX(&dev->mr_list_mutex);
++ init_MUTEX(&dev->mutex);
+ spin_lock_init(&dev->atomic_op);
+ INIT_LIST_HEAD(&dev->wq_list);
+ atomic_set(&dev->available, 256); /* FIXME */
+ dev_hold(netdev);
+ dev->netdev = netdev;
+
++ /* use the MAC address of the netdev as the GID so that RDMA CM can
++ * find the ibdev from the IP address associated with the netdev.
++ */
++ memcpy(&dev->gid, dev->netdev->dev_addr, ETH_ALEN);
++
+ dev->ibdev.dma_device = kzalloc(sizeof *dev->ibdev.dma_device, GFP_KERNEL);
+ if (!dev->ibdev.dma_device) {
+ printk(KERN_ALERT PFX "%s: fail to allocate dma_device\n", __func__);
+ ret = device_register(dev->ibdev.dma_device);
+ if (ret) {
+ printk(KERN_ALERT PFX "%s: fail to register dma_device, ret=%d\n", __func__, ret);
-+ kfree(dev->ibdev.dma_device);
++ kfree(dev->ibdev.dma_device);
+ goto out_free_ibdev;
+ }
+
+
+ node_cnt = ret;
+ dev->node_id = my_node_id;
-+ printk(KERN_ALERT "%s: my node_id is %d\n", __func__, dev->node_id);
++ printk(KERN_ALERT PFX "%s: my node_id is %d\n", __func__, dev->node_id);
+
+ ret = scif_bind(dev->listen_ep, SCIF_OFED_PORT_0);
+ if (ret < 0) {
+ goto out_close_ep;
+ }
+
-+ spin_lock_bh(&devlist_lock);
++ down(&devlist_mutex);
+ list_add_tail(&dev->entry, &devlist);
-+ spin_unlock_bh(&devlist_lock);
++ up(&devlist_mutex);
+
+ if (ibscif_register_device(dev))
+ ibscif_remove_dev(dev);
+ ibscif_protocol_init_pre();
+
+ err = register_netdevice_notifier(&ibscif_notifier_block);
-+ if (err)
++ if (err)
+ ibscif_protocol_cleanup();
+
+ return err;
+{
+ struct ibscif_dev *dev, *next;
+
-+ unregister_netdevice_notifier(&ibscif_notifier_block);
+ ibscif_protocol_cleanup();
++ unregister_netdevice_notifier(&ibscif_notifier_block);
+ list_for_each_entry_safe(dev, next, &devlist, entry)
+ ibscif_remove_dev(dev);
+}
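
ibscif_generate_eui64() above derives the node GUID from the netdev MAC with the standard EUI-48 to modified-EUI-64 expansion (RFC 4291): FF:FE is inserted in the middle and the universal/local bit is flipped, the same bit the removed GID code toggled with "^= 2". The full transform, since the hunk shows only its first half:

    #include <linux/types.h>
    #include <linux/string.h>

    /* Expand a 6-byte MAC into an 8-byte modified EUI-64 interface ID. */
    static void demo_mac_to_eui64(const u8 mac[6], u8 eui64[8])
    {
            memcpy(eui64, mac, 3);
            eui64[3] = 0xFF;
            eui64[4] = 0xFE;
            memcpy(eui64 + 5, mac + 3, 3);
            eui64[0] ^= 2;  /* flip the universal/local bit */
    }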
-diff --git a/drivers/infiniband/hw/scif/ibscif_qp.c b/drivers/infiniband/hw/scif/ibscif_qp.c
-new file mode 100644
-index 0000000..992a7a7
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_qp.c
-@@ -0,0 +1,825 @@
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_qp.c b/drivers/infiniband/hw/scif/ibscif_qp.c
+--- a/drivers/infiniband/hw/scif/ibscif_qp.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_qp.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+
+ kref_init(&qp->ref);
+ init_completion(&qp->done);
-+ mutex_init(&qp->mutex);
++ init_MUTEX(&qp->modify_mutex);
+ spin_lock_init(&qp->lock);
+ ibscif_init_wire(&qp->wire);
+ qp->sq_policy = attr->sq_sig_type;
+ ibscif_cm_async_callback(qp->cm_context);
+ qp->cm_context = NULL;
+
-+ /* don't generate the error event because transitioning to IB_QPS_ERR
++ /* don't generate the error event because transitioning to IB_QPS_ERR
+ state is normal when a QP is disconnected */
+
+ //ibscif_qp_event(qp, IB_EVENT_QP_FATAL);
+} qp_transition[NR_QP_STATES][NR_QP_STATES] = {
+
+ START_STATE(QP_IDLE)
-+ VALID_TRANSITION( QP_IDLE, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_IDLE, MODIFY_ALLOWED )
+ VALID_TRANSITION( QP_CONNECTED, MODIFY_ALLOWED )
+ INVAL_TRANSITION( QP_DISCONNECT )
+ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
+ CEASE_STATE(QP_IDLE)
+
+ START_STATE(QP_CONNECTED)
+ VALID_TRANSITION( QP_CONNECTED, MODIFY_INVALID )
+ VALID_TRANSITION( QP_DISCONNECT, MODIFY_INVALID )
+ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
+ CEASE_STATE(QP_CONNECTED)
+
+ START_STATE(QP_DISCONNECT) /* Automatic transition to IDLE */
+ INVAL_TRANSITION( QP_CONNECTED )
+ INVAL_TRANSITION( QP_DISCONNECT )
+ INVAL_TRANSITION( QP_ERROR )
-+ INVAL_TRANSITION( QP_RESET )
-+ INVAL_TRANSITION( QP_IGNORE )
++ INVAL_TRANSITION( QP_RESET )
++ INVAL_TRANSITION( QP_IGNORE )
+ CEASE_STATE(QP_DISCONNECT)
+
+ START_STATE(QP_ERROR)
+ INVAL_TRANSITION( QP_CONNECTED )
+ INVAL_TRANSITION( QP_DISCONNECT )
+ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
+ CEASE_STATE(QP_ERROR)
+
+ START_STATE(QP_RESET)
+ INVAL_TRANSITION( QP_CONNECTED )
+ INVAL_TRANSITION( QP_DISCONNECT )
+ VALID_TRANSITION( QP_ERROR, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
-+ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
++ VALID_TRANSITION( QP_RESET, MODIFY_INVALID )
++ VALID_TRANSITION( QP_IGNORE, MODIFY_ALLOWED )
+ CEASE_STATE(QP_RESET)
+};
+
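A standalone sketch of the table-driven check used here, with made-up states: a 2-D array indexed by [current][next] records whether a transition is valid and whether modify_qp is permitted in it; unlisted entries default to invalid:

#include <stdio.h>
#include <stdbool.h>

enum qp_state { IDLE, CONNECTED, ERROR, NR_STATES };

struct transition {
	bool valid;
	bool modify_allowed;
};

/* tbl[cur][next]: entries not listed default to {0,0} == invalid */
static const struct transition tbl[NR_STATES][NR_STATES] = {
	[IDLE] = {
		[IDLE]      = { true, true  },
		[CONNECTED] = { true, true  },
		[ERROR]     = { true, false },
	},
	[CONNECTED] = {
		[CONNECTED] = { true, false },
		[ERROR]     = { true, false },
	},
	[ERROR] = {
		[ERROR]     = { true, false },
	},
};

int main(void)
{
	enum qp_state cur = CONNECTED, next = ERROR;
	const struct transition *t = &tbl[cur][next];

	printf("transition %s, modify %s\n",
	       t->valid ? "valid" : "invalid",
	       t->modify_allowed ? "allowed" : "rejected");
	return 0;
}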
+ /*
+ * Mutex prevents simultaneous user-mode QP modifies.
+ */
-+ mutex_lock(&qp->mutex);
++ down(&qp->modify_mutex);
+
+ cur_state = qp->state;
+
+ if (!qp_transition[cur_state][new_state].modify_allowed)
+ goto out;
+
++ if ((attr_mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH) && check_grh) {
++ int remote_node_id = IBSCIF_LID_TO_NODE_ID(attr->ah_attr.dlid);
++ struct ibscif_conn *conn;
++ union ib_gid *dgid;
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: %d-->%d, DGID=%llx:%llx\n",
++ __func__, qp->local_node_id, remote_node_id,
++ __be64_to_cpu(attr->ah_attr.grh.dgid.global.subnet_prefix),
++ __be64_to_cpu(attr->ah_attr.grh.dgid.global.interface_id));
++
++ if (remote_node_id == qp->local_node_id) {
++ dgid = &qp->dev->gid;
++ }
++ else {
++ spin_lock(&qp->lock);
++ conn = ibscif_get_conn(qp->local_node_id, remote_node_id, 0);
++ spin_unlock(&qp->lock);
++ if (!conn) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: failed to make SCIF connection %d-->%d.\n",
++ __func__, qp->local_node_id, remote_node_id);
++ goto out;
++ }
++ dgid = &conn->remote_gid;
++ ibscif_put_conn(conn);
++ }
++
++ if (verbose)
++ printk(KERN_INFO PFX "%s: local GID[%d]=%llx:%llx\n",
++ __func__, remote_node_id,
++ __be64_to_cpu(dgid->global.subnet_prefix),
++ __be64_to_cpu(dgid->global.interface_id));
++
++ if (memcmp(dgid, &attr->ah_attr.grh.dgid, sizeof(*dgid))) {
++ if (verbose)
++ printk(KERN_INFO PFX "%s: connecting to DGID outside the box is unsupported.\n",
++ __func__);
++ goto out;
++ }
++ }
++
+ if (attr_mask & IB_QP_CAP) {
+ sq_size = attr->cap.max_send_wr;
+ rq_size = attr->cap.max_recv_wr;
+
+ __ibscif_query_qp(qp, attr, NULL);
+out:
-+ mutex_unlock(&qp->mutex);
++ up(&qp->modify_mutex);
+ return err;
+}
+
+ if (qp->ibqp.qp_type != IB_QPT_UD)
+ return;
+
-+
++
+ spin_lock_bh(&qp->lock);
+
+ for (i=0; i<IBSCIF_MAX_DEVICES; i++) {
+done:
+ spin_unlock_bh(&qp->lock);
+}
-diff --git a/drivers/infiniband/hw/scif/ibscif_scheduler.c b/drivers/infiniband/hw/scif/ibscif_scheduler.c
-new file mode 100644
-index 0000000..30e5a9f
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_scheduler.c
++
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_scheduler.c b/drivers/infiniband/hw/scif/ibscif_scheduler.c
+--- a/drivers/infiniband/hw/scif/ibscif_scheduler.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_scheduler.c 2014-09-08 13:57:08.000000000 -0700
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ if (unlikely(err))
+ ibscif_qp_internal_disconnect(wq->qp, IBSCIF_REASON_QP_FATAL);
+
-+ if (fast_rdma && need_call_sq_completions)
++ if (fast_rdma && need_call_sq_completions)
+ ibscif_process_sq_completions(wq->qp);
-+
++
+ return err;
+}
+
+
+ while (atomic_xchg(&dev->was_new, 0)) {
+ /* Bail if the device is busy. */
-+ if (!spin_trylock_bh(&dev->lock))
++ if (down_trylock(&dev->mutex))
+ goto out;
+
+ /*
+ if (!ibscif_schedule_wq(wq)) {
+ DEV_STAT(dev, sched_exhaust++);
+ list_splice(&processed, dev->wq_list.prev);
-+ spin_unlock_bh(&dev->lock);
++ up(&dev->mutex);
+ goto out;
+ }
+ list_move_tail(&wq->entry, &processed);
+ }
+ list_splice(&processed, dev->wq_list.prev);
+
-+ spin_unlock_bh(&dev->lock);
++ up(&dev->mutex);
+ }
+ return;
+out:
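The down_trylock() call above returns nonzero when the semaphore is already held, so the scheduler skips a busy device instead of blocking. A userspace analog of the same pattern, using pthread_mutex_trylock(), which likewise returns 0 only on acquisition:

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;

static void schedule_dev(void)
{
	/* like down_trylock(): nonzero return means the lock is held
	 * elsewhere, so skip this pass rather than block */
	if (pthread_mutex_trylock(&dev_mutex)) {
		printf("device busy, bailing out\n");
		return;
	}

	/* ... drain the work-queue list here ... */

	pthread_mutex_unlock(&dev_mutex);
}

int main(void)
{
	schedule_dev();
	return 0;
}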
+{
+ struct ibscif_dev *dev = qp->dev;
+
-+ spin_lock_bh(&dev->lock);
++ down(&dev->mutex);
+ list_add_tail(&qp->sq.entry, &dev->wq_list);
+ list_add_tail(&qp->iq.entry, &dev->wq_list);
-+ spin_unlock_bh(&dev->lock);
++ up(&dev->mutex);
+}
+
+void ibscif_scheduler_remove_qp(struct ibscif_qp *qp)
+{
+ struct ibscif_dev *dev = qp->dev;
+
-+ spin_lock_bh(&dev->lock);
++ down(&dev->mutex);
+ list_del(&qp->sq.entry);
+ list_del(&qp->iq.entry);
-+ spin_unlock_bh(&dev->lock);
-+}
-diff --git a/drivers/infiniband/hw/scif/ibscif_util.c b/drivers/infiniband/hw/scif/ibscif_util.c
-new file mode 100644
-index 0000000..7433203
---- /dev/null
-+++ b/drivers/infiniband/hw/scif/ibscif_util.c
-@@ -0,0 +1,574 @@
++ up(&dev->mutex);
++}
+diff -ruN a/drivers/infiniband/hw/scif/ibscif_util.c b/drivers/infiniband/hw/scif/ibscif_util.c
+--- a/drivers/infiniband/hw/scif/ibscif_util.c 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/ibscif_util.c 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,623 @@
+/*
+ * Copyright (c) 2008 Intel Corporation. All rights reserved.
+ *
+#define IBSCIF_CONN_REJ 2
+#define IBSCIF_CONN_ERR 3
+
++struct ibscif_conn_resp {
++ int cmd;
++ union ib_gid gid;
++};
++
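Note that cmd must remain the first member: when check_grh is off, resp_size is sizeof(int), so sending &resp transmits only the command and the wire format stays compatible with peers that do not exchange GIDs. A standalone sketch (types are stand-ins):

#include <stdio.h>
#include <stddef.h>

union gid { unsigned char raw[16]; };	/* stand-in for union ib_gid */

struct conn_resp {
	int cmd;			/* must stay the first member */
	union gid gid;
};

int main(void)
{
	int check_grh = 0;
	struct conn_resp resp;
	size_t resp_size = check_grh ? sizeof(resp) : sizeof(int);

	/* sending &resp with resp_size == sizeof(int) transmits only cmd,
	 * which is all a peer without GID support expects to receive */
	printf("wire size: %zu bytes\n", resp_size);
	return 0;
}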
+void ibscif_do_accept(struct ibscif_dev *dev)
+{
+ struct scif_portID peer;
+ scif_epd_t ep;
+ struct ibscif_conn *conn;
+ int ret;
-+ int resp;
++ struct ibscif_conn_resp resp;
++ int resp_size;
++
++ if (check_grh)
++ resp_size = sizeof(resp);
++ else
++ resp_size = sizeof(int);
+
+ ret = scif_accept(dev->listen_ep, &peer, &ep, SCIF_ACCEPT_SYNC);
+ if (ret) {
+ }
+
+ if (verbose)
-+ printk(KERN_INFO "%s: %d<--%d\n", __func__, dev->node_id, peer.node);
++ printk(KERN_INFO PFX "%s: %d<--%d\n", __func__, dev->node_id, peer.node);
++
++ if (check_grh)
++ memcpy(&resp.gid, &dev->gid, sizeof(resp.gid));
+
+ spin_lock(&conn_state_lock);
+ switch (conn_state[dev->node_id][peer.node]) {
+ case IBSCIF_CONN_IDLE:
+ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD;
-+ resp = IBSCIF_CONN_REP;
++ resp.cmd = IBSCIF_CONN_REP;
+ if (verbose)
-+ printk(KERN_INFO "%s: no double connection, accepting\n", __func__);
++ printk(KERN_INFO PFX "%s: no double connection, accepting\n", __func__);
+ break;
+
+ case IBSCIF_CONN_REQ_SENT:
+ /* A connection request has been sent, but no response yet. Node id is used to
-+ * break the tie when both side send the connection request. One side is allowed
-+ * to accept the request and its own request will be rejected by the peer.
-+ */
++ * break the tie when both sides send the connection request. One side is allowed
++ * to accept the request and its own request will be rejected by the peer.
++ */
+ if (dev->node_id > peer.node) {
-+ resp = IBSCIF_CONN_REJ;
++ resp.cmd = IBSCIF_CONN_REJ;
+ if (verbose)
-+ printk(KERN_INFO "%s: double connection, rejecting (peer will accept)\n", __func__);
++ printk(KERN_INFO PFX "%s: double connection, rejecting (peer will accept)\n", __func__);
+ }
+ else if (dev->node_id == peer.node) {
+ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD;
-+ resp = IBSCIF_CONN_REP;
++ resp.cmd = IBSCIF_CONN_REP;
+ if (verbose)
-+ printk(KERN_INFO "%s: loopback connection, accepting\n", __func__);
++ printk(KERN_INFO PFX "%s: loopback connection, accepting\n", __func__);
+ }
+ else {
+ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_REQ_RCVD;
-+ resp = IBSCIF_CONN_REP;
++ resp.cmd = IBSCIF_CONN_REP;
+ if (verbose)
-+ printk(KERN_INFO "%s: double connection, accepting (peer will reject)\n", __func__);
++ printk(KERN_INFO PFX "%s: double connection, accepting (peer will reject)\n", __func__);
+ }
+ break;
+
+ case IBSCIF_CONN_REQ_RCVD:
+ if (verbose)
-+ printk(KERN_INFO "%s: duplicated connection request, rejecting\n", __func__);
-+ resp = IBSCIF_CONN_REJ;
++ printk(KERN_INFO PFX "%s: duplicated connection request, rejecting\n", __func__);
++ resp.cmd = IBSCIF_CONN_REJ;
+ break;
+
+ case IBSCIF_CONN_ESTABLISHED:
+ case IBSCIF_CONN_ACTIVE:
+ if (verbose)
-+ printk(KERN_INFO "%s: already connected, rejecting\n", __func__);
-+ resp = IBSCIF_CONN_REJ;
++ printk(KERN_INFO PFX "%s: already connected, rejecting\n", __func__);
++ resp.cmd = IBSCIF_CONN_REJ;
+ break;
+
+ default:
+ if (verbose)
-+ printk(KERN_INFO "%s: invalid state: %d\n", __func__, conn_state[dev->node_id][peer.node]);
-+ resp = IBSCIF_CONN_ERR;
++ printk(KERN_INFO PFX "%s: invalid state: %d\n", __func__, conn_state[dev->node_id][peer.node]);
++ resp.cmd = IBSCIF_CONN_ERR;
+ break;
+ }
+ spin_unlock(&conn_state_lock);
+
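The node-id tie-break in the switch above guarantees that exactly one of two crossing connection requests survives: the higher-id node rejects the incoming request because its own outbound request will be accepted by the peer, while the lower-id node (or a loopback peer) accepts. A standalone sketch of the decision, with made-up ids:

#include <stdio.h>

enum { CONN_REJ, CONN_REP };

/* decide the response to an incoming request while our own request
 * to the same peer may be in flight */
static int respond(int my_id, int peer_id, int req_sent)
{
	if (!req_sent)
		return CONN_REP;	/* no crossing requests: accept */
	if (my_id > peer_id)
		return CONN_REJ;	/* higher id rejects; the peer accepts ours */
	return CONN_REP;		/* lower id, or loopback: accept */
}

int main(void)
{
	printf("node 3 <- node 1: %s\n", respond(3, 1, 1) == CONN_REJ ? "reject" : "accept");
	printf("node 1 <- node 3: %s\n", respond(1, 3, 1) == CONN_REJ ? "reject" : "accept");
	printf("node 2 <- node 2: %s\n", respond(2, 2, 1) == CONN_REJ ? "reject" : "accept");
	return 0;
}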
-+ ret = scif_send(ep, &resp, sizeof(resp), SCIF_SEND_BLOCK);
++ ret = scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
+ if (ret < 0) {
+ printk(KERN_ALERT PFX "%s: scif_send returns %d\n", __func__, ret);
++ scif_close(ep);
+ return;
+ }
+
-+ if (resp != IBSCIF_CONN_REP) {
++ if (resp.cmd != IBSCIF_CONN_REP) {
+ /* one additional handshake to prevent the previous send from being discarded when the ep is closed */
-+ scif_recv(ep, &resp, sizeof(resp), SCIF_RECV_BLOCK);
++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
+ scif_close(ep);
+ return;
+ }
+
++ if (check_grh) {
++ ret = scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_recv returns %d\n", __func__, ret);
++ scif_close(ep);
++ spin_lock(&conn_state_lock);
++ conn_state[dev->node_id][peer.node] = IBSCIF_CONN_IDLE;
++ spin_unlock(&conn_state_lock);
++ return;
++ }
++ }
++
+ conn = kzalloc(sizeof (*conn), GFP_KERNEL);
+ if (!conn) {
+ printk(KERN_ALERT PFX "%s: cannot allocate connection context.\n", __func__);
+
+ conn->ep = ep;
+ conn->remote_node_id = peer.node;
++ if (check_grh)
++ memcpy(&conn->remote_gid, &resp.gid, sizeof(conn->remote_gid));
+ conn->dev = dev;
+ atomic_set(&conn->refcnt, 0);
+
+ spin_unlock(&conn_state_lock);
+
+ if (verbose)
-+ printk(KERN_INFO "%s: connection established. ep=%p\n", __func__, ep);
++ printk(KERN_INFO PFX "%s: connection established. ep=%p\n", __func__, ep);
+
+ ibscif_refresh_mreg(conn);
+
+ /* one additional sync to ensure the MRs are registered with the new ep on both sides */
-+ scif_send(ep, &resp, sizeof(resp), SCIF_SEND_BLOCK);
-+ scif_recv(ep, &resp, sizeof(resp), SCIF_RECV_BLOCK);
++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
+
-+ list_add(&conn->entry, &dev->conn_list);
++ list_add(&conn->entry, &dev->conn_list);
+ ibscif_refresh_pollep_list();
+
+ spin_lock(&conn_state_lock);
+ struct ibscif_conn *conn = NULL;
+ int ret;
+ scif_epd_t ep;
-+ int resp;
++ struct ibscif_conn_resp resp;
++ union ib_gid peer_gid;
++ int resp_size;
++
++ if (check_grh)
++ resp_size = sizeof(resp);
++ else
++ resp_size = sizeof(int);
+
+ if (verbose)
-+ printk(KERN_INFO "%s: %d-->%d\n", __func__, dev->node_id, remote_node_id);
++ printk(KERN_INFO PFX "%s: %d-->%d\n", __func__, dev->node_id, remote_node_id);
+
+ /* Validate remote_node_id for conn_state array check */
+ if ((remote_node_id < 0) || (remote_node_id >= IBSCIF_MAX_DEVICES))
+ if (conn_state[dev->node_id][remote_node_id] != IBSCIF_CONN_IDLE) {
+ spin_unlock(&conn_state_lock);
+ if (verbose)
-+ printk(KERN_INFO "%s: connection already in progress, retry\n", __func__);
++ printk(KERN_INFO PFX "%s: connection already in progress, retry\n", __func__);
+ return ERR_PTR(-EAGAIN);
+ }
+ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_REQ_SENT;
+
+ ep = scif_open();
+ if (!ep) /* SCIF API semantics */
-+ goto out_state;
++ goto out_state;
+
+ if (IS_ERR(ep)) /* SCIF emulator semantics */
+ goto out_state;
+ dest.port = SCIF_OFED_PORT_0;
+
+ ret = scif_connect(ep, &dest);
-+ if (ret < 0)
++ if (ret < 0)
+ goto out_close;
+
+ /* Now ret is the port number ep is bound to */
+
-+ ret = scif_recv(ep, &resp, sizeof(resp), SCIF_RECV_BLOCK);
++ ret = scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
+ if (ret < 0) {
+ printk(KERN_ALERT PFX "%s: scif_recv returns %d\n", __func__, ret);
+ goto out_close;
+ }
+
-+ if (resp != IBSCIF_CONN_REP) {
-+ scif_send(ep, &resp, sizeof(resp), SCIF_SEND_BLOCK);
++ if (resp.cmd != IBSCIF_CONN_REP) {
++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
+ /* the peer has issued the connection request */
-+ if (resp == IBSCIF_CONN_REJ) {
++ if (resp.cmd == IBSCIF_CONN_REJ) {
+ if (verbose)
-+ printk(KERN_INFO "%s: rejected by peer due to double connection\n", __func__);
++ printk(KERN_INFO PFX "%s: rejected by peer due to double connection\n", __func__);
+ scif_close(ep);
+ /* don't reset the state because it's used for checking connection state */
+ return ERR_PTR(-EAGAIN);
+ }
+ else {
+ if (verbose)
-+ printk(KERN_INFO "%s: rejected by peer due to invalid state\n", __func__);
++ printk(KERN_INFO PFX "%s: rejected by peer due to invalid state\n", __func__);
++ goto out_close;
++ }
++ }
++
++ if (check_grh) {
++ memcpy(&peer_gid, &resp.gid, sizeof(peer_gid));
++ memcpy(&resp.gid, &dev->gid, sizeof(resp.gid));
++ ret = scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ if (ret < 0) {
++ printk(KERN_ALERT PFX "%s: scif_send returns %d\n", __func__, ret);
+ goto out_close;
+ }
+ }
+
+ if (verbose)
-+ printk(KERN_INFO "%s: connection established. ep=%p\n", __func__, ep);
++ printk(KERN_INFO PFX "%s: connection established. ep=%p\n", __func__, ep);
+
+ spin_lock(&conn_state_lock);
+ conn_state[dev->node_id][remote_node_id] = IBSCIF_CONN_ESTABLISHED;
+
+ conn->ep = ep;
+ conn->remote_node_id = remote_node_id;
++ if (check_grh)
++ memcpy(&conn->remote_gid, &peer_gid, sizeof(conn->remote_gid));
+ conn->dev = dev;
+ atomic_set(&conn->refcnt, 0);
+
+ ibscif_refresh_mreg(conn);
+
+ /* one additional sync to ensure the MRs are registered with the new ep on both sides */
-+ scif_send(ep, &resp, sizeof(resp), SCIF_SEND_BLOCK);
-+ scif_recv(ep, &resp, sizeof(resp), SCIF_RECV_BLOCK);
++ scif_send(ep, &resp, resp_size, SCIF_SEND_BLOCK);
++ scif_recv(ep, &resp, resp_size, SCIF_RECV_BLOCK);
+
+ list_add_tail(&conn->entry, &dev->conn_list);
+ ibscif_refresh_pollep_list();
+ struct ibscif_conn *conn, *conn1, *conn2;
+ int done=0, err=0, connect_tried=0;
+
-+ spin_lock_bh(&devlist_lock);
++ down(&devlist_mutex);
+ list_for_each_entry_safe(cur, next, &devlist, entry) {
+ if (cur->node_id == node_id) {
+ dev = cur;
+ break;
+ }
+ }
-+ spin_unlock_bh(&devlist_lock);
++ up(&devlist_mutex);
+
+ if (!dev)
+ return NULL;
+again:
+ conn1 = NULL;
+ conn2 = NULL;
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry(conn, &dev->conn_list, entry)
+ {
+ if (conn->remote_node_id == remote_node_id) {
+ break;
+ }
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+ atomic_inc(&conn->refcnt);
+ if (conn->local_close) {
+ conn->local_close = 0;
+ return conn;
+ }
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+
+ /* for loopback connections, we must wait for both endpoints to be in the list to ensure that
-+ * different endpoints are assigned to the two sides
-+ */
++ * different endpoints are assigned to the two sides
++ */
+ if (node_id == remote_node_id) {
+ if (conn1 && conn2) {
+ conn = find_local_peer ? conn2 : conn1;
+ }
+
+ if (connect_tried) {
-+ printk(KERN_ALERT "%s: ERROR: cannot get connection (%d-->%d) after waiting, state=%d\n",
++ printk(KERN_ALERT PFX "%s: ERROR: cannot get connection (%d-->%d) after waiting, state=%d\n",
+ __func__, dev->node_id, remote_node_id, err-1);
+ return NULL;
+ }
+ return;
+
+ if (atomic_dec_and_test(&conn->refcnt)) {
-+ // printk(KERN_INFO "%s: local_close, conn=%p, remote_close=%d\n", __func__, conn, conn->remote_close);
++ // printk(KERN_INFO PFX "%s: local_close, conn=%p, remote_close=%d\n", __func__, conn, conn->remote_close);
+ ibscif_send_close(conn);
+ conn->local_close = 1;
+ }
+ int i = 0;
+ int max = *count;
+
-+ spin_lock_bh(&devlist_lock);
++ down(&devlist_mutex);
+ list_for_each_entry(dev, &devlist, entry) {
+ if (i >= max)
+ break;
+ polleps[i].events = POLLIN;
+ polleps[i].revents = 0;
+ devs[i] = dev;
-+ types[i] = IBSCIF_EP_TYPE_LISTEN;
++ types[i] = IBSCIF_EP_TYPE_LISTEN;
+ conns[i] = NULL;
+ i++;
+ if (verbose)
-+ printk(KERN_INFO "%s: ep=%p (%d:listen)\n", __func__, dev->listen_ep, dev->node_id);
++ printk(KERN_INFO PFX "%s: ep=%p (%d:listen)\n", __func__, dev->listen_ep, dev->node_id);
+
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry(conn, &dev->conn_list, entry)
+ {
+ if (i >= max)
+ polleps[i].events = POLLIN;
+ polleps[i].revents = 0;
+ devs[i] = dev;
-+ types[i] = IBSCIF_EP_TYPE_COMM;
++ types[i] = IBSCIF_EP_TYPE_COMM;
+ conns[i] = conn;
+ i++;
+ if (verbose)
-+ printk(KERN_INFO "%s: ep=%p (%d<--->%d)\n", __func__, conn->ep, dev->node_id, conn->remote_node_id);
++ printk(KERN_INFO PFX "%s: ep=%p (%d<--->%d)\n", __func__, conn->ep, dev->node_id, conn->remote_node_id);
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+ }
-+ spin_unlock_bh(&devlist_lock);
++ up(&devlist_mutex);
+
+ if (verbose)
-+ printk(KERN_INFO "%s: count=%d\n", __func__, i);
++ printk(KERN_INFO PFX "%s: count=%d\n", __func__, i);
+ *count = i;
+}
+
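A userspace analog of the poll-list rebuild above using poll(2): one slot for the listening descriptor plus one per established connection, mirroring the scif_pollepd array filled here (the descriptors below are placeholders):

#include <stdio.h>
#include <poll.h>
#include <unistd.h>

#define MAX_EPS 64

/* hypothetical miniature of the rebuild: one slot for the "listen"
 * descriptor, then one per established connection */
static int build_poll_list(struct pollfd *polleps, int listen_fd,
			   const int *conn_fds, int nconns)
{
	int i = 0;

	polleps[i].fd = listen_fd;
	polleps[i].events = POLLIN;
	polleps[i].revents = 0;
	i++;

	for (int j = 0; j < nconns && i < MAX_EPS; j++, i++) {
		polleps[i].fd = conn_fds[j];
		polleps[i].events = POLLIN;
		polleps[i].revents = 0;
	}
	return i;
}

int main(void)
{
	struct pollfd polleps[MAX_EPS];
	int conns[] = { STDIN_FILENO };
	int n = build_poll_list(polleps, STDIN_FILENO, conns, 1);

	printf("polling %d endpoints\n", n);
	poll(polleps, n, 0);	/* timeout 0: just probe readiness */
	return 0;
}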
+ int i = 0;
+ int max = *count;
+
-+ spin_lock_bh(&devlist_lock);
++ down(&devlist_mutex);
+ list_for_each_entry(dev, &devlist, entry) {
+ if (i >= max)
+ break;
+
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry(conn, &dev->conn_list, entry)
+ {
+ if (i >= max)
+ eps[i] = conn->ep;
+ i++;
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+ }
-+ spin_unlock_bh(&devlist_lock);
++ up(&devlist_mutex);
+
+ *count = i;
+}
+void ibscif_remove_ep(struct ibscif_dev *dev, scif_epd_t ep)
+{
+ struct ibscif_conn *conn, *next;
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry_safe(conn, next, &dev->conn_list, entry)
+ {
+ if (conn->ep == ep) {
+ list_del(&conn->entry);
+ }
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+}
+
+
+ struct ibscif_conn *idle_conns[IBSCIF_MAX_DEVICES];
+ int i, n=0;
+
-+ spin_lock_bh(&devlist_lock);
++ down(&devlist_mutex);
+ list_for_each_entry(dev, &devlist, entry) {
-+ spin_lock(&dev->lock);
++ down(&dev->mutex);
+ list_for_each_entry_safe(conn, next, &dev->conn_list, entry)
+ {
+ if (conn->local_close && conn->remote_close) {
+ idle_conns[n++] = conn;
+ }
+ }
-+ spin_unlock(&dev->lock);
++ up(&dev->mutex);
+ }
-+ spin_unlock_bh(&devlist_lock);
++ up(&devlist_mutex);
+
+ for (i=0; i<n; i++)
+ ibscif_free_conn(idle_conns[i]);
+
-+ if (n)
-+ printk(KERN_ALERT "%s: n=%d\n", __func__, n);
++ if (n && verbose)
++ printk(KERN_ALERT PFX "%s: n=%d\n", __func__, n);
+
+ return n;
+}
+ static struct timeval t0;
+ static struct timeval t;
+ uint32_t usec;
-+
++
+ if (first) {
+ do_gettimeofday(&t0);
+ first = 0;
+
+ do_gettimeofday(&t);
+ usec = (t.tv_sec - t0.tv_sec) * 1000000UL;
-+ if (t.tv_usec >= t0.tv_usec)
++ if (t.tv_usec >= t0.tv_usec)
+ usec += (t.tv_usec - t0.tv_usec);
+ else
+ usec -= (t0.tv_usec - t.tv_usec);
+ }
+ }
+}
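The microsecond-delta logic above is self-contained enough to test in userspace; a sketch with gettimeofday() (do_gettimeofday() is the in-kernel equivalent), handling a tv_usec value that wraps below the start value:

#include <stdio.h>
#include <sys/time.h>

static unsigned long elapsed_usec(const struct timeval *t0,
				  const struct timeval *t)
{
	unsigned long usec = (t->tv_sec - t0->tv_sec) * 1000000UL;

	if (t->tv_usec >= t0->tv_usec)
		usec += t->tv_usec - t0->tv_usec;
	else
		usec -= t0->tv_usec - t->tv_usec;
	return usec;
}

int main(void)
{
	struct timeval t0, t;

	gettimeofday(&t0, NULL);
	/* ... work being timed ... */
	gettimeofday(&t, NULL);
	printf("%lu usec\n", elapsed_usec(&t0, &t));
	return 0;
}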
---
-1.8.3.1
-
++
+diff -ruN a/drivers/infiniband/hw/scif/Kconfig b/drivers/infiniband/hw/scif/Kconfig
+--- a/drivers/infiniband/hw/scif/Kconfig 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/Kconfig 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,4 @@
++config INFINIBAND_SCIF
++ tristate "SCIF RDMA driver support"
++ ---help---
++ RDMA over SCIF driver.
+diff -ruN a/drivers/infiniband/hw/scif/Makefile b/drivers/infiniband/hw/scif/Makefile
+--- a/drivers/infiniband/hw/scif/Makefile 1969-12-31 16:00:00.000000000 -0800
++++ b/drivers/infiniband/hw/scif/Makefile 2014-09-08 13:57:08.000000000 -0700
+@@ -0,0 +1,41 @@
++ifneq ($(KERNELRELEASE),)
++
++# Original Make begins
++
++obj-$(CONFIG_INFINIBAND_SCIF) += ibscif.o
++
++ibscif-y := ibscif_main.o \
++ ibscif_ah.o \
++ ibscif_pd.o \
++ ibscif_cq.o \
++ ibscif_qp.o \
++ ibscif_mr.o \
++ ibscif_cm.o \
++ ibscif_post.o \
++ ibscif_procfs.o \
++ ibscif_loopback.o \
++ ibscif_provider.o \
++ ibscif_protocol.o \
++ ibscif_scheduler.o \
++ ibscif_util.o
++
++# Original Makefile ends
++
++else
++
++ifeq ($(KVER),)
++ ifeq ($(KDIR),)
++ KDIR := /lib/modules/$(shell uname -r)/build
++ endif
++else
++ KDIR := /lib/modules/$(KVER)/build
++endif
++
++all:
++ $(MAKE) -C $(KDIR) SUBDIRS=$(shell pwd) CONFIG_INFINIBAND_SCIF=m
++
++clean:
++ rm -rf *.o *.ko *.mod.c .*.cmd Module.* .tmp_versions
++
++endif
++