From: Mccormick, Patrick M Date: Thu, 26 Jan 2017 09:34:57 +0000 (+0200) Subject: xeon-phi: fix for cm event handler X-Git-Tag: vofed-3.18-3-20170126-0142 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=refs%2Ftags%2Fvofed-3.18-3-20170126-0142;p=~aditr%2Fcompat-rdma.git xeon-phi: fix for cm event handler Signed-off-by: Patrick McCormick Signed-off-by: Vladimir Sokolovsky --- diff --git a/tech-preview/xeon-phi/0006-add-scif.h-to-the-include-directory-matching-the-loc.patch b/tech-preview/xeon-phi/0006-add-scif.h-to-the-include-directory-matching-the-loc.patch deleted file mode 100644 index 4f9aa80..0000000 --- a/tech-preview/xeon-phi/0006-add-scif.h-to-the-include-directory-matching-the-loc.patch +++ /dev/null @@ -1,1756 +0,0 @@ -xeon-phi: add scif.h to the include directory matching the - -location that is in the mpss installation ---- -diff -ruN a5/include/modules/scif.h a6/include/modules/scif.h ---- a5/include/modules/scif.h 1969-12-31 16:00:00.000000000 -0800 -+++ a6/include/modules/scif.h 2015-09-10 09:33:19.295932526 -0700 -@@ -0,0 +1,1748 @@ -+/* -+ * Copyright 2010-2013 Intel Corporation. -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License, version 2, -+ * as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Disclaimer: The codes contained in these modules may be specific to -+ * the Intel Software Development Platform codenamed Knights Ferry, -+ * and the Intel product codenamed Knights Corner, and are not backward -+ * compatible with other Intel products. Additionally, Intel will NOT -+ * support the codes or instruction set in future products. -+ * -+ * Intel offers no warranty of any kind regarding the code. This code is -+ * licensed on an "AS IS" basis and Intel is not obligated to provide -+ * any support, assistance, installation, training, or other services -+ * of any kind. Intel is also not obligated to provide any updates, -+ * enhancements or extensions. Intel specifically disclaims any warranty -+ * of merchantability, non-infringement, fitness for any particular -+ * purpose, and any other warranty. -+ * -+ * Further, Intel disclaims all liability of any kind, including but -+ * not limited to liability for infringement of any proprietary rights, -+ * relating to the use of the code, even if Intel is notified of the -+ * possibility of such liability. Except as expressly stated in an Intel -+ * license agreement provided with this code and agreed upon with Intel, -+ * no license, express or implied, by estoppel or otherwise, to any -+ * intellectual property rights is granted herein. -+ */ -+ -+/* -+ * Revised 15:05 11/24/2010 -+ * Derived from SCIF SAS v0.41 with additional corrections -+ */ -+ -+#ifndef __SCIF_H__ -+#define __SCIF_H__ -+ -+#include -+#include -+#include -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define SCIF_ACCEPT_SYNC 1 -+#define SCIF_SEND_BLOCK 1 -+#define SCIF_RECV_BLOCK 1 -+ -+/** -+ * The purpose of SCIF_VERSION is to check for compatibility between host and -+ * card SCIF modules. This version should be incremented whenever any changes -+ * are made to the SCIF driver code that is common to both card and the host. 
-+ * Whenever this version is incremented, SCIF_LIB_VERSION in user mode libscif -+ * scif.h file should be incremented and vice versa. Both the versions should -+ * always match. -+ */ -+#define SCIF_VERSION 1 -+ -+/* Start: Deprecated Temporary definition for compatability */ -+#define ACCEPT_SYNC SCIF_ACCEPT_SYNC -+#define SEND_BLOCK SCIF_SEND_BLOCK -+#define RECV_BLOCK SCIF_RECV_BLOCK -+/* End: Deprecated Temporary definition for compatability */ -+ -+enum { -+ SCIF_PROT_READ = (1<<0), -+ SCIF_PROT_WRITE = (1<<1) -+}; -+ -+enum { -+ SCIF_MAP_FIXED = 0x10, -+ SCIF_MAP_KERNEL = 0x20 -+}; -+ -+enum { -+ SCIF_FENCE_INIT_SELF = (1<<0), -+ SCIF_FENCE_INIT_PEER = (1<<1) -+}; -+ -+enum { -+ SCIF_FENCE_RAS_SELF = (1<<2), -+ SCIF_FENCE_RAS_PEER = (1<<3) -+}; -+ -+enum { -+ SCIF_SIGNAL_LOCAL = (1<<4), -+ SCIF_SIGNAL_REMOTE = (1<<5) -+}; -+ -+#define SCIF_RMA_USECPU 1 -+#define SCIF_RMA_USECACHE (1<<1) -+#define SCIF_RMA_SYNC (1<<2) -+#define SCIF_RMA_ORDERED (1<<3) -+//! @cond (Prevent doxygen from including these) -+#define SCIF_POLLIN POLLIN -+#define SCIF_POLLOUT POLLOUT -+#define SCIF_POLLERR POLLERR -+#define SCIF_POLLHUP POLLHUP -+#define SCIF_POLLNVAL POLLNVAL -+ -+/* SCIF Reserved Ports */ -+/* COI */ -+#define SCIF_COI_PORT_0 40 -+#define SCIF_COI_PORT_1 41 -+#define SCIF_COI_PORT_2 42 -+#define SCIF_COI_PORT_3 43 -+#define SCIF_COI_PORT_4 44 -+#define SCIF_COI_PORT_5 45 -+#define SCIF_COI_PORT_6 46 -+#define SCIF_COI_PORT_7 47 -+#define SCIF_COI_PORT_8 48 -+#define SCIF_COI_PORT_9 49 -+ -+/* OFED */ -+#define SCIF_OFED_PORT_0 60 -+#define SCIF_OFED_PORT_1 61 -+#define SCIF_OFED_PORT_2 62 -+#define SCIF_OFED_PORT_3 63 -+#define SCIF_OFED_PORT_4 64 -+#define SCIF_OFED_PORT_5 65 -+#define SCIF_OFED_PORT_6 66 -+#define SCIF_OFED_PORT_7 67 -+#define SCIF_OFED_PORT_8 68 -+#define SCIF_OFED_PORT_9 69 -+ -+/* NETDEV */ -+#define SCIF_NETDEV_PORT_0 80 -+#define SCIF_NETDEV_PORT_1 81 -+#define SCIF_NETDEV_PORT_2 82 -+#define SCIF_NETDEV_PORT_3 83 -+#define 
SCIF_NETDEV_PORT_4 84 -+#define SCIF_NETDEV_PORT_5 85 -+#define SCIF_NETDEV_PORT_6 86 -+#define SCIF_NETDEV_PORT_7 87 -+#define SCIF_NETDEV_PORT_8 88 -+#define SCIF_NETDEV_PORT_9 89 -+ -+/* RAS */ -+#define SCIF_RAS_PORT_0 100 -+#define SCIF_RAS_PORT_1 101 -+#define SCIF_RAS_PORT_2 102 -+#define SCIF_RAS_PORT_3 103 -+#define SCIF_RAS_PORT_4 104 -+#define SCIF_RAS_PORT_5 105 -+#define SCIF_RAS_PORT_6 106 -+#define SCIF_RAS_PORT_7 107 -+#define SCIF_RAS_PORT_8 108 -+#define SCIF_RAS_PORT_9 109 -+ -+/* Power Management */ -+#define SCIF_PM_PORT_0 120 -+#define SCIF_PM_PORT_1 121 -+#define SCIF_PM_PORT_2 122 -+#define SCIF_PM_PORT_3 123 -+#define SCIF_PM_PORT_4 124 -+#define SCIF_PM_PORT_5 125 -+#define SCIF_PM_PORT_6 126 -+#define SCIF_PM_PORT_7 127 -+#define SCIF_PM_PORT_8 128 -+#define SCIF_PM_PORT_9 129 -+ -+/* Board Tools */ -+#define SCIF_BT_PORT_0 130 -+#define SCIF_BT_PORT_1 131 -+#define SCIF_BT_PORT_2 132 -+#define SCIF_BT_PORT_3 133 -+#define SCIF_BT_PORT_4 134 -+#define SCIF_BT_PORT_5 135 -+#define SCIF_BT_PORT_6 136 -+#define SCIF_BT_PORT_7 137 -+#define SCIF_BT_PORT_8 138 -+#define SCIF_BT_PORT_9 139 -+ -+/* MIC Boot/Configuration support */ -+#define MPSSD_DOWNLOAD 160 -+#define MIC_NOTIFY 161 -+ -+#define SCIF_ADMIN_PORT_END 1024 -+ -+/* MYO */ -+#define SCIF_MYO_PORT_0 1025 -+#define SCIF_MYO_PORT_1 1026 -+#define SCIF_MYO_PORT_2 1027 -+#define SCIF_MYO_PORT_3 1028 -+#define SCIF_MYO_PORT_4 1029 -+#define SCIF_MYO_PORT_5 1030 -+#define SCIF_MYO_PORT_6 1031 -+#define SCIF_MYO_PORT_7 1032 -+#define SCIF_MYO_PORT_8 1033 -+#define SCIF_MYO_PORT_9 1034 -+ -+/* SSG Tools */ -+#define SCIF_ST_PORT_0 1044 -+#define SCIF_ST_PORT_1 1045 -+#define SCIF_ST_PORT_2 1046 -+#define SCIF_ST_PORT_3 1047 -+#define SCIF_ST_PORT_4 1048 -+#define SCIF_ST_PORT_5 1049 -+#define SCIF_ST_PORT_6 1050 -+#define SCIF_ST_PORT_7 1051 -+#define SCIF_ST_PORT_8 1052 -+#define SCIF_ST_PORT_9 1053 -+ -+/* End of SCIF Reserved Ports */ -+#define SCIF_PORT_RSVD 1088 -+//! 
@endcond -+ -+typedef struct endpt *scif_epd_t; -+ -+typedef struct scif_pinned_pages *scif_pinned_pages_t; -+ -+struct scif_range { -+ void *cookie; /* cookie */ -+ int nr_pages; /* Number of Pages */ -+ int prot_flags; /* R/W protection */ -+ /* Arrays phys_addr/va below are virtually contiguous */ -+ dma_addr_t *phys_addr; /* Array of physical addresses */ -+ void **va; /* Array of virtual addresses -+ * and populated only when called -+ * on the host for a remote SCIF -+ * connection on MIC. -+ */ -+}; -+ -+struct scif_pollepd { -+ scif_epd_t epd; /* endpoint descriptor */ -+ short events; /* requested events */ -+ short revents; /* returned events */ -+}; -+enum scif_event_type { -+ SCIF_NODE_ADDED = 1<<0, -+ SCIF_NODE_REMOVED = 1<<1 -+}; -+ -+union eventd { -+ uint16_t scif_node_added; -+ uint16_t scif_node_removed; -+}; -+ -+typedef void (*scif_callback_t)(enum scif_event_type event, union eventd -+data); -+ -+struct scif_callback { -+ struct list_head list_member; -+ scif_callback_t callback_handler; -+}; -+ -+#define SCIF_OPEN_FAILED ((scif_epd_t)-1) -+#define SCIF_REGISTER_FAILED ((off_t)-1) -+#define SCIF_MMAP_FAILED ((void *)-1) -+ -+struct scif_portID { -+ uint16_t node; /* node on which port resides */ -+ uint16_t port; /* Local port number */ -+}; -+ -+/* Start: Deprecated Temporary definition for compatability */ -+#define portID scif_portID -+typedef struct portID portID_t; -+/* End: Deprecated Temporary definition for compatability */ -+ -+/** -+ * scif_open - Create an endpoint -+ * -+ *\return -+ * The scif_open() function creates a new endpoint. -+ * -+ * Upon successful completion, scif_open() returns an endpoint descriptor to -+ * be used in subsequent SCIF functions calls to refer to that endpoint; -+ * otherwise: in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is -+ * returned and errno is set to indicate the error; in kernel mode a NULL -+ * scif_epd_t is returned. 
-+ * -+ *\par Errors: -+ *- ENOMEM -+ * - Insufficient kernel memory was available. -+ *- ENXIO -+ * - Version mismatch between micscif driver and libscif. -+ */ -+scif_epd_t scif_open(void); -+ -+/** -+ * scif_bind - Bind an endpoint to a port -+ * \param epd endpoint descriptor -+ * \param pn port number -+ * -+ * scif_bind() binds endpoint epd to port pn, where pn is a port number on the -+ * local node. If pn is zero, a port number greater than or equal to -+ * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to -+ * exactly one local port. Ports less than 1024 when requested can only be bound -+ * by system (or root) processes or by processes executed by privileged users. -+ * -+ *\return -+ * Upon successful completion, scif_bind() returns the port number to which epd -+ * is bound; otherwise: in user mode -1 is returned and errno is set to -+ * indicate the error; in kernel mode the negative of one of the following -+ * errors is returned. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - The endpoint or the port are already bound. -+ *- EISCONN -+ * - The endpoint is already connected. -+ *- ENOSPC -+ * - No port number available for assignment (when pn==0). -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- EACCES -+ * - The port requested is protected and the user is not the superuser. -+*/ -+int scif_bind(scif_epd_t epd, uint16_t pn); -+ -+/** -+ * scif_listen - Listen for connections on an endpoint -+ * -+ * \param epd endpoint descriptor -+ * \param backlog maximum pending connection requests -+ * -+ * scif_listen() marks the endpoint epd as a listening endpoint - that is, as -+ * an endpoint that will be used to accept incoming connection requests. Once -+ * so marked, the endpoint is said to be in the listening state and may not be -+ * used as the endpoint of a connection.
-+ * -+ * The endpoint, epd, must have been bound to a port. -+ * -+ * The backlog argument defines the maximum length to which the queue of -+ * pending connections for epd may grow. If a connection request arrives when -+ * the queue is full, the client may receive an error with an indication that -+ * the connection was refused. -+ * -+ *\return -+ * Upon successful completion, scif_listen() returns 0; otherwise: in user mode -+ * -1 is returned and errno is set to indicate the error; in kernel mode the -+ * negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - The endpoint is not bound to a port -+ *- EISCONN -+ * - The endpoint is already connected or listening -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+*/ -+int scif_listen(scif_epd_t epd, int backlog); -+ -+/** -+ * scif_connect - Initiate a connection on a port -+ * \param epd endpoint descriptor -+ * \param dst global id of port to which to connect -+ * -+ * The scif_connect() function requests the connection of endpoint epd to remote -+ * port dst. If the connection is successful, a peer endpoint, bound to dst, is -+ * created on node dst.node. On successful return, the connection is complete. -+ * -+ * If the endpoint epd has not already been bound to a port, scif_connect() -+ * will bind it to an unused local port. -+ * -+ * A connection is terminated when an endpoint of the connection is closed, -+ * either explicitly by scif_close(), or when a process that owns one of the -+ * endpoints of a connection is terminated. -+ * -+ *\return -+ * Upon successful completion, scif_connect() returns the port ID to which the -+ * endpoint, epd, is bound; otherwise: in user mode -1 is returned and errno is -+ * set to indicate the error; in kernel mode the negative of one of the -+ * following errors is returned. 
-+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNREFUSED -+ * - The destination was not listening for connections or refused the -+ * connection request. -+ *- EINTR -+ * - Interrupted function -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - dst.port is not a valid port ID -+ *- EISCONN -+ * - The endpoint is already connected -+ *- ENOBUFS -+ * - No buffer space is available -+ *- ENODEV -+ * - The destination node does not exist, or -+ * - The node is lost. -+ *- ENOSPC -+ * - No port number available for assignment (when pn==0). -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- EOPNOTSUPP -+ * - The endpoint is listening and cannot be connected -+*/ -+int scif_connect(scif_epd_t epd, struct scif_portID *dst); -+ -+/** -+ * scif_accept - Accept a connection on an endpoint -+ * \param epd endpoint descriptor -+ * \param peer global id of port to which connected -+ * \param newepd new connected endpoint descriptor -+ * \param flags flags -+ * -+ * The scif_accept() call extracts the first connection request on the queue of -+ * pending connections for the port on which epd is listening. scif_accept() -+ * creates a new endpoint, bound to the same port as epd, and allocates a new -+ * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new -+ * endpoint is connected to the endpoint through which the connection was -+ * requested. epd is unaffected by this call, and remains in the listening -+ * state. -+ * -+ * On successful return, peer holds the global port identifier (node id and -+ * local port number) of the port which requested the connection. -+ * -+ * If the peer endpoint which requested the connection is closed, the endpoint -+ * returned by scif_accept() is closed. -+ * -+ * The number of connections that can (subsequently) be accepted on epd is only -+ * limited by system resources (memory). 
-+ * -+ * The flags argument is formed by OR'ing together zero or more of the -+ * following values: -+ *- SCIF_ACCEPT_SYNC: block until a connection request is presented. If -+ * SCIF_ACCEPT_SYNC is not in flags, and no pending -+ * connections are present on the queue, scif_accept() fails -+ * with an EAGAIN error -+ * -+ * On Linux in user mode, the select() and poll() functions can be used to -+ * determine when there is a connection request. On Microsoft Windows* and on -+ * Linux in kernel mode, the scif_poll() function may be used for this purpose. -+ * A readable event will be delivered when a connection is requested. -+ * -+ *\return -+ * Upon successful completion, scif_accept() returns 0; otherwise: in user mode -+ * -1 is returned and errno is set to indicate the error; in kernel mode the -+ * negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EAGAIN -+ * - SCIF_ACCEPT_SYNC is not set and no connections are present to be accepted, or -+ * - SCIF_ACCEPT_SYNC is not set and remote node failed to complete its -+ * connection request -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- EINTR -+ * - Interrupted function -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - epd is not a listening endpoint -+ * - flags is invalid -+ * - peer is NULL -+ * - newepd is NULL -+ *- ENOBUFS -+ * - No buffer space is available -+ *- ENODEV -+ * - The requesting node is lost. -+ *- ENOMEM -+ * - Not enough space -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENOENT -+ * - Secondary part of epd registration failed. -+*/ -+int scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t -+*newepd, int flags); -+ -+/** -+ * scif_close - Close an endpoint -+ * \param epd endpoint descriptor -+ * -+ * scif_close() closes an endpoint and performs necessary teardown of -+ * facilities associated with that endpoint.
-+ * -+ * If epd is a listening endpoint then it will no longer accept connection -+ * requests on the port to which it is bound. Any pending connection requests -+ * are rejected. -+ * -+ * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs -+ * which are in-process through epd or its peer endpoint will complete before -+ * scif_close() returns. Registered windows of the local and peer endpoints are -+ * released as if scif_unregister() was called against each window. -+ * -+ * Closing an endpoint does not affect mappings to remote memory. These remain -+ * until explicitly removed by calling scif_munmap(). -+ * -+ * If the peer endpoint's receive queue is not empty at the time that epd is -+ * closed, then the peer endpoint can be passed as the endpoint parameter to -+ * scif_recv() until the receive queue is empty. -+ * -+ * If epd is bound to a port, then the port is returned to the pool of -+ * available ports. -+ * -+ * epd is freed and may no longer be accessed. -+ * -+ *\return -+ * Upon successful completion, scif_close() returns 0; otherwise: in user mode -+ * -1 is returned and errno is set to indicate the error; in kernel mode the -+ * negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor -+ */ -+int scif_close(scif_epd_t epd); -+ -+/** -+ * scif_send - Send a message -+ * \param epd endpoint descriptor -+ * \param msg message buffer address -+ * \param len message length -+ * \param flags blocking mode flags -+ * -+ * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data -+ * are copied from memory starting at address msg. On successful execution the -+ * return value of scif_send() is the number of bytes that were sent, and is -+ * zero if no bytes were sent because len was zero. scif_send() may be called -+ * only when the endpoint is in a connected state. 
-+ * -+ * If a scif_send() call is non-blocking, then it sends only those bytes which -+ * can be sent without waiting, up to a maximum of len bytes. -+ * -+ * If a scif_send() call is blocking, then it normally returns after sending -+ * all len bytes. If a blocking call is interrupted or the connection is -+ * forcibly closed, the call is considered successful if some bytes were sent -+ * or len is zero, otherwise the call is considered unsuccessful. -+ * -+ * On Linux in user mode, the select() and poll() functions can be used to -+ * determine when the send queue is not full. On Microsoft Windows* and on -+ * Linux in kernel mode, the scif_poll() function may be used for this purpose. -+ * -+ * It is recommended that scif_send()/scif_recv() only be used for short -+ * control-type message communication between SCIF endpoints. The SCIF RMA -+ * APIs are expected to provide better performance for transfer sizes of -+ * 1024 bytes or longer. -+ * -+ * The flags argument is formed by ORing together zero or more of the following -+ * values: -+ *- SCIF_SEND_BLOCK: block until the entire message is sent. -+ * -+ *\return -+ * Upon successful completion, scif_send() returns the number of bytes sent; -+ * otherwise: in user mode -1 is returned and errno is set to indicate the -+ * error; in kernel mode the negative of one of the following errors is -+ * returned. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EFAULT -+ * - An invalid address was specified for a parameter. -+ *- EINTR -+ * - epd was closed by scif_close() -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - flags is invalid -+ * - len is negative -+ *- ENODEV -+ * - The remote node is lost. 
-+ *- ENOMEM -+ * - Not enough space -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ */ -+int scif_send(scif_epd_t epd, void *msg, int len, int flags); -+ -+/** -+ * scif_recv - Receive a message -+ * \param epd endpoint descriptor -+ * \param msg message buffer address -+ * \param len message buffer length -+ * \param flags blocking mode flags -+ * -+ * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of -+ * data are copied to memory starting at address msg. On successful execution -+ * the return value of scif_recv() is the number of bytes that were received, -+ * and is zero if no bytes were received because len was zero. scif_recv() may -+ * be called only when the endpoint is in a connected state. -+ * -+ * If a scif_recv() call is non-blocking, then it receives only those bytes -+ * which can be received without waiting, up to a maximum of len bytes. -+ * -+ * If a scif_recv() call is blocking, then it normally returns after receiving -+ * all len bytes. If a blocking call is interrupted or the connection is -+ * forcibly closed, the call is considered successful if some bytes were -+ * received or len is zero, otherwise the call is considered unsuccessful; -+ * subsequent calls to scif_recv() will successfully receive all data sent -+ * through peer endpoint interruption or the connection was forcibly closed. -+ * -+ * On Linux in user mode, the select() and poll() functions can be used to -+ * determine when data is available to be received. On Microsoft Windows* and -+ * on Linux in kernel mode, the scif_poll() function may be used for this -+ * purpose. -+ * -+ * It is recommended that scif_send()/scif_recv() only be used for short -+ * control-type message communication between SCIF endpoints. The SCIF RMA -+ * APIs are expected to provide better performance for transfer sizes of -+ * 1024 bytes or longer. 
-+ * -+ * The flags argument is formed by ORing together zero or more of the following -+ * values: -+ *- SCIF_RECV_BLOCK: block until the entire message is received. -+ * -+ *\return -+ * Upon successful completion, scif_recv() returns the number of bytes -+ * received; otherwise: in user mode -1 is returned and errno is set to -+ * indicate the error; in kernel mode the negative of one of the following -+ * errors is returned. -+ * -+ *\par Errors: -+ *- EAGAIN -+ * - The destination node is returning from a low power state. -+ *- EBADF -+ * - epd is not a valid endpoint descriptor . -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EFAULT -+ * - An invalid address was specified for a parameter. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - flags is invalid, or -+ * - len is negative. -+ *- ENODEV -+ * - The remote node is lost. -+ *- ENOMEM -+ * - Not enough space. -+ *- ENOTCONN -+ * - The endpoint is not connected. -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ */ -+int scif_recv(scif_epd_t epd, void *msg, int len, int flags); -+ -+/** -+ * scif_register - Mark a memory region for remote access. -+ * \param epd endpoint descriptor -+ * \param addr starting virtual address -+ * \param len length of range -+ * \param offset offset of window -+ * \param prot_flags read/write protection flags -+ * \param map_flags mapping flags -+ * -+ * The scif_register() function opens a window, a range of whole pages of the -+ * registered address space of the endpoint epd, starting at offset po and -+ * continuing for len bytes. The value of po, further described below, is a -+ * function of the parameters offset and len, and the value of map_flags. Each -+ * page of the window represents the physical memory page which backs the -+ * corresponding page of the range of virtual address pages starting at addr -+ * and continuing for len bytes. addr and len are constrained to be multiples -+ * of the page size. 
addr is interpreted as a user space address. A successful -+ * scif_register() call returns po as the return value. -+ * -+ * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset -+ * exactly, and offset is constrained to be a multiple of the page size. The -+ * mapping established by scif_register() will not replace any existing -+ * registration; an error is returned if any page within the range [offset, -+ * offset+len-1] intersects an existing window. -+ * Note: When SCIF_MAP_FIXED is set the current implementation limits -+ * offset to the range [0..2^62-1] and returns EADDRINUSE if the offset -+ * requested with SCIF_MAP_FIXED is in the range [2^62..2^63-1]. -+ * -+ * When SCIF_MAP_FIXED is not set, the implementation uses offset in an -+ * implementation-defined manner to arrive at po. The po value so chosen will -+ * be an area of the registered address space that the implementation deems -+ * suitable for a mapping of len bytes. An offset value of 0 is interpreted as -+ * granting the implementation complete freedom in selecting po, subject to -+ * constraints described below. A non-zero value of offset is taken to be a -+ * suggestion of an offset near which the mapping should be placed. When the -+ * implementation selects a value for po, it does not replace any extant -+ * window. In all cases, po will be a multiple of the page size. -+ * -+ * The physical pages which are so represented by a window are available for -+ * access in calls to scif_mmap(), scif_readfrom(), scif_writeto(), -+ * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the -+ * physical pages represented by the window will not be reused by the memory -+ * subsystem for any other purpose. Note that the same physical page may be -+ * represented by multiple windows. 
-+ * -+ * Subsequent operations which change the memory pages to which virtual -+ * addresses are mapped (such as mmap(), munmap(), scif_mmap() and -+ * scif_munmap()) have no effect on existing windows. -+ * -+ * On Linux, if the process will fork(), it is recommended that the registered -+ * virtual address range be marked with MADV_DONTFORK. Doing so will prevent -+ * problems due to copy-on-write semantics. -+ * -+ * The prot_flags argument is formed by OR'ing together one or more of the -+ * following values: -+ *- SCIF_PROT_READ: allow read operations from the window -+ *- SCIF_PROT_WRITE: allow write operations to the window -+ * -+ * The map_flags argument is formed by OR'ing together zero or more of -+ * the following values: -+ *- SCIF_MAP_FIXED: interpret offset exactly -+ * -+ *\return -+ * Upon successful completion, scif_register() returns the offset at which the -+ * mapping was placed (po); otherwise: in user mode SCIF_REGISTER_FAILED (that -+ * is (off_t)-1) is returned and errno is set to indicate the error; in -+ * kernel mode the negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EADDRINUSE -+ * - SCIF_MAP_FIXED is set in map_flags, and pages in the range [offset, -+ * offset+len-1] are already registered -+ *- EAGAIN -+ * - The mapping could not be performed due to lack of resources -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EFAULT -+ * - Addresses in the range [addr, addr + len - 1] are invalid -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - map_flags is invalid, or -+ * - prot_flags is invalid, or -+ * - SCIF_MAP_FIXED is set in map_flags, and offset is not a multiple of -+ * the page size, or -+ * - addr is not a multiple of the page size, or -+ * - len is not a multiple of the page size, or is 0, or -+ * - offset is negative -+ *- ENODEV -+ * - The remote node is lost.
-+ *- ENOMEM -+ * - Not enough space -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ */ -+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, -+int prot_flags, int map_flags); -+ -+/** -+ * scif_unregister - Release a memory region marked for remote access. -+ * \param epd endpoint descriptor -+ * \param offset start of range to unregister -+ * \param len length of range to unregister -+ * -+ * The scif_unregister() function closes those previously registered windows -+ * which are entirely within the range [offset,offset+len-1]. It is an error to -+ * specify a range which intersects only a subrange of a window. -+ * -+ * On a successful return, pages within the window may no longer be specified -+ * in calls to scif_mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), -+ * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window, however, -+ * continues to exist until all previous references against it are removed. A -+ * window is referenced if there is a mapping to it created by scif_mmap(), or if -+ * scif_get_pages() was called against the window (and the pages have not been -+ * returned via scif_put_pages()). A window is also referenced while an RMA, in -+ * which some range of the window is a source or destination, is in progress. -+ * Finally a window is referenced while some offset in that window was specified -+ * to scif_fence_signal(), and the RMAs marked by that call to -+ * scif_fence_signal() have not completed. While a window is in this state, its -+ * registered address space pages are not available for use in a new registered -+ * window. -+ * -+ * When all such references to the window have been removed, its references to -+ * all the physical pages which it represents are removed. Similarly, the -+ * registered address space pages of the window become available for -+ * registration in a new window.
-+ * -+ *\return -+ * Upon successful completion, scif_unregister() returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. In the event of an -+ * error, no windows are unregistered. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - The range [offset,offset+len-1] intersects a subrange of a window, or -+ * - offset is negative -+ *- ENODEV -+ * -The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENXIO -+ * - Addresses in the range [offset,offset+len-1] are invalid for the -+ * registered address space of epd. -+ */ -+int scif_unregister(scif_epd_t epd, off_t offset, size_t len); -+ -+ -+/** -+ * scif_readfrom - Copy from a remote address space -+ * \param epd endpoint descriptor -+ * \param loffset offset in local registered address space to -+ * which to copy -+ * \param len length of range to copy -+ * \param roffset offset in remote registered address space -+ * from which to copy -+ * \param rma_flags transfer mode flags -+ * -+ * scif_readfrom() copies len bytes from the remote registered address space of -+ * the peer of endpoint epd, starting at the offset roffset to the local -+ * registered address space of epd, starting at the offset loffset. -+ * -+ * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ -+ * len-1] must be within some registered window or windows of the local and -+ * remote nodes respectively. A range may intersect multiple registered -+ * windows, but only if those windows are contiguous in the registered address -+ * space. -+ * -+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using -+ * programmed read/writes. 
Otherwise the data is copied using DMA. If rma_- -+ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the -+ * transfer is complete. Otherwise, the transfer may be performed asynchron- -+ * ously. The order in which any two aynchronous RMA operations complete -+ * is non-deterministic. The synchronization functions, scif_fence_mark()/ -+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to -+ * the completion of asynchronous RMA operations. -+ * -+ * The DMA transfer of individual bytes is not guaranteed to complete in -+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last -+ * cacheline or partial cacheline of the source range will become visible on -+ * the destination node after all other transferred data in the source -+ * range has become visible on the destination node. -+ * -+ * The optimal DMA performance will likely be realized if both -+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower -+ * performance will likely be realized if loffset and roffset are not -+ * cacheline aligned but are separated by some multiple of 64. The lowest level -+ * of performance is likely if loffset and roffset are not separated by a -+ * multiple of 64. -+ * -+ * The rma_flags argument is formed by ORing together zero or more of the -+ * following values: -+ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA -+ * engine. -+ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the -+ * transfer has completed. Passing this flag might result in -+ * the API busy waiting and consuming CPU cycles while the DMA -+ * transfer is in progress. 
-+ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of -+ * the source range becomes visible on the destination node -+ * after all other transferred data in the source range has -+ * become visible on the destination -+ * -+ *\return -+ * Upon successful completion, scif_readfrom() returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. -+ * -+ *\par Errors -+ *- EACCESS -+ * - Attempt to write to a read-only range or read from a write-only range -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - rma_flags is invalid -+ *- ENODEV -+ * -The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENXIO -+ * - The range [loffset,loffset+len-1] is invalid for the registered address -+ * space of epd, or, -+ * - The range [roffset,roffset+len-1] is invalid for the registered address -+ * space of the peer of epd, or -+ * - loffset or roffset is negative -+*/ -+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t -+roffset, int rma_flags); -+ -+/** -+ * scif_writeto - Copy to a remote address space -+ * \param epd endpoint descriptor -+ * \param loffset offset in local registered address space -+ * from which to copy -+ * \param len length of range to copy -+ * \param roffset offset in remote registered address space to -+ * which to copy -+ * \param rma_flags transfer mode flags -+ * -+ * scif_writeto() copies len bytes from the local registered address space of -+ * epd, starting at the offset loffset to the remote registered address space -+ * of the peer of endpoint epd, starting at the offset roffset. 
-+ * -+ * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ -+ * len-1] must be within some registered window or windows of the local and -+ * remote nodes respectively. A range may intersect multiple registered -+ * windows, but only if those windows are contiguous in the registered address -+ * space. -+ * -+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using -+ * programmed read/writes. Otherwise the data is copied using DMA. If -+ * rma_flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the -+ * transfer is complete. Otherwise, the transfer may be performed -+ * asynchronously. The order in which any two asynchronous RMA operations complete -+ * is non-deterministic. The synchronization functions, scif_fence_mark()/ -+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to -+ * the completion of asynchronous RMA operations. -+ * -+ * The DMA transfer of individual bytes is not guaranteed to complete in -+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last -+ * cacheline or partial cacheline of the source range will become visible on -+ * the destination node after all other transferred data in the source -+ * range has become visible on the destination node. -+ * -+ * The optimal DMA performance will likely be realized if both -+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower -+ * performance will likely be realized if loffset and roffset are not cacheline -+ * aligned but are separated by some multiple of 64. The lowest level of -+ * performance is likely if loffset and roffset are not separated by a multiple -+ * of 64. -+ * -+ * The rma_flags argument is formed by ORing together zero or more of the -+ * following values: -+ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA -+ * engine. -+ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the -+ * transfer has completed.
Passing this flag might result in -+ * the API busy waiting and consuming CPU cycles while the DMA -+ * transfer is in progress. -+ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of -+ * the source range becomes visible on the destination node -+ * after all other transferred data in the source range has -+ * become visible on the destination -+ * -+ *\return -+ * Upon successful completion, scif_writeto() returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EACCES -+ * - Attempt to write to a read-only range or read from a write-only range -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - rma_flags is invalid -+ *- ENODEV -+ * - The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENXIO -+ * - The range [loffset,loffset+len-1] is invalid for the registered address -+ * space of epd, or, -+ * - The range [roffset,roffset+len-1] is invalid for the registered -+ * address space of the peer of epd, or -+ * - loffset or roffset is negative -+ */ -+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t -+roffset, int rma_flags); -+ -+/** -+ * scif_vreadfrom - Copy from a remote address space -+ * \param epd endpoint descriptor -+ * \param addr address to which to copy -+ * \param len length of range to copy -+ * \param roffset offset in remote registered address space -+ * from which to copy -+ * \param rma_flags transfer mode flags -+ * -+ * scif_vreadfrom() copies len bytes from the remote registered address -+ * space of the peer of endpoint epd, starting at the offset roffset, to local -+ * memory, starting at addr.
addr is interpreted as a user space address. -+ * -+ * The specified range [roffset,roffset+len-1] must be within some registered -+ * window or windows of the remote nodes respectively. The range may intersect -+ * multiple registered windows, but only if those windows are contiguous in the -+ * registered address space. -+ * -+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using -+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_- -+ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the -+ * transfer is complete. Otherwise, the transfer may be performed asynchron- -+ * ously. The order in which any two aynchronous RMA operations complete -+ * is non-deterministic. The synchronization functions, scif_fence_mark()/ -+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to -+ * the completion of asynchronous RMA operations. -+ * -+ * The DMA transfer of individual bytes is not guaranteed to complete in -+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last -+ * cacheline or partial cacheline of the source range will become visible on -+ * the destination node after all other transferred data in the source -+ * range has become visible on the destination node. -+ * -+ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back -+ * the specified local memory range may be remain in a pinned state even after -+ * the specified transfer completes. This may reduce overhead if some or all of -+ * the same virtual address range is referenced in a subsequent call of -+ * scif_vreadfrom() or scif_vwriteto(). -+ * -+ * The optimal DMA performance will likely be realized if both -+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower -+ * performance will likely be realized if loffset and roffset are not -+ * cacheline aligned but are separated by some multiple of 64. 
The lowest level -+ * of performance is likely if loffset and roffset are not separated by a -+ * multiple of 64. -+ * -+ * The rma_flags argument is formed by ORing together zero or more of the -+ * following values: -+ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA -+ * engine. -+ *- SCIF_RMA_USECACHE: enable registration caching -+ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the -+ * transfer has completed. Passing this flag might result in -+ * the API busy waiting and consuming CPU cycles while the DMA -+ * transfer is in progress. -+ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of -+ * the source range becomes visible on the destination node -+ * after all other transferred data in the source range has -+ * become visible on the destination -+ * -+ *\return -+ * Upon successful completion, scif_vreadfrom() returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EACCESS -+ * - Attempt to write to a read-only range or read from a write-only range -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EFAULT -+ * - Addresses in the range [addr,addr+len-1] are invalid -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - rma_flags is invalid -+ *- ENODEV -+ * - The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENXIO -+ * - Addresses in the range [roffset,roffset+len-1] are invalid for the -+ * registered address space of epd. 
-+ */ -+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t offset, -+int rma_flags); -+ -+/** -+ * scif_vwriteto - Copy to a remote address space -+ * \param epd endpoint descriptor -+ * \param addr address from which to copy -+ * \param len length of range to copy -+ * \param roffset offset in remote registered address space to -+ * which to copy -+ * \param rma_flags transfer mode flags -+ * -+ * scif_vwriteto() copies len bytes from the local memory, starting at addr, to -+ * the remote registered address space of the peer of endpoint epd, starting at -+ * the offset roffset. addr is interpreted as a user space address. -+ * -+ * The specified range [roffset,roffset+len-1] must be within some registered -+ * window or windows of the remote nodes respectively. The range may intersect -+ * multiple registered windows, but only if those windows are contiguous in the -+ * registered address space. -+ * -+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using -+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_- -+ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the -+ * transfer is complete. Otherwise, the transfer may be performed asynchron- -+ * ously. The order in which any two aynchronous RMA operations complete -+ * is non-deterministic. The synchronization functions, scif_fence_mark()/ -+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to -+ * the completion of asynchronous RMA operations. -+ * -+ * The DMA transfer of individual bytes is not guaranteed to complete in -+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last -+ * cacheline or partial cacheline of the source range will become visible on -+ * the destination node after all other transferred data in the source -+ * range has become visible on the destination node. 
-+ * -+ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back -+ * the specified local memory range may be remain in a pinned state even after -+ * the specified transfer completes. This may reduce overhead if some or all of -+ * the same virtual address range is referenced in a subsequent call of -+ * scif_vreadfrom() or scif_vwriteto(). -+ * -+ * The optimal DMA performance will likely be realized if both -+ * addr and offset are cacheline aligned (are a multiple of 64). Lower -+ * performance will likely be realized if addr and offset are not cacheline -+ * aligned but are separated by some multiple of 64. The lowest level of -+ * performance is likely if addr and offset are not separated by a multiple of -+ * 64. -+ * -+ * The rma_flags argument is formed by ORing together zero or more of the -+ * following values: -+ *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA -+ * engine. -+ *- SCIF_RMA_USECACHE: allow registration caching -+ *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the -+ * transfer has completed. Passing this flag might result in -+ * the API busy waiting and consuming CPU cycles while the DMA -+ * transfer is in progress. -+ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of -+ * the source range becomes visible on the destination node -+ * after all other transferred data in the source range has -+ * become visible on the destination -+ * -+ *\return -+ * Upon successful completion, scif_vwriteto () returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EACCESS -+ * - Attempt to write to a read-only range or read from a write-only range -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. 
-+ *- EFAULT -+ * - Addresses in the range [addr,addr+len-1] are invalid -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - rma_flags is invalid -+ *- ENODEV -+ * - The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENXIO -+ * - Addresses in the range [roffset,roffset+len-1] are invalid for the -+ * registered address space of epd. -+ */ -+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t offset, -+int rma_flags); -+ -+/** -+ * scif_fence_mark - Mark previously issued RMAs -+ * \param epd endpoint descriptor -+ * \param flags control flags -+ * \param mark marked handle returned as output. -+ * -+ * scif_fence_mark() returns after marking the current set of all uncompleted -+ * RMAs initiated through the endpoint epd or the current set of all -+ * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are -+ * marked with a value returned at mark. The application may subsequently call -+ * scif_fence_wait(), passing the value returned at mark, to await completion -+ * of all RMAs so marked. -+ * -+ * The flags argument has exactly one of the following values: -+ *- SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint -+ * epd are marked -+ *- SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer -+ * of endpoint epd are marked -+ * -+ * \return -+ * Upon successful completion, scif_fence_mark() returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - flags is invalid, or -+ * - epd is not a valid endpoint descriptor, or -+ *- ENODEV -+ * - The remote node is lost. 
-+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOMEM -+ * - Insufficient kernel memory was available. -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ */ -+int scif_fence_mark(scif_epd_t epd, int flags, int *mark); -+ -+/** -+ * scif_fence_wait - Wait for completion of marked RMAs -+ * -+ * \param epd endpoint descriptor -+ * \param mark mark request -+ * -+ * scif_fence_wait() returns after all RMAs marked with mark have completed. -+ * The value passed in mark must have been obtained in a previous call to -+ * scif_fence_mark(). -+ * -+ *\return -+ * Upon successful completion, scif_fence_wait() returns 0; otherwise: in user -+ * mode -1 is returned and errno is set to indicate the error; in kernel mode -+ * the negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ *- ENODEV -+ * - The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOMEM -+ * - Insufficient kernel memory was available. -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ */ -+int scif_fence_wait(scif_epd_t epd, int mark); -+ -+/** -+ * scif_fence_signal - Request a signal on completion of RMAs -+ * \param loff local offset -+ * \param lval local value to write to loffset -+ * \param roff remote offset -+ * \param rval remote value to write to roffset -+ * \param flags flags -+ * -+ * scif_fence_signal() returns after marking the current set of all uncompleted -+ * RMAs initiated through the endpoint epd or marking the current set of all -+ * uncompleted RMAs initiated through the peer of endpoint epd. -+ * -+ * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the -+ * marked set, lval is written to memory at the address corresponding to offset -+ * loff in the local registered address space of epd. 
loff must be within a -+ * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion -+ * of the RMAs in the marked set, rval is written to memory at the address -+ * corresponding to offset roff in the remote registered address space of epd. -+ * roff must be within a remote registered window of the peer of epd. Note -+ * that any specified offset must be DWORD (4 byte / 32 bit) aligned. -+ * -+ * The flags argument is formed by OR'ing together the following: -+ *- Exactly one of the following values: -+ * - SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint -+ * epd are marked -+ * - SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer -+ * of endpoint epd are marked -+ *- One or more of the following values: -+ * - SCIF_SIGNAL_LOCAL: On completion of the marked set of RMAs, write lval to -+ * memory at the address corresponding to offset loff in the local registered -+ * address space of epd. -+ * - SCIF_SIGNAL_REMOTE: On completion of the marked set of RMAs, write rval to -+ * memory at the address corresponding to offset roff in the remote registered -+ * address space of epd. -+ * -+ *\return -+ * Upon successful completion, scif_fence_signal() returns 0; otherwise: in -+ * user mode -1 is returned and errno is set to indicate the error; in kernel -+ * mode the negative of one of the following errors is returned. -+ *\par Errors: -+ *- EBADF -+ * - epd is not a valid endpoint descriptor -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - flags is invalid, or -+ * - loff or roff are not DWORD aligned -+ *- ENODEV -+ * - The remote node is lost.
-+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENOTTY -+ * - epd is not a valid endpoint descriptor -+ *- ENXIO -+ * - loff is invalid for the registered address space of epd, or -+ * - roff is invalid for the registered address space of the peer of epd -+ */ -+int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff, -+uint64_t rval, int flags); -+ -+/** -+ * scif_get_nodeIDs - Return information about online nodes -+ * \param nodes array in which to return online node IDs -+ * \param len number of entries in the nodes array -+ * \param self address to place the node ID of the local node -+ * -+ * scif_get_nodeIDs() fills in the nodes array with up to len node IDs of the -+ * nodes in the SCIF network. If there is not enough space in nodes, as -+ * indicated by the len parameter, only len node IDs are returned in nodes. The -+ * return value of scif_get_nodeIDs() is the total number of nodes currently in -+ * the SCIF network. By checking the return value against the len parameter, the user may -+ * determine if enough space for nodes was allocated. -+ * -+ * The node ID of the local node is returned at self. -+ * -+ *\return -+ * Upon successful completion, scif_get_nodeIDs() returns the actual number of -+ * online nodes in the SCIF network including 'self'; otherwise: in user mode -+ * -1 is returned and errno is set to indicate the error; in kernel mode no -+ * errors are returned.
-+ * -+ *\par Errors: -+ *- EFAULT -+ * - Bad address -+ */ -+int scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self); -+ -+ -+/** -+ * scif_pin_pages - Pin a set of pages -+ * \param addr Virtual address of range to pin -+ * \param len Length of range to pin -+ * \param prot_flags Page protection flags -+ * \param map_flags Page classification flags -+ * \param pinned_pages Opaque handle of pinned pages -+ * -+ * scif_pin_pages() pins (locks in physical memory) the physical pages which -+ * back the range of virtual address pages starting at addr and continuing for -+ * len bytes. addr and len are constrained to be multiples of the page size. A -+ * successful scif_pin_pages() call returns an opaque pointer value at -+ * pinned_pages which may be used in subsequent calls to -+ * scif_register_pinned_pages(). -+ * -+ * The pages will remain pinned as long as there is a reference against the -+ * scif_pinned_pages_t value returned by scif_pin_pages() and until -+ * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A -+ * reference is added to a scif_pinned_pages_t value each time a window is -+ * created by calling scif_register_pinned_pages() and passing the -+ * scif_pinned_pages_t value. A reference is removed from a scif_pinned_pages_t value -+ * each time such a window is deleted. -+ * -+ * Subsequent operations which change the memory pages to which virtual -+ * addresses are mapped (such as mmap(), munmap(), scif_mmap() and -+ * scif_munmap()) have no effect on the scif_pinned_pages_t value or windows -+ * created against it. -+ * -+ * On Linux, if the process will fork(), it is recommended that the registered -+ * virtual address range be marked with MADV_DONTFORK. Doing so will prevent -+ * problems due to copy-on-write semantics.
-+ * -+ * The prot_flags argument is formed by OR'ing together one or more of the -+ * following values: -+ *- SCIF_PROT_READ: allow read operations against the pages -+ *- SCIF_PROT_WRITE: allow write operations against the pages -+ * The map_flags argument is formed by OR'ing together zero or more of the -+ * following values: -+ *- SCIF_MAP_KERNEL: interpret addr as a kernel space address. By default, addr -+ * is interpreted as a user space address. -+ * -+ *\return -+ * Upon successful completion, scif_pin_pages() returns 0; otherwise the -+ * negative of one of the following errors is returned. -+ *\par Errors: -+ *- EFAULT -+ * - Addresses in the range [addr,addr+len-1] are invalid -+ *- EINVAL -+ * - prot_flags is invalid, -+ * - map_flags is invalid, or -+ * - offset is negative -+ *- ENOMEM -+ * - Not enough space -+ */ -+int -+scif_pin_pages( -+ void *addr, -+ size_t len, -+ int prot_flags, -+ int map_flags, -+ scif_pinned_pages_t *pinned_pages); -+ -+/** -+ * scif_unpin_pages - Unpin a set of pages -+ * \param pinned_pages Opaque handle of pages to be unpinned -+ * -+ * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new -+ * windows against pinned_pages. The physical pages represented by pinned_pages -+ * will remain pinned until all windows previously registered against -+ * pinned_pages are deleted (the window is scif_unregister()'d and all -+ * references to the window are removed (see scif_unregister())). -+ * -+ * pinned_pages must have been obtained from a previous call to scif_pin_pages(). -+ * After calling scif_unpin_pages(), it is an error to pass pinned_pages to -+ * scif_register_pinned_pages(). -+ * -+ *\return -+ * Upon successful completion, scif_unpin_pages() returns 0; otherwise the -+ * negative of one of the following errors is returned.
-+ * -+ *\par Errors: -+ *- EINVAL -+ * - pinned_pages is not valid -+ */ -+int -+scif_unpin_pages( -+ scif_pinned_pages_t pinned_pages); -+ -+/** -+ * scif_register_pinned_pages - Mark a memory region for remote access. -+ * \param epd Endpoint descriptor -+ * \param pinned_pages Opaque handle of pinned pages -+ * \param offset Registered address space offset -+ * \param map_flags Flags which control where pages are mapped -+ * -+ * The scif_register_pinned_pages() function opens a window, a range of whole -+ * pages of the registered address space of the endpoint epd, starting at -+ * offset po. The value of po, further described below, is a function of the -+ * parameters offset and pinned_pages, and the value of map_flags. Each page of -+ * the window represents a corresponding physical memory page of the range -+ * represented by pinned_pages; the length of the window is the same as the -+ * length of the range represented by pinned_pages. A successful -+ * scif_register_pinned_pages() call returns po as the return value. -+ * -+ * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset -+ * exactly, and offset is constrained to be a multiple of the page size. The -+ * mapping established by scif_register_pinned_pages() will not replace any existing -+ * registration; an error is returned if any page of the new window would -+ * intersect an existing window. -+ * -+ * When SCIF_MAP_FIXED is not set, the implementation uses offset in an -+ * implementation-defined manner to arrive at po. The po so chosen will be an -+ * area of the registered address space that the implementation deems suitable -+ * for a mapping of the required size. An offset value of 0 is interpreted as -+ * granting the implementation complete freedom in selecting po, subject to -+ * constraints described below. A non-zero value of offset is taken to be a -+ * suggestion of an offset near which the mapping should be placed.
When the -+ * implementation selects a value for po, it does not replace any extant -+ * window. In all cases, po will be a multiple of the page size. -+ * -+ * The physical pages which are so represented by a window are available for -+ * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), -+ * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the -+ * physical pages represented by the window will not be reused by the memory -+ * subsystem for any other purpose. Note that the same physical page may be -+ * represented by multiple windows. -+ * -+ * Windows created by scif_register_pinned_pages() are unregistered by -+ * scif_unregister(). -+ * -+ * The map_flags argument is formed by OR'ing together zero or more of the -+ * following values: -+ *- SCIF_MAP_FIXED: interpret offset exactly -+ * -+ *\return -+ * Upon successful completion, scif_register_pinned_pages() returns the offset -+ * at which the mapping was placed (po); otherwise the negative of one of the -+ * following errors is returned. -+ *\par Errors: -+ *- EADDRINUSE -+ * - SCIF_MAP_FIXED is set in map_flags and pages in the new -+ * window would intersect an existing window -+ *- EAGAIN -+ * - The mapping could not be performed due to lack of resources -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. -+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - map_flags is invalid, or -+ * - SCIF_MAP_FIXED is set in map_flags, and offset is not a -+ * multiple of the page size, or -+ * - offset is negative -+ *- ENODEV -+ * - The remote node is lost.
-+ *- ENOMEM -+ * - Not enough space -+ *- ENOTCONN -+ * - The endpoint is not connected -+ */ -+off_t -+scif_register_pinned_pages( -+ scif_epd_t epd, -+ scif_pinned_pages_t pinned_pages, -+ off_t offset, -+ int map_flags); -+ -+/** -+ * scif_get_pages - Add references to remote registered pages -+ * \param epd endpoint descriptor -+ * \param offset registered address space offset -+ * \param len length of range of pages -+ * \param pages returned scif_range structure -+ * -+ * scif_get_pages() returns the addresses of the physical pages represented by -+ * those pages of the registered address space of the peer of epd, starting at -+ * offset and continuing for len bytes. offset and len are constrained to be -+ * multiples of the page size. -+ * -+ * All of the pages in the specified range [offset,offset+len-1] must be within -+ * a single window of the registered address space of the peer of epd. -+ * -+ * The addresses are returned as a virtually contiguous array pointed to by the -+ * phys_addr component of the scif_range structure whose address is returned in -+ * pages. The nr_pages component of scif_range is the length of the array. The -+ * prot_flags component of scif_range holds the protection flag value passed -+ * when the pages were registered. -+ * -+ * Each physical page whose address is returned by scif_get_pages() remains -+ * available and will not be released for reuse until the scif_range structure -+ * is returned in a call to scif_put_pages(). The scif_range structure returned -+ * by scif_get_pages() must be unmodified. -+ * -+ * It is an error to call scif_close() on an endpoint on which a scif_range -+ * structure of that endpoint has not been returned to scif_put_pages(). -+ * -+ *\return -+ * Upon successful completion, scif_get_pages() returns 0; otherwise the -+ * negative of one of the following errors is returned. -+ *\par Errors: -+ *- ECONNRESET -+ * - A connection was forcibly closed by a peer. 
-+ *- EINVAL -+ * - epd is not a valid endpoint descriptor, or -+ * - offset is not a multiple of the page size, or -+ * - offset is negative, or -+ * - len is not a multiple of the page size -+ *- ENODEV -+ * -The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected -+ *- ENXIO -+ * - Addresses in the range [offset,offset+len-1] are invalid -+ * for the registered address space of the peer epd. -+ */ -+int scif_get_pages( -+ scif_epd_t epd, -+ off_t offset, -+ size_t len, -+ struct scif_range **pages); -+ -+/** -+ * scif_put_pages - Remove references from remote registered pages -+ * \param pages pages to be returned -+ * -+ * scif_put_pages() releases a scif_range structure previously obtained by -+ * calling scif_get_pages(). The physical pages represented by pages may -+ * be reused when the window which represented those pages is unregistered. -+ * Therefore, those pages must not be accessed after calling scif_put_pages(). -+ * -+ *\return -+ * Upon successful completion, scif_put_pages() returns 0; otherwise the -+ * negative of one of the following errors is returned. -+ *\par Errors: -+ *- EINVAL -+ * - pages does not point to a valid scif_range structure, or -+ * - the scif_range structure pointed to by pages was already returned. -+ *- ENODEV -+ * - The remote node is lost. -+ *- ENOTCONN -+ * - The endpoint is not connected. -+ */ -+int scif_put_pages( -+ struct scif_range *pages); -+ -+/** -+ * scif_poll - Wait for some event on an endpoint -+ * \param epds Array of endpoint descriptors -+ * \param nepds Length of epds -+ * \param timeout Upper limit on time for which scif_poll() will -+ * block -+ * -+ * scif_poll() waits for one of a set of endpoints to become ready to perform -+ * an I/O operation. scif_poll() exposes a subset of the functionality of the -+ * POSIX standard poll() function. -+ * -+ * The epds argument specifies the endpoint descriptors to be examined and the -+ * events of interest for each endpoint descriptor. 
epds is a pointer to an -+ * array with one member for each open endpoint descriptor of interest. -+ * -+ * The number of items in the epds array is specified in nepds. The epd field -+ * of scif_pollepd is an endpoint descriptor of an open endpoint. The field -+ * events is a bitmask specifying the events which the application is -+ * interested in. The field revents is an output parameter, filled by the -+ * kernel with the events that actually occurred. The bits returned in revents -+ * can include any of those specified in events, or one of the values -+ * SCIF_POLLERR, SCIF_POLLHUP, or SCIF_POLLNVAL. (These three bits are -+ * meaningless in the events field, and will be set in the revents field -+ * whenever the corresponding condition is true.) -+ * -+ * If none of the events requested (and no error) has occurred for any of the -+ * endpoint descriptors, then scif_poll() blocks until one of the events occurs. -+ * -+ * The timeout argument specifies an upper limit on the time for which -+ * scif_poll() will block, in milliseconds. Specifying a negative value in -+ * timeout means an infinite timeout. -+ * -+ * The following bits may be set in events and returned in revents: -+ *- SCIF_POLLIN: Data may be received without blocking. For a connected -+ * endpoint, this means that scif_recv() may be called without blocking. For a -+ * listening endpoint, this means that scif_accept() may be called without -+ * blocking. -+ *- SCIF_POLLOUT: Data may be sent without blocking. For a connected endpoint, -+ * this means that scif_send() may be called without blocking. This bit value -+ * has no meaning for a listening endpoint and is ignored if specified. -+ * -+ * The following bits are only returned in revents, and are ignored if set in -+ * events: -+ *- SCIF_POLLERR: An error occurred on the endpoint -+ *- SCIF_POLLHUP: The connection to the peer endpoint was disconnected -+ *- SCIF_POLLNVAL: The specified endpoint descriptor is invalid. 
-+ * -+ *\return -+ * Upon successful completion, scif_poll() returns a non-negative value. A -+ * positive value indicates the total number of endpoint descriptors that have -+ * been selected (that is, endpoint descriptors for which the revents member is -+ * non-zero). A value of 0 indicates that the call timed out and no endpoint -+ * descriptors have been selected. Otherwise: in user mode -1 is returned and -+ * errno is set to indicate the error; in kernel mode the negative of one of -+ * the following errors is returned. -+ * -+ *\par Errors: -+ *- EFAULT -+ * - The array given as argument was not contained in the calling program's -+ * address space. -+ *- EINTR -+ * - A signal occurred before any requested event. -+ *- EINVAL -+ * - The nepds argument is greater than {OPEN_MAX} -+ *- ENOMEM -+ * - There was no space to allocate file descriptor tables. -+*/ -+int -+scif_poll( -+ struct scif_pollepd *epds, -+ unsigned int nepds, -+ long timeout); -+ -+/** -+ * scif_event_register - Register an event handler -+ * \param handler Event handler to be registered -+ * -+ * scif_event_register() registers a routine, handler, to be called when some -+ * event occurs. The event parameter to handler indicates the type of event -+ * which has occurred, and the corresponding component of the data parameter to -+ * handler provides additional data about the event. -+ * -+ * The following events are defined: -+ *- SCIF_NODE_ADDED: A node has been added to the SCIF network. The -+ * scif_node_added component of the data parameter to handler identifies the -+ * node. This event is informational. There are no requirements on the event -+ * handler. -+ *- SCIF_NODE_REMOVED: A node is being removed from the SCIF network. The -+ * scif_node_removed component of the data parameter to handler identifies the -+ * node.
Upon being called, and before returning, the event handler must -+ * return, using scif_put_pages(), all structures obtained using -+ * scif_get_pages() against an endpoint connected to the lost node. It is -+ * recommended and expected that the handler will also scif_close() all -+ * endpoints connected to the lost node. -+ * -+ *\return -+ * Upon successful completion scif_event_register() returns 0. -+ * -+ *\par Errors: -+ *- ENOMEM -+ * - There was no space to allocate file descriptor tables. -+*/ -+ -+int -+scif_event_register( -+ scif_callback_t handler); -+ -+/** -+ * scif_event_unregister - Unregister event handler -+ * \param handler Event handler to be unregistered -+ * -+ * scif_event_unregister() unregisters the handler which was registered -+ * previously by using scif_event_register(). -+ * -+ * WARNING: scif_event_unregister must be called before the module -+ * (that registered handlers) exits for every handler that is registered. -+ * Failure to do so will result in a crash of the scif module. -+ * -+ *\return -+ * Upon successful completion scif_event_unregister() returns 0. -+ *\par Errors: -+ *- EINVAL -+ * - If the event handler was not found/registered. -+*/ -+int -+scif_event_unregister( -+ scif_callback_t handler); -+ -+/* -+ * Note: The callee can use pci_resource_start(dev, index) and -+ * pci_resource_len(dev, index) to obtain the PCI resource starting -+ * physical address and length for valid non null indexes of the va -+ * array. MMIO bars will not have IORESOURCE_PREFETCH set in the -+ * flags obtained from pci_resource_flags(dev, index). va[index] -+ * will be set to NULL for invalid resources. -+ */ -+struct scif_pci_info { -+ /* pci_dev pointer associated with a node */ -+ struct pci_dev *pdev; -+ /* Ioremapped virtual address base for every valid PCIe resource */ -+ void __iomem *va[PCI_NUM_RESOURCES]; -+}; -+ -+/** -+ * scif_pci_info - Populate the scif_pci_info structure for a node.
-+ * \param node The node to query -+ * \param dev The scif_pci_info structure to populate. -+ * -+ * scif_pci_info() populates the provided scif_pci_info structure -+ * associated with a node. The requested node ID cannot be the same as -+ * the current node. This routine will only return success when called from -+ * the host. -+ * -+ *\return -+ * Upon successful completion, scif_pci_info() returns 0; otherwise the -+ * negative of one of the following errors is returned. -+ * -+ *\par Errors: -+ *- EINVAL -+ * - The requested node is not valid. -+ * - Called on MIC instead of the host. -+ *- ENODEV -+ * - No pci_dev association exists for the node. -+ */ -+int -+scif_pci_info( -+ uint16_t node, -+ struct scif_pci_info *dev); -+ -+ -+#ifdef __cplusplus -+} /* extern "C" */ -+#endif -+ -+#endif /* __SCIF_H__ */ diff --git a/tech-preview/xeon-phi/0015-fix-for-cm-event-handler.patch b/tech-preview/xeon-phi/0015-fix-for-cm-event-handler.patch new file mode 100644 index 0000000..c268dfc --- /dev/null +++ b/tech-preview/xeon-phi/0015-fix-for-cm-event-handler.patch @@ -0,0 +1,158 @@ +diff --git a/drivers/infiniband/ibp/cm/cm_server_msg.c b/drivers/infiniband/ibp/cm/cm_server_msg.c +index df9cde2..c7f8991 100644 +--- a/drivers/infiniband/ibp/cm/cm_server_msg.c ++++ b/drivers/infiniband/ibp/cm/cm_server_msg.c +@@ -90,19 +90,6 @@ void ib_copy_sa_path_rec(struct ibp_sa_path_rec *a, struct ib_sa_path_rec *b) + a->preference = b->preference; + } + +-void cleanup_cm_entry_list(void) +-{ +- struct cm_entry *entry; +- struct cm_entry *next; +- +- down_write(&list_rwsem); +- +- list_for_each_entry_safe(entry, next, &cm_entry_list, list) +- kfree(entry); +- +- up_write(&list_rwsem); +-} +- + static struct cm_entry *find_cm_entry(struct ib_cm_id *cm_id) + { + struct cm_entry *entry; +@@ -122,6 +109,29 @@ out: + return entry; + } + ++static struct cm_entry *remove_cm_entry(struct ib_cm_id *cm_id) ++{ ++ struct cm_entry *entry, *found = NULL; ++ ++ down_write(&list_rwsem); ++ ++ 
list_for_each_entry(entry, &cm_entry_list, list) ++ if (entry->cm_id == cm_id) { ++ found = entry; ++ break; ++ } ++ ++ if (!found) { ++ print_err("Remove could not find cm id %p\n", cm_id); ++ } else { ++ list_del(&found->list); ++ } ++ ++ up_write(&list_rwsem); ++ ++ return found; ++} ++ + /* find the entry id for the listen cm id so we can add the new cm id + * that is being accepted to the list so it can be found on future events + */ +@@ -129,27 +139,35 @@ static struct cm_entry *find_cm_entry_and_add(struct ib_cm_id *listen_id, + struct ib_cm_id *cm_id) + { + struct cm_entry *entry; +- struct cm_entry *listen_entry; ++ struct cm_entry *listen_entry = NULL; ++ ++ down_write(&list_rwsem); ++ ++ list_for_each_entry(entry, &cm_entry_list, list) ++ if (entry->cm_id == listen_id) { ++ listen_entry = entry; ++ break; ++ } + +- listen_entry = find_cm_entry(listen_id); + if (!listen_entry) { + print_err("Could not find listen id %p\n", listen_id); +- return NULL; ++ goto out; + } + + entry = kzalloc(sizeof(struct cm_entry), GFP_KERNEL); + if (!entry) { + print_err("kzalloc failed\n"); +- return NULL; ++ listen_entry = NULL; ++ goto out; + } + + entry->client = listen_entry->client; + entry->cm_id = cm_id; + +- down_write(&list_rwsem); + list_add(&entry->list, &cm_entry_list); +- up_write(&list_rwsem); + ++out: ++ up_write(&list_rwsem); + return listen_entry; + } + +@@ -292,7 +310,7 @@ static int ibp_event_handler(struct ib_cm_id *cm_id, + data_length + info_length, GFP_KERNEL); + if (!event_work) { + print_err("kzalloc failed\n"); +- return -ENOMEM; ++ return 0; + } + + if (ib_cm_event->event == IB_CM_REQ_RECEIVED) { +@@ -307,8 +325,9 @@ static int ibp_event_handler(struct ib_cm_id *cm_id, + entry = find_cm_entry(cm_id); + + if (!entry) { ++ print_err("event (%d) on non-existent id: %p\n", ib_cm_event->event, cm_id); + kfree(event_work); +- return -EINVAL; ++ return 0; + } + + client = entry->client; +@@ -406,7 +425,7 @@ static int ibp_event_handler(struct ib_cm_id 
*cm_id, + ib_cm_event->param.send_status; + break; + default: +- print_dbg("event not handled %d\n", ib_cm_event->event); ++ print_err("event not handled %d\n", ib_cm_event->event); + break; + } + +@@ -509,16 +528,13 @@ int ibp_cmd_destroy_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr) + msg = (struct ibp_response_msg *) client->tx_buf; + len = sizeof(*msg); + +- entry = find_cm_entry(cm_id); +- if (!entry) ++ entry = remove_cm_entry(cm_id); ++ if (!entry) { ++ ret = -EINVAL; + goto send_resp; +- +- down_write(&list_rwsem); +- list_del(&entry->list); +- up_write(&list_rwsem); ++ } + + kfree(entry); +- + ib_destroy_cm_id(cm_id); + + send_resp: +diff --git a/drivers/infiniband/ibp/cm/server.h b/drivers/infiniband/ibp/cm/server.h +index a66141b..ebe8461 100644 +--- a/drivers/infiniband/ibp/cm/server.h ++++ b/drivers/infiniband/ibp/cm/server.h +@@ -101,7 +101,6 @@ struct ibp_event { + } while (0) + + int ibp_process_recvs(void *p); +-void cleanup_cm_entry_list(void); + + int ibp_cmd_create_cm_id(struct ibp_client *client, struct ibp_msg_header *hdr); + int ibp_cmd_destroy_cm_id(struct ibp_client *client,