]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
r3882: uDAPL provider for OpenIB socket CM.
authorJames Lentini <jlentini@netapp.com>
Wed, 26 Oct 2005 21:10:26 +0000 (21:10 +0000)
committerJames Lentini <jlentini@netapp.com>
Wed, 26 Oct 2005 21:10:26 +0000 (21:10 +0000)
Signed-off by: Arlin Davis <ardavis@ichips.intel.com>
Signed-off by: James Lentini <jlentini@netapp.com>

dapl/openib_scm/README [new file with mode: 0644]
dapl/openib_scm/dapl_ib_cm.c [new file with mode: 0644]
dapl/openib_scm/dapl_ib_cq.c [new file with mode: 0644]
dapl/openib_scm/dapl_ib_dto.h [new file with mode: 0644]
dapl/openib_scm/dapl_ib_mem.c [new file with mode: 0644]
dapl/openib_scm/dapl_ib_qp.c [new file with mode: 0644]
dapl/openib_scm/dapl_ib_util.c [new file with mode: 0644]
dapl/openib_scm/dapl_ib_util.h [new file with mode: 0644]
dapl/udapl/Makefile

diff --git a/dapl/openib_scm/README b/dapl/openib_scm/README
new file mode 100644 (file)
index 0000000..239dfe6
--- /dev/null
@@ -0,0 +1,40 @@
+
+OpenIB uDAPL provider using socket-based CM, in lieu of uCM/uAT, to set up QP/channels.
+
+to build:
+
+cd dapl/udapl
+make VERBS=openib_scm clean
+make VERBS=openib_scm
+
+
+Modifications to common code:
+
+- added dapl/openib_scm directory 
+
+       dapl/udapl/Makefile
+
+New files for openib_scm provider
+
+       dapl/openib_scm/dapl_ib_cq.c
+       dapl/openib_scm/dapl_ib_dto.h
+       dapl/openib_scm/dapl_ib_mem.c
+       dapl/openib_scm/dapl_ib_qp.c
+       dapl/openib_scm/dapl_ib_util.c
+       dapl/openib_scm/dapl_ib_util.h
+       dapl/openib_scm/dapl_ib_cm.c
+
+A simple dapl test just for openib_scm testing...
+
+       test/dtest/dtest.c
+       test/dtest/makefile
+
+       server: dtest -s 
+       client: dtest -h hostname
+
+known issues:
+
+       no memory windows support in ibverbs, dat_create_rmr fails.
+       
+
+
diff --git a/dapl/openib_scm/dapl_ib_cm.c b/dapl/openib_scm/dapl_ib_cm.c
new file mode 100644 (file)
index 0000000..11f2dc7
--- /dev/null
@@ -0,0 +1,1074 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          dapl_ib_cm.c
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - connection management
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_cr_util.h"
+#include "dapl_name_service.h"
+#include "dapl_ib_util.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <netinet/tcp.h>
+#include <sysfs/libsysfs.h>
+
+/* prototypes for the file-local (static) helpers defined below */
+
+/* reads the LID of a device port from sysfs */
+static uint16_t dapli_get_lid( struct ibv_device *dev, int port );
+
+/* ACTIVE side: connect a socket and exchange QP information */
+static DAT_RETURN dapli_socket_connect ( DAPL_EP               *ep_ptr,
+                                        DAT_IA_ADDRESS_PTR     r_addr,
+                                        DAT_CONN_QUAL          r_qual,
+                                        DAT_COUNT              p_size,
+                                        DAT_PVOID              p_data );
+
+/* PASSIVE side: create the listening socket for a service point */
+static DAT_RETURN dapli_socket_listen ( DAPL_IA                        *ia_ptr,
+                                       DAT_CONN_QUAL           serviceID,
+                                       DAPL_SP                 *sp_ptr );
+
+/* PASSIVE side: accept a socket and read the peer's QP information */
+static DAT_RETURN dapli_socket_accept( ib_cm_srvc_handle_t cm_ptr );
+
+/* PASSIVE side: move the QP to RTS and reply with local QP information */
+static DAT_RETURN dapli_socket_accept_final(   DAPL_EP         *ep_ptr,
+                                               DAPL_CR         *cr_ptr,
+                                               DAT_COUNT       p_size,
+                                               DAT_PVOID       p_data );
+
+/*
+ * dapli_get_lid
+ *
+ * XXX temporary hack to get lid: reads the "lid" attribute of the given
+ * device port from sysfs.
+ *
+ * Input:
+ *     dev             verbs device whose name selects the sysfs directory
+ *     port            port number on that device
+ *
+ * Returns:
+ *     the LID parsed from the attribute, or 0 when the sysfs mount cannot
+ *     be found, the attribute path does not fit, or the read fails
+ */
+static uint16_t dapli_get_lid(IN struct ibv_device *dev, IN int port)
+{
+       char path[128];
+       char val[16];
+       char name[256];
+       int n;
+
+       if (sysfs_get_mnt_path(path, sizeof path)) {
+               fprintf(stderr, "Couldn't find sysfs mount.\n");
+               return 0;
+       }
+
+       /* bounded format: never write past the end of name[] */
+       n = snprintf(name, sizeof name, "%s/class/infiniband/%s/ports/%d/lid",
+                    path, ibv_get_device_name(dev), port);
+       if (n < 0 || (size_t)n >= sizeof name) {
+               fprintf(stderr, "Couldn't build LID attribute path.\n");
+               return 0;
+       }
+
+       if (sysfs_read_attribute_value(name, val, sizeof val)) {
+               fprintf(stderr, "Couldn't read LID at %s\n", name);
+               return 0;
+       }
+       /* base 0 accepts the 0x-prefixed form as well as decimal */
+       return (uint16_t) strtol(val, NULL, 0);
+}
+
+/*
+ * dapli_socket_connect
+ *
+ * ACTIVE: Create socket, connect, and exchange QP information 
+ *
+ * Input:
+ *     ep_ptr          endpoint whose QP is being connected
+ *     r_addr          remote address; its port field is overwritten
+ *                     with r_qual before connecting
+ *     r_qual          remote connection qualifier, used as the TCP port
+ *     p_size          size of the private data to send
+ *     p_data          private data to send
+ *
+ * Returns:
+ *     DAT_SUCCESS                     connected, CONNECTED event posted
+ *     DAT_INSUFFICIENT_RESOURCES      CM allocation or socket() failed
+ *     DAT_INVALID_ADDRESS             connect() failed
+ *     DAT_INTERNAL_ERROR              exchange or QP transition failed,
+ *                                     LOCAL_FAILURE event posted
+ */
+static DAT_RETURN 
+dapli_socket_connect ( DAPL_EP                 *ep_ptr,
+                       DAT_IA_ADDRESS_PTR      r_addr,
+                       DAT_CONN_QUAL           r_qual,
+                       DAT_COUNT               p_size,
+                       DAT_PVOID               p_data )
+{
+       ib_cm_handle_t  cm_ptr;
+       DAPL_IA         *ia_ptr = ep_ptr->header.owner_ia;
+       int             len, opt = 1;
+       struct iovec    iovec[2];
+       short           rtu_data = htons(0x0E0F);
+       
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d\n", r_qual);
+                       
+       /*
+        *  Allocate CM and initialize
+        */
+       if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL ) {
+               return DAT_INSUFFICIENT_RESOURCES;
+       }
+
+       (void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
+       cm_ptr->socket = -1;
+
+       /* create, connect, sockopt, and exchange QP information */
+       if ((cm_ptr->socket = socket(AF_INET,SOCK_STREAM,0)) < 0 ) {
+               dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+               return DAT_INSUFFICIENT_RESOURCES;
+       }
+
+       ((struct sockaddr_in*)r_addr)->sin_port = htons(r_qual);
+
+       if ( connect(cm_ptr->socket, r_addr, sizeof(*r_addr)) < 0 ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                            " connect: %s on r_qual %d\n",
+                            strerror(errno), (unsigned int)r_qual);
+               /* release the descriptor too, not only the CM object */
+               close( cm_ptr->socket );
+               dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+               return DAT_INVALID_ADDRESS;
+       }
+       setsockopt(cm_ptr->socket,IPPROTO_TCP,TCP_NODELAY,&opt,sizeof(opt));
+       
+       /* Send QP info, IA address, and private data */
+       cm_ptr->dst.qpn = ep_ptr->qp_handle->qp_num;
+       cm_ptr->dst.port = ia_ptr->hca_ptr->port_num;
+       cm_ptr->dst.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev, 
+                                        ia_ptr->hca_ptr->port_num );
+       cm_ptr->dst.ia_address = ia_ptr->hca_ptr->hca_address;
+       cm_ptr->dst.p_size = p_size;
+       iovec[0].iov_base = &cm_ptr->dst;
+       iovec[0].iov_len  = sizeof(ib_qp_cm_t);
+       if ( p_size ) {
+               iovec[1].iov_base = p_data;
+               iovec[1].iov_len  = p_size;
+       }
+       len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+       if ( len != (p_size + sizeof(ib_qp_cm_t)) ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                            " connect write: ERR %s, wcnt=%d\n",
+                            strerror(errno), len); 
+               goto bail;
+       }
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                    " connect: SRC port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                    cm_ptr->dst.port, cm_ptr->dst.lid, 
+                    cm_ptr->dst.qpn, cm_ptr->dst.p_size ); 
+
+       /* read DST information into cm_ptr, overwrite SRC info */
+       len = readv( cm_ptr->socket, iovec, 1 );
+       if ( len != sizeof(ib_qp_cm_t) ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                            " connect read: ERR %s, rcnt=%d\n",
+                            strerror(errno), len); 
+               goto bail;
+       }
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                    " connect: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                    cm_ptr->dst.port, cm_ptr->dst.lid, 
+                    cm_ptr->dst.qpn, cm_ptr->dst.p_size ); 
+
+       /* validate private data size before reading */
+       if ( cm_ptr->dst.p_size > IB_MAX_REP_PDATA_SIZE ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                            " connect read: psize (%d) wrong\n",
+                            cm_ptr->dst.p_size ); 
+               goto bail;
+       }
+
+       /* read private data into cm_handle if any present */
+       if ( cm_ptr->dst.p_size ) {
+               iovec[0].iov_base = cm_ptr->p_data;
+               iovec[0].iov_len  = cm_ptr->dst.p_size;
+               len = readv( cm_ptr->socket, iovec, 1 );
+               if ( len != cm_ptr->dst.p_size ) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                               " connect read pdata: ERR %s, rcnt=%d\n",
+                               strerror(errno), len); 
+                       goto bail;
+               }
+       }
+
+       /* modify QP to RTR and then to RTS with remote info */
+       if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+                                   IBV_QPS_RTR, &cm_ptr->dst ) != DAT_SUCCESS )
+               goto bail;
+
+       if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+                                   IBV_QPS_RTS, &cm_ptr->dst ) != DAT_SUCCESS )
+               goto bail;
+                
+       ep_ptr->qp_state = IB_QP_STATE_RTS;
+
+       /*
+        * complete handshake after final QP state change; the passive
+        * side waits for this marker, so a failed write is a failed connect
+        */
+       len = write(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
+       if ( len != (int)sizeof(rtu_data) ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                            " connect write: ERR %s, wcnt=%d\n",
+                            strerror(errno), len); 
+               goto bail;
+       }
+
+       /* init cm_handle and post the event with private data */
+       ep_ptr->cm_handle = cm_ptr;
+       dapl_dbg_log( DAPL_DBG_TYPE_EP," ACTIVE: connected!\n" ); 
+       dapl_evd_connection_callback(   ep_ptr->cm_handle, 
+                                       IB_CME_CONNECTED, 
+                                       cm_ptr->p_data, 
+                                       ep_ptr );       
+       return DAT_SUCCESS;
+
+bail:
+       /* close socket, free cm structure and post error event */
+       if ( cm_ptr->socket >= 0 ) 
+               close(cm_ptr->socket);
+       dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+       dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
+
+       /* ep_ptr->cm_handle was never pointed at cm_ptr on this path */
+       dapl_evd_connection_callback(   ep_ptr->cm_handle, 
+                                       IB_CME_LOCAL_FAILURE, 
+                                       NULL, 
+                                       ep_ptr );
+       return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapli_socket_listen
+ *
+ * PASSIVE: Create socket, listen, accept, exchange QP information 
+ *
+ * This routine creates, binds and listens on the socket, then queues the
+ * new CM object on the HCA's ib_trans.list.
+ *
+ * Input:
+ *     ia_ptr          IA whose HCA owns the listener list
+ *     serviceID       connection qualifier, used as the TCP port
+ *     sp_ptr          service point that receives the CM handle
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES      CM allocation or socket() failed
+ *     DAT_CONN_QUAL_IN_USE            bind/listen failed with EADDRINUSE
+ *     DAT_CONN_QUAL_UNAVAILABLE       bind/listen failed otherwise
+ */
+static DAT_RETURN 
+dapli_socket_listen (  DAPL_IA         *ia_ptr,
+                       DAT_CONN_QUAL   serviceID,
+                       DAPL_SP         *sp_ptr )
+{
+       struct sockaddr_in      addr;
+       ib_cm_srvc_handle_t     cm_ptr = NULL;
+       int                     opt = 1;
+       DAT_RETURN              dat_status = DAT_SUCCESS;
+
+       /* NOTE(review): casts assume DAT_CONN_QUAL may be wider than int */
+       dapl_dbg_log (  DAPL_DBG_TYPE_EP,
+                       " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+                       ia_ptr, (int)serviceID, sp_ptr);
+
+       /* Allocate CM and initialize */
+       if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL) 
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       (void) dapl_os_memzero( cm_ptr, sizeof( *cm_ptr ) );
+       
+       cm_ptr->socket = cm_ptr->l_socket = -1;
+       cm_ptr->sp = sp_ptr;
+       cm_ptr->hca_ptr = ia_ptr->hca_ptr;
+       
+       /* bind, listen, set sockopt, accept, exchange data */
+       if ((cm_ptr->l_socket = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                               "socket for listen returned %d\n", errno);
+               dat_status = DAT_INSUFFICIENT_RESOURCES;
+               goto bail;
+       }
+
+       setsockopt(cm_ptr->l_socket,SOL_SOCKET,SO_REUSEADDR,&opt,sizeof(opt));
+
+       /* clear padding so bind() never sees uninitialized stack bytes */
+       (void) dapl_os_memzero( &addr, sizeof( addr ) );
+       addr.sin_port        = htons(serviceID);
+       addr.sin_family      = AF_INET;
+       addr.sin_addr.s_addr = INADDR_ANY;
+
+       if (( bind( cm_ptr->l_socket,(struct sockaddr*)&addr, sizeof(addr) ) < 0) ||
+                  (listen( cm_ptr->l_socket, 128 ) < 0) ) {
+       
+               dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+                               " listen: ERROR %s on conn_qual 0x%x\n",
+                               strerror(errno),(unsigned int)serviceID); 
+
+               if ( errno == EADDRINUSE )
+                       dat_status = DAT_CONN_QUAL_IN_USE;
+               else
+                       dat_status = DAT_CONN_QUAL_UNAVAILABLE;
+
+               goto bail;
+       }
+       
+       /* set cm_handle for this service point, save listen socket */
+       sp_ptr->cm_srvc_handle = cm_ptr;
+
+       /* add to SP->CR thread list */
+       dapl_llist_init_entry((DAPL_LLIST_ENTRY*)&cm_ptr->entry);
+       dapl_os_lock( &cm_ptr->hca_ptr->ib_trans.lock );
+       dapl_llist_add_tail(&cm_ptr->hca_ptr->ib_trans.list, 
+                           (DAPL_LLIST_ENTRY*)&cm_ptr->entry, cm_ptr);
+       dapl_os_unlock(&cm_ptr->hca_ptr->ib_trans.lock);
+
+       /* serviceID is already host order here; log it unswapped */
+       dapl_dbg_log( DAPL_DBG_TYPE_CM,
+                       " listen: qual 0x%x cr %p s_fd %d\n",
+                       (unsigned int)serviceID, cm_ptr, cm_ptr->l_socket ); 
+       
+       return dat_status;
+bail:
+       dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+                       " listen: ERROR on conn_qual 0x%x\n",
+                       (unsigned int)serviceID); 
+       if ( cm_ptr->l_socket >= 0 )
+               close( cm_ptr->l_socket );
+       dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+       return dat_status;
+}
+
+
+/*
+ * dapli_socket_accept
+ *
+ * PASSIVE: accept a connection on the listen socket, read the remote QP
+ *         information and private data, and trigger the CR event.
+ *         The local QP information is sent later, from
+ *         dapli_socket_accept_final.
+ *
+ * Input:
+ *     cm_ptr          listening CM object (supplies l_socket, sp, hca_ptr)
+ *
+ * Returns:
+ *     DAT_SUCCESS                     request read, CR callback invoked
+ *     DAT_INSUFFICIENT_RESOURCES      CM allocation failed
+ *     DAT_INTERNAL_ERROR              accept, read or size check failed
+ */
+static DAT_RETURN 
+dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
+{
+       ib_cm_handle_t  acm_ptr;
+       void            *p_data = NULL;
+       int             len;
+       socklen_t       addr_len;
+       DAT_RETURN      dat_status = DAT_SUCCESS;
+               
+       /* Allocate accept CM and initialize */
+       if ((acm_ptr = dapl_os_alloc(sizeof(*acm_ptr))) == NULL) 
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       (void) dapl_os_memzero( acm_ptr, sizeof( *acm_ptr ) );
+       
+       acm_ptr->socket = -1;
+       acm_ptr->sp = cm_ptr->sp;
+       acm_ptr->hca_ptr = cm_ptr->hca_ptr;
+
+       /* accept() takes a socklen_t *, which need not be an int */
+       addr_len = sizeof(acm_ptr->dst.ia_address);
+       acm_ptr->socket = accept(cm_ptr->l_socket, 
+                               (struct sockaddr*)&acm_ptr->dst.ia_address, 
+                               &addr_len );
+
+       if ( acm_ptr->socket < 0 ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                       " accept: ERR %s on FD %d l_cr %p\n",
+                       strerror(errno),cm_ptr->l_socket,cm_ptr); 
+               dat_status = DAT_INTERNAL_ERROR;
+               goto bail;
+       }
+
+       /* read in DST QP info, IA address. check for private data */
+       len = read( acm_ptr->socket, &acm_ptr->dst, sizeof(ib_qp_cm_t) );
+       if ( len != sizeof(ib_qp_cm_t) ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                       " accept read: ERR %s, rcnt=%d\n",
+                       strerror(errno), len); 
+               dat_status = DAT_INTERNAL_ERROR;
+               goto bail;
+
+       }
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+               " accept: DST port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+               acm_ptr->dst.port, acm_ptr->dst.lid, 
+               acm_ptr->dst.qpn, acm_ptr->dst.p_size ); 
+
+       /* validate private data size before reading */
+       if ( acm_ptr->dst.p_size > IB_MAX_REQ_PDATA_SIZE ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                       " accept read: psize (%d) wrong\n",
+                       acm_ptr->dst.p_size ); 
+               dat_status = DAT_INTERNAL_ERROR;
+               goto bail;
+       }
+
+       /* read private data into cm_handle if any present */
+       if ( acm_ptr->dst.p_size ) {
+               len = read( acm_ptr->socket, 
+                           acm_ptr->p_data, acm_ptr->dst.p_size );
+               if ( len != acm_ptr->dst.p_size ) {
+                       dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                               " accept read pdata: ERR %s, rcnt=%d\n",
+                               strerror(errno), len ); 
+                       dat_status = DAT_INTERNAL_ERROR;
+                       goto bail;
+               }
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                               " accept: psize=%d read\n",
+                               acm_ptr->dst.p_size); 
+               p_data = acm_ptr->p_data;
+       }
+       
+       /* trigger CR event and return SUCCESS */
+       dapls_cr_callback(  acm_ptr,
+                           IB_CME_CONNECTION_REQUEST_PENDING,
+                           p_data,
+                           acm_ptr->sp );
+
+       return DAT_SUCCESS;
+
+bail:
+       if ( acm_ptr->socket >=0 )
+               close( acm_ptr->socket );
+       dapl_os_free( acm_ptr, sizeof( *acm_ptr ) );
+       /* every path into bail has set dat_status to its error code */
+       return dat_status;
+}
+
+
+/*
+ * dapli_socket_accept_final
+ *
+ * PASSIVE: move the QP to RTR and RTS using the remote information
+ *         already read, send local QP information and private data,
+ *         then wait for the active side's 0x0e0f handshake marker.
+ *
+ * Input:
+ *     ep_ptr          endpoint whose QP is being connected
+ *     cr_ptr          connection request holding the accepted CM handle
+ *     p_size          size of the reply private data
+ *     p_data          reply private data
+ *
+ * Returns:
+ *     DAT_SUCCESS             handshake complete, CONNECTED callback made
+ *     DAT_LENGTH_ERROR        p_size exceeds IB_MAX_REP_PDATA_SIZE
+ *     DAT_INTERNAL_ERROR      no accepted socket, or any later step failed
+ */
+static DAT_RETURN 
+dapli_socket_accept_final( DAPL_EP             *ep_ptr,
+                          DAPL_CR              *cr_ptr,
+                          DAT_COUNT            p_size,
+                          DAT_PVOID            p_data )
+{
+       DAPL_IA         *ia_ptr = ep_ptr->header.owner_ia;
+       ib_cm_handle_t  cm_ptr = cr_ptr->ib_cm_handle;
+       ib_qp_cm_t      qp_cm;
+       struct iovec    iovec[2];
+       int             len;
+       short           rtu_data = 0;
+
+       if (p_size >  IB_MAX_REP_PDATA_SIZE) 
+               return DAT_LENGTH_ERROR;
+
+       /* must have a accepted socket */
+       if ( cm_ptr->socket < 0 )
+               return DAT_INTERNAL_ERROR;
+       
+       /* modify QP to RTR and then to RTS with remote info already read */
+       if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+                                   IBV_QPS_RTR, &cm_ptr->dst ) != DAT_SUCCESS )
+               goto bail;
+
+       if ( dapls_modify_qp_state( ep_ptr->qp_handle, 
+                                   IBV_QPS_RTS, &cm_ptr->dst ) != DAT_SUCCESS )
+               goto bail;
+
+       ep_ptr->qp_state = IB_QP_STATE_RTS;
+       
+       /* Send QP info, IA address, and private data */
+       qp_cm.qpn = ep_ptr->qp_handle->qp_num;
+       qp_cm.port = ia_ptr->hca_ptr->port_num;
+       qp_cm.lid = dapli_get_lid( ia_ptr->hca_ptr->ib_trans.ib_dev, 
+                                  ia_ptr->hca_ptr->port_num );
+       qp_cm.ia_address = ia_ptr->hca_ptr->hca_address;
+       qp_cm.p_size = p_size;
+       iovec[0].iov_base = &qp_cm;
+       iovec[0].iov_len  = sizeof(ib_qp_cm_t);
+       if (p_size) {
+               iovec[1].iov_base = p_data;
+               iovec[1].iov_len  = p_size;
+       }
+       len = writev( cm_ptr->socket, iovec, (p_size ? 2:1) );
+       if (len != (p_size + sizeof(ib_qp_cm_t))) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                            " accept_final: ERR %s, wcnt=%d\n",
+                            strerror(errno), len); 
+               goto bail;
+       }
+       dapl_dbg_log(DAPL_DBG_TYPE_EP, 
+                    " accept_final: SRC port=0x%x lid=0x%x, qpn=0x%x, psize=%d\n",
+                    qp_cm.port, qp_cm.lid, qp_cm.qpn, qp_cm.p_size ); 
+       
+       /* complete handshake after final QP state change */
+       len = read(cm_ptr->socket, &rtu_data, sizeof(rtu_data) );
+       if ( len != sizeof(rtu_data) || ntohs(rtu_data) != 0x0e0f ) {
+               dapl_dbg_log(DAPL_DBG_TYPE_ERR, 
+                            " accept_final: ERR %s, rcnt=%d rdata=%x\n",
+                            strerror(errno), len, ntohs(rtu_data) ); 
+               goto bail;
+       }
+
+       /* final data exchange if remote QP state is good to go */
+       dapl_dbg_log( DAPL_DBG_TYPE_EP," PASSIVE: connected!\n" ); 
+       dapls_cr_callback ( cm_ptr, IB_CME_CONNECTED, NULL, cm_ptr->sp );
+       return DAT_SUCCESS;
+
+bail:
+       dapl_dbg_log( DAPL_DBG_TYPE_ERR," accept_final: ERR !QP_RTR_RTS \n"); 
+       /* test the descriptor, not the CM pointer itself */
+       if ( cm_ptr->socket >= 0 )
+               close( cm_ptr->socket );
+       /* NOTE(review): cr_ptr->ib_cm_handle still refers to the freed CM */
+       dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+       dapls_ib_reinit_ep( ep_ptr ); /* reset QP state */
+
+       return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapls_ib_connect
+ *
+ * Initiate a connection with the passive listener on another node
+ *
+ * Input:
+ *     ep_handle,
+ *     remote_ia_address,
+ *     remote_conn_qual,
+ *     private_data_size       size of private data
+ *     private_data            pointer to private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_INVALID_PARAMETER   ep_handle is NULL
+ *     otherwise whatever dapli_socket_connect returns
+ *
+ */
+DAT_RETURN
+dapls_ib_connect (
+       IN  DAT_EP_HANDLE               ep_handle,
+       IN  DAT_IA_ADDRESS_PTR          remote_ia_address,
+       IN  DAT_CONN_QUAL               remote_conn_qual,
+       IN  DAT_COUNT                   private_data_size,
+       IN  void                        *private_data )
+{
+       dapl_dbg_log ( DAPL_DBG_TYPE_EP,
+                       " connect(ep_handle %p ....)\n", ep_handle);
+       /*
+        *  Sanity check: nothing can be connected without an endpoint,
+        *  so report the bad argument instead of claiming success
+        */
+       if ( NULL == ep_handle ) 
+               return DAT_INVALID_PARAMETER;
+
+       return (dapli_socket_connect(   (DAPL_EP*)ep_handle,
+                                       remote_ia_address, 
+                                       remote_conn_qual,
+                                       private_data_size, private_data ));
+}
+
+/*
+ * dapls_ib_disconnect
+ *
+ * Disconnect an EP: close the CM socket, reinitialize the EP, and post
+ * the DISCONNECTED event through the CR callback (when the EP has a
+ * cr_ptr) or the EVD connection callback (otherwise, in which case the
+ * CM object is also freed).
+ *
+ * Input:
+ *     ep_ptr,
+ *     close_flags     not examined by this routine
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_disconnect (
+       IN      DAPL_EP                 *ep_ptr,
+       IN      DAT_CLOSE_FLAGS         close_flags )
+{
+       ib_cm_handle_t  cm_ptr = ep_ptr->cm_handle;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                       "dapls_ib_disconnect(ep_handle %p ....)\n",
+                       ep_ptr);
+
+       /* no CM object attached: nothing to close or tear down */
+       if ( cm_ptr == NULL )
+               return DAT_SUCCESS;
+
+       if ( cm_ptr->socket >= 0 ) {
+               close( cm_ptr->socket );
+               cm_ptr->socket = -1;
+       }
+       
+       /* reinit to modify QP state */
+       dapls_ib_reinit_ep(ep_ptr);
+
+       if ( ep_ptr->cr_ptr ) {
+               dapls_cr_callback ( ep_ptr->cm_handle,
+                                   IB_CME_DISCONNECTED,
+                                   NULL,
+                                   ((DAPL_CR *)ep_ptr->cr_ptr)->sp_ptr );
+       } else {
+               dapl_evd_connection_callback ( ep_ptr->cm_handle,
+                                               IB_CME_DISCONNECTED,
+                                               NULL,
+                                               ep_ptr );
+               ep_ptr->cm_handle = NULL;
+               dapl_os_free( cm_ptr, sizeof( *cm_ptr ) );
+       }       
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_disconnect_clean
+ *
+ * Hook for cleaning up outstanding connection data once the final
+ * disconnect callback has occurred. This provider keeps nothing that
+ * needs cleaning here, so the routine does no work.
+ *
+ * Input:
+ *     ep_ptr          DAPL_EP
+ *     active          Indicates active side of connection
+ *     ib_cm_event     CM event associated with the disconnect
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     void
+ *
+ */
+void
+dapls_ib_disconnect_clean (
+       IN  DAPL_EP                     *ep_ptr,
+       IN  DAT_BOOLEAN                 active,
+       IN  const ib_cm_events_t        ib_cm_event )
+{
+       /* intentionally empty */
+}
+
+/*
+ * dapls_ib_setup_conn_listener
+ *
+ * Have the CM set up a connection listener; all of the work is done by
+ * dapli_socket_listen.
+ *
+ * Input:
+ *     ia_ptr          IA handle
+ *     ServiceID       connection qualifier to listen on
+ *     sp_ptr          service point the listener belongs to
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *     DAT_CONN_QUAL_UNAVAILABLE
+ *     DAT_CONN_QUAL_IN_USE
+ *
+ */
+DAT_RETURN
+dapls_ib_setup_conn_listener (
+       IN  DAPL_IA             *ia_ptr,
+       IN  DAT_UINT64          ServiceID,
+       IN  DAPL_SP             *sp_ptr )
+{
+       DAT_RETURN      listen_status;
+
+       listen_status = dapli_socket_listen( ia_ptr, ServiceID, sp_ptr );
+       return listen_status;
+}
+
+
+/*
+ * dapls_ib_remove_conn_listener
+ *
+ * Have the CM remove a connection listener: close the listen socket
+ * and detach the CM object from the service point.
+ *
+ * Input:
+ *     ia_ptr          IA handle
+ *     sp_ptr          service point whose listener is removed
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_remove_conn_listener (
+       IN  DAPL_IA             *ia_ptr,
+       IN  DAPL_SP             *sp_ptr )
+{
+       ib_cm_srvc_handle_t     cm_ptr = sp_ptr->cm_srvc_handle;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                       "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+                       ia_ptr, sp_ptr, cm_ptr );
+
+       /* close listen socket, detach cm_srvc_handle and return */
+       if ( cm_ptr != NULL ) {
+               if ( cm_ptr->l_socket >= 0 ) {
+                       close( cm_ptr->l_socket );
+                       /* invalidate the descriptor that was just closed */
+                       cm_ptr->l_socket = -1;
+               }
+               /* cr_thread will free */
+               sp_ptr->cm_srvc_handle = NULL;
+       }
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_accept_connection
+ *
+ * Accept a pending connection request: give the endpoint a QP when it
+ * has none attached, then run the passive-side handshake in
+ * dapli_socket_accept_final.
+ *
+ * Input:
+ *     cr_handle
+ *     ep_handle
+ *     p_size          size of the reply private data
+ *     p_data          reply private data
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     the dapls_ib_qp_alloc status when QP allocation fails,
+ *     otherwise whatever dapli_socket_accept_final returns
+ *
+ */
+DAT_RETURN
+dapls_ib_accept_connection (
+       IN  DAT_CR_HANDLE       cr_handle,
+       IN  DAT_EP_HANDLE       ep_handle,
+       IN  DAT_COUNT           p_size,
+       IN  const DAT_PVOID     p_data )
+{
+       DAPL_EP                 *ep = (DAPL_EP *) ep_handle;
+       DAPL_CR                 *cr = (DAPL_CR *) cr_handle;
+       
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n",
+                     cr_handle, ep_handle, p_data, p_size  );
+
+       /* an unattached endpoint needs a QP before the handshake */
+       if ( ep->qp_state == DAPL_QP_STATE_UNATTACHED ) {
+               DAT_RETURN alloc_rc = dapls_ib_qp_alloc( ep->header.owner_ia, 
+                                                        ep, ep );
+               if ( alloc_rc != DAT_SUCCESS )
+                       return alloc_rc;
+       }
+    
+       return dapli_socket_accept_final( ep, cr, p_size, p_data );
+}
+
+
+/*
+ * dapls_ib_reject_connection
+ *
+ * Reject a connection by closing its socket; nothing is sent to the peer.
+ *
+ * Input:
+ *     ib_cm_handle    CM object of the connection request
+ *     reject_reason   only logged by this routine
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *
+ */
+DAT_RETURN
+dapls_ib_reject_connection (
+       IN  ib_cm_handle_t      ib_cm_handle,
+       IN  int                 reject_reason )
+{
+       ib_cm_srvc_handle_t     cm_ptr = ib_cm_handle;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     "dapls_ib_reject_connection(cm_handle %p reason %x)\n",
+                     ib_cm_handle, reject_reason );
+
+       /* just close the socket and return; descriptor 0 is valid too */
+       if ( cm_ptr->socket >= 0 ) {
+               close( cm_ptr->socket );
+               cm_ptr->socket = -1;
+       }
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cm_remote_addr
+ *
+ * Obtain the remote IP address given a connection
+ *
+ * Input:
+ *     dat_handle      an EP or CR handle, told apart by header magic
+ *
+ * Output:
+ *     remote_ia_address: where to place the remote address
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      NULL handle, handle that is neither an EP
+ *                             nor a CR, or handle with no CM object
+ *
+ */
+DAT_RETURN
+dapls_ib_cm_remote_addr (
+       IN      DAT_HANDLE      dat_handle,
+       OUT     DAT_SOCK_ADDR6  *remote_ia_address )
+{
+       DAPL_HEADER     *header;
+       ib_cm_handle_t  ib_cm_handle;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
+                     dat_handle );
+
+       if (dat_handle == NULL)
+               return DAT_INVALID_HANDLE;
+
+       header = (DAPL_HEADER *)dat_handle;
+
+       if (header->magic == DAPL_MAGIC_EP) 
+               ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+       else if (header->magic == DAPL_MAGIC_CR) 
+               ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+       else 
+               return DAT_INVALID_HANDLE;
+
+       /* a handle with no CM object attached has no remote address */
+       if (ib_cm_handle == NULL)
+               return DAT_INVALID_HANDLE;
+
+       dapl_os_memcpy( remote_ia_address, 
+                       &ib_cm_handle->dst.ia_address, 
+                       sizeof(DAT_SOCK_ADDR6) );
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_private_data_size
+ *
+ * Return the maximum private data size for a connection op type
+ *
+ * Input:
+ *     prd_ptr         private data pointer; not examined here
+ *     conn_op         connection operation type
+ *
+ * Output:
+ *     None
+ *
+ * Returns:
+ *     the IB_MAX_*_PDATA_SIZE constant matching conn_op,
+ *     or 0 for an operation type that is not recognized
+ *
+ */
+int dapls_ib_private_data_size (
+       IN      DAPL_PRIVATE    *prd_ptr,
+       IN      DAPL_PDATA_OP   conn_op)
+{
+       if (conn_op == DAPL_PDATA_CONN_REQ)
+               return IB_MAX_REQ_PDATA_SIZE;
+
+       if (conn_op == DAPL_PDATA_CONN_REP)
+               return IB_MAX_REP_PDATA_SIZE;
+
+       if (conn_op == DAPL_PDATA_CONN_REJ)
+               return IB_MAX_REJ_PDATA_SIZE;
+
+       if (conn_op == DAPL_PDATA_CONN_DREQ)
+               return IB_MAX_DREQ_PDATA_SIZE;
+
+       if (conn_op == DAPL_PDATA_CONN_DREP)
+               return IB_MAX_DREP_PDATA_SIZE;
+
+       /* unknown operation type */
+       return 0;
+}
+
+/*
+ * Map all socket CM event codes to the DAT equivalent.
+ *
+ * Each entry pairs one provider CM event with the DAT event number
+ * reported for it; several CM events share the same DAT event.
+ */
+#define DAPL_IB_EVENT_CNT      11
+
+static struct ib_cm_event_map
+{
+       const ib_cm_events_t    ib_cm_event;
+       DAT_EVENT_NUMBER        dat_event_num;
+       } ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+       /* 00 */  { IB_CME_CONNECTED,   
+                                       DAT_CONNECTION_EVENT_ESTABLISHED}, 
+       /* 01 */  { IB_CME_DISCONNECTED,        
+                                       DAT_CONNECTION_EVENT_DISCONNECTED},
+       /* 02 */  { IB_CME_DISCONNECTED_ON_LINK_DOWN, 
+                                       DAT_CONNECTION_EVENT_DISCONNECTED},
+       /* 03 */  { IB_CME_CONNECTION_REQUEST_PENDING,  
+                                       DAT_CONNECTION_REQUEST_EVENT},
+       /* 04 */  { IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+                                       DAT_CONNECTION_REQUEST_EVENT},
+       /* 05 */  { IB_CME_DESTINATION_REJECT,
+                                       DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+       /* 06 */  { IB_CME_DESTINATION_REJECT_PRIVATE_DATA,             
+                                       DAT_CONNECTION_EVENT_PEER_REJECTED},
+       /* 07 */  { IB_CME_DESTINATION_UNREACHABLE,     
+                                       DAT_CONNECTION_EVENT_UNREACHABLE},
+       /* 08 */  { IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+                                       DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+       /* 09 */  { IB_CME_LOCAL_FAILURE,
+                                       DAT_CONNECTION_EVENT_BROKEN},
+       /* NOTE(review): entry 10 differs from 09 only by prefix (IB_CM_ vs
+        * IB_CME_) -- confirm both enumerators are intended */
+       /* 10 */  { IB_CM_LOCAL_FAILURE,
+                                       DAT_CONNECTION_EVENT_BROKEN}
+};
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ *     ib_cm_event     event provided to the dapl callback routine
+ *     active          switch indicating active or passive connection
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_EVENT_NUMBER of translated provider value
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event (
+       IN    const ib_cm_events_t      ib_cm_event,
+       IN    DAT_BOOLEAN               active)
+{
+       DAT_EVENT_NUMBER        translated = 0;
+       int                     row = 0;
+
+       /* 'active' is only used by the debug trace below */
+       (void) active;
+
+       /* codes greater than IB_CM_LOCAL_FAILURE are not translated */
+       if (ib_cm_event > IB_CM_LOCAL_FAILURE)
+               return (DAT_EVENT_NUMBER) 0;
+
+       /* first matching row wins; 0 is reported when nothing matches */
+       while (row < DAPL_IB_EVENT_CNT) {
+               if (ib_cm_event_map[row].ib_cm_event == ib_cm_event) {
+                       translated = ib_cm_event_map[row].dat_event_num;
+                       break;
+               }
+               row++;
+       }
+
+       dapl_dbg_log (DAPL_DBG_TYPE_CALLBACK,
+               "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
+               active ? "active" : "passive",  ib_cm_event, translated);
+
+       return translated;
+}
+
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a provider CM event given a DAT connection event.
+ *
+ * Input:
+ *     dat_event_num   DAT event we need an equivalent CM event for
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     ib_cm_event of translated DAPL value
+ */
+ib_cm_events_t
+dapls_ib_get_cm_event (
+       IN    DAT_EVENT_NUMBER          dat_event_num)
+{
+    ib_cm_events_t     found = 0;
+    int                        row = 0;
+
+    /*
+     * Scan the map in table order; several CM events share a DAT event,
+     * so the first row carrying dat_event_num is the one returned.
+     * 0 is returned when no row matches.
+     */
+    while (row < DAPL_IB_EVENT_CNT) {
+       if ( ib_cm_event_map[row].dat_event_num == dat_event_num ) {
+               found = ib_cm_event_map[row].ib_cm_event;
+               break;
+       }
+       row++;
+    }
+
+    return found;
+}
+
+/* async CR processing thread to avoid blocking applications */
+/*
+ * cr_thread
+ *
+ * Thread body that services the entries on hca_ptr->ib_trans.list.
+ * It marks cr_state as IB_THREAD_RUN, loops while the state stays RUN,
+ * and sets cr_state to IB_THREAD_EXIT just before returning.
+ *
+ * Input:
+ *     arg     struct dapl_hca pointer for the owning HCA
+ */
+void cr_thread(void *arg) 
+{
+    struct dapl_hca    *hca_ptr = arg;
+    ib_cm_srvc_handle_t        cr, next_cr;
+    int                        max_fd;
+    fd_set             rfd,rfds;
+    struct timeval     to;
+     
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread: ENTER hca %p\n",hca_ptr);
+
+    /* ib_trans.lock is held while the list is walked */
+    dapl_os_lock( &hca_ptr->ib_trans.lock );
+    hca_ptr->ib_trans.cr_state = IB_THREAD_RUN;
+    while (hca_ptr->ib_trans.cr_state == IB_THREAD_RUN) {
+       
+       /* rfds collects every live socket for the timed select below */
+       FD_ZERO( &rfds ); 
+       max_fd = -1;
+       
+       if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list))
+            next_cr = dapl_llist_peek_head (&hca_ptr->ib_trans.list);
+       else
+           next_cr = NULL;
+
+       while (next_cr) {
+           cr = next_cr;
+           dapl_dbg_log (DAPL_DBG_TYPE_CM," thread: cm_ptr %p\n", cr );
+           /*
+            * Entries whose socket is already -1, or any entry once the
+            * thread state is no longer RUN, are unlinked and freed.
+            * The successor is fetched before the entry is removed.
+            */
+           if (cr->l_socket == -1 || 
+               hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+
+               dapl_dbg_log(DAPL_DBG_TYPE_CM," thread: Freeing %p\n", cr);
+               next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+                                               (DAPL_LLIST_ENTRY*)&cr->entry );
+               dapl_llist_remove_entry(&hca_ptr->ib_trans.list, 
+                                       (DAPL_LLIST_ENTRY*)&cr->entry);
+               dapl_os_free( cr, sizeof(*cr) );
+               continue;
+           }
+                 
+           FD_SET( cr->l_socket, &rfds ); /* add to select set */
+           if ( cr->l_socket > max_fd )
+               max_fd = cr->l_socket;
+
+           /* individual select poll to check for work */
+           FD_ZERO(&rfd);
+           FD_SET(cr->l_socket, &rfd);
+           /* lock is dropped around select() and dapli_socket_accept() */
+           dapl_os_unlock(&hca_ptr->ib_trans.lock);    
+           to.tv_sec  = 0;
+           to.tv_usec = 0;
+           /*
+            * On a select error, or a non-zero return from
+            * dapli_socket_accept(), the socket is closed and marked -1;
+            * the entry itself is freed by the check at the top of a
+            * later walk.
+            */
+           if ( select(cr->l_socket + 1,&rfd, NULL, NULL, &to) < 0) {
+               dapl_dbg_log (DAPL_DBG_TYPE_CM,
+                         " thread: ERR %s on cr %p sk %d\n", 
+                         strerror(errno), cr, cr->l_socket);
+               close(cr->l_socket);
+               cr->l_socket = -1;
+           } else if ( FD_ISSET(cr->l_socket, &rfd) && 
+                       dapli_socket_accept(cr)) {
+               close(cr->l_socket);
+               cr->l_socket = -1;
+           }
+           dapl_os_lock( &hca_ptr->ib_trans.lock );
+           next_cr =  dapl_llist_next_entry(&hca_ptr->ib_trans.list,
+                                            (DAPL_LLIST_ENTRY*)&cr->entry );
+       } 
+       /* wait up to 100 ms on all collected sockets with the lock released */
+       dapl_os_unlock( &hca_ptr->ib_trans.lock );
+       to.tv_sec  = 0;
+       to.tv_usec = 100000; /* wakeup and check destroy */
+       select(max_fd + 1, &rfds, NULL, NULL, &to);
+       dapl_os_lock( &hca_ptr->ib_trans.lock );
+    } 
+    /* the EXIT state is written after the lock has been released */
+    dapl_os_unlock( &hca_ptr->ib_trans.lock ); 
+    hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
+    dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cr_thread(hca %p) exit\n",hca_ptr);
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_scm/dapl_ib_cq.c b/dapl/openib_scm/dapl_ib_cq.c
new file mode 100644 (file)
index 0000000..c343f4c
--- /dev/null
@@ -0,0 +1,619 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          dapl_ib_cq.c
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - completion queue
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+#include "dapl_evd_util.h"
+#include "dapl_ring_buffer_util.h"
+#include <sys/poll.h>
+#include <signal.h>
+
+/*
+ * Start cq_thread for this HCA and wait until it reports IB_THREAD_RUN.
+ * Returns 0 on success, 1 if the thread could not be created.
+ */
+int dapli_cq_thread_init(struct dapl_hca *hca_ptr)
+{
+        struct timespec poll_delay, unslept;
+
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_init(%p)\n", hca_ptr);
+
+        /* mark the state INIT, then create the CQ event thread */
+        hca_ptr->ib_trans.cq_state = IB_THREAD_INIT;
+        if (dapl_os_thread_create(cq_thread, (void*)hca_ptr,
+                                  &hca_ptr->ib_trans.cq_thread) != DAT_SUCCESS)
+        {
+                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+                             " cq_thread_init: failed to create thread\n");
+                return 1;
+        }
+
+        /* re-check the state every 20 ms until cq_thread sets it to RUN */
+        for (;;) {
+                if (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN)
+                        break;
+
+                poll_delay.tv_sec = 0;
+                poll_delay.tv_nsec = 20000000; /* 20 ms */
+                dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                             " cq_thread_init: waiting for cq_thread\n");
+                nanosleep (&poll_delay, &unslept);
+        }
+
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_init(%d) exit\n",getpid());
+        return 0;
+}
+
+/*
+ * Ask a running cq_thread to stop and wait until it reports
+ * IB_THREAD_EXIT.  Does nothing unless the state is IB_THREAD_RUN.
+ */
+void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr)
+{
+        struct timespec poll_delay, unslept;
+
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_destroy(%p)\n", hca_ptr);
+
+        if (hca_ptr->ib_trans.cq_state != IB_THREAD_RUN)
+                return;
+
+        /* flag the cancel request, then signal the thread with SIGUSR1 */
+        hca_ptr->ib_trans.cq_state = IB_THREAD_CANCEL;
+        pthread_kill(hca_ptr->ib_trans.cq_thread, SIGUSR1);
+        dapl_dbg_log(DAPL_DBG_TYPE_CM," cq_thread_destroy(%p) cancel\n",hca_ptr);
+
+        /* re-check the state every 200 ms until the thread reports EXIT */
+        for (;;) {
+                if (hca_ptr->ib_trans.cq_state == IB_THREAD_EXIT)
+                        break;
+
+                poll_delay.tv_sec = 0;
+                poll_delay.tv_nsec = 200000000; /* 200 ms */
+                dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+                             " cq_thread_destroy: waiting for cq_thread\n");
+                nanosleep (&poll_delay, &unslept);
+        }
+
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread_destroy(%d) exit\n",getpid());
+}
+
+/* catch the signal */
+static void ib_cq_handler(int signum)
+{
+        /* intentionally empty: the signal number is not examined */
+        (void) signum;
+}
+
+/*
+ * cq_thread
+ *
+ * Thread body that waits on the HCA completion channel and hands each
+ * CQ event to dapl_evd_dto_callback().  Sets cq_state to IB_THREAD_RUN
+ * on entry and to IB_THREAD_EXIT on every exit path, so that
+ * dapli_cq_thread_destroy(), which waits for EXIT, cannot wait forever.
+ *
+ * Input:
+ *     arg     struct dapl_hca pointer for the owning HCA
+ */
+void cq_thread( void *arg )
+{
+        struct dapl_hca *hca_ptr = arg;
+        struct dapl_evd *evd_ptr;
+        struct ibv_cq   *ibv_cq = NULL;
+        sigset_t        sigset;
+
+        /*
+         * Unblock SIGUSR1 and install an empty handler for it;
+         * dapli_cq_thread_destroy() sends SIGUSR1 to this thread.
+         */
+        sigemptyset(&sigset);
+        sigaddset(&sigset,SIGUSR1);
+        pthread_sigmask(SIG_UNBLOCK, &sigset, NULL);
+        signal(SIGUSR1, ib_cq_handler);
+
+        hca_ptr->ib_trans.cq_state = IB_THREAD_RUN;
+
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread: ENTER hca %p\n",hca_ptr);
+
+        /* wait on DTO event, or signal to abort */
+        while (hca_ptr->ib_trans.cq_state == IB_THREAD_RUN) {
+                struct pollfd cq_fd = {
+                        .fd      = hca_ptr->ib_trans.ib_cq->fd,
+                        .events  = POLLIN,
+                        .revents = 0
+                };
+                if ((poll(&cq_fd, 1, -1) == 1) &&
+                        (!ibv_get_cq_event(hca_ptr->ib_trans.ib_cq,
+                                   &ibv_cq, (void*)&evd_ptr))) {
+
+                        if (DAPL_BAD_HANDLE(evd_ptr, DAPL_MAGIC_EVD)) {
+                                ibv_ack_cq_events(ibv_cq, 1);
+                                /*
+                                 * Leave the loop instead of returning so
+                                 * the EXIT state below is still published.
+                                 */
+                                break;
+                        }
+
+                        /* process DTO event via callback */
+                        dapl_evd_dto_callback ( hca_ptr->ib_hca_handle,
+                                                evd_ptr->ib_cq_handle,
+                                                (void*)evd_ptr );
+
+                        ibv_ack_cq_events(ibv_cq, 1);
+                }
+        }
+        hca_ptr->ib_trans.cq_state = IB_THREAD_EXIT;
+        dapl_dbg_log(DAPL_DBG_TYPE_UTIL," cq_thread: EXIT: hca %p \n", hca_ptr);
+}
+
+
+/*
+ * Map all verbs DTO completion codes to the DAT equivalent.
+ *
+ * Not returned by verbs:     DAT_DTO_ERR_PARTIAL_PACKET
+ */
+/* One row per verbs work-completion status, searched by ib_status value. */
+static struct ib_status_map
+{
+    int                                ib_status;
+    DAT_DTO_COMPLETION_STATUS  dat_status;
+} ib_status_map[] = {
+       /* 00 */ { .ib_status = IBV_WC_SUCCESS,            .dat_status = DAT_DTO_SUCCESS },
+       /* 01 */ { .ib_status = IBV_WC_LOC_LEN_ERR,        .dat_status = DAT_DTO_ERR_LOCAL_LENGTH },
+       /* 02 */ { .ib_status = IBV_WC_LOC_QP_OP_ERR,      .dat_status = DAT_DTO_ERR_LOCAL_EP },
+       /* 03 */ { .ib_status = IBV_WC_LOC_EEC_OP_ERR,     .dat_status = DAT_DTO_ERR_TRANSPORT },
+       /* 04 */ { .ib_status = IBV_WC_LOC_PROT_ERR,       .dat_status = DAT_DTO_ERR_LOCAL_PROTECTION },
+       /* 05 */ { .ib_status = IBV_WC_WR_FLUSH_ERR,       .dat_status = DAT_DTO_ERR_FLUSHED },
+       /* 06 */ { .ib_status = IBV_WC_MW_BIND_ERR,        .dat_status = DAT_RMR_OPERATION_FAILED },
+       /* 07 */ { .ib_status = IBV_WC_BAD_RESP_ERR,       .dat_status = DAT_DTO_ERR_BAD_RESPONSE },
+       /* 08 */ { .ib_status = IBV_WC_LOC_ACCESS_ERR,     .dat_status = DAT_DTO_ERR_LOCAL_PROTECTION },
+       /* 09 */ { .ib_status = IBV_WC_REM_INV_REQ_ERR,    .dat_status = DAT_DTO_ERR_REMOTE_RESPONDER },
+       /* 10 */ { .ib_status = IBV_WC_REM_ACCESS_ERR,     .dat_status = DAT_DTO_ERR_REMOTE_ACCESS },
+       /* 11 */ { .ib_status = IBV_WC_REM_OP_ERR,         .dat_status = DAT_DTO_ERR_REMOTE_RESPONDER },
+       /* 12 */ { .ib_status = IBV_WC_RETRY_EXC_ERR,      .dat_status = DAT_DTO_ERR_TRANSPORT },
+       /* 13 */ { .ib_status = IBV_WC_RNR_RETRY_EXC_ERR,  .dat_status = DAT_DTO_ERR_RECEIVER_NOT_READY },
+       /* 14 */ { .ib_status = IBV_WC_LOC_RDD_VIOL_ERR,   .dat_status = DAT_DTO_ERR_LOCAL_PROTECTION },
+       /* 15 */ { .ib_status = IBV_WC_REM_INV_RD_REQ_ERR, .dat_status = DAT_DTO_ERR_REMOTE_RESPONDER },
+       /* 16 */ { .ib_status = IBV_WC_REM_ABORT_ERR,      .dat_status = DAT_DTO_ERR_REMOTE_RESPONDER },
+       /* 17 */ { .ib_status = IBV_WC_INV_EECN_ERR,       .dat_status = DAT_DTO_ERR_TRANSPORT },
+       /* 18 */ { .ib_status = IBV_WC_INV_EEC_STATE_ERR,  .dat_status = DAT_DTO_ERR_TRANSPORT },
+       /* 19 */ { .ib_status = IBV_WC_FATAL_ERR,          .dat_status = DAT_DTO_ERR_TRANSPORT },
+       /* 20 */ { .ib_status = IBV_WC_RESP_TIMEOUT_ERR,   .dat_status = DAT_DTO_ERR_RECEIVER_NOT_READY },
+       /* 21 */ { .ib_status = IBV_WC_GENERAL_ERR,        .dat_status = DAT_DTO_ERR_TRANSPORT },
+};
+
+/*
+ * dapls_ib_get_dto_status
+ *
+ * Return the DAT status of a DTO operation
+ *
+ * Input:
+ *     cqe_ptr         pointer to completion queue entry
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     Value from ib_status_map table above
+ */
+
+DAT_DTO_COMPLETION_STATUS
+dapls_ib_get_dto_status (
+       IN ib_work_completion_t         *cqe_ptr)
+{
+       /*
+        * Bound the search by the real number of table rows rather than by
+        * the numeric value of IBV_WC_GENERAL_ERR, so the loop can never
+        * index past the end of ib_status_map if the two ever differ.
+        */
+       const int       map_entries =
+               (int) (sizeof(ib_status_map) / sizeof(ib_status_map[0]));
+       uint32_t        ib_status;
+       int             i;
+
+       ib_status = DAPL_GET_CQE_STATUS (cqe_ptr);
+
+       /*
+       * Due to the implementation of verbs completion code, we need to
+       * search the table for the correct value rather than assuming
+       * linear distribution.
+       */
+       for (i = 0; i < map_entries; i++) {
+               if (ib_status == ib_status_map[i].ib_status) {
+                       /* every status other than success is traced */
+                       if ( ib_status != IBV_WC_SUCCESS ) {
+                               dapl_dbg_log (DAPL_DBG_TYPE_DTO_COMP_ERR,
+                               " DTO completion ERROR: %d: op %#x\n", 
+                               ib_status, DAPL_GET_CQE_OPTYPE (cqe_ptr));
+                       }
+                       return ib_status_map[i].dat_status;
+               }
+       }
+
+       /* status not present in the table: trace it and report failure */
+       dapl_dbg_log (DAPL_DBG_TYPE_DTO_COMP_ERR,
+                       " DTO completion ERROR: %d: op %#x\n", 
+                       ib_status,
+                       DAPL_GET_CQE_OPTYPE (cqe_ptr));
+
+       return DAT_DTO_FAILURE;
+}
+    
+/*
+ * Translate a verbs async event type into a DAT async event number.
+ * *async_event is written only for the event types handled below; all
+ * other types return DAT_NOT_IMPLEMENTED and leave it untouched.
+ */
+DAT_RETURN dapls_ib_get_async_event (
+       IN  ib_error_record_t           *err_record,
+       OUT DAT_EVENT_NUMBER            *async_event)
+{
+    int        event = err_record->event_type;
+
+    switch (event) {
+       /* OVERFLOW error */
+       case IBV_EVENT_CQ_ERR:
+           *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW;
+           return DAT_SUCCESS;
+
+       /* INTERNAL errors */
+       case IBV_EVENT_DEVICE_FATAL:
+           *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
+           return DAT_SUCCESS;
+
+       /* CATASTROPHIC errors */
+       case IBV_EVENT_PORT_ERR:
+           *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
+           return DAT_SUCCESS;
+
+       /* BROKEN QP error */
+       case IBV_EVENT_SQ_DRAINED:
+       case IBV_EVENT_QP_FATAL:
+       case IBV_EVENT_QP_REQ_ERR:
+       case IBV_EVENT_QP_ACCESS_ERR:
+           *async_event = DAT_ASYNC_ERROR_EP_BROKEN;
+           return DAT_SUCCESS;
+
+       /* connection completion */
+       case IBV_EVENT_COMM_EST:
+           *async_event = DAT_CONNECTION_EVENT_ESTABLISHED;
+           return DAT_SUCCESS;
+
+       /* TODO: process HW state changes */
+       case IBV_EVENT_PATH_MIG:
+       case IBV_EVENT_PATH_MIG_ERR:
+       case IBV_EVENT_PORT_ACTIVE:
+       case IBV_EVENT_LID_CHANGE:
+       case IBV_EVENT_PKEY_CHANGE:
+       case IBV_EVENT_SM_CHANGE:
+       default:
+           break;
+    }
+
+    return DAT_ERROR (DAT_NOT_IMPLEMENTED, 0);
+}
+
+/*
+ * dapl_ib_cq_alloc
+ *
+ * Alloc a CQ
+ *
+ * Input:
+ *     ia_handle               IA handle
+ *     evd_ptr                 pointer to EVD struct
+ *     cqlen                   minimum QLen
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_alloc (
+       IN  DAPL_IA             *ia_ptr,
+       IN  DAPL_EVD            *evd_ptr,
+       IN  DAT_COUNT           *cqlen )
+{
+       dapl_dbg_log ( DAPL_DBG_TYPE_UTIL, 
+               "dapls_ib_cq_alloc: evd %p cqlen=%d \n", evd_ptr, *cqlen );
+
+       struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
+
+#ifdef CQ_WAIT_OBJECT
+       if (evd_ptr->cq_wait_obj_handle)
+               channel = evd_ptr->cq_wait_obj_handle;
+#endif
+
+       /* Call IB verbs to create CQ */
+       evd_ptr->ib_cq_handle = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                                             *cqlen,
+                                             evd_ptr,
+                                             channel, 0);
+       
+       if (evd_ptr->ib_cq_handle == IB_INVALID_HANDLE) 
+               return  DAT_INSUFFICIENT_RESOURCES;
+
+       /* arm cq for events */
+       dapls_set_cq_notify(ia_ptr, evd_ptr);
+       
+        /* update with returned cq entry size */
+       *cqlen = evd_ptr->ib_cq_handle->cqe;
+
+       dapl_dbg_log ( DAPL_DBG_TYPE_UTIL, 
+               "dapls_ib_cq_alloc: new_cq %p cqlen=%d \n", 
+               evd_ptr->ib_cq_handle, *cqlen );
+
+       return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_cq_resize
+ *
+ * Resize a CQ by creating a replacement and destroying the original
+ *
+ * Input:
+ *     ia_handle               IA handle
+ *     evd_ptr                 pointer to EVD struct
+ *     cqlen                   minimum QLen
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN
+dapls_ib_cq_resize (
+       IN  DAPL_IA     *ia_ptr,
+       IN  DAPL_EVD    *evd_ptr,
+       IN  DAT_COUNT   *cqlen )
+{
+       ib_cq_handle_t  new_cq;
+       struct ibv_comp_channel *channel = ia_ptr->hca_ptr->ib_trans.ib_cq;
+
+       /* IB verbs doe not support resize. Try to re-create CQ
+        * with new size. Can only be done if QP is not attached. 
+        * destroy EBUSY == QP still attached.
+        */
+
+#ifdef CQ_WAIT_OBJECT
+       if (evd_ptr->cq_wait_obj_handle)
+               channel = evd_ptr->cq_wait_obj_handle;
+#endif
+
+       /* Call IB verbs to create CQ */
+       new_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, *cqlen,
+                              evd_ptr, channel, 0);
+
+       if (new_cq == IB_INVALID_HANDLE) 
+               return  DAT_INSUFFICIENT_RESOURCES;
+       
+       /* destroy the original and replace if successful */
+       if (ibv_destroy_cq(evd_ptr->ib_cq_handle)) {
+               ibv_destroy_cq(new_cq);
+               return(dapl_convert_errno(errno,"resize_cq"));
+       }
+               
+       /* update EVD with new cq handle and size */
+       evd_ptr->ib_cq_handle = new_cq;
+       *cqlen = new_cq->cqe;
+
+       /* arm cq for events */
+       dapls_set_cq_notify (ia_ptr, evd_ptr);
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_cq_free
+ *
+ * destroy a CQ
+ *
+ * Input:
+ *     ia_handle               IA handle
+ *     evd_ptr                 pointer to EVD struct
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_PARAMETER
+ *
+ */
+DAT_RETURN dapls_ib_cq_free (
+       IN  DAPL_IA             *ia_ptr,
+       IN  DAPL_EVD            *evd_ptr)
+{
+       /* no CQ attached to this EVD: nothing to destroy */
+       if ( evd_ptr->ib_cq_handle == IB_INVALID_HANDLE )
+               return DAT_SUCCESS;
+
+       /* copy all entries on CQ to EVD before destroying */
+       dapls_evd_copy_cq(evd_ptr);
+
+       /* the handle is invalidated only when the destroy succeeds */
+       if (ibv_destroy_cq(evd_ptr->ib_cq_handle))
+               return(dapl_convert_errno(errno,"destroy_cq"));
+
+       evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_set_cq_notify
+ *
+ * Set the CQ notification for next
+ *
+ * Input:
+ *     hca_handl               hca handle
+ *     DAPL_EVD                evd handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno 
+ */
+DAT_RETURN dapls_set_cq_notify (
+       IN  DAPL_IA         *ia_ptr,
+       IN  DAPL_EVD        *evd_ptr)
+{
+       /* request notification with a notify argument of 0 */
+       int     rc = ibv_req_notify_cq( evd_ptr->ib_cq_handle, 0 );
+
+       if (rc == 0)
+               return DAT_SUCCESS;
+
+       return(dapl_convert_errno(errno,"notify_cq"));
+}
+
+/*
+ * dapls_ib_completion_notify
+ *
+ * Set the CQ notification type
+ *
+ * Input:
+ *     hca_handl               hca handle
+ *     evd_ptr                 evd handle
+ *     type                    notification type
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno
+ */
+DAT_RETURN dapls_ib_completion_notify (
+       IN  ib_hca_handle_t             hca_handle,
+       IN  DAPL_EVD                    *evd_ptr,
+       IN  ib_notification_type_t      type)
+{
+       /* pass the caller's notification type straight to verbs */
+       int     rc = ibv_req_notify_cq( evd_ptr->ib_cq_handle, type );
+
+       if (rc == 0)
+               return DAT_SUCCESS;
+
+       return(dapl_convert_errno(errno,"notify_cq_type"));
+}
+
+/*
+ * dapls_ib_completion_poll
+ *
+ * CQ poll for completions
+ *
+ * Input:
+ *     hca_handl               hca handle
+ *     evd_ptr                 evd handle
+ *     wc_ptr                  work completion
+ *
+ * Output:
+ *     none
+ *
+ * Returns: 
+ *     DAT_SUCCESS
+ *     DAT_QUEUE_EMPTY
+ *     
+ */
+DAT_RETURN dapls_ib_completion_poll (
+       IN  DAPL_HCA                    *hca_ptr,
+       IN  DAPL_EVD                    *evd_ptr,
+       IN  ib_work_completion_t        *wc_ptr)
+{
+       /*
+        * Ask for a single completion; any result other than exactly one
+        * entry is reported as an empty queue.
+        */
+       if (ibv_poll_cq(evd_ptr->ib_cq_handle, 1, wc_ptr) != 1)
+               return  DAT_QUEUE_EMPTY;
+
+       return  DAT_SUCCESS;
+}
+
+#ifdef CQ_WAIT_OBJECT
+
+/* NEW common wait objects for providers with direct CQ wait objects */
+/*
+ * dapls_ib_wait_object_create
+ *
+ * Create a completion channel on the EVD's HCA and return it as the
+ * CQ wait object.
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INSUFFICIENT_RESOURCES      channel could not be created
+ */
+DAT_RETURN
+dapls_ib_wait_object_create ( 
+               IN DAPL_EVD             *evd_ptr,
+               IN ib_wait_obj_handle_t *p_cq_wait_obj_handle )
+{
+       dapl_dbg_log (  DAPL_DBG_TYPE_CM, 
+                       " cq_object_create: (%p,%p)\n", 
+                       evd_ptr, p_cq_wait_obj_handle );
+
+       /* set cq_wait object to a new completion channel */
+       *p_cq_wait_obj_handle = 
+               ibv_create_comp_channel(evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle);
+
+       /*
+        * Do not report success with a NULL channel: the wait routine
+        * dereferences the handle to obtain its file descriptor.
+        */
+       if (*p_cq_wait_obj_handle == NULL)
+               return DAT_INSUFFICIENT_RESOURCES;
+
+       return DAT_SUCCESS;
+}
+
+/* Destroy the completion channel used as a CQ wait object. */
+DAT_RETURN
+dapls_ib_wait_object_destroy (
+       IN ib_wait_obj_handle_t     p_cq_wait_obj_handle)
+{
+       dapl_dbg_log (DAPL_DBG_TYPE_UTIL, " cq_object_destroy: wait_obj=%p\n",
+                     p_cq_wait_obj_handle);
+
+       /* the result of the destroy call is not examined */
+       ibv_destroy_comp_channel(p_cq_wait_obj_handle);
+
+       return DAT_SUCCESS;
+}
+
+/* Wake a waiter on the CQ wait object; only logs the request and succeeds. */
+DAT_RETURN
+dapls_ib_wait_object_wakeup (
+       IN ib_wait_obj_handle_t         p_cq_wait_obj_handle)
+{
+       dapl_dbg_log (DAPL_DBG_TYPE_UTIL, " cq_object_wakeup: wait_obj=%p\n",
+                     p_cq_wait_obj_handle);
+
+       /* no wake up mechanism */
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_wait_object_wait
+ *
+ * Block on the completion channel until a CQ event arrives or the
+ * timeout expires.  A received event is fetched and acknowledged here.
+ *
+ * Input:
+ *     p_cq_wait_obj_handle    completion channel to wait on
+ *     timeout                 microseconds, or DAT_TIMEOUT_INFINITE
+ *
+ * Returns:
+ *     dapl_convert_errno() of 0 (event), ETIMEDOUT (timeout) or the
+ *     errno left by a failed poll()
+ */
+DAT_RETURN
+dapls_ib_wait_object_wait (
+       IN ib_wait_obj_handle_t     p_cq_wait_obj_handle,
+       IN u_int32_t                timeout)
+{
+       /* initialized so the trace below never reads an indeterminate value */
+       struct dapl_evd *evd_ptr = NULL;
+       struct ibv_cq   *ibv_cq = NULL;
+       void            *ibv_ctx = NULL;
+       int             status = 0; 
+       int             timeout_ms = -1;
+       struct pollfd cq_fd = {
+                       .fd      = p_cq_wait_obj_handle->fd,
+                       .events  = POLLIN,
+                       .revents = 0
+               };
+
+       dapl_dbg_log ( DAPL_DBG_TYPE_CM, 
+                       " cq_object_wait: CQ channel %p time %d\n", 
+                       p_cq_wait_obj_handle, timeout );
+       
+       /* uDAPL timeout values in usecs */
+       if (timeout != DAT_TIMEOUT_INFINITE)
+               timeout_ms = timeout/1000;
+
+       status = poll(&cq_fd, 1, timeout_ms);
+
+       /* returned event */
+       if (status > 0) {
+               if (!ibv_get_cq_event(p_cq_wait_obj_handle, 
+                                     &ibv_cq, (void*)&evd_ptr)) {
+                       ibv_ack_cq_events(ibv_cq, 1);
+               }
+               status = 0;
+
+       /* timeout */
+       } else if (status == 0) {
+               status = ETIMEDOUT;
+
+       /* poll failed: convert the real error code, not poll's -1 */
+       } else {
+               status = errno;
+       }
+       
+       dapl_dbg_log (DAPL_DBG_TYPE_CM, 
+                     " cq_object_wait: RET evd %p ibv_cq %p ibv_ctx %p %s\n",
+                     evd_ptr, ibv_cq,ibv_ctx,strerror(errno));
+       
+       return(dapl_convert_errno(status,"cq_wait_object_wait"));
+       
+}
+#endif
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
+
diff --git a/dapl/openib_scm/dapl_ib_dto.h b/dapl/openib_scm/dapl_ib_dto.h
new file mode 100644 (file)
index 0000000..c019cc8
--- /dev/null
@@ -0,0 +1,262 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          dapl_ib_dto.h
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - DTO operations and CQE macros 
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+#ifndef _DAPL_IB_DTO_H_
+#define _DAPL_IB_DTO_H_
+
+#include "dapl_ib_util.h"
+
+#define        DEFAULT_DS_ENTRIES      8
+
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p);
+
+/*
+ * dapls_ib_post_recv
+ *
+ * Provider specific Post RECV function
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_recv (
+       IN  DAPL_EP             *ep_ptr,
+       IN  DAPL_COOKIE         *cookie,
+       IN  DAT_COUNT           segments,
+       IN  DAT_LMR_TRIPLET     *local_iov )
+{
+       ib_data_segment_t       ds_array[DEFAULT_DS_ENTRIES];
+       ib_data_segment_t       *ds_array_start_p;
+       ib_data_segment_t       *ds_array_p;
+       struct ibv_recv_wr      wr;
+       struct ibv_recv_wr      *bad_wr;
+       DAT_COUNT               i, total_len;
+       int                     ret;
+       
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     " post_rcv: ep %p cookie %p segs %d l_iov %p\n",
+                     ep_ptr, cookie, segments, local_iov);
+
+       /* use the on-stack array when it is large enough, else allocate */
+       if ( segments <= DEFAULT_DS_ENTRIES ) 
+               ds_array_start_p = ds_array;
+       else
+               ds_array_start_p = 
+                       dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+       if (NULL == ds_array_start_p)
+               return (DAT_INSUFFICIENT_RESOURCES);
+
+       /* ds_array_p walks the array; the start pointer is kept for free */
+       ds_array_p = ds_array_start_p;
+       
+       /* setup work request */
+       total_len = 0;
+       wr.next = 0;
+       wr.num_sge = 0;
+       wr.wr_id = (uint64_t)(uintptr_t)cookie;
+       wr.sg_list = ds_array_start_p;
+
+       /* zero-length segments are skipped and not counted in num_sge */
+       for (i = 0; i < segments; i++ ) {
+               if ( !local_iov[i].segment_length )
+                       continue;
+
+               ds_array_p->addr  = (uint64_t) local_iov[i].virtual_address;
+               ds_array_p->length = local_iov[i].segment_length;
+               ds_array_p->lkey  = local_iov[i].lmr_context;
+               
+               dapl_dbg_log (  DAPL_DBG_TYPE_EP, 
+                               " post_rcv: l_key 0x%x va %p len %d\n",
+                               ds_array_p->lkey, ds_array_p->addr, 
+                               ds_array_p->length );
+
+               total_len += ds_array_p->length;
+               wr.num_sge++;
+               ds_array_p++;
+       }
+
+       if (cookie != NULL) 
+               cookie->val.dto.size = total_len;
+
+       ret = ibv_post_recv(ep_ptr->qp_handle, &wr, &bad_wr);
+
+       /* release the heap array, if one was needed, on every path */
+       if (ds_array_start_p != ds_array)
+               dapl_os_free(ds_array_start_p, 
+                            segments * sizeof(ib_data_segment_t));
+
+       if (ret)
+               return( dapl_convert_errno(EFAULT,"ibv_recv") );
+       
+       return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_post_send
+ *
+ * Provider specific Post SEND function
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_post_send (
+    IN  DAPL_EP                        *ep_ptr,
+    IN  ib_send_op_type_t       op_type,
+    IN  DAPL_COOKIE            *cookie,
+    IN  DAT_COUNT              segments,
+    IN  DAT_LMR_TRIPLET                *local_iov,
+    IN  const DAT_RMR_TRIPLET  *remote_iov,
+    IN  DAT_COMPLETION_FLAGS   completion_flags)
+{
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     " post_snd: ep %p op %d ck %p sgs %d l_iov %p r_iov %p f %d\n",
+                     ep_ptr, op_type, cookie, segments, local_iov, 
+                     remote_iov, completion_flags);
+
+       ib_data_segment_t       ds_array[DEFAULT_DS_ENTRIES];
+       ib_data_segment_t       *ds_array_start_p;
+       ib_data_segment_t       *ds_array_p;
+       struct ibv_send_wr      wr;
+       struct ibv_send_wr      *bad_wr;
+       ib_hca_transport_t      *ibt_ptr = &ep_ptr->header.owner_ia->hca_ptr->ib_trans;
+       DAT_COUNT               i, total_len;
+       int                     ret;
+       
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     " post_snd: ep %p cookie %p segs %d l_iov %p\n",
+                     ep_ptr, cookie, segments, local_iov);
+
+       /* use the on-stack array when it is large enough, else allocate */
+       if( segments <= DEFAULT_DS_ENTRIES ) 
+               ds_array_start_p = ds_array;
+       else
+               ds_array_start_p = 
+                       dapl_os_alloc(segments * sizeof(ib_data_segment_t));
+
+       if (NULL == ds_array_start_p)
+               return (DAT_INSUFFICIENT_RESOURCES);
+
+       /* ds_array_p walks the array; the start pointer is kept for free */
+       ds_array_p = ds_array_start_p;
+       
+       /* setup the work request */
+       wr.next = 0;
+       wr.opcode = op_type;
+       wr.num_sge = 0;
+       wr.send_flags = 0;
+       wr.wr_id = (uint64_t)(uintptr_t)cookie;
+       wr.sg_list = ds_array_start_p;
+       total_len = 0;
+
+       /* zero-length segments are skipped and not counted in num_sge */
+       for (i = 0; i < segments; i++ ) {
+               if ( !local_iov[i].segment_length )
+                       continue;
+
+               ds_array_p->addr  = (uint64_t) local_iov[i].virtual_address;
+               ds_array_p->length = local_iov[i].segment_length;
+               ds_array_p->lkey  = local_iov[i].lmr_context;
+               
+               dapl_dbg_log (  DAPL_DBG_TYPE_EP, 
+                               " post_snd: lkey 0x%x va %p len %d \n",
+                               ds_array_p->lkey, ds_array_p->addr, 
+                               ds_array_p->length );
+
+               total_len += ds_array_p->length;
+               wr.num_sge++;
+               ds_array_p++;
+       }
+
+       if (cookie != NULL) 
+               cookie->val.dto.size = total_len;
+       
+       /* RDMA operations also carry the remote address and key */
+       if ((op_type == OP_RDMA_WRITE) || (op_type == OP_RDMA_READ)) {
+               wr.wr.rdma.remote_addr = remote_iov->target_address;
+               wr.wr.rdma.rkey = remote_iov->rmr_context;
+               dapl_dbg_log (  DAPL_DBG_TYPE_EP, 
+                               " post_snd_rdma: rkey 0x%x va %#016Lx\n",
+                               wr.wr.rdma.rkey, wr.wr.rdma.remote_addr );
+       }
+
+       /* inline data for send or write ops */
+       if ((total_len <= ibt_ptr->max_inline_send ) && 
+          ((op_type == OP_SEND) || (op_type == OP_RDMA_WRITE))) 
+               wr.send_flags |= IBV_SEND_INLINE;
+
+       /* set completion flags in work request */
+       wr.send_flags |= (DAT_COMPLETION_SUPPRESS_FLAG & 
+                               completion_flags) ? 0 : IBV_SEND_SIGNALED;
+       wr.send_flags |= (DAT_COMPLETION_BARRIER_FENCE_FLAG & 
+                               completion_flags) ? IBV_SEND_FENCE : 0;
+       wr.send_flags |= (DAT_COMPLETION_SOLICITED_WAIT_FLAG & 
+                               completion_flags) ? IBV_SEND_SOLICITED : 0;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_EP, 
+                     " post_snd: op 0x%x flags 0x%x sglist %p, %d\n", 
+                       wr.opcode, wr.send_flags, wr.sg_list, wr.num_sge);
+
+       ret = ibv_post_send(ep_ptr->qp_handle, &wr, &bad_wr);
+
+       /* release the heap array, if one was needed, on every path */
+       if (ds_array_start_p != ds_array)
+               dapl_os_free(ds_array_start_p, 
+                            segments * sizeof(ib_data_segment_t));
+
+       /* label the failure as a send, not a receive */
+       if (ret)
+               return( dapl_convert_errno(EFAULT,"ibv_send") );
+       
+       dapl_dbg_log (DAPL_DBG_TYPE_EP," post_snd: returned\n");
+       return DAT_SUCCESS;
+}
+
+/* This provider does nothing here: no argument is used, always succeeds. */
+STATIC _INLINE_ DAT_RETURN 
+dapls_ib_optional_prv_dat (
+       IN  DAPL_CR             *cr_ptr,
+       IN  const void          *event_data,
+       OUT   DAPL_CR           **cr_pp)
+{
+    (void) cr_ptr;
+    (void) event_data;
+    (void) cr_pp;
+
+    return DAT_SUCCESS;
+}
+
+/* Translate a verbs work-completion opcode into the provider OP_* value. */
+STATIC _INLINE_ int dapls_cqe_opcode(ib_work_completion_t *cqe_p)
+{
+    /* opcodes without a case below are reported as OP_INVALID */
+    int        op = OP_INVALID;
+
+    switch (cqe_p->opcode) {
+       case IBV_WC_SEND:
+           op = OP_SEND;
+           break;
+       case IBV_WC_RDMA_WRITE:
+           op = OP_RDMA_WRITE;
+           break;
+       case IBV_WC_RDMA_READ:
+           op = OP_RDMA_READ;
+           break;
+       case IBV_WC_COMP_SWAP:
+           op = OP_COMP_AND_SWAP;
+           break;
+       case IBV_WC_FETCH_ADD:
+           op = OP_FETCH_AND_ADD;
+           break;
+       case IBV_WC_BIND_MW:
+           op = OP_BIND_MW;
+           break;
+       case IBV_WC_RECV:
+           op = OP_RECEIVE;
+           break;
+       case IBV_WC_RECV_RDMA_WITH_IMM:
+           op = OP_RECEIVE_IMM;
+           break;
+       default:
+           break;
+    }
+
+    return (op);
+}
+
+/*
+ * CQE field accessors.  The argument and the whole expansion are
+ * parenthesized so that any pointer expression may be passed and the
+ * result may be used inside a larger expression.
+ */
+#define DAPL_GET_CQE_OPTYPE(cqe_p)     dapls_cqe_opcode(cqe_p)
+#define DAPL_GET_CQE_WRID(cqe_p)       (((ib_work_completion_t*)(cqe_p))->wr_id)
+#define DAPL_GET_CQE_STATUS(cqe_p)     (((ib_work_completion_t*)(cqe_p))->status)
+#define DAPL_GET_CQE_BYTESNUM(cqe_p)   (((ib_work_completion_t*)(cqe_p))->byte_len)
+#define DAPL_GET_CQE_IMMED_DATA(cqe_p) (((ib_work_completion_t*)(cqe_p))->imm_data)
+
+#endif /*  _DAPL_IB_DTO_H_ */
diff --git a/dapl/openib_scm/dapl_ib_mem.c b/dapl/openib_scm/dapl_ib_mem.c
new file mode 100644 (file)
index 0000000..de36c0f
--- /dev/null
@@ -0,0 +1,392 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ * 
+ * MODULE: dapl_ib_mem.c
+ *
+ * PURPOSE: OpenIB verbs: memory windows, registration,
+ *           and protection domain 
+ *
+ * $Id: $
+ *
+ **********************************************************************/
+
+#include <sys/ioctl.h>  /* for IOCTL's */
+#include <sys/types.h>  /* for socket(2) and related bits and pieces */
+#include <sys/socket.h> /* for socket(2) */
+#include <net/if.h>     /* for struct ifreq */
+#include <net/if_arp.h> /* for ARPHRD_ETHER */
+#include <unistd.h>            /* for _SC_CLK_TCK */
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_lmr_util.h"
+
+/*
+ * dapls_convert_privileges
+ *
+ * Convert DAT LMR privilege flags to verbs access flags
+ *
+ * Input:
+ *     privileges      DAT_MEM_PRIV_FLAGS bit mask
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     bit mask of IBV_ACCESS_* flags
+ *
+ */
+STATIC _INLINE_ int
+dapls_convert_privileges (
+    IN DAT_MEM_PRIV_FLAGS      privileges)
+{
+       int     access = 0;
+
+       /*
+        * DAT_MEM_PRIV_LOCAL_READ_FLAG is not translated: nothing is
+        * added to the mask for it.
+        */
+       if (DAT_MEM_PRIV_LOCAL_WRITE_FLAG & privileges)
+               access |= IBV_ACCESS_LOCAL_WRITE;
+       if (DAT_MEM_PRIV_REMOTE_WRITE_FLAG & privileges)
+               access |= IBV_ACCESS_REMOTE_WRITE;
+       if (DAT_MEM_PRIV_REMOTE_READ_FLAG & privileges)
+               access |= IBV_ACCESS_REMOTE_READ;
+
+       return access;
+}
+
+/*
+ * dapls_ib_pd_alloc
+ *
+ * Allocate a protection domain for a PZ
+ *
+ * Input:
+ *     ia_ptr          IA whose HCA handle the PD is allocated on
+ *     pz              PZ that receives the PD handle
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno(ENOMEM) when no PD could be allocated
+ *
+ */
+DAT_RETURN
+dapls_ib_pd_alloc (
+       IN  DAPL_IA     *ia_ptr,
+       IN  DAPL_PZ     *pz )
+{
+       /* the result is stored in the PZ whether or not it is valid */
+       pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+
+       if (pz->pd_handle) {
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " pd_alloc: pd_handle=%p\n", 
+                            pz->pd_handle );
+               return DAT_SUCCESS;
+       }
+
+       return(dapl_convert_errno(ENOMEM,"alloc_pd"));
+}
+
+/*
+ * dapls_ib_pd_free
+ *
+ * Release the protection domain held by a PZ
+ *
+ * Input:
+ *     pz              PZ whose PD handle is released
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     PD released, or the PZ held no PD
+ *     dapl_convert_errno(errno) when ibv_dealloc_pd fails; the
+ *     handle is left in the PZ in that case
+ *
+ */
+DAT_RETURN
+dapls_ib_pd_free (
+       IN  DAPL_PZ     *pz )
+{
+       /* nothing to release */
+       if (pz->pd_handle == IB_INVALID_HANDLE)
+               return DAT_SUCCESS;
+
+       if (ibv_dealloc_pd(pz->pd_handle) != 0)
+               return(dapl_convert_errno(errno,"dealloc_pd"));
+
+       pz->pd_handle = IB_INVALID_HANDLE;
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_register
+ *
+ * Register a virtual memory region
+ *
+ * Input:
+ *     ia_ptr          IA handle (only logged here)
+ *     lmr             pointer to dapl_lmr struct; its PZ supplies the PD
+ *     virt_addr       virtual address of beginning of mem region
+ *     length          length of memory region
+ *     privileges      DAT privilege flags, converted to verbs access flags
+ *
+ * Output:
+ *     lmr->mr_handle and, on success, lmr->param lmr_context,
+ *     rmr_context, registered_size and registered_address
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_NOT_IMPLEMENTED     for DAT_MEM_TYPE_SHARED_VIRTUAL
+ *     dapl_convert_errno(ENOMEM) when ibv_reg_mr fails
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register (
+        IN  DAPL_IA                 *ia_ptr,
+        IN  DAPL_LMR                *lmr,
+        IN  DAT_PVOID                virt_addr,
+        IN  DAT_VLEN                length,
+        IN  DAT_MEM_PRIV_FLAGS      privileges)
+{
+       ib_pd_handle_t  ib_pd_handle;
+       int             access;
+
+       ib_pd_handle = ((DAPL_PZ *)lmr->param.pz_handle)->pd_handle;
+       
+       /* length is widened explicitly so the conversion always matches */
+       dapl_dbg_log (  DAPL_DBG_TYPE_UTIL, 
+                       " mr_register: ia=%p, lmr=%p va=%p ln=%llu pv=0x%x\n", 
+                       ia_ptr, lmr, virt_addr, 
+                       (unsigned long long)length, privileges );
+
+       /* TODO: shared memory */
+       if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
+               dapl_dbg_log( DAPL_DBG_TYPE_ERR,
+                    " mr_register_shared: NOT IMPLEMENTED\n");    
+               return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);  
+       }
+
+       /* local read is default on IB */ 
+       access = dapls_convert_privileges(privileges);
+       lmr->mr_handle = ibv_reg_mr(ib_pd_handle, virt_addr, length, access);
+
+       if (!lmr->mr_handle) 
+               return(dapl_convert_errno(ENOMEM,"reg_mr"));
+       
+       lmr->param.lmr_context = lmr->mr_handle->lkey; 
+       lmr->param.rmr_context = lmr->mr_handle->rkey;
+       lmr->param.registered_size = length;
+       lmr->param.registered_address = (DAT_VADDR)(uintptr_t) virt_addr;
+
+       /* one conversion per argument: length gets its own field */
+       dapl_dbg_log (  DAPL_DBG_TYPE_UTIL, 
+                       " mr_register: mr=%p h %x pd %p ctx %p ,lkey=0x%x, rkey=0x%x len=%llu priv=%x\n", 
+                       lmr->mr_handle, lmr->mr_handle->handle, 
+                       lmr->mr_handle->pd,
+                       lmr->mr_handle->context,
+                       lmr->mr_handle->lkey, 
+                       lmr->mr_handle->rkey, 
+                       (unsigned long long)length, access );
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_mr_deregister
+ *
+ * Free a memory region
+ *
+ * Input:
+ *     lmr                     pointer to dapl_lmr struct
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     region deregistered, or lmr held no region
+ *     dapl_convert_errno(errno) when ibv_dereg_mr fails; the
+ *     handle is left in the lmr in that case
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_deregister (
+       IN  DAPL_LMR    *lmr )
+{
+       if (lmr->mr_handle != IB_INVALID_HANDLE) {
+               /* tag names the call that actually failed */
+               if (ibv_dereg_mr(lmr->mr_handle))
+                       return(dapl_convert_errno(errno,"dereg_mr"));
+               lmr->mr_handle = IB_INVALID_HANDLE;
+       }
+       return DAT_SUCCESS;
+}
+
+
+/*
+ * dapls_ib_mr_register_shared
+ *
+ * Register a shared virtual memory region.
+ * Not implemented by this provider: the call only logs an error.
+ *
+ * Input:
+ *     ia_ptr          IA handle (unused)
+ *     lmr             pointer to dapl_lmr struct (unused)
+ *     privileges      requested privileges (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mr_register_shared (
+       IN  DAPL_IA             *ia_ptr,
+       IN  DAPL_LMR            *lmr,
+       IN  DAT_MEM_PRIV_FLAGS  privileges )
+{
+       (void) ia_ptr;
+       (void) lmr;
+       (void) privileges;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR," mr_register_shared: NOT IMPLEMENTED\n");
+       return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_alloc
+ *
+ * Allocate a memory window.
+ * Not implemented by this provider: the call only logs an error.
+ *
+ * Input:
+ *     rmr     rmr that would hold the window handles (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_alloc (
+       IN  DAPL_RMR    *rmr )
+{
+       (void) rmr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_alloc: NOT IMPLEMENTED\n");
+       return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_free
+ *
+ * Free a memory window.
+ * Not implemented by this provider: the call only logs an error.
+ *
+ * Input:
+ *     rmr     rmr holding the window handles (unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_free (
+       IN  DAPL_RMR    *rmr )
+{
+       (void) rmr;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_free: NOT IMPLEMENTED\n");
+       return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_bind
+ *
+ * Bind a memory window to a range of a registered region.
+ * Not implemented by this provider: the call only logs an error
+ * and none of the arguments are used.
+ *
+ * Input:
+ *     rmr, lmr, ep, cookie, virtual_address, length,
+ *     mem_priv, is_signaled   (all unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_bind (
+       IN  DAPL_RMR            *rmr,
+       IN  DAPL_LMR            *lmr,
+       IN  DAPL_EP             *ep,
+       IN  DAPL_COOKIE         *cookie,
+       IN  DAT_VADDR           virtual_address,
+       IN  DAT_VLEN            length,
+       IN  DAT_MEM_PRIV_FLAGS  mem_priv,
+       IN  DAT_BOOLEAN         is_signaled )
+{
+       (void) rmr;
+       (void) lmr;
+       (void) ep;
+       (void) cookie;
+       (void) virtual_address;
+       (void) length;
+       (void) mem_priv;
+       (void) is_signaled;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_bind: NOT IMPLEMENTED\n");
+       return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * dapls_ib_mw_unbind
+ *
+ * Unbind a memory window.
+ * Not implemented by this provider: the call only logs an error
+ * and none of the arguments are used.
+ *
+ * Input:
+ *     rmr, ep, cookie, is_signaled    (all unused)
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_NOT_IMPLEMENTED
+ *
+ */
+DAT_RETURN
+dapls_ib_mw_unbind (
+       IN  DAPL_RMR            *rmr,
+       IN  DAPL_EP             *ep,
+       IN  DAPL_COOKIE         *cookie,
+       IN  DAT_BOOLEAN         is_signaled )
+{
+       (void) rmr;
+       (void) ep;
+       (void) cookie;
+       (void) is_signaled;
+
+       dapl_dbg_log(DAPL_DBG_TYPE_ERR," mw_unbind: NOT IMPLEMENTED\n");
+       return DAT_ERROR (DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
+
diff --git a/dapl/openib_scm/dapl_ib_qp.c b/dapl/openib_scm/dapl_ib_qp.c
new file mode 100644 (file)
index 0000000..ba642ec
--- /dev/null
@@ -0,0 +1,398 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/**********************************************************************
+ *
+ * MODULE: dapl_ib_qp.c
+ *
+ * PURPOSE: QP routines for access to OpenIB verbs
+ *
+ * $Id: $
+ **********************************************************************/
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+
+/*
+ * dapli_get_empty_cq
+ *
+ * Return the zero-entry CQ cached on the HCA, creating it on the
+ * HCA's completion channel the first time it is needed.  A failed
+ * creation is not cached.
+ *
+ * Returns:
+ *     CQ handle, or IB_INVALID_HANDLE when the CQ cannot be created
+ */
+static ib_cq_handle_t
+dapli_get_empty_cq (
+    IN  DAPL_IA                *ia_ptr )
+{
+       ib_cq_handle_t  empty_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
+
+       if (!empty_cq) {
+               empty_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
+                                        0, NULL, 
+                                        ia_ptr->hca_ptr->ib_trans.ib_cq, 0);
+
+               if (empty_cq != IB_INVALID_HANDLE)
+                       ia_ptr->hca_ptr->ib_trans.ib_cq_empty = empty_cq;
+       }
+       return empty_cq;
+}
+
+/*
+ * dapls_ib_qp_alloc
+ *
+ * Alloc a QP and move it to the INIT state
+ *
+ * Input:
+ *     ia_ptr          IA the EP belongs to
+ *     ep_ptr          EP supplying attributes, PZ and EVDs; receives
+ *                     qp_handle and qp_state
+ *     ep_ctx_ptr      only logged here
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno(ENOMEM) when the CQ or QP cannot be created
+ *     DAT_INTERNAL_ERROR when the QP cannot be moved to INIT
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_alloc (
+    IN  DAPL_IA                *ia_ptr,
+    IN  DAPL_EP                *ep_ptr,
+    IN  DAPL_EP                *ep_ctx_ptr )
+{
+       DAT_EP_ATTR             *attr;
+       DAPL_EVD                *rcv_evd, *req_evd;
+       ib_cq_handle_t          rcv_cq, req_cq;
+       ib_pd_handle_t          ib_pd_handle;
+       struct ibv_qp_init_attr qp_create;
+                       
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
+                     ia_ptr, ep_ptr, ep_ctx_ptr);
+
+       attr = &ep_ptr->param.ep_attr;
+       ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle;
+       rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
+       req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;
+
+       /* 
+        * DAT allows usage model of EP's with no EVD's but IB does not. 
+        * Create a CQ with zero entries under the covers to support and 
+        * catch any invalid posting. 
+        *
+        * The empty CQ is obtained the same way for either side, so an
+        * EP with a recv EVD but no request EVD still gets a valid send
+        * CQ.  There is no EVD on this path, hence no per-EVD wait
+        * object to consult: the HCA completion channel is always used.
+        */
+       if ( rcv_evd != DAT_HANDLE_NULL ) 
+               rcv_cq = rcv_evd->ib_cq_handle;
+       else {
+               rcv_cq = dapli_get_empty_cq(ia_ptr);
+               if (rcv_cq == IB_INVALID_HANDLE) 
+                       return(dapl_convert_errno(ENOMEM, "create_cq"));
+       }
+
+       if (req_evd != DAT_HANDLE_NULL) 
+               req_cq = req_evd->ib_cq_handle;
+       else {
+               req_cq = dapli_get_empty_cq(ia_ptr);
+               if (req_cq == IB_INVALID_HANDLE) 
+                       return(dapl_convert_errno(ENOMEM, "create_cq"));
+       }
+
+       /* Setup attributes and create qp */
+       dapl_os_memzero((void*)&qp_create, sizeof(qp_create));
+       qp_create.send_cq = req_cq;
+       qp_create.recv_cq = rcv_cq;
+       qp_create.cap.max_send_wr = attr->max_request_dtos;
+       qp_create.cap.max_recv_wr = attr->max_recv_dtos;
+       qp_create.cap.max_send_sge = attr->max_request_iov;
+       qp_create.cap.max_recv_sge = attr->max_recv_iov;
+       qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.max_inline_send; 
+       qp_create.qp_type = IBV_QPT_RC;
+       /* dapls_modify_qp_state finds the port through this context */
+       qp_create.qp_context = (void*)ep_ptr;
+
+       ep_ptr->qp_handle = ibv_create_qp( ib_pd_handle, &qp_create);
+       if (!ep_ptr->qp_handle) 
+               return(dapl_convert_errno(ENOMEM, "create_qp"));
+       
+       /* qp_num is an integer, not a pointer */
+       dapl_dbg_log (  DAPL_DBG_TYPE_EP,
+                       " qp_alloc: qpn %x sq %d,%d rq %d,%d\n", 
+                       ep_ptr->qp_handle->qp_num,
+                       qp_create.cap.max_send_wr,qp_create.cap.max_send_sge,
+                       qp_create.cap.max_recv_wr,qp_create.cap.max_recv_sge );
+
+       /* Setup QP attributes for INIT state on the way out */ 
+       if (dapls_modify_qp_state(ep_ptr->qp_handle,
+                                 IBV_QPS_INIT,
+                                 NULL )  != DAT_SUCCESS ) {
+               ibv_destroy_qp(ep_ptr->qp_handle);              
+               ep_ptr->qp_handle = IB_INVALID_HANDLE;
+               return DAT_INTERNAL_ERROR;
+       }
+
+       ep_ptr->qp_state = IB_QP_STATE_INIT;
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_free
+ *
+ * Flush and destroy the QP of an EP
+ *
+ * Input:
+ *     ia_ptr          IA handle (not referenced)
+ *     ep_ptr          EP whose QP is destroyed
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS     QP destroyed, or the EP held no QP
+ *     dapl_convert_errno(errno) when ibv_destroy_qp fails; the
+ *     EP keeps its handle and state in that case
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_free (
+    IN  DAPL_IA                *ia_ptr,
+    IN  DAPL_EP                *ep_ptr )
+{
+       dapl_dbg_log (DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n", 
+                     ep_ptr, ep_ptr->qp_handle);
+
+       /* no QP attached: nothing to do */
+       if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
+               return DAT_SUCCESS;
+
+       /* error state flushes the queues; the result is not checked */
+       dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, NULL);
+
+       if (ibv_destroy_qp(ep_ptr->qp_handle) != 0)
+               return(dapl_convert_errno(errno,"destroy_qp"));
+
+       ep_ptr->qp_handle = IB_INVALID_HANDLE;
+       ep_ptr->qp_state = IB_QP_STATE_ERROR;
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_qp_modify
+ *
+ * Apply the queue sizes of an EP attribute set to the EP's QP, or
+ * push the QP into the error state when the EP is already marked
+ * in error.
+ *
+ * Input:
+ *     ia_ptr          IA handle (not referenced)
+ *     ep_ptr          EP whose QP is modified
+ *     attr            EP attributes supplying the DTO and IOV limits
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_PARAMETER   EP has no QP
+ *     DAT_INVALID_STATE       QP is neither in RTR nor in RTS
+ *     dapl_convert_errno(errno) when ibv_modify_qp fails
+ *     result of dapls_modify_qp_state on the error-state path
+ *
+ */
+DAT_RETURN
+dapls_ib_qp_modify (
+    IN  DAPL_IA                *ia_ptr,
+    IN  DAPL_EP                *ep_ptr,
+    IN  DAT_EP_ATTR    *attr )
+{
+       struct ibv_qp_attr      new_attr;
+       ib_qp_handle_t          qp = ep_ptr->qp_handle;
+
+       if (qp == IB_INVALID_HANDLE)
+               return DAT_INVALID_PARAMETER;
+
+       /*
+        * The state cached in the EP and in the qp handle is taken as
+        * the current state; no query of the QP is made here.
+        */
+
+       /* EP marked in error but the QP is not there yet: move it */
+       if ((qp->state != IBV_QPS_ERR) &&
+           (ep_ptr->qp_state == IB_QP_STATE_ERROR)) {
+               ep_ptr->qp_state = IB_QP_STATE_ERROR;
+               return (dapls_modify_qp_state(qp, IBV_QPS_ERR, NULL));
+       }
+
+       /* capabilities may only be changed while in RTR or RTS */
+       if (!((qp->state == IBV_QPS_RTR) || (qp->state == IBV_QPS_RTS)))
+               return DAT_INVALID_STATE;
+
+       /* only the capability fields are filled in and requested */
+       dapl_os_memzero((void*)&new_attr, sizeof(new_attr));
+       new_attr.cap.max_send_wr  = attr->max_request_dtos;
+       new_attr.cap.max_send_sge = attr->max_request_iov;
+       new_attr.cap.max_recv_wr  = attr->max_recv_dtos;
+       new_attr.cap.max_recv_sge = attr->max_recv_iov;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                     "modify_qp: qp %p sq %d,%d, rq %d,%d\n", 
+                     qp, 
+                     new_attr.cap.max_send_wr, new_attr.cap.max_send_sge, 
+                     new_attr.cap.max_recv_wr, new_attr.cap.max_recv_sge );
+
+       if (ibv_modify_qp(qp, &new_attr, IBV_QP_CAP) == 0)
+               return DAT_SUCCESS;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                     "modify_qp: modify ep %p qp %p failed\n",
+                     ep_ptr, qp);
+       return(dapl_convert_errno(errno,"modify_qp_state"));
+}
+
+/*
+ * dapls_ib_reinit_ep
+ *
+ * Cycle the QP of an EP through RESET back to INIT.  The results of
+ * the two transitions are not checked; the EP is marked INIT either
+ * way.  An EP without a QP is left untouched.
+ *
+ * Input:
+ *     ep_ptr          DAPL_EP
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     void
+ *
+ */
+void
+dapls_ib_reinit_ep (
+       IN  DAPL_EP     *ep_ptr)
+{
+       /* TODO: When IB-CM is implement then handle timewait before 
+        * allowing re-use of this QP
+        */
+       if ( ep_ptr->qp_handle == IB_INVALID_HANDLE )
+               return;
+
+       /* RESET first, then INIT */
+       dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET, NULL);
+       dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT, NULL);
+       ep_ptr->qp_state = IB_QP_STATE_INIT;
+}
+
+/* 
+ * Generic QP modify for init, reset, error, RTS, RTR
+ *
+ * Input:
+ *     qp_handle       QP to transition
+ *     qp_state        target state; also selects which extra
+ *                     attributes are added to the modify mask
+ *     qp_cm           remote qpn/lid/port; dereferenced only for
+ *                     IBV_QPS_RTR, so it must be valid for that state
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     dapl_convert_errno(errno) when ibv_modify_qp fails
+ *
+ * Any state other than RTR, RTS or INIT is requested with
+ * IBV_QP_STATE alone.
+ */
+DAT_RETURN
+dapls_modify_qp_state ( IN ib_qp_handle_t      qp_handle,
+                       IN ib_qp_state_t        qp_state,
+                       IN ib_qp_cm_t           *qp_cm )
+{
+       struct ibv_qp_attr      qp_attr;
+       enum ibv_qp_attr_mask   mask = IBV_QP_STATE;
+               
+       dapl_os_memzero((void*)&qp_attr, sizeof(qp_attr));
+       qp_attr.qp_state = qp_state;
+       
+       switch (qp_state) {
+               /* additional attributes with RTR and RTS: every bit
+                * added to mask below is paired with the qp_attr field
+                * assigned in the same case */
+               case IBV_QPS_RTR:
+               {
+                       mask |= IBV_QP_AV                 |
+                               IBV_QP_PATH_MTU           |
+                               IBV_QP_DEST_QPN           |
+                               IBV_QP_RQ_PSN             |
+                               IBV_QP_MAX_DEST_RD_ATOMIC |
+                               IBV_QP_MIN_RNR_TIMER;
+                       qp_attr.qp_state                = IBV_QPS_RTR; /* same value already set from qp_state */
+                       qp_attr.path_mtu                = IBV_MTU_1024; /* hard-coded, not taken from qp_cm */
+                       qp_attr.dest_qp_num             = qp_cm->qpn;
+                       qp_attr.rq_psn                  = 1; /* same fixed value as sq_psn in the RTS case */
+                       qp_attr.max_dest_rd_atomic      = 8;
+                       qp_attr.min_rnr_timer           = 12;
+                       qp_attr.ah_attr.is_global       = 0;
+                       qp_attr.ah_attr.dlid            = qp_cm->lid;
+                       qp_attr.ah_attr.sl              = 0;
+                       qp_attr.ah_attr.src_path_bits   = 0;
+                       qp_attr.ah_attr.port_num        = qp_cm->port;
+
+                       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                             " modify_qp_rtr: qpn %x lid %x port %x\n",
+                             qp_cm->qpn,qp_cm->lid,qp_cm->port );
+                       break;
+               }               
+               case IBV_QPS_RTS: 
+               {
+                       mask |= IBV_QP_TIMEOUT            |
+                               IBV_QP_RETRY_CNT          |
+                               IBV_QP_RNR_RETRY          |
+                               IBV_QP_SQ_PSN             |
+                               IBV_QP_MAX_QP_RD_ATOMIC;
+                       qp_attr.qp_state        = IBV_QPS_RTS; /* same value already set from qp_state */
+                       qp_attr.timeout         = 14; /* NOTE(review): fixed timeout/retry values, not configurable here */
+                       qp_attr.retry_cnt       = 7;
+                       qp_attr.rnr_retry       = 7;
+                       qp_attr.sq_psn          = 1;
+                       qp_attr.max_rd_atomic   = 8;
+                       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                             " modify_qp_rts: psn %x or %x\n",
+                             qp_attr.sq_psn, qp_attr.max_rd_atomic );
+                       break;
+               }
+               case IBV_QPS_INIT: 
+               {
+                       DAPL_IA *ia_ptr;
+                       DAPL_EP *ep_ptr; 
+                       /* need to find way back to port num: go from the
+                        * QP's qp_context (read as a DAPL_EP) to its
+                        * owner IA and that IA's HCA */
+                       ep_ptr = (DAPL_EP*)qp_handle->qp_context;
+                       if (ep_ptr)
+                               ia_ptr = ep_ptr->header.owner_ia;
+                       else
+                               break; /* no EP context: INIT is requested with IBV_QP_STATE only */
+
+                       mask |= IBV_QP_PKEY_INDEX       |
+                               IBV_QP_PORT             |
+                               IBV_QP_ACCESS_FLAGS;
+
+                       qp_attr.pkey_index  = 0;
+                       qp_attr.port_num = ia_ptr->hca_ptr->port_num;
+                       qp_attr.qp_access_flags = 
+                                       IBV_ACCESS_LOCAL_WRITE |
+                                       IBV_ACCESS_REMOTE_WRITE |
+                                       IBV_ACCESS_REMOTE_READ |
+                                       IBV_ACCESS_REMOTE_ATOMIC;
+                       
+                       dapl_dbg_log (DAPL_DBG_TYPE_EP,
+                               " modify_qp_init: pi %x port %x acc %x\n",
+                               qp_attr.pkey_index, qp_attr.port_num,
+                               qp_attr.qp_access_flags );
+                       break;
+               }
+               default:
+                       break;
+               
+       }
+
+       if (ibv_modify_qp(qp_handle, &qp_attr, mask))
+               return(dapl_convert_errno(errno,"modify_qp_state"));
+       
+       return DAT_SUCCESS;
+}
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/dapl/openib_scm/dapl_ib_util.c b/dapl/openib_scm/dapl_ib_util.c
new file mode 100644 (file)
index 0000000..68eb3b0
--- /dev/null
@@ -0,0 +1,472 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          dapl_ib_util.c
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - init, open, close, utilities
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+#ifdef RCSID
+static const char rcsid[] = "$Id:  $";
+#endif
+
+#include "dapl.h"
+#include "dapl_adapter_util.h"
+#include "dapl_ib_util.h"
+
+#include <stdlib.h>
+#include <netinet/tcp.h>
+#include <sys/utsname.h>
+#include <unistd.h>    
+#include <fcntl.h>
+
+int g_dapl_loopback_connection = 0;
+
+/*
+ * getipaddr
+ *
+ * Just get IP address for hostname: the local node name from uname
+ * is resolved with gethostbyname and the first IPv4 address is stored
+ * in addr as a sockaddr_in.  Only sin_family and sin_addr are written.
+ *
+ * Input:
+ *     addr            caller buffer that receives the address
+ *     addr_len        size of the buffer in bytes
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_PARAMETER   addr is NULL or too small for a sockaddr_in
+ *     DAT_INTERNAL_ERROR      uname or the host lookup failed
+ *     DAT_INVALID_ADDRESS     the host has no AF_INET address
+ */
+DAT_RETURN getipaddr( char *addr, int addr_len)
+{
+       struct sockaddr_in      *ipv4_addr = (struct sockaddr_in*)addr;
+       struct hostent          *h_ptr;
+       struct utsname          ourname;
+
+       /* never write past the caller's buffer */
+       if ( addr == NULL || addr_len < (int)sizeof(*ipv4_addr) )
+               return DAT_INVALID_PARAMETER;
+
+       if ( uname( &ourname ) < 0 ) 
+               return DAT_INTERNAL_ERROR;
+
+       h_ptr = gethostbyname( ourname.nodename );
+       if ( h_ptr == NULL ) 
+               return DAT_INTERNAL_ERROR;
+
+       if ( h_ptr->h_addrtype != AF_INET || h_ptr->h_addr_list[0] == NULL )
+               return DAT_INVALID_ADDRESS;
+
+       ipv4_addr->sin_family = AF_INET;
+       /* copy exactly one in_addr, sized from the destination field */
+       dapl_os_memcpy( &ipv4_addr->sin_addr, h_ptr->h_addr_list[0], 
+                       sizeof(ipv4_addr->sin_addr) );
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_init
+ *
+ * Provider-wide initialization hook, paired with dapls_ib_release.
+ * This provider has nothing to set up here.
+ *
+ * Input:
+ *     none
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     0 (always)
+ *
+ */
+int32_t
+dapls_ib_init (void)
+{
+       return 0;
+}
+
+/*
+ * dapls_ib_release
+ *
+ * Provider-wide release hook, paired with dapls_ib_init.
+ * This provider has nothing to tear down here; always returns 0.
+ */
+int32_t
+dapls_ib_release (void)
+{
+       return 0;
+}
+
+/*
+ * dapls_ib_open_hca
+ *
+ * Open HCA: find the named IB device, open it, create the shared
+ * non-blocking completion channel, start the CQ and CR threads and
+ * record the local IP address.
+ *
+ * Input:
+ *      hca_name          provider device name to look for
+ *      hca_ptr           HCA object that receives the device handle
+ *                        and the ib_trans state set up here
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *      DAT_INTERNAL_ERROR      device not found / not opened, or a
+ *                              later setup step failed
+ *      result of getipaddr when every other step succeeded
+ *
+ */
+DAT_RETURN dapls_ib_open_hca (
+        IN   IB_HCA_NAME       hca_name,
+        IN   DAPL_HCA          *hca_ptr)
+{
+       struct dlist    *dev_list;
+       int             opts;
+       DAT_RETURN      dat_status = DAT_SUCCESS;
+
+       dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+                     " open_hca: %s - %p\n", hca_name, hca_ptr );
+
+       /* Get list of all IB devices, find match, open */
+       dev_list = ibv_get_devices();
+       if (!dev_list) {
+               /* no list to walk: report it as device not found */
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                             " open_hca: IB device %s not found\n",
+                             hca_name);
+               return DAT_INTERNAL_ERROR;
+       }
+       dlist_start(dev_list);
+       dlist_for_each_data(dev_list,hca_ptr->ib_trans.ib_dev,struct ibv_device) {
+               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),hca_name))
+                       break;
+       }
+
+       if (!hca_ptr->ib_trans.ib_dev) {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                             " open_hca: IB device %s not found\n",
+                             hca_name);
+               return DAT_INTERNAL_ERROR;
+       }
+       
+       dapl_dbg_log (DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n", 
+                       ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                       (unsigned long long)bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+
+       hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
+       if (!hca_ptr->ib_hca_handle) {
+                dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                               " open_hca: IB dev open failed for %s\n", 
+                               ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+               return DAT_INTERNAL_ERROR;
+       }
+
+       /* set inline max with environment or default */
+       hca_ptr->ib_trans.max_inline_send = 
+               dapl_os_get_env_val ( "DAPL_MAX_INLINE", INLINE_SEND_DEFAULT );
+
+       /* initialize cq_lock */
+       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.cq_lock);
+       if (dat_status != DAT_SUCCESS)
+       {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                       " open_hca: failed to init cq_lock\n");
+               goto bail;
+       }
+
+       /* EVD events without direct CQ channels, non-blocking */
+       hca_ptr->ib_trans.ib_cq = 
+               ibv_create_comp_channel(hca_ptr->ib_hca_handle);
+       if (!hca_ptr->ib_trans.ib_cq) {
+               /* the channel's fd is read next: stop before that */
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                             " open_hca: ERR with CQ FD\n" );
+               goto bail;
+       }
+       opts = fcntl(hca_ptr->ib_trans.ib_cq->fd, F_GETFL); /* uCQ */
+       if (opts < 0 || fcntl(hca_ptr->ib_trans.ib_cq->fd, 
+                             F_SETFL, opts | O_NONBLOCK) < 0) {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                             " open_hca: ERR with CQ FD\n" );
+               goto bail;
+       }
+
+       if (dapli_cq_thread_init(hca_ptr)) {
+                dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                              " open_hca: cq_thread_init failed for %s\n",
+                              ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+                goto bail;
+        }
+
+       /* initialize cr_list lock */
+       dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
+       if (dat_status != DAT_SUCCESS)
+       {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                               " open_hca: failed to init lock\n");
+               goto bail;
+       }
+
+       /* initialize CM list for listens on this HCA */
+       dapl_llist_init_head(&hca_ptr->ib_trans.list);
+
+       /* create thread to process inbound connect request */
+       hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
+       dat_status = dapl_os_thread_create(cr_thread, 
+                                          (void*)hca_ptr, 
+                                          &hca_ptr->ib_trans.thread );
+       if (dat_status != DAT_SUCCESS)
+       {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
+                               " open_hca: failed to create thread\n");
+               goto bail;
+       }
+       
+       /* wait for thread: poll cr_state every 20 ms until it is RUN */
+       while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+               struct timespec sleep, remain;
+               sleep.tv_sec = 0;
+               sleep.tv_nsec = 20000000; /* 20 ms */
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+                            " open_hca: waiting for cr_thread\n");
+               nanosleep (&sleep, &remain);
+       }
+
+       /* get the IP address of the device; its status is what the
+        * caller sees, the log below is written either way */
+       dat_status = getipaddr((char*)&hca_ptr->hca_address, 
+                               sizeof(DAT_SOCK_ADDR6) );
+       dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+               " open_hca: %s, port %d, %s  %d.%d.%d.%d\n", 
+               ibv_get_device_name(hca_ptr->ib_trans.ib_dev), hca_ptr->port_num,
+               ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_family == AF_INET ?  "AF_INET":"AF_INET6",
+               ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
+               ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
+               ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
+               ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff );
+
+       return dat_status;
+bail:
+       /*
+        * NOTE(review): only the device is closed here.  Locks, the
+        * completion channel and the CQ thread set up before the
+        * failing step are left as they are -- confirm whether callers
+        * are expected to clean them up.
+        */
+       ibv_close_device(hca_ptr->ib_hca_handle); 
+       hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+       return DAT_INTERNAL_ERROR;
+}
+
+
+/*
+ * dapls_ib_close_hca
+ *
+ * Close HCA: calls dapli_cq_thread_destroy, closes the verbs device if the
+ * handle is still valid, destroys cq_lock, then cancels the cr_thread and
+ * destroys the cr_list lock.
+ *
+ * Input:
+ *      DAPL_HCA   provide CA handle
+ *
+ * Output:
+ *      none
+ *
+ * Return:
+ *      DAT_SUCCESS
+ *      dapl_convert_errno(errno) when ibv_close_device fails
+ *
+ * NOTE(review): the ibv_close_device failure path returns before cq_lock
+ * and lock are destroyed and before the cr_thread is asked to cancel.
+ *
+ */
+DAT_RETURN dapls_ib_close_hca (        IN   DAPL_HCA   *hca_ptr )
+{
+       dapl_dbg_log (DAPL_DBG_TYPE_UTIL," close_hca: %p\n",hca_ptr);
+
+       dapli_cq_thread_destroy(hca_ptr);
+
+       if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+               if (ibv_close_device(hca_ptr->ib_hca_handle)) 
+                       return(dapl_convert_errno(errno,"ib_close_device"));
+               hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+       }
+
+       dapl_os_lock_destroy(&hca_ptr->ib_trans.cq_lock);
+
+       /* destroy cr_thread and lock: write IB_THREAD_CANCEL to cr_state,
+        * then poll every 20 ms until cr_state reads IB_THREAD_EXIT
+        * (presumably written by cr_thread as it exits -- TODO confirm,
+        * cr_thread is not defined in this part of the file) */
+       hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
+       while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
+               struct timespec sleep, remain;
+               sleep.tv_sec = 0;
+               sleep.tv_nsec = 20000000; /* 20 ms */
+               dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
+                            " close_hca: waiting for cr_thread\n");
+               nanosleep (&sleep, &remain);
+       }
+       dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
+
+       return (DAT_SUCCESS);
+}
+  
+/*
+ * dapls_ib_query_hca
+ *
+ * Query the hca attribute
+ *
+ * Input:
+ *     hca_ptr                 hca whose ib_hca_handle and port_num are queried
+ *
+ * Output:
+ *     ia_attr                 attribute of the ia (skipped when NULL)
+ *     ep_attr                 attribute of the ep (skipped when NULL)
+ *     ip_addr                 copy of hca_ptr->hca_address (skipped when NULL);
+ *                             written before the verbs queries, so it is
+ *                             filled in even when those queries fail
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_HANDLE      hca_ptr->ib_hca_handle is NULL
+ *     dapl_convert_errno      ibv_query_device or ibv_query_port failed
+ */
+
+DAT_RETURN dapls_ib_query_hca (
+       IN  DAPL_HCA                       *hca_ptr,
+       OUT DAT_IA_ATTR                    *ia_attr,
+       OUT DAT_EP_ATTR                    *ep_attr,
+       OUT DAT_SOCK_ADDR6                 *ip_addr)
+{
+       struct ibv_device_attr  dev_attr;
+       struct ibv_port_attr    port_attr;
+
+       if (hca_ptr->ib_hca_handle == NULL) {
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR," query_hca: BAD handle\n");
+               return (DAT_INVALID_HANDLE);
+       }
+
+       /* local IP address of device, set during ia_open */
+       if (ip_addr != NULL)
+               memcpy(ip_addr, &hca_ptr->hca_address, sizeof(DAT_SOCK_ADDR6));
+       
+       if (ia_attr == NULL && ep_attr == NULL) 
+               return DAT_SUCCESS;
+
+       /* query verbs for this device and port attributes; a non-zero
+        * return from either call is converted from errno
+        * (assumes the verbs calls set errno on failure -- TODO confirm) */
+       if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
+                            ibv_query_port(hca_ptr->ib_hca_handle, 
+                                           hca_ptr->port_num, &port_attr))
+               return(dapl_convert_errno(errno,"ib_query_hca"));
+
+       if (ia_attr != NULL) {
+               ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0'; /* terminator only; no name is copied in this function */
+               ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
+               ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
+
+               dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+                       " query_hca: %s %s  %d.%d.%d.%d\n", 
+                       ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_family == AF_INET ?  "AF_INET":"AF_INET6",
+                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 0 & 0xff,
+                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 8 & 0xff,
+                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 16 & 0xff,
+                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 24 & 0xff );
+               
+               ia_attr->hardware_version_major   = dev_attr.hw_ver;
+               /* ia_attr->hardware_version_minor   = dev_attr.fw_ver;
+                * NOTE(review): the minor version is never assigned in this
+                * function but is still printed by the log call below */
+               ia_attr->max_eps                  = dev_attr.max_qp;
+               ia_attr->max_dto_per_ep           = dev_attr.max_qp_wr;
+               ia_attr->max_rdma_read_per_ep     = dev_attr.max_qp_rd_atom;
+               ia_attr->max_evds                 = dev_attr.max_cq;
+               ia_attr->max_evd_qlen             = dev_attr.max_cqe;
+               ia_attr->max_iov_segments_per_dto = dev_attr.max_sge;
+               ia_attr->max_lmrs                 = dev_attr.max_mr;
+               ia_attr->max_lmr_block_size       = dev_attr.max_mr_size;
+               ia_attr->max_rmrs                 = dev_attr.max_mw;
+               ia_attr->max_lmr_virtual_address  = dev_attr.max_mr_size;
+               ia_attr->max_rmr_target_address   = dev_attr.max_mr_size;
+               ia_attr->max_pzs                  = dev_attr.max_pd;
+               ia_attr->max_mtu_size             = port_attr.max_msg_sz;
+               ia_attr->max_rdma_size            = port_attr.max_msg_sz;
+               ia_attr->num_transport_attr       = 0;
+               ia_attr->transport_attr           = NULL;
+               ia_attr->num_vendor_attr          = 0;
+               ia_attr->vendor_attr              = NULL;
+
+               dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+                       " query_hca: (%x.%x) ep %d ep_q %d evd %d evd_q %d\n", 
+                       ia_attr->hardware_version_major,
+                       ia_attr->hardware_version_minor,
+                       ia_attr->max_eps, ia_attr->max_dto_per_ep,
+                       ia_attr->max_evds, ia_attr->max_evd_qlen );
+               dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+                       " query_hca: msg %llu rdma %llu iov %d lmr %d rmr %d\n", 
+                       ia_attr->max_mtu_size, ia_attr->max_rdma_size,
+                       ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs, 
+                       ia_attr->max_rmrs );
+
+       }
+       
+       if (ep_attr != NULL) {
+               ep_attr->max_mtu_size     = port_attr.max_msg_sz;
+               ep_attr->max_rdma_size    = port_attr.max_msg_sz;
+               ep_attr->max_recv_dtos    = dev_attr.max_qp_wr;
+               ep_attr->max_request_dtos = dev_attr.max_qp_wr;
+               ep_attr->max_recv_iov     = dev_attr.max_sge;
+               ep_attr->max_request_iov  = dev_attr.max_sge;
+               ep_attr->max_rdma_read_in = dev_attr.max_qp_rd_atom;
+               ep_attr->max_rdma_read_out= dev_attr.max_qp_rd_atom;
+               dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
+                       " query_hca: MAX msg %llu dto %d iov %d rdma i%d,o%d\n", 
+                       ep_attr->max_mtu_size,
+                       ep_attr->max_recv_dtos, ep_attr->max_recv_iov,
+                       ep_attr->max_rdma_read_in, ep_attr->max_rdma_read_out);
+       }
+
+       return DAT_SUCCESS;
+}
+
+/*
+ * dapls_ib_setup_async_callback
+ *
+ * Record an asynchronous callback of the given kind in the HCA
+ * transport (ia_ptr->hca_ptr->ib_trans).
+ *
+ * Input:
+ *     ia_ptr                  IA whose HCA transport receives the callback
+ *     handler_type            type of handler to set up
+ *     evd_ptr                 handle param for completion callbacks
+ *                             (logged only, not stored)
+ *     callback                callback routine pointer
+ *     context                 argument for callback routine; stored only
+ *                             for DAPL_ASYNC_UNAFILIATED
+ *
+ * Output:
+ *     none
+ *
+ * Returns:
+ *     DAT_SUCCESS
+ *     DAT_INVALID_PARAMETER   handler_type is not one of the known types
+ *
+ */
+DAT_RETURN dapls_ib_setup_async_callback (
+       IN  DAPL_IA                     *ia_ptr,
+       IN  DAPL_ASYNC_HANDLER_TYPE     handler_type,
+       IN  DAPL_EVD                    *evd_ptr,
+       IN  ib_async_handler_t          callback,
+       IN  void                        *context )
+
+{
+    ib_hca_transport_t *hca_ptr;
+
+    dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
+                 " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
+                 ia_ptr, handler_type, evd_ptr, callback, context);
+
+    hca_ptr = &ia_ptr->hca_ptr->ib_trans;
+    switch(handler_type)
+    {
+       case DAPL_ASYNC_UNAFILIATED:
+               hca_ptr->async_unafiliated = 
+                       (ib_async_handler_t)callback;
+               hca_ptr->async_un_ctx = context;
+               break;
+       case DAPL_ASYNC_CQ_ERROR:
+               /* the stored slot has a different signature, hence the cast */
+               hca_ptr->async_cq_error = 
+                       (ib_async_cq_handler_t)callback;
+               break;
+       case DAPL_ASYNC_CQ_COMPLETION:
+               hca_ptr->async_cq = 
+                       (ib_async_dto_handler_t)callback;
+               break;
+       case DAPL_ASYNC_QP_ERROR:
+               hca_ptr->async_qp_error = 
+                       (ib_async_qp_handler_t)callback;
+               break;
+       default:
+               /* nothing was registered: report it instead of DAT_SUCCESS */
+               dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+                             " setup_async_cb: invalid handler type %d\n",
+                             handler_type);
+               return DAT_INVALID_PARAMETER;
+    }
+    return DAT_SUCCESS;
+}
+
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
new file mode 100644 (file)
index 0000000..e18361f
--- /dev/null
@@ -0,0 +1,356 @@
+/*
+ * This Software is licensed under one of the following licenses:
+ *
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/cpl.php.
+ *
+ * 2) under the terms of the "The BSD License" a copy of which is
+ *    available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/bsd-license.php.
+ *
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
+ *    copy of which is available from the Open Source Initiative, see
+ *    http://www.opensource.org/licenses/gpl-license.php.
+ *
+ * Licensee has the right to choose one of the above licenses.
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice and one of the license notices.
+ *
+ * Redistributions in binary form must reproduce both the above copyright
+ * notice, one of the license notices in the documentation
+ * and/or other materials provided with the distribution.
+ */
+
+/***************************************************************************
+ *
+ *   Module:            uDAPL
+ *
+ *   Filename:          dapl_ib_util.h
+ *
+ *   Author:            Arlin Davis
+ *
+ *   Created:           3/10/2005
+ *
+ *   Description: 
+ *
+ *   The uDAPL openib provider - definitions, prototypes,
+ *
+ ****************************************************************************
+ *                Source Control System Information
+ *
+ *    $Id: $
+ *
+ *     Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ **************************************************************************/
+
+#ifndef _DAPL_IB_UTIL_H_
+#define _DAPL_IB_UTIL_H_
+
+#include "verbs.h"
+#include <byteswap.h>
+
+#ifndef __cplusplus
+#define false 0
+#define true  1
+#endif /*__cplusplus */
+
+/* Typedefs to map common DAPL provider types to IB verbs */
+typedef        struct ibv_qp           *ib_qp_handle_t;
+typedef        struct ibv_cq           *ib_cq_handle_t;
+typedef        struct ibv_pd           *ib_pd_handle_t;
+typedef        struct ibv_mr           *ib_mr_handle_t;
+typedef        struct ibv_mw           *ib_mw_handle_t;
+typedef        struct ibv_wc           ib_work_completion_t;
+
+/* HCA context type maps to IB verbs  */
+typedef        struct ibv_context      *ib_hca_handle_t;
+typedef ib_hca_handle_t                dapl_ibal_ca_t;
+
+/* CM mappings, user CM not complete use SOCKETS */
+
+/* destination info to exchange until real IB CM shows up;
+ * field notes below are inferred from the names only -- TODO confirm
+ * against the CM code that fills and reads this struct */
+typedef struct _ib_qp_cm
+{ 
+       uint32_t                qpn;            /* presumably the QP number */
+       uint16_t                lid;            /* presumably the port LID */
+       uint16_t                port;
+       int                     p_size;         /* presumably the private data length */
+       DAT_SOCK_ADDR6          ia_address;
+
+} ib_qp_cm_t;
+
+/* 
+ * dapl_llist_entry is defined in dapl.h, but dapl.h needs the provider
+ * typedefs from this file to be seen first, so a separate entry type
+ * (presumably matching dapl_llist_entry's layout -- TODO confirm) is
+ * declared here.
+ * TODO: move dapl_llist_entry out of dapl.h
+ */
+struct ib_llist_entry
+{
+    struct dapl_llist_entry    *flink;
+    struct dapl_llist_entry    *blink;
+    void                       *data;
+    struct dapl_llist_entry    *list_head;
+};
+
+struct ib_cm_handle /* per-connection CM state for the socket-based CM */
+{ 
+       struct ib_llist_entry   entry;          /* list linkage */
+       int                     socket;
+       int                     l_socket;       /* presumably the listening socket -- TODO confirm */
+       struct dapl_hca         *hca_ptr;
+       DAT_HANDLE              cr;
+       DAT_HANDLE              sp;     
+       ib_qp_cm_t              dst;            /* destination info, see ib_qp_cm_t */
+       unsigned char           p_data[256];    /* presumably private data; dst.p_size may be its length -- TODO confirm */
+};
+
+typedef struct ib_cm_handle    *ib_cm_handle_t;
+typedef ib_cm_handle_t         ib_cm_srvc_handle_t; /* same pointer type as ib_cm_handle_t */
+
+DAT_RETURN getipaddr(char *addr, int addr_len);
+
+/* CM events
+ * NOTE(review): IB_CM_LOCAL_FAILURE (no 'E') is a separate enumerator with
+ * its own value, distinct from IB_CME_LOCAL_FAILURE; it looks like a
+ * misspelling kept for compatibility -- confirm before removing */
+typedef enum 
+{
+    IB_CME_CONNECTED,
+    IB_CME_DISCONNECTED,
+    IB_CME_DISCONNECTED_ON_LINK_DOWN,
+    IB_CME_CONNECTION_REQUEST_PENDING,
+    IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+    IB_CME_DESTINATION_REJECT,
+    IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+    IB_CME_DESTINATION_UNREACHABLE,
+    IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+    IB_CME_LOCAL_FAILURE,
+    IB_CM_LOCAL_FAILURE
+
+} ib_cm_events_t;
+
+/* prototype for cm thread; started by open_hca with the DAPL_HCA pointer
+ * as arg (the same prototype is repeated in the prototypes section) */
+void cr_thread (void *arg);
+
+/* Operation and state mappings */
+typedef enum   ibv_send_flags  ib_send_op_type_t;
+typedef        struct  ibv_sge         ib_data_segment_t;
+typedef enum   ibv_qp_state    ib_qp_state_t;
+typedef        enum    ibv_event_type  ib_async_event_type;
+typedef struct ibv_async_event ib_error_record_t;      
+
+/* CQ notifications */
+typedef enum
+{
+       IB_NOTIFY_ON_NEXT_COMP,
+       IB_NOTIFY_ON_SOLIC_COMP
+
+} ib_notification_type_t;
+
+/* other mappings */
+typedef int                    ib_bool_t;
+typedef union ibv_gid          GID;
+typedef char                   *IB_HCA_NAME;
+typedef uint16_t               ib_hca_port_t;
+typedef uint32_t               ib_comp_handle_t;
+
+#ifdef CQ_WAIT_OBJECT
+typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
+#endif
+
+/* Definitions */
+#define IB_INVALID_HANDLE      NULL
+
+/* inline send rdma threshold */
+#define        INLINE_SEND_DEFAULT     128
+
+/* CM private data areas */
+#define        IB_MAX_REQ_PDATA_SIZE   92
+#define        IB_MAX_REP_PDATA_SIZE   196
+#define        IB_MAX_REJ_PDATA_SIZE   148
+#define        IB_MAX_DREQ_PDATA_SIZE  220
+#define        IB_MAX_DREP_PDATA_SIZE  224
+
+/* DTO OPs, ordered for DAPL ENUM definitions ???
+ * The first seven map directly to ibverbs IBV_WR_* opcodes; the numeric
+ * ones are provider-internal ops. */
+#define OP_RDMA_WRITE           IBV_WR_RDMA_WRITE
+#define OP_RDMA_WRITE_IMM       IBV_WR_RDMA_WRITE_WITH_IMM
+#define OP_SEND                 IBV_WR_SEND
+#define OP_SEND_IMM             IBV_WR_SEND_WITH_IMM
+#define OP_RDMA_READ            IBV_WR_RDMA_READ
+#define OP_COMP_AND_SWAP        IBV_WR_ATOMIC_CMP_AND_SWP
+#define OP_FETCH_AND_ADD        IBV_WR_ATOMIC_FETCH_AND_ADD
+#define OP_RECEIVE              7   /* internal op */
+#define OP_RECEIVE_IMM         8   /* internal op */
+#define OP_BIND_MW              9   /* internal op */
+#define OP_INVALID             0xff
+
+/* Definitions to map QP state */
+#define IB_QP_STATE_RESET      IBV_QPS_RESET
+#define IB_QP_STATE_INIT       IBV_QPS_INIT
+#define IB_QP_STATE_RTR                IBV_QPS_RTR
+#define IB_QP_STATE_RTS                IBV_QPS_RTS
+#define IB_QP_STATE_SQD                IBV_QPS_SQD
+#define IB_QP_STATE_SQE                IBV_QPS_SQE
+#define IB_QP_STATE_ERROR      IBV_QPS_ERR
+
+/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */
+/* some are errno and some are -n values */
+
+/**
+ * ibv_get_device_name - Return kernel device name
+ * ibv_get_device_guid - Return device's node GUID
+ * ibv_open_device - Return ibv_context or NULL
+ * ibv_close_device - Return 0, (errno?)
+ * ibv_get_async_event - Return 0, -1 
+ * ibv_alloc_pd - Return ibv_pd, NULL
+ * ibv_dealloc_pd - Return 0, errno 
+ * ibv_reg_mr - Return ibv_mr, NULL
+ * ibv_dereg_mr - Return 0, errno
+ * ibv_create_cq - Return ibv_cq, NULL
+ * ibv_destroy_cq - Return 0, errno
+ * ibv_get_cq_event - Return 0 & ibv_cq/context, int
+ * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error 
+ * ibv_req_notify_cq - Return 0 (void?)
+ * ibv_create_qp - Return ibv_qp, NULL
+ * ibv_modify_qp - Return 0, errno
+ * ibv_destroy_qp - Return 0, errno
+ * ibv_post_send - Return 0, -1 & bad_wr
+ * ibv_post_recv - Return 0, -1 & bad_wr 
+ */
+
+/* async handlers for DTO, CQ, QP, and unaffiliated events; the DTO and
+ * unaffiliated handler types have identical signatures, while the CQ and
+ * QP handlers take the affected CQ/QP handle as an extra argument */
+typedef void (*ib_async_dto_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_cq_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_cq_handle_t     ib_cq_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_qp_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_qp_handle_t     ib_qp_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef enum /* helper-thread life cycle states */
+{
+       IB_THREAD_INIT,         /* written to cr_state by open_hca before creating cr_thread */
+       IB_THREAD_RUN,          /* open_hca polls cr_state until it reads this */
+       IB_THREAD_CANCEL,       /* written to cr_state by close_hca before it waits for EXIT */
+       IB_THREAD_EXIT          /* close_hca polls cr_state until it reads this */
+
+} ib_thread_state_t;
+
+/* ib_hca_transport_t, specific to this implementation;
+ * embedded in the HCA as ib_trans */
+typedef struct _ib_hca_transport
+{ 
+       struct  ibv_device      *ib_dev;
+       ib_cq_handle_t          ib_cq_empty;
+       DAPL_OS_LOCK            cq_lock;        /* initialized in open_hca, destroyed in close_hca */
+       int                     max_inline_send;
+       ib_thread_state_t       cq_state;
+       DAPL_OS_THREAD          cq_thread;
+       struct ibv_comp_channel *ib_cq;         /* completion channel created in open_hca; its fd is set O_NONBLOCK */
+       int                     cr_state;       /* holds ib_thread_state_t values for cr_thread (declared int, unlike cq_state) */
+       DAPL_OS_THREAD          thread;         /* cr_thread, created in open_hca */
+       DAPL_OS_LOCK            lock;           /* cr_list lock */
+       struct dapl_llist_entry *list;          /* CM list for listens on this HCA */
+       ib_async_handler_t      async_unafiliated; /* the async_* fields are set by dapls_ib_setup_async_callback */
+       void                    *async_un_ctx;  /* context stored with async_unafiliated */
+       ib_async_cq_handler_t   async_cq_error;
+       ib_async_dto_handler_t  async_cq;
+       ib_async_qp_handler_t   async_qp_error;
+
+} ib_hca_transport_t;
+
+/* provider specific fields for shared memory support */
+typedef uint32_t ib_shm_transport_t;
+
+/* prototypes */
+int32_t        dapls_ib_init (void);
+int32_t        dapls_ib_release (void);
+void cq_thread (void *arg);
+void cr_thread(void *arg);
+int dapli_cq_thread_init(struct dapl_hca *hca_ptr);
+void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
+
+
+DAT_RETURN
+dapls_modify_qp_state ( IN ib_qp_handle_t      qp_handle,
+                       IN ib_qp_state_t        qp_state,
+                       IN ib_qp_cm_t           *qp_cm );
+
+/* inline functions
+ *
+ * dapl_ib_convert_name returns a dapl_os_strdup copy of name;
+ * dapl_ib_release_name does nothing with the name it is given.
+ * NOTE(review): the copy made by convert_name is not freed by
+ * release_name -- presumably it is freed elsewhere, confirm. */
+STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name)
+{
+       /* use ascii; name of local device */
+       return dapl_os_strdup(name);
+}
+
+STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name)
+{
+       return; /* no-op */
+}
+
+/*
+ *  Convert errno to DAT_RETURN values
+ *
+ *  Input:
+ *     err     errno-style error number; 0 maps to DAT_SUCCESS
+ *     str     caller tag, used only in the DAPL_DBG error log line
+ *
+ *  Returns:
+ *     DAT_SUCCESS when err is 0, otherwise the DAT_RETURN selected by the
+ *     switch below; EFAULT and any value not listed map to
+ *     DAT_INTERNAL_ERROR.
+ *
+ *  When DAPL_DBG is set, every non-zero err except EAGAIN, ETIME and
+ *  ETIMEDOUT is logged with strerror(err).
+ */
+STATIC _INLINE_ DAT_RETURN 
+dapl_convert_errno( IN int err, IN const char *str )
+{
+    if (!err)  return DAT_SUCCESS;
+       
+#if DAPL_DBG
+    if ((err != EAGAIN) && (err != ETIME) && (err != ETIMEDOUT))
+       dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err));
+#endif 
+
+    switch( err )
+    {
+       case EOVERFLOW  : return DAT_LENGTH_ERROR;
+       case EACCES     : return DAT_PRIVILEGES_VIOLATION;
+       case ENXIO      : 
+       case ERANGE     : 
+       case EPERM      : return DAT_PROTECTION_VIOLATION;                
+       case EINVAL     :
+        case EBADF     : 
+       case ENOENT     :
+       case ENOTSOCK   : return DAT_INVALID_HANDLE;
+       case EISCONN    : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED;
+       case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY;
+       case ETIME      :           
+       case ETIMEDOUT  : return DAT_TIMEOUT_EXPIRED;
+       case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE;
+       case EADDRINUSE : return DAT_CONN_QUAL_IN_USE;
+       case EALREADY   : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING;
+        case ENOSPC    : 
+       case ENOMEM     :
+        case E2BIG     :
+        case EDQUOT    : return DAT_INSUFFICIENT_RESOURCES;
+        case EAGAIN    : return DAT_QUEUE_EMPTY;
+       case EINTR      : return DAT_INTERRUPTED_CALL;
+       case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED;
+       case EFAULT     : 
+       default         : return DAT_INTERNAL_ERROR;
+    }
+ }
+
+/*
+ * Definitions required only for DAT 1.1 builds
+ *
+ * NOTE(review): IB_ACCESS_LOCAL_READ and IB_ACCESS_MW_BIND both expand to
+ * IBV_ACCESS_LOCAL_WRITE, and IB_ACCESS_ATOMIC expands to nothing, so it
+ * cannot be used as an operand in an expression -- confirm this is intended.
+ */
+#define IB_ACCESS_LOCAL_READ    IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_LOCAL_WRITE   IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_REMOTE_READ   IBV_ACCESS_REMOTE_READ
+#define IB_ACCESS_REMOTE_WRITE  IBV_ACCESS_REMOTE_WRITE
+#define IB_ACCESS_MW_BIND       IBV_ACCESS_LOCAL_WRITE
+#define IB_ACCESS_ATOMIC       
+
+#endif /*  _DAPL_IB_UTIL_H_ */
index 98ddc32425b158f60a9a34ff4b5f01d1c27e7adf..bf52f01c4f491da61c0b925507c627fa01d6b767 100644 (file)
@@ -138,6 +138,16 @@ CFLAGS   += -DCQ_WAIT_OBJECT
 CFLAGS   += -I/usr/local/include/infiniband
 endif
 
+#
+# OpenIB provider with Socket CM
+#
+ifeq ($(VERBS),openib_scm)
+PROVIDER = $(TOPDIR)/../openib_scm
+CFLAGS   += -DOPENIB
+CFLAGS   += -DCQ_WAIT_OBJECT
+CFLAGS   += -I/usr/local/include/infiniband
+endif
+
 #
 # If an implementation supports CM and DTO completions on the same EVD
 # then DAPL_MERGE_CM_DTO should be set
@@ -251,6 +261,13 @@ PROVIDER_SRCS  = dapl_ib_util.c dapl_ib_cq.c dapl_ib_qp.c
 PROVIDER_SRCS += dapl_ib_cm.c dapl_ib_mem.c
 endif
 
+ifeq ($(VERBS),openib_scm)
+LDFLAGS += -libverbs
+LDFLAGS += -rpath /usr/local/lib -L /usr/local/lib
+PROVIDER_SRCS  = dapl_ib_util.c dapl_ib_cq.c dapl_ib_qp.c \
+                 dapl_ib_cm.c dapl_ib_mem.c
+endif
+
 UDAPL_SRCS =   dapl_init.c             \
        dapl_evd_create.c               \
        dapl_evd_query.c                \