-/*
- * Copyright (c) 2005 Voltaire Inc. All rights reserved.
- * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
- * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved.
- * Copyright (c) 2003 Topspin Corporation. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_cm.c
- *
- * PURPOSE: The OFED provider - uCMA, name and route resolution
- *
- * $Id: $
- *
- **********************************************************************/
-
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_evd_util.h"
-#include "dapl_cr_util.h"
-#include "dapl_name_service.h"
-#include "dapl_ib_util.h"
-#include "dapl_vendor.h"
-#include "dapl_osd.h"
-
-extern struct rdma_event_channel *g_cm_events;
-
-/* local prototypes */
-static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
- struct rdma_cm_event *event);
-static void dapli_cm_active_cb(struct dapl_cm_id *conn,
- struct rdma_cm_event *event);
-static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
- struct rdma_cm_event *event);
-static void dapli_addr_resolve(struct dapl_cm_id *conn);
-static void dapli_route_resolve(struct dapl_cm_id *conn);
-
/* cma requires 16 bit SID, in network order */
#define IB_PORT_MOD 32001
#define IB_PORT_BASE (65535 - IB_PORT_MOD)

/*
 * Fold a 64-bit service ID into a 16-bit TCP port, in network order.
 * SIDs that already fit in 16 bits map straight through; larger SIDs
 * are folded into the upper port range [IB_PORT_BASE, IB_PORT_BASE +
 * IB_PORT_MOD).  The argument is fully parenthesized so expansions
 * such as SID_TO_PORT(x + 1) keep the intended precedence.
 */
#define SID_TO_PORT(SID) \
	((SID) > 0xffff ? \
	 htons((unsigned short)(((SID) % IB_PORT_MOD) + IB_PORT_BASE)) :\
	 htons((unsigned short)(SID)))

/* inverse direction: 16-bit network-order port back to a host-order SID */
#define PORT_TO_SID(p) ntohs(p)
-
/* private data header to validate consumer rejects versus abnormal events */
struct dapl_pdata_hdr {
	/* packed version word, built in dapls_ib_reject_connection() as
	 * htonl((DAT_MAJOR<<24)|(DAT_MINOR<<16)|(VN_MAJOR<<8)|VN_MINOR);
	 * its presence distinguishes a consumer REJ from a transport REJ */
	DAT_UINT32 version;
};
-
-static void dapli_addr_resolve(struct dapl_cm_id *conn)
-{
- int ret;
-#ifdef DAPL_DBG
- struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
-#endif
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " addr_resolve: cm_id %p SRC %x DST %x\n",
- conn->cm_id, ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr));
-
- ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",
- ret, strerror(errno));
- dapl_evd_connection_callback(conn,
- IB_CME_LOCAL_FAILURE,
- NULL, conn->ep);
- }
-}
-
-static void dapli_route_resolve(struct dapl_cm_id *conn)
-{
- int ret;
-#ifdef DAPL_DBG
- struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
- struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
-#endif
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",
- conn->cm_id, ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_port));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: SRC GID subnet %016llx id %016llx\n",
- (unsigned long long)
- ntohll(ibaddr->sgid.global.subnet_prefix),
- (unsigned long long)
- ntohll(ibaddr->sgid.global.interface_id));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: DST GID subnet %016llx id %016llx\n",
- (unsigned long long)
- ntohll(ibaddr->dgid.global.subnet_prefix),
- (unsigned long long)
- ntohll(ibaddr->dgid.global.interface_id));
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
- conn->cm_id,
- conn->params.private_data,
- conn->params.private_data_len,
- conn->params.responder_resources,
- conn->params.initiator_depth);
-
- ret = rdma_connect(conn->cm_id, &conn->params);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_connect: rdma_connect ERR %d %s\n",
- ret, strerror(errno));
- goto bail;
- }
- return;
-
- bail:
- dapl_evd_connection_callback(conn,
- IB_CME_LOCAL_FAILURE, NULL, conn->ep);
-}
-
-dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
-{
- dp_ib_cm_handle_t conn;
- struct rdma_cm_id *cm_id;
-
- /* Allocate CM and initialize lock */
- if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
- return NULL;
-
- dapl_os_memzero(conn, sizeof(*conn));
- dapl_os_lock_init(&conn->lock);
- conn->refs++;
-
- /* create CM_ID, bind to local device, create QP */
- if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {
- dapl_os_free(conn, sizeof(*conn));
- return NULL;
- }
- conn->cm_id = cm_id;
-
- /* setup timers for address and route resolution */
- conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",
- IB_ARP_TIMEOUT);
- conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",
- IB_ARP_RETRY_COUNT);
- conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",
- IB_ROUTE_TIMEOUT);
- conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
- IB_ROUTE_RETRY_COUNT);
- if (ep != NULL) {
- conn->ep = ep;
- conn->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;
- }
-
- return conn;
-}
-
/*
 * Release a CM object created by dapls_ib_cm_create()/dapli_req_recv().
 *
 * Only called from consumer thread via dat_ep_free()
 * accept, reject, or connect.
 * Cannot be called from callback thread.
 * rdma_destroy_id will block until rdma_get_cm_event is acked.
 */
void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)
{
	dapl_dbg_log(DAPL_DBG_TYPE_CM,
		     " destroy_conn: conn %p id %d\n",
		     conn, conn->cm_id);

	/* drop the creator's reference under the lock */
	dapl_os_lock(&conn->lock);
	conn->refs--;
	dapl_os_unlock(&conn->lock);

	/* block until event thread complete */
	while (conn->refs)
		dapl_os_sleep_usec(10000);

	/* detach EP from CM/QP state; ep is NULL for listen-side frees
	 * (see dapls_ib_remove_conn_listener) */
	if (ep) {
		ep->cm_handle = NULL;
		ep->qp_handle = NULL;
		ep->qp_state = IB_QP_STATE_ERROR;
	}

	/* destroy the QP before the cm_id that owns it */
	if (conn->cm_id) {
		if (conn->cm_id->qp)
			rdma_destroy_qp(conn->cm_id);
		rdma_destroy_id(conn->cm_id);
	}

	dapl_os_free(conn, sizeof(*conn));
}
-
/*
 * Passive side: build a child CM object for an incoming
 * RDMA_CM_EVENT_CONNECT_REQUEST.  The child adopts the uCMA-created
 * cm_id carried in the event and inherits the service point (sp) and
 * HCA from the listening CM object.  Accept parameters are staged by
 * merging the requester's connect data with local HCA limits.
 *
 * Returns the new CM object, or NULL if the listen handle is invalid
 * or allocation fails.
 */
static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
					 struct rdma_cm_event *event)
{
	struct dapl_cm_id *new_conn;
#ifdef DAPL_DBG
	struct rdma_addr *ipaddr = &event->id->route.addr;
#endif

	/* a REQ must arrive on a listening (service point) handle */
	if (conn->sp == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			     " dapli_rep_recv: on invalid listen " "handle\n");
		return NULL;
	}

	/* allocate new cm_id and merge listen parameters */
	new_conn = dapl_os_alloc(sizeof(*new_conn));
	if (new_conn) {
		(void)dapl_os_memzero(new_conn, sizeof(*new_conn));
		dapl_os_lock_init(&new_conn->lock);
		new_conn->cm_id = event->id;	/* provided by uCMA */
		event->id->context = new_conn;	/* update CM_ID context */
		new_conn->sp = conn->sp;
		new_conn->hca = conn->hca;
		new_conn->refs++;	/* creator ref, see dapls_ib_cm_free */

		/* Get requesters connect data, setup for accept;
		 * clamp requested RDMA read credits to local HCA limits */
		new_conn->params.responder_resources =
		    DAPL_MIN(event->param.conn.responder_resources,
			     conn->hca->ib_trans.rd_atom_in);
		new_conn->params.initiator_depth =
		    DAPL_MIN(event->param.conn.initiator_depth,
			     conn->hca->ib_trans.rd_atom_out);

		new_conn->params.flow_control = event->param.conn.flow_control;
		new_conn->params.rnr_retry_count =
		    event->param.conn.rnr_retry_count;
		new_conn->params.retry_count = event->param.conn.retry_count;

		/* save private data
		 * NOTE(review): private_data_len is trusted to fit
		 * new_conn->p_data — confirm buffer sizing upstream */
		if (event->param.conn.private_data_len) {
			dapl_os_memcpy(new_conn->p_data,
				       event->param.conn.private_data,
				       event->param.conn.private_data_len);
			new_conn->params.private_data = new_conn->p_data;
			new_conn->params.private_data_len =
			    event->param.conn.private_data_len;
		}

		dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
			     "REQ: SP %p PORT %d LID %d "
			     "NEW CONN %p ID %p pdata %p,%d\n",
			     new_conn->sp, ntohs(((struct sockaddr_in *)
						  &ipaddr->src_addr)->sin_port),
			     event->listen_id, new_conn, event->id,
			     event->param.conn.private_data,
			     event->param.conn.private_data_len);

		dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "
			     "REQ: IP SRC %x PORT %d DST %x PORT %d "
			     "rr %d init %d\n", ntohl(((struct sockaddr_in *)
						       &ipaddr->src_addr)->
						      sin_addr.s_addr),
			     ntohs(((struct sockaddr_in *)
				    &ipaddr->src_addr)->sin_port),
			     ntohl(((struct sockaddr_in *)
				    &ipaddr->dst_addr)->sin_addr.s_addr),
			     ntohs(((struct sockaddr_in *)
				    &ipaddr->dst_addr)->sin_port),
			     new_conn->params.responder_resources,
			     new_conn->params.initiator_depth);
	}
	return new_conn;
}
-
/*
 * Active-side (connecting) CM event dispatcher: translate uCMA events
 * into DAPL CME codes and deliver them to the EP via
 * dapl_evd_connection_callback().  Runs on the CM event thread.
 */
static void dapli_cm_active_cb(struct dapl_cm_id *conn,
			       struct rdma_cm_event *event)
{
	DAPL_OS_LOCK *lock = &conn->lock;
	ib_cm_events_t ib_cm_event;
	const void *pdata = NULL;

	dapl_dbg_log(DAPL_DBG_TYPE_CM,
		     " active_cb: conn %p id %d event %d\n",
		     conn, conn->cm_id, event->event);

	/* There is a chance that we can get events after
	 * the consumer calls disconnect in a pending state
	 * since the IB CM and uDAPL states are not shared.
	 * In some cases, IB CM could generate either a DCONN
	 * or CONN_ERR after the consumer returned from
	 * dapl_ep_disconnect with a DISCONNECTED event
	 * already queued. Check state here and bail to
	 * avoid any events after a disconnect.
	 */
	if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
		return;

	dapl_os_lock(&conn->ep->header.lock);
	if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
		dapl_os_unlock(&conn->ep->header.lock);
		return;
	}
	/* record the transition before dropping the EP lock so later
	 * events observe DISCONNECTED and bail above */
	if (event->event == RDMA_CM_EVENT_DISCONNECTED)
		conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;

	dapl_os_unlock(&conn->ep->header.lock);
	dapl_os_lock(lock);

	switch (event->event) {
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_CONNECT_ERROR:
		dapl_log(DAPL_DBG_TYPE_WARN,
			 "dapl_cma_active: CONN_ERR event=0x%x"
			 " status=%d %s DST %s, %d\n",
			 event->event, event->status,
			 (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
			 inet_ntoa(((struct sockaddr_in *)
				    &conn->cm_id->route.addr.dst_addr)->
				   sin_addr),
			 ntohs(((struct sockaddr_in *)
				&conn->cm_id->route.addr.dst_addr)->
			       sin_port));

		/* per DAT SPEC provider always returns UNREACHABLE */
		ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;
		break;
	case RDMA_CM_EVENT_REJECTED:
		dapl_dbg_log(DAPL_DBG_TYPE_CM,
			     " dapli_cm_active_handler: REJECTED reason=%d\n",
			     event->status);

		/* valid REJ from consumer will always contain private data
		 * NOTE(review): 28 appears to be the IB CM
		 * consumer-defined REJ reason code — confirm vs IB spec */
		if (event->status == 28 &&
		    event->param.conn.private_data_len) {
			ib_cm_event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
			/* skip dapl_pdata_hdr prepended by peer's reject */
			pdata =
			    (unsigned char *)event->param.conn.
			    private_data +
			    sizeof(struct dapl_pdata_hdr);
		} else {
			ib_cm_event = IB_CME_DESTINATION_REJECT;
			dapl_log(DAPL_DBG_TYPE_WARN,
				 "dapl_cma_active: non-consumer REJ,"
				 " reason=%d, DST %s, %d\n",
				 event->status,
				 inet_ntoa(((struct sockaddr_in *)
					    &conn->cm_id->route.addr.
					    dst_addr)->sin_addr),
				 ntohs(((struct sockaddr_in *)
					&conn->cm_id->route.addr.
					dst_addr)->sin_port));
		}
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		dapl_dbg_log(DAPL_DBG_TYPE_CM,
			     " active_cb: cm_id %d PORT %d CONNECTED to %s!\n",
			     conn->cm_id, ntohs(((struct sockaddr_in *)
						 &conn->cm_id->route.addr.
						 dst_addr)->sin_port),
			     inet_ntoa(((struct sockaddr_in *)
					&conn->cm_id->route.addr.dst_addr)->
				       sin_addr));

		/* setup local and remote ports for ep query */
		conn->ep->param.remote_port_qual =
		    PORT_TO_SID(rdma_get_dst_port(conn->cm_id));
		conn->ep->param.local_port_qual =
		    PORT_TO_SID(rdma_get_src_port(conn->cm_id));

		ib_cm_event = IB_CME_CONNECTED;
		pdata = event->param.conn.private_data;
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		dapl_dbg_log(DAPL_DBG_TYPE_CM,
			     " active_cb: DISC EVENT - EP %p\n",conn->ep);
		rdma_disconnect(conn->cm_id);	/* required for DREP */
		ib_cm_event = IB_CME_DISCONNECTED;
		/* validate EP handle; conn=NULL suppresses the callback */
		if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
			conn = NULL;
		break;
	default:
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			     " dapli_cm_active_cb_handler: Unexpected CM "
			     "event %d on ID 0x%p\n", event->event,
			     conn->cm_id);
		conn = NULL;
		break;
	}

	dapl_os_unlock(lock);
	if (conn)
		dapl_evd_connection_callback(conn, ib_cm_event, pdata, conn->ep);
}
-
/*
 * Passive-side (listening/accepting) CM event dispatcher: translate
 * uCMA events into DAPL CME codes and deliver them to the service
 * point via dapls_cr_callback().  Runs on the CM event thread.
 */
static void dapli_cm_passive_cb(struct dapl_cm_id *conn,
				struct rdma_cm_event *event)
{
	ib_cm_events_t ib_cm_event;
	/* conn_recv is what gets reported; for CONNECT_REQUEST it is the
	 * newly created child CM object, otherwise the listen object */
	struct dapl_cm_id *conn_recv = conn;
	const void *pdata = NULL;

	dapl_dbg_log(DAPL_DBG_TYPE_CM,
		     " passive_cb: conn %p id %d event %d\n",
		     conn, event->id, event->event);

	dapl_os_lock(&conn->lock);

	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/* create new conn object with new conn_id from event */
		conn_recv = dapli_req_recv(conn, event);
		ib_cm_event = IB_CME_CONNECTION_REQUEST_PENDING;
		pdata = event->param.conn.private_data;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_CONNECT_ERROR:
		dapl_log(DAPL_DBG_TYPE_WARN,
			 "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"
			 " DST %s,%d\n",
			 event->event, event->status,
			 (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",
			 inet_ntoa(((struct sockaddr_in *)
				    &conn->cm_id->route.addr.dst_addr)->
				   sin_addr), ntohs(((struct sockaddr_in *)
						     &conn->cm_id->route.addr.
						     dst_addr)->sin_port));
		ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;
		break;
	case RDMA_CM_EVENT_REJECTED:
		/* will always be abnormal NON-consumer from active side */
		dapl_log(DAPL_DBG_TYPE_WARN,
			 "dapl_cm_passive: non-consumer REJ, reason=%d,"
			 " DST %s, %d\n",
			 event->status,
			 inet_ntoa(((struct sockaddr_in *)
				    &conn->cm_id->route.addr.dst_addr)->
				   sin_addr),
			 ntohs(((struct sockaddr_in *)
				&conn->cm_id->route.addr.dst_addr)->
			       sin_port));
		ib_cm_event = IB_CME_DESTINATION_REJECT;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		dapl_dbg_log(DAPL_DBG_TYPE_CM,
			     " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",
			     conn->cm_id, ntohs(((struct sockaddr_in *)
						 &conn->cm_id->route.addr.
						 src_addr)->sin_port),
			     ntohl(((struct sockaddr_in *)
				    &conn->cm_id->route.addr.dst_addr)->
				   sin_addr.s_addr));
		ib_cm_event = IB_CME_CONNECTED;
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		rdma_disconnect(conn->cm_id);	/* required for DREP */
		ib_cm_event = IB_CME_DISCONNECTED;
		/* validate SP handle context; NULL suppresses the callback */
		if (DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) &&
		    DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))
			conn_recv = NULL;
		break;
	default:
		dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "
			     "Unexpected CM event %d on ID 0x%p\n",
			     event->event, conn->cm_id);
		conn_recv = NULL;
		break;
	}

	dapl_os_unlock(&conn->lock);
	if (conn_recv)
		dapls_cr_callback(conn_recv, ib_cm_event, pdata, conn_recv->sp);
}
-
-/************************ DAPL provider entry points **********************/
-
-/*
- * dapls_ib_connect
- *
- * Initiate a connection with the passive listener on another node
- *
- * Input:
- * ep_handle,
- * remote_ia_address,
- * remote_conn_qual,
- * prd_size size of private data and structure
- * prd_prt pointer to private data structure
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INSUFFICIENT_RESOURCES
- * DAT_INVALID_PARAMETER
- *
- */
-DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
- IN DAT_IA_ADDRESS_PTR r_addr,
- IN DAT_CONN_QUAL r_qual,
- IN DAT_COUNT p_size, IN void *p_data)
-{
- struct dapl_ep *ep_ptr = ep_handle;
- struct dapl_cm_id *conn = ep_ptr->cm_handle;
- int ret;
-
- /* Sanity check */
- if (NULL == ep_ptr)
- return DAT_SUCCESS;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
- r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
-
- /* rdma conn and cm_id pre-bound; reference via ep_ptr->cm_handle */
-
- /* Setup QP/CM parameters and private data in cm_id */
- (void)dapl_os_memzero(&conn->params, sizeof(conn->params));
- conn->params.responder_resources =
- ep_ptr->param.ep_attr.max_rdma_read_in;
- conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
- conn->params.flow_control = 1;
- conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
- conn->params.retry_count = IB_RC_RETRY_COUNT;
- if (p_size) {
- dapl_os_memcpy(conn->p_data, p_data, p_size);
- conn->params.private_data = conn->p_data;
- conn->params.private_data_len = p_size;
- }
-
- /* copy in remote address, need a copy for retry attempts */
- dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));
-
- /* Resolve remote address, src already bound during QP create */
- ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);
- ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;
-
- ret = rdma_resolve_addr(conn->cm_id, NULL,
- (struct sockaddr *)&conn->r_addr,
- conn->arp_timeout);
- if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
- ret, strerror(errno));
- return dapl_convert_errno(errno, "ib_connect");
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " connect: resolve_addr: cm_id %p -> %s port %d\n",
- conn->cm_id,
- inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),
- ((struct sockaddr_in *)&conn->r_addr)->sin_port);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect
- *
- * Disconnect an EP
- *
- * Input:
- * ep_handle,
- * disconnect_flags
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- *
- */
-DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
-{
- dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " disconnect(ep %p, conn %p, id %d flags %x)\n",
- ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);
-
- if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
- return DAT_SUCCESS;
-
- /* no graceful half-pipe disconnect option */
- rdma_disconnect(conn->cm_id);
-
- /*
- * DAT event notification occurs from the callback
- * Note: will fire even if DREQ goes unanswered on timeout
- */
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_disconnect_clean
- *
- * Clean up outstanding connection data. This routine is invoked
- * after the final disconnect callback has occurred. Only on the
- * ACTIVE side of a connection.
- *
- * Input:
- * ep_ptr DAPL_EP
- * active Indicates active side of connection
- *
- * Output:
- * none
- *
- * Returns:
- * void
- *
- */
-void
-dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
- IN DAT_BOOLEAN active,
- IN const ib_cm_events_t ib_cm_event)
-{
- /* nothing to do */
- return;
-}
-
/*
 * dapl_ib_setup_conn_listener
 *
 * Have the CM set up a connection listener.
 *
 * Input:
 *	ibm_hca_handle		HCA handle
 *	qp_handle		QP handle
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INSUFFICIENT_RESOURCES
 *	DAT_INTERNAL_ERROR
 *	DAT_CONN_QUAL_UNAVAILABLE
 *	DAT_CONN_QUAL_IN_USE
 *
 */
DAT_RETURN
dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
			     IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)
{
	DAT_RETURN dat_status = DAT_SUCCESS;
	ib_cm_srvc_handle_t conn;
	DAT_SOCK_ADDR6 addr;	/* local binding address */

	/* Allocate CM and initialize lock */
	if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)
		return DAT_INSUFFICIENT_RESOURCES;

	dapl_os_memzero(conn, sizeof(*conn));
	dapl_os_lock_init(&conn->lock);
	conn->refs++;	/* creator's reference, see dapls_ib_cm_free */

	/* create CM_ID, bind to local device, create QP */
	if (rdma_create_id
	    (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
		dapl_os_free(conn, sizeof(*conn));
		return (dapl_convert_errno(errno, "setup_listener"));
	}

	/* open identifies the local device; per DAT specification */
	/* Get family and address then set port to consumer's ServiceID */
	dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));
	((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);

	/* map port-in-use style errnos to the DAT busy status */
	if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {
		if ((errno == EBUSY) || (errno == EADDRINUSE) ||
		    (errno == EADDRNOTAVAIL))
			dat_status = DAT_CONN_QUAL_IN_USE;
		else
			dat_status =
			    dapl_convert_errno(errno, "setup_listener");
		goto bail;
	}

	dapl_dbg_log(DAPL_DBG_TYPE_CM,
		     " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %d)\n",
		     ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),
		     sp_ptr, conn, conn->cm_id);

	/* cross-link the listen CM object and the service point */
	sp_ptr->cm_srvc_handle = conn;
	conn->sp = sp_ptr;
	conn->hca = ia_ptr->hca_ptr;

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
		     " listen(conn=%p cm_id=%d)\n",
		     sp_ptr->cm_srvc_handle, conn->cm_id);

	if (rdma_listen(conn->cm_id, 0)) {	/* max cma backlog */

		if ((errno == EBUSY) || (errno == EADDRINUSE) ||
		    (errno == EADDRNOTAVAIL))
			dat_status = DAT_CONN_QUAL_IN_USE;
		else
			dat_status =
			    dapl_convert_errno(errno, "setup_listener");
		goto bail;
	}

	/* success */
	return DAT_SUCCESS;

      bail:
	/* unwind cm_id and CM object on any bind/listen failure */
	rdma_destroy_id(conn->cm_id);
	dapl_os_free(conn, sizeof(*conn));
	return dat_status;
}
-
-/*
- * dapl_ib_remove_conn_listener
- *
- * Have the CM remove a connection listener.
- *
- * Input:
- * ia_handle IA handle
- * ServiceID IB Channel Service ID
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_STATE
- *
- */
-DAT_RETURN
-dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
-{
- ib_cm_srvc_handle_t conn = sp_ptr->cm_srvc_handle;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
- ia_ptr, sp_ptr, conn);
-
- if (conn != IB_INVALID_HANDLE) {
- sp_ptr->cm_srvc_handle = NULL;
- dapls_ib_cm_free(conn, NULL);
- }
- return DAT_SUCCESS;
-}
-
/*
 * dapls_ib_accept_connection
 *
 * Perform necessary steps to accept a connection
 *
 * Input:
 *	cr_handle
 *	ep_handle
 *	private_data_size
 *	private_data
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INSUFFICIENT_RESOURCES
 *	DAT_INTERNAL_ERROR
 *
 */
DAT_RETURN
dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
			   IN DAT_EP_HANDLE ep_handle,
			   IN DAT_COUNT p_size, IN const DAT_PVOID p_data)
{
	DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;
	DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
	DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
	struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
	int ret;
	DAT_RETURN dat_status;

	dapl_dbg_log(DAPL_DBG_TYPE_CM,
		     " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",
		     cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);

	/* Obtain size of private data structure & contents */
	if (p_size > IB_MAX_REP_PDATA_SIZE) {
		dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);
		goto bail;
	}

	if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {
		/*
		 * If we are lazy attaching the QP then we may need to
		 * hook it up here. Typically, we run this code only for
		 * DAT_PSP_PROVIDER_FLAG
		 */
		dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);
		if (dat_status != DAT_SUCCESS) {
			dapl_log(DAPL_DBG_TYPE_ERR,
				 " dapl_cma_accept: qp_alloc ERR %d\n",
				 dat_status);
			goto bail;
		}
	}

	/*
	 * Validate device and port in EP cm_id against inbound
	 * CR cm_id. The pre-allocated EP cm_id is already bound to
	 * a local device (cm_id and QP) when created. Move the QP
	 * to the new cm_id only if device and port numbers match.
	 */
	if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
	    ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
		/* move QP to new cr_conn, remove QP ref in EP cm_id */
		cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;
		ep_ptr->cm_handle->cm_id->qp = NULL;
		/* the EP's original CM object is no longer needed */
		dapls_ib_cm_free(ep_ptr->cm_handle, NULL);
	} else {
		dapl_log(DAPL_DBG_TYPE_ERR,
			 " dapl_cma_accept: ERR dev(%p!=%p) or"
			 " port mismatch(%d!=%d)\n",
			 ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,
			 ntohs(ep_ptr->cm_handle->cm_id->port_num),
			 ntohs(cr_conn->cm_id->port_num));
		dat_status = DAT_INTERNAL_ERROR;
		goto bail;
	}

	cr_ptr->param.local_ep_handle = ep_handle;
	cr_conn->params.private_data = p_data;
	cr_conn->params.private_data_len = p_size;

	ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
	if (ret) {
		dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
			 ret, strerror(errno));
		/* NOTE(review): other call sites pass errno to
		 * dapl_convert_errno; this one passes ret — confirm intent */
		dat_status = dapl_convert_errno(ret, "accept");
		goto bail;
	}

	/* save accepted conn and EP reference, qp_handle unchanged */
	ep_ptr->cm_handle = cr_conn;
	cr_conn->ep = ep_ptr;

	/* setup local and remote ports for ep query */
	/* Note: port qual in network order */
	ep_ptr->param.remote_port_qual =
	    PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));
	ep_ptr->param.local_port_qual =
	    PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));

	return DAT_SUCCESS;
      bail:
	/* reject the peer and release the CR's CM object on any failure */
	rdma_reject(cr_conn->cm_id, NULL, 0);
	dapls_ib_cm_free(cr_conn, NULL);
	return dat_status;
}
-
/*
 * dapls_ib_reject_connection
 *
 * Reject a connection
 *
 * Input:
 *	cr_handle
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INTERNAL_ERROR
 *
 */
DAT_RETURN
dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
			   IN int reason,
			   IN DAT_COUNT private_data_size,
			   IN const DAT_PVOID private_data)
{
	int ret;
	int offset = sizeof(struct dapl_pdata_hdr);
	struct dapl_pdata_hdr pdata_hdr;

	/* pack DAT and provider versions into the header so the active
	 * side can recognize this as a consumer-level reject */
	memset(&pdata_hdr, 0, sizeof pdata_hdr);
	pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |
				  (DAT_VERSION_MINOR << 16) |
				  (VN_PROVIDER_MAJOR << 8) |
				  (VN_PROVIDER_MINOR));

	dapl_dbg_log(DAPL_DBG_TYPE_CM,
		     " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",
		     cm_handle, reason, ntohl(pdata_hdr.version),
		     private_data, private_data_size);

	if (cm_handle == IB_INVALID_HANDLE) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			     " reject: invalid handle: reason %d\n", reason);
		return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);
	}

	/* consumer data must fit after the header in the REJ payload */
	if (private_data_size >
	    dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,
				       cm_handle->hca))
		return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);

	/* setup pdata_hdr and users data, in CR pdata buffer */
	dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);
	if (private_data_size)
		dapl_os_memcpy(cm_handle->p_data + offset,
			       private_data, private_data_size);

	/*
	 * Always some private data with reject so active peer can
	 * determine real application reject from an abnormal
	 * application termination
	 */
	ret = rdma_reject(cm_handle->cm_id,
			  cm_handle->p_data, offset + private_data_size);

	/* the CR's CM object is consumed by the reject */
	dapls_ib_cm_free(cm_handle, NULL);
	return dapl_convert_errno(ret, "reject");
}
-
-/*
- * dapls_ib_cm_remote_addr
- *
- * Obtain the remote IP address given a connection
- *
- * Input:
- * cr_handle
- *
- * Output:
- * remote_ia_address: where to place the remote address
- *
- * Returns:
- * DAT_SUCCESS
- * DAT_INVALID_HANDLE
- *
- */
-DAT_RETURN
-dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
-{
- DAPL_HEADER *header;
- dp_ib_cm_handle_t ib_cm_handle;
- struct rdma_addr *ipaddr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " remote_addr(cm_handle=%p, r_addr=%p)\n",
- dat_handle, raddr);
-
- header = (DAPL_HEADER *) dat_handle;
-
- if (header->magic == DAPL_MAGIC_EP)
- ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
- else if (header->magic == DAPL_MAGIC_CR)
- ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
- else
- return DAT_INVALID_HANDLE;
-
- /* get remote IP address from cm_id route */
- ipaddr = &ib_cm_handle->cm_id->route.addr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
- ib_cm_handle, ib_cm_handle->cm_id,
- ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_port));
-
- dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_private_data_size
- *
- * Return the size of private data given a connection op type
- *
- * Input:
- * prd_ptr private data pointer
- * conn_op connection operation type
- * hca_ptr hca pointer, needed for transport type
- *
- * If prd_ptr is NULL, this is a query for the max size supported by
- * the provider, otherwise it is the actual size of the private data
- * contained in prd_ptr.
- *
- *
- * Output:
- * None
- *
- * Returns:
- * length of private data
- *
- */
-int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
- IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
-{
- int size;
-
- if (hca_ptr->ib_hca_handle->device->transport_type
- == IBV_TRANSPORT_IWARP)
- return (IWARP_MAX_PDATA_SIZE - sizeof(struct dapl_pdata_hdr));
-
- switch (conn_op) {
-
- case DAPL_PDATA_CONN_REQ:
- size = IB_MAX_REQ_PDATA_SIZE;
- break;
- case DAPL_PDATA_CONN_REP:
- size = IB_MAX_REP_PDATA_SIZE;
- break;
- case DAPL_PDATA_CONN_REJ:
- size = IB_MAX_REJ_PDATA_SIZE - sizeof(struct dapl_pdata_hdr);
- break;
- case DAPL_PDATA_CONN_DREQ:
- size = IB_MAX_DREQ_PDATA_SIZE;
- break;
- case DAPL_PDATA_CONN_DREP:
- size = IB_MAX_DREP_PDATA_SIZE;
- break;
- default:
- size = 0;
-
- } /* end case */
-
- return size;
-}
-
-/*
- * Map all CMA event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT 13
-
-static struct ib_cm_event_map {
- const ib_cm_events_t ib_cm_event;
- DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
- /* 00 */ {
- IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
- /* 01 */ {
- IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 02 */ {
- IB_CME_DISCONNECTED_ON_LINK_DOWN,
- DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 03 */ {
- IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
- /* 04 */ {
- IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- DAT_CONNECTION_REQUEST_EVENT},
- /* 05 */ {
- IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
- /* 06 */ {
- IB_CME_DESTINATION_REJECT,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 07 */ {
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- DAT_CONNECTION_EVENT_PEER_REJECTED},
- /* 08 */ {
- IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
- /* 09 */ {
- IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 10 */ {
- IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
- /* 11 */ {
- IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
- /* 12 */ {
-IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * dat_event_num DAT event we need an equivelent CM event for
- *
- * Output:
- * none
- *
- * Returns:
- * ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
- IN DAT_BOOLEAN active)
-{
- DAT_EVENT_NUMBER dat_event_num;
- int i;
-
- active = active;
-
- dat_event_num = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
- dat_event_num = ib_cm_event_map[i].dat_event_num;
- break;
- }
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
- "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
- active ? "active" : "passive", ib_cm_event, dat_event_num);
-
- return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * ib_cm_event event provided to the dapl callback routine
- * active switch indicating active or passive connection
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
- ib_cm_events_t ib_cm_event;
- int i;
-
- ib_cm_event = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
- ib_cm_event = ib_cm_event_map[i].ib_cm_event;
- break;
- }
- }
- return ib_cm_event;
-}
-
-void dapli_cma_event_cb(void)
-{
- struct rdma_cm_event *event;
-
- /* process one CM event, fairness, non-blocking */
- if (!rdma_get_cm_event(g_cm_events, &event)) {
- struct dapl_cm_id *conn;
-
- /* set proper conn from cm_id context */
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
- conn = (struct dapl_cm_id *)event->listen_id->context;
- else
- conn = (struct dapl_cm_id *)event->id->context;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
- event->event, event->id, event->listen_id, conn);
-
- /* cm_free is blocked waiting for ack */
- dapl_os_lock(&conn->lock);
- if (!conn->refs) {
- dapl_os_unlock(&conn->lock);
- rdma_ack_cm_event(event);
- return;
- }
- conn->refs++;
- dapl_os_unlock(&conn->lock);
-
- switch (event->event) {
- case RDMA_CM_EVENT_ADDR_RESOLVED:
- dapli_addr_resolve(conn);
- break;
-
- case RDMA_CM_EVENT_ROUTE_RESOLVED:
- dapli_route_resolve(conn);
- break;
-
- case RDMA_CM_EVENT_ADDR_ERROR:
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cma_active: CM ADDR ERROR: ->"
- " DST %s retry (%d)..\n",
- inet_ntoa(((struct sockaddr_in *)
- &conn->r_addr)->sin_addr),
- conn->arp_retries);
-
- /* retry address resolution */
- if ((--conn->arp_retries) &&
- (event->status == -ETIMEDOUT)) {
- int ret;
- ret = rdma_resolve_addr(conn->cm_id, NULL,
- (struct sockaddr *)
- &conn->r_addr,
- conn->arp_timeout);
- if (!ret)
- break;
- else {
- dapl_dbg_log(DAPL_DBG_TYPE_WARN,
- " ERROR: rdma_resolve_addr = "
- "%d %s\n",
- ret, strerror(errno));
- }
- }
- /* retries exhausted or resolve_addr failed */
- dapl_log(DAPL_DBG_TYPE_ERR,
- "dapl_cma_active: ARP_ERR, retries(%d)"
- " exhausted -> DST %s,%d\n",
- IB_ARP_RETRY_COUNT,
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.dst_addr)->
- sin_port));
-
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
- break;
-
- case RDMA_CM_EVENT_ROUTE_ERROR:
- dapl_log(DAPL_DBG_TYPE_WARN,
- "dapl_cma_active: CM ROUTE ERROR: ->"
- " DST %s retry (%d)..\n",
- inet_ntoa(((struct sockaddr_in *)
- &conn->r_addr)->sin_addr),
- conn->route_retries);
-
- /* retry route resolution */
- if ((--conn->route_retries) &&
- (event->status == -ETIMEDOUT))
- dapli_addr_resolve(conn);
- else {
- dapl_log(DAPL_DBG_TYPE_ERR,
- "dapl_cma_active: PATH_RECORD_ERR,"
- " retries(%d) exhausted, DST %s,%d\n",
- IB_ROUTE_RETRY_COUNT,
- inet_ntoa(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_addr),
- ntohs(((struct sockaddr_in *)
- &conn->cm_id->route.addr.
- dst_addr)->sin_port));
-
- dapl_evd_connection_callback(conn,
- IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->ep);
- }
- break;
-
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- dapl_evd_connection_callback(conn,
- IB_CME_LOCAL_FAILURE,
- NULL, conn->ep);
- break;
- case RDMA_CM_EVENT_CONNECT_REQUEST:
- case RDMA_CM_EVENT_CONNECT_ERROR:
- case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
- case RDMA_CM_EVENT_ESTABLISHED:
- case RDMA_CM_EVENT_DISCONNECTED:
- /* passive or active */
- if (conn->sp)
- dapli_cm_passive_cb(conn, event);
- else
- dapli_cm_active_cb(conn, event);
- break;
- case RDMA_CM_EVENT_CONNECT_RESPONSE:
-#ifdef RDMA_CM_EVENT_TIMEWAIT_EXIT
- case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-#endif
- break;
- default:
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",
- event->event, event->id,
- event->id->context);
- break;
- }
-
- /* ack event, unblocks destroy_cm_id in consumer threads */
- rdma_ack_cm_event(event);
-
- dapl_os_lock(&conn->lock);
- conn->refs--;
- dapl_os_unlock(&conn->lock);
- }
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
+/*\r
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.\r
+ * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.\r
+ * Copyright (c) 2004-2005, Mellanox Technologies, Inc. All rights reserved. \r
+ * Copyright (c) 2003 Topspin Corporation. All rights reserved. \r
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.\r
+ *\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ * copy of which is available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+\r
+/**********************************************************************\r
+ *\r
+ * MODULE: dapl_ib_cm.c\r
+ *\r
+ * PURPOSE: The OFED provider - uCMA, name and route resolution\r
+ *\r
+ * $Id: $\r
+ *\r
+ **********************************************************************/\r
+\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+#include "dapl_evd_util.h"\r
+#include "dapl_cr_util.h"\r
+#include "dapl_name_service.h"\r
+#include "dapl_ib_util.h"\r
+#include "dapl_vendor.h"\r
+#include "dapl_osd.h"\r
+\r
+extern struct rdma_event_channel *g_cm_events;\r
+\r
+/* local prototypes */\r
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,\r
+ struct rdma_cm_event *event);\r
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,\r
+ struct rdma_cm_event *event);\r
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,\r
+ struct rdma_cm_event *event);\r
+static void dapli_addr_resolve(struct dapl_cm_id *conn);\r
+static void dapli_route_resolve(struct dapl_cm_id *conn);\r
+\r
+/* cma requires 16 bit SID, in network order */\r
+#define IB_PORT_MOD 32001\r
+#define IB_PORT_BASE (65535 - IB_PORT_MOD)\r
+#define SID_TO_PORT(SID) \\r
+ (SID > 0xffff ? \\r
+ htons((unsigned short)((SID % IB_PORT_MOD) + IB_PORT_BASE)) :\\r
+ htons((unsigned short)SID))\r
+\r
+#define PORT_TO_SID(p) ntohs(p)\r
+\r
+/* private data header to validate consumer rejects versus abnormal events */\r
+struct dapl_pdata_hdr {\r
+ DAT_UINT32 version;\r
+};\r
+\r
+static void dapli_addr_resolve(struct dapl_cm_id *conn)\r
+{\r
+ int ret;\r
+#ifdef DAPL_DBG\r
+ struct rdma_addr *ipaddr = &conn->cm_id->route.addr;\r
+#endif\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " addr_resolve: cm_id %p SRC %x DST %x\n",\r
+ conn->cm_id, ntohl(((struct sockaddr_in *)\r
+ &ipaddr->src_addr)->sin_addr.s_addr),\r
+ ntohl(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_addr.s_addr));\r
+\r
+ ret = rdma_resolve_route(conn->cm_id, conn->route_timeout);\r
+ if (ret) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapl_cma_connect: rdma_resolve_route ERR 0x%x %s\n",\r
+ ret, strerror(errno));\r
+ dapl_evd_connection_callback(conn,\r
+ IB_CME_LOCAL_FAILURE,\r
+ NULL, conn->ep);\r
+ }\r
+}\r
+\r
+static void dapli_route_resolve(struct dapl_cm_id *conn)\r
+{\r
+ int ret;\r
+#ifdef DAPL_DBG\r
+ struct rdma_addr *ipaddr = &conn->cm_id->route.addr;\r
+ struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;\r
+#endif\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " route_resolve: cm_id %p SRC %x DST %x PORT %d\n",\r
+ conn->cm_id, ntohl(((struct sockaddr_in *)\r
+ &ipaddr->src_addr)->sin_addr.s_addr),\r
+ ntohl(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_addr.s_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_port));\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " route_resolve: SRC GID subnet %016llx id %016llx\n",\r
+ (unsigned long long)\r
+ ntohll(ibaddr->sgid.global.subnet_prefix),\r
+ (unsigned long long)\r
+ ntohll(ibaddr->sgid.global.interface_id));\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " route_resolve: DST GID subnet %016llx id %016llx\n",\r
+ (unsigned long long)\r
+ ntohll(ibaddr->dgid.global.subnet_prefix),\r
+ (unsigned long long)\r
+ ntohll(ibaddr->dgid.global.interface_id));\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",\r
+ conn->cm_id,\r
+ conn->params.private_data,\r
+ conn->params.private_data_len,\r
+ conn->params.responder_resources,\r
+ conn->params.initiator_depth);\r
+\r
+ ret = rdma_connect(conn->cm_id, &conn->params);\r
+ if (ret) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapl_cma_connect: rdma_connect ERR %d %s\n",\r
+ ret, strerror(errno));\r
+ goto bail;\r
+ }\r
+ return;\r
+\r
+ bail:\r
+ dapl_evd_connection_callback(conn,\r
+ IB_CME_LOCAL_FAILURE, NULL, conn->ep);\r
+}\r
+\r
+dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)\r
+{\r
+ dp_ib_cm_handle_t conn;\r
+ struct rdma_cm_id *cm_id;\r
+\r
+ /* Allocate CM and initialize lock */\r
+ if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)\r
+ return NULL;\r
+\r
+ dapl_os_memzero(conn, sizeof(*conn));\r
+ dapl_os_lock_init(&conn->lock);\r
+ conn->refs++;\r
+\r
+ /* create CM_ID, bind to local device, create QP */\r
+ if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {\r
+ dapl_os_free(conn, sizeof(*conn));\r
+ return NULL;\r
+ }\r
+ conn->cm_id = cm_id;\r
+\r
+ /* setup timers for address and route resolution */\r
+ conn->arp_timeout = dapl_os_get_env_val("DAPL_CM_ARP_TIMEOUT_MS",\r
+ IB_ARP_TIMEOUT);\r
+ conn->arp_retries = dapl_os_get_env_val("DAPL_CM_ARP_RETRY_COUNT",\r
+ IB_ARP_RETRY_COUNT);\r
+ conn->route_timeout = dapl_os_get_env_val("DAPL_CM_ROUTE_TIMEOUT_MS",\r
+ IB_ROUTE_TIMEOUT);\r
+ conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",\r
+ IB_ROUTE_RETRY_COUNT);\r
+ if (ep != NULL) {\r
+ conn->ep = ep;\r
+ conn->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;\r
+ }\r
+\r
+ return conn;\r
+}\r
+\r
+/* \r
+ * Only called from consumer thread via dat_ep_free()\r
+ * accept, reject, or connect.\r
+ * Cannot be called from callback thread.\r
+ * rdma_destroy_id will block until rdma_get_cm_event is acked.\r
+ */\r
+void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)\r
+{\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " destroy_conn: conn %p id %d\n", \r
+ conn, conn->cm_id);\r
+\r
+ dapl_os_lock(&conn->lock);\r
+ conn->refs--;\r
+ dapl_os_unlock(&conn->lock);\r
+\r
+ /* block until event thread complete */\r
+ while (conn->refs) \r
+ dapl_os_sleep_usec(10000);\r
+ \r
+ if (ep) {\r
+ ep->cm_handle = NULL;\r
+ ep->qp_handle = NULL;\r
+ ep->qp_state = IB_QP_STATE_ERROR;\r
+ }\r
+\r
+ if (conn->cm_id) {\r
+ if (conn->cm_id->qp)\r
+ rdma_destroy_qp(conn->cm_id);\r
+ rdma_destroy_id(conn->cm_id);\r
+ }\r
+\r
+ dapl_os_free(conn, sizeof(*conn));\r
+}\r
+\r
+static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,\r
+ struct rdma_cm_event *event)\r
+{\r
+ struct dapl_cm_id *new_conn;\r
+#ifdef DAPL_DBG\r
+ struct rdma_addr *ipaddr = &event->id->route.addr;\r
+#endif\r
+\r
+ if (conn->sp == NULL) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+ " dapli_rep_recv: on invalid listen " "handle\n");\r
+ return NULL;\r
+ }\r
+\r
+ /* allocate new cm_id and merge listen parameters */\r
+ new_conn = dapl_os_alloc(sizeof(*new_conn));\r
+ if (new_conn) {\r
+ (void)dapl_os_memzero(new_conn, sizeof(*new_conn));\r
+ dapl_os_lock_init(&new_conn->lock);\r
+ new_conn->cm_id = event->id; /* provided by uCMA */\r
+ event->id->context = new_conn; /* update CM_ID context */\r
+ new_conn->sp = conn->sp;\r
+ new_conn->hca = conn->hca;\r
+ new_conn->refs++;\r
+\r
+ /* Get requesters connect data, setup for accept */\r
+ new_conn->params.responder_resources =\r
+ DAPL_MIN(event->param.conn.responder_resources,\r
+ conn->hca->ib_trans.rd_atom_in);\r
+ new_conn->params.initiator_depth =\r
+ DAPL_MIN(event->param.conn.initiator_depth,\r
+ conn->hca->ib_trans.rd_atom_out);\r
+\r
+ new_conn->params.flow_control = event->param.conn.flow_control;\r
+ new_conn->params.rnr_retry_count =\r
+ event->param.conn.rnr_retry_count;\r
+ new_conn->params.retry_count = event->param.conn.retry_count;\r
+\r
+ /* save private data */\r
+ if (event->param.conn.private_data_len) {\r
+ dapl_os_memcpy(new_conn->p_data,\r
+ event->param.conn.private_data,\r
+ event->param.conn.private_data_len);\r
+ new_conn->params.private_data = new_conn->p_data;\r
+ new_conn->params.private_data_len =\r
+ event->param.conn.private_data_len;\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "\r
+ "REQ: SP %p PORT %d LID %d "\r
+ "NEW CONN %p ID %p pdata %p,%d\n",\r
+ new_conn->sp, ntohs(((struct sockaddr_in *)\r
+ &ipaddr->src_addr)->sin_port),\r
+ event->listen_id, new_conn, event->id,\r
+ event->param.conn.private_data,\r
+ event->param.conn.private_data_len);\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " passive_cb: "\r
+ "REQ: IP SRC %x PORT %d DST %x PORT %d "\r
+ "rr %d init %d\n", ntohl(((struct sockaddr_in *)\r
+ &ipaddr->src_addr)->\r
+ sin_addr.s_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &ipaddr->src_addr)->sin_port),\r
+ ntohl(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_addr.s_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_port),\r
+ new_conn->params.responder_resources,\r
+ new_conn->params.initiator_depth);\r
+ }\r
+ return new_conn;\r
+}\r
+\r
+static void dapli_cm_active_cb(struct dapl_cm_id *conn,\r
+ struct rdma_cm_event *event)\r
+{\r
+ DAPL_OS_LOCK *lock = &conn->lock;\r
+ ib_cm_events_t ib_cm_event;\r
+ const void *pdata = NULL;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " active_cb: conn %p id %d event %d\n",\r
+ conn, conn->cm_id, event->event);\r
+\r
+ /* There is a chance that we can get events after\r
+ * the consumer calls disconnect in a pending state\r
+ * since the IB CM and uDAPL states are not shared.\r
+ * In some cases, IB CM could generate either a DCONN\r
+ * or CONN_ERR after the consumer returned from\r
+ * dapl_ep_disconnect with a DISCONNECTED event\r
+ * already queued. Check state here and bail to\r
+ * avoid any events after a disconnect.\r
+ */\r
+ if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))\r
+ return;\r
+\r
+ dapl_os_lock(&conn->ep->header.lock);\r
+ if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {\r
+ dapl_os_unlock(&conn->ep->header.lock);\r
+ return;\r
+ }\r
+ if (event->event == RDMA_CM_EVENT_DISCONNECTED)\r
+ conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;\r
+\r
+ dapl_os_unlock(&conn->ep->header.lock);\r
+ dapl_os_lock(lock);\r
+\r
+ switch (event->event) {\r
+ case RDMA_CM_EVENT_UNREACHABLE:\r
+ case RDMA_CM_EVENT_CONNECT_ERROR:\r
+ dapl_log(DAPL_DBG_TYPE_WARN,\r
+ "dapl_cma_active: CONN_ERR event=0x%x"\r
+ " status=%d %s DST %s, %d\n",\r
+ event->event, event->status,\r
+ (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_port));\r
+\r
+ /* per DAT SPEC provider always returns UNREACHABLE */\r
+ ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;\r
+ break;\r
+ case RDMA_CM_EVENT_REJECTED:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " dapli_cm_active_handler: REJECTED reason=%d\n",\r
+ event->status);\r
+\r
+ /* valid REJ from consumer will always contain private data */\r
+ if (event->status == 28 &&\r
+ event->param.conn.private_data_len) {\r
+ ib_cm_event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA;\r
+ pdata =\r
+ (unsigned char *)event->param.conn.\r
+ private_data +\r
+ sizeof(struct dapl_pdata_hdr);\r
+ } else {\r
+ ib_cm_event = IB_CME_DESTINATION_REJECT;\r
+ dapl_log(DAPL_DBG_TYPE_WARN,\r
+ "dapl_cma_active: non-consumer REJ,"\r
+ " reason=%d, DST %s, %d\n",\r
+ event->status,\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ dst_addr)->sin_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ dst_addr)->sin_port));\r
+ }\r
+ break;\r
+ case RDMA_CM_EVENT_ESTABLISHED:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " active_cb: cm_id %d PORT %d CONNECTED to %s!\n",\r
+ conn->cm_id, ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ dst_addr)->sin_port),\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_addr));\r
+\r
+ /* setup local and remote ports for ep query */\r
+ conn->ep->param.remote_port_qual =\r
+ PORT_TO_SID(rdma_get_dst_port(conn->cm_id));\r
+ conn->ep->param.local_port_qual =\r
+ PORT_TO_SID(rdma_get_src_port(conn->cm_id));\r
+\r
+ ib_cm_event = IB_CME_CONNECTED;\r
+ pdata = event->param.conn.private_data;\r
+ break;\r
+ case RDMA_CM_EVENT_DISCONNECTED:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " active_cb: DISC EVENT - EP %p\n",conn->ep);\r
+ rdma_disconnect(conn->cm_id); /* required for DREP */\r
+ ib_cm_event = IB_CME_DISCONNECTED;\r
+ /* validate EP handle */\r
+ if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))\r
+ conn = NULL;\r
+ break;\r
+ default:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+ " dapli_cm_active_cb_handler: Unexpected CM "\r
+ "event %d on ID 0x%p\n", event->event,\r
+ conn->cm_id);\r
+ conn = NULL;\r
+ break;\r
+ }\r
+\r
+ dapl_os_unlock(lock);\r
+ if (conn)\r
+ dapl_evd_connection_callback(conn, ib_cm_event, pdata, conn->ep);\r
+}\r
+\r
+static void dapli_cm_passive_cb(struct dapl_cm_id *conn,\r
+ struct rdma_cm_event *event)\r
+{\r
+ ib_cm_events_t ib_cm_event;\r
+ struct dapl_cm_id *conn_recv = conn;\r
+ const void *pdata = NULL;\r
+ \r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " passive_cb: conn %p id %d event %d\n",\r
+ conn, event->id, event->event);\r
+\r
+ dapl_os_lock(&conn->lock);\r
+\r
+ switch (event->event) {\r
+ case RDMA_CM_EVENT_CONNECT_REQUEST:\r
+ /* create new conn object with new conn_id from event */\r
+ conn_recv = dapli_req_recv(conn, event);\r
+ ib_cm_event = IB_CME_CONNECTION_REQUEST_PENDING;\r
+ pdata = event->param.conn.private_data;\r
+ break;\r
+ case RDMA_CM_EVENT_UNREACHABLE:\r
+ case RDMA_CM_EVENT_CONNECT_ERROR:\r
+ dapl_log(DAPL_DBG_TYPE_WARN,\r
+ "dapl_cm_passive: CONN_ERR event=0x%x status=%d %s,"\r
+ " DST %s,%d\n",\r
+ event->event, event->status,\r
+ (event->status == -ETIMEDOUT) ? "TIMEOUT" : "",\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_addr), ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ dst_addr)->sin_port));\r
+ ib_cm_event = IB_CME_DESTINATION_UNREACHABLE;\r
+ break;\r
+ case RDMA_CM_EVENT_REJECTED:\r
+ /* will always be abnormal NON-consumer from active side */\r
+ dapl_log(DAPL_DBG_TYPE_WARN,\r
+ "dapl_cm_passive: non-consumer REJ, reason=%d,"\r
+ " DST %s, %d\n",\r
+ event->status,\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_port));\r
+ ib_cm_event = IB_CME_DESTINATION_REJECT;\r
+ break;\r
+ case RDMA_CM_EVENT_ESTABLISHED:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " passive_cb: cm_id %p PORT %d CONNECTED from 0x%x!\n",\r
+ conn->cm_id, ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ src_addr)->sin_port),\r
+ ntohl(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_addr.s_addr));\r
+ ib_cm_event = IB_CME_CONNECTED;\r
+ break;\r
+ case RDMA_CM_EVENT_DISCONNECTED:\r
+ rdma_disconnect(conn->cm_id); /* required for DREP */\r
+ ib_cm_event = IB_CME_DISCONNECTED;\r
+ /* validate SP handle context */\r
+ if (DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_PSP) &&\r
+ DAPL_BAD_HANDLE(conn->sp, DAPL_MAGIC_RSP))\r
+ conn_recv = NULL;\r
+ break;\r
+ default:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " passive_cb: "\r
+ "Unexpected CM event %d on ID 0x%p\n",\r
+ event->event, conn->cm_id);\r
+ conn_recv = NULL;\r
+ break;\r
+ }\r
+\r
+ dapl_os_unlock(&conn->lock);\r
+ if (conn_recv)\r
+ dapls_cr_callback(conn_recv, ib_cm_event, pdata, conn_recv->sp);\r
+}\r
+\r
+/************************ DAPL provider entry points **********************/\r
+\r
+/*\r
+ * dapls_ib_connect\r
+ *\r
+ * Initiate a connection with the passive listener on another node\r
+ *\r
+ * Input:\r
+ * ep_handle,\r
+ * remote_ia_address,\r
+ * remote_conn_qual,\r
+ * prd_size size of private data and structure\r
+ * prd_prt pointer to private data structure\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INSUFFICIENT_RESOURCES\r
+ * DAT_INVALID_PARAMETER\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,\r
+ IN DAT_IA_ADDRESS_PTR r_addr,\r
+ IN DAT_CONN_QUAL r_qual,\r
+ IN DAT_COUNT p_size, IN void *p_data)\r
+{\r
+ struct dapl_ep *ep_ptr = ep_handle;\r
+ struct dapl_cm_id *conn = ep_ptr->cm_handle;\r
+ int ret;\r
+\r
+ /* Sanity check */\r
+ if (NULL == ep_ptr)\r
+ return DAT_SUCCESS;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",\r
+ r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);\r
+\r
+ /* rdma conn and cm_id pre-bound; reference via ep_ptr->cm_handle */\r
+\r
+ /* Setup QP/CM parameters and private data in cm_id */\r
+ (void)dapl_os_memzero(&conn->params, sizeof(conn->params));\r
+ conn->params.responder_resources =\r
+ ep_ptr->param.ep_attr.max_rdma_read_in;\r
+ conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;\r
+ conn->params.flow_control = 1;\r
+ conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;\r
+ conn->params.retry_count = IB_RC_RETRY_COUNT;\r
+ if (p_size) {\r
+ dapl_os_memcpy(conn->p_data, p_data, p_size);\r
+ conn->params.private_data = conn->p_data;\r
+ conn->params.private_data_len = p_size;\r
+ }\r
+\r
+ /* copy in remote address, need a copy for retry attempts */\r
+ dapl_os_memcpy(&conn->r_addr, r_addr, sizeof(*r_addr));\r
+\r
+ /* Resolve remote address, src already bound during QP create */\r
+ ((struct sockaddr_in *)&conn->r_addr)->sin_port = SID_TO_PORT(r_qual);\r
+ ((struct sockaddr_in *)&conn->r_addr)->sin_family = AF_INET;\r
+\r
+ ret = rdma_resolve_addr(conn->cm_id, NULL,\r
+ (struct sockaddr *)&conn->r_addr,\r
+ conn->arp_timeout);\r
+ if (ret) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",\r
+ ret, strerror(errno));\r
+ return dapl_convert_errno(errno, "ib_connect");\r
+ }\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " connect: resolve_addr: cm_id %p -> %s port %d\n",\r
+ conn->cm_id,\r
+ inet_ntoa(((struct sockaddr_in *)&conn->r_addr)->sin_addr),\r
+ ((struct sockaddr_in *)&conn->r_addr)->sin_port);\r
+\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_disconnect\r
+ *\r
+ * Disconnect an EP\r
+ *\r
+ * Input:\r
+ * ep_handle,\r
+ * disconnect_flags\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)\r
+{\r
+ dp_ib_cm_handle_t conn = ep_ptr->cm_handle;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " disconnect(ep %p, conn %p, id %d flags %x)\n",\r
+ ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);\r
+\r
+ if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))\r
+ return DAT_SUCCESS;\r
+\r
+ /* no graceful half-pipe disconnect option */\r
+ rdma_disconnect(conn->cm_id);\r
+\r
+ /* \r
+ * DAT event notification occurs from the callback\r
+ * Note: will fire even if DREQ goes unanswered on timeout \r
+ */\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_disconnect_clean\r
+ *\r
+ * Clean up outstanding connection data. This routine is invoked\r
+ * after the final disconnect callback has occurred. Only on the\r
+ * ACTIVE side of a connection.\r
+ *\r
+ * Input:\r
+ * ep_ptr DAPL_EP\r
+ * active Indicates active side of connection\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * void\r
+ *\r
+ */\r
+void\r
+dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,\r
+ IN DAT_BOOLEAN active,\r
+ IN const ib_cm_events_t ib_cm_event)\r
+{\r
+ /* nothing to do */\r
+ return;\r
+}\r
+\r
+/*\r
+ * dapl_ib_setup_conn_listener\r
+ *\r
+ * Have the CM set up a connection listener.\r
+ *\r
+ * Input:\r
+ * ibm_hca_handle HCA handle\r
+ * qp_handle QP handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INSUFFICIENT_RESOURCES\r
+ * DAT_INTERNAL_ERROR\r
+ * DAT_CONN_QUAL_UNAVAILABLE\r
+ * DAT_CONN_QUAL_IN_USE\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,\r
+ IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr)\r
+{\r
+ DAT_RETURN dat_status = DAT_SUCCESS;\r
+ ib_cm_srvc_handle_t conn;\r
+ DAT_SOCK_ADDR6 addr; /* local binding address */\r
+\r
+ /* Allocate CM and initialize lock */\r
+ if ((conn = dapl_os_alloc(sizeof(*conn))) == NULL)\r
+ return DAT_INSUFFICIENT_RESOURCES;\r
+\r
+ dapl_os_memzero(conn, sizeof(*conn));\r
+ dapl_os_lock_init(&conn->lock);\r
+ conn->refs++;\r
+\r
+ /* create CM_ID, bind to local device, create QP */\r
+ if (rdma_create_id\r
+ (g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {\r
+ dapl_os_free(conn, sizeof(*conn));\r
+ return (dapl_convert_errno(errno, "setup_listener"));\r
+ }\r
+\r
+ /* open identifies the local device; per DAT specification */\r
+ /* Get family and address then set port to consumer's ServiceID */\r
+ dapl_os_memcpy(&addr, &ia_ptr->hca_ptr->hca_address, sizeof(addr));\r
+ ((struct sockaddr_in *)&addr)->sin_port = SID_TO_PORT(ServiceID);\r
+\r
+ if (rdma_bind_addr(conn->cm_id, (struct sockaddr *)&addr)) {\r
+ if ((errno == EBUSY) || (errno == EADDRINUSE) || \r
+ (errno == EADDRNOTAVAIL))\r
+ dat_status = DAT_CONN_QUAL_IN_USE;\r
+ else\r
+ dat_status =\r
+ dapl_convert_errno(errno, "setup_listener");\r
+ goto bail;\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " listen(ia_ptr %p SID 0x%llx Port %d sp %p conn %p id %d)\n",\r
+ ia_ptr, ServiceID, ntohs(SID_TO_PORT(ServiceID)),\r
+ sp_ptr, conn, conn->cm_id);\r
+\r
+ sp_ptr->cm_srvc_handle = conn;\r
+ conn->sp = sp_ptr;\r
+ conn->hca = ia_ptr->hca_ptr;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " listen(conn=%p cm_id=%d)\n",\r
+ sp_ptr->cm_srvc_handle, conn->cm_id);\r
+\r
+ if (rdma_listen(conn->cm_id, 0)) { /* max cma backlog */\r
+\r
+ if ((errno == EBUSY) || (errno == EADDRINUSE) ||\r
+ (errno == EADDRNOTAVAIL))\r
+ dat_status = DAT_CONN_QUAL_IN_USE;\r
+ else\r
+ dat_status =\r
+ dapl_convert_errno(errno, "setup_listener");\r
+ goto bail;\r
+ }\r
+\r
+ /* success */\r
+ return DAT_SUCCESS;\r
+\r
+ bail:\r
+ rdma_destroy_id(conn->cm_id);\r
+ dapl_os_free(conn, sizeof(*conn));\r
+ return dat_status;\r
+}\r
+\r
+/*\r
+ * dapl_ib_remove_conn_listener\r
+ *\r
+ * Have the CM remove a connection listener.\r
+ *\r
+ * Input:\r
+ * ia_handle IA handle\r
+ * ServiceID IB Channel Service ID\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INVALID_STATE\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)\r
+{\r
+ ib_cm_srvc_handle_t conn = sp_ptr->cm_srvc_handle;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",\r
+ ia_ptr, sp_ptr, conn);\r
+\r
+ if (conn != IB_INVALID_HANDLE) {\r
+ sp_ptr->cm_srvc_handle = NULL;\r
+ dapls_ib_cm_free(conn, NULL);\r
+ }\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_accept_connection\r
+ *\r
+ * Perform necessary steps to accept a connection\r
+ *\r
+ * Input:\r
+ * cr_handle\r
+ * ep_handle\r
+ * private_data_size\r
+ * private_data\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INSUFFICIENT_RESOURCES\r
+ * DAT_INTERNAL_ERROR\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,\r
+ IN DAT_EP_HANDLE ep_handle,\r
+ IN DAT_COUNT p_size, IN const DAT_PVOID p_data)\r
+{\r
+ DAPL_CR *cr_ptr = (DAPL_CR *) cr_handle;\r
+ DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;\r
+ DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;\r
+ struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;\r
+ int ret;\r
+ DAT_RETURN dat_status;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " accept(cr %p conn %p, id %p, p_data %p, p_sz=%d)\n",\r
+ cr_ptr, cr_conn, cr_conn->cm_id, p_data, p_size);\r
+\r
+ /* Obtain size of private data structure & contents */\r
+ if (p_size > IB_MAX_REP_PDATA_SIZE) {\r
+ dat_status = DAT_ERROR(DAT_LENGTH_ERROR, DAT_NO_SUBTYPE);\r
+ goto bail;\r
+ }\r
+\r
+ if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) {\r
+ /* \r
+ * If we are lazy attaching the QP then we may need to\r
+ * hook it up here. Typically, we run this code only for\r
+ * DAT_PSP_PROVIDER_FLAG\r
+ */\r
+ dat_status = dapls_ib_qp_alloc(ia_ptr, ep_ptr, NULL);\r
+ if (dat_status != DAT_SUCCESS) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapl_cma_accept: qp_alloc ERR %d\n",\r
+ dat_status);\r
+ goto bail;\r
+ }\r
+ }\r
+\r
+ /* \r
+ * Validate device and port in EP cm_id against inbound \r
+ * CR cm_id. The pre-allocated EP cm_id is already bound to \r
+ * a local device (cm_id and QP) when created. Move the QP\r
+ * to the new cm_id only if device and port numbers match.\r
+ */\r
+ if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&\r
+ ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {\r
+ /* move QP to new cr_conn, remove QP ref in EP cm_id */\r
+ cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;\r
+ ep_ptr->cm_handle->cm_id->qp = NULL;\r
+ dapls_ib_cm_free(ep_ptr->cm_handle, NULL);\r
+ } else {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapl_cma_accept: ERR dev(%p!=%p) or"\r
+ " port mismatch(%d!=%d)\n",\r
+ ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,\r
+ ntohs(ep_ptr->cm_handle->cm_id->port_num),\r
+ ntohs(cr_conn->cm_id->port_num));\r
+ dat_status = DAT_INTERNAL_ERROR;\r
+ goto bail;\r
+ }\r
+\r
+ cr_ptr->param.local_ep_handle = ep_handle;\r
+ cr_conn->params.private_data = p_data;\r
+ cr_conn->params.private_data_len = p_size;\r
+\r
+ ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);\r
+ if (ret) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",\r
+ ret, strerror(errno));\r
+ dat_status = dapl_convert_errno(ret, "accept");\r
+ goto bail;\r
+ }\r
+\r
+ /* save accepted conn and EP reference, qp_handle unchanged */\r
+ ep_ptr->cm_handle = cr_conn;\r
+ cr_conn->ep = ep_ptr;\r
+\r
+ /* setup local and remote ports for ep query */\r
+ /* Note: port qual in network order */\r
+ ep_ptr->param.remote_port_qual =\r
+ PORT_TO_SID(rdma_get_dst_port(cr_conn->cm_id));\r
+ ep_ptr->param.local_port_qual =\r
+ PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));\r
+\r
+ return DAT_SUCCESS;\r
+ bail:\r
+ rdma_reject(cr_conn->cm_id, NULL, 0);\r
+ dapls_ib_cm_free(cr_conn, NULL);\r
+ return dat_status;\r
+}\r
+\r
+/*\r
+ * dapls_ib_reject_connection\r
+ *\r
+ * Reject a connection\r
+ *\r
+ * Input:\r
+ * cr_handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INTERNAL_ERROR\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,\r
+ IN int reason,\r
+ IN DAT_COUNT private_data_size,\r
+ IN const DAT_PVOID private_data)\r
+{\r
+ int ret;\r
+ int offset = sizeof(struct dapl_pdata_hdr);\r
+ struct dapl_pdata_hdr pdata_hdr;\r
+\r
+ memset(&pdata_hdr, 0, sizeof pdata_hdr);\r
+ pdata_hdr.version = htonl((DAT_VERSION_MAJOR << 24) |\r
+ (DAT_VERSION_MINOR << 16) |\r
+ (VN_PROVIDER_MAJOR << 8) |\r
+ (VN_PROVIDER_MINOR));\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " reject: handle %p reason %x, ver=%x, data %p, sz=%d\n",\r
+ cm_handle, reason, ntohl(pdata_hdr.version),\r
+ private_data, private_data_size);\r
+\r
+ if (cm_handle == IB_INVALID_HANDLE) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+ " reject: invalid handle: reason %d\n", reason);\r
+ return DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_CR);\r
+ }\r
+\r
+ if (private_data_size >\r
+ dapls_ib_private_data_size(NULL, DAPL_PDATA_CONN_REJ,\r
+ cm_handle->hca))\r
+ return DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG3);\r
+\r
+ /* setup pdata_hdr and users data, in CR pdata buffer */\r
+ dapl_os_memcpy(cm_handle->p_data, &pdata_hdr, offset);\r
+ if (private_data_size)\r
+ dapl_os_memcpy(cm_handle->p_data + offset,\r
+ private_data, private_data_size);\r
+\r
+ /*\r
+ * Always some private data with reject so active peer can\r
+ * determine real application reject from an abnormal \r
+ * application termination\r
+ */\r
+ ret = rdma_reject(cm_handle->cm_id,\r
+ cm_handle->p_data, offset + private_data_size);\r
+\r
+ dapls_ib_cm_free(cm_handle, NULL);\r
+ return dapl_convert_errno(ret, "reject");\r
+}\r
+\r
+/*\r
+ * dapls_ib_cm_remote_addr\r
+ *\r
+ * Obtain the remote IP address given a connection\r
+ *\r
+ * Input:\r
+ * cr_handle\r
+ *\r
+ * Output:\r
+ * remote_ia_address: where to place the remote address\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INVALID_HANDLE\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)\r
+{\r
+ DAPL_HEADER *header;\r
+ dp_ib_cm_handle_t ib_cm_handle;\r
+ struct rdma_addr *ipaddr;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " remote_addr(cm_handle=%p, r_addr=%p)\n",\r
+ dat_handle, raddr);\r
+\r
+ header = (DAPL_HEADER *) dat_handle;\r
+\r
+ if (header->magic == DAPL_MAGIC_EP)\r
+ ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;\r
+ else if (header->magic == DAPL_MAGIC_CR)\r
+ ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;\r
+ else\r
+ return DAT_INVALID_HANDLE;\r
+\r
+ /* get remote IP address from cm_id route */\r
+ ipaddr = &ib_cm_handle->cm_id->route.addr;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",\r
+ ib_cm_handle, ib_cm_handle->cm_id,\r
+ ntohl(((struct sockaddr_in *)\r
+ &ipaddr->src_addr)->sin_addr.s_addr),\r
+ ntohl(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_addr.s_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &ipaddr->dst_addr)->sin_port));\r
+\r
+ dapl_os_memcpy(raddr, &ipaddr->dst_addr, sizeof(DAT_SOCK_ADDR));\r
+ return DAT_SUCCESS;\r
+}\r
+\r
/*
 * dapls_ib_private_data_size
 *
 * Return the size of private data given a connection op type
 *
 * Input:
 *	prd_ptr		private data pointer
 *	conn_op		connection operation type
 *	hca_ptr		hca pointer, needed for transport type
 *
 * If prd_ptr is NULL, this is a query for the max size supported by
 * the provider, otherwise it is the actual size of the private data
 * contained in prd_ptr.
 *
 * Output:
 *	None
 *
 * Returns:
 *	length of private data
 */
int dapls_ib_private_data_size(IN DAPL_PRIVATE * prd_ptr,
			       IN DAPL_PDATA_OP conn_op, IN DAPL_HCA * hca_ptr)
{
	/* all parameters are ignored: this provider supports one fixed
	 * maximum regardless of op type or transport */
	return RDMA_MAX_PRIVATE_DATA;
}
+\r
/*
 * Map all CMA event codes to the DAT equivalent.
 * Scanned linearly by dapls_ib_get_dat_event (ib -> dat) and
 * dapls_ib_get_cm_event (dat -> ib); first match wins, so order
 * matters for DAT events that appear more than once (e.g. BROKEN).
 */
#define DAPL_IB_EVENT_CNT	13

static struct ib_cm_event_map {
	const ib_cm_events_t ib_cm_event;	/* provider-side CM event */
	DAT_EVENT_NUMBER dat_event_num;		/* DAT event for the consumer */
} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
	/* 00 */ {
	IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
	/* 01 */ {
	IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
	/* 02 */ {
	IB_CME_DISCONNECTED_ON_LINK_DOWN,
		    DAT_CONNECTION_EVENT_DISCONNECTED},
	/* 03 */ {
	IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
	/* 04 */ {
	IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
		    DAT_CONNECTION_REQUEST_EVENT},
	/* 05 */ {
	IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
	/* 06 */ {
	IB_CME_DESTINATION_REJECT,
		    DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
	/* 07 */ {
	IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
		    DAT_CONNECTION_EVENT_PEER_REJECTED},
	/* 08 */ {
	IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
	/* 09 */ {
	IB_CME_TOO_MANY_CONNECTION_REQUESTS,
		    DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
	/* 10 */ {
	IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
	/* 11 */ {
	IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
	/* 12 */ {
	IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
+\r
+/*\r
+ * dapls_ib_get_cm_event\r
+ *\r
+ * Return a DAT connection event given a provider CM event.\r
+ *\r
+ * Input:\r
+ * dat_event_num DAT event we need an equivelent CM event for\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * ib_cm_event of translated DAPL value\r
+ */\r
+DAT_EVENT_NUMBER\r
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,\r
+ IN DAT_BOOLEAN active)\r
+{\r
+ DAT_EVENT_NUMBER dat_event_num;\r
+ int i;\r
+\r
+ active = active;\r
+\r
+ dat_event_num = 0;\r
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {\r
+ if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {\r
+ dat_event_num = ib_cm_event_map[i].dat_event_num;\r
+ break;\r
+ }\r
+ }\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,\r
+ "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",\r
+ active ? "active" : "passive", ib_cm_event, dat_event_num);\r
+\r
+ return dat_event_num;\r
+}\r
+\r
/*
 * dapls_ib_get_cm_event
 *
 * Return the provider CM event equivalent of a DAT connection event —
 * the reverse lookup of dapls_ib_get_dat_event. (The original header
 * here documented dapls_ib_get_dat_event; the comment blocks had been
 * swapped.)
 *
 * Input:
 *	dat_event_num	DAT event we need an equivalent CM event for
 *
 * Output:
 *	none
 *
 * Returns:
 *	ib_cm_event of translated DAT value, 0 if no mapping
 */
ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
{
	ib_cm_events_t ib_cm_event;
	int i;

	/* linear scan of the shared map; first match wins */
	ib_cm_event = 0;
	for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
		if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
			ib_cm_event = ib_cm_event_map[i].ib_cm_event;
			break;
		}
	}
	return ib_cm_event;
}
+\r
+void dapli_cma_event_cb(void)\r
+{\r
+ struct rdma_cm_event *event;\r
+ \r
+ /* process one CM event, fairness, non-blocking */\r
+ if (!rdma_get_cm_event(g_cm_events, &event)) {\r
+ struct dapl_cm_id *conn;\r
+\r
+ /* set proper conn from cm_id context */\r
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)\r
+ conn = (struct dapl_cm_id *)event->listen_id->context;\r
+ else\r
+ conn = (struct dapl_cm_id *)event->id->context;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",\r
+ event->event, event->id, event->listen_id, conn);\r
+ \r
+ /* cm_free is blocked waiting for ack */\r
+ dapl_os_lock(&conn->lock);\r
+ if (!conn->refs) {\r
+ dapl_os_unlock(&conn->lock);\r
+ rdma_ack_cm_event(event);\r
+ return;\r
+ }\r
+ conn->refs++;\r
+ dapl_os_unlock(&conn->lock);\r
+\r
+ switch (event->event) {\r
+ case RDMA_CM_EVENT_ADDR_RESOLVED:\r
+ dapli_addr_resolve(conn);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:\r
+ dapli_route_resolve(conn);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_ADDR_ERROR:\r
+ dapl_log(DAPL_DBG_TYPE_WARN,\r
+ "dapl_cma_active: CM ADDR ERROR: ->"\r
+ " DST %s retry (%d)..\n",\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->r_addr)->sin_addr),\r
+ conn->arp_retries);\r
+\r
+ /* retry address resolution */\r
+ if ((--conn->arp_retries) &&\r
+ (event->status == -ETIMEDOUT)) {\r
+ int ret;\r
+ ret = rdma_resolve_addr(conn->cm_id, NULL,\r
+ (struct sockaddr *)\r
+ &conn->r_addr,\r
+ conn->arp_timeout);\r
+ if (!ret)\r
+ break;\r
+ else {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_WARN,\r
+ " ERROR: rdma_resolve_addr = "\r
+ "%d %s\n",\r
+ ret, strerror(errno));\r
+ }\r
+ }\r
+ /* retries exhausted or resolve_addr failed */\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ "dapl_cma_active: ARP_ERR, retries(%d)"\r
+ " exhausted -> DST %s,%d\n",\r
+ IB_ARP_RETRY_COUNT,\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.dst_addr)->\r
+ sin_port));\r
+\r
+ dapl_evd_connection_callback(conn,\r
+ IB_CME_DESTINATION_UNREACHABLE,\r
+ NULL, conn->ep);\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_ROUTE_ERROR:\r
+ dapl_log(DAPL_DBG_TYPE_WARN,\r
+ "dapl_cma_active: CM ROUTE ERROR: ->"\r
+ " DST %s retry (%d)..\n",\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->r_addr)->sin_addr),\r
+ conn->route_retries);\r
+\r
+ /* retry route resolution */\r
+ if ((--conn->route_retries) &&\r
+ (event->status == -ETIMEDOUT))\r
+ dapli_addr_resolve(conn);\r
+ else {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ "dapl_cma_active: PATH_RECORD_ERR,"\r
+ " retries(%d) exhausted, DST %s,%d\n",\r
+ IB_ROUTE_RETRY_COUNT,\r
+ inet_ntoa(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ dst_addr)->sin_addr),\r
+ ntohs(((struct sockaddr_in *)\r
+ &conn->cm_id->route.addr.\r
+ dst_addr)->sin_port));\r
+\r
+ dapl_evd_connection_callback(conn,\r
+ IB_CME_DESTINATION_UNREACHABLE,\r
+ NULL, conn->ep);\r
+ }\r
+ break;\r
+\r
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:\r
+ dapl_evd_connection_callback(conn,\r
+ IB_CME_LOCAL_FAILURE,\r
+ NULL, conn->ep);\r
+ break;\r
+ case RDMA_CM_EVENT_CONNECT_REQUEST:\r
+ case RDMA_CM_EVENT_CONNECT_ERROR:\r
+ case RDMA_CM_EVENT_UNREACHABLE:\r
+ case RDMA_CM_EVENT_REJECTED:\r
+ case RDMA_CM_EVENT_ESTABLISHED:\r
+ case RDMA_CM_EVENT_DISCONNECTED:\r
+ /* passive or active */\r
+ if (conn->sp)\r
+ dapli_cm_passive_cb(conn, event);\r
+ else\r
+ dapli_cm_active_cb(conn, event);\r
+ break;\r
+ case RDMA_CM_EVENT_CONNECT_RESPONSE:\r
+#ifdef RDMA_CM_EVENT_TIMEWAIT_EXIT\r
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:\r
+#endif\r
+ break;\r
+ default:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,\r
+ " cm_event: UNEXPECTED EVENT=%p ID=%p CTX=%p\n",\r
+ event->event, event->id,\r
+ event->id->context);\r
+ break;\r
+ }\r
+ \r
+ /* ack event, unblocks destroy_cm_id in consumer threads */\r
+ rdma_ack_cm_event(event);\r
+\r
+ dapl_os_lock(&conn->lock);\r
+ conn->refs--;\r
+ dapl_os_unlock(&conn->lock);\r
+ } \r
+}\r
+\r
+/*\r
+ * Local variables:\r
+ * c-indent-level: 4\r
+ * c-basic-offset: 4\r
+ * tab-width: 8\r
+ * End:\r
+ */\r
-/*
- * Copyright (c) 2005-2008 Intel Corporation. All rights reserved.
- *
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/**********************************************************************
- *
- * MODULE: dapl_ib_util.c
- *
- * PURPOSE: OFED provider - init, open, close, utilities, work thread
- *
- * $Id:$
- *
- **********************************************************************/
-
-#ifdef RCSID
-static const char rcsid[] = "$Id: $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
struct rdma_event_channel *g_cm_events = NULL;	/* shared uCMA event channel */
ib_thread_state_t g_ib_thread_state = 0;	/* work-thread lifecycle state */
DAPL_OS_THREAD g_ib_thread;			/* work-thread handle */
DAPL_OS_LOCK g_hca_lock;			/* guards g_hca_list + thread state */
struct dapl_llist_entry *g_hca_list;		/* open HCA transports to poll */
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include <rdma\winverbs.h>
-
-static COMP_SET ufds;
-
/* Resolve an "rdma_devN" pseudo device name to the Nth local address
 * that the WinVerbs provider can translate to an RDMA device.
 * Returns 0 on success; EINVAL for a non-matching name; otherwise an
 * HRESULT or errno-style code.
 * NOTE(review): mixes HRESULT and errno values in one return channel —
 * callers only test for non-zero, but confirm no one decodes the value. */
static int getipaddr_netdev(char *name, char *addr, int addr_len)
{
	IWVProvider *prov;
	WV_DEVICE_ADDRESS devaddr;
	struct addrinfo *res, *ai;
	HRESULT hr;
	int index;

	/* only names of the form rdma_dev<N> are handled here */
	if (strncmp(name, "rdma_dev", 8)) {
		return EINVAL;
	}

	index = atoi(name + 8);

	hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
	if (FAILED(hr)) {
		return hr;
	}

	hr = getaddrinfo("..localmachine", NULL, NULL, &res);
	if (hr) {
		goto release;
	}

	/* walk local addresses; keep the index'th one that translates
	 * to an RDMA device and fits in the caller's buffer */
	for (ai = res; ai; ai = ai->ai_next) {
		hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
		if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
			memcpy(addr, ai->ai_addr, ai->ai_addrlen);
			goto free;
		}
	}
	hr = ENODEV;

free:
	freeaddrinfo(res);
release:
	prov->lpVtbl->Release(prov);
	return hr;
}
-
/* Windows platform glue: completion-set based wait/wakeup, and
 * zero-millisecond (non-blocking) channel configuration. */

/* initialize the shared completion set used by the work thread */
static int dapls_os_init(void)
{
	return CompSetInit(&ufds);
}

static void dapls_os_release(void)
{
	CompSetCleanup(&ufds);
}

/* make CM channel polls non-blocking (0 ms timeout) */
static int dapls_config_cm_channel(struct rdma_event_channel *channel)
{
	channel->channel.Milliseconds = 0;
	return 0;
}

/* make verbs async-event polls non-blocking */
static int dapls_config_verbs(struct ibv_context *verbs)
{
	verbs->channel.Milliseconds = 0;
	return 0;
}

/* make CQ completion-channel polls non-blocking */
static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
{
	channel->comp_channel.Milliseconds = 0;
	return 0;
}

/* wake the work thread out of CompSetPoll */
static int dapls_thread_signal(void)
{
	CompSetCancel(&ufds);
	return 0;
}
-#else // _WIN64 || WIN32
/* self-pipe used to wake the work thread out of poll(); [0]=read, [1]=write */
int g_ib_pipe[2];

static int dapls_os_init(void)
{
	/* create pipe for waking up work thread */
	return pipe(g_ib_pipe);
}

static void dapls_os_release(void)
{
	/* close pipe? — NOTE(review): pipe fds are intentionally(?) left
	 * open for process lifetime; confirm before adding close() calls */
}
-
-/* Get IP address using network device name */
-static int getipaddr_netdev(char *name, char *addr, int addr_len)
-{
- struct ifreq ifr;
- int skfd, ret, len;
-
- /* Fill in the structure */
- snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
- ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;
-
- /* Create a socket fd */
- skfd = socket(PF_INET, SOCK_STREAM, 0);
- ret = ioctl(skfd, SIOCGIFADDR, &ifr);
- if (ret)
- goto bail;
-
- switch (ifr.ifr_addr.sa_family) {
-#ifdef AF_INET6
- case AF_INET6:
- len = sizeof(struct sockaddr_in6);
- break;
-#endif
- case AF_INET:
- default:
- len = sizeof(struct sockaddr);
- break;
- }
-
- if (len <= addr_len)
- memcpy(addr, &ifr.ifr_addr, len);
- else
- ret = EINVAL;
-
- bail:
- close(skfd);
- return ret;
-}
-
-static int dapls_config_fd(int fd)
-{
- int opts;
-
- opts = fcntl(fd, F_GETFL);
- if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
- fd, opts, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
/* make the uCMA event fd non-blocking */
static int dapls_config_cm_channel(struct rdma_event_channel *channel)
{
	return dapls_config_fd(channel->fd);
}

/* make the verbs async-event fd non-blocking */
static int dapls_config_verbs(struct ibv_context *verbs)
{
	return dapls_config_fd(verbs->async_fd);
}

/* make the CQ completion-channel fd non-blocking */
static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
{
	return dapls_config_fd(channel->fd);
}

/* wake the work thread out of poll() via the self-pipe;
 * returns write()'s result (-1 on error, checked by callers).
 * Note: sizeof "w" is 2, so the NUL is written too — harmless,
 * the reader discards the bytes. */
static int dapls_thread_signal(void)
{
	return write(g_ib_pipe[1], "w", sizeof "w");
}
-#endif
-
/*
 * Get IP address using network name, address, or device name.
 *
 * Tries the device-name lookup first, then falls back to
 * getaddrinfo() for hostnames / literal addresses.
 *
 * Returns 0 on success (addr filled in), 1 on failure.
 *
 * Fix: getaddrinfo() failures were reported with strerror(errno);
 * getaddrinfo does not set errno — its return code must be decoded
 * with gai_strerror().
 */
static int getipaddr(char *name, char *addr, int len)
{
	struct addrinfo *res;
	int ret;

	/* assume netdev for first attempt, then network and address type */
	if (getipaddr_netdev(name, addr, len)) {
		ret = getaddrinfo(name, NULL, NULL, &res);
		if (ret) {
			dapl_log(DAPL_DBG_TYPE_ERR,
				 " open_hca: getaddr_netdev ERROR:"
				 " %s. Is %s configured?\n",
				 gai_strerror(ret), name);
			return 1;
		} else {
			/* only the first result is considered */
			if (len >= res->ai_addrlen)
				memcpy(addr, res->ai_addr, res->ai_addrlen);
			else {
				freeaddrinfo(res);
				return 1;
			}
			freeaddrinfo(res);
		}
	}

	dapl_dbg_log(
		DAPL_DBG_TYPE_UTIL,
		" getipaddr: family %d port %d addr %d.%d.%d.%d\n",
		((struct sockaddr_in *)addr)->sin_family,
		((struct sockaddr_in *)addr)->sin_port,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
		((struct sockaddr_in *)addr)->sin_addr.
		s_addr >> 24 & 0xff);

	return 0;
}
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- * none
- *
- * Output:
- * none
- *
- * Returns:
- * 0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");
-
- /* initialize hca_list lock */
- dapl_os_lock_init(&g_hca_lock);
-
- /* initialize hca list for CQ events */
- dapl_llist_init_head(&g_hca_list);
-
- if (dapls_os_init())
- return 1;
-
- return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
- dapli_ib_thread_destroy();
- if (g_cm_events != NULL)
- rdma_destroy_event_channel(g_cm_events);
- dapls_os_release();
- return 0;
-}
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- * *hca_name pointer to provider device name
- * *ib_hca_handle_p pointer to provide HCA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
- struct rdma_cm_id *cm_id = NULL;
- union ibv_gid *gid;
- int ret;
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: %s - %p\n", hca_name, hca_ptr);
-
- /* Setup the global cm event channel */
- dapl_os_lock(&g_hca_lock);
- if (g_cm_events == NULL) {
- g_cm_events = rdma_create_event_channel();
- if (g_cm_events == NULL) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ERR - RDMA channel %s\n",
- strerror(errno));
- dapl_os_unlock(&g_hca_lock);
- return DAT_INTERNAL_ERROR;
- }
- }
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: RDMA channel created (%p)\n", g_cm_events);
-
- /* HCA name will be hostname or IP address */
- if (getipaddr((char *)hca_name,
- (char *)&hca_ptr->hca_address,
- sizeof(DAT_SOCK_ADDR6)))
- return DAT_INVALID_ADDRESS;
-
- /* cm_id will bind local device/GID based on IP address */
- if (rdma_create_id(g_cm_events, &cm_id,
- (void *)hca_ptr, RDMA_PS_TCP)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: rdma_create ERR %s\n", strerror(errno));
- return DAT_INTERNAL_ERROR;
- }
- ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);
- if ((ret) || (cm_id->verbs == NULL)) {
- rdma_destroy_id(cm_id);
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: rdma_bind ERR %s."
- " Is %s configured?\n", strerror(errno), hca_name);
- rdma_destroy_id(cm_id);
- return DAT_INVALID_ADDRESS;
- }
-
- /* keep reference to IB device and cm_id */
- hca_ptr->ib_trans.cm_id = cm_id;
- hca_ptr->ib_hca_handle = cm_id->verbs;
- dapls_config_verbs(cm_id->verbs);
- hca_ptr->port_num = cm_id->port_num;
- hca_ptr->ib_trans.ib_dev = cm_id->verbs->device;
- hca_ptr->ib_trans.ib_ctx = cm_id->verbs;
- gid = &cm_id->route.addr.addr.ibaddr.sgid;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: ctx=%p port=%d GID subnet %016llx"
- " id %016llx\n", cm_id->verbs, cm_id->port_num,
- (unsigned long long)ntohll(gid->global.subnet_prefix),
- (unsigned long long)ntohll(gid->global.interface_id));
-
- /* support for EVD's with CNO's: one channel via thread */
- hca_ptr->ib_trans.ib_cq =
- ibv_create_comp_channel(hca_ptr->ib_hca_handle);
- if (hca_ptr->ib_trans.ib_cq == NULL) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ibv_create_comp_channel ERR %s\n",
- strerror(errno));
- rdma_destroy_id(cm_id);
- return DAT_INTERNAL_ERROR;
- }
- if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {
- rdma_destroy_id(cm_id);
- return DAT_INTERNAL_ERROR;
- }
-
- /* set inline max with env or default, get local lid and gid 0 */
- if (hca_ptr->ib_hca_handle->device->transport_type
- == IBV_TRANSPORT_IWARP)
- hca_ptr->ib_trans.max_inline_send =
- dapl_os_get_env_val("DAPL_MAX_INLINE",
- INLINE_SEND_IWARP_DEFAULT);
- else
- hca_ptr->ib_trans.max_inline_send =
- dapl_os_get_env_val("DAPL_MAX_INLINE",
- INLINE_SEND_IB_DEFAULT);
-
- /* set CM timer defaults */
- hca_ptr->ib_trans.max_cm_timeout =
- dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",
- IB_CM_RESPONSE_TIMEOUT);
- hca_ptr->ib_trans.max_cm_retries =
- dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);
-
- /* set default IB MTU */
- hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048);
-
- dat_status = dapli_ib_thread_init();
- if (dat_status != DAT_SUCCESS)
- return dat_status;
- /*
- * Put new hca_transport on list for async and CQ event processing
- * Wakeup work thread to add to polling list
- */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry);
- dapl_os_lock(&g_hca_lock);
- dapl_llist_add_tail(&g_hca_list,
- (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,
- &hca_ptr->ib_trans.entry);
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: thread wakeup error = %s\n",
- strerror(errno));
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,
- ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_family == AF_INET ?
- "AF_INET" : "AF_INET6",
- ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
- ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
- ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
- ((struct sockaddr_in *)
- &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff,
- hca_ptr->ib_trans.max_inline_send);
-
- return DAT_SUCCESS;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Open HCA
- *
- * Input:
- * DAPL_HCA provide CA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",
- hca_ptr, hca_ptr->ib_hca_handle);
-
- if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
- if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
- return (dapl_convert_errno(errno, "ib_close_device"));
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- }
-
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_RUN) {
- dapl_os_unlock(&g_hca_lock);
- goto bail;
- }
- dapl_os_unlock(&g_hca_lock);
-
- /*
- * Remove hca from async event processing list
- * Wakeup work thread to remove from polling list
- */
- hca_ptr->ib_trans.destroy = 1;
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
-
- /* wait for thread to remove HCA references */
- while (hca_ptr->ib_trans.destroy != 2) {
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy: wait on hca %p destroy\n");
- dapl_os_sleep_usec(1000);
- }
-bail:
- return (DAT_SUCCESS);
-}
-
-
/*
 * dapli_ib_thread_init
 *
 * Create the work thread exactly once and wait until it is running.
 * Returns DAT_SUCCESS immediately if another caller already created
 * (or is creating) it; otherwise busy-waits in 1 ms steps until the
 * thread sets g_ib_thread_state = IB_THREAD_RUN.
 */
DAT_RETURN dapli_ib_thread_init(void)
{
	DAT_RETURN dat_status;

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_init(%d)\n", dapl_os_getpid());

	dapl_os_lock(&g_hca_lock);
	if (g_ib_thread_state != IB_THREAD_INIT) {
		/* already created (or being created) by an earlier open */
		dapl_os_unlock(&g_hca_lock);
		return DAT_SUCCESS;
	}

	/* uCMA events non-blocking */
	if (dapls_config_cm_channel(g_cm_events)) {
		dapl_os_unlock(&g_hca_lock);
		return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
	}

	/* flag CREATE under the lock so concurrent callers bail above */
	g_ib_thread_state = IB_THREAD_CREATE;
	dapl_os_unlock(&g_hca_lock);

	/* create thread to process inbound connect request */
	dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
	if (dat_status != DAT_SUCCESS)
		return (dapl_convert_errno(errno,
					   "create_thread ERR:"
					   " check resource limits"));

	/* wait for thread to start (lock released around each sleep) */
	dapl_os_lock(&g_hca_lock);
	while (g_ib_thread_state != IB_THREAD_RUN) {
		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
			     " ib_thread_init: waiting for ib_thread\n");
		dapl_os_unlock(&g_hca_lock);
		dapl_os_sleep_usec(1000);
		dapl_os_lock(&g_hca_lock);
	}
	dapl_os_unlock(&g_hca_lock);

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_init(%d) exit\n", dapl_os_getpid());

	return DAT_SUCCESS;
}
-
/*
 * dapli_ib_thread_destroy
 *
 * Ask the work thread to exit (IB_THREAD_CANCEL) and poll until it
 * reports IB_THREAD_EXIT, waking it through dapls_thread_signal on
 * every iteration. No-op if the thread is not running.
 */
void dapli_ib_thread_destroy(void)
{
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_destroy(%d)\n", dapl_os_getpid());
	/*
	 * wait for async thread to terminate.
	 * pthread_join would be the correct method
	 * but some applications have some issues
	 */

	/* destroy ib_thread, wait for termination, if not already */
	dapl_os_lock(&g_hca_lock);
	if (g_ib_thread_state != IB_THREAD_RUN)
		goto bail;

	g_ib_thread_state = IB_THREAD_CANCEL;
	/* poll (2 ms steps) until the thread observes CANCEL and exits;
	 * lock is dropped around each sleep so the thread can progress */
	while ((g_ib_thread_state != IB_THREAD_EXIT)) {
		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
			     " ib_thread_destroy: waiting for ib_thread\n");
		if (dapls_thread_signal() == -1)
			dapl_log(DAPL_DBG_TYPE_UTIL,
				 " destroy: thread wakeup error = %s\n",
				 strerror(errno));
		dapl_os_unlock(&g_hca_lock);
		dapl_os_sleep_usec(2000);
		dapl_os_lock(&g_hca_lock);
	}
bail:
	dapl_os_unlock(&g_hca_lock);

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
}
-
-#if defined(_WIN64) || defined(_WIN32)
-/* work thread for uAT, uCM, CQ, and async events */
/* work thread for uAT, uCM, CQ, and async events (Windows variant):
 * rebuilds a completion set each pass from the CM channel plus every
 * open HCA's async and CQ channels, polls it, then dispatches.
 * NOTE(review): uhca[] holds at most 8 HCAs with no bounds check on
 * idx — confirm the provider cannot open more than 8 HCAs. */
void dapli_thread(void *arg)
{
	struct _ib_hca_transport *hca;
	struct _ib_hca_transport *uhca[8];
	COMP_CHANNEL *channel;
	int ret, idx, cnt;

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
		     dapl_os_getpid(), g_ib_thread);

	/* lock is held at the top of every iteration (re-taken in the
	 * for-step) and released before the blocking poll */
	dapl_os_lock(&g_hca_lock);
	for (g_ib_thread_state = IB_THREAD_RUN;
	     g_ib_thread_state == IB_THREAD_RUN;
	     dapl_os_lock(&g_hca_lock)) {

		CompSetZero(&ufds);
		CompSetAdd(&g_cm_events->channel, &ufds);

		/* snapshot the HCA list into uhca[] under the lock */
		idx = 0;
		hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
		    dapl_llist_peek_head(&g_hca_list);

		while (hca) {
			CompSetAdd(&hca->ib_ctx->channel, &ufds);
			CompSetAdd(&hca->ib_cq->comp_channel, &ufds);
			uhca[idx++] = hca;
			hca = dapl_llist_next_entry(&g_hca_list,
						    (DAPL_LLIST_ENTRY *)
						    &hca->entry);
		}
		cnt = idx;

		dapl_os_unlock(&g_hca_lock);
		ret = CompSetPoll(&ufds, INFINITE);

		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
			     " ib_thread(%d) poll_event 0x%x\n",
			     dapl_os_getpid(), ret);

		/* CM events are always drained after a wakeup */
		dapli_cma_event_cb();

		/* check and process ASYNC events, per device; devices
		 * flagged destroy==1 are unlinked and acked with ==2 */
		for (idx = 0; idx < cnt; idx++) {
			if (uhca[idx]->destroy == 1) {
				dapl_os_lock(&g_hca_lock);
				dapl_llist_remove_entry(&g_hca_list,
							(DAPL_LLIST_ENTRY *)
							&uhca[idx]->entry);
				dapl_os_unlock(&g_hca_lock);
				uhca[idx]->destroy = 2;
			} else {
				dapli_cq_event_cb(uhca[idx]);
				dapli_async_event_cb(uhca[idx]);
			}
		}
	}

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
		     dapl_os_getpid());
	g_ib_thread_state = IB_THREAD_EXIT;
	dapl_os_unlock(&g_hca_lock);
}
-#else // _WIN64 || WIN32
-
-/* work thread for uAT, uCM, CQ, and async events */
/* work thread for uAT, uCM, CQ, and async events (Unix variant):
 * poll() across the self-pipe (ufds[0]), the uCMA channel (ufds[1]),
 * and, per open HCA, one async fd and one CQ-channel fd. Each HCA
 * occupies two consecutive ufds/uhca slots starting at index 2. */
void dapli_thread(void *arg)
{
	struct pollfd ufds[__FD_SETSIZE];
	struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
	struct _ib_hca_transport *hca;
	int ret, idx, fds;
	char rbuf[2];

	dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
		     " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",
		     dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],
		     g_cm_events->fd);

	/* Poll across pipe, CM, AT never changes */
	dapl_os_lock(&g_hca_lock);
	g_ib_thread_state = IB_THREAD_RUN;

	ufds[0].fd = g_ib_pipe[0];	/* pipe */
	ufds[0].events = POLLIN;
	ufds[1].fd = g_cm_events->fd;	/* uCMA */
	ufds[1].events = POLLIN;

	while (g_ib_thread_state == IB_THREAD_RUN) {

		/* build ufds after pipe and uCMA events */
		ufds[0].revents = 0;
		ufds[1].revents = 0;
		idx = 1;

		/* Walk HCA list and setup async and CQ events
		 * (under g_hca_lock, held at loop top) */
		if (!dapl_llist_is_empty(&g_hca_list))
			hca = dapl_llist_peek_head(&g_hca_list);
		else
			hca = NULL;

		while (hca) {

			/* uASYNC events */
			ufds[++idx].fd = hca->ib_ctx->async_fd;
			ufds[idx].events = POLLIN;
			ufds[idx].revents = 0;
			uhca[idx] = hca;

			/* CQ events are non-direct with CNO's */
			ufds[++idx].fd = hca->ib_cq->fd;
			ufds[idx].events = POLLIN;
			ufds[idx].revents = 0;
			uhca[idx] = hca;

			dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
				     " ib_thread(%d) poll_fd: hca[%d]=%p,"
				     " async=%d pipe=%d cm=%d \n",
				     dapl_os_getpid(), hca, ufds[idx - 1].fd,
				     ufds[0].fd, ufds[1].fd);

			hca = dapl_llist_next_entry(&g_hca_list,
						    (DAPL_LLIST_ENTRY *)
						    &hca->entry);
		}

		/* unlock, and setup poll (blocks until signaled) */
		fds = idx + 1;
		dapl_os_unlock(&g_hca_lock);
		ret = poll(ufds, fds, -1);
		if (ret <= 0) {
			dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
				     " ib_thread(%d): ERR %s poll\n",
				     dapl_os_getpid(), strerror(errno));
			dapl_os_lock(&g_hca_lock);
			continue;
		}

		dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
			     " ib_thread(%d) poll_event: "
			     " async=0x%x pipe=0x%x cm=0x%x \n",
			     dapl_os_getpid(), ufds[idx].revents,
			     ufds[0].revents, ufds[1].revents);

		/* uCMA events.
		 * NOTE(review): exact compare (== POLLIN) ignores events
		 * where POLLERR/POLLHUP are OR'd in — confirm intended */
		if (ufds[1].revents == POLLIN)
			dapli_cma_event_cb();

		/* check and process CQ and ASYNC events, per device */
		for (idx = 2; idx < fds; idx++) {
			if (ufds[idx].revents == POLLIN) {
				dapli_cq_event_cb(uhca[idx]);
				dapli_async_event_cb(uhca[idx]);
			}
		}

		/* check and process user events, PIPE (wakeup signal) */
		if (ufds[0].revents == POLLIN) {
			if (read(g_ib_pipe[0], rbuf, 2) == -1)
				dapl_log(DAPL_DBG_TYPE_THREAD,
					 " cr_thread: pipe rd err= %s\n",
					 strerror(errno));

			/* cleanup any device on list marked for destroy;
			 * starting at idx 3 still reaches every HCA since
			 * each occupies two consecutive slots (2&3, 4&5, …) */
			for (idx = 3; idx < fds; idx++) {
				if (uhca[idx] && uhca[idx]->destroy == 1) {
					dapl_os_lock(&g_hca_lock);
					dapl_llist_remove_entry(
						&g_hca_list,
						(DAPL_LLIST_ENTRY*)
						&uhca[idx]->entry);
					dapl_os_unlock(&g_hca_lock);
					uhca[idx]->destroy = 2;
				}
			}
		}
		dapl_os_lock(&g_hca_lock);
	}

	dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",
		     dapl_os_getpid());
	g_ib_thread_state = IB_THREAD_EXIT;
	dapl_os_unlock(&g_hca_lock);
}
-#endif
+/*\r
+ * Copyright (c) 2005-2008 Intel Corporation. All rights reserved.\r
+ *\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ * copy of which is available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+\r
+/**********************************************************************\r
+ * \r
+ * MODULE: dapl_ib_util.c\r
+ *\r
+ * PURPOSE: OFED provider - init, open, close, utilities, work thread\r
+ *\r
+ * $Id:$\r
+ *\r
+ **********************************************************************/\r
+\r
+#ifdef RCSID\r
+static const char rcsid[] = "$Id: $";\r
+#endif\r
+\r
+#include "openib_osd.h"\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+#include "dapl_ib_util.h"\r
+#include "dapl_osd.h"\r
+\r
+#include <stdlib.h>\r
+\r
struct rdma_event_channel *g_cm_events = NULL;	/* shared uCMA event channel */
ib_thread_state_t g_ib_thread_state = 0;	/* work-thread lifecycle state */
DAPL_OS_THREAD g_ib_thread;			/* work-thread handle */
DAPL_OS_LOCK g_hca_lock;			/* guards g_hca_list + thread state */
struct dapl_llist_entry *g_hca_list;		/* open HCA transports to poll */
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+#include "..\..\..\..\..\etc\user\comp_channel.cpp"\r
+#include <rdma\winverbs.h>\r
+\r
+static COMP_SET ufds;\r
+\r
/* Resolve a local IP address from a pseudo device name of the form
 * "rdma_devN" (Windows build).  N selects the Nth local address that the
 * WinVerbs provider can translate to an RDMA device address.
 * Returns 0 on success (sockaddr copied into addr), non-zero HRESULT or
 * errno-style value otherwise. */
static int getipaddr_netdev(char *name, char *addr, int addr_len)
{
	IWVProvider *prov;
	WV_DEVICE_ADDRESS devaddr;
	struct addrinfo *res, *ai;
	HRESULT hr;
	int index;

	/* only the "rdma_devN" naming scheme is handled here */
	if (strncmp(name, "rdma_dev", 8)) {
		return EINVAL;
	}

	index = atoi(name + 8);

	hr = WvGetObject(&IID_IWVProvider, (LPVOID *) &prov);
	if (FAILED(hr)) {
		return hr;
	}

	/* "..localmachine" enumerates all local addresses */
	hr = getaddrinfo("..localmachine", NULL, NULL, &res);
	if (hr) {
		goto release;
	}

	/* take the index'th address that both translates to an RDMA
	 * device and fits into the caller's buffer */
	for (ai = res; ai; ai = ai->ai_next) {
		hr = prov->lpVtbl->TranslateAddress(prov, ai->ai_addr, &devaddr);
		if (SUCCEEDED(hr) && (ai->ai_addrlen <= addr_len) && (index-- == 0)) {
			memcpy(addr, ai->ai_addr, ai->ai_addrlen);
			goto free;
		}
	}
	hr = ENODEV;	/* no suitable address at that index */

free:
	freeaddrinfo(res);
release:
	prov->lpVtbl->Release(prov);
	return hr;
}
+\r
/* Per-OS init (Windows): set up the completion set the work thread
 * waits on.  Returns CompSetInit's result (0 on success). */
static int dapls_os_init(void)
{
	return CompSetInit(&ufds);
}
+\r
/* Per-OS cleanup (Windows): tear down the completion set. */
static void dapls_os_release(void)
{
	CompSetCleanup(&ufds);
}
+\r
/* Make the CM event channel non-blocking (0 ms wait).  Always 0. */
static int dapls_config_cm_channel(struct rdma_event_channel *channel)
{
	channel->channel.Milliseconds = 0;
	return 0;
}
+\r
/* Make the verbs async-event channel non-blocking.  Always 0. */
static int dapls_config_verbs(struct ibv_context *verbs)
{
	verbs->channel.Milliseconds = 0;
	return 0;
}
+\r
/* Make the CQ completion channel non-blocking.  Always 0. */
static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
{
	channel->comp_channel.Milliseconds = 0;
	return 0;
}
+\r
/* Wake the work thread by cancelling its pending completion wait. */
static int dapls_thread_signal(void)
{
	CompSetCancel(&ufds);
	return 0;
}
+#else // _WIN64 || WIN32\r
+int g_ib_pipe[2];\r
+\r
/* Per-OS init (POSIX): create the pipe used to wake the work thread
 * out of poll().  Returns 0 on success, -1 on error (per pipe(2)). */
static int dapls_os_init(void)
{
	/* create pipe for waking up work thread */
	return pipe(g_ib_pipe);
}
+\r
/* Per-OS cleanup (POSIX): close both ends of the wake-up pipe created
 * by dapls_os_init().  Called from dapls_ib_release() after the work
 * thread has terminated, so no reader or writer remains.  The original
 * left the descriptors open ("close pipe?"), leaking two fds per
 * init/release cycle. */
static void dapls_os_release(void)
{
	extern int g_ib_pipe[2];	/* defined earlier in this file */

	close(g_ib_pipe[0]);
	close(g_ib_pipe[1]);
}
+\r
+/* Get IP address using network device name */\r
+static int getipaddr_netdev(char *name, char *addr, int addr_len)\r
+{\r
+ struct ifreq ifr;\r
+ int skfd, ret, len;\r
+\r
+ /* Fill in the structure */\r
+ snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);\r
+ ifr.ifr_hwaddr.sa_family = ARPHRD_INFINIBAND;\r
+\r
+ /* Create a socket fd */\r
+ skfd = socket(PF_INET, SOCK_STREAM, 0);\r
+ ret = ioctl(skfd, SIOCGIFADDR, &ifr);\r
+ if (ret)\r
+ goto bail;\r
+\r
+ switch (ifr.ifr_addr.sa_family) {\r
+#ifdef AF_INET6\r
+ case AF_INET6:\r
+ len = sizeof(struct sockaddr_in6);\r
+ break;\r
+#endif\r
+ case AF_INET:\r
+ default:\r
+ len = sizeof(struct sockaddr);\r
+ break;\r
+ }\r
+\r
+ if (len <= addr_len)\r
+ memcpy(addr, &ifr.ifr_addr, len);\r
+ else\r
+ ret = EINVAL;\r
+\r
+ bail:\r
+ close(skfd);\r
+ return ret;\r
+}\r
+\r
+static int dapls_config_fd(int fd)\r
+{\r
+ int opts;\r
+\r
+ opts = fcntl(fd, F_GETFL);\r
+ if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapls_config_fd: fcntl on fd %d ERR %d %s\n",\r
+ fd, opts, strerror(errno));\r
+ return errno;\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
/* Put the CM event channel fd into non-blocking mode. */
static int dapls_config_cm_channel(struct rdma_event_channel *channel)
{
	return dapls_config_fd(channel->fd);
}
+\r
/* Put the verbs async-event fd into non-blocking mode. */
static int dapls_config_verbs(struct ibv_context *verbs)
{
	return dapls_config_fd(verbs->async_fd);
}
+\r
/* Put the CQ completion-channel fd into non-blocking mode. */
static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
{
	return dapls_config_fd(channel->fd);
}
+\r
/* Wake the work thread by writing to the pipe it polls.
 * Note: sizeof "w" is 2, so the terminating NUL is written too; the
 * thread side matches this by reading 2 bytes.  Returns write(2)'s
 * result (-1 on error). */
static int dapls_thread_signal(void)
{
	return write(g_ib_pipe[1], "w", sizeof "w");
}
+#endif\r
+\r
/* Resolve `name` (netdev name, hostname, or IP literal) into a socket
 * address written to addr (len bytes available).  Tries a network-device
 * lookup first, then falls back to getaddrinfo().
 * Returns 0 on success, 1 on failure. */
static int getipaddr(char *name, char *addr, int len)
{
	struct addrinfo *res;

	/* assume netdev for first attempt, then network and address type */
	if (getipaddr_netdev(name, addr, len)) {
		if (getaddrinfo(name, NULL, NULL, &res)) {
			/* NOTE(review): getaddrinfo() does not set errno;
			 * gai_strerror() would be the accurate source here */
			dapl_log(DAPL_DBG_TYPE_ERR,
				 " open_hca: getaddr_netdev ERROR:"
				 " %s. Is %s configured?\n",
				 strerror(errno), name);
			return 1;
		} else {
			/* copy only if the result fits the caller's buffer */
			if (len >= res->ai_addrlen)
				memcpy(addr, res->ai_addr, res->ai_addrlen);
			else {
				freeaddrinfo(res);
				return 1;
			}
			freeaddrinfo(res);
		}
	}

	/* debug dump assumes an IPv4 sockaddr_in layout; for IPv6 only
	 * the first 4 address bytes are shown */
	dapl_dbg_log(
		DAPL_DBG_TYPE_UTIL,
		" getipaddr: family %d port %d addr %d.%d.%d.%d\n",
		((struct sockaddr_in *)addr)->sin_family,
		((struct sockaddr_in *)addr)->sin_port,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 0 & 0xff,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 8 & 0xff,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 16 & 0xff,
		((struct sockaddr_in *)addr)->sin_addr.s_addr >> 24 & 0xff);

	return 0;
}
+\r
/*
 * dapls_ib_init, dapls_ib_release
 *
 * Initialize Verb related items for device open
 *
 * Input:
 *	none
 *
 * Output:
 *	none
 *
 * Returns:
 *	0 success, non-zero (1) on error
 *
 */
int32_t dapls_ib_init(void)
{
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_init: \n");

	/* initialize hca_list lock */
	dapl_os_lock_init(&g_hca_lock);

	/* initialize hca list for CQ events */
	dapl_llist_init_head(&g_hca_list);

	/* OS-specific wakeup mechanism (pipe / completion set) */
	if (dapls_os_init())
		return 1;

	return 0;
}
+\r
/* Release process-wide resources: stop the work thread, destroy the
 * global CM event channel (if created), then run the OS-specific
 * cleanup.  Always returns 0. */
int32_t dapls_ib_release(void)
{
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " dapl_ib_release: \n");
	dapli_ib_thread_destroy();
	if (g_cm_events != NULL)
		rdma_destroy_event_channel(g_cm_events);
	dapls_os_release();
	return 0;
}
+\r
+/*\r
+ * dapls_ib_open_hca\r
+ *\r
+ * Open HCA\r
+ *\r
+ * Input:\r
+ * *hca_name pointer to provider device name\r
+ * *ib_hca_handle_p pointer to provide HCA handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Return:\r
+ * DAT_SUCCESS\r
+ * dapl_convert_errno\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)\r
+{\r
+ struct rdma_cm_id *cm_id = NULL;\r
+ union ibv_gid *gid;\r
+ int ret;\r
+ DAT_RETURN dat_status;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: %s - %p\n", hca_name, hca_ptr);\r
+\r
+ /* Setup the global cm event channel */\r
+ dapl_os_lock(&g_hca_lock);\r
+ if (g_cm_events == NULL) {\r
+ g_cm_events = rdma_create_event_channel();\r
+ if (g_cm_events == NULL) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: ERR - RDMA channel %s\n",\r
+ strerror(errno));\r
+ dapl_os_unlock(&g_hca_lock);\r
+ return DAT_INTERNAL_ERROR;\r
+ }\r
+ }\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: RDMA channel created (%p)\n", g_cm_events);\r
+\r
+ /* HCA name will be hostname or IP address */\r
+ if (getipaddr((char *)hca_name,\r
+ (char *)&hca_ptr->hca_address, \r
+ sizeof(DAT_SOCK_ADDR6)))\r
+ return DAT_INVALID_ADDRESS;\r
+\r
+ /* cm_id will bind local device/GID based on IP address */\r
+ if (rdma_create_id(g_cm_events, &cm_id, \r
+ (void *)hca_ptr, RDMA_PS_TCP)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: rdma_create ERR %s\n", strerror(errno));\r
+ return DAT_INTERNAL_ERROR;\r
+ }\r
+ ret = rdma_bind_addr(cm_id, (struct sockaddr *)&hca_ptr->hca_address);\r
+ if ((ret) || (cm_id->verbs == NULL)) {\r
+ rdma_destroy_id(cm_id);\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: rdma_bind ERR %s."\r
+ " Is %s configured?\n", strerror(errno), hca_name);\r
+ rdma_destroy_id(cm_id);\r
+ return DAT_INVALID_ADDRESS;\r
+ }\r
+\r
+ /* keep reference to IB device and cm_id */\r
+ hca_ptr->ib_trans.cm_id = cm_id;\r
+ hca_ptr->ib_hca_handle = cm_id->verbs;\r
+ dapls_config_verbs(cm_id->verbs);\r
+ hca_ptr->port_num = cm_id->port_num;\r
+ hca_ptr->ib_trans.ib_dev = cm_id->verbs->device;\r
+ hca_ptr->ib_trans.ib_ctx = cm_id->verbs;\r
+ gid = &cm_id->route.addr.addr.ibaddr.sgid;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: ctx=%p port=%d GID subnet %016llx"\r
+ " id %016llx\n", cm_id->verbs, cm_id->port_num,\r
+ (unsigned long long)ntohll(gid->global.subnet_prefix),\r
+ (unsigned long long)ntohll(gid->global.interface_id));\r
+\r
+ /* support for EVD's with CNO's: one channel via thread */\r
+ hca_ptr->ib_trans.ib_cq =\r
+ ibv_create_comp_channel(hca_ptr->ib_hca_handle);\r
+ if (hca_ptr->ib_trans.ib_cq == NULL) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: ibv_create_comp_channel ERR %s\n",\r
+ strerror(errno));\r
+ rdma_destroy_id(cm_id);\r
+ return DAT_INTERNAL_ERROR;\r
+ }\r
+ if (dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq)) {\r
+ rdma_destroy_id(cm_id);\r
+ return DAT_INTERNAL_ERROR;\r
+ }\r
+\r
+ /* set inline max with env or default, get local lid and gid 0 */\r
+ if (hca_ptr->ib_hca_handle->device->transport_type\r
+ == IBV_TRANSPORT_IWARP)\r
+ hca_ptr->ib_trans.max_inline_send =\r
+ dapl_os_get_env_val("DAPL_MAX_INLINE",\r
+ INLINE_SEND_IWARP_DEFAULT);\r
+ else\r
+ hca_ptr->ib_trans.max_inline_send =\r
+ dapl_os_get_env_val("DAPL_MAX_INLINE",\r
+ INLINE_SEND_IB_DEFAULT);\r
+\r
+ /* set CM timer defaults */\r
+ hca_ptr->ib_trans.max_cm_timeout =\r
+ dapl_os_get_env_val("DAPL_MAX_CM_RESPONSE_TIME",\r
+ IB_CM_RESPONSE_TIMEOUT);\r
+ hca_ptr->ib_trans.max_cm_retries =\r
+ dapl_os_get_env_val("DAPL_MAX_CM_RETRIES", IB_CM_RETRIES);\r
+ \r
+ /* set default IB MTU */\r
+ hca_ptr->ib_trans.mtu = dapl_ib_mtu(2048);\r
+\r
+ dat_status = dapli_ib_thread_init();\r
+ if (dat_status != DAT_SUCCESS)\r
+ return dat_status;\r
+ /* \r
+ * Put new hca_transport on list for async and CQ event processing \r
+ * Wakeup work thread to add to polling list\r
+ */\r
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry);\r
+ dapl_os_lock(&g_hca_lock);\r
+ dapl_llist_add_tail(&g_hca_list,\r
+ (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,\r
+ &hca_ptr->ib_trans.entry);\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: %s, %s %d.%d.%d.%d INLINE_MAX=%d\n", hca_name,\r
+ ((struct sockaddr_in *)\r
+ &hca_ptr->hca_address)->sin_family == AF_INET ?\r
+ "AF_INET" : "AF_INET6", \r
+ ((struct sockaddr_in *)\r
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff, \r
+ ((struct sockaddr_in *)\r
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff, \r
+ ((struct sockaddr_in *)\r
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff, \r
+ ((struct sockaddr_in *)\r
+ &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff, \r
+ hca_ptr->ib_trans.max_inline_send);\r
+\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_close_hca\r
+ *\r
+ * Open HCA\r
+ *\r
+ * Input:\r
+ * DAPL_HCA provide CA handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Return:\r
+ * DAT_SUCCESS\r
+ * dapl_convert_errno \r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)\r
+{\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p->%p\n",\r
+ hca_ptr, hca_ptr->ib_hca_handle);\r
+\r
+ dapl_os_lock(&g_hca_lock);\r
+ if (g_ib_thread_state != IB_THREAD_RUN) {\r
+ dapl_os_unlock(&g_hca_lock);\r
+ goto bail;\r
+ }\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ /* \r
+ * Remove hca from async event processing list\r
+ * Wakeup work thread to remove from polling list\r
+ */\r
+ hca_ptr->ib_trans.destroy = 1;\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " destroy: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+\r
+ /* wait for thread to remove HCA references */\r
+ while (hca_ptr->ib_trans.destroy != 2) {\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " destroy: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " ib_thread_destroy: wait on hca %p destroy\n");\r
+ dapl_os_sleep_usec(1000);\r
+ }\r
+bail:\r
+ if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {\r
+ if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))\r
+ return (dapl_convert_errno(errno, "ib_close_device"));\r
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+ }\r
+\r
+ return (DAT_SUCCESS);\r
+}\r
+\r
+\r
/* Start the shared work thread (first successful caller wins) and wait
 * until it reports IB_THREAD_RUN.  Returns DAT_SUCCESS, or an error if
 * the CM channel cannot be made non-blocking or the thread cannot be
 * created. */
DAT_RETURN dapli_ib_thread_init(void)
{
	DAT_RETURN dat_status;

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_init(%d)\n", dapl_os_getpid());

	dapl_os_lock(&g_hca_lock);
	/* already created (or being created) by an earlier open */
	if (g_ib_thread_state != IB_THREAD_INIT) {
		dapl_os_unlock(&g_hca_lock);
		return DAT_SUCCESS;
	}

	/* uCMA events non-blocking */
	if (dapls_config_cm_channel(g_cm_events)) {
		dapl_os_unlock(&g_hca_lock);
		return (dapl_convert_errno(errno, "create_thread ERR: cm_fd"));
	}

	g_ib_thread_state = IB_THREAD_CREATE;
	dapl_os_unlock(&g_hca_lock);

	/* create thread to process inbound connect request */
	dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
	if (dat_status != DAT_SUCCESS)
		return (dapl_convert_errno(errno,
					   "create_thread ERR:"
					   " check resource limits"));

	/* wait for thread to start: poll the state flag under the lock,
	 * dropping it around each 1 ms sleep */
	dapl_os_lock(&g_hca_lock);
	while (g_ib_thread_state != IB_THREAD_RUN) {
		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
			     " ib_thread_init: waiting for ib_thread\n");
		dapl_os_unlock(&g_hca_lock);
		dapl_os_sleep_usec(1000);
		dapl_os_lock(&g_hca_lock);
	}
	dapl_os_unlock(&g_hca_lock);

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_init(%d) exit\n", dapl_os_getpid());

	return DAT_SUCCESS;
}
+\r
/* Ask the work thread to exit and wait (by polling) for it to do so.
 * No-op if the thread is not currently running. */
void dapli_ib_thread_destroy(void)
{
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_destroy(%d)\n", dapl_os_getpid());
	/*
	 * wait for async thread to terminate.
	 * pthread_join would be the correct method
	 * but some applications have some issues
	 */

	/* destroy ib_thread, wait for termination, if not already */
	dapl_os_lock(&g_hca_lock);
	if (g_ib_thread_state != IB_THREAD_RUN)
		goto bail;

	/* request cancel, then keep waking the thread until it
	 * acknowledges by setting IB_THREAD_EXIT */
	g_ib_thread_state = IB_THREAD_CANCEL;
	while ((g_ib_thread_state != IB_THREAD_EXIT)) {
		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
			     " ib_thread_destroy: waiting for ib_thread\n");
		if (dapls_thread_signal() == -1)
			dapl_log(DAPL_DBG_TYPE_UTIL,
				 " destroy: thread wakeup error = %s\n",
				 strerror(errno));
		dapl_os_unlock(&g_hca_lock);
		dapl_os_sleep_usec(2000);
		dapl_os_lock(&g_hca_lock);
	}
bail:
	dapl_os_unlock(&g_hca_lock);

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		     " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
}
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
/* work thread for uAT, uCM, CQ, and async events (Windows build).
 * Rebuilds the completion set each iteration from the global HCA list,
 * blocks in CompSetPoll, then dispatches CM, CQ, and async callbacks
 * and retires HCAs flagged for destroy.
 * NOTE(review): uhca[] holds at most 8 entries but the HCA list length
 * is not bounded here — confirm more than 8 HCAs cannot be open. */
void dapli_thread(void *arg)
{
	struct _ib_hca_transport *hca;
	struct _ib_hca_transport *uhca[8];
	COMP_CHANNEL *channel;
	int ret, idx, cnt;

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
		     dapl_os_getpid(), g_ib_thread);

	/* loop holds g_hca_lock at the top of every iteration; it is
	 * dropped across the blocking poll below */
	dapl_os_lock(&g_hca_lock);
	for (g_ib_thread_state = IB_THREAD_RUN;
	     g_ib_thread_state == IB_THREAD_RUN;
	     dapl_os_lock(&g_hca_lock)) {

		CompSetZero(&ufds);
		CompSetAdd(&g_cm_events->channel, &ufds);

		idx = 0;
		hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
		    dapl_llist_peek_head(&g_hca_list);

		/* add each HCA's async and CQ channels to the set */
		while (hca) {
			CompSetAdd(&hca->ib_ctx->channel, &ufds);
			CompSetAdd(&hca->ib_cq->comp_channel, &ufds);
			uhca[idx++] = hca;
			hca = dapl_llist_next_entry(&g_hca_list,
						    (DAPL_LLIST_ENTRY *)
						    &hca->entry);
		}
		cnt = idx;

		dapl_os_unlock(&g_hca_lock);
		ret = CompSetPoll(&ufds, INFINITE);

		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
			     " ib_thread(%d) poll_event 0x%x\n",
			     dapl_os_getpid(), ret);

		dapli_cma_event_cb();

		/* check and process ASYNC events, per device */
		for (idx = 0; idx < cnt; idx++) {
			if (uhca[idx]->destroy == 1) {
				/* retire HCA flagged by close_hca() */
				dapl_os_lock(&g_hca_lock);
				dapl_llist_remove_entry(&g_hca_list,
							(DAPL_LLIST_ENTRY *)
							&uhca[idx]->entry);
				dapl_os_unlock(&g_hca_lock);
				uhca[idx]->destroy = 2;
			} else {
				dapli_cq_event_cb(uhca[idx]);
				dapli_async_event_cb(uhca[idx]);
			}
		}
	}

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
		     dapl_os_getpid());
	g_ib_thread_state = IB_THREAD_EXIT;
	dapl_os_unlock(&g_hca_lock);
}
+#else // _WIN64 || WIN32\r
+\r
+/* work thread for uAT, uCM, CQ, and async events */\r
+void dapli_thread(void *arg)\r
+{\r
+ struct pollfd ufds[__FD_SETSIZE];\r
+ struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };\r
+ struct _ib_hca_transport *hca;\r
+ int ret, idx, fds;\r
+ char rbuf[2];\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d,0x%x): ENTER: pipe %d ucma %d\n",\r
+ dapl_os_getpid(), g_ib_thread, g_ib_pipe[0],\r
+ g_cm_events->fd);\r
+\r
+ /* Poll across pipe, CM, AT never changes */\r
+ dapl_os_lock(&g_hca_lock);\r
+ g_ib_thread_state = IB_THREAD_RUN;\r
+\r
+ ufds[0].fd = g_ib_pipe[0]; /* pipe */\r
+ ufds[0].events = POLLIN;\r
+ ufds[1].fd = g_cm_events->fd; /* uCMA */\r
+ ufds[1].events = POLLIN;\r
+\r
+ while (g_ib_thread_state == IB_THREAD_RUN) {\r
+\r
+ /* build ufds after pipe and uCMA events */\r
+ ufds[0].revents = 0;\r
+ ufds[1].revents = 0;\r
+ idx = 1;\r
+\r
+ /* Walk HCA list and setup async and CQ events */\r
+ if (!dapl_llist_is_empty(&g_hca_list))\r
+ hca = dapl_llist_peek_head(&g_hca_list);\r
+ else\r
+ hca = NULL;\r
+\r
+ while (hca) {\r
+\r
+ /* uASYNC events */\r
+ ufds[++idx].fd = hca->ib_ctx->async_fd;\r
+ ufds[idx].events = POLLIN;\r
+ ufds[idx].revents = 0;\r
+ uhca[idx] = hca;\r
+\r
+ /* CQ events are non-direct with CNO's */\r
+ ufds[++idx].fd = hca->ib_cq->fd;\r
+ ufds[idx].events = POLLIN;\r
+ ufds[idx].revents = 0;\r
+ uhca[idx] = hca;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d) poll_fd: hca[%d]=%p,"\r
+ " async=%d pipe=%d cm=%d \n",\r
+ dapl_os_getpid(), hca, ufds[idx - 1].fd,\r
+ ufds[0].fd, ufds[1].fd);\r
+\r
+ hca = dapl_llist_next_entry(&g_hca_list,\r
+ (DAPL_LLIST_ENTRY *)\r
+ &hca->entry);\r
+ }\r
+\r
+ /* unlock, and setup poll */\r
+ fds = idx + 1;\r
+ dapl_os_unlock(&g_hca_lock);\r
+ ret = poll(ufds, fds, -1);\r
+ if (ret <= 0) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d): ERR %s poll\n",\r
+ dapl_os_getpid(), strerror(errno));\r
+ dapl_os_lock(&g_hca_lock);\r
+ continue;\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d) poll_event: "\r
+ " async=0x%x pipe=0x%x cm=0x%x \n",\r
+ dapl_os_getpid(), ufds[idx].revents,\r
+ ufds[0].revents, ufds[1].revents);\r
+\r
+ /* uCMA events */\r
+ if (ufds[1].revents == POLLIN)\r
+ dapli_cma_event_cb();\r
+\r
+ /* check and process CQ and ASYNC events, per device */\r
+ for (idx = 2; idx < fds; idx++) {\r
+ if (ufds[idx].revents == POLLIN) {\r
+ dapli_cq_event_cb(uhca[idx]);\r
+ dapli_async_event_cb(uhca[idx]);\r
+ }\r
+ }\r
+\r
+ /* check and process user events, PIPE */\r
+ if (ufds[0].revents == POLLIN) {\r
+ if (read(g_ib_pipe[0], rbuf, 2) == -1)\r
+ dapl_log(DAPL_DBG_TYPE_THREAD,\r
+ " cr_thread: pipe rd err= %s\n",\r
+ strerror(errno));\r
+\r
+ /* cleanup any device on list marked for destroy */\r
+ for (idx = 3; idx < fds; idx++) {\r
+ if (uhca[idx] && uhca[idx]->destroy == 1) {\r
+ dapl_os_lock(&g_hca_lock);\r
+ dapl_llist_remove_entry(\r
+ &g_hca_list,\r
+ (DAPL_LLIST_ENTRY*)\r
+ &uhca[idx]->entry);\r
+ dapl_os_unlock(&g_hca_lock);\r
+ uhca[idx]->destroy = 2;\r
+ }\r
+ }\r
+ }\r
+ dapl_os_lock(&g_hca_lock);\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",\r
+ dapl_os_getpid());\r
+ g_ib_thread_state = IB_THREAD_EXIT;\r
+ dapl_os_unlock(&g_hca_lock);\r
+}\r
+#endif\r
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-
/*
 * dapl_ib_qp_alloc
 *
 * Alloc a QP
 *
 * Input:
 *	*ep_ptr		pointer to EP INFO
 *	ib_hca_handle	provider HCA handle
 *	ib_pd_handle	provider protection domain handle
 *	cq_recv		provider recv CQ handle
 *	cq_send		provider send CQ handle
 *
 * Output:
 * 	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INSUFFICIENT_RESOURCES
 *	DAT_INTERNAL_ERROR
 *
 */
DAT_RETURN
dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
		  IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)
{
	DAT_EP_ATTR *attr;
	DAPL_EVD *rcv_evd, *req_evd;
	ib_cq_handle_t rcv_cq, req_cq;
	ib_pd_handle_t ib_pd_handle;
	struct ibv_qp_init_attr qp_create;
#ifdef _OPENIB_CMA_
	dp_ib_cm_handle_t conn;
#endif
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
		     " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",
		     ia_ptr, ep_ptr, ep_ctx_ptr);

	attr = &ep_ptr->param.ep_attr;
	ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;
	rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;
	req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;

	/*
	 * DAT allows usage model of EP's with no EVD's but IB does not.
	 * Create a CQ with zero entries under the covers to support and
	 * catch any invalid posting.
	 *
	 * NOTE(review): the `!ib_cq_empty` test below selects the cached
	 * handle when it is NULL and creates a new CQ when one is already
	 * cached — this looks inverted; confirm against upstream.
	 */
	if (rcv_evd != DAT_HANDLE_NULL)
		rcv_cq = rcv_evd->ib_cq_handle;
	else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)
		rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;
	else {
		struct ibv_comp_channel *channel;

		channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle);
		if (!channel)
			return (dapl_convert_errno(ENOMEM, "create_cq"));

		/* Call IB verbs to create CQ */
		rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,
				       0, NULL, channel, 0);

		if (rcv_cq == IB_INVALID_HANDLE) {
			ibv_destroy_comp_channel(channel);
			return (dapl_convert_errno(ENOMEM, "create_cq"));
		}

		/* cache for reuse by later EVD-less endpoints */
		ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;
	}
	if (req_evd != DAT_HANDLE_NULL)
		req_cq = req_evd->ib_cq_handle;
	else
		req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;

	/*
	 * IMPLEMENTATION NOTE:
	 * uDAPL allows consumers to post buffers on the EP after creation
	 * and before a connect request (outbound and inbound). This forces
	 * a binding to a device during the hca_open call and requires the
	 * consumer to predetermine which device to listen on or connect from.
	 * This restriction eliminates any option of listening or connecting
	 * over multiple devices. uDAPL should add API's to resolve addresses
	 * and bind to the device at the approriate time (before connect
	 * and after CR arrives). Discovery should happen at connection time
	 * based on addressing and not on static configuration during open.
	 */

#ifdef _OPENIB_CMA_
	/* Allocate CM and initialize lock */
	if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
		return (dapl_convert_errno(ENOMEM, "create_cq"));

	/* open identifies the local device; per DAT specification */
	/* NOTE(review): conn is not freed on this failure path */
	if (rdma_bind_addr(conn->cm_id,
			   (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
		return (dapl_convert_errno(EAFNOSUPPORT, "create_cq"));
#endif
	/* Setup attributes and create qp */
	dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
	qp_create.send_cq = req_cq;
	qp_create.cap.max_send_wr = attr->max_request_dtos;
	qp_create.cap.max_send_sge = attr->max_request_iov;
	qp_create.cap.max_inline_data =
	    ia_ptr->hca_ptr->ib_trans.max_inline_send;
	qp_create.qp_type = IBV_QPT_RC;
	qp_create.qp_context = (void *)ep_ptr;

#ifdef DAT_EXTENSIONS
	/* UD extension: not available in the CMA build; message size is
	 * limited by the configured IB MTU */
	if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {
#ifdef _OPENIB_CMA_
		return (DAT_NOT_IMPLEMENTED);
#endif
		qp_create.qp_type = IBV_QPT_UD;
		if (attr->max_message_size >
		    (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {
			return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);
		}
	}
#endif

	/* ibv assumes rcv_cq is never NULL, set to req_cq */
	if (rcv_cq == NULL) {
		qp_create.recv_cq = req_cq;
		qp_create.cap.max_recv_wr = 0;
		qp_create.cap.max_recv_sge = 0;
	} else {
		qp_create.recv_cq = rcv_cq;
		qp_create.cap.max_recv_wr = attr->max_recv_dtos;
		qp_create.cap.max_recv_sge = attr->max_recv_iov;
	}

#ifdef _OPENIB_CMA_
	if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
		dapls_ib_cm_free(conn, ep_ptr);
		return (dapl_convert_errno(errno, "create_qp"));
	}
	ep_ptr->qp_handle = conn->cm_id->qp;
	ep_ptr->cm_handle = conn;
	ep_ptr->qp_state = IBV_QPS_INIT;

	/* setup up ep->param to reference the bound local address and port */
	ep_ptr->param.local_ia_address_ptr =
	    &conn->cm_id->route.addr.src_addr;
	ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
#else
	ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);
	if (!ep_ptr->qp_handle)
		return (dapl_convert_errno(ENOMEM, "create_qp"));

	/* Setup QP attributes for INIT state on the way out */
	if (dapls_modify_qp_state(ep_ptr->qp_handle,
				  IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) {
		ibv_destroy_qp(ep_ptr->qp_handle);
		ep_ptr->qp_handle = IB_INVALID_HANDLE;
		return DAT_INTERNAL_ERROR;
	}
#endif
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
		     " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
		     ep_ptr->qp_handle->qp_num,
		     qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
		     qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);

	return DAT_SUCCESS;
}
-
/*
 * dapl_ib_qp_free
 *
 * Free a QP
 *
 * Input:
 *	ia_handle	IA handle
 *	*ep_ptr		pointer to EP INFO
 *
 * Output:
 * 	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	dapl_convert_errno
 *
 */
DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
{
	dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free:  ep_ptr %p qp %p\n",
		     ep_ptr, ep_ptr->qp_handle);

	/* release the CM association first */
	if (ep_ptr->cm_handle != NULL) {
		dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
	}

	if (ep_ptr->qp_handle != NULL) {
		/* force error state to flush queue, then destroy */
		dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);

		if (ibv_destroy_qp(ep_ptr->qp_handle))
			return (dapl_convert_errno(errno, "destroy_qp"));

		ep_ptr->qp_handle = NULL;
	}

#ifdef DAT_EXTENSIONS
	/* UD endpoints can have many CR associations and will not
	 * set ep->cm_handle. Call provider with cm_ptr null to incidate
	 * UD type multi CR's for this EP. It will parse internal list
	 * and cleanup all associations.
	 */
	if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD)
		dapls_ib_cm_free(NULL, ep_ptr);
#endif

	return DAT_SUCCESS;
}
-
/*
 * dapl_ib_qp_modify
 *
 * Set the QP to the parameters specified in an EP_PARAM
 *
 * The EP_PARAM structure that is provided has been
 * sanitized such that only non-zero values are valid.
 *
 * Input:
 *	ib_hca_handle		HCA handle
 *	qp_handle		QP handle
 *	ep_attr			Sanitized EP Params
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	DAT_SUCCESS
 *	DAT_INSUFFICIENT_RESOURCES
 *	DAT_INVALID_PARAMETER
 *
 */
DAT_RETURN
dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
		   IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)
{
	struct ibv_qp_attr qp_attr;

	if (ep_ptr->qp_handle == IB_INVALID_HANDLE)
		return DAT_INVALID_PARAMETER;

	/*
	 * EP state, qp_handle state should be an indication
	 * of current state but the only way to be sure is with
	 * a user mode ibv_query_qp call which is NOT available
	 */

	/* move to error state if necessary */
	if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&
	    (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {
		return (dapls_modify_qp_state(ep_ptr->qp_handle,
					      IBV_QPS_ERR, 0, 0, 0));
	}

	/*
	 * Check if we have the right qp_state to modify attributes
	 */
	if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&
	    (ep_ptr->qp_handle->state != IBV_QPS_RTS))
		return DAT_INVALID_STATE;

	/* Adjust to current EP attributes; only the WR/SGE capacities
	 * are modified here (IBV_QP_CAP mask) */
	dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
	qp_attr.cap.max_send_wr = attr->max_request_dtos;
	qp_attr.cap.max_recv_wr = attr->max_recv_dtos;
	qp_attr.cap.max_send_sge = attr->max_request_iov;
	qp_attr.cap.max_recv_sge = attr->max_recv_iov;

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
		     "modify_qp: qp %p sq %d,%d, rq %d,%d\n",
		     ep_ptr->qp_handle,
		     qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,
		     qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);

	if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			     "modify_qp: modify ep %p qp %p failed\n",
			     ep_ptr, ep_ptr->qp_handle);
		return (dapl_convert_errno(errno, "modify_qp_state"));
	}

	return DAT_SUCCESS;
}
-
/*
 * dapls_ib_reinit_ep
 *
 * Move the QP to INIT state again.
 *
 * Input:
 *	ep_ptr		DAPL_EP
 *
 * Output:
 * 	none
 *
 * Returns:
 * 	void
 *
 */
#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_)
void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
{
	/* work around bug in low level driver - 3/24/09 */
	/* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
	/* so recreate the QP instead of transitioning it in place */
	if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
		dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
		dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
	}
}
#else				// _WIN32 || _WIN64
void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
{
	/* UD QPs are excluded: they are shared across CR associations
	 * and stay in RTS */
	if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&
	    ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {
		/* move to RESET state and then to INIT */
		dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0);
		dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0);
	}
}
#endif				// _WIN32 || _WIN64
-
/*
 * Generic QP modify for init, reset, error, RTS, RTR
 * For UD, create_ah on RTR, qkey on INIT
 * CM msg provides QP attributes, info in network order:
 * qpn, lid, and gid arrive network-ordered and are converted here.
 */
DAT_RETURN
dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,
		      IN ib_qp_state_t qp_state,
		      IN uint32_t qpn,
		      IN uint16_t lid,
		      IN ib_gid_handle_t gid)
{
	struct ibv_qp_attr qp_attr;
	enum ibv_qp_attr_mask mask = IBV_QP_STATE;
	DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;
	DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
	int ret;

	dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
	qp_attr.qp_state = qp_state;

	switch (qp_state) {
	case IBV_QPS_RTR:
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
			     " QPS_RTR: type %d qpn 0x%x lid 0x%x"
			     " port %d ep %p qp_state %d \n",
			     qp_handle->qp_type,
			     ntohl(qpn), ntohs(lid),
			     ia_ptr->hca_ptr->port_num,
			     ep_ptr, ep_ptr->qp_state);

		/* RC RTR needs the remote path and responder resources */
		mask |= IBV_QP_AV |
		    IBV_QP_PATH_MTU |
		    IBV_QP_DEST_QPN |
		    IBV_QP_RQ_PSN |
		    IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;

		qp_attr.dest_qp_num = ntohl(qpn);
		qp_attr.rq_psn = 1;
		qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;
		qp_attr.max_dest_rd_atomic =
		    ep_ptr->param.ep_attr.max_rdma_read_out;
		qp_attr.min_rnr_timer =
		    ia_ptr->hca_ptr->ib_trans.rnr_timer;

		/* address handle. RC and UD */
		qp_attr.ah_attr.dlid = ntohs(lid);
		if (ia_ptr->hca_ptr->ib_trans.global) {
			/* GRH needed when routing between subnets */
			qp_attr.ah_attr.is_global = 1;
			qp_attr.ah_attr.grh.dgid.global.subnet_prefix =
			    ntohll(gid->global.subnet_prefix);
			qp_attr.ah_attr.grh.dgid.global.interface_id =
			    ntohll(gid->global.interface_id);
			qp_attr.ah_attr.grh.hop_limit =
			    ia_ptr->hca_ptr->ib_trans.hop_limit;
			qp_attr.ah_attr.grh.traffic_class =
			    ia_ptr->hca_ptr->ib_trans.tclass;
		}
		qp_attr.ah_attr.sl = 0;
		qp_attr.ah_attr.src_path_bits = 0;
		qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;

		/* UD: already in RTR, RTS state */
		if (qp_handle->qp_type == IBV_QPT_UD) {
			mask = IBV_QP_STATE;
			if (ep_ptr->qp_state == IBV_QPS_RTR ||
			    ep_ptr->qp_state == IBV_QPS_RTS)
				return DAT_SUCCESS;
		}
		break;
	case IBV_QPS_RTS:
		/* RC RTS adds timeout/retry and initiator depth */
		if (qp_handle->qp_type == IBV_QPT_RC) {
			mask |= IBV_QP_SQ_PSN |
			    IBV_QP_TIMEOUT |
			    IBV_QP_RETRY_CNT |
			    IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;
			qp_attr.timeout =
			    ia_ptr->hca_ptr->ib_trans.ack_timer;
			qp_attr.retry_cnt =
			    ia_ptr->hca_ptr->ib_trans.ack_retry;
			qp_attr.rnr_retry =
			    ia_ptr->hca_ptr->ib_trans.rnr_retry;
			qp_attr.max_rd_atomic =
			    ep_ptr->param.ep_attr.max_rdma_read_out;
		}
		/* RC and UD */
		qp_attr.qp_state = IBV_QPS_RTS;
		qp_attr.sq_psn = 1;

		dapl_dbg_log(DAPL_DBG_TYPE_EP,
			     " QPS_RTS: psn %x rd_atomic %d ack %d "
			     " retry %d rnr_retry %d ep %p qp_state %d\n",
			     qp_attr.sq_psn, qp_attr.max_rd_atomic,
			     qp_attr.timeout, qp_attr.retry_cnt,
			     qp_attr.rnr_retry, ep_ptr,
			     ep_ptr->qp_state);

		if (qp_handle->qp_type == IBV_QPT_UD) {
			/* already RTS, multi remote AH's on QP */
			if (ep_ptr->qp_state == IBV_QPS_RTS)
				return DAT_SUCCESS;
			else
				mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
		}
		break;
	case IBV_QPS_INIT:
		mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;
		if (qp_handle->qp_type == IBV_QPT_RC) {
			/* full access flags for RC endpoints */
			mask |= IBV_QP_ACCESS_FLAGS;
			qp_attr.qp_access_flags =
			    IBV_ACCESS_LOCAL_WRITE |
			    IBV_ACCESS_REMOTE_WRITE |
			    IBV_ACCESS_REMOTE_READ |
			    IBV_ACCESS_REMOTE_ATOMIC |
			    IBV_ACCESS_MW_BIND;
		}

		if (qp_handle->qp_type == IBV_QPT_UD) {
			/* already INIT, multi remote AH's on QP */
			if (ep_ptr->qp_state == IBV_QPS_INIT)
				return DAT_SUCCESS;
			mask |= IBV_QP_QKEY;
			qp_attr.qkey = DAT_UD_QKEY;
		}

		qp_attr.pkey_index = 0;
		qp_attr.port_num = ia_ptr->hca_ptr->port_num;

		dapl_dbg_log(DAPL_DBG_TYPE_EP,
			     " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",
			     qp_attr.pkey_index, qp_attr.port_num,
			     qp_attr.qp_access_flags, qp_attr.qkey);
		break;
	default:
		/* RESET/ERR and others: state-only transition */
		break;
	}

	ret = ibv_modify_qp(qp_handle, &qp_attr, mask);
	if (ret == 0) {
		/* track the new state in the EP shadow copy */
		ep_ptr->qp_state = qp_state;
		return DAT_SUCCESS;
	} else {
		return (dapl_convert_errno(errno, "modify_qp_state"));
	}
}
-
-/* Modify UD type QP from init, rtr, rts, info network order */
-DAT_RETURN
-dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)
-{
- struct ibv_qp_attr qp_attr;
-
- /* modify QP, setup and prepost buffers */
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.qp_state = IBV_QPS_INIT;
- qp_attr.pkey_index = 0;
- qp_attr.port_num = hca->port_num;
- qp_attr.qkey = DAT_UD_QKEY;
- if (ibv_modify_qp(qp, &qp_attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_QKEY)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " modify_ud_qp INIT: ERR %s\n", strerror(errno));
- return (dapl_convert_errno(errno, "modify_qp"));
- }
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.qp_state = IBV_QPS_RTR;
- if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " modify_ud_qp RTR: ERR %s\n", strerror(errno));
- return (dapl_convert_errno(errno, "modify_qp"));
- }
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.qp_state = IBV_QPS_RTS;
- qp_attr.sq_psn = 1;
- if (ibv_modify_qp(qp, &qp_attr,
- IBV_QP_STATE | IBV_QP_SQ_PSN)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " modify_ud_qp RTS: ERR %s\n", strerror(errno));
- return (dapl_convert_errno(errno, "modify_qp"));
- }
- return DAT_SUCCESS;
-}
-
-/* Create address handle for remote QP, info in network order */
-ib_ah_handle_t
-dapls_create_ah(IN DAPL_HCA *hca,
- IN ib_pd_handle_t pd,
- IN ib_qp_handle_t qp,
- IN uint16_t lid,
- IN ib_gid_handle_t gid)
-{
- struct ibv_qp_attr qp_attr;
- ib_ah_handle_t ah;
-
- if (qp->qp_type != IBV_QPT_UD)
- return NULL;
-
- dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
- qp_attr.qp_state = IBV_QP_STATE;
-
- /* address handle. RC and UD */
- qp_attr.ah_attr.dlid = ntohs(lid);
- if (gid != NULL) {
- dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n");
- qp_attr.ah_attr.is_global = 1;
- qp_attr.ah_attr.grh.dgid.global.subnet_prefix =
- ntohll(gid->global.subnet_prefix);
- qp_attr.ah_attr.grh.dgid.global.interface_id =
- ntohll(gid->global.interface_id);
- qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit;
- qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass;
- }
- qp_attr.ah_attr.sl = 0;
- qp_attr.ah_attr.src_path_bits = 0;
- qp_attr.ah_attr.port_num = hca->port_num;
-
- dapl_log(DAPL_DBG_TYPE_CM,
- " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n",
- hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);
-
- /* UD: create AH for remote side */
- ah = ibv_create_ah(pd, &qp_attr.ah_attr);
- if (!ah) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " create_ah: ERR %s\n", strerror(errno));
- return NULL;
- }
-
- dapl_log(DAPL_DBG_TYPE_CM,
- " dapls_create_ah: AH %p for lid %x\n",
- ah, qp_attr.ah_attr.dlid);
-
- return ah;
-}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * c-basic-offset: 4
- * tab-width: 8
- * End:
- */
+/*\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ * copy of which is available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+\r
+/*\r
+ * dapl_ib_qp_alloc\r
+ *\r
+ * Alloc a QP\r
+ *\r
+ * Input:\r
+ * *ep_ptr pointer to EP INFO\r
+ * ib_hca_handle provider HCA handle\r
+ * ib_pd_handle provider protection domain handle\r
+ * cq_recv provider recv CQ handle\r
+ * cq_send provider send CQ handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INSUFFICIENT_RESOURCES\r
+ * DAT_INTERNAL_ERROR\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,\r
+ IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr)\r
+{\r
+ DAT_EP_ATTR *attr;\r
+ DAPL_EVD *rcv_evd, *req_evd;\r
+ ib_cq_handle_t rcv_cq, req_cq;\r
+ ib_pd_handle_t ib_pd_handle;\r
+ struct ibv_qp_init_attr qp_create;\r
+#ifdef _OPENIB_CMA_\r
+ dp_ib_cm_handle_t conn;\r
+#endif\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n",\r
+ ia_ptr, ep_ptr, ep_ctx_ptr);\r
+\r
+ attr = &ep_ptr->param.ep_attr;\r
+ ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle;\r
+ rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle;\r
+ req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle;\r
+\r
+ /* \r
+ * DAT allows usage model of EP's with no EVD's but IB does not. \r
+ * Create a CQ with zero entries under the covers to support and \r
+ * catch any invalid posting. \r
+ */\r
+ if (rcv_evd != DAT_HANDLE_NULL)\r
+ rcv_cq = rcv_evd->ib_cq_handle;\r
+ else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty)\r
+ rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;\r
+ else {\r
+ struct ibv_comp_channel *channel;\r
+\r
+ channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle);\r
+ if (!channel)\r
+ return (dapl_convert_errno(ENOMEM, "create_cq"));\r
+ \r
+ /* Call IB verbs to create CQ */\r
+ rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle,\r
+ 0, NULL, channel, 0);\r
+\r
+ if (rcv_cq == IB_INVALID_HANDLE) {\r
+ ibv_destroy_comp_channel(channel);\r
+ return (dapl_convert_errno(ENOMEM, "create_cq"));\r
+ }\r
+\r
+ ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq;\r
+ }\r
+ if (req_evd != DAT_HANDLE_NULL)\r
+ req_cq = req_evd->ib_cq_handle;\r
+ else\r
+ req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty;\r
+\r
+ /* \r
+ * IMPLEMENTATION NOTE:\r
+ * uDAPL allows consumers to post buffers on the EP after creation\r
+ * and before a connect request (outbound and inbound). This forces\r
+ * a binding to a device during the hca_open call and requires the\r
+ * consumer to predetermine which device to listen on or connect from.\r
+ * This restriction eliminates any option of listening or connecting \r
+ * over multiple devices. uDAPL should add API's to resolve addresses \r
+ * and bind to the device at the appropriate time (before connect \r
+ * and after CR arrives). Discovery should happen at connection time \r
+ * based on addressing and not on static configuration during open.\r
+ */\r
+\r
+#ifdef _OPENIB_CMA_\r
+ /* Allocate CM and initialize lock */\r
+ if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)\r
+ return (dapl_convert_errno(ENOMEM, "create_cq"));\r
+\r
+ /* open identifies the local device; per DAT specification */\r
+ if (rdma_bind_addr(conn->cm_id,\r
+ (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))\r
+ return (dapl_convert_errno(EAFNOSUPPORT, "create_cq"));\r
+#endif\r
+ /* Setup attributes and create qp */\r
+ dapl_os_memzero((void *)&qp_create, sizeof(qp_create));\r
+ qp_create.send_cq = req_cq;\r
+ qp_create.cap.max_send_wr = attr->max_request_dtos;\r
+ qp_create.cap.max_send_sge = attr->max_request_iov;\r
+ qp_create.cap.max_inline_data =\r
+ ia_ptr->hca_ptr->ib_trans.max_inline_send;\r
+ qp_create.qp_type = IBV_QPT_RC;\r
+ qp_create.qp_context = (void *)ep_ptr;\r
+\r
+#ifdef DAT_EXTENSIONS \r
+ if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) {\r
+#ifdef _OPENIB_CMA_\r
+ return (DAT_NOT_IMPLEMENTED);\r
+#endif\r
+ qp_create.qp_type = IBV_QPT_UD;\r
+ if (attr->max_message_size >\r
+ (128 << ia_ptr->hca_ptr->ib_trans.mtu)) {\r
+ return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6);\r
+ }\r
+ }\r
+#endif\r
+ \r
+ /* ibv assumes rcv_cq is never NULL, set to req_cq */\r
+ if (rcv_cq == NULL) {\r
+ qp_create.recv_cq = req_cq;\r
+ qp_create.cap.max_recv_wr = 0;\r
+ qp_create.cap.max_recv_sge = 0;\r
+ } else {\r
+ qp_create.recv_cq = rcv_cq;\r
+ qp_create.cap.max_recv_wr = attr->max_recv_dtos;\r
+ qp_create.cap.max_recv_sge = attr->max_recv_iov;\r
+ }\r
+\r
+#ifdef _OPENIB_CMA_\r
+ if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {\r
+ dapls_ib_cm_free(conn, ep_ptr);\r
+ return (dapl_convert_errno(errno, "create_qp"));\r
+ }\r
+ ep_ptr->qp_handle = conn->cm_id->qp;\r
+ ep_ptr->cm_handle = conn;\r
+ ep_ptr->qp_state = IBV_QPS_INIT;\r
+\r
+ ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);\r
+#else\r
+ ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create);\r
+ if (!ep_ptr->qp_handle)\r
+ return (dapl_convert_errno(ENOMEM, "create_qp"));\r
+ \r
+ /* Setup QP attributes for INIT state on the way out */\r
+ if (dapls_modify_qp_state(ep_ptr->qp_handle,\r
+ IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) {\r
+ ibv_destroy_qp(ep_ptr->qp_handle);\r
+ ep_ptr->qp_handle = IB_INVALID_HANDLE;\r
+ return DAT_INTERNAL_ERROR;\r
+ }\r
+#endif\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",\r
+ ep_ptr->qp_handle->qp_num,\r
+ qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,\r
+ qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);\r
+\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapl_ib_qp_free\r
+ *\r
+ * Free a QP\r
+ *\r
+ * Input:\r
+ * ia_handle IA handle\r
+ * *ep_ptr pointer to EP INFO\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * dapl_convert_errno\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)\r
+{\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free: ep_ptr %p qp %p\n",\r
+ ep_ptr, ep_ptr->qp_handle);\r
+\r
+ if (ep_ptr->cm_handle != NULL) {\r
+ dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);\r
+ }\r
+ \r
+ if (ep_ptr->qp_handle != NULL) {\r
+ /* force error state to flush queue, then destroy */\r
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);\r
+\r
+ if (ibv_destroy_qp(ep_ptr->qp_handle))\r
+ return (dapl_convert_errno(errno, "destroy_qp"));\r
+\r
+ ep_ptr->qp_handle = NULL;\r
+ }\r
+\r
+#ifdef DAT_EXTENSIONS\r
+ /* UD endpoints can have many CR associations and will not\r
+ * set ep->cm_handle. Call provider with cm_ptr null to indicate\r
+ * UD type multi CR's for this EP. It will parse internal list\r
+ * and cleanup all associations.\r
+ */\r
+ if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD) \r
+ dapls_ib_cm_free(NULL, ep_ptr);\r
+#endif\r
+\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapl_ib_qp_modify\r
+ *\r
+ * Set the QP to the parameters specified in an EP_PARAM\r
+ *\r
+ * The EP_PARAM structure that is provided has been\r
+ * sanitized such that only non-zero values are valid.\r
+ *\r
+ * Input:\r
+ * ib_hca_handle HCA handle\r
+ * qp_handle QP handle\r
+ * ep_attr Sanitized EP Params\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * DAT_SUCCESS\r
+ * DAT_INSUFFICIENT_RESOURCES\r
+ * DAT_INVALID_PARAMETER\r
+ *\r
+ */\r
+DAT_RETURN\r
+dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,\r
+ IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr)\r
+{\r
+ struct ibv_qp_attr qp_attr;\r
+\r
+ if (ep_ptr->qp_handle == IB_INVALID_HANDLE)\r
+ return DAT_INVALID_PARAMETER;\r
+\r
+ /* \r
+ * EP state, qp_handle state should be an indication\r
+ * of current state but the only way to be sure is with\r
+ * a user mode ibv_query_qp call which is NOT available \r
+ */\r
+\r
+ /* move to error state if necessary */\r
+ if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) &&\r
+ (ep_ptr->qp_handle->state != IBV_QPS_ERR)) {\r
+ return (dapls_modify_qp_state(ep_ptr->qp_handle, \r
+ IBV_QPS_ERR, 0, 0, 0));\r
+ }\r
+\r
+ /*\r
+ * Check if we have the right qp_state to modify attributes\r
+ */\r
+ if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) &&\r
+ (ep_ptr->qp_handle->state != IBV_QPS_RTS))\r
+ return DAT_INVALID_STATE;\r
+\r
+ /* Adjust to current EP attributes */\r
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+ qp_attr.cap.max_send_wr = attr->max_request_dtos;\r
+ qp_attr.cap.max_recv_wr = attr->max_recv_dtos;\r
+ qp_attr.cap.max_send_sge = attr->max_request_iov;\r
+ qp_attr.cap.max_recv_sge = attr->max_recv_iov;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ "modify_qp: qp %p sq %d,%d, rq %d,%d\n",\r
+ ep_ptr->qp_handle,\r
+ qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge,\r
+ qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge);\r
+\r
+ if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+ "modify_qp: modify ep %p qp %p failed\n",\r
+ ep_ptr, ep_ptr->qp_handle);\r
+ return (dapl_convert_errno(errno, "modify_qp_state"));\r
+ }\r
+\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/*\r
+ * dapls_ib_reinit_ep\r
+ *\r
+ * Move the QP to INIT state again.\r
+ *\r
+ * Input:\r
+ * ep_ptr DAPL_EP\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * void\r
+ *\r
+ */\r
+#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_)\r
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)\r
+{\r
+ /* work around bug in low level driver - 3/24/09 */\r
+ /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */\r
+ if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {\r
+ dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);\r
+ dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);\r
+ }\r
+}\r
+#else // _WIN32 || _WIN64\r
+void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)\r
+{\r
+ if (ep_ptr->qp_handle != IB_INVALID_HANDLE &&\r
+ ep_ptr->qp_handle->qp_type != IBV_QPT_UD) {\r
+ /* move to RESET state and then to INIT */\r
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0);\r
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0);\r
+ }\r
+}\r
+#endif // _WIN32 || _WIN64\r
+\r
+/* \r
+ * Generic QP modify for init, reset, error, RTS, RTR\r
+ * For UD, create_ah on RTR, qkey on INIT\r
+ * CM msg provides QP attributes, info in network order\r
+ */\r
+DAT_RETURN\r
+dapls_modify_qp_state(IN ib_qp_handle_t qp_handle,\r
+ IN ib_qp_state_t qp_state, \r
+ IN uint32_t qpn,\r
+ IN uint16_t lid,\r
+ IN ib_gid_handle_t gid)\r
+{\r
+ struct ibv_qp_attr qp_attr;\r
+ enum ibv_qp_attr_mask mask = IBV_QP_STATE;\r
+ DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context;\r
+ DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;\r
+ int ret;\r
+\r
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+ qp_attr.qp_state = qp_state;\r
+ \r
+ switch (qp_state) {\r
+ case IBV_QPS_RTR:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " QPS_RTR: type %d qpn 0x%x lid 0x%x"\r
+ " port %d ep %p qp_state %d \n",\r
+ qp_handle->qp_type, \r
+ ntohl(qpn), ntohs(lid), \r
+ ia_ptr->hca_ptr->port_num,\r
+ ep_ptr, ep_ptr->qp_state);\r
+\r
+ mask |= IBV_QP_AV |\r
+ IBV_QP_PATH_MTU |\r
+ IBV_QP_DEST_QPN |\r
+ IBV_QP_RQ_PSN |\r
+ IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER;\r
+\r
+ qp_attr.dest_qp_num = ntohl(qpn);\r
+ qp_attr.rq_psn = 1;\r
+ qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu;\r
+ qp_attr.max_dest_rd_atomic =\r
+ ep_ptr->param.ep_attr.max_rdma_read_out;\r
+ qp_attr.min_rnr_timer =\r
+ ia_ptr->hca_ptr->ib_trans.rnr_timer;\r
+\r
+ /* address handle. RC and UD */\r
+ qp_attr.ah_attr.dlid = ntohs(lid);\r
+ if (ia_ptr->hca_ptr->ib_trans.global) {\r
+ qp_attr.ah_attr.is_global = 1;\r
+ qp_attr.ah_attr.grh.dgid.global.subnet_prefix = \r
+ ntohll(gid->global.subnet_prefix);\r
+ qp_attr.ah_attr.grh.dgid.global.interface_id = \r
+ ntohll(gid->global.interface_id);\r
+ qp_attr.ah_attr.grh.hop_limit =\r
+ ia_ptr->hca_ptr->ib_trans.hop_limit;\r
+ qp_attr.ah_attr.grh.traffic_class =\r
+ ia_ptr->hca_ptr->ib_trans.tclass;\r
+ }\r
+ qp_attr.ah_attr.sl = 0;\r
+ qp_attr.ah_attr.src_path_bits = 0;\r
+ qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num;\r
+\r
+ /* UD: already in RTR, RTS state */\r
+ if (qp_handle->qp_type == IBV_QPT_UD) {\r
+ mask = IBV_QP_STATE;\r
+ if (ep_ptr->qp_state == IBV_QPS_RTR ||\r
+ ep_ptr->qp_state == IBV_QPS_RTS)\r
+ return DAT_SUCCESS;\r
+ }\r
+ break;\r
+ case IBV_QPS_RTS:\r
+ if (qp_handle->qp_type == IBV_QPT_RC) {\r
+ mask |= IBV_QP_SQ_PSN |\r
+ IBV_QP_TIMEOUT |\r
+ IBV_QP_RETRY_CNT |\r
+ IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC;\r
+ qp_attr.timeout =\r
+ ia_ptr->hca_ptr->ib_trans.ack_timer;\r
+ qp_attr.retry_cnt =\r
+ ia_ptr->hca_ptr->ib_trans.ack_retry;\r
+ qp_attr.rnr_retry =\r
+ ia_ptr->hca_ptr->ib_trans.rnr_retry;\r
+ qp_attr.max_rd_atomic =\r
+ ep_ptr->param.ep_attr.max_rdma_read_out;\r
+ }\r
+ /* RC and UD */\r
+ qp_attr.qp_state = IBV_QPS_RTS;\r
+ qp_attr.sq_psn = 1;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " QPS_RTS: psn %x rd_atomic %d ack %d "\r
+ " retry %d rnr_retry %d ep %p qp_state %d\n",\r
+ qp_attr.sq_psn, qp_attr.max_rd_atomic,\r
+ qp_attr.timeout, qp_attr.retry_cnt,\r
+ qp_attr.rnr_retry, ep_ptr,\r
+ ep_ptr->qp_state);\r
+\r
+ if (qp_handle->qp_type == IBV_QPT_UD) {\r
+ /* already RTS, multi remote AH's on QP */\r
+ if (ep_ptr->qp_state == IBV_QPS_RTS)\r
+ return DAT_SUCCESS;\r
+ else\r
+ mask = IBV_QP_STATE | IBV_QP_SQ_PSN;\r
+ }\r
+ break;\r
+ case IBV_QPS_INIT:\r
+ mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT;\r
+ if (qp_handle->qp_type == IBV_QPT_RC) {\r
+ mask |= IBV_QP_ACCESS_FLAGS;\r
+ qp_attr.qp_access_flags =\r
+ IBV_ACCESS_LOCAL_WRITE |\r
+ IBV_ACCESS_REMOTE_WRITE |\r
+ IBV_ACCESS_REMOTE_READ |\r
+ IBV_ACCESS_REMOTE_ATOMIC |\r
+ IBV_ACCESS_MW_BIND;\r
+ }\r
+\r
+ if (qp_handle->qp_type == IBV_QPT_UD) {\r
+ /* already INIT, multi remote AH's on QP */\r
+ if (ep_ptr->qp_state == IBV_QPS_INIT)\r
+ return DAT_SUCCESS;\r
+ mask |= IBV_QP_QKEY;\r
+ qp_attr.qkey = DAT_UD_QKEY;\r
+ }\r
+\r
+ qp_attr.pkey_index = 0;\r
+ qp_attr.port_num = ia_ptr->hca_ptr->port_num;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_EP,\r
+ " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n",\r
+ qp_attr.pkey_index, qp_attr.port_num,\r
+ qp_attr.qp_access_flags, qp_attr.qkey);\r
+ break;\r
+ default:\r
+ break;\r
+ }\r
+\r
+ ret = ibv_modify_qp(qp_handle, &qp_attr, mask);\r
+ if (ret == 0) {\r
+ ep_ptr->qp_state = qp_state;\r
+ return DAT_SUCCESS;\r
+ } else {\r
+ return (dapl_convert_errno(errno, "modify_qp_state"));\r
+ }\r
+}\r
+\r
+/* Modify UD type QP from init, rtr, rts, info network order */\r
+DAT_RETURN \r
+dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp)\r
+{\r
+ struct ibv_qp_attr qp_attr;\r
+\r
+ /* modify QP, setup and prepost buffers */\r
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+ qp_attr.qp_state = IBV_QPS_INIT;\r
+ qp_attr.pkey_index = 0;\r
+ qp_attr.port_num = hca->port_num;\r
+ qp_attr.qkey = DAT_UD_QKEY;\r
+ if (ibv_modify_qp(qp, &qp_attr, \r
+ IBV_QP_STATE |\r
+ IBV_QP_PKEY_INDEX |\r
+ IBV_QP_PORT |\r
+ IBV_QP_QKEY)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " modify_ud_qp INIT: ERR %s\n", strerror(errno));\r
+ return (dapl_convert_errno(errno, "modify_qp"));\r
+ }\r
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+ qp_attr.qp_state = IBV_QPS_RTR;\r
+ if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR, \r
+ " modify_ud_qp RTR: ERR %s\n", strerror(errno));\r
+ return (dapl_convert_errno(errno, "modify_qp"));\r
+ }\r
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+ qp_attr.qp_state = IBV_QPS_RTS;\r
+ qp_attr.sq_psn = 1;\r
+ if (ibv_modify_qp(qp, &qp_attr, \r
+ IBV_QP_STATE | IBV_QP_SQ_PSN)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " modify_ud_qp RTS: ERR %s\n", strerror(errno));\r
+ return (dapl_convert_errno(errno, "modify_qp"));\r
+ }\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+/* Create address handle for remote QP, info in network order */\r
+ib_ah_handle_t \r
+dapls_create_ah(IN DAPL_HCA *hca,\r
+ IN ib_pd_handle_t pd,\r
+ IN ib_qp_handle_t qp,\r
+ IN uint16_t lid,\r
+ IN ib_gid_handle_t gid)\r
+{\r
+ struct ibv_qp_attr qp_attr;\r
+ ib_ah_handle_t ah;\r
+\r
+ if (qp->qp_type != IBV_QPT_UD)\r
+ return NULL;\r
+\r
+ dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));\r
+ qp_attr.qp_state = IBV_QP_STATE;\r
+\r
+ /* address handle. RC and UD */\r
+ qp_attr.ah_attr.dlid = ntohs(lid);\r
+ if (gid != NULL) {\r
+ dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n");\r
+ qp_attr.ah_attr.is_global = 1;\r
+ qp_attr.ah_attr.grh.dgid.global.subnet_prefix = \r
+ ntohll(gid->global.subnet_prefix);\r
+ qp_attr.ah_attr.grh.dgid.global.interface_id = \r
+ ntohll(gid->global.interface_id);\r
+ qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit;\r
+ qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass;\r
+ }\r
+ qp_attr.ah_attr.sl = 0;\r
+ qp_attr.ah_attr.src_path_bits = 0;\r
+ qp_attr.ah_attr.port_num = hca->port_num;\r
+\r
+ dapl_log(DAPL_DBG_TYPE_CM, \r
+ " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", \r
+ hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);\r
+\r
+ /* UD: create AH for remote side */\r
+ ah = ibv_create_ah(pd, &qp_attr.ah_attr);\r
+ if (!ah) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " create_ah: ERR %s\n", strerror(errno));\r
+ return NULL;\r
+ }\r
+\r
+ dapl_log(DAPL_DBG_TYPE_CM, \r
+ " dapls_create_ah: AH %p for lid %x\n", \r
+ ah, qp_attr.ah_attr.dlid);\r
+\r
+ return ah;\r
+}\r
+\r
+/*\r
+ * Local variables:\r
+ * c-indent-level: 4\r
+ * c-basic-offset: 4\r
+ * tab-width: 8\r
+ * End:\r
+ */\r
-/*
- * This Software is licensed under one of the following licenses:
- *
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
- *
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
- *
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
- *
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
- */
-
-/***************************************************************************
- *
- * Module: uDAPL
- *
- * Filename: dapl_ib_util.c
- *
- * Author: Arlin Davis
- *
- * Created: 3/10/2005
- *
- * Description:
- *
- * The uDAPL openib provider - init, open, close, utilities
- *
- ****************************************************************************
- * Source Control System Information
- *
- * $Id: $
- *
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- **************************************************************************/
-#ifdef RCSID
-static const char rcsid[] = "$Id: $";
-#endif
-
-#include "openib_osd.h"
-#include "dapl.h"
-#include "dapl_adapter_util.h"
-#include "dapl_ib_util.h"
-#include "dapl_osd.h"
-
-#include <stdlib.h>
-
-ib_thread_state_t g_ib_thread_state = 0;
-DAPL_OS_THREAD g_ib_thread;
-DAPL_OS_LOCK g_hca_lock;
-struct dapl_llist_entry *g_hca_list;
-
-void dapli_thread(void *arg);
-DAT_RETURN dapli_ib_thread_init(void);
-void dapli_ib_thread_destroy(void);
-
-#if defined(_WIN64) || defined(_WIN32)
-#include "..\..\..\..\..\etc\user\comp_channel.cpp"
-#include <rdma\winverbs.h>
-
-static COMP_SET ufds;
-
-static int dapls_os_init(void)
-{
- return CompSetInit(&ufds);
-}
-
-static void dapls_os_release(void)
-{
- CompSetCleanup(&ufds);
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
- verbs->channel.Milliseconds = 0;
- return 0;
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- channel->comp_channel.Milliseconds = 0;
- return 0;
-}
-
-static int dapls_thread_signal(void)
-{
- CompSetCancel(&ufds);
- return 0;
-}
-#else // _WIN64 || WIN32
-int g_ib_pipe[2];
-
-static int dapls_os_init(void)
-{
- /* create pipe for waking up work thread */
- return pipe(g_ib_pipe);
-}
-
-static void dapls_os_release(void)
-{
- /* close pipe? */
-}
-
-static int dapls_config_fd(int fd)
-{
- int opts;
-
- opts = fcntl(fd, F_GETFL);
- if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " dapls_config_fd: fcntl on fd %d ERR %d %s\n",
- fd, opts, strerror(errno));
- return errno;
- }
-
- return 0;
-}
-
-static int dapls_config_verbs(struct ibv_context *verbs)
-{
- return dapls_config_fd(verbs->async_fd);
-}
-
-static int dapls_config_comp_channel(struct ibv_comp_channel *channel)
-{
- return dapls_config_fd(channel->fd);
-}
-
-static int dapls_thread_signal(void)
-{
- return write(g_ib_pipe[1], "w", sizeof "w");
-}
-#endif
-
-
-static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
- DAPL_SOCKET listen_socket;
- struct sockaddr_in addr;
- socklen_t addrlen = sizeof(addr);
- int ret;
-
- listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
- if (listen_socket == DAPL_INVALID_SOCKET)
- return 1;
-
- memset(&addr, 0, sizeof addr);
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(0x7f000001);
- ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr);
- if (ret)
- goto err1;
-
- ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen);
- if (ret)
- goto err1;
-
- ret = listen(listen_socket, 0);
- if (ret)
- goto err1;
-
- hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
- if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)
- goto err1;
-
- ret = connect(hca_ptr->ib_trans.scm[1],
- (struct sockaddr *)&addr, sizeof(addr));
- if (ret)
- goto err2;
-
- hca_ptr->ib_trans.scm[0] = accept(listen_socket, NULL, NULL);
- if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)
- goto err2;
-
- closesocket(listen_socket);
- return 0;
-
- err2:
- closesocket(hca_ptr->ib_trans.scm[1]);
- err1:
- closesocket(listen_socket);
- return 1;
-}
-
-static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)
-{
- closesocket(hca_ptr->ib_trans.scm[0]);
- closesocket(hca_ptr->ib_trans.scm[1]);
-}
-
-
-/*
- * dapls_ib_init, dapls_ib_release
- *
- * Initialize Verb related items for device open
- *
- * Input:
- * none
- *
- * Output:
- * none
- *
- * Returns:
- * 0 success, -1 error
- *
- */
-int32_t dapls_ib_init(void)
-{
- /* initialize hca_list */
- dapl_os_lock_init(&g_hca_lock);
- dapl_llist_init_head(&g_hca_list);
-
- if (dapls_os_init())
- return 1;
-
- return 0;
-}
-
-int32_t dapls_ib_release(void)
-{
- dapli_ib_thread_destroy();
- dapls_os_release();
- return 0;
-}
-
-/*
- * dapls_ib_open_hca
- *
- * Open HCA
- *
- * Input:
- * *hca_name pointer to provider device name
- * *ib_hca_handle_p pointer to provide HCA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)
-{
- struct ibv_device **dev_list;
- struct ibv_port_attr port_attr;
- int i;
- DAT_RETURN dat_status;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: %s - %p\n", hca_name, hca_ptr);
-
- /* get the IP address of the device */
- dat_status = getlocalipaddr((DAT_SOCK_ADDR *) &hca_ptr->hca_address,
- sizeof(DAT_SOCK_ADDR6));
- if (dat_status != DAT_SUCCESS)
- return dat_status;
-
-#ifdef DAPL_DBG
- /* DBG: unused port, set process id, lower 16 bits of pid */
- ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_port =
- htons((uint16_t)dapl_os_getpid());
-#endif
- /* Get list of all IB devices, find match, open */
- dev_list = ibv_get_device_list(NULL);
- if (!dev_list) {
- dapl_dbg_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ibv_get_device_list() failed\n",
- hca_name);
- return DAT_INTERNAL_ERROR;
- }
-
- for (i = 0; dev_list[i]; ++i) {
- hca_ptr->ib_trans.ib_dev = dev_list[i];
- if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- hca_name))
- goto found;
- }
-
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: device %s not found\n", hca_name);
- goto err;
-
-found:
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- (unsigned long long)
- ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
-
- hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
- if (!hca_ptr->ib_hca_handle) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: dev open failed for %s, err=%s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- strerror(errno));
- goto err;
- }
- hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;
- dapls_config_verbs(hca_ptr->ib_hca_handle);
-
- /* get lid for this hca-port, network order */
- if (ibv_query_port(hca_ptr->ib_hca_handle,
- (uint8_t) hca_ptr->port_num, &port_attr)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: get lid ERR for %s, err=%s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- strerror(errno));
- goto err;
- } else {
- hca_ptr->ib_trans.lid = htons(port_attr.lid);
- }
-
- /* get gid for this hca-port, network order */
- if (ibv_query_gid(hca_ptr->ib_hca_handle,
- (uint8_t) hca_ptr->port_num,
- 0, &hca_ptr->ib_trans.gid)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: query GID ERR for %s, err=%s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- strerror(errno));
- goto err;
- }
-
- /* set RC tunables via enviroment or default */
- hca_ptr->ib_trans.max_inline_send =
- dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
- hca_ptr->ib_trans.ack_retry =
- dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);
- hca_ptr->ib_trans.ack_timer =
- dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);
- hca_ptr->ib_trans.rnr_retry =
- dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);
- hca_ptr->ib_trans.rnr_timer =
- dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);
- hca_ptr->ib_trans.global =
- dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);
- hca_ptr->ib_trans.hop_limit =
- dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);
- hca_ptr->ib_trans.tclass =
- dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);
- hca_ptr->ib_trans.mtu =
- dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));
-
-
- /* EVD events without direct CQ channels, CNO support */
- hca_ptr->ib_trans.ib_cq =
- ibv_create_comp_channel(hca_ptr->ib_hca_handle);
- if (hca_ptr->ib_trans.ib_cq == NULL) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: ibv_create_comp_channel ERR %s\n",
- strerror(errno));
- goto bail;
- }
- dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq);
-
- dat_status = dapli_ib_thread_init();
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to init cq thread lock\n");
- goto bail;
- }
- /*
- * Put new hca_transport on list for async and CQ event processing
- * Wakeup work thread to add to polling list
- */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&hca_ptr->ib_trans.entry);
- dapl_os_lock(&g_hca_lock);
- dapl_llist_add_tail(&g_hca_list,
- (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,
- &hca_ptr->ib_trans.entry);
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: thread wakeup error = %s\n",
- strerror(errno));
- dapl_os_unlock(&g_hca_lock);
-
- /* initialize cr_list lock */
- dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to init cr_list lock\n");
- goto bail;
- }
-
- /* initialize CM list for listens on this HCA */
- dapl_llist_init_head(&hca_ptr->ib_trans.list);
-
- /* initialize pipe, user level wakeup on select */
- if (create_cr_pipe(hca_ptr)) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to init cr pipe - %s\n",
- strerror(errno));
- goto bail;
- }
-
- /* create thread to process inbound connect request */
- hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;
- dat_status = dapl_os_thread_create(cr_thread,
- (void *)hca_ptr,
- &hca_ptr->ib_trans.thread);
- if (dat_status != DAT_SUCCESS) {
- dapl_log(DAPL_DBG_TYPE_ERR,
- " open_hca: failed to create thread\n");
- goto bail;
- }
-
- /* wait for thread */
- while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
- dapl_os_sleep_usec(1000);
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: devname %s, port %d, hostname_IP %s\n",
- ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
- hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)
- &hca_ptr->hca_address)->
- sin_addr));
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x
- "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)
- htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),
- (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.
- interface_id));
-
- ibv_free_device_list(dev_list);
- return dat_status;
-
- bail:
- ibv_close_device(hca_ptr->ib_hca_handle);
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- err:
- ibv_free_device_list(dev_list);
- return DAT_INTERNAL_ERROR;
-}
-
-/*
- * dapls_ib_close_hca
- *
- * Open HCA
- *
- * Input:
- * DAPL_HCA provide CA handle
- *
- * Output:
- * none
- *
- * Return:
- * DAT_SUCCESS
- * dapl_convert_errno
- *
- */
-DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);
-
- if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
- if (ibv_close_device(hca_ptr->ib_hca_handle))
- return (dapl_convert_errno(errno, "ib_close_device"));
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- }
-
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_RUN) {
- dapl_os_unlock(&g_hca_lock);
- return (DAT_SUCCESS);
- }
- dapl_os_unlock(&g_hca_lock);
-
- /* destroy cr_thread and lock */
- hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;
- send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
- while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " close_hca: waiting for cr_thread\n");
- send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
- dapl_os_sleep_usec(1000);
- }
- dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);
- destroy_cr_pipe(hca_ptr); /* no longer need pipe */
-
- /*
- * Remove hca from async event processing list
- * Wakeup work thread to remove from polling list
- */
- hca_ptr->ib_trans.destroy = 1;
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
-
- /* wait for thread to remove HCA references */
- while (hca_ptr->ib_trans.destroy != 2) {
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
- dapl_os_sleep_usec(1000);
- }
-
- return (DAT_SUCCESS);
-}
-
-DAT_RETURN dapli_ib_thread_init(void)
-{
- DAT_RETURN dat_status;
-
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_INIT) {
- dapl_os_unlock(&g_hca_lock);
- return DAT_SUCCESS;
- }
-
- g_ib_thread_state = IB_THREAD_CREATE;
- dapl_os_unlock(&g_hca_lock);
-
- /* create thread to process inbound connect request */
- dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);
- if (dat_status != DAT_SUCCESS)
- return (dapl_convert_errno(errno,
- "create_thread ERR:"
- " check resource limits"));
-
- /* wait for thread to start */
- dapl_os_lock(&g_hca_lock);
- while (g_ib_thread_state != IB_THREAD_RUN) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_init: waiting for ib_thread\n");
- dapl_os_unlock(&g_hca_lock);
- dapl_os_sleep_usec(1000);
- dapl_os_lock(&g_hca_lock);
- }
- dapl_os_unlock(&g_hca_lock);
-
- return DAT_SUCCESS;
-}
-
-void dapli_ib_thread_destroy(void)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy(%d)\n", dapl_os_getpid());
- /*
- * wait for async thread to terminate.
- * pthread_join would be the correct method
- * but some applications have some issues
- */
-
- /* destroy ib_thread, wait for termination, if not already */
- dapl_os_lock(&g_hca_lock);
- if (g_ib_thread_state != IB_THREAD_RUN)
- goto bail;
-
- g_ib_thread_state = IB_THREAD_CANCEL;
- while (g_ib_thread_state != IB_THREAD_EXIT) {
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy: waiting for ib_thread\n");
- if (dapls_thread_signal() == -1)
- dapl_log(DAPL_DBG_TYPE_UTIL,
- " destroy: thread wakeup error = %s\n",
- strerror(errno));
- dapl_os_unlock(&g_hca_lock);
- dapl_os_sleep_usec(2000);
- dapl_os_lock(&g_hca_lock);
- }
-bail:
- dapl_os_unlock(&g_hca_lock);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread_destroy(%d) exit\n", dapl_os_getpid());
-}
-
-
-#if defined(_WIN64) || defined(_WIN32)
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
- struct _ib_hca_transport *hca;
- struct _ib_hca_transport *uhca[8];
- int ret, idx, cnt;
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",
- dapl_os_getpid(), g_ib_thread);
-
- dapl_os_lock(&g_hca_lock);
- for (g_ib_thread_state = IB_THREAD_RUN;
- g_ib_thread_state == IB_THREAD_RUN;
- dapl_os_lock(&g_hca_lock)) {
-
- CompSetZero(&ufds);
- idx = 0;
- hca = dapl_llist_is_empty(&g_hca_list) ? NULL :
- dapl_llist_peek_head(&g_hca_list);
-
- while (hca) {
- CompSetAdd(&hca->ib_ctx->channel, &ufds);
- CompSetAdd(&hca->ib_cq->comp_channel, &ufds);
- uhca[idx++] = hca;
- hca = dapl_llist_next_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *)
- &hca->entry);
- }
- cnt = idx;
-
- dapl_os_unlock(&g_hca_lock);
- ret = CompSetPoll(&ufds, INFINITE);
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
- " ib_thread(%d) poll_event 0x%x\n",
- dapl_os_getpid(), ret);
-
-
- /* check and process ASYNC events, per device */
- for (idx = 0; idx < cnt; idx++) {
- if (uhca[idx]->destroy == 1) {
- dapl_os_lock(&g_hca_lock);
- dapl_llist_remove_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *)
- &uhca[idx]->entry);
- dapl_os_unlock(&g_hca_lock);
- uhca[idx]->destroy = 2;
- } else {
- dapli_cq_event_cb(uhca[idx]);
- dapli_async_event_cb(uhca[idx]);
- }
- }
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",
- dapl_os_getpid());
- g_ib_thread_state = IB_THREAD_EXIT;
- dapl_os_unlock(&g_hca_lock);
-}
-#else // _WIN64 || WIN32
-
-/* work thread for uAT, uCM, CQ, and async events */
-void dapli_thread(void *arg)
-{
- struct pollfd ufds[__FD_SETSIZE];
- struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };
- struct _ib_hca_transport *hca;
- int ret, idx, fds;
- char rbuf[2];
-
- dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
- " ib_thread(%d,0x%x): ENTER: pipe %d \n",
- dapl_os_getpid(), g_ib_thread, g_ib_pipe[0]);
-
- /* Poll across pipe, CM, AT never changes */
- dapl_os_lock(&g_hca_lock);
- g_ib_thread_state = IB_THREAD_RUN;
-
- ufds[0].fd = g_ib_pipe[0]; /* pipe */
- ufds[0].events = POLLIN;
-
- while (g_ib_thread_state == IB_THREAD_RUN) {
-
- /* build ufds after pipe and uCMA events */
- ufds[0].revents = 0;
- idx = 0;
-
- /* Walk HCA list and setup async and CQ events */
- if (!dapl_llist_is_empty(&g_hca_list))
- hca = dapl_llist_peek_head(&g_hca_list);
- else
- hca = NULL;
-
- while (hca) {
-
- /* uASYNC events */
- ufds[++idx].fd = hca->ib_ctx->async_fd;
- ufds[idx].events = POLLIN;
- ufds[idx].revents = 0;
- uhca[idx] = hca;
-
- /* CQ events are non-direct with CNO's */
- ufds[++idx].fd = hca->ib_cq->fd;
- ufds[idx].events = POLLIN;
- ufds[idx].revents = 0;
- uhca[idx] = hca;
-
- dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
- " ib_thread(%d) poll_fd: hca[%d]=%p,"
- " async=%d pipe=%d \n",
- dapl_os_getpid(), hca, ufds[idx - 1].fd,
- ufds[0].fd);
-
- hca = dapl_llist_next_entry(&g_hca_list,
- (DAPL_LLIST_ENTRY *)
- &hca->entry);
- }
-
- /* unlock, and setup poll */
- fds = idx + 1;
- dapl_os_unlock(&g_hca_lock);
- ret = poll(ufds, fds, -1);
- if (ret <= 0) {
- dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
- " ib_thread(%d): ERR %s poll\n",
- dapl_os_getpid(), strerror(errno));
- dapl_os_lock(&g_hca_lock);
- continue;
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
- " ib_thread(%d) poll_event: "
- " async=0x%x pipe=0x%x \n",
- dapl_os_getpid(), ufds[idx].revents,
- ufds[0].revents);
-
- /* check and process CQ and ASYNC events, per device */
- for (idx = 1; idx < fds; idx++) {
- if (ufds[idx].revents == POLLIN) {
- dapli_cq_event_cb(uhca[idx]);
- dapli_async_event_cb(uhca[idx]);
- }
- }
-
- /* check and process user events, PIPE */
- if (ufds[0].revents == POLLIN) {
- if (read(g_ib_pipe[0], rbuf, 2) == -1)
- dapl_log(DAPL_DBG_TYPE_THREAD,
- " cr_thread: pipe rd err= %s\n",
- strerror(errno));
-
- /* cleanup any device on list marked for destroy */
- for (idx = 1; idx < fds; idx++) {
- if (uhca[idx] && uhca[idx]->destroy == 1) {
- dapl_os_lock(&g_hca_lock);
- dapl_llist_remove_entry(
- &g_hca_list,
- (DAPL_LLIST_ENTRY*)
- &uhca[idx]->entry);
- dapl_os_unlock(&g_hca_lock);
- uhca[idx]->destroy = 2;
- }
- }
- }
- dapl_os_lock(&g_hca_lock);
- }
-
- dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",
- dapl_os_getpid());
- g_ib_thread_state = IB_THREAD_EXIT;
- dapl_os_unlock(&g_hca_lock);
-}
-#endif
+/*\r
+ * This Software is licensed under one of the following licenses:\r
+ *\r
+ * 1) under the terms of the "Common Public License 1.0" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/cpl.php.\r
+ *\r
+ * 2) under the terms of the "The BSD License" a copy of which is\r
+ * available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/bsd-license.php.\r
+ *\r
+ * 3) under the terms of the "GNU General Public License (GPL) Version 2" a\r
+ * copy of which is available from the Open Source Initiative, see\r
+ * http://www.opensource.org/licenses/gpl-license.php.\r
+ *\r
+ * Licensee has the right to choose one of the above licenses.\r
+ *\r
+ * Redistributions of source code must retain the above copyright\r
+ * notice and one of the license notices.\r
+ *\r
+ * Redistributions in binary form must reproduce both the above copyright\r
+ * notice, one of the license notices in the documentation\r
+ * and/or other materials provided with the distribution.\r
+ */\r
+\r
+/***************************************************************************\r
+ *\r
+ * Module: uDAPL\r
+ *\r
+ * Filename: dapl_ib_util.c\r
+ *\r
+ * Author: Arlin Davis\r
+ *\r
+ * Created: 3/10/2005\r
+ *\r
+ * Description: \r
+ *\r
+ * The uDAPL openib provider - init, open, close, utilities\r
+ *\r
+ ****************************************************************************\r
+ * Source Control System Information\r
+ *\r
+ * $Id: $\r
+ *\r
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.\r
+ *\r
+ **************************************************************************/\r
+#ifdef RCSID\r
+static const char rcsid[] = "$Id: $";\r
+#endif\r
+\r
+#include "openib_osd.h"\r
+#include "dapl.h"\r
+#include "dapl_adapter_util.h"\r
+#include "dapl_ib_util.h"\r
+#include "dapl_osd.h"\r
+\r
+#include <stdlib.h>\r
+\r
+ib_thread_state_t g_ib_thread_state = 0;\r
+DAPL_OS_THREAD g_ib_thread;\r
+DAPL_OS_LOCK g_hca_lock;\r
+struct dapl_llist_entry *g_hca_list;\r
+\r
+void dapli_thread(void *arg);\r
+DAT_RETURN dapli_ib_thread_init(void);\r
+void dapli_ib_thread_destroy(void);\r
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+#include "..\..\..\..\..\etc\user\comp_channel.cpp"\r
+#include <rdma\winverbs.h>\r
+\r
+static COMP_SET ufds;\r
+\r
+static int dapls_os_init(void)\r
+{\r
+ return CompSetInit(&ufds);\r
+}\r
+\r
+static void dapls_os_release(void)\r
+{\r
+ CompSetCleanup(&ufds);\r
+}\r
+\r
+static int dapls_config_verbs(struct ibv_context *verbs)\r
+{\r
+ verbs->channel.Milliseconds = 0;\r
+ return 0;\r
+}\r
+\r
+static int dapls_config_comp_channel(struct ibv_comp_channel *channel)\r
+{\r
+ channel->comp_channel.Milliseconds = 0;\r
+ return 0;\r
+}\r
+\r
+static int dapls_thread_signal(void)\r
+{\r
+ CompSetCancel(&ufds);\r
+ return 0;\r
+}\r
+#else // _WIN64 || WIN32\r
+int g_ib_pipe[2];\r
+\r
+static int dapls_os_init(void)\r
+{\r
+ /* create pipe for waking up work thread */\r
+ return pipe(g_ib_pipe);\r
+}\r
+\r
+static void dapls_os_release(void)\r
+{\r
+ /* close pipe? */\r
+}\r
+\r
+static int dapls_config_fd(int fd)\r
+{\r
+ int opts;\r
+\r
+ opts = fcntl(fd, F_GETFL);\r
+ if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " dapls_config_fd: fcntl on fd %d ERR %d %s\n",\r
+ fd, opts, strerror(errno));\r
+ return errno;\r
+ }\r
+\r
+ return 0;\r
+}\r
+\r
+static int dapls_config_verbs(struct ibv_context *verbs)\r
+{\r
+ return dapls_config_fd(verbs->async_fd);\r
+}\r
+\r
+static int dapls_config_comp_channel(struct ibv_comp_channel *channel)\r
+{\r
+ return dapls_config_fd(channel->fd);\r
+}\r
+\r
+static int dapls_thread_signal(void)\r
+{\r
+ return write(g_ib_pipe[1], "w", sizeof "w");\r
+}\r
+#endif\r
+\r
+\r
+static int32_t create_cr_pipe(IN DAPL_HCA * hca_ptr)\r
+{\r
+ DAPL_SOCKET listen_socket;\r
+ struct sockaddr_in addr;\r
+ socklen_t addrlen = sizeof(addr);\r
+ int ret;\r
+\r
+ listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);\r
+ if (listen_socket == DAPL_INVALID_SOCKET)\r
+ return 1;\r
+\r
+ memset(&addr, 0, sizeof addr);\r
+ addr.sin_family = AF_INET;\r
+ addr.sin_addr.s_addr = htonl(0x7f000001);\r
+ ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr);\r
+ if (ret)\r
+ goto err1;\r
+\r
+ ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen);\r
+ if (ret)\r
+ goto err1;\r
+\r
+ ret = listen(listen_socket, 0);\r
+ if (ret)\r
+ goto err1;\r
+\r
+ hca_ptr->ib_trans.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);\r
+ if (hca_ptr->ib_trans.scm[1] == DAPL_INVALID_SOCKET)\r
+ goto err1;\r
+\r
+ ret = connect(hca_ptr->ib_trans.scm[1], \r
+ (struct sockaddr *)&addr, sizeof(addr));\r
+ if (ret)\r
+ goto err2;\r
+\r
+ hca_ptr->ib_trans.scm[0] = accept(listen_socket, NULL, NULL);\r
+ if (hca_ptr->ib_trans.scm[0] == DAPL_INVALID_SOCKET)\r
+ goto err2;\r
+\r
+ closesocket(listen_socket);\r
+ return 0;\r
+\r
+ err2:\r
+ closesocket(hca_ptr->ib_trans.scm[1]);\r
+ err1:\r
+ closesocket(listen_socket);\r
+ return 1;\r
+}\r
+\r
+static void destroy_cr_pipe(IN DAPL_HCA * hca_ptr)\r
+{\r
+ closesocket(hca_ptr->ib_trans.scm[0]);\r
+ closesocket(hca_ptr->ib_trans.scm[1]);\r
+}\r
+\r
+\r
+/*\r
+ * dapls_ib_init, dapls_ib_release\r
+ *\r
+ * Initialize Verb related items for device open\r
+ *\r
+ * Input:\r
+ * none\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Returns:\r
+ * 0 success, -1 error\r
+ *\r
+ */\r
+int32_t dapls_ib_init(void)\r
+{\r
+ /* initialize hca_list */\r
+ dapl_os_lock_init(&g_hca_lock);\r
+ dapl_llist_init_head(&g_hca_list);\r
+\r
+ if (dapls_os_init())\r
+ return 1;\r
+\r
+ return 0;\r
+}\r
+\r
+int32_t dapls_ib_release(void)\r
+{\r
+ dapli_ib_thread_destroy();\r
+ dapls_os_release();\r
+ return 0;\r
+}\r
+\r
+/*\r
+ * dapls_ib_open_hca\r
+ *\r
+ * Open HCA\r
+ *\r
+ * Input:\r
+ * *hca_name pointer to provider device name\r
+ * *ib_hca_handle_p pointer to provide HCA handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Return:\r
+ * DAT_SUCCESS\r
+ * dapl_convert_errno\r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr)\r
+{\r
+ struct ibv_device **dev_list;\r
+ struct ibv_port_attr port_attr;\r
+ int i;\r
+ DAT_RETURN dat_status;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: %s - %p\n", hca_name, hca_ptr);\r
+\r
+ /* get the IP address of the device */\r
+ dat_status = getlocalipaddr((DAT_SOCK_ADDR *) &hca_ptr->hca_address,\r
+ sizeof(DAT_SOCK_ADDR6));\r
+ if (dat_status != DAT_SUCCESS)\r
+ return dat_status;\r
+\r
+#ifdef DAPL_DBG\r
+ /* DBG: unused port, set process id, lower 16 bits of pid */\r
+ ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_port = \r
+ htons((uint16_t)dapl_os_getpid());\r
+#endif\r
+ /* Get list of all IB devices, find match, open */\r
+ dev_list = ibv_get_device_list(NULL);\r
+ if (!dev_list) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: ibv_get_device_list() failed\n",\r
+ hca_name);\r
+ return DAT_INTERNAL_ERROR;\r
+ }\r
+\r
+ for (i = 0; dev_list[i]; ++i) {\r
+ hca_ptr->ib_trans.ib_dev = dev_list[i];\r
+ if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+ hca_name))\r
+ goto found;\r
+ }\r
+\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: device %s not found\n", hca_name);\r
+ goto err;\r
+\r
+found:\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " open_hca: Found dev %s %016llx\n",\r
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+ (unsigned long long)\r
+ ntohll(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));\r
+\r
+ hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);\r
+ if (!hca_ptr->ib_hca_handle) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: dev open failed for %s, err=%s\n",\r
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+ strerror(errno));\r
+ goto err;\r
+ }\r
+ hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;\r
+ dapls_config_verbs(hca_ptr->ib_hca_handle);\r
+\r
+ /* get lid for this hca-port, network order */\r
+ if (ibv_query_port(hca_ptr->ib_hca_handle,\r
+ (uint8_t) hca_ptr->port_num, &port_attr)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: get lid ERR for %s, err=%s\n",\r
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+ strerror(errno));\r
+ goto err;\r
+ } else {\r
+ hca_ptr->ib_trans.lid = htons(port_attr.lid);\r
+ }\r
+\r
+ /* get gid for this hca-port, network order */\r
+ if (ibv_query_gid(hca_ptr->ib_hca_handle,\r
+ (uint8_t) hca_ptr->port_num,\r
+ 0, &hca_ptr->ib_trans.gid)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: query GID ERR for %s, err=%s\n",\r
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+ strerror(errno));\r
+ goto err;\r
+ }\r
+\r
+	/* set RC tunables via environment or default */\r
+ hca_ptr->ib_trans.max_inline_send =\r
+ dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);\r
+ hca_ptr->ib_trans.ack_retry =\r
+ dapl_os_get_env_val("DAPL_ACK_RETRY", SCM_ACK_RETRY);\r
+ hca_ptr->ib_trans.ack_timer =\r
+ dapl_os_get_env_val("DAPL_ACK_TIMER", SCM_ACK_TIMER);\r
+ hca_ptr->ib_trans.rnr_retry =\r
+ dapl_os_get_env_val("DAPL_RNR_RETRY", SCM_RNR_RETRY);\r
+ hca_ptr->ib_trans.rnr_timer =\r
+ dapl_os_get_env_val("DAPL_RNR_TIMER", SCM_RNR_TIMER);\r
+ hca_ptr->ib_trans.global =\r
+ dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", SCM_GLOBAL);\r
+ hca_ptr->ib_trans.hop_limit =\r
+ dapl_os_get_env_val("DAPL_HOP_LIMIT", SCM_HOP_LIMIT);\r
+ hca_ptr->ib_trans.tclass =\r
+ dapl_os_get_env_val("DAPL_TCLASS", SCM_TCLASS);\r
+ hca_ptr->ib_trans.mtu =\r
+ dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", SCM_IB_MTU));\r
+\r
+\r
+ /* EVD events without direct CQ channels, CNO support */\r
+ hca_ptr->ib_trans.ib_cq =\r
+ ibv_create_comp_channel(hca_ptr->ib_hca_handle);\r
+ if (hca_ptr->ib_trans.ib_cq == NULL) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: ibv_create_comp_channel ERR %s\n",\r
+ strerror(errno));\r
+ goto bail;\r
+ }\r
+ dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq);\r
+ \r
+ dat_status = dapli_ib_thread_init();\r
+ if (dat_status != DAT_SUCCESS) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: failed to init cq thread lock\n");\r
+ goto bail;\r
+ }\r
+ /* \r
+ * Put new hca_transport on list for async and CQ event processing \r
+ * Wakeup work thread to add to polling list\r
+ */\r
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&hca_ptr->ib_trans.entry);\r
+ dapl_os_lock(&g_hca_lock);\r
+ dapl_llist_add_tail(&g_hca_list,\r
+ (DAPL_LLIST_ENTRY *) &hca_ptr->ib_trans.entry,\r
+ &hca_ptr->ib_trans.entry);\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ /* initialize cr_list lock */\r
+ dat_status = dapl_os_lock_init(&hca_ptr->ib_trans.lock);\r
+ if (dat_status != DAT_SUCCESS) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: failed to init cr_list lock\n");\r
+ goto bail;\r
+ }\r
+\r
+ /* initialize CM list for listens on this HCA */\r
+ dapl_llist_init_head(&hca_ptr->ib_trans.list);\r
+\r
+ /* initialize pipe, user level wakeup on select */\r
+ if (create_cr_pipe(hca_ptr)) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: failed to init cr pipe - %s\n",\r
+ strerror(errno));\r
+ goto bail;\r
+ }\r
+\r
+ /* create thread to process inbound connect request */\r
+ hca_ptr->ib_trans.cr_state = IB_THREAD_INIT;\r
+ dat_status = dapl_os_thread_create(cr_thread,\r
+ (void *)hca_ptr,\r
+ &hca_ptr->ib_trans.thread);\r
+ if (dat_status != DAT_SUCCESS) {\r
+ dapl_log(DAPL_DBG_TYPE_ERR,\r
+ " open_hca: failed to create thread\n");\r
+ goto bail;\r
+ }\r
+\r
+ /* wait for thread */\r
+ while (hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {\r
+ dapl_os_sleep_usec(1000);\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: devname %s, port %d, hostname_IP %s\n",\r
+ ibv_get_device_name(hca_ptr->ib_trans.ib_dev),\r
+ hca_ptr->port_num, inet_ntoa(((struct sockaddr_in *)\r
+ &hca_ptr->hca_address)->\r
+ sin_addr));\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " open_hca: LID 0x%x GID Subnet 0x" F64x " ID 0x" F64x\r
+ "\n", ntohs(hca_ptr->ib_trans.lid), (unsigned long long)\r
+ htonll(hca_ptr->ib_trans.gid.global.subnet_prefix),\r
+ (unsigned long long)htonll(hca_ptr->ib_trans.gid.global.\r
+ interface_id));\r
+\r
+ ibv_free_device_list(dev_list);\r
+ return dat_status;\r
+\r
+ bail:\r
+ ibv_close_device(hca_ptr->ib_hca_handle);\r
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+ err:\r
+ ibv_free_device_list(dev_list);\r
+ return DAT_INTERNAL_ERROR;\r
+}\r
+\r
+/*\r
+ * dapls_ib_close_hca\r
+ *\r
+ * Close HCA\r
+ *\r
+ * Input:\r
+ * DAPL_HCA provide CA handle\r
+ *\r
+ * Output:\r
+ * none\r
+ *\r
+ * Return:\r
+ * DAT_SUCCESS\r
+ * dapl_convert_errno \r
+ *\r
+ */\r
+DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr)\r
+{\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr);\r
+\r
+ dapl_os_lock(&g_hca_lock);\r
+ if (g_ib_thread_state != IB_THREAD_RUN) {\r
+ dapl_os_unlock(&g_hca_lock);\r
+ goto out;\r
+ }\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ /* destroy cr_thread and lock */\r
+ hca_ptr->ib_trans.cr_state = IB_THREAD_CANCEL;\r
+ send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);\r
+ while (hca_ptr->ib_trans.cr_state != IB_THREAD_EXIT) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " close_hca: waiting for cr_thread\n");\r
+ send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);\r
+ dapl_os_sleep_usec(1000);\r
+ }\r
+ dapl_os_lock_destroy(&hca_ptr->ib_trans.lock);\r
+ destroy_cr_pipe(hca_ptr); /* no longer need pipe */\r
+ \r
+ /* \r
+ * Remove hca from async event processing list\r
+ * Wakeup work thread to remove from polling list\r
+ */\r
+ hca_ptr->ib_trans.destroy = 1;\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " destroy: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+\r
+ /* wait for thread to remove HCA references */\r
+ while (hca_ptr->ib_trans.destroy != 2) {\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " destroy: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+ dapl_os_sleep_usec(1000);\r
+ }\r
+\r
+out:\r
+ if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {\r
+ if (ibv_close_device(hca_ptr->ib_hca_handle))\r
+ return (dapl_convert_errno(errno, "ib_close_device"));\r
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;\r
+ }\r
+ return (DAT_SUCCESS);\r
+}\r
+\r
+DAT_RETURN dapli_ib_thread_init(void)\r
+{\r
+ DAT_RETURN dat_status;\r
+\r
+ dapl_os_lock(&g_hca_lock);\r
+ if (g_ib_thread_state != IB_THREAD_INIT) {\r
+ dapl_os_unlock(&g_hca_lock);\r
+ return DAT_SUCCESS;\r
+ }\r
+\r
+ g_ib_thread_state = IB_THREAD_CREATE;\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ /* create thread to process inbound connect request */\r
+ dat_status = dapl_os_thread_create(dapli_thread, NULL, &g_ib_thread);\r
+ if (dat_status != DAT_SUCCESS)\r
+ return (dapl_convert_errno(errno,\r
+ "create_thread ERR:"\r
+ " check resource limits"));\r
+\r
+ /* wait for thread to start */\r
+ dapl_os_lock(&g_hca_lock);\r
+ while (g_ib_thread_state != IB_THREAD_RUN) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " ib_thread_init: waiting for ib_thread\n");\r
+ dapl_os_unlock(&g_hca_lock);\r
+ dapl_os_sleep_usec(1000);\r
+ dapl_os_lock(&g_hca_lock);\r
+ }\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ return DAT_SUCCESS;\r
+}\r
+\r
+void dapli_ib_thread_destroy(void)\r
+{\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " ib_thread_destroy(%d)\n", dapl_os_getpid());\r
+ /* \r
+ * wait for async thread to terminate. \r
+ * pthread_join would be the correct method\r
+ * but some applications have some issues\r
+ */\r
+\r
+ /* destroy ib_thread, wait for termination, if not already */\r
+ dapl_os_lock(&g_hca_lock);\r
+ if (g_ib_thread_state != IB_THREAD_RUN)\r
+ goto bail;\r
+\r
+ g_ib_thread_state = IB_THREAD_CANCEL;\r
+ while (g_ib_thread_state != IB_THREAD_EXIT) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " ib_thread_destroy: waiting for ib_thread\n");\r
+ if (dapls_thread_signal() == -1)\r
+ dapl_log(DAPL_DBG_TYPE_UTIL,\r
+ " destroy: thread wakeup error = %s\n",\r
+ strerror(errno));\r
+ dapl_os_unlock(&g_hca_lock);\r
+ dapl_os_sleep_usec(2000);\r
+ dapl_os_lock(&g_hca_lock);\r
+ }\r
+bail:\r
+ dapl_os_unlock(&g_hca_lock);\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " ib_thread_destroy(%d) exit\n", dapl_os_getpid());\r
+}\r
+\r
+\r
+#if defined(_WIN64) || defined(_WIN32)\r
+/* work thread for uAT, uCM, CQ, and async events */\r
+void dapli_thread(void *arg)\r
+{\r
+ struct _ib_hca_transport *hca;\r
+ struct _ib_hca_transport *uhca[8];\r
+ int ret, idx, cnt;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d,0x%x): ENTER: \n",\r
+ dapl_os_getpid(), g_ib_thread);\r
+\r
+ dapl_os_lock(&g_hca_lock);\r
+ for (g_ib_thread_state = IB_THREAD_RUN;\r
+ g_ib_thread_state == IB_THREAD_RUN; \r
+ dapl_os_lock(&g_hca_lock)) {\r
+\r
+ CompSetZero(&ufds);\r
+ idx = 0;\r
+ hca = dapl_llist_is_empty(&g_hca_list) ? NULL :\r
+ dapl_llist_peek_head(&g_hca_list);\r
+\r
+ while (hca) {\r
+ CompSetAdd(&hca->ib_ctx->channel, &ufds);\r
+ CompSetAdd(&hca->ib_cq->comp_channel, &ufds);\r
+ uhca[idx++] = hca;\r
+ hca = dapl_llist_next_entry(&g_hca_list,\r
+ (DAPL_LLIST_ENTRY *)\r
+ &hca->entry);\r
+ }\r
+ cnt = idx;\r
+\r
+ dapl_os_unlock(&g_hca_lock);\r
+ ret = CompSetPoll(&ufds, INFINITE);\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL,\r
+ " ib_thread(%d) poll_event 0x%x\n",\r
+ dapl_os_getpid(), ret);\r
+\r
+\r
+ /* check and process ASYNC events, per device */\r
+ for (idx = 0; idx < cnt; idx++) {\r
+ if (uhca[idx]->destroy == 1) {\r
+ dapl_os_lock(&g_hca_lock);\r
+ dapl_llist_remove_entry(&g_hca_list,\r
+ (DAPL_LLIST_ENTRY *)\r
+ &uhca[idx]->entry);\r
+ dapl_os_unlock(&g_hca_lock);\r
+ uhca[idx]->destroy = 2;\r
+ } else {\r
+ dapli_cq_event_cb(uhca[idx]);\r
+ dapli_async_event_cb(uhca[idx]);\r
+ }\r
+ }\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ib_thread(%d) EXIT\n",\r
+ dapl_os_getpid());\r
+ g_ib_thread_state = IB_THREAD_EXIT;\r
+ dapl_os_unlock(&g_hca_lock);\r
+}\r
+#else // _WIN64 || WIN32\r
+\r
+/* work thread for uAT, uCM, CQ, and async events */\r
+void dapli_thread(void *arg)\r
+{\r
+ struct pollfd ufds[__FD_SETSIZE];\r
+ struct _ib_hca_transport *uhca[__FD_SETSIZE] = { NULL };\r
+ struct _ib_hca_transport *hca;\r
+ int ret, idx, fds;\r
+ char rbuf[2];\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d,0x%x): ENTER: pipe %d \n",\r
+ dapl_os_getpid(), g_ib_thread, g_ib_pipe[0]);\r
+\r
+ /* Poll across pipe, CM, AT never changes */\r
+ dapl_os_lock(&g_hca_lock);\r
+ g_ib_thread_state = IB_THREAD_RUN;\r
+\r
+ ufds[0].fd = g_ib_pipe[0]; /* pipe */\r
+ ufds[0].events = POLLIN;\r
+\r
+ while (g_ib_thread_state == IB_THREAD_RUN) {\r
+\r
+ /* build ufds after pipe and uCMA events */\r
+ ufds[0].revents = 0;\r
+ idx = 0;\r
+\r
+ /* Walk HCA list and setup async and CQ events */\r
+ if (!dapl_llist_is_empty(&g_hca_list))\r
+ hca = dapl_llist_peek_head(&g_hca_list);\r
+ else\r
+ hca = NULL;\r
+\r
+ while (hca) {\r
+\r
+ /* uASYNC events */\r
+ ufds[++idx].fd = hca->ib_ctx->async_fd;\r
+ ufds[idx].events = POLLIN;\r
+ ufds[idx].revents = 0;\r
+ uhca[idx] = hca;\r
+\r
+ /* CQ events are non-direct with CNO's */\r
+ ufds[++idx].fd = hca->ib_cq->fd;\r
+ ufds[idx].events = POLLIN;\r
+ ufds[idx].revents = 0;\r
+ uhca[idx] = hca;\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d) poll_fd: hca[%d]=%p,"\r
+ " async=%d pipe=%d \n",\r
+ dapl_os_getpid(), hca, ufds[idx - 1].fd,\r
+ ufds[0].fd);\r
+\r
+ hca = dapl_llist_next_entry(&g_hca_list,\r
+ (DAPL_LLIST_ENTRY *)\r
+ &hca->entry);\r
+ }\r
+\r
+ /* unlock, and setup poll */\r
+ fds = idx + 1;\r
+ dapl_os_unlock(&g_hca_lock);\r
+ ret = poll(ufds, fds, -1);\r
+ if (ret <= 0) {\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d): ERR %s poll\n",\r
+ dapl_os_getpid(), strerror(errno));\r
+ dapl_os_lock(&g_hca_lock);\r
+ continue;\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,\r
+ " ib_thread(%d) poll_event: "\r
+ " async=0x%x pipe=0x%x \n",\r
+ dapl_os_getpid(), ufds[idx].revents,\r
+ ufds[0].revents);\r
+\r
+ /* check and process CQ and ASYNC events, per device */\r
+ for (idx = 1; idx < fds; idx++) {\r
+ if (ufds[idx].revents == POLLIN) {\r
+ dapli_cq_event_cb(uhca[idx]);\r
+ dapli_async_event_cb(uhca[idx]);\r
+ }\r
+ }\r
+\r
+ /* check and process user events, PIPE */\r
+ if (ufds[0].revents == POLLIN) {\r
+ if (read(g_ib_pipe[0], rbuf, 2) == -1)\r
+ dapl_log(DAPL_DBG_TYPE_THREAD,\r
+ " cr_thread: pipe rd err= %s\n",\r
+ strerror(errno));\r
+\r
+ /* cleanup any device on list marked for destroy */\r
+ for (idx = 1; idx < fds; idx++) {\r
+ if (uhca[idx] && uhca[idx]->destroy == 1) {\r
+ dapl_os_lock(&g_hca_lock);\r
+ dapl_llist_remove_entry(\r
+ &g_hca_list,\r
+ (DAPL_LLIST_ENTRY*)\r
+ &uhca[idx]->entry);\r
+ dapl_os_unlock(&g_hca_lock);\r
+ uhca[idx]->destroy = 2;\r
+ }\r
+ }\r
+ }\r
+ dapl_os_lock(&g_hca_lock);\r
+ }\r
+\r
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " ib_thread(%d) EXIT\n",\r
+ dapl_os_getpid());\r
+ g_ib_thread_state = IB_THREAD_EXIT;\r
+ dapl_os_unlock(&g_hca_lock);\r
+}\r
+#endif\r
context->cmd_if->Release();\r
ibv_release();\r
delete vcontext->port;\r
+ CompChannelCleanup(&vcontext->context.channel);\r
delete vcontext;\r
return 0;\r
}\r
* Interfaces based on librdmacm 1.0.8.\r
*/\r
\r
+#define RDMA_MAX_PRIVATE_DATA 56\r
+\r
/*\r
* Upon receiving a device removal event, users must destroy the associated\r
* RDMA identifier and release all resources allocated with the device.\r
\r
struct cma_event {\r
struct rdma_cm_event event;\r
- uint8_t private_data[56];\r
+ uint8_t private_data[RDMA_MAX_PRIVATE_DATA];\r
struct cma_id_private *id_priv;\r
};\r
\r
evt->id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
evt->event.id = id;\r
evt->event.param.conn.private_data = evt->private_data;\r
+ evt->event.param.conn.private_data_len = RDMA_MAX_PRIVATE_DATA;\r
+\r
evt->event.status = id->ep.connect->\r
GetOverlappedResult(&entry->Overlap, &bytes, FALSE);\r
\r