#include "dapl_ib_util.h"
#include <sys/poll.h>
#include <signal.h>
+#include <rdma/rdma_cma_ib.h>
extern struct rdma_event_channel *g_cm_events;
(unsigned short)((SID % IB_PORT_MOD) + IB_PORT_BASE) :\
(unsigned short)SID)
-
static void dapli_addr_resolve(struct dapl_cm_id *conn)
{
int ret;
static void dapli_route_resolve(struct dapl_cm_id *conn)
{
int ret;
+ size_t optlen = sizeof(struct ib_cm_req_opt);
+ struct ib_cm_req_opt req_opt;
#ifdef DAPL_DBG
struct rdma_addr *ipaddr = &conn->cm_id->route.addr;
struct ib_addr *ibaddr = &conn->cm_id->route.addr.addr.ibaddr;
cpu_to_be64(ibaddr->dgid.global.interface_id));
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " rdma_connect: cm_id %p pdata %p plen %d rr %d ind %d\n",
+ " route_resolve: cm_id %p pdata %p plen %d rr %d ind %d\n",
conn->cm_id,
conn->params.private_data,
conn->params.private_data_len,
conn->params.responder_resources,
conn->params.initiator_depth );
+ /* Get default connect request timeout values, and adjust */
+ ret = rdma_get_option(conn->cm_id, RDMA_PROTO_IB, IB_CM_REQ_OPTIONS,
+ (void*)&req_opt, &optlen);
+ if (ret) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_get_option failed: %s\n",
+ strerror(errno));
+ goto bail;
+ }
+
+ dapl_dbg_log(DAPL_DBG_TYPE_CM, " route_resolve: "
+ "Set CR times - response %d to %d, retry %d to %d\n",
+ req_opt.remote_cm_response_timeout,
+ conn->hca->ib_trans.max_cm_timeout,
+ req_opt.max_cm_retries,
+ conn->hca->ib_trans.max_cm_retries);
+
+ /* Use hca response time setting for connect requests */
+ req_opt.max_cm_retries = conn->hca->ib_trans.max_cm_retries;
+ req_opt.remote_cm_response_timeout =
+ conn->hca->ib_trans.max_cm_timeout;
+ req_opt.local_cm_response_timeout =
+ req_opt.remote_cm_response_timeout;
+ ret = rdma_set_option(conn->cm_id, RDMA_PROTO_IB, IB_CM_REQ_OPTIONS,
+ (void*)&req_opt, optlen);
+ if (ret) {
+ dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_set_option failed: %s\n",
+ strerror(errno));
+ goto bail;
+ }
+
ret = rdma_connect(conn->cm_id, &conn->params);
if (ret) {
dapl_dbg_log(DAPL_DBG_TYPE_ERR, " rdma_connect failed: %s\n",
}
dapl_os_unlock(&conn->lock);
+ /* The IB CM and uDAPL state machines are not shared, so
+ * events can still arrive after the consumer has called
+ * disconnect while the connection was in a pending state.
+ * In some cases the IB CM may generate a DISCONNECTED or
+ * CONNECT_ERROR event after the consumer has returned from
+ * dapl_ep_disconnect with a DISCONNECTED event already
+ * queued. Check the EP state here and bail out so that no
+ * events are delivered after a disconnect.
+ */
+ if (DAPL_BAD_HANDLE(conn->ep, DAPL_MAGIC_EP))
+ return;
+
+ dapl_os_lock(&conn->ep->header.lock);
+ if (conn->ep->param.ep_state == DAT_EP_STATE_DISCONNECTED) {
+ dapl_os_unlock(&conn->ep->header.lock);
+ return;
+ }
+ if (event->event == RDMA_CM_EVENT_DISCONNECTED)
+ conn->ep->param.ep_state = DAT_EP_STATE_DISCONNECTED;
+
+ dapl_os_unlock(&conn->ep->header.lock);
+
switch (event->event) {
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_CONNECT_ERROR:
- dapl_dbg_log(
- DAPL_DBG_TYPE_WARN,
- " dapli_cm_active_handler: CONN_ERR "
- " event=0x%x status=%d\n",
- event->event, event->status);
+ dapl_dbg_log(
+ DAPL_DBG_TYPE_WARN,
+ " dapli_cm_active_handler: CONN_ERR "
+ " event=0x%x status=%d %s\n",
+ event->event, event->status,
+ (event->status == -110)?"TIMEOUT":"" );
dapl_evd_connection_callback(conn,
IB_CME_DESTINATION_UNREACHABLE,
event->private_data, new_conn->sp);
break;
case RDMA_CM_EVENT_UNREACHABLE:
- dapls_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
- NULL, conn->sp);
-
case RDMA_CM_EVENT_CONNECT_ERROR:
dapl_dbg_log(
- DAPL_DBG_TYPE_WARN,
- " dapli_cm_passive: CONN_ERR "
- " event=0x%x status=%d",
- " on SRC 0x%x,0x%x DST 0x%x,0x%x\n",
- event->event, event->status,
- ntohl(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->src_addr)->sin_port),
- ntohl(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_addr.s_addr),
- ntohs(((struct sockaddr_in *)
- &ipaddr->dst_addr)->sin_port));
+ DAPL_DBG_TYPE_WARN,
+ " dapli_cm_passive: CONN_ERR "
+ " event=0x%x status=%d %s"
+ " on SRC 0x%x,0x%x DST 0x%x,0x%x\n",
+ event->event, event->status,
+ (event->status == -110)?"TIMEOUT":"",
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_addr.s_addr),
+ ntohs(((struct sockaddr_in *)
+ &ipaddr->src_addr)->sin_port),
+ ntohl(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_addr.s_addr),
+ ntohs(((struct sockaddr_in *)
+ &ipaddr->dst_addr)->sin_port));
dapls_cr_callback(conn, IB_CME_DESTINATION_UNREACHABLE,
NULL, conn->sp);