From: Arlin Davis Date: Tue, 10 Sep 2013 16:19:18 +0000 (-0700) Subject: common: cleanup async event processing and logging X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=32ca84dd47db1fe780afc36bf7f6a1b6bdf9d5fc;p=~ardavis%2Fdapl.git common: cleanup async event processing and logging Add formatted string print for ib verbs async events Remove unnecessary logging and duplicate async callbacks Modify all IB providers to use dapli_async_event_cb() Signed-off-by: Arlin Davis --- diff --git a/dapl/common/dapl_evd_un_async_error_callb.c b/dapl/common/dapl_evd_un_async_error_callb.c index b31fc4c..d6884c3 100644 --- a/dapl/common/dapl_evd_un_async_error_callb.c +++ b/dapl/common/dapl_evd_un_async_error_callb.c @@ -63,9 +63,6 @@ dapl_evd_un_async_error_callback(IN ib_hca_handle_t ib_hca_handle, DAT_EVENT_NUMBER async_event; DAT_RETURN dat_status; - dapl_log(DAPL_DBG_TYPE_WARN, " -- %s (%p, %p, %p)\n", - __FUNCTION__, ib_hca_handle, cause_ptr, context); - if (NULL == context) { dapl_os_panic("NULL == context\n"); return; @@ -86,6 +83,4 @@ dapl_evd_un_async_error_callback(IN ib_hca_handle_t ib_hca_handle, async_event, async_evd->header.owner_ia); } - dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION, - "dapl_evd_un_async_error_callback () returns\n"); } diff --git a/dapl/openib_common/cq.c b/dapl/openib_common/cq.c index d89f83e..5870991 100644 --- a/dapl/openib_common/cq.c +++ b/dapl/openib_common/cq.c @@ -114,41 +114,50 @@ DAT_RETURN dapls_ib_get_async_event(IN ib_error_record_t * err_record, DAT_RETURN dat_status = DAT_SUCCESS; int err_code = err_record->event_type; - switch (err_code) { + dapl_log(DAPL_DBG_TYPE_WARN, " WARNING: %s\n", dapl_ib_async_str(err_code)); + + switch (err_code) + { /* OVERFLOW error */ - case IBV_EVENT_CQ_ERR: - *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW; - break; + case IBV_EVENT_CQ_ERR: + *async_event = DAT_ASYNC_ERROR_EVD_OVERFLOW; + break; + /* INTERNAL errors */ - case IBV_EVENT_DEVICE_FATAL: - *async_event = 
DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR; - break; + case IBV_EVENT_DEVICE_FATAL: + case IBV_EVENT_PORT_ERR: + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: /* CATASTROPHIC errors */ - case IBV_EVENT_PORT_ERR: - *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC; - break; + *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC; + break; + /* BROKEN QP error */ - case IBV_EVENT_SQ_DRAINED: - case IBV_EVENT_QP_FATAL: - case IBV_EVENT_QP_REQ_ERR: - case IBV_EVENT_QP_ACCESS_ERR: - *async_event = DAT_ASYNC_ERROR_EP_BROKEN; - break; + case IBV_EVENT_SQ_DRAINED: + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_QP_LAST_WQE_REACHED: + *async_event = DAT_ASYNC_ERROR_EP_BROKEN; + break; /* connection completion */ - case IBV_EVENT_COMM_EST: - *async_event = DAT_CONNECTION_EVENT_ESTABLISHED; - break; - - /* TODO: process HW state changes */ - case IBV_EVENT_PATH_MIG: - case IBV_EVENT_PATH_MIG_ERR: - case IBV_EVENT_PORT_ACTIVE: - case IBV_EVENT_LID_CHANGE: - case IBV_EVENT_PKEY_CHANGE: - case IBV_EVENT_SM_CHANGE: - default: - dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0); + case IBV_EVENT_COMM_EST: + *async_event = DAT_CONNECTION_EVENT_ESTABLISHED; + break; + + /* non-catastrophic events */ + case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_CLIENT_REREGISTER: + break; + + default: + dat_status = DAT_ERROR(DAT_NOT_IMPLEMENTED, 0); } return dat_status; } diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h index 925aa25..5da127b 100644 --- a/dapl/openib_common/dapl_ib_common.h +++ b/dapl/openib_common/dapl_ib_common.h @@ -354,6 +354,7 @@ enum ibv_mtu dapl_ib_mtu(int mtu); char *dapl_ib_mtu_str(enum ibv_mtu mtu); int getipaddr_netdev(char *name, char *addr, int addr_len); DAT_RETURN getlocalipaddr(char *addr, int addr_len); +void 
dapli_async_event_cb(struct _ib_hca_transport *hca); /* qp.c */ DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN struct ibv_qp *qp); @@ -451,4 +452,29 @@ STATIC _INLINE_ char * dapl_cm_op_str(IN int op) return ((op < 1 || op > 7) ? "Invalid OP?" : ops[op]); } +static inline char * dapl_ib_async_str(IN int st) +{ + static char *state[] = { + "IBV_EVENT_CQ_ERR", + "IBV_EVENT_QP_FATAL", + "IBV_EVENT_QP_REQ_ERR", + "IBV_EVENT_QP_ACCESS_ERR", + "IBV_EVENT_COMM_EST", + "IBV_EVENT_SQ_DRAINED", + "IBV_EVENT_PATH_MIG", + "IBV_EVENT_PATH_MIG_ERR", + "IBV_EVENT_DEVICE_FATAL", + "IBV_EVENT_PORT_ACTIVE", + "IBV_EVENT_PORT_ERR", + "IBV_EVENT_LID_CHANGE", + "IBV_EVENT_PKEY_CHANGE", + "IBV_EVENT_SM_CHANGE", + "IBV_EVENT_SRQ_ERR", + "IBV_EVENT_SRQ_LIMIT_REACHED", + "IBV_EVENT_QP_LAST_WQE_REACHED", + "IBV_EVENT_CLIENT_REREGISTER", + }; + return ((st < 0 || st > 17) ? "Invalid IB async event?" : state[st]); +} + #endif /* _DAPL_IB_COMMON_H_ */ diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c index f1d843d..626e7b8 100644 --- a/dapl/openib_common/util.c +++ b/dapl/openib_common/util.c @@ -640,8 +640,8 @@ void dapli_async_event_cb(struct _ib_hca_transport *hca) } case IBV_EVENT_CLIENT_REREGISTER: /* no need to report this event this time */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event: IBV_CLIENT_REREGISTER\n"); + dapl_log(DAPL_DBG_TYPE_WARN, + " WARNING: IBV_CLIENT_REREGISTER\n"); break; default: diff --git a/dapl/openib_mcm/cm.c b/dapl/openib_mcm/cm.c index 4c4b9fa..80608b0 100644 --- a/dapl/openib_mcm/cm.c +++ b/dapl/openib_mcm/cm.c @@ -34,7 +34,9 @@ #include "dapl_ep_util.h" #include "dapl_osd.h" +#ifdef DAPL_DBG static char gid_str[INET6_ADDRSTRLEN]; +#endif enum DAPL_FD_EVENTS { DAPL_FD_READ = POLLIN, @@ -2028,7 +2030,7 @@ void cm_thread(void *arg) } if (dapl_poll(hca->ib_hca_handle->async_fd, DAPL_FD_READ) == DAPL_FD_READ) { - mcm_async_event(hca); + dapli_async_event_cb(&hca->ib_trans); } if (dapl_poll(hca->ib_trans.ib_cq->fd, DAPL_FD_READ) == 
DAPL_FD_READ) { diff --git a/dapl/openib_mcm/dapl_ib_util.h b/dapl/openib_mcm/dapl_ib_util.h index eed6d74..72d3825 100644 --- a/dapl/openib_mcm/dapl_ib_util.h +++ b/dapl/openib_mcm/dapl_ib_util.h @@ -152,7 +152,6 @@ typedef struct _ib_hca_transport void cm_thread(void *arg); void dapli_queue_conn(dp_ib_cm_handle_t cm); void dapli_dequeue_conn(dp_ib_cm_handle_t cm); -void mcm_async_event(struct dapl_hca *hca); void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg); void mcm_disconnect_final(dp_ib_cm_handle_t cm); void dapli_cq_event_cb(struct _ib_hca_transport *tp); diff --git a/dapl/openib_mcm/device.c b/dapl/openib_mcm/device.c index 6d6e1b1..570c43c 100644 --- a/dapl/openib_mcm/device.c +++ b/dapl/openib_mcm/device.c @@ -565,95 +565,3 @@ bail: return -1; } -void mcm_async_event(struct dapl_hca *hca) -{ - struct ibv_async_event event; - struct _ib_hca_transport *tp = &hca->ib_trans; - - if (!ibv_get_async_event(hca->ib_hca_handle, &event)) { - - switch (event.event_type) { - case IBV_EVENT_CQ_ERR: - { - struct dapl_evd *evd_ptr = - event.element.cq->cq_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event CQ (%p) ERR %d\n", - evd_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_cq_error) - tp->async_cq_error(hca->ib_hca_handle, - evd_ptr->ib_cq_handle, - &event, (void *)evd_ptr); - break; - } - case IBV_EVENT_COMM_EST: - { - /* Received msgs on connected QP before RTU */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event COMM_EST(%p) rdata beat RTU\n", - event.element.qp); - - break; - } - case IBV_EVENT_QP_FATAL: - case IBV_EVENT_QP_REQ_ERR: - case IBV_EVENT_QP_ACCESS_ERR: - case IBV_EVENT_QP_LAST_WQE_REACHED: - case IBV_EVENT_SRQ_ERR: - case IBV_EVENT_SRQ_LIMIT_REACHED: - case IBV_EVENT_SQ_DRAINED: - { - struct dapl_ep *ep_ptr = - event.element.qp->qp_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event QP (%p) ERR %d\n", - ep_ptr, event.event_type); - - /* report up if async callback still setup */ - 
if (tp->async_qp_error) - tp->async_qp_error(hca->ib_hca_handle, - ep_ptr->qp_handle, - &event, (void *)ep_ptr); - break; - } - case IBV_EVENT_PATH_MIG: - case IBV_EVENT_PATH_MIG_ERR: - case IBV_EVENT_DEVICE_FATAL: - case IBV_EVENT_PORT_ACTIVE: - case IBV_EVENT_PORT_ERR: - case IBV_EVENT_LID_CHANGE: - case IBV_EVENT_PKEY_CHANGE: - case IBV_EVENT_SM_CHANGE: - { - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: DEV ERR %d\n", - event.event_type); - - /* report up if async callback still setup */ - if (tp->async_unafiliated) - tp->async_unafiliated(hca->ib_hca_handle, - &event, - tp->async_un_ctx); - break; - } - case IBV_EVENT_CLIENT_REREGISTER: - /* no need to report this event this time */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event: IBV_CLIENT_REREGISTER\n"); - break; - - default: - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: %d UNKNOWN\n", - event.event_type); - break; - - } - ibv_ack_async_event(&event); - } -} - diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c index b6d1fc5..e6ed4f6 100644 --- a/dapl/openib_ucm/cm.c +++ b/dapl/openib_ucm/cm.c @@ -1264,7 +1264,7 @@ ud_bail: (DAT_PVOID *)cm->msg.p_data, (DAT_PVOID *)&xevent); - if (event != DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED) + if (event != (ib_cm_events_t)DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED) dapli_cm_free(cm); DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)), DCNT_IA_CM_AH_RESOLVED); @@ -2226,7 +2226,7 @@ void cm_thread(void *arg) } if (dapl_poll(hca->ib_hca_handle->async_fd, DAPL_FD_READ) == DAPL_FD_READ) { - ucm_async_event(hca); + dapli_async_event_cb(&hca->ib_trans); } if (dapl_poll(hca->ib_trans.ib_cq->fd, DAPL_FD_READ) == DAPL_FD_READ) { diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h index efb7346..02bfc27 100644 --- a/dapl/openib_ucm/dapl_ib_util.h +++ b/dapl/openib_ucm/dapl_ib_util.h @@ -148,7 +148,6 @@ typedef struct _ib_hca_transport /* prototypes */ void cm_thread(void *arg); -void ucm_async_event(struct dapl_hca *hca); void 
dapli_cq_event_cb(struct _ib_hca_transport *tp); void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr); void dapls_cm_release(dp_ib_cm_handle_t cm_ptr); diff --git a/dapl/openib_ucm/device.c b/dapl/openib_ucm/device.c index dfb31e0..6c8f2ec 100644 --- a/dapl/openib_ucm/device.c +++ b/dapl/openib_ucm/device.c @@ -591,96 +591,3 @@ bail: ucm_service_destroy(hca); return -1; } - -void ucm_async_event(struct dapl_hca *hca) -{ - struct ibv_async_event event; - struct _ib_hca_transport *tp = &hca->ib_trans; - - if (!ibv_get_async_event(hca->ib_hca_handle, &event)) { - - switch (event.event_type) { - case IBV_EVENT_CQ_ERR: - { - struct dapl_evd *evd_ptr = - event.element.cq->cq_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event CQ (%p) ERR %d\n", - evd_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_cq_error) - tp->async_cq_error(hca->ib_hca_handle, - evd_ptr->ib_cq_handle, - &event, (void *)evd_ptr); - break; - } - case IBV_EVENT_COMM_EST: - { - /* Received msgs on connected QP before RTU */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event COMM_EST(%p) rdata beat RTU\n", - event.element.qp); - - break; - } - case IBV_EVENT_QP_FATAL: - case IBV_EVENT_QP_REQ_ERR: - case IBV_EVENT_QP_ACCESS_ERR: - case IBV_EVENT_QP_LAST_WQE_REACHED: - case IBV_EVENT_SRQ_ERR: - case IBV_EVENT_SRQ_LIMIT_REACHED: - case IBV_EVENT_SQ_DRAINED: - { - struct dapl_ep *ep_ptr = - event.element.qp->qp_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event QP (%p) ERR %d\n", - ep_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_qp_error) - tp->async_qp_error(hca->ib_hca_handle, - ep_ptr->qp_handle, - &event, (void *)ep_ptr); - break; - } - case IBV_EVENT_PATH_MIG: - case IBV_EVENT_PATH_MIG_ERR: - case IBV_EVENT_DEVICE_FATAL: - case IBV_EVENT_PORT_ACTIVE: - case IBV_EVENT_PORT_ERR: - case IBV_EVENT_LID_CHANGE: - case IBV_EVENT_PKEY_CHANGE: - case IBV_EVENT_SM_CHANGE: - { - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl 
async_event: DEV ERR %d\n", - event.event_type); - - /* report up if async callback still setup */ - if (tp->async_unafiliated) - tp->async_unafiliated(hca->ib_hca_handle, - &event, - tp->async_un_ctx); - break; - } - case IBV_EVENT_CLIENT_REREGISTER: - /* no need to report this event this time */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event: IBV_CLIENT_REREGISTER\n"); - break; - - default: - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: %d UNKNOWN\n", - event.event_type); - break; - - } - ibv_ack_async_event(&event); - } -} -