From 1f8da7a209983ca5c1a6ff01fa8d539b4e82a9a7 Mon Sep 17 00:00:00 2001
From: leonidk
Date: Tue, 12 Aug 2008 18:34:39 +0000
Subject: [PATCH] [MLX4] fixed mechanism of mlx4_hca's asynchronous events notification.

git-svn-id: svn://openib.tc.cornell.edu/gen1@1480 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86
---
 trunk/hw/mlx4/kernel/bus/ib/main.c         |   2 +
 trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h    |  38 ++++-----
 trunk/hw/mlx4/kernel/bus/inc/ib_verbs_ex.h |   3 +
 trunk/hw/mlx4/kernel/hca/data.c            | 121 +++++++++++++++-------
 trunk/hw/mlx4/kernel/hca/data.h            |   1 +
 trunk/inc/iba/ib_types.h                   |  12 +++
 6 files changed, 121 insertions(+), 56 deletions(-)

diff --git a/trunk/hw/mlx4/kernel/bus/ib/main.c b/trunk/hw/mlx4/kernel/bus/ib/main.c
index 5faa22b6..923134c0 100644
--- a/trunk/hw/mlx4/kernel/bus/ib/main.c
+++ b/trunk/hw/mlx4/kernel/bus/ib/main.c
@@ -561,6 +561,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.x.find_cached_pkey = ib_find_cached_pkey;
 	ibdev->ib_dev.x.get_cached_gid = ib_get_cached_gid;
 	ibdev->ib_dev.x.get_cached_pkey = ib_get_cached_pkey;
+	ibdev->ib_dev.x.register_ev_cb = mlx4_reset_cb_register;
+	ibdev->ib_dev.x.unregister_ev_cb = mlx4_reset_cb_unregister;
 
 	if (mlx4_is_livefish(ibdev->dev))
 		return ibdev;
diff --git a/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h b/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h
index d05558da..e2512d9b 100644
--- a/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h
+++ b/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h
@@ -255,27 +255,27 @@ struct ib_port_modify {
 };
 
 enum ib_event_type {
-	IB_EVENT_CQ_ERR = IB_AE_CQ_ERROR,
-	IB_EVENT_QP_FATAL = IB_AE_QP_FATAL,
-	IB_EVENT_QP_REQ_ERR = IB_AE_WQ_REQ_ERROR,
-	IB_EVENT_QP_ACCESS_ERR = IB_AE_WQ_ACCESS_ERROR,
-	IB_EVENT_COMM_EST = IB_AE_QP_COMM,
-	IB_EVENT_SQ_DRAINED = IB_AE_SQ_DRAINED,
-	IB_EVENT_PATH_MIG = IB_AE_QP_APM,
-	IB_EVENT_PATH_MIG_ERR = IB_AE_QP_APM_ERROR,
-	IB_EVENT_DEVICE_FATAL = IB_AE_LOCAL_FATAL,
-	IB_EVENT_PORT_ACTIVE = IB_AE_PORT_ACTIVE,
-	IB_EVENT_PORT_ERR = IB_AE_PORT_DOWN,
-	IB_EVENT_SRQ_LIMIT_REACHED = IB_AE_SRQ_LIMIT_REACHED,
-	IB_EVENT_SRQ_ERR = IB_AE_SRQ_CATAS_ERROR,
-	IB_EVENT_QP_LAST_WQE_REACHED = IB_AE_SRQ_QP_LAST_WQE_REACHED,
-	IB_EVENT_LID_CHANGE = IB_AE_UNKNOWN + 1,
+	IB_EVENT_CQ_ERR = IB_AE_CQ_ERROR,
+	IB_EVENT_QP_FATAL = IB_AE_QP_FATAL,
+	IB_EVENT_QP_REQ_ERR = IB_AE_WQ_REQ_ERROR,
+	IB_EVENT_QP_ACCESS_ERR = IB_AE_WQ_ACCESS_ERROR,
+	IB_EVENT_COMM_EST = IB_AE_QP_COMM,
+	IB_EVENT_SQ_DRAINED = IB_AE_SQ_DRAINED,
+	IB_EVENT_PATH_MIG = IB_AE_QP_APM,
+	IB_EVENT_PATH_MIG_ERR = IB_AE_QP_APM_ERROR,
+	IB_EVENT_DEVICE_FATAL = IB_AE_LOCAL_FATAL,
+	IB_EVENT_PORT_ACTIVE = IB_AE_PORT_ACTIVE,
+	IB_EVENT_PORT_ERR = IB_AE_PORT_DOWN,
+	IB_EVENT_SRQ_LIMIT_REACHED = IB_AE_SRQ_LIMIT_REACHED,
+	IB_EVENT_SRQ_ERR = IB_AE_SRQ_CATAS_ERROR,
+	IB_EVENT_QP_LAST_WQE_REACHED = IB_AE_SRQ_QP_LAST_WQE_REACHED,
+	IB_EVENT_RESET_DRIVER = IB_AE_RESET_DRIVER, // device will be reset upon fatal error
+	IB_EVENT_RESET_CLIENT = IB_AE_RESET_CLIENT, // device will be reset upon client request
+	IB_EVENT_RESET_END = IB_AE_RESET_END, // device has been reset
+	IB_EVENT_LID_CHANGE = IB_AE_UNKNOWN + 1,
 	IB_EVENT_PKEY_CHANGE,
 	IB_EVENT_SM_CHANGE,
-	IB_EVENT_CLIENT_REREGISTER,
-	IB_EVENT_RESET_DRIVER, // device will be reset upon fatal error
-	IB_EVENT_RESET_CLIENT, // device will be upon client request
-	IB_EVENT_RESET_END // device has been reset
+	IB_EVENT_CLIENT_REREGISTER
 };
 
 struct ib_event {
diff --git a/trunk/hw/mlx4/kernel/bus/inc/ib_verbs_ex.h b/trunk/hw/mlx4/kernel/bus/inc/ib_verbs_ex.h
index 49692cc4..0bfd7313 100644
--- a/trunk/hw/mlx4/kernel/bus/inc/ib_verbs_ex.h
+++ b/trunk/hw/mlx4/kernel/bus/inc/ib_verbs_ex.h
@@ -36,6 +36,7 @@
 
 typedef struct _FDO_DEVICE_DATA *PFDO_DEVICE_DATA;
 struct ib_cq;
+struct ib_event_handler;
 
 /* extension for ib_device */
 struct ib_device_ex
@@ -49,6 +50,8 @@ struct ib_device_ex
 		u8 port_num, int index, __be16 *pkey);
 	int (*find_cached_pkey)(struct ib_device *device,
 		u8 port_num, __be16 pkey, u16 *index);
+	int (*register_ev_cb) (struct ib_event_handler *event_handler);
+	int (*unregister_ev_cb) (struct ib_event_handler *event_handler);
 };
 
 
diff --git a/trunk/hw/mlx4/kernel/hca/data.c b/trunk/hw/mlx4/kernel/hca/data.c
index a3cea329..19791df0 100644
--- a/trunk/hw/mlx4/kernel/hca/data.c
+++ b/trunk/hw/mlx4/kernel/hca/data.c
@@ -104,13 +104,81 @@ mlnx_hcas_init( void )
 
 /////////////////////////////////////////////////////////
 /////////////////////////////////////////////////////////
+
+/*
+ * Deliver an asynchronous event from the bus driver to the HCA's listeners.
+ *
+ * ev      - the event; its type and port number are copied into the
+ *           ib_event_rec_t handed to every callback.
+ * context - the mlnx_hca_t the event is reported for.
+ *
+ * Event types above IB_AE_UNKNOWN are reported as IB_AE_LOCAL_FATAL.
+ * Handlers on p_hca->event_list run under event_list_lock, which is taken
+ * with KeAcquireSpinLockAtDpcLevel, so the caller must already be running
+ * at IRQL >= DISPATCH_LEVEL.  The CA-wide async_cb_p is invoked after the
+ * lock is released.
+ */
+void ca_event_handler(struct ib_event *ev, void *context)
+{
+	mlnx_hca_t *p_hca = (mlnx_hca_t *)context;
+	ib_event_rec_t event_rec;
+	LIST_ENTRY *entry;
+	ci_event_handler_t *event_handler;
+
+	// p_hca is dereferenced below, so reject a missing context up front
+	if (!p_hca) {
+		HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n"));
+		return;
+	}
+
+	// prepare parameters
+	event_rec.type = ev->event;
+	event_rec.port_number = ev->element.port_num;
+	if (event_rec.type > IB_AE_UNKNOWN) {
+		// CL_ASSERT(0); // This shouldn't happen
+		HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("Unmapped E_EV_CA event of type 0x%x. Replaced by 0x%x (IB_AE_LOCAL_FATAL)\n",
+			event_rec.type, IB_AE_LOCAL_FATAL));
+		event_rec.type = IB_AE_LOCAL_FATAL;
+	}
+
+	// call the user callback
+	KeAcquireSpinLockAtDpcLevel(&p_hca->event_list_lock);
+	for (entry = p_hca->event_list.Flink; entry != &p_hca->event_list;
+		entry = entry->Flink) {
+
+		event_handler = CONTAINING_RECORD(entry, ci_event_handler_t, entry);
+		event_rec.context = (void *) event_handler;
+		event_handler->pfn_async_event_cb(&event_rec);
+	}
+	KeReleaseSpinLockFromDpcLevel(&p_hca->event_list_lock);
+
+	if (p_hca->async_cb_p) {
+		event_rec.context = (void *)p_hca->ca_context;
+		(p_hca->async_cb_p)(&event_rec);
+	} else {
+		HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n"));
+	}
+}
+
+
+/*
+ * ib_event_handler callback registered with the bus driver by mlnx_set_cb:
+ * forwards the event to ca_event_handler with handler->ctx as the context.
+ */
+void event_handler( struct ib_event_handler *handler, struct ib_event *event )
+{
+	ca_event_handler( event, handler->ctx );
+}
+
 ib_api_status_t
 mlnx_set_cb(
 	IN mlnx_hca_t * p_hca,
 	IN ci_async_event_cb_t async_cb_p,
 	IN const void* const ib_context)
 {
+	int err;
 	cl_status_t cl_status;
+	struct ib_device *ibdev = hca2ibdev(p_hca);
 
 	// Setup the callbacks
 	if (!p_hca->async_proc_mgr_p)
@@ -133,6 +201,17 @@ mlnx_set_cb(
 
 	p_hca->async_cb_p = async_cb_p;
 	p_hca->ca_context = ib_context; // This is the context our CB forwards to IBAL
+
+	// register callback with bus driver
+	INIT_IB_EVENT_HANDLER( &p_hca->ib_event_handler, ibdev,
+		event_handler, p_hca, NULL, 0 );
+
+	err = ibdev->x.register_ev_cb(&p_hca->ib_event_handler);
+	if (err) {
+		RtlZeroMemory( &p_hca->ib_event_handler, sizeof(p_hca->ib_event_handler) );
+		return IB_ERROR;
+	}
+
 	return IB_SUCCESS;
 }
 
@@ -143,10 +222,14 @@ mlnx_reset_cb(
 	IN mlnx_hca_t * p_hca)
 {
 	cl_async_proc_t *p_async_proc;
-
+	struct ib_device *ibdev = hca2ibdev(p_hca);
 
 	cl_spinlock_acquire( &hca_lock );
 
+	// unregister callback with bus driver
+	if ( p_hca->ib_event_handler.handler )
+		ibdev->x.unregister_ev_cb(&p_hca->ib_event_handler);
+
 	p_async_proc = p_hca->async_proc_mgr_p;
 	p_hca->async_proc_mgr_p = NULL;
 
@@ -295,42 +378,6 @@ from_hca_cap(
 	}
 }
 
-void ca_event_handler(struct ib_event *ev, void *context)
-{
-	mlnx_hca_t *p_hca = (mlnx_hca_t *)context;
-	ib_event_rec_t event_rec;
-	LIST_ENTRY *entry;
-	ci_event_handler_t *event_handler;
-
-	// prepare parameters
-	event_rec.type = ev->event;
-	event_rec.port_number = ev->element.port_num;
-	if (event_rec.type > IB_AE_UNKNOWN) {
-		// CL_ASSERT(0); // This shouldn't happen
-		HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("Unmapped E_EV_CA event of type 0x%x. Replaced by 0x%x (IB_AE_LOCAL_FATAL)\n",
-			event_rec.type, IB_AE_LOCAL_FATAL));
-		event_rec.type = IB_AE_LOCAL_FATAL;
-	}
-
-	// call the user callback
-	KeAcquireSpinLockAtDpcLevel(&p_hca->event_list_lock);
-	for (entry = p_hca->event_list.Flink; entry != &p_hca->event_list;
-		entry = entry->Flink) {
-
-		event_handler = CONTAINING_RECORD(entry, ci_event_handler_t, entry);
-		event_rec.context = (void *) event_handler;
-		event_handler->pfn_async_event_cb(&event_rec);
-	}
-	KeReleaseSpinLockFromDpcLevel(&p_hca->event_list_lock);
-
-	if (p_hca && p_hca->async_cb_p) {
-		event_rec.context = (void *)p_hca->ca_context;
-		(p_hca->async_cb_p)(&event_rec);
-	} else {
-		HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n"));
-	}
-}
-
 enum ib_rate to_rate(uint8_t rate)
 {
 	if (rate == IB_PATH_RECORD_RATE_2_5_GBS) return IB_RATE_2_5_GBPS;
diff --git a/trunk/hw/mlx4/kernel/hca/data.h b/trunk/hw/mlx4/kernel/hca/data.h
index 2d0aabec..f02b1787 100644
--- a/trunk/hw/mlx4/kernel/hca/data.h
+++ b/trunk/hw/mlx4/kernel/hca/data.h
@@ -163,6 +163,7 @@ typedef struct _mlnx_hca_t {
 	void *cl_device_h;
 	uint32_t index;
 	cl_async_proc_t *async_proc_mgr_p;
+	struct ib_event_handler ib_event_handler;
 } mlnx_hca_t;
 
 // Functions
diff --git a/trunk/inc/iba/ib_types.h b/trunk/inc/iba/ib_types.h
index 651dae9f..c5c3dcf9 100644
--- a/trunk/inc/iba/ib_types.h
+++ b/trunk/inc/iba/ib_types.h
@@ -8792,6 +8792,9 @@ typedef enum _ib_async_event_t
 	IB_AE_SRQ_LIMIT_REACHED,
 	IB_AE_SRQ_CATAS_ERROR,
 	IB_AE_SRQ_QP_LAST_WQE_REACHED,
+	IB_AE_RESET_DRIVER,
+	IB_AE_RESET_CLIENT,
+	IB_AE_RESET_END,
 	IB_AE_UNKNOWN /* ALWAYS LAST ENUM VALUE */
 
 } ib_async_event_t;
@@ -8896,6 +8899,15 @@ typedef enum _ib_async_event_t
 *		a CQE is generated for the last WQE, or
 *		the QP gets in the Error State and there are no more WQEs on the RQ.
 *
+*	IB_AE_RESET_DRIVER
+*		Device will be reset upon fatal error.
+*
+*	IB_AE_RESET_CLIENT
+*		Device will be reset upon client request.
+*
+*	IB_AE_RESET_END
+*		Device has been reset.
+*
 *	IB_AE_UNKNOWN
 *		An unknown error occurred which cannot be attributed to any
 *		resource; behavior is indeterminate.
-- 
2.46.0