git.openfabrics.org - ~ardavis/dapl.git/commitdiff
r3774: Fix the async error handling and callback mappings.
author: James Lentini <jlentini@netapp.com>
Thu, 13 Oct 2005 20:45:22 +0000 (20:45 +0000)
committer: James Lentini <jlentini@netapp.com>
Thu, 13 Oct 2005 20:45:22 +0000 (20:45 +0000)
Updated TODO list.
Signed-off-by: Arlin Davis <ardavis@ichips.intel.com>
Signed-off-by: James Lentini <jlentini@netapp.com>

dapl/openib/TODO
dapl/openib/dapl_ib_util.c
dapl/openib/dapl_ib_util.h

index ef775e38ae099b931258e9c1f74891a480d78798..15058614779b20413d2ea9a6243d29458c3b9e19 100644 (file)
@@ -1,12 +1,10 @@
 
 IB Verbs:
 - CQ resize
-- mulitple CQ event support
 - memory window support
 
 DAPL:
 - reinit EP needs a QP timewait completion notification
-- direct cq_wait_object when multi-CQ verbs event support arrives
 - shared receive queue support
 
 Under discussion:
index 6561830256cbc069f280623278495b0213341bbb..f7ed6ed66c901768dbc0c4c787beb4faf7b6b6ac 100644 (file)
@@ -214,8 +214,11 @@ DAT_RETURN dapls_ib_open_hca (
        /* Get list of all IB devices, find match, open */
        dev_list = ibv_get_devices();
        dlist_start(dev_list);
-       dlist_for_each_data(dev_list,hca_ptr->ib_trans.ib_dev,struct ibv_device) {
-               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),hca_name))
+       dlist_for_each_data(dev_list,
+                           hca_ptr->ib_trans.ib_dev,
+                           struct ibv_device) {
+               if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                                               hca_name))
                        break;
        }
 
@@ -226,20 +229,22 @@ DAT_RETURN dapls_ib_open_hca (
                return DAT_INTERNAL_ERROR;
        }
        
-       dapl_dbg_log (DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n", 
-                       ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                       (unsigned long long)bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
+       dapl_dbg_log (
+           DAPL_DBG_TYPE_UTIL," open_hca: Found dev %s %016llx\n", 
+           ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+           (unsigned long long)
+               bswap_64(ibv_get_device_guid(hca_ptr->ib_trans.ib_dev)));
 
        hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
        if (!hca_ptr->ib_hca_handle) {
                dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
                              " open_hca: IB dev open failed for %s\n", 
-                             ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+                             ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
                return DAT_INTERNAL_ERROR;
        }
        hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;
 
-       /* set inline max with enviromment or default, get local lid and gid 0 */
+       /* set inline max with env or default, get local lid and gid 0 */
        hca_ptr->ib_trans.max_inline_send = 
                dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_DEFAULT);
 
@@ -253,15 +258,17 @@ DAT_RETURN dapls_ib_open_hca (
        }
                        
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: GID subnet %016llx id %016llx\n",
-                    (unsigned long long)bswap_64(hca_ptr->ib_trans.gid.global.subnet_prefix),
-                    (unsigned long long)bswap_64(hca_ptr->ib_trans.gid.global.interface_id) );
+               " open_hca: GID subnet %016llx id %016llx\n",
+               (unsigned long long)
+                       bswap_64(hca_ptr->ib_trans.gid.global.subnet_prefix),
+               (unsigned long long)
+                       bswap_64(hca_ptr->ib_trans.gid.global.interface_id));
 
        /* get the IP address of the device using GID */
        if (dapli_get_hca_addr(hca_ptr)) {
                dapl_dbg_log (DAPL_DBG_TYPE_ERR, 
                              " open_hca: ERR ib_at_ips_by_gid for %s\n", 
-                             ibv_get_device_name(hca_ptr->ib_trans.ib_dev) );
+                             ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
                goto bail;
        }
 
@@ -310,15 +317,23 @@ DAT_RETURN dapls_ib_open_hca (
        write(g_ib_pipe[1], "w", sizeof "w");
        dapl_os_unlock(&g_hca_lock);
        
-       dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
-                     " open_hca: %s, port %d, %s  %d.%d.%d.%d INLINE_MAX=%d\n", 
-                     ibv_get_device_name(hca_ptr->ib_trans.ib_dev), hca_ptr->port_num,
-                     ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_family == AF_INET ?  "AF_INET":"AF_INET6",
-                     ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
-                     ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
-                     ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
-                     ((struct sockaddr_in *)&hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff,
-                     hca_ptr->ib_trans.max_inline_send );
+       dapl_dbg_log (
+               DAPL_DBG_TYPE_UTIL, 
+               " open_hca: %s, port %d, %s  %d.%d.%d.%d INLINE_MAX=%d\n",
+               ibv_get_device_name(hca_ptr->ib_trans.ib_dev), 
+               hca_ptr->port_num,
+               ((struct sockaddr_in *)
+                       &hca_ptr->hca_address)->sin_family == AF_INET ?  
+                       "AF_INET":"AF_INET6",
+               ((struct sockaddr_in *)
+                       &hca_ptr->hca_address)->sin_addr.s_addr >> 0 & 0xff,
+               ((struct sockaddr_in *)
+                       &hca_ptr->hca_address)->sin_addr.s_addr >> 8 & 0xff,
+               ((struct sockaddr_in *)
+                       &hca_ptr->hca_address)->sin_addr.s_addr >> 16 & 0xff,
+               ((struct sockaddr_in *)
+                       &hca_ptr->hca_address)->sin_addr.s_addr >> 24 & 0xff,
+               hca_ptr->ib_trans.max_inline_send );
 
        hca_ptr->ib_trans.d_hca = hca_ptr;
        return DAT_SUCCESS;
@@ -370,7 +385,7 @@ DAT_RETURN dapls_ib_close_hca (     IN   DAPL_HCA   *hca_ptr )
                sleep.tv_sec = 0;
                sleep.tv_nsec = 10000000; /* 10 ms */
                dapl_dbg_log(DAPL_DBG_TYPE_UTIL, 
-                            " ib_thread_destroy: waiting on hca %p destroy\n");
+                            " ib_thread_destroy: wait on hca %p destroy\n");
                nanosleep (&sleep, &remain);
        }
        return (DAT_SUCCESS);
@@ -425,19 +440,26 @@ DAT_RETURN dapls_ib_query_hca (
        if (ia_attr != NULL) {
                ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
                ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
-               ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
+               ia_attr->ia_address_ptr = 
+                       (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
 
                dapl_dbg_log (DAPL_DBG_TYPE_UTIL, 
                        " query_hca: %s %s  %d.%d.%d.%d\n", 
                        ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_family == AF_INET ? "AF_INET":"AF_INET6",
-                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 0 & 0xff,
-                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 8 & 0xff,
-                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 16 & 0xff,
-                       ((struct sockaddr_in *)ia_attr->ia_address_ptr)->sin_addr.s_addr >> 24 & 0xff );
+                       ((struct sockaddr_in *)
+                       ia_attr->ia_address_ptr)->sin_family == AF_INET ? 
+                       "AF_INET":"AF_INET6",
+                       ((struct sockaddr_in *)
+                       ia_attr->ia_address_ptr)->sin_addr.s_addr >> 0 & 0xff,
+                       ((struct sockaddr_in *)
+                       ia_attr->ia_address_ptr)->sin_addr.s_addr >> 8 & 0xff,
+                       ((struct sockaddr_in *)
+                       ia_attr->ia_address_ptr)->sin_addr.s_addr >> 16 & 0xff,
+                       ((struct sockaddr_in *)
+                       ia_attr->ia_address_ptr)->sin_addr.s_addr >> 24 & 0xff);
                
                ia_attr->hardware_version_major   = dev_attr.hw_ver;
-               ia_attr->hardware_version_minor   = dev_attr.fw_ver;
+               /* ia_attr->hardware_version_minor   = dev_attr.fw_ver; */
                ia_attr->max_eps                  = dev_attr.max_qp;
                ia_attr->max_dto_per_ep           = dev_attr.max_qp_wr;
                ia_attr->max_rdma_read_per_ep     = dev_attr.max_qp_rd_atom;
@@ -468,7 +490,6 @@ DAT_RETURN dapls_ib_query_hca (
                        ia_attr->max_mtu_size, ia_attr->max_rdma_size,
                        ia_attr->max_iov_segments_per_dto, ia_attr->max_lmrs, 
                        ia_attr->max_rmrs );
-
        }
        
        if (ep_attr != NULL) {
@@ -522,27 +543,28 @@ DAT_RETURN dapls_ib_setup_async_callback (
        ib_hca_transport_t      *hca_ptr;
 
        dapl_dbg_log (DAPL_DBG_TYPE_UTIL,
-                       " setup_async_cb: ia %p type %d handle %p cb %p ctx %p\n",
+                       " setup_async_cb: ia %p type %d hdl %p cb %p ctx %p\n",
                        ia_ptr, handler_type, evd_ptr, callback, context);
 
        hca_ptr = &ia_ptr->hca_ptr->ib_trans;
        switch(handler_type)
        {
                case DAPL_ASYNC_UNAFILIATED:
-                       hca_ptr->async_unafiliated = callback;
+                       hca_ptr->async_unafiliated = 
+                               (ib_async_handler_t)callback;
                        hca_ptr->async_un_ctx = context;
                        break;
                case DAPL_ASYNC_CQ_ERROR:
-                       hca_ptr->async_cq_error = callback;
-                       hca_ptr->async_cq_ctx = context;
+                       hca_ptr->async_cq_error = 
+                               (ib_async_cq_handler_t)callback;
                        break;
                case DAPL_ASYNC_CQ_COMPLETION:
-                       hca_ptr->async_cq = callback;
-                       hca_ptr->async_ctx = context;
+                       hca_ptr->async_cq = 
+                               (ib_async_dto_handler_t)callback;
                        break;
                case DAPL_ASYNC_QP_ERROR:
-                       hca_ptr->async_qp_error = callback;
-                       hca_ptr->async_qp_ctx = context;
+                       hca_ptr->async_qp_error = 
+                               (ib_async_qp_handler_t)callback;
                        break;
                default:
                        break;
@@ -573,7 +595,6 @@ void dapli_ib_thread_destroy(void)
        int retries = 10;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                     " ib_thread_destroy(%d)\n", getpid());
-
        /* 
         * wait for async thread to terminate. 
         * pthread_join would be the correct method
@@ -623,34 +644,42 @@ void dapli_async_event_cb(struct _ib_hca_transport *hca)
 
                        case    IBV_EVENT_CQ_ERR:
                        {
-                               dapl_dbg_log(DAPL_DBG_TYPE_WARN,
-                                            " dapli_async_event CQ ERR %d\n",
-                                            event.event_type);                         
+                               struct dapl_ep *evd_ptr = 
+                                       event.element.cq->cq_context;
+
+                               dapl_dbg_log(
+                                       DAPL_DBG_TYPE_WARN,
+                                       " dapli_async_event CQ (%p) ERR %d\n",
+                                       evd_ptr, event.event_type);                             
                                
                                /* report up if async callback still setup */
                                if (hca->async_cq_error)
                                        hca->async_cq_error(hca->ib_ctx,
+                                                           event.element.cq,   
                                                            &event,
-                                                           hca->async_cq_ctx);
+                                                           (void*)evd_ptr);
                                break;
                        }
                        case    IBV_EVENT_COMM_EST:
                        {
-                               /* Received messages on connected QP before RTU */
-                               struct dapl_ep *ep_ptr = event.element.qp->qp_context;
+                               /* Received msgs on connected QP before RTU */
+                               struct dapl_ep *ep_ptr = 
+                                       event.element.qp->qp_context;
                                
                                /* TODO: cannot process COMM_EST until ibv  
                                 * guarantees valid QP context for events. 
                                 * Race conditions exist with QP destroy call. 
                                 * For now, assume the RTU will arrive.
                                 */
-                               dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                                            " dapli_async_event COMM_EST (qp=%p)\n",
-                                            event.element.qp); 
+                               dapl_dbg_log(
+                                       DAPL_DBG_TYPE_UTIL,
+                                       " dapli_async_event COMM_EST(qp=%p)\n",
+                                       event.element.qp);      
 
                                if (!DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP) &&
                                    ep_ptr->cm_handle != IB_INVALID_HANDLE)
-                                       ib_cm_establish(ep_ptr->cm_handle->cm_id);
+                                       ib_cm_establish(
+                                               ep_ptr->cm_handle->cm_id);
                        
                                break;
                        }
@@ -662,15 +691,20 @@ void dapli_async_event_cb(struct _ib_hca_transport *hca)
                        case    IBV_EVENT_SRQ_LIMIT_REACHED:
                        case    IBV_EVENT_SQ_DRAINED:
                        {
-                               dapl_dbg_log(DAPL_DBG_TYPE_WARN,
-                                            " dapli_async_event QP ERR %d\n",
-                                            event.event_type); 
+                               struct dapl_ep *ep_ptr = 
+                                       event.element.qp->qp_context;
+
+                               dapl_dbg_log(
+                                       DAPL_DBG_TYPE_WARN,
+                                       " dapli_async_event QP (%p) ERR %d\n",
+                                       ep_ptr, event.event_type);      
                                
                                /* report up if async callback still setup */
                                if (hca->async_qp_error)
                                        hca->async_qp_error(hca->ib_ctx,
+                                                           event.element.qp,
                                                            &event,
-                                                           hca->async_qp_ctx);
+                                                           (void*)ep_ptr);
                                break;
                        }
                        case    IBV_EVENT_PATH_MIG:
index a692bb0b86f4f70832cd28bd077c1efa02579a6c..20f4968878f50cc061b15d9c67dc13b11e495ed4 100644 (file)
@@ -141,7 +141,7 @@ typedef enum        ibv_send_flags  ib_send_op_type_t;
 typedef        struct  ibv_sge         ib_data_segment_t;
 typedef enum   ibv_qp_state    ib_qp_state_t;
 typedef        enum    ibv_event_type  ib_async_event_type;
-typedef struct ibv_async_event ib_error_record_t;      
+typedef struct ibv_async_event ib_error_record_t;
 
 /* CQ notifications */
 typedef enum
@@ -222,12 +222,30 @@ typedef struct ibv_comp_channel *ib_wait_obj_handle_t;
  * ibv_post_recv - Return 0, -1 & bad_wr 
  */
 
-/* async handler for CQ, QP, and unafiliated */
+/* async handler for DTO, CQ, QP, and unafiliated */
+typedef void (*ib_async_dto_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_cq_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_cq_handle_t     ib_cq_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
+typedef void (*ib_async_qp_handler_t)(
+    IN    ib_hca_handle_t    ib_hca_handle,
+    IN    ib_qp_handle_t     ib_qp_handle,
+    IN    ib_error_record_t  *err_code,
+    IN    void               *context);
+
 typedef void (*ib_async_handler_t)(
     IN    ib_hca_handle_t    ib_hca_handle,
     IN    ib_error_record_t  *err_code,
     IN    void               *context);
 
+
 /* ib_hca_transport_t, specific to this implementation */
 typedef struct _ib_hca_transport
 { 
@@ -244,12 +262,9 @@ typedef struct _ib_hca_transport
        union ibv_gid           gid;
        ib_async_handler_t      async_unafiliated;
        void                    *async_un_ctx;
-       ib_async_handler_t      async_cq_error;
-       void                    *async_ctx;
-       ib_async_handler_t      async_cq;
-       void                    *async_cq_ctx;
-       ib_async_handler_t      async_qp_error;
-       void                    *async_qp_ctx;
+       ib_async_cq_handler_t   async_cq_error;
+       ib_async_dto_handler_t  async_cq;
+       ib_async_qp_handler_t   async_qp_error;
 
 } ib_hca_transport_t;