From: tzachid Date: Tue, 7 Mar 2006 14:14:09 +0000 (+0000) Subject: [MTHCA\IPOIB\AL] update from latest mthca code X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=0bc4eb5d10916e5fb80a35b65805d9e79cb61e4b;p=~shefty%2Frdma-win.git [MTHCA\IPOIB\AL] update from latest mthca code include update to IPoIB mutex ifc to alloc_pd and create_cq ib_types.h git-svn-id: svn://openib.tc.cornell.edu/gen1@230 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- diff --git a/branches/MTHCA/core/al/al_verbs.h b/branches/MTHCA/core/al/al_verbs.h index f5ce6c0d..403de878 100644 --- a/branches/MTHCA/core/al/al_verbs.h +++ b/branches/MTHCA/core/al/al_verbs.h @@ -71,8 +71,9 @@ port_num, ca_mod, p_port_attr_mod ) #define verbs_create_cq(h_ca, p_cq_create, h_cq) \ - h_ca->obj.p_ci_ca->verbs.create_cq( h_ca->obj.p_ci_ca->h_ci_ca,\ - h_cq, &p_cq_create->size, &h_cq->h_ci_cq, p_umv_buf ) + h_ca->obj.p_ci_ca->verbs.create_cq( \ + (p_umv_buf) ? h_ca->h_um_ca : h_ca->obj.p_ci_ca->h_ci_ca, \ + h_cq, &p_cq_create->size, &h_cq->h_ci_cq, p_umv_buf ) #define verbs_check_cq(h_cq) ((h_cq)->h_ci_cq) #define verbs_destroy_cq(h_cq) \ @@ -169,8 +170,9 @@ h_qp->h_ci_qp, p_mw_bind, p_rkey ) #define verbs_allocate_pd(h_ca, h_pd) \ - h_ca->obj.p_ci_ca->verbs.allocate_pd(\ - h_ca->obj.p_ci_ca->h_ci_ca, h_pd->type, &h_pd->h_ci_pd, p_umv_buf ) + h_ca->obj.p_ci_ca->verbs.allocate_pd( \ + (p_umv_buf) ? h_ca->h_um_ca : h_ca->obj.p_ci_ca->h_ci_ca, \ + h_pd->type, &h_pd->h_ci_pd, p_umv_buf ) /* * Reference the hardware PD. diff --git a/branches/MTHCA/core/al/ib_statustext.c b/branches/MTHCA/core/al/ib_statustext.c index 78f07854..a7d2454e 100644 --- a/branches/MTHCA/core/al/ib_statustext.c +++ b/branches/MTHCA/core/al/ib_statustext.c @@ -163,6 +163,17 @@ static const char* const __ib_wc_status_str[] = "IB_WCS_RNR_RETRY_ERR", "IB_WCS_TIMEOUT_RETRY_ERR", "IB_WCS_REM_INVALID_REQ_ERR", + "IB_WCS_LOCAL_EEC_OP_ERR", + "IB_WCS_BAD_RESP_ERR", + "IB_WCS_LOCAL_ACCESS_ERR", + "IB_WCS_REM_INV_REQ_ERR", + "IB_WCS_LOCAL_RDD_VIOL_ERR", + "IB_WCS_REM_ABORT_ERR", + "IB_WCS_INV_EECN_ERR", + "IB_WCS_INV_EEC_STATE_ERR", + "IB_WCS_FATAL_ERR", + "IB_WCS_RESP_TIMEOUT_ERR", + "IB_WCS_GENERAL_ERR", "IB_WCS_UNMATCHED_RESPONSE", /* InfiniBand Access Layer */ "IB_WCS_CANCELED", /* InfiniBand Access Layer */ "IB_WCS_UNKNOWN" diff --git a/branches/MTHCA/hw/mthca/hca_utils.c b/branches/MTHCA/hw/mthca/hca_utils.c new file mode 100644 index 00000000..f00caa9e --- /dev/null +++ b/branches/MTHCA/hw/mthca/hca_utils.c @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: hca_data.c 148 2005-07-12 07:48:46Z sleybo $ + */ + + +#include "hca_driver.h" +#include "mthca_dev.h" + + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_data.tmh" +#endif + + +mthca_qp_access_t +map_qp_ibal_acl( + IN ib_access_t ibal_acl) +{ +#define IBAL_ACL(ifl,mfl) if (ibal_acl & ifl) mthca_acl |= mfl + mthca_qp_access_t mthca_acl = 0; + + IBAL_ACL(IB_AC_RDMA_READ,MTHCA_ACCESS_REMOTE_READ); + IBAL_ACL(IB_AC_RDMA_WRITE,MTHCA_ACCESS_REMOTE_WRITE); + IBAL_ACL(IB_AC_ATOMIC,MTHCA_ACCESS_REMOTE_ATOMIC); + IBAL_ACL(IB_AC_LOCAL_WRITE,MTHCA_ACCESS_LOCAL_WRITE); + IBAL_ACL(IB_AC_MW_BIND,MTHCA_ACCESS_MW_BIND); + + return mthca_acl; +} + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +ib_access_t +map_qp_mthca_acl( + IN mthca_qp_access_t mthca_acl) +{ +#define ACL_IBAL(mfl,ifl) if (mthca_acl & mfl) ibal_acl |= ifl + ib_access_t ibal_acl = 0; + + ACL_IBAL(MTHCA_ACCESS_REMOTE_READ,IB_AC_RDMA_READ); + ACL_IBAL(MTHCA_ACCESS_REMOTE_WRITE,IB_AC_RDMA_WRITE); + ACL_IBAL(MTHCA_ACCESS_REMOTE_ATOMIC,IB_AC_ATOMIC); + ACL_IBAL(MTHCA_ACCESS_LOCAL_WRITE,IB_AC_LOCAL_WRITE); + ACL_IBAL(MTHCA_ACCESS_MW_BIND,IB_AC_MW_BIND); + + return ibal_acl; +} + + diff --git a/branches/MTHCA/hw/mthca/hca_utils.h b/branches/MTHCA/hw/mthca/hca_utils.h new file mode 100644 index 00000000..ba259a8d --- /dev/null +++ b/branches/MTHCA/hw/mthca/hca_utils.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: hca_data.h 148 2005-07-12 07:48:46Z sleybo $ + */ + +#ifndef __HCA_UTILS_H__ +#define __HCA_UTILS_H__ + +#include +#include + +mthca_qp_access_t +map_qp_ibal_acl( + IN ib_access_t ibal_acl) +; + +ib_access_t +map_qp_mthca_acl( + IN mthca_qp_access_t mthca_acl); + +#endif + diff --git a/branches/MTHCA/hw/mthca/kernel/Makefile b/branches/MTHCA/hw/mthca/kernel/Makefile index 9c985f57..1c8f2940 100644 --- a/branches/MTHCA/hw/mthca/kernel/Makefile +++ b/branches/MTHCA/hw/mthca/kernel/Makefile @@ -1,7 +1,6 @@ -# # DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source # file to this component. This file merely indirects to the real make file # that is shared by all the driver components of the Windows NT DDK # -!INCLUDE $(NTMAKEENV)\makefile.def +!INCLUDE ..\..\..\inc\openib.def# diff --git a/branches/MTHCA/hw/mthca/kernel/SOURCES b/branches/MTHCA/hw/mthca/kernel/SOURCES index 78157a2b..7ec6cb2c 100644 --- a/branches/MTHCA/hw/mthca/kernel/SOURCES +++ b/branches/MTHCA/hw/mthca/kernel/SOURCES @@ -4,17 +4,22 @@ TARGETNAME=mthca TARGETPATH=$(TRUNK)\bin\kernel\obj$(BUILD_ALT_DIR) TARGETTYPE=DRIVER +#ENABLE_EVENT_TRACING=1 + SOURCES= \ - hca.rc \ - hca_driver.c \ + mthca_log.mc \ + mthca_log.rc \ + hca.rc \ hca_data.c \ - hca_pci.c \ - hca_pnp.c \ - hca_verbs.c \ hca_mcast.c \ + hca_verbs.c \ + hca_pnp.c \ + hca_pci.c \ + hca_driver.c \ hca_direct.c \ hca_memory.c \ hca_smp.c \ + ..\hca_utils.c \ \ mt_l2w.c \ mt_memory.c \ @@ -23,6 +28,9 @@ SOURCES= \ mt_ud_header.c \ mt_device.c \ mt_verbs.c \ + mt_reset_tavor.c \ + mt_uverbs.c \ + mt_uverbsmem.c \ \ mthca_allocator.c \ mthca_av.c \ @@ -40,15 +48,11 @@ SOURCES= \ mthca_qp.c \ mthca_srq.c \ mthca_uar.c \ - -!if 0 - - -!endif - + mthca_log.c \ + mthca_catas.c INCLUDES=\ - $(TRUNK)\inc\kernel\mthca; \ + ..; \ $(TRUNK)\inc; \ $(TRUNK)\inc\kernel; \ $(TRUNK)\inc\complib; \ @@ -60,4 +64,18 @@ TARGETLIBS= \ $(TARGETPATH)\*\ibal.lib \ $(DDK_LIB_PATH)\wdmguid.lib + + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING + +RUN_WPP= -ext:.c.h $(SOURCES) -km \ + -scan:hca_debug.h \ + -func:HCA_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:HCA_PRINT_EV(LEVEL,FLAGS,(MSG,...)) \ + -func:HCA_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + + MSC_WARNING_LEVEL= /W3 diff --git a/branches/MTHCA/hw/mthca/kernel/hca.rc b/branches/MTHCA/hw/mthca/kernel/hca.rc index d9460cc3..345f4397 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca.rc +++ b/branches/MTHCA/hw/mthca/kernel/hca.rc @@ -30,15 +30,15 @@ */ -#include +#include #define VER_FILETYPE VFT_DRV #define VER_FILESUBTYPE VFT2_UNKNOWN -#ifdef _DEBUG_ -#define VER_FILEDESCRIPTION_STR "InfiniServ Tavor HCA Driver (checked)" +#ifdef DBG +#define VER_FILEDESCRIPTION_STR "HCA Driver (checked)" #else -#define VER_FILEDESCRIPTION_STR "InfiniServ Tavor HCA Driver" +#define VER_FILEDESCRIPTION_STR "HCA Driver" #endif -#define VER_INTERNALNAME_STR "hca.sys" -#define VER_ORIGINALFILENAME_STR "hca.sys" +#define VER_INTERNALNAME_STR "mthca.sys" +#define VER_ORIGINALFILENAME_STR "mthca.sys" #include diff --git a/branches/MTHCA/hw/mthca/kernel/hca_data.c b/branches/MTHCA/hw/mthca/kernel/hca_data.c index 758c09e5..36faf35a 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_data.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_data.c @@ -31,25 +31,24 @@ */ -#include "hca_data.h" -#include "hca_debug.h" -#include "mthca_provider.h" +#include "hca_driver.h" +#include "hca_utils.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_data.tmh" +#endif + +#include "mthca_dev.h" 
+#include static cl_spinlock_t hob_lock; -#if 1 -u_int32_t g_mlnx_dbg_lvl = CL_DBG_ERROR | MLNX_DBG_TRACE; -#else -u_int32_t g_mlnx_dbg_lvl = CL_DBG_ERROR | - MLNX_DBG_QPN | - MLNX_DBG_MEM | - MLNX_DBG_INFO | - MLNX_DBG_TRACE | - // MLNX_DBG_DIRECT | - 0; -#endif -u_int32_t g_mlnx_dpc2thread = 0; + +uint32_t g_mlnx_dpc2thread = 0; cl_qlist_t mlnx_hca_list; @@ -163,7 +162,7 @@ mlnx_names_from_guid( cl_status_t mlnx_hcas_init( void ) { - u_int32_t idx; + uint32_t idx; cl_qlist_init( &mlnx_hca_list ); return cl_spinlock_init( &hob_lock ); @@ -176,7 +175,7 @@ mlnx_hcas_init( void ) cl_status_t mlnx_hobs_init( void ) { - u_int32_t idx; + uint32_t idx; cl_qlist_init( &mlnx_hca_list ); @@ -204,7 +203,7 @@ mlnx_hobs_insert( IN mlnx_hca_t *p_hca, OUT mlnx_hob_t **hob_pp) { - u_int32_t idx; + uint32_t idx; ib_api_status_t status = IB_ERROR; mlnx_cache_t *p_cache; @@ -282,7 +281,7 @@ mlnx_hobs_set_cb( hob_p->comp_cb_p = comp_cb_p; hob_p->async_cb_p = async_cb_p; hob_p->ca_context = ib_context; // This is the context our CB forwards to IBAL - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("CL: hca_idx %d context 0x%p\n", hob_p - mlnx_hob_array, ib_context)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"CL: hca_idx %d context 0x%p\n", hob_p - mlnx_hob_array, ib_context) return IB_SUCCESS; } return IB_ERROR; @@ -292,7 +291,7 @@ mlnx_hobs_set_cb( hob_p->comp_cb_p = comp_cb_p; hob_p->async_cb_p = async_cb_p; hob_p->ca_context = ib_context; // This is the context our CB forwards to IBAL - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("CL: hca_idx %d context 0x%p\n", hob_p - mlnx_hob_array, ib_context)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("CL: hca_idx %d context 0x%p\n", (int)(hob_p - mlnx_hob_array), ib_context)); return IB_SUCCESS; #endif @@ -333,7 +332,7 @@ mlnx_hobs_remove( if( p_cache ) cl_free( p_cache ); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("CL: hobs_remove idx %d \n", hob_p - mlnx_hob_array)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("CL: hobs_remove idx %d \n", (int)(hob_p - mlnx_hob_array))); } #ifdef WIN_TO_BE_CHANGED @@ -364,7 +363,7 @@ mlnx_hobs_lookup( IN HH_hca_hndl_t hndl, OUT mlnx_hob_t **hca_p) { - u_int32_t idx; + uint32_t idx; if (!hca_p) return IB_ERROR; @@ -411,7 +410,7 @@ mlnx_hobs_get_hobul( } -static int priv_ceil_log2(u_int32_t n) +static int priv_ceil_log2(uint32_t n) { int shift; @@ -436,7 +435,7 @@ mlnx_hobul_new( HH_hca_dev_t *hca_ul_info; ib_api_status_t status; VAPI_hca_cap_t hca_caps; - u_int32_t i; + uint32_t i; #if MLNX_COMP_MODEL == 1 static uint32_t proc_num = 0; #endif @@ -482,7 +481,7 @@ mlnx_hobul_new( hobul_p->max_cq = hobul_p->cq_idx_mask + 1; hobul_p->max_qp = hobul_p->qp_idx_mask + 1; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("CL: sizes cq 0%x qp 0%x pd 0%x\n", hca_caps.max_num_cq, hca_caps.max_num_qp, hca_caps.max_pd_num)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"CL: sizes cq 0%x qp 0%x pd 0%x\n", hca_caps.max_num_cq, hca_caps.max_num_qp, hca_caps.max_pd_num) /* create and initialize the data stucture for CQs */ hobul_p->cq_info_tbl = cl_zalloc(hobul_p->max_cq * sizeof (cq_info_t)); @@ -493,7 +492,7 @@ mlnx_hobul_new( /* create and initialize the data stucture for PDs */ hobul_p->pd_info_tbl = cl_zalloc(hobul_p->max_pd * sizeof (pd_info_t)); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("CL: alloc failed? cq=%d qp=%d pd=%d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM, ("CL: alloc failed? 
cq=%d qp=%d pd=%d\n", !hobul_p->cq_info_tbl, !hobul_p->qp_info_tbl, !hobul_p->pd_info_tbl)); if (!hobul_p->pd_info_tbl || @@ -552,7 +551,7 @@ mlnx_hobul_new( } hobul_p->log2_mpt_size = ((THH_hca_ul_resources_t *)resources_p)->log2_mpt_size; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("log2_mpt_size = %d\n", hobul_p->log2_mpt_size)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"log2_mpt_size = %d\n", hobul_p->log2_mpt_size) cl_spinlock_acquire(&hob_lock); mlnx_hobul_array[hob_p->index] = hobul_p; @@ -611,7 +610,7 @@ mlnx_hobul_delete( IN mlnx_hob_t *hob_p) { mlnx_hobul_t *hobul_p; - u_int32_t i; + uint32_t i; // Verify handle CL_ASSERT((hob_p - mlnx_hob_array) < MLNX_NUM_HOBKL); @@ -714,7 +713,8 @@ mlnx_map_vapi_event_type( return IB_AE_PORT_ACTIVE; /* ACTIVE STATE */ default: - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("FAIL to map %d (last known %d) returning %d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM , + ("FAIL to map %d (last known %d) returning %d\n", event_id, VAPI_PORT_ACTIVE, IB_AE_LOCAL_FATAL)); if (event_class_p) *event_class_p = E_EV_CA; return IB_AE_LOCAL_FATAL; @@ -744,18 +744,18 @@ mlnx_async_cb( IN HH_event_record_t *hh_er_p, IN void *private_data) { - u_int32_t obj_idx; + uint32_t obj_idx; mlnx_hob_t *hob_p; mlnx_cb_data_t cb_data; mlnx_cb_data_t *cb_data_p; - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("ASYNC CB %p (0x%x)\n", - private_data, (private_data) ? *(u_int32_t *)private_data : 0xB5)); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM,"ASYNC CB %p (0x%x)\n", + private_data, (private_data) ? *(uint32_t *)private_data : 0xB5)); if (!private_data || !hh_er_p) return; - obj_idx = *(u_int32_t *)private_data; + obj_idx = *(uint32_t *)private_data; if (obj_idx >= MLNX_NUM_HOBKL) return; hob_p = mlnx_hob_array + obj_idx; @@ -787,7 +787,7 @@ mlnx_async_dpc( IN cl_async_proc_item_t *async_item_p ) { HH_event_record_t *hh_er_p; - u_int32_t obj_idx; + uint32_t obj_idx; mlnx_hob_t *hob_p; mlnx_hobul_t *hobul_p; mlnx_cb_data_t *cb_data_p; @@ -795,18 +795,18 @@ mlnx_async_dpc( ENUM_EVENT_CLASS event_class; ib_event_rec_t event_r; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("ASYNC DPC %p\n", async_item_p)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"ASYNC DPC %p\n", async_item_p) cb_data_p = PARENT_STRUCT( async_item_p, mlnx_cb_data_t, async_item ); if (!cb_data_p) return; hh_er_p = &cb_data_p->hh_er; - obj_idx = *(u_int32_t *)cb_data_p->private_data; + obj_idx = *(uint32_t *)cb_data_p->private_data; hob_p = mlnx_hob_array + obj_idx; hobul_p = mlnx_hobul_array[obj_idx]; - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("ASYNC DPC type %d ca_context %p\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_DIRECT, ("ASYNC DPC type %d ca_context %p\n", hh_er_p->etype, hob_p->ca_context)); if (!hob_p || @@ -833,7 +833,7 @@ mlnx_async_dpc( event_r.context = (void *)hobul_p->qp_info_tbl[obj_idx].qp_context; else { - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("ASYNC DPC bad qpn 0x%x max 0x%x\n", obj_idx, hobul_p->max_qp)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_DIRECT,"ASYNC DPC bad qpn 0x%x max 0x%x\n", obj_idx, hobul_p->max_qp) goto cleanup; } } @@ -846,7 +846,7 @@ mlnx_async_dpc( event_r.context = (void *)hobul_p->cq_info_tbl[obj_idx].cq_context; else { - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("ASYNC DPC bad cqn 0x%x max 0x%x\n", obj_idx, hobul_p->max_cq)); + HCA_PRINT(MLNX_DBG_DIRECT, HCA_DBG_DIRECT,"ASYNC DPC bad cqn 0x%x max 0x%x\n", obj_idx, hobul_p->max_cq) goto cleanup; } } @@ -855,7 +855,7 @@ mlnx_async_dpc( case E_EV_LAST: default: // CL_ASSERT(0); // 
This shouldn't happen - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("ASYNC DPC unknown event_class 0x%x\n", event_class)); + HCA_PRINT(MLNX_DBG_DIRECT, HCA_DBG_DIRECT,"ASYNC DPC unknown event_class 0x%x\n", event_class) break; } @@ -878,19 +878,19 @@ mlnx_comp_cb( IN void *private_data) { #if MLNX_COMP_MODEL - u_int32_t cq_num; - u_int32_t hca_idx; + uint32_t cq_num; + uint32_t hca_idx; mlnx_hob_t *hob_p; mlnx_hobul_t *hobul_p; #if MLNX_COMP_MODEL == 2 static uint32_t proc_num = 0; #endif - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("COMP CB cq 0x%x %p\n", hh_cq, private_data)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_DIRECT,"COMP CB cq 0x%x %p\n", hh_cq, private_data) UNUSED_PARAM( hh_hndl ); - hca_idx = *(u_int32_t *)private_data; + hca_idx = *(uint32_t *)private_data; hob_p = mlnx_hob_array + hca_idx; hobul_p = mlnx_hobul_array[hca_idx]; cq_num = hh_cq & hobul_p->cq_idx_mask; @@ -909,21 +909,21 @@ mlnx_comp_cb( } else { - HCA_TRACE( HCA_DBG_ERROR, ("CQ index out of range!!!\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("CQ index out of range!!!\n"); ); } } #else /* MLNX_COMP_MODEL */ - u_int32_t obj_idx; + uint32_t obj_idx; mlnx_hob_t *hob_p; mlnx_cb_data_t cb_data; mlnx_cb_data_t *cb_data_p; - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("COMP CB cq 0x%x %p\n", hh_cq, private_data)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_DIRECT,"COMP CB cq 0x%x %p\n", hh_cq, private_data) if (!private_data) return; - obj_idx = *(u_int32_t *)private_data; + obj_idx = *(uint32_t *)private_data; hob_p = mlnx_hob_array + obj_idx; if (!hob_p) return; @@ -974,21 +974,21 @@ static void mlnx_comp_dpc( IN cl_async_proc_item_t *async_item_p ) { - u_int32_t cq_num; - u_int32_t hca_idx; + uint32_t cq_num; + uint32_t hca_idx; mlnx_hob_t *hob_p; mlnx_hobul_t *hobul_p; mlnx_cb_data_t *cb_data_p; - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("COMP DPC %p\n", async_item_p)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_DIRECT,("COMP DPC %p\n", async_item_p); cb_data_p = PARENT_STRUCT( async_item_p, mlnx_cb_data_t, async_item ); if (!cb_data_p) return; - hca_idx = *(u_int32_t *)cb_data_p->private_data; + hca_idx = *(uint32_t *)cb_data_p->private_data; hob_p = mlnx_hob_array + hca_idx; hobul_p = mlnx_hobul_array[hca_idx]; - cq_num = (u_int32_t)cb_data_p->hh_cq & hobul_p->cq_idx_mask; + cq_num = (uint32_t)cb_data_p->hh_cq & hobul_p->cq_idx_mask; if (NULL != hob_p && NULL != hobul_p && hob_p->hh_hndl && hob_p->comp_cb_p) @@ -1117,7 +1117,7 @@ mlnx_lock_region( ///////////////////////////////////////////////////////// ib_api_status_t mlnx_conv_ibal_mr_create( - IN u_int32_t pd_idx, + IN uint32_t pd_idx, IN OUT mlnx_mro_t *mro_p, IN VAPI_mr_change_t change_flags, IN ib_mr_create_t const *p_mr_create, @@ -1129,7 +1129,7 @@ mlnx_conv_ibal_mr_create( /* Set ACL information first since it is used to lock the region. 
*/ if( change_flags & VAPI_MR_CHANGE_ACL ) { - mro_p->mr_acl = map_ibal_acl( p_mr_create->access_ctrl ); + mro_p->mr_acl = map_qp_ibal_acl( p_mr_create->access_ctrl ); // This computation should be externalized by THH mro_p->mr_mosal_perm = MOSAL_PERM_READ | @@ -1138,7 +1138,7 @@ mlnx_conv_ibal_mr_create( if( change_flags & VAPI_MR_CHANGE_TRANS ) { - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("addr 0x%p size %"PRId64"\n", (void *)p_mr_create->vaddr, p_mr_create->length)); + HCA_PRINT(MLNX_DBG_MEM, HCA_DBG_SHIM,("addr 0x%p size %I64d\n", (void *)p_mr_create->vaddr, p_mr_create->length); // Build TPT entries mro_p->mr_start = (IB_virt_addr_t)p_mr_create->vaddr; mro_p->mr_size = p_mr_create->length; @@ -1166,37 +1166,37 @@ mlnx_conv_ibal_mr_create( ///////////////////////////////////////////////////////// ib_api_status_t mlnx_conv_ibal_pmr_create( - IN u_int32_t pd_idx, + IN uint32_t pd_idx, IN mlnx_mro_t *mro_p, IN ib_phys_create_t const *p_pmr_create, OUT HH_mr_t *mr_props_p ) { VAPI_phy_addr_t* buf_lst = NULL; VAPI_size_t* sz_lst = NULL; - u_int32_t i; - u_int32_t page_shift = priv_ceil_log2(p_pmr_create->hca_page_size); - u_int64_t page_mask = (1 << page_shift) - 1; - u_int64_t tot_sz = 0; + uint32_t i; + uint32_t page_shift = priv_ceil_log2(p_pmr_create->hca_page_size); + uint64_t page_mask = (1 << page_shift) - 1; + uint64_t tot_sz = 0; - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, - ("PRE: addr %p size 0x%"PRIx64" shift %d\n", + HCA_PRINT(MLNX_DBG_MEM, HCA_DBG_MEMORY, + ("PRE: addr %p size 0x%I64x shift %d\n", (void *)(uintn_t)mro_p->mr_start, p_pmr_create->length, page_mask)); mro_p->mr_start = (mro_p->mr_start & ~page_mask) | (p_pmr_create->buf_offset & page_mask); - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, + HCA_PRINT(MLNX_DBG_MEM, HCA_DBG_MEMORY, ("POST: addr %p\n", (void *)(uintn_t)mro_p->mr_start)); mr_props_p->start = mro_p->mr_start; mr_props_p->size = p_pmr_create->length; - mr_props_p->acl = map_ibal_acl(p_pmr_create->access_ctrl); + mr_props_p->acl = map_qp_ibal_acl(p_pmr_create->access_ctrl); mr_props_p->pd = pd_idx; -#ifdef _DEBUG_ +#ifdef DBG mro_p->mr_size = mr_props_p->size; // mro_p->mr_first_page_addr = 0; // mro_p->mr_num_pages = (mro_p->mr_end >> PAGESHIFT) + 1 - (mro_p->mr_start >> PAGESHIFT); -// CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("1st pg addr 0x%p pages %d\n", +// HCA_PRINT(MLNX_DBG_MEM, HCA_DBG_DIRECT, ("1st pg addr 0x%p pages %d\n", // (void *)mro_p->mr_first_page_addr, p_pmr_create->num_bufs)); - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("1st phys addr 0x%"PRIx64" phys pages %d\n", + HCA_PRINT(MLNX_DBG_MEM, HCA_DBG_MEMORY,("1st phys addr 0x%I64x phys pages %d\n", p_pmr_create->range_array[0].base_addr, p_pmr_create->num_ranges)); #endif @@ -1209,7 +1209,7 @@ mlnx_conv_ibal_pmr_create( if (p_pmr_create->hca_page_size != MT_DOWN_ALIGNX_PHYS(p_pmr_create->hca_page_size, page_shift)) { - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("phys buf size is not page aligned\n")); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,("phys buf size is not page aligned\n"); return IB_INVALID_PARAMETER; } @@ -1219,14 +1219,14 @@ mlnx_conv_ibal_pmr_create( uint64_t end_addr = start_addr + p_pmr_create->range_array[i].size; if( end_addr < start_addr ) { - CL_TRACE( CL_DBG_ERROR, g_mlnx_dbg_lvl, ("phys buf end < start\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,("phys buf end < start\n"); return IB_INVALID_PARAMETER; } if (start_addr != MT_DOWN_ALIGNX_PHYS(start_addr, page_shift)) { - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("phys buf start adrs is not page aligned\n")); + 
HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY,("phys buf start adrs is not page aligned\n"); return IB_INVALID_PARAMETER; } @@ -1235,18 +1235,18 @@ mlnx_conv_ibal_pmr_create( if( tot_sz < p_pmr_create->length + p_pmr_create->buf_offset ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("length(0x"PRIx64") + buf offset(0x"PRIx64") larger than sum " - "of phys ranges(0x"PRIx64")\n", - p_pmr_create->length, p_pmr_create->buf_offset, tot_sz) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("length(0x%I64x) + buf offset(0x%I64x) larger than sum " + "of phys ranges(0x%I64x)\n", + p_pmr_create->length, p_pmr_create->buf_offset, tot_sz)); return IB_INVALID_PARAMETER; } if( p_pmr_create->buf_offset > p_pmr_create->range_array[0].size ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("buf offset(0x%x) > than 1st phy range size(0x"PRIx64")\n", - p_pmr_create->buf_offset, p_pmr_create->range_array[0].size) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("buf offset(0x%x) > than 1st phy range size(0x%I64x)\n", + p_pmr_create->buf_offset, p_pmr_create->range_array[0].size)); return IB_INVALID_PARAMETER; } @@ -1254,8 +1254,8 @@ mlnx_conv_ibal_pmr_create( buf_lst = (VAPI_phy_addr_t*)cl_pzalloc( sizeof(VAPI_phy_addr_t)*(p_pmr_create->num_ranges)); if (!buf_lst) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Failed to allocate range address list.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("Failed to allocate range address list.\n")); return IB_INSUFFICIENT_MEMORY; } @@ -1265,8 +1265,8 @@ mlnx_conv_ibal_pmr_create( if (!sz_lst) { cl_free( buf_lst ); - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Failed to allocate range size list.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("Failed to allocate range size list.\n")); return IB_INSUFFICIENT_MEMORY; } @@ -1286,15 +1286,15 @@ mlnx_conv_ibal_pmr_create( } -u_int8_t +uint8_t mlnx_gid_to_index( IN HH_hca_hndl_t hh_hndl, - IN u_int8_t port_num, - IN u_int8_t *raw_gid) + IN uint8_t port_num, + IN uint8_t *raw_gid) { ib_gid_t *gid_table_p = NULL; - u_int8_t index = 0; // default return value - u_int8_t i; + uint8_t index = 0; // default return value + uint8_t i; gid_table_p = cl_zalloc( 64*sizeof(ib_gid_t)); @@ -1304,13 +1304,13 @@ mlnx_gid_to_index( { if (!cl_memcmp(raw_gid, gid_table_p[i].raw, sizeof(ib_gid_t))) { - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("1: found GID at index %d\n", i)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("1: found GID at index %d\n", i); index = i; break; } } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("2: found GID at index %d\n", index)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("2: found GID at index %d\n", index); cl_free( gid_table_p); return index; @@ -1340,8 +1340,8 @@ mlnx_conv_ibal_av( { vapi_av_p->grh_flag = TRUE; vapi_av_p->hop_limit = ibal_av_p->grh.hop_limit; - // CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("raw %p, &raw %p\n", ibal_av_p->grh.src_gid.raw, &ibal_av_p->grh.src_gid.raw)); - vapi_av_p->sgid_index = mlnx_gid_to_index(hh_hndl, ibal_av_p->port_num, (u_int8_t *)ibal_av_p->grh.src_gid.raw); + // HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_DIRECT,("raw %p, &raw %p\n", ibal_av_p->grh.src_gid.raw, &ibal_av_p->grh.src_gid.raw); + vapi_av_p->sgid_index = mlnx_gid_to_index(hh_hndl, ibal_av_p->port_num, (uint8_t *)ibal_av_p->grh.src_gid.raw); cl_memcpy(vapi_av_p->dgid, ibal_av_p->grh.dest_gid.raw, sizeof(vapi_av_p->dgid)); } } @@ -1404,7 +1404,7 @@ mlnx_map_vapi_cqe_status( case IB_COMP_FATAL_ERR: return IB_WCS_REM_ACCESS_ERR; // ??? case IB_COMP_GENERAL_ERR: return IB_WCS_REM_ACCESS_ERR; // ??? 
default: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("FAIL to map %d (last known %d) returning %d\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM ,("FAIL to map %d (last known %d) returning %d\n", vapi_status, IB_COMP_GENERAL_ERR, IB_WCS_REM_ACCESS_ERR)); return IB_WCS_REM_ACCESS_ERR; } @@ -1479,7 +1479,7 @@ mlnx_conv_bind_req( { bind_prop_p->qp = hhul_qp_hndl; bind_prop_p->id = p_mw_bind->wr_id; - bind_prop_p->acl = map_ibal_acl(p_mw_bind->access_ctrl); + bind_prop_p->acl = map_qp_ibal_acl(p_mw_bind->access_ctrl); bind_prop_p->size = p_mw_bind->local_ds.length; bind_prop_p->start = (VAPI_virt_addr_t)(MT_virt_addr_t)p_mw_bind->local_ds.vaddr; bind_prop_p->mr_lkey = p_mw_bind->local_ds.lkey; @@ -1539,7 +1539,7 @@ mlnx_map_ibal_qp_type( return IB_TS_UD; default: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("FAIL to map ibal_qp_type %d (last known %d) returning %d\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("FAIL to map %d (last known %d) returning %d\n", ibal_qpt, IB_QPT_QP1_ALIAS, IB_TS_RAW)); if (vapi_qp_type_p) *vapi_qp_type_p = VAPI_RAW_ETY_QP; return IB_TS_RAW; @@ -1606,7 +1606,7 @@ mlnx_map_vapi_qp_state( // TBD: IB_QPS_SQD_DRAINING // TBD: IB_QPS_SQD_DRAINED default: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("FAIL to map vapi_qp_state %d (last known %d) returning %d\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("FAIL to map %d (last known %d) returning %d\n", vapi_qp_state, VAPI_ERR, IB_QPS_INIT)); return IB_QPS_INIT; } @@ -1625,7 +1625,7 @@ mlnx_map_vapi_apm_state( case VAPI_ARMED: return IB_APM_ARMED; default: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("FAIL to map vapi_apm_state %d (last known %d) returning %d\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("FAIL to map %d (last known %d) returning %d\n", vapi_apm_state, VAPI_ARMED, 0)); return 0; } @@ -1636,9 +1636,9 @@ mlnx_map_vapi_apm_state( // UNUSED: IBAL uses same encoding as THH ///////////////////////////////////////////////////////// static -u_int32_t ibal_mtu_to_vapi(u_int32_t ibal_mtu) +uint32_t ibal_mtu_to_vapi(uint32_t ibal_mtu) { - u_int32_t mtu = 0; + uint32_t mtu = 0; // MTU256=1, MTU512=2, MTU1024=3 while (ibal_mtu >>= 1) mtu++; @@ -1648,7 +1648,7 @@ u_int32_t ibal_mtu_to_vapi(u_int32_t ibal_mtu) ///////////////////////////////////////////////////////// ///////////////////////////////////////////////////////// static -u_int32_t vapi_mtu_to_ibal(u_int32_t vapi_mtu) +uint32_t vapi_mtu_to_ibal(uint32_t vapi_mtu) { return (1 << (vapi_mtu + 7)); } @@ -1673,12 +1673,12 @@ mlnx_conv_vapi_qp_attr( qp_attr_p->resp_res = hh_qp_attr_p->qp_ous_rd_atom; // outstanding as target (in) qp_attr_p->num = cl_ntoh32(hh_qp_attr_p->qp_num); - CL_TRACE(MLNX_DBG_QPN, g_mlnx_dbg_lvl, ("ibal_qpn 0x%x = hh_qpn 0x%x\n", + HCA_PRINT(MLNX_DBG_QPN, g_mlnx_dbg_lvl,("ibal_qpn 0x%x = hh_qpn 0x%x\n", qp_attr_p->num, hh_qp_attr_p->qp_num)); qp_attr_p->dest_num = cl_ntoh32(hh_qp_attr_p->dest_qp_num); - CL_TRACE(MLNX_DBG_QPN, g_mlnx_dbg_lvl, ("ibal_dest 0x%x = hh_dest 0x%x\n", + HCA_PRINT(MLNX_DBG_QPN, g_mlnx_dbg_lvl,("ibal_dest 0x%x = hh_dest 0x%x\n", qp_attr_p->dest_num, hh_qp_attr_p->dest_qp_num)); qp_attr_p->qkey = cl_ntoh32 (hh_qp_attr_p->qkey); @@ -1693,13 +1693,13 @@ mlnx_conv_vapi_qp_attr( qp_attr_p->apm_state = mlnx_map_vapi_apm_state(hh_qp_attr_p->path_mig_state); mlnx_conv_vapi_av(hh_hndl, &hh_qp_attr_p->av, &qp_attr_p->primary_av); - qp_attr_p->primary_av.conn.path_mtu = (u_int8_t)hh_qp_attr_p->path_mtu; + qp_attr_p->primary_av.conn.path_mtu = (uint8_t)hh_qp_attr_p->path_mtu; qp_attr_p->primary_av.conn.local_ack_timeout = 
hh_qp_attr_p->timeout; qp_attr_p->primary_av.conn.seq_err_retry_cnt = hh_qp_attr_p->retry_count; qp_attr_p->primary_av.conn.rnr_retry_cnt = hh_qp_attr_p->rnr_retry; mlnx_conv_vapi_av(hh_hndl, &hh_qp_attr_p->alt_av, &qp_attr_p->alternate_av); - qp_attr_p->alternate_av.conn. path_mtu = (u_int8_t)hh_qp_attr_p->path_mtu; + qp_attr_p->alternate_av.conn. path_mtu = (uint8_t)hh_qp_attr_p->path_mtu; qp_attr_p->alternate_av.conn.local_ack_timeout = hh_qp_attr_p->timeout; qp_attr_p->alternate_av.conn.seq_err_retry_cnt = hh_qp_attr_p->retry_count; qp_attr_p->alternate_av.conn.rnr_retry_cnt = hh_qp_attr_p->rnr_retry; @@ -1795,7 +1795,7 @@ mlnx_conv_qp_modify_attr( } #if 1 - CL_TRACE(MLNX_DBG_QPN, g_mlnx_dbg_lvl, ("modify_qp: hh_dest 0x%x = ibal_dest 0x%x\n", + HCA_PRINT(MLNX_DBG_QPN, g_mlnx_dbg_lvl,("modify_qp: hh_dest 0x%x = ibal_dest 0x%x\n", qp_attr_p->dest_qp_num, modify_attr_p->state.rtr.dest_qp)); #endif @@ -1908,7 +1908,7 @@ mlnx_conv_qp_modify_attr( default: break; } - CL_TRACE(MLNX_DBG_QPN, g_mlnx_dbg_lvl, ("CL: conv_qp_modify: new state %d attr_mask 0x%x\n", qp_attr_p->qp_state, *attr_mask_p)); + HCA_PRINT(MLNX_DBG_QPN, g_mlnx_dbg_lvl,("CL: conv_qp_modify: new state %d attr_mask 0x%x\n", qp_attr_p->qp_state, *attr_mask_p); return IB_SUCCESS; } @@ -1949,7 +1949,7 @@ mlnx_conv_send_desc( OUT VAPI_sr_desc_t *vapi_send_desc_p) { boolean_t imm = FALSE; - u_int32_t idx; + uint32_t idx; register VAPI_sg_lst_entry_t *sg_lst_p; register ib_local_ds_t *ds_array; @@ -1957,8 +1957,8 @@ mlnx_conv_send_desc( switch (transport) { case IB_TS_UD: - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("mapping %s QP\n", "UD")); - { + HCA_PRINT(TRACE_LEVEL_INFORMATION, g_mlnx_dbg_lvl,("mapping %s QP\n","UD")); + mlnx_avo_t *avo_p = (mlnx_avo_t *)ibal_send_wqe_p->dgrm.ud.h_av; vapi_send_desc_p->remote_qp = cl_ntoh32 (ibal_send_wqe_p->dgrm.ud.remote_qp); @@ -1972,7 +1972,7 @@ mlnx_conv_send_desc( } case IB_TS_RC: - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("mapping %s QP\n", "RC")); + HCA_PRINT(TRACE_LEVEL_INFORMATION, g_mlnx_dbg_lvl,("mapping %s QP\n","RC")); // vapi_send_desc_p->remote_qp = 0; // vapi_send_desc_p->remote_qkey = 0; vapi_send_desc_p->remote_addr = ibal_send_wqe_p->remote_ops.vaddr; @@ -2006,11 +2006,11 @@ VAPI_SIGNALED : VAPI_UNSIGNALED; sg_lst_p->addr = ds_array->vaddr; sg_lst_p->len = ds_array->length; sg_lst_p->lkey = ds_array->lkey; - // CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("post_send (conv) addr %Lx size %d key 0x%x\n", sg_lst_p->addr, sg_lst_p->len, sg_lst_p->lkey)); + // HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM,conv); sg_lst_p++; ds_array++; } - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("send: rqpn 0x%x rkey 0x%x\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM ,("send: rqpn 0x%x rkey 0x%x\n", vapi_send_desc_p->remote_qp, vapi_send_desc_p->remote_qkey)); return IB_SUCCESS; @@ -2023,7 +2023,7 @@ mlnx_conv_recv_desc( IN const ib_recv_wr_t *ibal_recv_wqe_p, OUT VAPI_rr_desc_t *vapi_recv_desc_p) { - u_int32_t idx; + uint32_t idx; register VAPI_sg_lst_entry_t *sg_lst_p; register ib_local_ds_t *ds_array; @@ -2039,7 +2039,7 @@ mlnx_conv_recv_desc( sg_lst_p->addr = ds_array->vaddr; sg_lst_p->len = ds_array->length; sg_lst_p->lkey = ds_array->lkey; - // CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("post_recv (conv) addr 0x%Lx size %d key 0x%x\n", sg_lst_p->addr, sg_lst_p->len, sg_lst_p->lkey)); + // HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM,conv); sg_lst_p++; ds_array++; } @@ -2075,11 +2075,11 @@ mthca_port_cap_to_ibal( ib_api_status_t mlnx_get_hca_pkey_tbl( IN HH_hca_hndl_t hh_hndl, - 
IN u_int8_t port_num, - IN u_int16_t num_entries, + IN uint8_t port_num, + IN uint16_t num_entries, OUT void* table_p) { - u_int16_t size; + uint16_t size; ib_net16_t *pkey_p; if (HH_OK != THH_hob_get_pkey_tbl( hh_hndl, port_num, num_entries, &size, table_p)) @@ -2087,7 +2087,7 @@ mlnx_get_hca_pkey_tbl( pkey_p = (ib_net16_t *)table_p; #if 0 - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("port %d pkey0 0x%x pkey1 0x%x\n", port_num, pkey_p[0], pkey_p[1])); + HCA_PRINT(TRACE_LEVEL_INFORMATION, g_mlnx_dbg_lvl,("port %d pkey0 0x%x pkey1 0x%x\n", port_num, pkey_p[0], pkey_p[1]); #endif return IB_SUCCESS; } @@ -2095,11 +2095,11 @@ mlnx_get_hca_pkey_tbl( ib_api_status_t mlnx_get_hca_gid_tbl( IN HH_hca_hndl_t hh_hndl, - IN u_int8_t port_num, - IN u_int16_t num_entries, + IN uint8_t port_num, + IN uint16_t num_entries, OUT void* table_p) { - u_int16_t size; + uint16_t size; if (HH_OK != THH_hob_get_gid_tbl( hh_hndl, port_num, num_entries, &size, table_p)) return IB_ERROR; @@ -2139,15 +2139,15 @@ mlnx_conv_hca_cap( IN struct ib_port_attr *hca_ports, OUT ib_ca_attr_t *ca_attr_p) { - u_int8_t port_num; + uint8_t port_num; ib_port_attr_t *ibal_port_p; struct ib_port_attr *mthca_port_p; ca_attr_p->vend_id = hca_info_p->vendor_id; ca_attr_p->dev_id = (uint16_t)hca_info_p->vendor_part_id; ca_attr_p->revision = (uint16_t)hca_info_p->hw_ver; - //TODO: convert guid ? - ca_attr_p->ca_guid = *(UNALIGNED64 u_int64_t *)&hca_info_p->node_guid; + ca_attr_p->fw_ver = hca_info_p->fw_ver; + ca_attr_p->ca_guid = *(UNALIGNED64 uint64_t *)&ib_dev->node_guid; ca_attr_p->num_ports = ib_dev->phys_port_cnt; ca_attr_p->max_qps = hca_info_p->max_qp; ca_attr_p->max_wrs = hca_info_p->max_qp_wr; @@ -2207,12 +2207,12 @@ mlnx_conv_hca_cap( ibal_port_p->pkey_ctr = (uint16_t)mthca_port_p->bad_pkey_cntr; ibal_port_p->qkey_ctr = (uint16_t)mthca_port_p->qkey_viol_cntr; ibal_port_p->max_msg_size = mthca_port_p->max_msg_sz; - ibal_port_p->mtu = (u_int8_t)mthca_port_p->max_mtu; + ibal_port_p->mtu = (uint8_t)mthca_port_p->max_mtu; ibal_port_p->subnet_timeout = mthca_port_p->subnet_timeout; // ibal_port_p->local_ack_timeout = 3; // TBD: currently ~32 usec #if 0 - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("Port %d port_guid 0x%"PRIx64"\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,("Port %d port_guid 0x%I64x\n", ibal_port_p->port_num, ibal_port_p->port_guid)); #endif } @@ -2221,11 +2221,16 @@ mlnx_conv_hca_cap( void cq_comp_handler(struct ib_cq *cq, void *context) { mlnx_hob_t *hob_p = (mlnx_hob_t *)context; - if (hob_p) - (hob_p->comp_cb_p)(cq->cq_context); + struct mthca_cq *mcq =(struct mthca_cq *)cq; + HCA_ENTER(HCA_DBG_CQ); + if (hob_p) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("Invoking completion callback\n")); + (hob_p->comp_cb_p)(mcq->cq_context); + } else { - HCA_TRACE (CL_DBG_ERROR, ("Incorrect context. Completion callback was not invoked\n")); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("Incorrect context. Completion callback was not invoked\n")); } + HCA_EXIT(HCA_DBG_CQ); } void ca_event_handler(struct ib_event *ev, void *context) @@ -2239,7 +2244,7 @@ void ca_event_handler(struct ib_event *ev, void *context) event_rec.type = ev->event; if (event_rec.type > IB_AE_UNKNOWN) { // CL_ASSERT(0); // This shouldn't happen - HCA_TRACE(HCA_DBG_ERROR, ("Unmapped E_EV_CA event of type 0x%x. Replaced by 0x%x (IB_AE_LOCAL_FATAL)\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("Unmapped E_EV_CA event of type 0x%x. 
Replaced by 0x%x (IB_AE_LOCAL_FATAL)\n", event_rec.type, IB_AE_LOCAL_FATAL)); event_rec.type = IB_AE_LOCAL_FATAL; } @@ -2248,7 +2253,7 @@ void ca_event_handler(struct ib_event *ev, void *context) if (hob_p) (hob_p->async_cb_p)(&event_rec); else { - HCA_TRACE (CL_DBG_ERROR, ("Incorrect context. Async callback was not invoked\n")); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); } } @@ -2267,7 +2272,7 @@ void qp_event_handler(struct ib_event *ev, void *context) if (hob_p) (hob_p->async_cb_p)(&event_rec); else { - HCA_TRACE (CL_DBG_ERROR, ("Incorrect context. Async callback was not invoked\n")); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); } } @@ -2286,8 +2291,480 @@ void cq_event_handler(struct ib_event *ev, void *context) if (hob_p) (hob_p->async_cb_p)(&event_rec); else { - HCA_TRACE (CL_DBG_ERROR, ("Incorrect context. Async callback was not invoked\n")); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); } } +ib_qp_state_t mlnx_qps_to_ibal(enum ib_qp_state qps) +{ +#define MAP_QPS(val1,val2) case val1: ib_qps = val2; break + ib_qp_state_t ib_qps; + switch (qps) { + MAP_QPS( IBQPS_RESET, IB_QPS_RESET ); + MAP_QPS( IBQPS_INIT, IB_QPS_INIT ); + MAP_QPS( IBQPS_RTR, IB_QPS_RTR ); + MAP_QPS( IBQPS_RTS, IB_QPS_RTS ); + MAP_QPS( IBQPS_SQD, IB_QPS_SQD ); + MAP_QPS( IBQPS_SQE, IB_QPS_SQERR ); + MAP_QPS( IBQPS_ERR, IB_QPS_ERROR ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped MTHCA qp_state %d\n", qps)); + ib_qps = 0xffffffff; + } + return ib_qps; +} + +enum ib_qp_state mlnx_qps_from_ibal(ib_qp_state_t ib_qps) +{ +#define MAP_IBQPS(val1,val2) case val1: qps = val2; break + enum ib_qp_state qps; + switch (ib_qps) { + MAP_IBQPS( IB_QPS_RESET, IBQPS_RESET ); + MAP_IBQPS( IB_QPS_INIT, IBQPS_INIT ); + MAP_IBQPS( IB_QPS_RTR, IBQPS_RTR ); + MAP_IBQPS( IB_QPS_RTS, IBQPS_RTS ); + MAP_IBQPS( IB_QPS_SQD, IBQPS_SQD ); + MAP_IBQPS( IB_QPS_SQD_DRAINING, IBQPS_SQD ); + MAP_IBQPS( IB_QPS_SQD_DRAINED, IBQPS_SQD ); + MAP_IBQPS( IB_QPS_SQERR, IBQPS_SQE ); + MAP_IBQPS( IB_QPS_ERROR, IBQPS_ERR ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped IBAL qp_state %d\n", ib_qps)); + qps = 0xffffffff; + } + return qps; +} + +ib_api_status_t +mlnx_conv_qp_modify_attr( + IN const struct ib_qp *ib_qp_p, + IN ib_qp_type_t qp_type, + IN const ib_qp_mod_t *modify_attr_p, + OUT struct ib_qp_attr *qp_attr_p, + OUT int *qp_attr_mask_p + ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_qp *qp_p = (struct mthca_qp *)ib_qp_p; + + RtlZeroMemory( qp_attr_p, sizeof *qp_attr_p ); + *qp_attr_mask_p = IB_QP_STATE; + qp_attr_p->qp_state = mlnx_qps_from_ibal( modify_attr_p->req_state ); + + // skipped cases + if (qp_p->state == IBQPS_RESET && modify_attr_p->req_state != IB_QPS_INIT) + return IB_NOT_DONE; + + switch (modify_attr_p->req_state) { + case IB_QPS_RESET: + case IB_QPS_ERROR: + case IB_QPS_SQERR: + case IB_QPS_TIME_WAIT: + break; + + case IB_QPS_INIT: + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + case IB_QPT_UNRELIABLE_CONN: + *qp_attr_mask_p |= IB_QP_PORT | IB_QP_PKEY_INDEX |IB_QP_ACCESS_FLAGS; + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.init.access_ctrl); + break; + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + *qp_attr_mask_p |= IB_QP_PORT | IB_QP_QKEY | IB_QP_PKEY_INDEX ; + qp_attr_p->qkey = cl_ntoh32 (modify_attr_p->state.init.qkey); + break; + } + 
+ // IB_QP_PORT + qp_attr_p->port_num = modify_attr_p->state.init.primary_port; + + // IB_QP_PKEY_INDEX + qp_attr_p->pkey_index = modify_attr_p->state.init.pkey_index; + + break; + + case IB_QPS_RTR: + /* modifying the WQE depth is not supported */ + if( modify_attr_p->state.rtr.opts & IB_MOD_QP_SQ_DEPTH || + modify_attr_p->state.rtr.opts & IB_MOD_QP_RQ_DEPTH ) { + status = IB_UNSUPPORTED; + break; + } + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + *qp_attr_mask_p |= /* required flags */ + IB_QP_DEST_QPN |IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_AV |IB_QP_PATH_MTU | IB_QP_MIN_RNR_TIMER; + + // IB_QP_DEST_QPN + qp_attr_p->dest_qp_num = cl_ntoh32 (modify_attr_p->state.rtr.dest_qp); + + // IB_QP_RQ_PSN + qp_attr_p->rq_psn = cl_ntoh32 (modify_attr_p->state.rtr.rq_psn); + + // IB_QP_MAX_DEST_RD_ATOMIC + qp_attr_p->max_dest_rd_atomic = modify_attr_p->state.rtr.resp_res; + + // IB_QP_AV, IB_QP_PATH_MTU: Convert primary RC AV (mandatory) + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.primary_av, &qp_attr_p->ah_attr); + if (err) { + status = IB_ERROR; + break; + } + qp_attr_p->path_mtu = modify_attr_p->state.rtr.primary_av.conn.path_mtu; // MTU + qp_attr_p->timeout = modify_attr_p->state.rtr.primary_av.conn.local_ack_timeout; // MTU + qp_attr_p->retry_cnt = modify_attr_p->state.rtr.primary_av.conn.seq_err_retry_cnt; // MTU + qp_attr_p->rnr_retry = modify_attr_p->state.rtr.primary_av.conn.rnr_retry_cnt; // MTU + + // IB_QP_MIN_RNR_TIMER + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_RNR_NAK_TIMEOUT) { + qp_attr_p->min_rnr_timer = modify_attr_p->state.rtr.rnr_nak_timeout; + } + + // IB_QP_ACCESS_FLAGS: Convert Remote Atomic Flags + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flag */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.init.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* required flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + qp_attr_p->alt_timeout = modify_attr_p->state.rtr.alternate_av.conn.local_ack_timeout; // XXX: conv + } + + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rtr.pkey_index; + } + break; + + case IB_QPT_UNRELIABLE_CONN: + *qp_attr_mask_p |= /* required flags */ + IB_QP_DEST_QPN |IB_QP_RQ_PSN | IB_QP_AV | IB_QP_PATH_MTU; + + // IB_QP_DEST_QPN + qp_attr_p->dest_qp_num = cl_ntoh32 (modify_attr_p->state.rtr.dest_qp); + + // IB_QP_RQ_PSN + qp_attr_p->rq_psn = cl_ntoh32 (modify_attr_p->state.rtr.rq_psn); + + // IB_QP_PATH_MTU + qp_attr_p->path_mtu = modify_attr_p->state.rtr.primary_av.conn.path_mtu; + + // IB_QP_AV: Convert primary AV (mandatory) + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.primary_av, &qp_attr_p->ah_attr); + if (err) { + status = IB_ERROR; + break; + } + + // IB_QP_ACCESS_FLAGS: Convert Remote Atomic Flags + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flag */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.init.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= 
IB_QP_ALT_PATH; /* required flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + } + + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rtr.pkey_index; + } + break; + + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rtr.pkey_index; + } + + // IB_QP_QKEY + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_QKEY) { + *qp_attr_mask_p |= IB_QP_QKEY; + qp_attr_p->qkey = cl_ntoh32 (modify_attr_p->state.rtr.qkey); + } + break; + + } + break; + + case IB_QPS_RTS: + /* modifying the WQE depth is not supported */ + if( modify_attr_p->state.rts.opts & IB_MOD_QP_SQ_DEPTH || + modify_attr_p->state.rts.opts & IB_MOD_QP_RQ_DEPTH ) + { + status = IB_UNSUPPORTED; + break; + } + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + *qp_attr_mask_p |= /* required flags */ + IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC | IB_QP_TIMEOUT | + IB_QP_RETRY_CNT |IB_QP_RNR_RETRY; + + // IB_QP_MAX_QP_RD_ATOMIC + qp_attr_p->max_rd_atomic = modify_attr_p->state.rts.init_depth; + + // IB_QP_TIMEOUT + qp_attr_p->timeout = modify_attr_p->state.rts.local_ack_timeout; // XXX: conv + + // IB_QP_RETRY_CNT + qp_attr_p->retry_cnt = modify_attr_p->state.rts.retry_cnt; + + // IB_QP_RNR_RETRY + qp_attr_p->rnr_retry = modify_attr_p->state.rts.rnr_retry_cnt; + + // IB_QP_MAX_DEST_RD_ATOMIC: Update the responder resources for RDMA/ATOMIC (optional for SQD->RTS) + if (modify_attr_p->state.rts.opts & IB_MOD_QP_RESP_RES) { + *qp_attr_mask_p |= IB_QP_MAX_DEST_RD_ATOMIC; + qp_attr_p->max_dest_rd_atomic = modify_attr_p->state.rts.resp_res; + } + +#if 0 + // Linux patch 4793: PKEY_INDEX is not a legal parameter in the RTR->RTS transition. 
+ + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rts.pkey_index; + } +#endif + + // IB_QP_MIN_RNR_TIMER + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_RNR_NAK_TIMEOUT) { + *qp_attr_mask_p |= IB_QP_MIN_RNR_TIMER; + qp_attr_p->min_rnr_timer = modify_attr_p->state.rts.rnr_nak_timeout; + } + + // IB_QP_PATH_MIG_STATE + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_APM_STATE) { + *qp_attr_mask_p |= IB_QP_PATH_MIG_STATE; + qp_attr_p->path_mig_state = modify_attr_p->state.rts.apm_state; + } + + // IB_QP_ACCESS_FLAGS + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flags */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.init.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* optional flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rts.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + qp_attr_p->alt_timeout = modify_attr_p->state.rts.alternate_av.conn.local_ack_timeout; // XXX: conv + } + break; + + case IB_QPT_UNRELIABLE_CONN: + *qp_attr_mask_p |= /* required flags */ + IB_QP_SQ_PSN; + + // IB_QP_MAX_DEST_RD_ATOMIC: Update the responder resources for RDMA/ATOMIC (optional for SQD->RTS) + if (modify_attr_p->state.rts.opts & IB_MOD_QP_RESP_RES) { + *qp_attr_mask_p |= IB_QP_MAX_DEST_RD_ATOMIC; + qp_attr_p->max_dest_rd_atomic = modify_attr_p->state.rts.resp_res; + } + +#if 0 + // Linux patch 4793: PKEY_INDEX is not a legal parameter in the RTR->RTS transition. + + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rts.pkey_index; + } +#endif + + // IB_QP_PATH_MIG_STATE + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_APM_STATE) { + *qp_attr_mask_p |= IB_QP_PATH_MIG_STATE; + qp_attr_p->path_mig_state = modify_attr_p->state.rts.apm_state; + } + + // IB_QP_ACCESS_FLAGS + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flags */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.init.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* optional flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rts.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + } + break; + + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + *qp_attr_mask_p |= /* required flags */ + IB_QP_SQ_PSN; + + // IB_QP_QKEY + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_QKEY) { + *qp_attr_mask_p |= IB_QP_QKEY; + qp_attr_p->qkey = cl_ntoh32 (modify_attr_p->state.rtr.qkey); + } + break; + + break; + + } + + // IB_QP_SQ_PSN: common for all + qp_attr_p->sq_psn = cl_ntoh32 (modify_attr_p->state.rts.sq_psn); + //NB: IB_QP_CUR_STATE flag is not provisioned by IBAL + break; + + case IB_QPS_SQD: + case IB_QPS_SQD_DRAINING: + case IB_QPS_SQD_DRAINED: + *qp_attr_mask_p |= IB_QP_EN_SQD_ASYNC_NOTIFY; + qp_attr_p->en_sqd_async_notify = (u8)modify_attr_p->state.sqd.sqd_event; + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SHIM ,("IB_QP_EN_SQD_ASYNC_NOTIFY seems like unsupported\n")); + 
break; + + default: + //NB: is this an error case and we need this message ? What about returning an error ? + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped qp_state %d\n", modify_attr_p->req_state)); + break; + + } + + return status; +} + +int +mlnx_conv_ibal_av( + IN const struct ib_device *ib_dev_p, + IN const ib_av_attr_t *ibal_av_p, + OUT struct ib_ah_attr *ah_attr_p) +{ + int err = 0; + u8 port_num; + u16 gid_index; + + ah_attr_p->port_num = ibal_av_p->port_num; + ah_attr_p->sl = ibal_av_p->sl; + ah_attr_p->dlid = cl_ntoh16(ibal_av_p->dlid); + //TODO: how static_rate is coded ? + ah_attr_p->static_rate = + (ibal_av_p->static_rate == IB_PATH_RECORD_RATE_10_GBS ? 0 : 3); + ah_attr_p->src_path_bits = ibal_av_p->path_bits; // PATH: + + /* For global destination or Multicast address:*/ + if (ibal_av_p->grh_valid) + { + ah_attr_p->ah_flags |= IB_AH_GRH; + ah_attr_p->grh.hop_limit = ibal_av_p->grh.hop_limit; + ib_grh_get_ver_class_flow( ibal_av_p->grh.ver_class_flow, NULL, + &ah_attr_p->grh.traffic_class, &ah_attr_p->grh.flow_label ); + err = ib_find_cached_gid((struct ib_device *)ib_dev_p, + (union ib_gid *)ibal_av_p->grh.src_gid.raw, &port_num, &gid_index); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ib_find_cached_gid failed %d (%#x). Using default: sgid_index = 0\n", err, err)); + gid_index = 0; + } + else if (port_num != ah_attr_p->port_num) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ib_find_cached_gid returned wrong port_num %u (Expected - %u). Using the expected.\n", + (u32)port_num, (u32)ah_attr_p->port_num)); + } + ah_attr_p->grh.sgid_index = (u8)gid_index; + RtlCopyMemory(ah_attr_p->grh.dgid.raw, ibal_av_p->grh.dest_gid.raw, sizeof(ah_attr_p->grh.dgid)); + } + + return err; +} + +int +mlnx_conv_mthca_av( + IN const struct ib_ah *ib_ah_p, + OUT ib_av_attr_t *ibal_av_p) +{ + int err = 0; + struct ib_ud_header header; + struct mthca_ah *ah_p = (struct mthca_ah *)ib_ah_p; + struct ib_device *ib_dev_p = ib_ah_p->pd->device; + struct mthca_dev *dev_p = (struct mthca_dev *)ib_dev_p; + + err = mthca_read_ah( dev_p, ah_p, &header); + if (err) + goto err_read_ah; + + // common part + ibal_av_p->sl = header.lrh.service_level; + mthca_get_av_params(ah_p, &ibal_av_p->port_num, + &ibal_av_p->dlid, &ibal_av_p->static_rate, &ibal_av_p->path_bits ); + + // GRH + ibal_av_p->grh_valid = header.grh_present; + if (ibal_av_p->grh_valid) { + ibal_av_p->grh.ver_class_flow = ib_grh_set_ver_class_flow( + header.grh.ip_version, header.grh.traffic_class, header.grh.flow_label ); + ibal_av_p->grh.hop_limit = header.grh.hop_limit; + RtlCopyMemory(ibal_av_p->grh.src_gid.raw, + header.grh.source_gid.raw, sizeof(ibal_av_p->grh.src_gid)); + RtlCopyMemory(ibal_av_p->grh.src_gid.raw, + header.grh.destination_gid.raw, sizeof(ibal_av_p->grh.dest_gid)); + } + + //TODO: don't know, how to fill conn. Note, that previous version didn't fill it also. 
+ +err_read_ah: + return err; +} + +void +mlnx_modify_ah( + IN const struct ib_ah *ib_ah_p, + IN const struct ib_ah_attr *ah_attr_p) +{ + struct ib_device *ib_dev_p = ib_ah_p->pd->device; + struct mthca_dev *dev_p = (struct mthca_dev *)ib_dev_p; + + mthca_set_av_params(dev_p, (struct mthca_ah *)ib_ah_p, (struct ib_ah_attr *)ah_attr_p ); +} diff --git a/branches/MTHCA/hw/mthca/kernel/hca_data.h b/branches/MTHCA/hw/mthca/kernel/hca_data.h index 14e65229..47575b98 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_data.h +++ b/branches/MTHCA/hw/mthca/kernel/hca_data.h @@ -39,77 +39,9 @@ #include #include -#ifndef WIN_TO_BE_REMOVED - -//TODO: temp data type -// THH -#define HH_hca_hndl_t int -#define HH_hca_dev_t int -#define HH_cq_hndl_t int -#define HH_event_record_t int -#define HH_pd_hndl_t int -#define HH_mr_t int -#define HH_mr_info_t int -// HHUL -#define HHUL_pd_hndl_t int -#define HHUL_qp_hndl_t int -#define HHUL_cq_hndl_t int -#define HHUL_hca_hndl_t int -#define HHUL_ud_av_hndl_t int -#define HHUL_mw_bind_t int -#define HHUL_qp_init_attr_t int - -// VAPI -#define VAPI_sr_desc_t int -#define VAPI_rr_desc_t int -#define VAPI_sg_lst_entry_t int -#define VAPI_sg_lst_entry_t int -#define VAPI_mrw_acl_t int -#define VAPI_lkey_t int -#define VAPI_ud_av_t int -#define VAPI_mr_change_t int -#define VAPI_wc_status_t int -#define VAPI_cqe_opcode_t int -#define VAPI_remote_node_addr_type_t int -#define VAPI_qp_attr_t int -#define VAPI_qp_attr_mask_t int -#define VAPI_sr_desc_t int -#define VAPI_hca_cap_t int -#define VAPI_special_qp_t int -#define VAPI_hca_port_t int - -// MOSAL -#define MOSAL_protection_ctx_t int -#define MOSAL_mem_perm_t int -#define MOSAL_iobuf_t int -#define MT_size_t int - -//TODO: replace by u64 et al -typedef uint64_t u_int64_t; -typedef uint32_t u_int32_t; -typedef uint16_t u_int16_t; -typedef uint8_t u_int8_t; - -// taken from ib_defs.h -typedef u_int32_t IB_wqpn_t; /* Work QP number: Only 24 LSbits */ -typedef u_int8_t IB_port_t; -typedef u_int8_t IB_gid_t[16]; /* GID (aka IPv6) H-to-L (big) (network) endianess */ -typedef u_int32_t IB_ts_t; - - -#endif - -extern u_int32_t g_mlnx_dbg_lvl; extern uint32_t g_sqp_max_avs; extern char mlnx_uvp_lib_name[]; -#define MLNX_DBG_INFO (1<<1) -#define MLNX_DBG_TRACE (1<<2) -#define MLNX_DBG_VERBOSE (1<<3) -// for data path debugging -#define MLNX_DBG_DIRECT (1<<4) -#define MLNX_DBG_QPN (1<<5) -#define MLNX_DBG_MEM (1<<6) #define MLNX_MAX_HCA 4 #define MLNX_NUM_HOBKL MLNX_MAX_HCA @@ -135,28 +67,13 @@ extern char mlnx_uvp_lib_name[]; */ #define MLNX_COMP_MODEL 3 -#ifdef WIN_TO_BE_REMOVED -#define PD_HCA_FROM_HNDL(hndl) (((pd_info_t *)hndl)->hca_idx) -#define PD_NUM_FROM_HNDL(hndl) (((pd_info_t *)hndl)->pd_num) -#define CQ_HCA_FROM_HNDL(hndl) (((cq_info_t *)hndl)->hca_idx) -#define CQ_NUM_FROM_HNDL(hndl) (((cq_info_t *)hndl)->cq_num) -#define QP_HCA_FROM_HNDL(hndl) (((qp_info_t *)hndl)->hca_idx) -#define QP_NUM_FROM_HNDL(hndl) (((qp_info_t *)hndl)->qp_num) - -#define PD_HNDL_FROM_PD(pd_num) (&hobul_p->pd_info_tbl[pd_num]) -#define CQ_HNDL_FROM_CQ(cq_num) (&hobul_p->cq_info_tbl[cq_num]) -#define QP_HNDL_FROM_QP(qp_num) (&hobul_p->qp_info_tbl[qp_num]) -#else -// incorrect: #define HOBUL_FROM_PD(hndl) container_of(hndl, mlnx_hobul_t, pd_info_tbl) -#endif - -#ifdef _DEBUG_ +#ifdef DBG #define VALIDATE_INDEX(index, limit, error, label) \ { \ if (index >= limit) \ { \ status = error; \ - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); \ + HCA_PRINT(TRACE_LEVEL_ERROR , g_mlnx_dbg_lvl ,("file %s line %d\n", 
__FILE__, __LINE__)));\ goto label; \ } \ } @@ -251,185 +168,47 @@ typedef struct _ib_ca { ci_async_event_cb_t async_cb_p; const void *ca_context; void *cl_device_h; - u_int32_t index; + uint32_t index; cl_async_proc_t *async_proc_mgr_p; mlnx_cache_t *cache; // Cached port attributes. const void * __ptr64 p_dev_obj; // store underlying device object } mlnx_hob_t; -typedef struct _ib_um_ca -{ - MDL *p_mdl; - void *p_mapped_addr; - HH_hca_hndl_t hh_hndl; - HH_hca_dev_t dev_info; - uint8_t ul_hca_res[1]; // Beginning of UL resource buffer. - -} mlnx_um_ca_t; - -typedef struct { - cl_async_proc_item_t async_item; - HH_hca_hndl_t hh_hndl; - HH_cq_hndl_t hh_cq; // for completion - HH_event_record_t hh_er; // for async events - void *private_data; -} mlnx_cb_data_t; - -#ifdef WIN_TO_BE_REMOVED -typedef struct _ib_pd { /* struct of PD related resources */ - ENUM_MARK mark; - u_int32_t kernel_mode; - cl_mutex_t mutex; - atomic32_t count; - u_int32_t hca_idx; - // mlnx_hob_t *hob_p; - HH_hca_hndl_t hh_hndl; /* For HH direct access */ - HH_pd_hndl_t pd_num; /* For HH-UL direct access */ - HHUL_pd_hndl_t hhul_pd_hndl; - void *pd_ul_resources_p; -} pd_info_t; - -#else - -struct _ib_pd { - struct ib_pd ib_pd; -} pd_info_t; - -#endif - -#ifdef WIN_TO_BE_REMOVED -typedef struct _ib_cq { /* struct of CQ related resources */ - ENUM_MARK mark; - cl_mutex_t mutex; - u_int32_t hca_idx; - u_int32_t kernel_mode; - // mlnx_hob_t *hob_p; - HH_hca_hndl_t hh_hndl; /* For HH direct access */ - HH_cq_hndl_t cq_num; /* For HH-UL direct access */ -// HH_pd_hndl_t pd_num; /* For HH-UL direct access */ - HHUL_cq_hndl_t hhul_cq_hndl; - void *cq_ul_resources_p; - const void *cq_context; - KDPC dpc; - atomic32_t spl_qp_cnt; - -} cq_info_t; -#else - -struct _ib_cq { - struct ib_cq ibcq; -} cq_info_t; - -#endif - -#ifdef WIN_TO_BE_REMOVED -typedef struct _ib_qp { - ENUM_MARK mark; - cl_mutex_t mutex; - u_int32_t hca_idx; - u_int32_t kernel_mode; - // mlnx_hob_t *hob_p; - HH_hca_hndl_t hh_hndl; // For HH direct access */ - HHUL_qp_hndl_t hhul_qp_hndl; - IB_wqpn_t qp_num; // For direct HH-UL access */ - HH_pd_hndl_t pd_num; // For HH-UL direct access */ - IB_port_t port; // Valid for special QPs only */ - ib_qp_type_t qp_type; // Required for qp_query - u_int32_t sq_signaled; // Required for qp_query - ib_cq_handle_t h_sq_cq; - ib_cq_handle_t h_rq_cq; - u_int32_t sq_size; - u_int32_t rq_size; - VAPI_sr_desc_t *send_desc_p; - VAPI_rr_desc_t *recv_desc_p; - VAPI_sg_lst_entry_t *send_sge_p; - VAPI_sg_lst_entry_t *recv_sge_p; - void *qp_ul_resources_p; - const void *qp_context; -} qp_info_t; -#else - -struct _ib_qp { - // must be the first - struct ib_qp ibqp; -} qp_info_t; - -#endif - typedef struct HOBUL_t { int dummy; #ifdef WIN_TO_BE_REMOVED pd_info_t *pd_info_tbl; HH_hca_hndl_t hh_hndl; /* For HH direct access */ HHUL_hca_hndl_t hhul_hndl; /* user level HCA resources handle for HH */ - u_int32_t cq_idx_mask; /* */ - u_int32_t qp_idx_mask; /* */ - u_int32_t vendor_id; /* \ */ - u_int32_t device_id; /* > 3 items needed for initializing user level */ + uint32_t cq_idx_mask; /* */ + uint32_t qp_idx_mask; /* */ + uint32_t vendor_id; /* \ */ + uint32_t device_id; /* > 3 items needed for initializing user level */ void *hca_ul_resources_p; /* / */ MT_size_t cq_ul_resources_sz; /* Needed for allocating user resources for CQs */ MT_size_t qp_ul_resources_sz; /* Needed for allocating user resources for QPs */ MT_size_t pd_ul_resources_sz; /* Needed for allocating user resources for PDs */ - u_int32_t max_cq; /* Max num. 
of CQs - size of following table */ + uint32_t max_cq; /* Max num. of CQs - size of following table */ cq_info_t *cq_info_tbl; - u_int32_t max_qp; /* Max num. of QPs - size of following table */ + uint32_t max_qp; /* Max num. of QPs - size of following table */ qp_info_t *qp_info_tbl; - u_int32_t max_pd; /* Max num. of PDs - size of following table */ - u_int32_t log2_mpt_size; + uint32_t max_pd; /* Max num. of PDs - size of following table */ + uint32_t log2_mpt_size; atomic32_t count; #endif } mlnx_hobul_t, *mlnx_hobul_hndl_t; -#ifdef WIN_TO_BE_REMOVED -typedef struct _ib_mr { - ENUM_MARK mark; - ENUM_MR_TYPE mr_type; - u_int64_t mr_start; // TBD: IA64 - u_int64_t mr_size; // TBD: IA64 -// u_int64_t mr_first_page_addr; // TBD : IA64 -// u_int32_t mr_num_pages; - ib_pd_handle_t mr_pd_handle; - MOSAL_iobuf_t mr_iobuf; - VAPI_mrw_acl_t mr_acl; - VAPI_lkey_t mr_lkey; - MOSAL_protection_ctx_t mr_prot_ctx; - MOSAL_mem_perm_t mr_mosal_perm; -} mlnx_mro_t; -#else - -typedef struct _ib_mr { - struct ib_mr ib_mr; -} mlnx_mro_t; - -#endif -typedef struct _ib_mw { - ENUM_MARK mark; - u_int32_t hca_idx; - u_int32_t pd_idx; - u_int32_t mw_rkey; -} mlnx_mwo_t; - typedef struct _ib_mcast { - ENUM_MARK mark; - IB_gid_t mcast_gid; - u_int32_t hca_idx; - u_int32_t qp_num; - u_int32_t kernel_mode; + ib_gid_t mcast_gid; + struct ib_qp *ib_qp_p; + uint16_t mcast_lid; } mlnx_mcast_t; -typedef struct _ib_av { - ENUM_MARK mark; - u_int32_t hca_idx; - u_int32_t pd_idx; - u_int32_t user_mode; - HHUL_ud_av_hndl_t h_av; -} mlnx_avo_t; - -typedef struct { +typedef struct _mlnx_hca_t { cl_list_item_t list_item; // to include in the HCA chain net64_t guid; // HCA node Guid struct mthca_dev *mdev; // VP Driver device - u_int32_t hw_ver; // HCA HW version + uint32_t hw_ver; // HCA HW version mlnx_hob_t hob; // HOB - IBAL-related HCA resources mlnx_hobul_t hobul; // HOBUL - - IBAL-related kernel client resources @@ -486,10 +265,6 @@ mlnx_hobs_insert( IN mlnx_hca_t *p_hca, OUT mlnx_hob_t **hob_p); -void -mlnx_hobs_get_handle( - IN mlnx_hob_t *hob_p, - OUT HH_hca_hndl_t *hndl_p); ib_api_status_t mlnx_hobs_set_cb( @@ -512,21 +287,10 @@ void mlnx_hobs_remove( IN mlnx_hob_t *hob_p); -ib_api_status_t -mlnx_hobs_lookup( - IN HH_hca_hndl_t hndl, - OUT mlnx_hob_t **hca_p); - mlnx_hobul_t * mlnx_hobs_get_hobul( IN mlnx_hob_t *hob_p); -ib_api_status_t -mlnx_hobul_new( - IN mlnx_hob_t *hob_p, - IN HH_hca_hndl_t hh_hndl, - IN void *resources_p); - void mlnx_hobul_get( IN mlnx_hob_t *hob_p, @@ -536,109 +300,6 @@ void mlnx_hobul_delete( IN mlnx_hob_t *hob_p); -// Conversion Functions - -VAPI_mrw_acl_t -map_ibal_acl( - IN ib_access_t ibal_acl); - -ib_access_t -map_vapi_acl( - IN VAPI_mrw_acl_t vapi_acl); - -ib_api_status_t -mlnx_lock_region( - IN mlnx_mro_t *mro_p, - IN boolean_t um_call ); - -ib_api_status_t -mlnx_conv_ibal_mr_create( - IN u_int32_t pd_idx, - IN OUT mlnx_mro_t *mro_p, - IN VAPI_mr_change_t change_flags, - IN ib_mr_create_t const *p_mr_create, - IN boolean_t um_call, - OUT HH_mr_t *mr_props_p ); - -ib_api_status_t -mlnx_conv_ibal_pmr_create( - IN u_int32_t pd_idx, - IN mlnx_mro_t *mro_p, - IN ib_phys_create_t const *p_pmr_create, - OUT HH_mr_t *mr_props_p ); - -void -mlnx_conv_ibal_av( - IN HH_hca_hndl_t hh_hndl, - IN const ib_av_attr_t *ibal_av_p, - OUT VAPI_ud_av_t *vapi_av_p); - -void -mlnx_conv_vapi_av( - IN HH_hca_hndl_t hh_hndl, - IN const VAPI_ud_av_t *vapi_av_p, - OUT ib_av_attr_t *ibal_av_p); - -int -mlnx_map_vapi_cqe_status( - IN VAPI_wc_status_t vapi_status); - -int -mlnx_map_vapi_cqe_type( - IN 
VAPI_cqe_opcode_t opcode); - -int -mlnx_map_vapi_rna_type( - IN VAPI_remote_node_addr_type_t rna); - -void -mlnx_conv_vapi_mr_attr( - IN ib_pd_handle_t pd_h, - IN HH_mr_info_t *mr_info_p, - OUT ib_mr_attr_t *mr_query_p); - -void -mlnx_conv_bind_req( - IN HHUL_qp_hndl_t hhul_qp_hndl, - IN ib_bind_wr_t* const p_mw_bind, - OUT HHUL_mw_bind_t *bind_prop_p); - -int -mlnx_map_ibal_qp_type( - IN ib_qp_type_t ibal_qpt, - OUT VAPI_special_qp_t *vapi_qp_type_p); - -void -mlnx_conv_qp_create_attr( - IN const ib_qp_create_t *create_attr_p, - IN HHUL_qp_init_attr_t *init_attr_p, - OUT VAPI_special_qp_t *vapi_qp_type_p); - -void -mlnx_conv_vapi_qp_attr( - IN HH_hca_hndl_t hh_hndl, - IN VAPI_qp_attr_t *hh_qp_attr_p, - OUT ib_qp_attr_t *qp_attr_p); - -ib_api_status_t -mlnx_conv_qp_modify_attr( - IN HH_hca_hndl_t hh_hndl, - IN ib_qp_type_t qp_type, - IN const ib_qp_mod_t *modify_attr_p, - OUT VAPI_qp_attr_t *qp_attr_p, - OUT VAPI_qp_attr_mask_t *attr_mask_p); - -ib_api_status_t -mlnx_conv_send_desc( - IN IB_ts_t transport, - IN const ib_send_wr_t *ibal_send_wqe_p, - OUT VAPI_sr_desc_t *vapi_send_desc_p); - -ib_api_status_t -mlnx_conv_recv_desc( - IN const ib_recv_wr_t *ibal_recv_wqe_p, - OUT VAPI_rr_desc_t *vapi_recv_desc_p); - void mlnx_conv_hca_cap( IN struct ib_device *ib_dev, @@ -646,20 +307,6 @@ mlnx_conv_hca_cap( IN struct ib_port_attr *hca_ports, OUT ib_ca_attr_t *ca_attr_p); -ib_api_status_t -mlnx_get_hca_pkey_tbl( - IN HH_hca_hndl_t hh_hndl, - IN u_int8_t port_num, - IN u_int16_t num_entries, - OUT void* table); - -ib_api_status_t -mlnx_get_hca_gid_tbl( - IN HH_hca_hndl_t hh_hndl, - IN u_int8_t port_num, - IN u_int16_t num_entries, - OUT void* table); - ib_api_status_t mlnx_local_mad ( IN const ib_ca_handle_t h_ca, @@ -691,10 +338,43 @@ fw_access_ctrl( IN ib_ci_op_t* const p_ci_op, IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL); + void cq_comp_handler(struct ib_cq *cq, void *context); + void ca_event_handler(struct ib_event *ev, void *context); + void qp_event_handler(struct ib_event *ev, void *context); + void cq_event_handler(struct ib_event *ev, void *context); +ib_qp_state_t mlnx_qps_to_ibal(enum ib_qp_state qps); + +enum ib_qp_state mlnx_qps_from_ibal(ib_qp_state_t ib_qps); + +ib_api_status_t +mlnx_conv_qp_modify_attr( + IN const struct ib_qp *ib_qp_p, + IN ib_qp_type_t qp_type, + IN const ib_qp_mod_t *modify_attr_p, + OUT struct ib_qp_attr *qp_attr_p, + OUT int *qp_attr_mask_p + ); + +int +mlnx_conv_ibal_av( + IN const struct ib_device *ib_dev_p, + IN const ib_av_attr_t *ibal_av_p, + OUT struct ib_ah_attr *ah_attr_p); + +int +mlnx_conv_mthca_av( + IN const struct ib_ah *ib_ah_p, + OUT ib_av_attr_t *ibal_av_p); + +void +mlnx_modify_ah( + IN const struct ib_ah *ib_ah_p, + IN const struct ib_ah_attr *ah_attr_p); + #endif diff --git a/branches/MTHCA/hw/mthca/kernel/hca_debug.h b/branches/MTHCA/hw/mthca/kernel/hca_debug.h index b6871b53..9f587b66 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_debug.h +++ b/branches/MTHCA/hw/mthca/kernel/hca_debug.h @@ -30,43 +30,150 @@ */ -#if !defined( _HCA_DEBUG_H_ ) +#ifndef _HCA_DEBUG_H_ #define _HCA_DEBUG_H_ -#include +extern uint32_t g_mthca_dbg_level; +extern uint32_t g_mthca_dbg_flags; +#define MAX_LOG_BUF_LEN 512 +extern WCHAR g_wlog_buf[ MAX_LOG_BUF_LEN ]; +extern UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; -#define HCA_DBG_DEV (1 << 0) -#define HCA_DBG_PNP (1 << 1) -#define HCA_DBG_PO (1 << 2) -#define HCA_DBG_WARN (1 << 30) -#define HCA_DBG_ERROR CL_DBG_ERROR -#define HCA_DBG_FULL CL_DBG_ALL +#if defined(EVENT_TRACING) +// +// Software Tracing Definitions +// 
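// A hedged usage sketch (example_caller and its err parameter are hypothetical),
// assuming the begin_wpp block below generates HCA_ENTER/HCA_EXIT/HCA_PRINT for
// this build; the same calls compile against the DbgPrint-based macros in the
// non-EVENT_TRACING branch further down:
//
//	static void example_caller( int err )
//	{
//		HCA_ENTER( HCA_DBG_PNP );
//		if( err )
//			HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("failed (%d)\n", err) );
//		HCA_EXIT( HCA_DBG_PNP );
//	}
//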
+#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(HCACtlGuid,(8BF1F640,63FE,4743,B9EF,FA38C695BFDE), \ + WPP_DEFINE_BIT( HCA_DBG_DEV) \ + WPP_DEFINE_BIT( HCA_DBG_PNP) \ + WPP_DEFINE_BIT( HCA_DBG_MAD) \ + WPP_DEFINE_BIT( HCA_DBG_PO) \ + WPP_DEFINE_BIT( HCA_DBG_CQ) \ + WPP_DEFINE_BIT( HCA_DBG_QP) \ + WPP_DEFINE_BIT( HCA_DBG_MEMORY) \ + WPP_DEFINE_BIT( HCA_DBG_AV) \ + WPP_DEFINE_BIT( HCA_DBG_LOW) \ + WPP_DEFINE_BIT( HCA_DBG_SHIM)) -extern uint32_t g_mlnx_dbg_lvl; +#define WPP_GLOBALLOGGER -#define HCA_ENTER( msg_lvl ) \ - CL_ENTER( msg_lvl, g_mlnx_dbg_lvl ) +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) -#define HCA_EXIT( msg_lvl ) \ - CL_EXIT( msg_lvl, g_mlnx_dbg_lvl ) -#define HCA_TRACE( msg_lvl, msg ) \ - CL_TRACE( (msg_lvl), g_mlnx_dbg_lvl, msg ) +// begin_wpp config +// HCA_ENTER(FLAG); +// HCA_EXIT(FLAG); +// USEPREFIX(HCA_PRINT, "%!STDPREFIX! %!FUNC!() :"); +// USESUFFIX(HCA_ENTER, " %!FUNC!()===>"); +// USESUFFIX(HCA_EXIT, " %!FUNC!()<==="); +// end_wpp -#define HCA_TRACE_ERR( msg_lvl, msg ) \ - if ( status != IB_SUCCESS) \ - CL_TRACE( (msg_lvl), g_mlnx_dbg_lvl, msg ) -#define HCA_TRACE_EXIT( msg_lvl, msg ) \ - CL_TRACE_EXIT( msg_lvl, g_mlnx_dbg_lvl, msg ) -#define HCA_PRINT( msg_lvl, msg ) \ - CL_PRINT( msg_lvl, g_mlnx_dbg_lvl, msg ) +#else -#endif /* !defined( _HCA_DEBUG_H_ ) */ + +#include + +/* + * Debug macros + */ + + +#define HCA_DBG_DEV (1 << 0) +#define HCA_DBG_PNP (1 << 1) +#define HCA_DBG_MAD (1 << 2) +#define HCA_DBG_PO (1 << 3) +#define HCA_DBG_QP (1 << 4) +#define HCA_DBG_CQ (1 << 5) +#define HCA_DBG_MEMORY (1 << 6) +#define HCA_DBG_AV (1<<7) +#define HCA_DBG_LOW (1 << 8) +#define HCA_DBG_SHIM (1 << 9) + +static void _build_str( PUCHAR format, ... 
) +{ + va_list p_arg; + va_start(p_arg, format); + vsprintf(g_slog_buf , format , p_arg); + swprintf(g_wlog_buf, L"%S", g_slog_buf); + va_end(p_arg); +} + +#define HCA_PRINT_TO_EVENT_LOG(_obj_,_level_,_flag_,_msg_) \ + { \ + ULONG event_id; \ + switch (_level_) { \ + case TRACE_LEVEL_FATAL: case TRACE_LEVEL_ERROR: event_id = EVENT_MTHCA_ANY_ERROR; break; \ + case TRACE_LEVEL_WARNING: event_id = EVENT_MTHCA_ANY_WARN; break; \ + default: event_id = EVENT_MTHCA_ANY_INFO; break; \ + } \ + _build_str _msg_; \ + WriteEventLogEntryStr( _obj_, event_id, 0, 0, g_wlog_buf, 0, 0 ); \ + } + +#define HCA_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + HCA_PRINT_TO_EVENT_LOG(mdev->ext->cl_ext.p_self_do,_level_,_flag_,_msg_) + + +#if DBG + +#define HCA_PRINT(_level_,_flag_,_msg_) \ + if (g_mthca_dbg_level >= (_level_) && \ + (g_mthca_dbg_flags & (_flag_))) { \ + DbgPrint ("[MTHCA] %s() :", __FUNCTION__); \ + if((_level_) == TRACE_LEVEL_ERROR) DbgPrint ("***ERROR*** "); \ + DbgPrint _msg_; \ + } + +#define HCA_PRINT_EV(_level_,_flag_,_msg_) \ + { \ + HCA_PRINT(_level_,_flag_,_msg_) \ + HCA_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + } + + +#else + +#define HCA_PRINT(lvl ,flags, msg) + +#define HCA_PRINT_EV(_level_,_flag_,_msg_) \ + { \ + HCA_PRINT(_level_,_flag_,_msg_) \ + HCA_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + } + +#endif + + +#define HCA_ENTER(flags)\ + HCA_PRINT(TRACE_LEVEL_VERBOSE, flags,("===>\n")); + +#define HCA_EXIT(flags)\ + HCA_PRINT(TRACE_LEVEL_VERBOSE, flags, ("<===\n" )); + + +#define HCA_PRINT_EXIT(_level_,_flag_,_msg_) \ + {\ + if (status != IB_SUCCESS) {\ + HCA_PRINT(_level_,_flag_,_msg_);\ + }\ + HCA_EXIT(_flag_);\ + } + +#endif //EVENT_TRACING + + + + +#endif /*_HCA_DEBUG_H_ */ diff --git a/branches/MTHCA/hw/mthca/kernel/hca_direct.c b/branches/MTHCA/hw/mthca/kernel/hca_direct.c index 4dbc5bd1..24f0340f 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_direct.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_direct.c @@ -31,7 +31,16 @@ */ -#include "hca_data.h" +#include "hca_driver.h" +#include "hca_debug.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_direct.tmh" +#endif +#include "mthca_dev.h" /* Controls whether to use the VAPI entrypoints in THH, or the IBAL native ones. 
*/ @@ -50,18 +59,37 @@ mlnx_post_send ( OUT ib_send_wr_t **pp_failed ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_qp); - UNREFERENCED_PARAMETER(p_send_wr); - UNREFERENCED_PARAMETER(pp_failed); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_post_send not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + struct ib_device *ib_dev_p = ib_qp_p->device; + + HCA_ENTER(HCA_DBG_QP); + + // sanity checks + + // create CQ + err = ib_dev_p->post_send(ib_qp_p, p_send_wr, pp_failed ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP ,("post_send failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_post_send; + } + + status = IB_SUCCESS; + +err_post_send: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_QP ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status = IB_SUCCESS; qp_info_t *qp_info_p = (qp_info_t *)h_qp; - u_int32_t qp_idx = 0; + uint32_t qp_idx = 0; mlnx_hobul_t *hobul_p; - // CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("hca %x qp %x\n", qp_info_p->hca_idx, qp_info_p->qp_num)); + // HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"hca %x qp %x\n", qp_info_p->hca_idx, qp_info_p->qp_num) if( !p_send_wr ) { status = IB_INVALID_PARAMETER; @@ -91,7 +119,7 @@ mlnx_post_send ( #endif cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR, HCA_DBG_QP,"completes with ERROR status %d\n", status) return status; #endif } @@ -104,16 +132,35 @@ mlnx_post_recv ( OUT ib_recv_wr_t **pp_failed OPTIONAL ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_qp); - UNREFERENCED_PARAMETER(p_recv_wr); - UNREFERENCED_PARAMETER(pp_failed); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_post_recv not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + struct ib_device *ib_dev_p = ib_qp_p->device; + + HCA_ENTER(HCA_DBG_QP); + + // sanity checks + + // create CQ + err = ib_dev_p->post_recv(ib_qp_p, p_recv_wr, pp_failed ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("post_recv failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_post_recv; + } + + status = IB_SUCCESS; + +err_post_recv: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_QP ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status = IB_SUCCESS; qp_info_t *qp_info_p = (qp_info_t *)h_qp; - u_int32_t qp_idx = 0; + uint32_t qp_idx = 0; mlnx_hobul_t *hobul_p; #if !MLNX_RECV_NATIVE HH_ret_t ret; @@ -123,7 +170,7 @@ mlnx_post_recv ( VAPI_special_qp_t vapi_qp_type; #endif - // CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("hca %x qp %x\n", + // HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,HCA_DBG_SHIM ,("hca %x qp %x\n", // qp_info_p->hca_idx, qp_info_p->qp_num)); if( !p_recv_wr ) { @@ -181,7 +228,7 @@ mlnx_post_recv ( // For regular QP use real send multiple VAPI_rr_desc_t desc_list[MLNX_MAX_WRS_PER_CHAIN]; VAPI_sg_lst_entry_t sg_list[MLNX_MAX_WRS_PER_CHAIN][MLNX_MAX_NUM_SGE]; - u_int32_t num_wrs; + uint32_t num_wrs; wqe_p = p_recv_wr; while (wqe_p) { @@ -191,12 +238,12 @@ mlnx_post_recv ( desc_list [num_wrs].sg_lst_p = &sg_list [num_wrs][0]; status = mlnx_conv_recv_desc(wqe_p, &desc_list[num_wrs]); if (status != IB_SUCCESS) { - CL_TRACE(MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, + HCA_PRINT(MLNX_DBG_DIRECT, HCA_DBG_QP, ("FAILED to map the recv_desc %d\n", num_wrs)); 
break; } } - // CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("num_wrs %d\n", num_wrs)); + // HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,"num_wrs %d\n", num_wrs) if (num_wrs > 0) { if (num_wrs > 1) { ret = THHUL_qpm_post_recv_reqs (hobul_p->hhul_hndl, @@ -217,7 +264,7 @@ mlnx_post_recv ( } } else { /* no work requests this round */ - CL_TRACE (MLNX_DBG_DIRECT, g_mlnx_dbg_lvl, ("NO WRs\n")); + HCA_PRINT(MLNX_DBG_DIRECT , HCA_DBG_QP ,("NO WRs\n")); *pp_failed = wqe_p; break; } @@ -228,7 +275,7 @@ mlnx_post_recv ( #endif cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR, HCA_DBG_QP,("completes with ERROR status %d\n", status)); return status; #endif } @@ -243,16 +290,16 @@ mlnx_peek_cq( OUT uint32_t* const p_n_cqes ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_cq); - UNREFERENCED_PARAMETER(p_n_cqes); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_peek_cq not implemented\n")); - return IB_INVALID_CA_HANDLE; +UNREFERENCED_PARAMETER(h_cq); +UNREFERENCED_PARAMETER(p_n_cqes); +HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mlnx_peek_cq not implemented\n")); +return IB_INVALID_CA_HANDLE; #else ib_api_status_t status = IB_UNKNOWN_ERROR; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; mlnx_hobul_t *hobul_p; HHUL_cq_hndl_t hhul_cq_hndl; @@ -282,7 +329,7 @@ mlnx_peek_cq( return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("completes with ERROR status %d\n", status)); return status; #endif } @@ -294,17 +341,40 @@ mlnx_poll_cq ( OUT ib_wc_t** const pp_done_wclist ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_cq); - UNREFERENCED_PARAMETER(pp_free_wclist); - UNREFERENCED_PARAMETER(pp_done_wclist); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_poll_cq not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_CQ); + + // sanity checks + if (!pp_free_wclist || !pp_done_wclist || !*pp_free_wclist) { + status = IB_INVALID_PARAMETER; + goto err_invalid_params; + } + + // poll CQ + err = mthca_poll_cq_list(ib_cq_p, pp_free_wclist, pp_done_wclist ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("mthca_poll_cq_list failed (%d)\n", err)); + status = errno_to_iberr(err); + }else if (!*pp_done_wclist) + status = IB_NOT_FOUND; + +err_invalid_params: + if (status != IB_NOT_FOUND){ + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_CQ ,("completes with ERROR status %s\n", ib_get_err_str(status))); + }else + HCA_EXIT(HCA_DBG_CQ); + return status; + #else ib_api_status_t status = IB_UNKNOWN_ERROR; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; mlnx_hobul_t *hobul_p; HHUL_cq_hndl_t hhul_cq_hndl; #if !MLNX_POLL_NATIVE @@ -339,7 +409,7 @@ mlnx_poll_cq ( pp_free_wclist, pp_done_wclist ); cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("completes with ERROR status %d\n", status)); return status; #endif } @@ -350,16 +420,29 @@ mlnx_enable_cq_notify ( IN const boolean_t 
solicited ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_cq); - UNREFERENCED_PARAMETER(solicited); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_enable_cq_notify not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_SHIM); + + // REARM CQ + err = ib_req_notify_cq(ib_cq_p, (solicited) ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ib_req_notify_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status = IB_UNKNOWN_ERROR; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; mlnx_hobul_t *hobul_p; HHUL_cq_hndl_t hhul_cq_hndl; VAPI_cq_notif_type_t hh_request; @@ -395,7 +478,7 @@ mlnx_enable_cq_notify ( return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("completes with ERROR status %d\n", status)); return status; #endif } @@ -406,16 +489,28 @@ mlnx_enable_ncomp_cq_notify ( IN const uint32_t n_cqes ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_cq); - UNREFERENCED_PARAMETER(n_cqes); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_enable_ncomp_cq_notify not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + + HCA_ENTER(HCA_DBG_SHIM); + + err = ib_req_ncomp_notif(ib_cq_p, n_cqes ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ib_req_ncomp_notif failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status = IB_UNKNOWN_ERROR; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; mlnx_hobul_t *hobul_p; HHUL_cq_hndl_t hhul_cq_hndl; @@ -447,7 +542,7 @@ mlnx_enable_ncomp_cq_notify ( return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("completes with ERROR status %d\n", status)); return status; #endif } @@ -460,25 +555,25 @@ mlnx_bind_mw ( OUT net32_t* const p_rkey ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_mw); - UNREFERENCED_PARAMETER(h_qp); - UNREFERENCED_PARAMETER(p_mw_bind); - UNREFERENCED_PARAMETER(p_rkey); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_bind_mw not implemented\n")); - return IB_INVALID_CA_HANDLE; + UNREFERENCED_PARAMETER(h_mw); + UNREFERENCED_PARAMETER(h_qp); + UNREFERENCED_PARAMETER(p_mw_bind); + UNREFERENCED_PARAMETER(p_rkey); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mlnx_bind_mw not implemented\n")); + return IB_INVALID_CA_HANDLE; #else ib_api_status_t status = IB_UNKNOWN_ERROR; - u_int32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); - u_int32_t qp_num = QP_NUM_FROM_HNDL(h_qp); - u_int32_t qp_idx = 0; - u_int32_t new_key; + uint32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); + uint32_t qp_num = 
QP_NUM_FROM_HNDL(h_qp); + uint32_t qp_idx = 0; + uint32_t new_key; mlnx_hobul_t *hobul_p; mlnx_mwo_t *mwo_p; HHUL_qp_hndl_t hhul_qp_hndl; HHUL_mw_bind_t bind_props; - // CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER(HCA_DBG_SHIM); mwo_p = (mlnx_mwo_t *)h_mw; if (!mwo_p || mwo_p->mark != E_MARK_MW) { status = IB_INVALID_MW_HANDLE; @@ -498,12 +593,12 @@ mlnx_bind_mw ( } qp_idx = qp_num & hobul_p->qp_idx_mask; - // CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("line %d - qp_idx 0x%x\n", __LINE__, qp_idx)); + // HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"line %d - qp_idx 0x%x\n", __LINE__, qp_idx) VALIDATE_INDEX(qp_idx, hobul_p->max_qp, IB_INVALID_QP_HANDLE, cleanup); #if 0 - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("line %d - qp_num 0x%x valid %d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,("line %d - qp_num 0x%x valid %d\n", __LINE__, hobul_p->qp_info_tbl[qp_idx].qp_num, E_MARK_QP == hobul_p->qp_info_tbl[qp_idx].mark)); @@ -539,12 +634,12 @@ mlnx_bind_mw ( } *p_rkey = cl_hton32( new_key ); - // CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + // HCA_EXIT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM); return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %d\n", status)); - // CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("completes with ERROR status %d\n", status)); + // HCA_EXIT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM); return status; #endif } @@ -558,7 +653,7 @@ mlnx_direct_if( p_interface->post_recv = mlnx_post_recv; p_interface->enable_ncomp_cq_notify = mlnx_enable_ncomp_cq_notify; - p_interface->peek_cq = mlnx_peek_cq; + p_interface->peek_cq = NULL; /* mlnx_peek_cq: Not implemented */ p_interface->poll_cq = mlnx_poll_cq; p_interface->enable_cq_notify = mlnx_enable_cq_notify; diff --git a/branches/MTHCA/hw/mthca/kernel/hca_driver.c b/branches/MTHCA/hw/mthca/kernel/hca_driver.c index bc73dc1c..7a720c87 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_driver.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_driver.c @@ -34,8 +34,17 @@ * Provides the driver entry points for the Tavor VPD. */ - #include "hca_driver.h" +#include "hca_debug.h" + +#include "mthca_log.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_driver.tmh" +#endif +#include "mthca_dev.h" #include #include #pragma warning( push, 3 ) @@ -43,12 +52,24 @@ #pragma warning( pop ) #include +/* from \inc\platform\evntrace.h +#define TRACE_LEVEL_NONE 0 // Tracing is not on +#define TRACE_LEVEL_FATAL 1 // Abnormal exit or termination +#define TRACE_LEVEL_ERROR 2 // Severe errors that need logging +#define TRACE_LEVEL_WARNING 3 // Warnings such as allocation failure +#define TRACE_LEVEL_INFORMATION 4 // Includes non-error cases(e.g.,Entry-Exit) +#define TRACE_LEVEL_VERBOSE 5 // Detailed traces from intermediate steps +*/ +uint32_t g_mthca_dbg_level = TRACE_LEVEL_INFORMATION; +uint32_t g_mthca_dbg_flags= 0xffff; +WCHAR g_wlog_buf[ MAX_LOG_BUF_LEN ]; +UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; /* * UVP name does not include file extension. For debug builds, UAL * will append "d.dll". 
For release builds, UAL will append ".dll" */ -char mlnx_uvp_lib_name[MAX_LIB_NAME] = {"mt23108u"}; +char mlnx_uvp_lib_name[MAX_LIB_NAME] = {"mthcau"}; NTSTATUS @@ -270,14 +291,16 @@ DriverEntry( { NTSTATUS status; cl_status_t cl_status; - +#if defined(EVENT_TRACING) + WPP_INIT_TRACING(p_driver_obj ,p_registry_path); +#endif HCA_ENTER( HCA_DBG_DEV ); status = __read_registry( p_registry_path ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("__read_registry_path returned 0x%X.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__read_registry_path returned 0x%X.\n", status)); return status; } @@ -285,8 +308,8 @@ DriverEntry( cl_status = mlnx_hcas_init(); if( cl_status != CL_SUCCESS ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("mlnx_hcas_init returned %s.\n", cl_status_text[cl_status]) ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("mlnx_hcas_init returned %s.\n", cl_status_text[cl_status])); return cl_to_ntstatus( cl_status ); } // cl_memclr( mlnx_hca_array, MLNX_MAX_HCA * sizeof(ci_interface_t) ); @@ -303,6 +326,17 @@ DriverEntry( p_driver_obj->DriverUnload = hca_drv_unload; p_driver_obj->DriverExtension->AddDevice = hca_add_device; + /* init core */ + if (ib_core_init()) { + HCA_PRINT( TRACE_LEVEL_ERROR , HCA_DBG_PNP ,("Failed to init core, aborting.\n")); + return STATUS_UNSUCCESSFUL; + } + + /* init uverbs module */ + if (ib_uverbs_init()) { + HCA_PRINT( TRACE_LEVEL_ERROR , HCA_DBG_PNP ,("Failed ib_uverbs_init, aborting.\n")); + return STATUS_UNSUCCESSFUL; + } HCA_EXIT( HCA_DBG_DEV ); return STATUS_SUCCESS; } @@ -314,7 +348,7 @@ __read_registry( { NTSTATUS status; /* Remember the terminating entry in the table below. */ - RTL_QUERY_REGISTRY_TABLE table[2]; + RTL_QUERY_REGISTRY_TABLE table[3]; UNICODE_STRING param_path; HCA_ENTER( HCA_DBG_DEV ); @@ -325,8 +359,8 @@ __read_registry( param_path.Buffer = cl_zalloc( param_path.MaximumLength ); if( !param_path.Buffer ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Failed to allocate parameters path buffer.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_DEV, + ("Failed to allocate parameters path buffer.\n")); return STATUS_INSUFFICIENT_RESOURCES; } @@ -341,16 +375,30 @@ __read_registry( /* Setup the table entries. */ table[0].Flags = RTL_QUERY_REGISTRY_DIRECT; - table[0].Name = L"DebugFlags"; - table[0].EntryContext = &g_mlnx_dbg_lvl; + table[0].Name = L"DebugLevel"; + table[0].EntryContext = &g_mthca_dbg_level; table[0].DefaultType = REG_DWORD; - table[0].DefaultData = &g_mlnx_dbg_lvl; + table[0].DefaultData = &g_mthca_dbg_level; table[0].DefaultLength = sizeof(ULONG); + + table[1].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[1].Name = L"DebugFlags"; + table[1].EntryContext = &g_mthca_dbg_flags; + table[1].DefaultType = REG_DWORD; + table[1].DefaultData = &g_mthca_dbg_flags; + table[1].DefaultLength = sizeof(ULONG); + /* Have at it! 
*/ status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, param_path.Buffer, table, NULL, NULL ); + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_DEV, + ("debug level %d debug flags 0x%.8x\n", + g_mthca_dbg_level , + g_mthca_dbg_flags)); + + cl_free( param_path.Buffer ); HCA_EXIT( HCA_DBG_DEV ); return status; @@ -365,7 +413,14 @@ hca_drv_unload( UNUSED_PARAM( p_driver_obj ); + ib_uverbs_cleanup(); + ib_core_cleanup(); + HCA_EXIT( HCA_DBG_DEV ); +#if defined(EVENT_TRACING) + WPP_CLEANUP(p_driver_obj); +#endif + } @@ -410,8 +465,8 @@ hca_add_device( FALSE, &p_dev_obj ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoCreateDevice returned 0x%08X.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("IoCreateDevice returned 0x%08X.\n", status)); return status; } @@ -421,7 +476,7 @@ hca_add_device( //if( cl_status != CL_SUCCESS ) //{ // IoDeleteDevice( p_dev_obj ); - // HCA_TRACE_EXIT( HCA_DBG_ERROR, + // HCA_PRINT( TRACE_LEVEL_ERROR, // ("cl_mutex_init returned %s.\n", cl_status_text[status]) ); // return cl_to_ntstatus( status ); //} @@ -433,8 +488,8 @@ hca_add_device( { //cl_event_destroy( &p_ext->mutex ); IoDeleteDevice( p_dev_obj ); - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoAttachDeviceToDeviceStack failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoAttachDeviceToDeviceStack failed.\n")); return STATUS_NO_SUCH_DEVICE; } @@ -471,8 +526,8 @@ __get_ci_interface( NULL, 0, NULL, &event, &io_status ); if( !p_irp ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoBuildSynchronousFsdRequest failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); return STATUS_INSUFFICIENT_RESOURCES; } @@ -500,8 +555,8 @@ __get_ci_interface( if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Query interface for verbs returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Query interface for verbs returned %08x.\n", status)); return status; } @@ -530,8 +585,8 @@ __get_hca_handle( NULL, 0, NULL, &event, &io_status ); if( !p_irp ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoBuildSynchronousFsdRequest failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); return STATUS_INSUFFICIENT_RESOURCES; } @@ -559,8 +614,8 @@ __get_hca_handle( if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Query interface for HCA handle returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Query interface for HCA handle returned %08x.\n", status)); return status; } @@ -645,8 +700,8 @@ __pnp_notify_target( &p_ext->pnp_target_entry ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoRegisterPlugPlayNotification returned %08x.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n",status )); return status; } @@ -669,9 +724,9 @@ __alloc_hca_ifc( p_ifc = ExAllocatePool( PagedPool, sizeof(ci_interface_t) ); if( !p_ifc ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MAD, ("Failed to allocate ci_interface_t (%d bytes).\n", - sizeof(ci_interface_t)) ); + sizeof(ci_interface_t))); return NULL; } @@ -710,8 +765,8 @@ __hca_register( status = __get_ci_interface( p_dev_obj ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("__get_ci_interface returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); return status; } @@ -719,7 +774,7 @@ __hca_register( p_hca_ifc = 
__alloc_hca_ifc( p_ext ); if( !p_hca_ifc ) { - HCA_TRACE( HCA_DBG_ERROR, ("__alloc_hca_ifc failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("__alloc_hca_ifc failed.\n"))); return STATUS_NO_MEMORY; } @@ -756,27 +811,27 @@ __hca_register( // if( p_ext->state != HCA_ADDED && // p_ext->state != HCA_STARTED ) // { -// HCA_TRACE( HCA_DBG_ERROR, ("Invalid state.\n") ); +// HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Invalid state.\n"))); ); // break; // } // // ASSERT( !p_ext->p_al_dev ); // // /* Get the AL device object. */ -// HCA_TRACE( HCA_DBG_PNP, ("Calling IoGetDeviceObjectPointer.\n") ); +// HCA_PRINT( HCA_DBG_PNP ,HCA_DBG_SHIM ,("Calling IoGetDeviceObjectPointer.\n"))); ); // status = IoGetDeviceObjectPointer( &p_context->sym_link_name, // FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev ); // if( !NT_SUCCESS( status ) ) // { -// HCA_TRACE( HCA_DBG_ERROR, -// ("IoGetDeviceObjectPointer returned %08x.\n", status) ); +// HCA_PRINT( TRACE_LEVEL_ERROR, +// "IoGetDeviceObjectPointer returned %08x.\n", status) ; // break; // } // // cl_event_signal( &p_ext->mutex ); // /* Register for removal notification of the IB Fabric root device. */ -// HCA_TRACE( HCA_DBG_PNP, -// ("Registering for target notifications.\n") ); +// HCA_PRINT( HCA_DBG_PNP, +// ("Registering for target notifications.\n") ; // status = IoRegisterPlugPlayNotification( // EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, // p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, @@ -785,8 +840,8 @@ __hca_register( // if( !NT_SUCCESS( status ) ) // { // ObDereferenceObject( p_ext->p_al_file_obj ); -// HCA_TRACE( HCA_DBG_ERROR, -// ("IoRegisterPlugPlayNotification returned %08x.\n", status) ); +// HCA_PRINT( TRACE_LEVEL_ERROR, +// "IoRegisterPlugPlayNotification returned %08x.\n", status); // break; // } // @@ -838,7 +893,7 @@ __pnp_notify_ifc( if( p_ext->state != HCA_STARTED ) { - HCA_TRACE( HCA_DBG_ERROR, ("Invalid state: %d\n", p_ext->state) ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Invalid state: %d\n", p_ext->state))); ); return STATUS_SUCCESS; } @@ -846,19 +901,18 @@ __pnp_notify_ifc( ASSERT( !p_ext->p_al_file_obj ); /* Get the AL device object. */ - HCA_TRACE( HCA_DBG_PNP, ("Calling IoGetDeviceObjectPointer.\n") ); + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Calling IoGetDeviceObjectPointer.\n"))); ); status = IoGetDeviceObjectPointer( p_notify->SymbolicLinkName, FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("IoGetDeviceObjectPointer returned %08x.\n", status) ); return STATUS_SUCCESS; } /* Register for removal notification of the IB Fabric root device. 
*/ - HCA_TRACE( HCA_DBG_PNP, - ("Registering for target notifications.\n") ); + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP,("Registering for target notifications.\n")); status = IoRegisterPlugPlayNotification( EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, @@ -868,8 +922,8 @@ __pnp_notify_ifc( ObDereferenceObject( p_ext->p_al_file_obj ); p_ext->p_al_file_obj = NULL; p_ext->p_al_dev = NULL; - HCA_TRACE( HCA_DBG_ERROR, - ("IoRegisterPlugPlayNotification returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); return STATUS_SUCCESS; } @@ -881,8 +935,8 @@ __pnp_notify_ifc( ObDereferenceObject( p_ext->p_al_file_obj ); p_ext->p_al_file_obj = NULL; p_ext->p_al_dev = NULL; - HCA_TRACE( HCA_DBG_ERROR, - ("__get_ci_interface returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); return STATUS_SUCCESS; } @@ -929,8 +983,8 @@ hca_start( status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Lower drivers failed IRP_MN_START_DEVICE.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP , + ("Lower drivers failed IRP_MN_START_DEVICE.\n")); return status; } @@ -941,8 +995,8 @@ hca_start( status = __get_hca_handle( p_ext ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Failed to get HH HCA handle.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to get HH HCA handle.\n")); return status; } #else @@ -958,7 +1012,7 @@ hca_start( status = fw_flash_get_ca_guid(p_ext->cl_ext.p_pdo, &ca_guid); if ( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("fw_flash_get_ca_guid failed status =%#x.\n", status) ); return status; } @@ -979,8 +1033,8 @@ hca_start( if( !NT_SUCCESS( status ) ) { p_ext->state = HCA_ADDED; - HCA_TRACE( HCA_DBG_ERROR, - ("IoRegisterPlugPlayNotification returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); } HCA_EXIT( HCA_DBG_PNP ); @@ -1106,8 +1160,8 @@ hca_query_bus_relations( { //cl_event_signal( &p_ext->mutex ); *p_action = IrpComplete; - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("AL get_relations returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); return status; } } @@ -1116,8 +1170,8 @@ hca_query_bus_relations( status = cl_alloc_relations( p_irp, 1 ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("cl_alloc_relations returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_alloc_relations returned %08x.\n", status)); return status; } @@ -1400,8 +1454,8 @@ fw_access_ctrl( if_ready = 0; BusInterface.InterfaceDereference((PVOID)BusInterface.Context); } - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("fw_access_ctrl failed returns %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("fw_access_ctrl failed returns %08x.\n", status)); } return status; } diff --git a/branches/MTHCA/hw/mthca/kernel/hca_driver.h b/branches/MTHCA/hw/mthca/kernel/hca_driver.h index b9c5e136..3099c4f8 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_driver.h +++ b/branches/MTHCA/hw/mthca/kernel/hca_driver.h @@ -38,13 +38,14 @@ #include #include #include +#include "hca_data.h" #include "mt_l2w.h" #include "hca_debug.h" -#include "hca_data.h" + + #include 
"hca_pnp.h" #include "hca_pci.h" - #if !defined(FILE_DEVICE_INFINIBAND) // Not defined in WXP DDK #define FILE_DEVICE_INFINIBAND 0x0000003B #endif @@ -103,7 +104,6 @@ typedef struct _hca_bar } hca_bar_t; - typedef struct _hca_dev_ext { /* ------------------------------------------------- @@ -141,7 +141,8 @@ typedef struct _hca_dev_ext hca_bar_t bar[HCA_BAR_TYPE_MAX]; /* HCA memory bars */ CM_PARTIAL_RESOURCE_DESCRIPTOR interruptInfo; /* HCA interrupt resources */ PKINTERRUPT int_obj; /* HCA interrupt object */ - spinlock_t isr_lock; /* lock for the ISR */ + spinlock_t isr_lock; /* lock for the ISR */ + ULONG bus_number; /* HCA's bus number */ /* ------------------------------------------------- * VARIABLES @@ -156,6 +157,8 @@ typedef struct _hca_dev_ext #define EXT_FROM_HOB(hob_p) (container_of(hob_p, hca_dev_ext_t, hca.hob)) #define IBDEV_FROM_HOB(hob_p) (&EXT_FROM_HOB(hob_p)->hca.mdev->ib_dev) #define HOBUL_FROM_HOB(hob_p) (&EXT_FROM_HOB(hob_p)->hca.hobul) +#define HOB_FROM_IBDEV(dev_p) (mlnx_hob_t *)&dev_p->mdev->ext->hca.hob + @@ -198,9 +201,10 @@ Firmware Update definitions static inline errno_to_iberr(int err) { -#define ERR_NAME(a) #a #define MAP_ERR(err,ibstatus) case err: ib_status = ibstatus; break ib_api_status_t ib_status = IB_UNKNOWN_ERROR; + if (err < 0) + err = -err; switch (err) { MAP_ERR( ENOENT, IB_NOT_FOUND ); MAP_ERR( EINTR, IB_INTERRUPTED ); @@ -213,8 +217,8 @@ static inline errno_to_iberr(int err) MAP_ERR( EINVAL, IB_INVALID_PARAMETER ); MAP_ERR( ENOSYS, IB_UNSUPPORTED ); default: - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, - ("Unmapped errno %s (%d)\n", ERR_NAME(err), err)); + //HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + // "Unmapped errno (%d)\n", err); break; } return ib_status; diff --git a/branches/MTHCA/hw/mthca/kernel/hca_mcast.c b/branches/MTHCA/hw/mthca/kernel/hca_mcast.c index 1b39abe8..19208efc 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_mcast.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_mcast.c @@ -34,7 +34,15 @@ #include #include -#include "hca_data.h" +#include "hca_driver.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_mcast.tmh" +#endif +#include "mthca_dev.h" /* * Multicast Support Verbs. 
@@ -48,23 +56,111 @@ mlnx_attach_mcast ( IN OUT ci_umv_buf_t *p_umv_buf ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_qp); - UNREFERENCED_PARAMETER(p_mcast_gid); - UNREFERENCED_PARAMETER(mcast_lid); - UNREFERENCED_PARAMETER(ph_mcast); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_attach_mcast not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + mlnx_mcast_t *mcast_p; + struct ib_ucontext *context_p = NULL; + struct ib_udata *udata_p = NULL; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("User mode is not supported yet\n")); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM ,("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + if (!p_mcast_gid || !ph_mcast) { + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + + // allocate structure + mcast_p = (mlnx_mcast_t*)kmalloc(sizeof *mcast_p, GFP_ATOMIC ); + if (mcast_p == NULL) { + status = IB_INSUFFICIENT_MEMORY; + goto err_no_mem; + } + +#ifdef WIN_USER_SUPPORT + if( p_umv_buf && p_umv_buf->command ) + { + //TODO: check the below sanity check + if ((p_umv_buf->input_size - sizeof (uint32_t)) != + hca_ul_info->pd_ul_resources_sz || + NULL == p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto cleanup; + } + pd_ul_resources_p = (void *)p_umv_buf->p_inout_buf; + + //TODO: create user context by call to mthca_alloc_ucontext() + } +#endif + + // attach to mcast group + if( p_umv_buf && p_umv_buf->command ) { + //TODO: call uverbs + } + else { + err = ibv_attach_mcast(ib_qp_p, (union ib_gid *)p_mcast_gid, (u16)mcast_lid); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ibv_attach_mcast failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_attach; + } + } + + // fill the structure + mcast_p->ib_qp_p = ib_qp_p; + mcast_p->mcast_lid = mcast_lid; + RtlCopyMemory(mcast_p->mcast_gid.raw, p_mcast_gid->raw, sizeof *p_mcast_gid); + HCA_PRINT(TRACE_LEVEL_WARNING, HCA_DBG_SHIM, ("mcasth %p, qp_p %p, mlid %hx, mgid %I64x`%I64x\n", + mcast_p, mcast_p->ib_qp_p, mcast_p->mcast_lid, + *(uint64_t*)&mcast_p->mcast_gid.raw[0], + *(uint64_t*)&mcast_p->mcast_gid.raw[8] )); + + // return the result + if (ph_mcast) *ph_mcast = (ib_mcast_handle_t)mcast_p; + +#ifdef WIN_USER_SUPPORT + if( p_umv_buf && p_umv_buf->command ) + { + p_umv_buf->output_size = p_umv_buf->input_size; + /* + * Copy the pd_idx back to user + */ + cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->pd_ul_resources_sz), + &pd_idx, sizeof (pd_idx)); + p_umv_buf->status = IB_SUCCESS; + } +#endif + + status = IB_SUCCESS; + goto end; + +err_attach: + kfree(mcast_p); +err_no_mem: +err_invalid_param: +err_user_unsupported: +end: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status; - u_int32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); - u_int32_t qp_num = QP_NUM_FROM_HNDL(h_qp); - u_int32_t qp_idx = 0; + uint32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); + uint32_t qp_num = QP_NUM_FROM_HNDL(h_qp); + uint32_t qp_idx = 0; mlnx_mcast_t *mcast_p = NULL; mlnx_hobul_t *hobul_p; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); UNUSED_PARAM( mcast_lid ); @@ -114,7 +210,7 @@ mlnx_attach_mcast ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - 
CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_locked: @@ -124,12 +220,11 @@ cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status = %d\n",status)); + HCA_EXIT(HCA_DBG_SHIM); return status; #endif } @@ -139,19 +234,53 @@ mlnx_detach_mcast ( IN const ib_mcast_handle_t h_mcast) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_mcast); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_detach_mcast not implemented\n")); - return IB_INVALID_CA_HANDLE; + + ib_api_status_t status; + int err; + mlnx_mcast_t *mcast_p = (mlnx_mcast_t*)h_mcast; + + // sanity check + if (!mcast_p || !mcast_p->ib_qp_p) { + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + HCA_PRINT(TRACE_LEVEL_WARNING, HCA_DBG_SHIM,("mcasth %p, qp_p %p, mlid %hx, mgid %I64x`%I64x\n", + mcast_p, mcast_p->ib_qp_p, mcast_p->mcast_lid, + *(uint64_t*)&mcast_p->mcast_gid.raw[0], + *(uint64_t*)&mcast_p->mcast_gid.raw[8] )); + + // detach + if( mcast_p->ib_qp_p->ucontext) { + //TODO: call uverbs + } + else { + err = ibv_detach_mcast( mcast_p->ib_qp_p, + (union ib_gid *)&mcast_p->mcast_gid, mcast_p->mcast_lid ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ibv_detach_mcast failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_detach_mcast; + } + } + + status = IB_SUCCESS; + +err_detach_mcast: + kfree(mcast_p); +err_invalid_param: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status; mlnx_mcast_t *mcast_p = (mlnx_mcast_t *)h_mcast; - u_int32_t hca_idx; - u_int32_t qp_num; - u_int32_t qp_idx = 0; + uint32_t hca_idx; + uint32_t qp_num; + uint32_t qp_idx = 0; mlnx_hobul_t *hobul_p; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (!mcast_p || mcast_p->mark != E_MARK_MG) { status = IB_INVALID_PARAMETER; @@ -187,7 +316,7 @@ mlnx_detach_mcast ( mcast_p->mark = E_MARK_INVALID; cl_free( mcast_p); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_locked: @@ -198,8 +327,8 @@ cleanup: cl_free( mcast_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n",status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } diff --git a/branches/MTHCA/hw/mthca/kernel/hca_memory.c b/branches/MTHCA/hw/mthca/kernel/hca_memory.c index ded3062a..7e1bf34e 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_memory.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_memory.c @@ -32,15 +32,14 @@ #include "hca_driver.h" +#include "hca_utils.h" -static inline u32 convert_access(ib_access_t acc) -{ - return (acc & IB_AC_ATOMIC ? IB_ACCESS_REMOTE_ATOMIC : 0) | - (acc & IB_AC_RDMA_WRITE ? IB_ACCESS_REMOTE_WRITE : 0) | - (acc & IB_AC_RDMA_READ ? IB_ACCESS_REMOTE_READ : 0) | - (acc & IB_AC_LOCAL_WRITE ? IB_ACCESS_LOCAL_WRITE : 0); -} - +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_memory.tmh" +#endif /* * Memory Management Verbs. 
@@ -59,22 +58,17 @@ mlnx_register_mr ( ib_api_status_t status; int err; - u_int32_t lkey, rkey; + uint32_t lkey, rkey; struct ib_mr *mr_p; struct ib_umem region; mt_iobuf_t iobuf; - struct ib_udata udata; + ci_umv_buf_t umv_buf; struct mthca_mr *mro_p; struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_device *ib_dev = ib_pd_p->device; - - UNUSED_PARAM( um_call ); + HCA_ENTER(HCA_DBG_SHIM); - HCA_ENTER(MLNX_DBG_TRACE); - - HCA_TRACE(CL_DBG_ERROR, ("mlnx_modify_mr not implemented\n")); - return IB_UNSUPPORTED; - // sanity checks if( !cl_is_blockable() ) { status = IB_UNSUPPORTED; @@ -84,7 +78,17 @@ mlnx_register_mr ( status = IB_INVALID_PARAMETER; goto err_invalid_parm; } + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (p_mr_create->access_ctrl & (IB_AC_RDMA_WRITE | IB_AC_RDMA_READ) && + !(p_mr_create->access_ctrl & IB_AC_LOCAL_WRITE)) { + status = IB_INVALID_PARAMETER; + goto err_invalid_access; + } +#ifdef WIN_TO_BE_REMOVED // lock buffer for user if (um_call) { err = iobuf_register( @@ -94,7 +98,7 @@ mlnx_register_mr ( (int)p_mr_create->access_ctrl, &iobuf ); if (err) { - HCA_TRACE (CL_DBG_ERROR, ("iobuf_register failed(%d) \n",err)); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("iobuf_register failed(%d)",err)); status = errno_to_iberr(err); goto err_lock; } @@ -102,54 +106,61 @@ mlnx_register_mr ( // prepare parameters RtlZeroMemory(®ion, sizeof(region)); - RtlZeroMemory(&udata, sizeof(udata)); + RtlZeroMemory(&umv_buf, sizeof(umv_buf)); region.user_base = (u64)p_mr_create->vaddr; region.virt_base = (u64)p_mr_create->vaddr; region.page_size = PAGE_SIZE; region.length = p_mr_create->length; + region.offset = p_mr_create->vaddr & (PAGE_SIZE - 1); //TODO: end filling region (add list of chunks) - //TODO: fill udata - + //TODO: fill umv_buf +#endif + // register mr - mr_p = mthca_reg_user_mr(ib_pd_p, ®ion, - convert_access(p_mr_create->access_ctrl), &udata); + mr_p = ibv_reg_mr(ib_pd_p, map_qp_ibal_acl(p_mr_create->access_ctrl), + p_mr_create->vaddr, p_mr_create->length, + (uint64_t)(ULONG_PTR)p_mr_create->vaddr, um_call ); if (IS_ERR(mr_p)) { err = PTR_ERR(mr_p); - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, - ("mthca_reg_user_mr failed (%d)\n", err)); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("ibv_reg_mr failed (%d)\n", err)); status = errno_to_iberr(err); - goto err_reg_user_mr; + goto err_reg_mr; } // results mro_p = (struct mthca_mr *)mr_p; +#ifdef WIN_TO_BE_REMOVED mro_p->iobuf = iobuf; - if (ph_mr) *ph_mr = (ib_mr_handle_t)mr_p; +#endif *p_lkey = mr_p->lkey; *p_rkey = mr_p->rkey; + if (ph_mr) *ph_mr = (ib_mr_handle_t)mr_p; status = IB_SUCCESS; -err_reg_user_mr: +err_reg_mr: +#ifdef WIN_TO_BE_REMOVED if (um_call) iobuf_deregister(&iobuf ); err_lock: +#endif +err_invalid_access: err_invalid_parm: err_unsupported: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); mlnx_hobul_t *hobul_p; ib_api_status_t status; HH_mr_t mr_props; mlnx_mro_t *mro_p = NULL; - u_int32_t lkey=0, rkey=0; + uint32_t lkey=0, rkey=0; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return 
IB_UNSUPPORTED; @@ -203,7 +214,7 @@ err_unsupported: mro_p->mr_lkey = lkey; if (ph_mr) *ph_mr = (ib_mr_handle_t)mro_p; - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup_post_lock: @@ -214,10 +225,9 @@ cleanup: mro_p->mark = E_MARK_INVALID; cl_free( mro_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mro_p 0x%p mark %d\n", mro_p, (mro_p ? mro_p->mark : 0xBAD))); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("mro_p->mark %d \n",mro_p ? mro_p->mark : 0xBAD))); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d \n",status))); + HCA_EXIT(HCA_DBG_MEMORY); return status; #endif } @@ -235,14 +245,15 @@ mlnx_register_pmr ( #ifndef WIN_TO_BE_CHANGED ib_api_status_t status; int err; - u_int32_t lkey, rkey; + uint32_t lkey, rkey; struct ib_mr *mr_p; struct ib_phys_buf *buffer_list; struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_device *ib_dev = ib_pd_p->device; UNUSED_PARAM( um_call ); - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); // sanity checks if( !cl_is_blockable() ) { @@ -261,13 +272,14 @@ mlnx_register_pmr ( //NB: p_pmr_create->hca_page_size is not used, i.e. supposed it is always the same // register pmr - mr_p = mthca_reg_phys_mr(ib_pd_p, - buffer_list, p_pmr_create->num_ranges, - convert_access(p_pmr_create->access_ctrl), - p_vaddr ); + if (p_pmr_create->length == (uint64_t)-1LL) + mr_p = ibv_get_dma_mr(ib_pd_p, map_qp_ibal_acl(p_pmr_create->access_ctrl) ); + else + mr_p = ibv_reg_phys_mr(ib_pd_p, buffer_list, p_pmr_create->num_ranges, + map_qp_ibal_acl(p_pmr_create->access_ctrl), p_vaddr ); if (IS_ERR(mr_p)) { err = PTR_ERR(mr_p); - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, ("mthca_reg_phys_mr failed (%d)\n", err)); status = errno_to_iberr(err); goto err_reg_phys_mr; @@ -283,23 +295,22 @@ mlnx_register_pmr ( err_reg_phys_mr: err_invalid_parm: err_unsupported: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); mlnx_hobul_t *hobul_p; ib_api_status_t status; HH_mr_t mr_props = { 0 }; mlnx_mro_t *mro_p = NULL; - u_int32_t lkey, rkey; + uint32_t lkey, rkey; UNUSED_PARAM( um_call ); - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -363,9 +374,9 @@ err_unsupported: if (ph_mr) *ph_mr = (ib_mr_handle_t)mro_p; *p_vaddr = mro_p->mr_start; // return the updated address - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("mro_p 0x%p mark %d\n", mro_p, (mro_p ? mro_p->mark : 0xBAD))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("mro_p->mark %d\n",mro_p ? 
mro_p->mark : 0xBAD))); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: @@ -379,8 +390,8 @@ cleanup: mro_p->mark = E_MARK_INVALID; cl_free( mro_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d\n",status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } @@ -393,18 +404,18 @@ mlnx_query_mr ( #ifndef WIN_TO_BE_CHANGED UNREFERENCED_PARAMETER(h_mr); UNREFERENCED_PARAMETER(p_mr_query); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_query_mr not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_query_mr not implemented\n")); return IB_UNSUPPORTED; #else - u_int32_t hca_idx; - u_int32_t pd_idx; + uint32_t hca_idx; + uint32_t pd_idx; mlnx_hobul_t *hobul_p; ib_api_status_t status = IB_SUCCESS; HH_mr_info_t mr_info; mlnx_mro_t *mro_p = NULL; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -429,8 +440,8 @@ mlnx_query_mr ( status = IB_INVALID_PD_HANDLE; goto cleanup; } - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("mro_p 0x%p mark %d\n", mro_p, (mro_p ? mro_p->mark : 0xBAD))); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_MEMORY,"file %s line %d\n", __FILE__, __LINE__) + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("mro_p->mark %d",mro_p ? mro_p->mark : 0xBAD))); if (HH_OK != THH_hob_query_mr(hobul_p->hh_hndl, mro_p->mr_lkey, &mr_info)) { status = IB_ERROR; @@ -439,12 +450,12 @@ mlnx_query_mr ( mlnx_conv_vapi_mr_attr((ib_pd_handle_t)PD_HNDL_FROM_PD(pd_idx), &mr_info, p_mr_query); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d \n",status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } @@ -468,20 +479,20 @@ mlnx_modify_mr ( UNREFERENCED_PARAMETER(p_rkey); UNREFERENCED_PARAMETER(h_pd); UNREFERENCED_PARAMETER(um_call); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_modify_mr not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_modify_mr not implemented\n")); return IB_UNSUPPORTED; #else - u_int32_t hca_idx; - u_int32_t pd_idx, old_pd_idx; + uint32_t hca_idx; + uint32_t pd_idx, old_pd_idx; mlnx_hobul_t *hobul_p; ib_api_status_t status; VAPI_mr_change_t change_mask; HH_mr_t mr_props; mlnx_mro_t *mro_p = NULL; - u_int32_t lkey, rkey; + uint32_t lkey, rkey; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -525,8 +536,7 @@ mlnx_modify_mr ( goto cleanup; } - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); - CL_TRACE(MLNX_DBG_MEM, g_mlnx_dbg_lvl, ("mro_p 0x%p mark %d\n", mro_p, (mro_p ? mro_p->mark : 0xBAD))); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("mro_p->mark %d\n", mro_p ? 
mro_p->mark : 0xBAD))); // change_mask = mem_modify_req; change_mask = 0; @@ -566,12 +576,10 @@ mlnx_modify_mr ( if (p_lkey) *p_lkey = lkey; if (p_rkey) *p_rkey = rkey; - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); - return IB_SUCCESS; + status = IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d \n", status))); return status; #endif } @@ -597,22 +605,22 @@ mlnx_modify_pmr ( UNREFERENCED_PARAMETER(p_rkey); UNREFERENCED_PARAMETER(h_pd); UNREFERENCED_PARAMETER(um_call); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_modify_pmr not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_modify_pmr not implemented\n")); return IB_UNSUPPORTED; #else - u_int32_t hca_idx; - u_int32_t pd_idx; + uint32_t hca_idx; + uint32_t pd_idx; mlnx_hobul_t *hobul_p; ib_api_status_t status; VAPI_mr_change_t change_mask; HH_mr_t mr_props = { 0 }; mlnx_mro_t *mro_p = NULL; - u_int32_t lkey, rkey; + uint32_t lkey, rkey; UNUSED_PARAM( um_call ); - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -684,8 +692,8 @@ mlnx_modify_pmr ( if (p_lkey) *p_lkey = lkey; if (p_rkey) *p_rkey = rkey; - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); return IB_SUCCESS; + goto end; cleanup: if( mr_props.tpt.tpt.buf_lst.buf_sz_lst ) @@ -693,9 +701,8 @@ cleanup: if( mr_props.tpt.tpt.buf_lst.phys_buf_lst ) cl_free( mr_props.tpt.tpt.buf_lst.phys_buf_lst ); - - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); +end: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d \n", status))); return status; #endif } @@ -720,20 +727,20 @@ mlnx_register_smr ( UNREFERENCED_PARAMETER(p_rkey); UNREFERENCED_PARAMETER(ph_mr); UNREFERENCED_PARAMETER(um_call); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_register_smr not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_register_smr not implemented\n")); return IB_UNSUPPORTED; #else - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); mlnx_hobul_t *hobul_p; ib_api_status_t status; HH_smr_t smr_props; mlnx_mro_t *base_mro_p = NULL; mlnx_mro_t *new_mro_p = NULL; - u_int32_t lkey, rkey; + uint32_t lkey, rkey; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -767,7 +774,7 @@ mlnx_register_smr ( smr_props.start = *p_vaddr; // PD handle for new memory region smr_props.pd = PD_NUM_FROM_HNDL(base_mro_p->mr_pd_handle); - smr_props.acl = map_ibal_acl(access_ctrl); // Access control (R/W permission local/remote + smr_props.acl = map_qp_ibal_acl(access_ctrl); // Access control (R/W permission local/remote // Allocate new handle for shared region if (NULL == (new_mro_p = cl_zalloc( sizeof(mlnx_mro_t)))) { @@ -808,9 +815,9 @@ mlnx_register_smr ( *ph_mr = (ib_mr_handle_t)new_mro_p; -// CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("new_mro_p 0x%p page 0x%x, %d\n", +// HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_MEMORY ,HCA_DBG_SHIM ,("new_mro_p 0x%p page 0x%x, %d\n", // new_mro_p, new_mro_p->mr_first_page_addr, new_mro_p->mr_num_pages)); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: @@ 
-818,8 +825,8 @@ cleanup: new_mro_p->mark = E_MARK_INVALID; cl_free( new_mro_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d\n", status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } @@ -832,7 +839,7 @@ mlnx_deregister_mr ( ib_api_status_t status; int err; - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); // sanity checks if( !cl_is_blockable() ) { @@ -840,46 +847,46 @@ mlnx_deregister_mr ( goto err_unsupported; } - // deregister - err = mthca_dereg_mr((struct ib_mr *)h_mr); - if (err) { - status = errno_to_iberr(err); - HCA_TRACE (CL_DBG_ERROR, - ("mthca_dereg_mr failed (%d) for mr %p\n", err, h_mr)); - goto err_dereg_mr; - } - +#ifdef WIN_TO_BE_REMOVED // unlock user buffer { struct mthca_mr *mro_p = (struct mthca_mr *)h_mr; - if (mro_p->iobuf.is_user) + if (mro_p->ibmr.uobject) iobuf_deregister( &mro_p->iobuf ); } +#endif + + // deregister + err = ibv_dereg_mr((struct ib_mr *)h_mr); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY, + ("mthca_dereg_mr failed (%d)", status)); + goto err_dereg_mr; + } status = IB_SUCCESS; err_dereg_mr: err_unsupported: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else mlnx_mro_t *mro_p = NULL; - u_int32_t hca_idx; - u_int32_t pd_idx; + uint32_t hca_idx; + uint32_t pd_idx; mlnx_hobul_t *hobul_p; ib_api_status_t status; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; mro_p = (mlnx_mro_t *)h_mr; if (!mro_p || mro_p->mark != E_MARK_MR) { - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mro_p 0x%p mark %d\n", mro_p, (mro_p ? mro_p->mark : 0xBAD))); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("mro_p->mark %d\n", mro_p ? 
mro_p->mark : 0xBAD))); status = IB_INVALID_MR_HANDLE; goto cleanup; } @@ -907,13 +914,13 @@ err_unsupported: // update PD object count cl_atomic_dec(&hobul_p->pd_info_tbl[pd_idx].count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_MEMORY,"pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count) if (mro_p) { mro_p->mark = E_MARK_INVALID; cl_free( mro_p); } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: @@ -921,8 +928,8 @@ cleanup: mro_p->mark = E_MARK_INVALID; cl_free( mro_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("stauts %d\n", status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } @@ -943,16 +950,16 @@ mlnx_create_mw ( UNREFERENCED_PARAMETER(p_rkey); UNREFERENCED_PARAMETER(ph_mw); UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_create_mw not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_create_mw not implemented\n")); return IB_UNSUPPORTED; #else - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); mlnx_hobul_t *hobul_p; mlnx_mwo_t *mwo_p = NULL; ib_api_status_t status; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -1000,7 +1007,7 @@ mlnx_create_mw ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: @@ -1010,11 +1017,10 @@ cleanup: } if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d\n", status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } @@ -1031,17 +1037,17 @@ mlnx_query_mw ( UNREFERENCED_PARAMETER(ph_pd); UNREFERENCED_PARAMETER(p_rkey); UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_query_mw not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_query_mw not implemented\n")); return IB_UNSUPPORTED; #else mlnx_mwo_t *mwo_p = NULL; - u_int32_t hca_idx; - u_int32_t pd_idx; + uint32_t hca_idx; + uint32_t pd_idx; mlnx_hobul_t *hobul_p; ib_api_status_t status; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -1075,17 +1081,16 @@ mlnx_query_mw ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d\n", status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } @@ -1096,17 +1101,17 @@ mlnx_destroy_mw ( { #ifndef WIN_TO_BE_CHANGED UNREFERENCED_PARAMETER(h_mw); - CL_TRACE(CL_DBG_ERROR, 
g_mlnx_dbg_lvl, ("mlnx_destroy_mw not implemented\n")); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_destroy_mw not implemented\n")); return IB_UNSUPPORTED; #else mlnx_mwo_t *mwo_p = NULL; - u_int32_t hca_idx; - u_int32_t pd_idx; + uint32_t hca_idx; + uint32_t pd_idx; mlnx_hobul_t *hobul_p; ib_api_status_t status; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_MEMORY); if( !cl_is_blockable() ) return IB_UNSUPPORTED; @@ -1145,7 +1150,7 @@ mlnx_destroy_mw ( mwo_p->mark = E_MARK_INVALID; cl_free( mwo_p); } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_MEMORY); return IB_SUCCESS; cleanup: @@ -1153,8 +1158,8 @@ cleanup: mwo_p->mark = E_MARK_INVALID; cl_free( mwo_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("status %d\n",status))); + HCA_EXIT( HCA_DBG_MEMORY); return status; #endif } diff --git a/branches/MTHCA/hw/mthca/kernel/hca_pci.c b/branches/MTHCA/hw/mthca/kernel/hca_pci.c index 0e308e4f..739de367 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_pci.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_pci.c @@ -1,5 +1,11 @@ #include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_pci.tmh" +#endif #include #include #include @@ -7,7 +13,6 @@ #include #endif - #define HCA_RESET_HCR_OFFSET 0x000F0010 #define HCA_RESET_TOKEN CL_HTON32(0x00000001) @@ -15,6 +20,12 @@ #define PCI_CAPABILITY_ID_PCIX 0x07 #define PCI_CAPABILITY_ID_PCIEXP 0x10 +boolean_t +FindBridgeIf( + IN hca_dev_ext_t *pi_ext, + IN PBUS_INTERFACE_STANDARD pi_pInterface + ); + /* * Vital Product Data Capability @@ -97,7 +108,6 @@ __restore_pci_config( #pragma alloc_text (PAGE, __restore_pci_config) #endif - /* Forwards the request to the HCA's PDO. 
*/ static NTSTATUS __get_bus_ifc( @@ -126,8 +136,8 @@ __get_bus_ifc( if( !pIrp ) { ObDereferenceObject( pDev ); - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoBuildSynchronousFsdRequest failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); return STATUS_INSUFFICIENT_RESOURCES; } @@ -183,7 +193,7 @@ __save_pci_config( pBusIfc->Context, PCI_WHICHSPACE_CONFIG, &pBuf[0], 0, 88 ); if( len != 88 ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, ("Failed to read HCA config.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR , HCA_DBG_PNP ,("Failed to read HCA config.\n")); return STATUS_DEVICE_NOT_READY; } @@ -192,7 +202,7 @@ __save_pci_config( pBusIfc->Context, PCI_WHICHSPACE_CONFIG, &pBuf[24], 96, 160 ); if( len != 160 ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to read HCA config.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to read HCA config.\n")); return STATUS_DEVICE_NOT_READY; } @@ -305,7 +315,7 @@ __restore_pci_config( len = PCI_CONFIG_WRITE( VendorID, DeviceID ); if( len != PCI_CONFIG_LEN( VendorID, DeviceID ) ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to write vendor/device IDs.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to write vendor/device IDs.\n")); return STATUS_DEVICE_NOT_READY; } @@ -318,7 +328,7 @@ __restore_pci_config( len = PCI_CONFIG_WRITE( Status, u.type0.MaximumLatency ); if( len != PCI_CONFIG_LEN( Status, u.type0.MaximumLatency ) ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to write type 0 common header.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to write type 0 common header.\n")); return STATUS_DEVICE_NOT_READY; } } @@ -328,7 +338,7 @@ __restore_pci_config( len = PCI_CONFIG_WRITE( Status, u.type1.InterruptPin ); if( len != PCI_CONFIG_LEN( Status, u.type1.InterruptPin ) ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to write type 1 common header.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to write type 1 common header.\n")); return STATUS_DEVICE_NOT_READY; } } @@ -338,7 +348,7 @@ __restore_pci_config( pConfig->DeviceSpecific, PCI_CONFIG_OFFSET( DeviceSpecific ), 192 ); if( len != 192 ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to write capabilites.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to write capabilites.\n")); return STATUS_DEVICE_NOT_READY; } @@ -346,7 +356,7 @@ __restore_pci_config( len = PCI_CONFIG_WRITE( Command, Command ); if( len != PCI_CONFIG_LEN( Command, Command ) ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to write command register.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Failed to write command register.\n")); return STATUS_DEVICE_NOT_READY; } @@ -358,8 +368,8 @@ __restore_pci_config( if( len != PCI_CONFIG_LEN( u.type1.BridgeControl, u.type1.BridgeControl ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to write bridge control register.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to write bridge control register.\n")); return STATUS_DEVICE_NOT_READY; } } @@ -370,168 +380,177 @@ __restore_pci_config( NTSTATUS -hca_reset( - IN DEVICE_OBJECT* const pDevObj ) +hca_reset( DEVICE_OBJECT* const pDevObj, int is_tavor ) { - NTSTATUS status; + NTSTATUS status = STATUS_SUCCESS; PCI_COMMON_CONFIG hcaConfig, brConfig; BUS_INTERFACE_STANDARD brBusIfc, hcaBusIfc; - hca_dev_ext_t *pExt; + hca_dev_ext_t *pExt = (hca_dev_ext_t*)pDevObj->DeviceExtension; ULONG data, i; PULONG reset_p; PHYSICAL_ADDRESS pa; + static int skip = 1; HCA_ENTER( HCA_DBG_PNP ); + if (skip) goto resetErr1; /* Get the HCA's bus interface. 
*/ status = __get_bus_ifc( pDevObj, &GUID_BUS_INTERFACE_STANDARD, &hcaBusIfc ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to get HCA bus interface.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Failed to get HCA bus interface.\n")); goto resetErr1; } -#ifdef WIN_TO_BE_CHANGED - //leo: not supported this way - /* Get the HCA Bridge's bus interface. */ - status = __get_bus_ifc( pDevObj, &GUID_HCA_BRIDGE_INTERFACE, &brBusIfc ); - if( !NT_SUCCESS( status ) ) - { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to get HCA bridge bus interface.\n") ); - goto resetErr2; + if (is_tavor) { +#if 0 + /* Get the HCA Bridge's bus interface. */ + status = __get_bus_ifc( pDevObj, &GUID_HCA_BRIDGE_INTERFACE, &brBusIfc ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to get HCA bridge bus interface.\n")); + goto resetErr2; + } +#else + if (!FindBridgeIf( pExt, &brBusIfc )) + goto resetErr2; +#endif } -#endif /* Save the HCA's configuration. */ status = __save_pci_config( &hcaBusIfc, &hcaConfig ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to save HCA config.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save HCA config.\n")); goto resetErr3; } -#ifdef WIN_TO_BE_CHANGED - //leo: not supported this way - /* Save the HCA bridge's configuration. */ - status = __save_pci_config( &brBusIfc, &brConfig ); - if( !NT_SUCCESS( status ) ) - { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to save bridge config.\n") ); - goto resetErr3; + if (is_tavor) { + /* Save the HCA bridge's configuration. */ + status = __save_pci_config( &brBusIfc, &brConfig ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save bridge config.\n")); + goto resetErr3; + } } -#endif - + /* map reset register */ - pExt = (hca_dev_ext_t*)pDevObj->DeviceExtension; pa.QuadPart = pExt->bar[HCA_BAR_TYPE_HCR].phys + HCA_RESET_HCR_OFFSET; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Mapping reset register with address 0x%I64x\n", pa.QuadPart)); reset_p = MmMapIoSpace( pa, 4, MmNonCached ); if( !reset_p ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to map reset register with address 0x%I64x\n", pa.QuadPart) ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to map reset register with address 0x%I64x\n", pa.QuadPart)); status = STATUS_UNSUCCESSFUL; goto resetErr3; } /* Issue the reset. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Resetting the chip ...\n")); WRITE_REGISTER_ULONG( reset_p, HCA_RESET_TOKEN ); /* Wait a second. */ cl_thread_suspend( 1000 ); /* unmap the reset register */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Unmapping reset register \n")); MmUnmapIoSpace( reset_p, 4 ); -#ifdef WIN_TO_BE_CHANGED - //leo: not supported this way - /* - * Now read the bridge's configuration register until it doesn't - * return 0xFFFFFFFF. Give it 10 seconds for good measure. - */ - for( i = 0; i < 10; i++ ) - { - if( brBusIfc.GetBusData( brBusIfc.Context, PCI_WHICHSPACE_CONFIG, - &data, 0, sizeof(ULONG) ) != sizeof(ULONG) ) + if (is_tavor) { + /* + * Now read the bridge's configuration register until it doesn't + * return 0xFFFFFFFF. Give it 10 seconds for good measure. 
+ */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Read the Bridge's configuration register \n")); + for( i = 0; i < 10; i++ ) + { + if( brBusIfc.GetBusData( brBusIfc.Context, PCI_WHICHSPACE_CONFIG, + &data, 0, sizeof(ULONG) ) != sizeof(ULONG) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to read bridge configuration data.\n")); + status = STATUS_UNSUCCESSFUL; + goto resetErr3; + } + /* See if we got valid data. */ + if( data != 0xFFFFFFFF ) + break; + + cl_thread_suspend( 1000 ); + } + if( i == 10 ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to read bridge configuration data.\n") ); + /* Darn, timed out. :( */ + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Doh! HCA Bridge never came back from reset!\n")); status = STATUS_UNSUCCESSFUL; goto resetErr3; } - /* See if we got valid data. */ - if( data != 0xFFFFFFFF ) - break; - - cl_thread_suspend( 1000 ); } - if( i == 10 ) - { - /* Darn, timed out. :( */ - HCA_TRACE( HCA_DBG_ERROR, - ("Doh! HCA Bridge never came back from reset!\n") ); - status = STATUS_UNSUCCESSFUL; - goto resetErr3; - } -#else - /* //TODO: ??? can we read HCA (and not bridge) for Tavor ???? - * Now read the HCA's configuration register until it doesn't - * return 0xFFFFFFFF. Give it 10 seconds for good measure. - */ - for( i = 0; i < 100; i++ ) - { - if( hcaBusIfc.GetBusData( hcaBusIfc.Context, PCI_WHICHSPACE_CONFIG, - &data, 0, sizeof(ULONG) ) != sizeof(ULONG) ) + + else { + /* + * Now read the HCA's configuration register until it doesn't + * return 0xFFFFFFFF. Give it 10 seconds for good measure. + */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Read the HCA's configuration register \n")); + for( i = 0; i < 100; i++ ) + { + if( hcaBusIfc.GetBusData( hcaBusIfc.Context, PCI_WHICHSPACE_CONFIG, + &data, 0, sizeof(ULONG) ) != sizeof(ULONG) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to read HCA configuration data.\n")); + status = STATUS_UNSUCCESSFUL; + goto resetErr3; + } + /* See if we got valid data. */ + if( data != 0xFFFFFFFF ) + break; + + cl_thread_suspend( 100 ); + } + if( i >= 100 ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to read HCA configuration data.\n") ); + /* Darn, timed out. :( */ + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Doh! HCA Bridge never came back from reset!\n")); status = STATUS_UNSUCCESSFUL; goto resetErr3; } - /* See if we got valid data. */ - if( data != 0xFFFFFFFF ) - break; - - cl_thread_suspend( 100 ); - } - if( i >= 100 ) - { - /* Darn, timed out. :( */ - HCA_TRACE( HCA_DBG_ERROR, - ("Doh! HCA Bridge never came back from reset!\n") ); - status = STATUS_UNSUCCESSFUL; - goto resetErr3; } -#endif - -#ifdef WIN_TO_BE_CHANGED - /* Restore the HCA's bridge configuration. */ - status = __restore_pci_config( &brBusIfc, &brConfig ); - if( !NT_SUCCESS( status ) ) - { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to restore bridge config.\n") ); - goto resetErr3; + if (is_tavor) { + /* Restore the HCA's bridge configuration. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Restoring bridge PCI configuration \n")); + status = __restore_pci_config( &brBusIfc, &brConfig ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to restore bridge config.\n")); + goto resetErr3; + } } -#endif - + /* Restore the HCA's configuration. 
*/ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Restoring HCA PCI configuration \n")); status = __restore_pci_config( &hcaBusIfc, &hcaConfig ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to restore HCA config.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to restore HCA config.\n")); } resetErr3: -#ifdef WIN_TO_BE_CHANGED - brBusIfc.InterfaceDereference( brBusIfc.Context ); + if (is_tavor) + brBusIfc.InterfaceDereference( brBusIfc.Context ); resetErr2: -#endif hcaBusIfc.InterfaceDereference( hcaBusIfc.Context ); resetErr1: @@ -613,7 +632,7 @@ hca_tune_pci( status = __get_bus_ifc( pDevObj, &GUID_BUS_INTERFACE_STANDARD, &hcaBusIfc ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, ("Failed to get HCA bus interface.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to get HCA bus interface.\n")); return status; } @@ -621,8 +640,8 @@ hca_tune_pci( status = __save_pci_config( &hcaBusIfc, &hcaConfig ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to save HCA config.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save HCA config.\n")); status = STATUS_UNSUCCESSFUL; goto tweakErr; } @@ -638,8 +657,8 @@ hca_tune_pci( /* Update the command field to max the read byte count if needed. */ if( (pPciXCap->Command & 0x000C) != 0x000C ) { - HCA_TRACE( HCA_DBG_WARN | HCA_DBG_PNP, - ("Updating max recv byte count of PCI-X capability.\n") ); + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("Updating max recv byte count of PCI-X capability.\n")); pPciXCap->Command = (pPciXCap->Command & ~PCI_X_CMD_MAX_READ) | (3 << 2); len = hcaBusIfc.SetBusData( hcaBusIfc.Context, PCI_WHICHSPACE_CONFIG, &pPciXCap->Command, @@ -647,8 +666,8 @@ hca_tune_pci( sizeof( pPciXCap->Command ) ); if( len != sizeof( pPciXCap->Command ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to update PCI-X maximum read byte count.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to update PCI-X maximum read byte count.\n")); status = STATUS_UNSUCCESSFUL; goto tweakErr; } @@ -665,8 +684,8 @@ hca_tune_pci( pPciExpCap = (PCI_PCIEXP_CAPABILITY*)(((UCHAR*)&hcaConfig) + capOffset); /* Update Max_Read_Request_Size. 
*/ - HCA_TRACE( HCA_DBG_WARN | HCA_DBG_PNP, - ("Updating max recv byte count of PCI-X capability.\n") ); + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("Updating max recv byte count of PCI-X capability.\n")); pPciExpCap->DevControl = (pPciExpCap->DevControl & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12); len = hcaBusIfc.SetBusData( hcaBusIfc.Context, PCI_WHICHSPACE_CONFIG, &pPciExpCap->DevControl, @@ -674,8 +693,8 @@ hca_tune_pci( sizeof( pPciExpCap->DevControl ) ); if( len != sizeof( pPciExpCap->DevControl ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to update PCI-Exp maximum read byte count.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to update PCI-Exp maximum read byte count.\n")); goto tweakErr; } } @@ -708,7 +727,7 @@ hca_enable_pci( status = __get_bus_ifc( pDevObj, &GUID_BUS_INTERFACE_STANDARD, &hcaBusIfc ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, ("Failed to get HCA bus interface.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR , HCA_DBG_PNP ,("Failed to get HCA bus interface.\n")); return STATUS_DEVICE_NOT_READY; } @@ -716,8 +735,8 @@ hca_enable_pci( status = __save_pci_config( &hcaBusIfc, pHcaConfig ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to save HCA config.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save HCA config.\n")); goto pciErr; } @@ -728,7 +747,7 @@ hca_enable_pci( (PVOID)&pHcaConfig->Command , 4, sizeof(ULONG) ); if( len != sizeof(ULONG) ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to write command register.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to write command register.\n")); status = STATUS_DEVICE_NOT_READY; goto pciErr; } diff --git a/branches/MTHCA/hw/mthca/kernel/hca_pci.h b/branches/MTHCA/hw/mthca/kernel/hca_pci.h index 5c5c3f90..63934149 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_pci.h +++ b/branches/MTHCA/hw/mthca/kernel/hca_pci.h @@ -4,7 +4,7 @@ NTSTATUS hca_reset( - IN DEVICE_OBJECT* const pDevObj ); + IN DEVICE_OBJECT* const pDevObj, int is_tavor ); NTSTATUS hca_enable_pci( diff --git a/branches/MTHCA/hw/mthca/kernel/hca_pnp.c b/branches/MTHCA/hw/mthca/kernel/hca_pnp.c index a54f7370..216add0c 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_pnp.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_pnp.c @@ -10,95 +10,103 @@ * Provides the driver entry points for the Tavor VPD. 
*/ +#include "hca_driver.h" +#include "mthca_dev.h" -#include -#include -#ifdef WIN_TO_BE_CHANGED -#include +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_pnp.tmh" #endif #include "mthca.h" +#include +#include + +extern const char *mthca_version; + static NTSTATUS hca_start( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_stop( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_stop( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_cancel_stop( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_remove( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static void hca_release_resources( - IN DEVICE_OBJECT* const pDevObj ); + IN DEVICE_OBJECT* const p_dev_obj ); static NTSTATUS hca_cancel_remove( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_surprise_remove( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_capabilities( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_pnp_state( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_bus_relations( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_removal_relations( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_query_power( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static NTSTATUS hca_set_power( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ); + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); static ci_interface_t* __alloc_hca_ifc( @@ -106,11 +114,11 @@ __alloc_hca_ifc( static NTSTATUS __get_ci_interface( - IN 
DEVICE_OBJECT* const pDevObj ); + IN DEVICE_OBJECT* const p_dev_obj ); static NTSTATUS __hca_register( - IN DEVICE_OBJECT *pDevObj ); + IN DEVICE_OBJECT *p_dev_obj ); static NTSTATUS __pnp_notify_target( @@ -192,55 +200,55 @@ hca_add_device( IN PDEVICE_OBJECT pPdo ) { NTSTATUS status; - DEVICE_OBJECT *pDevObj, *pNextDevObj; + DEVICE_OBJECT *p_dev_obj, *pNextDevObj; hca_dev_ext_t *p_ext; - HCA_ENTER( HCA_DBG_PNP ); + HCA_ENTER(HCA_DBG_PNP); /* * Create the device so that we have a device extension to store stuff in. */ status = IoCreateDevice( pDriverObj, sizeof(hca_dev_ext_t), NULL, FILE_DEVICE_INFINIBAND, FILE_DEVICE_SECURE_OPEN, - FALSE, &pDevObj ); + FALSE, &p_dev_obj ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoCreateDevice returned 0x%08X.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoCreateDevice returned 0x%08X.\n", status)); return status; } - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; cl_memclr( p_ext, sizeof(hca_dev_ext_t) ); /* Attach to the device stack. */ - pNextDevObj = IoAttachDeviceToDeviceStack( pDevObj, pPdo ); + pNextDevObj = IoAttachDeviceToDeviceStack( p_dev_obj, pPdo ); if( !pNextDevObj ) { //cl_event_destroy( &p_ext->mutex ); - IoDeleteDevice( pDevObj ); - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoAttachDeviceToDeviceStack failed.\n") ); + IoDeleteDevice( p_dev_obj ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoAttachDeviceToDeviceStack failed.\n")); return STATUS_NO_SUCH_DEVICE; } /* Inititalize the complib extension. */ - cl_init_pnp_po_ext( pDevObj, pNextDevObj, pPdo, g_mlnx_dbg_lvl, + cl_init_pnp_po_ext( p_dev_obj, pNextDevObj, pPdo, 0, &vfptrHcaPnp, NULL ); p_ext->state = HCA_ADDED; - HCA_EXIT( HCA_DBG_PNP ); + HCA_EXIT(HCA_DBG_PNP); return status; } static NTSTATUS __get_ci_interface( - IN DEVICE_OBJECT* const pDevObj ) + IN DEVICE_OBJECT* const p_dev_obj ) { NTSTATUS status; - IRP *pIrp; + IRP *p_irp; hca_dev_ext_t *p_ext; IO_STATUS_BLOCK ioStatus; IO_STACK_LOCATION *pIoStack; @@ -248,22 +256,22 @@ __get_ci_interface( HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; KeInitializeEvent( &event, NotificationEvent, FALSE ); /* Query for the verbs interface. */ - pIrp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, p_ext->p_al_dev, + p_irp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, p_ext->p_al_dev, NULL, 0, NULL, &event, &ioStatus ); - if( !pIrp ) + if( !p_irp ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoBuildSynchronousFsdRequest failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); return STATUS_INSUFFICIENT_RESOURCES; } /* Format the IRP. */ - pIoStack = IoGetNextIrpStackLocation( pIrp ); + pIoStack = IoGetNextIrpStackLocation( p_irp ); pIoStack->MinorFunction = IRP_MN_QUERY_INTERFACE; pIoStack->Parameters.QueryInterface.Version = IB_CI_INTERFACE_VERSION; pIoStack->Parameters.QueryInterface.Size = sizeof(ib_ci_ifc_t); @@ -272,10 +280,10 @@ __get_ci_interface( pIoStack->Parameters.QueryInterface.InterfaceSpecificData = NULL; pIoStack->Parameters.QueryInterface.InterfaceType = &GUID_IB_CI_INTERFACE; - pIrp->IoStatus.Status = STATUS_NOT_SUPPORTED; + p_irp->IoStatus.Status = STATUS_NOT_SUPPORTED; /* Send the IRP. 
*/ - status = IoCallDriver( p_ext->p_al_dev, pIrp ); + status = IoCallDriver( p_ext->p_al_dev, p_irp ); if( status == STATUS_PENDING ) { KeWaitForSingleObject( &event, Executive, KernelMode, @@ -286,8 +294,8 @@ __get_ci_interface( if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Query interface for verbs returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Query interface for verbs returned %08x.\n", status)); return status; } @@ -302,15 +310,15 @@ __pnp_notify_target( IN void *context ) { NTSTATUS status = STATUS_SUCCESS; - DEVICE_OBJECT *pDevObj; + DEVICE_OBJECT *p_dev_obj; hca_dev_ext_t *p_ext; TARGET_DEVICE_REMOVAL_NOTIFICATION *pNotify; HCA_ENTER( HCA_DBG_PNP ); pNotify = (TARGET_DEVICE_REMOVAL_NOTIFICATION*)pNotifyStruct; - pDevObj = (DEVICE_OBJECT*)context; - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_dev_obj = (DEVICE_OBJECT*)context; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; if( IsEqualGUID( &pNotify->Event, &GUID_TARGET_DEVICE_QUERY_REMOVE ) ) { @@ -365,16 +373,16 @@ __pnp_notify_target( status = IoRegisterPlugPlayNotification( EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, - pDevObj->DriverObject, __pnp_notify_target, pDevObj, + p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, &p_ext->pnp_target_entry ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("IoRegisterPlugPlayNotification returned %08x.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); return status; } - __hca_register( pDevObj ); + __hca_register( p_dev_obj ); } HCA_EXIT( HCA_DBG_PNP ); @@ -394,9 +402,9 @@ __alloc_hca_ifc( (ci_interface_t*)ExAllocatePool( PagedPool, sizeof(ci_interface_t) ); if( !pIfc ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, ("Failed to allocate ci_interface_t (%d bytes).\n", - sizeof(ci_interface_t)) ); + sizeof(ci_interface_t))); return NULL; } @@ -414,7 +422,7 @@ __alloc_hca_ifc( static NTSTATUS __hca_register( - IN DEVICE_OBJECT *pDevObj ) + IN DEVICE_OBJECT *p_dev_obj ) { hca_dev_ext_t *p_ext; NTSTATUS status; @@ -423,17 +431,17 @@ __hca_register( HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; ASSERT( p_ext->state == HCA_STARTED ); ASSERT( p_ext->p_al_dev ); /* Get the AL's lower interface. 
*/ - status = __get_ci_interface( pDevObj ); + status = __get_ci_interface( p_dev_obj ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("__get_ci_interface returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("__get_ci_interface returned %08x.\n", status)); return status; } @@ -441,7 +449,7 @@ __hca_register( p_hca_ifc = __alloc_hca_ifc( p_ext ); if( !p_hca_ifc ) { - HCA_TRACE( HCA_DBG_ERROR, ("__alloc_hca_ifc failed.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("__alloc_hca_ifc failed.\n")); return STATUS_NO_MEMORY; } @@ -465,15 +473,15 @@ __pnp_notify_ifc( IN void *context ) { NTSTATUS status; - DEVICE_OBJECT *pDevObj; + DEVICE_OBJECT *p_dev_obj; hca_dev_ext_t *p_ext; DEVICE_INTERFACE_CHANGE_NOTIFICATION *pNotify; HCA_ENTER( HCA_DBG_PNP ); pNotify = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)pNotifyStruct; - pDevObj = (DEVICE_OBJECT*)context; - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_dev_obj = (DEVICE_OBJECT*)context; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; if( !IsEqualGUID( &pNotify->Event, &GUID_DEVICE_INTERFACE_ARRIVAL ) ) { @@ -490,7 +498,7 @@ __pnp_notify_ifc( if( p_ext->state != HCA_STARTED ) { - HCA_TRACE( HCA_DBG_ERROR, ("Invalid state: %d\n", p_ext->state) ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Invalid state: %d\n", p_ext->state)); return STATUS_SUCCESS; } @@ -498,34 +506,34 @@ __pnp_notify_ifc( ASSERT( !p_ext->p_al_file_obj ); /* Get the AL device object. */ - HCA_TRACE( HCA_DBG_PNP, ("Calling IoGetDeviceObjectPointer.\n") ); + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("Calling IoGetDeviceObjectPointer.\n")); status = IoGetDeviceObjectPointer( pNotify->SymbolicLinkName, FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("IoGetDeviceObjectPointer returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); return STATUS_SUCCESS; } /* Register for removal notification of the IB Fabric root device. 
*/ - HCA_TRACE( HCA_DBG_PNP, - ("Registering for target notifications.\n") ); + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Registering for target notifications.\n")); status = IoRegisterPlugPlayNotification( EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, - pDevObj->DriverObject, __pnp_notify_target, pDevObj, + p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, &p_ext->pnp_target_entry ); if( !NT_SUCCESS( status ) ) { ObDereferenceObject( p_ext->p_al_file_obj ); p_ext->p_al_file_obj = NULL; p_ext->p_al_dev = NULL; - HCA_TRACE( HCA_DBG_ERROR, - ("IoRegisterPlugPlayNotification returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); return STATUS_SUCCESS; } - status = __hca_register( pDevObj ); + status = __hca_register( p_dev_obj ); if( !NT_SUCCESS( status ) ) { IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry ); @@ -533,8 +541,8 @@ __pnp_notify_ifc( ObDereferenceObject( p_ext->p_al_file_obj ); p_ext->p_al_file_obj = NULL; p_ext->p_al_dev = NULL; - HCA_TRACE( HCA_DBG_ERROR, - ("__get_ci_interface returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("__get_ci_interface returned %08x.\n", status)); return STATUS_SUCCESS; } @@ -554,7 +562,7 @@ __pnp_notify_ifc( */ static NTSTATUS __SetupHcaResources( - IN DEVICE_OBJECT* const pDevObj, + IN DEVICE_OBJECT* const p_dev_obj, IN CM_RESOURCE_LIST* const pHcaResList, IN CM_RESOURCE_LIST* const pHostResList ) { @@ -567,11 +575,14 @@ __SetupHcaResources( HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; ASSERT( pHostResList->List[0].PartialResourceList.Version == 1 ); ASSERT( pHostResList->List[0].PartialResourceList.Revision == 1 ); + // store the bus number for reset of Tavor + p_ext->bus_number = pHostResList->List[0].BusNumber; + for( i = 0; i < pHostResList->List[0].PartialResourceList.Count; i++ ) { pHcaRes = @@ -579,6 +590,7 @@ __SetupHcaResources( pHostRes = &pHostResList->List[0].PartialResourceList.PartialDescriptors[i]; + /* * Save the interrupt information so that we can power the device * up and down. 
Since the device will lose state when powered down @@ -603,8 +615,8 @@ __SetupHcaResources( if( type == HCA_BAR_TYPE_HCR && (pHcaRes->Flags & CM_RESOURCE_MEMORY_PREFETCHABLE) ) { - HCA_TRACE( HCA_DBG_ERROR, - ("First memory resource is prefetchable - expected HCR.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("First memory resource is prefetchable - expected HCR.\n")); status = STATUS_UNSUCCESSFUL; break; } @@ -617,8 +629,8 @@ __SetupHcaResources( pHostRes->u.Memory.Length, MmNonCached ); if( !p_ext->bar[type].virt ) { - HCA_TRACE( HCA_DBG_ERROR, - ("Failed to map memory resource type %d\n", type) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to map memory resource type %d\n", type)); status = STATUS_UNSUCCESSFUL; break; } @@ -636,13 +648,13 @@ __SetupHcaResources( else if( type != HCA_BAR_TYPE_MAX ) { - HCA_TRACE( HCA_DBG_ERROR, ("Failed to map all memory resources.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Failed to map all memory resources.\n")); status = STATUS_UNSUCCESSFUL; } if( p_ext->interruptInfo.Type != CmResourceTypeInterrupt ) { - HCA_TRACE( HCA_DBG_ERROR, ("No interrupt resource.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("No interrupt resource.\n")); status = STATUS_UNSUCCESSFUL; } @@ -653,14 +665,14 @@ __SetupHcaResources( static void __UnmapHcaMemoryResources( - IN DEVICE_OBJECT* const pDevObj ) + IN DEVICE_OBJECT* const p_dev_obj ) { hca_dev_ext_t *p_ext; USHORT i; HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; for( i = 0; i < HCA_BAR_TYPE_MAX; i++ ) { @@ -677,9 +689,9 @@ __UnmapHcaMemoryResources( static NTSTATUS hca_start( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { NTSTATUS status; hca_dev_ext_t *p_ext; @@ -689,18 +701,18 @@ hca_start( HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; /* Handled on the way up. */ - status = cl_do_sync_pnp( pDevObj, pIrp, pAction ); + status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Lower drivers failed IRP_MN_START_DEVICE.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Lower drivers failed IRP_MN_START_DEVICE.\n")); return status; } - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); /* * Walk the resource lists and store the information. The write-only @@ -711,18 +723,18 @@ hca_start( * - UAR space: prefetchable, write only. * - DDR: prefetchable, read/write. 
*/ - status = __SetupHcaResources( pDevObj, + status = __SetupHcaResources( p_dev_obj, pIoStack->Parameters.StartDevice.AllocatedResources, pIoStack->Parameters.StartDevice.AllocatedResourcesTranslated ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("__ProcessResources returned %08X.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__ProcessResources returned %08X.\n", status)); return status; } /* save PCI configuration info and enable device */ - hca_enable_pci( pDevObj, &p_ext->hcaConfig ); + hca_enable_pci( p_dev_obj, &p_ext->hcaConfig ); /* * Get the DMA adapter representing the HCA so we can @@ -732,19 +744,19 @@ hca_start( devDesc.Version = DEVICE_DESCRIPTION_VERSION2; devDesc.Master = TRUE; devDesc.ScatterGather = TRUE; + devDesc.Dma32BitAddresses = TRUE; devDesc.Dma64BitAddresses = TRUE; devDesc.InterfaceType = PCIBus; - - //TODO: what about Arbel ? Has it the same limit ? Is it the right place to call IoGetDmaAdapter ? - /* Tavor has a limit of 2GB for data transfer lengths. */ - devDesc.MaximumLength = 0x80000000; + // get the adapter object + // 0x80000000 is a threshold, that's why - 1 + devDesc.MaximumLength = 0x80000000 - 1; p_ext->p_dma_adapter = IoGetDmaAdapter( p_ext->cl_ext.p_pdo, &devDesc, &p_ext->n_map_regs ); if( !p_ext->p_dma_adapter ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("Failed to get DMA_ADAPTER for HCA.\n") ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to get DMA_ADAPTER for HCA.\n")); return STATUS_INSUFFICIENT_RESOURCES; } @@ -753,8 +765,8 @@ hca_start( if( !NT_SUCCESS( status ) ) { //TODO: no cleanup on error - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("mthca_start returned %08X\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("mthca_start returned %08X\n", status)); return status; } @@ -769,8 +781,8 @@ hca_start( int err = mthca_get_dev_info( p_ext->hca.mdev, &p_ext->hca.guid, &p_ext->hca.hw_ver ); if (err) { //TODO: no cleanup on error - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("can't get guid - mthca_query_port() failed (%08X)\n", err) ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("can't get guid - mthca_query_port()")); return STATUS_INSUFFICIENT_RESOURCES; } } @@ -788,13 +800,13 @@ hca_start( status = IoRegisterPlugPlayNotification( EventCategoryDeviceInterfaceChange, PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, - (void*)&GUID_IB_CI_INTERFACE, pDevObj->DriverObject, - __pnp_notify_ifc, pDevObj, &p_ext->pnp_ifc_entry ); + (void*)&GUID_IB_CI_INTERFACE, p_dev_obj->DriverObject, + __pnp_notify_ifc, p_dev_obj, &p_ext->pnp_ifc_entry ); if( !NT_SUCCESS( status ) ) { p_ext->state = HCA_ADDED; - HCA_TRACE( HCA_DBG_ERROR, - ("IoRegisterPlugPlayNotification returned %08x.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); } /* We get started fully powered. */ @@ -802,6 +814,20 @@ hca_start( powerState.DeviceState = PowerDeviceD0; PoSetPowerState ( p_ext->cl_ext.p_self_do, DevicePowerState, powerState ); + { + struct mthca_dev *mdev = p_ext->hca.mdev; + HCA_PRINT_EV(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW , + ("Ven %d Dev %d Hw %x Fw %d.%d.%d Drv %s (%s) Flg %s%s%s\n", + (unsigned)p_ext->hcaConfig.VendorID, (unsigned)p_ext->hcaConfig.DeviceID, + p_ext->hca.hw_ver, (int) (mdev->fw_ver >> 32), + (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), + DRV_VERSION, DRV_RELDATE, + (mdev->mthca_flags & MTHCA_FLAG_MEMFREE) ? "M:" : "", + (mdev->mthca_flags & MTHCA_FLAG_PCIE) ? 
"E:" : "", + (mdev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN) ? "H" : "" + )); + } + HCA_EXIT( HCA_DBG_PNP ); return status; } @@ -809,14 +835,14 @@ hca_start( static void hca_release_resources( - IN DEVICE_OBJECT* const pDevObj ) + IN DEVICE_OBJECT* const p_dev_obj ) { hca_dev_ext_t *p_ext; POWER_STATE powerState; HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; if( p_ext->state == HCA_REGISTERED ) { @@ -847,7 +873,7 @@ hca_release_resources( p_ext->p_dma_adapter->DmaOperations->PutDmaAdapter( p_ext->p_dma_adapter ); //cl_event_destroy( &p_ext->mutex ); - __UnmapHcaMemoryResources( pDevObj ); + __UnmapHcaMemoryResources( p_dev_obj ); /* Notify the power manager that the device is powered down. */ powerState.DeviceState = PowerDeviceD3; @@ -862,30 +888,30 @@ hca_release_resources( static NTSTATUS hca_query_removal_relations( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { NTSTATUS status; hca_dev_ext_t *p_ext; HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; if( p_ext->state == HCA_REGISTERED ) { - status = p_ext->ci_ifc.get_relations( p_ext->hca.guid, pIrp ); + status = p_ext->ci_ifc.get_relations( p_ext->hca.guid, p_irp ); if( !NT_SUCCESS( status ) ) { - *pAction = IrpComplete; - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("AL get_relations returned %08x.\n", status) ); + *p_action = IrpComplete; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); return status; } } - *pAction = IrpPassDown; + *p_action = IrpPassDown; HCA_EXIT( HCA_DBG_PNP ); return STATUS_SUCCESS; } @@ -893,116 +919,162 @@ hca_query_removal_relations( static NTSTATUS hca_query_bus_relations( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { - return cl_irp_skip( pDevObj, pIrp, pAction ); +#ifndef WIN_TO_BE_CHANGED + + NTSTATUS status; + DEVICE_RELATIONS *p_rel; + hca_dev_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = p_dev_obj->DeviceExtension; + + //cl_event_wait_on( &p_ext->mutex, EVENT_NO_TIMEOUT, FALSE ); + if( p_ext->state == HCA_REGISTERED ) + { + status = p_ext->ci_ifc.get_relations( p_ext->hca.guid, p_irp ); + if( !NT_SUCCESS( status ) ) + { + //cl_event_signal( &p_ext->mutex ); + *p_action = IrpComplete; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); + return status; + } + } + else + { + status = cl_alloc_relations( p_irp, 1 ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_alloc_relations returned %08x.\n", status)); + return status; + } + + p_rel = (DEVICE_RELATIONS*)p_irp->IoStatus.Information; + p_rel->Count = 0; + p_rel->Objects[0] = NULL; + } + + //cl_event_signal( &p_ext->mutex ); + + *p_action = IrpPassDown; + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; + +#else + return cl_irp_skip( p_dev_obj, p_irp, p_action ); //NTSTATUS status; //hca_dev_ext_t *p_ext; //HCA_ENTER( HCA_DBG_PNP ); - //p_ext = pDevObj->DeviceExtension; + //p_ext = p_dev_obj->DeviceExtension; //if( p_ext->state == HCA_REGISTERED ) //{ - // status = p_ext->ci_ifc.get_relations( p_ext->hca.guid, pIrp ); + // status = p_ext->ci_ifc.get_relations( 
p_ext->hca.guid, p_irp ); // if( !NT_SUCCESS( status ) ) // { - // *pAction = IrpComplete; - // HCA_TRACE_EXIT( HCA_DBG_ERROR, - // ("AL get_relations returned %08x.\n", status) ); + // *p_action = IrpComplete; + // HCA_PRINT( TRACE_LEVEL_ERROR, + // "AL get_relations returned %08x.\n", status); // return status; // } //} - //*pAction = IrpPassDown; + //*p_action = IrpPassDown; //HCA_EXIT( HCA_DBG_PNP ); //return STATUS_SUCCESS; +#endif } static NTSTATUS hca_query_stop( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { /* All kernel clients will get notified through the device hierarchy. */ /* TODO: set a flag to fail creation of any new IB resources. */ - return cl_irp_skip( pDevObj, pIrp, pAction ); + return cl_irp_skip( p_dev_obj, p_irp, p_action ); } static NTSTATUS hca_stop( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { /* * Must disable everything. Complib framework will * call ReleaseResources handler. */ - return cl_irp_skip( pDevObj, pIrp, pAction ); + return cl_irp_skip( p_dev_obj, p_irp, p_action ); } static NTSTATUS hca_cancel_stop( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { /* Handled on the way up. */ - return cl_do_sync_pnp( pDevObj, pIrp, pAction ); + return cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); } static NTSTATUS hca_query_remove( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { /* Query remove always succeeds. */ /* TODO: set a flag to fail creation of any new IB resources. */ - return cl_irp_skip( pDevObj, pIrp, pAction ); + return cl_irp_skip( p_dev_obj, p_irp, p_action ); } static NTSTATUS hca_cancel_remove( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { /* Handled on the way up. */ - return cl_do_sync_pnp( pDevObj, pIrp, pAction ); + return cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); } static NTSTATUS hca_surprise_remove( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { /* * TODO: Set state so that all further requests * automatically succeed/fail as needed. */ - return cl_irp_skip( pDevObj, pIrp, pAction ); + return cl_irp_skip( p_dev_obj, p_irp, p_action ); } static NTSTATUS hca_query_capabilities( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { NTSTATUS status; hca_dev_ext_t *p_ext; @@ -1011,18 +1083,18 @@ hca_query_capabilities( HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; /* Process on the way up. 
*/ - status = cl_do_sync_pnp( pDevObj, pIrp, pAction ); + status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); if( !NT_SUCCESS( status ) ) { - HCA_TRACE_EXIT( HCA_DBG_ERROR, - ("cl_do_sync_pnp returned %08X.\n", status) ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_do_sync_pnp returned %08X.\n", status)); return status; } - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); pCaps = pIoStack->Parameters.DeviceCapabilities.Capabilities; /* @@ -1035,29 +1107,29 @@ hca_query_capabilities( if( pCaps->DeviceD1 ) { - HCA_TRACE( HCA_DBG_WARN | HCA_DBG_PNP, - ("WARNING: Device reports support for DeviceD1 power state.\n") ); + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("WARNING: Device reports support for DeviceD1 power state.\n")); pCaps->DeviceD1 = FALSE; } if( pCaps->DeviceD2 ) { - HCA_TRACE( HCA_DBG_WARN | HCA_DBG_PNP, - ("WARINING: Device reports support for DeviceD2 power state.\n") ); + HCA_PRINT( TRACE_LEVEL_WARNING,HCA_DBG_PNP, + ("WARINING: Device reports support for DeviceD2 power state.\n")); pCaps->DeviceD2 = FALSE; } if( pCaps->SystemWake != PowerSystemUnspecified ) { - HCA_TRACE( HCA_DBG_WARN | HCA_DBG_PNP, - ("WARINING: Device reports support for system wake.\n") ); + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("WARINING: Device reports support for system wake.\n")); pCaps->SystemWake = PowerSystemUnspecified; } if( pCaps->DeviceWake != PowerDeviceUnspecified ) { - HCA_TRACE( HCA_DBG_WARN | HCA_DBG_PNP, - ("WARINING: Device reports support for device wake.\n") ); + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("WARINING: Device reports support for device wake.\n")); pCaps->DeviceWake = PowerDeviceUnspecified; } @@ -1068,19 +1140,19 @@ hca_query_capabilities( static NTSTATUS hca_query_pnp_state( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { hca_dev_ext_t *p_ext; HCA_ENTER( HCA_DBG_PNP ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; - pIrp->IoStatus.Information |= p_ext->pnpState; + p_irp->IoStatus.Information |= p_ext->pnpState; - *pAction = IrpSkip; + *p_action = IrpSkip; HCA_EXIT( HCA_DBG_PNP ); return STATUS_SUCCESS;; @@ -1089,18 +1161,18 @@ hca_query_pnp_state( static NTSTATUS hca_query_power( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { NTSTATUS status = STATUS_SUCCESS; IO_STACK_LOCATION *pIoStack; - HCA_ENTER( HCA_DBG_PO ); + HCA_ENTER(HCA_DBG_PO); - UNUSED_PARAM( pDevObj ); + UNUSED_PARAM( p_dev_obj ); - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); switch( pIoStack->Parameters.Power.Type ) { @@ -1134,9 +1206,9 @@ hca_query_power( } if( status == STATUS_NOT_SUPPORTED ) - *pAction = IrpComplete; + *p_action = IrpComplete; else - *pAction = IrpSkip; + *p_action = IrpSkip; HCA_EXIT( HCA_DBG_PO ); return status; @@ -1145,13 +1217,13 @@ hca_query_power( static void __RequestPowerCompletion( - IN DEVICE_OBJECT *pDevObj, + IN DEVICE_OBJECT *p_dev_obj, IN UCHAR minorFunction, IN POWER_STATE powerState, IN void *context, IN IO_STATUS_BLOCK *pIoStatus ) { - IRP *pIrp; + IRP *p_irp; cl_pnp_po_ext_t *p_ext; HCA_ENTER( HCA_DBG_PO ); @@ -1159,16 +1231,16 @@ __RequestPowerCompletion( UNUSED_PARAM( 
minorFunction ); UNUSED_PARAM( powerState ); - pIrp = (IRP*)context; - p_ext = (cl_pnp_po_ext_t*)pDevObj->DeviceExtension; + p_irp = (IRP*)context; + p_ext = (cl_pnp_po_ext_t*)p_dev_obj->DeviceExtension; /* Propagate the device IRP status to the system IRP status. */ - pIrp->IoStatus.Status = pIoStatus->Status; + p_irp->IoStatus.Status = pIoStatus->Status; /* Continue Power IRP processing. */ - PoStartNextPowerIrp( pIrp ); - IoCompleteRequest( pIrp, IO_NO_INCREMENT ); - IoReleaseRemoveLock( &p_ext->remove_lock, pIrp ); + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_ext->remove_lock, p_irp ); HCA_EXIT( HCA_DBG_PO ); } @@ -1176,8 +1248,8 @@ __RequestPowerCompletion( /*NOTE: Completion routines must NEVER be pageable. */ static NTSTATUS __SystemPowerCompletion( - IN DEVICE_OBJECT *pDevObj, - IN IRP *pIrp, + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, IN void *context ) { NTSTATUS status; @@ -1185,20 +1257,20 @@ __SystemPowerCompletion( hca_dev_ext_t *p_ext; IO_STACK_LOCATION *pIoStack; - HCA_ENTER( HCA_DBG_PO ); + HCA_ENTER( HCA_DBG_PNP ); UNUSED_PARAM( context ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); - if( !NT_SUCCESS( pIrp->IoStatus.Status ) ) + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) { - PoStartNextPowerIrp( pIrp ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); - HCA_TRACE_EXIT( HCA_DBG_ERROR, + PoStartNextPowerIrp( p_irp ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("IRP_MN_SET_POWER for system failed by lower driver with %08x.\n", - pIrp->IoStatus.Status) ); + p_irp->IoStatus.Status)); return STATUS_SUCCESS; } @@ -1209,21 +1281,21 @@ __SystemPowerCompletion( * Send a device power IRP to our devnode. Using our device object will * only work on win2k and other NT based systems. */ - status = PoRequestPowerIrp( pDevObj, IRP_MN_SET_POWER, state, - __RequestPowerCompletion, pIrp, NULL ); + status = PoRequestPowerIrp( p_dev_obj, IRP_MN_SET_POWER, state, + __RequestPowerCompletion, p_irp, NULL ); - if( !NT_SUCCESS( pIrp->IoStatus.Status ) ) + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) { - PoStartNextPowerIrp( pIrp ); + PoStartNextPowerIrp( p_irp ); /* Propagate the failure. */ - pIrp->IoStatus.Status = status; - IoCompleteRequest( pIrp, IO_NO_INCREMENT ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); - HCA_TRACE( HCA_DBG_ERROR, - ("PoRequestPowerIrp returned %08x.\n", status) ); + p_irp->IoStatus.Status = status; + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("PoRequestPowerIrp returned %08x.\n", status)); } - HCA_EXIT( HCA_DBG_PO ); + HCA_EXIT( HCA_DBG_PNP ); return STATUS_MORE_PROCESSING_REQUIRED; } @@ -1231,19 +1303,19 @@ __SystemPowerCompletion( /* Work item callback to handle DevicePowerD0 IRPs at passive level. 
*/ static void __PowerUpCb( - IN DEVICE_OBJECT* pDevObj, + IN DEVICE_OBJECT* p_dev_obj, IN void* context ) { NTSTATUS status; IO_STACK_LOCATION *pIoStack; hca_dev_ext_t *p_ext; - IRP *pIrp; + IRP *p_irp; HCA_ENTER( HCA_DBG_PO ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; - pIrp = (IRP*)context; - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + p_irp = (IRP*)context; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); IoFreeWorkItem( p_ext->pPoWorkItem ); p_ext->pPoWorkItem = NULL; @@ -1253,7 +1325,7 @@ __PowerUpCb( goto done; if( p_ext->p_al_dev ) - status = __hca_register( pDevObj ); + status = __hca_register( p_dev_obj ); done: if( !NT_SUCCESS( status ) ) @@ -1263,9 +1335,9 @@ done: IoInvalidateDeviceState( p_ext->cl_ext.p_pdo ); } - PoStartNextPowerIrp( pIrp ); - IoCompleteRequest( pIrp, IO_NO_INCREMENT ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); HCA_EXIT( HCA_DBG_PO ); } @@ -1274,8 +1346,8 @@ done: /*NOTE: Completion routines must NEVER be pageable. */ static NTSTATUS __DevicePowerCompletion( - IN DEVICE_OBJECT *pDevObj, - IN IRP *pIrp, + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, IN void *context ) { NTSTATUS status; @@ -1286,53 +1358,53 @@ __DevicePowerCompletion( UNUSED_PARAM( context ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); - if( !NT_SUCCESS( pIrp->IoStatus.Status ) ) + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) { - PoStartNextPowerIrp( pIrp ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); - HCA_TRACE_EXIT( HCA_DBG_ERROR, + PoStartNextPowerIrp( p_irp ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, ("IRP_MN_SET_POWER for device failed by lower driver with %08x.\n", - pIrp->IoStatus.Status) ); + p_irp->IoStatus.Status)); return STATUS_SUCCESS; } p_ext->PowerState = pIoStack->Parameters.Power.State.DeviceState; - PoSetPowerState( pDevObj, DevicePowerState, + PoSetPowerState( p_dev_obj, DevicePowerState, pIoStack->Parameters.Power.State ); /* Process in a work item - mthca_start blocks. */ ASSERT( !p_ext->pPoWorkItem ); - p_ext->pPoWorkItem = IoAllocateWorkItem( pDevObj ); + p_ext->pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); if( !p_ext->pPoWorkItem ) { IoInvalidateDeviceState( p_ext->cl_ext.p_pdo ); - PoStartNextPowerIrp( pIrp ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); + PoStartNextPowerIrp( p_irp ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); return STATUS_SUCCESS; } /* Process in work item callback. */ - IoMarkIrpPending( pIrp ); - IoQueueWorkItem( p_ext->pPoWorkItem, __PowerUpCb, DelayedWorkQueue, pIrp ); + IoMarkIrpPending( p_irp ); + IoQueueWorkItem( p_ext->pPoWorkItem, __PowerUpCb, DelayedWorkQueue, p_irp ); /* TODO: Start the HCA. 
*/ status = mthca_init_one( p_ext ); if( !NT_SUCCESS( status ) ) goto done; if( p_ext->p_al_dev ) - status = __hca_register( pDevObj ); + status = __hca_register( p_dev_obj ); done: if( !NT_SUCCESS( status ) ) IoInvalidateDeviceState( p_ext->cl_ext.p_pdo ); - PoStartNextPowerIrp( pIrp ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); + PoStartNextPowerIrp( p_irp ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); HCA_EXIT( HCA_DBG_PO ); return STATUS_MORE_PROCESSING_REQUIRED; @@ -1342,23 +1414,23 @@ done: /* Work item callback to handle DevicePowerD3 IRPs at passive level. */ static void __PowerDownCb( - IN DEVICE_OBJECT* pDevObj, + IN DEVICE_OBJECT* p_dev_obj, IN void* context ) { IO_STACK_LOCATION *pIoStack; hca_dev_ext_t *p_ext; - IRP *pIrp; + IRP *p_irp; HCA_ENTER( HCA_DBG_PO ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; - pIrp = (IRP*)context; - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + p_irp = (IRP*)context; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); IoFreeWorkItem( p_ext->pPoWorkItem ); p_ext->pPoWorkItem = NULL; - PoSetPowerState( pDevObj, DevicePowerState, + PoSetPowerState( p_dev_obj, DevicePowerState, pIoStack->Parameters.Power.State ); if( p_ext->state == HCA_REGISTERED ) { @@ -1372,10 +1444,10 @@ __PowerDownCb( mthca_remove_one( p_ext ); - IoSkipCurrentIrpStackLocation( pIrp ); - PoStartNextPowerIrp( pIrp ); - PoCallDriver( p_ext->cl_ext.p_next_do, pIrp ); - IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, pIrp ); + IoSkipCurrentIrpStackLocation( p_irp ); + PoStartNextPowerIrp( p_irp ); + PoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); HCA_EXIT( HCA_DBG_PO ); } @@ -1383,9 +1455,9 @@ __PowerDownCb( static NTSTATUS hca_set_power( - IN DEVICE_OBJECT* const pDevObj, - IN IRP* const pIrp, - OUT cl_irp_action_t* const pAction ) + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) { NTSTATUS status; IO_STACK_LOCATION *pIoStack; @@ -1393,8 +1465,8 @@ hca_set_power( HCA_ENTER( HCA_DBG_PO ); - p_ext = (hca_dev_ext_t*)pDevObj->DeviceExtension; - pIoStack = IoGetCurrentIrpStackLocation( pIrp ); + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); switch( pIoStack->Parameters.Power.Type ) { @@ -1404,43 +1476,43 @@ hca_set_power( * power dispatch function can be called at elevated IRQL if the * device is in a paging/hibernation/crash dump path. */ - IoMarkIrpPending( pIrp ); - IoCopyCurrentIrpStackLocationToNext( pIrp ); + IoMarkIrpPending( p_irp ); + IoCopyCurrentIrpStackLocationToNext( p_irp ); #pragma warning( push, 3 ) - IoSetCompletionRoutine( pIrp, __SystemPowerCompletion, NULL, + IoSetCompletionRoutine( p_irp, __SystemPowerCompletion, NULL, TRUE, TRUE, TRUE ); #pragma warning( pop ) - PoCallDriver( p_ext->cl_ext.p_next_do, pIrp ); + PoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); - *pAction = IrpDoNothing; + *p_action = IrpDoNothing; status = STATUS_PENDING; break; case DevicePowerState: - IoMarkIrpPending( pIrp ); + IoMarkIrpPending( p_irp ); if( pIoStack->Parameters.Power.State.DeviceState == PowerDeviceD0 ) { /* If we're already powered up, just pass down. */ if( p_ext->PowerState == PowerDeviceD0 ) { status = STATUS_SUCCESS; - *pAction = IrpIgnore; + *p_action = IrpIgnore; break; } /* Process in I/O completion callback. 
*/ - IoCopyCurrentIrpStackLocationToNext( pIrp ); + IoCopyCurrentIrpStackLocationToNext( p_irp ); #pragma warning( push, 3 ) - IoSetCompletionRoutine( pIrp, __DevicePowerCompletion, NULL, + IoSetCompletionRoutine( p_irp, __DevicePowerCompletion, NULL, TRUE, TRUE, TRUE ); #pragma warning( pop ) - PoCallDriver( p_ext->cl_ext.p_next_do, pIrp ); + PoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); } else { /* Process in a work item - deregister_ca and HcaDeinit block. */ ASSERT( !p_ext->pPoWorkItem ); - p_ext->pPoWorkItem = IoAllocateWorkItem( pDevObj ); + p_ext->pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); if( !p_ext->pPoWorkItem ) { status = STATUS_INSUFFICIENT_RESOURCES; @@ -1448,26 +1520,24 @@ hca_set_power( } /* Process in work item callback. */ - IoMarkIrpPending( pIrp ); + IoMarkIrpPending( p_irp ); IoQueueWorkItem( - p_ext->pPoWorkItem, __PowerDownCb, DelayedWorkQueue, pIrp ); + p_ext->pPoWorkItem, __PowerDownCb, DelayedWorkQueue, p_irp ); } - *pAction = IrpDoNothing; + *p_action = IrpDoNothing; status = STATUS_PENDING; break; default: /* Pass down and let the PDO driver handle it. */ - *pAction = IrpIgnore; + *p_action = IrpIgnore; status = STATUS_SUCCESS; break; } if( !NT_SUCCESS( status ) ) - *pAction = IrpComplete; + *p_action = IrpComplete; HCA_EXIT( HCA_DBG_PNP ); return status; } - - diff --git a/branches/MTHCA/hw/mthca/kernel/hca_pnp.h b/branches/MTHCA/hw/mthca/kernel/hca_pnp.h index c23082ed..fba554f6 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_pnp.h +++ b/branches/MTHCA/hw/mthca/kernel/hca_pnp.h @@ -30,7 +30,7 @@ */ -#if !defined( _HCA_PNP_H_ ) +#ifndef _HCA_PNP_H_ #define _HCA_PNP_H_ void hca_init_vfptr( void ); diff --git a/branches/MTHCA/hw/mthca/kernel/hca_smp.c b/branches/MTHCA/hw/mthca/kernel/hca_smp.c index b971888c..43810308 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_smp.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_smp.c @@ -35,8 +35,15 @@ */ -#include "hca_data.h" -#include "hca_debug.h" +#include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_smp.tmh" +#endif +#include "mthca_dev.h" +#include "ib_mad.h" boolean_t @@ -490,6 +497,27 @@ mlnx_update_cache( * Local MAD Support Verbs. For CAs that do not support * agents in HW. */ + +#ifdef WIN_TO_BE_REMOVED +//TODO: seems like non need in that +static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, + u8 port_num, struct _ib_wc *wc) +{ + RtlZeroMemory(wc, sizeof *wc); + wc->wr_id = wr_id; + wc->status = IB_WC_SUCCESS; + wc->wc_type = IB_WC_RECV; + wc->length = sizeof(struct ib_mad) + sizeof(struct ib_grh); + wc->qp_num = IB_QP0; + wc->port_num = port_num; + wc->recv.ud.pkey_index = pkey_index; + wc->recv.ud.remote_qp = IB_QP0; + wc->recv.ud.remote_lid = slid; + wc->recv.ud.remote_sl = 0; + wc->recv.ud.path_bits = 0; +} +#endif + ib_api_status_t mlnx_local_mad ( IN const ib_ca_handle_t h_ca, @@ -498,21 +526,83 @@ mlnx_local_mad ( OUT ib_mad_t *p_mad_out ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_ca); - UNREFERENCED_PARAMETER(port_num); - UNREFERENCED_PARAMETER(p_mad_in); - UNREFERENCED_PARAMETER(p_mad_out); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_local_mad not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status = IB_SUCCESS; + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + struct ib_device *ib_dev_p = IBDEV_FROM_HOB( hob_p ); + //TODO: do we need use flags (IB_MAD_IGNORE_MKEY, IB_MAD_IGNORE_BKEY) ? + int mad_flags = 0; + struct _ib_wc wc; + struct _ib_wc *wc_p; + //TODO: do we need use grh ? 
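	/*
	 * mthca_process_mad() only needs the work completion and GRH when the
	 * MAD being processed was received from the wire and the reply must be
	 * steered back to its sender; the code below passes NULL for both,
	 * which matches a locally sourced query.  A caller of this verb would
	 * look roughly like this (sketch; the build/parse helpers are
	 * hypothetical):
	 *
	 *     ib_smp_t query, response;
	 *     build_port_info_query( &query, port_num );        // hypothetical
	 *     status = mlnx_local_mad( h_ca, port_num,
	 *         (ib_mad_t*)&query, (ib_mad_t*)&response );
	 *     if( status == IB_SUCCESS )
	 *         parse_port_info( &response );                  // hypothetical
	 */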
+ struct ib_grh *grh_p = NULL; + + HCA_ENTER(HCA_DBG_MAD); + + // sanity checks + if (port_num > 2) { + status = IB_INVALID_PARAMETER; + goto err_port_num; + } + +#ifdef WIN_TO_BE_REMOVED + //TODO: seems like non need in that + //TODO: don't know: wr_id, pkey_index !!! + if (p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR) { + ib_smp_t *smp = (ib_smp_t *)p_mad_in; + u64 wr_id = 0; + u16 pkey_index = 0; + build_smp_wc(wr_id, cl_ntoh16(smp->dr_slid), pkey_index, port_num, &wc); + wc_p = &wc; + } +#else + wc_p = NULL; +#endif + + // debug print + { + ib_smp_t *smp = (ib_smp_t *)p_mad_in; + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_MAD, + ("MAD: Class %02x, Method %02x, Attr %02x, HopPtr %d, HopCnt %d, \n", + (uint32_t)smp->mgmt_class, (uint32_t)smp->method, + (uint32_t)smp->attr_id, (uint32_t)smp->hop_ptr, + (uint32_t)smp->hop_count)); + } + + // process mad + if( !mlnx_cachable_mad( h_ca, port_num, p_mad_in, p_mad_out ) ) + { + err = mthca_process_mad(ib_dev_p, mad_flags, (uint8_t)port_num, + wc_p, grh_p, (struct ib_mad*)p_mad_in, (struct ib_mad*)p_mad_out); + if (!err) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MAD, + ("MAD failed:\n\tClass 0x%x\n\tMethod 0x%x\n\tAttr 0x%x", + p_mad_in->mgmt_class, p_mad_in->method, p_mad_in->attr_id )); + status = IB_ERROR; + goto err_process_mad; + } + mlnx_update_cache( h_ca, port_num, p_mad_out ); + } + + /* Modify direction for Direct MAD */ + if ( p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR ) + p_mad_out->status |= IB_SMP_DIRECTION; + +err_process_mad: +err_port_num: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_MAD ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status; mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; - u_int32_t hca_idx; + uint32_t hca_idx; mlnx_hobul_t *hobul_p; HH_hca_dev_t *hca_ul_info; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER(HCA_DBG_MAD); if (port_num > 2) { status = IB_INVALID_PARAMETER; @@ -542,9 +632,9 @@ mlnx_local_mad ( if( HH_OK != THH_hob_process_local_mad( hobul_p->hh_hndl, port_num, 0x0, 0, (void *)p_mad_in, p_mad_out ) ) { - HCA_TRACE( HCA_DBG_ERROR, + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MAD, ("MAD failed:\n\tClass 0x%x\n\tMethod 0x%x\n\tAttr 0x%x", - p_mad_in->mgmt_class, p_mad_in->method, p_mad_in->attr_id ) ); + p_mad_in->mgmt_class, p_mad_in->method, p_mad_in->attr_id )); status = IB_ERROR; goto cleanup; } @@ -556,12 +646,12 @@ mlnx_local_mad ( if ( p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR ) p_mad_out->status |= IB_SMP_DIRECTION; - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT(HCA_DBG_MAD); return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MAD ,("status %d\n", status))); + HCA_EXIT(HCA_DBG_MAD); return status; #endif } diff --git a/branches/MTHCA/hw/mthca/kernel/hca_verbs.c b/branches/MTHCA/hw/mthca/kernel/hca_verbs.c index 350018c4..61b192ae 100644 --- a/branches/MTHCA/hw/mthca/kernel/hca_verbs.c +++ b/branches/MTHCA/hw/mthca/kernel/hca_verbs.c @@ -32,8 +32,16 @@ #include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_verbs.tmh" +#endif #include "mthca_dev.h" #include "ib_cache.h" +#include "mthca_user.h" +#include "mt_uverbs.h" #define PTR_ALIGN(size) (((size) + sizeof(void*) - 1) & ~(sizeof(void*) - 1)) @@ -65,18 +73,19 @@ mlnx_open_ca ( mlnx_hca_t *p_hca; ib_api_status_t status; + mlnx_cache_t *p_cache; - HCA_ENTER(MLNX_DBG_TRACE); - 
HCA_TRACE(MLNX_DBG_INFO, ("context 0x%p\n", ca_context)); + HCA_ENTER(HCA_DBG_SHIM); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("context 0x%p\n", ca_context)); // find CA object p_hca = mlnx_hca_from_guid( ca_guid ); if( !p_hca ) { - HCA_EXIT( MLNX_DBG_TRACE ); - return IB_NOT_FOUND; + status = IB_NOT_FOUND; + goto err_hca_from_guid; } - HCA_TRACE(MLNX_DBG_INFO, ("context 0x%p\n", ca_context)); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("context 0x%p\n", ca_context)); status = mlnx_hobs_set_cb(&p_hca->hob, pfn_completion_cb, pfn_async_event_cb, @@ -85,17 +94,25 @@ mlnx_open_ca ( goto err_set_cb; } + // MAD cache + p_cache = (mlnx_cache_t*)cl_pzalloc( sizeof(mlnx_cache_t) * 2 ); + if( !p_cache ) { + status = IB_INSUFFICIENT_MEMORY; + goto err_mad_cache; + } + p_hca->hob.cache = p_cache; + + //TODO: do we need something for kernel users ? // Return pointer to HOB object if (ph_ca) *ph_ca = &p_hca->hob; status = IB_SUCCESS; +err_mad_cache: err_set_cb: - - // For user mode call - return status to user mode - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); +err_hca_from_guid: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else @@ -106,13 +123,13 @@ err_set_cb: mlnx_hob_t *new_ca = NULL; MOSAL_protection_ctx_t prot_ctx; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("context 0x%p\n", ca_context)); + HCA_ENTER( HCA_DBG_SHIM); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"context 0x%p\n", ca_context) // find CA object p_hca = mlnx_hca_from_guid( ca_guid ); if( !p_hca ) { - HCA_EXIT( MLNX_DBG_TRACE ); + HCA_EXIT( TRACE_LEVEL_VERBOSE ); return IB_NOT_FOUND; } @@ -123,14 +140,14 @@ err_set_cb: // if a HOBKL exists for this device (i.e. 
it is open) - return E_BUSY if (IB_SUCCESS == mlnx_hobs_lookup(p_hca->hh_hndl, &new_ca)) { if (ph_ca) *ph_ca = (ib_ca_handle_t)new_ca; - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_RESOURCE_BUSY; } // Create a mapping from hca index to hh_hndl status = mlnx_hobs_insert(p_hca, &new_ca); if (IB_SUCCESS != status) { - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return status; } @@ -143,7 +160,7 @@ err_set_cb: goto cleanup; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("context 0x%p\n", ca_context)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"context 0x%p\n", ca_context) status = mlnx_hobs_set_cb(new_ca, pfn_completion_cb, pfn_async_event_cb, @@ -152,7 +169,7 @@ err_set_cb: goto cleanup; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("ul_resource sizes: hca %d pd %d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,("ul_resource sizes: hca %d pd %d\n", hca_ul_info->hca_ul_resources_sz, hca_ul_info->pd_ul_resources_sz)); @@ -181,7 +198,7 @@ err_set_cb: // Return the HOBUL index if (ph_ca) *ph_ca = new_ca; - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: @@ -191,8 +208,8 @@ cleanup: mlnx_hobs_remove(new_ca); // For user mode call - return status to user mode - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d \n", status))); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -208,11 +225,11 @@ mlnx_query_ca ( ib_api_status_t status; uint32_t size, required_size; - u_int8_t port_num, num_ports; - u_int32_t num_gids, num_pkeys; - u_int32_t num_page_sizes = 1; // TBD: what is actually supported + uint8_t port_num, num_ports; + uint32_t num_gids, num_pkeys; + uint32_t num_page_sizes = 1; // TBD: what is actually supported uint8_t *last_p; - u_int32_t priv_op; + uint32_t priv_op; struct ib_device_attr props; struct ib_port_attr *hca_ports = NULL; int i; @@ -221,12 +238,12 @@ mlnx_query_ca ( struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); int err; - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); // sanity checks if( p_umv_buf && p_umv_buf->command ) { - HCA_TRACE (CL_DBG_ERROR, ("User mode is not supported yet\n")); - status = IB_UNSUPPORTED; + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM ,("User mode is not supported yet\n")); + p_umv_buf->status = status = IB_UNSUPPORTED; goto err_user_unsupported; } if (NULL == p_byte_count) { @@ -237,7 +254,7 @@ mlnx_query_ca ( // query the device err = mthca_query_device(ib_dev, &props ); if (err) { - HCA_TRACE (CL_DBG_ERROR, + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("ib_query_device failed (%d)\n",err)); status = errno_to_iberr(err); goto err_query_device; @@ -246,7 +263,7 @@ mlnx_query_ca ( // alocate arrary for port properties num_ports = ib_dev->phys_port_cnt; /* Number of physical ports of the HCA */ if (NULL == (hca_ports = cl_zalloc( num_ports * sizeof *hca_ports))) { - HCA_TRACE (CL_DBG_ERROR, ("Failed to cl_zalloc ports array\n")); + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed to cl_zalloc ports array\n")); status = IB_INSUFFICIENT_MEMORY; goto err_alloc_ports; } @@ -255,7 +272,7 @@ mlnx_query_ca ( num_gids = 0; num_pkeys = 0; required_size = PTR_ALIGN(sizeof(ib_ca_attr_t)) + - PTR_ALIGN(sizeof(u_int32_t) * num_page_sizes) + + PTR_ALIGN(sizeof(uint32_t) * num_page_sizes) + PTR_ALIGN(sizeof(ib_port_attr_t) * num_ports); // get port properties @@ -263,7 +280,7 @@ 
mlnx_query_ca ( // request err = mthca_query_port(ib_dev, port_num + start_port(ib_dev), &hca_ports[port_num]); if (err) { - HCA_TRACE (CL_DBG_ERROR, ("ib_query_port failed(%d) for port %d\n",err, port_num)); + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("ib_query_port failed(%d) for port %d\n",err, port_num)); status = errno_to_iberr(err); goto err_query_port; } @@ -275,48 +292,16 @@ mlnx_query_ca ( // calculate pkeys table size num_pkeys = hca_ports[port_num].pkey_tbl_len; - size = PTR_ALIGN(sizeof(u_int16_t) * num_pkeys); + size = PTR_ALIGN(sizeof(uint16_t) * num_pkeys); required_size += size; } -#ifdef WIN_USER_SUPPORT - // handling user parameters - if( p_umv_buf && p_umv_buf->command ) - { - /* - * Prepare the buffer with the size including hca_ul_resources_sz - * NO ALIGNMENT for this size - */ - - if (p_umv_buf->p_inout_buf) - { - cl_memcpy (&priv_op, p_umv_buf->p_inout_buf, sizeof (priv_op)); - HCA_TRACE(MLNX_DBG_TRACE, ("priv_op = %d\n", priv_op)); - - /* - * Yes, UVP request for hca_ul_info - */ - if (p_umv_buf->input_size != - (sizeof (HH_hca_dev_t) + sizeof (priv_op) )) - { - *p_byte_count = required_size; - p_umv_buf->output_size = 0; - status = IB_INVALID_PARAMETER; - goto cleanup; - } - cl_memcpy( (uint8_t* __ptr64)p_umv_buf->p_inout_buf + sizeof (priv_op), - hca_ul_info, sizeof (HH_hca_dev_t)); - p_umv_buf->output_size = p_umv_buf->input_size; - } - } -#endif - // resource sufficience check if (NULL == p_ca_attr || *p_byte_count < required_size) { *p_byte_count = required_size; status = IB_INSUFFICIENT_MEMORY; if ( p_ca_attr != NULL) { - HCA_TRACE (CL_DBG_ERROR, + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_SHIM, ("Failed *p_byte_count (%d) < required_size (%d)\n", *p_byte_count, required_size )); } goto err_insuff_mem; @@ -327,7 +312,7 @@ mlnx_query_ca ( last_p += PTR_ALIGN(sizeof(*p_ca_attr)); p_ca_attr->p_page_size = (uint32_t*)last_p; - last_p += PTR_ALIGN(num_page_sizes * sizeof(u_int32_t)); + last_p += PTR_ALIGN(num_page_sizes * sizeof(uint32_t)); p_ca_attr->p_port_attr = (ib_port_attr_t *)last_p; last_p += PTR_ALIGN(num_ports * sizeof(ib_port_attr_t)); @@ -337,8 +322,8 @@ mlnx_query_ca ( size = PTR_ALIGN(sizeof(ib_gid_t) * hca_ports[port_num].gid_tbl_len); last_p += size; - p_ca_attr->p_port_attr[port_num].p_pkey_table = (u_int16_t *)last_p; - size = PTR_ALIGN(sizeof(u_int16_t) * hca_ports[port_num].pkey_tbl_len); + p_ca_attr->p_port_attr[port_num].p_pkey_table = (uint16_t *)last_p; + size = PTR_ALIGN(sizeof(uint16_t) * hca_ports[port_num].pkey_tbl_len); last_p += size; } @@ -351,7 +336,7 @@ mlnx_query_ca ( &p_ca_attr->p_port_attr[port_num].p_pkey_table[i] ); if (err) { status = errno_to_iberr(err); - HCA_TRACE (CL_DBG_ERROR, + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_SHIM, ("ib_get_cached_pkey failed (%d) for port_num %d, index %d\n", err, port_num + start_port(ib_dev), i)); goto err_get_pkey; @@ -361,11 +346,11 @@ mlnx_query_ca ( // get gids, using cache for (i=0; i < hca_ports[port_num].gid_tbl_len; ++i) { union ib_gid * __ptr64 gid = (union ib_gid *)&p_ca_attr->p_port_attr[port_num].p_gid_table[i]; - err = ib_get_cached_gid( ib_dev, port_num + start_port(ib_dev), i, (union ib_gid *)&gid ); + err = ib_get_cached_gid( ib_dev, port_num + start_port(ib_dev), i, (union ib_gid *)gid ); //TODO: do we need to convert gids to little endian if (err) { status = errno_to_iberr(err); - HCA_TRACE (CL_DBG_ERROR, + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("ib_get_cached_gid failed (%d) for port_num %d, index %d\n", err, port_num + start_port(ib_dev), i)); goto err_get_gid; @@ -373,10 
+358,10 @@ mlnx_query_ca ( } #if 0 - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("port %d gid0:", port_num)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"port %d gid0:", port_num) for (i = 0; i < 16; i++) - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, (" 0x%x", p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[i])); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("\n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM," 0x%x", p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[i]) + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"\n") #endif } @@ -384,17 +369,10 @@ mlnx_query_ca ( p_ca_attr->size = required_size; CL_ASSERT( required_size == (((uintn_t)last_p) - ((uintn_t)p_ca_attr)) ); #if 0 - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("Space required %d used %d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,HCA_DBG_SHIM ,("Space required %d used %d\n", required_size, ((uintn_t)last_p) - ((uintn_t)p_ca_attr)))); #endif -#ifdef WIN_USER_SUPPORT - // Convert query result into IBAL structure (no cl_memset()) - if( p_umv_buf && p_umv_buf->command ) - { - } -#endif - // !!! GID/PKEY tables must be queried before this call !!! mlnx_conv_hca_cap(ib_dev, &props, hca_ports, p_ca_attr); @@ -408,11 +386,10 @@ err_query_port: err_alloc_ports: err_query_device: err_byte_count: - if( p_umv_buf && p_umv_buf->command ) p_umv_buf->status = status; err_user_unsupported: if( status != IB_INSUFFICIENT_MEMORY && status != IB_SUCCESS ) - HCA_TRACE(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + HCA_EXIT(HCA_DBG_SHIM); return status; @@ -426,14 +403,14 @@ err_user_unsupported: VAPI_hca_cap_t hca_cap; VAPI_hca_port_t *hca_ports = NULL; uint32_t size, required_size; - u_int8_t port_num, num_ports; - u_int32_t num_gids, num_pkeys; - u_int32_t num_page_sizes = 1; // TBD: what is actually supported + uint8_t port_num, num_ports; + uint32_t num_gids, num_pkeys; + uint32_t num_page_sizes = 1; // TBD: what is actually supported uint8_t *last_p; void *hca_ul_resources_p = NULL; - u_int32_t priv_op; + uint32_t priv_op; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (NULL == p_byte_count) { status = IB_INVALID_PARAMETER; @@ -442,7 +419,7 @@ err_user_unsupported: mlnx_hobs_get_handle(hob_p, &hh_hndl); if (NULL == hh_hndl) { - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("returning E_NODEV dev\n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION , HCA_DBG_SHIM ,("returning E_NODEV dev\n"))); status = IB_INVALID_CA_HANDLE; goto cleanup; } @@ -457,7 +434,7 @@ err_user_unsupported: num_ports = hca_cap.phys_port_num; /* Number of physical ports of the HCA */ if (NULL == (hca_ports = cl_zalloc( num_ports * sizeof(VAPI_hca_port_t)))) { - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed to cl_zalloc ports array\n")); status = IB_INSUFFICIENT_MEMORY; goto cleanup; @@ -467,7 +444,7 @@ err_user_unsupported: num_gids = 0; num_pkeys = 0; required_size = PTR_ALIGN(sizeof(ib_ca_attr_t)) + - PTR_ALIGN(sizeof(u_int32_t) * num_page_sizes) + + PTR_ALIGN(sizeof(uint32_t) * num_page_sizes) + PTR_ALIGN(sizeof(ib_port_attr_t) * num_ports); for (port_num = 0; port_num < num_ports; port_num++) { if (HH_OK != THH_hob_query_port_prop(hh_hndl, port_num+1, &hca_ports[port_num])) { @@ -480,44 +457,15 @@ err_user_unsupported: required_size += size; num_pkeys = hca_ports[port_num].pkey_tbl_len; - size = 
PTR_ALIGN(sizeof(u_int16_t) * num_pkeys); + size = PTR_ALIGN(sizeof(uint16_t) * num_pkeys); required_size += size; } - if( p_umv_buf && p_umv_buf->command ) - { - /* - * Prepare the buffer with the size including hca_ul_resources_sz - * NO ALIGNMENT for this size - */ - - if (p_umv_buf->p_inout_buf) - { - cl_memcpy (&priv_op, p_umv_buf->p_inout_buf, sizeof (priv_op)); - CL_TRACE(MLNX_DBG_TRACE, g_mlnx_dbg_lvl, ("priv_op = %d\n", priv_op)); - - /* - * Yes, UVP request for hca_ul_info - */ - if (p_umv_buf->input_size != - (sizeof (HH_hca_dev_t) + sizeof (priv_op) )) - { - *p_byte_count = required_size; - p_umv_buf->output_size = 0; - status = IB_INVALID_PARAMETER; - goto cleanup; - } - cl_memcpy( (uint8_t* __ptr64)p_umv_buf->p_inout_buf + sizeof (priv_op), - hca_ul_info, sizeof (HH_hca_dev_t)); - p_umv_buf->output_size = p_umv_buf->input_size; - } - } - if (NULL == p_ca_attr || *p_byte_count < required_size) { *p_byte_count = required_size; status = IB_INSUFFICIENT_MEMORY; if ( p_ca_attr != NULL) { - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed *p_byte_count < required_size\n")); } goto cleanup; @@ -528,7 +476,7 @@ err_user_unsupported: last_p += PTR_ALIGN(sizeof(*p_ca_attr)); p_ca_attr->p_page_size = (uint32_t*)last_p; - last_p += PTR_ALIGN(num_page_sizes * sizeof(u_int32_t)); + last_p += PTR_ALIGN(num_page_sizes * sizeof(uint32_t)); p_ca_attr->p_port_attr = (ib_port_attr_t *)last_p; last_p += PTR_ALIGN(num_ports * sizeof(ib_port_attr_t)); @@ -538,8 +486,8 @@ err_user_unsupported: size = PTR_ALIGN(sizeof(ib_gid_t) * hca_ports[port_num].gid_tbl_len); last_p += size; - p_ca_attr->p_port_attr[port_num].p_pkey_table = (u_int16_t *)last_p; - size = PTR_ALIGN(sizeof(u_int16_t) * hca_ports[port_num].pkey_tbl_len); + p_ca_attr->p_port_attr[port_num].p_pkey_table = (uint16_t *)last_p; + size = PTR_ALIGN(sizeof(uint16_t) * hca_ports[port_num].pkey_tbl_len); last_p += size; } @@ -549,7 +497,7 @@ err_user_unsupported: hca_ports[port_num].pkey_tbl_len, p_ca_attr->p_port_attr[port_num].p_pkey_table); if (IB_SUCCESS != status) { - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed to mlnx_get_hca_pkey_tbl for port_num:%d\n",port_num)); goto cleanup; } @@ -558,7 +506,7 @@ err_user_unsupported: hca_ports[port_num].gid_tbl_len, &p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw); if (IB_SUCCESS != status) { - CL_TRACE (CL_DBG_ERROR, g_mlnx_dbg_lvl, + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed to mlnx_get_hca_gid_tbl for port_num:%d\n",port_num)); goto cleanup; } @@ -567,24 +515,16 @@ err_user_unsupported: { int i; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("port %d gid0:", port_num)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("port %d gid0:", port_num)); for (i = 0; i < 16; i++) - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, (" 0x%x", p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[i])); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("\n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,(" 0x%x", p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[i])); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("\n")); } #endif } // Convert query result into IBAL structure (no cl_memset()) - if( p_umv_buf && p_umv_buf->command ) - { - // p_ca_attr->size = required_size - hca_ul_info->hca_ul_resources_sz; - p_ca_attr->size = required_size; - } - else - { - p_ca_attr->size = required_size; - } + p_ca_attr->size = required_size; // !!! GID/PKEY tables must be queried before this call !!! 
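	/*
	 * Layout reminder: every pointer set up above references a slice of the
	 * single caller-supplied p_ca_attr buffer, carved out with PTR_ALIGN()
	 * in roughly this order:
	 *
	 *   [ ib_ca_attr_t ][ page sizes ][ ib_port_attr_t x num_ports ]
	 *   [ port0 GIDs ][ port0 PKEYs ] ... [ portN GIDs ][ portN PKEYs ]
	 *
	 * which is why the query bails out earlier with IB_INSUFFICIENT_MEMORY
	 * unless *p_byte_count covers required_size.
	 */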
mlnx_conv_vapi_hca_cap(hca_ul_info, &hca_cap, hca_ports, p_ca_attr); @@ -593,24 +533,22 @@ err_user_unsupported: CL_ASSERT( required_size == (((uintn_t)last_p) - ((uintn_t)p_ca_attr)) ); #if 0 - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("Space required %d used %d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,HCA_DBG_SHIM ,("Space required %d used %d\n", required_size, ((uintn_t)last_p) - ((uintn_t)p_ca_attr)))); #endif - if( p_umv_buf && p_umv_buf->command ) p_umv_buf->status = IB_SUCCESS; if (hca_ul_resources_p) cl_free (hca_ul_resources_p); if (hca_ports) cl_free( hca_ports ); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: - if( p_umv_buf && p_umv_buf->command ) p_umv_buf->status = status; if (hca_ul_resources_p) cl_free (hca_ul_resources_p); if (hca_ports) cl_free( hca_ports); if( p_ca_attr != NULL || status != IB_INSUFFICIENT_MEMORY ) - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n", status))); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -640,7 +578,7 @@ mlnx_modify_ca ( mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); // prepare parameters RtlZeroMemory(&props, sizeof(props)); @@ -655,15 +593,14 @@ mlnx_modify_ca ( err = mthca_modify_port(ib_dev, port_num, port_modify_mask, &props ); if (err) { status = errno_to_iberr(err); - HCA_TRACE (CL_DBG_ERROR, ("mthca_modify_port failed (%d) \n", err)); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mthca_modify_port failed (%d) \n",err)); goto err_modify_port; } status = IB_SUCCESS; err_modify_port: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else @@ -675,7 +612,7 @@ err_modify_port: VAPI_hca_attr_t hca_attr; VAPI_hca_attr_mask_t hca_attr_mask = 0; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); mlnx_hobs_get_handle(hob_p, &hh_hndl); if (NULL == hh_hndl) { @@ -713,12 +650,12 @@ err_modify_port: } } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n",status))); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -729,14 +666,14 @@ mlnx_close_ca ( { #ifndef WIN_TO_BE_CHANGED - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); // release HOB resources mlnx_hobs_remove(h_ca); //TODO: release HOBUL resources - HCA_EXIT(MLNX_DBG_TRACE); + HCA_EXIT(HCA_DBG_SHIM); return IB_SUCCESS; @@ -749,7 +686,7 @@ mlnx_close_ca ( void *hca_ul_resources_p = NULL; mlnx_hobul_t *hobul_p; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); hobul_p = mlnx_hobul_array[hob_p->index]; if( !hobul_p ) { @@ -779,12 +716,12 @@ mlnx_close_ca ( THH_hob_close_hca(hh_hndl); mlnx_hobs_remove(hob_p); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + 
HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n",status))); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -797,11 +734,73 @@ mlnx_um_open( OUT ib_ca_handle_t* const ph_um_ca ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_ca); - UNREFERENCED_PARAMETER(p_umv_buf); - UNREFERENCED_PARAMETER(ph_um_ca); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_um_open not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status; + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + hca_dev_ext_t *ext_p = EXT_FROM_HOB( hob_p ); + struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); + struct ib_ucontext *context_p; + struct mthca_alloc_ucontext_resp *uresp_p; + struct ib_pd *ib_pd_p; + struct ibv_alloc_pd_resp resp; + ci_umv_buf_t umv_buf; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity check + if( !p_umv_buf || !p_umv_buf->command || !p_umv_buf->p_inout_buf || + (p_umv_buf->output_size < sizeof *uresp_p) ) { + status = IB_INVALID_PARAMETER; + goto mlnx_um_open_err; + } + + // create user context in kernel + context_p = mthca_alloc_ucontext(ib_dev, p_umv_buf); + if (IS_ERR(context_p)) { + err = PTR_ERR(context_p); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("mthca_alloc_ucontext failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_ucontext; + } + + /* allocate pd */ + umv_buf.output_size = sizeof(struct ibv_alloc_pd_resp); + umv_buf.p_inout_buf = &resp; + //NB: Pay attention ! Ucontext parameter is important here: + // when it is present (i.e. - for user space) - mthca_alloc_pd won't create MR + context_p->pd = ibv_alloc_pd(ib_dev, context_p, &umv_buf); + if (IS_ERR(context_p->pd)) { + err = PTR_ERR(context_p->pd); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("ibv_alloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_pd; + } + + // fill more parameters for user (sanity checks are in mthca_alloc_ucontext) + uresp_p = (struct mthca_alloc_ucontext_resp *)p_umv_buf->p_inout_buf; + uresp_p->uar_addr = (uint64_t)(UINT_PTR)context_p->user_uar; + uresp_p->pd_handle = resp.pd_handle; + uresp_p->pdn = resp.pdn; + uresp_p->vend_id = (uint32_t)ext_p->hcaConfig.VendorID; + uresp_p->dev_id = (uint16_t)ext_p->hcaConfig.DeviceID; + + // return the result + if (ph_um_ca) *ph_um_ca = (ib_ca_handle_t)context_p; + + status = IB_SUCCESS; + goto end; + +err_alloc_pd: + mthca_dealloc_ucontext(context_p); +err_alloc_ucontext: +mlnx_um_open_err: +end: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status; @@ -811,12 +810,12 @@ mlnx_um_open( mlnx_um_ca_t *p_um_ca; MOSAL_protection_ctx_t prot_ctx; - HCA_ENTER( MLNX_DBG_TRACE ); + HCA_ENTER( TRACE_LEVEL_VERBOSE ); mlnx_hobs_get_handle( hob_p, &hh_hndl ); if( !hh_hndl ) { - HCA_TRACE(MLNX_DBG_INFO, ("returning E_NODEV dev\n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("returning E_NODEV dev\n")); status = IB_INVALID_CA_HANDLE; goto mlnx_um_open_err1; } @@ -840,7 +839,7 @@ mlnx_um_open( goto mlnx_um_open_err1; } - HCA_TRACE( MLNX_DBG_TRACE, ("priv_op = %d\n", p_umv_buf->command )); + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_SHIM ,("priv_op = %d\n", p_umv_buf->command )); /* Yes, UVP request for hca_ul_info. 
*/ p_um_ca = (mlnx_um_ca_t*)cl_zalloc( @@ -879,7 +878,7 @@ mlnx_um_open( prot_ctx = MOSAL_get_current_prot_ctx(); if( THH_hob_alloc_ul_res(hh_hndl, prot_ctx, p_um_ca->ul_hca_res) != HH_OK ) { - HCA_TRACE( CL_DBG_ERROR, ("Failed to get ul_res\n")); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Failed to get ul_res\n")); p_umv_buf->status = IB_ERROR; } @@ -904,28 +903,28 @@ mlnx_um_open_err1: //*ph_um_ca = NULL; p_umv_buf->output_size = sizeof(void*); - HCA_EXIT( MLNX_DBG_TRACE ); + HCA_EXIT( TRACE_LEVEL_VERBOSE ); return p_umv_buf->status; #endif } - static void mlnx_um_close( IN ib_ca_handle_t h_ca, IN ib_ca_handle_t h_um_ca ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_ca); - UNREFERENCED_PARAMETER(h_um_ca); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_um_close not implemented\n")); - return ; + + UNREFERENCED_PARAMETER(h_ca); + ibv_um_close((struct ib_ucontext *)h_um_ca); + return; + #else mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; HH_hca_hndl_t hh_hndl = NULL; mlnx_um_ca_t *p_um_ca = (mlnx_um_ca_t*)h_um_ca; - HCA_ENTER( MLNX_DBG_TRACE ); + HCA_ENTER( TRACE_LEVEL_VERBOSE ); mlnx_hobs_get_handle( hob_p, &hh_hndl ); if( !hh_hndl ) @@ -941,7 +940,7 @@ mlnx_um_close_cleanup: IoFreeMdl( p_um_ca->p_mdl ); cl_free( p_um_ca ); - HCA_EXIT( MLNX_DBG_TRACE ); + HCA_EXIT( TRACE_LEVEL_VERBOSE ); #endif } @@ -960,76 +959,44 @@ mlnx_allocate_pd ( #ifndef WIN_TO_BE_CHANGED ib_api_status_t status; - mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; - struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); - struct ib_ucontext *context_p = NULL; - struct ib_udata *udata_p = NULL; + struct ib_device *ib_dev; + struct ib_ucontext *context_p; struct ib_pd *ib_pd_p; int err; //TODO: how are we use it ? UNREFERENCED_PARAMETER(type); - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); - // sanity checks if( p_umv_buf && p_umv_buf->command ) { - HCA_TRACE (CL_DBG_ERROR, ("User mode is not supported yet\n")); - status = IB_UNSUPPORTED; - goto err_user_unsupported; + context_p = (struct ib_ucontext *)h_ca; + ib_dev = context_p->device; } - -#ifdef WIN_USER_SUPPORT - if( p_umv_buf && p_umv_buf->command ) - { - //TODO: check the below sanity check - if ((p_umv_buf->input_size - sizeof (u_int32_t)) != - hca_ul_info->pd_ul_resources_sz || - NULL == p_umv_buf->p_inout_buf) { - status = IB_INVALID_PARAMETER; - goto cleanup; - } - pd_ul_resources_p = (void *)p_umv_buf->p_inout_buf; - - //TODO: create user context by call to mthca_alloc_ucontext() + else { + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + context_p = NULL; + ib_dev = IBDEV_FROM_HOB( hob_p ); } -#endif - + // create PD - if( p_umv_buf && p_umv_buf->command ) { - //TODO: call uverbs - } - else { - ib_pd_p = ib_alloc_pd(ib_dev); - if (IS_ERR(ib_pd_p)) { - err = PTR_ERR(ib_pd_p); - HCA_TRACE (CL_DBG_ERROR, ("ib_alloc_pd failed (%d)\n", err)); - status = errno_to_iberr(err); - goto err_alloc_pd; - } + ib_pd_p = ibv_alloc_pd(ib_dev, context_p, p_umv_buf); + if (IS_ERR(ib_pd_p)) { + err = PTR_ERR(ib_pd_p); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("ibv_alloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_pd; } - // return the result - if (ph_pd) *ph_pd = (ib_pd_handle_t)ib_pd_p; -#ifdef WIN_USER_SUPPORT - if( p_umv_buf && p_umv_buf->command ) - { - p_umv_buf->output_size = p_umv_buf->input_size; - /* - * Copy the pd_idx back to user - */ - cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->pd_ul_resources_sz), - &pd_idx, sizeof (pd_idx)); - p_umv_buf->status = IB_SUCCESS; - } -#endif + // return the result + if 
(ph_pd) *ph_pd = (ib_pd_handle_t)ib_pd_p; - status = IB_SUCCESS; + status = IB_SUCCESS; err_alloc_pd: -err_user_unsupported: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else @@ -1038,11 +1005,11 @@ err_user_unsupported: HH_hca_dev_t *hca_ul_info; HHUL_pd_hndl_t hhul_pd_hndl = 0; void *pd_ul_resources_p = NULL; - u_int32_t pd_idx; + uint32_t pd_idx; ib_api_status_t status; MOSAL_protection_ctx_t prot_ctx; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); hobul_p = mlnx_hobs_get_hobul(hob_p); if (NULL == hobul_p) { @@ -1059,7 +1026,7 @@ err_user_unsupported: if( p_umv_buf && p_umv_buf->command ) { // For user mode calls - obtain and verify the vendor information - if ((p_umv_buf->input_size - sizeof (u_int32_t)) != + if ((p_umv_buf->input_size - sizeof (uint32_t)) != hca_ul_info->pd_ul_resources_sz || NULL == p_umv_buf->p_inout_buf) { status = IB_INVALID_PARAMETER; @@ -1141,7 +1108,7 @@ err_user_unsupported: cl_atomic_inc( &hobul_p->count ); if (ph_pd) *ph_pd = (ib_pd_handle_t)PD_HNDL_FROM_PD(pd_idx); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("hca_idx 0x%x pd_idx 0x%x returned 0x%p\n", hob_p->index, pd_idx, *ph_pd)); + HCA_PRINT(TRACE_LEVEL_INFORMATION , HCA_DBG_SHIM ,("hca_idx 0x%x pd_idx 0x%x returned 0x%p\n", hob_p->index, pd_idx, *ph_pd)); if( p_umv_buf && p_umv_buf->command ) { @@ -1153,7 +1120,7 @@ err_user_unsupported: &pd_idx, sizeof (pd_idx)); p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_pd: @@ -1165,11 +1132,10 @@ cleanup: cl_free( pd_ul_resources_p); if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d \n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -1184,33 +1150,32 @@ mlnx_deallocate_pd ( int err; struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM , + ("pcs %p\n", PsGetCurrentProcess())); + // dealloc pd - if( ib_pd_p->uobject ) { - //TODO: call uverbs - } - else { - err = ib_dealloc_pd( ib_pd_p ); - if (err) { - HCA_TRACE (CL_DBG_ERROR, ("ib_dealloc_pd failed (%d)\n", err)); - status = errno_to_iberr(err); - goto err_dealloc_pd; - } + err = ibv_dealloc_pd( ib_pd_p ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ibv_dealloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_dealloc_pd; } - status = IB_SUCCESS; + status = IB_SUCCESS; err_dealloc_pd: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); mlnx_hobul_t *hobul_p; HHUL_pd_hndl_t hhul_pd_hndl; ib_api_status_t status; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); 
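	/*
	 * In this legacy path the PD handle is not a pointer: it packs the HCA
	 * index and the PD number, unpacked above with
	 * PD_HCA_FROM_HNDL()/PD_NUM_FROM_HNDL().  The VALIDATE_INDEX() checks
	 * below bail out to cleanup when an index is out of range; the macro
	 * behaves approximately like (sketch):
	 *
	 *     if( idx >= limit ) { status = err; goto label; }
	 */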
VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -1226,10 +1191,12 @@ err_dealloc_pd: cl_mutex_acquire(&hobul_p->pd_info_tbl[pd_idx].mutex); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d k_mod %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count, hobul_p->pd_info_tbl[pd_idx].kernel_mode)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("pd %d count %d k_mod %d\n", pd_idx, + hobul_p->pd_info_tbl[pd_idx].count, hobul_p->pd_info_tbl[pd_idx].kernel_mode)); if (0 != hobul_p->pd_info_tbl[pd_idx].count) { - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("pd %d count %d\n", + pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); status = IB_RESOURCE_BUSY; goto cleanup_locked; } @@ -1244,15 +1211,15 @@ err_dealloc_pd: } } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d before free_pd hh_hndl %p\n", - pd_idx, hobul_p->hh_hndl)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM ,("pd %d before free_pd hh_hndl %p\n", + pd_idx, hobul_p->hh_hndl); if (HH_OK != THH_hob_free_pd(hobul_p->hh_hndl, pd_idx)) { status = IB_ERROR; goto cleanup_locked; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d after free_pd\n", pd_idx)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"pd %d after free_pd\n", pd_idx) if (hobul_p->pd_info_tbl[pd_idx].kernel_mode) { if (HH_OK != THHUL_pdm_free_pd_done(hobul_p->hhul_hndl, hhul_pd_hndl)) { @@ -1269,15 +1236,15 @@ err_dealloc_pd: cl_mutex_release(&hobul_p->pd_info_tbl[pd_idx].mutex); cl_atomic_dec( &hobul_p->count ); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_locked: cl_mutex_release(&hobul_p->pd_info_tbl[pd_idx].mutex); cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -1293,15 +1260,63 @@ mlnx_create_av ( IN OUT ci_umv_buf_t *p_umv_buf ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_pd); - UNREFERENCED_PARAMETER(p_addr_vector); - UNREFERENCED_PARAMETER(ph_av); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_create_av not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err = 0; + ib_api_status_t status = IB_SUCCESS; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_device *ib_dev_p = ib_pd_p->device; + struct ib_ah *ib_av_p; + struct ib_ah_attr ah_attr; + struct ib_ucontext *context_p = NULL; + struct ib_udata *udata_p = NULL; + + HCA_ENTER(HCA_DBG_QP); + + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_ah) || + p_umv_buf->output_size < sizeof(struct ibv_create_ah_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + context_p = ib_pd_p->ucontext; + } + else + context_p = NULL; + + // fill parameters + RtlZeroMemory(&ah_attr, sizeof(ah_attr)); + mlnx_conv_ibal_av( ib_dev_p, p_addr_vector, &ah_attr ); + + ib_av_p = ibv_create_ah(ib_pd_p, &ah_attr, context_p, p_umv_buf); + if (IS_ERR(ib_pd_p)) { + err = PTR_ERR(ib_pd_p); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_alloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_av; + } + + // return the result + if (ph_av) *ph_av = (ib_av_handle_t)ib_av_p; + + if( 
context_p ) + { + struct ibv_create_ah_resp *create_ah_resp = (struct ibv_create_ah_resp *)p_umv_buf->p_inout_buf; + cl_memcpy( &create_ah_resp->av_attr, p_addr_vector, sizeof(create_ah_resp->av_attr) ); + p_umv_buf->status = IB_SUCCESS; + } + + status = IB_SUCCESS; + +err_alloc_av: +err_inval_params: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); HHUL_ud_av_hndl_t av_h; mlnx_hobul_t *hobul_p; mlnx_avo_t *avo_p = NULL; @@ -1310,7 +1325,7 @@ mlnx_create_av ( VAPI_ud_av_t av; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -1341,7 +1356,7 @@ mlnx_create_av ( // update PD object count cl_atomic_inc(&hobul_p->pd_info_tbl[pd_idx].count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,"pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count) avo_p->mark = E_MARK_AV; @@ -1355,8 +1370,8 @@ mlnx_create_av ( { p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); - return IB_SUCCESS; + status = IB_SUCCESS; + goto end; cleanup: if (avo_p) { @@ -1365,12 +1380,10 @@ cleanup: } if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); +end: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d \n", status)); return status; #endif } @@ -1383,12 +1396,49 @@ mlnx_query_av ( IN OUT ci_umv_buf_t *p_umv_buf ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_av); - UNREFERENCED_PARAMETER(p_addr_vector); - UNREFERENCED_PARAMETER(ph_pd); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_query_av not implemented\n")); - return IB_UNSUPPORTED; + + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *ib_ah_p = (struct ib_ah *)h_av; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + // query AV +#if 0 + //TODO: not implemented in low-level driver + err = ibv_query_ah(ib_ah_p, &ah_attr) + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("ibv_query_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_query_ah; + } + // convert to IBAL structure: something like that + mlnx_conv_mthca_av( p_addr_vector, &ah_attr ); +#else + + err = mlnx_conv_mthca_av( ib_ah_p, p_addr_vector ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("mlnx_conv_mthca_av failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_conv_mthca_av; + } +#endif + + // results + *ph_pd = (ib_pd_handle_t)ib_ah_p->pd; + +err_conv_mthca_av: +err_user_unsupported: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else mlnx_avo_t *avo_p = (mlnx_avo_t *)h_av; mlnx_hobul_t *hobul_p; @@ -1396,7 +1446,7 @@ mlnx_query_av ( VAPI_ud_av_t av; - CL_ENTER(MLNX_DBG_TRACE, 
g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (!avo_p || avo_p->mark != E_MARK_AV) { status = IB_INVALID_AV_HANDLE; goto cleanup; @@ -1429,17 +1479,16 @@ mlnx_query_av ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d \n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -1451,11 +1500,43 @@ mlnx_modify_av ( IN OUT ci_umv_buf_t *p_umv_buf ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_av); - UNREFERENCED_PARAMETER(p_addr_vector); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_modify_av not implemented\n")); - return IB_UNSUPPORTED; + + int err; + struct ib_ah_attr ah_attr; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *ib_ah_p = (struct ib_ah *)h_av; + struct ib_device *ib_dev_p = ib_ah_p->pd->device; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + // fill parameters + mlnx_conv_ibal_av( ib_dev_p, p_addr_vector, &ah_attr ); + + // modify AH +#if 0 + //TODO: not implemented in low-level driver + err = ibv_modify_ah(ib_ah_p, &ah_attr) + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM,("ibv_query_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_query_ah; + } +#else + + mlnx_modify_ah( ib_ah_p, &ah_attr ); +#endif + +err_user_unsupported: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else mlnx_avo_t *avo_p = (mlnx_avo_t *)h_av; mlnx_hobul_t *hobul_p; @@ -1463,7 +1544,7 @@ mlnx_modify_av ( VAPI_ud_av_t av; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (!avo_p || avo_p->mark != E_MARK_AV) { status = IB_INVALID_AV_HANDLE; goto cleanup; @@ -1488,17 +1569,16 @@ mlnx_modify_av ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n",status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -1508,15 +1588,31 @@ mlnx_destroy_av ( IN const ib_av_handle_t h_av) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_av); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_destroy_av not implemented\n")); - return IB_INVALID_CA_HANDLE; + + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *ib_ah_p = (struct ib_ah *)h_av; + + HCA_ENTER(HCA_DBG_SHIM); + + // destroy AV + err = ibv_destroy_ah( ib_ah_p ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_SHIM,("ibv_destroy_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_ah; + } + +err_destroy_ah: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", 
ib_get_err_str(status))); + return status; + #else mlnx_avo_t *avo_p = (mlnx_avo_t *)h_av; mlnx_hobul_t *hobul_p; ib_api_status_t status; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (!avo_p || avo_p->mark != E_MARK_AV) { status = IB_INVALID_AV_HANDLE; goto cleanup; @@ -1543,11 +1639,11 @@ mlnx_destroy_av ( // update PD object count cl_atomic_dec(&hobul_p->pd_info_tbl[avo_p->pd_idx].count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", avo_p->pd_idx, hobul_p->pd_info_tbl[avo_p->pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("pd %d count %d\n", avo_p->pd_idx, hobul_p->pd_info_tbl[avo_p->pd_idx].count)); avo_p->mark = E_MARK_INVALID; cl_free( avo_p); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup: @@ -1555,8 +1651,8 @@ cleanup: avo_p->mark = E_MARK_INVALID; cl_free( avo_p); } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -1565,146 +1661,144 @@ cleanup: * Queue Pair Management Verbs */ -ib_api_status_t -mlnx_create_qp ( + +static ib_api_status_t +_create_qp ( IN const ib_pd_handle_t h_pd, + IN const uint8_t port_num, IN const void *qp_context, IN const ib_qp_create_t *p_create_attr, OUT ib_qp_attr_t *p_qp_attr, OUT ib_qp_handle_t *ph_qp, IN OUT ci_umv_buf_t *p_umv_buf ) { -#ifndef WIN_TO_BE_CHANGED - - int err; - ib_api_status_t status; - struct ib_qp * ib_qp_p; - struct mthca_qp *qp_p; - struct ib_qp_init_attr qp_init_attr; - struct ib_ucontext *context_p = NULL; - struct ib_udata *udata_p = NULL; - struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; - struct ib_device *ib_dev = ib_pd_p->device; - mlnx_hob_t *hob_p = (mlnx_hob_t *)&ib_dev->mdev->ext->hca.hob; - - HCA_ENTER(MLNX_DBG_TRACE); + int err; + ib_api_status_t status; + struct ib_qp * ib_qp_p; + struct mthca_qp *qp_p; + struct ib_qp_init_attr qp_init_attr; + struct ib_ucontext *context_p = NULL; + struct ib_udata *udata_p = NULL; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_device *ib_dev = ib_pd_p->device; + mlnx_hob_t *hob_p = HOB_FROM_IBDEV(ib_dev); + + HCA_ENTER(HCA_DBG_QP); - // sanity checks - if( p_umv_buf && p_umv_buf->command ) { - HCA_TRACE (CL_DBG_ERROR, ("User mode is not supported yet\n")); - status = IB_UNSUPPORTED; - goto err_user_unsupported; - } -#ifdef WIN_USER_SUPPORT - if( p_umv_buf && p_umv_buf->command ) - { - //TODO: check the below sanity check - if ((p_umv_buf->input_size - sizeof (u_int32_t)) != - hca_ul_info->pd_ul_resources_sz || - NULL == p_umv_buf->p_inout_buf) { + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_qp) || + p_umv_buf->output_size < sizeof(struct ibv_create_qp_resp) || + !p_umv_buf->p_inout_buf) { status = IB_INVALID_PARAMETER; - goto cleanup; + goto err_inval_params; } - - //TODO: create user context by call to mthca_alloc_ucontext() - } -#endif - - // prepare the parameters - RtlZeroMemory(&qp_init_attr, sizeof(qp_init_attr)); - qp_init_attr.qp_type = p_create_attr->qp_type; - qp_init_attr.event_handler = qp_event_handler; - qp_init_attr.qp_context = hob_p; - qp_init_attr.recv_cq = (struct ib_cq *)p_create_attr->h_rq_cq; - qp_init_attr.send_cq = (struct ib_cq *)p_create_attr->h_sq_cq; - qp_init_attr.cap.max_recv_sge = p_create_attr->rq_sge; - qp_init_attr.cap.max_send_sge = 
p_create_attr->sq_sge; - qp_init_attr.cap.max_recv_wr = p_create_attr->rq_depth; - qp_init_attr.cap.max_send_wr = p_create_attr->sq_depth; - qp_init_attr.sq_sig_type = (p_create_attr->sq_signaled) ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; - - // create QP - if( p_umv_buf && p_umv_buf->command ) { - //TODO: call uverbs - } - else { - ib_qp_p = ib_create_qp( ib_pd_p, &qp_init_attr ); + context_p = ib_pd_p->ucontext; + } + else + context_p = NULL; + + // prepare the parameters + RtlZeroMemory(&qp_init_attr, sizeof(qp_init_attr)); + qp_init_attr.qp_type = p_create_attr->qp_type; + qp_init_attr.event_handler = qp_event_handler; + qp_init_attr.qp_context = hob_p; + qp_init_attr.recv_cq = (struct ib_cq *)p_create_attr->h_rq_cq; + qp_init_attr.send_cq = (struct ib_cq *)p_create_attr->h_sq_cq; + qp_init_attr.cap.max_recv_sge = p_create_attr->rq_sge; + qp_init_attr.cap.max_send_sge = p_create_attr->sq_sge; + qp_init_attr.cap.max_recv_wr = p_create_attr->rq_depth; + qp_init_attr.cap.max_send_wr = p_create_attr->sq_depth; + qp_init_attr.sq_sig_type = (p_create_attr->sq_signaled) ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + qp_init_attr.port_num = port_num; + + + // create qp + ib_qp_p = ibv_create_qp( ib_pd_p, &qp_init_attr, context_p, p_umv_buf ); if (IS_ERR(ib_qp_p)) { err = PTR_ERR(ib_qp_p); - HCA_TRACE (CL_DBG_ERROR, ("ib_create_qp failed (%d)\n", err)); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP ,("ibv_create_qp failed (%d)\n", err)); status = errno_to_iberr(err); goto err_create_qp; } - } - - // fill the object - qp_p = (struct mthca_qp *)ib_qp_p; - qp_p->qp_context = (void*)qp_context; - - // Query QP to obtain requested attributes - if (p_qp_attr) { - status = mlnx_query_qp ((ib_qp_handle_t)ib_qp_p, p_qp_attr, p_umv_buf); - if (status != IB_SUCCESS) - goto err_query_qp; - } - // return the results - if (ph_qp) *ph_qp = (ib_qp_handle_t)ib_qp_p; + // fill the object + qp_p = (struct mthca_qp *)ib_qp_p; + qp_p->qp_context = (void*)qp_context; + qp_p->qp_init_attr = qp_init_attr; + + // Query QP to obtain requested attributes + if (p_qp_attr) { + status = mlnx_query_qp ((ib_qp_handle_t)ib_qp_p, p_qp_attr, p_umv_buf); + if (status != IB_SUCCESS) + goto err_query_qp; + } + + // return the results + if (ph_qp) *ph_qp = (ib_qp_handle_t)ib_qp_p; + + status = IB_SUCCESS; + goto end; + + err_query_qp: + ibv_destroy_qp( ib_qp_p ); + err_create_qp: + err_inval_params: + end: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; +} -#ifdef WIN_USER_SUPPORT - if( p_umv_buf && p_umv_buf->command ) - { - p_umv_buf->output_size = p_umv_buf->input_size; - /* - * Copy the pd_idx back to user - */ - cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->pd_ul_resources_sz), - &pd_idx, sizeof (pd_idx)); - p_umv_buf->status = IB_SUCCESS; - } -#endif +ib_api_status_t +mlnx_create_spl_qp ( + IN const ib_pd_handle_t h_pd, + IN const uint8_t port_num, + IN const void *qp_context, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp ) +{ +#ifndef WIN_TO_BE_CHANGED - status = IB_SUCCESS; - + ib_api_status_t status; -err_query_qp: - if( p_umv_buf && p_umv_buf->command ) { - //TODO: cleanup for user - } - else { - ib_destroy_qp( ib_qp_p ); - } -err_create_qp: -err_user_unsupported: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_SHIM); 
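A note on the error paths in _create_qp above: when ibv_create_qp fails it returns an ERR_PTR-style pointer, the negative errno is extracted with PTR_ERR and converted to an IBAL status via errno_to_iberr before jumping to the cleanup labels. The converter itself is not part of this hunk; the sketch below is only an assumption about the kind of table it implements (statuses taken from values already used in this file), not the driver's actual code.

	/* Hypothetical sketch of an errno -> ib_api_status_t mapping; the real
	 * errno_to_iberr() lives elsewhere in the driver and may differ. */
	static ib_api_status_t errno_to_iberr_sketch( int err )
	{
		switch (err < 0 ? -err : err) {
		case 0:       return IB_SUCCESS;
		case ENOMEM:  return IB_INSUFFICIENT_MEMORY;
		case EINVAL:  return IB_INVALID_PARAMETER;
		case EAGAIN:  return IB_INSUFFICIENT_RESOURCES;
		default:      return IB_ERROR;
		}
	}
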
+ + status = _create_qp( h_pd, port_num, + qp_context, p_create_attr, p_qp_attr, ph_qp, NULL ); + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else ib_api_status_t status; ib_qp_handle_t h_qp; + ci_umv_buf_t *p_umv_buf = NULL; - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); - u_int32_t qp_num; - u_int32_t qp_idx; - u_int32_t send_cq_num; - u_int32_t send_cq_idx; - u_int32_t recv_cq_num; - u_int32_t recv_cq_idx; + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t qp_num; + uint32_t qp_idx; + uint32_t send_cq_num; + uint32_t send_cq_idx; + uint32_t recv_cq_num; + uint32_t recv_cq_idx; mlnx_hobul_t *hobul_p; HH_hca_dev_t *hca_ul_info; HH_qp_init_attr_t hh_qp_init_attr; HHUL_qp_init_attr_t ul_qp_init_attr; HHUL_qp_hndl_t hhul_qp_hndl = NULL; + VAPI_special_qp_t vapi_qp_type; VAPI_qp_cap_t hh_qp_cap; void *qp_ul_resources_p = NULL; VAPI_sg_lst_entry_t *send_sge_p = NULL; VAPI_sg_lst_entry_t *recv_sge_p = NULL; - u_int32_t num_sge; + uint32_t num_sge; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -1732,7 +1826,7 @@ err_user_unsupported: // convert input parameters cl_memclr(&ul_qp_init_attr, sizeof(ul_qp_init_attr)); - mlnx_conv_qp_create_attr(p_create_attr, &ul_qp_init_attr, NULL); + mlnx_conv_qp_create_attr(p_create_attr, &ul_qp_init_attr, &vapi_qp_type); send_cq_num = CQ_NUM_FROM_HNDL(p_create_attr->h_sq_cq); recv_cq_num = CQ_NUM_FROM_HNDL(p_create_attr->h_rq_cq); send_cq_idx = send_cq_num & hobul_p->cq_idx_mask; @@ -1755,8 +1849,7 @@ err_user_unsupported: if( p_umv_buf && p_umv_buf->command ) { // For user mode calls - obtain and verify the vendor information - if ((p_umv_buf->input_size - sizeof (u_int32_t)) != - hca_ul_info->qp_ul_resources_sz || + if (p_umv_buf->input_size != hca_ul_info->qp_ul_resources_sz || NULL == p_umv_buf->p_inout_buf) { status = IB_INVALID_PARAMETER; goto cleanup; @@ -1764,19 +1857,25 @@ err_user_unsupported: qp_ul_resources_p = (void *)p_umv_buf->p_inout_buf; } else { - // for kernel mode calls - allocate app resources. Use prep->call->done sequence + // For kernel mode calls - allocate app resources. 
Use prep->call->done sequence qp_ul_resources_p = cl_zalloc( hca_ul_info->qp_ul_resources_sz); if (!qp_ul_resources_p) { status = IB_INSUFFICIENT_MEMORY; goto cleanup; } - if (HH_OK != THHUL_qpm_create_qp_prep(hobul_p->hhul_hndl, &ul_qp_init_attr, &hhul_qp_hndl, &hh_qp_cap, qp_ul_resources_p)) { - status = IB_ERROR; - goto cleanup; - } - // TBD: if not same report error to IBAL - ul_qp_init_attr.qp_cap = hh_qp_cap; // struct assign + if (HH_OK != THHUL_qpm_special_qp_prep(hobul_p->hhul_hndl, + vapi_qp_type, + port_num, + &ul_qp_init_attr, + &hhul_qp_hndl, + &hh_qp_cap, + qp_ul_resources_p)) { + status = IB_ERROR; + goto cleanup; + } + // TBD: if not same report error to IBAL + ul_qp_init_attr.qp_cap = hh_qp_cap; // struct assign } // Convert HHUL to HH structure (for HH create_qp) @@ -1795,12 +1894,18 @@ err_user_unsupported: hh_qp_init_attr.rq_cq = recv_cq_num; hh_qp_init_attr.sq_sig_type = ul_qp_init_attr.sq_sig_type; hh_qp_init_attr.rq_sig_type = ul_qp_init_attr.rq_sig_type; - hh_qp_init_attr.ts_type = ul_qp_init_attr.ts_type; + hh_qp_init_attr.ts_type = VAPI_TS_UD; hh_qp_init_attr.qp_cap = ul_qp_init_attr.qp_cap; // struct assign // Allocate the QP (cmdif) - if (HH_OK != THH_hob_create_qp(hobul_p->hh_hndl, &hh_qp_init_attr, qp_ul_resources_p, &qp_num)) { - status = IB_INSUFFICIENT_RESOURCES; + if (HH_OK != THH_hob_get_special_qp( hobul_p->hh_hndl, + vapi_qp_type, + port_num, + &hh_qp_init_attr, + qp_ul_resources_p, + &qp_num)) + { + status = IB_ERROR; goto cleanup_qp; } @@ -1832,8 +1937,6 @@ err_user_unsupported: // Save data refs for future use qp_idx = qp_num & hobul_p->qp_idx_mask; VALIDATE_INDEX(qp_idx, hobul_p->max_qp, IB_ERROR, cleanup_qp); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("hobul_p 0x%p mask 0x%x qp_idx 0x%x qp_num 0x%x\n", - hobul_p, hobul_p->qp_idx_mask, qp_idx, qp_num)); h_qp = (ib_qp_handle_t)QP_HNDL_FROM_QP(qp_idx); cl_mutex_acquire(&h_qp->mutex); @@ -1852,21 +1955,24 @@ err_user_unsupported: h_qp->h_rq_cq = &hobul_p->cq_info_tbl[recv_cq_idx]; h_qp->kernel_mode = !(p_umv_buf && p_umv_buf->command); h_qp->mark = E_MARK_QP; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("qp num 0x%x idx 0x%x cq_s 0x%x cq_r 0x%x\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("qp num 0x%x idx 0x%x cq_s 0x%x cq_r 0x%x\n", qp_num, qp_idx, send_cq_idx, recv_cq_idx)); cl_mutex_release(&h_qp->mutex); + + /* Mark the CQ's associated with this special QP as being high priority. 
*/ + cl_atomic_inc( &h_qp->h_sq_cq->spl_qp_cnt ); + KeSetImportanceDpc( &h_qp->h_sq_cq->dpc, HighImportance ); + cl_atomic_inc( &h_qp->h_rq_cq->spl_qp_cnt ); + KeSetImportanceDpc( &h_qp->h_rq_cq->dpc, HighImportance ); + // Update PD object count cl_atomic_inc(&hobul_p->pd_info_tbl[pd_idx].count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION , HCA_DBG_SHIM ,("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); // Query QP to obtain requested attributes if (p_qp_attr) { - if (IB_SUCCESS != (status = mlnx_query_qp (h_qp, p_qp_attr, p_umv_buf))) - { - if( !(p_umv_buf && p_umv_buf->command) ) - goto cleanup_qp; - else - goto cleanup; + if (IB_SUCCESS != (status = mlnx_query_qp (h_qp, p_qp_attr, p_umv_buf))) { + goto cleanup; } } @@ -1875,13 +1981,8 @@ err_user_unsupported: { p_umv_buf->output_size = p_umv_buf->input_size; p_umv_buf->status = IB_SUCCESS; - /* - * Copy the qp_idx back to user - */ - cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->qp_ul_resources_sz), - &qp_num, sizeof (qp_num)); } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_qp: @@ -1891,63 +1992,67 @@ cleanup_qp: THHUL_qpm_destroy_qp_done(hobul_p->hhul_hndl, hhul_qp_hndl); cleanup: - if( !(p_umv_buf && p_umv_buf->command) && qp_ul_resources_p) + if( !(p_umv_buf && p_umv_buf->command) && qp_ul_resources_p ) cl_free( qp_ul_resources_p); if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("stauts %d\n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } ib_api_status_t -mlnx_create_spl_qp ( +mlnx_create_qp ( IN const ib_pd_handle_t h_pd, - IN const uint8_t port_num, IN const void *qp_context, IN const ib_qp_create_t *p_create_attr, OUT ib_qp_attr_t *p_qp_attr, - OUT ib_qp_handle_t *ph_qp ) + OUT ib_qp_handle_t *ph_qp, + IN OUT ci_umv_buf_t *p_umv_buf ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_pd); - UNREFERENCED_PARAMETER(qp_context); - UNREFERENCED_PARAMETER(p_create_attr); - UNREFERENCED_PARAMETER(p_qp_attr); - UNREFERENCED_PARAMETER(ph_qp); - UNREFERENCED_PARAMETER(port_num); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_create_spl_qp not implemented\n")); - return IB_UNSUPPORTED; + + ib_api_status_t status; + //NB: algorithm of mthca_alloc_sqp() requires port_num + // PRM states, that special pares are created in couples, so + // looks like we can put here port_num = 1 always + uint8_t port_num = 1; + + HCA_ENTER(HCA_DBG_SHIM); + + status = _create_qp( h_pd, port_num, + qp_context, p_create_attr, p_qp_attr, ph_qp, p_umv_buf ); + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status; ib_qp_handle_t h_qp; - ci_umv_buf_t *p_umv_buf = NULL; - u_int32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); - u_int32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); - u_int32_t qp_num; - u_int32_t qp_idx; - u_int32_t send_cq_num; - u_int32_t send_cq_idx; - u_int32_t recv_cq_num; - u_int32_t recv_cq_idx; + uint32_t hca_idx = PD_HCA_FROM_HNDL(h_pd); + uint32_t pd_idx = PD_NUM_FROM_HNDL(h_pd); + uint32_t qp_num; + uint32_t qp_idx; + uint32_t send_cq_num; + uint32_t send_cq_idx; + uint32_t recv_cq_num; + uint32_t 
recv_cq_idx; mlnx_hobul_t *hobul_p; HH_hca_dev_t *hca_ul_info; HH_qp_init_attr_t hh_qp_init_attr; HHUL_qp_init_attr_t ul_qp_init_attr; HHUL_qp_hndl_t hhul_qp_hndl = NULL; - VAPI_special_qp_t vapi_qp_type; VAPI_qp_cap_t hh_qp_cap; void *qp_ul_resources_p = NULL; VAPI_sg_lst_entry_t *send_sge_p = NULL; VAPI_sg_lst_entry_t *recv_sge_p = NULL; - u_int32_t num_sge; + uint32_t num_sge; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_QP); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -1975,7 +2080,7 @@ mlnx_create_spl_qp ( // convert input parameters cl_memclr(&ul_qp_init_attr, sizeof(ul_qp_init_attr)); - mlnx_conv_qp_create_attr(p_create_attr, &ul_qp_init_attr, &vapi_qp_type); + mlnx_conv_qp_create_attr(p_create_attr, &ul_qp_init_attr, NULL); send_cq_num = CQ_NUM_FROM_HNDL(p_create_attr->h_sq_cq); recv_cq_num = CQ_NUM_FROM_HNDL(p_create_attr->h_rq_cq); send_cq_idx = send_cq_num & hobul_p->cq_idx_mask; @@ -1998,7 +2103,8 @@ mlnx_create_spl_qp ( if( p_umv_buf && p_umv_buf->command ) { // For user mode calls - obtain and verify the vendor information - if (p_umv_buf->input_size != hca_ul_info->qp_ul_resources_sz || + if ((p_umv_buf->input_size - sizeof (uint32_t)) != + hca_ul_info->qp_ul_resources_sz || NULL == p_umv_buf->p_inout_buf) { status = IB_INVALID_PARAMETER; goto cleanup; @@ -2006,25 +2112,19 @@ mlnx_create_spl_qp ( qp_ul_resources_p = (void *)p_umv_buf->p_inout_buf; } else { - // For kernel mode calls - allocate app resources. Use prep->call->done sequence + // for kernel mode calls - allocate app resources. Use prep->call->done sequence qp_ul_resources_p = cl_zalloc( hca_ul_info->qp_ul_resources_sz); if (!qp_ul_resources_p) { status = IB_INSUFFICIENT_MEMORY; goto cleanup; } - if (HH_OK != THHUL_qpm_special_qp_prep(hobul_p->hhul_hndl, - vapi_qp_type, - port_num, - &ul_qp_init_attr, - &hhul_qp_hndl, - &hh_qp_cap, - qp_ul_resources_p)) { - status = IB_ERROR; - goto cleanup; - } - // TBD: if not same report error to IBAL - ul_qp_init_attr.qp_cap = hh_qp_cap; // struct assign + if (HH_OK != THHUL_qpm_create_qp_prep(hobul_p->hhul_hndl, &ul_qp_init_attr, &hhul_qp_hndl, &hh_qp_cap, qp_ul_resources_p)) { + status = IB_ERROR; + goto cleanup; + } + // TBD: if not same report error to IBAL + ul_qp_init_attr.qp_cap = hh_qp_cap; // struct assign } // Convert HHUL to HH structure (for HH create_qp) @@ -2043,18 +2143,12 @@ mlnx_create_spl_qp ( hh_qp_init_attr.rq_cq = recv_cq_num; hh_qp_init_attr.sq_sig_type = ul_qp_init_attr.sq_sig_type; hh_qp_init_attr.rq_sig_type = ul_qp_init_attr.rq_sig_type; - hh_qp_init_attr.ts_type = VAPI_TS_UD; + hh_qp_init_attr.ts_type = ul_qp_init_attr.ts_type; hh_qp_init_attr.qp_cap = ul_qp_init_attr.qp_cap; // struct assign // Allocate the QP (cmdif) - if (HH_OK != THH_hob_get_special_qp( hobul_p->hh_hndl, - vapi_qp_type, - port_num, - &hh_qp_init_attr, - qp_ul_resources_p, - &qp_num)) - { - status = IB_ERROR; + if (HH_OK != THH_hob_create_qp(hobul_p->hh_hndl, &hh_qp_init_attr, qp_ul_resources_p, &qp_num)) { + status = IB_INSUFFICIENT_RESOURCES; goto cleanup_qp; } @@ -2086,6 +2180,8 @@ mlnx_create_spl_qp ( // Save data refs for future use qp_idx = qp_num & hobul_p->qp_idx_mask; VALIDATE_INDEX(qp_idx, hobul_p->max_qp, IB_ERROR, cleanup_qp); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("hobul_p 0x%p mask 0x%x qp_idx 0x%x qp_num 0x%x\n", + hobul_p, hobul_p->qp_idx_mask, qp_idx, qp_num)); h_qp = (ib_qp_handle_t)QP_HNDL_FROM_QP(qp_idx); cl_mutex_acquire(&h_qp->mutex); @@ -2104,24 +2200,21 @@ 
mlnx_create_spl_qp ( h_qp->h_rq_cq = &hobul_p->cq_info_tbl[recv_cq_idx]; h_qp->kernel_mode = !(p_umv_buf && p_umv_buf->command); h_qp->mark = E_MARK_QP; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("qp num 0x%x idx 0x%x cq_s 0x%x cq_r 0x%x\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("qp num 0x%x idx 0x%x cq_s 0x%x cq_r 0x%x\n", qp_num, qp_idx, send_cq_idx, recv_cq_idx)); cl_mutex_release(&h_qp->mutex); - - /* Mark the CQ's associated with this special QP as being high priority. */ - cl_atomic_inc( &h_qp->h_sq_cq->spl_qp_cnt ); - KeSetImportanceDpc( &h_qp->h_sq_cq->dpc, HighImportance ); - cl_atomic_inc( &h_qp->h_rq_cq->spl_qp_cnt ); - KeSetImportanceDpc( &h_qp->h_rq_cq->dpc, HighImportance ); - // Update PD object count cl_atomic_inc(&hobul_p->pd_info_tbl[pd_idx].count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); // Query QP to obtain requested attributes if (p_qp_attr) { - if (IB_SUCCESS != (status = mlnx_query_qp (h_qp, p_qp_attr, p_umv_buf))) { - goto cleanup; + if (IB_SUCCESS != (status = mlnx_query_qp (h_qp, p_qp_attr, p_umv_buf))) + { + if( !(p_umv_buf && p_umv_buf->command) ) + goto cleanup_qp; + else + goto cleanup; } } @@ -2130,8 +2223,13 @@ mlnx_create_spl_qp ( { p_umv_buf->output_size = p_umv_buf->input_size; p_umv_buf->status = IB_SUCCESS; + /* + * Copy the qp_idx back to user + */ + cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->qp_ul_resources_sz), + &qp_num, sizeof (qp_num)); } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_QP); return IB_SUCCESS; cleanup_qp: @@ -2141,15 +2239,13 @@ cleanup_qp: THHUL_qpm_destroy_qp_done(hobul_p->hhul_hndl, hhul_qp_hndl); cleanup: - if( !(p_umv_buf && p_umv_buf->command) && qp_ul_resources_p ) + if( !(p_umv_buf && p_umv_buf->command) && qp_ul_resources_p) cl_free( qp_ul_resources_p); if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("status %d\n", status)); return status; #endif } @@ -2162,25 +2258,87 @@ mlnx_modify_qp ( IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_qp); - UNREFERENCED_PARAMETER(p_modify_attr); - UNREFERENCED_PARAMETER(p_qp_attr); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_modify_qp not implemented\n")); - return IB_INVALID_CA_HANDLE; + + ib_api_status_t status; + int err; + struct ib_qp_attr qp_attr; + int qp_attr_mask; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + + HCA_ENTER(HCA_DBG_QP); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->output_size < sizeof(struct ibv_modify_qp_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + } + + // fill parameters + status = mlnx_conv_qp_modify_attr( ib_qp_p, ib_qp_p->qp_type, + p_modify_attr, &qp_attr, &qp_attr_mask ); + if (status == IB_NOT_DONE) + goto query_qp; + if (status != IB_SUCCESS ) + goto err_mode_unsupported; + + // modify QP + err = ibv_modify_qp(ib_qp_p, &qp_attr, qp_attr_mask); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP ,("ibv_modify_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto 
err_modify_qp; + } + + // Query QP to obtain requested attributes +query_qp: + if (p_qp_attr) { + status = mlnx_query_qp ((ib_qp_handle_t)ib_qp_p, p_qp_attr, p_umv_buf); + if (status != IB_SUCCESS) + goto err_query_qp; + } + + if( p_umv_buf && p_umv_buf->command ) + { + struct ibv_modify_qp_resp resp; + resp.attr_mask = qp_attr_mask; + resp.qp_state = qp_attr.qp_state; + err = ib_copy_to_umv_buf(p_umv_buf, &resp, sizeof(struct ibv_modify_qp_resp)); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("ib_copy_to_umv_buf failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_copy; + } + } + + status = IB_SUCCESS; + +err_copy: +err_query_qp: +err_modify_qp: +err_mode_unsupported: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + #else ib_api_status_t status; - u_int32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); - u_int32_t qp_num = QP_NUM_FROM_HNDL(h_qp); - u_int32_t qp_idx = 0; + uint32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); + uint32_t qp_num = QP_NUM_FROM_HNDL(h_qp); + uint32_t qp_idx = 0; mlnx_hobul_t *hobul_p; HHUL_qp_hndl_t hhul_qp_hndl; VAPI_qp_attr_mask_t hh_qp_attr_mask; VAPI_qp_attr_t hh_qp_attr; VAPI_qp_state_t hh_qp_state; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_QP); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -2196,8 +2354,8 @@ mlnx_modify_qp ( goto cleanup; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, - ("Before acquire mutex to modify qp_idx 0x%x\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP, + ("Before acquire mutex to modify qp_idx 0x%x\n", qp_idx)); cl_mutex_acquire(&hobul_p->qp_info_tbl[qp_idx].mutex); @@ -2228,7 +2386,7 @@ mlnx_modify_qp ( goto cleanup_locked; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP, ("After hob_modify_qp qp_idx 0x%x k_mod %d\n", qp_idx, hobul_p->qp_info_tbl[qp_idx].kernel_mode)); @@ -2249,7 +2407,7 @@ mlnx_modify_qp ( } if ( p_umv_buf && p_umv_buf->command && (! 
hobul_p->qp_info_tbl[qp_idx].kernel_mode)) { - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP, ("mod_qp qp_idx %d umv_buf %p inout_buf %p\n", qp_idx, p_umv_buf, p_umv_buf->p_inout_buf)); if (p_umv_buf->p_inout_buf) { @@ -2259,7 +2417,7 @@ mlnx_modify_qp ( p_umv_buf->status = IB_SUCCESS; } } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_QP); return IB_SUCCESS; @@ -2269,11 +2427,9 @@ cleanup_locked: cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("status %d\n", status)); return status; #endif } @@ -2285,21 +2441,65 @@ mlnx_query_qp ( IN OUT ci_umv_buf_t *p_umv_buf ) { #ifndef WIN_TO_BE_CHANGED - UNREFERENCED_PARAMETER(h_qp); - UNREFERENCED_PARAMETER(p_qp_attr); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_query_qp not implemented\n")); - return IB_UNSUPPORTED; + + ib_api_status_t status = IB_SUCCESS; + int err=0; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + struct mthca_qp *qp_p = (struct mthca_qp *)ib_qp_p; + + // sanity checks + + // clean the structure + RtlZeroMemory( p_qp_attr, sizeof *p_qp_attr ); + + // fill the structure + //TODO: this function is to be implemented via ibv_query_qp, which is not supported now + p_qp_attr->h_pd = (ib_pd_handle_t)qp_p->ibqp.pd; + p_qp_attr->qp_type = qp_p->ibqp.qp_type; + p_qp_attr->sq_max_inline = qp_p->qp_init_attr.cap.max_inline_data; + p_qp_attr->sq_depth = qp_p->qp_init_attr.cap.max_send_wr; + p_qp_attr->rq_depth = qp_p->qp_init_attr.cap.max_recv_wr; + p_qp_attr->sq_sge = qp_p->qp_init_attr.cap.max_send_sge; + p_qp_attr->rq_sge = qp_p->qp_init_attr.cap.max_recv_sge; + p_qp_attr->resp_res = qp_p->resp_depth; + p_qp_attr->h_sq_cq = (ib_cq_handle_t)qp_p->ibqp.send_cq; + p_qp_attr->h_rq_cq = (ib_cq_handle_t)qp_p->ibqp.recv_cq; + p_qp_attr->sq_signaled = qp_p->sq_policy == IB_SIGNAL_ALL_WR; + p_qp_attr->state = mlnx_qps_to_ibal( qp_p->state ); + p_qp_attr->num = cl_hton32(qp_p->ibqp.qp_num); + +#if 0 +//TODO: don't know how to fill the following fileds without support of query_qp in MTHCA + p_qp_attr->access_ctrl = qp_p-> + p_qp_attr->pkey_index = qp_p-> + p_qp_attr->dest_num = qp_p- + p_qp_attr->init_depth = qp_p- + p_qp_attr->qkey = qp_p- + p_qp_attr->sq_psn = qp_p- + p_qp_attr->rq_psn = qp_p- + p_qp_attr->primary_port = qp_p- + p_qp_attr->alternate_port = qp_p- + p_qp_attr->primary_av = qp_p- + p_qp_attr->alternate_av = qp_p- + p_qp_attr->apm_state = qp_p- +#endif + + status = IB_SUCCESS; + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_QP ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return status; + + #else ib_api_status_t status; - u_int32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); - u_int32_t qp_num = QP_NUM_FROM_HNDL(h_qp); - u_int32_t qp_idx = 0; + uint32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); + uint32_t qp_num = QP_NUM_FROM_HNDL(h_qp); + uint32_t qp_idx = 0; mlnx_hobul_t *hobul_p; VAPI_qp_attr_t hh_qp_attr; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_QP); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -2337,7 +2537,7 @@ mlnx_query_qp ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_QP); return IB_SUCCESS; cleanup_locked: 
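Since ibv_query_qp is not available in the low-level driver yet, the new mlnx_query_qp path above answers from the ib_qp_init_attr that _create_qp cached on the mthca_qp object rather than from hardware. A hedged caller-side sketch of reading back the granted queue depths follows; requested_sq_depth and requested_rq_depth are hypothetical names for whatever the caller originally asked for.

	ib_qp_attr_t qp_attr;

	if (IB_SUCCESS == mlnx_query_qp( h_qp, &qp_attr, NULL )) {
		/* values reflect the create-time attributes cached in
		 * qp_init_attr, not a hardware query */
		CL_ASSERT( qp_attr.sq_depth >= requested_sq_depth );
		CL_ASSERT( qp_attr.rq_depth >= requested_rq_depth );
	}
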
@@ -2348,8 +2548,8 @@ cleanup: p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("status %d\n", status)); return status; #endif } @@ -2360,47 +2560,46 @@ mlnx_destroy_qp ( IN const uint64_t timewait ) { #ifndef WIN_TO_BE_CHANGED - + ib_api_status_t status; int err; struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; - + UNUSED_PARAM( timewait ); - // destroy CQ - if( ib_qp_p->uobject ) { - //TODO: call uverbs - } - else { - err = ib_destroy_qp( ib_qp_p ); - if (err) { - HCA_TRACE (CL_DBG_ERROR, ("ib_destroy_qp failed (%d)\n", err)); - status = errno_to_iberr(err); - goto err_destroy_qp; - } + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM , + ("qpnum %#x, pcs %p\n", ib_qp_p->qp_num, PsGetCurrentProcess()) ); + + err = ibv_destroy_qp( ib_qp_p ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_destroy_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_qp; } status = IB_SUCCESS; err_destroy_qp: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else + ib_api_status_t status; - u_int32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); - u_int32_t qp_num = QP_NUM_FROM_HNDL(h_qp); - u_int32_t pd_idx = 0; - u_int32_t qp_idx = 0; + uint32_t hca_idx = QP_HCA_FROM_HNDL(h_qp); + uint32_t qp_num = QP_NUM_FROM_HNDL(h_qp); + uint32_t pd_idx = 0; + uint32_t qp_idx = 0; mlnx_hobul_t *hobul_p; HHUL_qp_hndl_t hhul_qp_hndl; UNUSED_PARAM( timewait ); - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("hca %d qp 0x%x\n", hca_idx, qp_num)); + HCA_ENTER( HCA_DBG_QP); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("hca %d qp 0x%x\n", hca_idx, qp_num)); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CA_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -2410,13 +2609,14 @@ err_destroy_qp: } qp_idx = qp_num & hobul_p->qp_idx_mask; - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("hobul_p 0x%p mask 0x%x qp_idx 0x%x mark %d\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP ,("hobul_p 0x%p mask 0x%x qp_idx 0x%x mark %d\n", hobul_p, hobul_p->qp_idx_mask, qp_idx, hobul_p->qp_info_tbl[qp_idx].mark)); VALIDATE_INDEX(qp_idx, hobul_p->max_qp, IB_INVALID_QP_HANDLE, cleanup); if ( E_MARK_QP != hobul_p->qp_info_tbl[qp_idx].mark) { if (E_MARK_INVALID == hobul_p->qp_info_tbl[qp_idx].mark) { - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(IB_INVALID_QP_HANDLE))); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %s\n", ib_get_err_str(IB_INVALID_QP_HANDLE))); return IB_SUCCESS; // Already freed } status = IB_INVALID_QP_HANDLE; @@ -2430,13 +2630,13 @@ err_destroy_qp: VALIDATE_INDEX(pd_idx, hobul_p->max_pd, IB_ERROR, cleanup_locked); if (E_MARK_PD != hobul_p->pd_info_tbl[pd_idx].mark) { - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("file %s line %d\n", __FILE__, __LINE__)); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd_idx 0x%x mark %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].mark)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("file %s line %d\n", __FILE__, __LINE__)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("pd_idx 0x%x mark %d\n", pd_idx, 
hobul_p->pd_info_tbl[pd_idx].mark)); status = IB_INVALID_PD_HANDLE; goto cleanup_locked; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP, ("Before THH_destroy qp_idx 0x%x k_mod %d pd_idx 0x%x\n", qp_idx, hobul_p->qp_info_tbl[qp_idx].kernel_mode, pd_idx)); @@ -2446,7 +2646,7 @@ err_destroy_qp: goto cleanup_locked; } - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP, ("After THH_destroy qp_idx 0x%x k_mod %d pd_idx 0x%x\n", qp_idx, hobul_p->qp_info_tbl[qp_idx].kernel_mode, pd_idx)); @@ -2477,16 +2677,16 @@ err_destroy_qp: // Update PD object count cl_atomic_dec(&hobul_p->pd_info_tbl[pd_idx].count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP,("pd %d count %d\n", pd_idx, hobul_p->pd_info_tbl[pd_idx].count)); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_QP); return IB_SUCCESS; cleanup_locked: cl_mutex_release(&hobul_p->qp_info_tbl[qp_idx].mutex); cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("status %d\n",status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -2509,49 +2709,42 @@ mlnx_create_cq ( ib_api_status_t status; struct ib_cq *ib_cq_p; struct mthca_cq *cq_p; - mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; - struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); - struct ib_ucontext *context_p = NULL; - struct ib_udata *udata_p = NULL; + mlnx_hob_t *hob_p; + struct ib_device *ib_dev; + struct ib_ucontext *context_p; + struct ib_pd *ib_pd_p; - HCA_ENTER(MLNX_DBG_TRACE); + HCA_ENTER(HCA_DBG_CQ); - // sanity checks if( p_umv_buf && p_umv_buf->command ) { - HCA_TRACE (CL_DBG_ERROR, ("User mode is not supported yet\n")); - status = IB_UNSUPPORTED; - goto err_user_unsupported; - } - -#ifdef WIN_USER_SUPPORT - if( p_umv_buf && p_umv_buf->command ) - { - //TODO: check the below sanity check - if ((p_umv_buf->input_size - sizeof (u_int32_t)) != - hca_ul_info->pd_ul_resources_sz || - NULL == p_umv_buf->p_inout_buf) { - status = IB_INVALID_PARAMETER; - goto cleanup; - } - //TODO: create user context by call to mthca_alloc_ucontext() - } -#endif + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_cq) || + p_umv_buf->output_size < sizeof(struct ibv_create_cq_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } - // create CQ - if( p_umv_buf && p_umv_buf->command ) { - //TODO: call uverbs + context_p = (struct ib_ucontext *)h_ca; + hob_p = HOB_FROM_IBDEV(context_p->device); + ib_dev = context_p->device; } else { - ib_cq_p = ib_create_cq(ib_dev, - cq_comp_handler, cq_event_handler, - hob_p, *p_size ); - if (IS_ERR(ib_cq_p)) { - err = PTR_ERR(ib_cq_p); - HCA_TRACE (CL_DBG_ERROR, ("ib_create_cq failed (%d)\n", err)); - status = errno_to_iberr(err); - goto err_create_cq; - } + hob_p = (mlnx_hob_t *)h_ca; + context_p = NULL; + ib_dev = IBDEV_FROM_HOB( hob_p ); + } + + // allocate cq + ib_cq_p = ibv_create_cq(ib_dev, + cq_comp_handler, cq_event_handler, + hob_p, *p_size, context_p, p_umv_buf ); + if (IS_ERR(ib_cq_p)) { + err = PTR_ERR(ib_cq_p); + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_CQ, ("ibv_create_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_cq; } // fill the object @@ -2559,43 +2752,32 @@ mlnx_create_cq ( cq_p->cq_context = (void*)cq_context; 
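In the rewritten mlnx_create_cq above, h_ca is interpreted either as the caller's ib_ucontext (user-mode, with p_umv_buf carrying an ibv_create_cq / ibv_create_cq_resp pair) or as the mlnx_hob_t itself (kernel mode), and *p_size is written back a few lines below with the CQE count actually allocated. A hedged kernel-mode caller sketch, assuming the usual (h_ca, cq_context, p_size, ph_cq, p_umv_buf) ordering of the CI create_cq entry point; my_cq_context is a placeholder:

	ib_cq_handle_t h_cq;
	uint32_t size = 256;	/* requested CQEs; the HCA may round this up */
	ib_api_status_t status;

	status = mlnx_create_cq( h_ca, my_cq_context, &size, &h_cq, NULL );
	if (status == IB_SUCCESS) {
		/* size now holds the CQE count the HCA really allocated */
	}
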
// return the result - if (ph_cq) *ph_cq = (ib_cq_handle_t)ib_cq_p; - -#ifdef WIN_USER_SUPPORT - if( p_umv_buf && p_umv_buf->command ) - { - p_umv_buf->output_size = p_umv_buf->input_size; - /* - * Copy the pd_idx back to user - */ - cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->pd_ul_resources_sz), - &pd_idx, sizeof (pd_idx)); - p_umv_buf->status = IB_SUCCESS; - } -#endif + *p_size = ib_cq_p->cqe; + if (ph_cq) *ph_cq = (ib_cq_handle_t)cq_p; status = IB_SUCCESS; -err_create_cq: -err_user_unsupported: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); +err_create_cq: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else ib_api_status_t status; mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; - u_int32_t cq_idx; - u_int32_t cq_num; - u_int32_t cq_size = 0; + uint32_t cq_idx; + uint32_t cq_num; + uint32_t cq_size = 0; mlnx_hobul_t *hobul_p; HH_hca_dev_t *hca_ul_info; HHUL_cq_hndl_t hhul_cq_hndl = NULL; void *cq_ul_resources_p = NULL; MOSAL_protection_ctx_t prot_ctx; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_CQ); hobul_p = mlnx_hobs_get_hobul(hob_p); if (NULL == hobul_p) { @@ -2619,7 +2801,7 @@ err_user_unsupported: if( p_umv_buf && p_umv_buf->command ) { // For user mode calls - obtain and verify the vendor information - if ((p_umv_buf->input_size - sizeof (u_int32_t)) != + if ((p_umv_buf->input_size - sizeof (uint32_t)) != hca_ul_info->cq_ul_resources_sz || NULL == p_umv_buf->p_inout_buf) { status = IB_INVALID_PARAMETER; @@ -2676,7 +2858,7 @@ err_user_unsupported: // Update CA object count cl_atomic_inc(&hobul_p->count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("HCA %d count %d\n", h_ca->index, hobul_p->count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_CQ,("HCA %d count %d\n", h_ca->index, hobul_p->count)); *p_size = cq_size; if (ph_cq) *ph_cq = (ib_cq_handle_t)CQ_HNDL_FROM_CQ(cq_idx); @@ -2691,7 +2873,7 @@ err_user_unsupported: cl_memcpy (((uint8_t* __ptr64)p_umv_buf->p_inout_buf + hca_ul_info->cq_ul_resources_sz), &cq_num, sizeof (cq_num)); } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_cq: @@ -2702,11 +2884,10 @@ cleanup: cl_free( cq_ul_resources_p); if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("status %d\n",status)); return status; #endif } @@ -2720,21 +2901,23 @@ mlnx_resize_cq ( #ifndef WIN_TO_BE_CHANGED UNREFERENCED_PARAMETER(h_cq); UNREFERENCED_PARAMETER(p_size); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_resize_cq not implemented\n")); + if (p_umv_buf && p_umv_buf->command) { + p_umv_buf->status = IB_UNSUPPORTED; + } + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mlnx_resize_cq not implemented\n")); return IB_UNSUPPORTED; #else ib_api_status_t status; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; mlnx_hobul_t *hobul_p; HHUL_cq_hndl_t hhul_cq_hndl; void *cq_ul_resources_p = NULL; - 
CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (!p_size) { status = IB_INVALID_PARAMETER; @@ -2803,7 +2986,7 @@ mlnx_resize_cq ( p_umv_buf->output_size = p_umv_buf->input_size; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_locked: @@ -2812,11 +2995,10 @@ cleanup_locked: cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -2830,19 +3012,21 @@ mlnx_query_cq ( #ifndef WIN_TO_BE_CHANGED UNREFERENCED_PARAMETER(h_cq); UNREFERENCED_PARAMETER(p_size); - UNREFERENCED_PARAMETER(p_umv_buf); - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("mlnx_query_cq not implemented\n")); + if (p_umv_buf && p_umv_buf->command) { + p_umv_buf->status = IB_UNSUPPORTED; + } + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mlnx_query_cq not implemented\n")); return IB_UNSUPPORTED; #else ib_api_status_t status; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; mlnx_hobul_t *hobul_p; HHUL_cq_hndl_t hhul_cq_hndl; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); if (!p_size) { status = IB_INVALID_PARAMETER; @@ -2878,7 +3062,7 @@ mlnx_query_cq ( p_umv_buf->output_size = 0; p_umv_buf->status = IB_SUCCESS; } - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_locked: @@ -2887,11 +3071,10 @@ cleanup_locked: cleanup: if( p_umv_buf && p_umv_buf->command ) { - p_umv_buf->output_size = 0; p_umv_buf->status = status; } - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -2906,37 +3089,36 @@ mlnx_destroy_cq ( int err; struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM , + ("cqn %#x, pcs %p\n", ((struct mthca_cq*)ib_cq_p)->cqn, PsGetCurrentProcess()) ); + // destroy CQ - if( ib_cq_p->uobject ) { - //TODO: call uverbs - } - else { - err = ib_destroy_cq( ib_cq_p ); - if (err) { - HCA_TRACE (CL_DBG_ERROR, ("ib_destroy_cq failed (%d)\n", err)); - status = errno_to_iberr(err); - goto err_destroy_cq; - } + err = ibv_destroy_cq( ib_cq_p ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_SHIM,("ibv_destroy_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_cq; } status = IB_SUCCESS; err_destroy_cq: - HCA_TRACE_ERR(CL_DBG_ERROR, ("completes with ERROR status %s\n", ib_get_err_str(status))); - HCA_EXIT(MLNX_DBG_TRACE); + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); return status; #else ib_api_status_t status; - u_int32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); - u_int32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); - u_int32_t cq_idx; -// u_int32_t pd_idx = 0; + uint32_t hca_idx = CQ_HCA_FROM_HNDL(h_cq); + uint32_t cq_num = CQ_NUM_FROM_HNDL(h_cq); + uint32_t cq_idx; +// uint32_t pd_idx = 0; mlnx_hobul_t *hobul_p; 
HHUL_cq_hndl_t hhul_cq_hndl; - CL_ENTER(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_ENTER( HCA_DBG_SHIM); VALIDATE_INDEX(hca_idx, MLNX_MAX_HCA, IB_INVALID_CQ_HANDLE, cleanup); hobul_p = mlnx_hobul_array[hca_idx]; @@ -2983,18 +3165,18 @@ err_destroy_cq: // Update CA object count cl_atomic_dec(&hobul_p->count); - CL_TRACE(MLNX_DBG_INFO, g_mlnx_dbg_lvl, ("CA %d count %d\n", hca_idx, hobul_p->count)); + HCA_PRINT(TRACE_LEVEL_INFORMATION , HCA_DBG_SHIM ,("CA %d count %d\n", hca_idx, hobul_p->count)); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_EXIT( HCA_DBG_SHIM); return IB_SUCCESS; cleanup_locked: cl_mutex_release(&hobul_p->cq_info_tbl[cq_idx].mutex); cleanup: - CL_TRACE(CL_DBG_ERROR, g_mlnx_dbg_lvl, ("completes with ERROR status %s\n", ib_get_err_str(status))); - CL_EXIT(MLNX_DBG_TRACE, g_mlnx_dbg_lvl); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("status %d\n", status)); + HCA_EXIT( HCA_DBG_SHIM); return status; #endif } @@ -3016,12 +3198,12 @@ setup_ci_interface( /* UVP name */ cl_memcpy( p_interface->libname, mlnx_uvp_lib_name, MAX_LIB_NAME); - CL_TRACE(MLNX_DBG_TRACE, g_mlnx_dbg_lvl, ("UVP filename %s\n", p_interface->libname)); + HCA_PRINT(TRACE_LEVEL_VERBOSE , HCA_DBG_SHIM ,("UVP filename %s\n", p_interface->libname)); /* The real interface. */ p_interface->open_ca = mlnx_open_ca; p_interface->query_ca = mlnx_query_ca; - p_interface->modify_ca = mlnx_modify_ca; // ++ + p_interface->modify_ca = mlnx_modify_ca; p_interface->close_ca = mlnx_close_ca; p_interface->um_open_ca = mlnx_um_open; p_interface->um_close_ca = mlnx_um_close; @@ -3057,4 +3239,3 @@ setup_ci_interface( return; } - diff --git a/branches/MTHCA/hw/mthca/kernel/ib_at.h b/branches/MTHCA/hw/mthca/kernel/ib_at.h new file mode 100644 index 00000000..fd275409 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_at.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2004,2005 Voltaire Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * + * $Id: ib_at.h 3025 2005-08-08 20:38:20Z halr $ + */ + +#if !defined( IB_AT_H ) +#define IB_AT_H + +#include +#include + +enum ib_at_multipathing_type { + IB_AT_PATH_SAME_PORT = 0, + IB_AT_PATH_SAME_HCA = 1, /* but different ports if applicable */ + IB_AT_PATH_SAME_SYSTEM = 2, /* but different ports if applicable */ + IB_AT_PATH_INDEPENDENT_HCA = 3, + IB_AT_PATH_SRC_ROUTE = 4, /* application controlled multipathing */ +}; + +enum ib_at_route_flags { + IB_AT_ROUTE_USE_DEFAULTS = 0, + IB_AT_ROUTE_FORCE_ATS = 1, + IB_AT_ROUTE_FORCE_ARP = 2, + IB_AT_ROUTE_FORCE_RESOLVE = 4, +}; + +struct ib_at_path_attr { + u16 qos_tag; + __be16 pkey; + u8 multi_path_type; +}; + +struct ib_at_ib_route { + union ib_gid sgid; + union ib_gid dgid; + struct ib_device *out_dev; + int out_port; + struct ib_at_path_attr attr; +}; + +enum ib_at_op_status { + IB_AT_STATUS_INVALID = 0, + IB_AT_STATUS_PENDING = 1, + IB_AT_STATUS_COMPLETED = 2, + IB_AT_STATUS_ERROR = 3, + IB_AT_STATUS_CANCELED = 4, +}; + +/* + * ib_at_completion structure - callback function parameters structure + * @completion: completion call back function + * @context: user defined context pointer + * @req_id: asynchronous request ID - optional, out + * + * The asynchronous resolution function behavior is as follows: + * If the resolve operation can be fulfilled immediately, then the output + * structures are set and the number of filled structures is returned. + * + * If the resolve operation cannot by fulfilled immediately and + * an ib_at_completion structure is not provided, + * then the function immediately returns -EWOULDBLOCK. + * + * If ib_at_completion structure is provided and an asynchronous + * operation is started, the function immediately returns zero, + * and the request ID field (req_id) is set if the pointer is + * non NULL. This request ID may be used to cancel the operation, + * or to poll its status. + * + * When an asynchronous operation completes (successfully or not), + * the callback function is called, passing the request ID, + * the supplied user context and the number of output structures. + * If the asynchronous operation did not complete, a negative + * error code is return as the 'rec_num'. + * Valid error codes are: + * -EINTR: operation is canceled + * -EIO: request send failed + * -ETIMEOUT: operation timed out + * + * Returned value of zero records means that the resolution process + * completed, but the given address could not be resolved at this time. + */ +struct ib_at_completion { + void (*fn)(u64 req_id, void *context, int rec_num); + void *context; + u64 req_id; +}; + +/** + * ib_at_route_by_ip - asynchronously resolve ip address to ib route + * @dst_ip: destination ip + * @src_ip: source ip - optional + * @tos: ip type of service + * @flags: ib_at_route_flags + * @ib_route: out structure + * @async_comp: asynchronous callback structure - optional + * + * Resolve the specified dst_ip to a &struct ib_route structure. + * src_ip can be provided to force specific output interface. + * flags can be used to select resolving method; currently IB-ARP or ATS. + * + * See ib_at_completion structure documentation for asynchronous + * operation details. 
+ */ +int ib_at_route_by_ip(u32 dst_ip, u32 src_ip, int tos, u16 flags, + struct ib_at_ib_route *ib_route, + struct ib_at_completion *async_comp); + +/** + * ib_at_paths_by_route - asynchronously resolve ib route to ib path records + * @ib_route: ib route to resolve + * @mpath_type: ib_at_multipathing_type + * @path_arr: SA path record array - out + * @npath: maximal number of paths to return + * @async_comp: asynchronous callback structure - optional + * + * Resolve the specified ib_route to a SA path record array. + * Number of returned paths will not exceed npath. + * Multipathing type may be used to obtain redundant paths for APM, + * other failover schemes, bandwidth aggregation or source based routing. + * Note that multipathing request is meaningless unless npath is greater than 1. + * + * Returned ib_route structure includes the recommended pkey and qos_tag for + * this route. + * + * See ib_at_completion structure documentation for asynchronous operation + * details. + */ +int ib_at_paths_by_route(struct ib_at_ib_route *ib_route, u32 mpath_type, + struct ib_sa_path_rec *path_arr, int npath, + struct ib_at_completion *async_comp); + +/** + * ib_at_ips_by_gid - asynchronously resolve GID to IP addresses + * @gid: GID to resolve + * @dst_ips: array of IPs, out + * @nips: number of IP entries in dst_ips array + * @async_comp: asynchronous callback structure - optional + * + * Resolve the gid to IP addresses, but not more than nips. + * This function rely on the IB-ATS mechanism. + * + * See ib_at_completion structure documentation for asynchronous + * operation details. + */ +int ib_at_ips_by_gid(union ib_gid *gid, u32 *dst_ips, int nips, + struct ib_at_completion *async_comp); + +/** + * ib_at_ips_by_subnet - return local IP addresses by IP subnet + * @network: network to resolve - optional + * @netmask: subnet net mask - optional + * @dst_ips: array of IPs, out + * @nips: number of IP entries in dst_ips array + * + * Return local IP addresses matching the network and netmask, + * but not more than nips. + * + * Note that network and netmask as 0x0 or 0xffffffff returns all local IPs. + */ +int ib_at_ips_by_subnet(u32 network, u32 netmask, u32 *dst_ips, int nips); + +/** + * ib_at_invalidate_paths - invalidate possibly cached paths keyed by ib_route + * @ib_route: paths key - optional + * + * Returns number of invalidated paths. + * If ib_route is NULL, then the entire cache will be flushed. + */ +int ib_at_invalidate_paths(struct ib_at_ib_route *ib_route); + +/** + * ib_at_cancel - cancel possible active asynchronous operation + * @req_id: asynchronous request ID + * + * Return 0 if canceled, -1 if cancel failed (e.g. bad ID) + */ +int ib_at_cancel(u64 req_id); + +/** + * ib_at_status - poll asynchronous operation's status + * @req_id: asynchronous request ID ib_at_op_status + * + * Return non-negative ib_at_op_status value, + * or -EINVAL if the request ID is invalid. + */ +int ib_at_status(u64 req_id); + +#endif /* IB_AT_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_cache.h b/branches/MTHCA/hw/mthca/kernel/ib_cache.h new file mode 100644 index 00000000..be3ca4fc --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_cache.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
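To make the ib_at.h contract above concrete: the documentation states that a positive return from ib_at_route_by_ip means the route was resolved synchronously, zero with a completion structure means the request is pending and the callback will fire, and a negative value is an immediate error. A hedged caller sketch of that flow (my_route_done and resolve_route_example are illustrative names):

static void my_route_done(u64 req_id, void *context, int rec_num)
{
	/* rec_num > 0: route resolved; rec_num <= 0: canceled, failed,
	 * or could not be resolved at this time */
}

static int resolve_route_example(u32 dst_ip)
{
	struct ib_at_ib_route route;
	struct ib_at_completion comp;
	int ret;

	comp.fn = my_route_done;
	comp.context = NULL;

	ret = ib_at_route_by_ip(dst_ip, 0, 0, IB_AT_ROUTE_USE_DEFAULTS,
				&route, &comp);
	if (ret > 0)
		return 0;	/* completed synchronously, route is valid */
	if (ret == 0)
		return 0;	/* pending; my_route_done will be called and
				 * comp.req_id works with ib_at_cancel/status */
	return ret;		/* immediate error */
}
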
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_cache.h 2730 2005-06-28 16:43:03Z sean.hefty $ + */ + +#ifndef _IB_CACHE_H +#define _IB_CACHE_H + +#include + +/** + * ib_get_cached_gid - Returns a cached GID table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached GID table to query. + * @gid: The GID value found at the specified index. + * + * ib_get_cached_gid() fetches the specified GID table entry stored in + * the local software cache. + */ +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid); + +/** + * ib_find_cached_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. + */ +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index); + +/** + * ib_get_cached_pkey - Returns a cached PKey table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached PKey table to query. + * @pkey: The PKey value found at the specified index. + * + * ib_get_cached_pkey() fetches the specified PKey table entry stored in + * the local software cache. + */ +int ib_get_cached_pkey(struct ib_device *device_handle, + u8 port_num, + int index, + u16 *pkey); + +/** + * ib_find_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_cached_pkey() searches the specified PKey table in + * the local software cache. 
+ */ +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + + +int ib_cache_setup(void); +void ib_cache_cleanup(void); + +#endif /* _IB_CACHE_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_cm.h b/branches/MTHCA/hw/mthca/kernel/ib_cm.h new file mode 100644 index 00000000..b7db2a9f --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_cm.h @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
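The ib_cache.h interface above reads the port GID and PKey tables from a local software cache rather than querying the subnet. A minimal, hedged consumer sketch, assuming the usual 0-on-success return convention of these helpers; device and port_num stand for a valid ib_device pointer and port number:

	union ib_gid gid;
	u16 pkey;

	/* index 0 is the first entry of the cached GID / PKey tables */
	if (!ib_get_cached_gid(device, port_num, 0, &gid) &&
	    !ib_get_cached_pkey(device, port_num, 0, &pkey)) {
		/* gid and pkey now hold the cached entries */
	}
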
+ * + * $Id: ib_cm.h 2930 2005-07-28 19:22:44Z sean.hefty $ + */ +#if !defined(IB_CM_H) +#define IB_CM_H + +#include +#include + +enum ib_cm_state { + IB_CM_IDLE, + IB_CM_LISTEN, + IB_CM_REQ_SENT, + IB_CM_REQ_RCVD, + IB_CM_MRA_REQ_SENT, + IB_CM_MRA_REQ_RCVD, + IB_CM_REP_SENT, + IB_CM_REP_RCVD, + IB_CM_MRA_REP_SENT, + IB_CM_MRA_REP_RCVD, + IB_CM_ESTABLISHED, + IB_CM_DREQ_SENT, + IB_CM_DREQ_RCVD, + IB_CM_TIMEWAIT, + IB_CM_SIDR_REQ_SENT, + IB_CM_SIDR_REQ_RCVD +}; + +enum ib_cm_lap_state { + IB_CM_LAP_IDLE, + IB_CM_LAP_SENT, + IB_CM_LAP_RCVD, + IB_CM_MRA_LAP_SENT, + IB_CM_MRA_LAP_RCVD, +}; + +enum ib_cm_event_type { + IB_CM_REQ_ERROR, + IB_CM_REQ_RECEIVED, + IB_CM_REP_ERROR, + IB_CM_REP_RECEIVED, + IB_CM_RTU_RECEIVED, + IB_CM_USER_ESTABLISHED, + IB_CM_DREQ_ERROR, + IB_CM_DREQ_RECEIVED, + IB_CM_DREP_RECEIVED, + IB_CM_TIMEWAIT_EXIT, + IB_CM_MRA_RECEIVED, + IB_CM_REJ_RECEIVED, + IB_CM_LAP_ERROR, + IB_CM_LAP_RECEIVED, + IB_CM_APR_RECEIVED, + IB_CM_SIDR_REQ_ERROR, + IB_CM_SIDR_REQ_RECEIVED, + IB_CM_SIDR_REP_RECEIVED +}; + +enum ib_cm_data_size { + IB_CM_REQ_PRIVATE_DATA_SIZE = 92, + IB_CM_MRA_PRIVATE_DATA_SIZE = 222, + IB_CM_REJ_PRIVATE_DATA_SIZE = 148, + IB_CM_REP_PRIVATE_DATA_SIZE = 196, + IB_CM_RTU_PRIVATE_DATA_SIZE = 224, + IB_CM_DREQ_PRIVATE_DATA_SIZE = 220, + IB_CM_DREP_PRIVATE_DATA_SIZE = 224, + IB_CM_REJ_ARI_LENGTH = 72, + IB_CM_LAP_PRIVATE_DATA_SIZE = 168, + IB_CM_APR_PRIVATE_DATA_SIZE = 148, + IB_CM_APR_INFO_LENGTH = 72, + IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, + IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, + IB_CM_SIDR_REP_INFO_LENGTH = 72 +}; + +struct ib_cm_id; + +struct ib_cm_req_event_param { + struct ib_cm_id *listen_id; + struct ib_device *device; + u8 port; + + struct ib_sa_path_rec *primary_path; + struct ib_sa_path_rec *alternate_path; + + __be64 remote_ca_guid; + u32 remote_qkey; + u32 remote_qpn; + enum ib_qp_type_t qp_type; + + u32 starting_psn; + u8 responder_resources; + u8 initiator_depth; + unsigned int local_cm_response_timeout:5; + unsigned int flow_control:1; + unsigned int remote_cm_response_timeout:5; + unsigned int retry_count:3; + unsigned int rnr_retry_count:3; + unsigned int srq:1; +}; + +struct ib_cm_rep_event_param { + __be64 remote_ca_guid; + u32 remote_qkey; + u32 remote_qpn; + u32 starting_psn; + u8 responder_resources; + u8 initiator_depth; + unsigned int target_ack_delay:5; + unsigned int failover_accepted:2; + unsigned int flow_control:1; + unsigned int rnr_retry_count:3; + unsigned int srq:1; +}; + +enum ib_cm_rej_reason { + IB_CM_REJ_NO_QP = 1, + IB_CM_REJ_NO_EEC = 2, + IB_CM_REJ_NO_RESOURCES = 3, + IB_CM_REJ_TIMEOUT = 4, + IB_CM_REJ_UNSUPPORTED = 5, + IB_CM_REJ_INVALID_COMM_ID = 6, + IB_CM_REJ_INVALID_COMM_INSTANCE = 7, + IB_CM_REJ_INVALID_SERVICE_ID = 8, + IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9, + IB_CM_REJ_STALE_CONN = 10, + IB_CM_REJ_RDC_NOT_EXIST = 11, + IB_CM_REJ_INVALID_GID = 12, + IB_CM_REJ_INVALID_LID = 13, + IB_CM_REJ_INVALID_SL = 14, + IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15, + IB_CM_REJ_INVALID_HOP_LIMIT = 16, + IB_CM_REJ_INVALID_PACKET_RATE = 17, + IB_CM_REJ_INVALID_ALT_GID = 18, + IB_CM_REJ_INVALID_ALT_LID = 19, + IB_CM_REJ_INVALID_ALT_SL = 20, + IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21, + IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22, + IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23, + IB_CM_REJ_PORT_CM_REDIRECT = 24, + IB_CM_REJ_PORT_REDIRECT = 25, + IB_CM_REJ_INVALID_MTU = 26, + IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27, + IB_CM_REJ_CONSUMER_DEFINED = 28, + IB_CM_REJ_INVALID_RNR_RETRY = 29, + IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, + 
IB_CM_REJ_INVALID_CLASS_VERSION = 31, + IB_CM_REJ_INVALID_FLOW_LABEL = 32, + IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 +}; + +struct ib_cm_rej_event_param { + enum ib_cm_rej_reason reason; + void *ari; + u8 ari_length; +}; + +struct ib_cm_mra_event_param { + u8 service_timeout; +}; + +struct ib_cm_lap_event_param { + struct ib_sa_path_rec *alternate_path; +}; + +enum ib_cm_apr_status { + IB_CM_APR_SUCCESS, + IB_CM_APR_INVALID_COMM_ID, + IB_CM_APR_UNSUPPORTED, + IB_CM_APR_REJECT, + IB_CM_APR_REDIRECT, + IB_CM_APR_IS_CURRENT, + IB_CM_APR_INVALID_QPN_EECN, + IB_CM_APR_INVALID_LID, + IB_CM_APR_INVALID_GID, + IB_CM_APR_INVALID_FLOW_LABEL, + IB_CM_APR_INVALID_TCLASS, + IB_CM_APR_INVALID_HOP_LIMIT, + IB_CM_APR_INVALID_PACKET_RATE, + IB_CM_APR_INVALID_SL +}; + +struct ib_cm_apr_event_param { + enum ib_cm_apr_status ap_status; + void *apr_info; + u8 info_len; +}; + +struct ib_cm_sidr_req_event_param { + struct ib_cm_id *listen_id; + struct ib_device *device; + u8 port; + u16 pkey; +}; + +enum ib_cm_sidr_status { + IB_SIDR_SUCCESS, + IB_SIDR_UNSUPPORTED, + IB_SIDR_REJECT, + IB_SIDR_NO_QP, + IB_SIDR_REDIRECT, + IB_SIDR_UNSUPPORTED_VERSION +}; + +struct ib_cm_sidr_rep_event_param { + enum ib_cm_sidr_status status; + u32 qkey; + u32 qpn; + void *info; + u8 info_len; + +}; + +struct ib_cm_event { + enum ib_cm_event_type event; + union { + struct ib_cm_req_event_param req_rcvd; + struct ib_cm_rep_event_param rep_rcvd; + /* No data for RTU received events. */ + struct ib_cm_rej_event_param rej_rcvd; + struct ib_cm_mra_event_param mra_rcvd; + struct ib_cm_lap_event_param lap_rcvd; + struct ib_cm_apr_event_param apr_rcvd; + /* No data for DREQ/DREP received events. */ + struct ib_cm_sidr_req_event_param sidr_req_rcvd; + struct ib_cm_sidr_rep_event_param sidr_rep_rcvd; + enum ib_wc_status send_status; + } param; + + void *private_data; +}; + +/** + * ib_cm_handler - User-defined callback to process communication events. + * @cm_id: Communication identifier associated with the reported event. + * @event: Information about the communication event. + * + * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events + * generated as a result of listen requests result in the allocation of a + * new @cm_id. The new @cm_id is returned to the user through this callback. + * Clients are responsible for destroying the new @cm_id. For peer-to-peer + * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds + * to a user's existing communication identifier. + * + * Users may not call ib_destroy_cm_id while in the context of this callback; + * however, returning a non-zero value instructs the communication manager to + * destroy the @cm_id after the callback completes. + */ +typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, + struct ib_cm_event *event); + +struct ib_cm_id { + ib_cm_handler cm_handler; + void *context; + __be64 service_id; + __be64 service_mask; + enum ib_cm_state state; /* internal CM/debug use */ + enum ib_cm_lap_state lap_state; /* internal CM/debug use */ + __be32 local_id; + __be32 remote_id; +}; + +/** + * ib_create_cm_id - Allocate a communication identifier. + * @cm_handler: Callback invoked to notify the user of CM events. + * @context: User specified context associated with the communication + * identifier. + * + * Communication identifiers are used to track connection states, service + * ID resolution requests, and listen requests. 
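/*
 * Illustrative sketch, not part of this patch: a minimal CM callback that
 * follows the contract described above. my_cm_handler is a placeholder
 * client function; only identifiers declared in this header are used.
 * Returning a non-zero value asks the CM to destroy the cm_id after the
 * callback returns, since ib_destroy_cm_id may not be called from here.
 */
static int my_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
    switch (event->event) {
    case IB_CM_REQ_RECEIVED:
        /* cm_id is a newly allocated identifier for this request; the
         * listening id is event->param.req_rcvd.listen_id.  Keep it: a
         * later step would answer with ib_send_cm_rep or ib_send_cm_rej. */
        return 0;
    case IB_CM_REJ_RECEIVED:
    case IB_CM_REQ_ERROR:
        /* nothing more to do on this id; let the CM destroy it */
        return 1;
    default:
        return 0;
    }
}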
+ */ +struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, + void *context); + +/** + * ib_destroy_cm_id - Destroy a connection identifier. + * @cm_id: Connection identifier to destroy. + * + * This call blocks until the connection identifier is destroyed. + */ +void ib_destroy_cm_id(struct ib_cm_id *cm_id); + +#define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL) +#define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL) + +/** + * ib_cm_listen - Initiates listening on the specified service ID for + * connection and service ID resolution requests. + * @cm_id: Connection identifier associated with the listen request. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller. + * @service_mask: Mask applied to service ID used to listen across a + * range of service IDs. If set to 0, the service ID is matched + * exactly. This parameter is ignored if %service_id is set to + * IB_CM_ASSIGN_SERVICE_ID. + */ +int ib_cm_listen(struct ib_cm_id *cm_id, + __be64 service_id, + __be64 service_mask); + +struct ib_cm_req_param { + struct ib_sa_path_rec *primary_path; + struct ib_sa_path_rec *alternate_path; + __be64 service_id; + u32 qp_num; + enum ib_qp_type_t qp_type; + u32 starting_psn; + const void *private_data; + u8 private_data_len; + u8 peer_to_peer; + u8 responder_resources; + u8 initiator_depth; + u8 remote_cm_response_timeout; + u8 flow_control; + u8 local_cm_response_timeout; + u8 retry_count; + u8 rnr_retry_count; + u8 max_cm_retries; + u8 srq; +}; + +/** + * ib_send_cm_req - Sends a connection request to the remote node. + * @cm_id: Connection identifier that will be associated with the + * connection request. + * @param: Connection request information needed to establish the + * connection. + */ +int ib_send_cm_req(struct ib_cm_id *cm_id, + struct ib_cm_req_param *param); + +struct ib_cm_rep_param { + u32 qp_num; + u32 starting_psn; + const void *private_data; + u8 private_data_len; + u8 responder_resources; + u8 initiator_depth; + u8 target_ack_delay; + u8 failover_accepted; + u8 flow_control; + u8 rnr_retry_count; + u8 srq; +}; + +/** + * ib_send_cm_rep - Sends a connection reply in response to a connection + * request. + * @cm_id: Connection identifier that will be associated with the + * connection request. + * @param: Connection reply information needed to establish the + * connection. + */ +int ib_send_cm_rep(struct ib_cm_id *cm_id, + struct ib_cm_rep_param *param); + +/** + * ib_send_cm_rtu - Sends a connection ready to use message in response + * to a connection reply message. + * @cm_id: Connection identifier associated with the connection request. + * @private_data: Optional user-defined private data sent with the + * ready to use message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_rtu(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_dreq - Sends a disconnection request for an existing + * connection. + * @cm_id: Connection identifier associated with the connection being + * released. + * @private_data: Optional user-defined private data sent with the + * disconnection request message. + * @private_data_len: Size of the private data buffer, in bytes. 
+ */ +int ib_send_cm_dreq(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_drep - Sends a disconnection reply to a disconnection request. + * @cm_id: Connection identifier associated with the connection being + * released. + * @private_data: Optional user-defined private data sent with the + * disconnection reply message. + * @private_data_len: Size of the private data buffer, in bytes. + * + * If the cm_id is in the correct state, the CM will transition the connection + * to the timewait state, even if an error occurs sending the DREP message. + */ +int ib_send_cm_drep(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_cm_establish - Forces a connection state to established. + * @cm_id: Connection identifier to transition to established. + * + * This routine should be invoked by users who receive messages on a + * connected QP before an RTU has been received. + */ +int ib_cm_establish(struct ib_cm_id *cm_id); + +/** + * ib_send_cm_rej - Sends a connection rejection message to the + * remote node. + * @cm_id: Connection identifier associated with the connection being + * rejected. + * @reason: Reason for the connection request rejection. + * @ari: Optional additional rejection information. + * @ari_length: Size of the additional rejection information, in bytes. + * @private_data: Optional user-defined private data sent with the + * rejection message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_rej(struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection + * message. + * @cm_id: Connection identifier associated with the connection message. + * @service_timeout: The maximum time required for the sender to reply to + * to the connection message. + * @private_data: Optional user-defined private data sent with the + * message receipt acknowledgement. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_mra(struct ib_cm_id *cm_id, + u8 service_timeout, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_lap - Sends a load alternate path request. + * @cm_id: Connection identifier associated with the load alternate path + * message. + * @alternate_path: A path record that identifies the alternate path to + * load. + * @private_data: Optional user-defined private data sent with the + * load alternate path message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_lap(struct ib_cm_id *cm_id, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len); + +/** + * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning + * to a specified QP state. + * @cm_id: Communication identifier associated with the QP attributes to + * initialize. + * @qp_attr: On input, specifies the desired QP state. On output, the + * mandatory and desired optional attributes will be set in order to + * modify the QP to the specified state. + * @qp_attr_mask: The QP attribute mask that may be used to transition the + * QP to the specified state. + * + * Users must set the @qp_attr->qp_state to the desired QP state. This call + * will set all required attributes for the given transition, along with + * known optional attributes. 
Users may override the attributes returned from + * this call before calling ib_modify_qp. + */ +int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +/** + * ib_send_cm_apr - Sends an alternate path response message in response to + * a load alternate path request. + * @cm_id: Connection identifier associated with the alternate path response. + * @status: Reply status sent with the alternate path response. + * @info: Optional additional information sent with the alternate path + * response. + * @info_length: Size of the additional information, in bytes. + * @private_data: Optional user-defined private data sent with the + * alternate path response message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_apr(struct ib_cm_id *cm_id, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len); + +struct ib_cm_sidr_req_param { + struct ib_sa_path_rec *path; + __be64 service_id; + int timeout_ms; + const void *private_data; + u8 private_data_len; + u8 max_cm_retries; + u16 pkey; +}; + +/** + * ib_send_cm_sidr_req - Sends a service ID resolution request to the + * remote node. + * @cm_id: Communication identifier that will be associated with the + * service ID resolution request. + * @param: Service ID resolution request information. + */ +int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, + struct ib_cm_sidr_req_param *param); + +struct ib_cm_sidr_rep_param { + u32 qp_num; + u32 qkey; + enum ib_cm_sidr_status status; + const void *info; + u8 info_length; + const void *private_data; + u8 private_data_len; +}; + +/** + * ib_send_cm_sidr_rep - Sends a service ID resolution request to the + * remote node. + * @cm_id: Communication identifier associated with the received service ID + * resolution request. + * @param: Service ID resolution reply information. + */ +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param); + +#endif /* IB_CM_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_fmr_pool.h b/branches/MTHCA/hw/mthca/kernel/ib_fmr_pool.h new file mode 100644 index 00000000..6c9e24d6 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_fmr_pool.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $ + */ + +#if !defined(IB_FMR_POOL_H) +#define IB_FMR_POOL_H + +#include + +struct ib_fmr_pool; + +/** + * struct ib_fmr_pool_param - Parameters for creating FMR pool + * @max_pages_per_fmr:Maximum number of pages per map request. + * @access:Access flags for FMRs in pool. + * @pool_size:Number of FMRs to allocate for pool. + * @dirty_watermark:Flush is triggered when @dirty_watermark dirty + * FMRs are present. + * @flush_function:Callback called when unmapped FMRs are flushed and + * more FMRs are possibly available for mapping + * @flush_arg:Context passed to user's flush function. + * @cache:If set, FMRs may be reused after unmapping for identical map + * requests. + */ +struct ib_fmr_pool_param { + int max_pages_per_fmr; + enum ib_access_flags access; + int pool_size; + int dirty_watermark; + void (*flush_function)(struct ib_fmr_pool *pool, + void * arg); + void *flush_arg; + unsigned cache:1; +}; + +struct ib_pool_fmr { + struct ib_fmr *fmr; + struct ib_fmr_pool *pool; + struct list_head list; + struct hlist_node cache_node; + int ref_count; + int remap_count; + u64 io_virtual_address; + int page_list_len; + u64 page_list[0]; +}; + +struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, + struct ib_fmr_pool_param *params); + +void ib_destroy_fmr_pool(struct ib_fmr_pool *pool); + +int ib_flush_fmr_pool(struct ib_fmr_pool *pool); + +struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, + u64 *page_list, + int list_len, + u64 *io_virtual_address); + +int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); + +#endif /* IB_FMR_POOL_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_mad.h b/branches/MTHCA/hw/mthca/kernel/ib_mad.h new file mode 100644 index 00000000..176250d5 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_mad.h @@ -0,0 +1,584 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
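/*
 * FMR pool usage sketch built only from the ib_fmr_pool prototypes above;
 * not part of the patch.  The NULL failure checks, the zero access value
 * (real code sets the required ib_access_flags), and my_flush() are
 * illustrative assumptions.
 */
static void my_flush(struct ib_fmr_pool *pool, void *arg)
{
    /* unmapped FMRs were flushed; more may now be available for mapping */
}

static int fmr_pool_example(struct ib_pd *pd, u64 *pages, int npages)
{
    struct ib_fmr_pool_param param;
    struct ib_fmr_pool *pool;
    struct ib_pool_fmr *fmr;
    u64 iova = 0;

    memset(&param, 0, sizeof param);
    param.max_pages_per_fmr = 64;
    param.pool_size         = 32;
    param.dirty_watermark   = 8;
    param.flush_function    = my_flush;
    param.cache             = 1;

    pool = ib_create_fmr_pool(pd, &param);
    if (!pool)               /* assumed failure convention */
        return -1;

    fmr = ib_fmr_pool_map_phys(pool, pages, npages, &iova);
    if (fmr)
        (void)ib_fmr_pool_unmap(fmr);

    ib_destroy_fmr_pool(pool);
    return 0;
}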
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_mad.h 2928 2005-07-28 18:45:56Z sean.hefty $ + */ + +#if !defined( IB_MAD_H ) +#define IB_MAD_H + +#include + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +#define IB_OPENIB_OUI (0x001405) + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 + +#define IB_MGMT_METHOD_RESP 0x80 + +#define IB_MGMT_MAX_METHODS 128 + +/* RMPP information */ +#define IB_MGMT_RMPP_VERSION 1 + +#define IB_MGMT_RMPP_TYPE_DATA 1 +#define IB_MGMT_RMPP_TYPE_ACK 2 +#define IB_MGMT_RMPP_TYPE_STOP 3 +#define IB_MGMT_RMPP_TYPE_ABORT 4 + +#define IB_MGMT_RMPP_FLAG_ACTIVE 1 +#define IB_MGMT_RMPP_FLAG_FIRST (1<<1) +#define IB_MGMT_RMPP_FLAG_LAST (1<<2) + +#define IB_MGMT_RMPP_NO_RESPTIME 0x1F + +#define IB_MGMT_RMPP_STATUS_SUCCESS 0 +#define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 +#define IB_MGMT_RMPP_STATUS_T2L 118 +#define IB_MGMT_RMPP_STATUS_BAD_LEN 119 +#define IB_MGMT_RMPP_STATUS_BAD_SEG 120 +#define IB_MGMT_RMPP_STATUS_BADT 121 +#define IB_MGMT_RMPP_STATUS_W2S 122 +#define IB_MGMT_RMPP_STATUS_S2B 123 +#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 +#define IB_MGMT_RMPP_STATUS_UNV 125 +#define IB_MGMT_RMPP_STATUS_TMR 126 +#define IB_MGMT_RMPP_STATUS_UNSPEC 127 +#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 + +#ifdef LINUX_TO_BE_REMOVED +// defined in ib_types.h +#define IB_QP0 0 +#define IB_QP1 __constant_htonl(1) +#endif +#define IB_QP1_QKEY 0x00000180 /* big endian */ +#define IB_QP_SET_QKEY 0x00000080 /* big endian */ + +struct ib_mad_hdr { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; +}; + +struct ib_rmpp_hdr { + u8 rmpp_version; + u8 rmpp_type; + u8 rmpp_rtime_flags; + u8 rmpp_status; + __be32 seg_num; + __be32 paylen_newwin; +}; + +typedef u64 ib_sa_comp_mask; + +#define IB_SA_COMP_MASK(n) ((ib_sa_comp_mask) cl_hton64(1ull << n)) + +/* + * ib_sa_hdr and ib_sa_mad structures must be packed because they have + * 64-bit fields that are only 32-bit aligned. 64-bit architectures will + * lay them out wrong otherwise. 
(And unfortunately they are sent on + * the wire so we can't change the layout) + */ +#pragma pack(push,1) +struct ib_sa_hdr { + __be64 sm_key; + __be16 attr_offset; + __be16 reserved; + ib_sa_comp_mask comp_mask; +}; +#pragma pack(pop) + +struct ib_mad { + struct ib_mad_hdr mad_hdr; + u8 data[232]; +}; + +struct ib_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[220]; +}; + +#pragma pack(push,1) +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[200]; +}; +#pragma pack(pop) + +struct ib_vendor_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[216]; +}; + +/** + * ib_mad_send_buf - MAD data buffer and work request for sends. + * @mad: References an allocated MAD data buffer. The size of the data + * buffer is specified in the @send_wr.length field. + * @mapping: DMA mapping information. + * @mad_agent: MAD agent that allocated the buffer. + * @context: User-controlled context fields. + * @send_wr: An initialized work request structure used when sending the MAD. + * The wr_id field of the work request is initialized to reference this + * data structure. + * @sge: A scatter-gather list referenced by the work request. + * + * Users are responsible for initializing the MAD buffer itself, with the + * exception of specifying the payload length field in any RMPP MAD. + */ +struct ib_mad_send_buf { + struct ib_mad *mad; + dma_addr_t mapping; + struct ib_mad_agent *mad_agent; + void *context[2]; + struct _ib_send_wr send_wr; + struct ib_sge sge; +}; + +/** + * ib_get_rmpp_resptime - Returns the RMPP response time. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags >> 3; +} + +/** + * ib_get_rmpp_flags - Returns the RMPP flags. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags & 0x7; +} + +/** + * ib_set_rmpp_resptime - Sets the response time in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @rtime: The response time to set. + */ +static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) +{ + rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); +} + +/** + * ib_set_rmpp_flags - Sets the flags in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @flags: The flags to set. + */ +static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) +{ + rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) | + (flags & 0x7); +} + +struct ib_mad_agent; +struct ib_mad_send_wc; +struct ib_mad_recv_wc; + +/** + * ib_mad_send_handler - callback handler for a sent MAD. + * @mad_agent: MAD agent that sent the MAD. + * @mad_send_wc: Send work completion information on the sent MAD. + */ +typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_snoop_handler - Callback handler for snooping sent MADs. + * @mad_agent: MAD agent that snooped the MAD. + * @send_wr: Work request information on the sent MAD. + * @mad_send_wc: Work completion information on the sent MAD. Valid + * only for snooping that occurs on a send completion. + * + * Clients snooping MADs should not modify data referenced by the @send_wr + * or @mad_send_wc. 
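/*
 * Sketch showing the RMPP accessors defined above on the first DATA segment
 * of a transfer; not from the patch.  The payload value is illustrative and
 * cl_hton32() is assumed to be the usual complib byte-order helper.
 */
static void init_first_rmpp_segment(struct ib_rmpp_mad *mad, u32 payload)
{
    mad->rmpp_hdr.rmpp_version  = IB_MGMT_RMPP_VERSION;
    mad->rmpp_hdr.rmpp_type     = IB_MGMT_RMPP_TYPE_DATA;
    mad->rmpp_hdr.seg_num       = cl_hton32(1);
    /* paylen_newwin carries the total class payload, network byte order */
    mad->rmpp_hdr.paylen_newwin = cl_hton32(payload);

    ib_set_rmpp_flags(&mad->rmpp_hdr,
                      IB_MGMT_RMPP_FLAG_ACTIVE | IB_MGMT_RMPP_FLAG_FIRST);
    ib_set_rmpp_resptime(&mad->rmpp_hdr, IB_MGMT_RMPP_NO_RESPTIME);
}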
+ */ +typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_recv_handler - callback handler for a received MAD. + * @mad_agent: MAD agent requesting the received MAD. + * @mad_recv_wc: Received work completion information on the received MAD. + * + * MADs received in response to a send request operation will be handed to + * the user after the send operation completes. All data buffers given + * to registered agents through this routine are owned by the receiving + * client, except for snooping agents. Clients snooping MADs should not + * modify the data referenced by @mad_recv_wc. + */ +typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_mad_agent - Used to track MAD registration with the access layer. + * @device: Reference to device registration is on. + * @qp: Reference to QP used for sending and receiving MADs. + * @mr: Memory region for system memory usable for DMA. + * @recv_handler: Callback handler for a received MAD. + * @send_handler: Callback handler for a sent MAD. + * @snoop_handler: Callback handler for snooped sent MADs. + * @context: User-specified context associated with this registration. + * @hi_tid: Access layer assigned transaction ID for this client. + * Unsolicited MADs sent by this client will have the upper 32-bits + * of their TID set to this value. + * @port_num: Port number on which QP is registered + * @rmpp_version: If set, indicates the RMPP version used by this agent. + */ +struct ib_mad_agent { + struct ib_device *device; + struct ib_qp *qp; + struct ib_mr *mr; + ib_mad_recv_handler recv_handler; + ib_mad_send_handler send_handler; + ib_mad_snoop_handler snoop_handler; + void *context; + u32 hi_tid; + u8 port_num; + u8 rmpp_version; +}; + +/** + * ib_mad_send_wc - MAD send completion information. + * @wr_id: Work request identifier associated with the send MAD request. + * @status: Completion status. + * @vendor_err: Optional vendor error information returned with a failed + * request. + */ +struct ib_mad_send_wc { + u64 wr_id; + enum ib_wc_status status; + u32 vendor_err; +}; + +/** + * ib_mad_recv_buf - received MAD buffer information. + * @list: Reference to next data buffer for a received RMPP MAD. + * @grh: References a data buffer containing the global route header. + * The data refereced by this buffer is only valid if the GRH is + * valid. + * @mad: References the start of the received MAD. + */ +struct ib_mad_recv_buf { + struct list_head list; + struct ib_grh *grh; + struct ib_mad *mad; +}; + +/** + * ib_mad_recv_wc - received MAD information. + * @wc: Completion information for the received data. + * @recv_buf: Specifies the location of the received data buffer(s). + * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. + * @mad_len: The length of the received MAD, without duplicated headers. + * + * For received response, the wr_id field of the wc is set to the wr_id + * for the corresponding send request. + */ +struct ib_mad_recv_wc { + struct _ib_wc *wc; + struct ib_mad_recv_buf recv_buf; + struct list_head rmpp_list; + int mad_len; +}; + +/** + * ib_mad_reg_req - MAD registration request + * @mgmt_class: Indicates which management class of MADs should be receive + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. 
+ * @mgmt_class_version: Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @method_mask: The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + */ +struct ib_mad_reg_req { + u8 mgmt_class; + u8 mgmt_class_version; + u8 oui[3]; + DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); +}; + +/** + * ib_register_mad_agent - Register to send/receive MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP to access. Must be either + * IB_QPT_QP0 or IB_QPT_QP1. + * @mad_reg_req: Specifies which unsolicited MADs should be received + * by the caller. This parameter may be NULL if the caller only + * wishes to receive solicited responses. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type_t qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +enum ib_mad_snoop_flags { + /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ + /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ + IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), + /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ + IB_MAD_SNOOP_RECVS = (1<<4) + /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ + /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ +}; + +/** + * ib_register_mad_snoop - Register to snoop sent and received MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP traffic to snoop. Must be either + * IB_QPT_QP0 or IB_QPT_QP1. + * @mad_snoop_flags: Specifies information where snooping occurs. + * @send_handler: The callback routine invoked for a snooped send. + * @recv_handler: The callback routine invoked for a snooped receive. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type_t qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_unregister_mad_agent - Unregisters a client from using MAD services. + * @mad_agent: Corresponding MAD registration request to deregister. + * + * After invoking this routine, MAD services are no longer usable by the + * client on the associated QP. + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); + +/** + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client. + * @mad_agent: Specifies the associated registration to post the send to. + * @send_wr: Specifies the information needed to send the MAD(s). + * @bad_send_wr: Specifies the MAD on which an error was encountered. + * + * Sent MADs are not guaranteed to complete in the order that they were posted. 
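/*
 * Registration sketch using the declarations above; not part of the patch.
 * Treating method_mask as an array of unsigned long matches the usual
 * DECLARE_BITMAP expansion but is an assumption for this port, as are the
 * NULL failure convention and the IB_QPT_QP1 value named in the comments.
 */
static void pma_send_done(struct ib_mad_agent *agent,
                          struct ib_mad_send_wc *wc)
{
    /* sent PMA MAD completed; wc->status holds the result */
}

static void pma_recv(struct ib_mad_agent *agent,
                     struct ib_mad_recv_wc *wc)
{
    /* process the MAD, then return the buffers to the access layer
     * (ib_free_recv_mad is declared further below in this header) */
    ib_free_recv_mad(wc);
}

static struct ib_mad_agent *register_pma_agent(struct ib_device *dev, u8 port)
{
    struct ib_mad_reg_req req;

    memset(&req, 0, sizeof req);
    req.mgmt_class         = IB_MGMT_CLASS_PERF_MGMT;
    req.mgmt_class_version = 1;
    /* receive unsolicited GET and SET requests for this class */
    req.method_mask[0] = (1UL << IB_MGMT_METHOD_GET) |
                         (1UL << IB_MGMT_METHOD_SET);

    return ib_register_mad_agent(dev, port, IB_QPT_QP1, &req,
                                 0 /* no RMPP */, pma_send_done,
                                 pma_recv, NULL);
}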
+ * + * If the MAD requires RMPP, the data buffer should contain a single copy + * of the common MAD, RMPP, and class specific headers, followed by the class + * defined data. If the class defined data would not divide evenly into + * RMPP segments, then space must be allocated at the end of the referenced + * buffer for any required padding. To indicate the amount of class defined + * data being transferred, the paylen_newwin field in the RMPP header should + * be set to the size of the class specific header plus the amount of class + * defined data being transferred. The paylen_newwin field should be + * specified in network-byte order. + */ +int ib_post_send_mad(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +/** + * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. + * @mad_recv_wc: Work completion information for a received MAD. + * @buf: User-provided data buffer to receive the coalesced buffers. The + * referenced buffer should be at least the size of the mad_len specified + * by @mad_recv_wc. + * + * This call copies a chain of received MAD segments into a single data buffer, + * removing duplicated headers. + */ +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); + +/** + * ib_free_recv_mad - Returns data buffers used to receive a MAD. + * @mad_recv_wc: Work completion information for a received MAD. + * + * Clients receiving MADs through their ib_mad_recv_handler must call this + * routine to return the work completion buffers to the access layer. + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); + +/** + * ib_modify_mad - Modifies an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to modify. + * @timeout_ms: New timeout value for sent MAD. + * + * This call will reset the timeout value for a sent MAD to the specified + * value. + */ +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); + +/** + * ib_redirect_mad_qp - Registers a QP for MAD services. + * @qp: Reference to a QP that requires MAD services. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + * + * Use of this call allows clients to use MAD services, such as RMPP, + * on user-owned QPs. After calling this routine, users may send + * MADs on the specified QP by calling ib_mad_post_send. + */ +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_process_mad_wc - Processes a work completion associated with a + * MAD sent or received on a redirected QP. 
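/*
 * Receive-side sketch for ib_coalesce_recv_mad()/ib_free_recv_mad() above;
 * not part of the patch.  Buffer allocation is deliberately left to the
 * caller, since the allocator used by this port is not shown here.
 */
static int copy_and_release_mad(struct ib_mad_recv_wc *wc,
                                void *buf, int buf_len)
{
    int len = wc->mad_len;
    int ok  = (buf_len >= len);

    if (ok)
        ib_coalesce_recv_mad(wc, buf);

    /* required for every MAD delivered to a recv handler */
    ib_free_recv_mad(wc);
    return ok ? len : -1;
}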
+ * @mad_agent: Specifies the registered MAD service using the redirected QP. + * @wc: References a work completion associated with a sent or received + * MAD segment. + * + * This routine is used to complete or continue processing on a MAD request. + * If the work completion is associated with a send operation, calling + * this routine is required to continue an RMPP transfer or to wait for a + * corresponding response, if it is a request. If the work completion is + * associated with a receive operation, calling this routine is required to + * process an inbound or outbound RMPP transfer, or to match a response MAD + * with its corresponding request. + */ +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct _ib_wc *wc); + +/** + * ib_create_send_mad - Allocate and initialize a data buffer and work request + * for sending a MAD. + * @mad_agent: Specifies the registered MAD service to associate with the MAD. + * @remote_qpn: Specifies the QPN of the receiving node. + * @pkey_index: Specifies which PKey the MAD will be sent using. This field + * is valid only if the remote_qpn is QP 1. + * @ah: References the address handle used to transfer to the remote node. + * @rmpp_active: Indicates if the send will enable RMPP. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * should include the common MAD header, RMPP header, plus any class + * specific header. + * @data_len: Indicates the size of any user-transferred data. The call will + * automatically adjust the allocated buffer size to account for any + * additional padding that may be necessary. + * @gfp_mask: GFP mask used for the memory allocation. + * + * This is a helper routine that may be used to allocate a MAD. Users are + * not required to allocate outbound MADs using this call. The returned + * MAD send buffer will reference a data buffer usable for sending a MAD, along + * with an initialized work request structure. Users may modify the returned + * MAD data buffer or work request before posting the send. + * + * The returned data buffer will be cleared. Users are responsible for + * initializing the common MAD and any class specific headers. If @rmpp_active + * is set, the RMPP header will be initialized for sending. + */ +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + struct ib_ah *ah, int rmpp_active, + int hdr_len, int data_len, + unsigned int gfp_mask); + +/** + * ib_free_send_mad - Returns data buffers used to send a MAD. + * @send_buf: Previously allocated send data buffer. + */ +void ib_free_send_mad(struct ib_mad_send_buf *send_buf); + +#endif /* IB_MAD_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_pack.h b/branches/MTHCA/hw/mthca/kernel/ib_pack.h new file mode 100644 index 00000000..fe480f3e --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_pack.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
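/*
 * Allocation sketch for ib_create_send_mad(), not from the patch: build a
 * cleared PMA GET MAD with no RMPP.  Passing 0 for gfp_mask, the NULL
 * failure check, and the class version are assumptions; real code would
 * then post send_buf->send_wr with ib_post_send_mad() and release the
 * buffer with ib_free_send_mad() once the send completes.
 */
static struct ib_mad_send_buf *build_pma_get(struct ib_mad_agent *agent,
                                             struct ib_ah *ah,
                                             int data_len, __be64 tid)
{
    struct ib_mad_send_buf *send_buf;

    send_buf = ib_create_send_mad(agent, 1 /* QP1 */, 0, ah,
                                  0 /* no RMPP */,
                                  sizeof(struct ib_mad_hdr), data_len, 0);
    if (!send_buf)            /* assumed failure convention */
        return NULL;

    /* the buffer is returned cleared; fill in the common header */
    send_buf->mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
    send_buf->mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_PERF_MGMT;
    send_buf->mad->mad_hdr.class_version = 1;
    send_buf->mad->mad_hdr.method        = IB_MGMT_METHOD_GET;
    send_buf->mad->mad_hdr.tid           = tid;
    return send_buf;
}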
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef IB_PACK_H +#define IB_PACK_H + +#include + +enum { + IB_LRH_BYTES = 8, + IB_GRH_BYTES = 40, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8 +}; + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_words; + int offset_bits; + int size_bits; + char *field_name; +}; + +#define RESERVED \ + .field_name = "reserved" + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, + * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. 
+*/ +#define IB_OPCODE(transport, op) \ + IB_OPCODE_ ## transport ## _ ## op = \ + IB_OPCODE_ ## transport + IB_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IB_OPCODE_RC = 0x00, + IB_OPCODE_UC = 0x20, + IB_OPCODE_RD = 0x40, + IB_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IB_OPCODE_SEND_FIRST = 0x00, + IB_OPCODE_SEND_MIDDLE = 0x01, + IB_OPCODE_SEND_LAST = 0x02, + IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IB_OPCODE_SEND_ONLY = 0x04, + IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IB_OPCODE_RDMA_WRITE_FIRST = 0x06, + IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IB_OPCODE_RDMA_WRITE_LAST = 0x08, + IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IB_OPCODE_RDMA_READ_REQUEST = 0x0c, + IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IB_OPCODE_ACKNOWLEDGE = 0x11, + IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IB_OPCODE_COMPARE_SWAP = 0x13, + IB_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IB_OPCODE() + macro for more details */ + + /* RC */ + IB_OPCODE(RC, SEND_FIRST), + IB_OPCODE(RC, SEND_MIDDLE), + IB_OPCODE(RC, SEND_LAST), + IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, SEND_ONLY), + IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_FIRST), + IB_OPCODE(RC, RDMA_WRITE_MIDDLE), + IB_OPCODE(RC, RDMA_WRITE_LAST), + IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_ONLY), + IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_READ_REQUEST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RC, ACKNOWLEDGE), + IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RC, COMPARE_SWAP), + IB_OPCODE(RC, FETCH_ADD), + + /* UC */ + IB_OPCODE(UC, SEND_FIRST), + IB_OPCODE(UC, SEND_MIDDLE), + IB_OPCODE(UC, SEND_LAST), + IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, SEND_ONLY), + IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_FIRST), + IB_OPCODE(UC, RDMA_WRITE_MIDDLE), + IB_OPCODE(UC, RDMA_WRITE_LAST), + IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_ONLY), + IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IB_OPCODE(RD, SEND_FIRST), + IB_OPCODE(RD, SEND_MIDDLE), + IB_OPCODE(RD, SEND_LAST), + IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, SEND_ONLY), + IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_FIRST), + IB_OPCODE(RD, RDMA_WRITE_MIDDLE), + IB_OPCODE(RD, RDMA_WRITE_LAST), + IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_ONLY), + IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_READ_REQUEST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RD, ACKNOWLEDGE), + IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RD, COMPARE_SWAP), + IB_OPCODE(RD, FETCH_ADD), + + /* UD */ + IB_OPCODE(UD, SEND_ONLY), + IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +enum { + IB_LNH_RAW = 0, + IB_LNH_IP = 1, + IB_LNH_IBA_LOCAL = 2, + IB_LNH_IBA_GLOBAL = 3 +}; + +struct ib_unpacked_lrh { + u8 virtual_lane; + u8 link_version; + u8 service_level; + u8 
link_next_header; + __be16 destination_lid; + __be16 packet_length; + __be16 source_lid; +}; + +struct ib_unpacked_grh { + u8 ip_version; + u8 traffic_class; + __be32 flow_label; + __be16 payload_length; + u8 next_header; + u8 hop_limit; + union ib_gid source_gid; + union ib_gid destination_gid; +}; + +struct ib_unpacked_bth { + u8 opcode; + u8 solicited_event; + u8 mig_req; + u8 pad_count; + u8 transport_header_version; + __be16 pkey; + __be32 destination_qpn; + u8 ack_req; + __be32 psn; +}; + +struct ib_unpacked_deth { + __be32 qkey; + __be32 source_qpn; +}; + +struct ib_ud_header { + struct ib_unpacked_lrh lrh; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + void *buf); + +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure); + +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header); + +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf); + +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header); + +#endif /* IB_PACK_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_sa.h b/branches/MTHCA/hw/mthca/kernel/ib_sa.h new file mode 100644 index 00000000..08962d22 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_sa.h @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_sa.h 2928 2005-07-28 18:45:56Z sean.hefty $ + */ + +#ifndef IB_SA_H +#define IB_SA_H + +#include +#include + +enum { + IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ + + IB_SA_METHOD_GET_TABLE = 0x12, + IB_SA_METHOD_GET_TABLE_RESP = 0x92, + IB_SA_METHOD_DELETE = 0x15 +}; + +enum ib_sa_selector { + IB_SA_GTE = 0, + IB_SA_LTE = 1, + IB_SA_EQ = 2, + /* + * The meaning of "best" depends on the attribute: for + * example, for MTU best will return the largest available + * MTU, while for packet life time, best will return the + * smallest available life time. 
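/*
 * Sketch of the UD header helpers declared in ib_pack.h above: initialize a
 * header for a GRH-less UD packet, fill a few fields, and pack it into a
 * caller-supplied buffer of at least IB_LRH_BYTES + IB_BTH_BYTES +
 * IB_DETH_BYTES.  The LID/QPN/qkey values, the cl_hton16/cl_hton32 helpers,
 * and the assumption that ib_ud_header_pack() returns the packed length are
 * illustrative; remaining fields (opcode, pkey, psn) are omitted.
 */
static int pack_ud_header_example(void *buf, int payload_bytes)
{
    struct ib_ud_header hdr;

    ib_ud_header_init(payload_bytes, 0 /* no GRH */, &hdr);

    hdr.lrh.destination_lid = cl_hton16(0x0002);
    hdr.lrh.source_lid      = cl_hton16(0x0001);
    hdr.bth.destination_qpn = cl_hton32(0x123456);
    hdr.deth.qkey           = cl_hton32(0x11111111);  /* illustrative */
    hdr.deth.source_qpn     = cl_hton32(1);

    return ib_ud_header_pack(&hdr, buf);
}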
+ */ + IB_SA_BEST = 3 +}; + +enum ib_sa_rate { + IB_SA_RATE_2_5_GBPS = 2, + IB_SA_RATE_5_GBPS = 5, + IB_SA_RATE_10_GBPS = 3, + IB_SA_RATE_20_GBPS = 6, + IB_SA_RATE_30_GBPS = 4, + IB_SA_RATE_40_GBPS = 7, + IB_SA_RATE_60_GBPS = 8, + IB_SA_RATE_80_GBPS = 9, + IB_SA_RATE_120_GBPS = 10 +}; + +static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate) +{ + switch (rate) { + case IB_SA_RATE_2_5_GBPS: return 1; + case IB_SA_RATE_5_GBPS: return 2; + case IB_SA_RATE_10_GBPS: return 4; + case IB_SA_RATE_20_GBPS: return 8; + case IB_SA_RATE_30_GBPS: return 12; + case IB_SA_RATE_40_GBPS: return 16; + case IB_SA_RATE_60_GBPS: return 24; + case IB_SA_RATE_80_GBPS: return 32; + case IB_SA_RATE_120_GBPS: return 48; + default: return -1; + } +} + +/* + * Structures for SA records are named "struct ib_sa_xxx_rec." No + * attempt is made to pack structures to match the physical layout of + * SA records in SA MADs; all packing and unpacking is handled by the + * SA query code. + * + * For a record with structure ib_sa_xxx_rec, the naming convention + * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we + * never use different abbreviations or otherwise change the spelling + * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). + * + * Reserved rows are indicated with comments to help maintainability. + */ + +/* reserved: 0 */ +/* reserved: 1 */ +#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) +#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) +#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) +#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) +#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) +/* reserved: 7 */ +#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) +#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) +#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) +#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) +#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) +#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) +/* reserved: 14 */ +#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) +#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) +#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) +#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) +#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) +#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) + +struct ib_sa_path_rec { + /* reserved */ + /* reserved */ + union ib_gid dgid; + union ib_gid sgid; + __be16 dlid; + __be16 slid; + int raw_traffic; + /* reserved */ + __be32 flow_label; + u8 hop_limit; + u8 traffic_class; + int reversible; + u8 numb_path; + __be16 pkey; + /* reserved */ + u8 sl; + u8 mtu_selector; + u8 mtu; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 preference; +}; + +#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) +#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) +#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) +#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) +#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) +#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) +#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME 
IB_SA_COMP_MASK(11) +#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) +#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) + +struct ib_sa_mcmember_rec { + union ib_gid mgid; + union ib_gid port_gid; + __be32 qkey; + __be16 mlid; + u8 mtu_selector; + u8 mtu; + u8 traffic_class; + __be16 pkey; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 sl; + __be32 flow_label; + u8 hop_limit; + u8 scope; + u8 join_state; + int proxy_join; +}; + +/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ +#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0) +#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1) +#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2) +/* reserved: 3 */ +#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4) +#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5) +#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36) + +#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF + +struct ib_sa_service_rec { + u64 id; + union ib_gid gid; + __be16 pkey; + /* reserved */ + u32 lease; + u8 key[16]; + u8 name[64]; + u8 data8[16]; + u16 data16[8]; + u32 data32[4]; + u64 data64[2]; +}; + +struct ib_sa_query; + +void ib_sa_cancel_query(int id, struct ib_sa_query *query); + +int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int 
gfp_mask, + void (*callback)(int status, + struct ib_sa_path_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); + +/** + * ib_sa_mcmember_rec_set - Start an MCMember set query + * @device:device to send query on + * @port_num: port number to send query on + * @rec:MCMember Record to send in query + * @comp_mask:component mask to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when query completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @sa_query:query context, used to cancel query + * + * Send an MCMember Set query to the SA (eg to join a multicast + * group). The callback function will be called when the query + * completes (or fails); status is 0 for a successful response, -EINTR + * if the query is canceled, -ETIMEDOUT is the query timed out, or + * -EIO if an error occurred sending the query. The resp parameter of + * the callback is only valid if status is 0. + * + * If the return value of ib_sa_mcmember_rec_set() is negative, it is + * an error code. Otherwise it is a query ID that can be used to + * cancel the query. + */ +static inline int +ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query) +{ + return ib_sa_mcmember_rec_query(device, port_num, + IB_MGMT_METHOD_SET, + rec, comp_mask, + timeout_ms, gfp_mask, callback, + context, query); +} + +/** + * ib_sa_mcmember_rec_delete - Start an MCMember delete query + * @device:device to send query on + * @port_num: port number to send query on + * @rec:MCMember Record to send in query + * @comp_mask:component mask to send in query + * @timeout_ms:time to wait for response + * @gfp_mask:GFP mask to use for internal allocations + * @callback:function called when query completes, times out or is + * canceled + * @context:opaque user context passed to callback + * @sa_query:query context, used to cancel query + * + * Send an MCMember Delete query to the SA (eg to leave a multicast + * group). The callback function will be called when the query + * completes (or fails); status is 0 for a successful response, -EINTR + * if the query is canceled, -ETIMEDOUT is the query timed out, or + * -EIO if an error occurred sending the query. The resp parameter of + * the callback is only valid if status is 0. + * + * If the return value of ib_sa_mcmember_rec_delete() is negative, it + * is an error code. Otherwise it is a query ID that can be used to + * cancel the query. 
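/*
 * Path-record query sketch using the ib_sa_path_rec_get() prototype above;
 * not part of the patch.  The 1000 ms timeout, the zero gfp_mask, and the
 * GID sources are illustrative; a negative return is an error code and a
 * non-negative return is the query id, as described for the
 * ib_sa_mcmember_rec_* wrappers.
 */
static void path_done(int status, struct ib_sa_path_rec *resp, void *context)
{
    /* resp is valid only when status == 0 */
}

static int start_path_query(struct ib_device *dev, u8 port,
                            union ib_gid *sgid, union ib_gid *dgid,
                            struct ib_sa_query **query)
{
    struct ib_sa_path_rec rec;

    memset(&rec, 0, sizeof rec);
    rec.sgid      = *sgid;
    rec.dgid      = *dgid;
    rec.numb_path = 1;

    return ib_sa_path_rec_get(dev, port, &rec,
                              IB_SA_PATH_REC_DGID |
                              IB_SA_PATH_REC_SGID |
                              IB_SA_PATH_REC_NUMB_PATH,
                              1000 /* ms */, 0 /* gfp */,
                              path_done, NULL, query);
}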
+ */ +static inline int +ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query) +{ + return ib_sa_mcmember_rec_query(device, port_num, + IB_SA_METHOD_DELETE, + rec, comp_mask, + timeout_ms, gfp_mask, callback, + context, query); +} + + +#endif /* IB_SA_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_smi.h b/branches/MTHCA/hw/mthca/kernel/ib_smi.h new file mode 100644 index 00000000..66ff9d94 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_smi.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_smi.h 2928 2005-07-28 18:45:56Z sean.hefty $ + */ + +#if !defined( IB_SMI_H ) +#define IB_SMI_H + +#include + +#define IB_SMP_DATA_SIZE 64 +#define IB_SMP_MAX_PATH_HOPS 64 + +#pragma pack(push,1) +struct ib_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + u8 hop_ptr; + u8 hop_cnt; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; + u8 reserved[28]; + u8 data[IB_SMP_DATA_SIZE]; + u8 initial_path[IB_SMP_MAX_PATH_HOPS]; + u8 return_path[IB_SMP_MAX_PATH_HOPS]; +}; +#pragma pack(pop) + + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE cl_hton16(0x0002) +#define IB_SMP_ATTR_NODE_DESC cl_hton16(0x0010) +#define IB_SMP_ATTR_NODE_INFO cl_hton16(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO cl_hton16(0x0012) +#define IB_SMP_ATTR_GUID_INFO cl_hton16(0x0014) +#define IB_SMP_ATTR_PORT_INFO cl_hton16(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE cl_hton16(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE cl_hton16(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE cl_hton16(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cl_hton16(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cl_hton16(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE cl_hton16(0x001B) +#define IB_SMP_ATTR_SM_INFO cl_hton16(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG cl_hton16(0x0030) +#define IB_SMP_ATTR_LED_INFO cl_hton16(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK cl_hton16(0xFF00) + +static inline u8 +ib_get_smp_direction(struct ib_smp *smp) +{ + return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +#endif /* IB_SMI_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_user_at.h b/branches/MTHCA/hw/mthca/kernel/ib_user_at.h new file mode 100644 index 00000000..be9cce67 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_user_at.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_user_at.h 3202 2005-08-26 17:11:34Z roland $ + */ + +#ifndef IB_USER_AT_H +#define IB_USER_AT_H + +#include +#include + +#define IB_USER_AT_ABI_VERSION 1 + +enum { + IB_USER_AT_CMD_ROUTE_BY_IP, + IB_USER_AT_CMD_PATHS_BY_ROUTE, + IB_USER_AT_CMD_IPS_BY_GID, + IB_USER_AT_CMD_IPS_BY_SUBNET, + IB_USER_AT_CMD_INVALIDATE_PATHS, + IB_USER_AT_CMD_CANCEL, + IB_USER_AT_CMD_STATUS, + + IB_USER_AT_CMD_EVENT, +}; + +/* + * command ABI structures. + */ +struct ib_uat_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +enum ib_uat_multipathing_type { + IB_USER_AT_PATH_SAME_PORT = 0, + IB_USER_AT_PATH_SAME_HCA = 1, /* but different ports if applicable */ + IB_USER_AT_PATH_SAME_SYSTEM = 2, /* but different ports if applicable */ + IB_USER_AT_PATH_INDEPENDENT_HCA = 3, + IB_USER_AT_PATH_SRC_ROUTE = 4, /* application controlled multipathing */ +}; + +enum ib_uat_route_flags { + IB_USER_AT_ROUTE_USE_DEFAULTS = 0, + IB_USER_AT_ROUTE_FORCE_ATS = 1, + IB_USER_AT_ROUTE_FORCE_ARP = 2, + IB_USER_AT_ROUTE_FORCE_RESOLVE = 4, +}; + +struct ib_uat_path_attr { + __u16 qos_tag; + __u16 pkey; + __u8 multi_path_type; +}; + +struct ib_uat_ib_route { + __u8 sgid[16]; + __u8 dgid[16]; + struct ibv_device *out_dev; + int out_port; + struct ib_uat_path_attr attr; +}; + +enum ib_uat_op_status { + IB_USER_AT_STATUS_INVALID = 0, + IB_USER_AT_STATUS_PENDING = 1, + IB_USER_AT_STATUS_COMPLETED = 2, + IB_USER_AT_STATUS_CANCELED = 3, +}; + +struct ib_uat_completion { + void (*fn)(__u64 req_id, void *context, int rec_num); + void *context; + __u64 req_id; +}; + +struct ib_uat_paths_by_route_req { + struct ib_uat_ib_route *ib_route; + __u32 mpath_type; + struct ib_sa_path_rec *path_arr; + int npath; + struct ib_uat_completion *async_comp; + __u64 response; +}; + +struct ib_uat_paths_by_route_resp { + __u64 req_id; +}; + +struct ib_uat_route_by_ip_req { + __u32 dst_ip; + __u32 src_ip; + int tos; + __u16 flags; + struct ib_uat_ib_route *ib_route; + struct ib_uat_completion *async_comp; + __u64 response; +}; + +struct ib_uat_route_by_ip_resp { + __u64 req_id; +}; + +struct ib_uat_ips_by_gid_req { + union ibv_gid *gid; + __u32 *dst_ips; + int nips; + struct ib_uat_completion *async_comp; + __u64 response; +}; + +struct ib_uat_ips_by_gid_resp { + __u64 req_id; +}; + +struct ib_uat_ips_by_subnet_req { + __u32 network; + __u32 netmask; + __u32 *dst_ips; + int nips; +}; + +struct ib_uat_invalidate_paths_req { + struct ib_uat_ib_route *ib_route; +}; + +struct ib_uat_cancel_req { + __u64 req_id; +}; + +struct ib_uat_status_req { + __u64 req_id; +}; + +/* + * event notification ABI structures. + */ +struct ib_uat_event_get { + __u64 response; +}; + +struct ib_uat_event_resp { + __u64 callback; + __u64 context; + __u64 req_id; + int rec_num; +}; +#endif /* IB_USER_AT_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_user_mad.h b/branches/MTHCA/hw/mthca/kernel/ib_user_mad.h new file mode 100644 index 00000000..1b491f7f --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_user_mad.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_mad.h 2928 2005-07-28 18:45:56Z sean.hefty $ + */ + +#ifndef IB_USER_MAD_H +#define IB_USER_MAD_H + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_MAD_ABI_VERSION 5 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + */ + +/** + * ib_user_mad_hdr - MAD packet header + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + */ +struct ib_user_mad_hdr { + uint32_t id; + uint32_t status; + uint32_t timeout_ms; + uint32_t retries; + uint32_t length; + __be32 qpn; + __be32 qkey; + __be16 lid; + uint8_t sl; + uint8_t path_bits; + uint8_t grh_present; + uint8_t gid_index; + uint8_t hop_limit; + uint8_t traffic_class; + uint8_t gid[16]; + __be32 flow_label; +}; + +/** + * ib_user_mad - MAD packet + * @hdr - MAD packet header + * @data - Contents of MAD + * + */ +struct ib_user_mad { + struct ib_user_mad_hdr hdr; + uint8_t data[0]; +}; + +/** + * ib_user_mad_reg_req - MAD registration request + * @id - Set by the kernel; used to identify agent in future requests. + * @qpn - Queue pair number; must be 0 or 1. + * @method_mask - The caller will receive unsolicited MADs for any method + * where @method_mask = 1. 
+ * @mgmt_class - Indicates which management class of MADs should be receive + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version - Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @rmpp_version: If set, indicates the RMPP version used. + * + */ +struct ib_user_mad_reg_req { + uint32_t id; + uint32_t method_mask[4]; + uint8_t qpn; + uint8_t mgmt_class; + uint8_t mgmt_class_version; + uint8_t oui[3]; + uint8_t rmpp_version; +}; + +#define IB_IOCTL_MAGIC 0x1b + +#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ + struct ib_user_mad_reg_req) + +#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, uint32_t) + +#endif /* IB_USER_MAD_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_user_verbs.h b/branches/MTHCA/hw/mthca/kernel/ib_user_verbs.h new file mode 100644 index 00000000..2965b638 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_user_verbs.h @@ -0,0 +1,712 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_verbs.h 4019 2005-11-11 00:33:09Z sean.hefty $ + */ + +#ifndef IB_USER_VERBS_H +#define IB_USER_VERBS_H + +#include + +#ifdef LINUX_TO_BE_REMOVED +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. 
+ */ +#define IB_USER_VERBS_ABI_VERSION 4 + +enum { + IB_USER_VERBS_CMD_GET_CONTEXT, + IB_USER_VERBS_CMD_QUERY_DEVICE, + IB_USER_VERBS_CMD_QUERY_PORT, + IB_USER_VERBS_CMD_ALLOC_PD, + IB_USER_VERBS_CMD_DEALLOC_PD, + IB_USER_VERBS_CMD_CREATE_AH, + IB_USER_VERBS_CMD_MODIFY_AH, + IB_USER_VERBS_CMD_QUERY_AH, + IB_USER_VERBS_CMD_DESTROY_AH, + IB_USER_VERBS_CMD_REG_MR, + IB_USER_VERBS_CMD_REG_SMR, + IB_USER_VERBS_CMD_REREG_MR, + IB_USER_VERBS_CMD_QUERY_MR, + IB_USER_VERBS_CMD_DEREG_MR, + IB_USER_VERBS_CMD_ALLOC_MW, + IB_USER_VERBS_CMD_BIND_MW, + IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, + IB_USER_VERBS_CMD_CREATE_CQ, + IB_USER_VERBS_CMD_RESIZE_CQ, + IB_USER_VERBS_CMD_DESTROY_CQ, + IB_USER_VERBS_CMD_POLL_CQ, + IB_USER_VERBS_CMD_PEEK_CQ, + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_CMD_QUERY_QP, + IB_USER_VERBS_CMD_MODIFY_QP, + IB_USER_VERBS_CMD_DESTROY_QP, + IB_USER_VERBS_CMD_POST_SEND, + IB_USER_VERBS_CMD_POST_RECV, + IB_USER_VERBS_CMD_ATTACH_MCAST, + IB_USER_VERBS_CMD_DETACH_MCAST, + IB_USER_VERBS_CMD_CREATE_SRQ, + IB_USER_VERBS_CMD_MODIFY_SRQ, + IB_USER_VERBS_CMD_QUERY_SRQ, + IB_USER_VERBS_CMD_DESTROY_SRQ, + IB_USER_VERBS_CMD_POST_SRQ_RECV +}; + + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * Specifically: + * - Do not use pointer types -- pass pointers in __u64 instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. + */ + +struct ib_uverbs_async_event_desc { + __u64 element; + __u32 event_type; /* enum ib_event_type */ + __u32 reserved; +}; + +struct ib_uverbs_comp_event_desc { + __u64 cq_handle; +}; + +/* + * All commands from userspace should start with a __u32 command field + * followed by __u16 in_words and out_words fields (which give the + * length of the command block and response buffer if any in 32-bit + * words). The kernel driver will read these fields first and read + * the rest of the command struct based on these value. 
+ */ + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 num_comp_vectors; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __be64 node_guid; + __be64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32 max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_query_port { + __u64 response; + __u8 port_num; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_port_resp { + __u32 port_cap_flags; + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; + __u32 gid_tbl_len; + __u16 pkey_tbl_len; + __u16 lid; + __u16 sm_lid; + __u8 state; + __u8 max_mtu; + __u8 active_mtu; + __u8 lmc; + __u8 max_vl_num; + __u8 sm_sl; + __u8 subnet_timeout; + __u8 init_type_reply; + __u8 active_width; + __u8 active_speed; + __u8 phys_state; + __u8 reserved[3]; +}; + +struct ib_uverbs_alloc_pd { + __u64 response; + __u64 driver_data[0]; +}; +#endif + +struct ibv_alloc_pd_resp { + __u64 pd_handle; + __u32 pdn; + __u32 reserved; +}; + +struct ibv_dealloc_pd { + __u64 pd_handle; +}; + +struct ibv_reg_mr { + __u64 start; + __u64 length; + __u64 hca_va; + __u32 access_flags; + __u32 pdn; + __u64 pd_handle; +}; + +struct ibv_reg_mr_resp { + __u64 mr_handle; + __u32 lkey; + __u32 rkey; +}; + +struct ibv_dereg_mr { + __u64 mr_handle; +}; + +#ifdef LINUX_TO_BE_REMOVED +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; +#endif + +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_create_cq { + __u64 response; + __u64 user_handle; + __u32 cqe; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; + __u64 driver_data[0]; +}; +#else +struct ibv_create_cq { + struct ibv_reg_mr mr; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; + __u64 user_handle; + __u32 lkey; +}; +#endif + +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_create_cq_resp { + __u64 cq_handle; + __u32 cqe; +}; +#else +struct ibv_create_cq_resp { + __u64 user_handle; + __u64 cq_handle; + struct ibv_reg_mr_resp mr; + __u32 cqe; + __u32 cqn; +}; +#endif + +struct ib_uverbs_poll_cq { + __u64 response; + __u32 cq_handle; + __u32 ne; +}; + +struct ib_uverbs_wc { + __u64 wr_id; + __u32 status; + __u32 opcode; + __u32 vendor_err; + __u32 byte_len; + __u32 imm_data; + __u32 qp_num; + __u32 src_qp; + __u32 wc_flags; + __u16 pkey_index; + __u16 slid; + __u8 sl; + __u8 dlid_path_bits; + __u8 port_num; + __u8 reserved; +}; + +struct ib_uverbs_poll_cq_resp { + __u32 count; + __u32 reserved; + struct ib_uverbs_wc wc[0]; +}; + +struct 
ib_uverbs_req_notify_cq { + __u32 cq_handle; + __u32 solicited_only; +}; + + +#ifdef LINUX_TO_BE_REMOVED +struct ib_uverbs_destroy_cq { + __u64 response; + __u32 cq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_cq_resp { + __u32 comp_events_reported; + __u32 async_events_reported; +}; + +#endif + +struct ib_uverbs_global_route { + __u8 dgid[16]; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 reserved; +}; + +struct ib_uverbs_ah_attr { + struct ib_uverbs_global_route grh; + __u16 dlid; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; + __u8 reserved; +}; + +#ifdef LINUX_TO_BE_REMOVED +struct ib_uverbs_qp_attr { + __u32 qp_attr_mask; + __u32 qp_state; + __u32 cur_qp_state; + __u32 path_mtu; + __u32 path_mig_state; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + + struct ib_uverbs_ah_attr ah_attr; + struct ib_uverbs_ah_attr alt_ah_attr; + + /* ib_qp_cap */ + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 en_sqd_async_notify; + __u8 sq_draining; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[5]; +}; +#endif +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; +#else +struct ibv_create_qp { + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; + struct ibv_reg_mr mr; + __u64 user_handle; + __u64 send_cq_handle; + __u64 recv_cq_handle; + __u64 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u32 lkey; /* used only in kernel */ + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved[5]; +}; +#endif + + +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_create_qp_resp { + __u64 qp_handle; + __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; +}; +#else +struct ibv_create_qp_resp { + struct ibv_reg_mr_resp mr; + __u64 user_handle; + __u64 qp_handle; + __u32 qpn; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; +}; +#endif + + +#ifdef LINUX_TO_BE_REMOVED +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. 
+ */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { + __u32 dummy[]; +}; + +#else + struct ibv_modify_qp_resp { + enum ibv_qp_attr_mask attr_mask; + __u8 qp_state; + __u8 reserved[3]; + }; +#endif + +#ifdef LINUX_TO_BE_REMOVED +struct ib_uverbs_destroy_qp { + __u64 response; + __u32 qp_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_qp_resp { + __u32 events_reported; +}; +#endif + +/* + * The ib_uverbs_sge structure isn't used anywhere, since we assume + * the ib_sge structure is packed the same way on 32-bit and 64-bit + * architectures in both kernel and user space. It's just here to + * document the ABI. + */ +struct ib_uverbs_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct ib_uverbs_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + __u32 imm_data; + union { + struct { + __u64 remote_addr; + __u32 rkey; + __u32 reserved; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + __u32 reserved; + } atomic; + struct { + __u32 ah; + __u32 remote_qpn; + __u32 remote_qkey; + __u32 reserved; + } ud; + } wr; +}; + +struct ib_uverbs_post_send { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_send_wr send_wr[0]; +}; + +struct ib_uverbs_post_send_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_recv_wr { + __u64 wr_id; + __u32 num_sge; + __u32 reserved; +}; + +struct ib_uverbs_post_recv { + __u64 response; + __u32 qp_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv_wr[0]; +}; + +struct ib_uverbs_post_recv_resp { + __u32 bad_wr; +}; + +struct ib_uverbs_post_srq_recv { + __u64 response; + __u32 srq_handle; + __u32 wr_count; + __u32 sge_count; + __u32 wqe_size; + struct ib_uverbs_recv_wr recv[0]; +}; + +struct ib_uverbs_post_srq_recv_resp { + __u32 bad_wr; +}; + + +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_create_ah { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 reserved; + struct ib_uverbs_ah_attr attr; +}; +#else +struct ibv_create_ah { + struct ibv_reg_mr mr; +}; +#endif + +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_create_ah_resp { + __u32 ah_handle; +}; +#else +struct ibv_create_ah_resp { + __u64 start; + struct ibv_reg_mr_resp mr; + ib_av_attr_t av_attr; +}; +#endif + +struct ib_uverbs_destroy_ah { + __u32 ah_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq { + __u64 response; + __u64 
user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq_resp { + __u32 srq_handle; +}; + +struct ib_uverbs_modify_srq { + __u32 srq_handle; + __u32 attr_mask; + __u32 max_wr; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_destroy_srq { + __u64 response; + __u32 srq_handle; + __u32 reserved; +}; + +struct ib_uverbs_destroy_srq_resp { + __u32 events_reported; +}; + +#endif /* IB_USER_VERBS_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/ib_verbs.h b/branches/MTHCA/hw/mthca/kernel/ib_verbs.h new file mode 100644 index 00000000..68405544 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/ib_verbs.h @@ -0,0 +1,1552 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_verbs.h 2975 2005-08-04 04:59:37Z roland $ + */ + +#if !defined(IB_VERBS_H) +#define IB_VERBS_H + +#include +#include +#include + +union ib_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +enum ib_node_type { + IB_NODE_CA = 1, + IB_NODE_SWITCH, + IB_NODE_ROUTER +}; + +enum ib_device_cap_flags { + IB_DEVICE_RESIZE_MAX_WR = 1, + IB_DEVICE_BAD_PKEY_CNTR = (1<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1<<2), + IB_DEVICE_RAW_MULTI = (1<<3), + IB_DEVICE_AUTO_PATH_MIG = (1<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), + IB_DEVICE_SHUTDOWN_PORT = (1<<8), + IB_DEVICE_INIT_TYPE = (1<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), + IB_DEVICE_SRQ_RESIZE = (1<<13), + IB_DEVICE_N_NOTIFY_CQ = (1<<14), +}; + +#ifdef LINUX_TO_BE_REMOVED +// defined in ib_types.h +enum ib_atomic_cap { + IB_ATOMIC_NONE, + IB_ATOMIC_HCA, + IB_ATOMIC_GLOB +}; +#endif + +struct ib_device_attr { + u64 fw_ver; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ib_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + u16 max_pkeys; + u8 local_ca_ack_delay; +}; + +static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) +{ + switch (mtu) { + case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +enum ib_port_state { + IB_PORT_NOP = 0, + IB_PORT_DOWN = 1, + IB_PORT_INIT = 2, + IB_PORT_ARMED = 3, + IB_PORT_ACTIVE = 4, + IB_PORT_ACTIVE_DEFER = 5 +}; + +enum ib_port_cap_flags { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25 +}; + +enum ib_port_width { + IB_WIDTH_1X = 1, + IB_WIDTH_4X = 2, + IB_WIDTH_8X = 4, + IB_WIDTH_12X = 8 +}; + +static inline int ib_width_enum_to_int(enum ib_port_width width) +{ + switch (width) { + case IB_WIDTH_1X: return 1; + case IB_WIDTH_4X: return 4; + case IB_WIDTH_8X: return 8; + case IB_WIDTH_12X: return 12; + default: return -1; + } +} + +struct ib_port_attr { + enum ib_port_state state; + enum ib_mtu max_mtu; + enum ib_mtu active_mtu; + int gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 
bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; +}; + +enum ib_device_modify_flags { + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 +}; + +struct ib_device_modify { + u64 sys_image_guid; +}; + +enum ib_port_modify_flags { + IB_PORT_SHUTDOWN = 1, + IB_PORT_INIT_TYPE = (1<<2), + IB_PORT_RESET_QKEY_CNTR = (1<<3) +}; + +struct ib_port_modify { + u32 set_port_cap_mask; + u32 clr_port_cap_mask; + u8 init_type; +}; + +enum ib_event_type { + IB_EVENT_CQ_ERR = IB_AE_CQ_ERROR, + IB_EVENT_QP_FATAL = IB_AE_QP_FATAL, + IB_EVENT_QP_REQ_ERR = IB_AE_WQ_REQ_ERROR, + IB_EVENT_QP_ACCESS_ERR = IB_AE_WQ_ACCESS_ERROR, + IB_EVENT_COMM_EST = IB_AE_QP_COMM, + IB_EVENT_SQ_DRAINED = IB_AE_SQ_DRAINED, + IB_EVENT_PATH_MIG = IB_AE_QP_APM, + IB_EVENT_PATH_MIG_ERR = IB_AE_QP_APM_ERROR, + IB_EVENT_DEVICE_FATAL = IB_AE_LOCAL_FATAL, + IB_EVENT_PORT_ACTIVE = IB_AE_PORT_ACTIVE, + IB_EVENT_PORT_ERR = IB_AE_PORT_DOWN, + IB_EVENT_LID_CHANGE = IB_AE_UNKNOWN + 1, + IB_EVENT_PKEY_CHANGE, + IB_EVENT_SM_CHANGE, + IB_EVENT_SRQ_ERR, + IB_EVENT_SRQ_LIMIT_REACHED, + IB_EVENT_QP_LAST_WQE_REACHED +}; + +struct ib_event { + struct ib_device *device; + union { + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_srq *srq; + u8 port_num; + } element; + enum ib_event_type event; +}; + +struct ib_event_handler { + struct ib_device *device; + void (*handler)(struct ib_event_handler *, struct ib_event *); + struct list_head list; +}; + +#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ + do { \ + (_ptr)->device = _device; \ + (_ptr)->handler = _handler; \ + INIT_LIST_HEAD(&(_ptr)->list); \ + } while (0) + +struct ib_global_route { + union ib_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; +}; + +struct ib_grh { + __be32 version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +}; + +enum { + IB_MULTICAST_QPN = 0xffffff +}; + +#ifdef LINUX_TO_BE_REMOVED +// defined in ib_types.h +#define IB_LID_PERMISSIVE cl_hton16(0xFFFF) +#endif + +enum ib_ah_flags { + IB_AH_GRH = 1 +}; + +struct ib_ah_attr { + struct ib_global_route grh; + u16 dlid; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; +}; + +#ifdef WIN_TO_BE_REMOVE +//define in ib_types.h +enum ib_wc_status { + IB_WC_SUCCESS, + IB_WC_LOC_LEN_ERR, + IB_WC_LOC_QP_OP_ERR, + IB_WC_LOC_EEC_OP_ERR, + IB_WC_LOC_PROT_ERR, + IB_WC_WR_FLUSH_ERR, + IB_WC_MW_BIND_ERR, + IB_WC_BAD_RESP_ERR, + IB_WC_LOC_ACCESS_ERR, + IB_WC_REM_INV_REQ_ERR, + IB_WC_REM_ACCESS_ERR, + IB_WC_REM_OP_ERR, + IB_WC_RETRY_EXC_ERR, + IB_WC_RNR_RETRY_EXC_ERR, + IB_WC_LOC_RDD_VIOL_ERR, + IB_WC_REM_INV_RD_REQ_ERR, + IB_WC_REM_ABORT_ERR, + IB_WC_INV_EECN_ERR, + IB_WC_INV_EEC_STATE_ERR, + IB_WC_FATAL_ERR, + IB_WC_RESP_TIMEOUT_ERR, + IB_WC_GENERAL_ERR +}; +#endif + +#ifdef LINUX_TO_BE_REMOVED +// defined in ib_types.h +enum ib_wc_opcode { + IB_WC_SEND, + IB_WC_RDMA_WRITE, + IB_WC_RDMA_READ, + IB_WC_COMP_SWAP, + IB_WC_FETCH_ADD, + IB_WC_BIND_MW, +/* + * Set value of IB_WC_RECV so consumers can test if a completion is a + * receive by testing (opcode & IB_WC_RECV). 
+ */ + IB_WC_RECV = 1 << 7, + IB_WC_RECV_RDMA_WITH_IMM +}; +#endif + +#ifdef WIN_TO_BE_REMOVED +// ib_recv_opt_t is used instead +enum ib_wc_flags { + IB_WC_GRH = 1, + IB_WC_WITH_IMM = (1<<1) +}; +#endif + +#ifdef WIN_TO_BE_REMOVED +// struct _ib_wc is used instead +struct ib_wc { + u64 wr_id; + enum ib_wc_status status; + enum ib_wc_opcode opcode; + u32 vendor_err; + u32 byte_len; + __be32 imm_data; + u32 qp_num; + u32 src_qp; + int wc_flags; + u16 pkey_index; + u16 slid; + u8 sl; + u8 dlid_path_bits; + u8 port_num; /* valid only for DR SMPs on switches */ +}; +#endif + +enum ib_cq_notify { + IB_CQ_SOLICITED, + IB_CQ_NEXT_COMP +}; + +enum ib_srq_attr_mask { + IB_SRQ_MAX_WR = 1 << 0, + IB_SRQ_LIMIT = 1 << 1, +}; + +struct ib_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; +}; + +struct ib_srq_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + struct ib_srq_attr attr; +}; + +struct ib_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +enum ib_sig_type { + IB_SIGNAL_ALL_WR, + IB_SIGNAL_REQ_WR +}; + +#ifdef LINUX_TO_BE_REMOVED +// defined in ib_types.h +enum ib_qp_type_t { + /* + * IB_QPT_QP0 and IB_QPT_QP1 have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. + */ + IB_QPT_QP0, + IB_QPT_QP1, + + IB_QPT_RELIABLE_CONN, + IB_QPT_UNRELIABLE_CONN, + IB_QPT_UNRELIABLE_DGRM, + IB_QPT_RAW_IPV6, + IB_QPT_RAW_ETY +}; +#endif + +struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + enum ib_qp_type_t qp_type; + u8 port_num; /* special QP types only */ +}; + +enum ib_rnr_timeout { + IB_RNR_TIMER_655_36 = 0, + IB_RNR_TIMER_000_01 = 1, + IB_RNR_TIMER_000_02 = 2, + IB_RNR_TIMER_000_03 = 3, + IB_RNR_TIMER_000_04 = 4, + IB_RNR_TIMER_000_06 = 5, + IB_RNR_TIMER_000_08 = 6, + IB_RNR_TIMER_000_12 = 7, + IB_RNR_TIMER_000_16 = 8, + IB_RNR_TIMER_000_24 = 9, + IB_RNR_TIMER_000_32 = 10, + IB_RNR_TIMER_000_48 = 11, + IB_RNR_TIMER_000_64 = 12, + IB_RNR_TIMER_000_96 = 13, + IB_RNR_TIMER_001_28 = 14, + IB_RNR_TIMER_001_92 = 15, + IB_RNR_TIMER_002_56 = 16, + IB_RNR_TIMER_003_84 = 17, + IB_RNR_TIMER_005_12 = 18, + IB_RNR_TIMER_007_68 = 19, + IB_RNR_TIMER_010_24 = 20, + IB_RNR_TIMER_015_36 = 21, + IB_RNR_TIMER_020_48 = 22, + IB_RNR_TIMER_030_72 = 23, + IB_RNR_TIMER_040_96 = 24, + IB_RNR_TIMER_061_44 = 25, + IB_RNR_TIMER_081_92 = 26, + IB_RNR_TIMER_122_88 = 27, + IB_RNR_TIMER_163_84 = 28, + IB_RNR_TIMER_245_76 = 29, + IB_RNR_TIMER_327_68 = 30, + IB_RNR_TIMER_491_52 = 31 +}; + +enum ib_qp_attr_mask { + IB_QP_STATE = 1, + IB_QP_CUR_STATE = (1<<1), + IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), + IB_QP_ACCESS_FLAGS = (1<<3), + IB_QP_PKEY_INDEX = (1<<4), + IB_QP_PORT = (1<<5), + IB_QP_QKEY = (1<<6), + IB_QP_AV = (1<<7), + IB_QP_PATH_MTU = (1<<8), + IB_QP_TIMEOUT = (1<<9), + IB_QP_RETRY_CNT = (1<<10), + IB_QP_RNR_RETRY = (1<<11), + IB_QP_RQ_PSN = (1<<12), + IB_QP_MAX_QP_RD_ATOMIC = (1<<13), + IB_QP_ALT_PATH = (1<<14), + IB_QP_MIN_RNR_TIMER = (1<<15), + IB_QP_SQ_PSN = (1<<16), + IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), + IB_QP_PATH_MIG_STATE = (1<<18), + IB_QP_CAP = (1<<19), + IB_QP_DEST_QPN = (1<<20) +}; + +#ifdef WIN_TO_BE_REMOVED +// ib_apm_state_t used instead +enum ib_mig_state { + IB_MIG_MIGRATED, + IB_MIG_REARM, + IB_MIG_ARMED +}; +#endif + +//TODO: these literals are also defined in 
ib_types.h and have there ANOTHER VALUES !!! +enum ib_qp_state { + IBQPS_RESET, + IBQPS_INIT, + IBQPS_RTR, + IBQPS_RTS, + IBQPS_SQD, + IBQPS_SQE, + IBQPS_ERR +}; + + +struct ib_qp_attr { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + ib_apm_state_t path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; +}; + +#ifdef WIN_TO_BE_REMOVED +// ib_send_opt_t flags are used instead +enum ib_wr_opcode { + IB_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND, + IB_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD +}; +#endif +#ifdef WIN_TO_BE_REMOVED +// ib_send_opt_t flags are used instead +enum ib_send_flags { + IB_SEND_FENCE = 1, + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), + IB_SEND_INLINE = (1<<3) +}; +#endif + +struct ib_sge { + u64 addr; + u32 length; + u32 lkey; +}; + +#ifdef WIN_TO_BE_REMOVED +// struct _ib_send_wr is used instead +struct ib_send_wr { + struct ib_send_wr *next; + u64 wr_id; + enum ib_wr_opcode opcode; + int send_flags; + int num_sge; + struct ib_sge *sg_list; + __be32 imm_data; + + union { + struct { + u64 remote_addr; + u32 rkey; + } rdma; + struct { + u64 remote_addr; + u64 compare_add; + u64 swap; + u32 rkey; + } atomic; + struct { + struct ib_ah *ah; + struct ib_mad_hdr *mad_hdr; + u32 remote_qpn; + u32 remote_qkey; + int timeout_ms; /* valid for MADs only */ + int retries; /* valid for MADs only */ + u16 pkey_index; /* valid for GSI only */ + u8 port_num; /* valid for DR SMPs on switch only */ + } ud; + } wr; +}; +#endif + +#ifdef WIN_TO_BE_REMOVED +// struct _ib_recv_wr is used instead +struct ib_recv_wr { + struct ib_recv_wr * __ptr64 next; + u64 wr_id; + int num_sge; + struct ib_sge * __ptr64 sg_list; +}; +#endif + +typedef enum MTHCA_QP_ACCESS_FLAGS { + MTHCA_ACCESS_LOCAL_WRITE = 1, + MTHCA_ACCESS_REMOTE_WRITE = (1<<1), + MTHCA_ACCESS_REMOTE_READ = (1<<2), + MTHCA_ACCESS_REMOTE_ATOMIC = (1<<3), + MTHCA_ACCESS_MW_BIND = (1<<4) +} mthca_qp_access_t; + +struct ib_phys_buf { + u64 addr; + u64 size; +}; + +struct ib_mr_attr { + struct ib_pd *pd; + u64 device_virt_addr; + u64 size; + mthca_qp_access_t mr_access_flags; + u32 lkey; + u32 rkey; +}; + +enum ib_mr_rereg_flags { + IB_MR_REREG_TRANS = 1, + IB_MR_REREG_PD = (1<<1), + IB_MR_REREG_ACCESS = (1<<2) +}; + +struct ib_mw_bind { + struct ib_mr *mr; + u64 wr_id; + u64 addr; + u32 length; + int send_flags; + int mw_access_flags; +}; + +struct ib_fmr_attr { + int max_pages; + int max_maps; + u8 page_size; +}; + +struct ib_ucontext { + struct ib_device *device; + PVOID user_uar; + struct ib_pd *pd; + atomic_t usecnt; /* count all resources */ + ULONG is_removing; + +#ifdef LINUX_TO_BE_REMOVED + struct list_head pd_list; + struct list_head mr_list; + struct list_head mw_list; + struct list_head cq_list; + struct list_head qp_list; + struct list_head srq_list; + struct list_head ah_list; + spinlock_t lock; +#endif +}; + +struct ib_uobject { + u64 user_handle; /* handle given to us by userspace */ + struct ib_ucontext *context; /* associated user context */ + struct list_head list; /* link to context's list */ + u32 id; /* index into kernel idr */ +}; + 
+struct ib_umem { + u64 user_base; + u64 virt_base; + u64 length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; +}; + +struct ib_umem_chunk { + struct list_head list; + int nents; + int nmap; + struct scatterlist page_list[0]; +}; + +struct ib_udata { + void *inbuf; + void *outbuf; + size_t inlen; + size_t outlen; +}; + +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((char *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (char *) &((struct ib_umem_chunk *) 0)->page_list[0])) + +struct ib_umem_object { + struct ib_uobject uobject; + struct ib_umem umem; +}; + +struct ib_pd { + struct ib_device *device; + struct ib_ucontext *ucontext; + atomic_t usecnt; /* count all resources */ +}; + +struct ib_ah { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *ucontext; + struct ib_mr *ib_mr; +}; + +typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); + +struct ib_cq { + struct ib_device *device; +#ifdef LINUX_TO_BE_REMOVED + struct ib_uobject *uobject; +#else + struct ib_ucontext *ucontext; + struct ib_mr *ib_mr; +#endif + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ +}; + +struct ib_srq { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + atomic_t usecnt; +}; + +struct ib_qp { + struct ib_device *device; + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; +#ifdef LINUX_TO_BE_REMOVED + struct ib_uobject *uobject; +#else + struct ib_ucontext *ucontext; + struct ib_mr *ib_mr; +#endif + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type_t qp_type; +}; + +struct ib_mr { + struct ib_device *device; + struct ib_pd *pd; +#ifdef LINUX_TO_BE_REMOVED + struct ib_uobject *uobject; +#endif + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ +}; + +struct ib_mw { + struct ib_device *device; + struct ib_pd *pd; +#ifdef LINUX_TO_BE_REMOVED + struct ib_uobject *uobject; +#endif + u32 rkey; +}; + +struct ib_fmr { + struct ib_device *device; + struct ib_pd *pd; + struct list_head list; + u32 lkey; + u32 rkey; +}; + +struct ib_mad; +struct ib_grh; + +enum ib_process_mad_flags { + IB_MAD_IGNORE_MKEY = 1, + IB_MAD_IGNORE_BKEY = 2, + IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY +}; + +enum ib_mad_result { + IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ + IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ + IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ + IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ +}; + +#define IB_DEVICE_NAME_MAX 64 + +struct ib_cache { + rwlock_t lock; + struct ib_event_handler event_handler; + struct ib_pkey_cache **pkey_cache; + struct ib_gid_cache **gid_cache; +}; + +struct mthca_dev; + +struct ib_device { + struct mthca_dev *mdev; + + char name[IB_DEVICE_NAME_MAX]; + + struct list_head event_handler_list; + spinlock_t event_handler_lock; + + struct list_head core_list; + struct list_head client_data_list; + spinlock_t client_data_lock; + + struct ib_cache cache; + + u32 flags; + + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr); + int (*query_port)(struct ib_device *device, + u8 port_num, + struct ib_port_attr 
*port_attr); + int (*query_gid)(struct ib_device *device, + u8 port_num, int index, + union ib_gid *gid); + int (*query_pkey)(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + int (*modify_device)(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + int (*modify_port)(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + ci_umv_buf_t* const p_umv_buf); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, + struct vm_area_struct *vma); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah * (*create_ah)(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + int (*modify_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq * (*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + ci_umv_buf_t* const p_umv_buf); + int (*modify_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + int (*query_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + int (*post_srq_recv)(struct ib_srq *srq, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr); + struct ib_qp * (*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + ci_umv_buf_t* const p_umv_buf); + int (*modify_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + int (*query_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + int (*post_send)(struct ib_qp *qp, + struct _ib_send_wr *send_wr, + struct _ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int *cqe); + int (*poll_cq)(struct ib_cq *cq, int num_entries, + struct _ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, + enum ib_cq_notify cq_notify); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags); + struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, mthca_qp_access_t acc); + int (*query_mr)(struct ib_mr *mr, + struct ib_mr_attr *mr_attr); + int (*dereg_mr)(struct ib_mr *mr); + int (*rereg_phys_mr)(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + int (*bind_mw)(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + struct 
ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*detach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*process_mad)(struct ib_device *device, + int process_mad_flags, + u8 port_num, + struct _ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + +#ifdef LINUX_TO_BE_REMOVED + struct module *owner; + struct class_device class_dev; + struct kobject ports_parent; +#endif + struct list_head port_list; + + u64 uverbs_cmd_mask; + __be64 node_guid; + u8 node_type; + u8 phys_port_cnt; +}; + +struct ib_client { + char *name; + void (*add) (struct ib_device *); + void (*remove)(struct ib_device *); + + struct list_head list; +}; + +struct ib_device *ib_alloc_device(size_t size); +void ib_dealloc_device(struct ib_device *device); + +int ib_register_device (struct ib_device *device); +void ib_unregister_device(struct ib_device *device); + +int ib_register_client (struct ib_client *client); +void ib_unregister_client(struct ib_client *client); + +void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data); + +int ib_core_init(void); + +void ib_core_cleanup(void); + +#ifdef LINUX_TO_BE_REMOVED +static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; +} + +static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; +} + +#endif + + + +int ib_register_event_handler (struct ib_event_handler *event_handler); +int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_dispatch_event(struct ib_event *event); + +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr); + +int ib_query_port(struct ib_device *device, + u8 port_num, struct ib_port_attr *port_attr); + +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid); + +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + +/** + * ibv_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + */ +struct ib_pd *ibv_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + */ +int ibv_dealloc_pd(struct ib_pd *pd); + +/** + * ibv_create_ah - Creates an address handle for the given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. 
+ * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ibv_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_create_ah_from_wc - Creates an address handle associated with the + * sender of the specified work completion. + * @pd: The protection domain associated with the address handle. + * @wc: Work completion information associated with a received message. + * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @port_num: The outbound port number to associate with the address. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ibv_create_ah_from_wc(struct ib_pd *pd, struct _ib_wc *wc, + struct ib_grh *grh, u8 port_num); + +/** + * ibv_modify_ah - Modifies the address vector associated with an address + * handle. + * @ah: The address handle to modify. + * @ah_attr: The new address vector attributes to associate with the + * address handle. + */ +int ibv_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ibv_query_ah - Queries the address vector associated with an address + * handle. + * @ah: The address handle to query. + * @ah_attr: The address vector attributes associated with the address + * handle. + */ +int ibv_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ibv_destroy_ah - Destroys an address handle. + * @ah: The address handle to destroy. + */ +int ibv_destroy_ah(struct ib_ah *ah); + +/** + * ibv_create_srq - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the SRQ. + * + * srq_attr->max_wr and srq_attr->max_sge are read to determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ibv_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ibv_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr); + +/** + * ibv_modify_srq - Modifies the attributes for the specified SRQ. + * @srq: The SRQ to modify. + * @srq_attr: On input, specifies the SRQ attributes to modify. On output, + * the current values of selected SRQ attributes are returned. + * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ + * are being modified. + * + * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or + * IB_SRQ_LIMIT to set the SRQ's limit and request notification when + * the number of receives queued drops below the limit. + */ +int ibv_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + +/** + * ibv_query_srq - Returns the attribute list and current values for the + * specified SRQ. + * @srq: The SRQ to query. + * @srq_attr: The attributes of the specified SRQ. + */ +int ibv_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + +/** + * ibv_destroy_srq - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + */ +int ibv_destroy_srq(struct ib_srq *srq); + +/** + * ibv_post_srq_recv - Posts a list of work requests to the specified SRQ.
+ * @srq: The SRQ to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ibv_post_srq_recv(struct ib_srq *srq, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr) +{ + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); +} + +/** + * ibv_create_qp - Creates a QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the QP. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + */ + struct ib_qp *ibv_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_modify_qp - Modifies the attributes for the specified QP and then + * transitions the QP to the given state. + * @qp: The QP to modify. + * @qp_attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @qp_attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + */ +int ibv_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + +/** + * ibv_query_qp - Returns the attribute list and current values for the + * specified QP. + * @qp: The QP to query. + * @qp_attr: The attributes of the specified QP. + * @qp_attr_mask: A bit-mask used to select specific attributes to query. + * @qp_init_attr: Additional attributes of the selected QP. + * + * The qp_attr_mask may be used to limit the query to gathering only the + * selected attributes. + */ +int ibv_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ibv_destroy_qp - Destroys the specified QP. + * @qp: The QP to destroy. + */ +int ibv_destroy_qp(struct ib_qp *qp); + +/** + * ib_post_send - Posts a list of work requests to the send queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @send_wr: A list of work requests to post on the send queue. + * @bad_send_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_send(struct ib_qp *qp, + struct _ib_send_wr *send_wr, + struct _ib_send_wr **bad_send_wr) +{ + return qp->device->post_send(qp, send_wr, bad_send_wr); +} + +/** + * ib_post_recv - Posts a list of work requests to the receive queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_recv(struct ib_qp *qp, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr) +{ + return qp->device->post_recv(qp, recv_wr, bad_recv_wr); +} + +/** + * ibv_create_cq - Creates a CQ on the specified device. + * @device: The device on which to create the CQ. + * @comp_handler: A user-specified callback that is invoked when a + * completion event occurs on the CQ. + * @event_handler: A user-specified callback that is invoked when an + * asynchronous event not associated with a completion occurs on the CQ. 
+ * @cq_context: Context associated with the CQ returned to the user via + * the associated completion and event handlers. + * @cqe: The minimum size of the CQ. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * Users can examine the cq structure to determine the actual CQ size. + */ +struct ib_cq *ibv_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_resize_cq - Modifies the capacity of the CQ. + * @cq: The CQ to resize. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +int ibv_resize_cq(struct ib_cq *cq, int cqe); + +/** + * ibv_destroy_cq - Destroys the specified CQ. + * @cq: The CQ to destroy. + */ +int ibv_destroy_cq(struct ib_cq *cq); + +/** + * ib_poll_cq - poll a CQ for completion(s) + * @cq:the CQ being polled + * @num_entries:maximum number of completions to return + * @wc:array of at least @num_entries &struct _ib_wc where completions + * will be returned + * + * Poll a CQ for (possibly multiple) completions. If the return value + * is < 0, an error occurred. If the return value is >= 0, it is the + * number of completions returned. If the return value is + * non-negative and < num_entries, then the CQ was emptied. + */ +static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, + struct _ib_wc *wc) +{ + return cq->device->poll_cq(cq, num_entries, wc); +} + +/** + * ib_peek_cq - Returns the number of unreaped completions currently + * on the specified CQ. + * @cq: The CQ to peek. + * @wc_cnt: A minimum number of unreaped completions to check for. + * + * If the number of unreaped completions is greater than or equal to wc_cnt, + * this function returns wc_cnt, otherwise, it returns the actual number of + * unreaped completions. + */ +int ib_peek_cq(struct ib_cq *cq, int wc_cnt); + +/** + * ib_req_notify_cq - Request completion notification on a CQ. + * @cq: The CQ to generate an event for. + * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will + * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP, + * notification will occur on the next completion. + */ +static inline int ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify cq_notify) +{ + return cq->device->req_notify_cq(cq, cq_notify); +} + +/** + * ib_req_ncomp_notif - Request completion notification when there are + * at least the specified number of unreaped completions on the CQ. + * @cq: The CQ to generate an event for. + * @wc_cnt: The number of unreaped completions that should be on the + * CQ before an event is generated. + */ +static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +{ + return cq->device->req_ncomp_notif ? + cq->device->req_ncomp_notif(cq, wc_cnt) : + -ENOSYS; +} + +/** + * ibv_reg_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain associated assigned to the registered region. + * @vaddr: virtual address of the region + * @length: Specifies the size of the region. + * @hca_va: virtual address in HCA + * @mr_access_flags: Specifies the memory access rights. + * @um_call: call from user, when TRUE. 
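+ *
+ * A minimal call sketch (illustrative only: pd, buf and len are assumed to be
+ * supplied by the caller, and hca_va is simply set to the buffer address here):
+ *
+ *	struct ib_mr *mr = ibv_reg_mr( pd, MTHCA_ACCESS_LOCAL_WRITE,
+ *		buf, len, (uint64_t)(ULONG_PTR)buf, TRUE );
+ *	... check the returned pointer for an error, use the region, then ...
+ *	ibv_dereg_mr( mr );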
+ */ +struct ib_mr *ibv_reg_mr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + void* __ptr64 vaddr, + uint64_t length, + uint64_t hca_va, + boolean_t um_call + ); + +/** + * ibv_get_dma_mr - Returns a memory region for system memory that is + * usable for DMA. + * @pd: The protection domain associated with the memory region. + * @mr_access_flags: Specifies the memory access rights. + */ +struct ib_mr *ibv_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t mr_access_flags); + +/** + * ibv_reg_phys_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain associated assigned to the registered region. + * @phys_buf_array: Specifies a list of physical buffers to use in the + * memory region. + * @num_phys_buf: Specifies the size of the phys_buf_array. + * @mr_access_flags: Specifies the memory access rights. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +struct ib_mr *ibv_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + +/** + * ibv_rereg_phys_mr - Modifies the attributes of an existing memory region. + * Conceptually, this call performs the functions deregister memory region + * followed by register physical memory region. Where possible, + * resources are reused instead of deallocated and reallocated. + * @mr: The memory region to modify. + * @mr_rereg_mask: A bit-mask used to indicate which of the following + * properties of the memory region are being modified. + * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies + * the new protection domain to associated with the memory region, + * otherwise, this parameter is ignored. + * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies a list of physical buffers to use in the new + * translation, otherwise, this parameter is ignored. + * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies the size of the phys_buf_array, otherwise, this + * parameter is ignored. + * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this + * field specifies the new memory access rights, otherwise, this + * parameter is ignored. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +int ibv_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + +/** + * ibv_query_mr - Retrieves information about a specific memory region. + * @mr: The memory region to retrieve information about. + * @mr_attr: The attributes of the specified memory region. + */ +int ibv_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +/** + * ibv_dereg_mr - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + */ +int ibv_dereg_mr(struct ib_mr *mr); + +/** + * ibv_alloc_mw - Allocates a memory window. + * @pd: The protection domain associated with the memory window. + */ +struct ib_mw *ibv_alloc_mw(struct ib_pd *pd); + +/** + * ib_bind_mw - Posts a work request to the send queue of the specified + * QP, which binds the memory window to the given address range and + * remote access attributes. + * @qp: QP to post the bind work request on. + * @mw: The memory window to bind. 
+ * @mw_bind: Specifies information about the memory window, including + * its address range, remote access rights, and associated memory region. + */ +static inline int ib_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* XXX reference counting in corresponding MR? */ + return mw->device->bind_mw ? + mw->device->bind_mw(qp, mw, mw_bind) : + -ENOSYS; +} + +/** + * ibv_dealloc_mw - Deallocates a memory window. + * @mw: The memory window to deallocate. + */ +int ibv_dealloc_mw(struct ib_mw *mw); + +/** + * ibv_alloc_fmr - Allocates a unmapped fast memory region. + * @pd: The protection domain associated with the unmapped region. + * @mr_access_flags: Specifies the memory access rights. + * @fmr_attr: Attributes of the unmapped region. + * + * A fast memory region must be mapped before it can be used as part of + * a work request. + */ +struct ib_fmr *ibv_alloc_fmr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +/** + * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. + * @fmr: The fast memory region to associate with the pages. + * @page_list: An array of physical pages to map to the fast memory region. + * @list_len: The number of pages in page_list. + * @iova: The I/O virtual address to use with the mapped region. + */ +static inline int ib_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova) +{ + return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); +} + +/** + * ibv_unmap_fmr - Removes the mapping from a list of fast memory regions. + * @fmr_list: A linked list of fast memory regions to unmap. + */ +int ibv_unmap_fmr(struct list_head *fmr_list); + +/** + * ibv_dealloc_fmr - Deallocates a fast memory region. + * @fmr: The fast memory region to deallocate. + */ +int ibv_dealloc_fmr(struct ib_fmr *fmr); + +/** + * ibv_attach_mcast - Attaches the specified QP to a multicast group. + * @qp: QP to attach to the multicast group. The QP must be type + * IB_QPT_UNRELIABLE_DGRM. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + * + * In order to send and receive multicast packets, subnet + * administration must have created the multicast group and configured + * the fabric appropriately. The port associated with the specified + * QP must also be a member of the multicast group. + */ +int ibv_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ibv_detach_mcast - Detaches the specified QP from a multicast group. + * @qp: QP to detach from the multicast group. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + */ +int ibv_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ibv_um_close - Releases application. 
+ * @h_um_ca: application context + */ +void ibv_um_close(struct ib_ucontext * h_um_ca); + +#endif /* IB_VERBS_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/mt_atomic.h b/branches/MTHCA/hw/mthca/kernel/mt_atomic.h new file mode 100644 index 00000000..4dcf5f30 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_atomic.h @@ -0,0 +1,58 @@ +#ifndef MT_ATOMIC_H +#define MT_ATOMIC_H + +// atomic +typedef LONG atomic_t; + +static inline void atomic_inc(atomic_t *pval) +{ + InterlockedIncrement(pval); +} + +static inline void atomic_dec(atomic_t *pval) +{ + InterlockedDecrement(pval); +} + +static inline atomic_t atomic_read(atomic_t *pval) +{ + return (atomic_t)InterlockedOr (pval,0); +} + +static inline void atomic_set(atomic_t *pval, long val) +{ + InterlockedExchange(pval, val); +} + +/** +* atomic_inc_and_test - decrement and test +* pval: pointer of type atomic_t +* +* Atomically increments pval by 1 and +* returns true if the result is 0, or false for all other +* cases. +*/ +static inline int +atomic_inc_and_test(atomic_t *pval) +{ + LONG val = InterlockedIncrement(pval); + return (val == 0); +} + +/** +* atomic_dec_and_test - decrement and test +* pval: pointer of type atomic_t +* +* Atomically decrements pval by 1 and +* returns true if the result is 0, or false for all other +* cases. +*/ +static inline int +atomic_dec_and_test(atomic_t *pval) +{ + LONG val = InterlockedDecrement(pval); + return (val == 0); +} + + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_bitmap.h b/branches/MTHCA/hw/mthca/kernel/mt_bitmap.h new file mode 100644 index 00000000..0468285e --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_bitmap.h @@ -0,0 +1,272 @@ +#ifndef MT_BITMAP_H +#define MT_BITMAP_H + +#ifdef WIN_TO_BE_REMOVED +unsigned long ul_mask[32] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080, + 0x00000100, 0x00000200, 0x00000400, 0x00000800, + 0x00001000, 0x00002000, 0x00004000, 0x00008000, + 0x00010000, 0x00020000, 0x00040000, 0x00080000, + 0x00100000, 0x00200000, 0x00400000, 0x00800000, + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000 }; +#endif + +// DECLARE_BITMAP +#define BITS_PER_LONG 32 +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +/** +* atomic_set_bit - Atomically set a bit in memory +* @nr: the bit to set +* @addr: the address to start counting from +* +* This function is atomic and may not be reordered. See __set_bit() +* if you do not require the atomic guarantees. +* +* Note: there are no guarantees that this function will not be reordered +* on non x86 architectures, so if you are writting portable code, +* make sure not to rely on its reordering guarantees. +* +* Note that @nr may be almost arbitrarily large; this function is not +* restricted to acting on a single-word quantity. +*/ +static inline unsigned long atomic_clear_bit(int nr, volatile unsigned long * addr) +{ + return InterlockedAnd( addr, ~(1 << nr) ); +} + +/** +* atomic_clear_bit - Clears a bit in memory +* @nr: Bit to clear +* @addr: Address to start counting from +* +* clear_bit() is atomic and may not be reordered. However, it does +* not contain a memory barrier, so if it is used for locking purposes, +* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() +* in order to ensure changes are visible on other processors. 
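+*
+* A minimal usage sketch of the helpers in this header (the bitmap name and
+* size below are examples only):
+*
+*	DECLARE_BITMAP(map, 64);
+*	bitmap_zero(map, 64);
+*	set_bit(5, (long *)map);
+*	if (test_bit(5, map))
+*		clear_bit(5, (long *)map);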
+*/ +static inline unsigned long atomic_set_bit(int nr, volatile unsigned long * addr) +{ + return InterlockedOr( addr, (1 << nr) ); +} + +static inline int set_bit(int nr,long * addr) +{ + addr += nr >> 5; + return atomic_set_bit( nr & 0x1f, addr ); +} + +static inline int clear_bit(int nr, long * addr) +{ + addr += nr >> 5; + return atomic_clear_bit( nr & 0x1f, addr ); +} + +static inline int test_bit(int nr, const unsigned long * addr) +{ + int mask; + + addr += nr >> 5; + mask = 1 << (nr & 0x1f); + return ((mask & *addr) != 0); +} + + +/** +* bitmap_zero - clear the bitmap +* @dst: the bitmap address +* @nbits: the bitmap size in bits +* +*/ +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + RtlZeroMemory(dst, len); + } +} + +#define BITMAP_LAST_WORD_MASK(nbits) \ + ( ((nbits) % BITS_PER_LONG) ? (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL ) + +int __bitmap_full(const unsigned long *bitmap, int bits); + +static inline int bitmap_full(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_full(src, nbits); +} + +int __bitmap_empty(const unsigned long *bitmap, int bits); + +static inline int bitmap_empty(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_empty(src, nbits); +} + +/* +* fls: find last bit set. +* returns: 0 - if not found or N+1, if found Nth bit +*/ + +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + + +/** +* _ffs - find the first one bit in a word +* @addr: The address to start the search at +* @offset: The bitnumber to start searching at +* +* returns: 0 - if not found or N+1, if found Nth bit +*/ +static inline int _ffs(const unsigned long *addr, int offset) +{ + //TODO: not an effective code - is better in Assembler + int mask = 1 << offset; + int rbc = BITS_PER_LONG - offset; + int ix; + for (ix=0; ix #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_cache.tmh" +#endif #include #include "ib_cache.h" #include "hca_vp.h" +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, ib_cache_setup) +#pragma alloc_text (PAGE, ib_cache_cleanup) +#endif + + struct ib_pkey_cache { int table_len; u16 table[0]; @@ -65,11 +77,15 @@ int ib_get_cached_gid(struct ib_device *device, { struct ib_gid_cache *cache; int ret = 0; + SPIN_LOCK_PREP(lh); + // sanity checks if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; + if (!device->cache.gid_cache) + return -EFAULT; - read_lock_irqsave(&device->cache.lock); + read_lock_irqsave(&device->cache.lock, &lh); cache = device->cache.gid_cache[port_num - start_port(device)]; @@ -78,11 +94,10 @@ int ib_get_cached_gid(struct ib_device *device, else *gid = cache->table[index]; - read_unlock_irqrestore(&device->cache.lock); + read_unlock_irqrestore(&lh); return ret; } -EXPORT_SYMBOL(ib_get_cached_gid); int ib_find_cached_gid(struct ib_device *device, union ib_gid *gid, @@ -92,12 +107,13 @@ int ib_find_cached_gid(struct ib_device *device, struct ib_gid_cache *cache; 
int p, i; int ret = -ENOENT; + SPIN_LOCK_PREP(lh); *port_num = -1; if (index) *index = -1; - read_lock_irqsave(&device->cache.lock); + read_lock_irqsave(&device->cache.lock, &lh); for (p = 0; p <= end_port(device) - start_port(device); ++p) { cache = device->cache.gid_cache[p]; @@ -112,11 +128,10 @@ int ib_find_cached_gid(struct ib_device *device, } } found: - read_unlock_irqrestore(&device->cache.lock); + read_unlock_irqrestore(&lh); return ret; } -EXPORT_SYMBOL(ib_find_cached_gid); int ib_get_cached_pkey(struct ib_device *device, u8 port_num, @@ -125,11 +140,15 @@ int ib_get_cached_pkey(struct ib_device *device, { struct ib_pkey_cache *cache; int ret = 0; + SPIN_LOCK_PREP(lh); + // sanity checks if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; + if (!device->cache.gid_cache) + return -EFAULT; - read_lock_irqsave(&device->cache.lock); + read_lock_irqsave(&device->cache.lock, &lh); cache = device->cache.pkey_cache[port_num - start_port(device)]; @@ -138,11 +157,10 @@ int ib_get_cached_pkey(struct ib_device *device, else *pkey = cache->table[index]; - read_unlock_irqrestore(&device->cache.lock); + read_unlock_irqrestore(&lh); return ret; } -EXPORT_SYMBOL(ib_get_cached_pkey); int ib_find_cached_pkey(struct ib_device *device, u8 port_num, @@ -152,11 +170,12 @@ int ib_find_cached_pkey(struct ib_device *device, struct ib_pkey_cache *cache; int i; int ret = -ENOENT; + SPIN_LOCK_PREP(lh); if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; - read_lock_irqsave(&device->cache.lock); + read_lock_irqsave(&device->cache.lock, &lh); cache = device->cache.pkey_cache[port_num - start_port(device)]; @@ -169,11 +188,10 @@ int ib_find_cached_pkey(struct ib_device *device, break; } - read_unlock_irqrestore(&device->cache.lock); + read_unlock_irqrestore(&lh); return ret; } -EXPORT_SYMBOL(ib_find_cached_pkey); static void ib_cache_update(struct ib_device *device, u8 port) @@ -183,6 +201,7 @@ static void ib_cache_update(struct ib_device *device, struct ib_gid_cache *gid_cache = NULL, *old_gid_cache; int i; int ret; + SPIN_LOCK_PREP(lh); tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) @@ -190,8 +209,8 @@ static void ib_cache_update(struct ib_device *device, ret = ib_query_port(device, port, tprops); if (ret) { - printk(KERN_WARNING "ib_query_port failed (%d) for %s\n", - ret, device->name); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("ib_query_port failed (%d) for %s, port %d\n", + ret, device->name, port)); goto err; } @@ -212,8 +231,8 @@ static void ib_cache_update(struct ib_device *device, for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, (u16)i, pkey_cache->table + i); if (ret) { - printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n", - ret, device->name, i); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("ib_query_pkey failed (%d) for %s (index %d)\n", + ret, device->name, i)); goto err; } } @@ -221,13 +240,13 @@ static void ib_cache_update(struct ib_device *device, for (i = 0; i < gid_cache->table_len; ++i) { ret = ib_query_gid(device, port, i, gid_cache->table + i); if (ret) { - printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", - ret, device->name, i); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("ib_query_gid failed (%d) for %s (index %d)\n", + ret, device->name, i)); goto err; } } - write_lock_irq(&device->cache.lock); + write_lock_irq(&device->cache.lock, &lh); old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; old_gid_cache = 
device->cache.gid_cache [port - start_port(device)]; @@ -235,7 +254,7 @@ static void ib_cache_update(struct ib_device *device, device->cache.pkey_cache[port - start_port(device)] = pkey_cache; device->cache.gid_cache [port - start_port(device)] = gid_cache; - write_unlock_irq(&device->cache.lock); + write_unlock_irq(&lh); kfree(old_pkey_cache); kfree(old_gid_cache); @@ -253,7 +272,6 @@ static void ib_cache_task(void *work_ptr) struct ib_update_work *work = work_ptr; ib_cache_update(work->device, work->port_num); - kfree(work); } #ifdef LINUX_TO_BE_CHANGED @@ -290,12 +308,17 @@ VOID struct ib_update_work *work = (struct ib_update_work *)Context; ib_cache_task(Context); IoFreeWorkItem(work->work_item); + kfree(Context); } static void ib_cache_event(struct ib_event_handler *handler, struct ib_event *event) { struct ib_update_work *work; + static int temp_skip = 10; + + if (temp_skip-- <= 0) + return; if (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || @@ -303,7 +326,7 @@ static void ib_cache_event(struct ib_event_handler *handler, event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE) { work = kmalloc(sizeof *work, GFP_ATOMIC); - //???: what will happen on allocation failure (leo) + //TODO: what will happen on allocation failure ? if (work) { work->device = event->device; work->port_num = event->element.port_num; @@ -347,8 +370,8 @@ static void ib_cache_setup_one(struct ib_device *device) (end_port(device) - start_port(device) + 1), GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.gid_cache) { - printk(KERN_WARNING "Couldn't allocate cache " - "for %s\n", device->name); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("Couldn't allocate cache " + "for %s\n", device->name)); goto err; } @@ -405,12 +428,13 @@ static struct ib_client cache_client = { static struct ib_client cache_client = { "cache", ib_cache_setup_one, ib_cache_cleanup_one }; #endif -int __init ib_cache_setup(void) +int ib_cache_setup(void) { return ib_register_client(&cache_client); } -void __exit ib_cache_cleanup(void) +void ib_cache_cleanup(void) { ib_unregister_client(&cache_client); } + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_device.c b/branches/MTHCA/hw/mthca/kernel/mt_device.c index 31cba28e..c8f3ae91 100644 --- a/branches/MTHCA/hw/mthca/kernel/mt_device.c +++ b/branches/MTHCA/hw/mthca/kernel/mt_device.c @@ -34,6 +34,12 @@ */ #include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_device.tmh" +#endif #include "ib_verbs.h" #include "ib_cache.h" @@ -86,8 +92,8 @@ static int ib_device_check_mandatory(struct ib_device *device) for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) { if (!*(void **) ((u8 *) device + mandatory_table[i].offset)) { - printk(KERN_WARNING "Device %s is missing mandatory function %s\n", - device->name, mandatory_table[i].name); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name)); return -EINVAL; } } @@ -199,20 +205,21 @@ void ib_dealloc_device(struct ib_device *device) static int add_client_context(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; + SPIN_LOCK_PREP(lh); context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) { - printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n", - device->name, client->name); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("Couldn't allocate client context for %s/%s\n", + device->name, 
client->name)); return -ENOMEM; } context->client = client; context->data = NULL; - spin_lock_irqsave(&device->client_data_lock); + spin_lock_irqsave(&device->client_data_lock, &lh); list_add(&context->list, &device->client_data_list); - spin_unlock_irqrestore(&device->client_data_lock); + spin_unlock_irqrestore(&lh); return 0; } @@ -251,8 +258,8 @@ int ib_register_device(struct ib_device *device) #ifdef LINUX_TO_BE_REMOVED ret = ib_device_register_sysfs(device); if (ret) { - printk(KERN_WARNING "Couldn't register device %s with driver model\n", - device->name); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Couldn't register device %s with driver model\n", + device->name)); goto out; } #endif @@ -283,6 +290,7 @@ void ib_unregister_device(struct ib_device *device) { struct ib_client *client; struct ib_client_data *context, *tmp; + SPIN_LOCK_PREP(lh); down(&device_mutex); @@ -294,10 +302,10 @@ void ib_unregister_device(struct ib_device *device) up(&device_mutex); - spin_lock_irqsave(&device->client_data_lock); + spin_lock_irqsave(&device->client_data_lock, &lh); list_for_each_entry_safe(context, tmp, &device->client_data_list, list,struct ib_client_data,struct ib_client_data) kfree(context); - spin_unlock_irqrestore(&device->client_data_lock); + spin_unlock_irqrestore(&lh); } @@ -344,6 +352,7 @@ void ib_unregister_client(struct ib_client *client) { struct ib_client_data *context, *tmp; struct ib_device *device; + SPIN_LOCK_PREP(lh); down(&device_mutex); @@ -351,13 +360,13 @@ void ib_unregister_client(struct ib_client *client) if (client->remove) client->remove(device); - spin_lock_irqsave(&device->client_data_lock); + spin_lock_irqsave(&device->client_data_lock, &lh); list_for_each_entry_safe(context, tmp, &device->client_data_list, list,struct ib_client_data,struct ib_client_data) if (context->client == client) { list_del(&context->list); kfree(context); } - spin_unlock_irqrestore(&device->client_data_lock); + spin_unlock_irqrestore(&lh); } list_del(&client->list); @@ -377,14 +386,15 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; void *ret = NULL; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&device->client_data_lock); + spin_lock_irqsave(&device->client_data_lock, &lh); list_for_each_entry(context, &device->client_data_list, list,struct ib_client_data) if (context->client == client) { ret = context->data; break; } - spin_unlock_irqrestore(&device->client_data_lock); + spin_unlock_irqrestore(&lh); return ret; } @@ -403,19 +413,20 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data) { struct ib_client_data *context; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&device->client_data_lock); + spin_lock_irqsave(&device->client_data_lock, &lh); list_for_each_entry(context, &device->client_data_list, list,struct ib_client_data) if (context->client == client) { context->data = data; goto out; } - printk(KERN_WARNING "No client context found for %s/%s\n", - device->name, client->name); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("No client context found for %s/%s\n", + device->name, client->name)); out: - spin_unlock_irqrestore(&device->client_data_lock); + spin_unlock_irqrestore(&lh); } @@ -430,11 +441,12 @@ out: */ int ib_register_event_handler (struct ib_event_handler *event_handler) { + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&event_handler->device->event_handler_lock); + spin_lock_irqsave(&event_handler->device->event_handler_lock, &lh); list_add_tail(&event_handler->list, 
&event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock); + spin_unlock_irqrestore(&lh); return 0; } @@ -449,9 +461,10 @@ int ib_register_event_handler (struct ib_event_handler *event_handler) */ int ib_unregister_event_handler(struct ib_event_handler *event_handler) { - spin_lock_irqsave(&event_handler->device->event_handler_lock); + SPIN_LOCK_PREP(lh); + spin_lock_irqsave(&event_handler->device->event_handler_lock, &lh); list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock); + spin_unlock_irqrestore(&lh); return 0; } @@ -468,13 +481,14 @@ int ib_unregister_event_handler(struct ib_event_handler *event_handler) void ib_dispatch_event(struct ib_event *event) { struct ib_event_handler *handler; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&event->device->event_handler_lock); + spin_lock_irqsave(&event->device->event_handler_lock, &lh); list_for_each_entry(handler, &event->device->event_handler_list, list,struct ib_event_handler) handler->handler(handler, event); - spin_unlock_irqrestore(&event->device->event_handler_lock); + spin_unlock_irqrestore(&lh); } @@ -506,6 +520,8 @@ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; return device->query_port(device, port_num, port_attr); } @@ -575,6 +591,9 @@ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + return device->modify_port(device, port_num, port_modify_mask, port_modify); } @@ -588,7 +607,7 @@ int ib_core_init(void) ret = ib_cache_setup(); if (ret) { - printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Couldn't set up InfiniBand P_Key/GID cache\n")); } return ret; diff --git a/branches/MTHCA/hw/mthca/kernel/mt_l2w.h b/branches/MTHCA/hw/mthca/kernel/mt_l2w.h new file mode 100644 index 00000000..0075bd8c --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_l2w.h @@ -0,0 +1,103 @@ +#ifndef MT_L2W_H +#define MT_L2W_H + +// =========================================== +// INCLUDES +// =========================================== + +// OS +#include +#include +#include +#include + +// ours - the order is important +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +// =========================================== +// SUBSTITUTIONS +// =========================================== + +#define BUG_ON(exp) do { ASSERT(!(exp)); /* in Linux follows here panic() !*/ } while(0) +#define WARN_ON(exp) do { ASSERT(!(exp)); /* in Linux follows here panic() !*/ } while(0) +#define snprintf _snprintf + +// memory barriers +#define wmb KeMemoryBarrier +#define rmb KeMemoryBarrier +#define mb KeMemoryBarrier + +// =========================================== +// LITERALS +// =========================================== + + + + +// =========================================== +// TYPES +// =========================================== + +// rw_lock +typedef spinlock_t rwlock_t; + +// dummy function +typedef void (*MT_EMPTY_FUNC)(); + +// =========================================== +// MACROS +// =========================================== + +// nullifying macros +#define might_sleep() do {} while(0) + +#ifdef WIN_TO_BE_REMOVED +// compiler doesn't understand 
that +// min_t/max_t + #define min_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) +#define max_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) +#endif + +// ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +// ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +// there is a bug in Microsoft compiler, that when _byteswap_uint64() gets an expression +// it executes the expression but doesn't swap tte dwords +// So, there's a workaround +#ifdef BYTESWAP_UINT64_BUG_FIXED +#define CPU_2_BE64_PREP +#define CPU_2_BE64(x) cl_hton64(x) +#else +#define CPU_2_BE64_PREP unsigned __int64 __tmp__; +#define CPU_2_BE64(x) ( __tmp__ = x, cl_hton64(__tmp__) ) +#endif + + +SIZE_T strlcpy(char *dest, const char *src, SIZE_T size); +void MT_time_calibrate(); + +#define ERR_PTR(error) ((void*)(LONG_PTR)(error)) +#define PTR_ERR(ptr) ((long)(LONG_PTR)(void*)(ptr)) +//TODO: there are 2 assumptions here: +// - pointer can't be too big (around -1) +// - error can't be bigger than 1000 +#define IS_ERR(ptr) ((ULONG_PTR)ptr > (ULONG_PTR)-1000L) + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_list.h b/branches/MTHCA/hw/mthca/kernel/mt_list.h new file mode 100644 index 00000000..35853f3c --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_list.h @@ -0,0 +1,169 @@ +#ifndef MT_LIST_H +#define MT_LIST_H + +// taken from list.h + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/* +* Simple doubly linked list implementation. +* +* Some of the internal functions ("__xxx") are useful when +* manipulating whole lists rather than single entries, as +* sometimes we already know the next/prev entries and we can +* generate better code by using them directly rather than +* using the generic single-entry routines. +*/ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + + +/* +* Insert a new entry between two known consecutive entries. +* +* This is only for internal list manipulation where we know +* the prev/next entries already! +*/ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** +* list_add - add a new entry +* @new: new entry to be added +* @head: list head to add it after +* +* Insert a new entry after the specified head. +* This is good for implementing stacks. +*/ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** +* list_add_tail - add a new entry +* @new: new entry to be added +* @head: list head to add it before +* +* Insert a new entry before the specified head. +* This is useful for implementing queues. +*/ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + + /* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
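 + *
 + * For users of this header, a minimal sketch with the public helpers (the
 + * element type and field names below are examples only):
 + *
 + *	struct item { int val; struct list_head link; };
 + *	struct item *it = ...;		/* caller-allocated element */
 + *	struct item *pos;
 + *	LIST_HEAD(q);
 + *	list_add_tail(&it->link, &q);
 + *	list_for_each_entry(pos, &q, link, struct item)
 + *		... use pos->val ...
 + *	list_del(&it->link);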
+ */ + static inline void __list_del(struct list_head * prev, struct list_head * next) + { + next->prev = prev; + prev->next = next; + } + + /** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ + static inline void list_del(struct list_head *entry) + { + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; + } + +/** +* list_empty - tests whether a list is empty +* @head: the list to test. +*/ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + + /** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry - iterate over list of given type +* @pos: the type * to use as a loop counter. +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +*/ +#define list_for_each_entry(pos, head, member,type) \ + for (pos = list_entry((head)->next, type, member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, type, member)) + + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry_reverse - iterate backwards over list of given type. +* @pos: the type * to use as a loop counter. +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +*/ +#define list_for_each_entry_reverse(pos, head, member,type) \ + for (pos = list_entry((head)->prev, type, member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, type, member)) + + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry_safe - iterate over list of given type safe against removal of list entry +* @pos: the type * to use as a loop counter. +* @n: another type * to use as temporary storage +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +* @type_n: typeof(*n) +*/ +#define list_for_each_entry_safe(pos, n, head, member,type,type_n) \ + for (pos = list_entry((head)->next, type, member), \ + n = list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, type_n, member)) + + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_memory.c b/branches/MTHCA/hw/mthca/kernel/mt_memory.c index 7a08a793..23b95d26 100644 --- a/branches/MTHCA/hw/mthca/kernel/mt_memory.c +++ b/branches/MTHCA/hw/mthca/kernel/mt_memory.c @@ -1,34 +1,48 @@ -#include "hca_driver.h" +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: packer.c 2730 2005-06-28 16:43:03Z sean.hefty $ + */ + #include "hca_driver.h" #include "mthca_dev.h" +#if defined (EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_memory.tmh" +#endif -void * alloc_pages( - IN struct mthca_dev *dev, - IN unsigned long cur_order, - OUT dma_addr_t *p_da) -{ - PHYSICAL_ADDRESS pa = {0}; - DMA_ADAPTER * p_dma = dev->ext->p_dma_adapter; - void * va = p_dma->DmaOperations->AllocateCommonBuffer( - p_dma, PAGE_SIZE << cur_order, &pa, FALSE ); - RtlZeroMemory( va, PAGE_SIZE << cur_order ); - *p_da = pa.QuadPart; - return va; -} - -void free_pages( - IN struct mthca_dev *dev, - IN unsigned long cur_order, - IN void *va, - IN dma_addr_t da) -{ - PHYSICAL_ADDRESS pa = {0}; - DMA_ADAPTER * p_dma = dev->ext->p_dma_adapter; - pa.QuadPart = da; - p_dma->DmaOperations->FreeCommonBuffer( - p_dma, PAGE_SIZE << cur_order, pa, va, FALSE ); -} - /* * Function: map user buffer to kernel and lock it * @@ -39,16 +53,16 @@ int get_user_pages( IN u64 start, /* address in user space */ IN int npages, /* size in pages */ IN int write_access, /* access rights */ - OUT void **pages, /* mapped kernel address */ - OUT PMDL *p_mdl /* MDL */ + OUT struct scatterlist *sg /* s/g list */ ) { PMDL mdl_p; - int size = PAGE_SIZE << npages; + int size = npages << PAGE_SHIFT; int access = (write_access) ? 
IoWriteAccess : IoReadAccess; int err; void * kva; /* kernel virtual address */ + HCA_ENTER(HCA_DBG_SHIM); ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); /* allocate MDL */ @@ -68,7 +82,7 @@ int get_user_pages( __except (EXCEPTION_EXECUTE_HANDLER) { NTSTATUS Status = GetExceptionCode(); - printk(KERN_ERROR "Exception 0x%x on MmProbeAndLockPages(), addr 0xI64x, size %d\n", Status, start, size); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size)); err = -EACCES; goto err1; } @@ -78,12 +92,15 @@ int get_user_pages( KernelMode, MmNonCached, NULL, FALSE, NormalPagePriority ); if (kva == NULL) { - printk(KERN_ERROR "MmMapLockedPagesSpecifyCache failed\n"); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MmMapLockedPagesSpecifyCache failed\n")); goto err2; } - *pages = kva; - *p_mdl = mdl_p; + sg->page = kva; + sg->length = size; + sg->offset = (unsigned int)(start & ~PAGE_MASK); + sg->p_mdl = mdl_p; + sg->dma_address = MmGetPhysicalAddress(kva).QuadPart; return 0; err2: @@ -91,6 +108,7 @@ err2: err1: IoFreeMdl(mdl_p); err0: + HCA_EXIT(HCA_DBG_SHIM); return err; } @@ -103,33 +121,196 @@ void put_page(struct scatterlist *sg) IoFreeMdl(sg->p_mdl); } } + +VOID + AdapterListControl( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PSCATTER_GATHER_LIST ScatterGather, + IN PVOID Context + ) +{ + struct scatterlist *p_sg = (struct scatterlist *)Context; + PSCATTER_GATHER_ELEMENT p_sg_el; + + UNREFERENCED_PARAMETER(DeviceObject); + UNREFERENCED_PARAMETER(Irp); + + // sanity checks + if (!ScatterGather || !Context) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("AdapterListControl failed: invalid parameters\n")); + return; + } + if (ScatterGather->NumberOfElements > 1) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected sg size; %d elements \n", + ScatterGather->NumberOfElements )); + } + if (ScatterGather->Elements[0].Length != p_sg->length) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected buffer size %#x (expected %#x) \n", + ScatterGather->Elements[0].Length, p_sg->length )); + } + + // results + p_sg->dma_address = ScatterGather->Elements[0].Address.QuadPart; // get logical address + p_sg->p_os_sg = ScatterGather; // store sg list address for releasing + //NB: we do not flush the buffers by FlushAdapterBuffers(), because we don't really transfer data +} + +/* Returns: the number of mapped sg elements */ +int pci_map_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction) +{ +#ifndef USE_GET_SG_LIST + + // mapping was performed in alloc_dma_mem + return nents; + +#else + + int i; + NTSTATUS status; + hca_dev_ext_t *p_ext = dev->ext; + struct scatterlist *p_sg = sg; + KIRQL irql = KeRaiseIrqlToDpcLevel(); + + for (i=0; ip_dma_adapter->DmaOperations->GetScatterGatherList( + p_ext->p_dma_adapter, p_ext->cl_ext.p_self_do, p_sg->p_mdl, p_sg->page, + p_sg->length, AdapterListControl, sg, (BOOLEAN)direction ); + if (!NT_SUCCESS(status)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("GetScatterGatherList failed %#x\n", status))); + break; + } + } + KeLowerIrql(irql); + return i; /* i.e., we mapped all the entries */ + +#endif +} + +/* Returns: the number of unmapped sg elements */ +int pci_unmap_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction) +{ +#ifndef USE_GET_SG_LIST + + // mapping was performed in alloc_dma_mem + return nents; -void* alloc_dma_mem( +#else + + int i; + hca_dev_ext_t *p_ext = 
dev->ext; + struct scatterlist *p_sg = sg; + KIRQL irql = KeRaiseIrqlToDpcLevel(); + void *p_os_sg = p_sg->p_os_sg; + + for (i=0; ip_os_sg = NULL; + p_ext->p_dma_adapter->DmaOperations->PutScatterGatherList( + p_ext->p_dma_adapter, p_os_sg, (BOOLEAN)direction ); + } + KeLowerIrql(irql); + return i; /* i.e., we mapped all the entries */ + +#endif +} + +void *alloc_dma_mem( IN struct mthca_dev *dev, IN unsigned long size, - OUT dma_addr_t *p_da) + OUT struct scatterlist *p_sg) { - PHYSICAL_ADDRESS pa = {0}; - DMA_ADAPTER * p_dma = dev->ext->p_dma_adapter; - void * va = p_dma->DmaOperations->AllocateCommonBuffer( + void *va; + DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter; + + +#ifndef USE_GET_SG_LIST + + PHYSICAL_ADDRESS pa; + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + RtlZeroMemory(p_sg,sizeof *p_sg); + p_sg->length = size; + va = p_dma->DmaOperations->AllocateCommonBuffer( p_dma, size, &pa, FALSE ); - *p_da = pa.QuadPart; + p_sg->dma_address = pa.QuadPart; + +#else + + int err; + PHYSICAL_ADDRESS la = {0}, ba = {0}, ha = {(u64)(-1I64)}; + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + RtlZeroMemory(p_sg,sizeof *p_sg); + p_sg->length = size; + + // allocate memory + va = MmAllocateContiguousMemorySpecifyCache( + size, la, ha, ba, MmNonCached ); + if (!va) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size ))); + goto err_alloc; + } + + // allocate MDL + p_sg->p_mdl = IoAllocateMdl( va, size, FALSE, FALSE, NULL ); + if (!p_sg->p_mdl) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size ))); + goto err_mdl; + } + MmBuildMdlForNonPagedPool( p_sg->p_mdl ); + goto end; + +err_mdl: + MmFreeContiguousMemory(va); + va = NULL; +err_alloc: +end: + +#endif + + p_sg->page = va; return va; } void free_dma_mem( IN struct mthca_dev *dev, - IN unsigned long size, - IN void *va, - IN dma_addr_t da) + IN struct scatterlist *p_sg) { - PHYSICAL_ADDRESS pa = {0}; - DMA_ADAPTER * p_dma = dev->ext->p_dma_adapter; - pa.QuadPart = da; +#ifndef USE_GET_SG_LIST + + PHYSICAL_ADDRESS pa; + DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter; + + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + pa.QuadPart = p_sg->dma_address; p_dma->DmaOperations->FreeCommonBuffer( - p_dma, size, pa, va, FALSE ); + p_dma, p_sg->length, pa, + p_sg->page, FALSE ); + +#else + + PMDL p_mdl = p_sg->p_mdl; + PVOID page = p_sg->page; + + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + if (p_mdl) { + p_sg->p_mdl = NULL; + IoFreeMdl( p_mdl ); + } + if (page) { + p_sg->page = NULL; + MmFreeContiguousMemory(page); + } + +#endif } + typedef struct _mt_iobuf_seg { LIST_ENTRY link; PMDL mdl_p; @@ -143,7 +324,7 @@ static int register_segment( IN u64 va, IN u64 size, IN int is_user, - IN u32 acc, + IN ib_access_t acc, IN OUT mt_iobuf_t * iobuf_p) { PMDL mdl_p; @@ -154,7 +335,7 @@ static int register_segment( LOCK_OPERATION Operation; // set Operation - if (acc & IB_ACCESS_LOCAL_WRITE) + if (acc & IB_AC_LOCAL_WRITE) Operation = IoModifyAccess; else Operation = IoReadAccess; @@ -189,8 +370,8 @@ static int register_segment( __except (EXCEPTION_EXECUTE_HANDLER) { NTSTATUS Status = GetExceptionCode(); - HCA_TRACE( HCA_DBG_ERROR, - ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %p, sz %d\n", + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", Status, va, size)); rc = -EACCES; goto err_probe; @@ -217,7 +398,7 @@ int 
iobuf_register( IN u64 va, IN u64 size, IN int is_user, - IN int acc, + IN ib_access_t acc, IN OUT mt_iobuf_t *iobuf_p) { int rc; @@ -313,3 +494,5 @@ void iobuf_deregister(mt_iobuf_t *iobuf_p) } + + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_memory.h b/branches/MTHCA/hw/mthca/kernel/mt_memory.h new file mode 100644 index 00000000..01b1f552 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_memory.h @@ -0,0 +1,275 @@ +#ifndef MT_MEMORY_H +#define MT_MEMORY_H + +#include "iba/ib_types.h" + +// =========================================== +// CONSTANTS +// =========================================== + +#define MT_TAG_ATOMIC 'MOTA' +#define MT_TAG_KERNEL 'LNRK' +#define MT_TAG_HIGH 'HGIH' +#define MT_TAG_PCIPOOL 'PICP' +#define MT_TAG_IOMAP 'PAMI' + +// =========================================== +// SUBSTITUTIONS +// =========================================== + +#define memcpy_toio memcpy + +// =========================================== +// MACROS +// =========================================== + +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define NEXT_PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + + +// =========================================== +// SYSTEM MEMORY +// =========================================== + +// memory +#define __GFP_NOWARN 0 /* Suppress page allocation failure warning */ +#define __GFP_HIGHMEM 0 + +#define GFP_ATOMIC 1 /* can't wait (i.e. DPC or higher) */ +#define GFP_KERNEL 2 /* can wait (npaged) */ +#define GFP_HIGHUSER 4 /* GFP_KERNEL, that can be in HIGH memory */ + + +#define SLAB_ATOMIC GFP_ATOMIC +#define SLAB_KERNEL GFP_KERNEL + +#if 1 +static inline void * kmalloc( SIZE_T bsize, unsigned int gfp_mask) +{ + void *ptr; + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + switch (gfp_mask) { + case GFP_ATOMIC: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_ATOMIC ); + break; + case GFP_KERNEL: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_KERNEL ); + break; + case GFP_HIGHUSER: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_HIGH ); + break; + default: + DbgPrint("kmalloc: unsupported flag %d\n", gfp_mask); + ptr = NULL; + break; + } + return ptr; +} +#else +#define kmalloc(bsize,flags) ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_KERNEL ) +#endif + +static inline void * kzalloc( SIZE_T bsize, unsigned int gfp_mask) +{ + void* va = kmalloc(bsize, gfp_mask); + if (va) + RtlZeroMemory(va, bsize); + return va; +} + +static inline void kfree (const void *pobj) +{ + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + if (pobj) + ExFreePool((void *)pobj); +} + +#define get_zeroed_page(mask) kzalloc(PAGE_SIZE, mask) +#define free_page(ptr) kfree(ptr) + + +// =========================================== +// IO SPACE <==> SYSTEM MEMORY +// =========================================== + + +/** +* ioremap - map bus memory into CPU space +* @offset: bus address of the memory +* @size: size of the resource to map +* +* ioremap performs a platform specific sequence of operations to +* make bus memory CPU accessible via the readb/readw/readl/writeb/ +* writew/writel functions and the other mmio helpers. The returned +* address is not guaranteed to be usable directly as a virtual +* address. 
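+*
+* A minimal usage sketch (illustrative; bar_pa and bar_len stand for a BAR
+* address and length obtained elsewhere, e.g. from pci_resource_start()/pci_resource_len()):
+*
+*	SIZE_T mapped_size;
+*	void *regs = ioremap(bar_pa, bar_len, &mapped_size);
+*	if (regs) {
+*		u32 val = readl(regs);
+*		...
+*		iounmap(regs, mapped_size);
+*	}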
+*/ +static inline void *ioremap(io_addr_t addr, SIZE_T size, SIZE_T* psize) +{ + PHYSICAL_ADDRESS pa; + void *va; + + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + pa.QuadPart = addr; + va = MmMapIoSpace( pa, size, MmNonCached ); + *psize = size; + return va; +} + +static inline void iounmap(void *va, SIZE_T size) +{ + MmUnmapIoSpace( va, size); +} + + // =========================================== + // DMA SUPPORT + // =========================================== + +#define PCI_DMA_BIDIRECTIONAL 0 +#define PCI_DMA_TODEVICE 1 +#define PCI_DMA_FROMDEVICE 2 +#define DMA_TO_DEVICE PCI_DMA_TODEVICE + + struct scatterlist { + dma_addr_t dma_address; /* logical (device) address */ + void * page; /* kernel virtual address */ + PMDL p_mdl; /* MDL, if any (used for user space buffers) */ + PSCATTER_GATHER_LIST p_os_sg; /* adapter scatter-gather list */ + unsigned int offset; /* offset in the first page */ + unsigned int length; /* buffer length */ + }; + + #define sg_dma_address(sg) ((sg)->dma_address) + #define sg_dma_len(sg) ((sg)->length) + + int pci_map_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction); + + int pci_unmap_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction); + + void free_dma_mem( + IN struct mthca_dev *dev, + IN struct scatterlist *p_sg); + + void *alloc_dma_mem( + IN struct mthca_dev *dev, + IN unsigned long size, + OUT struct scatterlist *p_sg); + +static inline void *alloc_dma_zmem( + IN struct mthca_dev *dev, + IN unsigned long size, + OUT struct scatterlist *p_sg) +{ + void *va = alloc_dma_mem( dev, size, p_sg ); + if (va) + RtlZeroMemory(va, size); + return va; +} + +static inline void *alloc_dma_zmem_map( + IN struct mthca_dev *dev, + IN unsigned long size, + IN int direction, + OUT struct scatterlist *p_sg) +{ + void *va = alloc_dma_zmem( dev, size, p_sg ); + if (va) { + RtlZeroMemory(va, size); + if (!pci_map_sg( dev, p_sg, 1, direction )) { + free_dma_mem( dev, p_sg ); + va = NULL; + } + } + return va; +} + +static inline void free_dma_mem_map( + IN struct mthca_dev *dev, + IN struct scatterlist *p_sg, + IN int direction ) +{ + pci_unmap_sg( dev, p_sg, 1, direction ); + free_dma_mem( dev, p_sg ); +} + + static inline dma_addr_t pci_mape_page(struct mthca_dev *dev, + void *va, unsigned long offset, SIZE_T size, int direction) + { + UNREFERENCED_PARAMETER(dev); + UNREFERENCED_PARAMETER(va); + UNREFERENCED_PARAMETER(offset); + UNREFERENCED_PARAMETER(size); + UNREFERENCED_PARAMETER(direction); + /* suppose, that pages where always translated to DMA space */ + return 0; /* i.e., we unmapped all the entries */ + } + + // =========================================== + // HELPERS + // =========================================== + + static inline int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +static inline int long_log2(unsigned long x) +{ + int r = 0; + for (x >>= 1; x > 0; x >>= 1) + r++; + return r; +} + +static inline unsigned long roundup_pow_of_two(unsigned long x) +{ + return (1UL << fls(x - 1)); +} + +// =========================================== +// PROTOTYPES +// =========================================== + +void put_page(struct scatterlist *sg); +int get_user_pages( + IN struct mthca_dev *dev, /* device */ + IN u64 start, /* address in user space */ + IN int npages, /* size in pages */ + IN int write_access, /* access rights */ + OUT struct scatterlist *sg /* s/g list */ + ); + 
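+
+/*
+ * Minimal usage sketch for get_user_pages()/put_page() above (illustrative;
+ * dev, user_va and npages come from the caller, error handling trimmed):
+ *
+ *	struct scatterlist sg;
+ *	if (!get_user_pages(dev, user_va, npages, 1, &sg)) {
+ *		... kernel access via sg.page, DMA via sg.dma_address ...
+ *		put_page(&sg);
+ *	}
+ */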
+typedef struct _mt_iobuf { + u64 va; /* virtual address of the buffer */ + u64 size; /* size in bytes of the buffer */ + LIST_ENTRY seg_que; + u32 nr_pages; + int is_user; + int seg_num; +} mt_iobuf_t; + + +void iobuf_deregister(mt_iobuf_t *iobuf_p); +int iobuf_register( + IN u64 va, + IN u64 size, + IN int is_user, + IN ib_access_t acc, + IN OUT mt_iobuf_t *iobuf_p); + + +unsigned long copy_from_user(void *to, const void *from, unsigned long n); +unsigned long copy_to_user(void *to, const void *from, unsigned long n); + + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_packer.c b/branches/MTHCA/hw/mthca/kernel/mt_packer.c index 66a9f674..afba3a66 100644 --- a/branches/MTHCA/hw/mthca/kernel/mt_packer.c +++ b/branches/MTHCA/hw/mthca/kernel/mt_packer.c @@ -34,16 +34,22 @@ */ #include +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_packer.tmh" +#endif static u64 value_read(int offset, int size, u8 *structure) { switch (size) { case 1: return *(u8 *) (structure + offset); - case 2: return be16_to_cpup((__be16 *) (structure + offset)); - case 4: return be32_to_cpup((__be32 *) (structure + offset)); - case 8: return be64_to_cpup((__be64 *) (structure + offset)); + case 2: return cl_ntoh16(*(__be16 *) (structure + offset)); + case 4: return cl_ntoh32(*(__be32 *) (structure + offset)); + case 8: return cl_ntoh64(*(__be64 *) (structure + offset)); default: - printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Field size %d bits not handled\n", size * 8)); return 0; } } @@ -64,6 +70,7 @@ void ib_pack(const struct ib_field *desc, u8 *buf) { int i; + CPU_2_BE64_PREP; for (i = 0; i < desc_len; ++i) { if (desc[i].size_bits <= 32) { @@ -80,9 +87,9 @@ void ib_pack(const struct ib_field *desc, else val = 0; - mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift); + mask = cl_hton32(((1ull << desc[i].size_bits) - 1) << shift); addr = (__be32 *) buf + desc[i].offset_words; - *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask); + *addr = (*addr & ~mask) | (cl_hton32(val) & mask); } else if (desc[i].size_bits <= 64) { int shift; u64 val; @@ -97,15 +104,15 @@ void ib_pack(const struct ib_field *desc, else val = 0; - mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift); + mask = CPU_2_BE64((~0ull >> (64 - desc[i].size_bits)) << shift); addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); - *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); + *addr = (*addr & ~mask) | (cl_hton64(val) & mask); } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { - printk(KERN_WARNING "Structure field %s of size %d " + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Structure field %s of size %d " "bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); + desc[i].field_name, desc[i].size_bits)); } if (desc[i].struct_size_bytes) @@ -119,17 +126,16 @@ void ib_pack(const struct ib_field *desc, } } } -EXPORT_SYMBOL(ib_pack); static void value_write(int offset, int size, u64 val, u8 *structure) { switch (size * 8) { case 8: *( u8 *) (structure + offset) = (u8)val; break; - case 16: *(__be16 *) (structure + offset) = cpu_to_be16(val); break; - case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break; - case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break; + case 16: *(__be16 *) (structure + offset) = cl_hton16((u16)val); break; + case 32: *(__be32 *) (structure + offset) = cl_hton32((u32)val); break; + case 64: *(__be64 *) (structure + offset) = 
cl_hton64(val); break; default: - printk(KERN_WARNING "Field size %d bits not handled\n", size * 8); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Field size %d bits not handled\n", size * 8)); } } @@ -163,7 +169,7 @@ void ib_unpack(const struct ib_field *desc, shift = 32 - desc[i].offset_bits - desc[i].size_bits; mask = ((1ull << desc[i].size_bits) - 1) << shift; addr = (__be32 *) buf + desc[i].offset_words; - val = (be32_to_cpup(addr) & mask) >> shift; + val = (cl_ntoh32(*addr) & mask) >> shift; value_write(desc[i].struct_offset_bytes, desc[i].struct_size_bytes, val, @@ -177,7 +183,7 @@ void ib_unpack(const struct ib_field *desc, shift = 64 - desc[i].offset_bits - desc[i].size_bits; mask = (~0ull >> (64 - desc[i].size_bits)) << shift; addr = (__be64 *) buf + desc[i].offset_words; - val = (be64_to_cpup(addr) & mask) >> shift; + val = (cl_ntoh64(*addr) & mask) >> shift; value_write(desc[i].struct_offset_bytes, desc[i].struct_size_bytes, val, @@ -185,9 +191,9 @@ void ib_unpack(const struct ib_field *desc, } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { - printk(KERN_WARNING "Structure field %s of size %d " + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Structure field %s of size %d " "bits is not byte-aligned\n", - desc[i].field_name, desc[i].size_bits); + desc[i].field_name, desc[i].size_bits)); } memcpy(structure + desc[i].struct_offset_bytes, @@ -197,4 +203,3 @@ void ib_unpack(const struct ib_field *desc, } } } -EXPORT_SYMBOL(ib_unpack); diff --git a/branches/MTHCA/hw/mthca/kernel/mt_pci.h b/branches/MTHCA/hw/mthca/kernel/mt_pci.h new file mode 100644 index 00000000..83947ef7 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_pci.h @@ -0,0 +1,115 @@ +#ifndef MT_PCI_H +#define MT_PCI_H + +// =========================================== +// LITERALS +// =========================================== + +#ifndef PCI_VENDOR_ID_MELLANOX +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_TAVOR +#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT +#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL +#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI +#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#endif + +#ifndef PCI_VENDOR_ID_TOPSPIN +#define PCI_VENDOR_ID_TOPSPIN 0x1867 +#endif + + +// =========================================== +// TYPES +// =========================================== + + +// =========================================== +// MACROS/FUNCTIONS +// =========================================== + +// get bar boundaries +#if 1 +#define pci_resource_start(dev,bar_num) ((dev)->ext->bar[bar_num].phys) +#define pci_resource_len(dev,bar_num) ((dev)->ext->bar[bar_num].size) +#else +static inline uint64_t pci_resource_start(struct mthca_dev *dev, int bar_num) +{ + return dev->ext->bar[bar_num].phys; +} +#endif + + +// i/o to registers + +static inline u64 readq(const volatile void __iomem *addr) +{ + //TODO: write atomic implementation of _IO_READ_QWORD and change mthca_doorbell.h + u64 val; + READ_REGISTER_BUFFER_ULONG((PULONG)(addr), (PULONG)&val, 2 ); + return val; +} + +static inline u32 readl(const volatile void __iomem *addr) +{ + return READ_REGISTER_ULONG((PULONG)(addr)); +} + +static inline u16 reads(const volatile void __iomem *addr) +{ + return READ_REGISTER_USHORT((PUSHORT)(addr)); +} + +static inline 
u8 readb(const volatile void __iomem *addr) +{ + return READ_REGISTER_UCHAR((PUCHAR)(addr)); +} + +#define __raw_readq readq +#define __raw_readl readl +#define __raw_reads reads +#define __raw_readb readb + +static inline void writeq(unsigned __int64 val, volatile void __iomem *addr) +{ + //TODO: write atomic implementation of _IO_WRITE_QWORD and change mthca_doorbell.h + WRITE_REGISTER_BUFFER_ULONG( (PULONG)(addr), (PULONG)&val, 2 ); +} + +static inline void writel(unsigned int val, volatile void __iomem *addr) +{ + WRITE_REGISTER_ULONG((PULONG)(addr),val); +} + +static inline void writes(unsigned short val, volatile void __iomem *addr) +{ + WRITE_REGISTER_USHORT((PUSHORT)(addr),val); +} + +static inline void writeb(unsigned char val, volatile void __iomem *addr) +{ + WRITE_REGISTER_UCHAR((PUCHAR)(addr),val); +} + +#define __raw_writeq writeq +#define __raw_writel writel +#define __raw_writes writes +#define __raw_writeb writeb + +#endif + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_pcipool.h b/branches/MTHCA/hw/mthca/kernel/mt_pcipool.h new file mode 100644 index 00000000..1270293d --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_pcipool.h @@ -0,0 +1,110 @@ +#ifndef MT_PCIPOOL_H +#define MT_PCIPOOL_H + +typedef struct pci_pool { +#ifdef WIN_TO_BE_REMOVED + struct list_head page_list; + spinlock_t lock; + size_t blocks_per_page; + size_t allocation; + wait_queue_head_t waitq; + struct list_head pools; +#endif + size_t size; + struct mthca_dev *mdev; + char name [32]; + NPAGED_LOOKASIDE_LIST pool_hdr; +} pci_pool_t; + +// taken from dmapool.c + +/** +* pci_pool_create - Creates a pool of consistent memory blocks, for dma. +* @name: name of pool, for diagnostics +* @mdev: device that will be doing the DMA +* @size: size of the blocks in this pool. +* @align: alignment requirement for blocks; must be a power of two +* @allocation: returned blocks won't cross this boundary (or zero) +* Context: !in_interrupt() +* +* Returns a dma allocation pool with the requested characteristics, or +* null if one can't be created. Given one of these pools, dma_pool_alloc() +* may be used to allocate memory. Such memory will all have "consistent" +* DMA mappings, accessible by the device and its driver without using +* cache flushing primitives. The actual size of blocks allocated may be +* larger than requested because of alignment. +* +* If allocation is nonzero, objects returned from dma_pool_alloc() won't + * cross that size boundary. This is useful for devices which have + * addressing restrictions on individual DMA transfers, such as not crossing + * boundaries of 4KBytes. + */ + +pci_pool_t * +pci_pool_create (const char *name, struct mthca_dev *mdev, + size_t size, size_t align, size_t allocation); + +/** + * dma_pool_alloc - get a block of consistent memory + * @pool: dma pool that will produce the block + * @mem_flags: GFP_* bitmask + * @handle: pointer to dma address of block + * + * This returns the kernel virtual address of a currently unused block, + * and reports its dma address through the handle. + * If such a memory block can't be allocated, null is returned. 
+ */ +static inline void * +pci_pool_alloc (pci_pool_t *pool, int mem_flags, dma_addr_t *handle) +{ + PHYSICAL_ADDRESS pa; + void * ptr; + + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + + ptr = ExAllocateFromNPagedLookasideList( &pool->pool_hdr ); + if (ptr != NULL) { + pa = MmGetPhysicalAddress( ptr ); + *handle = pa.QuadPart; + } + return ptr; +} + + +/** +* dma_pool_free - put block back into dma pool +* @pool: the dma pool holding the block +* @vaddr: virtual address of block +* @dma: dma address of block +* +* Caller promises neither device nor driver will again touch this block +* unless it is first re-allocated. +*/ +static inline void +pci_pool_free (pci_pool_t *pool, void *vaddr, dma_addr_t dma) +{ + UNREFERENCED_PARAMETER(dma); + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + ExFreeToNPagedLookasideList( &pool->pool_hdr, vaddr ); +} + + + +/** + * pci_pool_destroy - destroys a pool of dma memory blocks. + * @pool: dma pool that will be destroyed + * Context: !in_interrupt() + * + * Caller guarantees that no more memory from the pool is in use, + * and that nothing will try to use the pool after this call. + */ +static inline void +pci_pool_destroy (pci_pool_t *pool) +{ + ExDeleteNPagedLookasideList( &pool->pool_hdr ); + ExFreePool( pool); +} + + + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_reset_tavor.c b/branches/MTHCA/hw/mthca/kernel/mt_reset_tavor.c new file mode 100644 index 00000000..82331333 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_reset_tavor.c @@ -0,0 +1,641 @@ +#include +#include +#include +#include "hca_driver.h" +#include "mthca.h" +#include "hca_debug.h" +#include "Mt_l2w.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_reset_tavor.tmh" +#endif + + +#pragma warning(disable : 4996) + +/* limitations */ +#define N_BUSES 16 /* max number of PCI buses */ +#define N_DEVICES 32 /* max number of devices on one bus */ +#define N_FUNCTIONS 8 /* max number of functions on one device */ +#define N_CARDS 8 /* max number of HCA cards */ + +/*----------------------------------------------------------------*/ + +PWCHAR +WcharFindChar( + IN PWCHAR pi_BufStart, + IN PWCHAR pi_BufEnd, + IN WCHAR pi_FromPattern, + IN WCHAR pi_ToPattern + ) +/*++ + +Routine Description: + Converts wide-character string into ASCII + +Arguments: + + pi_BufStart.......... start of the source string + pi_BufEnd............ end of the source string + pi_FromPattern....... start of pattern range to find + pi_ToPattern......... end of pattern range to find + +Return Value: + + pointer to the first pattern found or NULL (when reached the end) + +--*/ +{ /* WcharFindChar */ + + PWCHAR l_pResult = pi_BufStart; + + while (l_pResult < pi_BufEnd ) + { + if (*l_pResult >= pi_FromPattern && *l_pResult <= pi_ToPattern) + return l_pResult; + l_pResult++; + } + + return NULL; + +} /* WcharFindChar */ + +/*----------------------------------------------------------------*/ + +/* + * Function: PciFindDeviceByBusAndId + * + * Parameters: + * IN pi_Bus - a bus, to start the scan + * IN pi_DevId - Device Id to search + * INOUT po_pDevFunc - pointer to dev/func, from which to start the search + * + * Returns: + * FALSE - device not found + * TRUE - a device was found; *po_pDevFunc contains its location + * + * Description: + * The function is intended for iterative search on one bus. + * It looks for the device of pi_DevId id, starting from device with + * *po_pDevFunc location. 
When it finds the next device of that id, it updates + * *po_pDevFunc with the found device' location + * + */ +BOOLEAN PciFindDeviceByBusAndId( + IN ULONG pi_Bus, + IN ULONG pi_DevId, + IN OUT PULONG po_pDevFunc ) +{ + ULONG l_DevId; + ULONG l_Bytes; + ULONG l_Device; + ULONG l_Function; + + // calculate, where to start the search + l_Device = *po_pDevFunc & 0x01f; + l_Function = (*po_pDevFunc >> 5) & 7; + for (; l_Device < N_DEVICES; l_Device++, l_Function = 0 ) { + for (; l_Function < N_FUNCTIONS; l_Function++ ) { + l_Bytes = HalGetBusDataByOffset( + PCIConfiguration, + pi_Bus, + l_Device |(l_Function<<5), + (PVOID)&l_DevId, + 0, + sizeof(ULONG) + ); + if (l_Bytes != sizeof(ULONG)) + continue; /* as if - "not found" */ + if (l_DevId == pi_DevId) + goto ExitFound; + } + } + return FALSE; + +ExitFound: + *po_pDevFunc = l_Device |(l_Function<<5); + return TRUE; +} + +/*----------------------------------------------------------------*/ + +/* + * Function: PciFindDeviceById + * + * Parameters: + * IN pi_DevId - Device Id to search + * INOUT po_pBus - pointer to bus number, from which to start the search + * INOUT po_pDevFunc - pointer to dev/func, from which to start the search + * + * Returns: + * FALSE - device was not found + * TRUE - a device was found; *po_pBus/*po_pDevFunc contain its location + * + * Description: + * The function is intended for an iterative search. + * It looks for the device of pi_DevId id, starting from device with *po_pBus and + * *po_pDevFunc location. When it finds the next device of that id, updates *po_pBus + * and *po_pDevFunc with the found device' location + * + */ +static +BOOLEAN PciFindDeviceById( + IN ULONG pi_DevId, + IN OUT PULONG po_pBus, + IN OUT PULONG po_pDevFunc ) +{ + ULONG l_Bus; + ULONG l_DevFunc = *po_pDevFunc; + + for (l_Bus= *po_pBus; l_Bus < N_BUSES; l_Bus++, l_DevFunc=0) { + if (PciFindDeviceByBusAndId(l_Bus, pi_DevId, &l_DevFunc)) + break; + } + if (l_Bus >= N_BUSES) + return FALSE; + + // found + *po_pBus = l_Bus; + *po_pDevFunc = l_DevFunc; + return TRUE; +} + +/*----------------------------------------------------------------*/ + +/* + * Function: PciFindBridgeByBus + * + * Parameters: + * IN pi_SecBus - bus number of an HCA in question + * OUT po_pBus - pointer to bus number of the bridge of the HCA, if found + * OUT po_pDevFunc - pointer to dev/func of the bridge of the HCA, if found + * + * Returns: + * FALSE - the bridge not found + * TRUE - a device was found; *po_pBus/*po_pDevFunc contain its location + * + * Description: + * The function scans all the buses to find the Bridge of an HCA device, found on bus pi_SecBus. + * The appropiate bridge must have its PrimaryBus field in PCI cfg header equal to pi_SecBus. + * + */ +static BOOLEAN PciFindBridgeByBus( + IN ULONG pi_SecBus, + OUT PULONG po_pBus, + OUT PULONG po_pDevFunc ) +{ + ULONG l_DevFunc=0, l_Bus=0; + ULONG l_DevId = ((int)(23110) << 16) | PCI_VENDOR_ID_MELLANOX; + ULONG l_SecBus, l_tmp, l_Bytes; + ULONG l_Device; + ULONG l_Function; + + while (1) { + /* look for a bridge */ + if (!PciFindDeviceById(l_DevId, &l_Bus, &l_DevFunc)) + return FALSE; /* bridge not found */ + + /* found a bridge -check, whether it is ours */ + l_Bytes = HalGetBusDataByOffset( + PCIConfiguration, + l_Bus, + l_DevFunc, + (PVOID)&l_tmp, + 24, /* 24 - PrimaryBus, 25 - SecondaryBus, 26 - SubordinateBus */ + sizeof(ULONG) + ); + if (l_Bytes != sizeof(ULONG)) + goto NextDevice; /* as if - "not found" */ + + l_SecBus = (l_tmp >> 16) & 255; + if ( l_SecBus == pi_SecBus ) + break; /* found !!! 
*/ + +NextDevice: + // calculate, where to continue the search + l_Device = l_DevFunc & 0x01f; + l_Function = (l_DevFunc >> 5) & 7; + l_Function++; + if (l_Function >= N_FUNCTIONS) { + l_Function = 0; + l_Device++; + if (l_Device >= N_DEVICES) { + l_Device = 0; + l_Bus++; + } + if (l_Bus >= N_BUSES) + return FALSE; + } + l_DevFunc = l_Device |(l_Function<<5);; + } + + *po_pBus = l_Bus; + *po_pDevFunc = l_DevFunc; + return TRUE; +} + +/*----------------------------------------------------------------*/ + +/* + * Function: MdGetDevLocation + * + * Parameters: + * IN pi_pPdo - PDO of a device in question + * OUT po_pBus - pointer to the bus number of the device in question + * OUT po_pDevFunc - pointer to dev/func of the device, if found + * + * Returns: + * not STATUS_SUCCESS - the device location was not found + * STATUS_SUCCESS - the device location was found and returned in OUT parameters + * + * Description: + * The function uses IoGetDeviceProperty to get the location of a device with given PDO + * + */ +static NTSTATUS +MdGetDevLocation( + IN PDEVICE_OBJECT pi_pPdo, + OUT ULONG * po_pBus, + OUT ULONG * po_pDevFunc + ) +{ + ULONG l_BusNumber, l_DevNumber, l_Function, l_ResultLength = 0; + WCHAR l_Buffer[40], *l_pEnd, *l_pBuf = l_Buffer, *l_pBufEnd = l_Buffer + sizeof(l_Buffer); + NTSTATUS l_Status; + UNICODE_STRING l_UnicodeNumber; + + /* prepare */ + l_ResultLength = 0; + RtlZeroMemory( l_Buffer, sizeof(l_Buffer) ); + + /* Get the device number */ + l_Status = IoGetDeviceProperty(pi_pPdo, + DevicePropertyLocationInformation, sizeof(l_Buffer), &l_Buffer, &l_ResultLength); + + /* Verify if the function was successful */ + if ( !NT_SUCCESS(l_Status) || !l_ResultLength ) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("(MdGetDevLocation) Unable to get device number: Status 0x%x, ResultSize %d \n", + l_Status, l_ResultLength )); + goto exit; + } + + // ALL THE BELOW CRAP WE DO INSTEAD OF + // sscanf(l_Buffer, "PCI bus %d, device %d, function %d", &l_BusNumber, &l_DevNumber, &l_Function ); + + /* take bus number */ + l_pBuf = WcharFindChar( l_pBuf, l_pBufEnd, L'0', L'9' ); + if (l_pBuf == NULL) goto err; + l_pEnd = WcharFindChar( l_pBuf, l_pBufEnd, L',', L',' ); + if (l_pEnd == NULL) goto err; + l_UnicodeNumber.Length = l_UnicodeNumber.MaximumLength = (USHORT)((PCHAR)l_pEnd - (PCHAR)l_pBuf); + l_UnicodeNumber.Buffer = l_pBuf; l_pBuf = l_pEnd; + RtlUnicodeStringToInteger( &l_UnicodeNumber, 10, &l_BusNumber); + + /* take slot number */ + l_pBuf = WcharFindChar( l_pBuf, l_pBufEnd, L'0', L'9' ); + if (l_pBuf == NULL) goto err; + l_pEnd = WcharFindChar( l_pBuf, l_pBufEnd, L',', L',' ); + if (l_pEnd == NULL) goto err; + l_UnicodeNumber.Length = l_UnicodeNumber.MaximumLength = (USHORT)((PCHAR)l_pEnd - (PCHAR)l_pBuf); + l_UnicodeNumber.Buffer = l_pBuf; l_pBuf = l_pEnd; + RtlUnicodeStringToInteger( &l_UnicodeNumber, 10, &l_DevNumber); + + /* take function number */ + *(l_Buffer + (l_ResultLength>>1)) = 0; /* set end of string */ + l_pBuf = WcharFindChar( l_pBuf, l_pBufEnd, L'0', L'9' ); + if (l_pBuf == NULL) goto err; + l_pEnd = WcharFindChar( l_pBuf, l_pBufEnd, 0, 0 ); + if (l_pEnd == NULL) goto err; + l_UnicodeNumber.Length = l_UnicodeNumber.MaximumLength = (USHORT)((PCHAR)l_pEnd - (PCHAR)l_pBuf); + l_UnicodeNumber.Buffer = l_pBuf; l_pBuf = l_pEnd; + RtlUnicodeStringToInteger( &l_UnicodeNumber, 10, &l_Function); + + /* return the results */ + *po_pBus = l_BusNumber; + *po_pDevFunc = (l_DevNumber & 0x01f) | ((l_Function & 7) << 5); + + goto exit; + +err: + l_Status = STATUS_UNSUCCESSFUL; +exit: + 
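	/* on success, *po_pBus / *po_pDevFunc now hold the parsed location;
	 * e.g. "PCI bus 5, device 3, function 1" yields *po_pBus = 5 and
	 * *po_pDevFunc = (3 & 0x1f) | ((1 & 7) << 5) = 0x23 */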
return l_Status; +} + + +/*------------------------------------------------------------------------------------------------------*/ + +/* + * Function: PciFindPdoByPdoAndLocation + * + * Parameters: + * IN pi_pPdo - PDO of HCA's bus device + * IN pi_Bus, pi_DevFunc - bridge location + * OUT po_pPdo - pointer to PDO of the bridge, when found + * + * Returns: + * FALSE - the bridge was not found + * TRUE - a device was found; *po_pPdo contains its PDO + * + * Description: + * The function finds PDO of a Tavor bridge device by scanning through all the + * devices of the PCI.SYS driver + * + * Note: + * It is a "hack" algorithm. It uses some fields of system structures and some + * optimistic assumptions - see more below + */ +static BOOLEAN PciFindPdoByPdoAndLocation( + IN PDEVICE_OBJECT pi_pPdo, + IN ULONG pi_Bus, + IN ULONG pi_DevFunc, + OUT PDEVICE_OBJECT * po_pPdo ) +{ + PDRIVER_OBJECT l_pDrv; + PDEVICE_OBJECT l_pPdo; + NTSTATUS l_Status; + ULONG l_Bus, l_DevFunc; + // suppose that there is no more than N_PCI_DEVICES, belonging to PCI.SYS + #define N_PCI_DEVICES 256 + // suppose that the PDO objects, once created, never get moved + PDEVICE_OBJECT pdo[N_PCI_DEVICES]; + int i, n_pdos = 0; + + // suppose, that PDOs are added only at PASSIVE_LEVEL + KIRQL irql = KeRaiseIrqlToDpcLevel(); + + // get to the PCI.SYS driver + l_pDrv = pi_pPdo->DriverObject; + + // find and store all bus PDO s (because the bridge is a bus enumerated device) + for ( l_pPdo = l_pDrv->DeviceObject; l_pPdo; l_pPdo = l_pPdo->NextDevice ) { + if ( l_pPdo->Flags & DO_BUS_ENUMERATED_DEVICE ) { + pdo[n_pdos] = l_pPdo; + if (++n_pdos >= N_PCI_DEVICES) + break; + } + } + + // return to previous level + KeLowerIrql(irql); + + // loop over all the PCI driver devices + l_pPdo = NULL; /* mark, that we didn't find PDO */ + for ( i = 0; i < n_pdos; ++i ) { + // get the location of the device of that PDO + l_Status = MdGetDevLocation( pdo[i], &l_Bus, &l_DevFunc ); + if (l_Status != STATUS_SUCCESS) + continue; + // check, whether it's our device + if (l_Bus == pi_Bus && l_DevFunc == pi_DevFunc) { + l_pPdo = pdo[i]; + break; + } + } + + // check whether we found the PDO + if (!l_pPdo) + return FALSE; + *po_pPdo = l_pPdo; + return TRUE; +} + +/*----------------------------------------------------------------*/ + +/* Function: SendAwaitIrpCompletion + * + * Parameters: + * + * Description: + * IRP completion routine + * + * Returns: + * pointer to the entry on SUCCESS + * NULL - otherwise + * +*/ +static +NTSTATUS +SendAwaitIrpCompletion ( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + UNREFERENCED_PARAMETER (DeviceObject); + KeSetEvent ((PKEVENT) Context, IO_NO_INCREMENT, FALSE); + return STATUS_MORE_PROCESSING_REQUIRED; // Keep this IRP +} + +/*------------------------------------------------------------------------------------------------------*/ + +/* + * Function: SendAwaitIrp + * + * Description: + * Create and send IRP stack down the stack and wait for the response (Blocking Mode) + * + * Parameters: + * pi_pDeviceExt.......... ointer to USB device extension + * pi_MajorCode........... IRP major code + * pi_MinorCode........... IRP minor code + * pi_pBuffer............. parameter buffer + * pi_nSize............... size of the buffer + * po_pInfo.............. 
returned field Information from IoStatus block + * + * Returns: + * pointer to the entry on SUCCESS + * NULL - otherwise + * +*/ +static +NTSTATUS +SendAwaitIrp( + IN PDEVICE_OBJECT pi_pFdo, + IN PDEVICE_OBJECT pi_pLdo, + IN ULONG pi_MajorCode, + IN ULONG pi_MinorCode, + IN PVOID pi_pBuffer, + IN int pi_nSize, + OUT PVOID * po_pInfo + ) +/*++ + + Routine Description: + + Create and send IRP stack down the stack and wait for the response ( +Blocking Mode) + + Arguments: + + pi_pFdo................ our device + pi_pLdo................ lower device + pi_MajorCode........... IRP major code + pi_MinorCode........... IRP minor code + pi_pBuffer............. parameter buffer + pi_nSize............... size of the buffer + + Returns: + + standard NTSTATUS return codes. + + Notes: + +--*/ +{ /* SendAwaitIrp */ + // Event + KEVENT l_hEvent; + // Pointer to IRP + PIRP l_pIrp; + // Stack location + PIO_STACK_LOCATION l_pStackLocation; + // Returned status + NTSTATUS l_Status; + + // call validation + if(KeGetCurrentIrql() != PASSIVE_LEVEL) + return STATUS_SUCCESS; + + // create event + KeInitializeEvent(&l_hEvent, NotificationEvent, FALSE); + + // build IRP request to USBD driver + l_pIrp = IoAllocateIrp( pi_pFdo->StackSize, FALSE ); + + // validate request + if (!l_pIrp) + { + //MdKdPrint( DBGLVL_MAXIMUM, ("(SendAwaitIrp) Unable to allocate IRP !\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + // fill IRP + l_pIrp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + // set completion routine + IoSetCompletionRoutine(l_pIrp,SendAwaitIrpCompletion, &l_hEvent, TRUE, TRUE, TRUE); + + // fill stack location + l_pStackLocation = IoGetNextIrpStackLocation(l_pIrp); + l_pStackLocation->MajorFunction= (UCHAR)pi_MajorCode; + l_pStackLocation->MinorFunction= (UCHAR)pi_MinorCode; + RtlCopyMemory( &l_pStackLocation->Parameters, pi_pBuffer, pi_nSize ); + + // Call lower driver perform request + l_Status = IoCallDriver( pi_pLdo, l_pIrp ); + + // if the request not performed --> wait + if (l_Status == STATUS_PENDING) + { + // Wait until the IRP will be complete + KeWaitForSingleObject( + &l_hEvent, // event to wait for + Executive, // thread type (to wait into its context) + KernelMode, // mode of work + FALSE, // alertable + NULL // timeout + ); + l_Status = l_pIrp->IoStatus.Status; + } + + if (po_pInfo) + *po_pInfo = (PVOID)l_pIrp->IoStatus.Information; + + IoFreeIrp(l_pIrp); + return l_Status; + +} /* SendAwaitIrp */ + +/*------------------------------------------------------------------------------------------------------*/ + +/* + * Function: FindBridgeIf + * + * Parameters: + * IN pi_ext - device extension + * OUT pi_pInterface - bus interface to work with the bridge + * + * Returns: + * FALSE - the bridge was not found + * TRUE - a device was found; *po_pPdo contains its PDO + * + * Description: + * The function finds PDO of the bridge by HCA's bus number + * + */ +int +FindBridgeIf( + IN hca_dev_ext_t *pi_ext, + IN PBUS_INTERFACE_STANDARD pi_pInterface + ) +{ + int bRes; + NTSTATUS rc; + IO_STACK_LOCATION l_Iosl; + PDEVICE_RELATIONS l_pDr; + PDEVICE_OBJECT l_pPdo; + ULONG l_DevFunc, l_Bus; + BOOLEAN bResult; + // parameter buffer for the request + IO_STACK_LOCATION l_Stack; + + // find bridge location + if (!PciFindBridgeByBus( pi_ext->bus_number, &l_Bus, &l_DevFunc )) + return FALSE; + + // find PDO of our bus driver (bypassing possible low filter drivers) + RtlZeroMemory( &l_Iosl, sizeof(l_Iosl) ); + l_Iosl.Parameters.QueryDeviceRelations.Type = TargetDeviceRelation; + rc = SendAwaitIrp( + 
pi_ext->cl_ext.p_self_do, + pi_ext->cl_ext.p_next_do, + IRP_MJ_PNP, + IRP_MN_QUERY_DEVICE_RELATIONS, + &l_Iosl.Parameters, + sizeof(l_Iosl.Parameters.QueryDeviceRelations), + &l_pDr + ); + + if (!NT_SUCCESS (rc)) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("IRP_MN_QUERY_DEVICE_RELATIONS failed (%#x);: Fdo %p, Ldo %p \n", + rc, pi_ext->cl_ext.p_self_do, pi_ext->cl_ext.p_next_do )); + return FALSE; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("IRP_MN_QUERY_DEVICE_RELATIONS for Fdo %p, Ldo %p: num_of_PDOs %d, PDO %p \n", + pi_ext->cl_ext.p_self_do, pi_ext->cl_ext.p_next_do, l_pDr->Count, l_pDr->Objects[0] )); + + /* get the PDO of Bridge */ + if (!PciFindPdoByPdoAndLocation( l_pDr->Objects[0], + l_Bus, l_DevFunc, &l_pPdo )) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Not found bridge's (bus %d, dev/func %x. pdo %p) PDO - can't restore the PCI header \n", + l_Bus, l_DevFunc, l_pDr->Objects[0] )); + return FALSE; + } + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("Found bridge's PDO %p (bus %d, dev/func %x. pdo %p) \n", + l_pPdo, l_Bus, l_DevFunc, l_pDr->Objects[0] )); + + // clean interface data + RtlZeroMemory( (PCHAR)pi_pInterface, sizeof(BUS_INTERFACE_STANDARD) ); + + // fill request parameters + l_Stack.Parameters.QueryInterface.InterfaceType = (LPGUID) &GUID_BUS_INTERFACE_STANDARD; + l_Stack.Parameters.QueryInterface.Size = sizeof(BUS_INTERFACE_STANDARD); + l_Stack.Parameters.QueryInterface.Version = 1; + l_Stack.Parameters.QueryInterface.Interface = (PINTERFACE)pi_pInterface; + l_Stack.Parameters.QueryInterface.InterfaceSpecificData = NULL; + + rc =SendAwaitIrp( pi_ext->cl_ext.p_self_do, l_pPdo, IRP_MJ_PNP, + IRP_MN_QUERY_INTERFACE, &l_Stack.Parameters, sizeof(l_Stack.Parameters), NULL); + if (!NT_SUCCESS (rc)) + return FALSE; + + return TRUE; +} + + +/*----------------------------------------------------------------*/ + + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_spinlock.h b/branches/MTHCA/hw/mthca/kernel/mt_spinlock.h new file mode 100644 index 00000000..9227365a --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_spinlock.h @@ -0,0 +1,127 @@ +#ifndef MT_SPINLOCK_H +#define MT_SPINLOCK_H + +typedef struct spinlock { + KSPIN_LOCK lock; +#ifdef SUPPORT_SPINLOCK_IRQ + PKINTERRUPT p_int_obj; + KIRQL irql; +#endif +} spinlock_t; + +#ifdef SUPPORT_SPINLOCK_IRQ + +static inline void +spin_lock_setint( + IN spinlock_t* const l, + IN PKINTERRUPT p_int_obj ) +{ + MT_ASSERT( l ); + l->p_int_obj = p_int_obj; +} + +static inline void spin_lock_irq_init( + IN spinlock_t* const l, + IN PKINTERRUPT int_obj + ) +{ + KeInitializeSpinLock( &l->lock ); + l->p_int_obj = int_obj; +} + +static inline unsigned long +spin_lock_irq( + IN spinlock_t* const l) +{ + MT_ASSERT( l ); + MT_ASSERT( l->p_int_obj ); + return (unsigned long)(l->irql = KeAcquireInterruptSpinLock ( l->p_int_obj )); +} + +static inline void +spin_unlock_irq( + IN spinlock_t* const p_spinlock ) +{ + MT_ASSERT( p_spinlock ); + MT_ASSERT( p_spinlock->p_int_obj ); + KeReleaseInterruptSpinLock ( p_spinlock->p_int_obj, p_spinlock->irql ); +} + +#endif + +#define SPIN_LOCK_PREP(lh) KLOCK_QUEUE_HANDLE lh + +static inline void spin_lock_init( + IN spinlock_t* const p_spinlock ) +{ + KeInitializeSpinLock( &p_spinlock->lock ); +} + +static inline void +spin_lock( + IN spinlock_t* const l, + IN PKLOCK_QUEUE_HANDLE lockh) +{ + MT_ASSERT( l || lockh ); + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeAcquireInStackQueuedSpinLock ( &l->lock, lockh ); +} + +static inline void +spin_unlock( + IN PKLOCK_QUEUE_HANDLE 
lockh) +{ + MT_ASSERT( lockh ); + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeReleaseInStackQueuedSpinLock( lockh ); +} + +static inline void +spin_lock_sync( + IN spinlock_t* const l ) +{ + KLOCK_QUEUE_HANDLE lockh; + MT_ASSERT( l ); + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeAcquireInStackQueuedSpinLock ( &l->lock, &lockh ); + KeReleaseInStackQueuedSpinLock( &lockh ); +} + +/* to be used only at DPC level */ +static inline void +spin_lock_dpc( + IN spinlock_t* const l, + IN PKLOCK_QUEUE_HANDLE lockh) +{ + MT_ASSERT( l || lockh ); + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeAcquireInStackQueuedSpinLockAtDpcLevel( &l->lock, lockh ); +} + +/* to be used only at DPC level */ +static inline void +spin_unlock_dpc( + IN PKLOCK_QUEUE_HANDLE lockh) +{ + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeReleaseInStackQueuedSpinLockFromDpcLevel( lockh ); +} + + +/* we are working from DPC level, so we can use usual spinlocks */ +#define spin_lock_irq spin_lock +#define spin_unlock_irq spin_unlock + +/* no diff in Windows */ +#define spin_lock_irqsave spin_lock_irq +#define spin_unlock_irqrestore spin_unlock_irq + +/* Windows doesn't support such kind of spinlocks so far, but may be tomorrow ... */ +#define rwlock_init spin_lock_init +#define read_lock_irqsave spin_lock_irqsave +#define read_unlock_irqrestore spin_unlock_irqrestore +#define write_lock_irq spin_lock_irq +#define write_unlock_irq spin_unlock_irq + +#endif + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_sync.h b/branches/MTHCA/hw/mthca/kernel/mt_sync.h new file mode 100644 index 00000000..ccc54b1e --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_sync.h @@ -0,0 +1,107 @@ +#ifndef MT_SYNC_H +#define MT_SYNC_H + +// literals +#ifndef LONG_MAX +#define LONG_MAX 2147483647L /* maximum (signed) long value */ +#endif + + +// mutex wrapper + +// suitable both for mutexes and semaphores +static inline void down(PRKMUTEX p_mutex) +{ + NTSTATUS status; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + while (1) { + status = KeWaitForSingleObject( p_mutex, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + +// suitable both for mutexes and semaphores +static inline int down_interruptible(PRKMUTEX p_mutex) +{ + NTSTATUS status; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + status = KeWaitForSingleObject( p_mutex, Executive, KernelMode, TRUE, NULL ); + if (status == STATUS_SUCCESS) + return 0; + return -EINTR; +} + +#define sem_down(ptr) down((PRKMUTEX)(ptr)) +#define sem_down_interruptible(ptr) down_interruptible((PRKMUTEX)(ptr)) + +static inline void up(PRKMUTEX p_mutex) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeReleaseMutex( p_mutex, FALSE ); +} + +static inline void sem_up(PRKSEMAPHORE p_sem) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeReleaseSemaphore( p_sem, 0, 1, FALSE ); +} + +static inline void sem_init( + IN PRKSEMAPHORE p_sem, + IN LONG cnt, + IN LONG limit) +{ + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeInitializeSemaphore( p_sem, cnt, limit ); +} + + +typedef struct wait_queue_head { + KEVENT event; +} wait_queue_head_t; + +static inline void wait_event(wait_queue_head_t *obj_p, int condition) +{ + NTSTATUS status; + MT_ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + if (condition) + return; + while (1) { + status = KeWaitForSingleObject( &obj_p->event, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + +static inline void wake_up(wait_queue_head_t *obj_p) +{ + MT_ASSERT(KeGetCurrentIrql() <= 
DISPATCH_LEVEL); + KeSetEvent( &obj_p->event, 0, FALSE ); +} + +static inline void init_waitqueue_head(wait_queue_head_t *obj_p) +{ + //TODO: ASSERT is temporary outcommented, because using of fast mutexes in CompLib + // cause working on APC_LEVEL + //ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeInitializeEvent( &obj_p->event, NotificationEvent , FALSE ); +} + +static inline void free_irq(PKINTERRUPT int_obj) +{ + IoDisconnectInterrupt( int_obj ); +} + +int request_irq( + IN CM_PARTIAL_RESOURCE_DESCRIPTOR *int_info, /* interrupt resources */ + IN KSPIN_LOCK *isr_lock, /* spin lcok for ISR */ + IN PKSERVICE_ROUTINE isr, /* ISR */ + IN void *isr_ctx, /* ISR context */ + OUT PKINTERRUPT *int_obj /* interrupt object */ + ); + + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_time.h b/branches/MTHCA/hw/mthca/kernel/mt_time.h new file mode 100644 index 00000000..3738ecf0 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_time.h @@ -0,0 +1,54 @@ +#ifndef MT_TIME_H +#define MT_TIME_H + + +/* get time stamp */ +static inline volatile u64 MT_time_get_stamp(void) +{ + volatile u64 tsc; + +#if defined(_WIN64) && (defined(IA64) || defined(_IA64_)) + /* Itanium */ + + /* returns a value in units of 100 nsecs */ + tsc = KeQueryInterruptTime(); + +#elif defined(_WIN64) && (defined(AMD64) || defined(_AMD64_)) + /* x64 */ + + /* returns a value in units of Time-Stamp Counter (usually, clocks) */ + tsc = __rdtsc(); + +#elif defined(_WIN32) && (defined(i386) || defined(_x86_)) + /* x86 */ + + /* returns a value in units of Time-Stamp Counter (usually, clocks) */ + __asm { + lea ebx,tsc + rdtsc + mov [ebx],eax + mov [ebx+4],edx + } +#else + #error Unsupported platform +#endif + + return tsc; +} + +extern u64 mt_ticks_per_sec; + + +/* CONVERTIONS */ +#define MT_USECS_TO_TICKS(to) ((mt_ticks_per_sec * (to)) / 1000000 ) +#define MT_MSECS_TO_TICKS(to) MT_USECS_TO_TICKS(1000 * to) + +/* comparison */ +#define time_after(a,b) ((INT64)(b) - (INT64)(a) < 0) +#define time_before(a,b) time_after(b,a) +#define time_after_eq(a,b) ((INT64)(a) - (INT64)(b) >= 0) +#define time_before_eq(a,b) time_after_eq(b,a) + + +#endif + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_types.h b/branches/MTHCA/hw/mthca/kernel/mt_types.h new file mode 100644 index 00000000..aaffa048 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_types.h @@ -0,0 +1,57 @@ +#ifndef MT_TYPES_H +#define MT_TYPES_H + +//#include + #include + +// =========================================== +// SUBSTITUTES +// =========================================== + +// gcc compiler attributes +#define __iomem +#define likely(x) (x) +#define unlikely(x) (x) + +// container_of +#define container_of CONTAINING_RECORD + +// inline +#define inline __inline + +// =========================================== +// TYPES +// =========================================== + +// basic types +typedef unsigned char u8, __u8; +typedef unsigned short int u16, __u16; +typedef unsigned int u32, __u32; +typedef unsigned __int64 u64, __u64; +typedef char s8, __s8; +typedef short int s16, __s16; +typedef int s32, __s32; +typedef __int64 s64, __s64; + +// inherited +typedef u16 __le16; +typedef u16 __be16; +typedef u32 __le32; +typedef u32 __be32; +typedef u64 __le64; +typedef u64 __be64; +typedef u64 dma_addr_t; +typedef u64 io_addr_t; + +// =========================================== +// MACROS +// =========================================== + +// assert +#ifdef _DEBUG_ +#define MT_ASSERT( exp ) (void)(!(exp)?DbgPrint("Assertion Failed:" #exp "\n"),DbgBreakPoint(),FALSE:TRUE) 
+#else +#define MT_ASSERT( exp ) +#endif /* _DEBUG_ */ + +#endif diff --git a/branches/MTHCA/hw/mthca/kernel/mt_ud_header.c b/branches/MTHCA/hw/mthca/kernel/mt_ud_header.c index d0f8432b..2ae3b735 100644 --- a/branches/MTHCA/hw/mthca/kernel/mt_ud_header.c +++ b/branches/MTHCA/hw/mthca/kernel/mt_ud_header.c @@ -34,7 +34,12 @@ */ #include - +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_ud_header.tmh" +#endif #include #ifdef LINUX_TO_BE_REMOVED @@ -304,7 +309,7 @@ void ib_ud_header_init(int payload_bytes, packet_length += IB_GRH_BYTES / 4; header->grh.ip_version = 6; header->grh.payload_length = - cpu_to_be16((IB_BTH_BYTES + + cl_hton16((IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + 4 + /* ICRC */ @@ -312,7 +317,7 @@ void ib_ud_header_init(int payload_bytes, header->grh.next_header = 0x1b; } - header->lrh.packet_length = cpu_to_be16(packet_length); + header->lrh.packet_length = cl_hton16(packet_length); if (header->immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; @@ -321,7 +326,6 @@ void ib_ud_header_init(int payload_bytes, header->bth.pad_count = (4 - payload_bytes) & 3; header->bth.transport_header_version = 0; } -EXPORT_SYMBOL(ib_ud_header_init); /** * ib_ud_header_pack - Pack UD header struct into wire format @@ -361,7 +365,6 @@ int ib_ud_header_pack(struct ib_ud_header *header, return len; } -EXPORT_SYMBOL(ib_ud_header_pack); /** * ib_ud_header_unpack - Unpack UD header struct from wire format @@ -379,8 +382,8 @@ int ib_ud_header_unpack(u8 *buf, buf += IB_LRH_BYTES; if (header->lrh.link_version != 0) { - printk(KERN_WARNING "Invalid LRH.link_version %d\n", - header->lrh.link_version); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid LRH.link_version %d\n", + header->lrh.link_version)); return -EINVAL; } @@ -396,20 +399,20 @@ int ib_ud_header_unpack(u8 *buf, buf += IB_GRH_BYTES; if (header->grh.ip_version != 6) { - printk(KERN_WARNING "Invalid GRH.ip_version %d\n", - header->grh.ip_version); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid GRH.ip_version %d\n", + header->grh.ip_version)); return -EINVAL; } if (header->grh.next_header != 0x1b) { - printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n", - header->grh.next_header); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid GRH.next_header 0x%02x\n", + header->grh.next_header)); return -EINVAL; } break; default: - printk(KERN_WARNING "Invalid LRH.link_next_header %d\n", - header->lrh.link_next_header); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid LRH.link_next_header %d\n", + header->lrh.link_next_header)); return -EINVAL; } @@ -425,14 +428,14 @@ int ib_ud_header_unpack(u8 *buf, header->immediate_present = 1; break; default: - printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n", - header->bth.opcode); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid BTH.opcode 0x%02x\n", + header->bth.opcode)); return -EINVAL; } if (header->bth.transport_header_version != 0) { - printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n", - header->bth.transport_header_version); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid BTH.transport_header_version %d\n", + header->bth.transport_header_version)); return -EINVAL; } @@ -445,4 +448,3 @@ int ib_ud_header_unpack(u8 *buf, return 0; } -EXPORT_SYMBOL(ib_ud_header_unpack); diff --git a/branches/MTHCA/hw/mthca/kernel/mt_uverbs.c b/branches/MTHCA/hw/mthca/kernel/mt_uverbs.c new file mode 100644 index 00000000..6d3a5ce4 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_uverbs.c @@ -0,0 
+1,1900 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_cmd.c 4227 2005-11-30 00:58:50Z roland $ + */ + +#include "mt_uverbs.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_uverbs.tmh" +#endif + +#ifdef LINUX_TO_BE_CHANGED + +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (void *) (ibuf); \ + (udata)->outbuf = (void *) (obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + +ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + const char *buf, + int in_len, int out_len) +{ + struct ib_uverbs_get_context cmd; + struct ib_uverbs_get_context_resp resp; + struct ib_udata udata; + struct ib_device *ibdev = file->device->ib_dev; + struct ib_ucontext *ucontext; + struct file *filp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&file->mutex); + + if (file->ucontext) { + ret = -EINVAL; + goto err; + } + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(ucontext)) + return PTR_ERR(file->ucontext); + + ucontext->device = ibdev; + INIT_LIST_HEAD(&ucontext->pd_list); + INIT_LIST_HEAD(&ucontext->mr_list); + INIT_LIST_HEAD(&ucontext->mw_list); + INIT_LIST_HEAD(&ucontext->cq_list); + INIT_LIST_HEAD(&ucontext->qp_list); + INIT_LIST_HEAD(&ucontext->srq_list); + INIT_LIST_HEAD(&ucontext->ah_list); + + resp.num_comp_vectors = file->device->num_comp_vectors; + + filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_free; + } + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_file; + } + + file->async_file = filp->private_data; + + INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&file->event_handler); + if 
(ret) + goto err_file; + + kref_get(&file->async_file->ref); + kref_get(&file->ref); + file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + + up(&file->mutex); + + return in_len; + +err_file: + put_unused_fd(resp.async_fd); + fput(filp); + +err_free: + ibdev->dealloc_ucontext(ucontext); + +err: + up(&file->mutex); + return ret; +} + +ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, + const char *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_device cmd; + struct ib_uverbs_query_device_resp resp; + struct ib_device_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_device(file->device->ib_dev, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.fw_ver = attr.fw_ver; + resp.node_guid = attr.node_guid; + resp.sys_image_guid = attr.sys_image_guid; + resp.max_mr_size = attr.max_mr_size; + resp.page_size_cap = attr.page_size_cap; + resp.vendor_id = attr.vendor_id; + resp.vendor_part_id = attr.vendor_part_id; + resp.hw_ver = attr.hw_ver; + resp.max_qp = attr.max_qp; + resp.max_qp_wr = attr.max_qp_wr; + resp.device_cap_flags = attr.device_cap_flags; + resp.max_sge = attr.max_sge; + resp.max_sge_rd = attr.max_sge_rd; + resp.max_cq = attr.max_cq; + resp.max_cqe = attr.max_cqe; + resp.max_mr = attr.max_mr; + resp.max_pd = attr.max_pd; + resp.max_qp_rd_atom = attr.max_qp_rd_atom; + resp.max_ee_rd_atom = attr.max_ee_rd_atom; + resp.max_res_rd_atom = attr.max_res_rd_atom; + resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; + resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; + resp.atomic_cap = attr.atomic_cap; + resp.max_ee = attr.max_ee; + resp.max_rdd = attr.max_rdd; + resp.max_mw = attr.max_mw; + resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; + resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; + resp.max_mcast_grp = attr.max_mcast_grp; + resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; + resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; + resp.max_ah = attr.max_ah; + resp.max_fmr = attr.max_fmr; + resp.max_map_per_fmr = attr.max_map_per_fmr; + resp.max_srq = attr.max_srq; + resp.max_srq_wr = attr.max_srq_wr; + resp.max_srq_sge = attr.max_srq_sge; + resp.max_pkeys = attr.max_pkeys; + resp.local_ca_ack_delay = attr.local_ca_ack_delay; + resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, + const char *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_port cmd; + struct ib_uverbs_query_port_resp resp; + struct ib_port_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.state = attr.state; + resp.max_mtu = attr.max_mtu; + resp.active_mtu = attr.active_mtu; + resp.gid_tbl_len = attr.gid_tbl_len; + resp.port_cap_flags = attr.port_cap_flags; + resp.max_msg_sz = attr.max_msg_sz; + resp.bad_pkey_cntr = attr.bad_pkey_cntr; + resp.qkey_viol_cntr = attr.qkey_viol_cntr; + resp.pkey_tbl_len = attr.pkey_tbl_len; + resp.lid = attr.lid; + resp.sm_lid = attr.sm_lid; + resp.lmc = attr.lmc; + resp.max_vl_num = attr.max_vl_num; + resp.sm_sl = attr.sm_sl; + resp.subnet_timeout = attr.subnet_timeout; + resp.init_type_reply = 
attr.init_type_reply; + resp.active_width = attr.active_width; + resp.active_speed = attr.active_speed; + resp.phys_state = attr.phys_state; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, + const char *buf, + int in_len, int out_len) +{ + struct ib_uverbs_alloc_pd cmd; + struct ib_uverbs_alloc_pd_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->context = file->ucontext; + + pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto err; + } + + pd->device = file->device->ib_dev; + pd->uobject = uobj; + atomic_set(&pd->usecnt, 0); + + down(&ib_uverbs_idr_mutex); + +retry: + if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_up; + } + + ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_up; + + memset(&resp, 0, sizeof resp); + resp.pd_handle = uobj->id; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_pd_idr, uobj->id); + +err_up: + up(&ib_uverbs_idr_mutex); + ibv_dealloc_pd(pd); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, + const char *buf, + int in_len, int out_len) +{ + struct ib_uverbs_dealloc_pd cmd; + struct ib_pd *pd; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) + goto out; + + uobj = pd->uobject; + + ret = ibv_dealloc_pd(pd); + if (ret) + goto out; + + idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); + + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + struct ib_umem_object *obj; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; + + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. 
+ */ + if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->uobject.context = file->ucontext; + + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. + */ + ret = ib_umem_get(file->device->ib_dev, &obj->umem, + (void *) (unsigned long) cmd.start, cmd.length, + !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); + if (ret) + goto err_free; + + obj->umem.virt_base = cmd.hca_va; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + if (!pd->device->reg_user_mr) { + ret = -ENOSYS; + goto err_up; + } + + mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto err_up; + } + + mr->device = pd->device; + mr->pd = pd; + mr->uobject = &obj->uobject; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + +retry: + if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_unreg; + } + + ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_unreg; + + resp.mr_handle = obj->uobject.id; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_mr_idr, obj->uobject.id); + +err_unreg: + ibv_dereg_mr(mr); + +err_up: + up(&ib_uverbs_idr_mutex); + + ib_umem_release(file->device->ib_dev, &obj->umem); + +err_free: + kfree(obj); + return ret; +} + +ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mr cmd; + struct ib_mr *mr; + struct ib_umem_object *memobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); + if (!mr || mr->uobject->context != file->ucontext) + goto out; + + memobj = container_of(mr->uobject, struct ib_umem_object, uobject); + + ret = ibv_dereg_mr(mr); + if (ret) + goto out; + + idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); + + down(&file->mutex); + list_del(&memobj->uobject.list); + up(&file->mutex); + + ib_umem_release(file->device->ib_dev, &memobj->umem); + kfree(memobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata udata; + struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file = NULL; + struct ib_cq *cq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if (cmd.comp_vector >= file->device->num_comp_vectors) + return -EINVAL; + + if (cmd.comp_channel >= 0) + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->uobject.user_handle = cmd.user_handle; + uobj->uobject.context = file->ucontext; + uobj->uverbs_file = file; + uobj->comp_events_reported = 0; + uobj->async_events_reported = 0; + INIT_LIST_HEAD(&uobj->comp_list); + INIT_LIST_HEAD(&uobj->async_list); + + cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, + file->ucontext, &udata); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err; + } + + cq->device = file->device->ib_dev; + cq->uobject = &uobj->uobject; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = ev_file; + atomic_set(&cq->usecnt, 0); + + down(&ib_uverbs_idr_mutex); + +retry: + if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_up; + } + + ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_up; + + memset(&resp, 0, sizeof resp); + resp.cq_handle = uobj->uobject.id; + resp.cqe = cq->cqe; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id); + +err_up: + up(&ib_uverbs_idr_mutex); + ibv_destroy_cq(cq); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_poll_cq cmd; + struct ib_uverbs_poll_cq_resp *resp; + struct ib_cq *cq; + struct ib_wc *wc; + int ret = 0; + int i; + int rsize; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); + if (!wc) + return -ENOMEM; + + rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); + resp = kmalloc(rsize, GFP_KERNEL); + if (!resp) { + ret = -ENOMEM; + goto out_wc; + } + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if 
(!cq || cq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + resp->count = ib_poll_cq(cq, cmd.ne, wc); + + for (i = 0; i < resp->count; i++) { + resp->wc[i].wr_id = wc[i].wr_id; + resp->wc[i].status = wc[i].status; + resp->wc[i].opcode = wc[i].opcode; + resp->wc[i].vendor_err = wc[i].vendor_err; + resp->wc[i].byte_len = wc[i].byte_len; + resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data; + resp->wc[i].qp_num = wc[i].qp_num; + resp->wc[i].src_qp = wc[i].src_qp; + resp->wc[i].wc_flags = wc[i].wc_flags; + resp->wc[i].pkey_index = wc[i].pkey_index; + resp->wc[i].slid = wc[i].slid; + resp->wc[i].sl = wc[i].sl; + resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; + resp->wc[i].port_num = wc[i].port_num; + } + + if (copy_to_user((void *) (unsigned long) cmd.response, resp, rsize)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + kfree(resp); + +out_wc: + kfree(wc); + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_req_notify_cq cmd; + struct ib_cq *cq; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (cq && cq->uobject->context == file->ucontext) { + ib_req_notify_cq(cq, cmd.solicited_only ? + IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); + ret = in_len; + } + up(&ib_uverbs_idr_mutex); + + return ret; +} + +ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_cq cmd; + struct ib_uverbs_destroy_cq_resp resp; + struct ib_cq *cq; + struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file; + u64 user_handle; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + down(&ib_uverbs_idr_mutex); + + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) + goto out; + + user_handle = cq->uobject->user_handle; + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + ev_file = cq->cq_context; + + ret = ibv_destroy_cq(cq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); + + down(&file->mutex); + list_del(&uobj->uobject.list); + up(&file->mutex); + + ib_uverbs_release_ucq(file, ev_file, uobj); + + resp.comp_events_reported = uobj->comp_events_reported; + resp.async_events_reported = uobj->async_events_reported; + + kfree(uobj); + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uqp_object *uobj; + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_srq *srq; + struct ib_qp *qp; + struct ib_qp_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); + rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); + srq = cmd.is_srq ? idr_find(&ib_uverbs_srq_idr, cmd.srq_handle) : NULL; + + if (!pd || pd->uobject->context != file->ucontext || + !scq || scq->uobject->context != file->ucontext || + !rcq || rcq->uobject->context != file->ucontext || + (cmd.is_srq && (!srq || srq->uobject->context != file->ucontext))) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.send_cq = scq; + attr.recv_cq = rcq; + attr.srq = srq; + attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd.qp_type; + + attr.cap.max_send_wr = cmd.max_send_wr; + attr.cap.max_recv_wr = cmd.max_recv_wr; + attr.cap.max_send_sge = cmd.max_send_sge; + attr.cap.max_recv_sge = cmd.max_recv_sge; + attr.cap.max_inline_data = cmd.max_inline_data; + + uobj->uevent.uobject.user_handle = cmd.user_handle; + uobj->uevent.uobject.context = file->ucontext; + uobj->uevent.events_reported = 0; + INIT_LIST_HEAD(&uobj->uevent.event_list); + INIT_LIST_HEAD(&uobj->mcast_list); + + qp = pd->device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_up; + } + + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->uobject = &uobj->uevent.uobject; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + +retry: + if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uevent.uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.qp_handle = uobj->uevent.uobject.id; + resp.max_recv_sge = attr.cap.max_recv_sge; + resp.max_send_sge = attr.cap.max_send_sge; + resp.max_recv_wr = attr.cap.max_recv_wr; + resp.max_send_wr = attr.cap.max_send_wr; + resp.max_inline_data = attr.cap.max_inline_data; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->uevent.uobject.list, &file->ucontext->qp_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_qp_idr, uobj->uevent.uobject.id); + +err_destroy: + ibv_destroy_qp(qp); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t 
ib_uverbs_modify_qp(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_qp cmd; + struct ib_qp *qp; + struct ib_qp_attr *attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->alt_pkey_index = cmd.pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? 
IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + ret = ibv_modify_qp(qp, attr, cmd.attr_mask); + if (ret) + goto out; + + ret = in_len; + +out: + up(&ib_uverbs_idr_mutex); + kfree(attr); + + return ret; +} + +ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_qp cmd; + struct ib_uverbs_destroy_qp_resp resp; + struct ib_qp *qp; + struct ib_uqp_object *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + if (!list_empty(&uobj->mcast_list)) { + ret = -EBUSY; + goto out; + } + + ret = ibv_destroy_qp(qp); + if (ret) + goto out; + + idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); + + down(&file->mutex); + list_del(&uobj->uevent.uobject.list); + up(&file->mutex); + + ib_uverbs_release_uevent(file, &uobj->uevent); + + resp.events_reported = uobj->uevent.events_reported; + + kfree(uobj); + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_send cmd; + struct ib_uverbs_post_send_resp resp; + struct ib_uverbs_send_wr *user_wr; + struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; + struct ib_qp *qp; + int i, sg_ind; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + + cmd.sge_count * sizeof (struct ib_uverbs_sge)) + return -EINVAL; + + if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) + return -EINVAL; + + user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); + if (!user_wr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + sg_ind = 0; + last = NULL; + for (i = 0; i < cmd.wr_count; ++i) { + if (copy_from_user(user_wr, + buf + sizeof cmd + i * cmd.wqe_size, + cmd.wqe_size)) { + ret = -EFAULT; + goto out; + } + + if (user_wr->num_sge + sg_ind > cmd.sge_count) { + ret = -EINVAL; + goto out; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto out; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + next->opcode = user_wr->opcode; + next->send_flags = user_wr->send_flags; + next->imm_data = (__be32 __force) user_wr->imm_data; + + if (qp->qp_type == IB_QPT_UD) { + next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr, + user_wr->wr.ud.ah); + if (!next->wr.ud.ah) { + ret = -EINVAL; + goto out; + } + next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; + next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; + } else { + switch (next->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + case IB_WR_RDMA_READ: + next->wr.rdma.remote_addr = + user_wr->wr.rdma.remote_addr; + next->wr.rdma.rkey = + user_wr->wr.rdma.rkey; + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + 
next->wr.atomic.remote_addr = + user_wr->wr.atomic.remote_addr; + next->wr.atomic.compare_add = + user_wr->wr.atomic.compare_add; + next->wr.atomic.swap = user_wr->wr.atomic.swap; + next->wr.atomic.rkey = user_wr->wr.atomic.rkey; + break; + default: + break; + } + } + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + sizeof cmd + + cmd.wr_count * cmd.wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto out; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + resp.bad_wr = 0; + ret = qp->device->post_send(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + kfree(user_wr); + + return ret ? ret : in_len; +} + +static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char *buf, + int in_len, + u32 wr_count, + u32 sge_count, + u32 wqe_size) +{ + struct ib_uverbs_recv_wr *user_wr; + struct ib_recv_wr *wr = NULL, *last, *next; + int sg_ind; + int i; + int ret; + + if (in_len < wqe_size * wr_count + + sge_count * sizeof (struct ib_uverbs_sge)) + return ERR_PTR(-EINVAL); + + if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) + return ERR_PTR(-EINVAL); + + user_wr = kmalloc(wqe_size, GFP_KERNEL); + if (!user_wr) + return ERR_PTR(-ENOMEM); + + sg_ind = 0; + last = NULL; + for (i = 0; i < wr_count; ++i) { + if (copy_from_user(user_wr, buf + i * wqe_size, + wqe_size)) { + ret = -EFAULT; + goto err; + } + + if (user_wr->num_sge + sg_ind > sge_count) { + ret = -EINVAL; + goto err; + } + + next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + + user_wr->num_sge * sizeof (struct ib_sge), + GFP_KERNEL); + if (!next) { + ret = -ENOMEM; + goto err; + } + + if (!last) + wr = next; + else + last->next = next; + last = next; + + next->next = NULL; + next->wr_id = user_wr->wr_id; + next->num_sge = user_wr->num_sge; + + if (next->num_sge) { + next->sg_list = (void *) next + + ALIGN(sizeof *next, sizeof (struct ib_sge)); + if (copy_from_user(next->sg_list, + buf + wr_count * wqe_size + + sg_ind * sizeof (struct ib_sge), + next->num_sge * sizeof (struct ib_sge))) { + ret = -EFAULT; + goto err; + } + sg_ind += next->num_sge; + } else + next->sg_list = NULL; + } + + kfree(user_wr); + return wr; + +err: + kfree(user_wr); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ERR_PTR(ret); +} + +ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_recv cmd; + struct ib_uverbs_post_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_qp *qp; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = qp->device->post_recv(qp, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void *) (unsigned 
long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_post_srq_recv cmd; + struct ib_uverbs_post_srq_recv_resp resp; + struct ib_recv_wr *wr, *next, *bad_wr; + struct ib_srq *srq; + ssize_t ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, + in_len - sizeof cmd, cmd.wr_count, + cmd.sge_count, cmd.wqe_size); + if (IS_ERR(wr)) + return PTR_ERR(wr); + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + resp.bad_wr = 0; + ret = srq->device->post_srq_recv(srq, wr, &bad_wr); + if (ret) + for (next = wr; next; next = next->next) { + ++resp.bad_wr; + if (next == bad_wr) + break; + } + + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + while (wr) { + next = wr->next; + kfree(wr); + wr = next; + } + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_ah cmd; + struct ib_uverbs_create_ah_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_ah *ah; + struct ib_ah_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + attr.dlid = cmd.attr.dlid; + attr.sl = cmd.attr.sl; + attr.src_path_bits = cmd.attr.src_path_bits; + attr.static_rate = cmd.attr.static_rate; + attr.port_num = cmd.attr.port_num; + attr.grh.flow_label = cmd.attr.grh.flow_label; + attr.grh.sgid_index = cmd.attr.grh.sgid_index; + attr.grh.hop_limit = cmd.attr.grh.hop_limit; + attr.grh.traffic_class = cmd.attr.grh.traffic_class; + memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); + + ah = ibv_create_ah(pd, &attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto err_up; + } + + ah->uobject = uobj; + +retry: + if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.ah_handle = uobj->id; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->ah_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_ah_idr, uobj->id); + +err_destroy: + ibv_destroy_ah(ah); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, + const char *buf, int in_len, int out_len) +{ + struct ib_uverbs_destroy_ah cmd; + struct ib_ah *ah; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + 
down(&ib_uverbs_idr_mutex); + + ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle); + if (!ah || ah->uobject->context != file->ucontext) + goto out; + + uobj = ah->uobject; + + ret = ibv_destroy_ah(ah); + if (ret) + goto out; + + idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle); + + down(&file->mutex); + list_del(&uobj->list); + up(&file->mutex); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_attach_mcast cmd; + struct ib_qp *qp; + struct ib_uqp_object *uobj; + struct ib_uverbs_mcast_entry *mcast; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + list_for_each_entry(mcast, &uobj->mcast_list, list) + if (cmd.mlid == mcast->lid && + !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { + ret = 0; + goto out; + } + + mcast = kmalloc(sizeof *mcast, GFP_KERNEL); + if (!mcast) { + ret = -ENOMEM; + goto out; + } + + mcast->lid = cmd.mlid; + memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); + + ret = ibv_attach_mcast(qp, &mcast->gid, cmd.mlid); + if (!ret) { + uobj = container_of(qp->uobject, struct ib_uqp_object, + uevent.uobject); + list_add_tail(&mcast->list, &uobj->mcast_list); + } else + kfree(mcast); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_detach_mcast cmd; + struct ib_uqp_object *uobj; + struct ib_qp *qp; + struct ib_uverbs_mcast_entry *mcast; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + ret = ibv_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + if (ret) + goto out; + + uobj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); + + list_for_each_entry(mcast, &uobj->mcast_list, list) + if (cmd.mlid == mcast->lid && + !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { + list_del(&mcast->list); + kfree(mcast); + break; + } + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_srq cmd; + struct ib_uverbs_create_srq_resp resp; + struct ib_udata udata; + struct ib_uevent_object *uobj; + struct ib_pd *pd; + struct ib_srq *srq; + struct ib_srq_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_srq_event_handler; + attr.srq_context = file; + attr.attr.max_wr = cmd.max_wr; + attr.attr.max_sge = cmd.max_sge; + attr.attr.srq_limit = cmd.srq_limit; + + uobj->uobject.user_handle = cmd.user_handle; + uobj->uobject.context = file->ucontext; + uobj->events_reported = 0; + INIT_LIST_HEAD(&uobj->event_list); + + srq = pd->device->create_srq(pd, &attr, &udata); + if (IS_ERR(srq)) { + ret = PTR_ERR(srq); + goto err_up; + } + + srq->device = pd->device; + srq->pd = pd; + srq->uobject = &uobj->uobject; + srq->event_handler = attr.event_handler; + srq->srq_context = attr.srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&srq->usecnt, 0); + + memset(&resp, 0, sizeof resp); + +retry: + if (!idr_pre_get(&ib_uverbs_srq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.srq_handle = uobj->uobject.id; + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_idr; + } + + down(&file->mutex); + list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list); + up(&file->mutex); + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_idr: + idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id); + +err_destroy: + ibv_destroy_srq(srq); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_srq cmd; + struct ib_srq *srq; + struct ib_srq_attr attr; + int ret; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr.max_wr = cmd.max_wr; + attr.srq_limit = cmd.srq_limit; + + ret = ibv_modify_srq(srq, &attr, cmd.attr_mask); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, + const char *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_srq cmd; + struct ib_uverbs_destroy_srq_resp resp; + struct ib_srq *srq; + struct ib_uevent_object *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + memset(&resp, 0, sizeof resp); + + srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle); + if (!srq || srq->uobject->context != file->ucontext) + goto out; + + uobj = container_of(srq->uobject, struct ib_uevent_object, uobject); + + ret = ibv_destroy_srq(srq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle); + + down(&file->mutex); + list_del(&uobj->uobject.list); + up(&file->mutex); + + ib_uverbs_release_uevent(file, uobj); + + resp.events_reported = uobj->events_reported; + + kfree(uobj); + + if (copy_to_user((void *) (unsigned long) cmd.response, + &resp, sizeof resp)) + ret = -EFAULT; + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +#endif + +// ---------------------------------------------------------------------- + +struct ib_pd *ib_uverbs_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + struct ib_pd *pd; + + pd = device->alloc_pd(device, context, p_umv_buf); + + if (!IS_ERR(pd)) { + pd->device = device; +#ifdef WIN_TO_BE_REMOVED + pd->uobject = NULL; +#endif + atomic_set(&pd->usecnt, 0); + } + + return pd; +} + +int ib_uverbs_dealloc_pd(struct ib_pd *pd) +{ + if (atomic_read(&pd->usecnt)) + return -EBUSY; + + return pd->device->dealloc_pd(pd); +} + +static void ib_uverbs_add_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device); + +static struct ib_client uverbs_client = { + "uverbs", + ib_uverbs_add_one, + ib_uverbs_remove_one +}; + + +static void ib_uverbs_add_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev; + + if (!device->alloc_ucontext) + return; + + uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); + if (!uverbs_dev) + return; + + ib_set_client_data(device, &uverbs_client, uverbs_dev); +} + +static void ib_uverbs_remove_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + + if (uverbs_dev) + kfree(uverbs_dev); +} + +int ib_uverbs_init(void) +{ + int ret; + + ret = ib_register_client(&uverbs_client); + if (ret) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("user_verbs: couldn't register client\n")); + + return ret; +} + +void ib_uverbs_cleanup(void) +{ + ib_unregister_client(&uverbs_client); +} + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_uverbs.h b/branches/MTHCA/hw/mthca/kernel/mt_uverbs.h new file mode 100644 index 00000000..b8e56e55 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_uverbs.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs.h 4227 2005-11-30 00:58:50Z roland $ + */ + +#ifndef UVERBS_H +#define UVERBS_H + +#include +#include + +/* + * Our lifetime rules for these structs are the following: + * + * struct ib_uverbs_device: One reference is held by the module and + * released in ib_uverbs_remove_one(). Another reference is taken by + * ib_uverbs_open() each time the character special file is opened, + * and released in ib_uverbs_release_file() when the file is released. + * + * struct ib_uverbs_file: One reference is held by the VFS and + * released when the file is closed. Another reference is taken when + * an asynchronous event queue file is created and released when the + * event file is closed. + * + * struct ib_uverbs_event_file: One reference is held by the VFS and + * released when the file is closed. For asynchronous event files, + * another reference is held by the corresponding main context file + * and released when that file is closed. For completion event files, + * a reference is taken when a CQ is created that uses the file, and + * released when the CQ is destroyed. 
+ */ + +struct ib_uverbs_device { + struct ib_device *ib_dev; +#ifdef LINUX_TO_BE_CHANGED + struct kref ref; + int devnum; + struct cdev *dev; + struct class_device *class_dev; + int num_comp_vectors; +#endif +}; + +#ifdef LINUX_TO_BE_CHANGED +struct ib_uverbs_event_file { + struct kref ref; + struct file *file; + struct ib_uverbs_file *uverbs_file; + spinlock_t lock; + int is_async; + wait_queue_head_t poll_wait; + struct fasync_struct *async_queue; + struct list_head event_list; +}; + +struct ib_uverbs_file { + struct kref ref; + struct semaphore mutex; + struct ib_uverbs_device *device; + struct ib_ucontext *ucontext; + struct ib_event_handler event_handler; + struct ib_uverbs_event_file *async_file; +}; + +struct ib_uverbs_event { + union { + struct ib_uverbs_async_event_desc async; + struct ib_uverbs_comp_event_desc comp; + } desc; + struct list_head list; + struct list_head obj_list; + u32 *counter; +}; +#endif + +struct ib_uverbs_mcast_entry { + struct list_head list; + union ib_gid gid; + u16 lid; +}; + +struct ib_uevent_object { + struct ib_uobject uobject; + struct list_head event_list; + u32 events_reported; +}; + +struct ib_uqp_object { + struct ib_uevent_object uevent; + struct list_head mcast_list; +}; + +struct ib_ucq_object { + struct ib_uobject uobject; + struct ib_uverbs_file *uverbs_file; + struct list_head comp_list; + struct list_head async_list; + u32 comp_events_reported; + u32 async_events_reported; +}; + +#ifdef LIN_TO_BE_CHANGED +extern struct semaphore ib_uverbs_idr_mutex; +extern struct idr ib_uverbs_pd_idr; +extern struct idr ib_uverbs_mr_idr; +extern struct idr ib_uverbs_mw_idr; +extern struct idr ib_uverbs_ah_idr; +extern struct idr ib_uverbs_cq_idr; +extern struct idr ib_uverbs_qp_idr; +extern struct idr ib_uverbs_srq_idr; +#endif + +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd); +void ib_uverbs_release_event_file(struct kref *ref); +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); + +void ib_uverbs_release_ucq(struct ib_uverbs_file *file, + struct ib_uverbs_event_file *ev_file, + struct ib_ucq_object *uobj); +void ib_uverbs_release_uevent(struct ib_uverbs_file *file, + struct ib_uevent_object *uobj); + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event); + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write); +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); + +#ifdef LIN_TO_BE_CHANGED +#define IB_UVERBS_DECLARE_CMD(name) \ + ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ + const char *buf, int in_len, \ + int out_len) +#endif + +IB_UVERBS_DECLARE_CMD(get_context); +IB_UVERBS_DECLARE_CMD(query_device); +IB_UVERBS_DECLARE_CMD(query_port); +IB_UVERBS_DECLARE_CMD(alloc_pd); +IB_UVERBS_DECLARE_CMD(dealloc_pd); +IB_UVERBS_DECLARE_CMD(reg_mr); +IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_comp_channel); +IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(poll_cq); +IB_UVERBS_DECLARE_CMD(req_notify_cq); +IB_UVERBS_DECLARE_CMD(destroy_cq); +IB_UVERBS_DECLARE_CMD(create_qp); 
+IB_UVERBS_DECLARE_CMD(modify_qp); +IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(post_send); +IB_UVERBS_DECLARE_CMD(post_recv); +IB_UVERBS_DECLARE_CMD(post_srq_recv); +IB_UVERBS_DECLARE_CMD(create_ah); +IB_UVERBS_DECLARE_CMD(destroy_ah); +IB_UVERBS_DECLARE_CMD(attach_mcast); +IB_UVERBS_DECLARE_CMD(detach_mcast); +IB_UVERBS_DECLARE_CMD(create_srq); +IB_UVERBS_DECLARE_CMD(modify_srq); +IB_UVERBS_DECLARE_CMD(destroy_srq); + +struct ib_pd *ib_uverbs_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +int ib_uverbs_dealloc_pd(struct ib_pd *pd); + +#endif /* UVERBS_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/mt_uverbsmem.c b/branches/MTHCA/hw/mthca/kernel/mt_uverbsmem.c new file mode 100644 index 00000000..5f507548 --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mt_uverbsmem.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: uverbs_mem.c 2783 2005-07-05 02:21:08Z roland $ + */ + +#include "ib_verbs.h" + +void ibv_umem_release(struct ib_device *dev, struct ib_umem *umem) +{ + struct ib_umem_chunk *chunk, *tmp; + int i; + + list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list,struct ib_umem_chunk,struct ib_umem_chunk) { + pci_unmap_sg((struct mthca_dev *)dev, chunk->page_list, + chunk->nents, PCI_DMA_BIDIRECTIONAL); + for (i = 0; i < chunk->nents; ++i) { + put_page(&chunk->page_list[i]); + } + kfree(chunk); + } +} + +int ibv_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write) +{ + struct ib_umem_chunk *chunk, *last_chunk; + u64 cur_base; + unsigned long npages, nupages; + int ret = -ENOMEM; + int off; + int i; + + /* fill mem */ + mem->user_base = (u64)(UINT_PTR)addr; + mem->length = size; + mem->offset = (int)(((u64)(UINT_PTR) addr) & ~PAGE_MASK); + mem->page_size = PAGE_SIZE; + mem->writable = write; + INIT_LIST_HEAD(&mem->chunk_list); + + /* build sg list */ + npages = NEXT_PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; + cur_base = (u64)(UINT_PTR)addr & PAGE_MASK; + while (npages) { + /* allocate a max large chunk (it's <= PAGE_SIZE) */ + chunk = kzalloc(sizeof *chunk + sizeof (struct scatterlist) * + IB_UMEM_MAX_PAGE_CHUNK, GFP_KERNEL); + if (!chunk) + goto err_kmalloc; + list_add_tail(&chunk->list, &mem->chunk_list); + + /* fill the chunk */ + for (i=0; i < IB_UMEM_MAX_PAGE_CHUNK; i++) { + + /* map a one page */ + ret = get_user_pages((struct mthca_dev *)dev, cur_base, + 1, write, &chunk->page_list[i] ); + if (ret < 0) + goto out; + + /* update the chunk */ + chunk->nents++; /* number of sg elements */ + + /* calculate the rest of the buffer to handle */ + cur_base += PAGE_SIZE; + if (!--npages) + break; + } + + /* map all chunk pages */ + chunk->nmap = pci_map_sg((struct mthca_dev *)dev, + chunk->page_list, chunk->nents, PCI_DMA_BIDIRECTIONAL); + if (chunk->nmap <= 0) + goto out; + + } + + /* shorten the last chunk */ + ret = 0; /* if we get here - all is OK */ + last_chunk = chunk; + chunk = kzalloc(sizeof *chunk + sizeof (struct scatterlist) * + chunk->nents, GFP_KERNEL); + if (!chunk) + goto err_kmalloc; + memcpy( chunk, last_chunk, sizeof *last_chunk + sizeof (struct scatterlist) * + last_chunk->nents); + list_del(&last_chunk->list); + list_add_tail(&chunk->list, &mem->chunk_list); + kfree(last_chunk); + goto exit; + +out: + ibv_umem_release(dev, mem); +err_kmalloc: +exit: + return ret; +} + + diff --git a/branches/MTHCA/hw/mthca/kernel/mt_verbs.c b/branches/MTHCA/hw/mthca/kernel/mt_verbs.c index da54c0a6..b4ad3d8d 100644 --- a/branches/MTHCA/hw/mthca/kernel/mt_verbs.c +++ b/branches/MTHCA/hw/mthca/kernel/mt_verbs.c @@ -40,54 +40,159 @@ #include #include +#include "mthca_dev.h" +#include "mthca_user.h" +#include "ib_user_verbs.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_verbs.tmh" +#endif + + +void ibv_um_close( struct ib_ucontext * h_um_ca ) +{ + int err; + ib_api_status_t status; + struct ib_ucontext *context_p = (struct ib_ucontext *)h_um_ca; + + HCA_ENTER(HCA_DBG_SHIM); + + context_p->is_removing = TRUE; + + if (atomic_read(&context_p->usecnt)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("resources are not released (cnt %d)\n", context_p->usecnt)); + status = IB_RESOURCE_BUSY; + goto err_usage; + } + + err = ibv_dealloc_pd( context_p->pd ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ibv_dealloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + err = 
mthca_dealloc_ucontext(context_p); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("mthca_dealloc_ucontext failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_dealloc_ucontext; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM , ("pcs %p\n", PsGetCurrentProcess()) ); + status = IB_SUCCESS; + goto end; + +err_dealloc_ucontext: +err_usage: +end: + HCA_PRINT_EXIT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("completes with ERROR status %s\n", ib_get_err_str(status))); + return; +} /* Protection domains */ -struct ib_pd *ib_alloc_pd(struct ib_device *device) +struct ib_pd *ibv_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) { struct ib_pd *pd; - pd = device->alloc_pd(device, NULL, NULL); + pd = device->alloc_pd(device, context, p_umv_buf); if (!IS_ERR(pd)) { pd->device = device; - pd->uobject = NULL; + pd->ucontext = context; atomic_set(&pd->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); } return pd; } -EXPORT_SYMBOL(ib_alloc_pd); -int ib_dealloc_pd(struct ib_pd *pd) +int ibv_dealloc_pd(struct ib_pd *pd) { - if (atomic_read(&pd->usecnt)) + if (atomic_read(&pd->usecnt)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("resources are not released (cnt %d)\n", pd->usecnt)); return -EBUSY; + } + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); return pd->device->dealloc_pd(pd); } -EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ -struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *ibv_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) { + int err; struct ib_ah *ah; + struct ib_mr *ib_mr; + u64 start; + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_ah *create_ah = (struct ibv_create_ah *)p_umv_buf->p_inout_buf; + + // create region + ib_mr = ibv_reg_mr( + pd, + create_ah->mr.access_flags, + (void*)(ULONG_PTR)create_ah->mr.start, + create_ah->mr.length, create_ah->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + + start = create_ah->mr.start; + } ah = pd->device->create_ah(pd, ah_attr); - if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; - ah->uobject = NULL; - atomic_inc(&pd->usecnt); + if (IS_ERR(ah)) { + err = PTR_ERR(ah); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n", err)); + goto err_create_ah; + } + + // fill results + ah->device = pd->device; + ah->pd = pd; + ah->ucontext = context; + atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_AV ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + if (context) + atomic_inc(&context->usecnt); + + // fill results for user + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_ah_resp *create_ah_resp = (struct ibv_create_ah_resp *)p_umv_buf->p_inout_buf; + ah->ib_mr = ib_mr; + create_ah_resp->start = start; + create_ah_resp->mr.lkey = ib_mr->lkey; + create_ah_resp->mr.rkey = ib_mr->rkey; + create_ah_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + p_umv_buf->output_size = sizeof(struct ibv_create_ah_resp); } return ah; + 
+err_create_ah: + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + return ERR_PTR(ib_mr); } -EXPORT_SYMBOL(ib_create_ah); -struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, +struct ib_ah *ibv_create_ah_from_wc(struct ib_pd *pd, struct _ib_wc *wc, struct ib_grh *grh, u8 port_num) { struct ib_ah_attr ah_attr; @@ -96,12 +201,12 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, int ret; memset(&ah_attr, 0, sizeof ah_attr); - ah_attr.dlid = wc->slid; - ah_attr.sl = wc->sl; - ah_attr.src_path_bits = wc->dlid_path_bits; + ah_attr.dlid = wc->recv.ud.remote_lid; + ah_attr.sl = wc->recv.ud.remote_sl; + ah_attr.src_path_bits = wc->recv.ud.path_bits; ah_attr.port_num = port_num; - if (wc->wc_flags & IB_WC_GRH) { + if (wc->recv.ud.recv_opt & IB_RECV_OPT_GRH_VALID) { ah_attr.ah_flags = IB_AH_GRH; ah_attr.grh.dgid = grh->dgid; @@ -111,49 +216,69 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, return ERR_PTR(ret); ah_attr.grh.sgid_index = (u8) gid_index; - flow_class = be32_to_cpu(grh->version_tclass_flow); + flow_class = cl_ntoh32(grh->version_tclass_flow); ah_attr.grh.flow_label = flow_class & 0xFFFFF; ah_attr.grh.traffic_class = (flow_class >> 20) & 0xFF; ah_attr.grh.hop_limit = grh->hop_limit; } - return ib_create_ah(pd, &ah_attr); + return ibv_create_ah(pd, &ah_attr, NULL, NULL); } -EXPORT_SYMBOL(ib_create_ah_from_wc); -int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +int ibv_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { return ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_modify_ah); -int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +int ibv_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { return ah->device->query_ah ? ah->device->query_ah(ah, ah_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_query_ah); -int ib_destroy_ah(struct ib_ah *ah) + +static void release_user_cq_qp_resources( + struct ib_ucontext *ucontext, + struct ib_mr * ib_mr) +{ + if (ucontext) { + ibv_dereg_mr( ib_mr ); + atomic_dec(&ucontext->usecnt); + if (!atomic_read(&ucontext->usecnt) && ucontext->is_removing) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("User resources are released. 
Removing context\n")); + ibv_um_close(ucontext); + } + } +} + +int ibv_destroy_ah(struct ib_ah *ah) { struct ib_pd *pd; int ret; + struct ib_ucontext *ucontext; + struct ib_mr * ib_mr; pd = ah->pd; + ucontext = ah->ucontext; + ib_mr = ah->ib_mr; + ret = ah->device->destroy_ah(ah); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_AV ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + } + release_user_cq_qp_resources(ucontext, ib_mr); return ret; } -EXPORT_SYMBOL(ib_destroy_ah); /* Shared receive queues */ -struct ib_srq *ib_create_srq(struct ib_pd *pd, +struct ib_srq *ibv_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) { struct ib_srq *srq; @@ -171,29 +296,28 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd, srq->srq_context = srq_init_attr->srq_context; atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); } return srq; } -EXPORT_SYMBOL(ib_create_srq); -int ib_modify_srq(struct ib_srq *srq, +int ibv_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); } -EXPORT_SYMBOL(ib_modify_srq); -int ib_query_srq(struct ib_srq *srq, +int ibv_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { return srq->device->query_srq ? srq->device->query_srq(srq, srq_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_query_srq); -int ib_destroy_srq(struct ib_srq *srq) +int ibv_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; int ret; @@ -204,52 +328,117 @@ int ib_destroy_srq(struct ib_srq *srq) pd = srq->pd; ret = srq->device->destroy_srq(srq); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } return ret; } -EXPORT_SYMBOL(ib_destroy_srq); /* Queue pairs */ -struct ib_qp *ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr) +struct ib_qp *ibv_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) { - struct ib_qp *qp; + int err; + struct ib_qp *ib_qp; + struct ib_mr *ib_mr; + u64 user_handle; + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + int err; + struct ibv_create_qp *create_qp = (struct ibv_create_qp *)p_umv_buf->p_inout_buf; + + // create region + ib_mr = ibv_reg_mr( + (struct ib_pd *)(ULONG_PTR)create_qp->mr.pd_handle, + create_qp->mr.access_flags, + (void*)(ULONG_PTR)create_qp->mr.start, + create_qp->mr.length, create_qp->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + create_qp->lkey = ib_mr->lkey; + user_handle = create_qp->user_handle; + } - qp = pd->device->create_qp(pd, qp_init_attr, NULL); + ib_qp = pd->device->create_qp(pd, qp_init_attr, p_umv_buf); - if (!IS_ERR(qp)) { - qp->device = pd->device; - qp->pd = pd; - qp->send_cq = qp_init_attr->send_cq; - qp->recv_cq = qp_init_attr->recv_cq; - qp->srq = qp_init_attr->srq; - qp->uobject = NULL; - qp->event_handler = qp_init_attr->event_handler; - qp->qp_context = qp_init_attr->qp_context; - qp->qp_type = qp_init_attr->qp_type; - atomic_inc(&pd->usecnt); - atomic_inc(&qp_init_attr->send_cq->usecnt); - 
atomic_inc(&qp_init_attr->recv_cq->usecnt); - if (qp_init_attr->srq) - atomic_inc(&qp_init_attr->srq->usecnt); + if (IS_ERR(ib_qp)) { + err = PTR_ERR(ib_qp); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW ,("create_qp failed (%d)\n", err)); + goto err_create_qp; + } + + // fill results + ib_qp->device = pd->device; + ib_qp->pd = pd; + ib_qp->send_cq = qp_init_attr->send_cq; + ib_qp->recv_cq = qp_init_attr->recv_cq; + ib_qp->srq = qp_init_attr->srq; + ib_qp->ucontext = context; + ib_qp->event_handler = qp_init_attr->event_handler; + ib_qp->qp_context = qp_init_attr->qp_context; + ib_qp->qp_type = qp_init_attr->qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + if (context) + atomic_inc(&context->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_LOW , + ("uctx %p, qhndl %p, qnum %#x, q_num %#x, scq %#x:%#x, rcq %#x:%#x \n", + pd->ucontext, ib_qp, ((struct mthca_qp*)ib_qp)->qpn, ib_qp->qp_num, + ((struct mthca_cq*)ib_qp->send_cq)->cqn, ib_qp->send_cq->cqe, + ((struct mthca_cq*)ib_qp->recv_cq)->cqn, ib_qp->recv_cq->cqe ) ); + + // fill results for user + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct mthca_qp *qp = (struct mthca_qp *)ib_qp; + struct ibv_create_qp_resp *create_qp_resp = (struct ibv_create_qp_resp *)p_umv_buf->p_inout_buf; + ib_qp->ib_mr = ib_mr; + create_qp_resp->qpn = ib_qp->qp_num; + create_qp_resp->user_handle = user_handle; + create_qp_resp->mr.lkey = ib_mr->lkey; + create_qp_resp->mr.rkey = ib_mr->rkey; + create_qp_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + create_qp_resp->qp_handle = (__u64)(ULONG_PTR)qp; + create_qp_resp->max_send_wr = qp->sq.max; + create_qp_resp->max_recv_wr = qp->rq.max; + create_qp_resp->max_send_sge = qp->sq.max_gs; + create_qp_resp->max_recv_sge = qp->rq.max_gs; + create_qp_resp->max_inline_data = qp->max_inline_data; + p_umv_buf->output_size = sizeof(struct ibv_create_qp_resp); } - return qp; + return ib_qp; + +err_create_qp: + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + return ERR_PTR(ib_mr); } -EXPORT_SYMBOL(ib_create_qp); -int ib_modify_qp(struct ib_qp *qp, +int ibv_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); } -EXPORT_SYMBOL(ib_modify_qp); -int ib_query_qp(struct ib_qp *qp, +int ibv_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) @@ -258,67 +447,127 @@ int ib_query_qp(struct ib_qp *qp, qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_query_qp); - -int ib_destroy_qp(struct ib_qp *qp) + +int ibv_destroy_qp(struct ib_qp *qp) { struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; int ret; + struct ib_ucontext *ucontext; + struct ib_mr * ib_mr; pd = qp->pd; scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; + ucontext = pd->ucontext; + ib_mr = qp->ib_mr; ret = qp->device->destroy_qp(qp); if (!ret) { atomic_dec(&pd->usecnt); atomic_dec(&scq->usecnt); atomic_dec(&rcq->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); if (srq) 
atomic_dec(&srq->usecnt); + release_user_cq_qp_resources(ucontext, ib_mr); } return ret; } -EXPORT_SYMBOL(ib_destroy_qp); /* Completion queues */ -struct ib_cq *ib_create_cq(struct ib_device *device, +struct ib_cq *ibv_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), - void *cq_context, int cqe) + void *cq_context, int cqe, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) { + int err; struct ib_cq *cq; - - cq = device->create_cq(device, cqe, NULL, NULL); - - if (!IS_ERR(cq)) { - cq->device = device; - cq->uobject = NULL; - cq->comp_handler = comp_handler; - cq->event_handler = event_handler; - cq->cq_context = cq_context; - atomic_set(&cq->usecnt, 0); + struct ib_mr *ib_mr; + u64 user_handle; + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_cq *create_cq = (struct ibv_create_cq *)p_umv_buf->p_inout_buf; + + // create region + ib_mr = ibv_reg_mr( + (struct ib_pd *)(ULONG_PTR)create_cq->mr.pd_handle, + create_cq->mr.access_flags, + (void*)(ULONG_PTR)create_cq->mr.start, + create_cq->mr.length, create_cq->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + user_handle = create_cq->user_handle; + create_cq->lkey = ib_mr->lkey; + } + + // create cq + cq = device->create_cq(device, cqe, context, p_umv_buf); + if (IS_ERR(cq)) { + err = PTR_ERR(cq); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW ,("create_qp failed (%d)\n", err)); + goto err_create_cq; } + cq->device = device; + cq->ucontext = context; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); + if (context) + atomic_inc(&context->usecnt); + + // fill results + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_cq_resp *create_cq_resp = (struct ibv_create_cq_resp *)p_umv_buf->p_inout_buf; + cq->ib_mr = ib_mr; + create_cq_resp->user_handle = user_handle; + create_cq_resp->mr.lkey = ib_mr->lkey; + create_cq_resp->mr.rkey = ib_mr->rkey; + create_cq_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + create_cq_resp->cq_handle = (u64)(ULONG_PTR)cq; + create_cq_resp->cqe = cq->cqe; + p_umv_buf->output_size = sizeof(struct ibv_create_cq_resp); + } + return cq; + +err_create_cq: + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + return ERR_PTR(err); } -EXPORT_SYMBOL(ib_create_cq); -int ib_destroy_cq(struct ib_cq *cq) +int ibv_destroy_cq(struct ib_cq *cq) { + int ret; + struct ib_ucontext *ucontext = cq->ucontext; + struct ib_mr * ib_mr = cq->ib_mr; + if (atomic_read(&cq->usecnt)) return -EBUSY; - return cq->device->destroy_cq(cq); + ret = cq->device->destroy_cq(cq); + + release_user_cq_qp_resources(ucontext, ib_mr); + + return ret; } -EXPORT_SYMBOL(ib_destroy_cq); -int ib_resize_cq(struct ib_cq *cq, +int ibv_resize_cq(struct ib_cq *cq, int cqe) { int ret; @@ -332,11 +581,51 @@ int ib_resize_cq(struct ib_cq *cq, return ret; } -EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +struct ib_mr *ibv_reg_mr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + void* __ptr64 vaddr, + uint64_t length, + uint64_t hca_va, + boolean_t um_call + ) +{ + struct ib_mr *ib_mr; + int ret; + + /* sanity check */ + if (!um_call) { + ret = -ENOSYS; + 
HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW ,("ibv_reg_mr for kernel mode is not supported (%d)\n", ret)); + goto err_not_supported; + } + + ib_mr = pd->device->reg_user_mr(pd, vaddr, length, hca_va, mr_access_flags); + if (IS_ERR(ib_mr)) { + ret = PTR_ERR(ib_mr); + goto err_reg_user_mr; + } + + ib_mr->device = pd->device; + ib_mr->pd = pd; +#ifdef LINUX_TO_BE_REMOVED + ib_mr->uobject = &obj->uobject; +#endif + atomic_inc(&pd->usecnt); + atomic_set(&ib_mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + + return ib_mr; + +err_reg_user_mr: +err_not_supported: + return ERR_PTR(ret); +} + +struct ib_mr *ibv_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t mr_access_flags) { struct ib_mr *mr; @@ -345,19 +634,22 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; - mr->uobject = NULL; + #ifdef LINUX_TO_BE_REMOVED + mr->uobject = NULL; + #endif atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); } return mr; } -EXPORT_SYMBOL(ib_get_dma_mr); -struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, +struct ib_mr *ibv_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, - int mr_access_flags, + mthca_qp_access_t mr_access_flags, u64 *iova_start) { struct ib_mr *mr; @@ -368,21 +660,24 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; - mr->uobject = NULL; +#ifdef LINUX_TO_BE_REMOVED + mr->uobject = NULL; +#endif atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); } return mr; } -EXPORT_SYMBOL(ib_reg_phys_mr); -int ib_rereg_phys_mr(struct ib_mr *mr, +int ibv_rereg_phys_mr(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, - int mr_access_flags, + mthca_qp_access_t mr_access_flags, u64 *iova_start) { struct ib_pd *old_pd; @@ -403,39 +698,41 @@ int ib_rereg_phys_mr(struct ib_mr *mr, if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) { atomic_dec(&old_pd->usecnt); atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); } return ret; } -EXPORT_SYMBOL(ib_rereg_phys_mr); -int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +int ibv_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) { return mr->device->query_mr ? 
mr->device->query_mr(mr, mr_attr) : -ENOSYS; } -EXPORT_SYMBOL(ib_query_mr); -int ib_dereg_mr(struct ib_mr *mr) +int ibv_dereg_mr(struct ib_mr *mr) { - struct ib_pd *pd; int ret; + struct ib_pd *pd; if (atomic_read(&mr->usecnt)) return -EBUSY; pd = mr->pd; ret = mr->device->dereg_mr(mr); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + } return ret; } -EXPORT_SYMBOL(ib_dereg_mr); /* Memory windows */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd) +struct ib_mw *ibv_alloc_mw(struct ib_pd *pd) { struct ib_mw *mw; @@ -446,32 +743,37 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd) if (!IS_ERR(mw)) { mw->device = pd->device; mw->pd = pd; - mw->uobject = NULL; +#ifdef LINUX_TO_BE_REMOVED + mr->uobject = NULL; +#endif atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); } return mw; } -EXPORT_SYMBOL(ib_alloc_mw); -int ib_dealloc_mw(struct ib_mw *mw) +int ibv_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd; int ret; pd = mw->pd; ret = mw->device->dealloc_mw(mw); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } return ret; } -EXPORT_SYMBOL(ib_dealloc_mw); /* "Fast" memory regions */ -struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, +struct ib_fmr *ibv_alloc_fmr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct ib_fmr *fmr; @@ -484,13 +786,14 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, fmr->device = pd->device; fmr->pd = pd; atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); } return fmr; } -EXPORT_SYMBOL(ib_alloc_fmr); -int ib_unmap_fmr(struct list_head *fmr_list) +int ibv_unmap_fmr(struct list_head *fmr_list) { struct ib_fmr *fmr; @@ -500,36 +803,41 @@ int ib_unmap_fmr(struct list_head *fmr_list) fmr = list_entry(fmr_list->next, struct ib_fmr, list); return fmr->device->unmap_fmr(fmr_list); } -EXPORT_SYMBOL(ib_unmap_fmr); -int ib_dealloc_fmr(struct ib_fmr *fmr) +int ibv_dealloc_fmr(struct ib_fmr *fmr) { struct ib_pd *pd; int ret; pd = fmr->pd; ret = fmr->device->dealloc_fmr(fmr); - if (!ret) + if (!ret) { atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } return ret; } -EXPORT_SYMBOL(ib_dealloc_fmr); /* Multicast groups */ -int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) +int ibv_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->attach_mcast ? - qp->device->attach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->attach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UNRELIABLE_DGRM) + return -EINVAL; + + return qp->device->attach_mcast(qp, gid, lid); } -EXPORT_SYMBOL(ib_attach_mcast); -int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) +int ibv_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { - return qp->device->detach_mcast ? 
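+	/* NB: like ibv_attach_mcast() above, detach now validates up front
+	 * that the GID is a multicast GID (leading byte 0xFF) and that the
+	 * QP is an unreliable-datagram QP, returning -EINVAL otherwise.
+	 */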
- qp->device->detach_mcast(qp, gid, lid) : - -ENOSYS; + if (!qp->device->detach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UNRELIABLE_DGRM) + return -EINVAL; + + return qp->device->detach_mcast(qp, gid, lid); } -EXPORT_SYMBOL(ib_detach_mcast); diff --git a/branches/MTHCA/hw/mthca/kernel/mthca.h b/branches/MTHCA/hw/mthca/kernel/mthca.h index 456d47ba..9570421a 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca.h @@ -1,9 +1,6 @@ #ifndef MTHCA_H #define MTHCA_H -#include "hca_driver.h" -#include "mthca_dev.h" - NTSTATUS mthca_init_one(hca_dev_ext_t *ext); void mthca_remove_one(hca_dev_ext_t *ext); int mthca_get_dev_info(struct mthca_dev *mdev, __be64 *node_guid, u32 *hw_id); diff --git a/branches/MTHCA/hw/mthca/kernel/mthca.inf b/branches/MTHCA/hw/mthca/kernel/mthca.inf index 2fb94d5e..986fbccb 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca.inf +++ b/branches/MTHCA/hw/mthca/kernel/mthca.inf @@ -7,7 +7,8 @@ Class=InfiniBandHca ClassGUID=%HcaClassGuid% Provider=%MTL% CatalogFile=mthca.cat -DriverVer=09/10/2005,1.0.3 +; must be synchronized with MTHCA_DEV.H +DriverVer=03/01/2006,1.0.4 ; ================= Destination directory section ===================== @@ -15,7 +16,7 @@ DriverVer=09/10/2005,1.0.3 DefaultDestDir=%DIRID_DRIVERS% ClassCopyFiles=%DIRID_SYSTEM% MTHCA.UMCopyFiles=%DIRID_SYSTEM% -MTHCA.WOW64CopyFiles=%DIRID_SYSTEM_X86% +MTHCA.WOW64CopyFiles=%DIRID_WINDOWS%\SysWOW64 ; ================= Class Install section ===================== @@ -49,7 +50,7 @@ ibal.sys=1 mthca.sys=1 ; 2 lines excluded temporary ;mthcau.dll=1 -;mthcaud.dll=1 +mthcaud.dll=1 [SourceDisksFiles.amd64] IbInstaller.dll=1 @@ -57,7 +58,7 @@ ibal.sys=1 mthca.sys=1 ; 2 lines excluded temporary ;mthcau.dll=1 -;mthcaud.dll=1 +mthcaud.dll=1 ;uvpd32.dll=1 ;uvpd32d.dll=1 @@ -67,7 +68,7 @@ ibal.sys=1 mthca.sys=1 ; 2 lines excluded temporary ;mthcau.dll=1 -;mthcaud.dll=1 +mthcaud.dll=1 ;uvpd32.dll=1 ;uvpd32d.dll=1 @@ -114,15 +115,15 @@ CopyFiles = MTHCA.UMCopyFiles CopyFiles = MTHCA.WOW64CopyFiles [MTHCA.DDInstall.ntx86.Services] -AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall +AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall,MTHCA.EventLog AddService = ibal,%SPSVCINST_NULL%,Ibal.ServiceInstall [MTHCA.DDInstall.ntamd64.Services] -AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall +AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall,MTHCA.EventLog AddService = ibal,%SPSVCINST_NULL%,Ibal.ServiceInstall [MTHCA.DDInstall.ntia64.Services] -AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall +AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall,MTHCA.EventLog AddService = ibal,%SPSVCINST_NULL%,Ibal.ServiceInstall [MTHCA.CopyFiles] @@ -132,7 +133,7 @@ mthca.sys [MTHCA.UMCopyFiles] ; 2 lines excluded temporary ;mthcau.dll,,,2 -;mthcaud.dll,,,2 +mthcaud.dll,,,2 [MTHCA.WOW64CopyFiles] ; 2 lines excluded temporary @@ -160,8 +161,19 @@ ErrorControl = %SERVICE_ERROR_NORMAL% ServiceBinary = %12%\ibal.sys AddReg = Ibal.ParamsReg + +[MTHCA.EventLog] +AddReg = MTHCA.AddEventLogReg + +[MTHCA.AddEventLogReg] +HKR, , EventMessageFile, 0x00020000, "%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\mthca.sys" +HKR, , TypesSupported, 0x00010001, 7 + [MTHCA.ParamsReg] -HKR,"Parameters","DebugFlags",%REG_DWORD%,0x80000000 +HKR,"Parameters","DebugLevel",%REG_DWORD%,0x00000003 +HKR,"Parameters","DebugFlags",%REG_DWORD%,0x0000ffff 
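+; NB: DebugLevel/DebugFlags seed the driver's run-time debug filter
+; (0x3 presumably passes errors and warnings, 0xffff enables all HCA_DBG_*
+; groups), while the GlobalLogger entries below arm boot-time WPP tracing
+; for what appears to be the driver's trace control GUID.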
+HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\8bf1f640-63fe-4743-b9ef-fa38c695bfde","Flags",%REG_DWORD%,0xffff +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\8bf1f640-63fe-4743-b9ef-fa38c695bfde","Level",%REG_DWORD%,0x3 [Ibal.ParamsReg] HKR,"Parameters","DebugFlags",%REG_DWORD_NO_CLOBBER%,0x80000000 @@ -191,4 +203,3 @@ REG_DWORD_NO_CLOBBER = 0x00010003 REG_MULTI_SZ_APPEND = 0x00010008 DIRID_SYSTEM = 11 DIRID_DRIVERS = 12 -DIRID_SYSTEM_X86 = 16425 diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_allocator.c b/branches/MTHCA/hw/mthca/kernel/mthca_allocator.c index d03ed81d..797a6310 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_allocator.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_allocator.c @@ -33,13 +33,20 @@ */ #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_allocator.tmh" +#endif /* Trivial bitmap-based allocator */ u32 mthca_alloc(struct mthca_alloc *alloc) { u32 obj; + SPIN_LOCK_PREP(lh); - spin_lock(&alloc->lock); + spin_lock(&alloc->lock, &lh); obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); if (obj >= alloc->max) { alloc->top = (alloc->top + alloc->max) & alloc->mask; @@ -52,19 +59,21 @@ u32 mthca_alloc(struct mthca_alloc *alloc) } else obj = -1; - spin_unlock(&alloc->lock); + spin_unlock(&lh); return obj; } void mthca_free(struct mthca_alloc *alloc, u32 obj) { + SPIN_LOCK_PREP(lh); + obj &= alloc->max - 1; - spin_lock(&alloc->lock); + spin_lock(&alloc->lock, &lh); clear_bit(obj, alloc->table); alloc->last = MIN(alloc->last, obj); alloc->top = (alloc->top + alloc->max) & alloc->mask; - spin_unlock(&alloc->lock); + spin_unlock(&lh); } int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, @@ -143,8 +152,8 @@ void mthca_array_clear(struct mthca_array *array, int index) } if (array->page_list[p].used < 0) - pr_debug("Array %p index %d page %d with ref count %d < 0\n", - array, index, p, array->page_list[p].used); + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_LOW,("Array %p index %d page %d with ref count %d < 0\n", + array, index, p, array->page_list[p].used)); } int mthca_array_init(struct mthca_array *array, int nent) @@ -191,18 +200,16 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, dma_addr_t t; int i; + HCA_ENTER(HCA_DBG_MEMORY); if (size <= max_direct) { *is_direct = 1; npages = 1; shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(dev, - size, &t, GFP_KERNEL); - if (!buf->direct.buf) + alloc_dma_zmem_map(dev, size, PCI_DMA_BIDIRECTIONAL, &buf->direct); + if (!buf->direct.page) return -ENOMEM; - buf->direct.mapping = t; /* save dma_addr_t */ - - RtlZeroMemory(buf->direct.buf, size); + t = buf->direct.dma_address; /* shorten the code below */ while (t & ((1 << shift) - 1)) { --shift; @@ -230,19 +237,13 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, goto err_out; for (i = 0; i < npages; ++i) - buf->page_list[i].buf = NULL; + buf->page_list[i].page = NULL; for (i = 0; i < npages; ++i) { - buf->page_list[i].buf = - dma_alloc_coherent(dev, PAGE_SIZE, - &t, GFP_KERNEL); - if (!buf->page_list[i].buf) + alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &buf->page_list[i]); + if (!buf->page_list[i].page) goto err_free; - - dma_list[i] = t; - buf->page_list[i].mapping = t; - - RtlZeroMemory(buf->page_list[i].buf, PAGE_SIZE); + dma_list[i] = buf->page_list[i].dma_address; } } @@ -256,7 +257,8 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, goto err_free; 
kfree(dma_list); - + + HCA_EXIT(HCA_DBG_MEMORY); return 0; err_free: @@ -277,14 +279,11 @@ void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, mthca_free_mr(dev, mr); if (is_direct) { - dma_free_coherent(dev, size, buf->direct.buf, - buf->direct.mapping); + free_dma_mem_map(dev, &buf->direct, PCI_DMA_BIDIRECTIONAL); } else { for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) { - dma_free_coherent(dev, PAGE_SIZE, - buf->page_list[i].buf, - buf->page_list[i].mapping); + free_dma_mem_map(dev, &buf->page_list[i], PCI_DMA_BIDIRECTIONAL); } kfree(buf->page_list); } diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_av.c b/branches/MTHCA/hw/mthca/kernel/mthca_av.c index 48cc80b6..cffda49d 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_av.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_av.c @@ -35,8 +35,19 @@ #include #include - #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_av.tmh" +#endif + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_av_table) +#pragma alloc_text (PAGE, mthca_cleanup_av_table) +#endif + struct mthca_av { __be32 port_pd; @@ -99,34 +110,34 @@ on_hca_fail: RtlZeroMemory(av, MTHCA_AV_SIZE); - av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); + av->port_pd = cl_hton32(pd->pd_num | (ah_attr->port_num << 24)); av->g_slid = ah_attr->src_path_bits; - av->dlid = cpu_to_be16(ah_attr->dlid); + av->dlid = cl_hton16(ah_attr->dlid); av->msg_sr = (3 << 4) | /* 2K message */ ah_attr->static_rate; - av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + av->sl_tclass_flowlabel = cl_hton32(ah_attr->sl << 28); if (ah_attr->ah_flags & IB_AH_GRH) { av->g_slid |= 0x80; av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len + ah_attr->grh.sgid_index; av->hop_limit = ah_attr->grh.hop_limit; av->sl_tclass_flowlabel |= - cpu_to_be32((ah_attr->grh.traffic_class << 20) | + cl_hton32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); } else { /* Arbel workaround -- low byte of GID must be 2 */ - av->dgid[3] = cpu_to_be32(2); + av->dgid[3] = cl_hton32(2); } if (0) { int j; - mthca_dbg(dev, "Created UDAV at %p/%08lx:\n", - av, (unsigned long) ah->avdma); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Created UDAV at %p/%08lx:\n", + av, (unsigned long) ah->avdma)); for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((__be32 *) av)[j])); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,(" [%2x] %08x\n", + j * 4, cl_ntoh32(((__be32 *) av)[j]))); } if (ah->type == MTHCA_AH_ON_HCA) { @@ -158,35 +169,38 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) return 0; } +int mthca_ah_grh_present(struct mthca_ah *ah) +{ + return !!(ah->av->g_slid & 0x80); +} + int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, struct ib_ud_header *header) { if (ah->type == MTHCA_AH_ON_HCA) return -EINVAL; - header->lrh.service_level = (u8)be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; - header->lrh.destination_lid = (u8)ah->av->dlid; - header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f); - if (ah->av->g_slid & 0x80) { - header->grh_present = 1; + header->lrh.service_level = (u8)cl_ntoh32(ah->av->sl_tclass_flowlabel) >> 28; + header->lrh.destination_lid = ah->av->dlid; + header->lrh.source_lid = cl_hton16(ah->av->g_slid & 0x7f); + header->grh_present = mthca_ah_grh_present(ah); + if (header->grh_present) { header->grh.traffic_class = - (u8)((be32_to_cpu(ah->av->sl_tclass_flowlabel) 
>> 20) & 0xff); + (u8)((cl_ntoh32(ah->av->sl_tclass_flowlabel) >> 20) & 0xff); header->grh.flow_label = - (u8)(ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff)); + (u8)(ah->av->sl_tclass_flowlabel & cl_hton32(0xfffff)); ib_get_cached_gid(&dev->ib_dev, - (u8)be32_to_cpu(ah->av->port_pd) >> 24, - ah->av->gid_index, + (u8) (cl_ntoh32(ah->av->port_pd) >> 24), + ah->av->gid_index % dev->limits.gid_table_len, &header->grh.source_gid); memcpy(header->grh.destination_gid.raw, ah->av->dgid, 16); - } else { - header->grh_present = 0; } return 0; } -int __devinit mthca_init_av_table(struct mthca_dev *dev) +int mthca_init_av_table(struct mthca_dev *dev) { int err; @@ -228,7 +242,7 @@ int __devinit mthca_init_av_table(struct mthca_dev *dev) return -ENOMEM; } -void __devexit mthca_cleanup_av_table(struct mthca_dev *dev) +void mthca_cleanup_av_table(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) return; @@ -238,3 +252,44 @@ void __devexit mthca_cleanup_av_table(struct mthca_dev *dev) pci_pool_destroy(dev->av_table.pool); mthca_alloc_cleanup(&dev->av_table.alloc); } + +//NB: temporary, for support of query_qp +void mthca_get_av_params( struct mthca_ah *ah_p, u8 *port_num, __be16 *dlid, u8 *sr, u8 *path_bits ) +{ + struct mthca_av *av_p = ah_p->av; + *port_num = (u8) (cl_ntoh32(av_p->port_pd) >> 24); + *dlid = av_p->dlid; + *sr = av_p->msg_sr & 0x0f; + *path_bits = av_p->g_slid & 0x7f; +} + +//NB: temporary, for support of modify_qp +void mthca_set_av_params( struct mthca_dev *dev, struct mthca_ah *ah_p, struct ib_ah_attr *ah_attr ) +{ + struct mthca_av *av = ah_p->av; + struct ib_ah *ib_ah_p = (struct ib_ah *)ah_p; + struct mthca_pd *pd = (struct mthca_pd *)ib_ah_p->pd; + + // taken from mthca_create_av + av->port_pd = cl_hton32(pd->pd_num | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + av->dlid = cl_hton16(ah_attr->dlid); + av->msg_sr = (3 << 4) | /* 2K message */ + ah_attr->static_rate; + av->sl_tclass_flowlabel = cl_hton32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + av->g_slid |= 0x80; + av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len + + ah_attr->grh.sgid_index; + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= + cl_hton32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + av->dgid[3] = cl_hton32(2); + } +} + + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_catas.c b/branches/MTHCA/hw/mthca/kernel/mthca_catas.c new file mode 100644 index 00000000..1a982c1a --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mthca_catas.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_catas.tmh" +#endif + +enum { + MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, + + MTHCA_CATAS_TYPE_INTERNAL = 0, + MTHCA_CATAS_TYPE_UPLINK = 3, + MTHCA_CATAS_TYPE_DDR = 4, + MTHCA_CATAS_TYPE_PARITY = 5, +}; + +static spinlock_t catas_lock; + +static void handle_catas(struct mthca_dev *dev) +{ + struct ib_event event; + const char *type; + int i; + + event.device = &dev->ib_dev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + + ib_dispatch_event(&event); + + switch (_byteswap_ulong(readl(dev->catas_err.map)) >> 24) { + case MTHCA_CATAS_TYPE_INTERNAL: + type = "internal error"; + break; + case MTHCA_CATAS_TYPE_UPLINK: + type = "uplink bus error"; + break; + case MTHCA_CATAS_TYPE_DDR: + type = "DDR data error"; + break; + case MTHCA_CATAS_TYPE_PARITY: + type = "internal parity error"; + break; + default: + type = "unknown error"; + break; + } + + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Catastrophic error detected: %s\n", type)); + for (i = 0; i < (int)dev->catas_err.size; ++i) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,(" buf[%02x]: %08x\n", + i, _byteswap_ulong(readl(dev->catas_err.map + i)))); +} + +static void poll_catas(struct mthca_dev *dev) +{ + unsigned long flags; + int i; + SPIN_LOCK_PREP(lh); + + for (i = 0; i < (int)dev->catas_err.size; ++i) + if (readl(dev->catas_err.map + i)) { + handle_catas(dev); + return; + } + + spin_lock_dpc(&catas_lock, &lh); + if (!dev->catas_err.stop) { + KeSetTimerEx( &dev->catas_err.timer, dev->catas_err.interval, + 0, &dev->catas_err.timer_dpc ); + } + spin_unlock_dpc(&lh); + + return; +} + +static void timer_dpc( + IN struct _KDPC *Dpc, + IN PVOID DeferredContext, + IN PVOID SystemArgument1, + IN PVOID SystemArgument2 + ) +{ + struct mthca_dev *dev = (struct mthca_dev *)DeferredContext; + poll_catas( dev ); +} + + +void mthca_start_catas_poll(struct mthca_dev *dev) +{ + u64 addr; + + dev->catas_err.stop = 0; + dev->catas_err.map = NULL; + + addr = pci_resource_start(dev, HCA_BAR_TYPE_HCR) + + ((pci_resource_len(dev, HCA_BAR_TYPE_HCR) - 1) & + dev->catas_err.addr); + + dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4, &dev->catas_err.map_size ); + if (!dev->catas_err.map) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("couldn't map catastrophic error region " + "at 0x%I64x/0x%x\n", addr, dev->catas_err.size * 4)); + return; + } + + spin_lock_init( &catas_lock ); + KeInitializeDpc( &dev->catas_err.timer_dpc, timer_dpc, dev ); + KeInitializeTimer( &dev->catas_err.timer ); + dev->catas_err.interval.QuadPart = (-10)* (__int64)MTHCA_CATAS_POLL_INTERVAL; + KeSetTimerEx( &dev->catas_err.timer, dev->catas_err.interval, + 0, &dev->catas_err.timer_dpc ); +} + +void mthca_stop_catas_poll(struct 
mthca_dev *dev) +{ + SPIN_LOCK_PREP(lh); + + spin_lock_irq(&catas_lock, &lh); + dev->catas_err.stop = 1; + spin_unlock_irq(&lh); + + KeCancelTimer(&dev->catas_err.timer); + + if (dev->catas_err.map) { + iounmap(dev->catas_err.map, dev->catas_err.map_size); + } +} diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_cmd.c b/branches/MTHCA/hw/mthca/kernel/mthca_cmd.c index ab639d80..f0312e88 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_cmd.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_cmd.c @@ -36,6 +36,12 @@ #include #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_cmd.tmh" +#endif #include "mthca_config_reg.h" #include "mthca_cmd.h" #include "mthca_memfree.h" @@ -145,7 +151,6 @@ enum { * commands. So we can't use strict timeouts described in PRM -- we * just arbitrarily select 60 seconds for now. */ -#define HZ 1000000 /* 1 sec in usecs */ #define CMD_POLL_N_TRIES 60 enum { @@ -178,7 +183,7 @@ struct mthca_cmd_context { static inline int go_bit(struct mthca_dev *dev) { return readl(dev->hcr + HCR_STATUS_OFFSET) & - swab32(1 << HCR_GO_BIT); + _byteswap_ulong(1 << HCR_GO_BIT); } /* @@ -253,17 +258,17 @@ static int mthca_cmd_post(struct mthca_dev *dev, * (and some architectures such as ia64 implement memcpy_toio * in terms of writeb). */ - __raw_writel((__force u32) cpu_to_be32(in_param >> 32), (u8 *)dev->hcr + 0 * 4); - __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), (u8 *) dev->hcr + 1 * 4); - __raw_writel((__force u32) cpu_to_be32(in_modifier), (u8 *)dev->hcr + 2 * 4); - __raw_writel((__force u32) cpu_to_be32(out_param >> 32), (u8 *)dev->hcr + 3 * 4); - __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), (u8 *)dev->hcr + 4 * 4); - __raw_writel((__force u32) cpu_to_be32(token << 16), (u8 *)dev->hcr + 5 * 4); + __raw_writel((u32) cl_hton32((u32)(in_param >> 32)), (u8 *)dev->hcr + 0 * 4); + __raw_writel((u32) cl_hton32((u32)(in_param & 0xfffffffful)), (u8 *) dev->hcr + 1 * 4); + __raw_writel((u32) cl_hton32(in_modifier), (u8 *)dev->hcr + 2 * 4); + __raw_writel((u32) cl_hton32((u32)(out_param >> 32)), (u8 *)dev->hcr + 3 * 4); + __raw_writel((u32) cl_hton32((u32)(out_param & 0xfffffffful)), (u8 *)dev->hcr + 4 * 4); + __raw_writel((u32) cl_hton32(token << 16), (u8 *)dev->hcr + 5 * 4); /* __raw_writel may not order writes. */ wmb(); - __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + __raw_writel((u32) cl_hton32((1 << HCR_GO_BIT) | (event ? 
(1 << HCA_E_BIT) : 0) | (op_modifier << HCR_OPMOD_SHIFT) | op), (u8 *)dev->hcr + 6 * 4); @@ -303,12 +308,15 @@ static int mthca_cmd_poll(struct mthca_dev *dev, if (out_is_imm) *out_param = - (u64) be32_to_cpu((__force __be32) + (u64) cl_ntoh32((__be32) __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | - (u64) be32_to_cpu((__force __be32) + (u64) cl_ntoh32((__be32) __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); - *status = (u8)(be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24); + *status = (u8)(cl_ntoh32((__be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24); + if (*status) + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("mthca_cmd_wait: Command %02x completed with status %02x\n", + op, *status)); out: sem_up(&dev->cmd.poll_sem); @@ -364,15 +372,16 @@ static int mthca_cmd_wait(struct mthca_dev *dev, { int err = 0; struct mthca_cmd_context *context; + SPIN_LOCK_PREP(lh); if (sem_down_interruptible(&dev->cmd.event_sem)) return -EINTR; - spin_lock(&dev->cmd.context_lock); + spin_lock( &dev->cmd.context_lock, &lh ); BUG_ON(dev->cmd.free_head < 0); context = &dev->cmd.context[dev->cmd.free_head]; dev->cmd.free_head = context->next; - spin_unlock(&dev->cmd.context_lock); + spin_unlock( &lh ); #ifdef LINUX_TO_BE_CHANGED init_completion(&context->done); @@ -414,17 +423,17 @@ static int mthca_cmd_wait(struct mthca_dev *dev, *status = context->status; if (*status) - mthca_dbg(dev, "Command %02x completed with status %02x\n", - op, *status); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("mthca_cmd_wait: Command %02x completed with status %02x\n", + op, *status)); if (out_is_imm) *out_param = context->out_param; out: - spin_lock(&dev->cmd.context_lock); + spin_lock(&dev->cmd.context_lock, &lh); context->next = dev->cmd.free_head; dev->cmd.free_head = (int)(context - dev->cmd.context); - spin_unlock(&dev->cmd.context_lock); + spin_unlock(&lh); sem_up( &dev->cmd.event_sem ); @@ -497,7 +506,7 @@ int mthca_cmd_init(struct mthca_dev *dev) dev->hcr = ioremap(pci_resource_start(dev, HCA_BAR_TYPE_HCR) + MTHCA_HCR_BASE, MTHCA_HCR_SIZE, &dev->hcr_size); if (!dev->hcr) { - mthca_err(dev, "Couldn't map command register."); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map command register.")); return -ENOMEM; } @@ -615,10 +624,10 @@ int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status); if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR) - mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, " + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SYS_EN DDR error: syn=%x, sock=%d, " "sladdr=%d, SPD source=%s\n", (int) (out >> 6) & 0xf, (int) (out >> 4) & 3, - (int) (out >> 1) & 7, (int) out & 1 ? "NVMEM" : "DIMM"); + (int) (out >> 1) & 7, (int) out & 1 ? 
"NVMEM" : "DIMM")); return ret; } @@ -639,6 +648,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, unsigned long i; int err = 0; int ts = 0, tc = 0; + CPU_2_BE64_PREP; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) @@ -657,19 +667,18 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, i = (u32)mthca_icm_addr(&iter) | mthca_icm_size(&iter); lg = ffs(i) - 1; if (lg < 12) { - mthca_warn(dev, "Got FW area not aligned to 4K (%llx/%lx).\n", + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Got FW area not aligned to 4K (%I64x/%lx).\n", (unsigned long long) mthca_icm_addr(&iter), - mthca_icm_size(&iter)); + mthca_icm_size(&iter))); err = -EINVAL; goto out; } - for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i) { + for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) { if (virt != -1) { - pages[nent * 2] = cpu_to_be64(virt); + pages[nent * 2] = cl_hton64(virt); virt += 1 << lg; } - - pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) + + pages[nent * 2 + 1] = CPU_2_BE64((mthca_icm_addr(&iter) + (i << lg)) | (lg - 12)); ts += 1 << (lg - 10); ++tc; @@ -690,14 +699,14 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, switch (op) { case CMD_MAP_FA: - mthca_dbg(dev, "Mapped %d chunks/%d KB for FW.\n", tc, ts); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Mapped %d chunks/%d KB for FW.\n", tc, ts)); break; case CMD_MAP_ICM_AUX: - mthca_dbg(dev, "Mapped %d chunks/%d KB for ICM aux.\n", tc, ts); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Mapped %d chunks/%d KB for ICM aux.\n", tc, ts)); break; case CMD_MAP_ICM: - mthca_dbg(dev, "Mapped %d chunks/%d KB at %llx for ICM.\n", - tc, ts, (unsigned long long) virt - (ts << 10)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Mapped %d chunks/%d KB at %I64x for ICM.\n", + tc, ts, (unsigned long long) virt - (ts << 10))); break; } @@ -764,37 +773,42 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); dev->cmd.max_cmds = 1 << lg; + MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); + MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("FW version %012I64x, max commands %d\n", + (unsigned long long) dev->fw_ver, dev->cmd.max_cmds)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Catastrophic error buffer at 0x%I64x, size 0x%x\n", + (unsigned long long) dev->catas_err.addr, dev->catas_err.size)); - mthca_dbg(dev, "FW version %012llx, max commands %d\n", - (unsigned long long) dev->fw_ver, dev->cmd.max_cmds); if (mthca_is_memfree(dev)) { MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET); MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET); - mthca_dbg(dev, "FW size %d KB\n", dev->fw.arbel.fw_pages << 2); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("FW size %d KB\n", dev->fw.arbel.fw_pages << 2)); /* * Arbel page size is always 4 KB; round up number of * system pages needed. 
*/ dev->fw.arbel.fw_pages = - (dev->fw.arbel.fw_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> - (PAGE_SHIFT - 12); + ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE >> 12) >> + (PAGE_SHIFT - 12); - mthca_dbg(dev, "Clear int @ %llx, EQ arm @ %llx, EQ set CI @ %llx\n", + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Clear int @ %I64x, EQ arm @ %I64x, EQ set CI @ %I64x\n", (unsigned long long) dev->fw.arbel.clr_int_base, (unsigned long long) dev->fw.arbel.eq_arm_base, - (unsigned long long) dev->fw.arbel.eq_set_ci_base); + (unsigned long long) dev->fw.arbel.eq_set_ci_base)); } else { MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET); MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET); - mthca_dbg(dev, "FW size %d KB (start %llx, end %llx)\n", + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("FW size %d KB (start %I64x, end %I64x)\n", (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10), (unsigned long long) dev->fw.tavor.fw_start, - (unsigned long long) dev->fw.tavor.fw_end); + (unsigned long long) dev->fw.tavor.fw_end)); } out: @@ -837,18 +851,18 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) != !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { - mthca_info(dev, "FW reports that HCA-attached memory " + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("FW reports that HCA-attached memory " "is %s hidden; does not match PCI config\n", - (info & ENABLE_LAM_INFO_HIDDEN_FLAG) ? - "" : "not"); + (info & ENABLE_LAM_INFO_HIDDEN_FLAG)? + "" : "not")); } if (info & ENABLE_LAM_INFO_HIDDEN_FLAG) - mthca_dbg(dev, "HCA-attached memory is hidden.\n"); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("HCA-attached memory is hidden.\n")); - mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n", + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA memory size %d KB (start %I64x, end %I64x)\n", (int) ((dev->ddr_end - dev->ddr_start) >> 10), (unsigned long long) dev->ddr_start, - (unsigned long long) dev->ddr_end); + (unsigned long long) dev->ddr_end)); out: mthca_free_mailbox(dev, mailbox); @@ -892,18 +906,19 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) != !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { - mthca_info(dev, "FW reports that HCA-attached memory " + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("FW reports that HCA-attached memory " "is %s hidden; does not match PCI config\n", (info & QUERY_DDR_INFO_HIDDEN_FLAG) ? 
- "" : "not"); + "" : "not")); } if (info & QUERY_DDR_INFO_HIDDEN_FLAG) - mthca_dbg(dev, "HCA-attached memory is hidden.\n"); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("HCA-attached memory is hidden.\n")); - mthca_dbg(dev, "HCA memory size %d KB (start %llx, end %llx)\n", + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA memory size %d KB (start %I64x, end %I64x)\n", (int) ((dev->ddr_end - dev->ddr_start) >> 10), (unsigned long long) dev->ddr_start, - (unsigned long long) dev->ddr_end); + (unsigned long long) dev->ddr_end)); out: mthca_free_mailbox(dev, mailbox); @@ -990,10 +1005,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, if (err) goto out; - MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); - dev_lim->max_srq_sz = 1 << field; - MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); - dev_lim->max_qp_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); dev_lim->reserved_qps = 1 << (field & 0xf); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); @@ -1089,28 +1100,36 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET); dev_lim->uar_scratch_entry_sz = size; - mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", - dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz); - mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", - dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz); - mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", - dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz); - mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", - dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz); - mthca_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", - dev_lim->reserved_mrws, dev_lim->reserved_mtts); - mthca_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", - dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars); - mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", - dev_lim->max_pds, dev_lim->reserved_mgms); - - mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max QPs: %d, reserved QPs: %d, entry size: %d\n", + dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max CQs: %d, reserved CQs: %d, entry size: %d\n", + dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("reserved MPTs: %d, reserved MTTs: %d\n", + dev_lim->reserved_mrws, dev_lim->reserved_mtts)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", + dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max QP/MCG: %d, reserved MGMs: %d\n", + dev_lim->max_pds, dev_lim->reserved_mgms)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz)); + + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Flags: %08x\n", dev_lim->flags)); if (mthca_is_memfree(dev)) { + MTHCA_GET(field, outbox, 
QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = 1 << field; MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET); dev_lim->hca.arbel.resize_srq = field & 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); dev_lim->max_sg = min(field, dev_lim->max_sg); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET); + dev_lim->max_desc_sz = min((int)size, dev_lim->max_desc_sz); MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET); dev_lim->mpt_entry_sz = size; MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET); @@ -1124,18 +1143,24 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox, QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET); - if (dev_lim->hca.arbel.bmme_flags & 1) - mthca_dbg(dev, "Base MM extensions: yes " + if (dev_lim->hca.arbel.bmme_flags & 1){ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Base MM extensions: yes " "(flags %d, max PBL %d, rsvd L_Key %08x)\n", dev_lim->hca.arbel.bmme_flags, dev_lim->hca.arbel.max_pbl_sz, - dev_lim->hca.arbel.reserved_lkey); - else - mthca_dbg(dev, "Base MM extensions: no\n"); + dev_lim->hca.arbel.reserved_lkey)); + }else{ + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Base MM extensions: no\n")); + } - mthca_dbg(dev, "Max ICM size %lld MB\n", - (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20); - } else { + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max ICM size %I64d MB\n", + (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20)); + } + else { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = (1 << field) - 1; MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET); dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f); dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE; @@ -1159,8 +1184,8 @@ static void get_board_id(u8 *vsd, char *board_id) RtlZeroMemory(board_id, MTHCA_BOARD_ID_LEN); - if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && - be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + if (cl_ntoh16(*(u16*)(vsd + VSD_OFFSET_SIG1)) == VSD_SIGNATURE_TOPSPIN && + cl_ntoh16(*(u16*)(vsd + VSD_OFFSET_SIG2)) == VSD_SIGNATURE_TOPSPIN) { strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); } else { /* @@ -1170,7 +1195,7 @@ static void get_board_id(u8 *vsd, char *board_id) */ for (i = 0; i < 4; ++i) ((u32 *) board_id)[i] = - swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + _byteswap_ulong(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); } } @@ -1265,14 +1290,14 @@ int mthca_INIT_HCA(struct mthca_dev *dev, RtlZeroMemory(inbox, INIT_HCA_IN_SIZE); #if defined(__LITTLE_ENDIAN) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cl_hton32(1 << 1); #elif defined(__BIG_ENDIAN) - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cl_hton32(1 << 1); #else #error Host endianness not defined #endif /* Check port for UD address vector: */ - *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cl_hton32(1); /* We leave wqe_quota, responder_exu, etc as 0 (default) */ @@ -1338,21 +1363,19 @@ int mthca_INIT_IB(struct mthca_dev *dev, int err; u32 flags; -#define INIT_IB_IN_SIZE 56 -#define INIT_IB_FLAGS_OFFSET 0x00 -#define INIT_IB_FLAG_SIG (1 << 18) -#define 
INIT_IB_FLAG_NG (1 << 17) -#define INIT_IB_FLAG_G0 (1 << 16) -#define INIT_IB_FLAG_1X (1 << 8) -#define INIT_IB_FLAG_4X (1 << 9) -#define INIT_IB_FLAG_12X (1 << 11) -#define INIT_IB_VL_SHIFT 4 -#define INIT_IB_MTU_SHIFT 12 -#define INIT_IB_MAX_GID_OFFSET 0x06 -#define INIT_IB_MAX_PKEY_OFFSET 0x0a -#define INIT_IB_GUID0_OFFSET 0x10 -#define INIT_IB_NODE_GUID_OFFSET 0x18 -#define INIT_IB_SI_GUID_OFFSET 0x20 +#define INIT_IB_IN_SIZE 56 +#define INIT_IB_FLAGS_OFFSET 0x00 +#define INIT_IB_FLAG_SIG (1 << 18) +#define INIT_IB_FLAG_NG (1 << 17) +#define INIT_IB_FLAG_G0 (1 << 16) +#define INIT_IB_VL_SHIFT 4 +#define INIT_IB_PORT_WIDTH_SHIFT 8 +#define INIT_IB_MTU_SHIFT 12 +#define INIT_IB_MAX_GID_OFFSET 0x06 +#define INIT_IB_MAX_PKEY_OFFSET 0x0a +#define INIT_IB_GUID0_OFFSET 0x10 +#define INIT_IB_NODE_GUID_OFFSET 0x18 +#define INIT_IB_SI_GUID_OFFSET 0x20 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) @@ -1362,12 +1385,11 @@ int mthca_INIT_IB(struct mthca_dev *dev, RtlZeroMemory(inbox, INIT_IB_IN_SIZE); flags = 0; - flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0; - flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0; flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0; flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0; flags |= param->vl_cap << INIT_IB_VL_SHIFT; + flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT; flags |= param->mtu_cap << INIT_IB_MTU_SHIFT; MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET); @@ -1446,8 +1468,8 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status return PTR_ERR(mailbox); inbox = mailbox->buf; - inbox[0] = cpu_to_be64(virt); - inbox[1] = cpu_to_be64(dma_addr); + inbox[0] = cl_hton64(virt); + inbox[1] = cl_hton64(dma_addr); err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status); @@ -1455,16 +1477,16 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status mthca_free_mailbox(dev, mailbox); if (!err) - mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", - (unsigned long long) dma_addr, (unsigned long long) virt); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Mapped page at %I64x to %I64x for ICM.\n", + (unsigned long long) dma_addr, (unsigned long long) virt)); return err; } int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status) { - mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n", - page_count, (unsigned long long) virt); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Unmapping %d pages at %I64x from ICM.\n", + page_count, (unsigned long long) virt)); return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status); } @@ -1493,6 +1515,7 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, * pages needed. */ *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12); + *aux_pages = ALIGN(*aux_pages, PAGE_SIZE >> 12) >> (PAGE_SHIFT - 12); return 0; } @@ -1527,9 +1550,9 @@ int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, int eq_num, u8 *status) { - mthca_dbg(dev, "%s mask %016llx for eqn %d\n", + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("%s mask %016I64x for eqn %d\n", unmap ? 
"Clearing" : "Setting", - (unsigned long long) event_mask, eq_num); + (unsigned long long) event_mask, eq_num)); return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num, 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); } @@ -1639,15 +1662,15 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, } else { if (0) { int i; - mthca_dbg(dev, "Dumping QP context:\n"); - printk(" opt param mask: %08x\n", be32_to_cpup((__be32 *)mailbox->buf)); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Dumping QP context:\n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,(" opt param mask: %08x\n", cl_ntoh32(*(__be32 *)mailbox->buf))); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) - printk(" [%02x] ", i * 4); - printk(" %08x", - be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,(" [%02x] ", i * 4)); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,(" %08x", + cl_ntoh32(((__be32 *) mailbox->buf)[i + 2]))); if ((i + 1) % 8 == 0) - printk("\n"); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("\n")); } } } @@ -1659,15 +1682,15 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, if (0 && mailbox) { int i; - mthca_dbg(dev, "Dumping QP context:\n"); - printk(" %08x\n", be32_to_cpup((__be32 *)mailbox->buf)); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Dumping QP context:\n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,(" %08x\n", cl_ntoh32(*(__be32 *)mailbox->buf))); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", - be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("[%02x] ", i * 4)); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,(" %08x", + cl_ntoh32(((__be32 *) mailbox->buf)[i + 2]))); if ((i + 1) % 8 == 0) - printk("\n"); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("\n")); } } @@ -1715,7 +1738,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + int port, struct _ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status) { struct mthca_mailbox *inmailbox, *outmailbox; @@ -1761,24 +1784,24 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, RtlZeroMemory(inbox + 256, 256); MTHCA_PUT(inbox, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); - MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); + MTHCA_PUT(inbox, in_wc->recv.ud.remote_qp, MAD_IFC_RQPN_OFFSET); - val = in_wc->sl << 4; + val = in_wc->recv.ud.remote_sl << 4; MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET); - val = in_wc->dlid_path_bits | - (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); + val = in_wc->recv.ud.path_bits | + (in_wc->recv.ud.recv_opt & IB_RECV_OPT_GRH_VALID ? 
0x80 : 0); MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET); - MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); - MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); + MTHCA_PUT(inbox, in_wc->recv.ud.remote_lid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, in_wc->recv.ud.pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); op_modifier |= 0x10; - in_modifier |= in_wc->slid << 16; + in_modifier |= in_wc->recv.ud.remote_lid << 16; } err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_cmd.h b/branches/MTHCA/hw/mthca/kernel/mthca_cmd.h index f79232a7..8f8d8112 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_cmd.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_cmd.h @@ -220,8 +220,7 @@ struct mthca_init_hca_param { }; struct mthca_init_ib_param { - int enable_1x; - int enable_4x; + int port_width; int vl_cap; int mtu_cap; u16 gid_cap; @@ -312,7 +311,7 @@ int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, u8 *status); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + int port, struct _ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status); int mthca_READ_MGM(struct mthca_dev *dev, int index, struct mthca_mailbox *mailbox, u8 *status); diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_cq.c b/branches/MTHCA/hw/mthca/kernel/mthca_cq.c index 42593cb9..e711d1d0 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_cq.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_cq.c @@ -39,15 +39,23 @@ #include #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_cq.tmh" +#endif #include "mthca_cmd.h" #include "mthca_memfree.h" -enum { - MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE -}; + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_cq_table) +#pragma alloc_text (PAGE, mthca_cleanup_cq_table) +#endif enum { - MTHCA_CQ_ENTRY_SIZE = 0x20 + MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE }; /* @@ -70,7 +78,7 @@ struct mthca_cq_context { __be32 ci_db; /* Arbel only */ __be32 state_db; /* Arbel only */ u32 reserved; -} __attribute__((packed)); +}; #pragma pack(pop) #define MTHCA_CQ_STATUS_OK ( 0 << 28) @@ -127,12 +135,12 @@ struct mthca_err_cqe { __be32 my_qpn; u32 reserved1[3]; u8 syndrome; - u8 reserved2; + u8 vendor_err; __be16 db_cnt; - u32 reserved3; + u32 reserved2; __be32 wqe; u8 opcode; - u8 reserved4[2]; + u8 reserved3[2]; u8 owner; }; @@ -152,9 +160,9 @@ struct mthca_err_cqe { static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) { if (cq->is_direct) - return (struct mthca_cqe *)(cq->queue.direct.buf + (entry * MTHCA_CQ_ENTRY_SIZE)); + return (struct mthca_cqe *)((u8*)cq->queue.direct.page + (entry * MTHCA_CQ_ENTRY_SIZE)); else - return (struct mthca_cqe *)(cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].buf + return (struct mthca_cqe *)((u8*)cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].page + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE); } @@ -179,10 +187,11 @@ static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) __be32 *cqe = cqe_ptr; (void) cqe; /* avoid warning if mthca_dbg compiled away... 
*/ - mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", - be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]), - be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]), - be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7])); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_CQ,("CQE contents \n")); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_CQ,("\t[%2x] %08x %08x %08x %08x\n",0, + cl_ntoh32(cqe[0]), cl_ntoh32(cqe[1]), cl_ntoh32(cqe[2]), cl_ntoh32(cqe[3]))); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_CQ,("\t[%2x] %08x %08x %08x %08x \n",16, + cl_ntoh32(cqe[4]), cl_ntoh32(cqe[5]), cl_ntoh32(cqe[6]), cl_ntoh32(cqe[7]))); } /* @@ -195,11 +204,11 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, __be32 doorbell[2]; if (mthca_is_memfree(dev)) { - *cq->set_ci_db = cpu_to_be32(cq->cons_index); + *cq->set_ci_db = cl_hton32(cq->cons_index); wmb(); } else { - doorbell[0] = cpu_to_be32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); - doorbell[1] = cpu_to_be32(incr - 1); + doorbell[0] = cl_hton32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); + doorbell[1] = cl_hton32(incr - 1); mthca_write64(doorbell, dev->kar + MTHCA_CQ_DOORBELL, @@ -214,7 +223,7 @@ void mthca_cq_completion(struct mthca_dev *dev, u32 cqn) cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); if (!cq) { - mthca_warn(dev, "Completion event for bogus CQ %08x\n", cqn); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Completion event for bogus CQ %08x\n", cqn)); return; } @@ -228,17 +237,18 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn, { struct mthca_cq *cq; struct ib_event event; + SPIN_LOCK_PREP(lh); - spin_lock(&dev->cq_table.lock); + spin_lock(&dev->cq_table.lock, &lh); cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); - spin_unlock(&dev->cq_table.lock); + spin_unlock(&lh); if (!cq) { - mthca_warn(dev, "Async event for bogus CQ %08x\n", cqn); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Async event for bogus CQ %08x\n", cqn)); return; } @@ -259,17 +269,19 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, struct mthca_cqe *cqe; u32 prod_index; int nfreed = 0; + SPIN_LOCK_PREP(lht); + SPIN_LOCK_PREP(lh); - spin_lock_irq(&dev->cq_table.lock); + spin_lock_irq(&dev->cq_table.lock, &lht); cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); if (cq) atomic_inc(&cq->refcount); - spin_unlock_irq(&dev->cq_table.lock); + spin_unlock_irq(&lht); if (!cq) return; - spin_lock_irq(&cq->lock); + spin_lock_irq(&cq->lock, &lh); /* * First we need to find the current producer index, so we @@ -285,26 +297,25 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, break; if (0) - mthca_dbg(dev, "Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", - qpn, cqn, cq->cons_index, prod_index); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", + qpn, cqn, cq->cons_index, prod_index)); /* * Now sweep backwards through the CQ, removing CQ entries * that match our QP by copying older entries on top of them. 
*/ - while (prod_index > cq->cons_index) { - cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); - if (cqe->my_qpn == cpu_to_be32(qpn)) { + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + if (cqe->my_qpn == cl_hton32(qpn)) { if (srq) - mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); + mthca_free_srq_wqe(srq, cl_ntoh32(cqe->wqe)); ++nfreed; + } + else + if (nfreed) { + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); } - else if (nfreed) - memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & - cq->ibcq.cqe), - cqe, - MTHCA_CQ_ENTRY_SIZE); - --prod_index; } if (nfreed) { @@ -313,7 +324,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, update_cons_index(dev, cq, nfreed); } - spin_unlock_irq(&cq->lock); + spin_unlock_irq(&lh); if (atomic_dec_and_test(&cq->refcount)) wake_up(&cq->wait); } @@ -321,84 +332,88 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, struct mthca_qp *qp, int wqe_index, int is_send, struct mthca_err_cqe *cqe, - struct ib_wc *entry, int *free_cqe) + struct _ib_wc *entry, int *free_cqe) { int err; int dbd; __be32 new_wqe; - if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { - mthca_dbg(dev, "local QP operation err " + + if (cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("Completion with errro " "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", - be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe), - cq->cqn, cq->cons_index); + cl_ntoh32(cqe->my_qpn), cl_ntoh32(cqe->wqe), + cq->cqn, cq->cons_index)); dump_cqe(dev, cqe); } + /* - * For completions in error, only work request ID, status (and - * freed resource count for RD) have to be set. + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. 
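+	 * The hardware syndrome is translated to the IBAL IB_WCS_* status
+	 * codes in the switch below, and the vendor-specific error byte of
+	 * the CQE is passed back in entry->vendor_specific.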
*/ switch (cqe->syndrome) { case SYNDROME_LOCAL_LENGTH_ERR: - entry->status = IB_WC_LOC_LEN_ERR; + entry->status = IB_WCS_LOCAL_LEN_ERR; break; case SYNDROME_LOCAL_QP_OP_ERR: - entry->status = IB_WC_LOC_QP_OP_ERR; + entry->status = IB_WCS_LOCAL_OP_ERR; break; case SYNDROME_LOCAL_EEC_OP_ERR: - entry->status = IB_WC_LOC_EEC_OP_ERR; + entry->status = IB_WCS_LOCAL_EEC_OP_ERR; break; case SYNDROME_LOCAL_PROT_ERR: - entry->status = IB_WC_LOC_PROT_ERR; + entry->status = IB_WCS_LOCAL_PROTECTION_ERR; break; case SYNDROME_WR_FLUSH_ERR: - entry->status = IB_WC_WR_FLUSH_ERR; + entry->status = IB_WCS_WR_FLUSHED_ERR; break; case SYNDROME_MW_BIND_ERR: - entry->status = IB_WC_MW_BIND_ERR; + entry->status = IB_WCS_MEM_WINDOW_BIND_ERR; break; case SYNDROME_BAD_RESP_ERR: - entry->status = IB_WC_BAD_RESP_ERR; + entry->status = IB_WCS_BAD_RESP_ERR; break; case SYNDROME_LOCAL_ACCESS_ERR: - entry->status = IB_WC_LOC_ACCESS_ERR; + entry->status = IB_WCS_LOCAL_ACCESS_ERR; break; case SYNDROME_REMOTE_INVAL_REQ_ERR: - entry->status = IB_WC_REM_INV_REQ_ERR; + entry->status = IB_WCS_REM_INV_REQ_ERR; break; case SYNDROME_REMOTE_ACCESS_ERR: - entry->status = IB_WC_REM_ACCESS_ERR; + entry->status = IB_WCS_REM_ACCESS_ERR; break; case SYNDROME_REMOTE_OP_ERR: - entry->status = IB_WC_REM_OP_ERR; + entry->status = IB_WCS_REM_OP_ERR; break; case SYNDROME_RETRY_EXC_ERR: - entry->status = IB_WC_RETRY_EXC_ERR; + entry->status = IB_WCS_TIMEOUT_RETRY_ERR; break; case SYNDROME_RNR_RETRY_EXC_ERR: - entry->status = IB_WC_RNR_RETRY_EXC_ERR; + entry->status = IB_WCS_RNR_RETRY_ERR; break; case SYNDROME_LOCAL_RDD_VIOL_ERR: - entry->status = IB_WC_LOC_RDD_VIOL_ERR; + entry->status = IB_WCS_LOCAL_RDD_VIOL_ERR; break; case SYNDROME_REMOTE_INVAL_RD_REQ_ERR: - entry->status = IB_WC_REM_INV_RD_REQ_ERR; + entry->status = IB_WCS_REM_INVALID_REQ_ERR; break; case SYNDROME_REMOTE_ABORTED_ERR: - entry->status = IB_WC_REM_ABORT_ERR; + entry->status = IB_WCS_REM_ABORT_ERR; break; case SYNDROME_INVAL_EECN_ERR: - entry->status = IB_WC_INV_EECN_ERR; + entry->status = IB_WCS_INV_EECN_ERR; break; case SYNDROME_INVAL_EEC_STATE_ERR: - entry->status = IB_WC_INV_EEC_STATE_ERR; + entry->status = IB_WCS_INV_EEC_STATE_ERR; break; default: - entry->status = IB_WC_GENERAL_ERR; + entry->status = IB_WCS_GENERAL_ERR; break; } + entry->vendor_specific = cqe->vendor_err; + /* * Mem-free HCAs always generate one CQE per WQE, even in the * error case, so we don't have to check the doorbell count, etc. @@ -415,10 +430,10 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, * doorbell count, free the CQE. Otherwise just update it for * the next poll operation. 
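+	 * When the CQE is kept, it is rewritten in place as a flush-error
+	 * CQE (db_cnt reduced by dbd, syndrome set to SYNDROME_WR_FLUSH_ERR)
+	 * so a later poll can report the remaining doorbell-counted requests.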
*/ - if (!(new_wqe & cpu_to_be32(0x3f)) || (!cqe->db_cnt && dbd)) + if (!(new_wqe & cl_hton32(0x3f)) || (!cqe->db_cnt && dbd)) return 0; - cqe->db_cnt = cpu_to_be16(be16_to_cpu(cqe->db_cnt) - dbd); + cqe->db_cnt = cl_hton16(cl_ntoh16(cqe->db_cnt) - dbd); cqe->wqe = new_wqe; cqe->syndrome = SYNDROME_WR_FLUSH_ERR; @@ -431,7 +446,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, struct mthca_cq *cq, struct mthca_qp **cur_qp, int *freed, - struct ib_wc *entry) + struct _ib_wc *entry) { struct mthca_wq *wq; struct mthca_cqe *cqe; @@ -441,6 +456,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, int free_cqe = 1; int err = 0; + HCA_ENTER(HCA_DBG_CQ); cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; @@ -451,10 +467,10 @@ static inline int mthca_poll_one(struct mthca_dev *dev, */ rmb(); - if (0) { - mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", - cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), - be32_to_cpu(cqe->wqe)); + if(0){ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_CQ,("%x/%d: CQE -> QPN %06x, WQE @ %08x\n", + cq->cqn, cq->cons_index, cl_ntoh32(cqe->my_qpn), + cl_ntoh32(cqe->wqe))); dump_cqe(dev, cqe); } @@ -462,18 +478,18 @@ static inline int mthca_poll_one(struct mthca_dev *dev, MTHCA_ERROR_CQE_OPCODE_MASK; is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80; - if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) { + if (!*cur_qp || cl_ntoh32(cqe->my_qpn) != (*cur_qp)->qpn) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. */ *cur_qp = mthca_array_get(&dev->qp_table.qp, - be32_to_cpu(cqe->my_qpn) & + cl_ntoh32(cqe->my_qpn) & (dev->limits.num_qps - 1)); if (!*cur_qp) { - mthca_warn(dev, "CQ entry for unknown QP %06x\n", - be32_to_cpu(cqe->my_qpn) & 0xffffff); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_CQ, ("CQ entry for unknown QP %06x\n", + cl_ntoh32(cqe->my_qpn) & 0xffffff)); err = -EINVAL; goto out; } @@ -483,20 +499,20 @@ static inline int mthca_poll_one(struct mthca_dev *dev, if (is_send) { wq = &(*cur_qp)->sq; - wqe_index = ((be32_to_cpu(cqe->wqe) - (*cur_qp)->send_wqe_offset) + wqe_index = ((cl_ntoh32(cqe->wqe) - (*cur_qp)->send_wqe_offset) >> wq->wqe_shift); entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max]; } else if ((*cur_qp)->ibqp.srq) { struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); - u32 wqe = be32_to_cpu(cqe->wqe); + u32 wqe = cl_ntoh32(cqe->wqe); wq = NULL; wqe_index = wqe >> srq->wqe_shift; entry->wr_id = srq->wrid[wqe_index]; mthca_free_srq_wqe(srq, wqe); } else { wq = &(*cur_qp)->rq; - wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; + wqe_index = cl_ntoh32(cqe->wqe) >> wq->wqe_shift; entry->wr_id = (*cur_qp)->wrid[wqe_index]; } @@ -509,79 +525,78 @@ static inline int mthca_poll_one(struct mthca_dev *dev, wq->last_comp = wqe_index; } - if (is_error) { - err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, - (struct mthca_err_cqe *) cqe, - entry, &free_cqe); - goto out; - } - if (is_send) { - entry->wc_flags = 0; + entry->recv.ud.recv_opt = 0; switch (cqe->opcode) { case MTHCA_OPCODE_RDMA_WRITE: - entry->opcode = IB_WC_RDMA_WRITE; + entry->wc_type = IB_WC_RDMA_WRITE; break; case MTHCA_OPCODE_RDMA_WRITE_IMM: - entry->opcode = IB_WC_RDMA_WRITE; - entry->wc_flags |= IB_WC_WITH_IMM; + entry->wc_type = IB_WC_RDMA_WRITE; + entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; break; case MTHCA_OPCODE_SEND: - entry->opcode = IB_WC_SEND; + entry->wc_type = IB_WC_SEND; break; case MTHCA_OPCODE_SEND_IMM: - entry->opcode = IB_WC_SEND; - 
entry->wc_flags |= IB_WC_WITH_IMM; + entry->wc_type = IB_WC_SEND; + entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; break; case MTHCA_OPCODE_RDMA_READ: - entry->opcode = IB_WC_RDMA_READ; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->wc_type = IB_WC_RDMA_READ; + entry->length = cl_ntoh32(cqe->byte_cnt); break; case MTHCA_OPCODE_ATOMIC_CS: - entry->opcode = IB_WC_COMPARE_SWAP; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->wc_type = IB_WC_COMPARE_SWAP; + entry->length = cl_ntoh32(cqe->byte_cnt); break; case MTHCA_OPCODE_ATOMIC_FA: - entry->opcode = IB_WC_FETCH_ADD; - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->wc_type = IB_WC_FETCH_ADD; + entry->length = cl_ntoh32(cqe->byte_cnt); break; case MTHCA_OPCODE_BIND_MW: - entry->opcode = IB_WC_MW_BIND; + entry->wc_type = IB_WC_MW_BIND; break; default: - entry->opcode = MTHCA_OPCODE_INVALID; + entry->wc_type = MTHCA_OPCODE_INVALID; break; } } else { - entry->byte_len = be32_to_cpu(cqe->byte_cnt); + entry->length = cl_ntoh32(cqe->byte_cnt); switch (cqe->opcode & 0x1f) { case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: - entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; - entry->opcode = IB_WC_RECV; + entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec; + entry->wc_type = IB_WC_RECV; break; case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: - entry->wc_flags = IB_WC_WITH_IMM; - entry->imm_data = cqe->imm_etype_pkey_eec; - entry->opcode = IB_WC_RECV_RDMA_WRITE; + entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec; + entry->wc_type = IB_WC_RECV_RDMA_WRITE; break; default: - entry->wc_flags = 0; - entry->opcode = IB_WC_RECV; + entry->recv.ud.recv_opt = 0; + entry->wc_type = IB_WC_RECV; break; } - entry->slid = be16_to_cpu(cqe->rlid); - entry->sl = be16_to_cpu(cqe->sl_g_mlpath) >> 12; - entry->src_qp = be32_to_cpu(cqe->rqpn) & 0xffffff; - entry->dlid_path_bits = be16_to_cpu(cqe->sl_g_mlpath) & 0x7f; - entry->pkey_index = (u16)(be32_to_cpu(cqe->imm_etype_pkey_eec) >> 16); - entry->wc_flags |= be16_to_cpu(cqe->sl_g_mlpath) & 0x80 ? - IB_WC_GRH : 0; + entry->recv.ud.remote_lid = cqe->rlid; + entry->recv.ud.remote_qp = cqe->rqpn & 0xffffff00; + entry->recv.ud.pkey_index = (u16)(cl_ntoh32(cqe->imm_etype_pkey_eec) >> 16); + entry->recv.ud.remote_sl = cl_ntoh16(cqe->sl_g_mlpath) >> 12; + entry->recv.ud.path_bits = cl_ntoh16(cqe->sl_g_mlpath) & 0x7f; + entry->recv.ud.recv_opt |= cl_ntoh16(cqe->sl_g_mlpath) & 0x80 ? 
+ IB_RECV_OPT_GRH_VALID : 0; } - entry->status = IB_WC_SUCCESS; + if (is_error) { + err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, + entry, &free_cqe); + } + else + entry->status = IB_WCS_SUCCESS; out: if (likely(free_cqe)) { @@ -589,12 +604,12 @@ static inline int mthca_poll_one(struct mthca_dev *dev, ++(*freed); ++cq->cons_index; } - + HCA_EXIT(HCA_DBG_CQ); return err; } int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, - struct ib_wc *entry) + struct _ib_wc *entry) { struct mthca_dev *dev = to_mdev(ibcq->device); struct mthca_cq *cq = to_mcq(ibcq); @@ -602,8 +617,9 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int err = 0; int freed = 0; int npolled; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&cq->lock); + spin_lock_irqsave(&cq->lock, &lh); for (npolled = 0; npolled < num_entries; ++npolled) { err = mthca_poll_one(dev, cq, &qp, @@ -617,20 +633,69 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, update_cons_index(dev, cq, freed); } - spin_unlock_irqrestore(&cq->lock); + spin_unlock_irqrestore(&lh); - return err == 0 || err == -EAGAIN ? npolled : err; + return (err == 0 || err == -EAGAIN) ? npolled : err; } +int mthca_poll_cq_list( + IN struct ib_cq *ibcq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + int err = 0; + int freed = 0; + ib_wc_t *wc_p, **next_pp; + uint32_t wc_cnt = 0; + SPIN_LOCK_PREP(lh); + + HCA_ENTER(HCA_DBG_CQ); + + spin_lock_irqsave(&cq->lock, &lh); + + // loop through CQ + next_pp = pp_done_wclist; + wc_p = *pp_free_wclist; + while( wc_p ) { + // poll one CQE + err = mthca_poll_one(dev, cq, &qp, &freed, wc_p); + if (err) + break; + + // prepare for the next loop + *next_pp = wc_p; + next_pp = &wc_p->p_next; + wc_p = wc_p->p_next; + } + + // prepare the results + *pp_free_wclist = wc_p; /* Set the head of the free list. */ + *next_pp = NULL; /* Clear the tail of the done list. */ + + // update consumer index + if (freed) { + wmb(); + update_cons_index(dev, cq, freed); + } + + spin_unlock_irqrestore(&lh); + HCA_EXIT(HCA_DBG_CQ); + return (err == 0 || err == -EAGAIN)? 0 : err; +} + + int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? + doorbell[0] = cl_hton32((notify == IB_CQ_SOLICITED ? MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); - doorbell[1] = (__force __be32) 0xffffffff; + doorbell[1] = (__be32) 0xffffffff; mthca_write64(doorbell, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, @@ -647,10 +712,10 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) __be32 ci; sn = cq->arm_sn & 3; - ci = cpu_to_be32(cq->cons_index); + ci = cl_hton32(cq->cons_index); doorbell[0] = ci; - doorbell[1] = cpu_to_be32((cq->cqn << 8) | (2 << 5) | (sn << 3) | + doorbell[1] = cl_hton32((cq->cqn << 8) | (2 << 5) | (sn << 3) | (notify == IB_CQ_SOLICITED ? 1 : 2)); mthca_write_db_rec(doorbell, cq->arm_db); @@ -661,7 +726,7 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) */ wmb(); - doorbell[0] = cpu_to_be32((sn << 28) | + doorbell[0] = cl_hton32((sn << 28) | (notify == IB_CQ_SOLICITED ? 
MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : MTHCA_ARBEL_CQ_DB_REQ_NOT) | @@ -691,6 +756,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, int err = -ENOMEM; u8 status; int i; + SPIN_LOCK_PREP(lh); might_sleep(); @@ -745,46 +811,46 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, init_waitqueue_head(&cq->wait); RtlZeroMemory(cq_context, sizeof *cq_context); - cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | + cq_context->flags = cl_hton32(MTHCA_CQ_STATUS_OK | MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); - cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + cq_context->logsize_usrpage = cl_hton32((ffs(nent) - 1) << 24); if (ctx) - cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + cq_context->logsize_usrpage |= cl_hton32(ctx->uar.index); else - cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); - cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); - cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); - cq_context->pd = cpu_to_be32(pdn); - cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); - cq_context->cqn = cpu_to_be32(cq->cqn); + cq_context->logsize_usrpage |= cl_hton32(dev->driver_uar.index); + cq_context->error_eqn = cl_hton32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); + cq_context->comp_eqn = cl_hton32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); + cq_context->pd = cl_hton32(pdn); + cq_context->lkey = cl_hton32(cq->mr.ibmr.lkey); + cq_context->cqn = cl_hton32(cq->cqn); if (mthca_is_memfree(dev)) { - cq_context->ci_db = cpu_to_be32(cq->set_ci_db_index); - cq_context->state_db = cpu_to_be32(cq->arm_db_index); + cq_context->ci_db = cl_hton32(cq->set_ci_db_index); + cq_context->state_db = cl_hton32(cq->arm_db_index); } err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); if (err) { - mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_CQ failed (%d)\n", err)); goto err_out_free_mr; } if (status) { - mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n", - status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_CQ returned status 0x%02x\n", + status)); err = -EINVAL; goto err_out_free_mr; } - spin_lock_irq(&dev->cq_table.lock); + spin_lock_irq(&dev->cq_table.lock, &lh); if (mthca_array_set(&dev->cq_table.cq, cq->cqn & (dev->limits.num_cqs - 1), cq)) { - spin_unlock_irq(&dev->cq_table.lock); + spin_unlock_irq(&lh); goto err_out_free_mr; } - spin_unlock_irq(&dev->cq_table.lock); + spin_unlock_irq(&lh); cq->cons_index = 0; @@ -822,36 +888,39 @@ void mthca_free_cq(struct mthca_dev *dev, struct mthca_mailbox *mailbox; int err; u8 status; + SPIN_LOCK_PREP(lh); might_sleep(); mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { - mthca_warn(dev, "No memory for mailbox to free CQ.\n"); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("No memory for mailbox to free CQ.\n")); return; } err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); - if (err) - mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); + if (err){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_CQ failed (%d)\n", err)); + } + else if (status){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_CQ returned status 0x%02x\n", status)); + } if (0) { __be32 *ctx = mailbox->buf; int j; - printk(KERN_ERROR "context for CQN %x (cons index %x, next sw %d)\n", + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("context for CQN %x (cons index %x, next sw %d)\n", cq->cqn, cq->cons_index, - cq->is_kernel ? 
!!next_cqe_sw(cq) : 0); + cq->is_kernel ? !!next_cqe_sw(cq) : 0)); for (j = 0; j < 16; ++j) - printk(KERN_ERROR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("[%2x] %08x\n", j * 4, cl_ntoh32(ctx[j]))); } - spin_lock_irq(&dev->cq_table.lock); + spin_lock_irq(&dev->cq_table.lock, &lh); mthca_array_clear(&dev->cq_table.cq, cq->cqn & (dev->limits.num_cqs - 1)); - spin_unlock_irq(&dev->cq_table.lock); + spin_unlock_irq(&lh); /* wait for all RUNNING DPCs on that EQ to complete */ { @@ -879,7 +948,7 @@ void mthca_free_cq(struct mthca_dev *dev, mthca_free_mailbox(dev, mailbox); } -int __devinit mthca_init_cq_table(struct mthca_dev *dev) +int mthca_init_cq_table(struct mthca_dev *dev) { int err; @@ -900,8 +969,10 @@ int __devinit mthca_init_cq_table(struct mthca_dev *dev) return err; } -void __devexit mthca_cleanup_cq_table(struct mthca_dev *dev) +void mthca_cleanup_cq_table(struct mthca_dev *dev) { mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs); mthca_alloc_cleanup(&dev->cq_table.alloc); } + + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_dev.h b/branches/MTHCA/hw/mthca/kernel/mthca_dev.h index adf0f84e..73ad3dfb 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_dev.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_dev.h @@ -40,13 +40,17 @@ #define MTHCA_DEV_H #include "hca_driver.h" +#include "mthca_log.h" #include "mthca_provider.h" #include "mthca_doorbell.h" -#define DRV_NAME "ib_mthca" +// must be synchronized with MTHCA.INF +#define DRV_NAME "mthca" #define PFX DRV_NAME ": " -#define DRV_VERSION "1.30" -#define DRV_RELDATE "Sep 10, 2005" +#define DRV_VERSION "1.0.4" +#define DRV_RELDATE "03/01/2006" + +#define HZ 1000000 /* 1 sec in usecs */ enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, @@ -78,6 +82,8 @@ enum { /* Arbel FW gives us these, but we need them for Tavor */ MTHCA_MPT_ENTRY_SIZE = 0x40, MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) }; enum { @@ -87,7 +93,7 @@ enum { MTHCA_NUM_EQ }; -enum { +enum mthca_wr_opcode{ MTHCA_OPCODE_NOP = 0x00, MTHCA_OPCODE_RDMA_WRITE = 0x08, MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09, @@ -123,12 +129,17 @@ struct mthca_limits { int num_uars; int max_sg; int num_qps; + int max_wqes; + int max_desc_sz; + int max_qp_init_rdma; int reserved_qps; int num_srqs; int reserved_srqs; + int max_srq_wqes; int num_eecs; int reserved_eecs; int num_cqs; + int max_cqes; int reserved_cqs; int num_eqs; int reserved_eqs; @@ -143,6 +154,9 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u32 page_size_cap; + u32 flags; + u8 port_width_cap; }; struct mthca_alloc { @@ -201,8 +215,7 @@ struct mthca_eq_table { u32 arm_mask; struct mthca_eq eq[MTHCA_NUM_EQ]; u64 icm_virt; - void *icm_page; - dma_addr_t icm_dma; + struct scatterlist sg; int have_irq; u8 inta_pin; KLOCK_QUEUE_HANDLE lockh; @@ -249,6 +262,17 @@ struct mthca_mcg_table { struct mthca_icm_table *table; }; +struct mthca_catas_err { + u64 addr; + u32 __iomem *map; + SIZE_T map_size; + unsigned long stop; + u32 size; + KTIMER timer; + KDPC timer_dpc; + LARGE_INTEGER interval; +}; + struct mthca_dev { struct ib_device ib_dev; hca_dev_ext_t *ext; @@ -314,7 +338,7 @@ struct mthca_dev { struct mthca_qp_table qp_table; struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; - + struct mthca_catas_err catas_err; struct mthca_uar driver_uar; struct mthca_db_table *db_tab; struct mthca_pd driver_pd; @@ -332,12 +356,12 @@ enum { MTHCA_DEV_INITIALIZED, MTHCA_DEV_FAILED }; - -#define mthca_dbg _mthca_dbg -#define 
mthca_err _mthca_err -#define mthca_info _mthca_info -#define mthca_warn _mthca_warn +enum { + MTHCA_CQ_ENTRY_SIZE = 0x20 +}; + + #define MTHCA_GET(dest, source, offset) \ do { \ @@ -345,9 +369,9 @@ enum { void *__q = &(dest); \ switch (sizeof (dest)) { \ case 1: *(u8 *)__q = *(u8 *) __p; break; \ - case 2: *(u16 *)__q = (u16)be16_to_cpup((u16 *)__p); break; \ - case 4: *(u32 *)__q = (u32)be32_to_cpup((u32 *)__p); break; \ - case 8: *(u64 *)__q = (u64)be64_to_cpup((u64 *)__p); break; \ + case 2: *(u16 *)__q = (u16)cl_ntoh16(*(u16 *)__p); break; \ + case 4: *(u32 *)__q = (u32)cl_ntoh32(*(u32 *)__p); break; \ + case 8: *(u64 *)__q = (u64)cl_ntoh64(*(u64 *)__p); break; \ default: ASSERT(0); \ } \ } while (0) @@ -358,9 +382,9 @@ enum { void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ case 1: *(u8 *) __d = (u8)(source); break; \ - case 2: *(__be16 *) __d = cpu_to_be16((u16)source); break; \ - case 4: *(__be32 *) __d = cpu_to_be32((u32)source); break; \ - case 8: *(__be64 *) __d = cpu_to_be64((u64)source); break; \ + case 2: *(__be16 *) __d = cl_hton16((u16)source); break; \ + case 4: *(__be32 *) __d = cl_hton32((u32)source); break; \ + case 8: *(__be64 *) __d = cl_hton64((u64)source); break; \ default: ASSERT(0); \ } \ } while (0) @@ -406,6 +430,9 @@ void mthca_cleanup_mcg_table(struct mthca_dev *dev); int mthca_register_device(struct mthca_dev *dev); void mthca_unregister_device(struct mthca_dev *dev); +void mthca_start_catas_poll(struct mthca_dev *dev); +void mthca_stop_catas_poll(struct mthca_dev *dev); + int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); @@ -417,17 +444,17 @@ void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, int start_index, u64 *buffer_list, int list_len); int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, - u64 iova, u64 total_size, u32 access, struct mthca_mr *mr); + u64 iova, u64 total_size, mthca_mpt_access_t access, struct mthca_mr *mr); int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_mr *mr); + mthca_mpt_access_t access, struct mthca_mr *mr); int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, u64 *buffer_list, int buffer_size_shift, int list_len, u64 iova, u64 total_size, - u32 access, struct mthca_mr *mr); + mthca_mpt_access_t access, struct mthca_mr *mr); void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr); int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_fmr *fmr); + mthca_mpt_access_t access, struct mthca_fmr *fmr); int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int list_len, u64 iova); void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr); @@ -440,7 +467,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt); void mthca_unmap_eq_icm(struct mthca_dev *dev); int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, - struct ib_wc *entry); + struct _ib_wc *entry); int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_init_cq(struct mthca_dev *dev, int nent, @@ -457,25 +484,27 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct 
ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask); void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type); void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); -int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); +int mthca_tavor_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); +int mthca_arbel_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, @@ -510,7 +539,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, + struct _ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); @@ -527,4 +556,37 @@ static inline int mthca_is_memfree(struct mthca_dev *dev) return dev->mthca_flags & MTHCA_FLAG_MEMFREE; } +void mthca_get_av_params( struct mthca_ah *ah_p, u8 *port_num, __be16 *dlid, u8 *sr, u8 *path_bits ); + +void mthca_set_av_params( struct mthca_dev *dev, struct mthca_ah *ah_p, struct ib_ah_attr *ah_attr ); + +int ib_uverbs_init(void); +void ib_uverbs_cleanup(void); +int mthca_ah_grh_present(struct mthca_ah *ah); + + + + + +VOID +WriteEventLogEntry( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + ULONG pi_nDataItems, + ... + ); + +VOID +WriteEventLogEntryStr( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + PWCHAR pi_InsertionStr, + ULONG pi_nDataItems, + ... 
+ ); + #endif /* MTHCA_DEV_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_doorbell.h b/branches/MTHCA/hw/mthca/kernel/mthca_doorbell.h index 762dd1e0..e7a83f3e 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_doorbell.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_doorbell.h @@ -53,7 +53,7 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) { - __raw_writeq((__force u64) val, dest); + __raw_writeq((u64) val, dest); } static inline void mthca_write64(__be32 val[2], void __iomem *dest, @@ -81,17 +81,18 @@ static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) static inline void mthca_write64_raw(__be64 val, void __iomem *dest) { - __raw_writel(((__force u32 *) &val)[0], dest); - __raw_writel(((__force u32 *) &val)[1], (u8*)dest + 4); + __raw_writel(((u32 *) &val)[0], dest); + __raw_writel(((u32 *) &val)[1], (u8*)dest + 4); } static inline void mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { - spin_lock_irqsave(doorbell_lock); - __raw_writel((__force u32) val[0], dest); - __raw_writel((__force u32) val[1], (u8*)dest + 4); - spin_unlock_irqrestore(doorbell_lock); + SPIN_LOCK_PREP(lh); + spin_lock_irqsave(doorbell_lock, &lh); + __raw_writel((u32) val[0], dest); + __raw_writel((u32) val[1], (u8*)dest + 4); + spin_unlock_irqrestore(&lh); } static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_eq.c b/branches/MTHCA/hw/mthca/kernel/mthca_eq.c index a1bed5d2..b5ea667e 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_eq.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_eq.c @@ -34,12 +34,42 @@ */ #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_eq.tmh" +#endif #include "mthca_cmd.h" #include "mthca_config_reg.h" +static int mthca_map_reg(struct mthca_dev *dev, + u64 offset, unsigned long size, + void __iomem **map, SIZE_T *map_size); +static int mthca_map_eq_regs(struct mthca_dev *dev); +static void mthca_unmap_eq_regs(struct mthca_dev *dev); +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq); + + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_map_reg) +#pragma alloc_text (PAGE, mthca_map_eq_regs) +#pragma alloc_text (PAGE, mthca_init_eq_table) +#pragma alloc_text (PAGE, mthca_unmap_eq_regs) +#pragma alloc_text (PAGE, mthca_map_eq_icm) +#pragma alloc_text (PAGE, mthca_unmap_eq_icm) +#pragma alloc_text (PAGE, mthca_create_eq) +#pragma alloc_text (PAGE, mthca_cleanup_eq_table) +#endif + enum { MTHCA_NUM_ASYNC_EQE = 0x80, MTHCA_NUM_CMD_EQE = 0x80, + MTHCA_NUM_SPARE_EQE = 0x80, MTHCA_EQ_ENTRY_SIZE = 0x20 }; @@ -60,7 +90,7 @@ struct mthca_eq_context { __be32 consumer_index; __be32 producer_index; u32 reserved3[4]; -} __attribute__((packed)); +}; #pragma pack(pop) #define MTHCA_EQ_STATUS_OK ( 0 << 28) @@ -76,23 +106,24 @@ struct mthca_eq_context { #define MTHCA_EQ_STATE_ARBEL ( 8 << 8) enum { - MTHCA_EVENT_TYPE_COMP = 0x00, - MTHCA_EVENT_TYPE_PATH_MIG = 0x01, - MTHCA_EVENT_TYPE_COMM_EST = 0x02, - MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_SRQ_LAST_WQE = 0x13, - MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, - MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, - MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, - MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MTHCA_EVENT_TYPE_COMP = 0x00, + MTHCA_EVENT_TYPE_PATH_MIG = 0x01, + MTHCA_EVENT_TYPE_COMM_EST = 0x02, + MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, + MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, + MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + 
MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, + MTHCA_EVENT_TYPE_CMD = 0x0a, MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, - MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, - MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, - MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, - MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, - MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, - MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, - MTHCA_EVENT_TYPE_CMD = 0x0a + MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, + MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14 }; #define MTHCA_ASYNC_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_PATH_MIG) | \ @@ -107,8 +138,10 @@ enum { (1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ (1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ (1ULL << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE) +#define MTHCA_SRQ_EVENT_MASK ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT)) + #define MTHCA_CMD_EVENT_MASK (1ULL << MTHCA_EVENT_TYPE_CMD) #define MTHCA_EQ_DB_INC_CI (1 << 24) @@ -127,7 +160,7 @@ struct mthca_eqe { u32 raw[6]; struct { __be32 cqn; - } __attribute__((packed)) comp; + } comp; struct { u16 reserved1; __be16 token; @@ -135,24 +168,27 @@ struct mthca_eqe { u8 reserved3[3]; u8 status; __be64 out_param; - } __attribute__((packed)) cmd; + } cmd; struct { __be32 qpn; - } __attribute__((packed)) qp; + } qp; + struct { + __be32 srqn; + } srq; struct { __be32 cqn; u32 reserved1; u8 reserved2[3]; u8 syndrome; - } __attribute__((packed)) cq_err; + } cq_err; struct { u32 reserved1[2]; __be32 port; - } __attribute__((packed)) port_change; + } port_change; } event; u8 reserved3[3]; u8 owner; -} __attribute__((packed)); +} ; #pragma pack(pop) #define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) @@ -169,8 +205,8 @@ static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn); - doorbell[1] = cpu_to_be32(ci & (eq->nent - 1)); + doorbell[0] = cl_hton32(MTHCA_EQ_DB_SET_CI | eq->eqn); + doorbell[1] = cl_hton32(ci & (eq->nent - 1)); /* * This barrier makes sure that all updates to ownership bits @@ -190,7 +226,7 @@ static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u { /* See comment in tavor_set_eq_ci() above. 
*/ wmb(); - __raw_writel((__force u32) cpu_to_be32(ci), + __raw_writel((u32) cl_hton32(ci), (u8*)dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); /* We still want ordering, just not swabbing, so add a barrier */ mb(); @@ -208,7 +244,7 @@ static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); + doorbell[0] = cl_hton32(MTHCA_EQ_DB_REQ_NOT | eqn); doorbell[1] = 0; mthca_write64(doorbell, @@ -226,8 +262,8 @@ static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) if (!mthca_is_memfree(dev)) { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); - doorbell[1] = cpu_to_be32(cqn); + doorbell[0] = cl_hton32(MTHCA_EQ_DB_DISARM_CQ | eqn); + doorbell[1] = cl_hton32(cqn); mthca_write64(doorbell, dev->kar + MTHCA_EQ_DOORBELL, @@ -238,7 +274,7 @@ static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry) { unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE; - return (struct mthca_eqe *)(eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE); + return (struct mthca_eqe *)((u8*)eq->page_list[off / PAGE_SIZE].page + off % PAGE_SIZE); } static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq) @@ -257,8 +293,8 @@ static void port_change(struct mthca_dev *dev, int port, int active) { struct ib_event record; - mthca_dbg(dev, "Port change to %s for port %d\n", - active ? "active" : "down", port); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Port change to %s for port %d\n", + active ? "active" : "down", port)); record.device = &dev->ib_dev; record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; @@ -274,9 +310,9 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) struct mthca_eqe *eqe; int disarm_cqn; int eqes_found = 0; + int set_ci = 0; while ((eqe = next_eqe_sw(eq))) { - int set_ci = 0; /* * Make sure we read EQ entry contents after we've @@ -286,76 +322,80 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) switch (eqe->type) { case MTHCA_EVENT_TYPE_COMP: - disarm_cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff; + disarm_cqn = cl_ntoh32(eqe->event.comp.cqn) & 0xffffff; disarm_cq(dev, eq->eqn, disarm_cqn); mthca_cq_completion(dev, disarm_cqn); break; case MTHCA_EVENT_TYPE_PATH_MIG: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_PATH_MIG); break; case MTHCA_EVENT_TYPE_COMM_EST: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_COMM_EST); break; case MTHCA_EVENT_TYPE_SQ_DRAINED: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_SQ_DRAINED); break; + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, cl_ntoh32(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_FATAL); break; case MTHCA_EVENT_TYPE_PATH_MIG_FAILED: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, 
cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_PATH_MIG_ERR); break; case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_REQ_ERR); break; case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR: - mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff, + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, IB_EVENT_QP_ACCESS_ERR); break; case MTHCA_EVENT_TYPE_CMD: mthca_cmd_event(dev, - be16_to_cpu(eqe->event.cmd.token), + cl_ntoh16(eqe->event.cmd.token), eqe->event.cmd.status, - be64_to_cpu(eqe->event.cmd.out_param)); - /* - * cmd_event() may add more commands. - * The card will think the queue has overflowed if - * we don't tell it we've been processing events. - */ - set_ci = 1; + cl_ntoh64(eqe->event.cmd.out_param)); break; case MTHCA_EVENT_TYPE_PORT_CHANGE: port_change(dev, - (be32_to_cpu(eqe->event.port_change.port) >> 28) & 3, + (cl_ntoh32(eqe->event.port_change.port) >> 28) & 3, eqe->subtype == 0x4); break; case MTHCA_EVENT_TYPE_CQ_ERROR: - mthca_warn(dev, "CQ %s on CQN %06x\n", + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("CQ %s on CQN %06x (syndrome %d)\n", eqe->event.cq_err.syndrome == 1 ? "overrun" : "access violation", - be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff); - mthca_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn), + cl_ntoh32(eqe->event.cq_err.cqn) & 0xffffff, eqe->event.cq_err.syndrome)); + mthca_cq_event(dev, cl_ntoh32(eqe->event.cq_err.cqn), IB_EVENT_CQ_ERR); break; case MTHCA_EVENT_TYPE_EQ_OVERFLOW: - mthca_warn(dev, "EQ overrun on EQN %d\n", eq->eqn); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("EQ overrun on EQN %d\n", eq->eqn)); break; case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR: @@ -363,16 +403,24 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR: case MTHCA_EVENT_TYPE_ECC_DETECT: default: - mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n", - eqe->type, eqe->subtype, eq->eqn); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("Unhandled event %02x(%02x) on EQ %d\n", + eqe->type, eqe->subtype, eq->eqn)); break; }; set_eqe_hw(eqe); ++eq->cons_index; eqes_found = 1; + ++set_ci; - if (unlikely(set_ci)) { + /* + * The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MTHCA_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) { /* * Conditional on hca_type is OK here because * this is a rare case, not the fast path. 
@@ -394,8 +442,9 @@ static void mthca_tavor_dpc( PRKDPC dpc, { struct mthca_eq *eq = ctx; struct mthca_dev *dev = eq->dev; + SPIN_LOCK_PREP(lh); - spin_lock_dpc(&eq->lock); + spin_lock_dpc(&eq->lock, &lh); /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ if (mthca_eq_int(dev, eq)) { @@ -403,7 +452,7 @@ static void mthca_tavor_dpc( PRKDPC dpc, tavor_eq_req_not(dev, eq->eqn); } - spin_unlock_dpc(&eq->lock); + spin_unlock_dpc(&lh); } static BOOLEAN mthca_tavor_interrupt( @@ -419,20 +468,20 @@ static BOOLEAN mthca_tavor_interrupt( writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); ecr = readl((u8*)dev->eq_regs.tavor.ecr_base + 4); - if (ecr) { - writel(ecr, (u8*)dev->eq_regs.tavor.ecr_base + - MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + if (!ecr) + return FALSE; - for (i = 0; i < MTHCA_NUM_EQ; ++i) { - if (ecr & dev->eq_table.eq[i].eqn_mask && - next_eqe_sw(&dev->eq_table.eq[i])) { - KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); - } - } + writel(ecr, (u8*)dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + if (ecr & dev->eq_table.eq[i].eqn_mask && + next_eqe_sw(&dev->eq_table.eq[i])) { + KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); + } } - return (BOOLEAN)ecr; + return TRUE; } #ifdef MSI_SUPPORT @@ -456,15 +505,16 @@ static void mthca_arbel_dpc( PRKDPC dpc, { struct mthca_eq *eq = ctx; struct mthca_dev *dev = eq->dev; + SPIN_LOCK_PREP(lh); - spin_lock_dpc(&eq->lock); + spin_lock_dpc(&eq->lock, &lh); /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ if (mthca_eq_int(dev, eq)) arbel_set_eq_ci(dev, eq, eq->cons_index); arbel_eq_req_not(dev, eq->eqn_mask); - spin_unlock_dpc(&eq->lock); + spin_unlock_dpc(&lh); } static BOOLEAN mthca_arbel_interrupt( @@ -505,13 +555,12 @@ static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr, } #endif -static int __devinit mthca_create_eq(struct mthca_dev *dev, +static int mthca_create_eq(struct mthca_dev *dev, int nent, u8 intr, struct mthca_eq *eq) { - int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / - PAGE_SIZE; + int npages; u64 *dma_list = NULL; dma_addr_t t; struct mthca_mailbox *mailbox; @@ -520,12 +569,9 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, int i; u8 status; - /* Make sure EQ size is aligned to a power of 2 size. 
*/ - for (i = 1; i < nent; i <<= 1) - ; /* nothing */ - nent = i; - - eq->dev = dev; + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); @@ -533,7 +579,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, goto err_out; for (i = 0; i < npages; ++i) - eq->page_list[i].buf = NULL; + eq->page_list[i].page = NULL; dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); if (!dma_list) @@ -545,18 +591,13 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = dma_alloc_coherent(dev, - PAGE_SIZE, &t, GFP_KERNEL); - if (!eq->page_list[i].buf) + alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &eq->page_list[i]); + if (!eq->page_list[i].page) goto err_out_free_pages; - - dma_list[i] = t; - eq->page_list[i].mapping = t; - - RtlZeroMemory(eq->page_list[i].buf, PAGE_SIZE); + dma_list[i] = eq->page_list[i].dma_address; } - for (i = 0; i < nent; ++i) + for (i = 0; i < eq->nent; ++i) set_eqe_hw(get_eqe(eq, i)); eq->eqn = mthca_alloc(&dev->eq_table.alloc); @@ -572,34 +613,32 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, if (err) goto err_out_free_eq; - eq->nent = nent; - RtlZeroMemory(eq_context, sizeof *eq_context); - eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK | + eq_context->flags = cl_hton32(MTHCA_EQ_STATUS_OK | MTHCA_EQ_OWNER_HW | MTHCA_EQ_STATE_ARMED | MTHCA_EQ_FLAG_TR); if (mthca_is_memfree(dev)) - eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL); + eq_context->flags |= cl_hton32(MTHCA_EQ_STATE_ARBEL); - eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + eq_context->logsize_usrpage = cl_hton32((ffs(eq->nent) - 1) << 24); if (mthca_is_memfree(dev)) { - eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num); + eq_context->arbel_pd = cl_hton32(dev->driver_pd.pd_num); } else { - eq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); - eq_context->tavor_pd = cpu_to_be32(dev->driver_pd.pd_num); + eq_context->logsize_usrpage |= cl_hton32(dev->driver_uar.index); + eq_context->tavor_pd = cl_hton32(dev->driver_pd.pd_num); } eq_context->intr = intr; - eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); + eq_context->lkey = cl_hton32(eq->mr.ibmr.lkey); err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); if (err) { - mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_EQ failed (%d)\n", err)); goto err_out_free_mr; } if (status) { - mthca_warn(dev, "SW2HW_EQ returned status 0x%02x\n", - status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_EQ returned status 0x%02x\n", + status)); err = -EINVAL; goto err_out_free_mr; } @@ -607,13 +646,13 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, kfree(dma_list); mthca_free_mailbox(dev, mailbox); - eq->eqn_mask = swab32(1 << eq->eqn); + eq->eqn_mask = _byteswap_ulong(1 << eq->eqn); eq->cons_index = 0; dev->eq_table.arm_mask |= eq->eqn_mask; - mthca_dbg(dev, "Allocated EQ %d with %d entries\n", - eq->eqn, nent); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Allocated EQ %d with %d entries\n", + eq->eqn, eq->nent)); return err; @@ -625,10 +664,8 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, err_out_free_pages: for (i = 0; i < npages; ++i) { - if (eq->page_list[i].buf) { - dma_free_coherent(dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].mapping); + if 
(eq->page_list[i].page) { + free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); } } mthca_free_mailbox(dev, mailbox); @@ -657,28 +694,26 @@ static void mthca_free_eq(struct mthca_dev *dev, err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); if (err) - mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ failed (%d)\n", err)); if (status) - mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ returned status 0x%02x\n", status)); dev->eq_table.arm_mask &= ~eq->eqn_mask; if (0) { - mthca_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Dumping EQ context %02x:\n", eq->eqn)); for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { if (i % 4 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpup((u8*)mailbox->buf + i * 4)); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("[%02x] ", i * 4)); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,(" %08x", cl_ntoh32(*(u32*)((u8*)mailbox->buf + i * 4)))); if ((i + 1) % 4 == 0) - printk("\n"); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("\n")); } } mthca_free_mr(dev, &eq->mr); for (i = 0; i < npages; ++i) { - pci_free_consistent(dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].mapping); + free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); } kfree(eq->page_list); @@ -699,7 +734,7 @@ static void mthca_free_irqs(struct mthca_dev *dev) #endif } -static int __devinit mthca_map_reg(struct mthca_dev *dev, +static int mthca_map_reg(struct mthca_dev *dev, u64 offset, unsigned long size, void __iomem **map, SIZE_T *map_size) { @@ -717,7 +752,7 @@ static void mthca_unmap_reg(struct mthca_dev *dev, u64 offset, iounmap(map, map_size); } -static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) +static int mthca_map_eq_regs(struct mthca_dev *dev) { u64 mthca_base; @@ -734,8 +769,8 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, &dev->clr_base, &dev->clr_base_size)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " + "aborting.\n")); return -ENOMEM; } @@ -746,7 +781,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) if (mthca_map_reg(dev, ((pci_resource_len(dev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm, &dev->eq_regs.arbel.eq_arm_size)) { - mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ arm register, aborting.\n")); mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, dev->clr_base, dev->clr_base_size); @@ -759,7 +794,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) &dev->eq_regs.arbel.eq_set_ci_base, &dev->eq_regs.arbel.eq_set_ci_base_size )) { - mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ CI register, aborting.\n")); mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, dev->eq_regs.arbel.eq_arm, dev->eq_regs.arbel.eq_arm_size); @@ -771,16 +806,16 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) } else { if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, &dev->clr_base, 
&dev->clr_base_size)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " + "aborting.\n")); return -ENOMEM; } if (mthca_map_reg(dev, MTHCA_ECR_BASE, MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, &dev->eq_regs.tavor.ecr_base, &dev->eq_regs.tavor.ecr_base_size)) { - mthca_err(dev, "Couldn't map ecr register, " - "aborting.\n"); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map ecr register, " + "aborting.\n")); mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, dev->clr_base, dev->clr_base_size); return -ENOMEM; @@ -791,7 +826,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) } -static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev) +static void mthca_unmap_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & @@ -816,7 +851,7 @@ static void __devexit mthca_unmap_eq_regs(struct mthca_dev *dev) } } -int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) { int ret; u8 status; @@ -828,29 +863,28 @@ int __devinit mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) * memory, or 1 KB total. */ dev->eq_table.icm_virt = icm_virt; - dev->eq_table.icm_page = alloc_pages(dev,0,&dev->eq_table.icm_dma); - if (!dev->eq_table.icm_page) + alloc_dma_zmem_map(dev,PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &dev->eq_table.sg); + if (!dev->eq_table.sg.page) return -ENOMEM; - ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status); + ret = mthca_MAP_ICM_page(dev, dev->eq_table.sg.dma_address, icm_virt, &status); if (!ret && status) ret = -EINVAL; - if (ret) { - free_pages(dev, 0, dev->eq_table.icm_page, dev->eq_table.icm_dma ); - } + if (ret) + free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); return ret; } -void __devexit mthca_unmap_eq_icm(struct mthca_dev *dev) +void mthca_unmap_eq_icm(struct mthca_dev *dev) { u8 status; mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status); - free_pages(dev, 0, dev->eq_table.icm_page, dev->eq_table.icm_dma ); + free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); } -int __devinit mthca_init_eq_table(struct mthca_dev *dev) +int mthca_init_eq_table(struct mthca_dev *dev) { int err; u8 status; @@ -876,7 +910,7 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev) #endif { dev->eq_table.clr_mask = - swab32(1 << (dev->eq_table.inta_pin & 31)); + _byteswap_ulong(1 << (dev->eq_table.inta_pin & 31)); dev->eq_table.clr_int = dev->clr_base + (dev->eq_table.inta_pin < 32 ? 4 : 0); } @@ -886,19 +920,19 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev) intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? 128 : dev->eq_table.inta_pin; - err = mthca_create_eq(dev, dev->limits.num_cqs, + err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, &dev->eq_table.eq[MTHCA_EQ_COMP]); if (err) goto err_out_unmap; - err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE, + err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE, (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr, &dev->eq_table.eq[MTHCA_EQ_ASYNC]); if (err) goto err_out_comp; - err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE, + err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE, (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 
130 : intr, &dev->eq_table.eq[MTHCA_EQ_CMD]); if (err) @@ -960,20 +994,19 @@ int __devinit mthca_init_eq_table(struct mthca_dev *dev) err = mthca_MAP_EQ(dev, async_mask(dev), 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); if (err) - mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", - dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("MAP_EQ for async EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err)); if (status) - mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n", - dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status); - + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("MAP_EQ for async EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status)); err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); if (err) - mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n", - dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("MAP_EQ for cmd EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err)); if (status) - mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n", - dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("MAP_EQ for cmd EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status)); for (i = 0; i < MTHCA_EQ_CMD; ++i) if (mthca_is_memfree(dev)) @@ -1000,7 +1033,7 @@ err_out_free: return err; } -void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev) +void mthca_cleanup_eq_table(struct mthca_dev *dev) { u8 status; int i; @@ -1019,3 +1052,5 @@ void __devexit mthca_cleanup_eq_table(struct mthca_dev *dev) mthca_alloc_cleanup(&dev->eq_table.alloc); } + + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_log.c b/branches/MTHCA/hw/mthca/kernel/mthca_log.c new file mode 100644 index 00000000..a270828e --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mthca_log.c @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2005 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +// Author: Yossi Leybovich + +#include "hca_driver.h" + + +VOID +WriteEventLogEntry( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + ULONG pi_nDataItems, + ... + ) +/*++ + +Routine Description: + Writes an event log entry to the event log. + +Arguments: + + pi_pIoObject......... The IO object ( driver object or device object ). 
pi_ErrorCode......... The error code. + pi_UniqueErrorCode... A specific error code. + pi_FinalStatus....... The final status. + pi_nDataItems........ Number of data items. + . + . data items values + . + +Return Value: + + None . + +--*/ +{ /* WriteEventLogEntry */ + + /* Variable argument list */ + va_list l_Argptr; + /* Pointer to an error log entry */ + PIO_ERROR_LOG_PACKET l_pErrorLogEntry; + + /* Init the variable argument list */ + va_start(l_Argptr, pi_nDataItems); + + /* Allocate an error log entry */ + l_pErrorLogEntry = + (PIO_ERROR_LOG_PACKET)IoAllocateErrorLogEntry( + pi_pIoObject, + (UCHAR)(sizeof(IO_ERROR_LOG_PACKET)+pi_nDataItems*sizeof(ULONG)) + ); + /* Check allocation */ + if ( l_pErrorLogEntry != NULL) + { /* OK */ + + /* Data item index */ + USHORT l_nDataItem ; + + /* Set the error log entry header */ + l_pErrorLogEntry->ErrorCode = pi_ErrorCode; + l_pErrorLogEntry->DumpDataSize = (USHORT) (pi_nDataItems*sizeof(ULONG)); + l_pErrorLogEntry->SequenceNumber = 0; + l_pErrorLogEntry->MajorFunctionCode = 0; + l_pErrorLogEntry->IoControlCode = 0; + l_pErrorLogEntry->RetryCount = 0; + l_pErrorLogEntry->UniqueErrorValue = pi_UniqueErrorCode; + l_pErrorLogEntry->FinalStatus = pi_FinalStatus; + + /* Insert the data items */ + for (l_nDataItem = 0; l_nDataItem < pi_nDataItems; l_nDataItem++) + { /* Insert a data item */ + + /* Current data item */ + int l_CurDataItem ; + + /* Get next data item */ + l_CurDataItem = va_arg( l_Argptr, int); + + /* Put it into the data array */ + l_pErrorLogEntry->DumpData[l_nDataItem] = l_CurDataItem ; + + } /* Insert a data item */ + + /* Write the packet */ + IoWriteErrorLogEntry(l_pErrorLogEntry); + + } /* OK */ + + /* Term the variable argument list */ + va_end(l_Argptr); + +} /* WriteEventLogEntry */ + +/*------------------------------------------------------------------------------------------------------*/ + +VOID +WriteEventLogEntryStr( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + PWCHAR pi_InsertionStr, + ULONG pi_nDataItems, + ... + ) +/*++ + +Routine Description: + Writes an event log entry to the event log. + +Arguments: + + pi_pIoObject......... The IO object ( driver object or device object ). + pi_ErrorCode......... The error code. + pi_UniqueErrorCode... A specific error code. + pi_FinalStatus....... The final status. + pi_nDataItems........ Number of data items. + . + . data items values + . + +Return Value: + + None . + +--*/ +{ /* WriteEventLogEntryStr */ + + /* Variable argument list */ + va_list l_Argptr; + /* Pointer to an error log entry */ + PIO_ERROR_LOG_PACKET l_pErrorLogEntry; + /* sizeof insertion string */ + int l_Size = (pi_InsertionStr) ? 
((wcslen(pi_InsertionStr) + 1) * sizeof( WCHAR )) : 0; + + /* Init the variable argument list */ + va_start(l_Argptr, pi_nDataItems); + + /* Allocate an error log entry */ + l_pErrorLogEntry = + (PIO_ERROR_LOG_PACKET)IoAllocateErrorLogEntry( + pi_pIoObject, + (UCHAR)(sizeof(IO_ERROR_LOG_PACKET)+pi_nDataItems*sizeof(ULONG)+l_Size) + ); + /* Check allocation */ + if ( l_pErrorLogEntry != NULL) + { /* OK */ + + /* Data item index */ + USHORT l_nDataItem ; + + /* Set the error log entry header */ + l_pErrorLogEntry->ErrorCode = pi_ErrorCode; + l_pErrorLogEntry->DumpDataSize = (USHORT) (pi_nDataItems*sizeof(ULONG)); + l_pErrorLogEntry->SequenceNumber = 0; + l_pErrorLogEntry->MajorFunctionCode = 0; + l_pErrorLogEntry->IoControlCode = 0; + l_pErrorLogEntry->RetryCount = 0; + l_pErrorLogEntry->UniqueErrorValue = pi_UniqueErrorCode; + l_pErrorLogEntry->FinalStatus = pi_FinalStatus; + + /* Insert the data items */ + for (l_nDataItem = 0; l_nDataItem < pi_nDataItems; l_nDataItem++) + { /* Insert a data item */ + + /* Current data item */ + int l_CurDataItem ; + + /* Get next data item */ + l_CurDataItem = va_arg( l_Argptr, int); + + /* Put it into the data array */ + l_pErrorLogEntry->DumpData[l_nDataItem] = l_CurDataItem ; + + } /* Insert a data item */ + + /* add insertion string */ + if (pi_InsertionStr) { + char *ptr; + l_pErrorLogEntry->NumberOfStrings = 1; + l_pErrorLogEntry->StringOffset = sizeof(IO_ERROR_LOG_PACKET) + l_pErrorLogEntry->DumpDataSize; + ptr = (char*)l_pErrorLogEntry + l_pErrorLogEntry->StringOffset; + memcpy( ptr, pi_InsertionStr, l_Size ); + } + + /* Write the packet */ + IoWriteErrorLogEntry(l_pErrorLogEntry); + + } /* OK */ + + /* Term the variable argument list */ + va_end(l_Argptr); + +} /* WriteEventLogEntryStr */ + + + + + + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_log.mc b/branches/MTHCA/hw/mthca/kernel/mthca_log.mc new file mode 100644 index 00000000..08cbddae --- /dev/null +++ b/branches/MTHCA/hw/mthca/kernel/mthca_log.mc @@ -0,0 +1,56 @@ +;/*++ +;============================================================================= +;Copyright (c) 2001 Mellanox Technologies +; +;Module Name: +; +; mthcalog.mc +; +;Abstract: +; +; MTHCA Driver event log messages +; +;Authors: +; +; Yossi Leybovich +; +;Environment: +; +; Kernel Mode . +; +;============================================================================= +;--*/ +; +MessageIdTypedef = NTSTATUS + +SeverityNames = ( + Success = 0x0:STATUS_SEVERITY_SUCCESS + Informational = 0x1:STATUS_SEVERITY_INFORMATIONAL + Warning = 0x2:STATUS_SEVERITY_WARNING + Error = 0x3:STATUS_SEVERITY_ERROR + ) + +FacilityNames = ( + System = 0x0 + RpcRuntime = 0x2:FACILITY_RPC_RUNTIME + RpcStubs = 0x3:FACILITY_RPC_STUBS + Io = 0x4:FACILITY_IO_ERROR_CODE + MTHCA = 0x7:FACILITY_MTHCA_ERROR_CODE + ) + + +MessageId=0x0001 Facility=MTHCA Severity=Informational SymbolicName=EVENT_MTHCA_ANY_INFO +Language=English +%2 +. + +MessageId=0x0002 Facility=MTHCA Severity=Warning SymbolicName=EVENT_MTHCA_ANY_WARN +Language=English +%2 +. + +MessageId=0x0003 Facility=MTHCA Severity=Error SymbolicName=EVENT_MTHCA_ANY_ERROR +Language=English +%2 +.
+ diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_mad.c b/branches/MTHCA/hw/mthca/kernel/mthca_mad.c index 0d94a92c..5c116dbc 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_mad.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_mad.c @@ -39,6 +39,12 @@ #include #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_mad.tmh" +#endif #include "mthca_cmd.h" enum { @@ -47,8 +53,7 @@ enum { }; struct mthca_trap_mad { - struct ib_mad *mad; - dma_addr_t mapping; + struct scatterlist sg; }; static void update_sm_ah(struct mthca_dev *dev, @@ -56,6 +61,7 @@ static void update_sm_ah(struct mthca_dev *dev, { struct ib_ah *new_ah; struct ib_ah_attr ah_attr; + SPIN_LOCK_PREP(lh); if (!dev->send_agent[port_num - 1][0]) return; @@ -65,24 +71,17 @@ static void update_sm_ah(struct mthca_dev *dev, ah_attr.sl = sl; ah_attr.port_num = port_num; -#ifdef LINUX_TO_BE_CHANGED - new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + new_ah = ibv_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, + &ah_attr, NULL, NULL); if (IS_ERR(new_ah)) return; -#else - printk( KERN_ERROR " update_sm_ah: ib_create_ah not ported \n" ); - return; -#endif - spin_lock_irqsave(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, &lh); if (dev->sm_ah[port_num - 1]) { -#ifdef LINUX_TO_BE_CHANGED - ib_destroy_ah(dev->sm_ah[port_num - 1]); -#endif + ibv_destroy_ah(dev->sm_ah[port_num - 1]); } dev->sm_ah[port_num - 1] = new_ah; - spin_unlock_irqrestore(&dev->sm_lock); + spin_unlock_irqrestore(&lh); } /* @@ -100,7 +99,7 @@ static void smp_snoop(struct ib_device *ibdev, mad->mad_hdr.method == IB_MGMT_METHOD_SET) { if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { update_sm_ah(to_mdev(ibdev), port_num, - be16_to_cpup((__be16 *) (mad->data + 58)), + cl_ntoh16(*(__be16 *) (mad->data + 58)), (*(u8 *) (mad->data + 76)) & 0xf); event.device = ibdev; @@ -125,35 +124,36 @@ static void forward_trap(struct mthca_dev *dev, int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; struct mthca_trap_mad *tmad; struct ib_sge gather_list; - struct ib_send_wr *bad_wr, wr; + struct _ib_send_wr *bad_wr, wr; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; + SPIN_LOCK_PREP(lh); /* fill the template */ - wr.sg_list = &gather_list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - wr.wr.ud.remote_qpn = qpn; - wr.wr.ud.remote_qkey = qpn ? IB_QP1_QKEY : 0; + wr.ds_array = (ib_local_ds_t* __ptr64)(void*)&gather_list; + wr.num_ds = 1; + wr.wr_type = WR_SEND; + wr.send_opt = IB_SEND_OPT_SIGNALED; + wr.dgrm.ud.remote_qp = cl_hton32(qpn); + wr.dgrm.ud.remote_qkey = qpn ? 
IB_QP1_QKEY : 0; if (agent) { tmad = kmalloc(sizeof *tmad, GFP_KERNEL); if (!tmad) return; - tmad->mad = alloc_dma_mem(dev, sizeof *tmad->mad, &tmad->mapping); - if (!tmad->mad) { + alloc_dma_zmem(dev, sizeof *mad, &tmad->sg); + if (!tmad->sg.page) { kfree(tmad); return; } - memcpy(tmad->mad, mad, sizeof *mad); + memcpy(tmad->sg.page, mad, sizeof *mad); - wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr; + wr.dgrm.ud.rsvd = (void* __ptr64)&((struct ib_mad *)tmad->sg.page)->mad_hdr; wr.wr_id = (u64)(ULONG_PTR)tmad; - gather_list.addr = tmad->mapping; - gather_list.length = sizeof *tmad->mad; + gather_list.addr = tmad->sg.dma_address; + gather_list.length = tmad->sg.length; gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; /* @@ -162,24 +162,23 @@ static void forward_trap(struct mthca_dev *dev, * wrong following the IB spec strictly, but we know * it's OK for our devices). */ - spin_lock_irqsave(&dev->sm_lock); - wr.wr.ud.ah = dev->sm_ah[port_num - 1]; - if (wr.wr.ud.ah) + spin_lock_irqsave(&dev->sm_lock, &lh); + wr.dgrm.ud.h_av = (ib_av_handle_t)dev->sm_ah[port_num - 1]; + if (wr.dgrm.ud.h_av) #ifdef LINUX_TO_BE_CHANGED ret = ib_post_send_mad(agent, &wr, &bad_wr); #else { - printk( KERN_ERROR " forward_trap: ib_post_send_mad not ported \n" ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,(" forward_trap: ib_post_send_mad not ported \n" )); ret = -EINVAL; } #endif else ret = -EINVAL; - spin_unlock_irqrestore(&dev->sm_lock); + spin_unlock_irqrestore(&lh); if (ret) { - free_dma_mem(dev, sizeof *tmad->mad, - tmad->mad, tmad->mapping ); + free_dma_mem_map(dev, &tmad->sg, PCI_DMA_BIDIRECTIONAL ); kfree(tmad); } } @@ -188,19 +187,28 @@ static void forward_trap(struct mthca_dev *dev, int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - struct ib_wc *in_wc, + struct _ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { int err; u8 status; - u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 slid = in_wc ? in_wc->recv.ud.remote_lid : cl_ntoh16(IB_LID_PERMISSIVE); + + +#if 0 + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW,("mthca_process_mad: \n\tin: Class %02x, Method %02x, AttrId %x, AttrMod %x, ClSpec %x, Tid %I64x\n"), + (u32)in_mad->mad_hdr.mgmt_class, (u32)in_mad->mad_hdr.method, + (u32)in_mad->mad_hdr.attr_id, in_mad->mad_hdr.attr_mod, + (u32)in_mad->mad_hdr.class_specific, in_mad->mad_hdr.tid )); +#endif /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { forward_trap(to_mdev(ibdev), port_num, in_mad); + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("mthca_process_mad: Not sent, but locally forwarded\n")); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } @@ -212,10 +220,13 @@ int mthca_process_mad(struct ib_device *ibdev, */ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("mthca_process_mad: Skip some methods. 
Nothing done !\n")); return IB_MAD_RESULT_SUCCESS; + } /* * Don't process SMInfo queries or vendor-specific @@ -223,44 +234,67 @@ int mthca_process_mad(struct ib_device *ibdev, */ if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == - IB_SMP_ATTR_VENDOR_MASK)) + IB_SMP_ATTR_VENDOR_MASK)) { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("mthca_process_mad: Skip SMInfo queries or vendor-specific MADs. Nothing done !\n")); return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + } + } + else { + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("mthca_process_mad: Skip some management methods. Nothing done !\n")); + return IB_MAD_RESULT_SUCCESS; + } + } + else { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("mthca_process_mad: Skip IB_MGMT_CLASS_PERF_MGMT et al. Nothing done !\n")); return IB_MAD_RESULT_SUCCESS; - } else - return IB_MAD_RESULT_SUCCESS; + } + } + // send MAD err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, in_grh, in_mad, out_mad, &status); if (err) { - mthca_err(to_mdev(ibdev), "MAD_IFC failed\n"); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAD_IFC failed\n")); return IB_MAD_RESULT_FAILURE; } if (status == MTHCA_CMD_STAT_BAD_PKT) return IB_MAD_RESULT_SUCCESS; if (status) { - mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n", - status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("mthca_process_mad: MAD_IFC returned status %02x\n", status)); return IB_MAD_RESULT_FAILURE; } if (!out_mad->mad_hdr.status) smp_snoop(ibdev, port_num, in_mad); +#if 0 + //NB: excluded, because it is set in the shim. Being set here, it prevents shim MAD cache to work ! 
/* set return bit in status of directed route responses */ if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + out_mad->mad_hdr.status |= cl_hton16(1 << 15); +#endif + +#if 0 + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW,("mthca_process_mad: \n\tout: Class %02x, Method %02x, AttrId %x, AttrMod %x, ClSpec %x, Tid %I64x, Status %x\n", + (u32)in_mad->mad_hdr.mgmt_class, (u32)in_mad->mad_hdr.method, + (u32)in_mad->mad_hdr.attr_id, in_mad->mad_hdr.attr_mod, + (u32)in_mad->mad_hdr.class_specific, in_mad->mad_hdr.tid, + (u32)in_mad->mad_hdr.status )); +#endif - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) { /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } @@ -271,8 +305,7 @@ static void send_handler(struct ib_mad_agent *agent, struct mthca_trap_mad *tmad = (void *) (ULONG_PTR) mad_send_wc->wr_id; - free_dma_mem(agent->device->mdev, sizeof *tmad->mad, - tmad->mad, tmad->mapping ); + free_dma_mem_map(agent->device->mdev, &tmad->sg, PCI_DMA_BIDIRECTIONAL ); kfree(tmad); } @@ -305,7 +338,7 @@ err: return PTR_ERR(agent); #else - printk( KERN_ERROR " mthca_create_agents: ib_register_mad_agent not ported \n" ); + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,(" mthca_create_agents: ib_register_mad_agent not ported \n" )); return 0; #endif } @@ -327,7 +360,7 @@ void mthca_free_agents(struct mthca_dev *dev) #ifdef LINUX_TO_BE_CHANGED if (dev->sm_ah[p]) - ib_destroy_ah(dev->sm_ah[p]); + ibv_destroy_ah(dev->sm_ah[p]); #endif } } diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_main.c b/branches/MTHCA/hw/mthca/kernel/mthca_main.c index c857211e..ad8f499e 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_main.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_main.c @@ -35,14 +35,20 @@ */ #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_main.tmh" +#endif #include "mthca_config_reg.h" #include "mthca_cmd.h" #include "mthca_profile.h" #include "mthca_memfree.h" static const char mthca_version[] = - DRV_NAME ": Mellanox InfiniBand HCA driver v" - DRV_VERSION " (" DRV_RELDATE ")\n"; + DRV_NAME ": HCA Driver v" + DRV_VERSION " (" DRV_RELDATE ")"; static struct mthca_profile default_profile = { 1 << 16, // num_qp @@ -103,14 +109,6 @@ static struct pci_device_id { }; #define MTHCA_PCI_TABLE_SIZE (sizeof(mthca_pci_table)/sizeof(struct pci_device_id)) - -// wrapper to driver's hca_reset -static NTSTATUS mthca_reset(struct mthca_dev *mdev) -{ - PDEVICE_OBJECT pdo = mdev->ext->cl_ext.p_self_do; - return hca_reset(pdo); -} - // wrapper to driver's hca_tune_pci static NTSTATUS mthca_tune_pci(struct mthca_dev *mdev) { @@ -125,12 +123,12 @@ int mthca_get_dev_info(struct mthca_dev *mdev, __be64 *node_guid, u32 *hw_id) int err = (ib_dev->query_device )(ib_dev, &props ); if (err) { - mthca_err( mdev, "can't get guid - mthca_query_port() failed (%08X)\n", err ); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("can't get guid - mthca_query_port() failed (%08X)\n", err )); return err; } - //TODO: do we need to convert GUID to LE by be64_to_cpu(x) ? - *node_guid = props.node_guid; + //TODO: do we need to convert GUID to LE by cl_ntoh64(x) ? 
+ *node_guid = ib_dev->node_guid; *hw_id = props.hw_ver; return 0; } @@ -157,24 +155,24 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); if (err) { - mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DEV_LIM command failed, aborting.\n")); return err; } if (status) { - mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DEV_LIM returned status 0x%02x, " + "aborting.\n", status)); return -EINVAL; } if (dev_lim->min_page_sz > PAGE_SIZE) { - mthca_err(mdev, "HCA minimum page size of %d bigger than " + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %ld, aborting.\n", - dev_lim->min_page_sz, PAGE_SIZE); + dev_lim->min_page_sz, PAGE_SIZE)); return -ENODEV; } if (dev_lim->num_ports > MTHCA_MAX_PORTS) { - mthca_err(mdev, "HCA has %d ports, but we only support %d, " + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("HCA has %d ports, but we only support %d, " "aborting.\n", - dev_lim->num_ports, MTHCA_MAX_PORTS); + dev_lim->num_ports, MTHCA_MAX_PORTS)); return -ENODEV; } @@ -185,15 +183,28 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) mdev->limits.pkey_table_len = dev_lim->max_pkeys; mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; mdev->limits.reserved_srqs = dev_lim->reserved_srqs; mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + mdev->limits.max_desc_sz = dev_lim->max_desc_sz; + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; mdev->limits.reserved_cqs = dev_lim->reserved_cqs; mdev->limits.reserved_eqs = dev_lim->reserved_eqs; mdev->limits.reserved_mtts = dev_lim->reserved_mtts; mdev->limits.reserved_mrws = dev_lim->reserved_mrws; mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; + mdev->limits.port_width_cap = (u8)dev_lim->max_port_width; + mdev->limits.page_size_cap = !(u32)(dev_lim->min_page_sz - 1); + mdev->limits.flags = dev_lim->flags; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. 
@@ -238,39 +249,43 @@ static int mthca_init_tavor(struct mthca_dev *mdev) err = mthca_SYS_EN(mdev, &status); if (err) { - mthca_err(mdev, "SYS_EN command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SYS_EN command failed, aborting.\n")); return err; } if (status) { - mthca_err(mdev, "SYS_EN returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SYS_EN returned status 0x%02x, " + "aborting.\n", status)); return -EINVAL; } err = mthca_QUERY_FW(mdev, &status); if (err) { - mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW command failed, aborting.\n")); goto err_disable; } if (status) { - mthca_err(mdev, "QUERY_FW returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW returned status 0x%02x, " + "aborting.\n", status)); err = -EINVAL; goto err_disable; } err = mthca_QUERY_DDR(mdev, &status); if (err) { - mthca_err(mdev, "QUERY_DDR command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DDR command failed, aborting.\n")); goto err_disable; } if (status) { - mthca_err(mdev, "QUERY_DDR returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,( "QUERY_DDR returned status 0x%02x, " + "aborting.\n", status)); err = -EINVAL; goto err_disable; } err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,( "QUERY_DEV_LIM command failed, aborting.\n")); + goto err_disable; + } profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; @@ -284,12 +299,12 @@ static int mthca_init_tavor(struct mthca_dev *mdev) err = (int)mthca_INIT_HCA(mdev, &init_hca, &status); if (err) { - mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA command failed, aborting.\n")); goto err_disable; } if (status) { - mthca_err(mdev, "INIT_HCA returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA returned status 0x%02x, " + "aborting.\n", status)); err = -EINVAL; goto err_disable; } @@ -313,27 +328,27 @@ static int mthca_load_fw(struct mthca_dev *mdev) mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages, GFP_HIGHUSER | __GFP_NOWARN); if (!mdev->fw.arbel.fw_icm) { - mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't allocate FW area, aborting.\n")); return -ENOMEM; } err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status); if (err) { - mthca_err(mdev, "MAP_FA command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_FA command failed, aborting.\n")); goto err_free; } if (status) { - mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_FA returned status 0x%02x, aborting.\n", status)); err = -EINVAL; goto err_free; } err = mthca_RUN_FW(mdev, &status); if (err) { - mthca_err(mdev, "RUN_FW command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("RUN_FW command failed, aborting.\n")); goto err_unmap_fa; } if (status) { - mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("RUN_FW returned status 0x%02x, aborting.\n", status)); err = -EINVAL; goto err_unmap_fa; } @@ -359,40 +374,40 @@ static int mthca_init_icm(struct mthca_dev *mdev, err = mthca_SET_ICM_SIZE(mdev, 
icm_size, &aux_pages, &status); if (err) { - mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SET_ICM_SIZE command failed, aborting.\n")); return err; } if (status) { - mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SET_ICM_SIZE returned status 0x%02x, " + "aborting.\n", status)); return -EINVAL; } - mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW , ("%I64d KB of HCA context requires %I64d KB aux memory.\n", (unsigned long long) icm_size >> 10, - (unsigned long long) aux_pages << 2); + (unsigned long long) aux_pages << 2)); mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, (int)aux_pages, GFP_HIGHUSER | __GFP_NOWARN); if (!mdev->fw.arbel.aux_icm) { - mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't allocate aux memory, aborting.\n")); return -ENOMEM; } err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status); if (err) { - mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_ICM_AUX command failed, aborting.\n")); goto err_free_aux; } if (status) { - mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_ICM_AUX returned status 0x%02x, aborting.\n", status)); err = -EINVAL; goto err_free_aux; } err = mthca_map_eq_icm(mdev, init_hca->eqc_base); if (err) { - mthca_err(mdev, "Failed to map EQ context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map EQ context memory, aborting.\n")); goto err_unmap_aux; } @@ -401,7 +416,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1); if (!mdev->mr_table.mtt_table) { - mthca_err(mdev, "Failed to map MTT context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map MTT context memory, aborting.\n")); err = -ENOMEM; goto err_unmap_eq; } @@ -411,7 +426,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_mpts, mdev->limits.reserved_mrws, 1); if (!mdev->mr_table.mpt_table) { - mthca_err(mdev, "Failed to map MPT context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map MPT context memory, aborting.\n")); err = -ENOMEM; goto err_unmap_mtt; } @@ -421,7 +436,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_qps, mdev->limits.reserved_qps, 0); if (!mdev->qp_table.qp_table) { - mthca_err(mdev, "Failed to map QP context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map QP context memory, aborting.\n")); err = -ENOMEM; goto err_unmap_mpt; } @@ -431,7 +446,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_qps, mdev->limits.reserved_qps, 0); if (!mdev->qp_table.eqp_table) { - mthca_err(mdev, "Failed to map EQP context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map EQP context memory, aborting.\n")); err = -ENOMEM; goto err_unmap_qp; } @@ -442,7 +457,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->qp_table.rdb_shift, 0, 0); if (!mdev->qp_table.rdb_table) { - mthca_err(mdev, "Failed to map RDB context memory, aborting\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map RDB context memory, aborting\n")); err = -ENOMEM; goto 
err_unmap_eqp; } @@ -452,7 +467,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_cqs, mdev->limits.reserved_cqs, 0); if (!mdev->cq_table.table) { - mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map CQ context memory, aborting.\n")); err = -ENOMEM; goto err_unmap_rdb; } @@ -464,8 +479,8 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_srqs, mdev->limits.reserved_srqs, 0); if (!mdev->srq_table.table) { - mthca_err(mdev, "Failed to map SRQ context memory, " - "aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map SRQ context memory, " + "aborting.\n")); err = -ENOMEM; goto err_unmap_cq; } @@ -484,7 +499,7 @@ static int mthca_init_icm(struct mthca_dev *mdev, mdev->limits.num_amgms, 0); if (!mdev->mcg_table.table) { - mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map MCG context memory, aborting.\n")); err = -ENOMEM; goto err_unmap_srq; } @@ -536,38 +551,38 @@ static int mthca_init_arbel(struct mthca_dev *mdev) err = mthca_QUERY_FW(mdev, &status); if (err) { - mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW command failed, aborting.\n")); return err; } if (status) { - mthca_err(mdev, "QUERY_FW returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW returned status 0x%02x, " + "aborting.\n", status)); return -EINVAL; } err = mthca_ENABLE_LAM(mdev, &status); if (err) { - mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("ENABLE_LAM command failed, aborting.\n")); return err; } if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) { - mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n"); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("No HCA-attached memory (running in MemFree mode)\n")); mdev->mthca_flags |= MTHCA_FLAG_NO_LAM; } else if (status) { - mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("ENABLE_LAM returned status 0x%02x, " + "aborting.\n", status)); return -EINVAL; } err = mthca_load_fw(mdev); if (err) { - mthca_err(mdev, "Failed to start FW, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to start FW, aborting.\n")); goto err_disable; } err = mthca_dev_lim(mdev, &dev_lim); if (err) { - mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DEV_LIM command failed, aborting.\n")); goto err_stop_fw; } @@ -589,12 +604,12 @@ static int mthca_init_arbel(struct mthca_dev *mdev) err = mthca_INIT_HCA(mdev, &init_hca, &status); if (err) { - mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA command failed, aborting.\n")); goto err_free_icm; } if (status) { - mthca_err(mdev, "INIT_HCA returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA returned status 0x%02x, " + "aborting.\n", status)); err = -EINVAL; goto err_free_icm; } @@ -672,12 +687,12 @@ static int mthca_init_hca(struct mthca_dev *mdev) err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_ADAPTER command failed, 
aborting.\n")); goto err_close; } if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_ADAPTER returned status 0x%02x, " + "aborting.\n", status)); err = -EINVAL; goto err_close; } @@ -693,166 +708,156 @@ err_close: return err; } -static int mthca_setup_hca(struct mthca_dev *dev) +static int mthca_setup_hca(struct mthca_dev *mdev) { int err; u8 status; - MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock); + MTHCA_INIT_DOORBELL_LOCK(&mdev->doorbell_lock); - err = mthca_init_uar_table(dev); + err = mthca_init_uar_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "user access region table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "user access region table, aborting.\n")); return err; } - err = mthca_uar_alloc(dev, &dev->driver_uar); + err = mthca_uar_alloc(mdev, &mdev->driver_uar); if (err) { - mthca_err(dev, "Failed to allocate driver access region, " - "aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to allocate driver access region, " + "aborting.\n")); goto err_uar_table_free; } - dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE,&dev->kar_size); - if (!dev->kar) { - mthca_err(dev, "Couldn't map kernel access region, " - "aborting.\n"); + mdev->kar = ioremap(mdev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE,&mdev->kar_size); + if (!mdev->kar) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map kernel access region, " + "aborting.\n")); err = -ENOMEM; goto err_uar_free; } - err = mthca_init_pd_table(dev); + err = mthca_init_pd_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "protection domain table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "protection domain table, aborting.\n")); goto err_kar_unmap; } - err = mthca_init_mr_table(dev); + err = mthca_init_mr_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "memory region table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "memory region table, aborting.\n")); goto err_pd_table_free; } - err = mthca_pd_alloc(dev, 1, &dev->driver_pd); + err = mthca_pd_alloc(mdev, 1, &mdev->driver_pd); if (err) { - mthca_err(dev, "Failed to create driver PD, " - "aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to create driver PD, " + "aborting.\n")); goto err_mr_table_free; } - err = mthca_init_eq_table(dev); + err = mthca_init_eq_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "event queue table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW, ("Failed to initialize " + "event queue table, aborting.\n")); goto err_pd_free; } - err = mthca_cmd_use_events(dev); + err = mthca_cmd_use_events(mdev); if (err) { - mthca_err(dev, "Failed to switch to event-driven " - "firmware commands, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to switch to event-driven " + "firmware commands, aborting.\n")); goto err_eq_table_free; } - err = mthca_NOP(dev, &status); - if (err || status) { - mthca_err(dev, "NOP command failed to generate interrupt, aborting.\n"); - if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) - mthca_err(dev, "Try again with MSI/MSI-X disabled.\n"); - else - mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n"); - - goto err_cmd_poll; - } - - err = mthca_NOP(dev, &status); + err = mthca_NOP(mdev, &status); if (err || status) { - mthca_err(dev, "NOP 
command failed to generate interrupt, aborting.\n"); - if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) - mthca_err(dev, "Try again with MSI/MSI-X disabled.\n"); - else - mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("NOP command failed to generate interrupt, aborting.\n")); + if (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)){ + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Try again with MSI/MSI-X disabled.\n")); + }else{ + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("BIOS or ACPI interrupt routing problem?\n")); + } goto err_cmd_poll; } - mthca_dbg(dev, "NOP command IRQ test passed\n"); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("NOP command IRQ test passed\n")); - err = mthca_init_cq_table(dev); + err = mthca_init_cq_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "completion queue table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "completion queue table, aborting.\n")); goto err_cmd_poll; } - err = mthca_init_srq_table(dev); + err = mthca_init_srq_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "shared receive queue table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "shared receive queue table, aborting.\n")); goto err_cq_table_free; } - err = mthca_init_qp_table(dev); + err = mthca_init_qp_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "queue pair table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW, ("Failed to initialize " + "queue pair table, aborting.\n")); goto err_srq_table_free; } - err = mthca_init_av_table(dev); + err = mthca_init_av_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "address vector table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "address vector table, aborting.\n")); goto err_qp_table_free; } - err = mthca_init_mcg_table(dev); + err = mthca_init_mcg_table(mdev); if (err) { - mthca_err(dev, "Failed to initialize " - "multicast group table, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "multicast group table, aborting.\n")); goto err_av_table_free; } return 0; err_av_table_free: - mthca_cleanup_av_table(dev); + mthca_cleanup_av_table(mdev); err_qp_table_free: - mthca_cleanup_qp_table(dev); + mthca_cleanup_qp_table(mdev); err_srq_table_free: - mthca_cleanup_srq_table(dev); + mthca_cleanup_srq_table(mdev); err_cq_table_free: - mthca_cleanup_cq_table(dev); + mthca_cleanup_cq_table(mdev); err_cmd_poll: - mthca_cmd_use_polling(dev); + mthca_cmd_use_polling(mdev); err_eq_table_free: - mthca_cleanup_eq_table(dev); + mthca_cleanup_eq_table(mdev); err_pd_free: - mthca_pd_free(dev, &dev->driver_pd); + mthca_pd_free(mdev, &mdev->driver_pd); err_mr_table_free: - mthca_cleanup_mr_table(dev); + mthca_cleanup_mr_table(mdev); err_pd_table_free: - mthca_cleanup_pd_table(dev); + mthca_cleanup_pd_table(mdev); err_kar_unmap: - iounmap(dev->kar, dev->kar_size); + iounmap(mdev->kar, mdev->kar_size); err_uar_free: - mthca_uar_free(dev, &dev->driver_uar); + mthca_uar_free(mdev, &mdev->driver_uar); err_uar_table_free: - mthca_cleanup_uar_table(dev); + mthca_cleanup_uar_table(mdev); return err; } @@ -862,28 +867,27 @@ static int mthca_check_fw(struct mthca_dev *mdev, struct pci_device_id *p_id) int err = 0; if (mdev->fw_ver <= mthca_hca_table[p_id->driver_data].max_unsupported_fw) { - mthca_err(mdev, "HCA FW version %x.%x.%x is not 
supported. Use %x.%x.%x or higher.\n", + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("HCA FW version %d.%d.%d is not supported. Use %d.%d.%d or higher.\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 32), (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 16) & 0xffff, - (int) (mthca_hca_table[p_id->driver_data].min_supported_fw & 0xffff)); + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw & 0xffff))); err = -EINVAL; } else if (mdev->fw_ver < mthca_hca_table[p_id->driver_data].min_supported_fw) { - mthca_warn(mdev, "HCA FW version %x.%x.%x is too old. Use %x.%x.%x or higher.\n", + HCA_PRINT_EV(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HCA FW version %d.%d.%d is too old. Use %d.%d.%d or higher.\nIf you have problems, try updating your HCA FW.\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 32), (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 16) & 0xffff, - (int) (mthca_hca_table[p_id->driver_data].min_supported_fw & 0xffff)); - mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw & 0xffff))); } else { - mthca_warn(mdev, "Current HCA FW version is %x.%x.%x. \n", + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("Current HCA FW version is %d.%d.%d. \n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, - (int) (mdev->fw_ver & 0xffff)); + (int) (mdev->fw_ver & 0xffff))); } return err; @@ -899,7 +903,7 @@ NTSTATUS mthca_init_one(hca_dev_ext_t *ext) /* print version */ if (!mthca_version_printed) { - printk(KERN_INFO "%s", mthca_version); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("%s\n", mthca_version)); ++mthca_version_printed; } @@ -915,8 +919,8 @@ NTSTATUS mthca_init_one(hca_dev_ext_t *ext) /* allocate mdev structure */ mdev = kmalloc(sizeof *mdev, GFP_KERNEL); if (!mdev) { - printk(KERN_ERROR PFX "Device struct alloc failed, " - "aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Device struct alloc failed, " + "aborting.\n")); status = STATUS_INSUFFICIENT_RESOURCES; goto end; } @@ -924,7 +928,6 @@ NTSTATUS mthca_init_one(hca_dev_ext_t *ext) /* set some fields */ RtlZeroMemory(mdev, sizeof *mdev); mdev->ext = ext; /* pointer to DEVICE OBJECT extension */ - ext->hca.mdev = mdev; if (ext->hca_hidden) mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; if (mthca_hca_table[p_id->driver_data].is_memfree) @@ -940,20 +943,14 @@ NTSTATUS mthca_init_one(hca_dev_ext_t *ext) * attempt a firmware command, since a boot ROM may have left * the HCA in an undefined state. 
*/ - status = mthca_reset(mdev); + status = hca_reset( mdev->ext->cl_ext.p_self_do, p_id->driver_data == TAVOR ); if ( !NT_SUCCESS( status ) ) { - mthca_err(mdev, "Failed to reset HCA, aborting.\n"); - goto err_free_dev; - } - - if (ib_core_init()) { - mthca_err(mdev, "Failed to init core, aborting.\n"); - status = STATUS_UNSUCCESSFUL; + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to reset HCA, aborting.\n")); goto err_free_dev; } if (mthca_cmd_init(mdev)) { - mthca_err(mdev, "Failed to init command interface, aborting.\n"); + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to init command interface, aborting.\n")); status = STATUS_DEVICE_DATA_ERROR; goto err_free_dev; } @@ -993,6 +990,7 @@ NTSTATUS mthca_init_one(hca_dev_ext_t *ext) goto err_unregister; } + ext->hca.mdev = mdev; mdev->state = MTHCA_DEV_INITIALIZED; return 0; @@ -1024,46 +1022,40 @@ err_free_dev: kfree(mdev); end: - mdev->state = MTHCA_DEV_FAILED; return status; } -void __devexit mthca_remove_one(hca_dev_ext_t *ext) +void mthca_remove_one(hca_dev_ext_t *ext) { struct mthca_dev *mdev = ext->hca.mdev; u8 status; int p; if (mdev) { - if (mdev->state != MTHCA_DEV_FAILED) { - mthca_free_agents(mdev); - mthca_unregister_device(mdev); - - for (p = 1; p <= mdev->limits.num_ports; ++p) - mthca_CLOSE_IB(mdev, p, &status); - - mthca_cleanup_mcg_table(mdev); - mthca_cleanup_av_table(mdev); - mthca_cleanup_qp_table(mdev); - mthca_cleanup_srq_table(mdev); - mthca_cleanup_cq_table(mdev); - mthca_cmd_use_polling(mdev); - mthca_cleanup_eq_table(mdev); - - mthca_pd_free(mdev, &mdev->driver_pd); - - mthca_cleanup_mr_table(mdev); - mthca_cleanup_pd_table(mdev); - - iounmap(mdev->kar, mdev->kar_size); - mthca_uar_free(mdev, &mdev->driver_uar); - mthca_cleanup_uar_table(mdev); - mthca_close_hca(mdev); - mthca_cmd_cleanup(mdev); - ib_core_cleanup(); - } + mthca_free_agents(mdev); + mthca_unregister_device(mdev); + + for (p = 1; p <= mdev->limits.num_ports; ++p) + mthca_CLOSE_IB(mdev, p, &status); + + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + mthca_pd_free(mdev, &mdev->driver_pd); + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + iounmap(mdev->kar, mdev->kar_size); + mthca_uar_free(mdev, &mdev->driver_uar); + mthca_cleanup_uar_table(mdev); + mthca_close_hca(mdev); + mthca_cmd_cleanup(mdev); kfree(mdev); + ext->hca.mdev = NULL; } } diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_mcg.c b/branches/MTHCA/hw/mthca/kernel/mthca_mcg.c index 67429d18..1a8a9fd0 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_mcg.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_mcg.c @@ -33,11 +33,19 @@ */ #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_mcg.tmh" +#endif #include "mthca_cmd.h" -enum { - MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) -}; + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_mcg_table) +#pragma alloc_text (PAGE, mthca_cleanup_mcg_table) +#endif struct mthca_mgm { __be32 next_gid_index; @@ -84,23 +92,23 @@ static int find_mgm(struct mthca_dev *dev, if (err) goto out; if (status) { - mthca_err(dev, "MGID_HASH returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MGID_HASH returned status %02x\n", status)); err = -EINVAL; goto out; } if (0) - mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" + 
HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Hash for %04x:%04x:%04x:%04x:" "%04x:%04x:%04x:%04x is %04x\n", - be16_to_cpu(((__be16 *) gid)[0]), - be16_to_cpu(((__be16 *) gid)[1]), - be16_to_cpu(((__be16 *) gid)[2]), - be16_to_cpu(((__be16 *) gid)[3]), - be16_to_cpu(((__be16 *) gid)[4]), - be16_to_cpu(((__be16 *) gid)[5]), - be16_to_cpu(((__be16 *) gid)[6]), - be16_to_cpu(((__be16 *) gid)[7]), - *hash); + cl_ntoh16(((__be16 *) gid)[0]), + cl_ntoh16(((__be16 *) gid)[1]), + cl_ntoh16(((__be16 *) gid)[2]), + cl_ntoh16(((__be16 *) gid)[3]), + cl_ntoh16(((__be16 *) gid)[4]), + cl_ntoh16(((__be16 *) gid)[5]), + cl_ntoh16(((__be16 *) gid)[6]), + cl_ntoh16(((__be16 *) gid)[7]), + *hash)); *index = *hash; *prev = -1; @@ -110,13 +118,14 @@ static int find_mgm(struct mthca_dev *dev, if (err) goto out; if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); - return -EINVAL; + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; } if (!memcmp(mgm->gid, zero_gid, 16)) { if (*index != *hash) { - mthca_err(dev, "Found zero MGID in AMGM.\n"); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Found zero MGID in AMGM.\n")); err = -EINVAL; } goto out; @@ -126,7 +135,7 @@ static int find_mgm(struct mthca_dev *dev, goto out; *prev = *index; - *index = be32_to_cpu(mgm->next_gid_index) >> 5; + *index = cl_ntoh32(mgm->next_gid_index) >> 6; } while (*index); *index = -1; @@ -153,8 +162,10 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return PTR_ERR(mailbox); mgm = mailbox->buf; - if (down_interruptible(&dev->mcg_table.mutex)) - return -EINTR; + if (down_interruptible(&dev->mcg_table.mutex)) { + err = -EINTR; + goto err_sem; + } err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) @@ -168,7 +179,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) index = mthca_alloc(&dev->mcg_table.alloc); if (index == -1) { - mthca_err(dev, "No AMGM entries left\n"); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("No AMGM entries left\n")); err = -ENOMEM; goto out; } @@ -177,23 +188,29 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (err) goto out; if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); err = -EINVAL; goto out; } + memset(mgm, 0, sizeof *mgm); memcpy(mgm->gid, gid->raw, 16); mgm->next_gid_index = 0; } for (i = 0; i < MTHCA_QP_PER_MGM; ++i) - if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) { - mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31)); + if (mgm->qp[i] == cl_hton32(ibqp->qp_num | (1 << 31))) { + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("QP %06x already a member of MGM\n", + ibqp->qp_num)); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cl_hton32(1 << 31))) { + mgm->qp[i] = cl_hton32(ibqp->qp_num | (1 << 31)); break; } if (i == MTHCA_QP_PER_MGM) { - mthca_err(dev, "MGM at index %x is full.\n", index); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MGM at index %x is full.\n", index)); err = -ENOMEM; goto out; } @@ -202,8 +219,9 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (err) goto out; if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); err = -EINVAL; + goto out; } if (!link) @@ -213,23 +231,28 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 
lid) if (err) goto out; if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); err = -EINVAL; goto out; } - mgm->next_gid_index = cpu_to_be32(index << 5); + mgm->next_gid_index = cl_hton32(index << 6); err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); err = -EINVAL; } - out: +out: + if (err && link && index != -1) { + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } KeReleaseMutex(&dev->mcg_table.mutex,FALSE); +err_sem: mthca_free_mailbox(dev, mailbox); return err; } @@ -250,37 +273,39 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return PTR_ERR(mailbox); mgm = mailbox->buf; - if (down_interruptible(&dev->mcg_table.mutex)) - return -EINTR; + if (down_interruptible(&dev->mcg_table.mutex)) { + err = -EINTR; + goto err_sem; + } err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; if (index == -1) { - mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW, ("MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " "not found\n", - be16_to_cpu(((__be16 *) gid->raw)[0]), - be16_to_cpu(((__be16 *) gid->raw)[1]), - be16_to_cpu(((__be16 *) gid->raw)[2]), - be16_to_cpu(((__be16 *) gid->raw)[3]), - be16_to_cpu(((__be16 *) gid->raw)[4]), - be16_to_cpu(((__be16 *) gid->raw)[5]), - be16_to_cpu(((__be16 *) gid->raw)[6]), - be16_to_cpu(((__be16 *) gid->raw)[7])); + cl_ntoh16(((__be16 *) gid->raw)[0]), + cl_ntoh16(((__be16 *) gid->raw)[1]), + cl_ntoh16(((__be16 *) gid->raw)[2]), + cl_ntoh16(((__be16 *) gid->raw)[3]), + cl_ntoh16(((__be16 *) gid->raw)[4]), + cl_ntoh16(((__be16 *) gid->raw)[5]), + cl_ntoh16(((__be16 *) gid->raw)[6]), + cl_ntoh16(((__be16 *) gid->raw)[7]))); err = -EINVAL; goto out; } for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) { - if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) + if (mgm->qp[i] == cl_hton32(ibqp->qp_num | (1 << 31))) loc = i; - if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) + if (!(mgm->qp[i] & cl_hton32(1 << 31))) break; } if (loc == -1) { - mthca_err(dev, "QP %06x not found in MGM\n", ibqp->qp_num); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QP %06x not found in MGM\n", ibqp->qp_num)); err = -EINVAL; goto out; } @@ -292,7 +317,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (err) goto out; if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); err = -EINVAL; goto out; } @@ -300,19 +325,17 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (i != 1) goto out; - goto out; - if (prev == -1) { /* Remove entry from MGM */ - if (be32_to_cpu(mgm->next_gid_index) >> 5) { - err = mthca_READ_MGM(dev, - be32_to_cpu(mgm->next_gid_index) >> 5, + int amgm_index_to_free = cl_ntoh32(mgm->next_gid_index) >> 6; + if (amgm_index_to_free) { + err = mthca_READ_MGM(dev, amgm_index_to_free, mailbox, &status); if (err) goto out; if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", - status); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("READ_MGM returned status %02x\n", + status)); err = -EINVAL; goto out; } @@ -323,48 +346,57 @@ int mthca_multicast_detach(struct ib_qp 
*ibqp, union ib_gid *gid, u16 lid) if (err) goto out; if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); err = -EINVAL; goto out; } + if (amgm_index_to_free) { + BUG_ON(amgm_index_to_free < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, amgm_index_to_free); + } } else { /* Remove entry from AMGM */ - index = be32_to_cpu(mgm->next_gid_index) >> 5; + int curr_next_index = cl_ntoh32(mgm->next_gid_index) >> 6; err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); err = -EINVAL; goto out; } - mgm->next_gid_index = cpu_to_be32(index << 5); + mgm->next_gid_index = cl_hton32(curr_next_index << 6); err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); err = -EINVAL; goto out; } + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); } out: KeReleaseMutex(&dev->mcg_table.mutex, FALSE); +err_sem: mthca_free_mailbox(dev, mailbox); return err; } -int __devinit mthca_init_mcg_table(struct mthca_dev *dev) +int mthca_init_mcg_table(struct mthca_dev *dev) { int err; + int table_size = dev->limits.num_mgms + dev->limits.num_amgms; err = mthca_alloc_init(&dev->mcg_table.alloc, - dev->limits.num_amgms, - dev->limits.num_amgms - 1, - 0); + table_size, + table_size - 1, + dev->limits.num_mgms); + if (err) return err; @@ -373,7 +405,9 @@ int __devinit mthca_init_mcg_table(struct mthca_dev *dev) return 0; } -void __devexit mthca_cleanup_mcg_table(struct mthca_dev *dev) +void mthca_cleanup_mcg_table(struct mthca_dev *dev) { mthca_alloc_cleanup(&dev->mcg_table.alloc); } + + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_memfree.c b/branches/MTHCA/hw/mthca/kernel/mthca_memfree.c index af8675c4..af85b5fc 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_memfree.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_memfree.c @@ -37,6 +37,12 @@ #include "hca_driver.h" #include "mthca_memfree.h" #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_memfree.tmh" +#endif #include "mthca_cmd.h" /* @@ -71,8 +77,7 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) - free_pages(dev, get_order(chunk->mem[i].length), - chunk->mem[i].page, chunk->mem[i].dma_address); + free_dma_mem_map(dev, &chunk->mem[i], PCI_DMA_BIDIRECTIONAL ); kfree(chunk); } @@ -116,20 +121,17 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, --cur_order; /* try to allocate a contiguous PHYSICAL buffer */ - chunk->mem[chunk->npages].page = alloc_pages( dev, - cur_order,&chunk->mem[chunk->npages].dma_address); + alloc_dma_zmem( dev, PAGE_SIZE << cur_order, + &chunk->mem[chunk->npages] ); /* if succeded - proceed handling */ if (chunk->mem[chunk->npages].page) { - chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order; - chunk->mem[chunk->npages].offset = 0; /* check, whether a chunk is full */ if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) { /* it's full --> map physical addresses to bus ones */ chunk->nsg = pci_map_sg(dev, chunk->mem, - chunk->npages, - PCI_DMA_BIDIRECTIONAL); + chunk->npages, 
PCI_DMA_BIDIRECTIONAL ); if (chunk->nsg <= 0) goto fail; @@ -246,7 +248,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj) for (i = 0; i < chunk->npages; ++i) { if ((int)chunk->mem[i].length >= offset) { page = chunk->mem[i].page; - break; + goto out; } offset -= chunk->mem[i].length; } @@ -344,8 +346,8 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, } #if 0 - mthca_dbg(dev, "Allocated/max chunks %d:%d, reserved/max objects %#x:%#x, one/total size %#x:%#x at %llx \n", - i, num_icm, reserved, nobj, obj_size, nobj * obj_size, (unsigned long long) virt); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Allocated/max chunks %d:%d, reserved/max objects %#x:%#x, one/total size %#x:%#x at %lx \n", + i, num_icm, reserved, nobj, obj_size, nobj * obj_size, (unsigned long long) virt)); #endif return table; @@ -376,8 +378,8 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) } #if 0 - mthca_dbg(dev, "Released chunks %d, objects %#x, one/total size %#x:%#x at %llx \n", - table->num_icm, table->num_obj, table->obj_size, table->num_obj * table->obj_size, (unsigned long long) table->virt); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW, ( "Released chunks %d, objects %#x, one/total size %#x:%#x at %lx \n", + table->num_icm, table->num_obj, table->obj_size, table->num_obj * table->obj_size, (unsigned long long) table->virt)); #endif kfree(table); } @@ -419,7 +421,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, } ret = get_user_pages(dev, uaddr & PAGE_MASK, 1, 1, - &db_tab->page[i].mem.page, &db_tab->page[i].mem.p_mdl); + &db_tab->page[i].mem); if (ret < 0) goto out; @@ -427,7 +429,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, db_tab->page[i].mem.offset = (unsigned)(uaddr & ~PAGE_MASK); ret = pci_map_sg(dev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); - if (ret < 0) { + if (ret <= 0) { put_page(&db_tab->page[i].mem); goto out; } @@ -507,9 +509,11 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, put_page(&db_tab->page[i].mem); } } + + kfree(db_tab); } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, u32 qn, __be32 **db) { int group; int start, end, dir; @@ -517,6 +521,7 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) struct mthca_db_page *page; int ret = 0; u8 status; + CPU_2_BE64_PREP; down(&dev->db_tab->mutex); switch (type) { @@ -574,21 +579,19 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) page = dev->db_tab->page + end; alloc: - page->db_rec = dma_alloc_coherent(dev, 4096, - &page->mapping, GFP_KERNEL); - if (!page->db_rec) { + alloc_dma_zmem_map(dev, 4096, PCI_DMA_BIDIRECTIONAL, &page->sg); + if (!page->sg.page) { ret = -ENOMEM; goto out; } - RtlZeroMemory(page->db_rec, 4096); + page->db_rec = (__be64*)page->sg.page; - ret = mthca_MAP_ICM_page(dev, page->mapping, + ret = mthca_MAP_ICM_page(dev, page->sg.dma_address, mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { - dma_free_coherent(dev, 4096, - page->db_rec, page->mapping); + free_dma_mem_map(dev, &page->sg, PCI_DMA_BIDIRECTIONAL); goto out; } @@ -603,7 +606,7 @@ found: ret = i * MTHCA_DB_REC_PER_PAGE + j; - page->db_rec[j] = cpu_to_be64((((ULONGLONG)qn << 8) | (type << 5))); + page->db_rec[j] = CPU_2_BE64((((ULONGLONG)qn << 8) | (type << 5))); *db = (__be32 *) &page->db_rec[j]; out: @@ -634,8 +637,7 @@ void 
mthca_free_db(struct mthca_dev *dev, int type, int db_index) i >= dev->db_tab->max_group1 - 1) { mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); - dma_free_coherent(dev, 4096, - page->db_rec, page->mapping); + free_dma_mem_map(dev, &page->sg, PCI_DMA_BIDIRECTIONAL); page->db_rec = NULL; if (i == dev->db_tab->max_group1) { @@ -699,13 +701,11 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev) continue; if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) - mthca_warn(dev, "Kernel UARC page %d not empty\n", i); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Kernel UARC page %d not empty\n", i)); mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); - dma_free_coherent(dev, 4096, - dev->db_tab->page[i].db_rec, - dev->db_tab->page[i].mapping); + free_dma_mem_map(dev, &dev->db_tab->page[i].sg, PCI_DMA_BIDIRECTIONAL); } kfree(dev->db_tab->page); diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_memfree.h b/branches/MTHCA/hw/mthca/kernel/mthca_memfree.h index e167f210..c2c832cf 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_memfree.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_memfree.h @@ -135,7 +135,7 @@ enum { struct mthca_db_page { DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); __be64 *db_rec; - dma_addr_t mapping; + struct scatterlist sg; }; struct mthca_db_table { @@ -169,7 +169,7 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_mr.c b/branches/MTHCA/hw/mthca/kernel/mthca_mr.c index 045284c9..2d505cf4 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_mr.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_mr.c @@ -34,9 +34,25 @@ */ #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_mr.tmh" +#endif #include "mthca_cmd.h" #include "mthca_memfree.h" +static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order); +static void mthca_buddy_cleanup(struct mthca_buddy *buddy); + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_buddy_init) +#pragma alloc_text (PAGE, mthca_buddy_cleanup) +#pragma alloc_text (PAGE, mthca_init_mr_table) +#pragma alloc_text (PAGE, mthca_cleanup_mr_table) +#endif + struct mthca_mtt { struct mthca_buddy *buddy; int order; @@ -60,7 +76,7 @@ struct mthca_mpt_entry { __be64 mtt_seg; __be32 mtt_sz; /* Arbel only */ u32 reserved[2]; -} __attribute__((packed)); +} ; #pragma pack(pop) #define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) @@ -85,8 +101,9 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) int o; u32 m; u32 seg; + SPIN_LOCK_PREP(lh); - spin_lock(&buddy->lock); + spin_lock(&buddy->lock, &lh); for (o = order; o <= buddy->max_order; ++o) { m = 1 << (buddy->max_order - o); @@ -95,7 +112,7 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) goto found; } - spin_unlock(&buddy->lock); + spin_unlock(&lh); return -1; found: @@ -107,7 +124,7 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) set_bit(seg ^ 1, buddy->bits[o]); } - spin_unlock(&buddy->lock); + spin_unlock(&lh); seg <<= order; @@ -116,9 +133,11 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int 
order) static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) { + SPIN_LOCK_PREP(lh); + seg >>= order; - spin_lock(&buddy->lock); + spin_lock(&buddy->lock, &lh); while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); @@ -128,10 +147,10 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) set_bit(seg, buddy->bits[order]); - spin_unlock(&buddy->lock); + spin_unlock(&lh); } -static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order) +static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) { int i, s; @@ -168,7 +187,7 @@ err_out: return -ENOMEM; } -static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy) +static void mthca_buddy_cleanup(struct mthca_buddy *buddy) { int i; @@ -260,13 +279,13 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, while (list_len > 0) { val = dev->mr_table.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE + start_index * 8; - mtt_entry[0] = cpu_to_be64(val); + mtt_entry[0] = cl_hton64(val); mtt_entry[1] = 0; for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) { val = buffer_list[i]; // BUG in compiler: it can't perform OR on u64 !!! We perform OR on the low dword *(PULONG)&val |= MTHCA_MTT_FLAG_PRESENT; - mtt_entry[i + 2] = cpu_to_be64(val); + mtt_entry[i + 2] = cl_hton64(val); } /* @@ -278,12 +297,12 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status); if (err) { - mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("WRITE_MTT failed (%d)\n", err)); goto out; } if (status) { - mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", - status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("WRITE_MTT returned status 0x%02x\n", + status)); err = -EINVAL; goto out; } @@ -335,7 +354,7 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) } int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, - u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) + u64 iova, u64 total_size, mthca_mpt_access_t access, struct mthca_mr *mr) { struct mthca_mailbox *mailbox; struct mthca_mpt_entry *mpt_entry; @@ -343,6 +362,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, int i; int err; u8 status; + CPU_2_BE64_PREP; might_sleep(); @@ -366,35 +386,35 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, } mpt_entry = mailbox->buf; - mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | + mpt_entry->flags = cl_hton32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | MTHCA_MPT_FLAG_REGION | access); if (!mr->mtt) - mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL); + mpt_entry->flags |= cl_hton32(MTHCA_MPT_FLAG_PHYSICAL); - mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); - mpt_entry->key = cpu_to_be32(key); - mpt_entry->pd = cpu_to_be32(pd); - mpt_entry->start = cpu_to_be64(iova); - mpt_entry->length = cpu_to_be64(total_size); + mpt_entry->page_size = cl_hton32(buffer_size_shift - 12); + mpt_entry->key = cl_hton32(key); + mpt_entry->pd = cl_hton32(pd); + mpt_entry->start = cl_hton64(iova); + mpt_entry->length = cl_hton64(total_size); RtlZeroMemory(&mpt_entry->lkey, sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); if (mr->mtt) mpt_entry->mtt_seg = - cpu_to_be64(dev->mr_table.mtt_base + + CPU_2_BE64(dev->mr_table.mtt_base + mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); - if (0) { - mthca_dbg(dev, "Dumping MPT 
entry %08x:\n", mr->ibmr.lkey); - for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { - if (i % 4 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); - if ((i + 1) % 4 == 0) - printk("\n"); + if (1) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("Dumping MPT entry %08x :\n", mr->ibmr.lkey)); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("[%02x] %08x %08x %08x %08x \n",i, + cl_ntoh32(((__be32 *) mpt_entry)[i]), + cl_ntoh32(((__be32 *) mpt_entry)[i+1]), + cl_ntoh32(((__be32 *) mpt_entry)[i+2]), + cl_ntoh32(((__be32 *) mpt_entry)[i+3]))); } } @@ -402,11 +422,11 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, key & (dev->limits.num_mpts - 1), &status); if (err) { - mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("SW2HW_MPT failed (%d)\n", err)); goto err_out_mailbox; } else if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("SW2HW_MPT returned status 0x%02x\n", + status)); err = -EINVAL; goto err_out_mailbox; } @@ -426,7 +446,7 @@ err_out_mpt_free: } int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_mr *mr) + mthca_mpt_access_t access, struct mthca_mr *mr) { mr->mtt = NULL; return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr); @@ -435,10 +455,10 @@ int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, u64 *buffer_list, int buffer_size_shift, int list_len, u64 iova, u64 total_size, - u32 access, struct mthca_mr *mr) + mthca_mpt_access_t access, struct mthca_mr *mr) { int err; - + HCA_ENTER(HCA_DBG_MEMORY); mr->mtt = mthca_alloc_mtt(dev, list_len); if (IS_ERR(mr->mtt)) return PTR_ERR(mr->mtt); @@ -454,15 +474,14 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, if (err) mthca_free_mtt(dev, mr->mtt); + HCA_EXIT(HCA_DBG_MEMORY); return err; } /* Free mr or fmr */ static void mthca_free_region(struct mthca_dev *dev, u32 lkey) { - mthca_table_put(dev, dev->mr_table.mpt_table, - arbel_key_to_hw_index(lkey)); - + mthca_table_put(dev, dev->mr_table.mpt_table, key_to_hw_index(dev, lkey)); mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); } @@ -477,18 +496,19 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) key_to_hw_index(dev, mr->ibmr.lkey) & (dev->limits.num_mpts - 1), &status); - if (err) - mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", - status); + if (err){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("HW2SW_MPT failed (%d)\n", err)); + }else if (status){ + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("HW2SW_MPT returned status 0x%02x\n", + status)); + } mthca_free_region(dev, mr->ibmr.lkey); mthca_free_mtt(dev, mr->mtt); } int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_fmr *mr) + mthca_mpt_access_t access, struct mthca_fmr *mr) { struct mthca_mpt_entry *mpt_entry; struct mthca_mailbox *mailbox; @@ -498,6 +518,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, int list_len = mr->attr.max_pages; int err = -ENOMEM; int i; + CPU_2_BE64_PREP; might_sleep(); @@ -548,26 +569,26 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, mpt_entry = mailbox->buf; - mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | + mpt_entry->flags = cl_hton32(MTHCA_MPT_FLAG_SW_OWNS | 
MTHCA_MPT_FLAG_MIO | MTHCA_MPT_FLAG_REGION | access); - mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12); - mpt_entry->key = cpu_to_be32(key); - mpt_entry->pd = cpu_to_be32(pd); + mpt_entry->page_size = cl_hton32(mr->attr.page_size - 12); + mpt_entry->key = cl_hton32(key); + mpt_entry->pd = cl_hton32(pd); RtlZeroMemory(&mpt_entry->start, sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start)); - mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg); - - if (0) { - mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); - for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { - if (i % 4 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); - if ((i + 1) % 4 == 0) - printk("\n"); + mpt_entry->mtt_seg = CPU_2_BE64(dev->mr_table.mtt_base + mtt_seg); + + if (1) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("Dumping MPT entry %08x:\n", mr->ibmr.lkey)); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("[%02x] %08x %08x %08x %08x \n",i, + cl_ntoh32(((__be32 *) mpt_entry)[i]), + cl_ntoh32(((__be32 *) mpt_entry)[i+1]), + cl_ntoh32(((__be32 *) mpt_entry)[i+2]), + cl_ntoh32(((__be32 *) mpt_entry)[i+3]))); } } @@ -575,12 +596,12 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, key & (dev->limits.num_mpts - 1), &status); if (err) { - mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("SW2HW_MPT failed (%d)\n", err)); goto err_out_mailbox_free; } if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("SW2HW_MPT returned status 0x%02x\n", + status)); err = -EINVAL; goto err_out_mailbox_free; } @@ -649,6 +670,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, struct mthca_mpt_entry mpt_entry; u32 key; int i, err; + CPU_2_BE64_PREP; err = mthca_check_fmr(fmr, page_list, list_len, iova); if (err) @@ -665,16 +687,16 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; ++i) { // BUG in compiler: it can't perform OR on u64 !!! We perform OR on the low dword u64 val = page_list[i]; - __be64 mtt_entry = cpu_to_be64(val); + __be64 mtt_entry = cl_hton64(val); *(PULONG)&val |= MTHCA_MTT_FLAG_PRESENT; mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i); } - mpt_entry.lkey = cpu_to_be32(key); - mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); - mpt_entry.start = cpu_to_be64(iova); + mpt_entry.lkey = cl_hton32(key); + mpt_entry.length = CPU_2_BE64(list_len * (1ull << fmr->attr.page_size)); + mpt_entry.start = cl_hton64(iova); - __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); + __raw_writel((u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, offsetof(struct mthca_mpt_entry, window_count) - offsetof(struct mthca_mpt_entry, start)); @@ -691,6 +713,7 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, struct mthca_dev *dev = to_mdev(ibfmr->device); u32 key; int i, err; + CPU_2_BE64_PREP; err = mthca_check_fmr(fmr, page_list, list_len, iova); if (err) @@ -710,13 +733,13 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, // BUG in compiler: it can't perform OR on u64 !!! 
We perform OR on the low dword u64 val = page_list[i]; *(PULONG)&val |= MTHCA_MTT_FLAG_PRESENT; - fmr->mem.arbel.mtts[i] = cpu_to_be64(val); + fmr->mem.arbel.mtts[i] = cl_hton64(val); } - fmr->mem.arbel.mpt->key = cpu_to_be32(key); - fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); - fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); - fmr->mem.arbel.mpt->start = cpu_to_be64(iova); + fmr->mem.arbel.mpt->key = cl_hton32(key); + fmr->mem.arbel.mpt->lkey = cl_hton32(key); + fmr->mem.arbel.mpt->length = CPU_2_BE64(list_len * (1ull << fmr->attr.page_size)); + fmr->mem.arbel.mpt->start = cl_hton64(iova); wmb(); @@ -759,7 +782,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; } -int __devinit mthca_init_mr_table(struct mthca_dev *dev) +int mthca_init_mr_table(struct mthca_dev *dev) { int err, i; @@ -788,7 +811,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) i = fls(dev->limits.fmr_reserved_mtts - 1); if (i >= 31) { - mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n"); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("Unable to reserve 2^31 FMR MTTs.\n")); err = -EINVAL; goto err_fmr_mpt; } @@ -799,7 +822,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) &dev->mr_table.tavor_fmr.mpt_base_size); if (!dev->mr_table.tavor_fmr.mpt_base) { - mthca_warn(dev, "MPT ioremap for FMR failed.\n"); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("MPT ioremap for FMR failed.\n")); err = -ENOMEM; goto err_fmr_mpt; } @@ -809,7 +832,7 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) (1 << i) * MTHCA_MTT_SEG_SIZE, &dev->mr_table.tavor_fmr.mtt_base_size ); if (!dev->mr_table.tavor_fmr.mtt_base) { - mthca_warn(dev, "MTT ioremap for FMR failed.\n"); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("MTT ioremap for FMR failed.\n")); err = -ENOMEM; goto err_fmr_mtt; } @@ -834,8 +857,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) if (mthca_alloc_mtt_range(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) { - mthca_warn(dev, "MTT table of order %d is too small.\n", - dev->mr_table.fmr_mtt_buddy->max_order); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("MTT table of order %d is too small.\n", + dev->mr_table.fmr_mtt_buddy->max_order)); err = -ENOMEM; goto err_reserve_mtts; } @@ -867,7 +890,7 @@ err_mtt_buddy: return err; } -void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev) +void mthca_cleanup_mr_table(struct mthca_dev *dev) { /* XXX check if any MRs are still allocated? 
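Aside on the "BUG in compiler" comments above (in mthca_write_mtt, mthca_tavor_map_phys_fmr and mthca_arbel_map_phys_fmr): they work around a compiler that miscompiles a plain bitwise OR on a u64 by OR-ing MTHCA_MTT_FLAG_PRESENT into the low 32-bit word through a PULONG cast. A minimal user-space sketch of the same trick, not part of the patch, with a stand-in flag value; the two forms agree only on little-endian hosts, which is what these x86/x64 builds assume:

#include <stdint.h>
#include <assert.h>

#define MTT_FLAG_PRESENT 1u                     /* stand-in for MTHCA_MTT_FLAG_PRESENT */

int main(void)
{
	uint64_t val   = 0x12345000ULL;             /* page address taken from buffer_list[] */
	uint64_t plain = val | MTT_FLAG_PRESENT;    /* what a straight 64-bit OR gives */

	*(uint32_t *)&val |= MTT_FLAG_PRESENT;      /* the low-dword workaround */

	assert(val == plain);                       /* holds on little-endian hosts only */
	return 0;
}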
*/ if (dev->limits.fmr_reserved_mtts) @@ -884,3 +907,4 @@ void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev) mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); } + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_pd.c b/branches/MTHCA/hw/mthca/kernel/mthca_pd.c index 9f207eed..c42105ca 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_pd.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_pd.c @@ -36,6 +36,12 @@ #include "mthca_dev.h" + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_pd_table) +#pragma alloc_text (PAGE, mthca_cleanup_pd_table) +#endif + int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) { int err = 0; @@ -69,7 +75,7 @@ void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) mthca_free(&dev->pd_table.alloc, pd->pd_num); } -int __devinit mthca_init_pd_table(struct mthca_dev *dev) +int mthca_init_pd_table(struct mthca_dev *dev) { return mthca_alloc_init(&dev->pd_table.alloc, dev->limits.num_pds, @@ -77,8 +83,9 @@ int __devinit mthca_init_pd_table(struct mthca_dev *dev) dev->limits.reserved_pds); } -void __devexit mthca_cleanup_pd_table(struct mthca_dev *dev) +void mthca_cleanup_pd_table(struct mthca_dev *dev) { /* XXX check if any PDs are still allocated? */ mthca_alloc_cleanup(&dev->pd_table.alloc); } + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_profile.c b/branches/MTHCA/hw/mthca/kernel/mthca_profile.c index 548fdf80..3a665826 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_profile.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_profile.c @@ -35,6 +35,12 @@ #include "mthca_profile.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_profile.tmh" +#endif enum { MTHCA_RES_QP, @@ -149,30 +155,30 @@ u64 mthca_make_profile(struct mthca_dev *dev, total_size += profile[i].size; } if (total_size > mem_avail) { - mthca_err(dev, "Profile requires 0x%llx bytes; " - "won't in 0x%llx bytes of context memory.\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Profile requires 0x%I64x bytes; " + "won't in 0x%I64x bytes of context memory.\n", (unsigned long long) total_size, - (unsigned long long) mem_avail); + (unsigned long long) mem_avail)); kfree(profile); return -ENOMEM; } if (profile[i].size) - mthca_dbg(dev, "profile[%2d]--%2d/%2d @ 0x%16llx " - "(size 0x%8llx)\n", + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("profile[%2d]--%2d/%2d @ 0x%16I64x " + "(size 0x%8I64x)\n", i, profile[i].type, profile[i].log_num, (unsigned long long) profile[i].start, - (unsigned long long) profile[i].size); + (unsigned long long) profile[i].size)); } - if (mthca_is_memfree(dev)) - mthca_dbg(dev, "HCA context memory: reserving %d KB\n", - (int) (total_size >> 10)); - else - mthca_dbg(dev, "HCA memory: allocated %d KB/%d KB (%d KB free)\n", + if (mthca_is_memfree(dev)){ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA context memory: reserving %d KB\n", + (int) (total_size >> 10))); + }else{ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA memory: allocated %d KB/%d KB (%d KB free)\n", (int) (total_size >> 10), (int) (mem_avail >> 10), - (int) ((mem_avail - total_size) >> 10)); - + (int) ((mem_avail - total_size) >> 10))); + } for (i = 0; i < MTHCA_RES_NUM; ++i) { int mc_entry_sz = MTHCA_MGM_ENTRY_SIZE; int mtt_seg_sz = MTHCA_MTT_SEG_SIZE; diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_provider.c b/branches/MTHCA/hw/mthca/kernel/mthca_provider.c index 2bcaacde..0dee4874 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_provider.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_provider.c @@ -1,4 +1,4 @@ -/* +/* * 
Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. @@ -38,11 +38,31 @@ #include +#include "ib_user_verbs.h" #include "mthca_dev.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_provider.tmh" +#endif #include "mthca_cmd.h" #include "mthca_user.h" #include "mthca_memfree.h" + void ibv_umem_release(struct ib_device *dev, struct ib_umem *umem); + int ibv_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write); + + static void init_query_mad(struct ib_smp *mad) + { + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; + } + int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -53,25 +73,16 @@ u8 status; - in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; - RtlZeroMemory(props, sizeof *props); - - props->fw_ver = mdev->fw_ver; - - RtlZeroMemory(in_mad, sizeof *in_mad); - in_mad->base_version = 1; - in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - in_mad->class_version = 1; - in_mad->method = IB_MGMT_METHOD_GET; - in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(mdev, 1, 1, - 1, NULL, NULL, in_mad, out_mad, - &status); + 1, NULL, NULL, in_mad, out_mad, &status); if (err) goto out; if (status) { @@ -79,25 +90,38 @@ goto out; } + RtlZeroMemory(props, sizeof *props); + props->fw_ver = mdev->fw_ver; props->device_cap_flags = mdev->device_cap_flags; - props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & + props->vendor_id = cl_ntoh32(*(__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); - props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32)); + props->vendor_part_id = cl_ntoh16(*(__be16 *) (out_mad->data + 30)); + props->hw_ver = cl_ntoh32(*(__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); - memcpy(&props->node_guid, out_mad->data + 12, 8); props->max_mr_size = ~0ull; + props->page_size_cap = mdev->limits.page_size_cap; props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; - props->max_qp_wr = 0xffff; + props->max_qp_wr = mdev->limits.max_wqes; props->max_sge = mdev->limits.max_sg; props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; - props->max_cqe = 0xffff; + props->max_cqe = mdev->limits.max_cqes; props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; - props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + props->max_srq_sge = mdev->limits.max_sg; props->local_ca_ack_delay = (u8)mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? 
+ IB_ATOMIC_LOCAL : IB_ATOMIC_NONE; + props->max_pkeys = (u16)mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; err = 0; out: @@ -114,18 +138,14 @@ int mthca_query_port(struct ib_device *ibdev, int err = -ENOMEM; u8 status; - in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; - RtlZeroMemory(in_mad, sizeof *in_mad); - in_mad->base_version = 1; - in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - in_mad->class_version = 1; - in_mad->method = IB_MGMT_METHOD_GET; - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cl_hton32(port); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, @@ -137,18 +157,24 @@ int mthca_query_port(struct ib_device *ibdev, goto out; } - props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); + RtlZeroMemory(props, sizeof *props); + props->lid = cl_ntoh16(*(__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; - props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); + props->sm_lid = cl_ntoh16(*(__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; - props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); + props->port_cap_flags = cl_ntoh32(*(__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->max_msg_sz = 0x80000000; props->pkey_tbl_len = (u16)to_mdev(ibdev)->limits.pkey_table_len; - props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->bad_pkey_cntr = cl_ntoh16(*(__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = cl_ntoh16(*(__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; out: kfree(in_mad); @@ -199,18 +225,14 @@ int mthca_query_pkey(struct ib_device *ibdev, int err = -ENOMEM; u8 status; - in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; - RtlZeroMemory(in_mad, sizeof *in_mad); - in_mad->base_version = 1; - in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - in_mad->class_version = 1; - in_mad->method = IB_MGMT_METHOD_GET; - in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; - in_mad->attr_mod = cpu_to_be32(index / 32); + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cl_hton32(index / 32); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, @@ -222,7 +244,7 @@ int mthca_query_pkey(struct ib_device *ibdev, goto out; } - *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); + *pkey = cl_ntoh16(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); @@ -238,18 +260,14 @@ int mthca_query_gid(struct ib_device *ibdev, u8 port, int err = -ENOMEM; u8 status; - in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof 
*out_mad, GFP_KERNEL); if (!in_mad || !out_mad) goto out; - RtlZeroMemory(in_mad, sizeof *in_mad); - in_mad->base_version = 1; - in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - in_mad->class_version = 1; - in_mad->method = IB_MGMT_METHOD_GET; - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cl_hton32(port); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, @@ -263,13 +281,9 @@ int mthca_query_gid(struct ib_device *ibdev, u8 port, memcpy(gid->raw, out_mad->data + 8, 8); - RtlZeroMemory(in_mad, sizeof *in_mad); - in_mad->base_version = 1; - in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - in_mad->class_version = 1; - in_mad->method = IB_MGMT_METHOD_GET; - in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; - in_mad->attr_mod = cpu_to_be32(index / 8); + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cl_hton32(index / 8); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad, @@ -290,7 +304,7 @@ int mthca_query_gid(struct ib_device *ibdev, u8 port, } struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata) + ci_umv_buf_t* const p_umv_buf) { struct mthca_alloc_ucontext_resp uresp; struct mthca_ucontext *context; @@ -304,41 +318,92 @@ struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, else uresp.uarc_size = 0; - context = kmalloc(sizeof *context, GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); - + context = kzalloc(sizeof *context, GFP_KERNEL); + if (!context) { + err = -ENOMEM; + goto err_nomem; + } + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); - if (err) { - kfree(context); - return ERR_PTR(err); + if (err) + goto err_uar_alloc; + + /* + * map uar to user space + */ + + /* map UAR to kernel */ + context->kva = ioremap(context->uar.pfn << PAGE_SHIFT, PAGE_SIZE,&context->uar_size); + if (!context->kva) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW ,("Couldn't map kernel access region, aborting.\n") ); + err = -ENOMEM; + goto err_ioremap; + } + + /* build MDL */ + context->mdl = IoAllocateMdl( context->kva, (ULONG)context->uar_size, + FALSE, TRUE, NULL ); + if( !context->mdl ) { + err = -ENOMEM; + goto err_alloc_mdl; + } + MmBuildMdlForNonPagedPool( context->mdl ); + + /* Map the memory into the calling process's address space. 
*/ + __try { + context->ibucontext.user_uar = MmMapLockedPagesSpecifyCache( context->mdl, + UserMode, MmNonCached, NULL, FALSE, NormalPagePriority ); + } + __except(EXCEPTION_EXECUTE_HANDLER) { + err = -EACCES; + goto err_map; } + /* user_db_tab */ context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); if (IS_ERR(context->db_tab)) { err = PTR_ERR(context->db_tab); - mthca_uar_free(to_mdev(ibdev), &context->uar); - kfree(context); - return ERR_PTR(err); + goto err_init_user; } - if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { - mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); - mthca_uar_free(to_mdev(ibdev), &context->uar); - kfree(context); - return ERR_PTR(-EFAULT); - } + err = ib_copy_to_umv_buf(p_umv_buf, &uresp, sizeof uresp); + if (err) + goto err_copy_to_umv_buf; + context->ibucontext.device = ibdev; + + atomic_set(&context->ibucontext.usecnt, 0); return &context->ibucontext; + +err_copy_to_umv_buf: + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, + context->db_tab); +err_init_user: + MmUnmapLockedPages( context->ibucontext.user_uar, context->mdl ); +err_map: + IoFreeMdl(context->mdl); +err_alloc_mdl: + iounmap(context->kva, PAGE_SIZE); +err_ioremap: + mthca_uar_free(to_mdev(ibdev), &context->uar); +err_uar_alloc: + kfree(context); +err_nomem: + return ERR_PTR(err); } int mthca_dealloc_ucontext(struct ib_ucontext *context) { - mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, - to_mucontext(context)->db_tab); - mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); - kfree(to_mucontext(context)); - + struct mthca_ucontext *mucontext = to_mucontext(context); + + mthca_cleanup_user_db_tab(to_mdev(context->device), &mucontext->uar, + mucontext->db_tab); + MmUnmapLockedPages( mucontext->ibucontext.user_uar, mucontext->mdl ); + IoFreeMdl(mucontext->mdl); + iounmap(mucontext->kva, PAGE_SIZE); + mthca_uar_free(to_mdev(context->device), &mucontext->uar); + kfree(mucontext); + return 0; } @@ -367,30 +432,49 @@ static int mthca_mmap_uar(struct ib_ucontext *context, #endif struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, - struct ib_udata *udata) + ci_umv_buf_t* const p_umv_buf) { - struct mthca_pd *pd; int err; + struct mthca_pd *pd; + struct ibv_alloc_pd_resp resp; + /* sanity check */ + if (p_umv_buf && p_umv_buf->command) { + if (p_umv_buf->output_size < sizeof(struct ibv_alloc_pd_resp)) { + err = -EINVAL; + goto err_param; + } + } + pd = kmalloc(sizeof *pd, GFP_KERNEL); - if (!pd) - return ERR_PTR(-ENOMEM); + if (!pd) { + err = -ENOMEM; + goto err_mem; + } err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); if (err) { - kfree(pd); - return ERR_PTR(err); + goto err_pd_alloc; } - if (context) { - if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (u32))) { - mthca_pd_free(to_mdev(ibdev), pd); - kfree(pd); - return ERR_PTR(-EFAULT); + if (p_umv_buf) { + resp.pd_handle = (u64)(UINT_PTR)pd; + resp.pdn = pd->pd_num; + if (ib_copy_to_umv_buf(p_umv_buf, &resp, sizeof(struct ibv_alloc_pd_resp))) { + err = -EFAULT; + goto err_copy; } } return &pd->ibpd; + +err_copy: + mthca_pd_free(to_mdev(ibdev), pd); +err_pd_alloc: + kfree(pd); +err_mem: +err_param: + return ERR_PTR(err); } int mthca_dealloc_pd(struct ib_pd *pd) @@ -430,7 +514,7 @@ int mthca_ah_destroy(struct ib_ah *ah) struct ib_srq *mthca_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) + ci_umv_buf_t* const p_umv_buf) { struct mthca_create_srq ucmd; struct mthca_ucontext *context = 
NULL; @@ -441,12 +525,13 @@ struct ib_srq *mthca_create_srq(struct ib_pd *pd, if (!srq) return ERR_PTR(-ENOMEM); - if (pd->uobject) { - context = to_mucontext(pd->uobject->context); - - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) - return ERR_PTR(-EFAULT); + if (pd->ucontext) { + context = to_mucontext(pd->ucontext); + if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) { + err = -EFAULT; + goto err_free; + } err = mthca_map_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index, ucmd.db_page); @@ -461,14 +546,14 @@ struct ib_srq *mthca_create_srq(struct ib_pd *pd, err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), &init_attr->attr, srq); - if (err && pd->uobject) + if (err && pd->ucontext) mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index); if (err) goto err_free; - if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (u32))) { + if (context && ib_copy_to_umv_buf(p_umv_buf, &srq->srqn, sizeof (u32))) { mthca_free_srq(to_mdev(pd->device), srq); err = -EFAULT; goto err_free; @@ -501,10 +586,11 @@ int mthca_destroy_srq(struct ib_srq *srq) struct ib_qp *mthca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) + ci_umv_buf_t* const p_umv_buf) { - struct mthca_create_qp ucmd; + struct ibv_create_qp ucmd; struct mthca_qp *qp; + struct mthca_ucontext *context; int err; switch (init_attr->qp_type) { @@ -512,37 +598,32 @@ struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_UNRELIABLE_CONN: case IB_QPT_UNRELIABLE_DGRM: { - struct mthca_ucontext *context; qp = kmalloc(sizeof *qp, GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + if (!qp) { + err = -ENOMEM; + goto err_mem; + } - if (pd->uobject) { - context = to_mucontext(pd->uobject->context); + if (pd->ucontext) { + context = to_mucontext(pd->ucontext); - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) - return ERR_PTR(-EFAULT); + if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) { + err = -EFAULT; + goto err_copy; + } err = mthca_map_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.sq_db_index, ucmd.sq_db_page); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + if (err) + goto err_map1; err = mthca_map_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.rq_db_index, ucmd.rq_db_page); - if (err) { - mthca_unmap_user_db(to_mdev(pd->device), - &context->uar, - context->db_tab, - ucmd.sq_db_index); - kfree(qp); - return ERR_PTR(err); - } + if (err) + goto err_map2; qp->mr.ibmr.lkey = ucmd.lkey; qp->sq.db_index = ucmd.sq_db_index; @@ -555,18 +636,11 @@ struct ib_qp *mthca_create_qp(struct ib_pd *pd, init_attr->qp_type, init_attr->sq_sig_type, &init_attr->cap, qp); - if (err && pd->uobject) { - context = to_mucontext(pd->uobject->context); - - mthca_unmap_user_db(to_mdev(pd->device), - &context->uar, - context->db_tab, - ucmd.sq_db_index); - mthca_unmap_user_db(to_mdev(pd->device), - &context->uar, - context->db_tab, - ucmd.rq_db_index); - } + if (err) + if (pd->ucontext) + goto err_alloc_qp_user; + else + goto err_copy; qp->ibqp.qp_num = qp->qpn; break; @@ -575,12 +649,16 @@ struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_QP1: { /* Don't allow userspace to create special QPs */ - if (pd->uobject) - return ERR_PTR(-EINVAL); + if (pd->ucontext) { + err = -EINVAL; + goto err_inval; + } qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + if (!qp) { + err = -ENOMEM; + goto err_mem; + } qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_QP0 
? 0 : 1; @@ -590,37 +668,48 @@ struct ib_qp *mthca_create_qp(struct ib_pd *pd, init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, to_msqp(qp)); + if (err) + goto err_alloc_sqp; + break; } default: /* Don't support raw QPs */ - return ERR_PTR(-ENOSYS); - } - - if (err) { - kfree(qp); - return ERR_PTR(err); + err = -ENOSYS; + goto err_unsupported; } - init_attr->cap.max_inline_data = 0; init_attr->cap.max_send_wr = qp->sq.max; init_attr->cap.max_recv_wr = qp->rq.max; init_attr->cap.max_send_sge = qp->sq.max_gs; init_attr->cap.max_recv_sge = qp->rq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; return &qp->ibqp; + + +err_alloc_qp_user: + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, context->db_tab, ucmd.rq_db_index); +err_map2: + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, context->db_tab, ucmd.sq_db_index); +err_map1: err_copy: err_alloc_sqp: + kfree(qp); +err_mem: err_inval: err_unsupported: + return ERR_PTR(err); } int mthca_destroy_qp(struct ib_qp *qp) { - if (qp->uobject) { + if (qp->ucontext) { mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &to_mucontext(qp->ucontext)->uar, + to_mucontext(qp->ucontext)->db_tab, to_mqp(qp)->sq.db_index); mthca_unmap_user_db(to_mdev(qp->device), - &to_mucontext(qp->uobject->context)->uar, - to_mucontext(qp->uobject->context)->db_tab, + &to_mucontext(qp->ucontext)->uar, + to_mucontext(qp->ucontext)->db_tab, to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); @@ -630,15 +719,18 @@ int mthca_destroy_qp(struct ib_qp *qp) struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, struct ib_ucontext *context, - struct ib_udata *udata) + ci_umv_buf_t* const p_umv_buf) { - struct mthca_create_cq ucmd; + struct ibv_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + if (context) { - if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) return ERR_PTR(-EFAULT); err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, @@ -661,7 +753,7 @@ struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, } if (context) { - cq->mr.ibmr.lkey = ucmd.lkey; + cq->mr.ibmr.lkey = ucmd.lkey; cq->set_ci_db_index = ucmd.set_db_index; cq->arm_db_index = ucmd.arm_db_index; } @@ -671,16 +763,20 @@ struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, err = mthca_init_cq(to_mdev(ibdev), nent, context ? to_mucontext(context) : NULL, - context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + context ? 
ucmd.mr.pdn : to_mdev(ibdev)->driver_pd.pd_num, cq); if (err) goto err_free; - if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (u32))) { - mthca_free_cq(to_mdev(ibdev), cq); - goto err_free; + if (context ) { + struct ibv_create_cq_resp *create_cq_resp = (struct ibv_create_cq_resp *)p_umv_buf->p_inout_buf; + create_cq_resp->cqn = cq->cqn; } + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_LOW , + ("uctx %p, cq_hndl %p, cq_num %#x, cqe %#x\n", + context, &cq->ibcq, cq->cqn, cq->ibcq.cqe ) ); + return &cq->ibcq; err_free: @@ -701,14 +797,14 @@ err_unmap_set: int mthca_destroy_cq(struct ib_cq *cq) { - if (cq->uobject) { + if (cq->ucontext) { mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &to_mucontext(cq->ucontext)->uar, + to_mucontext(cq->ucontext)->db_tab, to_mcq(cq)->arm_db_index); mthca_unmap_user_db(to_mdev(cq->device), - &to_mucontext(cq->uobject->context)->uar, - to_mucontext(cq->uobject->context)->db_tab, + &to_mucontext(cq->ucontext)->uar, + to_mucontext(cq->ucontext)->db_tab, to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); @@ -717,16 +813,23 @@ int mthca_destroy_cq(struct ib_cq *cq) return 0; } -static inline u32 convert_access(int acc) +static +mthca_mpt_access_t +map_qp_mpt( + IN mthca_qp_access_t qp_acl) { - return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? MTHCA_MPT_FLAG_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? MTHCA_MPT_FLAG_LOCAL_WRITE : 0) | - MTHCA_MPT_FLAG_LOCAL_READ; +#define ACL_MTHCA(mfl,ifl) if (qp_acl & mfl) mpt_acl |= ifl + mthca_mpt_access_t mpt_acl = 0; + + ACL_MTHCA(MTHCA_ACCESS_REMOTE_READ,MTHCA_MPT_FLAG_REMOTE_READ); + ACL_MTHCA(MTHCA_ACCESS_REMOTE_WRITE,MTHCA_MPT_FLAG_REMOTE_WRITE); + ACL_MTHCA(MTHCA_ACCESS_REMOTE_ATOMIC,MTHCA_MPT_FLAG_ATOMIC); + ACL_MTHCA(MTHCA_ACCESS_LOCAL_WRITE,MTHCA_MPT_FLAG_LOCAL_WRITE); + + return (mpt_acl | MTHCA_MPT_FLAG_LOCAL_READ); } -struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) +struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc) { struct mthca_mr *mr; int err; @@ -734,10 +837,11 @@ struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) mr = kmalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); + RtlZeroMemory(mr, sizeof *mr); err = mthca_mr_alloc_notrans(to_mdev(pd->device), to_mpd(pd)->pd_num, - convert_access(acc), mr); + map_qp_mpt(acc), mr); if (err) { kfree(mr); @@ -750,7 +854,7 @@ struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, - int acc, + mthca_qp_access_t acc, u64 *iova_start) { struct mthca_mr *mr; @@ -773,17 +877,17 @@ struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, mask = 0; total_size = 0; for (i = 0; i < num_phys_buf; ++i) { - if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) - return ERR_PTR(-EINVAL); - if (i != 0 && i != num_phys_buf - 1 && - (buffer_list[i].size & ~PAGE_MASK)) - return ERR_PTR(-EINVAL); + if (i != 0) + mask |= buffer_list[i].addr; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; total_size += buffer_list[i].size; - if (i > 0) - mask |= buffer_list[i].addr; } + if (mask & ~PAGE_MASK) + return ERR_PTR(-EINVAL); + /* Find largest page shift we can use to cover buffers */ for (shift = PAGE_SHIFT; shift < 31; ++shift) if (num_phys_buf > 1) { @@ -824,18 +928,18 @@ 
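Aside on map_qp_mpt() above, the bit-translation helper that replaces the old convert_access(): it folds the driver access bits into MPT flag bits and unconditionally adds MTHCA_MPT_FLAG_LOCAL_READ. A stand-alone sketch of that pattern, not part of the patch; the MPT flag values below match the mthca_provider.h hunk further down, while the ACC_* source bits are invented for the example (the real mthca_qp_access_t values live elsewhere in the driver):

#include <stdint.h>
#include <assert.h>

/* MPT flag bits, per the mthca_provider.h hunk in this patch */
#define MPT_FLAG_ATOMIC        (1 << 14)
#define MPT_FLAG_REMOTE_WRITE  (1 << 13)
#define MPT_FLAG_REMOTE_READ   (1 << 12)
#define MPT_FLAG_LOCAL_WRITE   (1 << 11)
#define MPT_FLAG_LOCAL_READ    (1 << 10)

/* hypothetical source bits, for illustration only */
#define ACC_REMOTE_READ        (1 << 0)
#define ACC_REMOTE_WRITE       (1 << 1)
#define ACC_REMOTE_ATOMIC      (1 << 2)
#define ACC_LOCAL_WRITE        (1 << 3)

static uint32_t map_acc_to_mpt(uint32_t acc)
{
	uint32_t mpt = 0;
	if (acc & ACC_REMOTE_READ)   mpt |= MPT_FLAG_REMOTE_READ;
	if (acc & ACC_REMOTE_WRITE)  mpt |= MPT_FLAG_REMOTE_WRITE;
	if (acc & ACC_REMOTE_ATOMIC) mpt |= MPT_FLAG_ATOMIC;
	if (acc & ACC_LOCAL_WRITE)   mpt |= MPT_FLAG_LOCAL_WRITE;
	return mpt | MPT_FLAG_LOCAL_READ;           /* local read is always granted */
}

int main(void)
{
	/* remote write only: expect remote write plus the implicit local read */
	assert(map_acc_to_mpt(ACC_REMOTE_WRITE) ==
	       (uint32_t)(MPT_FLAG_REMOTE_WRITE | MPT_FLAG_LOCAL_READ));
	return 0;
}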
struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, ++j) page_list[n++] = buffer_list[i].addr + ((u64) j << shift); - mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) " + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Registering memory at %I64x (iova %I64x) " "in PD %x; shift %d, npages %d.\n", (unsigned long long) buffer_list[0].addr, (unsigned long long) *iova_start, to_mpd(pd)->pd_num, - shift, npages); + shift, npages)); err = mthca_mr_alloc_phys(to_mdev(pd->device), to_mpd(pd)->pd_num, page_list, shift, npages, *iova_start, total_size, - convert_access(acc), mr); + map_qp_mpt(acc), mr); if (err) { kfree(page_list); @@ -847,38 +951,55 @@ struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, return &mr->ibmr; } -struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, - int acc, struct ib_udata *udata) +struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, mthca_qp_access_t acc) { struct mthca_dev *dev = to_mdev(pd->device); struct ib_umem_chunk *chunk; struct mthca_mr *mr; + struct ib_umem *region; u64 *pages; int shift, n, len; int i, j, k; int err = 0; - shift = ffs(region->page_size) - 1; + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + goto err_nomem; + } + region = &mr->umem; + + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. + */ + err = ibv_umem_get(pd->device, region, + (void *)vaddr, (size_t)length, + !!(acc & ~MTHCA_ACCESS_REMOTE_READ)); + if (err) + goto err_umem_get; + + region->virt_base = hca_va; /* va in HCA */ - mr = kmalloc(sizeof *mr, GFP_KERNEL); - if (!mr) - return ERR_PTR(-ENOMEM); - RtlZeroMemory(mr, sizeof *mr); - n = 0; + shift = ffs(region->page_size) - 1; list_for_each_entry(chunk, ®ion->chunk_list, list,struct ib_umem_chunk) n += chunk->nents; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { err = PTR_ERR(mr->mtt); - goto err; + goto err_alloc_mtt; } pages = (u64 *) kmalloc(PAGE_SIZE,GFP_KERNEL); if (!pages) { err = -ENOMEM; - goto err_mtt; + goto err_pages; } i = n = 0; @@ -899,33 +1020,37 @@ struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, err = mthca_write_mtt(dev, mr->mtt, n, pages, i); if (err) - goto mtt_done; + goto err_write_mtt; n += i; i = 0; } } } - if (i) + if (i) { err = mthca_write_mtt(dev, mr->mtt, n, pages, i); -mtt_done: - free_page((void*) pages); - if (err) - goto err_mtt; + if (err) + goto err_write_mtt; + } err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, - region->length, convert_access(acc), mr); - + region->length, map_qp_mpt(acc), mr); if (err) - goto err_mtt; + goto err_mt_alloc; + free_page((void*) pages); return &mr->ibmr; -err_mtt: +err_mt_alloc: +err_write_mtt: + free_page((void*) pages); +err_pages: mthca_free_mtt(dev, mr->mtt); - -err: +err_alloc_mtt: + ibv_umem_release(pd->device, region); +err_umem_get: kfree(mr); +err_nomem: return ERR_PTR(err); } @@ -933,11 +1058,13 @@ int mthca_dereg_mr(struct ib_mr *mr) { struct mthca_mr *mmr = to_mmr(mr); mthca_free_mr(to_mdev(mr->device), mmr); + if (mr->pd->ucontext) + ibv_umem_release(mr->pd->device, &mmr->umem); kfree(mmr); return 0; } -struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, +struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, 
mthca_qp_access_t acc, struct ib_fmr_attr *fmr_attr) { struct mthca_fmr *fmr; @@ -949,7 +1076,7 @@ struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr); err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num, - convert_access(mr_access_flags), fmr); + map_qp_mpt(acc), fmr); if (err) { kfree(fmr); @@ -1015,7 +1142,7 @@ static ssize_t show_rev(struct class_device *cdev, char *buf) static ssize_t show_fw_ver(struct class_device *cdev, char *buf) { struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); - return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32), + return sprintf(buf, "%d.%d.%d\n", (int) (dev->fw_ver >> 32), (int) (dev->fw_ver >> 16) & 0xffff, (int) dev->fw_ver & 0xffff); } @@ -1057,14 +1184,71 @@ static struct class_device_attribute *mthca_class_attributes[] = { }; #endif +static int mthca_init_node_data(struct mthca_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mthca_MAD_IFC(dev, 1, 1, + 1, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + int mthca_register_device(struct mthca_dev *dev) { int ret; int i; + ret = mthca_init_node_data(dev); + if (ret) + return ret; + strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); #ifdef LINUX_TO_BE_REMOVED dev->ib_dev.owner = THIS_MODULE; +dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + #endif dev->ib_dev.node_type = IB_NODE_CA; @@ -1088,6 +1272,7 @@ int mthca_register_device(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_SRQ) { dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; dev->ib_dev.destroy_srq = mthca_destroy_srq; if (mthca_is_memfree(dev)) @@ -1148,10 +1333,13 @@ int mthca_register_device(struct mthca_dev *dev) } #endif + mthca_start_catas_poll(dev); + return 0; } void mthca_unregister_device(struct mthca_dev *dev) { + mthca_stop_catas_poll(dev); ib_unregister_device(&dev->ib_dev); } diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_provider.h b/branches/MTHCA/hw/mthca/kernel/mthca_provider.h index d2f9286c..19217e2f 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_provider.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_provider.h @@ -39,25 +39,22 @@ #include #include +#include +typedef uint32_t mthca_mpt_access_t; #define MTHCA_MPT_FLAG_ATOMIC (1 << 14) #define 
MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) #define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12) #define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11) #define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10) -struct mthca_buf_list { - u8 *buf; - dma_addr_t mapping; -}; - union mthca_buf { - struct mthca_buf_list direct; - struct mthca_buf_list *page_list; + struct scatterlist direct; + struct scatterlist *page_list; }; struct mthca_uar { - unsigned long pfn; + PFN_NUMBER pfn; int index; }; @@ -67,6 +64,10 @@ struct mthca_ucontext { struct ib_ucontext ibucontext; struct mthca_uar uar; struct mthca_user_db_table *db_tab; + // for user UAR + PMDL mdl; + PVOID kva; + SIZE_T uar_size; }; struct mthca_mtt; @@ -75,8 +76,11 @@ struct mthca_mr { //NB: the start of this structure is to be equal to mlnx_mro_t ! //NB: the structure was not inserted here for not to mix driver and provider structures struct ib_mr ibmr; +#ifdef WIN_TO_BE_REMOVED mt_iobuf_t iobuf; +#endif struct mthca_mtt *mtt; + struct ib_umem umem; }; struct mthca_fmr { @@ -113,7 +117,7 @@ struct mthca_eq { u16 msi_x_entry; int have_irq; int nent; - struct mthca_buf_list *page_list; + struct scatterlist *page_list; struct mthca_mr mr; KDPC dpc; /* DPC for MSI-X interrupts */ spinlock_t lock; /* spinlock for simult DPCs */ @@ -246,6 +250,9 @@ struct mthca_wq { struct mthca_qp { struct ib_qp ibqp; void *qp_context; // leo: for IBAL shim + //TODO: added just because absense of ibv_query_qp + // thereafter it may be worth to be replaced by struct ib_qp_attr qp_attr; + struct ib_qp_init_attr qp_init_attr; // leo: for query_qp atomic_t refcount; u32 qpn; int is_direct; @@ -260,6 +267,7 @@ struct mthca_qp { struct mthca_wq sq; enum ib_sig_type sq_policy; int send_wqe_offset; + int max_inline_data; u64 *wrid; union mthca_buf queue; @@ -274,9 +282,7 @@ struct mthca_sqp { u32 qkey; u32 send_psn; struct ib_ud_header ud_header; - int header_buf_size; - void *header_buf; - dma_addr_t header_dma; + struct scatterlist sg; }; static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -334,6 +340,25 @@ static inline int end_port(struct ib_device *device) return device->node_type == IB_NODE_SWITCH ? 
0 : device->phys_port_cnt; } +static inline int ib_copy_from_umv_buf(void *dest, ci_umv_buf_t* const p_umv_buf, size_t len) +{ + RtlCopyMemory(dest, p_umv_buf->p_inout_buf, len); + return 0; +} + +static inline int ib_copy_to_umv_buf(ci_umv_buf_t* const p_umv_buf, void *src, size_t len) +{ + if (p_umv_buf->output_size < len) { + p_umv_buf->status = IB_INSUFFICIENT_MEMORY; + p_umv_buf->output_size = 0; + return -EFAULT; + } + RtlCopyMemory(p_umv_buf->p_inout_buf, src, len); + p_umv_buf->status = IB_SUCCESS; + p_umv_buf->output_size = len; + return 0; +} + // API @@ -354,13 +379,13 @@ int mthca_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, - struct ib_udata *udata); + ci_umv_buf_t* const p_umv_buf); int mthca_dealloc_ucontext(struct ib_ucontext *context); struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, - struct ib_udata *udata); + ci_umv_buf_t* const p_umv_buf); int mthca_dealloc_pd(struct ib_pd *pd); @@ -371,41 +396,46 @@ int mthca_ah_destroy(struct ib_ah *ah); struct ib_srq *mthca_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); + ci_umv_buf_t* const p_umv_buf); int mthca_destroy_srq(struct ib_srq *srq); struct ib_qp *mthca_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); + ci_umv_buf_t* const p_umv_buf); int mthca_destroy_qp(struct ib_qp *qp); struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, struct ib_ucontext *context, - struct ib_udata *udata); + ci_umv_buf_t* const p_umv_buf); int mthca_destroy_cq(struct ib_cq *cq); -struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc); struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, - int acc, + mthca_qp_access_t acc, u64 *iova_start); -struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, - int acc, struct ib_udata *udata); +struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, mthca_qp_access_t acc); int mthca_dereg_mr(struct ib_mr *mr); -struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags, +struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, mthca_qp_access_t acc, struct ib_fmr_attr *fmr_attr); int mthca_dealloc_fmr(struct ib_fmr *fmr); int mthca_unmap_fmr(struct list_head *fmr_list); +int mthca_poll_cq_list( + IN struct ib_cq *ibcq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ); + #endif /* MTHCA_PROVIDER_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_qp.c b/branches/MTHCA/hw/mthca/kernel/mthca_qp.c index 2ba3886b..1eb22721 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_qp.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_qp.c @@ -40,10 +40,22 @@ #include #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_qp.tmh" +#endif #include "mthca_cmd.h" #include "mthca_memfree.h" #include "mthca_wqe.h" + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_qp_table) +#pragma alloc_text (PAGE, mthca_cleanup_qp_table) +#endif + enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, MTHCA_ACK_REQ_FREQ = 10, @@ -107,7 +119,7 @@ struct mthca_qp_path { u8 hop_limit; __be32 sl_tclass_flowlabel; u8 rgid[16]; -} __attribute__((packed)); +} ; struct mthca_qp_context { __be32 flags; @@ -146,14 +158,14 @@ struct mthca_qp_context 
{ __be16 rq_wqe_counter; /* reserved on Tavor */ __be16 sq_wqe_counter; /* reserved on Tavor */ u32 reserved3[18]; -} __attribute__((packed)); +} ; struct mthca_qp_param { __be32 opt_param_mask; u32 reserved1; struct mthca_qp_context context; u32 reserved2[62]; -} __attribute__((packed)); +} ; #pragma pack(pop) enum { @@ -187,17 +199,6 @@ static const u8 mthca_opcode[] = { }; -//TODO: these literals are also defined in ib_types.h and have there ANOTHER VALUES !!! -enum ib_qp_state { - IBQPS_RESET, - IBQPS_INIT, - IBQPS_RTR, - IBQPS_RTS, - IBQPS_SQD, - IBQPS_SQE, - IBQPS_ERR -}; - enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS }; static struct _state_table { @@ -236,7 +237,7 @@ static void fill_state_table() t[IBQPS_RTR].trans = MTHCA_TRANS_INIT2RTR; t[IBQPS_RTR].req_param[UC] = - IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN |IB_QP_MAX_DEST_RD_ATOMIC; + IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN; t[IBQPS_RTR].req_param[RC] = IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_MIN_RNR_TIMER; t[IBQPS_RTR].opt_param[UD] = IB_QP_PKEY_INDEX |IB_QP_QKEY; @@ -251,15 +252,15 @@ static void fill_state_table() t[IBQPS_RTS].trans = MTHCA_TRANS_RTR2RTS; t[IBQPS_RTS].req_param[UD] = IB_QP_SQ_PSN; - t[IBQPS_RTS].req_param[UC] = IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC; + t[IBQPS_RTS].req_param[UC] = IB_QP_SQ_PSN; t[IBQPS_RTS].req_param[RC] = IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY |IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC; t[IBQPS_RTS].req_param[MLX] = IB_QP_SQ_PSN; t[IBQPS_RTS].opt_param[UD] = IB_QP_CUR_STATE |IB_QP_QKEY; t[IBQPS_RTS].opt_param[UC] = - IB_QP_CUR_STATE |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_PATH_MIG_STATE; + IB_QP_CUR_STATE |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE; t[IBQPS_RTS].opt_param[RC] = IB_QP_CUR_STATE |IB_QP_ALT_PATH | - IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE; + IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE; t[IBQPS_RTS].opt_param[MLX] = IB_QP_CUR_STATE |IB_QP_QKEY; /* IBQPS_RTS */ @@ -291,8 +292,8 @@ static void fill_state_table() t[IBQPS_SQD].trans = MTHCA_TRANS_SQD2SQD; t[IBQPS_SQD].opt_param[UD] = IB_QP_PKEY_INDEX |IB_QP_QKEY; - t[IBQPS_SQD].opt_param[UC] = IB_QP_AV |IB_QP_MAX_QP_RD_ATOMIC |IB_QP_MAX_DEST_RD_ATOMIC | - IB_QP_CUR_STATE |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_PATH_MIG_STATE; + t[IBQPS_SQD].opt_param[UC] = IB_QP_AV | IB_QP_CUR_STATE | + IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_PATH_MIG_STATE; t[IBQPS_SQD].opt_param[RC] = IB_QP_AV |IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_CUR_STATE |IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE; @@ -305,8 +306,8 @@ static void fill_state_table() t[IBQPS_RTS].trans = MTHCA_TRANS_SQERR2RTS; t[IBQPS_RTS].opt_param[UD] = IB_QP_CUR_STATE |IB_QP_QKEY; - t[IBQPS_RTS].opt_param[UC] = IB_QP_CUR_STATE; - t[IBQPS_RTS].opt_param[RC] = IB_QP_CUR_STATE |IB_QP_MIN_RNR_TIMER; + t[IBQPS_RTS].opt_param[UC] = IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS; +// t[IBQPS_RTS].opt_param[RC] = IB_QP_CUR_STATE |IB_QP_MIN_RNR_TIMER; t[IBQPS_RTS].opt_param[MLX] = IB_QP_CUR_STATE |IB_QP_QKEY; /* IBQPS_ERR */ @@ -329,42 +330,71 @@ static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp) qp->qpn <= (u32)(dev->qp_table.sqp_start + 1); } + +static void dump_wqe(u32 *wqe_ptr , struct mthca_qp *qp_ptr) +{ + __be32 *wqe = wqe_ptr; + + (void) wqe; /* avoid warning if 
mthca_dbg compiled away... */ + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_QP,("WQE contents QPN 0x%06x \n",qp_ptr->qpn)); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0 + , be32_to_cpu(wqe[0]), be32_to_cpu(wqe[1]), be32_to_cpu(wqe[2]), be32_to_cpu(wqe[3]))); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4 + , be32_to_cpu(wqe[4]), be32_to_cpu(wqe[5]), be32_to_cpu(wqe[6]), be32_to_cpu(wqe[7]))); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8 + , be32_to_cpu(wqe[8]), be32_to_cpu(wqe[9]), be32_to_cpu(wqe[10]), be32_to_cpu(wqe[11]))); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12 + , be32_to_cpu(wqe[12]), be32_to_cpu(wqe[13]), be32_to_cpu(wqe[14]), be32_to_cpu(wqe[15]))); + +} + + static void *get_recv_wqe(struct mthca_qp *qp, int n) { if (qp->is_direct) - return qp->queue.direct.buf + (n << qp->rq.wqe_shift); + return (u8*)qp->queue.direct.page + (n << qp->rq.wqe_shift); else - return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf + + return (u8*)qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].page + ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1)); } static void *get_send_wqe(struct mthca_qp *qp, int n) { if (qp->is_direct) - return qp->queue.direct.buf + qp->send_wqe_offset + + return (u8*)qp->queue.direct.page + qp->send_wqe_offset + (n << qp->sq.wqe_shift); else - return qp->queue.page_list[(qp->send_wqe_offset + + return (u8*)qp->queue.page_list[(qp->send_wqe_offset + (n << qp->sq.wqe_shift)) >> - PAGE_SHIFT].buf + + PAGE_SHIFT].page + ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) & (PAGE_SIZE - 1)); } +static void mthca_wq_init(struct mthca_wq *wq) +{ + spin_lock_init(&wq->lock); + wq->next_ind = 0; + wq->last_comp = wq->max - 1; + wq->head = 0; + wq->tail = 0; +} + void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type) { struct mthca_qp *qp; struct ib_event event; + SPIN_LOCK_PREP(lh); - spin_lock(&dev->qp_table.lock); + spin_lock(&dev->qp_table.lock, &lh); qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1)); if (qp) atomic_inc(&qp->refcount); - spin_unlock(&dev->qp_table.lock); + spin_unlock(&lh); if (!qp) { - mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_QP ,("QP %06x Async event for bogus \n", qpn)); return; } @@ -423,8 +453,7 @@ static void init_port(struct mthca_dev *dev, int port) RtlZeroMemory(¶m, sizeof param); - param.enable_1x = 1; - param.enable_4x = 1; + param.port_width = dev->limits.port_width_cap; param.vl_cap = dev->limits.vl_cap; param.mtu_cap = dev->limits.mtu_cap; param.gid_cap = (u16)dev->limits.gid_table_len; @@ -432,9 +461,40 @@ static void init_port(struct mthca_dev *dev, int port) err = mthca_INIT_IB(dev, ¶m, port, &status); if (err) - mthca_warn(dev, "INIT_IB failed, return code %d.\n", err); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("INIT_IB failed, return code %d.\n", err)); if (status) - mthca_warn(dev, "INIT_IB returned status %02x.\n", status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("INIT_IB returned status %02x.\n", status)); +} + + +static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if 
(attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= MTHCA_ACCESS_REMOTE_WRITE; + + if (access_flags & MTHCA_ACCESS_REMOTE_READ) + hw_access_flags |= MTHCA_QP_BIT_RRE; + if (access_flags & MTHCA_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MTHCA_QP_BIT_RAE; + if (access_flags & MTHCA_ACCESS_REMOTE_WRITE) + hw_access_flags |= MTHCA_QP_BIT_RWE; + + return cl_hton32(hw_access_flags); } int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) @@ -448,6 +508,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) u32 req_param, opt_param; u8 status; int err; + SPIN_LOCK_PREP(lhs); + SPIN_LOCK_PREP(lhr); if (attr_mask & IB_QP_CUR_STATE) { if (attr->cur_qp_state != IBQPS_RTR && @@ -458,11 +520,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) else cur_state = attr->cur_qp_state; } else { - spin_lock_irq(&qp->sq.lock); - spin_lock(&qp->rq.lock); + spin_lock_irq(&qp->sq.lock, &lhs); + spin_lock(&qp->rq.lock, &lhr); cur_state = qp->state; - spin_unlock(&qp->rq.lock); - spin_unlock_irq(&qp->sq.lock); + spin_unlock(&lhr); + spin_unlock_irq(&lhs); } if (attr_mask & IB_QP_STATE) { @@ -473,8 +535,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) new_state = cur_state; if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) { - mthca_dbg(dev, "Illegal QP transition " - "%d->%d\n", cur_state, new_state); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Illegal QP transition " + "%d->%d\n", cur_state, new_state)); return -EINVAL; } @@ -482,20 +544,49 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) opt_param = state_table[cur_state][new_state].opt_param[qp->transport]; if ((req_param & attr_mask) != req_param) { - mthca_dbg(dev, "QP transition " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition " "%d->%d missing req attr 0x%08x\n", cur_state, new_state, - req_param & ~attr_mask); + req_param & ~attr_mask)); + //NB: IBAL doesn't use all the fields, so we can miss some mandatory flags return -EINVAL; } if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) { - mthca_dbg(dev, "QP transition (transport %d) " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition (transport %d) " "%d->%d has extra attr 0x%08x\n", qp->transport, cur_state, new_state, attr_mask & ~(req_param | opt_param | - IB_QP_STATE)); + IB_QP_STATE))); + //NB: The old code sometimes uses optional flags that are not so in this code + return -EINVAL; + } + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->limits.pkey_table_len) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("PKey index (%u) too large. 
max is %d\n", + attr->pkey_index,dev->limits.pkey_table_len-1)); + return -EINVAL; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Port number (%u) is invalid\n", attr->port_num)); + return -EINVAL; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as initiator %u too large (max is %d)\n", + attr->max_rd_atomic, dev->limits.max_qp_init_rdma)); + return -EINVAL; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as responder %u too large (max %d)\n", + attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift)); return -EINVAL; } @@ -506,22 +597,22 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context = &qp_param->context; RtlZeroMemory(qp_param, sizeof *qp_param); - qp_context->flags = cpu_to_be32((to_mthca_state(new_state) << 28) | + qp_context->flags = cl_hton32((to_mthca_state(new_state) << 28) | (to_mthca_st(qp->transport) << 16)); - qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE); + qp_context->flags |= cl_hton32(MTHCA_QP_BIT_DE); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) - qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11); + qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11); else { - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PM_STATE); switch (attr->path_mig_state) { - case IB_MIG_MIGRATED: - qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11); + case IB_APM_MIGRATED: + qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11); break; - case IB_MIG_REARM: - qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11); + case IB_APM_REARM: + qp_context->flags |= cl_hton32(MTHCA_QP_PM_REARM << 11); break; - case IB_MIG_ARMED: - qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11); + case IB_APM_ARMED: + qp_context->flags |= cl_hton32(MTHCA_QP_PM_ARMED << 11); break; } } @@ -545,208 +636,163 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) /* leave arbel_sched_queue as 0 */ - if (qp->ibqp.uobject) + if (qp->ibqp.ucontext) qp_context->usr_page = - cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + cl_hton32(to_mucontext(qp->ibqp.ucontext)->uar.index); else - qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); - qp_context->local_qpn = cpu_to_be32(qp->qpn); + qp_context->usr_page = cl_hton32(dev->driver_uar.index); + qp_context->local_qpn = cl_hton32(qp->qpn); if (attr_mask & IB_QP_DEST_QPN) { - qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); + qp_context->remote_qpn = cl_hton32(attr->dest_qp_num); } if (qp->transport == MLX) qp_context->pri_path.port_pkey |= - cpu_to_be32(to_msqp(qp)->port << 24); + cl_hton32(to_msqp(qp)->port << 24); else { if (attr_mask & IB_QP_PORT) { qp_context->pri_path.port_pkey |= - cpu_to_be32(attr->port_num << 24); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM); + cl_hton32(attr->port_num << 24); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PORT_NUM); } } if (attr_mask & IB_QP_PKEY_INDEX) { qp_context->pri_path.port_pkey |= - cpu_to_be32(attr->pkey_index); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX); + cl_hton32(attr->pkey_index); + 
qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PKEY_INDEX); } if (attr_mask & IB_QP_RNR_RETRY) { qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_RETRY); } if (attr_mask & IB_QP_AV) { qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; - qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); + qp_context->pri_path.rlid = cl_hton16(attr->ah_attr.dlid); qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate; if (attr->ah_attr.ah_flags & IB_AH_GRH) { qp_context->pri_path.g_mylmc |= 1 << 7; qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit; qp_context->pri_path.sl_tclass_flowlabel = - cpu_to_be32((attr->ah_attr.sl << 28) | + cl_hton32((attr->ah_attr.sl << 28) | (attr->ah_attr.grh.traffic_class << 20) | (attr->ah_attr.grh.flow_label)); memcpy(qp_context->pri_path.rgid, attr->ah_attr.grh.dgid.raw, 16); } else { qp_context->pri_path.sl_tclass_flowlabel = - cpu_to_be32(attr->ah_attr.sl << 28); + cl_hton32(attr->ah_attr.sl << 28); } - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); } if (attr_mask & IB_QP_TIMEOUT) { - qp_context->pri_path.ackto = attr->timeout; - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); + qp_context->pri_path.ackto = attr->timeout << 3; + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); } /* XXX alt_path */ /* leave rdd as 0 */ - qp_context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pd_num); + qp_context->pd = cl_hton32(to_mpd(ibqp->pd)->pd_num); /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */ - qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey); - qp_context->params1 = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) | + qp_context->wqe_lkey = cl_hton32(qp->mr.ibmr.lkey); + qp_context->params1 = cl_hton32((MTHCA_ACK_REQ_FREQ << 28) | (MTHCA_FLIGHT_LIMIT << 24) | - MTHCA_QP_BIT_SRE | - MTHCA_QP_BIT_SWE | - MTHCA_QP_BIT_SAE); + MTHCA_QP_BIT_SWE); if (qp->sq_policy == IB_SIGNAL_ALL_WR) - qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC); + qp_context->params1 |= cl_hton32(MTHCA_QP_BIT_SSC); if (attr_mask & IB_QP_RETRY_CNT) { - qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); + qp_context->params1 |= cl_hton32(attr->retry_cnt << 16); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RETRY_COUNT); } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - long val = attr->max_rd_atomic; - qp_context->params1 |= cpu_to_be32(min(val ? 
ffs(val) - 1 : 0, 7) << 21); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); + if (attr->max_rd_atomic) { + qp_context->params1 |= + cl_hton32(MTHCA_QP_BIT_SRE | + MTHCA_QP_BIT_SAE); + qp_context->params1 |= + cl_hton32(fls(attr->max_rd_atomic - 1) << 21); + } + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_SRA_MAX); } if (attr_mask & IB_QP_SQ_PSN) - qp_context->next_send_psn = cpu_to_be32(attr->sq_psn); - qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn); + qp_context->next_send_psn = cl_hton32(attr->sq_psn); + qp_context->cqn_snd = cl_hton32(to_mcq(ibqp->send_cq)->cqn); if (mthca_is_memfree(dev)) { - qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset); - qp_context->snd_db_index = cpu_to_be32(qp->sq.db_index); - } - - if (attr_mask & IB_QP_ACCESS_FLAGS) { - /* - * Only enable RDMA/atomics if we have responder - * resources set to a non-zero value. - */ - if (qp->resp_depth) { - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ? - MTHCA_QP_BIT_RWE : 0); - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ? - MTHCA_QP_BIT_RRE : 0); - qp_context->params2 |= - cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ? - MTHCA_QP_BIT_RAE : 0); - } - - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - - qp->atomic_rd_en = (u8)attr->qp_access_flags; + qp_context->snd_wqe_base_l = cl_hton32(qp->send_wqe_offset); + qp_context->snd_db_index = cl_hton32(qp->sq.db_index); } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - u8 rra_max; - - if (qp->resp_depth && !attr->max_dest_rd_atomic) { - /* - * Lowering our responder resources to zero. - * Turn off RDMA/atomics as responder. - * (RWE/RRE/RAE in params2 already zero) - */ - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } - if (!qp->resp_depth && attr->max_dest_rd_atomic) { - /* - * Increasing our responder resources from - * zero. Turn on RDMA/atomics as appropriate. - */ - qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ? - MTHCA_QP_BIT_RWE : 0); - qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ? - MTHCA_QP_BIT_RRE : 0); + if (attr->max_dest_rd_atomic) qp_context->params2 |= - cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ? 
- MTHCA_QP_BIT_RAE : 0); + cl_hton32(fls(attr->max_dest_rd_atomic - 1) << 21); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE | - MTHCA_QP_OPTPAR_RRE | - MTHCA_QP_OPTPAR_RAE); - } - - for (rra_max = 0; - 1 << rra_max < attr->max_dest_rd_atomic && - rra_max < dev->qp_table.rdb_shift; - ++rra_max) - ; /* nothing */ + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RRA_MAX); - qp_context->params2 |= cpu_to_be32(rra_max << 21); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); + } - qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RWE | + MTHCA_QP_OPTPAR_RRE | + MTHCA_QP_OPTPAR_RAE); } - qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); + qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RSC); if (ibqp->srq) - qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC); + qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RIC); if (attr_mask & IB_QP_MIN_RNR_TIMER) { - qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); + qp_context->rnr_nextrecvpsn |= cl_hton32(attr->min_rnr_timer << 24); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); } if (attr_mask & IB_QP_RQ_PSN) - qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + qp_context->rnr_nextrecvpsn |= cl_hton32(attr->rq_psn); qp_context->ra_buff_indx = - cpu_to_be32(dev->qp_table.rdb_base + + cl_hton32(dev->qp_table.rdb_base + ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE << dev->qp_table.rdb_shift)); - qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn); + qp_context->cqn_rcv = cl_hton32(to_mcq(ibqp->recv_cq)->cqn); if (mthca_is_memfree(dev)) - qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index); + qp_context->rcv_db_index = cl_hton32(qp->rq.db_index); if (attr_mask & IB_QP_QKEY) { - qp_context->qkey = cpu_to_be32(attr->qkey); - qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY); + qp_context->qkey = cl_hton32(attr->qkey); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_Q_KEY); } if (ibqp->srq) - qp_context->srqn = cpu_to_be32(1 << 24 | + qp_context->srqn = cl_hton32(1 << 24 | to_msrq(ibqp->srq)->srqn); err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, qp->qpn, 0, mailbox, 0, &status); if (status) { - mthca_warn(dev, "modify QP %d returned status %02x.\n", - state_table[cur_state][new_state].trans, status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("modify QP %d returned status %02x.\n", + state_table[cur_state][new_state].trans, status)); err = -EINVAL; } - if (!err) + if (!err) { qp->state = new_state; + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = (u8)attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + } mthca_free_mailbox(dev, mailbox); @@ -754,8 +800,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) store_attrs(to_msqp(qp), attr, attr_mask); /* - * If we are moving QP0 to RTR, bring the IB link up; if we - * are moving QP0 to RESET or ERROR, bring the link back down. + * If we moved QP0 to RTR, bring the IB link up; if we moved + * QP0 to RESET or ERROR, bring the link back down. 
*/ if (is_qp0(dev, qp)) { if (cur_state != IBQPS_RTR && @@ -769,9 +815,81 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status); } + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (!err && new_state == IB_QPS_RESET && !qp->ibqp.ucontext) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + + mthca_wq_init(&qp->sq); + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + mthca_wq_init(&qp->rq); + qp->rq.last = get_send_wqe(qp, qp->rq.max - 1); + + if (mthca_is_memfree(dev)) { + *qp->sq.db = 0; + *qp->rq.db = 0; + } + } + return err; } +static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz) +{ + + /* + * Calculate the maximum size of WQE s/g segments, excluding + * the next segment and other non-data segments. + */ + int max_data_size = desc_sz - sizeof (struct mthca_next_seg); + + switch (qp->transport) { + case MLX: + max_data_size -= 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + if (mthca_is_memfree(dev)) + max_data_size -= sizeof (struct mthca_arbel_ud_seg); + else + max_data_size -= sizeof (struct mthca_tavor_ud_seg); + break; + + default: + max_data_size -= sizeof (struct mthca_raddr_seg); + break; + } + return max_data_size; +} + +static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size) +{ + /* We don't support inline data for kernel QPs (yet). */ + return pd->ibpd.ucontext ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0; +} + +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, + min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift)); + + qp->max_inline_data = mthca_max_inline_data(pd, max_data_size); + + qp->sq.max_gs = min(dev->limits.max_sg, + (int)(max_data_size / sizeof (struct mthca_data_seg))); + qp->rq.max_gs = min(dev->limits.max_sg, + (int)((min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) - + sizeof (struct mthca_next_seg)) / sizeof (struct mthca_data_seg))); +} + /* * Allocate and register buffer for WQEs. qp->rq.max, sq.max, * rq.max_gs and sq.max_gs must all be assigned. @@ -785,30 +903,57 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, { int size; int err = -ENOMEM; - + + HCA_ENTER(HCA_DBG_QP); size = sizeof (struct mthca_next_seg) + qp->rq.max_gs * sizeof (struct mthca_data_seg); + if (size > dev->limits.max_desc_sz) + return -EINVAL; + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; qp->rq.wqe_shift++) ; /* nothing */ - size = sizeof (struct mthca_next_seg) + - qp->sq.max_gs * sizeof (struct mthca_data_seg); + size = qp->sq.max_gs * sizeof (struct mthca_data_seg); switch (qp->transport) { - case MLX: - size += 2 * sizeof (struct mthca_data_seg); - break; - case UD: - if (mthca_is_memfree(dev)) - size += sizeof (struct mthca_arbel_ud_seg); - else - size += sizeof (struct mthca_tavor_ud_seg); - break; - default: - /* bind seg is as big as atomic + raddr segs */ - size += sizeof (struct mthca_bind_seg); + case MLX: + size += 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + size += mthca_is_memfree(dev) ? 
+ sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg); + break; + + case UC: + size += sizeof (struct mthca_raddr_seg); + break; + + case RC: + size += sizeof (struct mthca_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. + */ + size = max(size, + sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg)); + break; + + default: + break; } + + /* Make sure that we have enough space for a bind request */ + size = max(size, sizeof (struct mthca_bind_seg)); + + size += sizeof (struct mthca_next_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; qp->sq.wqe_shift++) @@ -822,10 +967,10 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, * allocate anything. All we need is to calculate the WQE * sizes and the send_wqe_offset, so we're done now. */ - if (pd->ibpd.uobject) + if (pd->ibpd.ucontext) return 0; - size = (int)(LONG_PTR)PAGE_ALIGN(qp->send_wqe_offset + + size = (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)); qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64), @@ -837,7 +982,8 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, &qp->queue, &qp->is_direct, pd, 0, &qp->mr); if (err) goto err_out; - + + HCA_EXIT(HCA_DBG_QP); return 0; err_out: @@ -848,7 +994,7 @@ err_out: static void mthca_free_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { - mthca_buf_free(dev, (int)(LONG_PTR)PAGE_ALIGN(qp->send_wqe_offset + + mthca_buf_free(dev, (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)), &qp->queue, qp->is_direct, &qp->mr); kfree(qp->wrid); @@ -924,15 +1070,6 @@ static void mthca_free_memfree(struct mthca_dev *dev, } } -static void mthca_wq_init(struct mthca_wq* wq) -{ - spin_lock_init(&wq->lock); - wq->next_ind = 0; - wq->last_comp = wq->max - 1; - wq->head = 0; - wq->tail = 0; -} - static int mthca_alloc_qp_common(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -944,14 +1081,13 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, int i; atomic_set(&qp->refcount, 1); + init_waitqueue_head(&qp->wait); qp->state = IBQPS_RESET; qp->atomic_rd_en = 0; qp->resp_depth = 0; qp->sq_policy = send_policy; mthca_wq_init(&qp->sq); mthca_wq_init(&qp->rq); - /*leo: seems like was missed */ - init_waitqueue_head(&qp->wait); ret = mthca_map_memfree(dev, qp); if (ret) @@ -963,12 +1099,14 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, return ret; } + mthca_adjust_qp_caps(dev, pd, qp); + /* * If this is a userspace QP, we're done now. The doorbells * will be allocated and buffers will be initialized in * userspace. 
*/ - if (pd->ibpd.uobject) + if (pd->ibpd.ucontext) return 0; ret = mthca_alloc_memfree(dev, qp); @@ -986,19 +1124,19 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, for (i = 0; i < qp->rq.max; ++i) { next = get_recv_wqe(qp, i); - next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) << + next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) << qp->rq.wqe_shift); - next->ee_nds = cpu_to_be32(size); + next->ee_nds = cl_hton32(size); for (scatter = (void *) (next + 1); (void *) scatter < (void *) ((u8*)next + (1 << qp->rq.wqe_shift)); ++scatter) - scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY); } for (i = 0; i < qp->sq.max; ++i) { next = get_send_wqe(qp, i); - next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) << + next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) << qp->sq.wqe_shift) + qp->send_wqe_offset); } @@ -1011,11 +1149,23 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, } static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_pd *pd, struct mthca_qp *qp) { + int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz); + /* Sanity check QP size before proceeding */ - if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || - cap->max_send_sge > 64 || cap->max_recv_sge > 64) + if (cap->max_send_wr > (u32)dev->limits.max_wqes || + cap->max_recv_wr > (u32)dev->limits.max_wqes || + cap->max_send_sge > (u32)dev->limits.max_sg || + cap->max_recv_sge > (u32)dev->limits.max_sg || + cap->max_inline_data > (u32)mthca_max_inline_data(pd, max_data_size)) + return -EINVAL; + + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if (qp->transport == MLX && cap->max_recv_sge + 2 > (u32)dev->limits.max_sg) return -EINVAL; if (mthca_is_memfree(dev)) { @@ -1034,14 +1184,6 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, MTHCA_INLINE_CHUNK_SIZE) / sizeof (struct mthca_data_seg)); - /* - * For MLX transport we need 2 extra S/G entries: - * one for the header and one for the checksum at the end - */ - if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || - qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) - return -EINVAL; - return 0; } @@ -1055,8 +1197,9 @@ int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_qp *qp) { int err; + SPIN_LOCK_PREP(lh); - err = mthca_set_qp_size(dev, cap, qp); + err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; @@ -1078,10 +1221,10 @@ int mthca_alloc_qp(struct mthca_dev *dev, return err; } - spin_lock_irq(&dev->qp_table.lock); + spin_lock_irq(&dev->qp_table.lock, &lh); mthca_array_set(&dev->qp_table.qp, qp->qpn & (dev->limits.num_qps - 1), qp); - spin_unlock_irq(&dev->qp_table.lock); + spin_unlock_irq(&lh); return 0; } @@ -1098,23 +1241,27 @@ int mthca_alloc_sqp(struct mthca_dev *dev, { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; + SPIN_LOCK_PREP(lhs); + SPIN_LOCK_PREP(lhr); + SPIN_LOCK_PREP(lht); - err = mthca_set_qp_size(dev, cap, &sqp->qp); + err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); if (err) return err; - sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; - sqp->header_buf = dma_alloc_coherent(dev, sqp->header_buf_size, - &sqp->header_dma, GFP_KERNEL); - if (!sqp->header_buf) + alloc_dma_zmem_map(dev, + sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE, + PCI_DMA_BIDIRECTIONAL, + &sqp->sg); + if (!sqp->sg.page) return -ENOMEM; - 
spin_lock_irq(&dev->qp_table.lock); + spin_lock_irq(&dev->qp_table.lock, &lht); if (mthca_array_get(&dev->qp_table.qp, mqpn)) err = -EBUSY; else mthca_array_set(&dev->qp_table.qp, mqpn, sqp); - spin_unlock_irq(&dev->qp_table.lock); + spin_unlock_irq(&lht); if (err) goto err_out; @@ -1137,21 +1284,20 @@ int mthca_alloc_sqp(struct mthca_dev *dev, * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. */ - spin_lock_irq(&send_cq->lock); + spin_lock_irq(&send_cq->lock, &lhs); if (send_cq != recv_cq) - spin_lock(&recv_cq->lock); + spin_lock(&recv_cq->lock, &lhr); - spin_lock(&dev->qp_table.lock); + spin_lock(&dev->qp_table.lock, &lht); mthca_array_clear(&dev->qp_table.qp, mqpn); - spin_unlock(&dev->qp_table.lock); + spin_unlock(&lht); if (send_cq != recv_cq) - spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&lhr); + spin_unlock_irq(&lhs); err_out: - dma_free_coherent(dev, sqp->header_buf_size, - sqp->header_buf, sqp->header_dma); + free_dma_mem_map(dev, &sqp->sg, PCI_DMA_BIDIRECTIONAL); return err; } @@ -1162,6 +1308,9 @@ void mthca_free_qp(struct mthca_dev *dev, u8 status; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; + SPIN_LOCK_PREP(lhs); + SPIN_LOCK_PREP(lhr); + SPIN_LOCK_PREP(lht); send_cq = to_mcq(qp->ibqp.send_cq); recv_cq = to_mcq(qp->ibqp.recv_cq); @@ -1170,18 +1319,18 @@ void mthca_free_qp(struct mthca_dev *dev, * Lock CQs here, so that CQ polling code can do QP lookup * without taking a lock. */ - spin_lock_irq(&send_cq->lock); + spin_lock_irq(&send_cq->lock, &lhs); if (send_cq != recv_cq) - spin_lock(&recv_cq->lock); + spin_lock(&recv_cq->lock, &lhr); - spin_lock(&dev->qp_table.lock); + spin_lock(&dev->qp_table.lock, &lht); mthca_array_clear(&dev->qp_table.qp, qp->qpn & (dev->limits.num_qps - 1)); - spin_unlock(&dev->qp_table.lock); + spin_unlock(&lht); if (send_cq != recv_cq) - spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&lhr); + spin_unlock_irq(&lhs); atomic_dec(&qp->refcount); wait_event(&qp->wait, !atomic_read(&qp->refcount)); @@ -1194,7 +1343,7 @@ void mthca_free_qp(struct mthca_dev *dev, * will be cleaned up in userspace, so all we have to do is * unref the mem-free tables and free the QPN in our table. */ - if (!qp->ibqp.uobject) { + if (!qp->ibqp.ucontext) { mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); if (qp->ibqp.send_cq != qp->ibqp.recv_cq) @@ -1209,48 +1358,67 @@ void mthca_free_qp(struct mthca_dev *dev, if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); - dma_free_coherent(dev, - to_msqp(qp)->header_buf_size, - to_msqp(qp)->header_buf, - to_msqp(qp)->header_dma); + free_dma_mem_map(dev, &to_msqp(qp)->sg, PCI_DMA_BIDIRECTIONAL); } else mthca_free(&dev->qp_table.alloc, qp->qpn); } +static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr) +{ + + enum mthca_wr_opcode opcode = -1; //= wr->wr_type; + + switch (wr->wr_type) { + case WR_SEND: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND; + break; + case WR_RDMA_WRITE: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? 
MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE; + break; + case WR_RDMA_READ: opcode = MTHCA_OPCODE_RDMA_READ; break; + case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break; + case WR_FETCH_ADD: opcode = MTHCA_OPCODE_ATOMIC_FA; break; + default: opcode = MTHCA_OPCODE_INVALID;break; + } + return opcode; +} + /* Create UD header for an MLX send and build a data segment for it */ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, struct ib_send_wr *wr, + int ind, struct _ib_send_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { + enum ib_wr_opcode opcode = conv_ibal_wr_opcode(wr); int header_size; int err; u16 pkey; + CPU_2_BE64_PREP; ib_ud_header_init(256, /* assume a MAD */ - sqp->ud_header.grh_present, - &sqp->ud_header); + mthca_ah_grh_present(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)), + &sqp->ud_header); - err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); + err = mthca_read_ah(dev, to_mah((struct ib_ah *)wr->dgrm.ud.h_av), &sqp->ud_header); if (err) return err; - mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + mlx->flags &= ~cl_hton32(MTHCA_NEXT_SOLICIT | 1); + mlx->flags |= cl_hton32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; - switch (wr->opcode) { - case IB_WR_SEND: + switch (opcode) { + case MTHCA_OPCODE_SEND: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; break; - case IB_WR_SEND_WITH_IMM: + case MTHCA_OPCODE_SEND_IMM: sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->imm_data; + sqp->ud_header.immediate_data = wr->immediate_data; break; default: return -EINVAL; @@ -1259,27 +1427,27 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; - sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + sqp->ud_header.bth.solicited_event = !!(wr->send_opt & IB_SEND_OPT_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port, - wr->wr.ud.pkey_index, &pkey); - sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); - sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? - sqp->qkey : wr->wr.ud.remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + wr->dgrm.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cl_hton16(pkey); + sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp; + sqp->ud_header.bth.psn = cl_hton32((sqp->send_psn++) & ((1 << 24) - 1)); + sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ? 
+ cl_hton32(sqp->qkey) : wr->dgrm.ud.remote_qkey; + sqp->ud_header.deth.source_qpn = cl_hton32(sqp->qp.ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, - (u8*)sqp->header_buf + + (u8*)sqp->sg.page + ind * MTHCA_UD_HEADER_SIZE); - data->byte_count = cpu_to_be32(header_size); - data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); - data->addr = cpu_to_be64(sqp->header_dma + + data->byte_count = cl_hton32(header_size); + data->lkey = cl_hton32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->addr = CPU_2_BE64(sqp->sg.dma_address + ind * MTHCA_UD_HEADER_SIZE); return 0; @@ -1290,21 +1458,22 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, { unsigned cur; struct mthca_cq *cq; + SPIN_LOCK_PREP(lh); cur = wq->head - wq->tail; if (likely((int)cur + nreq < wq->max)) return 0; cq = to_mcq(ib_cq); - spin_lock(&cq->lock); + spin_lock(&cq->lock, &lh); cur = wq->head - wq->tail; - spin_unlock(&cq->lock); + spin_unlock(&lh); return (int)cur + nreq >= wq->max; } -int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1318,19 +1487,21 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u32 f0 = 0; int ind; u8 op0 = 0; + enum ib_wr_opcode opcode; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&qp->sq.lock); + spin_lock_irqsave(&qp->sq.lock, &lh); /* XXX check that state is OK to post send */ ind = qp->sq.next_ind; - for (nreq = 0; wr; ++nreq, wr = wr->next) { + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { - mthca_err(dev, "SQ %06x full (%u head, %u tail," + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, qp->sq.head, qp->sq.tail, - qp->sq.max, nreq); + qp->sq.max, nreq)); err = -ENOMEM; *bad_wr = wr; goto out; @@ -1339,58 +1510,59 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wqe = get_send_wqe(qp, ind); prev_wqe = qp->sq.last; qp->sq.last = wqe; + opcode = conv_ibal_wr_opcode(wr); ((struct mthca_next_seg *) wqe)->nda_op = 0; ((struct mthca_next_seg *) wqe)->ee_nds = 0; ((struct mthca_next_seg *) wqe)->flags = - ((wr->send_flags & IB_SEND_SIGNALED) ? - cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) | - ((wr->send_flags & IB_SEND_SOLICITED) ? - cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) | - cpu_to_be32(1); - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ((struct mthca_next_seg *) wqe)->imm = wr->imm_data; + ((wr->send_opt & IB_SEND_OPT_SIGNALED) ? + cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_opt & IB_SEND_OPT_SOLICITED) ? 
+ cl_hton32(MTHCA_NEXT_SOLICIT) : 0) | + cl_hton32(1); + if (opcode == MTHCA_OPCODE_SEND_IMM|| + opcode == MTHCA_OPCODE_RDMA_WRITE_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data; wqe += sizeof (struct mthca_next_seg); size = sizeof (struct mthca_next_seg) / 16; switch (qp->transport) { case RC: - switch (wr->opcode) { - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: + switch (opcode) { + case MTHCA_OPCODE_ATOMIC_CS: + case MTHCA_OPCODE_ATOMIC_FA: ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); + cl_hton64(wr->remote_ops.vaddr); ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); + cl_hton32(wr->remote_ops.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + if (opcode == MTHCA_OPCODE_ATOMIC_CS) { ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); + cl_hton64(wr->remote_ops.atomic2); ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); + cl_hton64(wr->remote_ops.atomic1); } else { ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); + cl_hton64(wr->remote_ops.atomic1); ((struct mthca_atomic_seg *) wqe)->compare = 0; } wqe += sizeof (struct mthca_atomic_seg); - size += sizeof (struct mthca_raddr_seg) / 16 + - sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16 ; break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - case IB_WR_RDMA_READ: + case MTHCA_OPCODE_RDMA_READ: + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); + cl_hton64(wr->remote_ops.vaddr); ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); + cl_hton32(wr->remote_ops.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; @@ -1404,13 +1576,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UC: - switch (wr->opcode) { - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: + switch (opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); + cl_hton64(wr->remote_ops.vaddr); ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); + cl_hton32(wr->remote_ops.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; @@ -1425,13 +1597,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case UD: ((struct mthca_tavor_ud_seg *) wqe)->lkey = - cpu_to_be32(to_mah(wr->wr.ud.ah)->key); + cl_hton32(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->key); ((struct mthca_tavor_ud_seg *) wqe)->av_addr = - cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); - ((struct mthca_tavor_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_tavor_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); + cl_hton64(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->avdma); + ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp; + ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey; wqe += sizeof (struct mthca_tavor_ud_seg); size += sizeof (struct mthca_tavor_ud_seg) / 16; @@ -1450,28 +1620,30 @@ int 
mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; } - if (wr->num_sge > qp->sq.max_gs) { - mthca_err(dev, "too many gathers\n"); + if ((int)(int)wr->num_ds > qp->sq.max_gs) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x too many gathers\n",qp->qpn)); err = -EINVAL; *bad_wr = wr; goto out; } - for (i = 0; i < wr->num_sge; ++i) { + for (i = 0; i < (int)wr->num_ds; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); + cl_hton32(wr->ds_array[i].length); ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); + cl_hton32(wr->ds_array[i].lkey); ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + cl_hton64(wr->ds_array[i].vaddr); wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,("SQ %06x [%02x] lkey 0x%08x vaddr 0x%I64x 0x%x\n",qp->qpn,i, + (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length)); } /* Add one more inline data segment for ICRC */ if (qp->transport == MLX) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32((1 << 31) | 4); + cl_hton32((1 << 31) | 4); ((u32 *) wqe)[1] = 0; wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; @@ -1479,25 +1651,28 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->wrid[ind + qp->rq.max] = wr->wr_id; - if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) { - mthca_err(dev, "opcode invalid\n"); + if (opcode == MTHCA_OPCODE_INVALID) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn)); err = -EINVAL; *bad_wr = wr; goto out; } ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32(((ind << qp->sq.wqe_shift) + - qp->send_wqe_offset) |mthca_opcode[wr->opcode]); + cl_hton32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) |opcode); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) |size); + cl_hton32((size0 ? 
0 : MTHCA_NEXT_DBD) |size); if (!size0) { size0 = size; - op0 = mthca_opcode[wr->opcode]; + op0 = opcode; } + if(0) + dump_wqe( (u32*)qp->sq.last,qp); + ++ind; if (unlikely(ind >= qp->sq.max)) ind -= qp->sq.max; @@ -1507,9 +1682,9 @@ out: if (likely(nreq)) { __be32 doorbell[2]; - doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) + + doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) + qp->send_wqe_offset) | f0 | op0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); + doorbell[1] = cl_hton32((qp->qpn << 8) | size0); wmb(); @@ -1521,15 +1696,16 @@ out: qp->sq.next_ind = ind; qp->sq.head += nreq; - spin_unlock_irqrestore(&qp->sq.lock); + spin_unlock_irqrestore(&lh); return err; } -int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; int err = 0; int nreq; int i; @@ -1538,19 +1714,34 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; u8 *wqe; u8 *prev_wqe; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&qp->rq.lock); + spin_lock_irqsave(&qp->rq.lock, &lh); /* XXX check that state is OK to post receive */ ind = qp->rq.next_ind; - for (nreq = 0; wr; ++nreq, wr = wr->next) { + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cl_hton32(qp->qpn << 8); + + wmb(); + + mthca_write64(doorbell, dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + size0 = 0; + } if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { - mthca_err(dev, "RQ %06x full (%u head, %u tail," + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, qp->rq.head, qp->rq.tail, - qp->rq.max, nreq); + qp->rq.max, nreq)); err = -ENOMEM; *bad_wr = wr; goto out; @@ -1562,40 +1753,46 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, ((struct mthca_next_seg *) wqe)->nda_op = 0; ((struct mthca_next_seg *) wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD); + cl_hton32(MTHCA_NEXT_DBD); ((struct mthca_next_seg *) wqe)->flags = 0; wqe += sizeof (struct mthca_next_seg); size = sizeof (struct mthca_next_seg) / 16; - if (unlikely(wr->num_sge > qp->rq.max_gs)) { + if (unlikely((int)wr->num_ds > qp->rq.max_gs)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("RQ %06x too many gathers\n",qp->qpn)); err = -EINVAL; *bad_wr = wr; goto out; } - for (i = 0; i < wr->num_sge; ++i) { + for (i = 0; i < (int)wr->num_ds; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); + cl_hton32(wr->ds_array[i].length); ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); + cl_hton32(wr->ds_array[i].lkey); ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + cl_hton64(wr->ds_array[i].vaddr); wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; +// HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("RQ %06x [%02x] lkey 0x%08x vaddr 0x%I64x 0x %x 0x%08x\n",i,qp->qpn, +// (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length, wr->wr_id)); } qp->wrid[ind] = wr->wr_id; ((struct mthca_next_seg *) prev_wqe)->nda_op = - 
cpu_to_be32((ind << qp->rq.wqe_shift) | 1); + cl_hton32((ind << qp->rq.wqe_shift) | 1); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD | size); + cl_hton32(MTHCA_NEXT_DBD | size); if (!size0) size0 = size; + if(0) + dump_wqe( (u32*)wqe ,qp); + ++ind; if (unlikely(ind >= qp->rq.max)) ind -= qp->rq.max; @@ -1603,30 +1800,28 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); + doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cl_hton32((qp->qpn << 8) | nreq); wmb(); - mthca_write64(doorbell, - dev->kar + MTHCA_RECEIVE_DOORBELL, - MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + mthca_write64(doorbell, dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } qp->rq.next_ind = ind; qp->rq.head += nreq; - spin_unlock_irqrestore(&qp->rq.lock); + spin_unlock_irqrestore(&lh); return err; } -int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; u8 *wqe; u8 *prev_wqe; int err = 0; @@ -1637,19 +1832,45 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u32 f0 = 0; int ind; u8 op0 = 0; + enum ib_wr_opcode opcode; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&qp->sq.lock); + spin_lock_irqsave(&qp->sq.lock, &lh); /* XXX check that state is OK to post send */ ind = qp->sq.head & (qp->sq.max - 1); - for (nreq = 0; wr; ++nreq, wr = wr->next) { + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { + nreq = 0; + doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | + ((qp->sq.head & 0xffff) << 8) |f0 | op0); + doorbell[1] = cl_hton32((qp->qpn << 8) | size0); + qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; + size0 = 0; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cl_hton32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. + */ + wmb(); + mthca_write64(doorbell, dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { - mthca_err(dev, "SQ %06x full (%u head, %u tail," + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, qp->sq.head, qp->sq.tail, - qp->sq.max, nreq); + qp->sq.max, nreq)); err = -ENOMEM; *bad_wr = wr; goto out; @@ -1658,56 +1879,57 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wqe = get_send_wqe(qp, ind); prev_wqe = qp->sq.last; qp->sq.last = wqe; + opcode = conv_ibal_wr_opcode(wr); ((struct mthca_next_seg *) wqe)->flags = - ((wr->send_flags & IB_SEND_SIGNALED) ? - cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) | - ((wr->send_flags & IB_SEND_SOLICITED) ? - cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0) | - cpu_to_be32(1); - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) - ((struct mthca_next_seg *) wqe)->imm = wr->imm_data; + ((wr->send_opt & IB_SEND_OPT_SIGNALED) ? + cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_opt & IB_SEND_OPT_SOLICITED) ? 
+ cl_hton32(MTHCA_NEXT_SOLICIT) : 0) | + cl_hton32(1); + if (opcode == MTHCA_OPCODE_SEND_IMM|| + opcode == MTHCA_OPCODE_RDMA_WRITE_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data; wqe += sizeof (struct mthca_next_seg); size = sizeof (struct mthca_next_seg) / 16; switch (qp->transport) { case RC: - switch (wr->opcode) { - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: + switch (opcode) { + case MTHCA_OPCODE_ATOMIC_CS: + case MTHCA_OPCODE_ATOMIC_FA: ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.atomic.remote_addr); + cl_hton64(wr->remote_ops.vaddr); ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.atomic.rkey); + cl_hton32(wr->remote_ops.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + if (opcode == MTHCA_OPCODE_ATOMIC_FA) { ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.swap); + cl_hton64(wr->remote_ops.atomic2); ((struct mthca_atomic_seg *) wqe)->compare = - cpu_to_be64(wr->wr.atomic.compare_add); + cl_hton64(wr->remote_ops.atomic1); } else { ((struct mthca_atomic_seg *) wqe)->swap_add = - cpu_to_be64(wr->wr.atomic.compare_add); + cl_hton64(wr->remote_ops.atomic1); ((struct mthca_atomic_seg *) wqe)->compare = 0; } wqe += sizeof (struct mthca_atomic_seg); - size += sizeof (struct mthca_raddr_seg) / 16 + - sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16 ; break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: + case MTHCA_OPCODE_RDMA_READ: + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); + cl_hton64(wr->remote_ops.vaddr); ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); + cl_hton32(wr->remote_ops.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; @@ -1721,13 +1943,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case UC: - switch (wr->opcode) { - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: + switch (opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: ((struct mthca_raddr_seg *) wqe)->raddr = - cpu_to_be64(wr->wr.rdma.remote_addr); + cl_hton64(wr->remote_ops.vaddr); ((struct mthca_raddr_seg *) wqe)->rkey = - cpu_to_be32(wr->wr.rdma.rkey); + cl_hton32(wr->remote_ops.rkey); ((struct mthca_raddr_seg *) wqe)->reserved = 0; wqe += sizeof (struct mthca_raddr_seg); size += sizeof (struct mthca_raddr_seg) / 16; @@ -1742,11 +1964,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case UD: memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, - to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); - ((struct mthca_arbel_ud_seg *) wqe)->dqpn = - cpu_to_be32(wr->wr.ud.remote_qpn); - ((struct mthca_arbel_ud_seg *) wqe)->qkey = - cpu_to_be32(wr->wr.ud.remote_qkey); + to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, MTHCA_AV_SIZE); + ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp; + ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey; wqe += sizeof (struct mthca_arbel_ud_seg); size += sizeof (struct mthca_arbel_ud_seg) / 16; @@ -1765,20 +1985,20 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; } - if (wr->num_sge > qp->sq.max_gs) { - mthca_err(dev, "too many 
gathers\n"); + if ((int)wr->num_ds > qp->sq.max_gs) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x full too many gathers\n",qp->qpn)); err = -EINVAL; *bad_wr = wr; goto out; } - for (i = 0; i < wr->num_sge; ++i) { + for (i = 0; i < (int)wr->num_ds; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); + cl_hton32(wr->ds_array[i].length); ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); + cl_hton32(wr->ds_array[i].lkey); ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + cl_hton64(wr->ds_array[i].vaddr); wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; } @@ -1786,7 +2006,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, /* Add one more inline data segment for ICRC */ if (qp->transport == MLX) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32((1 << 31) | 4); + cl_hton32((1 << 31) | 4); ((u32 *) wqe)[1] = 0; wqe += sizeof (struct mthca_data_seg); size += sizeof (struct mthca_data_seg) / 16; @@ -1794,23 +2014,23 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, qp->wrid[ind + qp->rq.max] = wr->wr_id; - if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) { - mthca_err(dev, "opcode invalid\n"); + if (opcode == MTHCA_OPCODE_INVALID) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn)); err = -EINVAL; *bad_wr = wr; goto out; } ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32(((ind << qp->sq.wqe_shift) + - qp->send_wqe_offset) |mthca_opcode[wr->opcode]); + cl_hton32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) |opcode); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD | size); + cl_hton32(MTHCA_NEXT_DBD | size); if (!size0) { size0 = size; - op0 = mthca_opcode[wr->opcode]; + op0 = opcode; } ++ind; @@ -1820,13 +2040,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32((nreq << 24) | - ((qp->sq.head & 0xffff) << 8) | - f0 | op0); - doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0); - + doorbell[0] = cl_hton32((nreq << 24) | + ((qp->sq.head & 0xffff) << 8) |f0 | op0); + doorbell[1] = cl_hton32((qp->qpn << 8) | size0); qp->sq.head += nreq; /* @@ -1834,7 +2050,7 @@ out: * doorbell record. 
*/ wmb(); - *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff); + *qp->sq.db = cl_hton32(qp->sq.head & 0xffff); /* * Make sure doorbell record is written before we @@ -1846,12 +2062,12 @@ out: MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } - spin_unlock_irqrestore(&qp->sq.lock); + spin_unlock_irqrestore(&lh); return err; } -int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_receive(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); @@ -1860,19 +2076,20 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; int i; u8 *wqe; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&qp->rq.lock); + spin_lock_irqsave(&qp->rq.lock, &lh); /* XXX check that state is OK to post receive */ ind = qp->rq.head & (qp->rq.max - 1); - for (nreq = 0; wr; ++nreq, wr = wr->next) { + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { - mthca_err(dev, "RQ %06x full (%u head, %u tail," + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail," " %d max, %d nreq)\n", qp->qpn, qp->rq.head, qp->rq.tail, - qp->rq.max, nreq); + qp->rq.max, nreq)); err = -ENOMEM; *bad_wr = wr; goto out; @@ -1884,25 +2101,26 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wqe += sizeof (struct mthca_next_seg); - if (unlikely(wr->num_sge > qp->rq.max_gs)) { + if (unlikely((int)wr->num_ds > qp->rq.max_gs)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("RQ %06x full too many scatter\n",qp->qpn)); err = -EINVAL; *bad_wr = wr; goto out; } - for (i = 0; i < wr->num_sge; ++i) { + for (i = 0; i < (int)wr->num_ds; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); + cl_hton32(wr->ds_array[i].length); ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); + cl_hton32(wr->ds_array[i].lkey); ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + cl_hton64(wr->ds_array[i].vaddr); wqe += sizeof (struct mthca_data_seg); } if (i < qp->rq.max_gs) { ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); ((struct mthca_data_seg *) wqe)->addr = 0; } @@ -1921,10 +2139,10 @@ out: * doorbell record. 
*/ wmb(); - *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff); + *qp->rq.db = cl_hton32(qp->rq.head & 0xffff); } - spin_unlock_irqrestore(&qp->rq.lock); + spin_unlock_irqrestore(&lh); return err; } @@ -1947,17 +2165,17 @@ int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, else next = get_recv_wqe(qp, index); - *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); - if (next->ee_nds & cpu_to_be32(0x3f)) - *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) | - (next->ee_nds & cpu_to_be32(0x3f)); + *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD)); + if (next->ee_nds & cl_hton32(0x3f)) + *new_wqe = (next->nda_op & cl_hton32(~0x3f)) | + (next->ee_nds & cl_hton32(0x3f)); else *new_wqe = 0; return 0; } -int __devinit mthca_init_qp_table(struct mthca_dev *dev) +int mthca_init_qp_table(struct mthca_dev *dev) { int err; u8 status; @@ -1993,9 +2211,9 @@ int __devinit mthca_init_qp_table(struct mthca_dev *dev) if (err) goto err_out; if (status) { - mthca_warn(dev, "CONF_SPECIAL_QP returned " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("CONF_SPECIAL_QP returned " "status %02x, aborting.\n", - status); + status)); err = -EINVAL; goto err_out; } @@ -2012,7 +2230,7 @@ int __devinit mthca_init_qp_table(struct mthca_dev *dev) return err; } -void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev) +void mthca_cleanup_qp_table(struct mthca_dev *dev) { int i; u8 status; @@ -2023,3 +2241,4 @@ void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev) mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); mthca_alloc_cleanup(&dev->qp_table.alloc); } + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_reset.c b/branches/MTHCA/hw/mthca/kernel/mthca_reset.c index 66447816..50bd2517 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_reset.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_reset.c @@ -50,7 +50,7 @@ int mthca_reset(struct mthca_dev *mdev) struct pci_dev *bridge = NULL; #define MTHCA_RESET_OFFSET 0xf0010 -#define MTHCA_RESET_VALUE swab32(1) +#define MTHCA_RESET_VALUE _byteswap_ulong(1) /* * Reset the chip. This is somewhat ugly because we have to @@ -71,7 +71,7 @@ int mthca_reset(struct mthca_dev *mdev) bridge)) != NULL) { if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE && bridge->subordinate == mdev->pdev->bus) { - mthca_dbg(mdev, "Found bridge: %s (%s)\n", + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW , "Found bridge: %s (%s)\n", pci_pretty_name(bridge), pci_name(bridge)); break; } @@ -83,7 +83,7 @@ int mthca_reset(struct mthca_dev *mdev) * assume we're in no-bridge mode and hope for * the best. 
*/ - mthca_warn(mdev, "No bridge found for %s (%s)\n", + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW, "No bridge found for %s (%s)\n", pci_pretty_name(mdev->pdev), pci_name(mdev->pdev)); } @@ -93,7 +93,7 @@ int mthca_reset(struct mthca_dev *mdev) hca_header = kmalloc(256, GFP_KERNEL); if (!hca_header) { err = -ENOMEM; - mthca_err(mdev, "Couldn't allocate memory to save HCA " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't allocate memory to save HCA " "PCI header, aborting.\n"); goto out; } @@ -103,7 +103,7 @@ int mthca_reset(struct mthca_dev *mdev) continue; if (pci_read_config_dword(mdev->pdev, i * 4, hca_header + i)) { err = -ENODEV; - mthca_err(mdev, "Couldn't save HCA " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't save HCA " "PCI header, aborting.\n"); goto out; } @@ -113,7 +113,7 @@ int mthca_reset(struct mthca_dev *mdev) bridge_header = kmalloc(256, GFP_KERNEL); if (!bridge_header) { err = -ENOMEM; - mthca_err(mdev, "Couldn't allocate memory to save HCA " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't allocate memory to save HCA " "bridge PCI header, aborting.\n"); goto out; } @@ -123,7 +123,7 @@ int mthca_reset(struct mthca_dev *mdev) continue; if (pci_read_config_dword(bridge, i * 4, bridge_header + i)) { err = -ENODEV; - mthca_err(mdev, "Couldn't save HCA bridge " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't save HCA bridge " "PCI header, aborting.\n"); goto out; } @@ -137,7 +137,7 @@ int mthca_reset(struct mthca_dev *mdev) if (!reset) { err = -ENOMEM; - mthca_err(mdev, "Couldn't map HCA reset register, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't map HCA reset register, " "aborting.\n"); goto out; } @@ -157,7 +157,7 @@ int mthca_reset(struct mthca_dev *mdev) for (c = 0; c < 100; ++c) { if (pci_read_config_dword(bridge ? 
bridge : mdev->pdev, 0, &v)) { err = -ENODEV; - mthca_err(mdev, "Couldn't access HCA after reset, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't access HCA after reset, " "aborting.\n"); goto out; } @@ -169,7 +169,7 @@ int mthca_reset(struct mthca_dev *mdev) } err = -ENODEV; - mthca_err(mdev, "PCI device did not come back after reset, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "PCI device did not come back after reset, " "aborting.\n"); goto out; } @@ -187,7 +187,7 @@ good: if (pci_write_config_dword(bridge, i * 4, bridge_header[i])) { err = -ENODEV; - mthca_err(mdev, "Couldn't restore HCA bridge reg %x, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't restore HCA bridge reg %x, " "aborting.\n", i); goto out; } @@ -196,7 +196,7 @@ good: if (pci_write_config_dword(bridge, PCI_COMMAND, bridge_header[PCI_COMMAND / 4])) { err = -ENODEV; - mthca_err(mdev, "Couldn't restore HCA bridge COMMAND, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't restore HCA bridge COMMAND, " "aborting.\n"); goto out; } @@ -208,7 +208,7 @@ good: if (pci_write_config_dword(mdev->pdev, i * 4, hca_header[i])) { err = -ENODEV; - mthca_err(mdev, "Couldn't restore HCA reg %x, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't restore HCA reg %x, " "aborting.\n", i); goto out; } @@ -217,7 +217,7 @@ good: if (pci_write_config_dword(mdev->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; - mthca_err(mdev, "Couldn't restore HCA COMMAND, " + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , "Couldn't restore HCA COMMAND, " "aborting.\n"); goto out; } diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_srq.c b/branches/MTHCA/hw/mthca/kernel/mthca_srq.c index c37c62ff..572183e9 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_srq.c +++ b/branches/MTHCA/hw/mthca/kernel/mthca_srq.c @@ -34,10 +34,22 @@ #include "mt_l2w.h" #include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_srq.tmh" +#endif #include "mthca_cmd.h" #include "mthca_memfree.h" #include "mthca_wqe.h" + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_srq_table) +#pragma alloc_text (PAGE, mthca_cleanup_srq_table) +#endif + enum { MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE }; @@ -68,28 +80,44 @@ struct mthca_arbel_srq_context { static void *get_wqe(struct mthca_srq *srq, int n) { if (srq->is_direct) - return srq->queue.direct.buf + (n << srq->wqe_shift); + return (u8*)srq->queue.direct.page + (n << srq->wqe_shift); else - return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf + + return (u8*)srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].page + ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); } +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use the imm field + * because in the Tavor case, posting a WQE may overwrite the next + * segment of the previous WQE, but a receive WQE will never touch the + * imm field. This avoids corrupting our free list if the previous + * WQE has already completed and been put on the free list when we + * post the next WQE. 
+ */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) ((u8*)wqe + offsetof(struct mthca_next_seg, imm)); +} + static void mthca_tavor_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, struct mthca_tavor_srq_context *context) { + CPU_2_BE64_PREP; + RtlZeroMemory(context, sizeof *context); - context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); - context->state_pd = cpu_to_be32(pd->pd_num); - context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + context->wqe_base_ds = CPU_2_BE64(1 << (srq->wqe_shift - 4)); + context->state_pd = cl_hton32(pd->pd_num); + context->lkey = cl_hton32(srq->mr.ibmr.lkey); - if (pd->ibpd.uobject) + if (pd->ibpd.ucontext) context->uar = - cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + cl_hton32(to_mucontext(pd->ibpd.ucontext)->uar.index); else - context->uar = cpu_to_be32(dev->driver_uar.index); + context->uar = cl_hton32(dev->driver_uar.index); } static void mthca_arbel_init_srq_context(struct mthca_dev *dev, @@ -102,16 +130,16 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, RtlZeroMemory(context, sizeof *context); logsize = long_log2(srq->max) + srq->wqe_shift; - context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); - context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); - context->db_index = cpu_to_be32(srq->db_index); - context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); - if (pd->ibpd.uobject) + context->state_logsize_srqn = cl_hton32(logsize << 24 | srq->srqn); + context->lkey = cl_hton32(srq->mr.ibmr.lkey); + context->db_index = cl_hton32(srq->db_index); + context->logstride_usrpage = cl_hton32((srq->wqe_shift - 4) << 29); + if (pd->ibpd.ucontext) context->logstride_usrpage |= - cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + cl_hton32(to_mucontext(pd->ibpd.ucontext)->uar.index); else - context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); - context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); + context->logstride_usrpage |= cl_hton32(dev->driver_uar.index); + context->eq_pd = cl_hton32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); } static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) @@ -129,7 +157,7 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, int err; int i; - if (pd->ibpd.uobject) + if (pd->ibpd.ucontext) return 0; srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); @@ -152,14 +180,16 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, for (i = 0; i < srq->max; ++i) { wqe = get_wqe(srq, i); - *(int *) wqe = i < srq->max - 1 ? i + 1 : -1; + *wqe_to_link(wqe) = i < srq->max - 1 ? 
i + 1 : -1; for (scatter = (struct mthca_data_seg *)(wqe + sizeof (struct mthca_next_seg)); (void *) scatter < (void*)(wqe + (1 << srq->wqe_shift)); ++scatter) - scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY); } + srq->last = get_wqe(srq, srq->max - 1); + return 0; } @@ -170,9 +200,11 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, u8 status; int ds; int err; + SPIN_LOCK_PREP(lh); /* Sanity check SRQ size before proceeding */ - if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + if ((int)attr->max_wr > dev->limits.max_srq_wqes || + (int)attr->max_sge > dev->limits.max_sg) return -EINVAL; srq->max = attr->max_wr; @@ -182,7 +214,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (mthca_is_memfree(dev)) srq->max = roundup_pow_of_two(srq->max + 1); - ds = min(64UL, + ds = max(64UL, roundup_pow_of_two(sizeof (struct mthca_next_seg) + srq->max_gs * sizeof (struct mthca_data_seg))); srq->wqe_shift = long_log2(ds); @@ -196,7 +228,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (err) goto err_out; - if (!pd->ibpd.uobject) { + if (!pd->ibpd.ucontext) { srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, srq->srqn, &srq->db); if (srq->db_index < 0) { @@ -228,49 +260,49 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); if (err) { - mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_SRQ failed (%d)\n", err)); goto err_out_free_buf; } if (status) { - mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", - status); + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_SRQ returned status 0x%02x\n", + status)); err = -EINVAL; goto err_out_free_buf; } - spin_lock_irq(&dev->srq_table.lock); + spin_lock_irq(&dev->srq_table.lock, &lh); if (mthca_array_set(&dev->srq_table.srq, srq->srqn & (dev->limits.num_srqs - 1), srq)) { - spin_unlock_irq(&dev->srq_table.lock); + spin_unlock_irq(&lh); goto err_out_free_srq; } - spin_unlock_irq(&dev->srq_table.lock); + spin_unlock_irq(&lh); mthca_free_mailbox(dev, mailbox); srq->first_free = 0; srq->last_free = srq->max - 1; - srq->last = get_wqe(srq, srq->max - 1); return 0; err_out_free_srq: err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); - if (err) - mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + if (err){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_SRQ failed (%d)\n", err)); + }else if (status){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_SRQ returned status 0x%02x\n", status)); + } err_out_free_buf: - if (!pd->ibpd.uobject) + if (!pd->ibpd.ucontext) mthca_free_srq_buf(dev, srq); err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_db: - if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + if (!pd->ibpd.ucontext && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); err_out_icm: @@ -287,23 +319,25 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) struct mthca_mailbox *mailbox; int err; u8 status; + SPIN_LOCK_PREP(lh); mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { - mthca_warn(dev, "No memory for mailbox to free SRQ.\n"); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("No memory for mailbox to free SRQ.\n")); return; } err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); - if (err) - mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); - else if 
(status) - mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + if (err){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_SRQ failed (%d)\n", err)); + }else if (status){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_SRQ returned status 0x%02x\n", status)); + } - spin_lock_irq(&dev->srq_table.lock); + spin_lock_irq(&dev->srq_table.lock, &lh); mthca_array_clear(&dev->srq_table.srq, srq->srqn & (dev->limits.num_srqs - 1)); - spin_unlock_irq(&dev->srq_table.lock); + spin_unlock_irq(&lh); atomic_dec(&srq->refcount); wait_event(&srq->wait, !atomic_read(&srq->refcount)); @@ -319,20 +353,44 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) mthca_free_mailbox(dev, mailbox); } +int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret; + u8 status; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + if (ret) + return ret; + if (status) + return -EINVAL; + } + + return 0; +} + void mthca_srq_event(struct mthca_dev *dev, u32 srqn, enum ib_event_type event_type) { struct mthca_srq *srq; struct ib_event event; + SPIN_LOCK_PREP(lh); - spin_lock(&dev->srq_table.lock); + spin_lock(&dev->srq_table.lock, &lh); srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); if (srq) atomic_inc(&srq->refcount); - spin_unlock(&dev->srq_table.lock); + spin_unlock(&lh); if (!srq) { - mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn); + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Async event for bogus SRQ %08x\n", srqn)); return; } @@ -355,28 +413,30 @@ out: void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) { int ind; + SPIN_LOCK_PREP(lh); ind = wqe_addr >> srq->wqe_shift; - spin_lock(&srq->lock); + spin_lock(&srq->lock, &lh); if (likely(srq->first_free >= 0)) - *(int *) get_wqe(srq, srq->last_free) = ind; + *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; else srq->first_free = ind; - *(int *) get_wqe(srq, ind) = -1; + *wqe_to_link(get_wqe(srq, ind)) = -1; srq->last_free = ind; - spin_unlock(&srq->lock); + spin_unlock(&lh); } //TODO: is this code correct at all ? -int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); + __be32 doorbell[2]; int err = 0; int first_ind; int ind; @@ -385,23 +445,51 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, int i; u8 *wqe; u8 *prev_wqe; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&srq->lock); + spin_lock_irqsave(&srq->lock, &lh); first_ind = srq->first_free; - for (nreq = 0; wr; ++nreq, wr = wr->next) { + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cl_hton32(first_ind << srq->wqe_shift); + doorbell[1] = cl_hton32(srq->srqn << 8); + + /* + * Make sure that descriptors are written + * before doorbell is rung. 
+ */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECEIVE_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + first_ind = srq->first_free; + } + ind = srq->first_free; if (ind < 0) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SRQ %06x full\n", srq->srqn)); err = -ENOMEM; *bad_wr = wr; goto out; } wqe = get_wqe(srq, ind); - next_ind = *(int *) wqe; + next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + prev_wqe = srq->last; srq->last = wqe; @@ -411,34 +499,34 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, wqe += sizeof (struct mthca_next_seg); - if (unlikely(wr->num_sge > srq->max_gs)) { + if (unlikely((int)wr->num_ds > srq->max_gs)) { err = -EINVAL; *bad_wr = wr; srq->last = prev_wqe; goto out; } - for (i = 0; i < wr->num_sge; ++i) { + for (i = 0; i < (int)wr->num_ds; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); + cl_hton32(wr->ds_array[i].length); ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); + cl_hton32(wr->ds_array[i].lkey); ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + cl_hton64(wr->ds_array[i].vaddr); wqe += sizeof (struct mthca_data_seg); } if (i < srq->max_gs) { ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); ((struct mthca_data_seg *) wqe)->addr = 0; } ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32((ind << srq->wqe_shift) | 1); + cl_hton32((ind << srq->wqe_shift) | 1); wmb(); ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD); + cl_hton32(MTHCA_NEXT_DBD); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; @@ -446,10 +534,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, out: if (likely(nreq)) { - __be32 doorbell[2]; - - doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift); - doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq); + doorbell[0] = cl_hton32(first_ind << srq->wqe_shift); + doorbell[1] = cl_hton32((srq->srqn << 8) | nreq); /* * Make sure that descriptors are written before @@ -462,13 +548,13 @@ out: MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); } - spin_unlock_irqrestore(&srq->lock); + spin_unlock_irqrestore(&lh); return err; } //TODO: is this code correct at all ? 
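As a self-contained illustration (not part of the patch itself): the SRQ hunks above thread free receive WQEs into a singly linked free list. wqe_to_link() points at the imm field of each WQE's next segment, mthca_alloc_srq_buf() chains entry i to i+1 and terminates the list with -1, the post-receive loops pop entries from srq->first_free, and mthca_free_srq_wqe() appends completed entries behind srq->last_free. The toy model below sketches that discipline with a plain int array standing in for *wqe_to_link(get_wqe(srq, i)); the names link_, first_free, last_free and MAX_WQES are hypothetical, and the driver's locking, doorbell batching and full-queue checks are deliberately omitted.

#include <stdio.h>

#define MAX_WQES 8                        /* toy queue size */

static int link_[MAX_WQES];               /* stands in for the imm-field link */
static int first_free, last_free;

static void free_list_init(void)
{
	int i;

	/* Chain every entry to the next one; -1 terminates the list. */
	for (i = 0; i < MAX_WQES; ++i)
		link_[i] = (i < MAX_WQES - 1) ? i + 1 : -1;
	first_free = 0;
	last_free  = MAX_WQES - 1;
}

static int wqe_alloc(void)                /* what the post_srq_recv loops do */
{
	int ind = first_free;

	if (ind < 0)
		return -1;                /* queue is full */
	first_free = link_[ind];
	return ind;
}

static void wqe_free(int ind)             /* what mthca_free_srq_wqe() does */
{
	if (first_free >= 0)
		link_[last_free] = ind;   /* append behind the current tail */
	else
		first_free = ind;         /* list was empty; new head */
	link_[ind] = -1;
	last_free = ind;
}

int main(void)
{
	int a, b;

	free_list_init();
	a = wqe_alloc();
	b = wqe_alloc();
	wqe_free(a);
	printf("allocated %d and %d, next free is %d\n", a, b, first_free);
	return 0;
}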
-int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); @@ -478,48 +564,56 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, int nreq; int i; u8 *wqe; + SPIN_LOCK_PREP(lh); - spin_lock_irqsave(&srq->lock); + spin_lock_irqsave(&srq->lock, &lh); - for (nreq = 0; wr; ++nreq, wr = wr->next) { + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { ind = srq->first_free; if (ind < 0) { - mthca_err(dev, "SRQ %06x full\n", srq->srqn); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SRQ %06x full\n", srq->srqn)); err = -ENOMEM; *bad_wr = wr; goto out; } wqe = get_wqe(srq, ind); - next_ind = *(int *) wqe; + next_ind = *wqe_to_link(wqe); - ((struct mthca_next_seg *) wqe)->nda_op = 0; - cpu_to_be32((next_ind << srq->wqe_shift) | 1); + if (next_ind < 0) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + ((struct mthca_next_seg *) wqe)->nda_op = + cl_hton32((next_ind << srq->wqe_shift) | 1); ((struct mthca_next_seg *) wqe)->ee_nds = 0; /* flags field will always remain 0 */ wqe += sizeof (struct mthca_next_seg); - if (unlikely(wr->num_sge > srq->max_gs)) { + if (unlikely((int)wr->num_ds > srq->max_gs)) { err = -EINVAL; *bad_wr = wr; goto out; } - for (i = 0; i < wr->num_sge; ++i) { + for (i = 0; i < (int)wr->num_ds; ++i) { ((struct mthca_data_seg *) wqe)->byte_count = - cpu_to_be32(wr->sg_list[i].length); + cl_hton32(wr->ds_array[i].length); ((struct mthca_data_seg *) wqe)->lkey = - cpu_to_be32(wr->sg_list[i].lkey); + cl_hton32(wr->ds_array[i].lkey); ((struct mthca_data_seg *) wqe)->addr = - cpu_to_be64(wr->sg_list[i].addr); + cl_hton64(wr->ds_array[i].vaddr); wqe += sizeof (struct mthca_data_seg); } if (i < srq->max_gs) { ((struct mthca_data_seg *) wqe)->byte_count = 0; - ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); ((struct mthca_data_seg *) wqe)->addr = 0; } @@ -536,14 +630,14 @@ out: * we write doorbell record. 
*/ wmb(); - *srq->db = cpu_to_be32(srq->counter); + *srq->db = cl_hton32(srq->counter); } - spin_unlock_irqrestore(&srq->lock); + spin_unlock_irqrestore(&lh); return err; } -int __devinit mthca_init_srq_table(struct mthca_dev *dev) +int mthca_init_srq_table(struct mthca_dev *dev) { int err; @@ -567,7 +661,7 @@ int __devinit mthca_init_srq_table(struct mthca_dev *dev) return err; } -void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) +void mthca_cleanup_srq_table(struct mthca_dev *dev) { if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) return; @@ -575,3 +669,4 @@ void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev) mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs); mthca_alloc_cleanup(&dev->srq_table.alloc); } + diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_user.h b/branches/MTHCA/hw/mthca/kernel/mthca_user.h index dd46c5c5..a06df52f 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_user.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_user.h @@ -44,15 +44,24 @@ */ struct mthca_alloc_ucontext_resp { + uint64_t uar_addr; + u64 pd_handle; + u32 pdn; u32 qp_tab_size; u32 uarc_size; + u32 vend_id; + u16 dev_id; }; +#ifdef LINUX_TO_BE_REMOVED struct mthca_alloc_pd_resp { + u64 pd_handle; u32 pdn; u32 reserved; }; +#endif +#ifdef LINUX_TO_BE_REMOVED struct mthca_create_cq { u32 lkey; u32 pdn; @@ -61,11 +70,15 @@ struct mthca_create_cq { u32 arm_db_index; u32 set_db_index; }; +#endif +#ifdef LINUX_TO_BE_REMOVED struct mthca_create_cq_resp { + u32 lkey; u32 cqn; u32 reserved; }; +#endif struct mthca_create_srq { u32 lkey; @@ -78,8 +91,15 @@ struct mthca_create_srq_resp { u32 reserved; }; +#ifdef LINUX_TO_BE_REMOVED struct mthca_create_qp { u32 lkey; +#ifndef LINUX_TO_BE_REMOVED + u64 pd_handle; + u64 mr_buf; + u32 mr_size; + u32 mr_access; +#endif u32 reserved; u64 sq_db_page; u64 rq_db_page; @@ -87,4 +107,8 @@ struct mthca_create_qp { u32 rq_db_index; }; +struct mthca_create_qp_resp { + u32 lkey; +}; +#endif #endif /* MTHCA_USER_H */ diff --git a/branches/MTHCA/hw/mthca/kernel/mthca_wqe.h b/branches/MTHCA/hw/mthca/kernel/mthca_wqe.h index 5a68c665..7f162ce8 100644 --- a/branches/MTHCA/hw/mthca/kernel/mthca_wqe.h +++ b/branches/MTHCA/hw/mthca/kernel/mthca_wqe.h @@ -47,7 +47,9 @@ enum { }; enum { - MTHCA_INVAL_LKEY = 0x100 + MTHCA_INVAL_LKEY = 0x100, + MTHCA_TAVOR_MAX_WQES_PER_RECV_DB = 256, + MTHCA_ARBEL_MAX_WQES_PER_SEND_DB = 255 }; struct mthca_next_seg { diff --git a/branches/MTHCA/inc/iba/ib_types.h b/branches/MTHCA/inc/iba/ib_types.h index 1d6b4560..c5e1bec0 100644 --- a/branches/MTHCA/inc/iba/ib_types.h +++ b/branches/MTHCA/inc/iba/ib_types.h @@ -8764,6 +8764,19 @@ typedef enum _ib_wc_status_t IB_WCS_RNR_RETRY_ERR, IB_WCS_TIMEOUT_RETRY_ERR, IB_WCS_REM_INVALID_REQ_ERR, + // new codes + IB_WCS_LOCAL_EEC_OP_ERR, + IB_WCS_BAD_RESP_ERR, + IB_WCS_LOCAL_ACCESS_ERR, + IB_WCS_REM_INV_REQ_ERR, + IB_WCS_LOCAL_RDD_VIOL_ERR, + IB_WCS_REM_ABORT_ERR, + IB_WCS_INV_EECN_ERR, + IB_WCS_INV_EEC_STATE_ERR, + IB_WCS_FATAL_ERR, + IB_WCS_RESP_TIMEOUT_ERR, + IB_WCS_GENERAL_ERR, + // IB_WCS_UNMATCHED_RESPONSE, /* InfiniBand Access Layer */ IB_WCS_CANCELED, /* InfiniBand Access Layer */ IB_WCS_UNKNOWN /* Must be last. */ @@ -8827,6 +8840,30 @@ typedef enum _ib_wc_status_t * - There was insufficient buffers to receive a new atomic operation. * - An RDMA request was larger than 2^31 bytes. 
* +* //TODO: add codes' description +* +* IB_WCS_LOCAL_EEC_OP_ERR, +* +* IB_WCS_BAD_RESP_ERR, +* +* IB_WCS_LOCAL_ACCESS_ERR, +* +* IB_WCS_REM_INV_REQ_ERR, +* +* IB_WCS_LOCAL_RDD_VIOL_ERR, +* +* IB_WCS_REM_ABORT_ERR, +* +* IB_WCS_INV_EECN_ERR, +* +* IB_WCS_INV_EEC_STATE_ERR, +* +* IB_WCS_FATAL_ERR, +* +* IB_WCS_RESP_TIMEOUT_ERR, +* +* IB_WCS_GENERAL_ERR, +* * IB_WCS_UNMATCHED_RESPONSE * A response MAD was received for which there was no matching send. The * send operation may have been canceled by the user or may have timed diff --git a/branches/MTHCA/ulp/ipoib/kernel/ipoib.rc b/branches/MTHCA/ulp/ipoib/kernel/ipoib.rc index 9d04836f..525372fd 100644 --- a/branches/MTHCA/ulp/ipoib/kernel/ipoib.rc +++ b/branches/MTHCA/ulp/ipoib/kernel/ipoib.rc @@ -45,3 +45,4 @@ #define VER_ORIGINALFILENAME_STR "ipoib.sys" #include +#include "ipoib_log.rc" diff --git a/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.c b/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.c index 7e90b6dd..1c3f8366 100644 --- a/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.c +++ b/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.c @@ -234,7 +234,7 @@ ipoib_destroy_adapter( * between destruction and AL callbacks (PnP, Query, Destruction). * The lock provides protection */ - cl_mutex_acquire( &p_adapter->mutex ); + KeWaitForMutexObject( &p_adapter->mutex, Executive, KernelMode, FALSE, NULL ); cl_obj_lock( &p_adapter->obj ); p_adapter->state = IB_PNP_PORT_REMOVE; @@ -247,7 +247,7 @@ ipoib_destroy_adapter( cl_obj_unlock( &p_adapter->obj ); - cl_mutex_release( &p_adapter->mutex ); + KeReleaseMutex( &p_adapter->mutex, FALSE ); cl_obj_destroy( &p_adapter->obj ); @@ -263,7 +263,6 @@ adapter_construct( cl_spinlock_construct( &p_adapter->send_stat_lock ); cl_spinlock_construct( &p_adapter->recv_stat_lock ); cl_qpool_construct( &p_adapter->item_pool ); - cl_mutex_construct( &p_adapter->mutex ); cl_vector_construct( &p_adapter->ip_vector ); cl_perf_construct( &p_adapter->perf ); @@ -315,13 +314,7 @@ adapter_init( return IB_ERROR; } - cl_status = cl_mutex_init( &p_adapter->mutex ); - if( cl_status != CL_SUCCESS ) - { - IPOIB_TRACE_EXIT( IPOIB_DBG_ERROR, - ("cl_mutex_init returned %s\n", cl_status_text[cl_status]) ); - return IB_ERROR; - } + KeInitializeMutex( &p_adapter->mutex, 0 ); /* We manually manage the size and capacity of the vector. 
*/ cl_status = cl_vector_init( &p_adapter->ip_vector, 0, @@ -463,7 +456,6 @@ __adapter_free( cl_qpool_destroy( &p_adapter->item_pool ); cl_spinlock_destroy( &p_adapter->recv_stat_lock ); cl_spinlock_destroy( &p_adapter->send_stat_lock ); - cl_mutex_destroy( &p_adapter->mutex ); cl_obj_deinit( p_obj ); cl_perf_destroy( &p_adapter->perf, TRUE ); @@ -493,13 +485,13 @@ __ipoib_pnp_cb( CL_ASSERT( p_adapter ); /* Synchronize with destruction */ - cl_mutex_acquire( &p_adapter->mutex ); + KeWaitForMutexObject( &p_adapter->mutex, Executive, KernelMode, FALSE, NULL ); cl_obj_lock( &p_adapter->obj ); old_state = p_adapter->state; cl_obj_unlock( &p_adapter->obj ); if( old_state == IB_PNP_PORT_REMOVE ) { - cl_mutex_release( &p_adapter->mutex ); + KeReleaseMutex( &p_adapter->mutex, FALSE ); IPOIB_TRACE_EXIT( IPOIB_DBG_PNP, ("Aborting - Adapter destroying.\n") ); return IB_NOT_DONE; @@ -635,7 +627,7 @@ __ipoib_pnp_cb( break; } - cl_mutex_release( &p_adapter->mutex ); + KeReleaseMutex( &p_adapter->mutex, FALSE ); IPOIB_EXIT( IPOIB_DBG_PNP ); return status; @@ -753,8 +745,7 @@ __ipoib_pnp_dereg( p_adapter = PARENT_STRUCT( context, ipoib_adapter_t, obj ); /* Synchronize with destruction */ - cl_mutex_acquire( &p_adapter->mutex ); - + KeWaitForMutexObject( &p_adapter->mutex, Executive, KernelMode, FALSE, NULL ); cl_obj_lock( &p_adapter->obj ); CL_ASSERT( !p_adapter->h_pnp ); @@ -800,7 +791,7 @@ __ipoib_pnp_dereg( /* Dereference the adapter since the previous registration is now gone. */ cl_obj_deref( &p_adapter->obj ); - cl_mutex_release( &p_adapter->mutex ); + KeReleaseMutex( &p_adapter->mutex, FALSE ); IPOIB_EXIT( IPOIB_DBG_INIT ); } @@ -923,7 +914,7 @@ ipoib_set_active( NdisWriteErrorLogEntry( p_adapter->h_adapter, EVENT_IPOIB_PORT_UP + (p_adapter->rate/ONE_X_IN_100BPS), 1, p_adapter->rate ); - + NdisMIndicateStatus( p_adapter->h_adapter, NDIS_STATUS_MEDIA_CONNECT, NULL, 0 ); NdisMIndicateStatusComplete( p_adapter->h_adapter ); diff --git a/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.h b/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.h index 3a1849a1..63f863c6 100644 --- a/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.h +++ b/branches/MTHCA/ulp/ipoib/kernel/ipoib_adapter.h @@ -161,9 +161,7 @@ typedef struct _ipoib_adapter uint8_t mcast_array_size; cl_qpool_t item_pool; - - cl_mutex_t mutex; - + KMUTEX mutex; cl_vector_t ip_vector; cl_perf_t perf; diff --git a/branches/MTHCA/ulp/ipoib/kernel/ipoib_debug.h b/branches/MTHCA/ulp/ipoib/kernel/ipoib_debug.h index a96cd69e..7b58334d 100644 --- a/branches/MTHCA/ulp/ipoib/kernel/ipoib_debug.h +++ b/branches/MTHCA/ulp/ipoib/kernel/ipoib_debug.h @@ -59,6 +59,7 @@ extern uint32_t g_ipoib_dbg_lvl; #define IPOIB_DBG_MCAST (1 << 7) #define IPOIB_DBG_ALLOC (1 << 8) #define IPOIB_DBG_OID (1 << 9) +#define IPOIB_DBG_IOCTL (1 << 10) #define IPOIB_DBG_FUNC (1 << 28) /* For function entry/exit */ #define IPOIB_DBG_INFO (1 << 29) /* For verbose information */
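The ipoib_adapter.c and ipoib_adapter.h hunks above swap the complib mutex (cl_mutex_construct/init/acquire/release/destroy) for a native kernel KMUTEX driven by KeInitializeMutex, KeWaitForMutexObject and KeReleaseMutex. A minimal sketch of that pattern follows; the demo_adapter_t type and function names are hypothetical, and it assumes a WDK build environment (ntddk.h), so it only mirrors the calls used in the diff rather than reproducing the IPoIB code.

#include <ntddk.h>

typedef struct _demo_adapter
{
	KMUTEX	mutex;            /* replaces the old cl_mutex_t member */
	int	state;
} demo_adapter_t;

static void demo_adapter_init( demo_adapter_t *p_adapter )
{
	/* Mutex starts in the signaled (unowned) state; level 0, as in the diff. */
	KeInitializeMutex( &p_adapter->mutex, 0 );
}

static void demo_adapter_set_state( demo_adapter_t *p_adapter, int new_state )
{
	/* Must run at PASSIVE_LEVEL: blocks until the mutex is acquired,
	 * no timeout, not alertable. */
	KeWaitForMutexObject( &p_adapter->mutex, Executive, KernelMode,
		FALSE, NULL );

	p_adapter->state = new_state;

	/* FALSE: no KeWaitXxx call immediately follows the release. */
	KeReleaseMutex( &p_adapter->mutex, FALSE );
}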