From e3a0ef89a1f271da249b121a36edb5db920d1d3c Mon Sep 17 00:00:00 2001 From: tzachid Date: Wed, 4 Oct 2006 12:57:02 +0000 Subject: [PATCH] [MTHCA] Implementation of fairness between eqs - each eq is limited to 10 ms. git-svn-id: svn://openib.tc.cornell.edu/gen1@514 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- trunk/hw/mthca/kernel/hca_data.h | 2 + trunk/hw/mthca/kernel/hca_driver.c | 24 +- trunk/hw/mthca/kernel/mt_l2w.c | 255 +-- trunk/hw/mthca/kernel/mthca.inf | 2 + trunk/hw/mthca/kernel/mthca_eq.c | 2169 ++++++++++++------------ trunk/hw/mthca/kernel/mthca_provider.h | 893 +++++----- 6 files changed, 1693 insertions(+), 1652 deletions(-) diff --git a/trunk/hw/mthca/kernel/hca_data.h b/trunk/hw/mthca/kernel/hca_data.h index e3188609..66681ce2 100644 --- a/trunk/hw/mthca/kernel/hca_data.h +++ b/trunk/hw/mthca/kernel/hca_data.h @@ -44,6 +44,8 @@ extern char mlnx_uvp_lib_name[]; extern uint32_t g_skip_tavor_reset; extern uint32_t g_disable_tavor_reset; extern uint32_t g_tune_pci; +extern uint32_t g_processor_affinity; +extern uint32_t g_max_DPC_time_us; #define MLNX_MAX_HCA 4 diff --git a/trunk/hw/mthca/kernel/hca_driver.c b/trunk/hw/mthca/kernel/hca_driver.c index 38586cd3..9fee6e99 100644 --- a/trunk/hw/mthca/kernel/hca_driver.c +++ b/trunk/hw/mthca/kernel/hca_driver.c @@ -69,6 +69,9 @@ UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; uint32_t g_skip_tavor_reset=0; /* skip reset for Tavor cards */ uint32_t g_disable_tavor_reset=1; /* disable Tavor reset for the next driver load */ uint32_t g_tune_pci=0; /* 0 - skip tuning PCI configuration space of HCAs */ +uint32_t g_processor_affinity = 0; +uint32_t g_max_DPC_time_us = 10000; + UNICODE_STRING g_param_path; @@ -236,7 +239,7 @@ __read_registry( { NTSTATUS status; /* Remember the terminating entry in the table below. */ - RTL_QUERY_REGISTRY_TABLE table[6]; + RTL_QUERY_REGISTRY_TABLE table[8]; HCA_ENTER( HCA_DBG_DEV ); @@ -297,15 +300,30 @@ __read_registry( table[4].DefaultData = &g_tune_pci; table[4].DefaultLength = sizeof(ULONG); + table[5].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[5].Name = L"ProcessorAffinity"; + table[5].EntryContext = &g_processor_affinity; + table[5].DefaultType = REG_DWORD; + table[5].DefaultData = &g_processor_affinity; + table[5].DefaultLength = sizeof(ULONG); + + table[6].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[6].Name = L"MaxDpcTimeUs"; + table[6].EntryContext = &g_max_DPC_time_us; + table[6].DefaultType = REG_DWORD; + table[6].DefaultData = &g_max_DPC_time_us; + table[6].DefaultLength = sizeof(ULONG); + /* Have at it! 
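	 * A note on the two new parameters above:
	 *   ProcessorAffinity - when non-zero, this value overrides the
	 *     interrupt affinity passed to IoConnectInterrupt() by
	 *     request_irq() (see mt_l2w.c below).
	 *   MaxDpcTimeUs - per-DPC time budget in microseconds (default
	 *     10000, i.e. 10 ms); mthca_eq_int() (see mthca_eq.c below)
	 *     re-queues its DPC and stops once the budget is exceeded, so
	 *     one busy EQ cannot starve the other EQs.
	 * A minimal sketch of that check, assuming cl_get_time_stamp()
	 * returns microseconds and with the DPC field access simplified:
	 *
	 *     uint64_t start = cl_get_time_stamp();
	 *     while ((eqe = next_eqe_sw(eq)) != NULL) {
	 *         ... handle one EQE, advance the consumer index ...
	 *         if (cl_get_time_stamp() - start > g_max_DPC_time_us) {
	 *             KeInsertQueueDpc(&eq->dpc, NULL, NULL);
	 *             break;   // remaining EQEs are handled by the re-queued DPC
	 *         }
	 *     }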
*/ status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, g_param_path.Buffer, table, NULL, NULL ); HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_INIT, - ("debug level %d debug flags 0x%.8x SkipTavorReset %d DisableTavorReset %d TunePci %d\n", + ("debug level %d debug flags 0x%.8x SkipTavorReset %d DisableTavorReset %d TunePci %d" + "g_processor_affinity %d g_max_DPC_time_us%d\n", g_mthca_dbg_level , g_mthca_dbg_flags, g_skip_tavor_reset, g_disable_tavor_reset, - g_tune_pci )); + g_tune_pci, g_processor_affinity, g_max_DPC_time_us )); HCA_EXIT( HCA_DBG_DEV ); return status; diff --git a/trunk/hw/mthca/kernel/mt_l2w.c b/trunk/hw/mthca/kernel/mt_l2w.c index e1d99291..43928791 100644 --- a/trunk/hw/mthca/kernel/mt_l2w.c +++ b/trunk/hw/mthca/kernel/mt_l2w.c @@ -1,123 +1,132 @@ -#include - -pci_pool_t * -pci_pool_create (const char *name, struct mthca_dev *mdev, - size_t size, size_t align, size_t allocation) -{ - pci_pool_t *pool; - UNREFERENCED_PARAMETER(align); - UNREFERENCED_PARAMETER(allocation); - - MT_ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); - - // allocation parameter is not handled yet - ASSERT(allocation == 0); - - // allocate object - pool = (pci_pool_t *)ExAllocatePoolWithTag( NonPagedPool, sizeof(pci_pool_t), MT_TAG_PCIPOOL ); - if (pool == NULL) - return NULL; - - //TODO: not absolutely correct: Linux's pci_pool_alloc provides contiguous physical memory, - // while default alloc function - ExAllocatePoolWithTag -doesn't. - // But for now it is used for elements of size <= PAGE_SIZE - // Anyway - a sanity check: - ASSERT(size <= PAGE_SIZE); - if (size > PAGE_SIZE) - return NULL; - - //TODO: not too effective: one can read its own alloc/free functions - ExInitializeNPagedLookasideList( &pool->pool_hdr, NULL, NULL, 0, size, MT_TAG_PCIPOOL, 0 ); - - // fill the object - pool->mdev = mdev; - pool->size = size; - strncpy( pool->name, name, sizeof pool->name ); - - return pool; -} - -// from lib/string.c -/** -* strlcpy - Copy a %NUL terminated string into a sized buffer -* @dest: Where to copy the string to -* @src: Where to copy the string from -* @size: size of destination buffer -* -* Compatible with *BSD: the result is always a valid -* NUL-terminated string that fits in the buffer (unless, -* of course, the buffer size is zero). It does not pad -* out the result like strncpy() does. -*/ -SIZE_T strlcpy(char *dest, const char *src, SIZE_T size) -{ - SIZE_T ret = strlen(src); - - if (size) { - SIZE_T len = (ret >= size) ? 
size-1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - - -int __bitmap_full(const unsigned long *bitmap, int bits) -{ - int k, lim = bits/BITS_PER_LONG; - for (k = 0; k < lim; ++k) - if (~bitmap[k]) - return 0; - - if (bits % BITS_PER_LONG) - if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) - return 0; - - return 1; -} - -int __bitmap_empty(const unsigned long *bitmap, int bits) -{ - int k, lim = bits/BITS_PER_LONG; - for (k = 0; k < lim; ++k) - if (bitmap[k]) - return 0; - - if (bits % BITS_PER_LONG) - if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) - return 0; - - return 1; -} - -int request_irq( - IN CM_PARTIAL_RESOURCE_DESCRIPTOR *int_info, /* interrupt resources */ - IN KSPIN_LOCK *isr_lock, /* spin lock for ISR */ - IN PKSERVICE_ROUTINE isr, /* ISR */ - IN void *isr_ctx, /* ISR context */ - OUT PKINTERRUPT *int_obj /* interrupt object */ - ) -{ - NTSTATUS status; - - status = IoConnectInterrupt( - int_obj, /* InterruptObject */ - isr, /* ISR */ - isr_ctx, /* ISR context */ - isr_lock, /* spinlock */ - int_info->u.Interrupt.Vector, /* interrupt vector */ - (KIRQL)int_info->u.Interrupt.Level, /* IRQL */ - (KIRQL)int_info->u.Interrupt.Level, /* Synchronize IRQL */ - (BOOLEAN)((int_info->Flags == CM_RESOURCE_INTERRUPT_LATCHED) ? - Latched : LevelSensitive), /* interrupt type: LATCHED or LEVEL */ - (BOOLEAN)(int_info->ShareDisposition == CmResourceShareShared), /* vector shared or not */ - (KAFFINITY)int_info->u.Interrupt.Affinity, /* interrupt affinity */ - FALSE /* whether to save Float registers */ - ); - - if (!NT_SUCCESS(status)) - return -EFAULT; /* failed to connect interrupt */ - else - return 0; -} - +#include +#include +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_l2w.tmh" +#endif + +pci_pool_t * +pci_pool_create (const char *name, struct mthca_dev *mdev, + size_t size, size_t align, size_t allocation) +{ + pci_pool_t *pool; + UNREFERENCED_PARAMETER(align); + UNREFERENCED_PARAMETER(allocation); + + MT_ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // allocation parameter is not handled yet + ASSERT(allocation == 0); + + // allocate object + pool = (pci_pool_t *)ExAllocatePoolWithTag( NonPagedPool, sizeof(pci_pool_t), MT_TAG_PCIPOOL ); + if (pool == NULL) + return NULL; + + //TODO: not absolutely correct: Linux's pci_pool_alloc provides contiguous physical memory, + // while default alloc function - ExAllocatePoolWithTag -doesn't. + // But for now it is used for elements of size <= PAGE_SIZE + // Anyway - a sanity check: + ASSERT(size <= PAGE_SIZE); + if (size > PAGE_SIZE) + return NULL; + + //TODO: not too effective: one can read its own alloc/free functions + ExInitializeNPagedLookasideList( &pool->pool_hdr, NULL, NULL, 0, size, MT_TAG_PCIPOOL, 0 ); + + // fill the object + pool->mdev = mdev; + pool->size = size; + strncpy( pool->name, name, sizeof pool->name ); + + return pool; +} + +// from lib/string.c +/** +* strlcpy - Copy a %NUL terminated string into a sized buffer +* @dest: Where to copy the string to +* @src: Where to copy the string from +* @size: size of destination buffer +* +* Compatible with *BSD: the result is always a valid +* NUL-terminated string that fits in the buffer (unless, +* of course, the buffer size is zero). It does not pad +* out the result like strncpy() does. +*/ +SIZE_T strlcpy(char *dest, const char *src, SIZE_T size) +{ + SIZE_T ret = strlen(src); + + if (size) { + SIZE_T len = (ret >= size) ? 
size-1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int request_irq( + IN CM_PARTIAL_RESOURCE_DESCRIPTOR *int_info, /* interrupt resources */ + IN KSPIN_LOCK *isr_lock, /* spin lock for ISR */ + IN PKSERVICE_ROUTINE isr, /* ISR */ + IN void *isr_ctx, /* ISR context */ + OUT PKINTERRUPT *int_obj /* interrupt object */ + ) +{ + NTSTATUS status; + + status = IoConnectInterrupt( + int_obj, /* InterruptObject */ + isr, /* ISR */ + isr_ctx, /* ISR context */ + isr_lock, /* spinlock */ + int_info->u.Interrupt.Vector, /* interrupt vector */ + (KIRQL)int_info->u.Interrupt.Level, /* IRQL */ + (KIRQL)int_info->u.Interrupt.Level, /* Synchronize IRQL */ + (BOOLEAN)((int_info->Flags == CM_RESOURCE_INTERRUPT_LATCHED) ? + Latched : LevelSensitive), /* interrupt type: LATCHED or LEVEL */ + (BOOLEAN)(int_info->ShareDisposition == CmResourceShareShared), /* vector shared or not */ + g_processor_affinity ? g_processor_affinity : (KAFFINITY)int_info->u.Interrupt.Affinity, /* interrupt affinity */ + FALSE /* whether to save Float registers */ + ); + + if (!NT_SUCCESS(status)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_INIT ,("IoConnectInterrupt failed status %d (did you change the processor_affinity ? )\n",status)); + return -EFAULT; /* failed to connect interrupt */ + } + else + return 0; +} + diff --git a/trunk/hw/mthca/kernel/mthca.inf b/trunk/hw/mthca/kernel/mthca.inf index 96198535..2d7c3c51 100644 --- a/trunk/hw/mthca/kernel/mthca.inf +++ b/trunk/hw/mthca/kernel/mthca.inf @@ -166,6 +166,8 @@ HKR,"Parameters","DebugFlags",%REG_DWORD%,0x0000ffff HKR,"Parameters","SkipTavorReset",%REG_DWORD%,0 HKR,"Parameters","DisableTavorResetOnFailure",%REG_DWORD%,1 HKR,"Parameters","TunePci",%REG_DWORD%,0 +HKR,"Parameters","ProcessorAffinity",%REG_DWORD%,0 +HKR,"Parameters","MaxDpcTimeUs",%REG_DWORD%,10000 HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\8bf1f640-63fe-4743-b9ef-fa38c695bfde","Flags",%REG_DWORD%,0xffff HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\8bf1f640-63fe-4743-b9ef-fa38c695bfde","Level",%REG_DWORD%,0x3 diff --git a/trunk/hw/mthca/kernel/mthca_eq.c b/trunk/hw/mthca/kernel/mthca_eq.c index db51afcb..45c312ea 100644 --- a/trunk/hw/mthca/kernel/mthca_eq.c +++ b/trunk/hw/mthca/kernel/mthca_eq.c @@ -1,1080 +1,1089 @@ -/* - * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id$ - */ - -#include "mthca_dev.h" -#if defined(EVENT_TRACING) -#ifdef offsetof -#undef offsetof -#endif -#include "mthca_eq.tmh" -#endif -#include "mthca_cmd.h" -#include "mthca_config_reg.h" - -static int mthca_map_reg(struct mthca_dev *dev, - u64 offset, unsigned long size, - void __iomem **map, SIZE_T *map_size); -static int mthca_map_eq_regs(struct mthca_dev *dev); -static void mthca_unmap_eq_regs(struct mthca_dev *dev); -static int mthca_create_eq(struct mthca_dev *dev, - int nent, - u8 intr, - struct mthca_eq *eq); - - - -#ifdef ALLOC_PRAGMA -#pragma alloc_text (PAGE, mthca_map_reg) -#pragma alloc_text (PAGE, mthca_map_eq_regs) -#pragma alloc_text (PAGE, mthca_init_eq_table) -#pragma alloc_text (PAGE, mthca_unmap_eq_regs) -#pragma alloc_text (PAGE, mthca_map_eq_icm) -#pragma alloc_text (PAGE, mthca_unmap_eq_icm) -#pragma alloc_text (PAGE, mthca_create_eq) -#pragma alloc_text (PAGE, mthca_cleanup_eq_table) -#endif - -enum { - MTHCA_NUM_ASYNC_EQE = 0x80, - MTHCA_NUM_CMD_EQE = 0x80, - MTHCA_NUM_SPARE_EQE = 0x80, - MTHCA_EQ_ENTRY_SIZE = 0x20 -}; - -/* - * Must be packed because start is 64 bits but only aligned to 32 bits. 
- */ -#pragma pack(push,1) -struct mthca_eq_context { - __be32 flags; - __be64 start; - __be32 logsize_usrpage; - __be32 tavor_pd; /* reserved for Arbel */ - u8 reserved1[3]; - u8 intr; - __be32 arbel_pd; /* lost_count for Tavor */ - __be32 lkey; - u32 reserved2[2]; - __be32 consumer_index; - __be32 producer_index; - u32 reserved3[4]; -}; -#pragma pack(pop) - -#define MTHCA_EQ_STATUS_OK ( 0 << 28) -#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) -#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28) -#define MTHCA_EQ_OWNER_SW ( 0 << 24) -#define MTHCA_EQ_OWNER_HW ( 1 << 24) -#define MTHCA_EQ_FLAG_TR ( 1 << 18) -#define MTHCA_EQ_FLAG_OI ( 1 << 17) -#define MTHCA_EQ_STATE_ARMED ( 1 << 8) -#define MTHCA_EQ_STATE_FIRED ( 2 << 8) -#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8) -#define MTHCA_EQ_STATE_ARBEL ( 8 << 8) - -enum { - MTHCA_EVENT_TYPE_COMP = 0x00, - MTHCA_EVENT_TYPE_PATH_MIG = 0x01, - MTHCA_EVENT_TYPE_COMM_EST = 0x02, - MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, - MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, - MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, - MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, - MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, - MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, - MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, - MTHCA_EVENT_TYPE_CMD = 0x0a, - MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, - MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, - MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, - MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, - MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, - MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, - MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14 -}; - -#define MTHCA_ASYNC_EVENT_MASK ((1Ui64 << MTHCA_EVENT_TYPE_PATH_MIG) | \ - (1Ui64 << MTHCA_EVENT_TYPE_COMM_EST) | \ - (1Ui64 << MTHCA_EVENT_TYPE_SQ_DRAINED) | \ - (1Ui64 << MTHCA_EVENT_TYPE_CQ_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \ - (1Ui64 << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ - (1Ui64 << MTHCA_EVENT_TYPE_ECC_DETECT)) -#define MTHCA_SRQ_EVENT_MASK ((1Ui64 << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ - (1Ui64 << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ - (1Ui64 << MTHCA_EVENT_TYPE_SRQ_LIMIT)) - -#define MTHCA_CMD_EVENT_MASK (1Ui64 << MTHCA_EVENT_TYPE_CMD) - -#define MTHCA_EQ_DB_INC_CI (1 << 24) -#define MTHCA_EQ_DB_REQ_NOT (2 << 24) -#define MTHCA_EQ_DB_DISARM_CQ (3 << 24) -#define MTHCA_EQ_DB_SET_CI (4 << 24) -#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24) - -#pragma pack(push,1) -struct mthca_eqe { - u8 reserved1; - u8 type; - u8 reserved2; - u8 subtype; - union { - u32 raw[6]; - struct { - __be32 cqn; - } comp; - struct { - u16 reserved1; - __be16 token; - u32 reserved2; - u8 reserved3[3]; - u8 status; - __be64 out_param; - } cmd; - struct { - __be32 qpn; - } qp; - struct { - __be32 srqn; - } srq; - struct { - __be32 cqn; - u32 reserved1; - u8 reserved2[3]; - u8 syndrome; - } cq_err; - struct { - u32 reserved1[2]; - __be32 port; - } port_change; - } event; - u8 reserved3[3]; - u8 owner; -} ; -#pragma pack(pop) - -#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) -#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) - -static inline u64 async_mask(struct mthca_dev *dev) -{ - return dev->mthca_flags & MTHCA_FLAG_SRQ ? 
- MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK : - MTHCA_ASYNC_EVENT_MASK; -} - -static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) -{ - __be32 doorbell[2]; - - doorbell[0] = cl_hton32(MTHCA_EQ_DB_SET_CI | eq->eqn); - doorbell[1] = cl_hton32(ci & (eq->nent - 1)); - - /* - * This barrier makes sure that all updates to ownership bits - * done by set_eqe_hw() hit memory before the consumer index - * is updated. set_eq_ci() allows the HCA to possibly write - * more EQ entries, and we want to avoid the exceedingly - * unlikely possibility of the HCA writing an entry and then - * having set_eqe_hw() overwrite the owner field. - */ - wmb(); - mthca_write64(doorbell, - dev->kar + MTHCA_EQ_DOORBELL, - MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); -} - -static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) -{ - /* See comment in tavor_set_eq_ci() above. */ - wmb(); - __raw_writel((u32) cl_hton32(ci), - (u8*)dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); - /* We still want ordering, just not swabbing, so add a barrier */ - mb(); -} - -static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) -{ - if (mthca_is_memfree(dev)) - arbel_set_eq_ci(dev, eq, ci); - else - tavor_set_eq_ci(dev, eq, ci); -} - -static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) -{ - __be32 doorbell[2]; - - doorbell[0] = cl_hton32(MTHCA_EQ_DB_REQ_NOT | eqn); - doorbell[1] = 0; - - mthca_write64(doorbell, - dev->kar + MTHCA_EQ_DOORBELL, - MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); -} - -static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) -{ - writel(eqn_mask, dev->eq_regs.arbel.eq_arm); -} - -static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) -{ - if (!mthca_is_memfree(dev)) { - __be32 doorbell[2]; - - doorbell[0] = cl_hton32(MTHCA_EQ_DB_DISARM_CQ | eqn); - doorbell[1] = cl_hton32(cqn); - - mthca_write64(doorbell, - dev->kar + MTHCA_EQ_DOORBELL, - MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); - } -} - -static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry) -{ - unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE; - return (struct mthca_eqe *)((u8*)eq->page_list[off / PAGE_SIZE].page + off % PAGE_SIZE); -} - -static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq) -{ - struct mthca_eqe* eqe; - eqe = get_eqe(eq, eq->cons_index); - return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe; -} - -static inline void set_eqe_hw(struct mthca_eqe *eqe) -{ - eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW; -} - -static void port_change(struct mthca_dev *dev, int port, int active) -{ - struct ib_event record; - - HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Port change to %s for port %d\n", - active ? "active" : "down", port)); - - record.device = &dev->ib_dev; - record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; - record.element.port_num = (u8)port; - // Gen2 ib_core mechanism - ib_dispatch_event(&record); - // our callback - ca_event_handler( &record, &dev->ext->hca.hob ); -} - -static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) -{ - int disarm_cqn; - int eqes_found = 0; - int set_ci = 0; - struct mthca_eqe *eqe = next_eqe_sw(eq); - - while (eqe) { - - /* - * Make sure we read EQ entry contents after we've - * checked the ownership bit. 
- */ - rmb(); - - switch (eqe->type) { - case MTHCA_EVENT_TYPE_COMP: - disarm_cqn = cl_ntoh32(eqe->event.comp.cqn) & 0xffffff; - disarm_cq(dev, eq->eqn, disarm_cqn); - mthca_cq_completion(dev, disarm_cqn); - break; - - case MTHCA_EVENT_TYPE_PATH_MIG: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_PATH_MIG); - break; - - case MTHCA_EVENT_TYPE_COMM_EST: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_COMM_EST); - break; - - case MTHCA_EVENT_TYPE_SQ_DRAINED: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_SQ_DRAINED); - break; - - case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_QP_LAST_WQE_REACHED); - break; - - case MTHCA_EVENT_TYPE_SRQ_LIMIT: - mthca_srq_event(dev, cl_ntoh32(eqe->event.srq.srqn) & 0xffffff, - IB_EVENT_SRQ_LIMIT_REACHED); - break; - - case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_QP_FATAL); - break; - - case MTHCA_EVENT_TYPE_PATH_MIG_FAILED: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_PATH_MIG_ERR); - break; - - case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_QP_REQ_ERR); - break; - - case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR: - mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, - IB_EVENT_QP_ACCESS_ERR); - break; - - case MTHCA_EVENT_TYPE_CMD: - mthca_cmd_event(dev, - cl_ntoh16(eqe->event.cmd.token), - eqe->event.cmd.status, - cl_ntoh64(eqe->event.cmd.out_param)); - break; - - case MTHCA_EVENT_TYPE_PORT_CHANGE: - port_change(dev, - (cl_ntoh32(eqe->event.port_change.port) >> 28) & 3, - eqe->subtype == 0x4); - break; - - case MTHCA_EVENT_TYPE_CQ_ERROR: - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("CQ %s on CQN %06x (syndrome %d)\n", - eqe->event.cq_err.syndrome == 1 ? - "overrun" : "access violation", - cl_ntoh32(eqe->event.cq_err.cqn) & 0xffffff, eqe->event.cq_err.syndrome)); - mthca_cq_event(dev, cl_ntoh32(eqe->event.cq_err.cqn), - IB_EVENT_CQ_ERR); - break; - - case MTHCA_EVENT_TYPE_EQ_OVERFLOW: - HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("EQ overrun on EQN %d\n", eq->eqn)); - break; - - case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR: - case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR: - case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR: - case MTHCA_EVENT_TYPE_ECC_DETECT: - default: - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("Unhandled event %02x(%02x) on EQ %d\n", - eqe->type, eqe->subtype, eq->eqn)); - break; - }; - - set_eqe_hw(eqe); - ++eq->cons_index; - eqes_found += 1; - ++set_ci; - - /* - * The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MTHCA_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) { - /* - * Conditional on hca_type is OK here because - * this is a rare case, not the fast path. - */ - set_eq_ci(dev, eq, eq->cons_index); - set_ci = 0; - } - eqe = next_eqe_sw(eq); - } - - /* - * Rely on caller to set consumer index so that we don't have - * to test hca_type in our interrupt handling fast path. 
- */ - return eqes_found; -} - -static void mthca_tavor_dpc( PRKDPC dpc, - PVOID ctx, PVOID arg1, PVOID arg2 ) -{ - struct mthca_eq *eq = ctx; - struct mthca_dev *dev = eq->dev; - SPIN_LOCK_PREP(lh); - - UNREFERENCED_PARAMETER(dpc); - UNREFERENCED_PARAMETER(arg1); - UNREFERENCED_PARAMETER(arg2); - - spin_lock_dpc(&eq->lock, &lh); - - /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ - if (mthca_eq_int(dev, eq)) { - tavor_set_eq_ci(dev, eq, eq->cons_index); - tavor_eq_req_not(dev, eq->eqn); - } - - spin_unlock_dpc(&lh); -} - -static BOOLEAN mthca_tavor_interrupt( - PKINTERRUPT int_obj, - PVOID ctx - ) -{ - struct mthca_dev *dev = ctx; - u32 ecr; - int i; - - UNREFERENCED_PARAMETER(int_obj); - - if (dev->eq_table.clr_mask) - writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); - - ecr = readl((u8*)dev->eq_regs.tavor.ecr_base + 4); - if (!ecr) - return FALSE; - - writel(ecr, (u8*)dev->eq_regs.tavor.ecr_base + - MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); - - for (i = 0; i < MTHCA_NUM_EQ; ++i) { - if (ecr & dev->eq_table.eq[i].eqn_mask && - next_eqe_sw(&dev->eq_table.eq[i])) { - KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); - } - } - - return TRUE; -} - -#ifdef MSI_SUPPORT -static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, - struct pt_regs *regs) -{ - struct mthca_eq *eq = eq_ptr; - struct mthca_dev *dev = eq->dev; - - mthca_eq_int(dev, eq); - tavor_set_eq_ci(dev, eq, eq->cons_index); - tavor_eq_req_not(dev, eq->eqn); - - /* MSI-X vectors always belong to us */ - return IRQ_HANDLED; -} -#endif - -static void mthca_arbel_dpc( PRKDPC dpc, - PVOID ctx, PVOID arg1, PVOID arg2 ) -{ - struct mthca_eq *eq = ctx; - struct mthca_dev *dev = eq->dev; - SPIN_LOCK_PREP(lh); - - UNREFERENCED_PARAMETER(dpc); - UNREFERENCED_PARAMETER(arg1); - UNREFERENCED_PARAMETER(arg2); - - spin_lock_dpc(&eq->lock, &lh); - - /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ - if (mthca_eq_int(dev, eq)) - arbel_set_eq_ci(dev, eq, eq->cons_index); - arbel_eq_req_not(dev, eq->eqn_mask); - - spin_unlock_dpc(&lh); -} - -static BOOLEAN mthca_arbel_interrupt( - PKINTERRUPT int_obj, - PVOID ctx - ) -{ - struct mthca_dev *dev = ctx; - int work = 0; - int i; - - UNREFERENCED_PARAMETER(int_obj); - - if (dev->eq_table.clr_mask) - writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); - - for (i = 0; i < MTHCA_NUM_EQ; ++i) { - if (next_eqe_sw( &dev->eq_table.eq[i]) ) { - work = 1; - while(InterlockedCompareExchange(&dev->dpc_lock, 1, 0)); - - KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); - InterlockedCompareExchange(&dev->dpc_lock, 0, 1); - } else { - arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); - } - } - - return (BOOLEAN)work; -} - -#ifdef MSI_SUPPORT -static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr, - struct pt_regs *regs) -{ - struct mthca_eq *eq = eq_ptr; - struct mthca_dev *dev = eq->dev; - - mthca_eq_int(dev, eq); - arbel_set_eq_ci(dev, eq, eq->cons_index); - arbel_eq_req_not(dev, eq->eqn_mask); - - /* MSI-X vectors always belong to us */ - return IRQ_HANDLED; -} -#endif - -static int mthca_create_eq(struct mthca_dev *dev, - int nent, - u8 intr, - struct mthca_eq *eq) -{ - int npages; - u64 *dma_list = NULL; - struct mthca_mailbox *mailbox; - struct mthca_eq_context *eq_context; - int err = -ENOMEM; - int i; - u8 status; - - HCA_ENTER(HCA_DBG_INIT); - eq->dev = dev; - eq->nent = roundup_pow_of_two(max(nent, 2)); - npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; - - eq->page_list = kmalloc(npages * 
sizeof *eq->page_list, - GFP_KERNEL); - if (!eq->page_list) - goto err_out; - - for (i = 0; i < npages; ++i) - eq->page_list[i].page = NULL; - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out_free; - - mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) - goto err_out_free; - eq_context = mailbox->buf; - - for (i = 0; i < npages; ++i) { - alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &eq->page_list[i]); - if (!eq->page_list[i].page) - goto err_out_free_pages; - dma_list[i] = eq->page_list[i].dma_address; - } - - for (i = 0; i < eq->nent; ++i) - set_eqe_hw(get_eqe(eq, i)); - - eq->eqn = mthca_alloc(&dev->eq_table.alloc); - if (eq->eqn == -1) - goto err_out_free_pages; - - err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, - dma_list, PAGE_SHIFT, npages, - 0, npages * PAGE_SIZE, - MTHCA_MPT_FLAG_LOCAL_WRITE | - MTHCA_MPT_FLAG_LOCAL_READ, - &eq->mr); - if (err) - goto err_out_free_eq; - - RtlZeroMemory(eq_context, sizeof *eq_context); - eq_context->flags = cl_hton32(MTHCA_EQ_STATUS_OK | - MTHCA_EQ_OWNER_HW | - MTHCA_EQ_STATE_ARMED | - MTHCA_EQ_FLAG_TR); - if (mthca_is_memfree(dev)) - eq_context->flags |= cl_hton32(MTHCA_EQ_STATE_ARBEL); - - eq_context->logsize_usrpage = cl_hton32((ffs(eq->nent) - 1) << 24); - if (mthca_is_memfree(dev)) { - eq_context->arbel_pd = cl_hton32(dev->driver_pd.pd_num); - } else { - eq_context->logsize_usrpage |= cl_hton32(dev->driver_uar.index); - eq_context->tavor_pd = cl_hton32(dev->driver_pd.pd_num); - } - eq_context->intr = intr; - eq_context->lkey = cl_hton32(eq->mr.ibmr.lkey); - - err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); - if (err) { - HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_EQ failed (%d)\n", err)); - goto err_out_free_mr; - } - if (status) { - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_EQ returned status 0x%02x\n", - status)); - err = -EINVAL; - goto err_out_free_mr; - } - - kfree(dma_list); - mthca_free_mailbox(dev, mailbox); - - eq->eqn_mask = _byteswap_ulong(1 << eq->eqn); - eq->cons_index = 0; - - dev->eq_table.arm_mask |= eq->eqn_mask; - - HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_INIT ,("Allocated EQ %d with %d entries\n", - eq->eqn, eq->nent)); - - HCA_EXIT(HCA_DBG_INIT); - return err; - - err_out_free_mr: - mthca_free_mr(dev, &eq->mr); - - err_out_free_eq: - mthca_free(&dev->eq_table.alloc, eq->eqn); - - err_out_free_pages: - for (i = 0; i < npages; ++i) { - if (eq->page_list[i].page) { - free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); - } - } - mthca_free_mailbox(dev, mailbox); - - err_out_free: - kfree(eq->page_list); - kfree(dma_list); - - err_out: - HCA_EXIT(HCA_DBG_INIT); - return err; -} - -static void mthca_free_eq(struct mthca_dev *dev, - struct mthca_eq *eq) -{ - struct mthca_mailbox *mailbox; - int err; - u8 status; - int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / - PAGE_SIZE; - int i; - - mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) - return; - - err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); - if (err) - HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ failed (%d)\n", err)); - if (status) - HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ returned status 0x%02x\n", status)); - - dev->eq_table.arm_mask &= ~eq->eqn_mask; - - { // debug print - HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Dumping EQ context %02x:\n", eq->eqn)); - for (i = 0; i < sizeof (struct mthca_eq_context) / 4; i=i+4) { - HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("[%02x] %08x 
%08x %08x %08x\n", i, - cl_ntoh32(*(u32*)((u8*)mailbox->buf + i * 4)), - cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+1)*4)), - cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+2)*4)), - cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+1)*4)))); - - } - } - - mthca_free_mr(dev, &eq->mr); - for (i = 0; i < npages; ++i) { - free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); - } - - kfree(eq->page_list); - mthca_free_mailbox(dev, mailbox); -} - -static void mthca_free_irqs(struct mthca_dev *dev) -{ - if (dev->eq_table.have_irq) - free_irq(dev->ext->int_obj); -#ifdef MSI_SUPPORT - for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (dev->eq_table.eq[i].have_irq) - free_irq(dev->eq_table.eq[i].msi_x_vector, - dev->eq_table.eq + i); -#endif -} - -static int mthca_map_reg(struct mthca_dev *dev, - u64 offset, unsigned long size, - void __iomem **map, SIZE_T *map_size) -{ - u64 base = pci_resource_start(dev, HCA_BAR_TYPE_HCR); - *map = ioremap(base + offset, size, map_size); - if (!*map) - return -ENOMEM; - return 0; -} - -static void mthca_unmap_reg(struct mthca_dev *dev, u64 offset, - unsigned long size, void __iomem *map, SIZE_T map_size) -{ - UNREFERENCED_PARAMETER(dev); - UNREFERENCED_PARAMETER(size); - UNREFERENCED_PARAMETER(offset); - iounmap(map, map_size); -} - -static int mthca_map_eq_regs(struct mthca_dev *dev) -{ - u64 mthca_base; - - mthca_base = pci_resource_start(dev, HCA_BAR_TYPE_HCR); - - if (mthca_is_memfree(dev)) { - /* - * We assume that the EQ arm and EQ set CI registers - * fall within the first BAR. We can't trust the - * values firmware gives us, since those addresses are - * valid on the HCA's side of the PCI bus but not - * necessarily the host side. - */ - if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - &dev->clr_base, &dev->clr_base_size)) { - HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " - "aborting.\n")); - return -ENOMEM; - } - - /* - * Add 4 because we limit ourselves to EQs 0 ... 31, - * so we only need the low word of the register. 
- */ - if (mthca_map_reg(dev, ((pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - &dev->eq_regs.arbel.eq_arm, &dev->eq_regs.arbel.eq_arm_size)) { - HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ arm register, aborting.\n")); - mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base, dev->clr_base_size); - return -ENOMEM; - } - - if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.eq_set_ci_base, - MTHCA_EQ_SET_CI_SIZE, - &dev->eq_regs.arbel.eq_set_ci_base, - &dev->eq_regs.arbel.eq_set_ci_base_size - )) { - HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ CI register, aborting.\n")); - mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm, dev->eq_regs.arbel.eq_arm_size); - mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base, dev->clr_base_size); - return -ENOMEM; - } - } else { - if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - &dev->clr_base, &dev->clr_base_size)) { - HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " - "aborting.\n")); - return -ENOMEM; - } - - if (mthca_map_reg(dev, MTHCA_ECR_BASE, - MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, - &dev->eq_regs.tavor.ecr_base, &dev->eq_regs.tavor.ecr_base_size)) { - HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map ecr register, " - "aborting.\n")); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base, dev->clr_base_size); - return -ENOMEM; - } - } - - return 0; - -} - -static void mthca_unmap_eq_regs(struct mthca_dev *dev) -{ - if (mthca_is_memfree(dev)) { - mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.eq_set_ci_base, - MTHCA_EQ_SET_CI_SIZE, - dev->eq_regs.arbel.eq_set_ci_base, - dev->eq_regs.arbel.eq_set_ci_base_size); - mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm, - dev->eq_regs.arbel.eq_arm_size); - mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base, dev->clr_base_size); - } else { - mthca_unmap_reg(dev, MTHCA_ECR_BASE, - MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, - dev->eq_regs.tavor.ecr_base, - dev->eq_regs.tavor.ecr_base_size); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base, dev->clr_base_size); - } -} - -int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) -{ - int ret; - u8 status; - - /* - * We assume that mapping one page is enough for the whole EQ - * context table. This is fine with all current HCAs, because - * we only use 32 EQs and each EQ uses 32 bytes of context - * memory, or 1 KB total. 
- */ - dev->eq_table.icm_virt = icm_virt; - alloc_dma_zmem_map(dev,PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &dev->eq_table.sg); - if (!dev->eq_table.sg.page) - return -ENOMEM; - - ret = mthca_MAP_ICM_page(dev, dev->eq_table.sg.dma_address, icm_virt, &status); - if (!ret && status) - ret = -EINVAL; - if (ret) - free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); - - return ret; -} - -void mthca_unmap_eq_icm(struct mthca_dev *dev) -{ - u8 status; - - mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status); - free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); -} - -int mthca_init_eq_table(struct mthca_dev *dev) -{ - int err; - u8 status; - u8 intr; - int i; - - HCA_ENTER(HCA_DBG_INIT); - err = mthca_alloc_init(&dev->eq_table.alloc, - dev->limits.num_eqs, - dev->limits.num_eqs - 1, - dev->limits.reserved_eqs); - if (err) - return err; - - err = mthca_map_eq_regs(dev); - if (err) - goto err_out_free; - -#ifdef MSI_SUPPORT - if (dev->mthca_flags & MTHCA_FLAG_MSI || - dev->mthca_flags & MTHCA_FLAG_MSI_X) { - dev->eq_table.clr_mask = 0; - } else -#endif - { - dev->eq_table.clr_mask = - _byteswap_ulong(1 << (dev->eq_table.inta_pin & 31)); - dev->eq_table.clr_int = dev->clr_base + - (dev->eq_table.inta_pin < 32 ? 4 : 0); - } - - dev->eq_table.arm_mask = 0; - - intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? - 128 : dev->eq_table.inta_pin; - - err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, - (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, - &dev->eq_table.eq[MTHCA_EQ_COMP]); - if (err) - goto err_out_unmap; - - err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE, - (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr, - &dev->eq_table.eq[MTHCA_EQ_ASYNC]); - if (err) - goto err_out_comp; - - err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE, - (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr, - &dev->eq_table.eq[MTHCA_EQ_CMD]); - if (err) - goto err_out_async; - -#ifdef MSI_SUPPORT - if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { - static const char *eq_name[] = { - [MTHCA_EQ_COMP] = DRV_NAME " (comp)", - [MTHCA_EQ_ASYNC] = DRV_NAME " (async)", - [MTHCA_EQ_CMD] = DRV_NAME " (cmd)" - }; - - for (i = 0; i < MTHCA_NUM_EQ; ++i) { - err = request_irq(dev->eq_table.eq[i].msi_x_vector, - mthca_is_memfree(dev) ? - mthca_arbel_msi_x_interrupt : - mthca_tavor_msi_x_interrupt, - 0, eq_name[i], dev->eq_table.eq + i); - if (err) - goto err_out_cmd; - dev->eq_table.eq[i].have_irq = 1; - /* init DPC stuff something like that */ - spin_lock_init( &dev->eq_table.eq[i].lock ); - dev->dpc_lock = 0; - KeInitializeDpc( - &dev->eq_table.eq[i].dpc, - mthca_is_memfree(dev) ? - mthca_arbel_msi_x_dpc : - mthca_tavor_msi_x_dpc, - dev->eq_table.eq + i); - } - } else -#endif - { - spin_lock_init( &dev->ext->isr_lock ); - err = request_irq( - &dev->ext->interruptInfo, - &dev->ext->isr_lock.lock , - mthca_is_memfree(dev) ? mthca_arbel_interrupt : mthca_tavor_interrupt, - dev, - &dev->ext->int_obj - ); - if (err) - goto err_out_cmd; - dev->eq_table.have_irq = 1; - - /* init DPC stuff */ - for (i = 0; i < MTHCA_NUM_EQ; ++i) { - spin_lock_init( &dev->eq_table.eq[i].lock ); - KeInitializeDpc( - &dev->eq_table.eq[i].dpc, - mthca_is_memfree(dev) ? 
- mthca_arbel_dpc : - mthca_tavor_dpc, - dev->eq_table.eq + i); - } - } - - err = mthca_MAP_EQ(dev, async_mask(dev), - 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); - if (err) - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT,("MAP_EQ for async EQ %d failed (%d)\n", - dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err)); - if (status) - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT, ("MAP_EQ for async EQ %d returned status 0x%02x\n", - dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status)); - err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, - 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); - if (err) - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT, ("MAP_EQ for cmd EQ %d failed (%d)\n", - dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err)); - if (status) - HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT,("MAP_EQ for cmd EQ %d returned status 0x%02x\n", - dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status)); - - for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (mthca_is_memfree(dev)) - arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); - else - tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); - - return 0; - -err_out_cmd: - mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]); - -err_out_async: - mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]); - -err_out_comp: - mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]); - -err_out_unmap: - mthca_unmap_eq_regs(dev); - -err_out_free: - mthca_alloc_cleanup(&dev->eq_table.alloc); - HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_INIT ,("mthca_init_eq failed %d",err)); - return err; -} - -void mthca_cleanup_eq_table(struct mthca_dev *dev) -{ - u8 status; - int i; - - mthca_free_irqs(dev); - - mthca_MAP_EQ(dev, async_mask(dev), - 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); - mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, - 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); - - for (i = 0; i < MTHCA_NUM_EQ; ++i) - mthca_free_eq(dev, &dev->eq_table.eq[i]); - - mthca_unmap_eq_regs(dev); - - mthca_alloc_cleanup(&dev->eq_table.alloc); -} - - +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_eq.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_config_reg.h" + +static int mthca_map_reg(struct mthca_dev *dev, + u64 offset, unsigned long size, + void __iomem **map, SIZE_T *map_size); +static int mthca_map_eq_regs(struct mthca_dev *dev); +static void mthca_unmap_eq_regs(struct mthca_dev *dev); +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq); + + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_map_reg) +#pragma alloc_text (PAGE, mthca_map_eq_regs) +#pragma alloc_text (PAGE, mthca_init_eq_table) +#pragma alloc_text (PAGE, mthca_unmap_eq_regs) +#pragma alloc_text (PAGE, mthca_map_eq_icm) +#pragma alloc_text (PAGE, mthca_unmap_eq_icm) +#pragma alloc_text (PAGE, mthca_create_eq) +#pragma alloc_text (PAGE, mthca_cleanup_eq_table) +#endif + +enum { + MTHCA_NUM_ASYNC_EQE = 0x80, + MTHCA_NUM_CMD_EQE = 0x80, + MTHCA_NUM_SPARE_EQE = 0x80, + MTHCA_EQ_ENTRY_SIZE = 0x20 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. + */ +#pragma pack(push,1) +struct mthca_eq_context { + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 tavor_pd; /* reserved for Arbel */ + u8 reserved1[3]; + u8 intr; + __be32 arbel_pd; /* lost_count for Tavor */ + __be32 lkey; + u32 reserved2[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved3[4]; +}; +#pragma pack(pop) + +#define MTHCA_EQ_STATUS_OK ( 0 << 28) +#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_EQ_OWNER_SW ( 0 << 24) +#define MTHCA_EQ_OWNER_HW ( 1 << 24) +#define MTHCA_EQ_FLAG_TR ( 1 << 18) +#define MTHCA_EQ_FLAG_OI ( 1 << 17) +#define MTHCA_EQ_STATE_ARMED ( 1 << 8) +#define MTHCA_EQ_STATE_FIRED ( 2 << 8) +#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8) +#define MTHCA_EQ_STATE_ARBEL ( 8 << 8) + +enum { + MTHCA_EVENT_TYPE_COMP = 0x00, + MTHCA_EVENT_TYPE_PATH_MIG = 0x01, + MTHCA_EVENT_TYPE_COMM_EST = 0x02, + MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, + MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, + MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, + MTHCA_EVENT_TYPE_CMD = 0x0a, + MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, + MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14 +}; + +#define MTHCA_ASYNC_EVENT_MASK ((1Ui64 << MTHCA_EVENT_TYPE_PATH_MIG) | \ + (1Ui64 << MTHCA_EVENT_TYPE_COMM_EST) | \ + (1Ui64 << MTHCA_EVENT_TYPE_SQ_DRAINED) | \ + (1Ui64 << MTHCA_EVENT_TYPE_CQ_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1Ui64 << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ + (1Ui64 << MTHCA_EVENT_TYPE_ECC_DETECT)) +#define MTHCA_SRQ_EVENT_MASK ((1Ui64 << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1Ui64 << MTHCA_EVENT_TYPE_SRQ_LIMIT)) + +#define MTHCA_CMD_EVENT_MASK (1Ui64 << MTHCA_EVENT_TYPE_CMD) + +#define MTHCA_EQ_DB_INC_CI (1 << 
24) +#define MTHCA_EQ_DB_REQ_NOT (2 << 24) +#define MTHCA_EQ_DB_DISARM_CQ (3 << 24) +#define MTHCA_EQ_DB_SET_CI (4 << 24) +#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24) + +#pragma pack(push,1) +struct mthca_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + __be32 cqn; + } comp; + struct { + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; + } cmd; + struct { + __be32 qpn; + } qp; + struct { + __be32 srqn; + } srq; + struct { + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } cq_err; + struct { + u32 reserved1[2]; + __be32 port; + } port_change; + } event; + u8 reserved3[3]; + u8 owner; +} ; +#pragma pack(pop) + +#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) + +static inline u64 async_mask(struct mthca_dev *dev) +{ + return dev->mthca_flags & MTHCA_FLAG_SRQ ? + MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK : + MTHCA_ASYNC_EVENT_MASK; +} + +static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(MTHCA_EQ_DB_SET_CI | eq->eqn); + doorbell[1] = cl_hton32(ci & (eq->nent - 1)); + + /* + * This barrier makes sure that all updates to ownership bits + * done by set_eqe_hw() hit memory before the consumer index + * is updated. set_eq_ci() allows the HCA to possibly write + * more EQ entries, and we want to avoid the exceedingly + * unlikely possibility of the HCA writing an entry and then + * having set_eqe_hw() overwrite the owner field. + */ + wmb(); + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + /* See comment in tavor_set_eq_ci() above. */ + wmb(); + __raw_writel((u32) cl_hton32(ci), + (u8*)dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + if (mthca_is_memfree(dev)) + arbel_set_eq_ci(dev, eq, ci); + else + tavor_set_eq_ci(dev, eq, ci); +} + +static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) +{ + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(MTHCA_EQ_DB_REQ_NOT | eqn); + doorbell[1] = 0; + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) +{ + writel(eqn_mask, dev->eq_regs.arbel.eq_arm); +} + +static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) +{ + if (!mthca_is_memfree(dev)) { + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(MTHCA_EQ_DB_DISARM_CQ | eqn); + doorbell[1] = cl_hton32(cqn); + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } +} + +static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE; + return (struct mthca_eqe *)((u8*)eq->page_list[off / PAGE_SIZE].page + off % PAGE_SIZE); +} + +static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq) +{ + struct mthca_eqe* eqe; + eqe = get_eqe(eq, eq->cons_index); + return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? 
NULL : eqe; +} + +static inline void set_eqe_hw(struct mthca_eqe *eqe) +{ + eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW; +} + +static void port_change(struct mthca_dev *dev, int port, int active) +{ + struct ib_event record; + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Port change to %s for port %d\n", + active ? "active" : "down", port)); + + record.device = &dev->ib_dev; + record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + record.element.port_num = (u8)port; + // Gen2 ib_core mechanism + ib_dispatch_event(&record); + // our callback + ca_event_handler( &record, &dev->ext->hca.hob ); +} + +static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) +{ + int disarm_cqn; + int eqes_found = 0; + int set_ci = 0; + struct mthca_eqe *eqe = next_eqe_sw(eq); + uint64_t start = cl_get_time_stamp(); + int loops = 0; + + while (eqe) { + + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + rmb(); + + switch (eqe->type) { + case MTHCA_EVENT_TYPE_COMP: + disarm_cqn = cl_ntoh32(eqe->event.comp.cqn) & 0xffffff; + disarm_cq(dev, eq->eqn, disarm_cqn); + mthca_cq_completion(dev, disarm_cqn); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG); + break; + + case MTHCA_EVENT_TYPE_COMM_EST: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_COMM_EST); + break; + + case MTHCA_EVENT_TYPE_SQ_DRAINED: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_SQ_DRAINED); + break; + + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_LAST_WQE_REACHED); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, cl_ntoh32(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED); + break; + + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_FATAL); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG_FAILED: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG_ERR); + break; + + case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_REQ_ERR); + break; + + case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_ACCESS_ERR); + break; + + case MTHCA_EVENT_TYPE_CMD: + mthca_cmd_event(dev, + cl_ntoh16(eqe->event.cmd.token), + eqe->event.cmd.status, + cl_ntoh64(eqe->event.cmd.out_param)); + break; + + case MTHCA_EVENT_TYPE_PORT_CHANGE: + port_change(dev, + (cl_ntoh32(eqe->event.port_change.port) >> 28) & 3, + eqe->subtype == 0x4); + break; + + case MTHCA_EVENT_TYPE_CQ_ERROR: + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("CQ %s on CQN %06x (syndrome %d)\n", + eqe->event.cq_err.syndrome == 1 ? 
+ "overrun" : "access violation", + cl_ntoh32(eqe->event.cq_err.cqn) & 0xffffff, eqe->event.cq_err.syndrome)); + mthca_cq_event(dev, cl_ntoh32(eqe->event.cq_err.cqn), + IB_EVENT_CQ_ERR); + break; + + case MTHCA_EVENT_TYPE_EQ_OVERFLOW: + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("EQ overrun on EQN %d\n", eq->eqn)); + break; + + case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR: + case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR: + case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR: + case MTHCA_EVENT_TYPE_ECC_DETECT: + default: + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("Unhandled event %02x(%02x) on EQ %d\n", + eqe->type, eqe->subtype, eq->eqn)); + break; + }; + + set_eqe_hw(eqe); + ++eq->cons_index; + eqes_found += 1; + ++set_ci; + + /* + * The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MTHCA_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) { + /* + * Conditional on hca_type is OK here because + * this is a rare case, not the fast path. + */ + set_eq_ci(dev, eq, eq->cons_index); + set_ci = 0; + } + loops++; + if (cl_get_time_stamp() - start > g_max_DPC_time_us ) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Handeling of EQ stopped, and a new DPC is entered after %d loops\n", loops)); + KeInsertQueueDpc(&dev->eq_table.eq[eq->eq_num].dpc, NULL, NULL); + break; + } + eqe = next_eqe_sw(eq); + } + + /* + * Rely on caller to set consumer index so that we don't have + * to test hca_type in our interrupt handling fast path. + */ + return eqes_found; +} + +static void mthca_tavor_dpc( PRKDPC dpc, + PVOID ctx, PVOID arg1, PVOID arg2 ) +{ + struct mthca_eq *eq = ctx; + struct mthca_dev *dev = eq->dev; + SPIN_LOCK_PREP(lh); + + UNREFERENCED_PARAMETER(dpc); + UNREFERENCED_PARAMETER(arg1); + UNREFERENCED_PARAMETER(arg2); + + spin_lock_dpc(&eq->lock, &lh); + + /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ + if (mthca_eq_int(dev, eq)) { + tavor_set_eq_ci(dev, eq, eq->cons_index); + tavor_eq_req_not(dev, eq->eqn); + } + + spin_unlock_dpc(&lh); +} + +static BOOLEAN mthca_tavor_interrupt( + PKINTERRUPT int_obj, + PVOID ctx + ) +{ + struct mthca_dev *dev = ctx; + u32 ecr; + int i; + + UNREFERENCED_PARAMETER(int_obj); + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + ecr = readl((u8*)dev->eq_regs.tavor.ecr_base + 4); + if (!ecr) + return FALSE; + + writel(ecr, (u8*)dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + if (ecr & dev->eq_table.eq[i].eqn_mask && + next_eqe_sw(&dev->eq_table.eq[i])) { + KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); + } + } + + return TRUE; +} + +#ifdef MSI_SUPPORT +static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, + struct pt_regs *regs) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + mthca_eq_int(dev, eq); + tavor_set_eq_ci(dev, eq, eq->cons_index); + tavor_eq_req_not(dev, eq->eqn); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} +#endif + +static void mthca_arbel_dpc( PRKDPC dpc, + PVOID ctx, PVOID arg1, PVOID arg2 ) +{ + struct mthca_eq *eq = ctx; + struct mthca_dev *dev = eq->dev; + SPIN_LOCK_PREP(lh); + + UNREFERENCED_PARAMETER(dpc); + UNREFERENCED_PARAMETER(arg1); + UNREFERENCED_PARAMETER(arg2); + + spin_lock_dpc(&eq->lock, &lh); + + /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ + if 
(mthca_eq_int(dev, eq)) + arbel_set_eq_ci(dev, eq, eq->cons_index); + arbel_eq_req_not(dev, eq->eqn_mask); + + spin_unlock_dpc(&lh); +} + +static BOOLEAN mthca_arbel_interrupt( + PKINTERRUPT int_obj, + PVOID ctx + ) +{ + struct mthca_dev *dev = ctx; + int work = 0; + int i; + + UNREFERENCED_PARAMETER(int_obj); + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + if (next_eqe_sw( &dev->eq_table.eq[i]) ) { + work = 1; + while(InterlockedCompareExchange(&dev->dpc_lock, 1, 0)); + + KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); + InterlockedCompareExchange(&dev->dpc_lock, 0, 1); + } else { + arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); + } + } + + return (BOOLEAN)work; +} + +#ifdef MSI_SUPPORT +static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr, + struct pt_regs *regs) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + mthca_eq_int(dev, eq); + arbel_set_eq_ci(dev, eq, eq->cons_index); + arbel_eq_req_not(dev, eq->eqn_mask); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} +#endif + +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq) +{ + int npages; + u64 *dma_list = NULL; + struct mthca_mailbox *mailbox; + struct mthca_eq_context *eq_context; + int err = -ENOMEM; + int i; + u8 status; + + HCA_ENTER(HCA_DBG_INIT); + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; + + eq->page_list = kmalloc(npages * sizeof *eq->page_list, + GFP_KERNEL); + if (!eq->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + eq->page_list[i].page = NULL; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_free; + eq_context = mailbox->buf; + + for (i = 0; i < npages; ++i) { + alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &eq->page_list[i]); + if (!eq->page_list[i].page) + goto err_out_free_pages; + dma_list[i] = eq->page_list[i].dma_address; + } + + for (i = 0; i < eq->nent; ++i) + set_eqe_hw(get_eqe(eq, i)); + + eq->eqn = mthca_alloc(&dev->eq_table.alloc); + if (eq->eqn == -1) + goto err_out_free_pages; + + err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, + dma_list, PAGE_SHIFT, npages, + 0, npages * PAGE_SIZE, + MTHCA_MPT_FLAG_LOCAL_WRITE | + MTHCA_MPT_FLAG_LOCAL_READ, + &eq->mr); + if (err) + goto err_out_free_eq; + + RtlZeroMemory(eq_context, sizeof *eq_context); + eq_context->flags = cl_hton32(MTHCA_EQ_STATUS_OK | + MTHCA_EQ_OWNER_HW | + MTHCA_EQ_STATE_ARMED | + MTHCA_EQ_FLAG_TR); + if (mthca_is_memfree(dev)) + eq_context->flags |= cl_hton32(MTHCA_EQ_STATE_ARBEL); + + eq_context->logsize_usrpage = cl_hton32((ffs(eq->nent) - 1) << 24); + if (mthca_is_memfree(dev)) { + eq_context->arbel_pd = cl_hton32(dev->driver_pd.pd_num); + } else { + eq_context->logsize_usrpage |= cl_hton32(dev->driver_uar.index); + eq_context->tavor_pd = cl_hton32(dev->driver_pd.pd_num); + } + eq_context->intr = intr; + eq_context->lkey = cl_hton32(eq->mr.ibmr.lkey); + + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_EQ failed (%d)\n", err)); + goto err_out_free_mr; + } + if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_EQ returned status 0x%02x\n", + status)); + err = -EINVAL; + goto err_out_free_mr; + } + + 
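	/* SW2HW_EQ succeeded: the EQ context is now owned by the HCA, so the
	   temporary DMA address list and the command mailbox can be freed. */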
kfree(dma_list); + mthca_free_mailbox(dev, mailbox); + + eq->eqn_mask = _byteswap_ulong(1 << eq->eqn); + eq->cons_index = 0; + + dev->eq_table.arm_mask |= eq->eqn_mask; + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_INIT ,("Allocated EQ %d with %d entries\n", + eq->eqn, eq->nent)); + + HCA_EXIT(HCA_DBG_INIT); + return err; + + err_out_free_mr: + mthca_free_mr(dev, &eq->mr); + + err_out_free_eq: + mthca_free(&dev->eq_table.alloc, eq->eqn); + + err_out_free_pages: + for (i = 0; i < npages; ++i) { + if (eq->page_list[i].page) { + free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); + } + } + mthca_free_mailbox(dev, mailbox); + + err_out_free: + kfree(eq->page_list); + kfree(dma_list); + + err_out: + HCA_EXIT(HCA_DBG_INIT); + return err; +} + +static void mthca_free_eq(struct mthca_dev *dev, + struct mthca_eq *eq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / + PAGE_SIZE; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return; + + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); + if (err) + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ failed (%d)\n", err)); + if (status) + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ returned status 0x%02x\n", status)); + + dev->eq_table.arm_mask &= ~eq->eqn_mask; + + { // debug print + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Dumping EQ context %02x:\n", eq->eqn)); + for (i = 0; i < sizeof (struct mthca_eq_context) / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("[%02x] %08x %08x %08x %08x\n", i, + cl_ntoh32(*(u32*)((u8*)mailbox->buf + i * 4)), + cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+1)*4)), + cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+2)*4)), + cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+1)*4)))); + + } + } + + mthca_free_mr(dev, &eq->mr); + for (i = 0; i < npages; ++i) { + free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); + } + + kfree(eq->page_list); + mthca_free_mailbox(dev, mailbox); +} + +static void mthca_free_irqs(struct mthca_dev *dev) +{ + if (dev->eq_table.have_irq) + free_irq(dev->ext->int_obj); +#ifdef MSI_SUPPORT + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (dev->eq_table.eq[i].have_irq) + free_irq(dev->eq_table.eq[i].msi_x_vector, + dev->eq_table.eq + i); +#endif +} + +static int mthca_map_reg(struct mthca_dev *dev, + u64 offset, unsigned long size, + void __iomem **map, SIZE_T *map_size) +{ + u64 base = pci_resource_start(dev, HCA_BAR_TYPE_HCR); + *map = ioremap(base + offset, size, map_size); + if (!*map) + return -ENOMEM; + return 0; +} + +static void mthca_unmap_reg(struct mthca_dev *dev, u64 offset, + unsigned long size, void __iomem *map, SIZE_T map_size) +{ + UNREFERENCED_PARAMETER(dev); + UNREFERENCED_PARAMETER(size); + UNREFERENCED_PARAMETER(offset); + iounmap(map, map_size); +} + +static int mthca_map_eq_regs(struct mthca_dev *dev) +{ + u64 mthca_base; + + mthca_base = pci_resource_start(dev, HCA_BAR_TYPE_HCR); + + if (mthca_is_memfree(dev)) { + /* + * We assume that the EQ arm and EQ set CI registers + * fall within the first BAR. We can't trust the + * values firmware gives us, since those addresses are + * valid on the HCA's side of the PCI bus but not + * necessarily the host side. 
+ */ + if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + &dev->clr_base, &dev->clr_base_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " + "aborting.\n")); + return -ENOMEM; + } + + /* + * Add 4 because we limit ourselves to EQs 0 ... 31, + * so we only need the low word of the register. + */ + if (mthca_map_reg(dev, ((pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + &dev->eq_regs.arbel.eq_arm, &dev->eq_regs.arbel.eq_arm_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ arm register, aborting.\n")); + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + return -ENOMEM; + } + + if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_set_ci_base, + MTHCA_EQ_SET_CI_SIZE, + &dev->eq_regs.arbel.eq_set_ci_base, + &dev->eq_regs.arbel.eq_set_ci_base_size + )) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ CI register, aborting.\n")); + mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + dev->eq_regs.arbel.eq_arm, dev->eq_regs.arbel.eq_arm_size); + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + return -ENOMEM; + } + } else { + if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + &dev->clr_base, &dev->clr_base_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " + "aborting.\n")); + return -ENOMEM; + } + + if (mthca_map_reg(dev, MTHCA_ECR_BASE, + MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, + &dev->eq_regs.tavor.ecr_base, &dev->eq_regs.tavor.ecr_base_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map ecr register, " + "aborting.\n")); + mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + return -ENOMEM; + } + } + + return 0; + +} + +static void mthca_unmap_eq_regs(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) { + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_set_ci_base, + MTHCA_EQ_SET_CI_SIZE, + dev->eq_regs.arbel.eq_set_ci_base, + dev->eq_regs.arbel.eq_set_ci_base_size); + mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + dev->eq_regs.arbel.eq_arm, + dev->eq_regs.arbel.eq_arm_size); + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + } else { + mthca_unmap_reg(dev, MTHCA_ECR_BASE, + MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, + dev->eq_regs.tavor.ecr_base, + dev->eq_regs.tavor.ecr_base_size); + mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + } +} + +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) +{ + int ret; + u8 status; + + /* + * We assume that mapping one page is enough for the whole EQ + * context table. This is fine with all current HCAs, because + * we only use 32 EQs and each EQ uses 32 bytes of context + * memory, or 1 KB total. 
+ */ + dev->eq_table.icm_virt = icm_virt; + alloc_dma_zmem_map(dev,PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &dev->eq_table.sg); + if (!dev->eq_table.sg.page) + return -ENOMEM; + + ret = mthca_MAP_ICM_page(dev, dev->eq_table.sg.dma_address, icm_virt, &status); + if (!ret && status) + ret = -EINVAL; + if (ret) + free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); + + return ret; +} + +void mthca_unmap_eq_icm(struct mthca_dev *dev) +{ + u8 status; + + mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status); + free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); +} + +int mthca_init_eq_table(struct mthca_dev *dev) +{ + int err; + u8 status; + u8 intr; + int i; + + HCA_ENTER(HCA_DBG_INIT); + err = mthca_alloc_init(&dev->eq_table.alloc, + dev->limits.num_eqs, + dev->limits.num_eqs - 1, + dev->limits.reserved_eqs); + if (err) + return err; + + err = mthca_map_eq_regs(dev); + if (err) + goto err_out_free; + +#ifdef MSI_SUPPORT + if (dev->mthca_flags & MTHCA_FLAG_MSI || + dev->mthca_flags & MTHCA_FLAG_MSI_X) { + dev->eq_table.clr_mask = 0; + } else +#endif + { + dev->eq_table.clr_mask = + _byteswap_ulong(1 << (dev->eq_table.inta_pin & 31)); + dev->eq_table.clr_int = dev->clr_base + + (dev->eq_table.inta_pin < 32 ? 4 : 0); + } + + dev->eq_table.arm_mask = 0; + + intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? + 128 : dev->eq_table.inta_pin; + + err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, + &dev->eq_table.eq[MTHCA_EQ_COMP]); + if (err) + goto err_out_unmap; + + err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr, + &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + if (err) + goto err_out_comp; + + err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr, + &dev->eq_table.eq[MTHCA_EQ_CMD]); + if (err) + goto err_out_async; + +#ifdef MSI_SUPPORT + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { + static const char *eq_name[] = { + [MTHCA_EQ_COMP] = DRV_NAME " (comp)", + [MTHCA_EQ_ASYNC] = DRV_NAME " (async)", + [MTHCA_EQ_CMD] = DRV_NAME " (cmd)" + }; + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + err = request_irq(dev->eq_table.eq[i].msi_x_vector, + mthca_is_memfree(dev) ? + mthca_arbel_msi_x_interrupt : + mthca_tavor_msi_x_interrupt, + 0, eq_name[i], dev->eq_table.eq + i); + if (err) + goto err_out_cmd; + dev->eq_table.eq[i].have_irq = 1; + /* init DPC stuff something like that */ + spin_lock_init( &dev->eq_table.eq[i].lock ); + dev->dpc_lock = 0; + KeInitializeDpc( + &dev->eq_table.eq[i].dpc, + mthca_is_memfree(dev) ? + mthca_arbel_msi_x_dpc : + mthca_tavor_msi_x_dpc, + dev->eq_table.eq + i); + } + } else +#endif + { + spin_lock_init( &dev->ext->isr_lock ); + err = request_irq( + &dev->ext->interruptInfo, + &dev->ext->isr_lock.lock , + mthca_is_memfree(dev) ? mthca_arbel_interrupt : mthca_tavor_interrupt, + dev, + &dev->ext->int_obj + ); + if (err) + goto err_out_cmd; + dev->eq_table.have_irq = 1; + + /* init DPC stuff */ + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + spin_lock_init( &dev->eq_table.eq[i].lock ); + KeInitializeDpc( + &dev->eq_table.eq[i].dpc, + mthca_is_memfree(dev) ? 
+ mthca_arbel_dpc : + mthca_tavor_dpc, + dev->eq_table.eq + i); + dev->eq_table.eq[i].eq_num = i; + } + } + + err = mthca_MAP_EQ(dev, async_mask(dev), + 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + if (err) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT,("MAP_EQ for async EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err)); + if (status) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT, ("MAP_EQ for async EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status)); + err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + if (err) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT, ("MAP_EQ for cmd EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err)); + if (status) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT,("MAP_EQ for cmd EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status)); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (mthca_is_memfree(dev)) + arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); + else + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + + return 0; + +err_out_cmd: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]); + +err_out_async: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + +err_out_comp: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]); + +err_out_unmap: + mthca_unmap_eq_regs(dev); + +err_out_free: + mthca_alloc_cleanup(&dev->eq_table.alloc); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_INIT ,("mthca_init_eq failed %d\n",err)); + return err; +} + +void mthca_cleanup_eq_table(struct mthca_dev *dev) +{ + u8 status; + int i; + + mthca_free_irqs(dev); + + mthca_MAP_EQ(dev, async_mask(dev), + 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + mthca_free_eq(dev, &dev->eq_table.eq[i]); + + mthca_unmap_eq_regs(dev); + + mthca_alloc_cleanup(&dev->eq_table.alloc); +} + + diff --git a/trunk/hw/mthca/kernel/mthca_provider.h b/trunk/hw/mthca/kernel/mthca_provider.h index 136f6cb9..b321a7e8 100644 --- a/trunk/hw/mthca/kernel/mthca_provider.h +++ b/trunk/hw/mthca/kernel/mthca_provider.h @@ -1,446 +1,447 @@ -/* - * Copyright (c) 2004 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * $Id$ - */ - -#ifndef MTHCA_PROVIDER_H -#define MTHCA_PROVIDER_H - -#include -#include -#include - -typedef uint32_t mthca_mpt_access_t; -#define MTHCA_MPT_FLAG_ATOMIC (1 << 14) -#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) -#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12) -#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11) -#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10) - -union mthca_buf { - struct scatterlist direct; - struct scatterlist *page_list; -}; - -struct mthca_uar { - PFN_NUMBER pfn; - int index; -}; - -struct mthca_user_db_table; - -struct mthca_ucontext { - struct ib_ucontext ibucontext; - struct mthca_uar uar; - struct mthca_user_db_table *db_tab; - // for user UAR - PMDL mdl; - PVOID kva; - SIZE_T uar_size; -}; - -struct mthca_mtt; - -struct mthca_mr { - //NB: the start of this structure is to be equal to mlnx_mro_t ! - //NB: the structure was not inserted here for not to mix driver and provider structures - struct ib_mr ibmr; - struct mthca_mtt *mtt; - int iobuf_used; - mt_iobuf_t iobuf; - void *secure_handle; -}; - -struct mthca_fmr { - struct ib_fmr ibmr; - struct ib_fmr_attr attr; - struct mthca_mtt *mtt; - int maps; - union { - struct { - struct mthca_mpt_entry __iomem *mpt; - u64 __iomem *mtts; - } tavor; - struct { - struct mthca_mpt_entry *mpt; - __be64 *mtts; - } arbel; - } mem; -}; - -struct mthca_pd { - struct ib_pd ibpd; - u32 pd_num; - atomic_t sqp_count; - struct mthca_mr ntmr; - int privileged; -}; - -struct mthca_eq { - struct mthca_dev *dev; - int eqn; - u32 eqn_mask; - u32 cons_index; - u16 msi_x_vector; - u16 msi_x_entry; - int have_irq; - int nent; - struct scatterlist *page_list; - struct mthca_mr mr; - KDPC dpc; /* DPC for MSI-X interrupts */ - spinlock_t lock; /* spinlock for simult DPCs */ -}; - -struct mthca_av; - -enum mthca_ah_type { - MTHCA_AH_ON_HCA, - MTHCA_AH_PCI_POOL, - MTHCA_AH_KMALLOC -}; - -struct mthca_ah { - struct ib_ah ibah; - enum mthca_ah_type type; - u32 key; - struct mthca_av *av; - dma_addr_t avdma; -}; - -/* - * Quick description of our CQ/QP locking scheme: - * - * We have one global lock that protects dev->cq/qp_table. Each - * struct mthca_cq/qp also has its own lock. An individual qp lock - * may be taken inside of an individual cq lock. Both cqs attached to - * a qp may be locked, with the send cq locked first. No other - * nesting should be done. - * - * Each struct mthca_cq/qp also has an atomic_t ref count. The - * pointer from the cq/qp_table to the struct counts as one reference. - * This reference also is good for access through the consumer API, so - * modifying the CQ/QP etc doesn't need to take another reference. - * Access because of a completion being polled does need a reference. - * - * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the - * destroy function to sleep on. - * - * This means that access from the consumer API requires nothing but - * taking the struct's lock. 
- * - * Access because of a completion event should go as follows: - * - lock cq/qp_table and look up struct - * - increment ref count in struct - * - drop cq/qp_table lock - * - lock struct, do your thing, and unlock struct - * - decrement ref count; if zero, wake up waiters - * - * To destroy a CQ/QP, we can do the following: - * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock - * - decrement ref count - * - wait_event until ref count is zero - * - * It is the consumer's responsibilty to make sure that no QP - * operations (WQE posting or state modification) are pending when the - * QP is destroyed. Also, the consumer must make sure that calls to - * qp_modify are serialized. - * - * Possible optimizations (wait for profile data to see if/where we - * have locks bouncing between CPUs): - * - split cq/qp table lock into n separate (cache-aligned) locks, - * indexed (say) by the page in the table - * - split QP struct lock into three (one for common info, one for the - * send queue and one for the receive queue) - */ -//TODO: check correctness of the above requirement: "It is the consumer's responsibilty to make sure that no QP -// operations (WQE posting or state modification) are pending when the QP is destroyed" - -struct mthca_cq { - struct ib_cq ibcq; - void *cq_context; // leo: for IBAL shim - spinlock_t lock; - atomic_t refcount; - int cqn; - u32 cons_index; - int is_direct; - int is_kernel; - - /* Next fields are Arbel only */ - int set_ci_db_index; - __be32 *set_ci_db; - int arm_db_index; - __be32 *arm_db; - int arm_sn; - int u_arm_db_index; - int *p_u_arm_sn; - - union mthca_buf queue; - struct mthca_mr mr; - wait_queue_head_t wait; - KMUTEX mutex; -}; - -struct mthca_srq { - struct ib_srq ibsrq; - spinlock_t lock; - atomic_t refcount; - int srqn; - int max; - int max_gs; - int wqe_shift; - int first_free; - int last_free; - u16 counter; /* Arbel only */ - int db_index; /* Arbel only */ - __be32 *db; /* Arbel only */ - void *last; - - int is_direct; - u64 *wrid; - union mthca_buf queue; - struct mthca_mr mr; - - wait_queue_head_t wait; - KMUTEX mutex; -}; - -struct mthca_wq { - spinlock_t lock; - int max; - unsigned next_ind; - unsigned last_comp; - unsigned head; - unsigned tail; - void *last; - int max_gs; - int wqe_shift; - - int db_index; /* Arbel only */ - __be32 *db; -}; - -struct mthca_qp { - struct ib_qp ibqp; - void *qp_context; // leo: for IBAL shim - //TODO: added just because absense of ibv_query_qp - // thereafter it may be worth to be replaced by struct ib_qp_attr qp_attr; - struct ib_qp_init_attr qp_init_attr; // leo: for query_qp - atomic_t refcount; - u32 qpn; - int is_direct; - u8 transport; - u8 state; - u8 atomic_rd_en; - u8 resp_depth; - - struct mthca_mr mr; - - struct mthca_wq rq; - struct mthca_wq sq; - enum ib_sig_type sq_policy; - int send_wqe_offset; - int max_inline_data; - - u64 *wrid; - union mthca_buf queue; - - wait_queue_head_t wait; - KMUTEX mutex; -}; - -struct mthca_sqp { - struct mthca_qp qp; - int port; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - struct scatterlist sg; -}; - -static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) -{ - return container_of(ibucontext, struct mthca_ucontext, ibucontext); -} - -static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) -{ - return container_of(ibmr, struct mthca_fmr, ibmr); -} - -static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct mthca_mr, ibmr); -} - -static inline struct 
mthca_pd *to_mpd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct mthca_pd, ibpd); -} - -static inline struct mthca_ah *to_mah(struct ib_ah *ibah) -{ - return container_of(ibah, struct mthca_ah, ibah); -} - -static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct mthca_cq, ibcq); -} - -static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) -{ - return container_of(ibsrq, struct mthca_srq, ibsrq); -} - -static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct mthca_qp, ibqp); -} - -static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) -{ - return container_of(qp, struct mthca_sqp, qp); -} - -static inline uint8_t start_port(struct ib_device *device) -{ - return device->node_type == IB_NODE_SWITCH ? 0 : 1; -} - -static inline uint8_t end_port(struct ib_device *device) -{ - return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; -} - -static inline int ib_copy_from_umv_buf(void *dest, ci_umv_buf_t* const p_umv_buf, size_t len) -{ - RtlCopyMemory(dest, p_umv_buf->p_inout_buf, len); - return 0; -} - -static inline int ib_copy_to_umv_buf(ci_umv_buf_t* const p_umv_buf, void *src, size_t len) -{ - if (p_umv_buf->output_size < len) { - p_umv_buf->status = IB_INSUFFICIENT_MEMORY; - p_umv_buf->output_size = 0; - return -EFAULT; - } - RtlCopyMemory(p_umv_buf->p_inout_buf, src, len); - p_umv_buf->status = IB_SUCCESS; - p_umv_buf->output_size = (uint32_t)len; - return 0; -} - - - -// API -int mthca_query_device(struct ib_device *ibdev, - struct ib_device_attr *props); - -int mthca_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props); - -int mthca_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props); - -int mthca_query_pkey_chunk(struct ib_device *ibdev, - u8 port, u16 index, u16 pkey[32]); - -int mthca_query_gid_chunk(struct ib_device *ibdev, u8 port, - int index, union ib_gid gid[8]); - -struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, - ci_umv_buf_t* const p_umv_buf); - -int mthca_dealloc_ucontext(struct ib_ucontext *context); - -struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, - struct ib_ucontext *context, - ci_umv_buf_t* const p_umv_buf); - -int mthca_dealloc_pd(struct ib_pd *pd); - -struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); - -int mthca_ah_destroy(struct ib_ah *ah); - -struct ib_srq *mthca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - ci_umv_buf_t* const p_umv_buf); - -int mthca_destroy_srq(struct ib_srq *srq); - -struct ib_qp *mthca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - ci_umv_buf_t* const p_umv_buf); - -int mthca_destroy_qp(struct ib_qp *qp); - -struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, - struct ib_ucontext *context, - ci_umv_buf_t* const p_umv_buf); - -int mthca_destroy_cq(struct ib_cq *cq); - -struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc); - -struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *buffer_list, - int num_phys_buf, - mthca_qp_access_t acc, - u64 *iova_start); - -struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd, - void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, - mthca_qp_access_t acc, boolean_t um_call); - -int mthca_dereg_mr(struct ib_mr *mr); - -struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, mthca_qp_access_t acc, - struct ib_fmr_attr *fmr_attr); - -int mthca_dealloc_fmr(struct ib_fmr *fmr); - -int 
mthca_unmap_fmr(struct list_head *fmr_list); - -int mthca_poll_cq_list( - IN struct ib_cq *ibcq, - IN OUT ib_wc_t** const pp_free_wclist, - OUT ib_wc_t** const pp_done_wclist ); - - -#endif /* MTHCA_PROVIDER_H */ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_PROVIDER_H +#define MTHCA_PROVIDER_H + +#include +#include +#include + +typedef uint32_t mthca_mpt_access_t; +#define MTHCA_MPT_FLAG_ATOMIC (1 << 14) +#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) +#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12) +#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11) +#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10) + +union mthca_buf { + struct scatterlist direct; + struct scatterlist *page_list; +}; + +struct mthca_uar { + PFN_NUMBER pfn; + int index; +}; + +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; + // for user UAR + PMDL mdl; + PVOID kva; + SIZE_T uar_size; +}; + +struct mthca_mtt; + +struct mthca_mr { + //NB: the start of this structure is to be equal to mlnx_mro_t ! 
+ //NB: the structure was not inserted here for not to mix driver and provider structures + struct ib_mr ibmr; + struct mthca_mtt *mtt; + int iobuf_used; + mt_iobuf_t iobuf; + void *secure_handle; +}; + +struct mthca_fmr { + struct ib_fmr ibmr; + struct ib_fmr_attr attr; + struct mthca_mtt *mtt; + int maps; + union { + struct { + struct mthca_mpt_entry __iomem *mpt; + u64 __iomem *mtts; + } tavor; + struct { + struct mthca_mpt_entry *mpt; + __be64 *mtts; + } arbel; + } mem; +}; + +struct mthca_pd { + struct ib_pd ibpd; + u32 pd_num; + atomic_t sqp_count; + struct mthca_mr ntmr; + int privileged; +}; + +struct mthca_eq { + struct mthca_dev *dev; + int eqn; + int eq_num; + u32 eqn_mask; + u32 cons_index; + u16 msi_x_vector; + u16 msi_x_entry; + int have_irq; + int nent; + struct scatterlist *page_list; + struct mthca_mr mr; + KDPC dpc; /* DPC for MSI-X interrupts */ + spinlock_t lock; /* spinlock for simult DPCs */ +}; + +struct mthca_av; + +enum mthca_ah_type { + MTHCA_AH_ON_HCA, + MTHCA_AH_PCI_POOL, + MTHCA_AH_KMALLOC +}; + +struct mthca_ah { + struct ib_ah ibah; + enum mthca_ah_type type; + u32 key; + struct mthca_av *av; + dma_addr_t avdma; +}; + +/* + * Quick description of our CQ/QP locking scheme: + * + * We have one global lock that protects dev->cq/qp_table. Each + * struct mthca_cq/qp also has its own lock. An individual qp lock + * may be taken inside of an individual cq lock. Both cqs attached to + * a qp may be locked, with the send cq locked first. No other + * nesting should be done. + * + * Each struct mthca_cq/qp also has an atomic_t ref count. The + * pointer from the cq/qp_table to the struct counts as one reference. + * This reference also is good for access through the consumer API, so + * modifying the CQ/QP etc doesn't need to take another reference. + * Access because of a completion being polled does need a reference. + * + * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the + * destroy function to sleep on. + * + * This means that access from the consumer API requires nothing but + * taking the struct's lock. + * + * Access because of a completion event should go as follows: + * - lock cq/qp_table and look up struct + * - increment ref count in struct + * - drop cq/qp_table lock + * - lock struct, do your thing, and unlock struct + * - decrement ref count; if zero, wake up waiters + * + * To destroy a CQ/QP, we can do the following: + * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock + * - decrement ref count + * - wait_event until ref count is zero + * + * It is the consumer's responsibilty to make sure that no QP + * operations (WQE posting or state modification) are pending when the + * QP is destroyed. Also, the consumer must make sure that calls to + * qp_modify are serialized. 
+ * + * Possible optimizations (wait for profile data to see if/where we + * have locks bouncing between CPUs): + * - split cq/qp table lock into n separate (cache-aligned) locks, + * indexed (say) by the page in the table + * - split QP struct lock into three (one for common info, one for the + * send queue and one for the receive queue) + */ +//TODO: check correctness of the above requirement: "It is the consumer's responsibilty to make sure that no QP +// operations (WQE posting or state modification) are pending when the QP is destroyed" + +struct mthca_cq { + struct ib_cq ibcq; + void *cq_context; // leo: for IBAL shim + spinlock_t lock; + atomic_t refcount; + int cqn; + u32 cons_index; + int is_direct; + int is_kernel; + + /* Next fields are Arbel only */ + int set_ci_db_index; + __be32 *set_ci_db; + int arm_db_index; + __be32 *arm_db; + int arm_sn; + int u_arm_db_index; + int *p_u_arm_sn; + + union mthca_buf queue; + struct mthca_mr mr; + wait_queue_head_t wait; + KMUTEX mutex; +}; + +struct mthca_srq { + struct ib_srq ibsrq; + spinlock_t lock; + atomic_t refcount; + int srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + u16 counter; /* Arbel only */ + int db_index; /* Arbel only */ + __be32 *db; /* Arbel only */ + void *last; + + int is_direct; + u64 *wrid; + union mthca_buf queue; + struct mthca_mr mr; + + wait_queue_head_t wait; + KMUTEX mutex; +}; + +struct mthca_wq { + spinlock_t lock; + int max; + unsigned next_ind; + unsigned last_comp; + unsigned head; + unsigned tail; + void *last; + int max_gs; + int wqe_shift; + + int db_index; /* Arbel only */ + __be32 *db; +}; + +struct mthca_qp { + struct ib_qp ibqp; + void *qp_context; // leo: for IBAL shim + //TODO: added just because absense of ibv_query_qp + // thereafter it may be worth to be replaced by struct ib_qp_attr qp_attr; + struct ib_qp_init_attr qp_init_attr; // leo: for query_qp + atomic_t refcount; + u32 qpn; + int is_direct; + u8 transport; + u8 state; + u8 atomic_rd_en; + u8 resp_depth; + + struct mthca_mr mr; + + struct mthca_wq rq; + struct mthca_wq sq; + enum ib_sig_type sq_policy; + int send_wqe_offset; + int max_inline_data; + + u64 *wrid; + union mthca_buf queue; + + wait_queue_head_t wait; + KMUTEX mutex; +}; + +struct mthca_sqp { + struct mthca_qp qp; + int port; + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + struct scatterlist sg; +}; + +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + +static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) +{ + return container_of(ibmr, struct mthca_fmr, ibmr); +} + +static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mthca_mr, ibmr); +} + +static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mthca_pd, ibpd); +} + +static inline struct mthca_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mthca_ah, ibah); +} + +static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mthca_cq, ibcq); +} + +static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mthca_srq, ibsrq); +} + +static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mthca_qp, ibqp); +} + +static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) +{ + return container_of(qp, struct mthca_sqp, qp); +} + 
+static inline uint8_t start_port(struct ib_device *device) +{ + return device->node_type == IB_NODE_SWITCH ? 0 : 1; +} + +static inline uint8_t end_port(struct ib_device *device) +{ + return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; +} + +static inline int ib_copy_from_umv_buf(void *dest, ci_umv_buf_t* const p_umv_buf, size_t len) +{ + RtlCopyMemory(dest, p_umv_buf->p_inout_buf, len); + return 0; +} + +static inline int ib_copy_to_umv_buf(ci_umv_buf_t* const p_umv_buf, void *src, size_t len) +{ + if (p_umv_buf->output_size < len) { + p_umv_buf->status = IB_INSUFFICIENT_MEMORY; + p_umv_buf->output_size = 0; + return -EFAULT; + } + RtlCopyMemory(p_umv_buf->p_inout_buf, src, len); + p_umv_buf->status = IB_SUCCESS; + p_umv_buf->output_size = (uint32_t)len; + return 0; +} + + + +// API +int mthca_query_device(struct ib_device *ibdev, + struct ib_device_attr *props); + +int mthca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props); + +int mthca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props); + +int mthca_query_pkey_chunk(struct ib_device *ibdev, + u8 port, u16 index, u16 pkey[32]); + +int mthca_query_gid_chunk(struct ib_device *ibdev, u8 port, + int index, union ib_gid gid[8]); + +struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + ci_umv_buf_t* const p_umv_buf); + +int mthca_dealloc_ucontext(struct ib_ucontext *context); + +struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + +int mthca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *mthca_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + +int mthca_ah_destroy(struct ib_ah *ah); + +struct ib_srq *mthca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + ci_umv_buf_t* const p_umv_buf); + +int mthca_destroy_srq(struct ib_srq *srq); + +struct ib_qp *mthca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + ci_umv_buf_t* const p_umv_buf); + +int mthca_destroy_qp(struct ib_qp *qp); + +struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + +int mthca_destroy_cq(struct ib_cq *cq); + +struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc); + +struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + mthca_qp_access_t acc, + u64 *iova_start); + +struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd, + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, + mthca_qp_access_t acc, boolean_t um_call); + +int mthca_dereg_mr(struct ib_mr *mr); + +struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, mthca_qp_access_t acc, + struct ib_fmr_attr *fmr_attr); + +int mthca_dealloc_fmr(struct ib_fmr *fmr); + +int mthca_unmap_fmr(struct list_head *fmr_list); + +int mthca_poll_cq_list( + IN struct ib_cq *ibcq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ); + + +#endif /* MTHCA_PROVIDER_H */ -- 2.41.0
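
The central change in the mthca_eq.c hunk above is the per-DPC time budget: mthca_eq_int() now stops draining an EQ once g_max_DPC_time_us (10 ms by default) has elapsed and re-queues its own DPC so the other event queues get a turn on the CPU. A minimal portable-C sketch of that pattern follows; now_us(), poll_one_event() and requeue_handler() are hypothetical stand-ins for cl_get_time_stamp(), the EQE-processing loop body and KeInsertQueueDpc(), not the driver's definitions.

/* Illustrative sketch only, not the driver's code: drain events until the
 * queue is empty or the budget is spent, then reschedule the handler so
 * other event queues are not starved.
 */
#include <stdint.h>
#include <time.h>

#define MAX_DPC_TIME_US 10000u          /* default budget: 10 ms */

static uint64_t now_us(void)            /* monotonic clock in microseconds */
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000u + (uint64_t)ts.tv_nsec / 1000u;
}

/* hypothetical: returns nonzero while events remain, 0 when the queue is empty */
extern int  poll_one_event(void *eq);
/* hypothetical: schedules this handler to run again later */
extern void requeue_handler(void *eq);

int drain_with_budget(void *eq)
{
    uint64_t start   = now_us();
    int      handled = 0;

    while (poll_one_event(eq)) {
        ++handled;
        if (now_us() - start > MAX_DPC_TIME_US) {
            requeue_handler(eq);        /* budget exhausted: let other EQs run */
            break;
        }
    }
    return handled;
}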
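
The same loop must also keep the hardware informed of progress: each EQ is created with MTHCA_NUM_SPARE_EQE extra entries, and the consumer index has to be written back to the HCA at least that often, otherwise the hardware concludes the queue has overflowed. A rough sketch of that cadence, with NUM_SPARE_EQE and doorbell_set_ci() as placeholders rather than the driver's symbols:

/* Illustrative only: push the consumer index to hardware at least once every
 * NUM_SPARE_EQE processed entries, and once more when the batch ends.
 */
#include <stdint.h>

#define NUM_SPARE_EQE 0x80                 /* placeholder value */

extern void doorbell_set_ci(uint32_t ci);  /* hypothetical "set EQ CI" write */

void consume(uint32_t *cons_index, unsigned int nevents)
{
    unsigned int since_update = 0;

    for (unsigned int i = 0; i < nevents; ++i) {
        ++*cons_index;                     /* one entry handed back to HW */
        if (++since_update >= NUM_SPARE_EQE) {
            doorbell_set_ci(*cons_index);  /* prove forward progress to the HCA */
            since_update = 0;
        }
    }
    doorbell_set_ci(*cons_index);          /* final update for this batch */
}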
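
The interrupt handlers themselves stay thin: mthca_tavor_interrupt() reads the event cause register, acknowledges it, and queues one DPC per EQ that actually has a software-owned EQE, leaving the draining (and the time budget) to the deferred routine. A simplified model follows; read_ecr(), clear_ecr(), eq_has_work() and queue_dpc() are assumed helpers standing in for the register accesses, next_eqe_sw() and KeInsertQueueDpc(), and the eqn_mask values are simplified.

/* Illustrative ISR/DPC split, not the driver's code. */
#include <stdint.h>

#define NUM_EQ 3

extern uint32_t read_ecr(void);            /* hypothetical ECR read */
extern void     clear_ecr(uint32_t bits);  /* hypothetical ECR clear write */
extern int      eq_has_work(int eq);       /* hypothetical next_eqe_sw() check */
extern void     queue_dpc(int eq);         /* hypothetical deferred dispatch */

static const uint32_t eqn_mask[NUM_EQ] = { 1u << 0, 1u << 1, 1u << 2 };

int isr(void)
{
    uint32_t ecr = read_ecr();
    if (!ecr)
        return 0;                          /* not our interrupt */

    clear_ecr(ecr);                        /* ack what we are about to handle */

    for (int i = 0; i < NUM_EQ; ++i)
        if ((ecr & eqn_mask[i]) && eq_has_work(i))
            queue_dpc(i);                  /* defer the heavy lifting */

    return 1;                              /* interrupt was serviced */
}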
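
On the sizing arithmetic in mthca_create_eq(): the requested entry count is rounded up to a power of two (at least 2) and the backing store is allocated in whole pages. A small sketch under assumed constants; EQ_ENTRY_SIZE and PAGE_SZ are illustrative, not the driver's MTHCA_EQ_ENTRY_SIZE/PAGE_SIZE values.

/* Illustrative EQ sizing, not the driver's code. */
#define EQ_ENTRY_SIZE 32u                  /* assumed entry size */
#define PAGE_SZ       4096u                /* assumed page size */

static unsigned int roundup_pow2(unsigned int x)
{
    unsigned int r = 1;
    while (r < x)
        r <<= 1;
    return r;
}

static unsigned int eq_pages(unsigned int nent)
{
    unsigned int entries = roundup_pow2(nent < 2 ? 2 : nent);
    return (entries * EQ_ENTRY_SIZE + PAGE_SZ - 1) / PAGE_SZ;   /* round up to pages */
}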
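
Finally, the to_m*() helpers in mthca_provider.h all follow the same container_of() pattern: each driver object embeds the generic verbs object, and the helper recovers the enclosing structure from a pointer to the embedded member. A self-contained illustration with simplified type names (drv_cq and to_drv_cq are not the driver's identifiers):

/* Illustrative container_of() accessor, mirroring the to_mcq()-style helpers. */
#include <stddef.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_cq  { int cqe; };
struct drv_cq { int cqn; struct ib_cq ibcq; };

static inline struct drv_cq *to_drv_cq(struct ib_cq *ibcq)
{
    return container_of(ibcq, struct drv_cq, ibcq);
}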