From 6da7ad7b5240e58619c38538cae14f274924c131 Mon Sep 17 00:00:00 2001 From: leonidk Date: Wed, 27 Jun 2007 08:49:10 +0000 Subject: [PATCH] [MTHCA] added auto-trimming for memory registration cash git-svn-id: svn://openib.tc.cornell.edu/gen1@722 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- trunk/hw/mthca/kernel/hca_pnp.c | 1 + trunk/hw/mthca/kernel/hca_verbs.c | 2 + trunk/hw/mthca/kernel/mt_pa_cash.c | 260 +++++++++++++++++++++++------ trunk/hw/mthca/kernel/mt_pa_cash.h | 10 +- 4 files changed, 216 insertions(+), 57 deletions(-) diff --git a/trunk/hw/mthca/kernel/hca_pnp.c b/trunk/hw/mthca/kernel/hca_pnp.c index 4d9bf574..fa1c5b71 100644 --- a/trunk/hw/mthca/kernel/hca_pnp.c +++ b/trunk/hw/mthca/kernel/hca_pnp.c @@ -1143,6 +1143,7 @@ hca_query_remove( { hca_dev_ext_t*p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; if (atomic_read(&p_ext->usecnt)) { + DbgPrint( "MTHCA: Can't get unloaded. %d applications are still in work\n", p_ext->usecnt); p_irp->IoStatus.Status = STATUS_UNSUCCESSFUL; return cl_irp_complete( p_dev_obj, p_irp, p_action ); } diff --git a/trunk/hw/mthca/kernel/hca_verbs.c b/trunk/hw/mthca/kernel/hca_verbs.c index 5d8f74d6..aa6c5ab2 100644 --- a/trunk/hw/mthca/kernel/hca_verbs.c +++ b/trunk/hw/mthca/kernel/hca_verbs.c @@ -41,6 +41,7 @@ #include "mthca_dev.h" #include "ib_cache.h" #include "mx_abi.h" +#include "mt_pa_cash.h" #define PTR_ALIGN(size) (((size) + sizeof(void*) - 1) & ~(sizeof(void*) - 1)) @@ -526,6 +527,7 @@ done: cl_free( h_um_ca ); else ibv_um_close(p_ucontext); + pa_cash_print(); return; } diff --git a/trunk/hw/mthca/kernel/mt_pa_cash.c b/trunk/hw/mthca/kernel/mt_pa_cash.c index 0a86cf60..3b0f27bc 100644 --- a/trunk/hw/mthca/kernel/mt_pa_cash.c +++ b/trunk/hw/mthca/kernel/mt_pa_cash.c @@ -34,79 +34,143 @@ */ #include "mt_pa_cash.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_pa_cash.tmh" +#endif + +/////////////////////////////////////////////////////////////////////////// +// +// RESTRICTIONS +// +/////////////////////////////////////////////////////////////////////////// #ifdef _WIN64 -#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) +#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) // 256 GB #else -#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) +#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) // 64 GB #endif -typedef struct { - int ref_cnt; -} pa_table_entry_t; +#define FREE_LIST_TRESHOLD 256 // max number of pages in free list + +/////////////////////////////////////////////////////////////////////////// +// +// CONSTANTS +// +/////////////////////////////////////////////////////////////////////////// #define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) #define PA_TABLE_ENTRY_NUM (PAGE_SIZE / PA_TABLE_ENTRY_SIZE) #define PA_TABLE_SIZE (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM) -#define PA_DIR_ENTRY_SIZE sizeof(void*) +#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) #define PA_DIR_ENTRY_NUM (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM) #define PA_DIR_SIZE (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM) -struct pa_cash_s { - pa_table_entry_t **pa_dir; + +/////////////////////////////////////////////////////////////////////////// +// +// STRUCTURES +// +/////////////////////////////////////////////////////////////////////////// + +typedef struct { + int ref_cnt; +} pa_table_entry_t; + +typedef struct { + pa_table_entry_t *pa_te; /* pointer to one page of pa_table_entry_t elements */ + int used; /* number of pa_table_entry_t elements, used now. When 0 - the page may be freed */ +} pa_dir_entry_t; + +typedef struct pa_cash_s { + pa_dir_entry_t *pa_dir; + SINGLE_LIST_ENTRY free_list_hdr; + uint32_t free_nr_pages; + uint32_t free_list_threshold; uint32_t max_nr_pages; -} g_cash = { NULL, 0 }; + uint32_t cur_nr_pages; +} pa_cash_t; + + + +/////////////////////////////////////////////////////////////////////////// +// +// GLOBALS +// +/////////////////////////////////////////////////////////////////////////// KMUTEX g_pa_mutex; u64 g_pa[1024]; +pa_cash_t g_cash; -int pa_cash_init() + +/////////////////////////////////////////////////////////////////////////// +// +// STATIC FUNCTIONS +// +/////////////////////////////////////////////////////////////////////////// + +static uint32_t __calc_threshold() { - void *pa_dir; - pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL); + // threshold expresses the max length of free pages list, which gets released only at driver unload time + // so it can be calculated to be proportional to the system memory size + return FREE_LIST_TRESHOLD; +} - if (!pa_dir) - return -ENOMEM; - g_cash.pa_dir = pa_dir; - g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM; - KeInitializeMutex(&g_pa_mutex, 0); - return 0; +static pa_table_entry_t *__alloc_page() +{ + pa_table_entry_t *pa_te; + + /* take from the list of reserved if it is not empty */ + if (g_cash.free_nr_pages) { + pa_te = (pa_table_entry_t *)PopEntryList( &g_cash.free_list_hdr ); + ((SINGLE_LIST_ENTRY*)pa_te)->Next = NULL; + g_cash.free_nr_pages--; + } + else /* allocate new page */ + pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL ); + + return pa_te; } -void pa_cash_release() +static void __free_page(pa_table_entry_t *pa_te) { - int i; - /* free cash tables */ - for (i=0; i> PAGE_SHIFT); - pa_table_entry_t *pa_te; + pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; - /* or pa is incorrect or memory that big is not supported */ - if (ix > g_cash.max_nr_pages) { - ASSERT(FALSE); - return -EFAULT; + /* no this page_table - add a new one */ + if (!pa_te) { + pa_te = __alloc_page(); + if (!pa_te) + return NULL; + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te; + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0; + g_cash.cur_nr_pages++; } - - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM]; - /* no this page_table */ - if (!pa_te) - return 0; + return pa_te; +} - return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; +static void __put_page(uint32_t ix) +{ + __free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te); + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL; + g_cash.cur_nr_pages--; } -int __add_pa(uint64_t pa) +static int __add_pa(uint64_t pa) { uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT); pa_table_entry_t *pa_te; @@ -116,23 +180,22 @@ int __add_pa(uint64_t pa) ASSERT(FALSE); return -EFAULT; } - - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM]; - - /* no this page_table - add a new one */ - if (!pa_te) { - pa_te = kzalloc(PA_DIR_SIZE, GFP_KERNEL); - if (!pa_te) - return -ENOMEM; - g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM] = pa_te; - } + /* get page address */ + pa_te = __get_page(ix); + if (!pa_te) + return -ENOMEM; + /* register page address */ + if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) + ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + return 0; } -int __rmv_pa(uint64_t pa) + +static int __rmv_pa(uint64_t pa) { uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT); pa_table_entry_t *pa_te; @@ -143,7 +206,7 @@ int __rmv_pa(uint64_t pa) return -EFAULT; } - pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM]; + pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; /* no this page_table - error*/ if (!pa_te) { @@ -154,9 +217,25 @@ int __rmv_pa(uint64_t pa) /* deregister page address */ --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0); + + /* release the page on need */ + if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) + --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; + if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used) + __put_page(ix); + return 0; } + + +/////////////////////////////////////////////////////////////////////////// +// +// PUBLIC FUNCTIONS +// +/////////////////////////////////////////////////////////////////////////// + + int pa_register(mt_iobuf_t *iobuf_p) { int i,j,n; @@ -208,3 +287,78 @@ void pa_deregister(mt_iobuf_t *iobuf_p) } } +void pa_cash_print() +{ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW, + ("pa_cash_print: max_nr_pages %d (%#x), cur_nr_pages %d (%#x), free_list_hdr %d, free_threshold %d\n", + g_cash.max_nr_pages, g_cash.max_nr_pages, + g_cash.cur_nr_pages, g_cash.cur_nr_pages, + g_cash.free_nr_pages, g_cash.free_list_threshold )); +} + + +void pa_cash_release() +{ + int i; + + pa_cash_print(); + + if (!g_cash.pa_dir) + return; + + /* free cash tables */ + for (i=0; i> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table */ + if (!pa_te) + return 0; + + return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; +} + +int pa_cash_init() +{ + void *pa_dir; + pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL); + + if (!pa_dir) + return -ENOMEM; + g_cash.pa_dir = pa_dir; + g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM; + g_cash.free_list_hdr.Next = NULL; + g_cash.cur_nr_pages = 0; + g_cash.free_nr_pages = 0; + g_cash.free_list_threshold = __calc_threshold(); + KeInitializeMutex(&g_pa_mutex, 0); + return 0; +} + diff --git a/trunk/hw/mthca/kernel/mt_pa_cash.h b/trunk/hw/mthca/kernel/mt_pa_cash.h index ca2c0435..4ca6eb57 100644 --- a/trunk/hw/mthca/kernel/mt_pa_cash.h +++ b/trunk/hw/mthca/kernel/mt_pa_cash.h @@ -36,14 +36,16 @@ #include "mthca_dev.h" extern KMUTEX g_pa_mutex; + int pa_cash_init(); + void pa_cash_release(); -int pa_is_registerable( - IN void* __ptr64 vaddr, - IN uint64_t length, - IN OUT mthca_qp_access_t *acc); int pa_is_registered(uint64_t pa); + int pa_register(mt_iobuf_t *iobuf_p); + void pa_deregister(mt_iobuf_t *iobuf_p); +void pa_cash_print(); + -- 2.41.0