From: Arlin Davis Date: Tue, 8 Jul 2014 21:20:27 +0000 (-0700) Subject: common: new debug levels for low system memory, IA stats, and package info X-Git-Tag: dapl-2.1.0-1~21 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=e01796baa6b0f6cf0e1fb310d2c42d3446898c24;p=~ardavis%2Fdapl.git common: new debug levels for low system memory, IA stats, and package info DAPL_DBG_TYPE_SYS_WARN = 0x800000 DAPL_DBG_TYPE_VER = 0x1000000 DAPL_DBG_TYPE_IA_STATS = 0x2000000 export DAPL_DBG_SYS_MEM = 5 will set the checking for memory less than 5% when DAPL_DBG_TYPE is set with bit DAPL_DBG_TYPE_SYS_WARN. The package must be built with --enable-counters for memory checking and IA stats capabilities. In addition, if DAPL_DBG_TYPE is set with bit DAPL_DBG_TYPE_VER then the package rev and build date will be sent to stdout during library init. Signed-off-by: Arlin Davis --- diff --git a/Makefile.am b/Makefile.am index 47a9d9c..78c8fd0 100755 --- a/Makefile.am +++ b/Makefile.am @@ -50,6 +50,8 @@ else AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" endif +AM_CFLAGS += -DPACKAGE_DATE=$$(date +'%Y%m%d') + sysconf_DATA = doc/dat.conf datlibdir = $(libdir) diff --git a/dapl/common/dapl_debug.c b/dapl/common/dapl_debug.c index cb45496..36adf24 100644 --- a/dapl/common/dapl_debug.c +++ b/dapl/common/dapl_debug.c @@ -31,8 +31,10 @@ #include #endif /* __KDAPL__ */ +DAPL_DBG_TYPE g_dapl_dbg_level; /* debug type override */ DAPL_DBG_TYPE g_dapl_dbg_type; /* initialized in dapl_init.c */ DAPL_DBG_DEST g_dapl_dbg_dest; /* initialized in dapl_init.c */ +int g_dapl_dbg_mem; /* initialized in dapl_init.c */ static char *_ptr_host_ = NULL; static char _hostname_[128]; @@ -50,7 +52,7 @@ void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...) 
last_t = start_t; } - if (type & g_dapl_dbg_type) { + if ((type & g_dapl_dbg_type) || (type & g_dapl_dbg_level)) { if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) { dapl_os_get_time(&current_t); delta_t = current_t - last_t; @@ -384,6 +386,8 @@ void dapli_stop_counters(DAT_HANDLE dh) if (g_dapl_dbg_type & DAPL_DBG_TYPE_DIAG_ERRS) dapl_stop_diag_cntrs(dh); + if (g_dapl_dbg_type & DAPL_DBG_TYPE_IA_STATS) + dapl_print_counter_str(dh, DCNT_IA_ALL_COUNTERS, 1, "_IA"); if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_STATS) dapl_print_counter_str(dh, DCNT_IA_ALL_COUNTERS, 1, "_CM"); else if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_ERRS) diff --git a/dapl/common/dapl_ep_create.c b/dapl/common/dapl_ep_create.c index c7dedde..ec11c7c 100644 --- a/dapl/common/dapl_ep_create.c +++ b/dapl/common/dapl_ep_create.c @@ -158,8 +158,7 @@ dapl_ep_create(IN DAT_IA_HANDLE ia_handle, */ if (ep_attr != NULL && ( #ifndef DAT_EXTENSIONS - ep_attr->service_type != - DAT_SERVICE_TYPE_RC || + ep_attr->service_type != DAT_SERVICE_TYPE_RC || #endif (recv_evd_handle == DAT_HANDLE_NULL && ep_attr->max_recv_dtos != 0) @@ -177,6 +176,7 @@ dapl_ep_create(IN DAT_IA_HANDLE ia_handle, dapl_ep_check_recv_completion_flags (ep_attr->recv_completion_flags)))) { dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG6); + dapl_log(DAPL_DBG_TYPE_ERR, "dapl_ep_create: failed EP attributes\n"); goto bail; } @@ -194,12 +194,11 @@ dapl_ep_create(IN DAT_IA_HANDLE ia_handle, ep_attr->max_request_dtos > ep_attr_limit.max_request_dtos || ep_attr->max_recv_iov > ep_attr_limit.max_recv_iov || ep_attr->max_request_iov > ep_attr_limit.max_request_iov - || ep_attr->max_rdma_read_in > - ep_attr_limit.max_rdma_read_in - || ep_attr->max_rdma_read_out > - ep_attr_limit.max_rdma_read_out) + || ep_attr->max_rdma_read_in > ep_attr_limit.max_rdma_read_in + || ep_attr->max_rdma_read_out > ep_attr_limit.max_rdma_read_out) { dat_status = DAT_INVALID_PARAMETER | DAT_INVALID_ARG6; + dapl_log(DAPL_DBG_TYPE_ERR, "dapl_ep_create: failed Transport 
attributes\n"); goto bail; } } diff --git a/dapl/common/dapl_ep_free.c b/dapl/common/dapl_ep_free.c index a8deeb2..178cae6 100644 --- a/dapl/common/dapl_ep_free.c +++ b/dapl/common/dapl_ep_free.c @@ -132,7 +132,7 @@ DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle) #ifdef DAPL_DBG /* check if event pending and warn, don't assert, state is valid */ if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { - dapl_dbg_log(DAPL_DBG_TYPE_WARN, " dat_ep_free WARNING: " + dapl_dbg_log(DAPL_DBG_TYPE_EP, " dat_ep_free WARNING: " "EVENT PENDING on ep %p, disconnect " "and wait before calling dat_ep_free\n", ep_ptr); } diff --git a/dapl/common/dapl_ep_post_rdma_write.c b/dapl/common/dapl_ep_post_rdma_write.c index b8bea97..784a0a3 100644 --- a/dapl/common/dapl_ep_post_rdma_write.c +++ b/dapl/common/dapl_ep_post_rdma_write.c @@ -93,7 +93,7 @@ dapl_ep_post_rdma_write(IN DAT_EP_HANDLE ep_handle, OP_RDMA_WRITE); dapl_dbg_log(DAPL_DBG_TYPE_RTN, - "dapl_ep_post_rdma_write () returns 0x%x", dat_status); + "dapl_ep_post_rdma_write () returns 0x%x\n", dat_status); return dat_status; } diff --git a/dapl/common/dapl_evd_connection_callb.c b/dapl/common/dapl_evd_connection_callb.c index a28d8d6..3c79103 100644 --- a/dapl/common/dapl_evd_connection_callb.c +++ b/dapl/common/dapl_evd_connection_callb.c @@ -153,8 +153,8 @@ dapl_evd_connection_callback(IN dp_ib_cm_handle_t ib_cm_handle, DAPL_MAX_PRIVATE_DATA_SIZE)); dapl_dbg_log(DAPL_DBG_TYPE_CM | DAPL_DBG_TYPE_CALLBACK, - "dapl_evd_connection_callback PEER REJ pd=%p sz=%d\n", - prd_ptr, private_data_size); + "dapl_evd_connection_callback PEER REJ ep=%p pd=%p sz=%d\n", + ep_ptr, prd_ptr, private_data_size); } case DAT_CONNECTION_EVENT_DISCONNECTED: case DAT_CONNECTION_EVENT_UNREACHABLE: diff --git a/dapl/common/dapl_evd_cq_async_error_callb.c b/dapl/common/dapl_evd_cq_async_error_callb.c index 258f8f2..2bd2a93 100644 --- a/dapl/common/dapl_evd_cq_async_error_callb.c +++ b/dapl/common/dapl_evd_cq_async_error_callb.c @@ -63,9 
+63,9 @@ dapl_evd_cq_async_error_callback(IN ib_hca_handle_t ib_hca_handle, DAPL_EVD *evd; DAT_RETURN dat_status; - dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION, - "dapl_evd_cq_async_error_callback (%p, %p, %p, %p)\n", - ib_hca_handle, ib_cq_handle, cause_ptr, context); + dapl_log(DAPL_DBG_TYPE_WARN, " -- %s (%p, %p, %p, %p)\n", + __FUNCTION__, ib_hca_handle, ib_cq_handle, + cause_ptr, context); if (NULL == context) { dapl_os_panic("NULL == context\n"); diff --git a/dapl/common/dapl_evd_dto_callb.c b/dapl/common/dapl_evd_dto_callb.c index 2e8d70e..143b7c9 100755 --- a/dapl/common/dapl_evd_dto_callb.c +++ b/dapl/common/dapl_evd_dto_callb.c @@ -130,8 +130,10 @@ dapl_evd_dto_callback(IN ib_hca_handle_t hca_handle, dat_status = dapls_ib_completion_notify(hca_handle, evd_ptr, IB_NOTIFY_ON_NEXT_COMP); - if (DAT_SUCCESS != dat_status) { + dapl_log(DAPL_DBG_TYPE_WARN, " -- %s notify ERR, CQ %p, st %x\n", + __FUNCTION__, (void *)evd_ptr->ib_cq_handle, state); + (void)dapls_evd_post_async_error_event(evd_ptr-> header. 
owner_ia-> diff --git a/dapl/common/dapl_evd_qp_async_error_callb.c b/dapl/common/dapl_evd_qp_async_error_callb.c index a9ea4ff..bc05d0f 100644 --- a/dapl/common/dapl_evd_qp_async_error_callb.c +++ b/dapl/common/dapl_evd_qp_async_error_callb.c @@ -115,10 +115,9 @@ dapl_evd_qp_async_error_callback(IN ib_hca_handle_t ib_hca_handle, async_evd = (DAPL_EVD *) ia_ptr->async_error_evd; DAPL_CNTR(ia_ptr, DCNT_IA_ASYNC_QP_ERROR); - dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION, - "--> %s: ep %p qp %p (%x) state %d\n", __FUNCTION__, - ep_ptr, - ep_ptr->qp_handle, ep_ptr->qpn, ep_ptr->param.ep_state); + dapl_log(DAPL_DBG_TYPE_WARN, " -- %s: ep %p qp %p (%x) state %d\n", + __FUNCTION__, ep_ptr, ep_ptr->qp_handle, + ep_ptr->qpn, ep_ptr->param.ep_state); /* * Transition to ERROR if we are connected; other states need to diff --git a/dapl/common/dapl_evd_un_async_error_callb.c b/dapl/common/dapl_evd_un_async_error_callb.c index 8b3f1bb..d6884c3 100644 --- a/dapl/common/dapl_evd_un_async_error_callb.c +++ b/dapl/common/dapl_evd_un_async_error_callb.c @@ -63,10 +63,6 @@ dapl_evd_un_async_error_callback(IN ib_hca_handle_t ib_hca_handle, DAT_EVENT_NUMBER async_event; DAT_RETURN dat_status; - dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION, - "dapl_evd_un_async_error_callback (%p, %p, %p)\n", - ib_hca_handle, cause_ptr, context); - if (NULL == context) { dapl_os_panic("NULL == context\n"); return; @@ -87,6 +83,4 @@ dapl_evd_un_async_error_callback(IN ib_hca_handle_t ib_hca_handle, async_event, async_evd->header.owner_ia); } - dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION, - "dapl_evd_un_async_error_callback () returns\n"); } diff --git a/dapl/include/dapl_debug.h b/dapl/include/dapl_debug.h index 6cbe028..ccc06fc 100644 --- a/dapl/include/dapl_debug.h +++ b/dapl/include/dapl_debug.h @@ -76,6 +76,9 @@ typedef enum DAPL_DBG_TYPE_LINK_ERRS = 0x100000, DAPL_DBG_TYPE_LINK_WARN = 0x200000, DAPL_DBG_TYPE_DIAG_ERRS = 0x400000, + 
DAPL_DBG_TYPE_SYS_WARN = 0x800000, + DAPL_DBG_TYPE_VER = 0x1000000, + DAPL_DBG_TYPE_IA_STATS = 0x2000000, } DAPL_DBG_TYPE; @@ -85,15 +88,17 @@ typedef enum DAPL_DBG_DEST_SYSLOG = 0x0002, } DAPL_DBG_DEST; +extern DAPL_DBG_TYPE g_dapl_dbg_level; extern DAPL_DBG_TYPE g_dapl_dbg_type; extern DAPL_DBG_DEST g_dapl_dbg_dest; +extern int g_dapl_dbg_mem; extern void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...); -#define dapl_log g_dapl_dbg_type==0 ? (void) 1 : dapl_internal_dbg_log +#define dapl_log !g_dapl_dbg_type && !g_dapl_dbg_level ? (void) 1 : dapl_internal_dbg_log #if defined(DAPL_DBG) -#define dapl_dbg_log g_dapl_dbg_type==0 ? (void) 1 : dapl_internal_dbg_log +#define dapl_dbg_log !g_dapl_dbg_type && !g_dapl_dbg_level ? (void) 1 : dapl_internal_dbg_log #else #define dapl_dbg_log(...) #endif diff --git a/dapl/udapl/dapl_init.c b/dapl/udapl/dapl_init.c index 07e5b31..9ccbaf5 100644 --- a/dapl/udapl/dapl_init.c +++ b/dapl/udapl/dapl_init.c @@ -47,6 +47,8 @@ #include "dapl_name_service.h" #include "dapl_timer_util.h" #include "dapl_vendor.h" +#include "config.h" + /* * dapl_init @@ -72,16 +74,24 @@ void dapl_init(void) /* set up debug type */ g_dapl_dbg_type = dapl_os_get_env_val("DAPL_DBG_TYPE", DAPL_DBG_TYPE_ERR | DAPL_DBG_TYPE_WARN); + + g_dapl_dbg_level = dapl_os_get_env_val("DAPL_DBG_LEVEL", 0); + /* set up debug destination */ g_dapl_dbg_dest = dapl_os_get_env_val("DAPL_DBG_DEST", DAPL_DBG_DEST_STDOUT); + g_dapl_dbg_mem = dapl_os_get_env_val("DAPL_DBG_SYS_MEM", 5); + /* open log file on first logging call if necessary */ if (g_dapl_dbg_dest & DAPL_DBG_DEST_SYSLOG) openlog("libdapl", LOG_ODELAY | LOG_PID | LOG_CONS, LOG_USER); - dapl_log(DAPL_DBG_TYPE_UTIL, "dapl_init: dbg_type=0x%x,dbg_dest=0x%x\n", - g_dapl_dbg_type, g_dapl_dbg_dest); + dapl_log(DAPL_DBG_TYPE_UTIL, "dapl_init: dbg_type=%x,dbg_dest=%x,dbg_level=%x\n", + g_dapl_dbg_type, g_dapl_dbg_dest, g_dapl_dbg_level); + + dapl_log(DAPL_DBG_TYPE_VER, " %s dapl-%s-%u, DAPL_DBG_TYPE 0x%x\n", + 
PROVIDER_NAME, VERSION, PACKAGE_DATE, g_dapl_dbg_type); /* See if the user is on a loopback setup */ g_dapl_loopback_connection = dapl_os_get_env_bool("DAPL_LOOPBACK"); diff --git a/dapl/udapl/dapl_lmr_create.c b/dapl/udapl/dapl_lmr_create.c index f1d5016..b93aced 100644 --- a/dapl/udapl/dapl_lmr_create.c +++ b/dapl/udapl/dapl_lmr_create.c @@ -108,6 +108,8 @@ dapli_lmr_create_virtual(IN DAPL_IA * ia, reg_desc.for_va = virt_addr; dat_status = DAT_SUCCESS; + dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_lmr_create (ia=%p,pz=%p)\n", ia, pz); + lmr = dapl_lmr_alloc(ia, DAT_MEM_TYPE_VIRTUAL, reg_desc, length, (DAT_PZ_HANDLE) pz, privileges); @@ -122,7 +124,9 @@ dapli_lmr_create_virtual(IN DAPL_IA * ia, lmr, virt_addr, length, privileges, va_type); - +#ifdef DAPL_COUNTERS + dapl_os_memchk(g_dapl_dbg_mem, "create_mr"); +#endif if (DAT_SUCCESS != dat_status) { dapl_lmr_dealloc(lmr); goto bail; diff --git a/dapl/udapl/linux/dapl_osd.h b/dapl/udapl/linux/dapl_osd.h index 0412461..1c6f9db 100644 --- a/dapl/udapl/linux/dapl_osd.h +++ b/dapl/udapl/linux/dapl_osd.h @@ -90,6 +90,7 @@ #include #include #include +#include #include #if !defined(SUSE_11) && !defined(REDHAT_EL5) && defined(__ia64__) @@ -434,6 +435,8 @@ dapl_os_wait_object_destroy ( * Memory Functions */ +STATIC _INLINE_ void dapl_os_memchk(int percent, const char *txt); + /* function prototypes */ STATIC _INLINE_ void *dapl_os_alloc (int size); @@ -448,10 +451,29 @@ STATIC _INLINE_ void * dapl_os_memcpy (void *dest, const void *src, int len); STATIC _INLINE_ int dapl_os_memcmp (const void *mem1, const void *mem2, int len); /* memory functions */ - +STATIC _INLINE_ void dapl_os_memchk(int percent, const char *txt) +{ + struct sysinfo si; + double mfree, threshold = (double)percent/100; + + sysinfo(&si); + mfree = si.freeram + si.sharedram + si.bufferram; + + if (mfree/(double)si.totalram < threshold) { + dapl_log(DAPL_DBG_TYPE_SYS_WARN, + " WARNING: LOW MEMORY: %s (Free %d Share %d, Bufs %d)" + " < %2.2f percent of total (%d 
MB) memory\n", + txt, si.freeram/(1024*1024), si.sharedram/(1024*1024), + si.bufferram/(1024*1024), si.totalram/(1024*1024), + (mfree/(double)si.totalram)*100); + } +} STATIC _INLINE_ void *dapl_os_alloc (int size) { +#ifdef DAPL_COUNTERS + dapl_os_memchk(g_dapl_dbg_mem, "malloc"); +#endif return malloc (size); }