git.openfabrics.org - ~ardavis/dapl.git/commitdiff
common: new debug levels for low system memory, IA stats, and package info
author: Arlin Davis <arlin.r.davis@intel.com>
Tue, 8 Jul 2014 21:20:27 +0000 (14:20 -0700)
committer: Arlin Davis <arlin.r.davis@intel.com>
Tue, 8 Jul 2014 21:20:27 +0000 (14:20 -0700)
DAPL_DBG_TYPE_SYS_WARN = 0x800000
DAPL_DBG_TYPE_VER      = 0x1000000
DAPL_DBG_TYPE_IA_STATS = 0x2000000

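Setting DAPL_DBG_SYS_MEM (for example, "export DAPL_DBG_SYS_MEM=5"; the
default is 5) sets the low-memory threshold as a percentage of total memory.
A warning is logged when free + shared + buffer memory drops below that
percentage and the DAPL_DBG_TYPE_SYS_WARN bit is set in DAPL_DBG_TYPE.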

The package must be built with --enable-counters for memory checking and
IA stats capabilities.

In addition, if the DAPL_DBG_TYPE_VER bit is set in DAPL_DBG_TYPE, then
the package revision and build date are logged during library init
(to stdout by default, or wherever DAPL_DBG_DEST directs debug output).

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
14 files changed:
Makefile.am
dapl/common/dapl_debug.c
dapl/common/dapl_ep_create.c
dapl/common/dapl_ep_free.c
dapl/common/dapl_ep_post_rdma_write.c
dapl/common/dapl_evd_connection_callb.c
dapl/common/dapl_evd_cq_async_error_callb.c
dapl/common/dapl_evd_dto_callb.c
dapl/common/dapl_evd_qp_async_error_callb.c
dapl/common/dapl_evd_un_async_error_callb.c
dapl/include/dapl_debug.h
dapl/udapl/dapl_init.c
dapl/udapl/dapl_lmr_create.c
dapl/udapl/linux/dapl_osd.h

index 47a9d9c4a8b6647392b8482308411ff938fc4cda..78c8fd082c0b8f650f5bf6076ac04d2d3e89023e 100755 (executable)
@@ -50,6 +50,8 @@ else
 AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAT_CONF="\"$(sysconfdir)/dat.conf\""
 endif
 
+AM_CFLAGS += -DPACKAGE_DATE=$$(date +'%Y%m%d')
+
 sysconf_DATA = doc/dat.conf
 
 datlibdir = $(libdir)
index cb454964231c1474fb304fc0478a4674b143dc71..36adf245b5dfb93df513db677dcb508224ee7a34 100644 (file)
 #include <stdlib.h>
 #endif                         /* __KDAPL__ */
 
+DAPL_DBG_TYPE g_dapl_dbg_level;        /* debug type override */
 DAPL_DBG_TYPE g_dapl_dbg_type; /* initialized in dapl_init.c */
 DAPL_DBG_DEST g_dapl_dbg_dest; /* initialized in dapl_init.c */
+int           g_dapl_dbg_mem;  /* initialized in dapl_init.c */
 
 static char *_ptr_host_ = NULL;
 static char _hostname_[128];
@@ -50,7 +52,7 @@ void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...)
                last_t = start_t;
        }
 
-       if (type & g_dapl_dbg_type) {
+       if ((type & g_dapl_dbg_type) || (type & g_dapl_dbg_level)) {
                if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) {
                        dapl_os_get_time(&current_t);
                        delta_t = current_t - last_t;
@@ -384,6 +386,8 @@ void dapli_stop_counters(DAT_HANDLE dh)
        if (g_dapl_dbg_type & DAPL_DBG_TYPE_DIAG_ERRS)
                dapl_stop_diag_cntrs(dh);
 
+       if (g_dapl_dbg_type & DAPL_DBG_TYPE_IA_STATS)
+               dapl_print_counter_str(dh, DCNT_IA_ALL_COUNTERS, 1, "_IA");
        if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_STATS)
                dapl_print_counter_str(dh, DCNT_IA_ALL_COUNTERS, 1, "_CM");
        else if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_ERRS)
index c7dedde6912da96b68f31e94e0a41200307052f0..ec11c7c604d0c965ce784c244661255629e0ad97 100644 (file)
@@ -158,8 +158,7 @@ dapl_ep_create(IN DAT_IA_HANDLE ia_handle,
         */
        if (ep_attr != NULL && (
 #ifndef DAT_EXTENSIONS
-                                      ep_attr->service_type !=
-                                      DAT_SERVICE_TYPE_RC ||
+                                      ep_attr->service_type != DAT_SERVICE_TYPE_RC ||
 #endif
                                       (recv_evd_handle == DAT_HANDLE_NULL
                                        && ep_attr->max_recv_dtos != 0)
@@ -177,6 +176,7 @@ dapl_ep_create(IN DAT_IA_HANDLE ia_handle,
                                           dapl_ep_check_recv_completion_flags
                                           (ep_attr->recv_completion_flags)))) {
                dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG6);
+               dapl_log(DAPL_DBG_TYPE_ERR, "dapl_ep_create: failed EP attributes\n");
                goto bail;
        }
 
@@ -194,12 +194,11 @@ dapl_ep_create(IN DAT_IA_HANDLE ia_handle,
                    ep_attr->max_request_dtos > ep_attr_limit.max_request_dtos
                    || ep_attr->max_recv_iov > ep_attr_limit.max_recv_iov
                    || ep_attr->max_request_iov > ep_attr_limit.max_request_iov
-                   || ep_attr->max_rdma_read_in >
-                   ep_attr_limit.max_rdma_read_in
-                   || ep_attr->max_rdma_read_out >
-                   ep_attr_limit.max_rdma_read_out)
+                   || ep_attr->max_rdma_read_in >  ep_attr_limit.max_rdma_read_in
+                   || ep_attr->max_rdma_read_out >  ep_attr_limit.max_rdma_read_out)
                {
                        dat_status = DAT_INVALID_PARAMETER | DAT_INVALID_ARG6;
+                       dapl_log(DAPL_DBG_TYPE_ERR, "dapl_ep_create: failed Transport attributes\n");
                        goto bail;
                }
        }
index a8deeb26bfe35fbc5607722920caee4b78b524b3..178cae6357d9e6fe7c6cf64ccc814f1680e8a97c 100644 (file)
@@ -132,7 +132,7 @@ DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle)
 #ifdef DAPL_DBG
        /* check if event pending and warn, don't assert, state is valid */
        if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) {
-               dapl_dbg_log(DAPL_DBG_TYPE_WARN, " dat_ep_free WARNING: "
+               dapl_dbg_log(DAPL_DBG_TYPE_EP, " dat_ep_free WARNING: "
                             "EVENT PENDING on ep %p, disconnect "
                             "and wait before calling dat_ep_free\n", ep_ptr);
        }
index b8bea9770fa362f0ee2ff586d38d3c61e41a105e..784a0a374509d1d15da6328e3b0c3d5b459aef59 100644 (file)
@@ -93,7 +93,7 @@ dapl_ep_post_rdma_write(IN DAT_EP_HANDLE ep_handle,
                                           OP_RDMA_WRITE);
 
        dapl_dbg_log(DAPL_DBG_TYPE_RTN,
-                    "dapl_ep_post_rdma_write () returns 0x%x", dat_status);
+                    "dapl_ep_post_rdma_write () returns 0x%x\n", dat_status);
 
        return dat_status;
 }
index a28d8d6a8029bb6e2ac73776a06d6d4c990afcca..3c791033d8c11ddc5fb1c5e67ab7dcbebef540e6 100644 (file)
@@ -153,8 +153,8 @@ dapl_evd_connection_callback(IN dp_ib_cm_handle_t ib_cm_handle,
                                                        DAPL_MAX_PRIVATE_DATA_SIZE));
 
                        dapl_dbg_log(DAPL_DBG_TYPE_CM | DAPL_DBG_TYPE_CALLBACK,
-                                    "dapl_evd_connection_callback PEER REJ pd=%p sz=%d\n",
-                                    prd_ptr, private_data_size);
+                                    "dapl_evd_connection_callback PEER REJ ep=%p pd=%p sz=%d\n",
+                                    ep_ptr, prd_ptr, private_data_size);
                }
        case DAT_CONNECTION_EVENT_DISCONNECTED:
        case DAT_CONNECTION_EVENT_UNREACHABLE:
index 258f8f287af767b096cf12dffba7013e090eb810..2bd2a9308a992c98cf6704cbb01f6a48258d4fc5 100644 (file)
@@ -63,9 +63,9 @@ dapl_evd_cq_async_error_callback(IN ib_hca_handle_t ib_hca_handle,
        DAPL_EVD *evd;
        DAT_RETURN dat_status;
 
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION,
-                    "dapl_evd_cq_async_error_callback (%p, %p, %p, %p)\n",
-                    ib_hca_handle, ib_cq_handle, cause_ptr, context);
+       dapl_log(DAPL_DBG_TYPE_WARN, " -- %s (%p, %p, %p, %p)\n",
+                __FUNCTION__, ib_hca_handle, ib_cq_handle,
+                cause_ptr, context);
 
        if (NULL == context) {
                dapl_os_panic("NULL == context\n");
index 2e8d70e8d7053f4e41b3a24b5cf6f91f7dae5abe..143b7c98bae4ea0b77bd741c8f1fe4e4315b0965 100755 (executable)
@@ -130,8 +130,10 @@ dapl_evd_dto_callback(IN ib_hca_handle_t hca_handle,
                        dat_status = dapls_ib_completion_notify(hca_handle,
                                                                evd_ptr,
                                                                IB_NOTIFY_ON_NEXT_COMP);
-
                        if (DAT_SUCCESS != dat_status) {
+                               dapl_log(DAPL_DBG_TYPE_WARN, " -- %s notify ERR, CQ %p, st %x\n",
+                                        __FUNCTION__, (void *)evd_ptr->ib_cq_handle, state);
+
                                (void)dapls_evd_post_async_error_event(evd_ptr->
                                                                       header.
                                                                       owner_ia->
index a9ea4ffb455a2a7351d055951781a0dc5b38a5d7..bc05d0f1a33b094fb94256d7b945ca60ae637018 100644 (file)
@@ -115,10 +115,9 @@ dapl_evd_qp_async_error_callback(IN ib_hca_handle_t ib_hca_handle,
        async_evd = (DAPL_EVD *) ia_ptr->async_error_evd;
        DAPL_CNTR(ia_ptr, DCNT_IA_ASYNC_QP_ERROR);
 
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION,
-                    "--> %s: ep %p qp %p (%x) state %d\n", __FUNCTION__,
-                    ep_ptr,
-                    ep_ptr->qp_handle, ep_ptr->qpn, ep_ptr->param.ep_state);
+       dapl_log(DAPL_DBG_TYPE_WARN, " -- %s: ep %p qp %p (%x) state %d\n",
+                __FUNCTION__,  ep_ptr, ep_ptr->qp_handle,
+                ep_ptr->qpn, ep_ptr->param.ep_state);
 
        /*
         * Transition to ERROR if we are connected; other states need to
index 8b3f1bb6813d2e24d6c51b29b0f7bd1092aa2bea..d6884c3f6f2a00a4c6a1789b6423ea2b4a928bd3 100644 (file)
@@ -63,10 +63,6 @@ dapl_evd_un_async_error_callback(IN ib_hca_handle_t ib_hca_handle,
        DAT_EVENT_NUMBER async_event;
        DAT_RETURN dat_status;
 
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION,
-                    "dapl_evd_un_async_error_callback (%p, %p, %p)\n",
-                    ib_hca_handle, cause_ptr, context);
-
        if (NULL == context) {
                dapl_os_panic("NULL == context\n");
                return;
@@ -87,6 +83,4 @@ dapl_evd_un_async_error_callback(IN ib_hca_handle_t ib_hca_handle,
                                                 async_event,
                                                 async_evd->header.owner_ia);
        }
-       dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK | DAPL_DBG_TYPE_EXCEPTION,
-                    "dapl_evd_un_async_error_callback () returns\n");
 }
index 6cbe0284df7895de2d86b9ef7f49b0aa59d36fa1..ccc06fcb597858f7fae3c8426672431f98f3a460 100644 (file)
@@ -76,6 +76,9 @@ typedef enum
     DAPL_DBG_TYPE_LINK_ERRS    = 0x100000,
     DAPL_DBG_TYPE_LINK_WARN    = 0x200000,
     DAPL_DBG_TYPE_DIAG_ERRS    = 0x400000,
+    DAPL_DBG_TYPE_SYS_WARN     = 0x800000,
+    DAPL_DBG_TYPE_VER          = 0x1000000,
+    DAPL_DBG_TYPE_IA_STATS     = 0x2000000,
 
 } DAPL_DBG_TYPE;
 
@@ -85,15 +88,17 @@ typedef enum
     DAPL_DBG_DEST_SYSLOG       = 0x0002,
 } DAPL_DBG_DEST;
 
+extern DAPL_DBG_TYPE   g_dapl_dbg_level;
 extern DAPL_DBG_TYPE   g_dapl_dbg_type;
 extern DAPL_DBG_DEST   g_dapl_dbg_dest;
+extern int             g_dapl_dbg_mem;
 
 extern void dapl_internal_dbg_log(DAPL_DBG_TYPE type,  const char *fmt,  ...);
 
-#define dapl_log g_dapl_dbg_type==0 ? (void) 1 : dapl_internal_dbg_log
+#define dapl_log !g_dapl_dbg_type && !g_dapl_dbg_level ? (void) 1 : dapl_internal_dbg_log
 
 #if defined(DAPL_DBG)
-#define dapl_dbg_log g_dapl_dbg_type==0 ? (void) 1 : dapl_internal_dbg_log
+#define dapl_dbg_log !g_dapl_dbg_type && !g_dapl_dbg_level ? (void) 1 : dapl_internal_dbg_log
 #else
 #define dapl_dbg_log(...)
 #endif
index 07e5b3126864c3bce4125d00c9a3fc6c345681ec..9ccbaf5d214d233d06b10aaf6029b3500f6f96ae 100644 (file)
@@ -47,6 +47,8 @@
 #include "dapl_name_service.h"
 #include "dapl_timer_util.h"
 #include "dapl_vendor.h"
+#include "config.h"
+
 
 /*
  * dapl_init
@@ -72,16 +74,24 @@ void dapl_init(void)
        /* set up debug type */
        g_dapl_dbg_type = dapl_os_get_env_val("DAPL_DBG_TYPE",
                                              DAPL_DBG_TYPE_ERR | DAPL_DBG_TYPE_WARN);
+
+       g_dapl_dbg_level = dapl_os_get_env_val("DAPL_DBG_LEVEL", 0);
+
        /* set up debug destination */
        g_dapl_dbg_dest = dapl_os_get_env_val("DAPL_DBG_DEST",
                                              DAPL_DBG_DEST_STDOUT);
 
+       g_dapl_dbg_mem = dapl_os_get_env_val("DAPL_DBG_SYS_MEM", 5);
+
        /* open log file on first logging call if necessary */
        if (g_dapl_dbg_dest & DAPL_DBG_DEST_SYSLOG)
                openlog("libdapl", LOG_ODELAY | LOG_PID | LOG_CONS, LOG_USER);
 
-       dapl_log(DAPL_DBG_TYPE_UTIL, "dapl_init: dbg_type=0x%x,dbg_dest=0x%x\n",
-                g_dapl_dbg_type, g_dapl_dbg_dest);
+       dapl_log(DAPL_DBG_TYPE_UTIL, "dapl_init: dbg_type=%x,dbg_dest=%x,dbg_level=%x\n",
+                g_dapl_dbg_type, g_dapl_dbg_dest, g_dapl_dbg_level);
+
+       dapl_log(DAPL_DBG_TYPE_VER, " %s dapl-%s-%u,  DAPL_DBG_TYPE 0x%x\n",
+                PROVIDER_NAME, VERSION, PACKAGE_DATE, g_dapl_dbg_type);
 
        /* See if the user is on a loopback setup */
        g_dapl_loopback_connection = dapl_os_get_env_bool("DAPL_LOOPBACK");
index f1d50164a9cc50ee58a2b523eff6204a64e5a440..b93aced11fab39809e51a3dd5576a70e3fbcb274 100644 (file)
@@ -108,6 +108,8 @@ dapli_lmr_create_virtual(IN DAPL_IA * ia,
        reg_desc.for_va = virt_addr;
        dat_status = DAT_SUCCESS;
 
+       dapl_dbg_log(DAPL_DBG_TYPE_API, "dapl_lmr_create (ia=%p,pz=%p)\n", ia, pz);
+
        lmr = dapl_lmr_alloc(ia,
                             DAT_MEM_TYPE_VIRTUAL,
                             reg_desc, length, (DAT_PZ_HANDLE) pz, privileges);
@@ -122,7 +124,9 @@ dapli_lmr_create_virtual(IN DAPL_IA * ia,
                                          lmr,
                                          virt_addr,
                                          length, privileges, va_type);
-
+#ifdef DAPL_COUNTERS
+       dapl_os_memchk(g_dapl_dbg_mem, "create_mr");
+#endif
        if (DAT_SUCCESS != dat_status) {
                dapl_lmr_dealloc(lmr);
                goto bail;
index 04124612afaf6648f6e7bef6caee1ad6c76ee264..1c6f9db8718368918dd1138a090e52d3ad9cc9bc 100644 (file)
@@ -90,6 +90,7 @@
 #include <signal.h>
 #include <netinet/tcp.h>
 #include <sys/utsname.h>
+#include <sys/sysinfo.h>
 #include <fcntl.h>
 
 #if !defined(SUSE_11) && !defined(REDHAT_EL5) && defined(__ia64__)
@@ -434,6 +435,8 @@ dapl_os_wait_object_destroy (
  * Memory Functions
  */
 
+STATIC _INLINE_ void dapl_os_memchk(int percent, const char *txt);
+
 /* function prototypes */
 STATIC _INLINE_ void *dapl_os_alloc (int size);
 
@@ -448,10 +451,29 @@ STATIC _INLINE_ void * dapl_os_memcpy (void *dest, const void *src, int len);
 STATIC _INLINE_ int dapl_os_memcmp (const void *mem1, const void *mem2, int len);
 
 /* memory functions */
-
+/*
+ * dapl_os_memchk
+ *
+ * Log a DAPL_DBG_TYPE_SYS_WARN message when free + shared + buffer RAM,
+ * as reported by sysinfo(2), is below 'percent' percent of total RAM.
+ *
+ * Input:
+ *     percent         low-memory threshold, in percent of total RAM
+ *     txt             caller-supplied tag printed in the warning
+ *
+ * Returns:
+ *     none; does nothing if sysinfo() fails or reports no total RAM
+ */
+STATIC _INLINE_ void dapl_os_memchk(int percent, const char *txt)
+{
+       struct sysinfo si;
+       double mfree, threshold = (double)percent/100;
+       unsigned long long unit;
+       const unsigned long long mb = 1024*1024;
+
+       /* on failure si is not filled in; zero totalram would divide by 0 */
+       if (sysinfo(&si) != 0 || si.totalram == 0)
+               return;
+
+       /* sum in double so the three counters cannot wrap an unsigned long */
+       mfree = (double)si.freeram + (double)si.sharedram + (double)si.bufferram;
+
+       /* sysinfo sizes are counted in mem_unit bytes; treat 0 as bytes */
+       unit = si.mem_unit ? si.mem_unit : 1;
+
+       if (mfree/(double)si.totalram < threshold) {
+               /* argument order and types must match the conversions below */
+               dapl_log(DAPL_DBG_TYPE_SYS_WARN,
+                        " WARNING: LOW MEMORY: %s (Free %llu Share %llu, Bufs %llu)"
+                        " < %2.2f percent of total (%llu MB) memory\n",
+                        txt,
+                        (unsigned long long)si.freeram * unit / mb,
+                        (unsigned long long)si.sharedram * unit / mb,
+                        (unsigned long long)si.bufferram * unit / mb,
+                        (mfree/(double)si.totalram)*100,
+                        (unsigned long long)si.totalram * unit / mb);
+       }
+}
 
 STATIC _INLINE_ void *dapl_os_alloc (int size) /* thin wrapper: allocate size bytes with malloc() */
 {
+#ifdef DAPL_COUNTERS /* memory check is compiled in only for counter-enabled builds */
+    dapl_os_memchk(g_dapl_dbg_mem, "malloc"); /* low-memory check, tagged "malloc", runs before every allocation */
+#endif /* DAPL_COUNTERS */
     return malloc (size); /* malloc's result, including NULL, is passed straight back to the caller */
 }