From 6ffb5d3ecdfd40c405e99f04dfba58e24db9966c Mon Sep 17 00:00:00 2001 From: Nichamon Naksinehaboon Date: Fri, 7 Jul 2023 03:37:01 -0500 Subject: [PATCH] [FIXUP] gpu metrics --- .../sampler/gpu_metrics_sampler/gmg_log.c | 7 +-- .../sampler/gpu_metrics_sampler/gmg_log.h | 6 +-- .../gpu_metrics_ldms_sampler.c | 43 ++++++++++--------- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.c b/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.c index ef0c1e1f0..778f4b81d 100644 --- a/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.c +++ b/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.c @@ -51,11 +51,12 @@ #include "gmg_log.h" -extern ovis_log_t mylog; +ovis_log_t __gmg_log; /* the one definition of the GMGLOG() log handle; gmg_log.h declares it extern */ ovis_log_t setGmgLoggingFunction( - const ovis_log_t _mylog) { - ovis_log_t oldPf = mylog = _mylog; + const ovis_log_t pi_log) { + ovis_log_t oldPf = __gmg_log; GMGLOG(OVIS_LDEBUG, "Updated msglog\n"); + __gmg_log = pi_log; return oldPf; } diff --git a/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.h b/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.h index 1fa6fa73b..86f17aac7 100644 --- a/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.h +++ b/ldms/src/contrib/sampler/gpu_metrics_sampler/gmg_log.h @@ -59,18 +59,18 @@ #include "ovis_log/ovis_log.h" #include "ldmsd.h" // contains log function prototype; return type of log function is void. -extern ovis_log_t mylog; +extern ovis_log_t __gmg_log; /* declaration only: defined once in gmg_log.c so including this header never creates a second definition */ /** * The following are provided for convenience, since msglog is fully accessible. */ // GMGLOG() is only used in gather_gpu_metrics_from_one_api.c and gmg_ldms_util.c. #define GMGLOG(LEVEL, FMT, ...) 
do { \ - ovis_log(mylog, (LEVEL), (FMT), ##__VA_ARGS__); \ + ovis_log(__gmg_log, (LEVEL), (FMT), ##__VA_ARGS__); \ } while (0) ovis_log_t setGmgLoggingFunction( - const ovis_log_t mylog // ovis_log_t is already a pointer type + const ovis_log_t pi_log // ovis_log_t is already a pointer type ); #endif // _GMG_LOG_H_ diff --git a/ldms/src/contrib/sampler/gpu_metrics_sampler/gpu_metrics_ldms_sampler.c b/ldms/src/contrib/sampler/gpu_metrics_sampler/gpu_metrics_ldms_sampler.c index 698805344..d7fc276d4 100644 --- a/ldms/src/contrib/sampler/gpu_metrics_sampler/gpu_metrics_ldms_sampler.c +++ b/ldms/src/contrib/sampler/gpu_metrics_sampler/gpu_metrics_ldms_sampler.c @@ -69,7 +69,7 @@ #include "gmg_ldms_util.h" #include "gather_gpu_metrics_from_one_api.h" -static ovis_log_t mylog; +static ovis_log_t __gpu_metrics_log; static uint32_t g_numberOfDevicesInSchema = 0; @@ -104,13 +104,13 @@ void free_base() { ze_driver_handle_t getGpuDriver() { ze_result_t res = initializeOneApi(); // only slow the first time it is called for each process if (res != ZE_RESULT_SUCCESS) { - ovis_log(mylog, OVIS_LERROR, "!!!initializeOneApi() => 0x%x\n", res); + ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!initializeOneApi() => 0x%x\n", res); return NULL; } ze_driver_handle_t hDriver = getDriver(); if (hDriver == NULL) { - ovis_log(mylog, OVIS_LERROR, "!!!getDriver() => NULL\n"); + ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!getDriver() => NULL\n"); return NULL; } @@ -130,14 +130,14 @@ static int create_metric_set_schema_and_set(base_data_t base) { ze_driver_handle_t hDriver = getGpuDriver(); if (hDriver == NULL) { - ovis_log(mylog, OVIS_LERROR, "!!!getGpuDriver() => NULL\n"); + ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!getGpuDriver() => NULL\n"); goto err; } uint32_t numDevices = 0; ze_device_handle_t *phDevices = enumerateGpuDevices(hDriver, &numDevices); if (phDevices == NULL) { - ovis_log(mylog, OVIS_LERROR, "!!!enumerateGpuDevices(&numDevices=%p) => NULL, %d\n", &numDevices, numDevices); + 
ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!enumerateGpuDevices(&numDevices=%p) => NULL, %d\n", &numDevices, numDevices); goto err; } freeZeDeviceHandle(phDevices); @@ -148,7 +148,7 @@ static int create_metric_set_schema_and_set(base_data_t base) { schema = base_schema_new(base); if (!schema) { - ovis_log(mylog, OVIS_LERROR, + ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!%s: The schema '%s' could not be created, errno=%d.\n", __FILE__, base->schema_name, errno); rc = errno; @@ -184,7 +184,7 @@ static int create_metric_set_schema_and_set(base_data_t base) { static void printValList(const char *szListName, struct attr_value_list *av_list) { size_t listSize = MIN(av_list->count, av_list->size); for (size_t i = 0; i < listSize; i++) { - ovis_log(mylog, OVIS_LDEBUG, "%s[%d] = %s:%s\n", + ovis_log(__gpu_metrics_log, OVIS_LDEBUG, "%s[%d] = %s:%s\n", szListName, i, av_name(av_list, i), av_value_at_idx(av_list, i)); } } @@ -201,7 +201,7 @@ static int config_check(struct attr_value_list *keyword_list, struct attr_value_ for (i = 0; i < (sizeof(deprecated) / sizeof(deprecated[0])); i++) { value = av_value(attribute_value_list, deprecated[i]); if (value) { - ovis_log(mylog, OVIS_LERROR, SAMP ": !!!config argument %s has been deprecated.\n", + ovis_log(__gpu_metrics_log, OVIS_LERROR, SAMP ": !!!config argument %s has been deprecated.\n", deprecated[i]); return EINVAL; } @@ -234,7 +234,7 @@ static int config(struct ldmsd_plugin *self, if (getSimulationMode() == true) { // Log this ERROR so that it appears in /opt/clmgr/log/ldms_sampler.log - ovis_log(mylog, OVIS_LERROR, "Simulation mode is ON\n"); // no really an error so don't prefix with '!!!' + ovis_log(__gpu_metrics_log, OVIS_LERROR, "Simulation mode is ON\n"); // no really an error so don't prefix with '!!!' 
} printValList("keyword_list", keyword_list); @@ -243,7 +243,7 @@ static int config(struct ldmsd_plugin *self, int rc; if (set) { - ovis_log(mylog, OVIS_LERROR, SAMP ": !!!Set already created.\n"); + ovis_log(__gpu_metrics_log, OVIS_LERROR, SAMP ": !!!Set already created.\n"); return EINVAL; } @@ -254,7 +254,7 @@ static int config(struct ldmsd_plugin *self, // Create an instance from the base "class". This is effectively calling // the base class constructor. - base = base_config(attribute_value_list, SAMP, SAMP, mylog); + base = base_config(attribute_value_list, SAMP, SAMP, __gpu_metrics_log); if (!base) { rc = errno; goto err; @@ -264,7 +264,7 @@ static int config(struct ldmsd_plugin *self, // is considered well-defined after the metric set schema is defined. rc = create_metric_set_schema_and_set(base); if (rc) { - ovis_log(mylog, OVIS_LERROR, SAMP ": !!!failed to create a metric set.\n"); + ovis_log(__gpu_metrics_log, OVIS_LERROR, SAMP ": !!!failed to create a metric set.\n"); goto err; } @@ -292,20 +292,20 @@ static ldms_set_t get_set(struct ldmsd_sampler *self) { */ static int sample(struct ldmsd_sampler *self) { if (!set) { - ovis_log(mylog, OVIS_LDEBUG, SAMP ": plugin not initialized\n"); + ovis_log(__gpu_metrics_log, OVIS_LDEBUG, SAMP ": plugin not initialized\n"); return EINVAL; } ze_driver_handle_t hDriver = getGpuDriver(); if (hDriver == NULL) { - ovis_log(mylog, OVIS_LERROR, "!!!getGpuDriver() => NULL\n"); + ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!getGpuDriver() => NULL\n"); return EINVAL; } uint32_t numDevices = 0; ze_device_handle_t *phDevices = enumerateGpuDevices(hDriver, &numDevices); if (phDevices == NULL) { - ovis_log(mylog, OVIS_LERROR, "!!!enumerateGpuDevices(&numDevices=%p) => NULL, %d\n", &numDevices, numDevices); + ovis_log(__gpu_metrics_log, OVIS_LERROR, "!!!enumerateGpuDevices(&numDevices=%p) => NULL, %d\n", &numDevices, numDevices); return EINVAL; } uint32_t numDevicesToSample = MIN(g_numberOfDevicesInSchema, numDevices); // cannot 
sample more than schema size @@ -316,7 +316,7 @@ static int sample(struct ldmsd_sampler *self) { size_t mallocCount = getMallocCount(); if (mallocCount != 1) { // Only allocated memory is the device handler array. - ovis_log(mylog, OVIS_LERROR, SAMP ": !!!mallocCount=%ld != 1\n", mallocCount); + ovis_log(__gpu_metrics_log, OVIS_LERROR, SAMP ": !!!mallocCount=%ld != 1\n", mallocCount); } freeZeDeviceHandle(phDevices); @@ -335,14 +335,14 @@ static void term(struct ldmsd_plugin *self) { size_t mallocCount = getMallocCount(); if (mallocCount) { // This following log message is never printed; maybe term was never called. - ovis_log(mylog, OVIS_LERROR, SAMP ": !!!mallocCount=%ld != 0\n", mallocCount); + ovis_log(__gpu_metrics_log, OVIS_LERROR, SAMP ": !!!mallocCount=%ld != 0\n", mallocCount); } free_base(); free_set(); free_schema(); - if (mylog) - ovis_log_destroy(mylog); + if (__gpu_metrics_log) + ovis_log_destroy(__gpu_metrics_log); } /** @@ -367,11 +367,12 @@ static struct ldmsd_sampler gpu_metrics_plugin = { * @return plugin instance. */ struct ldmsd_plugin *get_plugin() { - mylog = ovis_log_register("sampler."SAMP, "Messages for the " SAMP " plugin"); - if (!mylog) { + __gpu_metrics_log = ovis_log_register("sampler."SAMP, "Messages for the " SAMP " plugin"); + if (!__gpu_metrics_log) { ovis_log(NULL, OVIS_LWARN, "Failed to create the " SAMP " plugin's " "log subsystem. Error %d.\n", errno); } + setGmgLoggingFunction(__gpu_metrics_log); set = NULL; return &gpu_metrics_plugin.base; }