diff --git a/dlio_profiler/logger.py b/dlio_profiler/logger.py index ca648038..109b87cf 100644 --- a/dlio_profiler/logger.py +++ b/dlio_profiler/logger.py @@ -1,8 +1,11 @@ from functools import wraps from typing import Dict - -import dlio_profiler_py as profiler import os +DLIO_PROFILER_ENABLED = True if os.getenv("DLIO_PROFILER_ENABLED", '1') == '1' else False + +if DLIO_PROFILER_ENABLED: + import dlio_profiler_py as profiler + from pathlib import Path import inspect @@ -11,8 +14,9 @@ class dlio_logger: __instance = None def __init__(self, logfile): - self.logfile = logfile - self.logger = None + if DLIO_PROFILER_ENABLED: + self.logfile = logfile + self.logger = None dlio_logger.__instance = self @classmethod @@ -29,153 +33,176 @@ def get_instance(cls, log_file=None): def initialize_log(logfile, data_dir, process_id): log_file = Path(logfile) instance = dlio_logger.get_instance(log_file) - os.makedirs(log_file.parent, exist_ok=True) - if os.path.isfile(log_file): - os.remove(log_file) - instance.logger = profiler - instance.logger.initialize(f"{instance.logfile}", f"{data_dir}", process_id=process_id) + if DLIO_PROFILER_ENABLED: + os.makedirs(log_file.parent, exist_ok=True) + if os.path.isfile(log_file): + os.remove(log_file) + instance.logger = profiler + instance.logger.initialize(f"{instance.logfile}", f"{data_dir}", process_id=process_id) def get_time(self): - return self.logger.get_time() + if DLIO_PROFILER_ENABLED: + return self.logger.get_time() + return 0 def log_event(self, name, cat, start_time, duration, int_args=None): - if int_args is None: - int_args = {} - self.logger.log_event(name=name, cat=cat, start_time=start_time, duration=duration, int_args=int_args) + if DLIO_PROFILER_ENABLED: + if int_args is None: + int_args = {} + self.logger.log_event(name=name, cat=cat, start_time=start_time, duration=duration, int_args=int_args) def finalize(self): - self.logger.finalize() + if DLIO_PROFILER_ENABLED: + self.logger.finalize() class fn_interceptor(object): def __init__(self, cat, name=None, epoch=None, step=None, image_idx=None, image_size=None): - if not name: - name = inspect.stack()[1].function - self._name = name - self._cat = cat - self._arguments: Dict[str, int] = {} - if epoch is not None: self._arguments["epoch"] = epoch - if step is not None: self._arguments["step"] = step - if image_idx is not None: self._arguments["image_idx"] = image_idx - if image_size is not None: self._arguments["image_size"] = image_size - self.reset() + if DLIO_PROFILER_ENABLED: + if not name: + name = inspect.stack()[1].function + self._name = name + self._cat = cat + self._arguments: Dict[str, int] = {} + if epoch is not None: self._arguments["epoch"] = epoch + if step is not None: self._arguments["step"] = step + if image_idx is not None: self._arguments["image_idx"] = image_idx + if image_size is not None: self._arguments["image_size"] = image_size + self.reset() def __enter__(self): - self._t1 = dlio_logger.get_instance().get_time() + if DLIO_PROFILER_ENABLED: + self._t1 = dlio_logger.get_instance().get_time() return self def update(self, epoch=None, step=None, image_idx=None, image_size=None): - - if epoch is not None: self._arguments["epoch"] = epoch - if step is not None: self._arguments["step"] = step - if image_idx is not None: self._arguments["image_idx"] = image_idx - if image_size is not None: self._arguments["image_size"] = image_size + if DLIO_PROFILER_ENABLED: + if epoch is not None: self._arguments["epoch"] = epoch + if step is not None: self._arguments["step"] = step + if image_idx is not None: self._arguments["image_idx"] = image_idx + if image_size is not None: self._arguments["image_size"] = image_size return self def flush(self): - self._t2 = dlio_logger.get_instance().get_time() - if len(self._arguments) > 0: - dlio_logger.get_instance().log_event(name=self._name, cat=self._cat, start_time=self._t1, - duration=self._t2 - self._t1, - int_args=self._arguments) - else: - dlio_logger.get_instance().log_event(name=self._name, cat=self._cat, start_time=self._t1, - duration=self._t2 - self._t1) - self._flush = True + if DLIO_PROFILER_ENABLED: + self._t2 = dlio_logger.get_instance().get_time() + if len(self._arguments) > 0: + dlio_logger.get_instance().log_event(name=self._name, cat=self._cat, start_time=self._t1, + duration=self._t2 - self._t1, + int_args=self._arguments) + else: + dlio_logger.get_instance().log_event(name=self._name, cat=self._cat, start_time=self._t1, + duration=self._t2 - self._t1) + self._flush = True return self def reset(self): - self._t1 = dlio_logger.get_instance().get_time() - self._t2 = self._t1 - self._flush = False + if DLIO_PROFILER_ENABLED: + self._t1 = dlio_logger.get_instance().get_time() + self._t2 = self._t1 + self._flush = False return self def __exit__(self, exc_type, exc_val, exc_tb): - if not self._flush: - self.flush() + if DLIO_PROFILER_ENABLED: + if not self._flush: + self.flush() def log(self, func): - arg_names = inspect.getfullargspec(func)[0] + if DLIO_PROFILER_ENABLED: + arg_names = inspect.getfullargspec(func)[0] @wraps(func) def wrapper(*args, **kwargs): - if "self" == arg_names[0]: - if hasattr(args[0], "epoch"): - self._arguments["epoch"] = args[0].epoch - if hasattr(args[0], "step"): - self._arguments["step"] = args[0].step - if hasattr(args[0], "image_size"): - self._arguments["image_size"] = args[0].image_size - if hasattr(args[0], "image_idx"): - self._arguments["image_idx"] = args[0].image_idx - for name, value in zip(arg_names[1:], kwargs): - if hasattr(args, name): - setattr(args, name, value) - if name == "epoch": - self._arguments["epoch"] = int(value) - elif name == "image_idx": - self._arguments["image_idx"] = int(value) - elif name == "image_size": - self._arguments["image_size"] = int(value) - elif name == "step": - self._arguments["image_size"] = int(value) - - start = dlio_logger.get_instance().get_time() + if DLIO_PROFILER_ENABLED: + if "self" == arg_names[0]: + if hasattr(args[0], "epoch"): + self._arguments["epoch"] = args[0].epoch + if hasattr(args[0], "step"): + self._arguments["step"] = args[0].step + if hasattr(args[0], "image_size"): + self._arguments["image_size"] = args[0].image_size + if hasattr(args[0], "image_idx"): + self._arguments["image_idx"] = args[0].image_idx + for name, value in zip(arg_names[1:], kwargs): + if hasattr(args, name): + setattr(args, name, value) + if name == "epoch": + self._arguments["epoch"] = int(value) + elif name == "image_idx": + self._arguments["image_idx"] = int(value) + elif name == "image_size": + self._arguments["image_size"] = int(value) + elif name == "step": + self._arguments["image_size"] = int(value) + + start = dlio_logger.get_instance().get_time() x = func(*args, **kwargs) - end = dlio_logger.get_instance().get_time() - if len(self._arguments) > 0: - dlio_logger.get_instance().log_event(name=func.__qualname__, cat=self._cat, start_time=start, - duration=end - start, - int_args=self._arguments) - else: - dlio_logger.get_instance().log_event(name=func.__qualname__, cat=self._cat, start_time=start, - duration=end - start) + if DLIO_PROFILER_ENABLED: + end = dlio_logger.get_instance().get_time() + if len(self._arguments) > 0: + dlio_logger.get_instance().log_event(name=func.__qualname__, cat=self._cat, start_time=start, + duration=end - start, + int_args=self._arguments) + else: + dlio_logger.get_instance().log_event(name=func.__qualname__, cat=self._cat, start_time=start, + duration=end - start) return x return wrapper def iter(self, func, iter_name="step"): - self._arguments[iter_name] = 1 - name = f"{inspect.stack()[1].function}.iter" - kernal_name = f"{inspect.stack()[1].function}" - start = dlio_logger.get_instance().get_time() + if DLIO_PROFILER_ENABLED: + self._arguments[iter_name] = 1 + name = f"{inspect.stack()[1].function}.iter" + kernal_name = f"{inspect.stack()[1].function}" + start = dlio_logger.get_instance().get_time() for v in func: - end = dlio_logger.get_instance().get_time() - t0 = dlio_logger.get_instance().get_time() + if DLIO_PROFILER_ENABLED: + end = dlio_logger.get_instance().get_time() + t0 = dlio_logger.get_instance().get_time() yield v - t1 = dlio_logger.get_instance().get_time() - - if len(self._arguments) > 0: - dlio_logger.get_instance().log_event(name=name, cat=self._cat, start_time=start, duration=end - start, - int_args=self._arguments) - dlio_logger.get_instance().log_event(name=kernal_name, cat=self._cat, start_time=t0, duration=t1 - t0, - int_args=self._arguments) - else: - dlio_logger.get_instance().log_event(name=name, cat=self._cat, start_time=start, duration=end - start) - dlio_logger.get_instance().log_event(name=kernal_name, cat=self._cat, start_time=t0, duration=t1 - t0) - self._arguments[iter_name] += 1 - start = dlio_logger.get_instance().get_time() + if DLIO_PROFILER_ENABLED: + t1 = dlio_logger.get_instance().get_time() + + if len(self._arguments) > 0: + dlio_logger.get_instance().log_event(name=name, cat=self._cat, start_time=start, + duration=end - start, + int_args=self._arguments) + dlio_logger.get_instance().log_event(name=kernal_name, cat=self._cat, start_time=t0, + duration=t1 - t0, + int_args=self._arguments) + else: + dlio_logger.get_instance().log_event(name=name, cat=self._cat, start_time=start, + duration=end - start) + dlio_logger.get_instance().log_event(name=kernal_name, cat=self._cat, start_time=t0, + duration=t1 - t0) + self._arguments[iter_name] += 1 + start = dlio_logger.get_instance().get_time() def log_init(self, init): - arg_names = inspect.getfullargspec(init)[0] + if DLIO_PROFILER_ENABLED: + arg_names = inspect.getfullargspec(init)[0] @wraps(init) def new_init(args, *kwargs): - for name, value in zip(arg_names[1:], kwargs): - setattr(args, name, value) - if name == "epoch": - self._arguments["epoch"] = value - start = dlio_logger.get_instance().get_time() + if DLIO_PROFILER_ENABLED: + for name, value in zip(arg_names[1:], kwargs): + setattr(args, name, value) + if name == "epoch": + self._arguments["epoch"] = value + start = dlio_logger.get_instance().get_time() init(args, *kwargs) - end = dlio_logger.get_instance().get_time() - - if len(self._arguments) > 0: - dlio_logger.get_instance().log_event(name=init.__qualname__, cat=self._cat, start_time=start, - duration=end - start, - int_args=self._arguments) - else: - dlio_logger.get_instance().log_event(name=init.__qualname__, cat=self._cat, start_time=start, - duration=end - start) + if DLIO_PROFILER_ENABLED: + end = dlio_logger.get_instance().get_time() + + if len(self._arguments) > 0: + dlio_logger.get_instance().log_event(name=init.__qualname__, cat=self._cat, start_time=start, + duration=end - start, + int_args=self._arguments) + else: + dlio_logger.get_instance().log_event(name=init.__qualname__, cat=self._cat, start_time=start, + duration=end - start) return new_init diff --git a/external/brahma b/external/brahma index a37419fe..9b4bd34b 160000 --- a/external/brahma +++ b/external/brahma @@ -1 +1 @@ -Subproject commit a37419fed6a991d50d2fabf7c287ec519ce392eb +Subproject commit 9b4bd34ba3b10b7c525f92136be8dc5a0262ee50 diff --git a/external/pybind11 b/external/pybind11 index be97c5a9..8b03ffa7 160000 --- a/external/pybind11 +++ b/external/pybind11 @@ -1 +1 @@ -Subproject commit be97c5a98b4b252c524566f508b5c79410d118c6 +Subproject commit 8b03ffa7c06cd9c8a38297b1c8923695d1ff1b07 diff --git a/src/dlio_profiler/brahma/posix.cpp b/src/dlio_profiler/brahma/posix.cpp index ef2bc2cc..8da36fd0 100644 --- a/src/dlio_profiler/brahma/posix.cpp +++ b/src/dlio_profiler/brahma/posix.cpp @@ -347,6 +347,8 @@ int brahma::POSIXDLIOProfiler::unlink(const char *pathname) { int brahma::POSIXDLIOProfiler::symlink(const char *path1, const char *path2) { BRAHMA_MAP_OR_FAIL(symlink); DLIO_LOGGER_START(path1); + DLIO_LOGGER_UPDATE(path1); + DLIO_LOGGER_UPDATE(path2); int ret = __real_symlink(path1, path2); DLIO_LOGGER_END(); return ret; @@ -355,6 +357,9 @@ int brahma::POSIXDLIOProfiler::symlink(const char *path1, const char *path2) { int brahma::POSIXDLIOProfiler::symlinkat(const char *path1, int fd, const char *path2) { BRAHMA_MAP_OR_FAIL(symlinkat); DLIO_LOGGER_START(path1); + DLIO_LOGGER_UPDATE(path1); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(path2); int ret = __real_symlinkat(path1, fd, path2); DLIO_LOGGER_END(); return ret; @@ -363,6 +368,8 @@ int brahma::POSIXDLIOProfiler::symlinkat(const char *path1, int fd, const char * ssize_t brahma::POSIXDLIOProfiler::readlink(const char *path, char *buf, size_t bufsize) { BRAHMA_MAP_OR_FAIL(readlink); DLIO_LOGGER_START(path); + DLIO_LOGGER_UPDATE(path); + DLIO_LOGGER_UPDATE(bufsize); ssize_t ret = __real_readlink(path, buf, bufsize); DLIO_LOGGER_END(); return ret; @@ -371,6 +378,9 @@ ssize_t brahma::POSIXDLIOProfiler::readlink(const char *path, char *buf, size_t ssize_t brahma::POSIXDLIOProfiler::readlinkat(int fd, const char *path, char *buf, size_t bufsize) { BRAHMA_MAP_OR_FAIL(readlinkat); DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(path); + DLIO_LOGGER_UPDATE(bufsize); ssize_t ret = __real_readlinkat(fd, path, buf, bufsize); DLIO_LOGGER_END(); return ret; @@ -379,6 +389,8 @@ ssize_t brahma::POSIXDLIOProfiler::readlinkat(int fd, const char *path, char *bu int brahma::POSIXDLIOProfiler::rename(const char *oldpath, const char *newpath) { BRAHMA_MAP_OR_FAIL(rename); DLIO_LOGGER_START(oldpath); + DLIO_LOGGER_UPDATE(oldpath); + DLIO_LOGGER_UPDATE(newpath); int ret = __real_rename(oldpath, newpath); DLIO_LOGGER_END(); return ret; @@ -387,6 +399,8 @@ int brahma::POSIXDLIOProfiler::rename(const char *oldpath, const char *newpath) int brahma::POSIXDLIOProfiler::chmod(const char *path, mode_t mode) { BRAHMA_MAP_OR_FAIL(chmod); DLIO_LOGGER_START(path); + DLIO_LOGGER_UPDATE(path); + DLIO_LOGGER_UPDATE(mode); int ret = __real_chmod(path, mode); DLIO_LOGGER_END(); return ret; @@ -395,6 +409,9 @@ int brahma::POSIXDLIOProfiler::chmod(const char *path, mode_t mode) { int brahma::POSIXDLIOProfiler::chown(const char *path, uid_t owner, gid_t group) { BRAHMA_MAP_OR_FAIL(chown); DLIO_LOGGER_START(path); + DLIO_LOGGER_UPDATE(path); + DLIO_LOGGER_UPDATE(owner); + DLIO_LOGGER_UPDATE(group); int ret = __real_chown(path, owner, group); DLIO_LOGGER_END(); return ret; @@ -403,6 +420,9 @@ int brahma::POSIXDLIOProfiler::chown(const char *path, uid_t owner, gid_t group) int brahma::POSIXDLIOProfiler::lchown(const char *path, uid_t owner, gid_t group) { BRAHMA_MAP_OR_FAIL(lchown); DLIO_LOGGER_START(path); + DLIO_LOGGER_UPDATE(path); + DLIO_LOGGER_UPDATE(owner); + DLIO_LOGGER_UPDATE(group); int ret = __real_lchown(path, owner, group); DLIO_LOGGER_END(); return ret; @@ -419,17 +439,52 @@ int brahma::POSIXDLIOProfiler::utime(const char *filename, const utimbuf *buf) { DIR* brahma::POSIXDLIOProfiler::opendir(const char *name) { BRAHMA_MAP_OR_FAIL(opendir); DLIO_LOGGER_START(name); + DLIO_LOGGER_UPDATE(name); DIR* ret = __real_opendir(name); DLIO_LOGGER_END(); return ret; } -int brahma::POSIXDLIOProfiler::fcntl(int fd, int cmd, long arg) { +int brahma::POSIXDLIOProfiler::fcntl(int fd, int cmd, ...) { BRAHMA_MAP_OR_FAIL(fcntl); - DLIO_LOGGER_START(fd); - int ret = __real_fcntl(fd, cmd, arg); - DLIO_LOGGER_END(); - return ret; + if(cmd==F_DUPFD || cmd==F_DUPFD_CLOEXEC || cmd==F_SETFD || cmd==F_SETFL || cmd==F_SETOWN) { // arg: int + va_list arg; + va_start(arg, cmd); + int val = va_arg(arg, int); + va_end(arg); + DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(cmd); + int ret = __real_fcntl(fd, cmd, val); + DLIO_LOGGER_END(); + return ret; + } else if(cmd==F_GETFD || cmd==F_GETFL || cmd==F_GETOWN) { + DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(cmd); + int ret = __real_fcntl(fd, cmd); + DLIO_LOGGER_END(); + return ret; + } else if(cmd==F_SETLK || cmd==F_SETLKW || cmd==F_GETLK) { + va_list arg; + va_start(arg, cmd); + struct flock *lk = va_arg(arg, struct flock*); + va_end(arg); + DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(cmd); + int ret = __real_fcntl(fd, cmd, lk); + DLIO_LOGGER_END(); + return ret; + } else { // assume arg: void, cmd==F_GETOWN_EX || cmd==F_SETOWN_EX ||cmd==F_GETSIG || cmd==F_SETSIG) + DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(cmd); + int ret = __real_fcntl(fd, cmd); + DLIO_LOGGER_END(); + return ret; + } + } int brahma::POSIXDLIOProfiler::dup(int oldfd) { @@ -451,6 +506,8 @@ int brahma::POSIXDLIOProfiler::dup2(int oldfd, int newfd) { int brahma::POSIXDLIOProfiler::mkfifo(const char *pathname, mode_t mode) { BRAHMA_MAP_OR_FAIL(mkfifo); DLIO_LOGGER_START(pathname); + DLIO_LOGGER_UPDATE(pathname); + DLIO_LOGGER_UPDATE(mode); int ret = __real_mkfifo(pathname, mode); DLIO_LOGGER_END(); return ret; @@ -467,6 +524,7 @@ mode_t brahma::POSIXDLIOProfiler::umask(mode_t mask) { int brahma::POSIXDLIOProfiler::access(const char *path, int amode) { BRAHMA_MAP_OR_FAIL(access); DLIO_LOGGER_START(path); + DLIO_LOGGER_UPDATE(path); int ret = __real_access(path, amode); DLIO_LOGGER_END(); return ret; @@ -475,6 +533,8 @@ int brahma::POSIXDLIOProfiler::access(const char *path, int amode) { int brahma::POSIXDLIOProfiler::faccessat(int fd, const char *path, int amode, int flag) { BRAHMA_MAP_OR_FAIL(faccessat); DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(path); + DLIO_LOGGER_UPDATE(fd); int ret = __real_faccessat(fd, path, amode, flag); DLIO_LOGGER_END(); return ret; @@ -483,6 +543,7 @@ int brahma::POSIXDLIOProfiler::faccessat(int fd, const char *path, int amode, in int brahma::POSIXDLIOProfiler::remove(const char *pathname) { BRAHMA_MAP_OR_FAIL(remove); DLIO_LOGGER_START(pathname); + DLIO_LOGGER_UPDATE(pathname); int ret = __real_remove(pathname); DLIO_LOGGER_END(); return ret; @@ -491,6 +552,8 @@ int brahma::POSIXDLIOProfiler::remove(const char *pathname) { int brahma::POSIXDLIOProfiler::truncate(const char *pathname, off_t length) { BRAHMA_MAP_OR_FAIL(truncate); DLIO_LOGGER_START(pathname); + DLIO_LOGGER_UPDATE(pathname); + DLIO_LOGGER_UPDATE(length); int ret = __real_truncate(pathname, length); DLIO_LOGGER_END(); return ret; @@ -499,6 +562,8 @@ int brahma::POSIXDLIOProfiler::truncate(const char *pathname, off_t length) { int brahma::POSIXDLIOProfiler::ftruncate(int fd, off_t length) { BRAHMA_MAP_OR_FAIL(ftruncate); DLIO_LOGGER_START(fd); + DLIO_LOGGER_UPDATE(fd); + DLIO_LOGGER_UPDATE(length); int ret = __real_ftruncate(fd, length); DLIO_LOGGER_END(); return ret; diff --git a/src/dlio_profiler/brahma/posix.h b/src/dlio_profiler/brahma/posix.h index 8924761d..f5fa0f76 100644 --- a/src/dlio_profiler/brahma/posix.h +++ b/src/dlio_profiler/brahma/posix.h @@ -21,6 +21,7 @@ class POSIXDLIOProfiler : public POSIX { static std::shared_ptr instance; std::unordered_set tracked_fd; std::vector track_filename; + std::vector ignore_filename; std::shared_ptr logger; inline std::string get_filename(int fd) { char proclnk[PATH_MAX]; @@ -30,24 +31,40 @@ class POSIXDLIOProfiler : public POSIX { filename[r] = '\0'; return filename; } - inline bool is_traced(int fd) { + inline std::pair is_traced(int fd, const char* func) { + if (fd == -1) return std::pair(false, ""); auto iter = tracked_fd.find(fd); - if (iter != tracked_fd.end()) return true; - return is_traced(get_filename(fd).c_str()); + if (iter != tracked_fd.end()) return std::pair(true, ""); + return is_traced(get_filename(fd).c_str(), func); } - inline bool is_traced(const char* filename) { + inline std::pair is_traced(const char* filename, const char* func) { bool found = false; + bool ignore = false; char resolved_path[PATH_MAX]; realpath(filename, resolved_path); - for (const auto file : track_filename) { + if (ignore_files(resolved_path) || ignore_files(filename)) { + DLIO_PROFILER_LOGINFO("Profiler ignoring logfile %s", resolved_path); + return std::pair(false, filename); + } + for (const auto file : ignore_filename) { if (strstr(resolved_path, file.c_str()) != NULL) { - DLIO_PROFILER_LOGINFO("Profiler Intercepted POSIX tracing %s %s", resolved_path, filename); - found = true; + DLIO_PROFILER_LOGINFO("Profiler Intercepted POSIX not tracing %s %s %s", resolved_path, filename, func); + ignore = true; + break; } } - DLIO_PROFILER_LOGINFO("Profiler Intercepted POSIX not tracing %s %s", resolved_path, filename); - return found; + if (!ignore) { + for (const auto file : track_filename) { + if (strstr(resolved_path, file.c_str()) != NULL) { + DLIO_PROFILER_LOGINFO("Profiler Intercepted POSIX tracing %s %s %s", resolved_path, filename, func); + found = true; + break; + } + } + } + DLIO_PROFILER_LOGINFO("Profiler Intercepted POSIX not tracing %s %s %s", resolved_path, filename, func); + return std::pair(found, filename); } inline void trace(int fd) { tracked_fd.insert(fd); @@ -65,6 +82,11 @@ class POSIXDLIOProfiler : public POSIX { realpath(filename, resolved_path); track_filename.push_back(resolved_path); } + inline void untrace(const char* filename) { + char resolved_path[PATH_MAX]; + realpath(filename, resolved_path); + ignore_filename.push_back(resolved_path); + } ~POSIXDLIOProfiler() override = default; static std::shared_ptr get_instance() { if (instance == nullptr) { @@ -138,7 +160,7 @@ class POSIXDLIOProfiler : public POSIX { DIR *opendir(const char *name) override; - int fcntl(int fd, int cmd, long arg) override; + int fcntl(int fd, int cmd, ...) override; int dup(int oldfd) override; diff --git a/src/dlio_profiler/brahma/stdio.h b/src/dlio_profiler/brahma/stdio.h index 0acfda0d..7bb2bac8 100644 --- a/src/dlio_profiler/brahma/stdio.h +++ b/src/dlio_profiler/brahma/stdio.h @@ -19,6 +19,7 @@ class STDIODLIOProfiler : public STDIO { static std::shared_ptr instance; std::unordered_set tracked_fh; std::vector track_filename; + std::vector ignore_filename; std::shared_ptr logger; inline std::string get_filename(int fd) { char proclnk[PATH_MAX]; @@ -28,23 +29,39 @@ class STDIODLIOProfiler : public STDIO { filename[r] = '\0'; return filename; } - inline bool is_traced(FILE* fh) { + inline std::pair is_traced(FILE* fh, const char* func) { + if (fh == NULL) return std::pair(false, ""); auto iter = tracked_fh.find(fh); - if (iter != tracked_fh.end()) return true; - return is_traced(get_filename(fileno(fh)).c_str()); + if (iter != tracked_fh.end()) return std::pair(true, ""); + return is_traced(get_filename(fileno(fh)).c_str(), func); } - inline bool is_traced(const char* filename) { + inline std::pair is_traced(const char* filename, const char* func) { bool found = false; + bool ignore = false; char resolved_path[PATH_MAX]; realpath(filename, resolved_path); - for (const auto file : track_filename) { + if (ignore_files(resolved_path)) { + DLIO_PROFILER_LOGINFO("Profiler ignoring logfile %s", resolved_path); + return std::pair(false, filename); + } + for (const auto file : ignore_filename) { if (strstr(resolved_path, file.c_str()) != NULL) { - DLIO_PROFILER_LOGINFO("Profiler Intercepted STDIO tracing %s %s", resolved_path, filename); - found = true; + DLIO_PROFILER_LOGINFO("Profiler Intercepted POSIX not tracing %s %s %s", resolved_path, filename, func); + ignore = true; + break; + } + } + if (!ignore) { + for (const auto file : track_filename) { + if (strstr(resolved_path, file.c_str()) != NULL) { + DLIO_PROFILER_LOGINFO("Profiler Intercepted STDIO tracing %s %s %s", resolved_path, filename, func); + found = true; + break; + } } } - DLIO_PROFILER_LOGINFO("Profiler Intercepted STDIO not tracing %s %s", resolved_path, filename); - return found; + DLIO_PROFILER_LOGINFO("Profiler Intercepted STDIO not tracing %s %s %s", resolved_path, filename, func); + return std::pair(found, filename); } inline void trace(FILE* fh) { tracked_fh.insert(fh); @@ -63,6 +80,11 @@ class STDIODLIOProfiler : public STDIO { realpath(filename, resolved_path); track_filename.push_back(resolved_path); } + inline void untrace(const char* filename) { + char resolved_path[PATH_MAX]; + realpath(filename, resolved_path); + ignore_filename.push_back(resolved_path); + } ~STDIODLIOProfiler() = default; static std::shared_ptr get_instance() { diff --git a/src/dlio_profiler/core/common.h b/src/dlio_profiler/core/common.h index 51d1a4bd..9b81a3d5 100644 --- a/src/dlio_profiler/core/common.h +++ b/src/dlio_profiler/core/common.h @@ -6,6 +6,8 @@ #define DLIO_PROFILER_COMMON_H #include #include +#include +const std::string ignore_filenames[4] = {".pfw", "/pipe", "/socket","/proc/self"}; inline std::vector split(std::string str, char delimiter) { std::vector res; if (str.find(delimiter) == std::string::npos) { @@ -20,5 +22,13 @@ inline std::vector split(std::string str, char delimiter) { } return res; } +inline bool ignore_files(const char* filename) { + for(auto &file: ignore_filenames) { + if (strstr(filename, file.c_str()) != NULL) { + return true; + } + } + return false; +} typedef double TimeResolution; #endif //DLIO_PROFILER_COMMON_H diff --git a/src/dlio_profiler/dlio_logger.h b/src/dlio_profiler/dlio_logger.h index 436cb9ff..d4be019e 100644 --- a/src/dlio_profiler/dlio_logger.h +++ b/src/dlio_profiler/dlio_logger.h @@ -29,30 +29,28 @@ class DLIOLogger { if (dlio_profiler_error != nullptr && strcmp(dlio_profiler_error, "1") == 0) { throw_error = true; } - if (init_log) { - this->is_init=true; - FILE *fp = NULL; - std::string log_file; - if (log_file.empty()) { - char *dlio_profiler_log_dir = getenv("DLIO_PROFILER_LOG_DIR"); - if (dlio_profiler_log_dir == nullptr) { + this->is_init=true; + FILE *fp = NULL; + std::string log_file; + if (log_file.empty()) { + char *dlio_profiler_log_dir = getenv("DLIO_PROFILER_LOG_DIR"); + if (dlio_profiler_log_dir == nullptr) { + fp = stderr; + log_file = "STDERR"; + } else { + if (strcmp(dlio_profiler_log_dir, "STDERR") == 0) { fp = stderr; log_file = "STDERR"; + } else if (strcmp(dlio_profiler_log_dir, "STDOUT") == 0) { + fp = stdout; + log_file = "STDOUT"; } else { - if (strcmp(dlio_profiler_log_dir, "STDERR") == 0) { - fp = stderr; - log_file = "STDERR"; - } else if (strcmp(dlio_profiler_log_dir, "STDOUT") == 0) { - fp = stdout; - log_file = "STDOUT"; - } else { - int pid = getpid(); - log_file = std::string(dlio_profiler_log_dir) + "/" + "trace_ll_" + std::to_string(pid) + ".pfw"; - } + int pid = getpid(); + log_file = std::string(dlio_profiler_log_dir) + "/" + "trace_ll_" + std::to_string(pid) + ".pfw"; } } - update_log_file(log_file); } + update_log_file(log_file); } inline TimeResolution get_current_time(){ return std::chrono::duration(std::chrono::high_resolution_clock::now().time_since_epoch()).count(); @@ -85,12 +83,17 @@ class DLIOLogger { dlio_profiler::Singleton::get_instance() #define DLIO_LOGGER_FINI() \ dlio_profiler::Singleton::get_instance()->finalize() +#define DLIO_LOGGER_UPDATE(value) if (trace) metadata.insert_or_assign(#value, value); #define DLIO_LOGGER_START(entity) \ - bool trace = is_traced(entity); \ - TimeResolution start_time = 0; \ + auto pair = is_traced(entity, __FUNCTION__); \ + bool trace = pair.first; \ + TimeResolution start_time = 0; \ auto metadata = std::unordered_map(); \ - if (trace) start_time = this->logger->get_time(); -#define DLIO_LOGGER_UPDATE(value) if (trace) metadata.insert_or_assign(#value, value); + if (trace) { \ + auto filename = pair.second; \ + DLIO_LOGGER_UPDATE(filename); \ + start_time = this->logger->get_time(); \ + } #define DLIO_LOGGER_END() \ if (trace) { \ TimeResolution end_time = this->logger->get_time(); \ diff --git a/src/dlio_profiler/dlio_profiler_py.cpp b/src/dlio_profiler/dlio_profiler_py.cpp index b106aad9..e0f1ba88 100644 --- a/src/dlio_profiler/dlio_profiler_py.cpp +++ b/src/dlio_profiler/dlio_profiler_py.cpp @@ -7,10 +7,33 @@ #include #include #include + + +#include +#include +#include +#include +#include + namespace py = pybind11; namespace dlio_profiler { + + + void handler(int sig) { + void *array[10]; + size_t size; + + // get void*'s for all entries on the stack + size = backtrace(array, 10); + + // print out all the frames to stderr + fprintf(stderr, "Error: signal %d:\n", sig); + backtrace_symbols_fd(array, size, STDERR_FILENO); + exit(1); + } void initialize(std::string &log_file, std::string &data_dirs, int process_id) { - if (process_id == 0) DLIO_PROFILER_LOGPRINT("log_file %s data_dirs %s and process %d\n", log_file.c_str(), data_dirs.c_str(), process_id); + signal(SIGSEGV, handler); + if (process_id <= 0) DLIO_PROFILER_LOGPRINT("log_file %s data_dirs %s and process %d\n", log_file.c_str(), data_dirs.c_str(), process_id); dlio_profiler::Singleton::get_instance()->update_log_file(log_file, process_id); char *dlio_profiler_priority_str = getenv("DLIO_PROFILER_GOTCHA_PRIORITY"); int dlio_profiler_priority = 1; @@ -21,6 +44,8 @@ namespace dlio_profiler { auto posix_instance = brahma::POSIXDLIOProfiler::get_instance(); auto stdio_instance = brahma::STDIODLIOProfiler::get_instance(); auto paths = split(data_dirs, ':'); + posix_instance->untrace(log_file.c_str()); + stdio_instance->untrace(log_file.c_str()); for (const auto &path:paths) { DLIO_PROFILER_LOGINFO("Profiler will trace %s\n", path.c_str()); posix_instance->trace(path.c_str()); diff --git a/src/dlio_profiler/writer/chrome_writer.cpp b/src/dlio_profiler/writer/chrome_writer.cpp index 836a66b8..9416ceae 100644 --- a/src/dlio_profiler/writer/chrome_writer.cpp +++ b/src/dlio_profiler/writer/chrome_writer.cpp @@ -24,17 +24,14 @@ dlio_profiler::ChromeWriter::log(std::string &event_name, std::string &category, std::unordered_map &metadata, int process_id) { if (is_first_write) { if (this->fp == NULL) { - file_mtx.lock(); fp = fopen(filename.c_str(), "w+"); - file_mtx.unlock(); } if (fp == nullptr) { ERROR(fp == nullptr,"unable to create log file %s", filename.c_str()); } else { std::string data = "[\n"; - file_mtx.lock(); auto written_elements = fwrite(data.c_str(), data.size(), sizeof(char), fp); - file_mtx.unlock(); + fflush(fp); if (written_elements != 1) { ERROR(written_elements != 1, "unable to initialize log file %s", filename.c_str()); } @@ -43,10 +40,8 @@ dlio_profiler::ChromeWriter::log(std::string &event_name, std::string &category, } if (fp != nullptr) { std::string json = convert_json(event_name, category, start_time, duration, metadata, process_id); - file_mtx.lock(); auto written_elements = fwrite(json.c_str(), json.size(), sizeof(char), fp); fflush(fp); - file_mtx.unlock(); if (written_elements != 1) { ERROR(written_elements != 1, "unable to write to log file %s", filename.c_str()); } diff --git a/src/dlio_profiler/writer/chrome_writer.h b/src/dlio_profiler/writer/chrome_writer.h index a16d9a61..a03c3f47 100644 --- a/src/dlio_profiler/writer/chrome_writer.h +++ b/src/dlio_profiler/writer/chrome_writer.h @@ -19,7 +19,8 @@ namespace dlio_profiler { std::string convert_json(std::string &event_name, std::string &category, TimeResolution start_time, TimeResolution duration, std::unordered_map &metadata, int process_id); bool is_first_write; - std::mutex file_mtx; + int process_id; + std::unordered_map mtx_map; std::vector core_affinity() { auto cores = std::vector(); hwloc_cpuset_t set = hwloc_bitmap_alloc(); @@ -37,7 +38,8 @@ namespace dlio_profiler { return std::string(hostname); } public: - ChromeWriter(FILE* fp=NULL):BaseWriter(), is_first_write(true){ + ChromeWriter(FILE* fp=NULL):BaseWriter(), is_first_write(true), mtx_map(){ + process_id = getpid(); this->fp = fp; hwloc_topology_init(&topology); // initialization hwloc_topology_load(topology); // actual detection