From dd0fd917b7ed28523aeedd4e399e9bd90f35bd6a Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Sun, 31 Mar 2024 16:09:15 +0100 Subject: [PATCH 1/2] fix(linux): improved container support We make use of the /proc/root of the procfs to improve the chances that we can infer the required information, such as the location of the runtime structure and the interpreter version from binaries pointed at by the actual process root. This is relevant for processes running inside containers, since their FS root does not coincide with the host root. --- ChangeLog | 2 ++ src/linux/common.h | 23 +++++++++++++++++++++++ src/linux/py_proc.h | 20 ++++++++++---------- src/py_proc.c | 21 +++++++-------------- 4 files changed, 42 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7831a1ac..b84f435f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ 2024-xx-xx v3.6.1 + Improve support for Python processes running in containers. + Bugfix: fixed a bug with the MOJO binary format that caused the line end position to wrongly be set to a non-zero value for CPython < 3.11, where line end information is not actually available. diff --git a/src/linux/common.h b/src/linux/common.h index a97cd541..98e888ec 100644 --- a/src/linux/common.h +++ b/src/linux/common.h @@ -26,9 +26,11 @@ #include #include #include +#include #include #include "../error.h" +#include "../hints.h" #include "../stats.h" @@ -109,3 +111,24 @@ _procfs(pid_t pid, char * file) { return fp; } + + +// ---------------------------------------------------------------------------- +static inline char * +proc_root(pid_t pid, char * file) { + if (file[0] != '/') { + log_e("File path is not absolute"); + return NULL; + } + + char * proc_root = calloc(1, strlen(file) + 24); + if (!isvalid(proc_root)) + return NULL; + + if (sprintf(proc_root, "/proc/%d/root%s", pid, file) < 0) { + free(proc_root); + return NULL; + } + + return proc_root; +} diff --git a/src/linux/py_proc.h b/src/linux/py_proc.h index c5ea90cc..29957e31 100644 --- a/src/linux/py_proc.h +++ b/src/linux/py_proc.h @@ -455,16 +455,16 @@ _py_proc__parse_maps_file(py_proc_t * self) { // The first memory map of the executable if (!isvalid(pd->maps[MAP_BIN].path) && strcmp(pd->exe_path, pathname) == 0) { map = &(pd->maps[MAP_BIN]); - map->path = strndup(pathname, strlen(pathname)); + map->path = proc_root(self->pid, pathname); if (!isvalid(map->path)) { - log_ie("Cannot duplicate path name"); + log_e("Cannot get proc root path for %s", pathname); set_error(EPROC); FAIL; } - map->file_size = _file_size(pathname); + map->file_size = _file_size(map->path); map->base = (void *) lower; map->size = upper - lower; - map->has_symbols = success(_py_proc__analyze_elf(self, pathname, (void *) lower)); + map->has_symbols = success(_py_proc__analyze_elf(self, map->path, (void *) lower)); if (map->has_symbols) { map->bss_base = self->map.bss.base; map->bss_size = self->map.bss.size; @@ -479,13 +479,13 @@ _py_proc__parse_maps_file(py_proc_t * self) { int has_symbols = success(_py_proc__analyze_elf(self, pathname, (void *) lower)); if (has_symbols) { map = &(pd->maps[MAP_LIBSYM]); - map->path = strndup(pathname, strlen(pathname)); + map->path = proc_root(self->pid, pathname); if (!isvalid(map->path)) { - log_ie("Cannot duplicate path name"); + log_e("Cannot get proc root path for %s", pathname); set_error(EPROC); FAIL; } - map->file_size = _file_size(pathname); + map->file_size = _file_size(map->path); map->base = (void *) lower; map->size = upper - lower; map->has_symbols = TRUE; @@ -503,13 +503,13 @@ _py_proc__parse_maps_file(py_proc_t * self) { unsigned int v; if (sscanf(needle, "libpython%u.%u", &v, &v) == 2) { map = &(pd->maps[MAP_LIBNEEDLE]); - map->path = needle_path = strndup(pathname, strlen(pathname)); + map->path = needle_path = proc_root(self->pid, pathname); if (!isvalid(map->path)) { - log_ie("Cannot duplicate path name"); + log_e("Cannot get proc root path for %s", pathname); set_error(EPROC); FAIL; } - map->file_size = _file_size(pathname); + map->file_size = _file_size(map->path); map->base = (void *) lower; map->size = upper - lower; map->has_symbols = FALSE; diff --git a/src/py_proc.c b/src/py_proc.c index 33141677..9951dd6d 100644 --- a/src/py_proc.c +++ b/src/py_proc.c @@ -110,17 +110,14 @@ _get_version_from_executable(char * binary, int * major, int * minor, int * patc #endif fp = _popen(cmd, "r"); - if (!isvalid(fp)) { - set_error(EPROC); + if (!isvalid(fp)) FAIL; - } while (fgets(version, sizeof(version) - 1, fp) != NULL) { if (sscanf(version, "Python %d.%d.%d", major, minor, patch) == 3) SUCCESS; } - set_error(EPROC); FAIL; } /* _get_version_from_executable */ @@ -129,13 +126,15 @@ _get_version_from_filename(char * filename, const char * needle, int * major, in #if defined PL_LINUX /* LINUX */ char * base = filename; char * end = base + strlen(base); + size_t needle_len = strlen(needle); while (base < end) { base = strstr(base, needle); if (!isvalid(base)) { break; } - if (sscanf(base + strlen(needle), "%u.%u", major, minor) == 2) { + base += needle_len; + if (sscanf(base, "%u.%u", major, minor) == 2) { SUCCESS; } } @@ -143,23 +142,18 @@ _get_version_from_filename(char * filename, const char * needle, int * major, in #elif defined PL_WIN /* WIN */ // Assume the library path is of the form *.python3[0-9]+[.]dll int n = strlen(filename); - if (n < 10) { - set_error(EPROC); + if (n < 10) FAIL; - } char * p = filename + n - 1; while (*(p--) != 'n' && p > filename); p++; *major = *(p++) - '0'; - if (*major != 3) { - set_error(EPROC); + if (*major != 3) FAIL; - } - if (sscanf(p,"%d.dll", minor) == 1) { + if (sscanf(p,"%d.dll", minor) == 1) SUCCESS; - } #elif defined PL_MACOS /* MAC */ char * ver_needle = strstr(filename, "3."); @@ -169,7 +163,6 @@ _get_version_from_filename(char * filename, const char * needle, int * major, in #endif - set_error(EPROC); FAIL; } /* _get_version_from_filename */ From 0f8e7d7084513b1c5a2e7634d2ab056795637e58 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Mon, 1 Apr 2024 15:22:57 +0100 Subject: [PATCH 2/2] exclude potentially unreachable lines from code coverage --- src/linux/common.h | 10 +++++----- src/linux/py_proc.h | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/linux/common.h b/src/linux/common.h index 98e888ec..8c9ecb54 100644 --- a/src/linux/common.h +++ b/src/linux/common.h @@ -117,17 +117,17 @@ _procfs(pid_t pid, char * file) { static inline char * proc_root(pid_t pid, char * file) { if (file[0] != '/') { - log_e("File path is not absolute"); - return NULL; + log_e("File path is not absolute"); // GCOV_EXCL_START + return NULL; // GCOV_EXCL_STOP } char * proc_root = calloc(1, strlen(file) + 24); if (!isvalid(proc_root)) - return NULL; + return NULL; // GCOV_EXCL_LINE if (sprintf(proc_root, "/proc/%d/root%s", pid, file) < 0) { - free(proc_root); - return NULL; + free(proc_root); // GCOV_EXCL_START + return NULL; // GCOV_EXCL_STOP } return proc_root; diff --git a/src/linux/py_proc.h b/src/linux/py_proc.h index 29957e31..852d8d00 100644 --- a/src/linux/py_proc.h +++ b/src/linux/py_proc.h @@ -457,9 +457,9 @@ _py_proc__parse_maps_file(py_proc_t * self) { map = &(pd->maps[MAP_BIN]); map->path = proc_root(self->pid, pathname); if (!isvalid(map->path)) { - log_e("Cannot get proc root path for %s", pathname); + log_e("Cannot get proc root path for %s", pathname); // GCOV_EXCL_START set_error(EPROC); - FAIL; + FAIL; // GCOV_EXCL_STOP } map->file_size = _file_size(map->path); map->base = (void *) lower; @@ -481,9 +481,9 @@ _py_proc__parse_maps_file(py_proc_t * self) { map = &(pd->maps[MAP_LIBSYM]); map->path = proc_root(self->pid, pathname); if (!isvalid(map->path)) { - log_e("Cannot get proc root path for %s", pathname); + log_e("Cannot get proc root path for %s", pathname); // GCOV_EXCL_START set_error(EPROC); - FAIL; + FAIL; // GCOV_EXCL_STOP } map->file_size = _file_size(map->path); map->base = (void *) lower; @@ -505,9 +505,9 @@ _py_proc__parse_maps_file(py_proc_t * self) { map = &(pd->maps[MAP_LIBNEEDLE]); map->path = needle_path = proc_root(self->pid, pathname); if (!isvalid(map->path)) { - log_e("Cannot get proc root path for %s", pathname); + log_e("Cannot get proc root path for %s", pathname); // GCOV_EXCL_START set_error(EPROC); - FAIL; + FAIL; // GCOV_EXCL_STOP } map->file_size = _file_size(map->path); map->base = (void *) lower;