From 452e4660482d9563606736e9f2aa81d6398377ad Mon Sep 17 00:00:00 2001 From: Ray Andrew Date: Fri, 4 Oct 2024 07:26:36 +0000 Subject: [PATCH] add `dftracer_event_count` - use sqlite from python to calculate valid events inside traces - reuse `dftracer_event_count` for sanity checking inside `dftracer_split` --- CMakeLists.txt | 30 +++++++------- docs/utilities.rst | 25 +++++++++--- script/dftracer_event_count | 80 +++++++++++++++++++++++++++++++++++++ script/dftracer_split | 36 ++--------------- setup.py | 3 +- 5 files changed, 118 insertions(+), 56 deletions(-) create mode 100755 script/dftracer_event_count diff --git a/CMakeLists.txt b/CMakeLists.txt index 550fe81..7487491 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,6 @@ set(DFTRACER_VERSION "(1, 0, 5)") project(dftracer LANGUAGES C CXX) - # Convenience defines string(TOUPPER "${PROJECT_NAME}" UPPER_PROJECT_NAME) string(TOLOWER "${PROJECT_NAME}" LOWER_PROJECT_NAME) @@ -85,7 +84,6 @@ if (NOT DFTRACER_EXPORTED_TARGETS) set(DFTRACER_EXPORTED_TARGETS "dftracer-targets") endif () - include(dftracer-utils) #------------------------------------------------------------------------------ # Build options @@ -108,13 +106,13 @@ endif (DFTRACER_USE_CLANG_LIBCXX) # Options option (DFTRACER_DISABLE_HWLOC "Disable HWLOC" On) -if (NOT DFTRACER_DISABLE_HWLOC) +if (NOT DFTRACER_DISABLE_HWLOC) set(DFTRACER_HWLOC_ENABLE 1) endif() # Options option (DFTRACER_ENABLE_FTRACING "Enable Function Tracing" OFF) -if (DFTRACER_ENABLE_FTRACING) +if (DFTRACER_ENABLE_FTRACING) set(DFTRACER_FTRACING_ENABLE 1) set(DFTRACER_FUNCTION_FLAGS "-g" "-finstrument-functions" "-Wl,-E" "-fvisibility=default") else() @@ -122,14 +120,13 @@ else() endif() option (DFTRACER_ENABLE_MPI "Enable MPI" OFF) -if (DFTRACER_ENABLE_MPI) +if (DFTRACER_ENABLE_MPI) set(DFTRACER_MPI_ENABLE 1) endif() option (DFTRACER_BUILD_PYTHON_BINDINGS "Build python bindings." 
ON) set(DFTRACER_PYTHON_EXE "python3" CACHE STRING "Python executable to use for building.") set(DFTRACER_PYTHON_SITE $ENV{DFTRACER_PYTHON_SITE} CACHE STRING "Python site packages to use for building.") - if (DFTRACER_BUILD_PYTHON_BINDINGS) if (DFTRACER_PYTHON_SITE) set(PYTHON_SITE_PACKAGES "${DFTRACER_PYTHON_SITE}") @@ -192,7 +189,7 @@ endif () # Dependencies #------------------------------------------------------------------------------ -if (DFTRACER_INSTALL_DEPENDENCIES) +if (DFTRACER_INSTALL_DEPENDENCIES) message(STATUS "[${PROJECT_NAME}] downloading dependencies. Please run make for downloading depedencies and then do reconfigure without dependency flag.") message(STATUS "[${PROJECT_NAME}] Installing depedencies at ${CMAKE_INSTALL_PREFIX}") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/dependency) @@ -233,7 +230,7 @@ if (DFTRACER_BUILD_PYTHON_BINDINGS) #add_subdirectory(dependency/py11bind) endif() -if (DFTRACER_ENABLE_MPI) +if (DFTRACER_ENABLE_MPI) find_package(MPI COMPONENTS CXX REQUIRED) if (MPI_FOUND) message(STATUS "[DFTRACER] found mpi.h at ${MPI_CXX_INCLUDE_DIRS}") @@ -255,9 +252,6 @@ if (NOT DFTRACER_DISABLE_HWLOC) endif () endif() - - - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) set(DFTRACER_PRELOAD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/src/dftracer/dftracer_preload.cpp) @@ -416,7 +410,6 @@ if (DFTRACER_BUILD_PYTHON_BINDINGS) . 
${CMAKE_BINARY_DIR}/symlink.sh \")") - install(TARGETS py${PROJECT_NAME}_dbg LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) @@ -475,6 +468,14 @@ install( bin ) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/script/dftracer_event_count ${EXECUTABLE_OUTPUT_PATH}/dftracer_event_count COPYONLY) +install( + FILES + ${EXECUTABLE_OUTPUT_PATH}/dftracer_event_count + DESTINATION + bin +) + #cmake_policy(SET CMP0079 NEW) # In case that we need more control over the target building order if(DFTRACER_ENABLE_TESTS) @@ -500,7 +501,7 @@ include(CMakePackageConfigHelpers) configure_package_config_file( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/configure_files/${PROJECT_NAME}-config.cmake.build.in "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-config.cmake" - INSTALL_DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/cmake/${PROJECT_NAME}/${PROJECT_NAME}-config.cmake + INSTALL_DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/cmake/${PROJECT_NAME}/${PROJECT_NAME}-config.cmake PATH_VARS CMAKE_BINARY_DIR ) @@ -536,7 +537,6 @@ export(EXPORT ${DFTRACER_EXPORTED_TARGETS} configure_file("${CMAKE_SOURCE_DIR}/cmake/configure_files/dftracer_config.hpp.in" "${CMAKE_INCLUDE_OUTPUT_DIRECTORY}/dftracer/dftracer_config.hpp" @ONLY) - ################################################################ # Install DFTRACER ################################################################ @@ -569,7 +569,6 @@ install(FILES DESTINATION ${DFTRACER_INSTALL_DOCDIR}) - ############################################################################### # Print out configuration summary ############################################################################### @@ -644,7 +643,6 @@ string(APPEND _str execute_process(COMMAND ${CMAKE_COMMAND} -E echo "${_str}") set(_str) - ############################################################################### # Write a basic modulefile ############################################################################### diff --git a/docs/utilities.rst 
b/docs/utilities.rst index fb129da..6539e55 100644 --- a/docs/utilities.rst +++ b/docs/utilities.rst @@ -119,9 +119,22 @@ Arguments for this script are: 1. **-n app_name** specify app name. 2. **-f** override indices. -3. **-c** compress input file -4. **-s size** chunk size (in MB) -5. **-v** enable verbose mode -6. **-h** display help -7. **-d input_directory** specify input directories. should contain .pfw or .pfw.gz files. -8. **-o output_directory** specify output directory. +3. **-s size** chunk size (in MB) +4. **-v** enable verbose mode +5. **-h** display help +6. **-d input_directory** specify input directories. should contain .pfw or .pfw.gz files. +7. **-o output_directory** specify output directory. + +-------------------------------- +Counting DFTracer traces' events +-------------------------------- + +The script counts the number of valid events in the traces. + +.. code-block:: bash + /bin/usage: dftracer_event_count [-c] [-d input_directory] + +Arguments for this script are: +1. **-d input_directory** specify input directories. should contain .pfw or .pfw.gz files. +2. **-c** skip index creation (assumes the index already exists; the script fails otherwise) +3. **-h** display help diff --git a/script/dftracer_event_count b/script/dftracer_event_count new file mode 100755 index 0000000..580089c --- /dev/null +++ b/script/dftracer_event_count @@ -0,0 +1,80 @@ +#!/bin/bash + +# The script counts the number of valid events in the traces. +# This has the following signature. +# +# usage: dftracer_event_count [-c] [-d input_directory] +# -d input_directory specify input directories. should contain .pfw or .pfw.gz files. 
+# -c disable create index (assuming index exists, if not will throw error) +# -h display help +# Unless -c is given, dftracer_create_index is first run on the input directory. + +LOG_DIR=$PWD +run_create_index=1 + +function usage { + echo "usage: $(basename $0) [-c] [-d input_directory]" + echo " -h display help" + echo " -c disable create index (assuming index exists, if not will throw error)" + echo " -d input_directory specify input directories. should contain .pfw or .pfw.gz files." + return 0 # callers pick the exit status: 0 for -h, 1 for bad options +} +while getopts ':cd:h' opt; do + case "$opt" in + c) + run_create_index=0 + ;; + d) + LOG_DIR="${OPTARG}" + ;; + h) + usage + exit 0 + ;; + :) + echo -e "option requires an argument.\n" + usage + exit 1 + ;; + + ?) + echo -e "Invalid command option.\n" + usage + exit 1 + ;; + esac +done +shift "$(($OPTIND -1))" + +function get_lines_count { + local dir=$1 + + # if dir empty use current dir + if [ -z "$dir" ]; then + dir=$PWD + fi + + pushd "$dir" > /dev/null || exit 1 + + lines_count=$(cat <<-EOF | python3 +import zindex_py as zindex +import glob +import sqlite3 + +lines_count = 0 +for file in sorted(glob.glob("*.pfw.gz")): + conn = sqlite3.connect(f"{file}.zindex") + res = conn.execute("select count(line) as a from LineOffsets where length > 8;") + lines_count += res.fetchone()[0] +print(lines_count) +EOF) + popd > /dev/null + + echo "$lines_count" +} + +if [ "$run_create_index" == "1" ]; then + SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" + "$SCRIPT_DIR/dftracer_create_index" -c -d "$LOG_DIR" -f >/dev/null +fi +get_lines_count "$LOG_DIR" diff --git a/script/dftracer_split b/script/dftracer_split index 603d0d5..099ca77 100755 --- a/script/dftracer_split +++ b/script/dftracer_split @@ -6,7 +6,6 @@ # usage: dftracer_split [-fv] [-n app_name] [-d input_directory] [-o output_directory] [-s chunk_size] # -n app_name specify app name # -f override generated files -# -c compress input # -s size chunk size (in MB) # -v enable verbose mode # -h display help @@ -29,7 +28,6 @@ function usage { echo "usage: $(basename $0) [-fv] [-n app_name] [-d 
input_directory] [-o output_directory] [-s chunk_size]" echo " -n app_name specify app name" echo " -f override generated files" - echo " -c compress input" echo " -s size chunk size (in MB)" echo " -v enable verbose mode" echo " -h display help" @@ -113,35 +111,7 @@ fi SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" $SCRIPT_DIR/dftracer_create_index -c -d $LOG_DIR -f -function get_lines_count { - local dir=$1 - - # if dir empty use current dir - if [ -z "$dir" ]; then - dir=$PWD - fi - - pushd $dir > /dev/null - - lines_count=$(cat <<-EOF | python3 -import zindex_py as zindex -import glob -import sqlite3 - -lines_count = 0 -for file in sorted(glob.glob("*.pfw.gz")): - conn = sqlite3.connect(f"{file}.zindex") - res = conn.execute("select count(line) as a from LineOffsets where length > 8;") - lines_count += res.fetchone()[0] -print(lines_count) -EOF) - - popd > /dev/null - - echo $lines_count -} - -# LINES_COUNT=$(get_lines_count $LOG_DIR) +# LINES_COUNT=$("$SCRIPT_DIR/dftracer_event_count" -d "$LOG_DIR" -c) # echo "Original lines count $LINES_COUNT" pushd $LOG_DIR > /dev/null @@ -258,8 +228,8 @@ rm -f *.pfw.gz $SCRIPT_DIR/dftracer_create_index -c -d $dest -f rm -f *.pfw -LINES_COUNT=$(get_lines_count $LOG_DIR) -SPLIT_LINES_COUNT=$(get_lines_count $dest) +LINES_COUNT=$("$SCRIPT_DIR/dftracer_event_count" -d "$LOG_DIR" -c) # -c: dftracer_create_index was already run above +SPLIT_LINES_COUNT=$("$SCRIPT_DIR/dftracer_event_count" -d "$dest" -c) if [ $LINES_COUNT -ne $SPLIT_LINES_COUNT ]; then date_echo "Error: Original lines count $LINES_COUNT does not match split lines count $SPLIT_LINES_COUNT" diff --git a/setup.py b/setup.py index aad47ec..48b21e6 100644 --- a/setup.py +++ b/setup.py @@ -197,7 +197,8 @@ def build_extension(self, ext: CMakeExtension) -> None: 'script/dftracer_sanitize', 'script/dftracer_anonymize', 'script/dftracer_split', - 'script/dftracer_create_index',], + 'script/dftracer_create_index', + 'script/dftracer_event_count', ], package_dir={"dftracer": "dftracer", "dftracer_dbg": "dftracer_dbg", "dfanalyzer": 
"dfanalyzer"},