Skip to content

Commit

Permalink
added script for creating index (#213)
Browse files Browse the repository at this point in the history
  • Loading branch information
hariharan-devarajan authored Oct 3, 2024
1 parent e61f7a0 commit 3a4ce79
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 10 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,14 @@ install(
bin
)

# Stage the dftracer_create_index helper script next to the built executables,
# then install it. PROGRAMS (rather than FILES) is used so that the installed
# copy carries execute permissions and can be run directly from <prefix>/bin.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/script/dftracer_create_index"
  "${EXECUTABLE_OUTPUT_PATH}/dftracer_create_index"
  COPYONLY
)
install(
  PROGRAMS
  "${EXECUTABLE_OUTPUT_PATH}/dftracer_create_index"
  DESTINATION
  bin
)

#cmake_policy(SET CMP0079 NEW) # In case that we need more control over the target building order

if(DFTRACER_ENABLE_TESTS)
Expand Down
18 changes: 18 additions & 0 deletions docs/utilities.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,21 @@ Arguments for this script are:
4. **-h** display help
5. **-d input_directory** specify input directories. should contain .pfw or .pfw.gz files.
6. **-o output_directory** specify output directory.

-------------------
Create Index script
-------------------

This script optionally compresses the dftracer traces in a directory and creates an index for each trace file.

.. code-block:: bash

    <install-dir>/bin/dftracer_create_index [-fcv] [-d input_directory]

Arguments for this script are:

1. **-f** override existing indices.
2. **-c** compress input files.
3. **-v** enable verbose mode.
4. **-h** display help.
5. **-d input_directory** specify the input directory. It should contain .pfw or .pfw.gz files.
34 changes: 25 additions & 9 deletions script/dftracer_anonymize
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,30 @@ echo "Setting up output directory"
rm -rf ${OUTPUT_DIR}
mkdir -p ${OUTPUT_DIR}

pfw_count=`ls -1 $LOG_DIR/*.pfw 2> /dev/null | wc -l`
gz_count=`ls -1 $LOG_DIR/*.gz 2> /dev/null | wc -l`
total=$((pfw_count + gz_count))
total=0
for file in *.pfw*; do total=1; break; done
if [ $total == 0 ]; then
echo "The folder does not contain any pfw or pfw.gz files."
exit 0
fi

count=1

files=("$LOG_DIR"/*.pfw*)
total=${#files[@]}
JOBS_LIMIT=64
# loop over logs
for file in "$LOG_DIR"/*.pfw*; do
for file_index in "${!files[@]}"; do
file=${files[$file_index]}
running_jobs=$(jobs -rp | wc -l)
if [ $running_jobs -ge $JOBS_LIMIT ]; then
date_echo "waiting for Running $running_jobs jobs to be less than $JOBS_LIMIT"
while [ $running_jobs -ge $JOBS_LIMIT ]
do
sleep 1
running_jobs=$(jobs -rp | wc -l)
done
date_echo "Running $running_jobs jobs are now less than $JOBS_LIMIT"
fi
{
# only look at files
if [ -f "$file" ]; then
# calculate basename and copy files
Expand All @@ -97,7 +109,7 @@ for file in "$LOG_DIR"/*.pfw*; do
cp $LOG_DIR/$filename $OUTPUT_DIR/.tmp.$filename
if [ "$ext" == "gz" ]; then
# if file is gz get the name
name=${count%.pfw.gz}
name=${file_index%.pfw.gz}
echo "extracted name $name"
# replace non utf characters
gunzip -c $OUTPUT_DIR/.tmp.$filename | sed -e "s/${USER}/USER/g" > $OUTPUT_DIR/$name.pfw
Expand All @@ -110,7 +122,7 @@ for file in "$LOG_DIR"/*.pfw*; do
fi
else
# if file is pfw get the name
name=${count%.pfw}
name=${file_index%.pfw}
echo "extracted name $name"
# replace non utf characters
cat $OUTPUT_DIR/.tmp.$filename | sed -e "s/${USER}/USER/g" > $OUTPUT_DIR/$name.pfw
Expand All @@ -124,5 +136,9 @@ for file in "$LOG_DIR"/*.pfw*; do
# remove temp file
rm $OUTPUT_DIR/.tmp.$filename
fi
count=$((count+1))
} &
done

wait

date_echo Finished anonymization of traces.
144 changes: 144 additions & 0 deletions script/dftracer_create_index
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/bin/bash

# The script creates indices for the dftracer traces
# This has the following signature.
#
# usage: dftracer_create_index [-fcv] [-d input_directory]
# -f override indices
# -c compress input
# -v enable verbose mode
# -h display help
# -d input_directory specify input directories. should contain .pfw or .pfw.gz files.

# Print a message prefixed with the current local time (dd/mm/YYYY HH:MM:SS).
# Arguments: the words of the message; they are joined into one string using
# the first character of IFS (a space by default).
date_echo() {
  # Keep the timestamp local so the helper does not leak a global variable.
  local dt
  dt=$(date '+%d/%m/%Y %H:%M:%S')
  # "$*" rather than "$@": the timestamp and message form a single string.
  echo "$dt $*"
}

# Default settings; each can be changed by a command-line option.
# Directory that is scanned for traces (-d); defaults to the current directory.
LOG_DIR=$PWD
# 1 when existing .zindex files must be removed before indexing (-f).
override=0
# 1 when plain .pfw files should be gzip-compressed and then indexed (-c).
compressed=0

# Print the command-line synopsis and terminate the script.
# Arguments:
#   $1 - exit status to terminate with (optional). Defaults to 1 so that
#        callers reporting a usage error keep failing.
function usage {
  echo "usage: $(basename "$0") [-fcv] [-d input_directory]"
  echo " -f override indices"
  echo " -c compress input"
  echo " -v enable verbose mode"
  echo " -h display help"
  echo " -d input_directory specify input directories. should contain .pfw or .pfw.gz files."
  exit "${1:-1}"
}

# Parse the command-line flags. The leading ':' in the optstring selects
# getopts' silent error reporting, so the ':' and '?' arms print the messages.
while getopts ':cvfd:h' opt; do
  case "$opt" in
    d)
      LOG_DIR="${OPTARG}"
      ;;
    f)
      override=1
      ;;
    v)
      # Verbose mode: trace every command the script executes.
      set -x
      ;;
    c)
      compressed=1
      ;;
    h)
      # Asking for help is not an error, so terminate successfully.
      usage 0
      ;;
    :)
      echo -e "option requires an argument.\n"
      usage 1
      ;;
    \?)
      echo -e "Invalid command option.\n"
      usage 1
      ;;
  esac
done
# Drop the parsed options so that "$@" only holds positional arguments.
shift "$((OPTIND - 1))"
# Make sure the input directory holds at least one trace before doing any work.
# The -f test is required because a glob without matches is left as the literal
# pattern, which would otherwise always be counted as one "file".
total=0
for file in "$LOG_DIR"/*.pfw "$LOG_DIR"/*.pfw.gz; do
  if [ -f "$file" ]; then
    total=1
    break
  fi
done

if [ "$total" -eq 0 ]; then
  date_echo "The folder does not contain any pfw or pfw.gz files."
  exit 1
fi

# Locate the zindex executable bundled with the zindex_py Python package.
# The same interpreter (python3) is used for the import check and the lookup.
python3 -c "import zindex_py;"
# Save the status right away: any later command or test overwrites $?.
import_status=$?
if [ "$import_status" -ne 0 ]; then
  date_echo "failure: $import_status: zindex not found. Please install zindex with: pip install zindex_py"
  exit 1
fi

# The binary is looked up under the first site-packages directory.
zindex_exec=$(python3 -c 'import zindex_py;import site; sp=site.getsitepackages()[0]; print(f"{sp}/zindex_py/bin/zindex")')
if [ ! -f "${zindex_exec}" ]; then
  date_echo "failure: zindex executable not found at '${zindex_exec}'. Please install zindex with: pip install zindex_py"
  exit 1
fi
date_echo "Found zindex executable at ${zindex_exec}"

# With -f, drop every existing index so that all of them are rebuilt.
if [ "$override" == "1" ]; then
  date_echo "Removing existing indices as override is passed."
  # Quote the directory (it may contain spaces); -f keeps rm quiet when there
  # is no index to remove, and -- protects against names starting with '-'.
  rm -f -- "$LOG_DIR"/*.zindex
fi

# Index every trace found in $LOG_DIR, running at most $JOBS_LIMIT background
# jobs at a time.
#   *.pfw.gz : indexed in place unless <file>.zindex already exists.
#   *.pfw    : only handled when -c was given; the file is gzip'ed and then
#              indexed, unless <file>.gz.zindex already exists.
# All paths are built from $LOG_DIR instead of changing into that directory,
# so a relative -d argument is resolved exactly once.

# nproc is not available on every platform; fall back to a single job.
JOBS_LIMIT=$(nproc --all 2> /dev/null || echo 1)

# Collect the traces only. A plain *.pfw* glob would also pick up the
# *.zindex files created by earlier runs and treat them as traces.
files=()
for candidate in "$LOG_DIR"/*.pfw "$LOG_DIR"/*.pfw.gz; do
  # -f also drops the literal pattern left behind by a glob without matches.
  if [ -f "$candidate" ]; then
    files+=("$candidate")
  fi
done
total=${#files[@]}

# loop over logs
for file_index in "${!files[@]}"; do
  file=${files[$file_index]}
  # Human-friendly (1-based) position used in progress messages.
  position=$((file_index + 1))

  # Throttle: block until fewer than JOBS_LIMIT background jobs are running.
  running_jobs=$(jobs -rp | wc -l)
  if [ "$running_jobs" -ge "$JOBS_LIMIT" ]; then
    date_echo "waiting for Running $running_jobs jobs to be less than $JOBS_LIMIT"
    while [ "$running_jobs" -ge "$JOBS_LIMIT" ]; do
      sleep 1
      running_jobs=$(jobs -rp | wc -l)
    done
    date_echo "Running $running_jobs jobs are now less than $JOBS_LIMIT"
  fi

  case "$file" in
    *.pfw.gz)
      {
        if [ -f "$file.zindex" ]; then
          date_echo "Index already exists for file $file $position of $total"
        elif "$zindex_exec" "$file" --index-file "file:$file.zindex" --regex 'id:([0-9]+)' --numeric --unique; then
          date_echo "Created index for file $file $position of $total"
        else
          date_echo "failure: could not create index for file $file $position of $total"
        fi
      } &
      ;;
    *.pfw)
      {
        # Trace name without directory and .pfw suffix, for the log message.
        name=$(basename -- "$file" .pfw)
        if [ -f "$file.gz.zindex" ]; then
          date_echo "Index already exists for file $file.gz $position of $total"
        elif [ "$compressed" == 1 ]; then
          date_echo "Compressing file $name"
          # Only index when the compression succeeded.
          if gzip "$file" && "$zindex_exec" "$file.gz" --index-file "file:$file.gz.zindex" --regex 'id:([0-9]+)' --numeric --unique; then
            date_echo "Created index for file $file.gz $position of $total"
          else
            date_echo "failure: could not compress and index file $file $position of $total"
          fi
        else
          date_echo "Skipping uncompressed file $file $position of $total (pass -c to compress and index it)"
        fi
      } &
      ;;
  esac
done

# Wait for the remaining background jobs before reporting completion.
wait
date_echo Creation of index finished
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ def build_extension(self, ext: CMakeExtension) -> None:
scripts=['script/dftracer_compact',
'script/dftracer_merge',
'script/dftracer_sanitize',
'script/dftracer_anonymize'],
'script/dftracer_anonymize',
'script/dftracer_create_index',],
package_dir={"dftracer": "dftracer",
"dftracer_dbg": "dftracer_dbg",
"dfanalyzer": "dfanalyzer"},
Expand Down

0 comments on commit 3a4ce79

Please sign in to comment.