-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added script for creating index (#213)
- Loading branch information
1 parent
e61f7a0
commit 3a4ce79
Showing
5 changed files
with
197 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
#!/bin/bash | ||
|
||
# The script creates indices for the dftracer traces | ||
# This has the following signature. | ||
# | ||
# usage: dftracer_create_index [-fcv] [-d input_directory] | ||
# -f override indices | ||
# -c compress input | ||
# -v enable verbose mode | ||
# -h display help | ||
# -d input_directory specify input directories. should contain .pfw or .pfw.gz files. | ||
|
||
date_echo() { | ||
dt=$(date '+%d/%m/%Y %H:%M:%S'); | ||
echo "$dt $@" | ||
} | ||
|
||
LOG_DIR=$PWD | ||
override=0 | ||
compressed=0 | ||
|
||
PPWD=$PWD | ||
|
||
function usage { | ||
echo "usage: $(basename $0) [-fcv] [-d input_directory]" | ||
echo " -f override indices" | ||
echo " -c compress input" | ||
echo " -v enable verbose mode" | ||
echo " -h display help" | ||
echo " -d input_directory specify input directories. should contain .pfw or .pfw.gz files." | ||
exit 1 | ||
} | ||
while getopts ':cvfd:h' opt; do | ||
case "$opt" in | ||
d) | ||
LOG_DIR="${OPTARG}" | ||
;; | ||
f) | ||
override=1 | ||
;; | ||
v) | ||
set -x | ||
;; | ||
c) | ||
compressed=1 | ||
;; | ||
h) | ||
usage | ||
exit 0 | ||
;; | ||
|
||
:) | ||
echo -e "option requires an argument.\n" | ||
usage | ||
exit 1 | ||
;; | ||
|
||
?) | ||
echo -e "Invalid command option.\n" | ||
usage | ||
exit 1 | ||
;; | ||
esac | ||
done | ||
shift "$(($OPTIND -1))" | ||
total=0 | ||
for file in *.pfw*; do total=1; break; done | ||
|
||
# pfw_count=$(ls -1 $LOG_DIR/*.pfw 2> /dev/null | wc -l) | ||
# gz_count=$(ls -1 $LOG_DIR/*.gz 2> /dev/null | wc -l) | ||
# total=$((pfw_count + gz_count)) | ||
if [ $total == 0 ]; then | ||
date_echo "The folder does not contain any pfw or pfw.gz files." | ||
exit 1 | ||
fi | ||
|
||
python -c "import zindex_py;" | ||
if [[ $? != 0 ]]; then | ||
date_echo "failure: $?: zindex not found. Please install zindex with: pip install zindex_py" | ||
exit 1 | ||
fi | ||
zindex_exec=$(python3 -c 'import zindex_py;import site; sp=site.getsitepackages()[0]; print(f"{sp}/zindex_py/bin/zindex")') | ||
if [ ! -f "${zindex_exec}" ]; then | ||
date_echo "failure: $?: zindex not found. Please install zindex with: pip install zindex_py" | ||
exit 1 | ||
else | ||
date_echo "Found zindex executable at ${zindex_exec}" | ||
fi | ||
|
||
if [ "$override" == "1" ]; then | ||
date_echo "Removing existing indices as override is passed." | ||
rm $LOG_DIR/*.zindex | ||
fi | ||
|
||
pushd $LOG_DIR | ||
JOBS_LIMIT=$(nproc --all) | ||
files=("$LOG_DIR"/*.pfw*) | ||
total=${#files[@]} | ||
# loop over logs | ||
for file_index in "${!files[@]}"; do | ||
file=${files[$file_index]} | ||
running_jobs=$(jobs -rp | wc -l) | ||
if [ $running_jobs -ge $JOBS_LIMIT ]; then | ||
date_echo "waiting for Running $running_jobs jobs to be less than $JOBS_LIMIT" | ||
while [ $running_jobs -ge $JOBS_LIMIT ] | ||
do | ||
sleep 1 | ||
running_jobs=$(jobs -rp | wc -l) | ||
done | ||
date_echo "Running $running_jobs jobs are now less than $JOBS_LIMIT" | ||
fi | ||
# only look at files | ||
if [ -f "$file" ]; then | ||
# calculate basename and copy files | ||
filename=$(basename -- "$file") | ||
ext="${filename##*.}" | ||
if [ "$ext" == "gz" ]; then | ||
{ | ||
# if file is gz get the name | ||
name=${filename%.pfw.gz} | ||
if [ ! -f "$file.zindex" ]; then | ||
$zindex_exec $file --index-file file:$file.zindex --regex 'id:([0-9]+)' --numeric --unique | ||
fi | ||
date_echo "Created index for file $file.gz $file_index of $total" | ||
} & | ||
else | ||
{ | ||
# if file is pfw get the name | ||
name=${filename%.pfw} | ||
if [ ! -f "$file.gz.zindex" ]; then | ||
if [ $compressed == 1 ]; then | ||
date_echo "Compressing file $name" | ||
gzip $file | ||
$zindex_exec $file.gz --index-file file:$file.gz.zindex --regex 'id:([0-9]+)' --numeric --unique | ||
fi | ||
fi | ||
date_echo "Created index for file $file.gz $file_index of $total" | ||
} & | ||
fi | ||
fi | ||
done | ||
popd | ||
wait | ||
date_echo Creation of index finished |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters