-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreprocess.sh
26 lines (18 loc) · 877 Bytes
/
preprocess.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash
#
# Preprocessing pipeline. Runs, in order:
#   1. node    utils/ElasticSearchIndexCreater
#   2. node    utils/ElasticSearchCSVIndexer.js   (once per CSV, in parallel)
#   3. python3 utils/HistogramGenerator.py        (once per CSV, in parallel)
#   4. python3 utils/KeyColumnOverlap.py
#   5. python3 utils/UnionableTableFinder.py
#   6. node    utils/MongoDBIndexCreator
#
# Must be started from the directory that contains both utils/ and
# data/files/, because every path below is derived from the current
# working directory.

NUMBER_OF_PROCESSORS=$(nproc --all)
BASE_DIR=$(pwd)
FILES_DIR="$BASE_DIR/data/files"

echo "Number of processors: $NUMBER_OF_PROCESSORS"
echo "Files directory: $FILES_DIR"

echo "Creating Elasticsearch index..."
node utils/ElasticSearchIndexCreater

# The two per-file stages run inside subshells so that the `cd` into
# FILES_DIR does not leak into the rest of the script; the later stages use
# paths relative to BASE_DIR (utils/...) and would otherwise not be found.
#
# For each CSV, xargs starts one `sh -c` worker (-n 1), running up to
# NUMBER_OF_PROCESSORS of them at once (-P). The trailing `--` fills $0 of
# the inline script so the file name arrives as "$@". Each worker is limited
# to 2400 seconds by `timeout`, and the final `true` makes the worker exit 0
# so a failed or timed-out file is not reported to xargs as an error.

echo "Indexing CSV files..."
(
  cd "$FILES_DIR" &&
    find . -name "*.csv" -print0 |
    xargs -r -0 -n 1 -P "$NUMBER_OF_PROCESSORS" -- \
      sh -c 'timeout 2400 node ../../utils/ElasticSearchCSVIndexer.js "$@"; true' --
)

echo "Computing histograms..."
(
  cd "$FILES_DIR" &&
    find . -name "*.csv" -print0 |
    xargs -r -0 -n 1 -P "$NUMBER_OF_PROCESSORS" -- \
      sh -c 'timeout 2400 python3 ../../utils/HistogramGenerator.py "$@"; true' --
)

echo "Computing key column overlap..."
python3 utils/KeyColumnOverlap.py

echo "Finding unionable tables..."
python3 utils/UnionableTableFinder.py

echo "Creating MongoDB index..."
node utils/MongoDBIndexCreator