forked from GeorgesAlkhouri/virtuoso-import-docker
-
Notifications
You must be signed in to change notification settings - Fork 1
/
dump.sh
108 lines (90 loc) · 2.96 KB
/
dump.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env bash
# Timestamp captured once at start-up (day/month/year hour:minute:second).
dt="$(date +'%d/%m/%Y %H:%M:%S')"

# Background on dumping and reloading graphs:
# http://docs.openlinksw.com/virtuoso/rdfperfdumpandreloadgraphs/

# isql connection settings for the Virtuoso server.
bin='isql-vt'
host='virtuoso'
port='1111'
user='dba'

# Password and export directory are taken from the environment.
password="${DBA_PASSWORD}"
export_dir="${VIRTUOSO_DATA_DIR}"
#######################################
# Wrap the execution of isql commands to receive the return code and output.
# Globals:
#   bin, host, port, user, password (read) - isql executable and connection
#   VIRT_OUTPUT  (written) - combined stdout/stderr of the isql run
#   VIRT_RETCODE (written) - exit status of the isql run
# Arguments:
#   $1 - SQL text that is fed to isql on stdin
# Outputs:
#   On success: the isql output from its 5th line onwards, with a leading
#   "SQL> " removed from each line, on stdout.
#   On failure: an error report (commands, exit status, raw output) on stderr.
# Returns:
#   0 on success; otherwise the isql exit status + 128, capped at 255.
#######################################
run_virtuoso_cmd () {
  VIRT_OUTPUT=$(printf '%s\n' "$1" \
    | "$bin" -H "$host" -S "$port" -U "$user" -P "$password" 2>&1)
  VIRT_RETCODE=$?

  if [[ $VIRT_RETCODE -eq 0 ]]; then
    # The first four lines are dropped (presumably the isql banner) and the
    # prompt prefix is stripped so only the command results remain.
    printf '%s\n' "$VIRT_OUTPUT" | tail -n +5 | sed 's/^SQL> //'
    return 0
  fi

  # printf with %s keeps backslashes in the SQL text and in the isql output
  # literal (echo -e would interpret them as escape sequences).
  printf '[ERROR] running these commands in virtuoso:\n%s\nerror code: %s\noutput:\n%s\n' \
    "$1" "$VIRT_RETCODE" "$VIRT_OUTPUT" >&2

  # Exit statuses are taken modulo 256, so an unclamped "status + 128" could
  # wrap around to 0 and make a failure look like a success.
  local ret=$(( VIRT_RETCODE + 128 ))
  if (( ret > 255 )); then
    ret=255
  fi
  return "$ret"
}
#######################################
# Check if the virtuoso is up and running by polling it with 'status();'.
# This is needed during the bootstrapping process in a docker setup.
# Arguments:
#   $1 - maximum number of connection attempts (non-negative integer);
#        0 is treated like 1, i.e. a single attempt is made
# Outputs:
#   Whatever run_virtuoso_cmd prints for each probe, plus a "." and the
#   remaining-attempts counter after each failed probe, and "timeout" when
#   the attempts are used up. Argument errors go to stderr.
# Returns:
#   0 once a probe succeeds, 2 when all attempts failed.
#   Exits the script with status 1 on a missing or invalid argument.
#######################################
test_connection () {
  if [[ -z $1 ]]; then
    echo "[ERROR] missing argument: retry attempts" >&2
    exit 1
  fi
  # Anything but a plain non-negative integer would break the countdown
  # below and could make it loop forever.
  if [[ ! $1 =~ ^[0-9]+$ ]]; then
    echo "[ERROR] invalid argument: retry attempts must be a non-negative integer" >&2
    exit 1
  fi

  # 10# forces base 10 so values with leading zeros are not read as octal.
  local remaining=$(( 10#$1 ))

  until run_virtuoso_cmd 'status();'; do
    printf '.'
    sleep 2
    printf '%s\n' "$remaining"
    remaining=$(( remaining - 1 ))
    # "<= 0" instead of "== 0": a start value of 0 must also terminate.
    if (( remaining <= 0 )); then
      echo "timeout"
      return 2
    fi
  done
  return 0
}
# Work from the export directory: the *.graph files are matched here and the
# temporary dump file is picked up from here as well.
cd "$export_dir" || {
  echo "[ERROR] cannot change into export directory: $export_dir" >&2
  exit 1
}

echo "[INFO] waiting for store to come online"
: "${CONNECTION_ATTEMPTS:=60}"
test_connection "${CONNECTION_ATTEMPTS}"
if [[ $? -eq 2 ]]; then
  echo "[ERROR] store not reachable" >&2
  exit 1
fi

# Give some more seconds to the virtuoso to really accept updates
sleep 3

# First define the dump procedure in the store
dump_procedure_sql=$(cat /virtuoso/dump_one_graph.virtuoso)
run_virtuoso_cmd "$dump_procedure_sql"

echo "[INFO] $dt Starting dump process..."
echo "[INFO] initializing named graphs from *.graph files"

# Maps a target file extension to the rapper output serializer name.
declare -A serializer=( ["nt"]="ntriples" ["rdf"]="rdfxml" ["xml"]="rdfxml" ["ttl"]="turtle" )

# Name of the file the dump procedure is expected to leave in the export
# directory for the prefix "tmp_data_".
exportfile="tmp_data_000001.ttl"

for ext in nt rdf ttl xml; do
  for graph_file in *."${ext}".graph; do
    # An unmatched glob stays verbatim (e.g. "*.xml.graph"); skip it.
    if [[ ! -f "$graph_file" ]]; then
      continue
    fi

    # "<name>.<ext>.graph" holds the graph IRI on its first line; the dump is
    # written to "<name>.<ext>".
    target="${graph_file%.graph}"
    graph=$(head -n1 -- "$graph_file")
    echo "[INFO] dump graph $graph to ${target}"

    # Now use the procedure to dump the graph.
    # NOTE(review): the graph IRI is interpolated into the SQL text unescaped;
    # this relies on the *.graph files being trusted.
    if ! run_virtuoso_cmd "dump_one_graph('${graph}', '${export_dir}/tmp_data_', 1000000000);"; then
      # Without this guard the output redirection below would truncate the
      # existing target file although nothing was dumped.
      echo "[ERROR] dump of graph $graph failed, ${target} left untouched" >&2
      rm -f -- "${exportfile}" "${exportfile}.graph"
      continue
    fi

    # Check if the repository contains a custom normalize script
    if [[ -f ./normalize.sh ]]; then
      mv -- "$exportfile" "$target"
      ./normalize.sh "$target"
    else
      # String comparison: "-eq" compares integers and never matched "nt",
      # so N-Triples output was never sorted.
      if [[ "$ext" == "nt" ]]; then
        rapper -q -i turtle -o "${serializer[$ext]}" "$exportfile" \
          | LC_ALL=C sort -u > "$target"
      else
        rapper -q -i turtle -o "${serializer[$ext]}" "$exportfile" > "$target"
      fi
      rm -f -- "$exportfile"
    fi

    # Remove the "<dump file>.graph" companion file of the temporary dump.
    rm -f -- "${exportfile}.graph"
  done
done

echo "[INFO] dump done;"