-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
55 lines (40 loc) · 1.91 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# K is handed to src/download_abstracts.py as its --retmax argument.
# Override per run with `make K=<n> download`.
K := 900

# Input file lists, globbed once while the Makefile is parsed (`:=`), so only
# files that already exist when make starts are picked up.
# NOTE(review): ORIG_FILES is not referenced by any rule visible in this file.
ORIG_FILES := $(wildcard orig/*.json)
SIMPLE_JSON_FILES := $(wildcard simple/*.json)
# One-time setup: install the Python requirements and create the working
# directories that the other rules read from and write to.
# `install` is a command, not a file, hence the .PHONY declaration.
.PHONY: install
install:
	pip install -r requirements.txt
	mkdir -p orig simple spotlight-output spotlight-output-patches spotlight-postproc stats
# Run the pipeline stages: download, parse, annotate, patch.
# NOTE(review): `all` is not the first rule in this file, so a bare `make`
# does not select it unless .DEFAULT_GOAL is set elsewhere - confirm intent.
.PHONY: all
all: download parse annotate patch
# Run the abstract downloader once per search term, passing K as --retmax
# (presumably the maximum number of results per query - confirm in the script).
.PHONY: download
download:
	python src/download_abstracts.py -q zika --retmax $(K)
	python src/download_abstracts.py -q microcephaly --retmax $(K)
# Disabled query, kept for reference:
#	python src/download_abstracts.py -q "aedes aegypti" --retmax $(K)
	python src/download_abstracts.py -q pyriproxyfen --retmax $(K)
# Parsing is currently performed at download time, so this target has no
# recipe of its own and simply forwards to `download`.
.PHONY: parse
parse: download
# Annotate every parsed file: one spotlight-output/<name>.json is requested
# for each simple/<name>.json.
# The file list comes from SIMPLE_JSON_FILES, so only inputs already present
# in simple/ when make starts are annotated; this target does not itself
# depend on `download` or `parse`.
.PHONY: annotate
annotate: $(patsubst simple/%.json,spotlight-output/%.json,$(SIMPLE_JSON_FILES))
# Annotate a single parsed abstract file.
#   $<  simple/<name>.json            (input)
#   $@  spotlight-output/<name>.json  (output)
# The output directory is created here so the rule does not depend on
# `make install` having been run first.
spotlight-output/%.json: simple/%.json
	@mkdir -p $(@D)
	python src/annotate.py $< $@
# Extract the entity set of every annotated file: one
# spotlight-postproc/<name>.eset per simple/<name>.json.
.PHONY: entities
entities: $(patsubst simple/%.json,spotlight-postproc/%.eset,$(SIMPLE_JSON_FILES))
# Write the sorted, de-duplicated list of the .URI values found in one
# annotation file.
# jq writes to a temporary file first so that a jq failure aborts the rule
# instead of being masked by the exit status of `sort` at the end of a pipe
# (which would leave an empty but up-to-date-looking target).
spotlight-postproc/%.eset: spotlight-output/%.json
	@mkdir -p $(@D)
	jq -r ".[].URI" $< > $@.tmp \
	  && sort -u $@.tmp > $@; \
	status=$$?; rm -f $@.tmp; exit $$status
# TODO: patching is not implemented yet. It is meant to depend on
# downloading, parsing, annotating and manually generated patch files that
# are applied in order. For now the target only triggers `annotate`.
.PHONY: patch
patch: annotate
# Top-10 entity table: counts how often each DBpedia resource URI occurs
# across all .eset files and writes "<resource>,<count>" rows below a header.
# The prerequisites are the .eset files themselves rather than the `entities`
# alias: `entities` never exists as a file, so depending on it forced this
# target to be rebuilt on every run.
# Lines containing a comma are dropped (grep -v) before counting.
# NOTE(review): the header is "age,population" although the rows hold
# resource names and counts - presumably a downstream consumer expects these
# column names; confirm before changing.
# TODO/FIXME (original author): this pipeline is currently Mac-specific.
stats/entity_counts.csv: $(SIMPLE_JSON_FILES:simple/%.json=spotlight-postproc/%.eset)
	echo "age,population" > $@
	cat spotlight-postproc/*.eset | sort | grep -v "," | uniq -c | sort -nr | sed -E "s|[ ]+([0-9]+) http://dbpedia.org/resource/(.+)|\2,\1|" | head -10 >> $@
# Build one spotlight-postproc/<name>.coocs per simple/<name>.json.
.PHONY: coocs
coocs: $(patsubst simple/%.json,spotlight-postproc/%.coocs,$(SIMPLE_JSON_FILES))
# Derive the .coocs file of one document from its entity set by running
# src/coocs.py with the .eset file ($<) as input and the .coocs file ($@)
# as output.
spotlight-postproc/%.coocs: spotlight-postproc/%.eset
	python src/coocs.py $< $@
# Concatenate every .coocs file, count identical lines and write them out
# most-frequent first (each line prefixed with its count by `uniq -c`).
stats/cooc_counts.csv: $(patsubst simple/%.json,spotlight-postproc/%.coocs,$(SIMPLE_JSON_FILES))
	cat spotlight-postproc/*.coocs | sort | uniq -c | sort -nr > $@
# Convert the aggregated co-occurrence counts to JSON with src/coocs2json.py.
# NOTE(review): the trailing 200 is passed through to the script as a third
# argument; its meaning is not visible here - confirm in coocs2json.py.
stats/cooc_counts.json: stats/cooc_counts.csv
	python src/coocs2json.py $< $@ 200
# Archive the error log rather than deleting it: errors.log is moved into
# bak/ with a date/time suffix (date +%F_%R).
# The move is skipped when there is no errors.log, so `make clean` succeeds
# on an already clean tree.
.PHONY: clean
clean:
	mkdir -p bak
	if [ -f errors.log ]; then mv errors.log "bak/errors.log.$$(date +%F_%R)"; fi