-
Notifications
You must be signed in to change notification settings - Fork 0
/
irsys
executable file
·79 lines (63 loc) · 2.77 KB
/
irsys
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/python
from helpers import *
import sys
import argparse
## Argument Parser Options
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing the job, path, query,
        evaluation, stemming, lemmatization, stopword and model options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-j", "--job", type=str, choices=['index', 'query', 'session'],
        help="Specify the job you need to run.")
    parser.add_argument(
        "-p", "--path",
        help="Path of file or directory to index")
    parser.add_argument(
        "-q", "--query",
        help="Query string to run (for single query mode) ")
    # NOTE(review): --eval is parsed but nothing in this script reads it;
    # evaluation is triggered by --qid alone. Confirm whether it should gate
    # the evaluation branch.
    parser.add_argument(
        "-e", "--eval", action='store_true',
        help="Make res file and evaluate using terrier")
    parser.add_argument(
        "-qid", "--qid", type=int, choices=[6, 7],
        help="Pick one of the predefined queries to evaluate using terrier")
    parser.add_argument(
        "-s", "--stem", type=str, choices=['porter', 'lancaster'],
        help="Pick one of the available stemmers for indexing")
    parser.add_argument(
        "-l", "--lem", type=str, choices=['wordnet'],
        help="Pick wither to use wordnet for lemmatization (maybe add other options in the future)")
    parser.add_argument(
        "-rs", "--rs", action='store_true',
        help="Remove Stopwords , default is False")
    parser.add_argument(
        "-m", "--model", type=str, choices=['tfidf', 'bm25', 'boolean'],
        help="Choose Retrieval Model")
    return parser


## Predefined Queries
# Keyed by the string form of the values accepted by --qid.
predefined_queries = {
    '6': "sustainable ecosystems",
    '7': 'air guitar textile sensors',
}

# Prompt reader that works on both Python 2 (raw_input) and Python 3 (input).
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def resolve_normalizers(stem, lem):
    """Turn the --stem / --lem option values into the names passed to index().

    Args:
        stem: value of --stem, or None when the option was not given.
        lem: value of --lem, or None when the option was not given.

    Returns:
        tuple: (stemmer, lemmatizer). Each is the requested name, or the
        string 'none' when not requested. Requesting a lemmatizer disables
        stemming.
    """
    stemmer = stem if stem else 'none'
    lemmatizer = lem if lem else 'none'
    if lemmatizer != 'none':
        # Lemmatization and stemming are mutually exclusive; the lemmatizer wins.
        stemmer = 'none'
    return stemmer, lemmatizer


def run_session(loaded_index, model=None):
    """Run the interactive query loop until the user types ``exit()``.

    Prints at most the first ten results of every query, followed by two
    separator lines. End-of-input (Ctrl-D) or Ctrl-C at the prompt ends the
    session instead of raising a traceback.

    Args:
        loaded_index: index object returned by load_index().
        model: retrieval model name from --model; when None, run_query is
            called without a model so that its own default applies.
    """
    separator = "=================================================================="
    while True:
        try:
            query = _read_line("Enter Query :")
        except (EOFError, KeyboardInterrupt):
            print("")
            break
        if query == "exit()":
            break
        if model:
            result = run_query(query, loaded_index, model)
        else:
            result = run_query(query, loaded_index)
        for hit in result[:10]:
            print(hit)
        print(separator)
        print(separator)


def main(argv=None):
    """Parse the command line and run the requested job.

    Dispatch order: a --qid evaluation takes precedence over --job; the
    'index' and 'query' jobs come next; anything else (including no job at
    all) starts the interactive session.

    Args:
        argv: list of argument strings to parse; None means sys.argv[1:].
    """
    args = build_parser().parse_args(argv)

    if args.qid is not None:
        model = args.model if args.model else 'tfidf'
        print(model)
        query = predefined_queries[str(args.qid)]
        print("Evaluating Query : " + query)
        print(evaluate(query, args.qid, model))
    elif args.job == "index":
        path = args.path
        if not path:
            path = _read_line("Please enter a file path or directory to index :")
        stemmer, lemmatizer = resolve_normalizers(args.stem, args.lem)
        # Keep the result under its own name so the index() function imported
        # from helpers is not shadowed.
        built_index = index(path, stemmer, lemmatizer, args.rs)
        print_statistics(built_index, 'of')
    elif args.job == "query":
        query = args.query
        if not query:
            query = _read_line("Please enter a Query to perform :")
        model = args.model if args.model else 'tfidf'
        loaded_index = load_index()
        print(run_query(query, loaded_index, model))
    else:
        run_session(load_index(), args.model)


if __name__ == "__main__":
    main()