-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse-batch.py
executable file
·39 lines (31 loc) · 1.09 KB
/
parse-batch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/python
import ucfparser as up
import saveparses as savep
import pdb
import sys
def main():
    """Parse a corpus file in batches and save each parse as a JSON line.

    Expects exactly two command-line arguments: the corpus file to read and
    the output file to write parses to. With any other argument count a
    usage line is printed and the function returns without opening a file.

    Every parse yielded by ``up.parse_batch`` is encoded with
    ``savep.parse_json_encode``, written to the output file followed by a
    newline, decoded again with ``savep.parse_json_decode``, and finally
    printed through ``up.parse_str_repr``. ``UnicodeEncodeError`` and
    ``ValueError`` raised while saving a parse are reported on stdout and
    do not stop the batch.
    """
    # print(...) with a single argument behaves the same as the Python 2
    # print statement, so the messages below are unchanged on Python 2.
    if len(sys.argv) != 3:
        print('arguments: corpus-file output-parses-file')
        return

    parser = up.ucfparser()
    fin_name = sys.argv[1]
    json_fin_name = sys.argv[2]

    # The context manager closes the output file even when the parser or
    # encoder raises something that is not handled below.
    with open(json_fin_name, 'w') as json_out:
        for parses in up.parse_batch(parser, fin_name, filter_chars=False):
            for num, parse in enumerate(parses):
                # The unpacked names are not used; the unpacking is kept
                # because it fails early if a parse is not a 3-item sequence.
                sentence, parsetree, scopes = parse
                written = False
                try:
                    json_repr = savep.parse_json_encode(parse)
                    json_out.write(json_repr + '\n')
                    # Flush per parse so finished work reaches the file
                    # before the next parse is attempted.
                    json_out.flush()
                    written = True
                    # Decode what was just written and continue with the
                    # round-tripped parse; the unpacking re-checks its shape.
                    parse = savep.parse_json_decode(json_repr)
                    sentence, parsetree, scopes = parse
                except UnicodeEncodeError:
                    # Must precede ValueError: UnicodeEncodeError is a
                    # subclass of it.
                    print('*** skipping write parse on unicode error')
                except ValueError:
                    print('*** value error?')
                # Only claim success when the line actually reached the file.
                if written:
                    print('wrote parse %d to %s' % (num, json_fin_name))
                print(up.parse_str_repr(parse))
# Run the batch parse only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()