forked from nnkennard/iclr-discourse-dataset
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample.py
More file actions
47 lines (36 loc) · 1.27 KB
/
example.py
File metadata and controls
47 lines (36 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import argparse
import sqlite3
import sys
import lib.openreview_db as ordb
# Command-line interface for this script; main() reads it through
# parser.parse_args().
parser = argparse.ArgumentParser(
    description='Load OpenReview data from a sqlite3 database.',
)

# Path of the sqlite3 file to read.
parser.add_argument(
    '-d',
    '--dbfile',
    type=str,
    default="db/or.db",
    help='path to database file',
)

# Upper bound on the number of examples printed; -1 means "no bound".
parser.add_argument(
    '-n',
    '--numexamples',
    type=int,
    default=-1,
    help='number of examples to dump as string. If -1, dump all',
)
def main():
    """Print the sentences of training-split reviews stored in the database.

    Parses the command-line options, opens the sqlite3 file named by
    ``--dbfile`` and selects the ``traindev_pairs`` rows whose split is
    ``"train"`` (at most ``--numexamples`` of them; a negative value such as
    the default -1 selects all). For each selected pair, the ``traindev``
    rows whose ``sid`` equals the pair's ``review_sid`` are passed through
    ``ordb.crunch_text_rows`` and printed one sentence per line.

    Returns:
        None. Output goes to stdout; a connection failure is reported on
        stderr and nothing else is printed.
    """
    args = parser.parse_args()
    conn = ordb.create_connection(args.dbfile)
    if conn is None:
        # Tell the user why there is no output instead of exiting silently.
        print("Could not open database file: {0}".format(args.dbfile),
              file=sys.stderr)
        return

    try:
        cur = conn.cursor()
        # Bind the limit as a parameter rather than formatting it into the
        # SQL text. SQLite treats a negative LIMIT as "no upper bound", so
        # the default of -1 still returns every matching row.
        cur.execute(
            "SELECT * FROM traindev_pairs WHERE split=? LIMIT ?",
            ("train", args.numexamples),
        )
        # Fetch everything up front: the same cursor is reused for the
        # per-pair lookups below.
        pairs = cur.fetchall()

        # NOTE(review): the nesting of the separator prints below was
        # reconstructed from a copy of this file whose indentation was lost;
        # confirm the blank line belongs after each chunk and the row of
        # asterisks after each pair.
        for pair in pairs:
            # Column access by name presumably relies on create_connection
            # installing a name-aware row factory (e.g. sqlite3.Row) --
            # TODO confirm.
            cur.execute("SELECT * FROM traindev WHERE sid=?",
                        (pair["review_sid"],))
            crunched_rows = ordb.crunch_text_rows(cur.fetchall())
            for note_id, chunks in crunched_rows.items():
                print(note_id)
                print("-" * 80)
                for chunk in chunks:
                    for sentence in chunk:
                        print(" ".join(sentence))
                    print()
            print("*" * 80)
    finally:
        # Release the database handle even if a query or print fails.
        conn.close()
# Run the dump only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()