This repository has been archived by the owner on Feb 2, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
parsertest.py
executable file
·68 lines (57 loc) · 2.54 KB
/
parsertest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from kindleclippingsparser import KindleClippingsParser
from cgi import escape
from sys import argv, stdin
# Feed standard input to the clippings parser and keep the parsed clippings
# in `foo`; the grouping loop further down iterates over it.
pars = KindleClippingsParser(stdin)
foo = pars.parse()

# Nested mapping filled by the grouping loop:
#   author -> {title -> [(text, location), ...]}
results = dict()

# Optional first command-line argument: a book title used to filter the
# clippings.  It is decoded from UTF-8 into a unicode object; when the
# argument is absent or empty, no title filtering is applied (None).
search_title = (
    argv[1].decode("utf-8")
    if len(argv) > 1 and argv[1]
    else None
)
def select_query(a, search_title):
    """Return True when clipping *a* should be included in the output.

    A clipping qualifies only if it is a highlight (``a['type'] ==
    "Highlight"``) and its ``title``, ``text``, ``author`` and ``location``
    entries are all truthy.  When *search_title* is truthy, the clipping's
    title must additionally have a Levenshtein similarity ratio above 0.50
    with *search_title*.

    Parameters:
        a: mapping-like clipping with the keys ``title``, ``text``,
           ``author``, ``location`` and ``type``.
        search_title: title to match against, or a falsy value (``None``)
           to disable title filtering.

    Raises:
        KeyError: if one of the required keys is missing from *a*.
        ImportError: if a title search is requested for a qualifying
           clipping and the ``Levenshtein`` package is not installed.
    """
    # Validate the clipping before touching the title: this keeps a missing
    # (None/empty) title from blowing up in ``.encode`` below and avoids the
    # comparatively costly fuzzy match for clippings that are rejected anyway.
    is_complete_highlight = bool(
        a['title'] and a['text']
        and a['author'] and a['location']
        and a['type'] == "Highlight"
    )
    if not is_complete_highlight or not search_title:
        return is_complete_highlight

    # Imported lazily so the script works without the Levenshtein package
    # as long as no title filter is requested.
    from Levenshtein import ratio

    # Both sides are compared as UTF-8 encoded strings, as before.
    similarity = ratio(a['title'].encode("utf-8"),
                       search_title.encode("utf-8"))
    return similarity > 0.50
# Group the selected clippings into `results`:
#   author -> {title -> [(text, location), ...]}
# All keys and values are stored as UTF-8 encoded byte strings.
for clipping in foo:
    if not select_query(clipping, search_title):
        continue

    # Encode every field exactly once.  The previous code encoded the key
    # literal ('title'.encode(...)) instead of the title value in one branch,
    # so the title was stored as a unicode key but looked up as UTF-8 bytes;
    # for non-ASCII titles the lookup never matched and each new highlight
    # overwrote the ones collected before it.
    author_key = clipping['author'].encode("utf-8")
    title_key = clipping['title'].encode("utf-8")
    quote = (clipping['text'].encode("utf-8"),
             clipping['location'].encode("utf-8"))

    # setdefault creates the per-author dict / per-title list on first use.
    results.setdefault(author_key, {}).setdefault(title_key, []).append(quote)
# Write the grouped clippings to standard output as one XHTML document:
# an <h2> per author, an <h3> per title, and one <blockquote> per highlight
# with its location in parentheses.  All inserted values are HTML-escaped.
print('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<title>Clippings from Kindle</title>
</head>
<body>
''')
for author_name, titles in results.iteritems():
    author_heading = "<h2>%s</h2>" % escape(author_name)
    print(author_heading)
    for book_title, quotes in titles.iteritems():
        title_heading = "<h3>%s</h3>" % escape(book_title)
        print(title_heading)
        for quote_text, quote_location in quotes:
            escaped_fields = (escape(quote_text), escape(quote_location))
            print("<blockquote><p>%s (%s)</p></blockquote>" % escaped_fields)
# Close the document opened by the preamble above.
print("</body></html>")
# print "<blockquote>%s (%s, »%s«, %s)</blockquote>\n" % (a['text'].encode("utf-8"),
# a['author'].encode("utf-8"),
# a['title'].encode("utf-8"),
# a['location'].encode("utf-8"))