-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathguides_parser.py
170 lines (143 loc) · 5.31 KB
/
guides_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python
#
# Copyright (c) 2012 Appcelerator, Inc. All Rights Reserved.
# Licensed under the Apache Public License (version 2)
import os, shutil, json, sys, re, xml.dom.minidom, pprint, optparse
from BeautifulSoup import BeautifulSoup
# ---------------------------------------------------------------------------
# Script setup: parse the command line, load the table-of-contents XML,
# prepare a clean output directory and read the wiki-link whitelist.
# ---------------------------------------------------------------------------
parser = optparse.OptionParser()
parser.add_option("-i", "--input", dest="input",
                  help="input", metavar="FILE")
parser.add_option("-o", "--output", dest="output",
                  help="output", metavar="FILE")
(options, args) = parser.parse_args()
if options.output is None or options.input is None:
    print("please provide --input and --output ")
    # Missing arguments are a usage error: report failure to the caller
    # instead of exiting with status 0.
    sys.exit(1)

dom = xml.dom.minidom.parse(options.input)  # parse an XML file by name
topics = []
dest = options.output

# Start from an empty output directory. Removing it is best-effort: the
# directory usually does not exist on a first run.
try:
    shutil.rmtree(dest)
except OSError:
    pass
try:
    os.makedirs(dest)
except OSError:
    # Only tolerate "already exists"; anything else would make every later
    # write fail anyway, so surface it here.
    if not os.path.isdir(dest):
        raise

# load the whitelist -- wiki links which are *not* the result of errors
# Default to an empty whitelist so later lookups never hit an undefined name
# when the file is missing or unreadable.
wiki_whitelist = []
try:
    whitelist_path = os.path.join(os.path.dirname(__file__),
                                  'guides_wiki_whitelist')
    with open(whitelist_path, 'r') as whitelist_file:
        # One URL prefix per whitespace-separated token.
        wiki_whitelist = whitelist_file.read().split()
except (IOError, OSError) as e:
    print("Threw an exception" + str(e))
def _convert_apidoc_href(href):
    """Translate an absolute apidoc URL into a '#!/api/...' anchor.

    Member pages (method, property, event) become
    '#!/api/<Type>-<kind>-<member>'; every other apidoc page is treated as a
    type page and becomes '#!/api/<Type>'.
    """
    for kind in ('method', 'property', 'event'):
        # NOTE(review): the '?' makes only the last letter of the kind
        # optional (e.g. '-metho'); pattern kept exactly as it was.
        suffix = '-' + kind + r'?(\.html)?'
        if re.search('apidoc/mobile/latest/.*' + suffix, href):
            # e.g. .../Titanium.Locale.getString-method.html
            #   -> #!/api/Titanium.Locale-method-getString
            return re.sub(r'.*/(.*)\.(.*)' + suffix,
                          r'#!/api/\1-' + kind + r'-\2', href)
    # e.g. .../Titanium.XML.DOMDocument-object.html
    #   -> #!/api/Titanium.XML.DOMDocument
    return re.sub(r'.*/([\w\.]*)(\-(module|object))?(\.html)?',
                  r'#!/api/\1', href)


def _convert_guide_href(href):
    """Translate a relative link to another guide into a '#!/guide/...' anchor."""
    # no matter what kind of link it is, replace spaces
    href = href.replace(' ', '_')
    if re.search(r'.*?\.html#', href):
        # link to a section of another guide
        return re.sub(r'(.*?)\.html#(.*)', r'#!/guide/\1-section-\2', href)
    if re.search(r'.*?\.html', href):
        # link to another guide; anything after '.html' is dropped
        return re.sub(r'(.*?)\.html(.*)', r'#!/guide/\1', href)
    return href


def _rewrite_anchor(tag, guide_dir):
    """Rewrite the href of one <a> tag in place; guide_dir is used in warnings."""
    href = tag['href']
    if not href.startswith(('http://', 'https://')):
        # otherwise, we're looking at an internal guides link.
        tag['href'] = _convert_guide_href(href)
        return

    if re.search('apidoc/mobile/latest/', href):
        # APIDoc links are included as full URLs
        tag['href'] = _convert_apidoc_href(href)
    elif re.search('http://docs.appcelerator.com/titanium/.*#!.*', href):
        # Rewrite any absolute links to http://docs.appcelerator.com
        # Only way we can insert a link to the videos in the wiki is to use
        # a full URL
        tag['href'] = re.sub('http://docs.appcelerator.com/titanium/.*#!',
                             '#!', href)
    elif href.startswith('https://wiki.appcelerator.org'):
        # In general, any link back to the wiki is a bad link.
        # There are a very few exceptions, such as the community wiki.
        # Fall back to an empty whitelist if the module never defined one.
        whitelist = globals().get('wiki_whitelist') or ()
        if not any(href.startswith(url) for url in whitelist):
            print('Warning: unconverted wiki link (' + href +
                  ') while processing ' + guide_dir)
    else:
        # open external links in new windows/tabs
        tag['target'] = '_blank'


def node2obj(node):
    """Convert one table-of-contents node into a guide entry.

    Copies '<href minus .html>.html' from the directory of ``options.input``
    to '<dest>/guides/<name>/README.html', rewrites the links inside that
    copy, and recurses into the node's children.

    Relies on the module-level ``options``, ``dest`` and ``wiki_whitelist``.

    Args:
        node: a DOM node from the parsed table of contents.

    Returns:
        None for non-element nodes and for nodes whose href contains '#'
        (anchors inside a page). Otherwise a dict with 'name', 'title' and,
        when there are child guides, 'items'.
    """
    if node.nodeType != 1:  # 1 == ELEMENT_NODE
        return None
    toc_href = node.getAttribute('href')
    if '#' in toc_href:
        return None

    shortname = re.sub(r'(.*)\.html', r'\1', toc_href)
    guide_dir = os.path.join(dest, 'guides', shortname)
    try:
        os.makedirs(guide_dir)
    except OSError:
        # An existing directory is fine; any other failure would break the
        # copy below, so let it surface.
        if not os.path.isdir(guide_dir):
            raise

    readme_path = os.path.join(guide_dir, 'README.html')
    shutil.copyfile(
        os.path.join(os.path.dirname(options.input), shortname + '.html'),
        readme_path)

    # do some conversion for imported guides
    with open(readme_path, 'r') as readme:
        soup = BeautifulSoup(readme.read())

    # process all hyperlinks
    for tag in soup.findAll('a'):
        if tag.has_key('href'):
            _rewrite_anchor(tag, guide_dir)

    # drop the href of every <link> element
    for tag in soup.findAll('link'):
        if tag.has_key('href'):
            del tag['href']

    with open(readme_path, 'w') as readme:
        readme.write(soup.renderContents())

    res = {
        "name": shortname,
        "title": node.getAttribute('label'),
    }
    # Children that are skipped (None) are left out of the list.
    items = [item for item in (node2obj(child) for child in node.childNodes)
             if item]
    if items:
        res['items'] = items
    return res
# Turn every top-level element of the table of contents into a topic entry.
# node2obj returns None for nodes it skips, so those are left out.
for node in dom.documentElement.childNodes:
    if node.nodeType != 1:  # 1 == ELEMENT_NODE
        continue
    obj = node2obj(node)
    if obj is None:
        continue
    topics.append(obj)

# Ok, we've parsed document tree into tree structure.
# Write it next to the generated guides; 'with' closes the file even if
# serialisation fails.
with open(os.path.join(dest, 'guides.json'), 'w') as f:
    f.write(json.dumps(topics, indent=4))