-
Notifications
You must be signed in to change notification settings - Fork 0
/
interwiki_graph.py
150 lines (136 loc) · 5.71 KB
/
interwiki_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
""" Module with the graphviz drawing calls """
#
# (C) Pywikipedia bot team, 2006-2012
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
import threading
# pydot is optional: remember whether it could be imported so that
# GraphDrawer can refuse to start instead of failing later with a NameError.
pydotfound = True
try:
    import pydot
except ImportError:
    pydotfound = False
import wikipedia as pywikibot
import config
class GraphImpossible(Exception):
    """Drawing a graph is not possible on your system.

    Raised by GraphDrawer when the pydot package could not be imported.
    """
class GraphSavingThread(threading.Thread):
    """
    Write a graph to disk in a separate thread.

    Rendering a graph can take extremely long. We use
    multithreading because of that.

    TODO: Find out if several threads running in parallel
    can slow down the system too much. Consider adding a
    mechanism to kill a thread if it takes too long.
    """

    def __init__(self, graph, originPage):
        """
        @param graph: object whose write(filename, prog=..., format=...)
            method renders the graph (a pydot graph when created by
            GraphDrawer).
        @param originPage: page handed to getFilename() to derive the
            names of the output files.
        """
        threading.Thread.__init__(self)
        self.graph = graph
        self.originPage = originPage

    def run(self):
        """
        Render the graph once for every format listed in
        config.interwiki_graph_formats and report each result.
        """
        # The loop variable is called 'fmt' so that the builtin format()
        # is not shadowed.
        for fmt in config.interwiki_graph_formats:
            filename = 'interwiki-graphs/' + getFilename(self.originPage,
                                                         fmt)
            # write() reports success through its (truthy) return value.
            if self.graph.write(filename, prog='dot', format=fmt):
                pywikibot.output(u'Graph saved as %s' % filename)
            else:
                pywikibot.output(u'Graph could not be saved as %s' % filename)
class GraphDrawer(object):
    """
    Build a graphviz graph (through pydot) for one subject and save it.

    The subject must provide:
      - originPage: the page the graph starts from
      - foundIn: a dict mapping every found page to the pages that
        referred to it (a referrer may be None, see addDirectedEdge)
    """

    def __init__(self, subject):
        """
        @param subject: object providing originPage and foundIn.
        @raise GraphImpossible: if pydot could not be imported.
        """
        if not pydotfound:
            raise GraphImpossible('pydot is not installed.')
        # The graph is only created by createGraph().
        self.graph = None
        self.subject = subject

    def getLabel(self, page):
        """Return the quoted, UTF-8 encoded node name '"lang:title"'."""
        return (u'"%s:%s"' % (page.site.language(),
                              page.title())).encode('utf-8')

    def addNode(self, page):
        """
        Add a node for page to the graph.

        The fill colour shows the page state (red: does not exist,
        blue: redirect, orange: disambiguation, white otherwise). A green
        bold border marks a namespace different from the origin page, and
        an octagonal shape marks a conflict within one site.
        """
        node = pydot.Node(self.getLabel(page), shape='rectangle')
        node.set_URL("\"http://%s%s\""
                     % (page.site.hostname(),
                        page.site.get_address(page.urlname())))
        node.set_style('filled')
        node.set_fillcolor('white')
        node.set_fontsize('11')
        if not page.exists():
            node.set_fillcolor('red')
        elif page.isRedirectPage():
            node.set_fillcolor('blue')
        elif page.isDisambig():
            node.set_fillcolor('orange')
        if page.namespace() != self.subject.originPage.namespace():
            node.set_color('green')
            node.set_style('filled,bold')
        # if we found more than one valid page for this language:
        # (counted with a generator so the check does not depend on
        # filter() returning a list)
        validOnSameSite = sum(1 for p in self.subject.foundIn
                              if p.site == page.site and p.exists()
                              and not p.isRedirectPage())
        if validOnSameSite > 1:
            # mark conflict by octagonal node
            node.set_shape('octagon')
        self.graph.add_node(node)

    def addDirectedEdge(self, page, refPage):
        """
        Add an edge from refPage to page, unless refPage is None.

        If the opposite edge already exists, it is turned into a
        bidirectional edge instead of adding a second one. Edge colours:
        blue for links within one site, red for links to missing pages,
        orange for links between disambiguation and non-disambiguation
        pages; green (namespace mismatch) overrides all of them.
        """
        # if page was given as a hint, referrers would be [None]
        if refPage is None:
            return
        sourceLabel = self.getLabel(refPage)
        targetLabel = self.getLabel(page)
        oppositeEdge = self.graph.get_edge(targetLabel, sourceLabel)
        if oppositeEdge:
            if isinstance(oppositeEdge, list):
                # bugfix for pydot >= 1.0.3
                oppositeEdge = oppositeEdge[0]
            oppositeEdge.set_dir('both')
        # workaround for bug [ 1722739 ]: prevent duplicate edges
        # (it is unclear why duplicate edges occur)
        elif self.graph.get_edge(sourceLabel, targetLabel):
            pywikibot.output(
                u'BUG: Tried to create duplicate edge from %s to %s'
                % (refPage.title(asLink=True), page.title(asLink=True)))
            # duplicate edges would be bad because then get_edge() would
            # give a list of edges, not a single edge when we handle the
            # opposite edge.
        else:
            # add edge; it is only constructed here because the other
            # branches never use it.
            edge = pydot.Edge(sourceLabel, targetLabel)
            if refPage.site == page.site:
                edge.set_color('blue')
            elif not page.exists():
                # mark dead links
                edge.set_color('red')
            elif refPage.isDisambig() != page.isDisambig():
                # mark links between disambiguation and non-disambiguation
                # pages
                edge.set_color('orange')
            # deliberately a separate 'if': a namespace mismatch overrides
            # any colour chosen above.
            if refPage.namespace() != page.namespace():
                edge.set_color('green')
            self.graph.add_edge(edge)

    def saveGraphFile(self):
        """Start a GraphSavingThread that writes the graph files."""
        thread = GraphSavingThread(self.graph, self.subject.originPage)
        thread.start()

    def createGraph(self):
        """
        Create the graph for the subject and save it.

        See http://meta.wikimedia.org/wiki/Interwiki_graphs
        """
        pywikibot.output(u'Preparing graph for %s'
                         % self.subject.originPage.title())
        # create empty graph
        self.graph = pydot.Dot()
        for page in self.subject.foundIn:
            # a node for each found page
            self.addNode(page)
        # mark start node by pointing there from a black dot.
        firstLabel = self.getLabel(self.subject.originPage)
        self.graph.add_node(pydot.Node('start', shape='point'))
        self.graph.add_edge(pydot.Edge('start', firstLabel))
        for page, referrers in self.subject.foundIn.items():
            for refPage in referrers:
                self.addDirectedEdge(page, refPage)
        self.saveGraphFile()
def getFilename(page, extension=None):
    """
    Return the graph file name for page: 'family-language-title'.

    @param page: page providing site.family.name, site.language() and
        titleForFilename().
    @param extension: optional file extension without the leading dot;
        when it is empty or None no suffix is appended.
    @return: the file name, without any directory part.
    """
    filename = '%s-%s-%s' % (page.site.family.name,
                             page.site.language(),
                             page.titleForFilename())
    if extension:
        filename += '.%s' % extension
    return filename