forked from machinekit/machinekit-docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchecklinks.py
127 lines (106 loc) · 3.38 KB
/
checklinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os, sys, sgmllib, cookielib, urllib, htmlentitydefs
# Command line: checklinks.py [REF [TARGET ...]]
_cli_args = sys.argv[1:]

# REF is the HTML file whose links get checked; the gcode page is used
# when no argument is supplied.
ref = "../html/gcode.html"
if _cli_args:
    ref = _cli_args[0]

# Any further arguments are the files that links are allowed to point at.
# None (no such arguments) switches the "unlisted target" check off.
targets = _cli_args[1:] or None
def get(attr, attrs, default=""):
    """Look up an attribute by name, ignoring case.

    attrs is a sequence of (name, value) pairs.  The value of the first
    pair whose name equals attr case-insensitively is returned; default
    is returned when no pair matches.
    """
    wanted = attr.lower()
    for name, value in attrs:
        if name.lower() != wanted:
            continue
        return value
    return default
class MetaHandler:
    """Mixin for the SGML parsers below that picks up the page's charset.

    It handles ``<meta http-equiv="content-type" content="...; charset=X">``
    and stores ``X`` in ``self.encoding``.  The parser classes in this file
    initialise ``self.encoding`` to None themselves.
    """

    def do_meta(self, attrs):
        """Process one <meta> tag given its (name, value) attribute pairs.

        Tags other than a content-type declaration, and declarations
        without a charset, leave ``self.encoding`` untouched.
        """
        equiv = get("http-equiv", attrs)
        content = get("content", attrs)
        # http-equiv values are case-insensitive and are commonly written
        # "Content-Type", so compare in lower case.
        if equiv.lower() != "content-type":
            return
        # split_header_words returns an empty list for an empty header
        # value; guard before indexing.
        words = cookielib.split_header_words([content])
        if not words:
            return
        encoding = get("charset", words[0])
        if not encoding:
            # no charset parameter, or a bare "charset" without a value
            return
        # ASCII is widened to ISO-8859-1 -- presumably so that pages with
        # stray 8-bit bytes still decode; confirm before changing.
        if encoding.upper() in ("ASCII", "US-ASCII"):
            encoding = "ISO-8859-1"
        self.encoding = encoding
class get_refs(sgmllib.SGMLParser, MetaHandler):
    """SGML parser that collects the link target of every <a href> tag.

    After feed(), ``self.refs`` is the set of URL-unquoted href values.
    They are decoded with the page's declared charset when a charset
    declaration was seen before the link.
    """

    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.refs = set()
        self.encoding = None

    def do_a(self, attrs):
        """Record the href of one <a> tag; tags without an href are skipped."""
        href = get('href', attrs)
        if not href:
            # <a name="..."> anchors carry no href.  They are not links and
            # must not add an empty entry to the set of references.
            return
        if self.encoding:
            href = href.decode(self.encoding)
        self.refs.add(urllib.unquote(href))
class get_anchors(sgmllib.SGMLParser, MetaHandler):
    """SGML parser that collects every anchor a link could point at.

    After feed(), ``self.anchors`` is the set of URL-unquoted anchor names:
    the ``name`` and ``id`` of <a> tags plus the ``id`` of any tag that has
    no dedicated handler.  Names are decoded with the page's declared
    charset when a charset declaration was seen before them.
    """

    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.anchors = set()
        self.encoding = None

    def unknown_starttag(self, tag, attrs):
        """Treat the id attribute of an otherwise unhandled tag as an anchor."""
        anchor_id = get('id', attrs)
        if anchor_id:
            self.do_a([('name', anchor_id)])

    def unknown_endtag(self, tag):
        pass

    def do_a(self, attrs):
        """Record the name and the id of one <a> tag as anchors.

        Both attributes are valid link targets, so both are stored when a
        tag carries both; empty values are ignored.
        """
        for value in (get('name', attrs), get('id', attrs)):
            if not value:
                continue
            if self.encoding:
                value = value.decode(self.encoding)
            value = urllib.unquote(value)
            if value:
                self.anchors.add(value)
# Anchor sets already parsed, keyed by the filename they were read from.
_anchors = {}


def get_anchors_cached(filename):
    """Return the set of anchors defined in filename, parsing it at most once.

    Errors raised while opening or reading the file propagate to the caller;
    nothing is cached in that case.
    """
    if filename not in _anchors:
        parser = get_anchors()
        source = open(filename)
        try:
            parser.feed(source.read())
        finally:
            # close explicitly instead of leaving the handle to the
            # garbage collector
            source.close()
        _anchors[filename] = parser.anchors
    return _anchors[filename]
def resolve_file(src, target):
    """Split a link into the file it points at and its fragment.

    src is the path of the document that contains the link and target is
    the href as written in that document.

    Returns a (path, anchor) pair.  path is the linked file: an href with a
    file part is resolved against the directory of src, and an href without
    one (fragment only, or empty) refers to src itself.  anchor is the text
    after the first "#", or None when the href contains no "#".
    """
    if "#" in target:
        path, anchor = target.split("#", 1)
    else:
        path, anchor = target, None
    if not path:
        # The link points back into the source document.  src already
        # carries its directory, so joining it again would double it.
        return src, anchor
    return os.path.join(os.path.dirname(src), path), anchor
def resolve(target, anchor):
    """Tell whether anchor is defined in the file target.

    A link without a fragment (anchor is None or empty) always resolves;
    in that case the file is not read at all.
    """
    if anchor:
        return anchor in get_anchors_cached(target)
    return True
# Collect every link target found in the reference document.
_ref_parser = get_refs()
_ref_file = open(ref)
try:
    _ref_parser.feed(_ref_file.read())
finally:
    # close explicitly instead of leaving the handle to the garbage collector
    _ref_file.close()
refs = _ref_parser.refs

# Each link ends up in exactly one of these sets.
missing_anchor = set()
missing_file = set()
unlisted_targets = set()
good = set()

# NOTE(review): every href is resolved as a local path, so external URLs
# (http:, mailto:, ...) would be reported as missing files -- confirm
# whether the checked pages are expected to contain only local links.
for r in refs:
    target, anchor = resolve_file(ref, r)
    if targets and target not in targets:
        unlisted_targets.add(target)
    elif not os.path.exists(target):
        missing_file.add(r)
    elif not resolve(target, anchor):
        missing_anchor.add(r)
    else:
        good.add(r)

# Report.  print is always called with one parenthesised argument, which
# the Python 2 print statement outputs exactly like the bare form.
if missing_file:
    print("Files linked to in %s but could not be found:" % (
        os.path.basename(ref),))
    for i in sorted(missing_file):
        print("\t%r" % i)
if missing_anchor:
    print("Anchors used in %s but not defined in linked file:" % (
        os.path.basename(ref),))
    for i in sorted(missing_anchor):
        print("\t%r" % i)
if unlisted_targets:
    print("Links to files not listed as targets:")
    for i in sorted(unlisted_targets):
        print("\t%r" % i)
    print("If all link targets are not listed in the Submakefile, then the results of this program is unreliable.")
print("Good links: %d/%d" % (len(good), len(refs)))

# Any problem makes the script exit with status 1.
if missing_anchor or missing_file or unlisted_targets:
    raise SystemExit(1)