forked from hugovk/peps
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check-peps.py
executable file
·608 lines (478 loc) · 22.2 KB
/
check-peps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
#!/usr/bin/env python3
# This file is placed in the public domain or under the
# CC0-1.0-Universal license, whichever is more permissive.
"""check-peps: Check PEPs for common mistakes.
Usage: check-peps [-d | --detailed] <PEP files...>
Only the PEPs specified are checked.
If none are specified, all PEPs are checked.
Use "--detailed" to show the contents of lines where errors were found.
"""
from __future__ import annotations
import datetime as dt
import re
import sys
from pathlib import Path
TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, KeysView, Sequence
from typing import TypeAlias
# (line number, warning message)
Message: TypeAlias = tuple[int, str]
MessageIterator: TypeAlias = Iterator[Message]
# get the directory with the PEP sources
ROOT_DIR = Path(__file__).resolve().parent
PEP_ROOT = ROOT_DIR / "peps"
# See PEP 12 for the order
# Note we retain "BDFL-Delegate"
ALL_HEADERS = (
"PEP",
"Title",
"Version",
"Last-Modified",
"Author",
"Sponsor",
"BDFL-Delegate", "PEP-Delegate",
"Discussions-To",
"Status",
"Type",
"Topic",
"Content-Type",
"Requires",
"Created",
"Python-Version",
"Post-History",
"Replaces",
"Superseded-By",
"Resolution",
)
REQUIRED_HEADERS = frozenset({"PEP", "Title", "Author", "Status", "Type", "Created"})
# See PEP 1 for the full list
ALL_STATUSES = frozenset({
"Accepted",
"Active",
"April Fool!",
"Deferred",
"Draft",
"Final",
"Provisional",
"Rejected",
"Superseded",
"Withdrawn",
})
# PEPs that are allowed to link directly to PEPs
SKIP_DIRECT_PEP_LINK_CHECK = frozenset({"0009", "0287", "0676", "0684", "8001"})
DEFAULT_FLAGS = re.ASCII | re.IGNORECASE # Insensitive latin
# any sequence of letters or '-', followed by a single ':' and a space or end of line
HEADER_PATTERN = re.compile(r"^([a-z\-]+):(?: |$)", DEFAULT_FLAGS)
# any sequence of unicode letters or legal special characters
NAME_PATTERN = re.compile(r"(?:[^\W\d_]|[ ',\-.])+(?: |$)")
# any sequence of ASCII letters, digits, or legal special characters
EMAIL_LOCAL_PART_PATTERN = re.compile(r"[\w!#$%&'*+\-/=?^{|}~.]+", DEFAULT_FLAGS)
DISCOURSE_THREAD_PATTERN = re.compile(r"([\w\-]+/)?\d+", DEFAULT_FLAGS)
DISCOURSE_POST_PATTERN = re.compile(r"([\w\-]+/)?\d+(/\d+)?", DEFAULT_FLAGS)
MAILMAN_2_PATTERN = re.compile(r"[\w\-]+/\d{4}-[a-z]+/\d+\.html", DEFAULT_FLAGS)
MAILMAN_3_THREAD_PATTERN = re.compile(r"[\w\-]+@python\.org/thread/[a-z0-9]+/?", DEFAULT_FLAGS)
MAILMAN_3_MESSAGE_PATTERN = re.compile(r"[\w\-]+@python\.org/message/[a-z0-9]+/?(#[a-z0-9]+)?", DEFAULT_FLAGS)
# Controlled by the "--detailed" flag
DETAILED_ERRORS = False
def check(filenames: Sequence[str] = (), /) -> int:
"""The main entry-point."""
if filenames:
filenames = map(Path, filenames)
else:
filenames = PEP_ROOT.glob("pep-????.rst")
if (count := sum(map(check_file, filenames))) > 0:
s = "s" * (count != 1)
print(f"check-peps failed: {count} error{s}", file=sys.stderr)
return 1
return 0
def check_file(filename: Path, /) -> int:
filename = filename.resolve()
try:
content = filename.read_text(encoding="utf-8")
except FileNotFoundError:
return _output_error(filename, [""], [(0, "Could not read PEP!")])
else:
lines = content.splitlines()
return _output_error(filename, lines, check_peps(filename, lines))
def check_peps(filename: Path, lines: Sequence[str], /) -> MessageIterator:
yield from check_headers(lines)
for line_num, line in enumerate(lines, start=1):
if filename.stem.removeprefix("pep-") in SKIP_DIRECT_PEP_LINK_CHECK:
continue
yield from check_direct_links(line_num, line.lstrip())
def check_headers(lines: Sequence[str], /) -> MessageIterator:
yield from _validate_pep_number(next(iter(lines), ""))
found_headers = {}
line_num = 0
for line_num, line in enumerate(lines, start=1):
if line.strip() == "":
headers_end_line_num = line_num
break
if match := HEADER_PATTERN.match(line):
header = match[1]
if header in ALL_HEADERS:
if header not in found_headers:
found_headers[match[1]] = line_num
else:
yield line_num, f"Must not have duplicate header: {header} "
else:
yield line_num, f"Must not have invalid header: {header}"
else:
headers_end_line_num = line_num
yield from _validate_required_headers(found_headers.keys())
shifted_line_nums = list(found_headers.values())[1:]
for i, (header, line_num) in enumerate(found_headers.items()):
start = line_num - 1
end = headers_end_line_num - 1
if i < len(found_headers) - 1:
end = shifted_line_nums[i] - 1
remainder = "\n".join(lines[start:end]).removeprefix(f"{header}:")
if remainder != "":
if remainder[0] not in {" ", "\n"}:
yield line_num, f"Headers must have a space after the colon: {header}"
remainder = remainder.lstrip()
yield from _validate_header(header, line_num, remainder)
def _validate_header(header: str, line_num: int, content: str) -> MessageIterator:
if header == "Title":
yield from _validate_title(line_num, content)
elif header == "Author":
yield from _validate_author(line_num, content)
elif header == "Sponsor":
yield from _validate_sponsor(line_num, content)
elif header in {"BDFL-Delegate", "PEP-Delegate"}:
yield from _validate_delegate(line_num, content)
elif header == "Discussions-To":
yield from _validate_discussions_to(line_num, content)
elif header == "Status":
yield from _validate_status(line_num, content)
elif header == "Type":
yield from _validate_type(line_num, content)
elif header == "Topic":
yield from _validate_topic(line_num, content)
elif header == "Content-Type":
yield from _validate_content_type(line_num, content)
elif header in {"Requires", "Replaces", "Superseded-By"}:
yield from _validate_pep_references(line_num, content)
elif header == "Created":
yield from _validate_created(line_num, content)
elif header == "Python-Version":
yield from _validate_python_version(line_num, content)
elif header == "Post-History":
yield from _validate_post_history(line_num, content)
elif header == "Resolution":
yield from _validate_resolution(line_num, content)
def check_direct_links(line_num: int, line: str) -> MessageIterator:
"""Check that PEPs and RFCs aren't linked directly"""
line = line.lower()
if "dev/peps/pep-" in line or "peps.python.org/pep-" in line:
yield line_num, "Use the :pep:`NNN` role to refer to PEPs"
if "rfc-editor.org/rfc/" in line or "ietf.org/doc/html/rfc" in line:
yield line_num, "Use the :rfc:`NNN` role to refer to RFCs"
def _output_error(filename: Path, lines: Sequence[str], errors: Iterable[Message]) -> int:
relative_filename = filename.relative_to(ROOT_DIR)
err_count = 0
for line_num, msg in errors:
err_count += 1
print(f"{relative_filename}:{line_num}: {msg}")
if not DETAILED_ERRORS:
continue
line = lines[line_num - 1]
print(" |")
print(f"{line_num: >4} | '{line}'")
print(" |")
return err_count
###########################
# PEP Header Validators #
###########################
def _validate_required_headers(found_headers: KeysView[str]) -> MessageIterator:
"""PEPs must have all required headers, in the PEP 12 order"""
if missing := REQUIRED_HEADERS.difference(found_headers):
for missing_header in sorted(missing, key=ALL_HEADERS.index):
yield 1, f"Must have required header: {missing_header}"
ordered_headers = sorted(found_headers, key=ALL_HEADERS.index)
if list(found_headers) != ordered_headers:
order_str = ", ".join(ordered_headers)
yield 1, "Headers must be in PEP 12 order. Correct order: " + order_str
def _validate_pep_number(line: str) -> MessageIterator:
"""'PEP' header must be a number 1-9999"""
if not line.startswith("PEP: "):
yield 1, "PEP must begin with the 'PEP:' header"
return
pep_number = line.removeprefix("PEP: ").lstrip()
yield from _pep_num(1, pep_number, "'PEP:' header")
def _validate_title(line_num: int, line: str) -> MessageIterator:
"""'Title' must be 1-79 characters"""
if len(line) == 0:
yield line_num, "PEP must have a title"
elif len(line) > 79:
yield line_num, "PEP title must be less than 80 characters"
def _validate_author(line_num: int, body: str) -> MessageIterator:
"""'Author' must be list of 'Name <[email protected]>, …'"""
lines = body.split("\n")
for offset, line in enumerate(lines):
if offset >= 1 and line[:9].isspace():
# Checks for:
# Author: Alice
# Bob
# ^^^^
# Note that len("Author: ") == 8
yield line_num + offset, "Author line must not be over-indented"
if offset < len(lines) - 1:
if not line.endswith(","):
yield line_num + offset, "Author continuation lines must end with a comma"
for part in line.removesuffix(",").split(", "):
yield from _email(line_num + offset, part, "Author")
def _validate_sponsor(line_num: int, line: str) -> MessageIterator:
"""'Sponsor' must have format 'Name <[email protected]>'"""
yield from _email(line_num, line, "Sponsor")
def _validate_delegate(line_num: int, line: str) -> MessageIterator:
"""'Delegate' must have format 'Name <[email protected]>'"""
if line == "":
return
# PEP 451
if ", " in line:
for part in line.removesuffix(",").split(", "):
yield from _email(line_num, part, "Delegate")
return
yield from _email(line_num, line, "Delegate")
def _validate_discussions_to(line_num: int, line: str) -> MessageIterator:
"""'Discussions-To' must be a thread URL"""
yield from _thread(line_num, line, "Discussions-To", discussions_to=True)
if line.startswith("https://"):
return
for suffix in "@python.org", "@googlegroups.com":
if line.endswith(suffix):
remainder = line.removesuffix(suffix)
if re.fullmatch(r"[\w\-]+", remainder) is None:
yield line_num, "Discussions-To must be a valid mailing list"
return
yield line_num, "Discussions-To must be a valid thread URL or mailing list"
def _validate_status(line_num: int, line: str) -> MessageIterator:
"""'Status' must be a valid PEP status"""
if line not in ALL_STATUSES:
yield line_num, "Status must be a valid PEP status"
def _validate_type(line_num: int, line: str) -> MessageIterator:
"""'Type' must be a valid PEP type"""
if line not in {"Standards Track", "Informational", "Process"}:
yield line_num, "Type must be a valid PEP type"
def _validate_topic(line_num: int, line: str) -> MessageIterator:
"""'Topic' must be for a valid sub-index"""
topics = line.split(", ")
unique_topics = set(topics)
if len(topics) > len(unique_topics):
yield line_num, "Topic must not contain duplicates"
if unique_topics - {"Governance", "Packaging", "Typing", "Release"}:
if not all(map(str.istitle, unique_topics)):
yield line_num, "Topic must be properly capitalised (Title Case)"
if unique_topics - {"governance", "packaging", "typing", "release"}:
yield line_num, "Topic must be for a valid sub-index"
if sorted(topics) != topics:
yield line_num, "Topic must be sorted lexicographically"
def _validate_content_type(line_num: int, line: str) -> MessageIterator:
"""'Content-Type' must be 'text/x-rst'"""
if line != "text/x-rst":
yield line_num, "Content-Type must be 'text/x-rst'"
def _validate_pep_references(line_num: int, line: str) -> MessageIterator:
"""`Requires`/`Replaces`/`Superseded-By` must be 'NNN' PEP IDs"""
line = line.removesuffix(",").rstrip()
if line.count(", ") != line.count(","):
yield line_num, "PEP references must be separated by comma-spaces (', ')"
return
references = line.split(", ")
for reference in references:
yield from _pep_num(line_num, reference, "PEP reference")
def _validate_created(line_num: int, line: str) -> MessageIterator:
"""'Created' must be a 'DD-mmm-YYYY' date"""
yield from _date(line_num, line, "Created")
def _validate_python_version(line_num: int, line: str) -> MessageIterator:
"""'Python-Version' must be an ``X.Y[.Z]`` version"""
versions = line.split(", ")
for version in versions:
if version.count(".") not in {1, 2}:
yield line_num, f"Python-Version must have two or three segments: {version}"
continue
try:
major, minor, micro = version.split(".", 2)
except ValueError:
major, minor = version.split(".", 1)
micro = ""
if major not in "123":
yield line_num, f"Python-Version major part must be 1, 2, or 3: {version}"
if not _is_digits(minor) and minor != "x":
yield line_num, f"Python-Version minor part must be numeric: {version}"
elif minor != "0" and minor[0] == "0":
yield line_num, f"Python-Version minor part must not have leading zeros: {version}"
if micro == "":
return
if minor == "x":
yield line_num, f"Python-Version micro part must be empty if minor part is 'x': {version}"
elif micro[0] == "0":
yield line_num, f"Python-Version micro part must not have leading zeros: {version}"
elif not _is_digits(micro):
yield line_num, f"Python-Version micro part must be numeric: {version}"
def _validate_post_history(line_num: int, body: str) -> MessageIterator:
"""'Post-History' must be '`DD-mmm-YYYY <Thread URL>`__, …'"""
if body == "":
return
for offset, line in enumerate(body.removesuffix(",").split("\n"), start=line_num):
for post in line.removesuffix(",").strip().split(", "):
prefix, postfix = (post.startswith("`"), post.endswith(">`__"))
if not prefix and not postfix:
yield from _date(offset, post, "Post-History")
elif prefix and postfix:
post_date, post_url = post[1:-4].split(" <")
yield from _date(offset, post_date, "Post-History")
yield from _thread(offset, post_url, "Post-History")
else:
yield offset, f"post line must be a date or both start with “`” and end with “>`__”"
def _validate_resolution(line_num: int, line: str) -> MessageIterator:
"""'Resolution' must be a direct thread/message URL"""
yield from _thread(line_num, line, "Resolution", allow_message=True)
########################
# Validation Helpers #
########################
def _pep_num(line_num: int, pep_number: str, prefix: str) -> MessageIterator:
if pep_number == "":
yield line_num, f"{prefix} must not be blank: {pep_number!r}"
return
if pep_number.startswith("0") and pep_number != "0":
yield line_num, f"{prefix} must not contain leading zeros: {pep_number!r}"
if not _is_digits(pep_number):
yield line_num, f"{prefix} must be numeric: {pep_number!r}"
elif not 0 <= int(pep_number) <= 9999:
yield line_num, f"{prefix} must be between 0 and 9999: {pep_number!r}"
def _is_digits(string: str) -> bool:
"""Match a string of ASCII digits ([0-9]+)."""
return string.isascii() and string.isdigit()
def _email(line_num: int, author_email: str, prefix: str) -> MessageIterator:
author_email = author_email.strip()
if author_email.count("<") > 1:
msg = f"{prefix} entries must not contain multiple '<': {author_email!r}"
yield line_num, msg
if author_email.count(">") > 1:
msg = f"{prefix} entries must not contain multiple '>': {author_email!r}"
yield line_num, msg
if author_email.count("@") > 1:
msg = f"{prefix} entries must not contain multiple '@': {author_email!r}"
yield line_num, msg
author = author_email.split("<", 1)[0].rstrip()
if NAME_PATTERN.fullmatch(author) is None:
msg = f"{prefix} entries must begin with a valid 'Name': {author_email!r}"
yield line_num, msg
return
email_text = author_email.removeprefix(author)
if not email_text:
# Does not have the optional email part
return
if not email_text.startswith(" <") or not email_text.endswith(">"):
msg = f"{prefix} entries must be formatted as 'Name <[email protected]>': {author_email!r}"
yield line_num, msg
email_text = email_text.removeprefix(" <").removesuffix(">")
if "@" in email_text:
local, domain = email_text.rsplit("@", 1)
elif " at " in email_text:
local, domain = email_text.rsplit(" at ", 1)
else:
yield line_num, f"{prefix} entries must contain a valid email address: {author_email!r}"
return
if EMAIL_LOCAL_PART_PATTERN.fullmatch(local) is None or _invalid_domain(domain):
yield line_num, f"{prefix} entries must contain a valid email address: {author_email!r}"
def _invalid_domain(domain_part: str) -> bool:
*labels, root = domain_part.split(".")
for label in labels:
if not label.replace("-", "").isalnum():
return True
return not root.isalnum() or not root.isascii()
def _thread(line_num: int, url: str, prefix: str, *, allow_message: bool = False, discussions_to: bool = False) -> MessageIterator:
if allow_message and discussions_to:
msg = "allow_message and discussions_to cannot both be True"
raise ValueError(msg)
msg = f"{prefix} must be a valid thread URL"
if not url.startswith("https://"):
if not discussions_to:
yield line_num, msg
return
if url.startswith("https://discuss.python.org/t/"):
remainder = url.removeprefix("https://discuss.python.org/t/").removesuffix("/")
# Discussions-To links must be the thread itself, not a post
if discussions_to:
# The equivalent pattern is similar to '([\w\-]+/)?\d+',
# but the topic name must contain a non-numeric character
# We use ``str.rpartition`` as the topic name is optional
topic_name, _, topic_id = remainder.rpartition("/")
if topic_name == '' and _is_digits(topic_id):
return
topic_name = topic_name.replace("-", "0").replace("_", "0")
# the topic name must not be entirely numeric
valid_topic_name = not _is_digits(topic_name) and topic_name.isalnum()
if valid_topic_name and _is_digits(topic_id):
return
else:
# The equivalent pattern is similar to '([\w\-]+/)?\d+(/\d+)?',
# but the topic name must contain a non-numeric character
if remainder.count("/") == 2:
# When there are three parts, the URL must be "topic-name/topic-id/post-id".
topic_name, topic_id, post_id = remainder.rsplit("/", 2)
topic_name = topic_name.replace("-", "0").replace("_", "0")
valid_topic_name = not _is_digits(topic_name) and topic_name.isalnum()
if valid_topic_name and _is_digits(topic_id) and _is_digits(post_id):
# the topic name must not be entirely numeric
return
elif remainder.count("/") == 1:
# When there are only two parts, there's an ambiguity between
# "topic-name/topic-id" and "topic-id/post-id".
# We disambiguate by checking if the LHS is a valid name and
# the RHS is a valid topic ID (for the former),
# and then if both the LHS and RHS are valid IDs (for the latter).
left, right = remainder.rsplit("/")
left = left.replace("-", "0").replace("_", "0")
# the topic name must not be entirely numeric
left_is_name = not _is_digits(left) and left.isalnum()
if left_is_name and _is_digits(right):
return
elif _is_digits(left) and _is_digits(right):
return
else:
# When there's only one part, it must be a valid topic ID.
if _is_digits(remainder):
return
if url.startswith("https://mail.python.org/pipermail/"):
remainder = url.removeprefix("https://mail.python.org/pipermail/")
if MAILMAN_2_PATTERN.fullmatch(remainder) is not None:
return
if url.startswith("https://mail.python.org/archives/list/"):
remainder = url.removeprefix("https://mail.python.org/archives/list/")
if allow_message and MAILMAN_3_MESSAGE_PATTERN.fullmatch(remainder) is not None:
return
if MAILMAN_3_THREAD_PATTERN.fullmatch(remainder) is not None:
return
yield line_num, msg
def _date(line_num: int, date_str: str, prefix: str) -> MessageIterator:
try:
parsed_date = dt.datetime.strptime(date_str, "%d-%b-%Y")
except ValueError:
yield line_num, f"{prefix} must be a 'DD-mmm-YYYY' date: {date_str!r}"
return
else:
if date_str[1] == "-": # Date must be zero-padded
yield line_num, f"{prefix} must be a 'DD-mmm-YYYY' date: {date_str!r}"
return
if parsed_date.year < 1990:
yield line_num, f"{prefix} must not be before Python was invented: {date_str!r}"
if parsed_date > (dt.datetime.now() + dt.timedelta(days=14)):
yield line_num, f"{prefix} must not be in the future: {date_str!r}"
if __name__ == "__main__":
if {"-h", "--help", "-?"}.intersection(sys.argv[1:]):
print(__doc__, file=sys.stderr)
raise SystemExit(0)
files = {}
for arg in sys.argv[1:]:
if not arg.startswith("-"):
files[arg] = None
elif arg in {"-d", "--detailed"}:
DETAILED_ERRORS = True
else:
print(f"Unknown option: {arg!r}", file=sys.stderr)
raise SystemExit(1)
raise SystemExit(check(files))