-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
legal.py
230 lines (176 loc) · 6.87 KB
/
legal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# Copyright 2014-2023 the openage authors. See copying.md for legal info.
"""
Checks the legal headers of all files.
"""
from datetime import date
import re
from subprocess import Popen, PIPE
from .util import findfiles, readfile, writefile, has_ext, SHEBANG
# Regex source (not compiled) for the openage copyright notice.
# The named groups `crstart` and `crend` capture the first and the last
# year of the copyright range.
OPENAGE_AUTHORS = (
    "Copyright (?P<crstart>\\d{4})-(?P<crend>\\d{4}) the openage authors\\."
)

# str.format() template that produces text in the shape OPENAGE_AUTHORS
# matches; used to write a notice with corrected years.
OPENAGE_AUTHORTEMPLATE = (
    "Copyright {crstart}-{crend} the openage authors."
)

# Header of a file that is entirely openage's own work: one comment line,
# introduced by `#` or `//` and a single space, holding the copyright
# notice and the pointer to copying.md.
NATIVELEGALHEADER = re.compile(
    "^"
    # Allow shebang line, followed by an optional empty line.
    "(" + SHEBANG + ")?"
    # Next line must be the copyright line.
    "(#|//) " + OPENAGE_AUTHORS + " See copying\\.md for legal info\\.\n"
)

# Header of a file that contains third-party material: origin line,
# third-party copyright line and license line, optional further comment
# lines, then the openage notice and the pointer to copying.md.
THIRDPARTYLEGALHEADER = re.compile(
    "^"
    # 3rd-party copyright/license
    "(#|//) This file (was (taken|adapted)|contains (data|code)) from .*\n"
    "(#|//) Copyright \\d{4}-\\d{4} .*\n"
    "(#|//) .*license.*\n"
    # any number of lines containing further 3rd-party copyright info
    "((#|//) .*\\n)*"
    # the openage copyright
    "(#|//) (Modifications|Other (data|code)|Everything else) " +
    OPENAGE_AUTHORS + "\n"
    "(#|//) See copying\\.md for further legal info\\.\n")

# Empty files (consisting of only comments) don't require a legal header.
# Every line must be blank or a `#`/`//` comment whose marker is followed
# by a space, and every line must be terminated by a newline.
EMPTYFILE = re.compile("^(((#|//) .*)?\n)*$")

# cython-generated files: the first line contains one of the two markers.
# NOTE(review): the `.` after "compilation" is unescaped, so it matches
# any single character, not only a literal dot.
CYTHONGENERATED = re.compile("^[^\\n]*(Generated by Cython |failed Cython compilation.)")

# all those files will be checked.
# (these are file name extensions; they are handed to findfiles() and
# has_ext() elsewhere in this module)
EXTENSIONS_REQUIRING_LEGAL_HEADERS = {
    '.h', '.cpp', '.py', '.pyx', '.pxi', '.cmake', '.h.in',
    '.cpp.in', '.py.in', '.h.template', '.cpp.template',
    '.py.template', '.qml'
}
def get_git_change_year(filename):
    """
    Ask git for the year in which filename was last changed.

    Only the most recent non-merge commit touching the file is considered.

    Returns the year as an int, or None if git exits with a non-zero
    status or prints nothing for the file.
    """
    git_log_cmd = [
        'git', 'log', '-1', '--format=%ad', '--date=short', '--no-merges',
        '--', filename,
    ]

    with Popen(git_log_cmd, stdout=PIPE) as git:
        raw_stdout, _ = git.communicate()

    last_change = raw_stdout.decode('utf-8', errors='ignore').strip()

    if git.returncode == 0 and last_change:
        # the short date format starts with the four-digit year
        return int(last_change[:4])

    # git doesn't know about the file
    return None
def match_legalheader(data):
    """
    Find the first known header pattern that matches the start of data.

    The patterns are tried in a fixed order: native openage header,
    third-party header, empty file, cython-generated file.

    Returns a tuple of (matching header regex, match object).
    Raises ValueError if none of the patterns matches.
    """
    candidates = (
        NATIVELEGALHEADER,
        THIRDPARTYLEGALHEADER,
        EMPTYFILE,
        CYTHONGENERATED,
    )

    for pattern in candidates:
        found = pattern.match(data)
        if found is None:
            continue
        return pattern, found

    raise ValueError("no match found")
def create_year_fix(filename, file_content, expected_end_year,
                    found_start_year, headertype):
    """
    Create a function that, when called, fixes the copyright header.

    filename: path the corrected content is written to.
    file_content: text of the file, as it was read by the caller.
    expected_end_year: year to put at the end of the copyright range.
    found_start_year: start year to keep in the copyright range.
    headertype: the header regex that matched the file.

    Returns None if no fix can be created (only native and third-party
    headers carry openage copyright years). Otherwise returns a function
    without arguments that rewrites the file and returns a status message.
    """
    # check if a fix can be created
    if headertype not in {NATIVELEGALHEADER, THIRDPARTYLEGALHEADER}:
        return None

    def year_fix_function():
        """
        Store the file with correct copyright years.

        Raises ValueError if the file content contains no openage
        copyright notice that could be rewritten.
        """
        # Rewrite the first notice only. Both fixable header types are
        # anchored at the start of the file, so the first notice is
        # expected to be the one in the legal header. Any later
        # occurrence (e.g. in an embedded template or a string literal)
        # must keep its own years.
        fixed_file, success = re.subn(
            OPENAGE_AUTHORS,
            OPENAGE_AUTHORTEMPLATE.format(crstart=found_start_year,
                                          crend=expected_end_year),
            file_content,
            count=1
        )

        if not success:
            raise ValueError("copyright year fix did not suceeed")

        writefile(filename, fixed_file)

        return f"Copyright for {filename} was fixed."

    return year_fix_function
def test_headers(check_files, paths, git_change_years, third_party_files):
    """
    Tests all in-sourcefile legal headers.

    check_files: if not None, only files contained in it have their
                 copyright year compared against the git history.
    paths: handed to findfiles() to select the files to test.
    git_change_years: whether the copyright end year of committed files
                      is compared with the year of their last git change.
    third_party_files: set; the name of every file that carries a
                       third-party header is added to it.

    Yields (title, description, fix) tuples, one per issue. fix is either
    None or a function that repairs the issue when called.
    """
    if not git_change_years:
        print("warning: I won't check if the copyright matches the git history.")
        print(" Run with --test-git-change-years to enable the check.")

    # determine all uncommitted files from git.
    # those definitely need the current year in the copyright message.
    with Popen(['git', 'diff', '--name-only', 'HEAD'], stdout=PIPE) as proc:
        diff_output = proc.communicate()[0]

    # Decode leniently: a strict ASCII decode would abort the whole check
    # as soon as git prints one non-ASCII path. Undecodable bytes are
    # replaced rather than dropped, so a damaged name can never collide
    # with the name of a different, real file.
    uncommitted = set(
        diff_output.decode('utf-8', errors='replace').strip().split('\n')
    )

    current_calendar_year = date.today().year

    for filename in findfiles(paths, EXTENSIONS_REQUIRING_LEGAL_HEADERS):
        try:
            file_content = readfile(filename)
            headertype, match = match_legalheader(file_content)
        except ValueError:
            yield (
                "Legal header missing or invalid",
                (filename + "\nSee copying.md for a template"),
                None
            )
            continue

        if headertype is THIRDPARTYLEGALHEADER:
            third_party_files.add(filename)

        try:
            found_start_year = int(match.group('crstart'))
            found_end_year = int(match.group('crend'))
        except IndexError:
            # this header type has/needs no copyright years
            # (e.g. empty file)
            continue

        # uncommitted files are always checked against the current year;
        # committed ones only if the git history check was requested.
        expected_end_year = None
        if filename in uncommitted:
            expected_end_year = current_calendar_year
        elif git_change_years:
            if check_files is None or filename in check_files:
                expected_end_year = get_git_change_year(filename)

        if expected_end_year is None:
            # nothing to compare the found year against
            continue

        if found_end_year != expected_end_year:
            fix = create_year_fix(
                filename,
                file_content,
                expected_end_year,
                found_start_year,
                headertype
            )
            yield (
                "Bad copyright year",
                (filename + "\n" +
                 f"\tExpected {expected_end_year}\n" +
                 f"\tFound {found_end_year}"),
                fix
            )
def find_issues(check_files, paths, git_change_years=False):
    """
    Check all source files for the required legal headers, then compare
    the files that carry a third-party header with the list in copying.md.

    Yields (title, description, fix) tuples, one per issue found.
    """
    # test_headers() adds every file with a third-party header to this set
    files_with_3rdparty_header = set()
    yield from test_headers(check_files, paths, git_change_years,
                            files_with_3rdparty_header)

    # collect the backtick-quoted file names from copying.md's list entries
    entry_matches = (
        re.match("^ - `([^`]+)`.*$", entry)
        for entry in readfile('copying.md').split('\n')
    )
    files_in_copying_md = {found.group(1) for found in entry_matches if found}

    # listed in copying.md, but the file has no 3rd-party header?
    for filename in sorted(files_in_copying_md - files_with_3rdparty_header):
        if not has_ext(filename, EXTENSIONS_REQUIRING_LEGAL_HEADERS):
            # files of this type carry no legal header at all
            continue

        description = (f"{filename}\n\tlisted in copying.md, but has no "
                       "third-party license header.")
        yield ("third-party file listing issue", description, None)

    # has a 3rd-party header, but copying.md doesn't mention the file?
    for filename in sorted(files_with_3rdparty_header - files_in_copying_md):
        description = (f"{filename}\n\thas a third-party license header, but isn't "
                       "listed in copying.md")
        yield ("third-party file listing issue", description, None)