Skip to content
Merged
4 changes: 3 additions & 1 deletion book.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,15 @@ suppress_processing = [
"docs/ART.md",
"docs/CONTRIBUTORS.md",
"docs/GRANT-ACKNOWLEDGMENTS.md",
"docs/HOME.md",
"docs/MAINTAINERS.md",
"docs/PROJECTS.md",
"docs/STATEMENT-OF-INCLUSIVITY.md",
"docs/SUMMARY.md",
"docs/VISUALIZATION.md"
]
sitewide_changes = [
"docs/HOME.md"
]

[preprocessor.concepts]
command = "python3 ./scripts/preprocessor_concepts.py"
Expand Down
95 changes: 64 additions & 31 deletions scripts/preprocessor_git_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,50 @@ def nobreak_span(text):
return f'<span class="prefer-nobreak">{text}</span>'


def get_recent_changes(contributors, *args):
"""
Returns a markdown list of the most recent RECENT_CHANGES_COUNT commits.
"""
recent_changes_output = subprocess.run([
'git', 'log',
# Show only last RECENT_CHANGES_COUNT commits
'-n', str(RECENT_CHANGES_COUNT),
# Get hash, date, message, author and coauthors, separated by tabs
# NB When there are no trailers, the line ends with a tab
# NB Coauthors usually have the format "name <email>" and there is
# no way to tell git to strip the email, so it needs to be done
# in post processing
'--format=%H%x09%as%x09%s%x09%an%x09%(trailers:key=co-authored-by,valueonly=true,separator=%x09)',
'HEAD',
*args
], capture_output=True, text=True, check=True).stdout.splitlines()

skipped_authors = set()
recent_changes = '## Recent changes\n'

for line in recent_changes_output:
[sha, date, message, *raw_authors] = line.split('\t')
author_indices = []
for raw_author in map(cleanup_author_part, raw_authors):
if raw_author == '':
continue
author_index = get_real_author_index(raw_author, contributors)
if author_index is None:
skipped_authors.add(raw_author)
continue
author_indices.append(author_index)

if not author_indices:
continue

formatted_authors = format_multiple_authors_attribution([
contributors[idx]['displayName'] for idx in author_indices
])
recent_changes += f'- {date}. {formatted_authors}. <i><a target="_blank" href={github_page_for_commit(sha)}>{message}.</a></i>\n'

return (recent_changes, skipped_authors)


def get_author_element_for_file(filename, include_contributors, contributors, contributors_file):
"""
Extracts git usernames of contributors to a particular file
Expand Down Expand Up @@ -104,37 +148,9 @@ def get_author_element_for_file(filename, include_contributors, contributors, co
created_date = file_log_output[-1]
modified_date = file_log_output[0]

recent_changes_output = subprocess.run([
'git', 'log',
# Show only last RECENT_CHANGES_COUNT commits
'-n', str(RECENT_CHANGES_COUNT),
# Get hash, date, message, author and coauthors, separated by tabs
# NB When there are no trailers, the line ends with a tab
# NB Coauthors usually have the format "name <email>" and there is
# no way to tell git to strip the email, so it needs to be done
# in post processing
'--format=%H%x09%as%x09%s%x09%an%x09%(trailers:key=co-authored-by,valueonly=true,separator=%x09)',
'HEAD', '--', filename
], capture_output=True, text=True, check=True).stdout.splitlines()
recent_changes = '## Recent changes\n'
for recent_changes_line in recent_changes_output:
[sha, date, message, *raw_authors] = recent_changes_line.split('\t')
author_indices = []
for raw_author in map(cleanup_author_part, raw_authors):
# Line ended with a tab
if raw_author == '':
continue
author_index = get_real_author_index(raw_author, contributors)
if author_index is None:
skipped_authors.add(raw_author)
continue
author_indices.append(author_index)
if len(author_indices) == 0:
continue
formatted_authors = format_multiple_authors_attribution([
contributors[author_index]['displayName'] for author_index in author_indices
])
recent_changes += f'- {date}. {formatted_authors}. <i><a target="_blank" href={github_page_for_commit(sha)}>{message}.</a></i>\n'
recent_changes, recent_skipped_authors = get_recent_changes(
contributors, '--', filename)
skipped_authors.update(recent_skipped_authors)

if skipped_authors:
print_skipping_contributors_warning(skipped_authors, contributors_file)
Expand Down Expand Up @@ -164,6 +180,20 @@ def add_author_info_to_chapter_rec_mut(roots, chapter, contributors, config):
source_file_name = potential_source_file_name

if source_file_name in config['suppress_processing']:
# Suppress processing for this page
return

if source_file_name in config['sitewide_changes']:
# Insert recent sitewide changes on page
footer_recent_sitewide, skipped_authors = get_recent_changes(
contributors, '--invert-grep', '--grep=^chore:')

if skipped_authors:
print_skipping_contributors_warning(
skipped_authors, config['contributors_file'])

# Append to end of file
chapter['content'] += '\n' + footer_recent_sitewide
return

header_info_element, footer_info_element = get_author_element_for_file(
Expand All @@ -172,6 +202,7 @@ def add_author_info_to_chapter_rec_mut(roots, chapter, contributors, config):
for ext in config['attribute_file_extensions'])),
contributors,
config['contributors_file'])

# Assumption: The title is the first header in the file
chapter_heading_start = chapter['content'].index('# ')
chapter_heading_end = chapter['content'].index('\n', chapter_heading_start)
Expand Down Expand Up @@ -225,6 +256,8 @@ def add_author_info_to_root_section(roots, section, contributors, config):
'attribute_file_extensions', [])
metadata_config['suppress_processing'] = metadata_config.get(
'suppress_processing', [])
metadata_config['sitewide_changes'] = metadata_config.get(
'sitewide_changes', [])
metadata_config['contributors_file'] = metadata_config.get(
'contributors_file', 'CONTRIBUTORS.toml')

Expand Down