Skip to content

Commit

Permalink
Use --porcelain version of git blame
Browse files Browse the repository at this point in the history
Closes #20

While parsing the --porcelain output of git blame is more complicated
than that of git blame -lts

* it should be considered more robust
* it provides substantially more information:
  - we can get all information about the commits that we need
  - this makes it obsolete to call `git show` on the commits

The format of the commit timestamp is different in this command,
therefore the datetime processing functions had to be modified;
they were moved to util.py at the same time.
  • Loading branch information
uliska committed Mar 4, 2020
1 parent ba91edd commit 6f5822c
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 67 deletions.
156 changes: 106 additions & 50 deletions mkdocs_git_authors_plugin/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def find_repo_root(self):
cmd.run()
return cmd.stdout()[0]

def get_commit(self, sha: str):
def get_commit(self, sha: str, **kwargs):
"""
Return the (cached) Commit object for given sha.
Expand All @@ -232,7 +232,7 @@ def get_commit(self, sha: str):
Commit object
"""
if not self._commits.get(sha):
self._commits[sha] = Commit(self, sha)
self._commits[sha] = Commit(self, sha, **kwargs)
return self._commits.get(sha)

def page(self, path):
Expand Down Expand Up @@ -317,9 +317,19 @@ class Commit(AbstractRepoObject):
Stores only information relevant to our plugin:
- author name and email,
- date/time
- summary (not used at this point)
"""

def __init__(self, repo: Repo, sha: str):
def __init__(
self,
repo: Repo,
sha: str,
author_name: str,
author_email: str,
author_time: str,
author_tz: str,
summary: str
):
"""Initialize a commit from its SHA.
Populates the object running git show.
Expand All @@ -330,7 +340,14 @@ def __init__(self, repo: Repo, sha: str):
"""

super().__init__(repo)
self._populate(sha)

self._author = self.repo().author(
author_name,
author_email
)
self._datetime = util.commit_datetime(author_time, author_tz)
self._datetime_string = util.commit_datetime_string(self._datetime)
self._summary = summary

def author(self):
"""
Expand Down Expand Up @@ -358,40 +375,6 @@ def datetime(self, _type=str):
"""
return self._datetime_string if _type == str else self._datetime

def _populate(self, sha: str):
"""
Retrieve information about the commit.
Args:
sha: 40-byte SHA string of the commit
Returns:
"""
if sha == '0000000000000000000000000000000000000000':
# This indicates an uncommitted line, so there's
# no actual Git commit to inspect. Instead we
# populate the Commit object wtih a fake Author.
self._author = repo.author('Uncommitted', '#')
self._datetime = None
self._datetime_string = '---'
return

cmd = GitCommand('show', [
'-t',
'--quiet',
"--format='%aN%n%aE%n%ai'",
sha
])
cmd.run()
result = cmd.stdout()

# Author name and email are returned on single lines.
self._author = self.repo().author(result[0], result[1])

# Third line includes formatted date/time info
self._datetime_string = result[2]
self._datetime = util.commit_datetime(self._datetime_string)


class Page(AbstractRepoObject):
"""
Expand Down Expand Up @@ -483,24 +466,97 @@ def authors_summary(self):
def _process_git_blame(self):
"""
Execute git blame and parse the results.
This retrieves all data we need, also for the Commit object.
Each line will be associated with a Commit object and counted
to its author's "account".
Whether empty lines are counted is determined by the
count_empty_lines configuration option.
git blame --porcelain will produce output like the following
for each line in a file:
When a commit is first seen in that file:
30ed8daf1c48e4a7302de23b6ed262ab13122d31 1 2 1
author John Doe
author-mail <[email protected]>
author-time 1580742131
author-tz +0100
committer John Doe
committer-mail <[email protected]>
committer-time 1580742131
summary Fancy commit message title
filename home/docs/README.md
line content (indicated by TAB. May be empty after that)
When a commit has already been seen *in that file*:
82a3e5021b7131e31fc5b110194a77ebee907955 4 5
line content
In this case the metadata is not repeated, but it is guaranteed that
a Commit object with that SHA has already been created so we don't
need that information anymore.
When a line has not been committed yet:
0000000000000000000000000000000000000000 1 1 1
author Not Committed Yet
author-mail <not.committed.yet>
author-time 1583342617
author-tz +0100
committer Not Committed Yet
committer-mail <not.committed.yet>
committer-time 1583342617
committer-tz +0100
summary Version of books/main/docs/index.md from books/main/docs/index.md
previous 1f0c3455841488fe0f010e5f56226026b5c5d0b3 books/main/docs/index.md
filename books/main/docs/index.md
uncommitted line content
In this case exactly one Commit object with the special SHA and fake
author will be created and counted.
Args:
---
Returns:
--- (this method works through side effects)
"""

cmd = GitCommand('blame', ['-lts', str(self._path)])
cmd.run()
re_sha = re.compile('^\w{40}')

# Retrieve SHA and content from the line, discarding
# file path and line number
line_pattern = re.compile('(.*?)\s.*\s*\d\)(\s*.*)')
cmd = GitCommand('blame', ['--porcelain', str(self._path)])
cmd.run()

commit_data = {}
for line in cmd.stdout():
m = line_pattern.match(line)
key = line.split(' ')[0]
m = re_sha.match(key)
if m:
sha = m.group(1)
content = m.group(2).strip()

if content or self.repo().config('count_empty_lines'):
# assign the line to a commit and count it
commit = self.repo().get_commit(sha)
commit_data = {
'sha': key
}
elif key in [
'author',
'author-mail',
'author-time',
'author-tz',
'summary'
]:
commit_data[key] = line[len(key)+1:]
elif line.startswith('\t'):
# assign the line to a commit
# and create the Commit object if necessary
commit = self.repo().get_commit(
commit_data.get('sha'),
# The following values are guaranteed to be present
# when a commit is seen for the first time,
# so they can be used for creating a Commit object.
author_name=commit_data.get('author'),
author_email=commit_data.get('author-mail'),
author_time=commit_data.get('author-time'),
author_tz=commit_data.get('author-tz'),
summary=commit_data.get('summary')
)
if len(line) > 1 or self.repo().config('count_empty_lines'):
author = commit.author()
if author not in self._authors:
self._authors.append(author)
Expand Down
40 changes: 23 additions & 17 deletions mkdocs_git_authors_plugin/util.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,40 @@
from datetime import datetime, timezone, timedelta

def commit_datetime(dt: str):
def commit_datetime(author_time: str, author_tz: str):
"""
Convert a commit's datetime string to a
datetime.datetime object with timezone info.
Convert a commit's timestamp to an aware datetime object.
Args:
A string returned from the %ai formatting argument
in a git show command.
author_time: Unix timestamp string
author_tz: string in the format +hhmm
Returns:
datetime.datetime object with tzinfo
"""
d, t, tz = dt.split(' ')
d = [int(v) for v in d.split('-')]
t = [int(v) for v in t.split(':')]

# timezone info looks like +hhmm or -hhmm
tz_hours = int(tz[:3])
th_minutes = int(tz[0] + tz[3:])
tz_hours = int(author_tz[:3])
th_minutes = int(author_tz[0] + author_tz[3:])

# Construct 'aware' datetime.datetime object
return datetime(
d[0], d[1], d[2],
hour=t[0],
minute=t[1],
second=t[2],
tzinfo=timezone(timedelta(hours=tz_hours,minutes=th_minutes))
return datetime.fromtimestamp(
int(author_time),
timezone(timedelta(hours=tz_hours,minutes=th_minutes))
)


def commit_datetime_string(dt: datetime):
    """
    Format a commit's timestamp as a display string.

    Args:
        dt: datetime object with tzinfo

    Returns:
        string made of the date and time representation ('%c')
        followed by the UTC offset ('%z'); should be localized
    """
    display_format = '%c %z'
    return dt.strftime(display_format)


def repo_authors_summary(authors, config: dict):
"""
A summary list of the authors' contributions on repo level.
Expand Down

0 comments on commit 6f5822c

Please sign in to comment.