Skip to content

Commit

Permalink
update repo scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
SkafteNicki committed Jan 14, 2025
1 parent 12134ca commit 17d3efe
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ prediction*.json
**/service_account_key.json
corruptmnist_v1/
corruptmnist_v2/
+tools/repo_stats/report.py
+tools/repo_stats/README.md

# vscode
.vscode/
2 changes: 1 addition & 1 deletion tools/repo_stats/leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def main() -> None:
"""Main function for the leaderboard."""
download_data("repo_stats.json")
dataframe = load_data("repo_stats.json")
-dataframe["num_warnings"] = dataframe["num_warnings"].apply(lambda x: 27 - x if pd.notnull(x) else x)
+dataframe["num_warnings"] = dataframe["num_warnings"].apply(lambda x: 31 - x if pd.notnull(x) else x)
dataframe["activity_matrix"] = dataframe["activity_matrix"].apply(
lambda x: activity_to_image(x) if x is not None else x
)
Expand Down
4 changes: 3 additions & 1 deletion tools/repo_stats/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ def check_answers(self) -> int | None:
stdin=PIPE,
)
output = p.stderr.read()
-return len(output.decode("utf-8").split("\n")[:-1:2])
+output = output.decode("utf-8").split("\n")
+lines = [line for line in output if "WARNING" in line]
+return len(lines) if len(lines) else None
return None


Expand Down
13 changes: 10 additions & 3 deletions tools/repo_stats/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,20 @@ def load_data(file_name: str) -> list[GroupInfo]:
return content


-def create_activity_matrix(commits: list, max_delta: int = 5, normalize: bool = True) -> list[list[int]]:
+def create_activity_matrix(
+commits: list,
+max_delta: int = 5,
+min_delta: int = 1,
+) -> list[list[int]]:
"""Creates an activity matrix from the commits."""
commit_times = [datetime.datetime.fromisoformat(commit["commit"]["committer"]["date"][:-1]) for commit in commits]
commit_times.sort()

start_time = commit_times[0]
-end_time = min(start_time + datetime.timedelta(weeks=max_delta), commit_times[-1])
+end_time = max(
+start_time + datetime.timedelta(weeks=min_delta),
+min(start_time + datetime.timedelta(weeks=max_delta), commit_times[-1]),
+)

num_days = (end_time - start_time).days + 1 # include last day

Expand Down Expand Up @@ -124,7 +131,7 @@ def main():
contributor.commits_pr += 1
commits += pr_commits

-activity_matrix = create_activity_matrix(commits, max_delta=3)
+activity_matrix = create_activity_matrix(commits, max_delta=3, min_delta=1)

average_commit_length = sum([len(c) for c in commit_messages]) / len(commit_messages)

Expand Down

0 comments on commit 17d3efe

Please sign in to comment.