Skip to content

Commit 76e6b1c

Browse files
committed
initial work
1 parent ce5f2a2 commit 76e6b1c

File tree

2 files changed

+219
-0
lines changed

2 files changed

+219
-0
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""Parse the GitHub action log for test times.
2+
3+
Taken from https://github.com/pymc-labs/pymc-marketing/tree/main/scripts/slowest_tests/extract-slow-tests.py
4+
5+
"""
6+
7+
import re
8+
import sys
9+
10+
from pathlib import Path
11+
12+
# Opens the slow-test section: any line containing "==== slow"
# (presumably pytest's "slowest durations" banner -- confirm against a real log).
start_pattern = re.compile(r"==== slow")

# Any "====" banner. Used both to trim the header/footer lines and to detect
# the banner that closes the section.
separator_pattern = re.compile(r"====")

# A decimal number immediately followed by "s " (e.g. "12.34s "); group 1
# captures the number without the unit.
time_pattern = re.compile(r"(\d+\.\d+)s ")
15+
16+
17+
def extract_lines(lines: list[str]) -> list[str]:
    """Return the lines of the first slow-test section found in *lines*.

    Collection begins at the first line matching ``start_pattern`` and stops
    right after the next line that matches ``separator_pattern`` without also
    matching ``start_pattern``. Both boundary lines are part of the result.
    If no closing line is seen, everything from the opening line onwards is
    returned; if no opening line is seen, the result is empty.
    """
    collected = []
    collecting = False

    for current in lines:
        opens = bool(start_pattern.search(current))
        closes = bool(separator_pattern.search(current))

        # Once a section has been opened it stays open until the break below.
        collecting = collecting or opens
        if not collecting:
            continue

        collected.append(current)

        # The opening banner also contains "====", so it must not count as
        # the closing one.
        if closes and not opens:
            break

    return collected
35+
36+
37+
def trim_up_to_match(pattern: re.Pattern[str], string: str) -> str:
    """Drop everything in *string* that precedes the first match of *pattern*.

    Parameters
    ----------
    pattern
        Compiled regular expression to search for.
    string
        Text to trim.

    Returns
    -------
    str
        The suffix of *string* starting at the first match, or ``""`` when
        *pattern* does not match at all.
    """
    match = pattern.search(string)
    return string[match.start():] if match else ""
43+
44+
45+
def trim(pattern, lines: list[str]) -> list[str]:
    """Apply ``trim_up_to_match`` with *pattern* to every entry of *lines*.

    The result has the same length and order as *lines*.
    """
    trimmed = []
    for entry in lines:
        trimmed.append(trim_up_to_match(pattern, entry))
    return trimmed
47+
48+
49+
# ESC followed by either a single character in the ranges ``@-Z`` / ``\-_``
# or a ``[``-introduced sequence (parameter chars, intermediate chars, final
# char). Compiled once at import time instead of on every call.
_ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")


def strip_ansi(text: str) -> str:
    """Return *text* with every ANSI escape sequence removed."""
    return _ANSI_ESCAPE.sub("", text)
52+
53+
54+
def format_times(times: list[str]) -> list[str]:
    """Clean up the raw lines of a slow-test section for display.

    Parameters
    ----------
    times
        Section lines as returned by ``extract_lines``: a header line,
        zero or more duration lines, and a closing line.

    Returns
    -------
    list[str]
        The header trimmed to start at its ``====`` banner, each duration
        line trimmed to start at its duration, and the closing line trimmed
        to its banner with ANSI escape sequences removed. An empty input
        yields an empty list.

    Notes
    -----
    A one-element input (header only, e.g. a log cut off right after the
    banner) yields just the header. Slicing ``times[:1]`` and ``times[-1:]``
    would both select that same line and emit it twice.
    """
    if not times:
        return []

    header, *rest = times
    formatted = trim(separator_pattern, [header])

    if rest:
        *durations, footer = rest
        formatted += trim(time_pattern, durations)
        # Only the closing line is stripped of ANSI escapes.
        formatted += [
            strip_ansi(line) for line in trim(separator_pattern, [footer])
        ]

    return formatted
60+
61+
62+
def read_lines_from_stdin():
    """Read all of standard input and return it as a list of lines.

    Line endings are not included in the returned strings.
    """
    payload = sys.stdin.read()
    return payload.splitlines()
64+
65+
66+
def read_from_file(file: Path) -> list[str]:
    """Read *file* and return its contents as a list of lines.

    For testing purposes: an alternative to reading the log from stdin.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale's default encoding. Line endings are not included in the
    returned strings.
    """
    return file.read_text(encoding="utf-8").splitlines()
69+
70+
71+
def main(read_lines):
    """Run the extract-and-format pipeline and print the result.

    *read_lines* is a zero-argument callable returning the log as a list of
    lines. The slow-test section is extracted, formatted, and written to
    standard output as newline-joined text.
    """
    section = extract_lines(read_lines())
    report = "\n".join(format_times(section))
    print(report)  # noqa: T201
76+
77+
78+
if __name__ == "__main__":
    # Script entry point: the log to parse is read from standard input.
    main(read_lines_from_stdin)
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#!/bin/zsh
# Looks up a recent successful run of the test workflow and prints the JSON
# description of its jobs.

DRY_RUN=false

owner=pymc-devs
repo=pymc
issue_number=7686
title="Speed up test times :rocket:"
workflow=tests
contributing_url="https://www.pymc.io/projects/docs/en/stable/contributing/index.html"
action_url="https://github.com/$owner/$repo/blob/main/.github/workflows/slow-tests-issue.yml"

# Of the 30 successful runs listed, keep those whose updatedAt - startedAt
# exceeds 10 minutes and take the ID of the first one that remains.
latest_id=$(gh run list --limit 30 --workflow "$workflow" --status success --json databaseId,startedAt,updatedAt --jq '
  . | map({
    databaseId: .databaseId,
    startedAt: .startedAt,
    updatedAt: .updatedAt,
    minutes: (((.updatedAt | fromdate) - (.startedAt | fromdate)) / 60)
  } | select(.minutes > 10))
  | .[0].databaseId
')

# When no run passes the filter the jq expression evaluates to null, so the
# captured value may be empty or the literal "null". Stop here instead of
# requesting /runs/null/jobs.
if [[ -z "$latest_id" || "$latest_id" == "null" ]]; then
  print -u2 -r -- "No successful '$workflow' run longer than 10 minutes found"
  exit 1
fi

jobs=$(gh api "/repos/$owner/$repo/actions/runs/$latest_id/jobs" --jq '
  .jobs
')
# | map({name: .name, run_id: .run_id, id: .id, started_at: .started_at, completed_at: .completed_at})

# zsh's builtin echo expands backslash escapes, which would alter JSON string
# contents; print -r writes the value verbatim.
print -r -- "$jobs"
27+
28+
# # Skip 3.10, float32, and Benchmark tests
29+
# function skip_job() {
30+
# name=$1
31+
# # if [[ $name == *"py3.10"* ]]; then
32+
# # return 0
33+
# # fi
34+
# #
35+
# # if [[ $name == *"float32 1"* ]]; then
36+
# # return 0
37+
# # fi
38+
# #
39+
# # if [[ $name == *"Benchmark"* ]]; then
40+
# # return 0
41+
# # fi
42+
#
43+
# return 1
44+
# }
45+
#
46+
# # Remove common prefix from the name
47+
# function remove_prefix() {
48+
# name=$1
49+
# echo $name
50+
# # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //'
51+
# }
52+
#
53+
# function human_readable_time() {
54+
# started_at=$1
55+
# completed_at=$2
56+
#
57+
# start_seconds=$(date -d "$started_at" +%s)
58+
# end_seconds=$(date -d "$completed_at" +%s)
59+
#
60+
# seconds=$(($end_seconds - $start_seconds))
61+
#
62+
# if [ $seconds -lt 60 ]; then
63+
# echo "$seconds seconds"
64+
# else
65+
# echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')"
66+
# fi
67+
# }
68+
#
69+
# all_times=""
70+
# echo "$jobs" | jq -c '.[]' | while read -r job; do
71+
# id=$(echo $job | jq -r '.id')
72+
# name=$(echo $job | jq -r '.name')
73+
# run_id=$(echo $job | jq -r '.run_id')
74+
# started_at=$(echo $job | jq -r '.started_at')
75+
# completed_at=$(echo $job | jq -r '.completed_at')
76+
#
77+
# if skip_job $name; then
78+
# echo "Skipping $name"
79+
# continue
80+
# fi
81+
#
82+
# echo "Processing job: $name (ID: $id, Run ID: $run_id)"
83+
#
84+
# # Seeing a bit more stability with the API rather than the CLI
85+
# # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run
86+
# times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py)
87+
# # times=$(gh run view --job $id --log | python extract-slow-tests.py)
88+
#
89+
# if [ -z "$times" ]; then
90+
# # Some of the jobs are non-test jobs, so we skip them
91+
# echo "No tests found for '$name', skipping"
92+
# continue
93+
# fi
94+
#
95+
# echo $times
96+
#
97+
# human_readable=$(human_readable_time $started_at $completed_at)
98+
# name=$(remove_prefix $name)
99+
#
100+
# top="<details><summary>($human_readable) $name</summary>\n\n\n\`\`\`"
101+
# bottom="\`\`\`\n\n</details>"
102+
#
103+
# formatted_times="$top\n$times\n$bottom"
104+
#
105+
# if [ -n "$all_times" ]; then
106+
# all_times="$all_times\n$formatted_times"
107+
# else
108+
# all_times="$formatted_times"
109+
# fi
110+
# done
111+
#
112+
# # if [ -z "$all_times" ]; then
113+
# # echo "No slow tests found, exiting"
114+
# # exit 1
115+
# # fi
116+
#
117+
# run_date=$(date +"%Y-%m-%d")
118+
# body=$(cat << EOF
119+
# If you are motivated to help speed up some tests, we would appreciate it!
120+
#
121+
# Here are some of the slowest test times:
122+
#
123+
# $all_times
124+
#
125+
# You can find more information on how to contribute [here]($contributing_url)
126+
#
127+
# Automatically generated by [GitHub Action]($action_url)
128+
# Latest run date: $run_date
129+
# Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id)
130+
# EOF
131+
# )
132+
#
133+
# if [ "$DRY_RUN" = true ]; then
134+
# echo "Dry run, not updating issue"
135+
# echo $body
136+
# exit
137+
# fi
138+
# echo $body | gh issue edit $issue_number --body-file - --title "$title"
139+
# echo "Updated issue $issue_number with all times"

0 commit comments

Comments
 (0)