Skip to content

Commit 72028d7

Browse files
authored
PTFE-780 Fix bug with runner status and API status update (#380)
1 parent a99de2d commit 72028d7

File tree

7 files changed

+61
-31
lines changed

7 files changed

+61
-31
lines changed

runner_manager/models/runner.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
from datetime import datetime, timedelta
23
from enum import Enum
34
from typing import List, Literal, Optional
@@ -10,9 +11,9 @@
1011
from redis_om import Field, NotFoundError
1112

1213
from runner_manager.clients.github import GitHub
13-
from runner_manager.logging import log
1414
from runner_manager.models.base import BaseModel
1515

16+
log = logging.getLogger(__name__)
1617
# Ideally the runner model would have been inherited
1718
# from githubkit.rest.models.Runner, like the following:
1819
# class Runner(BaseModel, githubkit.rest.models.Runner):
@@ -25,7 +26,6 @@
2526

2627
class RunnerStatus(str, Enum):
2728
online = "online"
28-
idle = "idle"
2929
offline = "offline"
3030

3131

@@ -89,17 +89,19 @@ def find_from_webhook(cls, webhook: WorkflowJobEvent) -> "Runner":
8989
return runner
9090

9191
@property
92-
def is_online(self) -> bool:
93-
"""Check if the runner is online
92+
def is_active(self) -> bool:
93+
"""Check if the runner is active.
94+
95+
An active runner is a runner that is running a job.
9496
9597
Returns:
96-
bool: True if the runner is online, False otherwise.
98+
bool: True if the runner is active, False otherwise.
9799
"""
98-
return self.status == RunnerStatus.online
100+
return self.status == RunnerStatus.online and self.busy is True
99101

100102
@property
101103
def is_offline(self) -> bool:
102-
"""Check if the runner is offline
104+
"""Check if the runner is offline.
103105
104106
Returns:
105107
bool: True if the runner is offline, False otherwise.
@@ -108,12 +110,15 @@ def is_offline(self) -> bool:
108110

109111
@property
110112
def is_idle(self) -> bool:
111-
"""Check if the runner is idle
113+
"""Check if the runner is idle.
114+
115+
An idle runner is a runner that is online and
116+
properly attached to GitHub but is not running a job.
112117
113118
Returns:
114119
bool: True if the runner is idle, False otherwise.
115120
"""
116-
return self.status == RunnerStatus.idle
121+
return self.status == RunnerStatus.online and self.busy is False
117122

118123
@property
119124
def time_since_created(self) -> timedelta:
@@ -144,7 +149,7 @@ def time_to_start_expired(self, timeout: timedelta) -> bool:
144149
return self.is_offline and self.time_since_created > timeout
145150

146151
def time_to_live_expired(self, time_to_live: timedelta) -> bool:
147-
return self.is_online and self.time_since_started > time_to_live
152+
return self.is_active and self.time_since_started > time_to_live
148153

149154
def update_from_github(self, github: GitHub) -> "Runner":
150155
if self.id is not None:
@@ -153,9 +158,9 @@ def update_from_github(self, github: GitHub) -> "Runner":
153158
org=self.organization, runner_id=self.id
154159
).parsed_data
155160
)
156-
self.status = RunnerStatus(self.status)
161+
self.status = RunnerStatus(github_runner.status)
157162
self.busy = github_runner.busy
158-
log.info(f"Runner {self.name} status updated to {self.status}")
163+
log.info(f"Runner {self.name} status updated to {self.status}")
159164
return self.save()
160165

161166
def generate_jit_config(self, github: GitHub) -> "Runner":

runner_manager/models/runner_group.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,9 @@ def delete_runner(self, runner: Runner) -> int:
160160
@property
161161
def need_new_runner(self) -> bool:
162162
runners = self.get_runners()
163-
idle = len([runner for runner in runners if runner.busy is False])
163+
not_active = len([runner for runner in runners if runner.is_active is False])
164164
count = len(runners)
165-
return idle < self.min and count < self.max
165+
return not_active < self.min and count < self.max
166166

167167
def create_github_group(self, github: GitHub) -> GitHubRunnerGroup:
168168
"""Create a GitHub runner group."""
@@ -254,6 +254,12 @@ def healthcheck(
254254
runner: Runner = self.create_runner(github)
255255
if runner:
256256
log.info(f"Runner {runner.name} created")
257+
idle_runners = [runner for runner in self.get_runners() if runner.is_idle]
258+
# Check whether there are more idle runners than the minimum
259+
while len(idle_runners) > self.min:
260+
runner = idle_runners.pop()
261+
self.delete_runner(runner)
262+
log.info(f"Runner {runner.name} deleted")
257263

258264
@classmethod
259265
def find_from_base(cls, basegroup: "BaseRunnerGroup") -> "RunnerGroup":

tests/unit/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ def runner(settings) -> Runner:
4545
runner: Runner = Runner(
4646
id=1,
4747
name="test",
48+
organization="octo-org",
4849
runner_group_id=1,
49-
status="online",
50+
status="offline",
5051
busy=False,
5152
labels=[RunnerLabel(name="label")],
5253
manager=settings.name,

tests/unit/jobs/test_healthchecks.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,25 @@ def test_healthchecks_hypothesis(
4545
def test_group_healthcheck(
4646
runner_group: RunnerGroup, settings: Settings, github: GitHub
4747
):
48+
assert settings.timeout_runner
49+
assert settings.time_to_live
50+
assert runner_group.min == 0
4851
runner_group.save(github=github)
52+
4953
runner_tts: Runner = runner_group.create_runner(github)
5054
assert runner_tts is not None
5155
runner_tts.created_at = datetime.now() - (
5256
settings.timeout_runner + timedelta(minutes=1)
5357
)
58+
# Removing id to avoid retrieving info from GitHub mock API
59+
runner_tts.id = None
5460
runner_tts.save()
5561
runner_ttl: Runner = runner_group.create_runner(github)
5662
assert runner_ttl is not None
63+
# Removing id to avoid retrieving info from GitHub mock API
64+
runner_ttl.id = None
5765
runner_ttl.status = RunnerStatus.online
66+
runner_ttl.busy = True
5867
runner_ttl.started_at = datetime.now() - (
5968
settings.time_to_live + timedelta(minutes=1)
6069
)
@@ -68,9 +77,10 @@ def test_group_healthcheck(
6877
def test_need_new_runner_healthcheck(
6978
runner_group: RunnerGroup, settings: Settings, github: GitHub
7079
):
71-
runner_group.max = 2
80+
runner_group.max = 1
7281
runner_group.min = 1
7382
runner_group.save()
83+
assert len(runner_group.get_runners()) == 0
7484
assert runner_group.need_new_runner is True
7585
runner_group.healthcheck(settings.time_to_live, settings.timeout_runner, github)
7686
assert runner_group.need_new_runner is False
@@ -91,23 +101,16 @@ def test_time_to_start(runner: Runner, settings: Settings):
91101

92102

93103
def test_time_to_live(runner: Runner, settings: Settings):
104+
assert settings.time_to_live
94105
runner.started_at = datetime.now() - (settings.time_to_live + timedelta(minutes=1))
95106
runner.status = RunnerStatus.online
107+
runner.busy = True
96108
assert runner.time_to_live_expired(settings.time_to_live) is True
97109

98110
runner.started_at = datetime.now() - (settings.time_to_live - timedelta(minutes=1))
99111
assert runner.time_to_live_expired(settings.time_to_live) is False
100112

101113

102-
def test_need_new_runner(runner_group: RunnerGroup, github: GitHub):
103-
runner_group.max = 2
104-
runner_group.min = 1
105-
runner_group.save()
106-
assert runner_group.need_new_runner is True
107-
runner_group.create_runner(github)
108-
assert runner_group.need_new_runner is False
109-
110-
111114
def test_healthcheck_job(
112115
runner_group: RunnerGroup, settings: Settings, queue: Queue, github: GitHub
113116
):

tests/unit/jobs/test_workflow_job.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def test_workflow_job_in_progress(
122122
id=webhook.workflow_job.runner_id,
123123
name=webhook.workflow_job.runner_name,
124124
busy=False,
125-
status="idle",
125+
status="online",
126126
manager=settings.name,
127127
runner_group_id=webhook.workflow_job.runner_group_id,
128128
runner_group_name=webhook.workflow_job.runner_group_name,

tests/unit/models/test_runner.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from hypothesis import strategies as st
55
from redis_om import Migrator, NotFoundError
66

7+
from runner_manager.clients.github import GitHub
78
from runner_manager.models.runner import Runner
89

910
from ...strategies import WorkflowJobCompletedStrategy
@@ -12,7 +13,7 @@
1213
@given(st.builds(Runner))
1314
def test_validate_runner(instance: Runner):
1415
assert instance.name is not None
15-
assert instance.status in ["online", "offline", "idle"]
16+
assert instance.status in ["online", "offline"]
1617
assert isinstance(instance.busy, bool)
1718

1819

@@ -47,3 +48,16 @@ def test_find_from_webhook(runner: Runner, webhook: WorkflowJobCompleted):
4748
assert Runner.find_from_webhook(webhook) == runner
4849
runner.delete(runner.pk)
4950
assert Runner.find_from_webhook(webhook) is None
51+
52+
53+
def test_update_from_github(runner: Runner, github: GitHub):
54+
runner.save()
55+
assert runner.id is not None, "Runner must have an id"
56+
github_runner = github.rest.actions.get_self_hosted_runner_for_org(
57+
org=runner.organization, runner_id=runner.id
58+
).parsed_data
59+
print(github_runner)
60+
runner.update_from_github(github)
61+
assert runner.busy == github_runner.busy
62+
assert runner.status == github_runner.status
63+
assert runner.status == "online"

tests/unit/models/test_runner_group.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,16 +141,17 @@ def test_runner_group_name():
141141

142142

143143
def test_need_new_runner(runner_group: RunnerGroup, github: GitHub):
144-
runner_group.min = 1
145144
runner_group.max = 2
145+
runner_group.min = 1
146146
runner_group.save()
147147
assert runner_group.need_new_runner is True
148148
runner = runner_group.create_runner(github)
149-
assert runner is not None
149+
# One runner has been created, so we don't need a new one.
150150
assert runner_group.need_new_runner is False
151+
assert runner is not None
152+
# Pretend the runner is now active.
151153
runner.status = RunnerStatus.online
152154
runner.busy = True
153155
runner.save()
156+
Migrator().run()
154157
assert runner_group.need_new_runner is True
155-
runner_group.create_runner(github)
156-
assert runner_group.need_new_runner is False

0 commit comments

Comments
 (0)