
Commit a4e3cf0 ("Fix")
1 parent 06d52b5

6 files changed: +245, -30 lines


cms/db/task.py: 1 addition & 1 deletion

@@ -222,7 +222,7 @@ class Task(Base):
         default=SCORE_MODE_MAX_TOKENED_LAST)

     # Skip entire subtask on failed testcase when using GroupMin/GroupMul score types.
-    skip_failed_subtask: bool = Column(Boolean, nullable=False, default=True)
+    skip_failed_subtask: bool = Column(Boolean, nullable=False, default=False)

     # Active Dataset (id and object) currently being used for scoring.
     # The ForeignKeyConstraint for this column is set at table-level.
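
The default flips from True to False, so subtask skipping becomes opt-in per task. A minimal sketch of enabling it for one existing task, assuming a standard CMS database session (SessionGen is CMS's session factory; the task name below is hypothetical):

    from cms.db import SessionGen, Task

    with SessionGen() as session:
        task = session.query(Task).filter(Task.name == "example_task").first()
        if task is not None:
            # Opt this task into skipping; new tasks now default to False.
            task.skip_failed_subtask = True
            session.commit()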

cms/grading/scoretypes/abc.py: 31 additions & 5 deletions

@@ -6,6 +6,7 @@
 # Copyright © 2010-2012 Matteo Boscariol <[email protected]>
 # Copyright © 2013-2016 Luca Wehrstedt <[email protected]>
 # Copyright © 2015 wafrelka <[email protected]>
+# Copyright © 2025 Pasit Sangprachathanarak <[email protected]>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as

@@ -285,6 +286,8 @@ class ScoreTypeGroup(ScoreTypeAlone):
         <tr class="correct">
     {% elif tc["outcome"] == "Not correct" %}
         <tr class="notcorrect">
+    {% elif tc["outcome"] == "Skipped" %}
+        <tr class="partiallycorrect">
     {% else %}
         <tr class="partiallycorrect">
     {% endif %}

@@ -433,9 +436,13 @@ def compute_score(self, submission_result):
            evaluation = evaluations[tc_idx]

            # Handle skipped testcases specifically
-            if evaluation.outcome == "N/A" and evaluation.text == [
-                "Skipped due to failed testcase in subtask"
-            ]:
+            if (
+                evaluation.outcome == "N/A"
+                and evaluation.text
+                and len(evaluation.text) > 0
+                and "Skipped due to failed testcase in subtask"
+                in evaluation.text[0]
+            ):
                tc_score = 0.0  # Skipped testcases count as 0.0 for scoring
                tc_outcome = "Skipped"
            elif evaluation.outcome == "N/A" or evaluation.outcome is None:

@@ -459,7 +466,9 @@ def compute_score(self, submission_result):
                    {
                        "idx": tc_idx,
                        "outcome": tc_outcome,
-                        "text": evaluation.text,
+                        "text": ["N/A"]
+                        if tc_outcome == "Skipped"
+                        else evaluation.text,  # Show N/A for skipped testcases
                        "time": evaluation.execution_time,
                        "time_limit": evaluation.dataset.time_limit,
                        "time_limit_was_exceeded": time_limit_was_exceeded,

@@ -476,7 +485,24 @@ def compute_score(self, submission_result):
                    tc_first_lowest_idx = tc_idx
                    tc_first_lowest_score = tc_score
            else:
-                public_testcases.append({"idx": tc_idx})
+                # For non-public testcases, still show the outcome if it's "Skipped"
+                # This ensures that skipped testcases are visible to contestants
+                if tc_outcome == "Skipped":
+                    public_testcases.append(
+                        {
+                            "idx": tc_idx,
+                            "outcome": tc_outcome,
+                            "text": [
+                                "N/A"
+                            ],
+                            "time": None,
+                            "memory": None,
+                            "show_in_restricted_feedback": False,
+                            "show_in_oi_restricted_feedback": False,
+                        }
+                    )
+                else:
+                    public_testcases.append({"idx": tc_idx})

        # Calculate scores considering skipped testcases as 0.0
        outcome_values = []
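
The old check required evaluation.text to equal a one-element list exactly; the new check only requires the marker string to appear somewhere in the first element, which survives extra detail being appended to the message. A small self-contained sketch of the predicate (sample values are illustrative, not taken from CMS):

    def is_skipped(outcome, text):
        # Mirrors the new condition in compute_score: outcome "N/A" plus a
        # non-empty text list whose first element contains the skip marker.
        return bool(
            outcome == "N/A"
            and text
            and "Skipped due to failed testcase in subtask" in text[0]
        )

    assert is_skipped("N/A", ["Skipped due to failed testcase in subtask"])
    assert is_skipped("N/A", ["Skipped due to failed testcase in subtask 2"])
    assert not is_skipped("1.0", ["Output is correct"])
    assert not is_skipped("N/A", [])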

cms/grading/subtask_skipper.py: 114 additions & 16 deletions

@@ -41,7 +41,7 @@ def __init__(self, task, submission_result):
        self.task = task
        self.submission_result = submission_result
        self.dataset = submission_result.dataset if submission_result else None
-        self.skip_enabled = getattr(task, 'skip_failed_subtask', True)
+        self.skip_enabled = getattr(task, "skip_failed_subtask", False)
        self._subtask_groups = None
        self._failed_subtasks = set()
        self._skipped_testcases = set()

@@ -56,16 +56,35 @@ def should_skip_testcase(self, testcase_codename: str) -> bool:
        if not self.skip_enabled or not self.dataset:
            return False

-        # Only skip for GroupMin and GroupMul score types
        score_type = self.dataset.score_type
-        if score_type not in ['GroupMin', 'GroupMul']:
+        if score_type not in ["GroupMin", "GroupMul"]:
            return False

+        if testcase_codename in self._skipped_testcases:
+            return True
+
+        # Check if any earlier testcase in the same subtask has failed
        subtask_idx = self._get_subtask_for_testcase(testcase_codename)
        if subtask_idx is None:
            return False

-        return subtask_idx in self._failed_subtasks
+        # Check if this subtask has already failed due to an earlier testcase
+        if subtask_idx in self._failed_subtasks:
+            subtask_testcases = self._get_testcases_in_subtask(subtask_idx)
+            try:
+                current_testcase_idx = subtask_testcases.index(testcase_codename)
+                # Check if any earlier testcase in this subtask has failed
+                for i in range(current_testcase_idx):
+                    earlier_testcase = subtask_testcases[i]
+                    if self._is_testcase_failed(earlier_testcase):
+                        logger.info(
+                            f"Skipping testcase {testcase_codename} because earlier testcase {earlier_testcase} failed in subtask {subtask_idx}"
+                        )
+                        return True
+            except ValueError:
+                pass
+
+        return False

@@ -76,31 +95,52 @@ def mark_testcase_failed(self, testcase_codename: str, outcome: float):
        if not self.skip_enabled or not self.dataset:
            return

-        # Only handle for GroupMin and GroupMul score types
        score_type = self.dataset.score_type
        if score_type not in ['GroupMin', 'GroupMul']:
            return

-        # Check if this testcase failed (outcome is 0.0 for failed)
+        # Check if this testcase failed
        if outcome > 0.0:
            return

        subtask_idx = self._get_subtask_for_testcase(testcase_codename)
        if subtask_idx is None:
+            logger.warning(f"Could not find subtask for testcase {testcase_codename}")
            return

        # Mark this subtask as failed
        self._failed_subtasks.add(subtask_idx)
        logger.info(f"Marking subtask {subtask_idx} as failed due to testcase {testcase_codename}")

-        # Get all testcases in this subtask and mark remaining ones as skipped
+        # Get all testcases in this subtask in order
        subtask_testcases = self._get_testcases_in_subtask(subtask_idx)
-        for tc_codename in subtask_testcases:
-            if tc_codename != testcase_codename:  # Skip the failing testcase itself
-                # Check if this testcase hasn't been evaluated yet
-                if not self._is_testcase_evaluated(tc_codename):
-                    self._skipped_testcases.add(tc_codename)
-                    logger.info(f"Marking testcase {tc_codename} as skipped in subtask {subtask_idx}")
+        logger.info(f"Subtask {subtask_idx} testcases in order: {subtask_testcases}")
+
+        # Find the position of the failing testcase
+        try:
+            failing_testcase_idx = subtask_testcases.index(testcase_codename)
+            logger.info(
+                f"Failing testcase {testcase_codename} is at position {failing_testcase_idx} in subtask {subtask_idx}"
+            )
+        except ValueError:
+            logger.warning(
+                f"Failed testcase {testcase_codename} not found in subtask {subtask_idx}"
+            )
+            return
+
+        # Skip only the testcases that come after the failing one in this subtask
+        for i in range(failing_testcase_idx + 1, len(subtask_testcases)):
+            tc_codename = subtask_testcases[i]
+            # Only skip if this testcase hasn't been started yet
+            if not self._is_testcase_started(tc_codename):
+                self._skipped_testcases.add(tc_codename)
+                logger.info(
+                    f"Marking testcase {tc_codename} (position {i}) as skipped in subtask {subtask_idx} (after failure of {testcase_codename})"
+                )
+            else:
+                logger.info(
+                    f"Testcase {tc_codename} (position {i}) already started/completed, not skipping"
+                )

    def get_skipped_testcases(self) -> Set[str]:
        """Get the set of testcase codenames that should be skipped."""

@@ -126,22 +166,31 @@ def _get_subtask_groups(self) -> Optional[Dict[int, List[str]]]:

        self._subtask_groups = {}
        testcase_names = sorted(self.dataset.testcases.keys())
+        logger.debug(f"All testcase names in order: {testcase_names}")
+        logger.debug(f"Score type parameters: {parameters}")

        for subtask_idx, parameter in enumerate(parameters):
            if len(parameter) < 2:
                continue

-            max_score, target = parameter[0], parameter[1]
+            _, target = (
+                parameter[0],
+                parameter[1],
+            )

            if isinstance(target, int):
-                # Number-based grouping: first N testcases
                start_idx = sum(param[1] for param in parameters[:subtask_idx] if isinstance(param[1], int))
                end_idx = start_idx + target
                group_testcases = testcase_names[start_idx:end_idx]
+                logger.debug(
+                    f"Subtask {subtask_idx} (number-based): testcases {start_idx}-{end_idx - 1} = {group_testcases}"
+                )
            elif isinstance(target, str):
-                # Regex-based grouping
                pattern = re.compile(target)
                group_testcases = [tc for tc in testcase_names if pattern.match(tc)]
+                logger.debug(
+                    f"Subtask {subtask_idx} (regex-based): pattern '{target}' = {group_testcases}"
+                )
            else:
                continue

@@ -198,3 +247,52 @@ def _is_testcase_evaluated(self, testcase_codename: str) -> bool:
            return True

        return False
+
+    def _is_testcase_started(self, testcase_codename: str) -> bool:
+        """Check if a testcase has been started (queued, running, or completed).
+
+        This is more comprehensive than _is_testcase_evaluated as it also
+        checks if the testcase is currently being evaluated.
+
+        testcase_codename: The codename of the testcase
+
+        Returns: True if the testcase has been started, False otherwise
+        """
+        # First check if it's already completed
+        if self._is_testcase_evaluated(testcase_codename):
+            return True
+
+        # For now, we'll use the same logic as _is_testcase_evaluated.
+        # In the future, we could check if the testcase is currently
+        # in the evaluation queue or being processed,
+        # but since we don't have easy access to the queue state here,
+        # we will only skip testcases that definitely
+        # haven't been touched yet.
+
+        # TODO: Could be enhanced to check the evaluation service queue
+        return self._is_testcase_evaluated(testcase_codename)
+
+    def _is_testcase_failed(self, testcase_codename: str) -> bool:
+        """Check if a testcase has failed (outcome <= 0.0).
+
+        testcase_codename: The codename of the testcase
+
+        Returns: True if the testcase failed, False otherwise
+        """
+        if not self.submission_result:
+            return False
+
+        for evaluation in self.submission_result.evaluations:
+            if evaluation.codename == testcase_codename:
+                try:
+                    outcome = (
+                        float(evaluation.outcome)
+                        if evaluation.outcome != "N/A"
+                        and evaluation.outcome is not None
+                        else 0.0
+                    )
+                    return outcome <= 0.0
+                except (ValueError, TypeError):
+                    return True  # If we can't parse the outcome, consider it failed
+
+        return False  # Not evaluated yet, so not failed
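
A usage sketch of the intended flow, with stand-in objects: the real task and submission_result come from the CMS database, the attribute names on the stubs (score_type, score_type_parameters, testcases, evaluations) are assumptions mirroring what this module reads, and the codenames are made up.

    from types import SimpleNamespace

    from cms.grading.subtask_skipper import SubtaskSkipper

    task = SimpleNamespace(skip_failed_subtask=True)
    dataset = SimpleNamespace(
        score_type="GroupMin",
        score_type_parameters=[[40, 2], [60, 2]],  # two subtasks, two testcases each
        testcases={"001": None, "002": None, "003": None, "004": None},
    )
    submission_result = SimpleNamespace(dataset=dataset, evaluations=[])

    skipper = SubtaskSkipper(task, submission_result)

    # "001" fails with outcome 0.0: its subtask is marked failed and the
    # not-yet-started "002" is queued for skipping; subtask 1 is untouched.
    skipper.mark_testcase_failed("001", 0.0)
    print(skipper.get_skipped_testcases())      # expected: {"002"}
    print(skipper.should_skip_testcase("002"))  # expected: True
    print(skipper.should_skip_testcase("003"))  # expected: False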

cms/server/admin/templates/task.html: 7 additions & 7 deletions

@@ -242,6 +242,13 @@ <h2 id="title_task_configuration" class="toggling_on">Task configuration</h2>
      </tr>

      <tr><td colspan=2><h2>Score options</h2></td></tr>
+      <tr>
+        <td>
+          <span class="info" title="When enabled, if one testcase fails in a subtask (for GroupMin/GroupMul score types), remaining testcases in that subtask will be skipped and marked as 'Skipped'."></span>
+          Skip entire subtask on failed testcase
+        </td>
+        <td><input type="checkbox" name="skip_failed_subtask" /></td>
+      </tr>
      <tr>
        <td>
          <span class="info" title="The number of decimal places the scores will be rounded to.

@@ -263,13 +270,6 @@ <h2 id="title_task_configuration" class="toggling_on">Task configuration</h2>
        </select>
        </td>
      </tr>
-      <tr>
-        <td>
-          <span class="info" title="When enabled, if one testcase fails in a subtask (for GroupMin/GroupMul score types), remaining testcases in that subtask will be skipped and marked as 'Skipped'."></span>
-          Skip entire subtask on failed testcase
-        </td>
-        <td><input type="checkbox" name="skip_failed_subtask" {{ "checked" if task.skip_failed_subtask else "" }} /></td>
-      </tr>
      </table>
      <div class="hr"></div>
    </div>
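
One server-side detail worth noting (not part of this diff): an unchecked HTML checkbox is simply absent from the POST body, so whatever admin handler persists this form has to translate "missing" into False. A hedged sketch in Tornado terms, since CMS's admin server is Tornado-based; the function and argument-reading pattern below are illustrative, not the project's actual code:

    def parse_skip_failed_subtask(handler):
        # Tornado's RequestHandler.get_argument returns the default when the
        # field was not submitted, i.e. when the checkbox was left unchecked.
        return handler.get_argument("skip_failed_subtask", None) is not None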
