Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cms/grading/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from .trusted import checker_step, extract_outcome_and_text, trusted_step
from .whitediff import _WHITES, _white_diff, white_diff_step,\
white_diff_fobj_step
from .realprecision import _EPS, realprecision_diff_step, \
realprecision_diff_fobj_step, _real_numbers_compare


__all__ = [
Expand All @@ -43,5 +45,7 @@
# trusted.py
"checker_step", "extract_outcome_and_text", "trusted_step",
# whitediff.py
"_WHITES", "_white_diff", "white_diff_step", "white_diff_fobj_step"
"_WHITES", "_white_diff", "white_diff_step", "white_diff_fobj_step",
# realprecision.py
"_EPS", "_real_numbers_compare", "realprecision_diff_step", "realprecision_diff_fobj_step"
]
153 changes: 153 additions & 0 deletions cms/grading/steps/realprecision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python3

# Contest Management System - http://cms-dev.github.io/
# Copyright © 2025 Ron Ryvchin <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""High level functions to perform standardized real-number comparison.

Policy:
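- Tokenization: only fixed-format decimals (no exponent, no inf/nan) are
recognized. They are matched anywhere in the data and every other byte
is ignored.
Accepted examples: "12", "12.", "12.34", ".5", "-0.0", "+3.000"
Not recognized as a single number: "1e-3" (read as "1" and "-3"),
"0x1.8p3" (read as "0", "1.8" and "3"), "nan" and "inf" (ignored)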
- Tolerance: 1e-6 absolute OR 1e-6 * max(1, |a|, |b|) relative.
- Pairwise comparison in order in case the number of fixed-format decimals
is the same, otherwise the files are considered different.
"""

import logging
import math
import re
import typing

from cms.grading.Sandbox import Sandbox

from .evaluation import EVALUATION_MESSAGES


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Fixed-format decimals only (bytes regex): an optional sign followed by
# either digits with an optional fractional part ("12", "12.", "12.34") or
# a bare fractional part (".5"). Exponents, inf and nan are not matched.
_FIXED_DEC_RE = re.compile(rb'[+-]?(?:\d+(?:\.\d*)?|\.\d+)')
# Tolerance used both as absolute and as relative error bound.
_EPS = 1e-6


def _compare_real_pair(a: float, b: float) -> bool:
    """Return True if a and b match within absolute/relative tolerance.

    For finite values the check is abs(a - b) <= _EPS * max(1, |a|, |b|).

    A token with enough digits overflows to infinity when converted to
    float. Scaling the tolerance by an infinite value would make it
    infinite too, and then any value would be accepted; math.isclose
    avoids this by letting an infinity match only an identical infinity.

    a: the first number.
    b: the second number.

    return: whether the two numbers are close enough.

    """
    return math.isclose(a, b, rel_tol=_EPS, abs_tol=_EPS)


def _parse_fixed(token: bytes) -> float | None:
"""Parse a fixed-format decimal token into float; return None on failure."""
# The regex already excludes exponents/inf/nan; this is defensive.
try:
# Decode strictly ASCII; reject weird Unicode digits.
s = token.decode("ascii", errors="strict")
# float() accepts exponent, but regex guarantees none are present.
return float(s)
except Exception:
return None


def _extract_fixed_decimals(stream: typing.BinaryIO) -> list[float]:
    """Collect every fixed-format decimal found in a binary stream.

    The whole stream is read at once and scanned for non-overlapping
    matches of the fixed-format regex; bytes that are not part of a match
    are ignored. Tokens that cannot be parsed are dropped.

    stream: the file to scan, opened in binary mode.

    return: the parsed numbers, in the order they appear.

    """
    candidates = _FIXED_DEC_RE.findall(stream.read())
    parsed = (_parse_fixed(candidate) for candidate in candidates)
    return [value for value in parsed if value is not None]


def _real_numbers_compare(
    output: typing.BinaryIO, correct: typing.BinaryIO
) -> bool:
    """Compare the numbers contained in two output files.

    Two files are considered equal if they contain the same amount of
    real numbers and, for every index i, the i-th number of one file
    matches the i-th number of the other within the tolerance.

    output: the user output, opened in binary mode.
    correct: the reference output, opened in binary mode.

    return: True if the two files are equal in the sense above.

    """
    # The reference output is read first, then the user output.
    expected = _extract_fixed_decimals(correct)
    actual = _extract_fixed_decimals(output)

    # A different amount of numbers can never match.
    if len(expected) != len(actual):
        return False

    # Pairwise comparison; stops at the first mismatch.
    return all(
        _compare_real_pair(ref, got) for ref, got in zip(expected, actual))


def realprecision_diff_fobj_step(
    output_fobj: typing.BinaryIO, correct_output_fobj: typing.BinaryIO
) -> tuple[float, list[str]]:
    """Grade the user output against the correct output as real numbers.

    The fixed-format numbers of both files are extracted and compared by
    value. The outcome is 1.0 if all of them match within an absolute or
    relative error of 10^-6, and 0.0 otherwise. Calling this function
    means that the output file exists.

    output_fobj: file for the user output, opened in binary mode.
    correct_output_fobj: file for the correct output, opened in
        binary mode.

    return: the outcome as above and a description text.

    """
    matches = _real_numbers_compare(output_fobj, correct_output_fobj)
    outcome, message_key = (1.0, "success") if matches else (0.0, "wrong")
    return outcome, [EVALUATION_MESSAGES.get(message_key).message]


def realprecision_diff_step(
    sandbox: Sandbox, output_filename: str, correct_output_filename: str
) -> tuple[float, list[str]]:
    """Grade a user output stored in the sandbox as real numbers.

    The fixed-format numbers of the user output and of the correct
    output are extracted and compared by value. The outcome is 1.0 if
    all of them match within an absolute or relative error of 10^-6, and
    0.0 if they don't (or if the output doesn't exist).

    sandbox: the sandbox we consider.
    output_filename: the filename of user's output in the sandbox.
    correct_output_filename: the same with reference output.

    return: the outcome as above and a description text.

    """
    # Without an output file there is nothing to compare.
    if not sandbox.file_exists(output_filename):
        return 0.0, [
            EVALUATION_MESSAGES.get("nooutput").message, output_filename]

    with sandbox.get_file(output_filename) as out_file, \
            sandbox.get_file(correct_output_filename) as res_file:
        # Delegate to the file-object variant defined in this module.
        return realprecision_diff_fobj_step(out_file, res_file)
8 changes: 7 additions & 1 deletion cms/grading/tasktypes/Batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ class Batch(TaskType):
# Constants used in the parameter definition.
OUTPUT_EVAL_DIFF = "diff"
OUTPUT_EVAL_CHECKER = "comparator"
OUTPUT_EVAL_REALPREC = "realprecision"
COMPILATION_ALONE = "alone"
COMPILATION_GRADER = "grader"

Expand Down Expand Up @@ -111,7 +112,8 @@ class Batch(TaskType):
"output_eval",
"",
{OUTPUT_EVAL_DIFF: "Outputs compared with white diff",
OUTPUT_EVAL_CHECKER: "Outputs are compared by a comparator"})
OUTPUT_EVAL_CHECKER: "Outputs are compared by a comparator",
OUTPUT_EVAL_REALPREC: "Outputs compared as real numbers (with precision of 1e-6)"})

ACCEPTED_PARAMETERS = [_COMPILATION, _USE_FILE, _EVALUATION]

Expand Down Expand Up @@ -181,6 +183,9 @@ def _uses_grader(self) -> bool:
def _uses_checker(self) -> bool:
    """Return whether output_eval selects the comparator evaluation."""
    chosen = self.output_eval
    return chosen == self.OUTPUT_EVAL_CHECKER

def _uses_realprecision(self) -> bool:
    """Return whether output_eval selects the real precision evaluation."""
    chosen = self.output_eval
    return chosen == self.OUTPUT_EVAL_REALPREC

@staticmethod
def _executable_filename(codenames: Iterable[str], language: Language) -> str:
"""Return the chosen executable name computed from the codenames.
Expand Down Expand Up @@ -371,6 +376,7 @@ def _evaluate_step(self, job, file_cacher, output_file_params, outcome, text, st
file_cacher, job,
self.CHECKER_CODENAME
if self._uses_checker() else None,
use_realprecision = self._uses_realprecision(),
**output_file_params, extra_args=extra_args)

# Fill in the job with the results.
Expand Down
8 changes: 7 additions & 1 deletion cms/grading/tasktypes/OutputOnly.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class OutputOnly(TaskType):
# Constants used in the parameter definition.
OUTPUT_EVAL_DIFF = "diff"
OUTPUT_EVAL_CHECKER = "comparator"
OUTPUT_EVAL_REALPREC = "realprecision"

# Other constants to specify the task type behaviour and parameters.
ALLOW_PARTIAL_SUBMISSION = True
Expand All @@ -66,7 +67,8 @@ class OutputOnly(TaskType):
"output_eval",
"",
{OUTPUT_EVAL_DIFF: "Outputs compared with white diff",
OUTPUT_EVAL_CHECKER: "Outputs are compared by a comparator"})
OUTPUT_EVAL_CHECKER: "Outputs are compared by a comparator",
OUTPUT_EVAL_REALPREC: "Outputs compared as real numbers (with precision of 1e-6)"})

ACCEPTED_PARAMETERS = [_EVALUATION]

Expand Down Expand Up @@ -97,6 +99,9 @@ def get_auto_managers(self):
def _uses_checker(self) -> bool:
    """Return whether output_eval selects the comparator evaluation."""
    chosen = self.output_eval
    return chosen == OutputOnly.OUTPUT_EVAL_CHECKER

def _uses_realprecision(self) -> bool:
    """Return whether output_eval selects the real precision evaluation."""
    chosen = self.output_eval
    return chosen == self.OUTPUT_EVAL_REALPREC

@staticmethod
def _get_user_output_filename(job: Job):
return OutputOnly.USER_OUTPUT_FILENAME_TEMPLATE % \
Expand Down Expand Up @@ -127,6 +132,7 @@ def evaluate(self, job, file_cacher):
box_success, outcome, text = eval_output(
file_cacher, job,
OutputOnly.CHECKER_CODENAME if self._uses_checker() else None,
use_realprecision = self._uses_realprecision(),
user_output_digest=job.files[user_output_filename].digest)

# Fill in the job with the results.
Expand Down
9 changes: 6 additions & 3 deletions cms/grading/tasktypes/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from cms.grading.Sandbox import Sandbox
from cms.grading.language import Language
from cms.grading.steps import EVALUATION_MESSAGES, checker_step, \
white_diff_fobj_step
white_diff_fobj_step, realprecision_diff_fobj_step


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -217,6 +217,7 @@ def eval_output(
file_cacher: FileCacher,
job: Job,
checker_codename: str | None,
use_realprecision: bool = False,
user_output_path: str | None = None,
user_output_digest: str | None = None,
user_output_filename: str = "",
Expand All @@ -227,7 +228,8 @@ def eval_output(
file_cacher: file cacher to use to get files.
job: the job triggering this checker run.
checker_codename: codename of the checker amongst the manager,
or None to use white diff.
or None to use white diff / real number precision.
use_realprecision: whether we should use real precision comparator.
user_output_path: full path of the user output file, None if
using the digest (exactly one must be non-None).
user_output_digest: digest of the user output file, None if
Expand Down Expand Up @@ -283,12 +285,13 @@ def eval_output(
return success, outcome, text

else:
comparator_function = realprecision_diff_fobj_step if use_realprecision else white_diff_fobj_step
if user_output_path is not None:
user_output_fobj = open(user_output_path, "rb")
else:
user_output_fobj = file_cacher.get_file(user_output_digest)
with user_output_fobj:
with file_cacher.get_file(job.output) as correct_output_fobj:
outcome, text = white_diff_fobj_step(
outcome, text = comparator_function(
user_output_fobj, correct_output_fobj)
return True, outcome, text
Loading