From 0c593caba69c15dad6ba1f4868072ee94cb6a91e Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sun, 9 Feb 2025 15:05:42 +0200 Subject: [PATCH] follow-up cleaning for #146 --- .../cogs/github_cog.py | 9 ++++---- src/discord-cluster-manager/consts.py | 14 +++++++++++ src/discord-cluster-manager/leaderboard_db.py | 11 ++------- .../leaderboard_eval.py | 22 ------------------ src/discord-cluster-manager/task.py | 23 ------------------- 5 files changed, 20 insertions(+), 59 deletions(-) delete mode 100644 src/discord-cluster-manager/leaderboard_eval.py diff --git a/src/discord-cluster-manager/cogs/github_cog.py b/src/discord-cluster-manager/cogs/github_cog.py index b5426d5d..2bce98e2 100644 --- a/src/discord-cluster-manager/cogs/github_cog.py +++ b/src/discord-cluster-manager/cogs/github_cog.py @@ -1,10 +1,9 @@ import json from cogs.submit_cog import ProgressReporter, SubmitCog -from consts import GitHubGPU, GPUType +from consts import AMD_REQUIREMENTS, NVIDIA_REQUIREMENTS, GitHubGPU, GPUType from discord import app_commands from github_runner import GitHubRun -from leaderboard_eval import amd_requirements, nvidia_requirements from run_eval import CompileResult, FullResult, RunResult from utils import setup_logging @@ -40,9 +39,9 @@ async def _run_submission( inputs = {"payload": payload} if lang == "py": if selected_gpu == GPUType.NVIDIA: - inputs["requirements"] = nvidia_requirements + inputs["requirements"] = NVIDIA_REQUIREMENTS else: - inputs["requirements"] = amd_requirements + inputs["requirements"] = AMD_REQUIREMENTS if not await run.trigger(inputs): raise RuntimeError("Failed to trigger GitHub Action. Please check the configuration.") @@ -69,7 +68,7 @@ async def _run_submission( comp = CompileResult(**data["compile"]) else: comp = None - run = {k: RunResult(**v) for k, v in data['runs']} + run = {k: RunResult(**v) for k, v in data["runs"]} return FullResult(success=True, error="", compile=comp, runs=run) async def wait_callback(self, run: GitHubRun, status: ProgressReporter): diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py index f5b42397..e6ea9a30 100644 --- a/src/discord-cluster-manager/consts.py +++ b/src/discord-cluster-manager/consts.py @@ -120,3 +120,17 @@ def combine_enums(enums: list[Type[Enum]], combined_name: str) -> Enum: "-Xptxas=--warn-on-spills", ] MODAL_CUDA_INCLUDE_DIRS = ["/ThunderKittens/include"] + +NVIDIA_REQUIREMENTS = """ +numpy +torch +setuptools +ninja +triton +""" + +AMD_REQUIREMENTS = """ +--index-url https://download.pytorch.org/whl/nightly/rocm6.2 +pytorch-triton-rocm==3.1.0+cf34004b8a +torch==2.6.0.dev20241023+rocm6.2 +""" diff --git a/src/discord-cluster-manager/leaderboard_db.py b/src/discord-cluster-manager/leaderboard_db.py index 872e5eb5..ea790060 100644 --- a/src/discord-cluster-manager/leaderboard_db.py +++ b/src/discord-cluster-manager/leaderboard_db.py @@ -1,4 +1,3 @@ -import json import logging from typing import List, Optional @@ -13,7 +12,7 @@ POSTGRES_USER, ) from psycopg2 import Error -from task import LeaderboardTask, build_from_legacy_reference +from task import LeaderboardTask from utils import LeaderboardItem, LRUCache, SubmissionItem leaderboard_name_cache = LRUCache(max_size=512) @@ -231,13 +230,7 @@ def get_leaderboard(self, leaderboard_name: str) -> LeaderboardItem | None: res = self.cursor.fetchone() if res: - # TODO: This is just a clutch to keep compatibility with old leaderboards - try: - task = LeaderboardTask.from_dict(res[3]) - except json.JSONDecodeError: - logging.error("json decoding error in LB %s. Legacy task?", leaderboard_name) - task = build_from_legacy_reference(res[3]) - + task = LeaderboardTask.from_dict(res[3]) return LeaderboardItem( id=res[0], name=res[1], diff --git a/src/discord-cluster-manager/leaderboard_eval.py b/src/discord-cluster-manager/leaderboard_eval.py deleted file mode 100644 index 75e55d47..00000000 --- a/src/discord-cluster-manager/leaderboard_eval.py +++ /dev/null @@ -1,22 +0,0 @@ -######## -# Evaluation scripts to run for leaderboard results -######## - -from pathlib import Path - -py_eval = Path.read_text(Path(__file__).parent / "eval.py") -cu_eval = Path.read_text(Path(__file__).parent / "eval.cu") - -nvidia_requirements = """ -numpy -torch -setuptools -ninja -triton -""" - -amd_requirements = """ ---index-url https://download.pytorch.org/whl/nightly/rocm6.2 -pytorch-triton-rocm==3.1.0+cf34004b8a -torch==2.6.0.dev20241023+rocm6.2 -""" diff --git a/src/discord-cluster-manager/task.py b/src/discord-cluster-manager/task.py index 706ad0df..1b9ec3dd 100644 --- a/src/discord-cluster-manager/task.py +++ b/src/discord-cluster-manager/task.py @@ -4,7 +4,6 @@ from pathlib import Path from typing import Dict, Union -import leaderboard_eval from consts import Language @@ -109,27 +108,5 @@ def make_task(yaml_file: str | Path) -> LeaderboardTask: return LeaderboardTask.from_dict(raw) -# TODO remove this as soon as possible -def build_from_legacy_reference(ref: str): - if "#include " in ref: - lang = Language.CUDA - config = CudaTaskData(sources=["eval.cu"]) - files = { - "eval.cu": leaderboard_eval.cu_eval, - "reference.cuh": ref, - "submission.cuh": "@SUBMISSION@", - } - elif "import " in ref: - lang = Language.Python - config = PythonTaskData(main="eval.py") - files = { - "eval.py": leaderboard_eval.py_eval, - "reference.py": ref, - "submission.py": "@SUBMISSION@", - } - - return LeaderboardTask(lang=lang, files=files, config=config, libraries=[]) - - if __name__ == "__main__": print(json.dumps(make_task("task.yml").to_dict(), indent=4))