primatrix · 0xaskr · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · coderabbitai
diff --git a/.github/ci/cpu-ref-tests-gpu.sky.yaml b/.github/ci/cpu-ref-tests-gpu.sky.yaml
@@ -0,0 +1,27 @@
+# CI CPU Reference Tests on GPU
+# Runs tests/ref/ with CUDA + FLA (flash-linear-attention) available
+# so that GPU comparison tests (CPU ref vs FLA Triton) are not skipped.
+#
+# Usage (local):
+#   sky launch .github/ci/cpu-ref-tests-gpu.sky.yaml --down -y
+#
+# In CI this is invoked by .github/workflows/cpu_ref_tests.yml
+
+workdir: .
+
+resources:
+  accelerators: L4:1
+  use_spot: true
+
+setup: |
+  # Stop at the first failing command so a broken '.[gpu]' install is not masked by the later pytest install.
+  set -e
+  pip install uv
+  cd ~/sky_workdir
+  uv pip install -e '.[gpu]' --system
+  uv pip install pytest --system
+
+run: |
+  set -e
+  cd ~/sky_workdir
+  pytest tests/ref/ -v -o "addopts=--strict-markers"
diff --git a/.github/workflows/check_coverage.yml b/.github/workflows/check_coverage.yml
@@ -0,0 +1,25 @@
+name: API Coverage Check
+
+on:
+  push:
+    branches: [main]  # run on pushes to main
+  pull_request:
+    branches: [main]  # and on pull requests targeting main
+
+jobs:
+  api_coverage:
+    name: "Check public API test coverage"
+    runs-on: ubuntu-latest
+    timeout-minutes: 5  # the job only runs two Python scripts; no dependency install step
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"  # quoted so YAML keeps the version a string
+
+      - name: Check public API test coverage
+        run: python scripts/check_test_coverage.py  # exits non-zero when a public symbol has no test reference
+
+      - name: Check public function assertions
+        run: python scripts/check_asserts.py  # script not visible here; presumably a similar static check - confirm
diff --git a/.github/workflows/cpu_ref_tests.yml b/.github/workflows/cpu_ref_tests.yml
@@ -0,0 +1,63 @@
+name: CPU Reference Tests
+
+# Trigger on changes to the CPU reference code, its tests, or the CI
+# definitions themselves (this workflow and the SkyPilot task file), so that
+# edits to the pipeline are validated too. Each event is declared exactly once:
+# repeating `push`/`pull_request` under `on` would be duplicate mapping keys.
+on:
+  push:
+    branches: [main]
+    paths:
+      - ".github/workflows/cpu_ref_tests.yml"
+      - ".github/ci/cpu-ref-tests-gpu.sky.yaml"
+      - "tops/cpu/**"
+      - "tests/ref/**"
+  pull_request:
+    branches: [main]
+    paths:
+      - ".github/workflows/cpu_ref_tests.yml"
+      - ".github/ci/cpu-ref-tests-gpu.sky.yaml"
+      - "tops/cpu/**"
+      - "tests/ref/**"
+
+# One concurrency group per branch (PR head branch, else the pushed ref name);
+# a newer run in the same group cancels the one still in progress.
+concurrency:
+  group: cpu-ref-tests-${{ github.head_ref || github.ref_name }}
+  cancel-in-progress: true
+
+jobs:
+  cpu_ref_tests:
+    name: "CPU ref tests (GPU via SkyPilot)"
+    # The runner itself has no GPU; later steps launch the GPU task through SkyPilot.
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # Presumably required so the GCP auth step can obtain an OIDC token for
+      # the workload identity provider configured below - confirm.
+      id-token: write
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      # Provider and service account both come from repository secrets.
+      - name: Authenticate to GCP
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
+          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
+
+      - name: Set up Cloud SDK
+        uses: google-github-actions/setup-gcloud@v2
+
+      # Installs SkyPilot with its GCP extra; the version is not pinned.
+      - name: Install SkyPilot
+        run: pip install 'skypilot[gcp]'
+
+      # Derive a per-run cluster name from the branch name.
+      # The branch name is attacker-controllable on pull requests, so it is
+      # passed through an environment variable instead of being interpolated
+      # into the script text, and every character outside [a-z0-9] is replaced
+      # with '-' so the published output is safe for later steps to consume.
+      - name: Generate cluster name
+        id: cluster
+        env:
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+        run: |
+          # printf (not echo) avoids a trailing newline that tr -c would turn into '-'.
+          SLUG="$(printf '%s' "$BRANCH_NAME" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9' '-' | head -c 20)"
+          NAME="ci-ref-${SLUG}-${GITHUB_RUN_NUMBER}"
+          echo "name=$NAME" >> "$GITHUB_OUTPUT"
+
+      # The cluster name is handed to the shell through an environment variable
+      # rather than `${{ }}` interpolation, so its content can never be parsed
+      # as shell syntax.
+      - name: Run tests on GPU via SkyPilot
+        env:
+          CLUSTER_NAME: ${{ steps.cluster.outputs.name }}
+        run: |
+          sky launch .github/ci/cpu-ref-tests-gpu.sky.yaml \
+            --cluster "$CLUSTER_NAME" \
+            --down \
+            -y
+
+      # Best-effort teardown that also runs after failures and cancellations.
+      - name: Cleanup
+        if: always()
+        env:
+          CLUSTER_NAME: ${{ steps.cluster.outputs.name }}
+        run: |
+          # Nothing to tear down if the name step never produced an output.
+          [ -n "$CLUSTER_NAME" ] || exit 0
+          sky down "$CLUSTER_NAME" -y 2>/dev/null || true
diff --git a/scripts/check_test_coverage.py b/scripts/check_test_coverage.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+"""
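+Check that every public interface under tops/ops/ has matching test coverage.
+
+Scans the __init__.py of each tops/ops/ subpackage to collect its public API symbols,
+then searches the tests/ directory for at least one test file referencing each symbol.
+If any interface is uncovered, the script exits with a non-zero code so it can gate CI.
+
+Supported export styles:
+  1. __all__ = ["symbol1", "symbol2", ...]
+  2. from .mod import name as name  (explicit re-export per PEP 484)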
+"""
+
+import ast
+import re
+import sys
+from pathlib import Path
+
+
+def _extract_dunder_all(tree: ast.Module) -> list[str]:
+  """Return the string entries of the module-level ``__all__`` definition.
+
+  Only top-level statements are inspected. Both plain assignments
+  (``__all__ = [...]``) and annotated assignments (``__all__: list[str] = [...]``)
+  are recognised, and the value may be a list or a tuple literal.
+  Non-string elements inside the literal are ignored.
+
+  Args:
+    tree: Parsed module AST.
+
+  Returns:
+    The string constants of the first matching ``__all__`` literal, or an
+    empty list when no such definition exists.
+  """
+  for node in tree.body:
+    if isinstance(node, ast.Assign):
+      targets = node.targets
+    elif isinstance(node, ast.AnnAssign) and node.value is not None:
+      targets = [node.target]
+    else:
+      continue
+    if not any(isinstance(t, ast.Name) and t.id == "__all__" for t in targets):
+      continue
+    # A non-literal value (e.g. a name or a call) cannot be resolved statically;
+    # keep scanning in case a later statement assigns a literal.
+    if isinstance(node.value, (ast.List, ast.Tuple)):
+      return [
+        elt.value
+        for elt in node.value.elts
+        if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
+      ]
+  return []
+
+
+def _extract_reexports(tree: ast.Module) -> list[str]:
+  """Collect symbols re-exported as ``from .mod import name as name``.
+
+  Only top-level relative imports are considered, and only aliases whose
+  ``as`` name is spelled identically to the imported name.
+
+  Args:
+    tree: Parsed module AST.
+
+  Returns:
+    Re-exported symbol names in source order.
+  """
+  names: list[str] = []
+  for node in tree.body:
+    # level == 0 is an absolute import, which is not treated as a re-export.
+    if not isinstance(node, ast.ImportFrom) or node.level == 0:
+      continue
+    for alias in node.names:
+      # alias.name is always a string, so equality already implies asname is set.
+      if alias.asname == alias.name:
+        names.append(alias.asname)
+  return names
+
+
+def discover_public_interfaces(ops_dir: Path) -> dict[str, list[str]]:
+  """Find the public API symbols declared by each subpackage of tops/ops/.
+
+  For every immediate subdirectory that has a non-blank ``__init__.py``:
+    1. use the names listed in ``__all__`` when that yields any;
+    2. otherwise use ``from .x import y as y`` re-exports when there are any;
+    3. otherwise leave the subpackage out of the result.
+
+  Args:
+    ops_dir: Path of the tops/ops/ directory.
+
+  Returns:
+    Mapping from "tops.ops.<subpackage>" to its list of public symbol names.
+  """
+  discovered = {}
+  for package_dir in sorted(ops_dir.iterdir()):
+    init_path = package_dir / "__init__.py"
+    if not (package_dir.is_dir() and init_path.exists()):
+      continue
+
+    text = init_path.read_text(encoding="utf-8")
+    if text.strip() == "":
+      continue
+
+    module_ast = ast.parse(text, filename=str(init_path))
+    # __all__ takes priority; re-exports are consulted only when it gives nothing.
+    exported = _extract_dunder_all(module_ast) or _extract_reexports(module_ast)
+    if exported:
+      discovered[f"tops.ops.{package_dir.name}"] = exported
+
+  return discovered
+
+
+def find_test_references(
+  tests_dir: Path, symbols: list[str]
+) -> dict[str, list[str]]:
+  """Report which test files mention each symbol.
+
+  Every ``.py`` file below ``tests_dir`` is read, except those under its
+  top-level ``src`` directory, and each symbol is matched as a whole word.
+
+  Args:
+    tests_dir: Path of the tests/ directory.
+    symbols: Symbol names to look for.
+
+  Returns:
+    Mapping from symbol name to the paths of the test files referencing it.
+  """
+  matchers = {}
+  hits: dict[str, list[str]] = {}
+  for name in symbols:
+    matchers[name] = re.compile(r"\b" + re.escape(name) + r"\b")
+    hits[name] = []
+
+  for candidate in sorted(tests_dir.rglob("*.py")):
+    parts = candidate.relative_to(tests_dir).parts
+    if parts and parts[0] == "src":
+      continue
+
+    text = candidate.read_text(encoding="utf-8")
+    for name, matcher in matchers.items():
+      if matcher.search(text) is not None:
+        hits[name].append(str(candidate))
+
+  return hits
+
+
+def main():
+  """Run the coverage check and exit with a status usable as a CI gate.
+
+  Resolves tops/ops/ and tests/ relative to the repository root (the parent
+  of this script's directory), discovers the public symbols, prints a
+  per-package PASS/MISS report, and exits 1 when any symbol has no test
+  reference, 0 otherwise.
+  """
+  project_root = Path(__file__).resolve().parents[1]
+  ops_dir = project_root / "tops" / "ops"
+  tests_dir = project_root / "tests"
+
+  # Explicit checks rather than `assert`: assertions are removed under
+  # `python -O`, which would let a missing directory slip through.
+  for required_dir in (ops_dir, tests_dir):
+    if not required_dir.is_dir():
+      sys.exit(f"目录不存在: {required_dir}")
+
+  interfaces = discover_public_interfaces(ops_dir)
+
+  if not interfaces:
+    print("未在 tops/ops/ 中发现公开接口，无需检查。")
+    sys.exit(0)
+
+  all_symbols = [sym for symbols in interfaces.values() for sym in symbols]
+
+  references = find_test_references(tests_dir, all_symbols)
+
+  total = 0
+  covered = 0
+  gaps = []
+
+  for pkg, symbols in sorted(interfaces.items()):
+    print(f"\n{'=' * 60}")
+    print(f"Package: {pkg} ({len(symbols)} interfaces)")
+    print(f"{'=' * 60}")
+    for sym in symbols:
+      total += 1
+      files = references.get(sym, [])
+      if files:
+        covered += 1
+        print(f"  [PASS] {sym} ({len(files)} test files)")
+      else:
+        gaps.append((pkg, sym))
+        print(f"  [MISS] {sym} -- 无测试覆盖")
+
+  print(f"\n{'=' * 60}")
+  print(f"汇总: {covered}/{total} 接口已覆盖")
+  print(f"{'=' * 60}")
+
+  if gaps:
+    print(f"\n检查失败: {len(gaps)} 个接口缺少测试覆盖:")
+    for pkg, sym in gaps:
+      print(f"  - {pkg}.{sym}")
+    sys.exit(1)
+  else:
+    print("\n所有公开接口均已具备测试覆盖。")
+    sys.exit(0)
+
+
+if __name__ == "__main__":  # run the check only when executed as a script, not on import
+  main()