35 changes: 35 additions & 0 deletions .github/labeler.yml
@@ -0,0 +1,35 @@
# PR Labeler configuration file
# Automatically add labels based on modified file paths

docs:
- changed-files:
- any-glob-to-any-file: '**/*.md'

ci:
- changed-files:
- any-glob-to-any-file:
- '.github/**/*'
- '.pre-commit-config.yaml'

tests:
- changed-files:
- any-glob-to-any-file: 'tests/**/*'

core:
- changed-files:
- any-glob-to-any-file: 'vllm_fl/**/*'

examples:
- changed-files:
- any-glob-to-any-file: 'examples/**/*'

benchmarks:
- changed-files:
- any-glob-to-any-file: 'benchmarks/**/*'

build:
- changed-files:
- any-glob-to-any-file:
- 'setup.py'
- 'requirements*.txt'
- 'pyproject.toml'
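For context, each label above applies when any changed file matches any of its globs. A rough local sketch of that mapping (illustrative only, not part of this PR; the LABEL_GLOBS dict just mirrors the config, and fnmatch is only an approximation of the action's minimatch-style globbing):

import fnmatch

# Mirrors .github/labeler.yml; fnmatch's '*' also matches '/', which is close
# enough to the '**/*' patterns for a quick sanity check.
LABEL_GLOBS = {
    "docs": ["*.md"],
    "ci": [".github/*", ".pre-commit-config.yaml"],
    "tests": ["tests/*"],
    "core": ["vllm_fl/*"],
    "examples": ["examples/*"],
    "benchmarks": ["benchmarks/*"],
    "build": ["setup.py", "requirements*.txt", "pyproject.toml"],
}

def labels_for(changed_files):
    """Return the set of labels whose globs match any changed file path."""
    return {
        label
        for label, globs in LABEL_GLOBS.items()
        if any(fnmatch.fnmatch(path, g) for path in changed_files for g in globs)
    }

print(labels_for(["tests/functional_tests/conftest.py", "pyproject.toml"]))
# -> {'tests', 'build'}  (set order may vary; the action itself uses minimatch semantics)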
24 changes: 24 additions & 0 deletions .github/workflows/labeler.yml
@@ -0,0 +1,24 @@
name: "Pull Request Labeler"

on:
pull_request_target:
types: [opened, synchronize, reopened]

permissions:
contents: read
pull-requests: write

jobs:
label:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Apply labels
uses: actions/labeler@v5
continue-on-error: true # Don't fail if config not yet on main branch
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
sync-labels: true
5 changes: 5 additions & 0 deletions .gitignore
@@ -1,3 +1,8 @@
*.egg-info
__pycache__/
build/

# Coverage
.coverage
.coverage.*
htmlcov/
61 changes: 61 additions & 0 deletions pyproject.toml
@@ -0,0 +1,61 @@
[build-system]
requires = ["setuptools>=45", "setuptools-scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
"gpu: marks tests as requiring single GPU (deselect with '-m \"not gpu\"')",
"multi_gpu: marks tests as requiring multiple GPUs",
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
"integration: marks tests as integration tests",
"e2e: marks tests as end-to-end tests",
"flaggems: marks tests as requiring flag_gems library",
"functional: marks tests as functional tests",
]
addopts = "-v --tb=short"
filterwarnings = [
"ignore::DeprecationWarning",
"ignore::UserWarning",
]

[tool.coverage.run]
source = ["vllm_fl"]
omit = [
"tests/*",
"examples/*",
"benchmarks/*",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise NotImplementedError",
"if TYPE_CHECKING:",
"if __name__ == .__main__.:",
]

[tool.ruff]
line-length = 100
target-version = "py39"

[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort
"B", # flake8-bugbear
"C4", # flake8-comprehensions
]
ignore = [
"E501", # line too long (handled by formatter)
"B008", # do not perform function calls in argument defaults
]

[tool.ruff.lint.isort]
known-first-party = ["vllm_fl"]
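As a quick illustration of the marker configuration above, a hypothetical test (not part of this PR) can opt into the registered markers and then be filtered on the command line, e.g. pytest -m "not gpu and not slow":

import pytest

# Hypothetical test showing how the registered markers are applied.
@pytest.mark.gpu   # deselect on CPU-only runners with: pytest -m "not gpu"
@pytest.mark.slow  # deselect with: pytest -m "not slow"
def test_heavy_generation():
    """Placeholder body; a real test would exercise GPU-bound generation."""
    assert True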
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
vllm==0.13.0
decorator
pyyaml
scipy
File renamed without changes.
@@ -4,7 +4,7 @@
import pytest
import vllm # noqa: F401
from conftest import VllmRunner
import vllm_flagos
import vllm_fl # noqa: F401

MODELS = [
# "Qwen/Qwen3-0.6B",
File renamed without changes.
2 changes: 2 additions & 0 deletions tests/functional_tests/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 BAAI. All rights reserved.
"""Functional tests for vllm_fl."""
2 changes: 2 additions & 0 deletions tests/functional_tests/compilation/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 BAAI. All rights reserved.
"""Compilation functional tests."""
147 changes: 147 additions & 0 deletions tests/functional_tests/compilation/test_graph_capture.py
@@ -0,0 +1,147 @@
# Copyright (c) 2025 BAAI. All rights reserved.

"""
Functional tests for graph capture and replay.
Tests CUDA/NPU graph functionality for model optimization.

Note: Unit tests for GraphOptions, GraphEntry, and GraphWrapper are in
unit_tests/compilation/test_graph.py. This file only contains functional
tests that require actual GPU execution.
"""

import pytest
import torch
from dataclasses import dataclass


# Mark all tests as requiring GPU
pytestmark = pytest.mark.gpu


class TestWeakRefTensors:
"""Test weak reference tensor functionality."""

def test_weak_ref_tensors_function(self):
"""Test weak_ref_tensors function exists."""
try:
from vllm_fl.compilation.graph import weak_ref_tensors
assert weak_ref_tensors is not None
except ImportError:
pytest.skip("weak_ref_tensors not available")

@pytest.mark.skipif(not torch.cuda.is_available(), reason="GPU not available")
def test_weak_ref_tensors_with_cuda_tensor(self):
"""Test weak_ref_tensors with CUDA tensor."""
try:
from vllm_fl.compilation.graph import weak_ref_tensors
except ImportError:
pytest.skip("weak_ref_tensors not available")

tensor = torch.randn(4, 8, device="cuda")
result = weak_ref_tensors(tensor)
# Result should be either the tensor or a weak reference
assert result is not None


class TestGraphCaptureFlow:
"""Test the complete graph capture flow."""

@pytest.mark.skipif(not torch.cuda.is_available(), reason="GPU not available")
def test_cuda_graph_basic_capture(self):
"""Test basic CUDA graph capture and replay."""
# Simple test without vllm_fl dependencies
device = torch.device("cuda")

# Create a simple computation
def computation(x):
return x * 2 + 1

# Create input tensor
x = torch.randn(4, 8, device=device)

# Warmup
y = computation(x)

# Capture graph
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
y = computation(x)

# Replay graph
g.replay()

# Verify output
expected = x * 2 + 1
assert torch.allclose(y, expected)

@pytest.mark.skipif(not torch.cuda.is_available(), reason="GPU not available")
def test_cuda_graph_with_different_inputs(self):
"""Test CUDA graph with different input values."""
device = torch.device("cuda")

# Static input buffer
static_input = torch.randn(4, 8, device=device)
static_output = torch.empty(4, 8, device=device)

def computation(x, out):
out.copy_(x * 2)

# Warmup
computation(static_input, static_output)

# Capture graph
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
computation(static_input, static_output)

# Test with new input values (copy to static buffer)
new_input = torch.ones(4, 8, device=device)
static_input.copy_(new_input)

# Replay
g.replay()

expected = new_input * 2
assert torch.allclose(static_output, expected)


class TestGraphCacheManagement:
"""Test graph cache management functionality."""

def test_batch_descriptor_hashing(self):
"""Test that batch descriptors can be used as dict keys."""
@dataclass(frozen=True)
class MockBatchDescriptor:
num_tokens: int
max_num_reqs: int

desc1 = MockBatchDescriptor(num_tokens=16, max_num_reqs=4)
desc2 = MockBatchDescriptor(num_tokens=16, max_num_reqs=4)
desc3 = MockBatchDescriptor(num_tokens=32, max_num_reqs=8)

cache = {}
cache[desc1] = "graph1"
cache[desc3] = "graph3"

# Same values should hash to same key
assert cache[desc2] == "graph1"
assert cache[desc3] == "graph3"

def test_graph_entry_storage(self):
"""Test storing graph entries in cache."""
try:
from vllm_fl.compilation.graph import GraphEntry
except ImportError:
pytest.skip("GraphEntry not available")

@dataclass(frozen=True)
class MockBatchDescriptor:
num_tokens: int

cache = {}
desc = MockBatchDescriptor(num_tokens=16)

entry = GraphEntry(batch_descriptor=desc)
cache[desc] = entry

assert cache[desc].batch_descriptor.num_tokens == 16
96 changes: 96 additions & 0 deletions tests/functional_tests/conftest.py
@@ -0,0 +1,96 @@
# Copyright (c) 2025 BAAI. All rights reserved.

"""
Functional test fixtures and configuration.
"""

import os
import pytest
import torch


def pytest_configure(config):
"""Register custom markers."""
config.addinivalue_line("markers", "gpu: marks tests as requiring GPU")
config.addinivalue_line("markers", "multi_gpu: marks tests as requiring multiple GPUs")
config.addinivalue_line("markers", "flaggems: marks tests as requiring flag_gems library")


@pytest.fixture(scope="session")
def has_gpu():
"""Check if GPU is available."""
return torch.cuda.is_available()


@pytest.fixture(scope="session")
def device(has_gpu):
"""Get the test device."""
if has_gpu:
return torch.device("cuda:0")
return torch.device("cpu")


@pytest.fixture(scope="session")
def gpu_count():
"""Get the number of available GPUs."""
if torch.cuda.is_available():
return torch.cuda.device_count()
return 0


@pytest.fixture
def reset_dispatch_manager():
"""Reset dispatch manager before and after test."""
from vllm_fl.dispatch import reset_default_manager, reset_global_policy

reset_default_manager()
reset_global_policy()
yield
reset_default_manager()
reset_global_policy()


@pytest.fixture
def clean_env():
"""Clean dispatch-related environment variables."""
env_vars = [
"VLLM_FL_PREFER",
"VLLM_FL_STRICT",
"VLLM_FL_CONFIG",
"VLLM_FL_DENY_VENDORS",
"VLLM_FL_ALLOW_VENDORS",
"VLLM_FL_PER_OP",
"VLLM_FL_DISPATCH_DEBUG",
]

# Save original values
original = {k: os.environ.get(k) for k in env_vars}

# Clear env vars
for k in env_vars:
os.environ.pop(k, None)

yield

# Restore original values
for k, v in original.items():
if v is not None:
os.environ[k] = v
else:
os.environ.pop(k, None)


def skip_if_no_gpu(fn):
"""Decorator to skip test if no GPU is available."""
return pytest.mark.skipif(
not torch.cuda.is_available(),
reason="GPU not available"
)(fn)


def skip_if_no_multi_gpu(fn):
"""Decorator to skip test if less than 2 GPUs available."""
return pytest.mark.skipif(
not torch.cuda.is_available() or torch.cuda.device_count() < 2,
reason="Multiple GPUs not available"
)(fn)
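For context, a hypothetical test (not part of this PR) could combine these fixtures as follows: the device fixture picks CUDA when available and falls back to CPU, while clean_env isolates the dispatch-related environment variables for the duration of the test:

import pytest
import torch

# Hypothetical example of consuming the session-scoped and function-scoped fixtures.
@pytest.mark.gpu
def test_matmul_on_selected_device(device, clean_env):
    """Runs on the session-selected device with dispatch env vars cleared."""
    a = torch.randn(4, 4, device=device)
    b = torch.randn(4, 4, device=device)
    assert torch.matmul(a, b).shape == (4, 4)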