35 changes: 35 additions & 0 deletions .github/labeler.yml
@@ -0,0 +1,35 @@
# PR Labeler configuration file
# Automatically add labels based on modified file paths

docs:
- changed-files:
- any-glob-to-any-file: '**/*.md'

ci:
- changed-files:
- any-glob-to-any-file:
- '.github/**/*'
- '.pre-commit-config.yaml'

tests:
- changed-files:
- any-glob-to-any-file: 'tests/**/*'

core:
- changed-files:
- any-glob-to-any-file: 'vllm_fl/**/*'

examples:
- changed-files:
- any-glob-to-any-file: 'examples/**/*'

benchmarks:
- changed-files:
- any-glob-to-any-file: 'benchmarks/**/*'

build:
- changed-files:
- any-glob-to-any-file:
- 'setup.py'
- 'requirements*.txt'
- 'pyproject.toml'
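For context, each label above applies when any changed file matches any of its globs. A rough local sketch of that mapping (illustrative only, not part of this PR; the LABEL_GLOBS dict just mirrors the config, and fnmatch is only an approximation of the action's minimatch-style globbing):

import fnmatch

# Mirrors .github/labeler.yml; fnmatch's '*' also matches '/', which is close
# enough to the '**/*' patterns for a quick sanity check.
LABEL_GLOBS = {
    "docs": ["*.md"],
    "ci": [".github/*", ".pre-commit-config.yaml"],
    "tests": ["tests/*"],
    "core": ["vllm_fl/*"],
    "examples": ["examples/*"],
    "benchmarks": ["benchmarks/*"],
    "build": ["setup.py", "requirements*.txt", "pyproject.toml"],
}

def labels_for(changed_files):
    """Return the set of labels whose globs match any changed file path."""
    return {
        label
        for label, globs in LABEL_GLOBS.items()
        if any(fnmatch.fnmatch(path, g) for path in changed_files for g in globs)
    }

print(labels_for(["tests/functional_tests/conftest.py", "pyproject.toml"]))
# -> {'tests', 'build'}  (set order may vary; the action itself uses minimatch semantics)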
24 changes: 24 additions & 0 deletions .github/workflows/labeler.yml
@@ -0,0 +1,24 @@
name: "Pull Request Labeler"

on:
pull_request_target:
types: [opened, synchronize, reopened]

permissions:
contents: read
pull-requests: write

jobs:
label:
runs-on: ubuntu-latest

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Apply labels
uses: actions/labeler@v5
continue-on-error: true # Don't fail if config not yet on main branch
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
sync-labels: true
5 changes: 5 additions & 0 deletions .gitignore
@@ -1,3 +1,8 @@
*.egg-info
__pycache__/
build/

# Coverage
.coverage
.coverage.*
htmlcov/
61 changes: 61 additions & 0 deletions pyproject.toml
@@ -0,0 +1,61 @@
[build-system]
requires = ["setuptools>=45", "setuptools-scm[toml]>=6.2"]
build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
markers = [
"gpu: marks tests as requiring single GPU (deselect with '-m \"not gpu\"')",
"multi_gpu: marks tests as requiring multiple GPUs",
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
"integration: marks tests as integration tests",
"e2e: marks tests as end-to-end tests",
"flaggems: marks tests as requiring flag_gems library",
"functional: marks tests as functional tests",
]
addopts = "-v --tb=short"
filterwarnings = [
"ignore::DeprecationWarning",
"ignore::UserWarning",
]

[tool.coverage.run]
source = ["vllm_fl"]
omit = [
"tests/*",
"examples/*",
"benchmarks/*",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise NotImplementedError",
"if TYPE_CHECKING:",
"if __name__ == .__main__.:",
]

[tool.ruff]
line-length = 100
target-version = "py39"

[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
"F", # pyflakes
"I", # isort
"B", # flake8-bugbear
"C4", # flake8-comprehensions
]
ignore = [
"E501", # line too long (handled by formatter)
"B008", # do not perform function calls in argument defaults
]

[tool.ruff.lint.isort]
known-first-party = ["vllm_fl"]
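As a quick illustration of the marker configuration above, a hypothetical test (not part of this PR) can opt into the registered markers and then be filtered on the command line, e.g. pytest -m "not gpu and not slow":

import pytest

# Hypothetical test showing how the registered markers are applied.
@pytest.mark.gpu   # deselect on CPU-only runners with: pytest -m "not gpu"
@pytest.mark.slow  # deselect with: pytest -m "not slow"
def test_heavy_generation():
    """Placeholder body; a real test would exercise GPU-bound generation."""
    assert True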
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
vllm==0.13.0
decorator
pyyaml
scipy
File renamed without changes.
@@ -4,7 +4,7 @@
import pytest
import vllm # noqa: F401
from conftest import VllmRunner
import vllm_flagos
import vllm_fl # noqa: F401

MODELS = [
# "Qwen/Qwen3-0.6B",
File renamed without changes.
2 changes: 2 additions & 0 deletions tests/functional_tests/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 BAAI. All rights reserved.
"""Functional tests for vllm_fl."""
2 changes: 2 additions & 0 deletions tests/functional_tests/compilation/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 BAAI. All rights reserved.
"""Compilation functional tests."""
147 changes: 147 additions & 0 deletions tests/functional_tests/compilation/test_graph_capture.py
@@ -0,0 +1,147 @@
# Copyright (c) 2025 BAAI. All rights reserved.

"""
Functional tests for graph capture and replay.
Tests CUDA/NPU graph functionality for model optimization.

Note: Unit tests for GraphOptions, GraphEntry, and GraphWrapper are in
unit_tests/compilation/test_graph.py. This file only contains functional
tests that require actual GPU execution.
"""

import pytest
import torch
from dataclasses import dataclass


# Mark all tests as requiring GPU
pytestmark = pytest.mark.gpu


class TestWeakRefTensors:
"""Test weak reference tensor functionality."""

def test_weak_ref_tensors_function(self):
"""Test weak_ref_tensors function exists."""
try:
from vllm_fl.compilation.graph import weak_ref_tensors
assert weak_ref_tensors is not None
except ImportError:
pytest.skip("weak_ref_tensors not available")

@pytest.mark.skipif(not torch.cuda.is_available(), reason="GPU not available")
def test_weak_ref_tensors_with_cuda_tensor(self):
"""Test weak_ref_tensors with CUDA tensor."""
try:
from vllm_fl.compilation.graph import weak_ref_tensors
except ImportError:
pytest.skip("weak_ref_tensors not available")

tensor = torch.randn(4, 8, device="cuda")
result = weak_ref_tensors(tensor)
# Result should be either the tensor or a weak reference
assert result is not None


class TestGraphCaptureFlow:
"""Test the complete graph capture flow."""

@pytest.mark.skipif(not torch.cuda.is_available(), reason="GPU not available")
def test_cuda_graph_basic_capture(self):
"""Test basic CUDA graph capture and replay."""
# Simple test without vllm_fl dependencies
device = torch.device("cuda")

# Create a simple computation
def computation(x):
return x * 2 + 1

# Create input tensor
x = torch.randn(4, 8, device=device)

# Warmup
y = computation(x)

# Capture graph
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
y = computation(x)

# Replay graph
g.replay()

# Verify output
expected = x * 2 + 1
assert torch.allclose(y, expected)

@pytest.mark.skipif(not torch.cuda.is_available(), reason="GPU not available")
def test_cuda_graph_with_different_inputs(self):
"""Test CUDA graph with different input values."""
device = torch.device("cuda")

# Static input buffer
static_input = torch.randn(4, 8, device=device)
static_output = torch.empty(4, 8, device=device)

def computation(x, out):
out.copy_(x * 2)

# Warmup
computation(static_input, static_output)

# Capture graph
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
computation(static_input, static_output)

# Test with new input values (copy to static buffer)
new_input = torch.ones(4, 8, device=device)
static_input.copy_(new_input)

# Replay
g.replay()

expected = new_input * 2
assert torch.allclose(static_output, expected)


class TestGraphCacheManagement:
"""Test graph cache management functionality."""

def test_batch_descriptor_hashing(self):
"""Test that batch descriptors can be used as dict keys."""
@dataclass(frozen=True)
class MockBatchDescriptor:
num_tokens: int
max_num_reqs: int

desc1 = MockBatchDescriptor(num_tokens=16, max_num_reqs=4)
desc2 = MockBatchDescriptor(num_tokens=16, max_num_reqs=4)
desc3 = MockBatchDescriptor(num_tokens=32, max_num_reqs=8)

cache = {}
cache[desc1] = "graph1"
cache[desc3] = "graph3"

# Same values should hash to same key
assert cache[desc2] == "graph1"
assert cache[desc3] == "graph3"

def test_graph_entry_storage(self):
"""Test storing graph entries in cache."""
try:
from vllm_fl.compilation.graph import GraphEntry
except ImportError:
pytest.skip("GraphEntry not available")

@dataclass(frozen=True)
class MockBatchDescriptor:
num_tokens: int

cache = {}
desc = MockBatchDescriptor(num_tokens=16)

entry = GraphEntry(batch_descriptor=desc)
cache[desc] = entry

assert cache[desc].batch_descriptor.num_tokens == 16
96 changes: 96 additions & 0 deletions tests/functional_tests/conftest.py
@@ -0,0 +1,96 @@
# Copyright (c) 2025 BAAI. All rights reserved.

"""
Functional test fixtures and configuration.
"""

import os
import pytest
import torch


def pytest_configure(config):
"""Register custom markers."""
config.addinivalue_line("markers", "gpu: marks tests as requiring GPU")
config.addinivalue_line("markers", "multi_gpu: marks tests as requiring multiple GPUs")
config.addinivalue_line("markers", "flaggems: marks tests as requiring flag_gems library")


@pytest.fixture(scope="session")
def has_gpu():
"""Check if GPU is available."""
return torch.cuda.is_available()


@pytest.fixture(scope="session")
def device(has_gpu):
"""Get the test device."""
if has_gpu:
return torch.device("cuda:0")
return torch.device("cpu")


@pytest.fixture(scope="session")
def gpu_count():
"""Get the number of available GPUs."""
if torch.cuda.is_available():
return torch.cuda.device_count()
return 0


@pytest.fixture
def reset_dispatch_manager():
"""Reset dispatch manager before and after test."""
from vllm_fl.dispatch import reset_default_manager, reset_global_policy

reset_default_manager()
reset_global_policy()
yield
reset_default_manager()
reset_global_policy()


@pytest.fixture
def clean_env():
"""Clean dispatch-related environment variables."""
env_vars = [
"VLLM_FL_PREFER",
"VLLM_FL_STRICT",
"VLLM_FL_CONFIG",
"VLLM_FL_DENY_VENDORS",
"VLLM_FL_ALLOW_VENDORS",
"VLLM_FL_PER_OP",
"VLLM_FL_DISPATCH_DEBUG",
]

# Save original values
original = {k: os.environ.get(k) for k in env_vars}

# Clear env vars
for k in env_vars:
os.environ.pop(k, None)

yield

# Restore original values
for k, v in original.items():
if v is not None:
os.environ[k] = v
else:
os.environ.pop(k, None)


def skip_if_no_gpu(fn):
"""Decorator to skip test if no GPU is available."""
return pytest.mark.skipif(
not torch.cuda.is_available(),
reason="GPU not available"
)(fn)


def skip_if_no_multi_gpu(fn):
"""Decorator to skip test if less than 2 GPUs available."""
return pytest.mark.skipif(
not torch.cuda.is_available() or torch.cuda.device_count() < 2,
reason="Multiple GPUs not available"
)(fn)
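For context, a hypothetical test (not part of this PR) could combine these fixtures as follows: the device fixture picks CUDA when available and falls back to CPU, while clean_env isolates the dispatch-related environment variables for the duration of the test:

import pytest
import torch

# Hypothetical example of consuming the session-scoped and function-scoped fixtures.
@pytest.mark.gpu
def test_matmul_on_selected_device(device, clean_env):
    """Runs on the session-selected device with dispatch env vars cleared."""
    a = torch.randn(4, 4, device=device)
    b = torch.randn(4, 4, device=device)
    assert torch.matmul(a, b).shape == (4, 4)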