Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:


jobs:
test_coverage:
test_vllm_coverage:
runs-on: ParallelHoss

steps:
Expand All @@ -25,13 +25,47 @@ jobs:
- name: Install dependencies
run: |
python -m pip install -U pip
pip install -e .[vllm]
pip install -e .[lora]
pip install --force-reinstall 'triton==3.2.0'
pip install -r requirements-dev.txt

- name: Run tests
run: |
coverage run --source=genlm/backend -m pytest --benchmark-disable --ignore=tests/test_mlx.py
coverage run --source=genlm/backend -m pytest --benchmark-disable --ignore=tests/test_mlx.py --ignore=tests/test_sgl.py
coverage json --omit "*/test*"
coverage report --omit "*/test*"

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
fail_ci_if_error: false
token: ${{ secrets.CODECOV_TOKEN }}
files: ./coverage.json
slug: genlm/genlm-backend

test_sgl_coverage:
runs-on: ParallelHoss

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 1

- uses: actions/setup-python@v4
with:
python-version: 3.11.5
cache: 'pip'

- name: Install dependencies
run: |
python -m pip install -U pip
pip install -e .[sgl]
pip install -r requirements-dev.txt

- name: Run tests
run: |
coverage run --source=genlm/backend -m pytest tests/test_sgl.py
coverage json --omit "*/test*"
coverage report --omit "*/test*"

Expand Down
4 changes: 2 additions & 2 deletions benchmark/benchmark_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ def load_model(model, batch_size=None):
return AsyncTransformer.from_name(model_name, batch_size=batch_size)


@pytest.mark.parametrize("model", ["vllm", "sglang"])
def test_await_next_token_logprobs(benchmark, model):
    """Benchmark single-sequence next-token logprob latency per backend.

    The stale pre-PR decorator (``["vllm", "transformer"]``) has been removed:
    stacking two ``parametrize`` decorators over the same argname ``model``
    is a pytest collection error (duplicate parametrization), and the diff
    shows the PR replaced "transformer" with "sglang".

    Args:
        benchmark: pytest-benchmark fixture used by the runner helper.
        model (str): Backend key handed to ``load_model`` ("vllm" or "sglang").
    """
    # batch_size=1: measure per-token latency, not batched throughput.
    llm = load_model(model, batch_size=1)
    # Prefixes of the shared benchmark text, tokenized with the model's tokenizer.
    sequences = token_prefixes(text, tokenizer=llm.tokenizer)
    run_await_next_token_logprobs(benchmark=benchmark, llm=llm, sequences=sequences)


@pytest.mark.parametrize("model", ["vllm", "transformer"])
@pytest.mark.parametrize("model", ["vllm", "sglang"])
def test_await_batch_next_token_logprobs(benchmark, model, batch_size=20):
llm = load_model(model, batch_size=batch_size)
batches = token_prefix_batches(text, tokenizer=llm.tokenizer, batch_size=batch_size)
Expand Down
6 changes: 5 additions & 1 deletion genlm/backend/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from genlm.backend.llm.hf import AsyncTransformer
from genlm.backend.llm.base import AsyncLM, MockAsyncLM
from genlm.backend.llm.mlx import AsyncMlxLM
from genlm.backend.llm.sgl import AsyncSGLTransformer

import torch

Expand All @@ -11,7 +12,7 @@ def load_model_by_name(name, backend=None, llm_opts=None):

Args:
name (str): Hugging Face model name (e.g. "gpt2", "meta-llama/Llama-3.2-1B-Instruct")
backend (str, optional): Backend to use for inference. Can be "vllm", "hf" or "mock".
backend (str, optional): Backend to use for inference. Can be "vllm", "hf", "mlx", "sgl", or "mock".
If None, defaults to "vllm" if CUDA is available, otherwise "hf".
llm_opts (dict, optional): Additional options to pass to the backend constructor.
See AsyncVirtualLM and AsyncTransformer documentation for details.
Expand All @@ -36,6 +37,8 @@ def load_model_by_name(name, backend=None, llm_opts=None):
return MockAsyncLM.from_name(name, **llm_opts)
elif backend == "mlx":
return AsyncMlxLM.from_name(name, **llm_opts)
elif backend == "sgl":
return AsyncSGLTransformer.from_name(name, **llm_opts)
else:
raise ValueError(f"Invalid backend: {backend}")

Expand All @@ -46,5 +49,6 @@ def load_model_by_name(name, backend=None, llm_opts=None):
"AsyncVirtualLM",
"AsyncTransformer",
"AsyncMlxLM",
"AsyncSGLTransformer",
"MockAsyncLM",
]
Loading