Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
652fd68
add ability to pass in multiple request rates
DaltheCow Jun 19, 2024
bfcc329
running tests command is added to the Makefile
parfeniukink Jun 26, 2024
44f8e41
rename to fixed rate profile generator
DaltheCow Jul 2, 2024
d8469cb
increment rate index
DaltheCow Jul 2, 2024
eb03706
combine constant and poisson Profile creation
DaltheCow Jul 2, 2024
9a0c582
throw error if user passes in rate in synchronous mode
DaltheCow Jul 2, 2024
fb1ebcb
refactor to better handle creating profile generator
DaltheCow Jul 2, 2024
bf72422
test base ProfileGenerator class
DaltheCow Jul 3, 2024
98f79c4
update fixed rate profile generator, add tests, set up initial execut…
DaltheCow Jul 9, 2024
c0284a3
test executor run method
DaltheCow Jul 10, 2024
d915515
test sweep profile generator
DaltheCow Jul 11, 2024
993610a
merge main
DaltheCow Jul 15, 2024
ffc0e7b
fix test indentation
DaltheCow Jul 15, 2024
b41b74a
fix some improper imports
DaltheCow Jul 17, 2024
c78d2ba
Merge branch 'main' into multiple-request-rates
DaltheCow Jul 18, 2024
98ff5e4
merge main, handle merge conflicts minus the tests
DaltheCow Jul 23, 2024
21774cd
wip
DaltheCow Jul 23, 2024
1d3a7cf
wip
DaltheCow Jul 23, 2024
79c03ce
fix all broken tests
DaltheCow Jul 23, 2024
8833ae3
run make style
DaltheCow Jul 23, 2024
2ec371b
fix linting issues
DaltheCow Jul 23, 2024
2361916
remove unused import
DaltheCow Jul 23, 2024
ca8e553
fix type issue
DaltheCow Jul 23, 2024
ce460de
pytest.init_options section is restored
Jul 24, 2024
3062fba
format pyproject.toml file
Jul 24, 2024
7524977
Merge branch 'main' into multiple-request-rates
markurtz Jul 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ style:
isort src tests
flake8 src tests --max-line-length 88

# test:
# pytest tests
test:
python -m pytest -s -vvv --cache-clear tests/

build:
python setup.py sdist bdist_wheel
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def _setup_long_description() -> Tuple[str, str]:
'openai',
'requests',
'transformers',
'click'
],
extras_require={
'dev': [
Expand Down
4 changes: 2 additions & 2 deletions src/guidellm/executor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Profile,
ProfileGenerationModes,
ProfileGenerator,
SingleProfileGenerator,
FixedRateProfileGenerator,
SweepProfileGenerator,
)

Expand All @@ -12,6 +12,6 @@
"ProfileGenerationModes",
"Profile",
"ProfileGenerator",
"SingleProfileGenerator",
"FixedRateProfileGenerator",
"SweepProfileGenerator",
]
5 changes: 4 additions & 1 deletion src/guidellm/executor/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@ def __init__(
self,
request_generator: RequestGenerator,
backend: Backend,
profile_mode: Union[str, ProfileGenerationModes] = "single",
rate_type: str = "sweep",
profile_args: Optional[Dict[str, Any]] = None,
max_requests: Optional[int] = None,
max_duration: Optional[float] = None,
):
self.request_generator = request_generator
self.backend = backend
profile_mode = "sweep"
if rate_type in {"synchronous", "constant", "poisson"}:
profile_mode = "fixed_rate"
self.profile = ProfileGenerator.create_generator(
profile_mode, **(profile_args or {})
)
Expand Down
64 changes: 38 additions & 26 deletions src/guidellm/executor/profile_generator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Union
from typing import List, Optional, Union

import numpy

Expand All @@ -12,13 +12,17 @@
"ProfileGenerationModes",
"Profile",
"ProfileGenerator",
"SingleProfileGenerator",
"FixedRateProfileGenerator",
"SweepProfileGenerator",
]

RateTypeLoadGenModeMap = {
"constant": LoadGenerationModes.CONSTANT,
"poisson": LoadGenerationModes.POISSON,
}

class ProfileGenerationModes(Enum):
SINGLE = "single"
FIXED = "fixed_rate"
SWEEP = "sweep"


Expand All @@ -43,9 +47,11 @@ def inner_wrapper(wrapped_class):
def create_generator(
mode: Union[str, ProfileGenerationModes], **kwargs
) -> "ProfileGenerator":
if isinstance(mode, str):
mode = ProfileGenerationModes(mode)

mode_is_invalid = not isinstance(mode, str) or mode not in [m.value for m in ProfileGenerationModes]
if mode_is_invalid:
raise ValueError(f"Invalid profile generation mode: {mode}")
mode = ProfileGenerationModes(mode)

if mode not in ProfileGenerator._registry:
raise ValueError(f"Invalid profile generation mode: {mode}")

Expand All @@ -61,35 +67,41 @@ def next_profile(
pass


@ProfileGenerator.register_generator(ProfileGenerationModes.SINGLE)
class SingleProfileGenerator(ProfileGenerator):
def __init__(self, rate: float, rate_type: str, **kwargs):
super().__init__(ProfileGenerationModes.SINGLE)
self._rate = rate
self._rate_type = rate_type
@ProfileGenerator.register_generator(ProfileGenerationModes.FIXED)
class FixedRateProfileGenerator(ProfileGenerator):
def __init__(self, rate_type: str, rate: Optional[List[float]] = None, **kwargs):
super().__init__(ProfileGenerationModes.FIXED)
if rate_type == "synchronous" and rate and len(rate) > 0:
raise ValueError("custom rates are not supported in synchronous mode")
self._rates = rate
self._rate_index = 0
self._generated = False
self._rate_type = rate_type

def next_profile(
self, current_report: TextGenerationBenchmarkReport
) -> Optional[Profile]:
if self._generated:
return None

self._generated = True

if self._rate_type == "constant":
return Profile(
load_gen_mode=LoadGenerationModes.CONSTANT, load_gen_rate=self._rate
)

if self._rate_type == "synchronous":
if self._generated:
return None

self._generated = True

return Profile(
load_gen_mode=LoadGenerationModes.SYNCHRONOUS, load_gen_rate=None
)

if self._rate_type == "poisson":

if self._rate_type in {"constant", "poisson"}:
if self._rate_index >= len(self._rates):
return None

current_rate = self._rates[self._rate_index]
self._rate_index += 1

load_gen_mode = RateTypeLoadGenModeMap[self._rate_type]

return Profile(
load_gen_mode=LoadGenerationModes.POISSON, load_gen_rate=self._rate
load_gen_mode=load_gen_mode, load_gen_rate=current_rate
)

raise ValueError(f"Invalid rate type: {self._rate_type}")
Expand Down Expand Up @@ -151,4 +163,4 @@ def next_profile(
load_gen_mode=LoadGenerationModes.CONSTANT, load_gen_rate=rate
)

return None
return None
5 changes: 3 additions & 2 deletions src/guidellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@
@click.option(
"--rate",
type=float,
default="1.0",
default=[1.0],
help="Rate to use for constant and poisson rate types",
multiple=True,
)
@click.option(
"--num-seconds",
Expand Down Expand Up @@ -109,7 +110,7 @@ def main(
executor = Executor(
request_generator=request_generator,
backend=backend,
profile_mode=rate_type,
rate_type=rate_type,
profile_args={"rate_type": rate_type, "rate": rate},
max_requests=num_requests,
max_duration=num_seconds,
Expand Down
70 changes: 70 additions & 0 deletions tests/unit/executor/test_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pytest
from unittest.mock import MagicMock, patch
from src.guidellm.backend.base import Backend
from src.guidellm.executor.executor import Executor
from src.guidellm.executor.profile_generator import Profile, ProfileGenerator
from src.guidellm.request.base import RequestGenerator
from src.guidellm.scheduler.load_generator import LoadGenerationModes

def test_executor_creation():
    """Executor keeps the collaborators and limits it was constructed with."""
    mock_request_generator = MagicMock(spec=RequestGenerator)
    mock_backend = MagicMock(spec=Backend)
    rate_type = "sweep"
    profile_args = None
    # No trailing commas here: `None,` is the one-element tuple `(None,)`, so the
    # executor would have been handed tuples instead of None.
    max_requests = None
    max_duration = None

    executor = Executor(
        mock_request_generator,
        mock_backend,
        rate_type,
        profile_args,
        max_requests,
        max_duration,
    )

    assert executor.request_generator == mock_request_generator
    assert executor.backend == mock_backend
    assert executor.max_requests is None
    assert executor.max_duration is None


@pytest.fixture
def mock_request_generator():
    """Provide a MagicMock restricted to the RequestGenerator interface."""
    request_generator_double = MagicMock(spec=RequestGenerator)
    return request_generator_double

@pytest.fixture
def mock_backend():
    """Provide a MagicMock restricted to the Backend interface."""
    backend_double = MagicMock(spec=Backend)
    return backend_double

@pytest.fixture
def mock_scheduler():
    """Patch the Scheduler name in the executor module and yield the mock class.

    The patch is active only while the requesting test runs; it is undone when
    the fixture is torn down.
    """
    scheduler_patch = patch('src.guidellm.executor.executor.Scheduler')
    with scheduler_patch as scheduler_cls:
        yield scheduler_cls

def test_executor_run(mock_request_generator, mock_backend, mock_scheduler):
    """Executor.run creates one scheduler per profile and collects each benchmark."""
    expected_rates = (1.0, 2.0)
    constant_profiles = [
        Profile(load_gen_mode=LoadGenerationModes.CONSTANT, load_gen_rate=rate)
        for rate in expected_rates
    ]

    mock_profile_generator = MagicMock(spec=ProfileGenerator)
    # The trailing None mirrors what next_profile returns once it has no more
    # profiles to hand out.
    mock_profile_generator.next_profile.side_effect = [*constant_profiles, None]

    create_generator_patch = patch(
        'src.guidellm.executor.executor.ProfileGenerator.create_generator',
        return_value=mock_profile_generator,
    )
    with create_generator_patch:
        executor = Executor(
            request_generator=mock_request_generator,
            backend=mock_backend,
            rate_type="constant",
            profile_args={"rate_type": "constant", "rate": [1.0, 2.0]},
            max_requests=10,
            max_duration=100,
        )

        mock_benchmark = MagicMock()
        mock_scheduler.return_value.run.return_value = mock_benchmark

        report = executor.run()

        assert mock_scheduler.call_count == 2
        assert len(report.benchmarks) == 2
        for index in range(2):
            assert report.benchmarks[index] == mock_benchmark

        # Each recorded call is an (args, kwargs) pair; index 1 is the kwargs.
        recorded_calls = mock_scheduler.call_args_list
        for recorded_call, expected_rate in zip(recorded_calls, expected_rates):
            call_kwargs = recorded_call[1]
            assert call_kwargs['load_gen_mode'] == LoadGenerationModes.CONSTANT
            assert call_kwargs['load_gen_rate'] == expected_rate
129 changes: 129 additions & 0 deletions tests/unit/executor/test_profile_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import numpy
import pytest
from unittest.mock import MagicMock
from guidellm.executor import (ProfileGenerator, FixedRateProfileGenerator, SweepProfileGenerator)
from src.guidellm.core.result import TextGenerationBenchmark, TextGenerationBenchmarkReport
from src.guidellm.executor import profile_generator
from src.guidellm.scheduler.load_generator import LoadGenerationModes

def test_invalid_profile_generation_mode_error():
    """An unknown mode string is rejected with a ValueError that names the mode."""
    profile_mode = "burst"
    generator_kwargs = {"rate": [1], "rate_type": "constant"}
    expected_message = f"Invalid profile generation mode: {profile_mode}"

    with pytest.raises(ValueError, match=expected_message):
        ProfileGenerator.create_generator(profile_mode, **generator_kwargs)

# Fixed Rate Profile Generator

def test_fixed_rate_profile_generator_creation():
    """The "fixed_rate" mode builds a FixedRateProfileGenerator in its initial state."""
    rate = [1]
    rate_type = "constant"
    # Named `generator` so it does not shadow the imported `profile_generator` module.
    generator = ProfileGenerator.create_generator(
        "fixed_rate", rate=rate, rate_type=rate_type
    )

    assert isinstance(generator, FixedRateProfileGenerator)
    assert generator._rates == rate
    assert generator._rate_type == rate_type
    assert generator._rate_index == 0
    # Previously the _rate_index check was repeated; check the other piece of
    # initial state set by the constructor instead.
    assert generator._generated is False

def test_synchronous_mode_rate_list_error():
    """Supplying rates together with the synchronous rate type raises ValueError."""
    generator_kwargs = {"rate": [1], "rate_type": "synchronous"}
    expected_message = "custom rates are not supported in synchronous mode"

    with pytest.raises(ValueError, match=expected_message):
        ProfileGenerator.create_generator("fixed_rate", **generator_kwargs)

def test_next_profile_with_multiple_rates():
    """Each configured rate yields one constant profile, in order, then None."""
    rates = [1, 2]
    rate_type = "constant"
    # Named `generator` so it does not shadow the imported `profile_generator` module.
    generator = ProfileGenerator.create_generator(
        "fixed_rate", rate=rates, rate_type=rate_type
    )
    mock_report = MagicMock(spec=TextGenerationBenchmarkReport)

    for rate in rates:
        current_profile = generator.next_profile(mock_report)
        assert current_profile.load_gen_rate == rate
        # Compared by enum member name, as in the rest of this test module.
        assert current_profile.load_gen_mode.name == LoadGenerationModes.CONSTANT.name

    # Identity check: exhaustion is signalled by the None singleton.
    assert generator.next_profile(mock_report) is None

def test_next_profile_with_sync_mode():
    """Synchronous mode yields exactly one rate-less profile, then None."""
    rate_type = "synchronous"
    # Named `generator` so it does not shadow the imported `profile_generator` module.
    generator = ProfileGenerator.create_generator("fixed_rate", rate_type=rate_type)
    mock_report = MagicMock(spec=TextGenerationBenchmarkReport)

    current_profile = generator.next_profile(mock_report)
    assert current_profile.load_gen_rate is None
    assert current_profile.load_gen_mode.name == LoadGenerationModes.SYNCHRONOUS.name

    # A second request must report that nothing is left to run.
    assert generator.next_profile(mock_report) is None

# Sweep Profile Generator

def test_sweep_profile_generator_creation():
    """The "sweep" mode builds a SweepProfileGenerator in its initial state."""
    # Named `generator` so it does not shadow the imported `profile_generator` module.
    generator = ProfileGenerator.create_generator("sweep")

    assert isinstance(generator, SweepProfileGenerator)
    assert generator._sync_run is False
    assert generator._max_found is False
    # The _pending_rates assertion used to appear twice; once is enough.
    assert generator._pending_rates is None

def test_first_profile_is_synchronous():
    """The first profile produced by a sweep is synchronous and carries no rate."""
    # Named `generator` so it does not shadow the imported `profile_generator` module.
    generator = ProfileGenerator.create_generator("sweep")
    mock_report = MagicMock(spec=TextGenerationBenchmarkReport)

    profile = generator.next_profile(mock_report)

    assert profile.load_gen_rate is None
    assert profile.load_gen_mode.name == LoadGenerationModes.SYNCHRONOUS.name

def test_rate_doubles():
    """With a non-overloaded benchmark at rate 2.0, the second profile asks for 4.0."""
    steady_benchmark = MagicMock(
        spec=TextGenerationBenchmark,
        overloaded=False,
        args_rate=2.0,
        request_rate=2.0,
    )
    mock_report = MagicMock(spec=TextGenerationBenchmarkReport)
    mock_report.benchmarks = [steady_benchmark]

    sweep = ProfileGenerator.create_generator("sweep")

    # The first profile is consumed and discarded; only the second is checked.
    sweep.next_profile(mock_report)
    second_profile = sweep.next_profile(mock_report)

    assert second_profile.load_gen_rate == 4.0

def test_max_found():
    """When the latest benchmark is overloaded, the next rate is 2.0, not a doubling."""

    def make_benchmark(rate, overloaded):
        # Build a benchmark double reporting the given rate and overload flag.
        return MagicMock(
            spec=TextGenerationBenchmark,
            overloaded=overloaded,
            args_rate=rate,
            request_rate=rate,
        )

    mock_report = MagicMock(spec=TextGenerationBenchmarkReport)
    mock_report.benchmarks = [
        make_benchmark(2.0, overloaded=False),
        make_benchmark(4.0, overloaded=True),
    ]

    sweep = ProfileGenerator.create_generator("sweep")

    # The first profile is consumed and discarded; only the second is checked.
    sweep.next_profile(mock_report)
    second_profile = sweep.next_profile(mock_report)

    # if benchmark wasn't overloaded, rate would have doubled to 8
    assert second_profile.load_gen_rate == 2.0

def test_pending_rates():
    """After an overload at 8.0, the sweep yields ten evenly spaced rates from 2.0 to 8.0."""

    def make_benchmark(rate, overloaded):
        # Build a benchmark double reporting the given rate and overload flag.
        return MagicMock(
            spec=TextGenerationBenchmark,
            overloaded=overloaded,
            args_rate=rate,
            request_rate=rate,
        )

    mock_report = MagicMock(spec=TextGenerationBenchmarkReport)
    mock_report.benchmarks = [
        make_benchmark(2.0, overloaded=False),
        make_benchmark(8.0, overloaded=True),
    ]

    sweep = ProfileGenerator.create_generator("sweep")

    # The first profile is consumed and discarded before the rates are checked.
    sweep.next_profile(mock_report)

    expected_rates = numpy.linspace(2.0, 8.0, 10)
    for expected_rate in expected_rates:
        next_profile = sweep.next_profile(mock_report)
        assert next_profile.load_gen_rate == expected_rate