Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions examples/offline_inference_gsaondevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@

from transformers import AutoTokenizer

# Third Party
from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig
from vllm.engine.arg_utils import EngineArgs

from ucm.logger import init_logger

logger = init_logger(__name__)
Expand Down Expand Up @@ -61,6 +56,14 @@ def setup_environment_variables():
tokenizer = AutoTokenizer.from_pretrained(model, use_chat_template=True)


# ENABLE_SPARSE must be set before vllm is imported so that the monkey patch takes effect
setup_environment_variables()
# Third Party
from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig
from vllm.engine.arg_utils import EngineArgs


@contextlib.contextmanager
def build_llm_with_uc(module_path: str, name: str, model: str):
ktc = KVTransferConfig(
Expand Down Expand Up @@ -124,7 +127,6 @@ def print_output(
def main():
module_path = "ucm.integration.vllm.ucm_connector"
name = "UCMConnector"
setup_environment_variables()

def get_prompt(prompt):
messages = [
Expand Down
4 changes: 4 additions & 0 deletions ucm/integration/vllm/patch/apply_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,11 @@ def apply_all_patches() -> None:
# vllm patches
match version:
case "0.11.0":
logger.info("UCM patching vllm for pc...")
import ucm.integration.vllm.patch.v0110.vllm.pc_patch

if ENABLE_SPARSE:
logger.info("UCM patching vllm for sparse...")
import ucm.integration.vllm.patch.v0110.vllm.sparse_patch
case _:
pass
Expand All @@ -140,9 +142,11 @@ def apply_all_patches() -> None:
ascend_version = get_vllm_ascend_version()
match ascend_version:
case "0.11.0":
logger.info("UCM patching vllm-ascend for pc...")
import ucm.integration.vllm.patch.v0110.vllm_ascend.pc_ascend_patch

if ENABLE_SPARSE:
logger.info("UCM patching vllm-ascend for sparse...")
import ucm.integration.vllm.patch.v0110.vllm_ascend.sparse_ascend_patch
case _:
pass
Expand Down
Loading