From 0c4cc1bfe4f179bdb46ef58f55aa570b6698797a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20M=C3=B6ller?= Date: Fri, 15 Nov 2024 12:59:29 +0100 Subject: [PATCH] fix onnx memory problem Onnx currently faces memory issues when dividing functionality among multiple methods --- .../services/onnx_gen_ai_completion_base.py | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py index c7e2c47d12d4..40ce552ed88b 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py @@ -53,23 +53,6 @@ def __init__(self, ai_model_path: str, **kwargs) -> None: **kwargs, ) - def _prepare_input_params( - self, prompt: str, settings: OnnxGenAIPromptExecutionSettings, image: ImageContent | None = None - ) -> Any: - params = OnnxRuntimeGenAi.GeneratorParams(self.model) - params.set_search_options(**settings.prepare_settings_dict()) - if not self.enable_multi_modality: - input_tokens = self.tokenizer.encode(prompt) - params.input_ids = input_tokens - else: - if image is not None: - # With the use of Pybind there is currently no way to load images from bytes - # We can only open images from a file path currently - image = OnnxRuntimeGenAi.Images.open(str(image.uri)) - input_tokens = self.tokenizer(prompt, images=image) - params.set_inputs(input_tokens) - return params - async def _generate_next_token_async( self, prompt: str, @@ -77,7 +60,18 @@ async def _generate_next_token_async( image: ImageContent | None = None, ) -> AsyncGenerator[list[str], Any]: try: - params = self._prepare_input_params(prompt, settings, image) + params = OnnxRuntimeGenAi.GeneratorParams(self.model) + params.set_search_options(**settings.prepare_settings_dict()) + if not self.enable_multi_modality: + input_tokens = self.tokenizer.encode(prompt) + params.input_ids = input_tokens + else: + if image is not None: + # With the use of Pybind there is currently no way to load images from bytes + # We can only open images from a file path currently + image = OnnxRuntimeGenAi.Images.open(str(image.uri)) + input_tokens = self.tokenizer(prompt, images=image) + params.set_inputs(input_tokens) generator = OnnxRuntimeGenAi.Generator(self.model, params) while not generator.is_done():