Skip to content

Commit eeb1df6

Browse files
gjwoodsGerard
and
Gerard
authored
[Open_Source_LLM] Add Endpoints Tests and switch Deployment setup (#842)
# Description 1. Put endpoint and deployment name into the "call" function to show the errors appropriately (and share the EndpointContainer) 2. Add tests for Endpoint & Deployment name functionality 3. Clean up old comments # All Promptflow Contribution checklist: - [x] **The pull request does not introduce breaking changes.** - [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [x] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [x] Pull request includes test coverage for the included changes. --------- Co-authored-by: Gerard <[email protected]>
1 parent 4abf690 commit eeb1df6

File tree

4 files changed

+152
-66
lines changed

4 files changed

+152
-66
lines changed

src/promptflow-tools/connections.json.example

+10
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,16 @@
5151
"endpoint_api_key"
5252
]
5353
},
54+
"open_source_llm_ws_service_connection": {
55+
"type": "CustomConnection",
56+
"value": {
57+
"service_credential": "service-credential"
58+
},
59+
"module": "promptflow.connections",
60+
"secret_keys": [
61+
"service_credential"
62+
]
63+
},
5464
"open_ai_connection": {
5565
"type": "OpenAIConnection",
5666
"value": {

src/promptflow-tools/promptflow/tools/open_source_llm.py

+16-21
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def get_deployment_from_endpoint(endpoint_name: str, deployment_name: str = None
140140
return (endpoint_uri, endpoint_key, model)
141141

142142

143-
def get_deployment_from_connection(connection: CustomConnection, deployment_name: str = None) -> Tuple[str, str, str]:
143+
def get_deployment_from_connection(connection: CustomConnection) -> Tuple[str, str, str]:
144144
conn_dict = dict(connection)
145145
for key in REQUIRED_CONFIG_KEYS:
146146
if key not in conn_dict:
@@ -352,17 +352,7 @@ def get_content_formatter(
352352

353353

354354
class AzureMLOnlineEndpoint:
355-
"""Azure ML Online Endpoint models.
356-
357-
Example:
358-
.. code-block:: python
359-
360-
azure_llm = AzureMLModel(
361-
endpoint_url="https://<your-endpoint>.<your_region>.inference.ml.azure.com/score",
362-
endpoint_api_key="my-api-key",
363-
content_formatter=content_formatter,
364-
)
365-
""" # noqa: E501
355+
"""Azure ML Online Endpoint models."""
366356

367357
endpoint_url: str = ""
368358
"""URL of pre-existing Endpoint. Should be passed to constructor or specified as
@@ -453,32 +443,37 @@ class OpenSourceLLM(ToolProvider):
453443

454444
def __init__(self,
455445
connection: CustomConnection = None,
456-
endpoint_name: str = None,
457-
deployment_name: str = None):
446+
endpoint_name: str = None):
458447
super().__init__()
459448

460-
self.deployment_name = deployment_name
461-
if endpoint_name is not None and endpoint_name != DEFAULT_ENDPOINT_NAME:
462-
(self.endpoint_uri,
463-
self.endpoint_key,
464-
self.model_family) = get_deployment_from_endpoint(endpoint_name, deployment_name)
465-
else:
449+
self.endpoint_key = None
450+
self.endpoint_name = endpoint_name
451+
452+
if endpoint_name is None or endpoint_name == DEFAULT_ENDPOINT_NAME:
466453
(self.endpoint_uri,
467454
self.endpoint_key,
468-
self.model_family) = get_deployment_from_connection(connection, deployment_name)
455+
self.model_family) = get_deployment_from_connection(connection)
469456

470457
@tool
471458
@handle_oneline_endpoint_error()
472459
def call(
473460
self,
474461
prompt: PromptTemplate,
475462
api: API,
463+
deployment_name: str = None,
476464
temperature: float = 1.0,
477465
max_new_tokens: int = 500,
478466
top_p: float = 1.0,
479467
model_kwargs: Optional[Dict] = {},
480468
**kwargs
481469
) -> str:
470+
self.deployment_name = deployment_name
471+
472+
if self.endpoint_key is None and self.endpoint_name is not None:
473+
(self.endpoint_uri,
474+
self.endpoint_key,
475+
self.model_family) = get_deployment_from_endpoint(self.endpoint_name, self.deployment_name)
476+
482477
prompt = render_jinja_template(prompt, trim_blocks=True, keep_trailing_newline=True, **kwargs)
483478

484479
model_kwargs["top_p"] = top_p

src/promptflow-tools/tests/conftest.py

+13
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,19 @@ def llama_chat_custom_connection():
7777
return ConnectionManager().get("llama_chat_connection")
7878

7979

80+
@pytest.fixture
81+
def open_source_llm_ws_service_connection() -> bool:
82+
try:
83+
creds_custom_connection: CustomConnection = ConnectionManager().get("open_source_llm_ws_service_connection")
84+
subs = json.loads(creds_custom_connection.secrets['service_credential'])
85+
for key, value in subs.items():
86+
os.environ[key] = value
87+
return True
88+
except Exception as e:
89+
print(f'Something failed setting environment variables for service credentials. Error: {e}')
90+
return False
91+
92+
8093
@pytest.fixture(autouse=True)
8194
def skip_if_no_key(request, mocker):
8295
mocker.patch.dict(os.environ, {"PROMPTFLOW_CONNECTIONS": CONNECTION_FILE})

src/promptflow-tools/tests/test_open_source_llm.py

+113-45
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import os
23
import pytest
34
from promptflow.tools.exception import (
@@ -6,6 +7,7 @@
67
OpenSourceLLMKeyValidationError
78
)
89
from promptflow.tools.open_source_llm import OpenSourceLLM, API, ContentFormatterBase, LlamaContentFormatter
10+
from typing import List, Dict
911

1012

1113
@pytest.fixture
@@ -18,19 +20,66 @@ def llama_chat_provider(llama_chat_custom_connection) -> OpenSourceLLM:
1820
return OpenSourceLLM(llama_chat_custom_connection)
1921

2022

23+
@pytest.fixture
24+
def endpoints_provider(open_source_llm_ws_service_connection) -> Dict[str, List[str]]:
25+
if not open_source_llm_ws_service_connection:
26+
pytest.skip("Service Credential not available")
27+
28+
from azure.ai.ml import MLClient
29+
from azure.identity import DefaultAzureCredential
30+
credential = DefaultAzureCredential(exclude_interactive_browser_credential=False)
31+
ml_client = MLClient(
32+
credential=credential,
33+
subscription_id=os.getenv("AZUREML_ARM_SUBSCRIPTION"),
34+
resource_group_name=os.getenv("AZUREML_ARM_RESOURCEGROUP"),
35+
workspace_name=os.getenv("AZUREML_ARM_WORKSPACE_NAME"))
36+
37+
endpoints = {}
38+
for ep in ml_client.online_endpoints.list():
39+
endpoints[ep.name] = [d.name for d in ml_client.online_deployments.list(ep.name)]
40+
41+
return endpoints
42+
43+
44+
@pytest.fixture
45+
def chat_endpoints_provider(endpoints_provider: Dict[str, List[str]]) -> Dict[str, List[str]]:
46+
chat_endpoint_names = ["gpt2", "llama-chat"]
47+
48+
chat_endpoints = {}
49+
for key, value in endpoints_provider.items():
50+
for ep_name in chat_endpoint_names:
51+
if ep_name in key:
52+
chat_endpoints[key] = value
53+
54+
if len(chat_endpoints) <= 0:
55+
pytest.skip("No Chat Endpoints Found")
56+
57+
return chat_endpoints
58+
59+
60+
@pytest.fixture
61+
def completion_endpoints_provider(endpoints_provider: Dict[str, List[str]]) -> Dict[str, List[str]]:
62+
completion_endpoint_names = ["gpt2", "llama-comp"]
63+
64+
completion_endpoints = {}
65+
for key, value in endpoints_provider.items():
66+
for ep_name in completion_endpoint_names:
67+
if ep_name in key:
68+
completion_endpoints[key] = value
69+
70+
if len(completion_endpoints) <= 0:
71+
pytest.skip("No Completion Endpoints Found")
72+
73+
return completion_endpoints
74+
75+
2176
@pytest.mark.usefixtures("use_secrets_config_file")
2277
class TestOpenSourceLLM:
2378
completion_prompt = "In the context of Azure ML, what does the ML stand for?"
24-
25-
gpt2_chat_prompt = """system:
79+
chat_prompt = """system:
2680
You are a AI which helps Customers answer questions.
2781
2882
user:
29-
""" + completion_prompt
30-
31-
llama_chat_prompt = """system:
32-
You are a AI which helps Customers answer questions.
33-
3483
""" + completion_prompt
3584

3685
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
@@ -41,56 +90,54 @@ def test_open_source_llm_completion(self, gpt2_provider):
4190
assert len(response) > 25
4291

4392
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
44-
def test_open_source_llm_completion_with_deploy(self, gpt2_custom_connection):
45-
os_tool = OpenSourceLLM(
46-
gpt2_custom_connection,
47-
deployment_name="gpt2-9")
48-
response = os_tool.call(
93+
def test_open_source_llm_completion_with_deploy(self, gpt2_provider):
94+
response = gpt2_provider.call(
4995
self.completion_prompt,
50-
API.COMPLETION)
96+
API.COMPLETION,
97+
deployment_name="gpt2-9")
5198
assert len(response) > 25
5299

53100
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
54101
def test_open_source_llm_chat(self, gpt2_provider):
55102
response = gpt2_provider.call(
56-
self.gpt2_chat_prompt,
103+
self.chat_prompt,
57104
API.CHAT)
58105
assert len(response) > 25
59106

60107
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
61-
def test_open_source_llm_chat_with_deploy(self, gpt2_custom_connection):
62-
os_tool = OpenSourceLLM(
63-
gpt2_custom_connection,
108+
def test_open_source_llm_chat_with_deploy(self, gpt2_provider):
109+
response = gpt2_provider.call(
110+
self.chat_prompt,
111+
API.CHAT,
64112
deployment_name="gpt2-9")
65-
response = os_tool.call(
66-
self.gpt2_chat_prompt,
67-
API.CHAT)
68113
assert len(response) > 25
69114

70115
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
71116
def test_open_source_llm_chat_with_max_length(self, gpt2_provider):
72117
response = gpt2_provider.call(
73-
self.gpt2_chat_prompt,
118+
self.chat_prompt,
74119
API.CHAT,
75120
max_new_tokens=2)
76121
# GPT-2 doesn't take this parameter
77122
assert len(response) > 25
78123

79124
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
80125
def test_open_source_llm_con_url_chat(self, gpt2_custom_connection):
81-
del gpt2_custom_connection.configs['endpoint_url']
126+
tmp = copy.deepcopy(gpt2_custom_connection)
127+
del tmp.configs['endpoint_url']
82128
with pytest.raises(OpenSourceLLMKeyValidationError) as exc_info:
83-
os = OpenSourceLLM(gpt2_custom_connection)
129+
os = OpenSourceLLM(tmp)
84130
os.call(self.chat_prompt, API.CHAT)
85131
assert exc_info.value.message == """Required key `endpoint_url` not found in given custom connection.
86132
Required keys are: endpoint_url,model_family."""
87133
assert exc_info.value.error_codes == "UserError/ToolValidationError/OpenSourceLLMKeyValidationError".split("/")
88134

89135
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
90136
def test_open_source_llm_con_key_chat(self, gpt2_custom_connection):
91-
del gpt2_custom_connection.secrets['endpoint_api_key']
137+
tmp = copy.deepcopy(gpt2_custom_connection)
138+
del tmp.secrets['endpoint_api_key']
92139
with pytest.raises(OpenSourceLLMKeyValidationError) as exc_info:
93-
os = OpenSourceLLM(gpt2_custom_connection)
140+
os = OpenSourceLLM(tmp)
94141
os.call(self.chat_prompt, API.CHAT)
95142
assert exc_info.value.message == (
96143
"Required secret key `endpoint_api_key` "
@@ -100,9 +147,10 @@ def test_open_source_llm_con_key_chat(self, gpt2_custom_connection):
100147

101148
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
102149
def test_open_source_llm_con_model_chat(self, gpt2_custom_connection):
103-
del gpt2_custom_connection.configs['model_family']
150+
tmp = copy.deepcopy(gpt2_custom_connection)
151+
del tmp.configs['model_family']
104152
with pytest.raises(OpenSourceLLMKeyValidationError) as exc_info:
105-
os = OpenSourceLLM(gpt2_custom_connection)
153+
os = OpenSourceLLM(tmp)
106154
os.call(self.completion_prompt, API.COMPLETION)
107155
assert exc_info.value.message == """Required key `model_family` not found in given custom connection.
108156
Required keys are: endpoint_url,model_family."""
@@ -114,7 +162,7 @@ def test_open_source_llm_escape_chat(self):
114162
assert out_of_danger == "The quick \\brown fox\\tjumped\\\\over \\the \\\\boy\\r\\n"
115163

116164
def test_open_source_llm_llama_parse_chat_with_chat(self):
117-
LlamaContentFormatter.parse_chat(self.llama_chat_prompt)
165+
LlamaContentFormatter.parse_chat(self.chat_prompt)
118166

119167
def test_open_source_llm_llama_parse_multi_turn(self):
120168
multi_turn_chat = """user:
@@ -163,8 +211,9 @@ def test_open_source_llm_llama_parse_chat_with_comp(self):
163211

164212
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
165213
def test_open_source_llm_llama_endpoint_miss(self, gpt2_custom_connection):
166-
gpt2_custom_connection.configs['endpoint_url'] += 'completely/real/endpoint'
167-
os = OpenSourceLLM(gpt2_custom_connection)
214+
tmp = copy.deepcopy(gpt2_custom_connection)
215+
tmp.configs['endpoint_url'] += 'completely/real/endpoint'
216+
os = OpenSourceLLM(tmp)
168217
with pytest.raises(OpenSourceLLMOnlineEndpointError) as exc_info:
169218
os.call(
170219
self.completion_prompt,
@@ -175,30 +224,49 @@ def test_open_source_llm_llama_endpoint_miss(self, gpt2_custom_connection):
175224
assert exc_info.value.error_codes == "UserError/OpenSourceLLMOnlineEndpointError".split("/")
176225

177226
@pytest.mark.skip_if_no_key("gpt2_custom_connection")
178-
def test_open_source_llm_llama_deployment_miss(self, gpt2_custom_connection):
179-
os = OpenSourceLLM(
180-
gpt2_custom_connection,
181-
deployment_name="completely/real/deployment-007")
227+
def test_open_source_llm_llama_deployment_miss(self, gpt2_provider):
182228
with pytest.raises(OpenSourceLLMOnlineEndpointError) as exc_info:
183-
os.call(self.completion_prompt, API.COMPLETION)
229+
gpt2_provider.call(self.completion_prompt,
230+
API.COMPLETION,
231+
deployment_name="completely/real/deployment-007")
184232
assert exc_info.value.message == (
185233
"Exception hit calling Oneline Endpoint: "
186234
+ "HTTPError: HTTP Error 404: Not Found")
187235
assert exc_info.value.error_codes == "UserError/OpenSourceLLMOnlineEndpointError".split("/")
188236

189-
@pytest.mark.skip
190-
def test_open_source_llm_endpoint_name(self):
191-
os.environ["AZUREML_ARM_SUBSCRIPTION"] = "<needs_value>"
192-
os.environ["AZUREML_ARM_RESOURCEGROUP"] = "<needs_value>"
193-
os.environ["AZUREML_ARM_WORKSPACE_NAME"] = "<needs_value>"
194-
195-
os_llm = OpenSourceLLM(endpoint_name="llama-temp-chat")
196-
response = os_llm.call(self.llama_chat_prompt, API.CHAT)
197-
assert len(response) > 25
237+
@pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection")
238+
def test_open_source_llm_chat_endpoint_name(self, chat_endpoints_provider):
239+
for endpoint_name in chat_endpoints_provider:
240+
os_llm = OpenSourceLLM(endpoint_name=endpoint_name)
241+
response = os_llm.call(self.chat_prompt, API.CHAT)
242+
assert len(response) > 25
243+
244+
@pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection")
245+
def test_open_source_llm_chat_endpoint_name_with_deployment(self, chat_endpoints_provider):
246+
for endpoint_name in chat_endpoints_provider:
247+
os_llm = OpenSourceLLM(endpoint_name=endpoint_name)
248+
for deployment_name in chat_endpoints_provider[endpoint_name]:
249+
response = os_llm.call(self.chat_prompt, API.CHAT, deployment_name=deployment_name)
250+
assert len(response) > 25
251+
252+
@pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection")
253+
def test_open_source_llm_completion_endpoint_name(self, completion_endpoints_provider):
254+
for endpoint_name in completion_endpoints_provider:
255+
os_llm = OpenSourceLLM(endpoint_name=endpoint_name)
256+
response = os_llm.call(self.completion_prompt, API.COMPLETION)
257+
assert len(response) > 25
258+
259+
@pytest.mark.skip_if_no_key("open_source_llm_ws_service_connection")
260+
def test_open_source_llm_completion_endpoint_name_with_deployment(self, completion_endpoints_provider):
261+
for endpoint_name in completion_endpoints_provider:
262+
os_llm = OpenSourceLLM(endpoint_name=endpoint_name)
263+
for deployment_name in completion_endpoints_provider[endpoint_name]:
264+
response = os_llm.call(self.completion_prompt, API.COMPLETION, deployment_name=deployment_name)
265+
assert len(response) > 25
198266

199267
@pytest.mark.skip_if_no_key("llama_chat_custom_connection")
200268
def test_open_source_llm_llama_chat(self, llama_chat_provider):
201-
response = llama_chat_provider.call(self.llama_chat_prompt, API.CHAT)
269+
response = llama_chat_provider.call(self.chat_prompt, API.CHAT)
202270
assert len(response) > 25
203271

204272
@pytest.mark.skip_if_no_key("llama_chat_custom_connection")

0 commit comments

Comments
 (0)