Skip to content

Commit

Permalink
Port bugfixes from #2497 (#3179)
Browse files Browse the repository at this point in the history
* port from #2497

* Auto-update of Starter template

* Auto-update of E2E template

* Auto-update of NLP template

* lint

* fix LLM template

* Auto-update of LLM Finetuning template

* Auto-update of LLM Finetuning template

* Update src/zenml/zen_server/routers/logs_endpoints.py

Co-authored-by: Barış Can Durak <[email protected]>

* Refactor logs endpoint authorization logic

* Refactor logs endpoint definition

* Refactor resource limit settings in test_vertex_orchestrator.py

---------

Co-authored-by: GitHub Actions <[email protected]>
Co-authored-by: Barış Can Durak <[email protected]>
  • Loading branch information
3 people authored Nov 11, 2024
1 parent 5dfac96 commit b5fa790
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/update-templates-to-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ jobs:
with:
python-version: ${{ inputs.python-version }}
ref-zenml: ${{ github.ref }}
ref-template: 2024.10.30 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
ref-template: 2024.11.08 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
- name: Clean-up
run: |
rm -rf ./local_checkout
Expand Down
2 changes: 1 addition & 1 deletion examples/llm_finetuning/.copier-answers.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Changes here will be overwritten by Copier
_commit: 2024.10.30
_commit: 2024.11.08
_src_path: gh:zenml-io/template-llm-finetuning
bf16: true
cuda_version: cuda11.8
Expand Down
2 changes: 1 addition & 1 deletion examples/llm_finetuning/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
zenml
torch>=2.2.0
datasets
datasets>=2.15
transformers
peft
bitsandbytes>=0.41.3
Expand Down
2 changes: 1 addition & 1 deletion examples/llm_finetuning/steps/log_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def log_metadata_from_step_artifact(

context = get_step_context()
metadata_dict: Dict[str, Any] = (
context.pipeline_run.steps[step_name].outputs[artifact_name].load()
context.pipeline_run.steps[step_name].outputs[artifact_name][0].load()
)

metadata = {artifact_name: metadata_dict}
Expand Down
2 changes: 1 addition & 1 deletion src/zenml/cli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def copier_github_url(self) -> str:
),
llm_finetuning=ZenMLProjectTemplateLocation(
github_url="zenml-io/template-llm-finetuning",
github_tag="2024.10.30", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
github_tag="2024.11.08", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
),
)

Expand Down
4 changes: 3 additions & 1 deletion src/zenml/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3426,7 +3426,9 @@ def delete_deployment(self, id_or_prefix: str) -> None:
Args:
id_or_prefix: The id or id prefix of the deployment.
"""
deployment = self.get_deployment(id_or_prefix=id_or_prefix)
deployment = self.get_deployment(
id_or_prefix=id_or_prefix, hydrate=False
)
self.zen_store.delete_deployment(deployment_id=deployment.id)

# ------------------------------ Run templates -----------------------------
Expand Down
66 changes: 66 additions & 0 deletions src/zenml/zen_server/routers/logs_endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Endpoint definitions for logs."""

from uuid import UUID

from fastapi import APIRouter, Security

from zenml.constants import (
API,
LOGS,
VERSION_1,
)
from zenml.models.v2.core.logs import LogsResponse
from zenml.zen_server.auth import AuthContext, authorize
from zenml.zen_server.exceptions import error_response
from zenml.zen_server.rbac.endpoint_utils import (
verify_permissions_and_get_entity,
)
from zenml.zen_server.utils import (
handle_exceptions,
zen_store,
)

# Router serving the /logs endpoints under the versioned API prefix
# (API + VERSION_1 + LOGS). The 401/403 responses are declared here so
# they appear in the OpenAPI schema for every route on this router,
# since all log endpoints require an authenticated, authorized caller.
router = APIRouter(
    prefix=API + VERSION_1 + LOGS,
    tags=["logs"],
    responses={401: error_response, 403: error_response},
)


@router.get(
    "/{logs_id}",
    response_model=LogsResponse,
    responses={401: error_response, 404: error_response, 422: error_response},
)
@handle_exceptions
def get_logs(
    logs_id: UUID,
    hydrate: bool = True,
    _: AuthContext = Security(authorize),
) -> LogsResponse:
    """Returns the requested logs.

    Args:
        logs_id: ID of the logs.
        hydrate: Flag deciding whether to hydrate the output model(s)
            by including metadata fields in the response.

    Returns:
        The requested logs.

    Raises:
        KeyError: If no logs entry with the given ID exists (surfaced by
            the store lookup and translated by ``handle_exceptions``).
    """
    # Delegates both the RBAC permission check and the store fetch to the
    # shared endpoint helper, keeping authorization logic consistent with
    # the other entity routers.
    return verify_permissions_and_get_entity(
        id=logs_id, get_method=zen_store().get_logs, hydrate=hydrate
    )
2 changes: 2 additions & 0 deletions src/zenml/zen_server/zen_server_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
devices_endpoints,
event_source_endpoints,
flavors_endpoints,
logs_endpoints,
model_versions_endpoints,
models_endpoints,
pipeline_builds_endpoints,
Expand Down Expand Up @@ -414,6 +415,7 @@ async def dashboard(request: Request) -> Any:
app.include_router(plugin_endpoints.plugin_router)
app.include_router(event_source_endpoints.event_source_router)
app.include_router(flavors_endpoints.router)
app.include_router(logs_endpoints.router)
app.include_router(models_endpoints.router)
app.include_router(model_versions_endpoints.router)
app.include_router(model_versions_endpoints.model_version_artifacts_router)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,35 +140,41 @@ def test_vertex_orchestrator_stack_validation(
ResourceSettings(cpu_count=1, gpu_count=1, memory="1GB"),
{"cpu_limit": "4", "gpu_limit": 4, "memory_limit": "1G"},
{
"accelerator": {"count": "1", "type": "NVIDIA_TESLA_K80"},
"cpuLimit": 1.0,
"memoryLimit": 1.0,
"accelerator": {
"resourceCount": "1",
"resourceType": "NVIDIA_TESLA_K80",
},
"resourceCpuLimit": "1.0",
"resourceMemoryLimit": "1G",
},
),
# No ResourceSettings, should take values from the orchestrator
(
ResourceSettings(cpu_count=None, gpu_count=None, memory=None),
{"cpu_limit": "1", "gpu_limit": 1, "memory_limit": "1G"},
{"cpu_limit": "1.0", "gpu_limit": 1, "memory_limit": "1G"},
{
"accelerator": {"count": "1", "type": "NVIDIA_TESLA_K80"},
"cpuLimit": 1.0,
"memoryLimit": 1.0,
"accelerator": {
"resourceCount": "1",
"resourceType": "NVIDIA_TESLA_K80",
},
"resourceCpuLimit": "1.0",
"resourceMemoryLimit": "1G",
},
),
# GPU count is None, 1 gpu should be used (KFP default)
(
ResourceSettings(cpu_count=1, gpu_count=None, memory="1GB"),
{"cpu_limit": None, "gpu_limit": None, "memory_limit": None},
{
"cpuLimit": 1.0,
"memoryLimit": 1.0,
"resourceCpuLimit": "1.0",
"resourceMemoryLimit": "1G",
},
),
# GPU count is 0, should not be set in the resource spec
(
ResourceSettings(cpu_count=1, gpu_count=0, memory="1GB"),
{"cpu_limit": None, "gpu_limit": None, "memory_limit": None},
{"cpuLimit": 1.0, "memoryLimit": 1.0},
{"resourceCpuLimit": "1.0", "resourceMemoryLimit": "1G"},
),
],
)
Expand Down

0 comments on commit b5fa790

Please sign in to comment.