Skip to content

Commit

Permalink
Port bugfixes from #2497 (#3179)
Browse files Browse the repository at this point in the history
* port from #2497

* Auto-update of Starter template

* Auto-update of E2E template

* Auto-update of NLP template

* lint

* fix LLM template

* Auto-update of LLM Finetuning template

* Auto-update of LLM Finetuning template

* Update src/zenml/zen_server/routers/logs_endpoints.py

Co-authored-by: Barış Can Durak <[email protected]>

* Refactor logs endpoint authorization logic

* Refactor logs endpoint definition

* Refactor resource limit settings in test_vertex_orchestrator.py

---------

Co-authored-by: GitHub Actions <[email protected]>
Co-authored-by: Barış Can Durak <[email protected]>
  • Loading branch information
3 people authored Nov 11, 2024
1 parent 5dfac96 commit b5fa790
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/update-templates-to-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ jobs:
with:
python-version: ${{ inputs.python-version }}
ref-zenml: ${{ github.ref }}
ref-template: 2024.10.30 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
ref-template: 2024.11.08 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
- name: Clean-up
run: |
rm -rf ./local_checkout
Expand Down
2 changes: 1 addition & 1 deletion examples/llm_finetuning/.copier-answers.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Changes here will be overwritten by Copier
_commit: 2024.10.30
_commit: 2024.11.08
_src_path: gh:zenml-io/template-llm-finetuning
bf16: true
cuda_version: cuda11.8
Expand Down
2 changes: 1 addition & 1 deletion examples/llm_finetuning/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
zenml
torch>=2.2.0
datasets
datasets>=2.15
transformers
peft
bitsandbytes>=0.41.3
Expand Down
2 changes: 1 addition & 1 deletion examples/llm_finetuning/steps/log_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def log_metadata_from_step_artifact(

context = get_step_context()
metadata_dict: Dict[str, Any] = (
context.pipeline_run.steps[step_name].outputs[artifact_name].load()
context.pipeline_run.steps[step_name].outputs[artifact_name][0].load()
)

metadata = {artifact_name: metadata_dict}
Expand Down
2 changes: 1 addition & 1 deletion src/zenml/cli/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def copier_github_url(self) -> str:
),
llm_finetuning=ZenMLProjectTemplateLocation(
github_url="zenml-io/template-llm-finetuning",
github_tag="2024.10.30", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
github_tag="2024.11.08", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
),
)

Expand Down
4 changes: 3 additions & 1 deletion src/zenml/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3426,7 +3426,9 @@ def delete_deployment(self, id_or_prefix: str) -> None:
Args:
id_or_prefix: The id or id prefix of the deployment.
"""
deployment = self.get_deployment(id_or_prefix=id_or_prefix)
deployment = self.get_deployment(
id_or_prefix=id_or_prefix, hydrate=False
)
self.zen_store.delete_deployment(deployment_id=deployment.id)

# ------------------------------ Run templates -----------------------------
Expand Down
66 changes: 66 additions & 0 deletions src/zenml/zen_server/routers/logs_endpoints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Endpoint definitions for logs."""

from uuid import UUID

from fastapi import APIRouter, Security

from zenml.constants import (
API,
LOGS,
VERSION_1,
)
from zenml.models.v2.core.logs import LogsResponse
from zenml.zen_server.auth import AuthContext, authorize
from zenml.zen_server.exceptions import error_response
from zenml.zen_server.rbac.endpoint_utils import (
verify_permissions_and_get_entity,
)
from zenml.zen_server.utils import (
handle_exceptions,
zen_store,
)

# Router serving the /logs endpoints under the versioned API prefix
# (API + VERSION_1 + LOGS). The 401/403 responses are declared here so
# they appear in the OpenAPI schema for every route on this router,
# since all log endpoints require an authenticated, authorized caller.
router = APIRouter(
    prefix=API + VERSION_1 + LOGS,
    tags=["logs"],
    responses={401: error_response, 403: error_response},
)


@router.get(
    "/{logs_id}",
    response_model=LogsResponse,
    responses={401: error_response, 404: error_response, 422: error_response},
)
@handle_exceptions
def get_logs(
    logs_id: UUID,
    hydrate: bool = True,
    _: AuthContext = Security(authorize),
) -> LogsResponse:
    """Returns the requested logs.

    Args:
        logs_id: ID of the logs.
        hydrate: Flag deciding whether to hydrate the output model(s)
            by including metadata fields in the response.

    Returns:
        The requested logs.

    Raises:
        KeyError: If no logs entry with the given ID exists (surfaced by
            the store lookup and translated by ``handle_exceptions``).
    """
    # Delegates both the RBAC permission check and the store fetch to the
    # shared endpoint helper, keeping authorization logic consistent with
    # the other entity routers.
    return verify_permissions_and_get_entity(
        id=logs_id, get_method=zen_store().get_logs, hydrate=hydrate
    )
2 changes: 2 additions & 0 deletions src/zenml/zen_server/zen_server_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
devices_endpoints,
event_source_endpoints,
flavors_endpoints,
logs_endpoints,
model_versions_endpoints,
models_endpoints,
pipeline_builds_endpoints,
Expand Down Expand Up @@ -414,6 +415,7 @@ async def dashboard(request: Request) -> Any:
app.include_router(plugin_endpoints.plugin_router)
app.include_router(event_source_endpoints.event_source_router)
app.include_router(flavors_endpoints.router)
app.include_router(logs_endpoints.router)
app.include_router(models_endpoints.router)
app.include_router(model_versions_endpoints.router)
app.include_router(model_versions_endpoints.model_version_artifacts_router)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,35 +140,41 @@ def test_vertex_orchestrator_stack_validation(
ResourceSettings(cpu_count=1, gpu_count=1, memory="1GB"),
{"cpu_limit": "4", "gpu_limit": 4, "memory_limit": "1G"},
{
"accelerator": {"count": "1", "type": "NVIDIA_TESLA_K80"},
"cpuLimit": 1.0,
"memoryLimit": 1.0,
"accelerator": {
"resourceCount": "1",
"resourceType": "NVIDIA_TESLA_K80",
},
"resourceCpuLimit": "1.0",
"resourceMemoryLimit": "1G",
},
),
# No ResourceSettings, should take values from the orchestrator
(
ResourceSettings(cpu_count=None, gpu_count=None, memory=None),
{"cpu_limit": "1", "gpu_limit": 1, "memory_limit": "1G"},
{"cpu_limit": "1.0", "gpu_limit": 1, "memory_limit": "1G"},
{
"accelerator": {"count": "1", "type": "NVIDIA_TESLA_K80"},
"cpuLimit": 1.0,
"memoryLimit": 1.0,
"accelerator": {
"resourceCount": "1",
"resourceType": "NVIDIA_TESLA_K80",
},
"resourceCpuLimit": "1.0",
"resourceMemoryLimit": "1G",
},
),
# GPU count is None, 1 gpu should be used (KFP default)
(
ResourceSettings(cpu_count=1, gpu_count=None, memory="1GB"),
{"cpu_limit": None, "gpu_limit": None, "memory_limit": None},
{
"cpuLimit": 1.0,
"memoryLimit": 1.0,
"resourceCpuLimit": "1.0",
"resourceMemoryLimit": "1G",
},
),
# GPU count is 0, should not be set in the resource spec
(
ResourceSettings(cpu_count=1, gpu_count=0, memory="1GB"),
{"cpu_limit": None, "gpu_limit": None, "memory_limit": None},
{"cpuLimit": 1.0, "memoryLimit": 1.0},
{"resourceCpuLimit": "1.0", "resourceMemoryLimit": "1G"},
),
],
)
Expand Down

0 comments on commit b5fa790

Please sign in to comment.