From 595b5aefd37e5864bf89a31b52dc0e9a0b2d8992 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 5 Dec 2024 17:56:18 +0100 Subject: [PATCH 01/18] Fix typo in readme (#3247) * Fix typo in readme * More typos --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 86a1501968..b7398b89ba 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ Or, through our CLI command: zenml stack deploy --provider aws ``` -Alternatively, if the necessary pieces of infrastructure is already deployed, you can register a cloud stack seamlessly through the stack wizard: +Alternatively, if the necessary pieces of infrastructure are already deployed, you can register a cloud stack seamlessly through the stack wizard: ```bash zenml stack register --provider aws @@ -195,9 +195,9 @@ def trainer(training_df: pd.DataFrame) -> Annotated["model", torch.nn.Module]: ![Exploring ZenML Models](/docs/book/.gitbook/assets/readme_mcp.gif) -### Purpose built for machine learning with integration to you favorite tools +### Purpose built for machine learning with integrations to your favorite tools -While ZenML brings a lot of value of the box, it also integrates into your existing tooling and infrastructure without you having to be locked in. +While ZenML brings a lot of value out of the box, it also integrates into your existing tooling and infrastructure without you having to be locked in. ```python from bentoml._internal.bento import bento From cc1a63bba45f664898c60af278c5adf1eaa72af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bar=C4=B1=C5=9F=20Can=20Durak?= <36421093+bcdurak@users.noreply.github.com> Date: Fri, 6 Dec 2024 09:27:39 +0100 Subject: [PATCH 02/18] adding 0.71.0 to migration tests (#3250) --- scripts/test-migrations.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test-migrations.sh b/scripts/test-migrations.sh index 145c0df7b2..170ff03e2e 100755 --- a/scripts/test-migrations.sh +++ b/scripts/test-migrations.sh @@ -23,7 +23,7 @@ else fi # List of versions to test -VERSIONS=("0.40.3" "0.43.0" "0.44.3" "0.45.6" "0.47.0" "0.50.0" "0.51.0" "0.52.0" "0.53.1" "0.54.1" "0.55.5" "0.56.4" "0.57.1" "0.60.0" "0.61.0" "0.62.0" "0.63.0" "0.64.0" "0.65.0" "0.68.0" "0.70.0") +VERSIONS=("0.40.3" "0.43.0" "0.44.3" "0.45.6" "0.47.0" "0.50.0" "0.51.0" "0.52.0" "0.53.1" "0.54.1" "0.55.5" "0.56.4" "0.57.1" "0.60.0" "0.61.0" "0.62.0" "0.63.0" "0.64.0" "0.65.0" "0.68.0" "0.70.0" "0.71.0") # Try to get the latest version using pip index version=$(pip index versions zenml 2>/dev/null | grep -v YANKED | head -n1 | awk '{print $2}' | tr -d '()') From bf259017e847d41792a7d1b65c26edaf15cc9a9c Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 6 Dec 2024 09:29:02 +0100 Subject: [PATCH 03/18] Fix workload token expiration for cached steps/runs (#3243) --- src/zenml/zen_server/auth.py | 10 ++-------- src/zenml/zen_server/routers/auth_endpoints.py | 11 ++--------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/src/zenml/zen_server/auth.py b/src/zenml/zen_server/auth.py index a7918f3e81..80290f091c 100644 --- a/src/zenml/zen_server/auth.py +++ b/src/zenml/zen_server/auth.py @@ -413,10 +413,7 @@ def get_pipeline_run_status( logger.error(error) raise CredentialsNotValid(error) - if pipeline_run_status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if pipeline_run_status.is_finished: error = ( f"The execution of pipeline run " f"{decoded_token.pipeline_run_id} has already concluded and " @@ -461,10 +458,7 @@ def 
get_step_run_status( logger.error(error) raise CredentialsNotValid(error) - if step_run_status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if step_run_status.is_finished: error = ( f"The execution of step run " f"{decoded_token.step_run_id} has already concluded and " diff --git a/src/zenml/zen_server/routers/auth_endpoints.py b/src/zenml/zen_server/routers/auth_endpoints.py index e970ba535e..a1339c10bf 100644 --- a/src/zenml/zen_server/routers/auth_endpoints.py +++ b/src/zenml/zen_server/routers/auth_endpoints.py @@ -41,7 +41,6 @@ from zenml.enums import ( APITokenType, AuthScheme, - ExecutionStatus, OAuthDeviceStatus, OAuthGrantTypes, ) @@ -589,10 +588,7 @@ def api_token( "security reasons." ) - if pipeline_run.status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if pipeline_run.status.is_finished: raise ValueError( f"The execution of pipeline run {pipeline_run_id} has already " "concluded and API tokens can no longer be generated for it " @@ -609,10 +605,7 @@ def api_token( "be generated for non-existent step runs for security reasons." ) - if step_run.status in [ - ExecutionStatus.FAILED, - ExecutionStatus.COMPLETED, - ]: + if step_run.status.is_finished: raise ValueError( f"The execution of step run {step_run_id} has already " "concluded and API tokens can no longer be generated for it " From df3040f0a3e3ae9c1a16be02ffe66a1b0ab5c6eb Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 6 Dec 2024 10:35:35 +0100 Subject: [PATCH 04/18] Implement wandb settings conversion for latest release (#3246) --- .../flavors/wandb_experiment_tracker_flavor.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py b/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py index 11e84ca2d3..7a1a732170 100644 --- a/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py +++ b/src/zenml/integrations/wandb/flavors/wandb_experiment_tracker_flavor.py @@ -23,7 +23,7 @@ cast, ) -from pydantic import field_validator +from pydantic import field_validator, BaseModel from zenml.config.base_settings import BaseSettings from zenml.experiment_trackers.base_experiment_tracker import ( @@ -60,18 +60,26 @@ def _convert_settings(cls, value: Any) -> Any: Args: value: The settings. + Raises: + ValueError: If converting the settings failed. + Returns: Dict representation of the settings. 
""" import wandb if isinstance(value, wandb.Settings): - # Depending on the wandb version, either `make_static` or `to_dict` - # is available to convert the settings to a dictionary - if hasattr(value, "make_static"): + # Depending on the wandb version, either `model_dump`, + # `make_static` or `to_dict` is available to convert the settings + # to a dictionary + if isinstance(value, BaseModel): + return value.model_dump() + elif hasattr(value, "make_static"): return cast(Dict[str, Any], value.make_static()) - else: + elif hasattr(value, "to_dict"): return value.to_dict() + else: + raise ValueError("Unable to convert wandb settings to dict.") else: return value From ee3e962df89fd02ddeb4b76104c40e8352f5b17a Mon Sep 17 00:00:00 2001 From: Alex Strick van Linschoten Date: Fri, 6 Dec 2024 18:19:41 +0100 Subject: [PATCH 05/18] add comment from Jonathan (#3253) --- docs/book/component-guide/step-operators/modal.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/book/component-guide/step-operators/modal.md b/docs/book/component-guide/step-operators/modal.md index b7f46227c8..4492152050 100644 --- a/docs/book/component-guide/step-operators/modal.md +++ b/docs/book/component-guide/step-operators/modal.md @@ -86,6 +86,12 @@ def my_modal_step(): ... ``` +{% hint style="info" %} +Note that the `cpu` parameter in `ResourceSettings` currently only accepts a single integer value. This specifies a soft minimum limit - Modal will guarantee at least this many physical cores, but the actual usage could be higher. The CPU cores/hour will also determine the minimum price paid for the compute resources. + +For example, with the configuration above (2 CPUs and 32GB memory), the minimum cost would be approximately $1.03 per hour ((0.135 * 2) + (0.024 * 32) = $1.03). +{% endhint %} + This will run `my_modal_step` on a Modal instance with 1 A100 GPU, 2 CPUs, and 32GB of CPU memory. From 246716e19ffb44a3ea15083ee240b128f4a2900b Mon Sep 17 00:00:00 2001 From: Stefan Nica Date: Tue, 10 Dec 2024 14:25:29 +0100 Subject: [PATCH 06/18] Re-authenticate requests that failed authentication (#3256) --- src/zenml/cli/server.py | 38 ++++----- src/zenml/zen_stores/rest_zen_store.py | 106 ++++++++++++++++--------- 2 files changed, 86 insertions(+), 58 deletions(-) diff --git a/src/zenml/cli/server.py b/src/zenml/cli/server.py index e5c0b135f5..39c241c377 100644 --- a/src/zenml/cli/server.py +++ b/src/zenml/cli/server.py @@ -587,25 +587,6 @@ def server_list(verbose: bool = False, all: bool = False) -> None: accessible_pro_servers = client.tenant.list(member_only=not all) except AuthorizationException as e: cli_utils.warning(f"ZenML Pro authorization error: {e}") - else: - if not all: - accessible_pro_servers = [ - s - for s in accessible_pro_servers - if s.status == TenantStatus.AVAILABLE - ] - - if not accessible_pro_servers: - cli_utils.declare( - "No ZenML Pro servers that are accessible to the current " - "user could be found." - ) - if not all: - cli_utils.declare( - "Hint: use the `--all` flag to show all ZenML servers, " - "including those that the client is not currently " - "authorized to access or are not running." 
- ) # We update the list of stored ZenML Pro servers with the ones that the # client is a member of @@ -633,6 +614,25 @@ def server_list(verbose: bool = False, all: bool = False) -> None: stored_server.update_server_info(accessible_server) pro_servers.append(stored_server) + if not all: + accessible_pro_servers = [ + s + for s in accessible_pro_servers + if s.status == TenantStatus.AVAILABLE + ] + + if not accessible_pro_servers: + cli_utils.declare( + "No ZenML Pro servers that are accessible to the current " + "user could be found." + ) + if not all: + cli_utils.declare( + "Hint: use the `--all` flag to show all ZenML servers, " + "including those that the client is not currently " + "authorized to access or are not running." + ) + elif pro_servers: cli_utils.warning( "The ZenML Pro authentication has expired. Please re-login " diff --git a/src/zenml/zen_stores/rest_zen_store.py b/src/zenml/zen_stores/rest_zen_store.py index 974875beaf..e3e29759b2 100644 --- a/src/zenml/zen_stores/rest_zen_store.py +++ b/src/zenml/zen_stores/rest_zen_store.py @@ -4349,46 +4349,74 @@ def _request( {source_context.name: source_context.get().value} ) - try: - return self._handle_response( - self.session.request( - method, - url, - params=params, - verify=self.config.verify_ssl, - timeout=timeout or self.config.http_timeout, - **kwargs, - ) - ) - except CredentialsNotValid: - # NOTE: CredentialsNotValid is raised only when the server - # explicitly indicates that the credentials are not valid and they - # can be thrown away. - - # We authenticate or re-authenticate here and then try the request - # again, this time with a valid API token in the header. - self.authenticate( - # If the last request was authenticated with an API token, - # we force a re-authentication to get a fresh token. - force=self._api_token is not None - ) - - try: - return self._handle_response( - self.session.request( - method, - url, - params=params, - verify=self.config.verify_ssl, - timeout=self.config.http_timeout, - **kwargs, + # If the server replies with a credentials validation (401 Unauthorized) + # error, we (re-)authenticate and retry the request here in the + # following cases: + # + # 1. initial authentication: the last request was not authenticated + # with an API token. + # 2. re-authentication: the last request was authenticated with an API + # token that was rejected by the server. This is to cover the case + # of expired tokens that can be refreshed by the client automatically + # without user intervention from other sources (e.g. API keys). + # + # NOTE: it can happen that the same request is retried here for up to + # two times: once after initial authentication and once after + # re-authentication. + re_authenticated = False + while True: + try: + return self._handle_response( + self.session.request( + method, + url, + params=params, + verify=self.config.verify_ssl, + timeout=timeout or self.config.http_timeout, + **kwargs, + ) ) - ) - except CredentialsNotValid as e: - raise CredentialsNotValid( - "The current credentials are no longer valid. Please log in " - "again using 'zenml login'." - ) from e + except CredentialsNotValid as e: + # NOTE: CredentialsNotValid is raised only when the server + # explicitly indicates that the credentials are not valid and + # they can be thrown away or when the request is not + # authenticated at all. + + if self._api_token is None: + # The last request was not authenticated with an API + # token at all. 
We authenticate here and then try the + # request again, this time with a valid API token in the + # header. + logger.debug( + f"The last request was not authenticated: {e}\n" + "Re-authenticating and retrying..." + ) + self.authenticate() + elif not re_authenticated: + # The last request was authenticated with an API token + # that was rejected by the server. We attempt a + # re-authentication here and then retry the request. + logger.debug( + "The last request was authenticated with an API token " + f"that was rejected by the server: {e}\n" + "Re-authenticating and retrying..." + ) + re_authenticated = True + self.authenticate( + # Ignore the current token and force a re-authentication + force=True + ) + else: + # The last request was made after re-authenticating but + # still failed. Bailing out. + logger.debug( + f"The last request failed after re-authenticating: {e}\n" + "Bailing out..." + ) + raise CredentialsNotValid( + "The current credentials are no longer valid. Please " + "log in again using 'zenml login'." + ) from e def get( self, From ae73e2ee5ff3783993ef24496e9f83acc99d3f51 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 11 Dec 2024 10:50:49 +0100 Subject: [PATCH 07/18] Add new toc (#3255) * Add new server management and collaboration features * Add Python environment configuration guides * Add understanding of ZenML artifacts and complex use-cases * test redirect * one more * revert redirects * revert redirects * add page plcaeholder for collaborate with team * add icon * move files to the right directories * update toc with new paths * add all redirects * remove .md and README from the left pane * fix all broken links * fix more links --------- Co-authored-by: Jayesh Sharma --- .gitbook.yaml | 63 ++++++++++ .../data-validators/deepchecks.md | 2 +- .../experiment-trackers/mlflow.md | 4 +- .../experiment-trackers/neptune.md | 4 +- .../experiment-trackers/wandb.md | 4 +- .../component-guide/image-builders/gcp.md | 2 +- .../component-guide/image-builders/kaniko.md | 2 +- .../component-guide/model-deployers/seldon.md | 2 +- .../component-guide/orchestrators/airflow.md | 4 +- .../component-guide/orchestrators/azureml.md | 2 +- .../component-guide/orchestrators/custom.md | 2 +- .../orchestrators/databricks.md | 2 +- .../component-guide/orchestrators/hyperai.md | 2 +- .../component-guide/orchestrators/kubeflow.md | 6 +- .../orchestrators/kubernetes.md | 4 +- .../orchestrators/local-docker.md | 2 +- .../orchestrators/orchestrators.md | 2 +- .../orchestrators/sagemaker.md | 4 +- .../component-guide/orchestrators/tekton.md | 4 +- .../component-guide/orchestrators/vertex.md | 4 +- .../component-guide/step-operators/azureml.md | 4 +- .../component-guide/step-operators/custom.md | 2 +- .../step-operators/kubernetes.md | 4 +- .../step-operators/sagemaker.md | 4 +- .../step-operators/step-operators.md | 4 +- .../component-guide/step-operators/vertex.md | 4 +- .../getting-started/system-architectures.md | 2 +- .../advanced-topics/control-logging/README.md | 16 --- docs/book/how-to/control-logging/README.md | 16 +++ .../disable-colorful-logging.md | 2 +- .../control-logging/disable-rich-traceback.md | 4 +- .../enable-or-disable-logs-storing.md | 4 +- .../control-logging/set-logging-verbosity.md | 4 +- .../view-logs-on-the-dasbhoard.md | 8 +- .../customize-docker-builds/README.md | 2 +- .../define-where-an-image-is-built.md | 6 +- .../docker-settings-on-a-pipeline.md | 8 +- .../docker-settings-on-a-step.md | 0 .../how-to-reuse-builds.md | 4 +- 
.../how-to-use-a-private-pypi-repository.md | 0 ...ecify-pip-dependencies-and-apt-packages.md | 4 +- .../use-a-prebuilt-image.md | 2 +- .../use-your-own-docker-files.md | 0 .../which-files-are-built-into-the-image.md | 2 +- .../complex-usecases/README.md | 3 + .../datasets.md | 0 .../manage-big-data.md | 0 .../passing-artifacts-between-pipelines.md | 0 .../registering-existing-data.md | 0 .../unmaterialized-artifacts.md | 0 .../handle-custom-data-types.md | 2 +- .../manage-zenml-server/README.md | 0 .../best-practices-upgrading-zenml.md | 10 +- .../connecting-to-zenml/README.md | 0 .../connect-in-with-your-user-interactive.md | 0 .../connect-with-a-service-account.md | 0 .../migration-guide/migration-guide.md | 0 .../migration-guide/migration-zero-forty.md | 10 +- .../migration-guide/migration-zero-sixty.md | 2 +- .../migration-guide/migration-zero-thirty.md | 0 .../migration-guide/migration-zero-twenty.md | 4 +- .../troubleshoot-your-deployed-server.md | 0 .../upgrade-zenml-server.md | 0 .../using-zenml-server-in-prod.md | 10 +- .../configure-python-environments/README.md | 0 .../configure-the-server-environment.md | 0 .../handling-dependencies.md | 0 .../develop-locally/README.md | 0 .../keep-your-dashboard-server-clean.md | 0 .../local-prod-pipeline-variants.md | 0 .../run-remote-notebooks/README.md | 0 ...ons-of-defining-steps-in-notebook-cells.md | 0 .../run-a-single-step-from-a-notebook.md | 0 .../training-with-gpus/README.md | 0 .../accelerate-distributed-training.md | 0 .../trigger-pipelines/use-templates-python.md | 2 +- .../what-can-be-configured.md | 6 +- .../collaborate-with-team/README.md | 3 + .../access-management.md | 0 .../project-templates/README.md} | 0 .../create-your-own-template.md | 2 +- .../shared-components-for-teams.md | 0 .../stacks-pipelines-models.md | 0 .../interact-with-secrets.md | 0 docs/book/reference/environment-variables.md | 4 +- docs/book/reference/how-do-i.md | 2 +- docs/book/reference/python-client.md | 4 +- docs/book/toc.md | 116 +++++++++--------- .../finetuning-with-accelerate.md | 2 +- .../book/user-guide/production-guide/ci-cd.md | 4 +- .../production-guide/cloud-orchestration.md | 2 +- .../production-guide/configure-pipeline.md | 4 +- .../production-guide/remote-storage.md | 2 +- .../starter-guide/manage-artifacts.md | 2 +- 94 files changed, 247 insertions(+), 176 deletions(-) delete mode 100644 docs/book/how-to/advanced-topics/control-logging/README.md create mode 100644 docs/book/how-to/control-logging/README.md rename docs/book/how-to/{advanced-topics => }/control-logging/disable-colorful-logging.md (63%) rename docs/book/how-to/{advanced-topics => }/control-logging/disable-rich-traceback.md (67%) rename docs/book/how-to/{advanced-topics => }/control-logging/enable-or-disable-logs-storing.md (90%) rename docs/book/how-to/{advanced-topics => }/control-logging/set-logging-verbosity.md (60%) rename docs/book/how-to/{advanced-topics => }/control-logging/view-logs-on-the-dasbhoard.md (80%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/README.md (62%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/define-where-an-image-is-built.md (63%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/docker-settings-on-a-pipeline.md (83%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/docker-settings-on-a-step.md (100%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/how-to-reuse-builds.md (89%) rename 
docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/how-to-use-a-private-pypi-repository.md (100%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md (90%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/use-a-prebuilt-image.md (96%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/use-your-own-docker-files.md (100%) rename docs/book/how-to/{infrastructure-deployment => }/customize-docker-builds/which-files-are-built-into-the-image.md (92%) create mode 100644 docs/book/how-to/data-artifact-management/complex-usecases/README.md rename docs/book/how-to/data-artifact-management/{handle-data-artifacts => complex-usecases}/datasets.md (100%) rename docs/book/how-to/data-artifact-management/{handle-data-artifacts => complex-usecases}/manage-big-data.md (100%) rename docs/book/how-to/data-artifact-management/{handle-data-artifacts => complex-usecases}/passing-artifacts-between-pipelines.md (100%) rename docs/book/how-to/data-artifact-management/{handle-data-artifacts => complex-usecases}/registering-existing-data.md (100%) rename docs/book/how-to/data-artifact-management/{handle-data-artifacts => complex-usecases}/unmaterialized-artifacts.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/README.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/best-practices-upgrading-zenml.md (85%) rename docs/book/how-to/{project-setup-and-management => manage-zenml-server}/connecting-to-zenml/README.md (100%) rename docs/book/how-to/{project-setup-and-management => manage-zenml-server}/connecting-to-zenml/connect-in-with-your-user-interactive.md (100%) rename docs/book/how-to/{project-setup-and-management => manage-zenml-server}/connecting-to-zenml/connect-with-a-service-account.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/migration-guide/migration-guide.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/migration-guide/migration-zero-forty.md (91%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/migration-guide/migration-zero-sixty.md (99%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/migration-guide/migration-zero-thirty.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/migration-guide/migration-zero-twenty.md (99%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/troubleshoot-your-deployed-server.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/upgrade-zenml-server.md (100%) rename docs/book/how-to/{advanced-topics => }/manage-zenml-server/using-zenml-server-in-prod.md (95%) rename docs/book/how-to/{infrastructure-deployment => pipeline-development}/configure-python-environments/README.md (100%) rename docs/book/how-to/{infrastructure-deployment => pipeline-development}/configure-python-environments/configure-the-server-environment.md (100%) rename docs/book/how-to/{infrastructure-deployment => pipeline-development}/configure-python-environments/handling-dependencies.md (100%) rename docs/book/how-to/{project-setup-and-management => pipeline-development}/develop-locally/README.md (100%) rename docs/book/how-to/{project-setup-and-management => pipeline-development}/develop-locally/keep-your-dashboard-server-clean.md (100%) rename docs/book/how-to/{project-setup-and-management => 
pipeline-development}/develop-locally/local-prod-pipeline-variants.md (100%) rename docs/book/how-to/{advanced-topics => pipeline-development}/run-remote-notebooks/README.md (100%) rename docs/book/how-to/{advanced-topics => pipeline-development}/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md (100%) rename docs/book/how-to/{advanced-topics => pipeline-development}/run-remote-notebooks/run-a-single-step-from-a-notebook.md (100%) rename docs/book/how-to/{advanced-topics => pipeline-development}/training-with-gpus/README.md (100%) rename docs/book/how-to/{advanced-topics => pipeline-development}/training-with-gpus/accelerate-distributed-training.md (100%) create mode 100644 docs/book/how-to/project-setup-and-management/collaborate-with-team/README.md rename docs/book/how-to/project-setup-and-management/{setting-up-a-project-repository => collaborate-with-team}/access-management.md (100%) rename docs/book/how-to/project-setup-and-management/{setting-up-a-project-repository/using-project-templates.md => collaborate-with-team/project-templates/README.md} (100%) rename docs/book/how-to/project-setup-and-management/{setting-up-a-project-repository => collaborate-with-team/project-templates}/create-your-own-template.md (86%) rename docs/book/how-to/project-setup-and-management/{setting-up-a-project-repository => collaborate-with-team}/shared-components-for-teams.md (100%) rename docs/book/how-to/project-setup-and-management/{setting-up-a-project-repository => collaborate-with-team}/stacks-pipelines-models.md (100%) rename docs/book/how-to/{ => project-setup-and-management}/interact-with-secrets.md (100%) diff --git a/.gitbook.yaml b/.gitbook.yaml index 8a1dc252fe..24efea93fb 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -202,3 +202,66 @@ redirects: docs/reference/how-do-i: reference/how-do-i.md docs/reference/community-and-content: reference/community-and-content.md docs/reference/faq: reference/faq.md + + # The new Manage ZenML Server redirects + how-to/advanced-topics/manage-zenml-server/: how-to/manage-zenml-server/README.md + how-to/project-setup-and-management/connecting-to-zenml/: how-to/manage-zenml-server/connecting-to-zenml/README.md + how-to/project-setup-and-management/connecting-to-zenml/connect-in-with-your-user-interactive: how-to/manage-zenml-server/connecting-to-zenml/connect-in-with-your-user-interactive.md + how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account: how-to/manage-zenml-server/connecting-to-zenml/connect-with-a-service-account.md + how-to/advanced-topics/manage-zenml-server/upgrade-zenml-server: how-to/manage-zenml-server/upgrade-zenml-server.md + how-to/advanced-topics/manage-zenml-server/best-practices-upgrading-zenml: how-to/manage-zenml-server/best-practices-upgrading-zenml.md + how-to/advanced-topics/manage-zenml-server/using-zenml-server-in-prod: how-to/manage-zenml-server/using-zenml-server-in-prod.md + how-to/advanced-topics/manage-zenml-server/troubleshoot-your-deployed-server: how-to/manage-zenml-server/troubleshoot-your-deployed-server.md + how-to/advanced-topics/manage-zenml-server/migration-guide/migration-guide: how-to/manage-zenml-server/migration-guide/migration-guide.md + how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty: how-to/manage-zenml-server/migration-guide/migration-zero-twenty.md + how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-thirty: how-to/manage-zenml-server/migration-guide/migration-zero-thirty.md + 
how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-forty: how-to/manage-zenml-server/migration-guide/migration-zero-forty.md + how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-sixty: how-to/manage-zenml-server/migration-guide/migration-zero-sixty.md + + how-to/project-setup-and-management/setting-up-a-project-repository/using-project-templates: how-to/project-setup-and-management/collaborate-with-team/project-templates/README.md + how-to/project-setup-and-management/setting-up-a-project-repository/create-your-own-template: how-to/project-setup-and-management/collaborate-with-team/project-templates/create-your-own-template.md + how-to/project-setup-and-management/setting-up-a-project-repository/shared-components-for-teams: how-to/project-setup-and-management/collaborate-with-team/shared-components-for-teams.md + how-to/project-setup-and-management/setting-up-a-project-repository/stacks-pipelines-models: how-to/project-setup-and-management/collaborate-with-team/stacks-pipelines-models.md + how-to/project-setup-and-management/setting-up-a-project-repository/access-management: how-to/project-setup-and-management/collaborate-with-team/access-management.md + how-to/interact-with-secrets: how-to/project-setup-and-management/interact-with-secrets.md + + how-to/project-setup-and-management/develop-locally/: how-to/pipeline-development/develop-locally/README.md + how-to/project-setup-and-management/develop-locally/local-prod-pipeline-variants: how-to/pipeline-development/develop-locally/local-prod-pipeline-variants.md + how-to/project-setup-and-management/develop-locally/keep-your-dashboard-server-clean: how-to/pipeline-development/develop-locally/keep-your-dashboard-server-clean.md + + how-to/advanced-topics/training-with-gpus/: how-to/pipeline-development/training-with-gpus/README.md + how-to/advanced-topics/training-with-gpus/accelerate-distributed-training: how-to/pipeline-development/training-with-gpus/accelerate-distributed-training.md + + how-to/advanced-topics/run-remote-notebooks/: how-to/pipeline-development/run-remote-notebooks/README.md + how-to/advanced-topics/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells: how-to/pipeline-development/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md + how-to/advanced-topics/run-remote-notebooks/run-a-single-step-from-a-notebook: how-to/pipeline-development/run-remote-notebooks/run-a-single-step-from-a-notebook.md + + how-to/infrastructure-deployment/configure-python-environments/: how-to/pipeline-development/configure-python-environments/README.md + how-to/infrastructure-deployment/configure-python-environments/handling-dependencies: how-to/pipeline-development/configure-python-environments/handling-dependencies.md + how-to/infrastructure-deployment/configure-python-environments/configure-the-server-environment: how-to/pipeline-development/configure-python-environments/configure-the-server-environment.md + + how-to/infrastructure-deployment/customize-docker-builds/: how-to/customize-docker-builds/README.md + how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline: how-to/customize-docker-builds/docker-settings-on-a-pipeline.md + how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-step: how-to/customize-docker-builds/docker-settings-on-a-step.md + how-to/infrastructure-deployment/customize-docker-builds/use-a-prebuilt-image: how-to/customize-docker-builds/use-a-prebuilt-image.md + 
how-to/infrastructure-deployment/customize-docker-builds/specify-pip-dependencies-and-apt-packages: how-to/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md
+  how-to/infrastructure-deployment/customize-docker-builds/how-to-use-a-private-pypi-repository: how-to/customize-docker-builds/how-to-use-a-private-pypi-repository.md
+  how-to/infrastructure-deployment/customize-docker-builds/use-your-own-docker-files: how-to/customize-docker-builds/use-your-own-docker-files.md
+  how-to/infrastructure-deployment/customize-docker-builds/which-files-are-built-into-the-image: how-to/customize-docker-builds/which-files-are-built-into-the-image.md
+  how-to/infrastructure-deployment/customize-docker-builds/how-to-reuse-builds: how-to/customize-docker-builds/how-to-reuse-builds.md
+  how-to/infrastructure-deployment/customize-docker-builds/define-where-an-image-is-built: how-to/customize-docker-builds/define-where-an-image-is-built.md
+
+  how-to/data-artifact-management/handle-data-artifacts/datasets: how-to/data-artifact-management/complex-usecases/datasets.md
+  how-to/data-artifact-management/handle-data-artifacts/manage-big-data: how-to/data-artifact-management/complex-usecases/manage-big-data.md
+  how-to/data-artifact-management/handle-data-artifacts/unmaterialized-artifacts: how-to/data-artifact-management/complex-usecases/unmaterialized-artifacts.md
+  how-to/data-artifact-management/handle-data-artifacts/passing-artifacts-between-pipelines: how-to/data-artifact-management/complex-usecases/passing-artifacts-between-pipelines.md
+  how-to/data-artifact-management/handle-data-artifacts/registering-existing-data: how-to/data-artifact-management/complex-usecases/registering-existing-data.md
+
+  how-to/advanced-topics/control-logging/: how-to/control-logging/README.md
+  how-to/advanced-topics/control-logging/view-logs-on-the-dasbhoard: how-to/control-logging/view-logs-on-the-dasbhoard.md
+  how-to/advanced-topics/control-logging/enable-or-disable-logs-storing: how-to/control-logging/enable-or-disable-logs-storing.md
+  how-to/advanced-topics/control-logging/set-logging-verbosity: how-to/control-logging/set-logging-verbosity.md
+  how-to/advanced-topics/control-logging/disable-rich-traceback: how-to/control-logging/disable-rich-traceback.md
+  how-to/advanced-topics/control-logging/disable-colorful-logging: how-to/control-logging/disable-colorful-logging.md
+
+  
\ No newline at end of file
diff --git a/docs/book/component-guide/data-validators/deepchecks.md b/docs/book/component-guide/data-validators/deepchecks.md
index b24d827f0b..cab1d0c266 100644
--- a/docs/book/component-guide/data-validators/deepchecks.md
+++ b/docs/book/component-guide/data-validators/deepchecks.md
@@ -78,7 +78,7 @@ RUN apt-get update
 RUN apt-get install ffmpeg libsm6 libxext6 -y
 ```
 
-Then, place the following snippet above your pipeline definition. Note that the path of the `dockerfile` are relative to where the pipeline definition file is. Read [the containerization guide](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) for more details:
+Then, place the following snippet above your pipeline definition. Note that the path of the `dockerfile` is relative to where the pipeline definition file is. 
Read [the containerization guide](../../how-to/customize-docker-builds/README.md) for more details: ```python import zenml diff --git a/docs/book/component-guide/experiment-trackers/mlflow.md b/docs/book/component-guide/experiment-trackers/mlflow.md index b41cffe90c..9f480648a5 100644 --- a/docs/book/component-guide/experiment-trackers/mlflow.md +++ b/docs/book/component-guide/experiment-trackers/mlflow.md @@ -82,7 +82,7 @@ zenml stack register custom_stack -e mlflow_experiment_tracker ... --set {% endtab %} {% tab title="ZenML Secret (Recommended)" %} -This method requires you to [configure a ZenML secret](../../how-to/interact-with-secrets.md) to store the MLflow tracking service credentials securely. +This method requires you to [configure a ZenML secret](../../how-to/project-setup-and-management/interact-with-secrets.md) to store the MLflow tracking service credentials securely. You can create the secret using the `zenml secret create` command: @@ -106,7 +106,7 @@ zenml experiment-tracker register mlflow \ ``` {% hint style="info" %} -Read more about [ZenML Secrets](../../how-to/interact-with-secrets.md) in the ZenML documentation. +Read more about [ZenML Secrets](../../how-to/project-setup-and-management/interact-with-secrets.md) in the ZenML documentation. {% endhint %} {% endtab %} {% endtabs %} diff --git a/docs/book/component-guide/experiment-trackers/neptune.md b/docs/book/component-guide/experiment-trackers/neptune.md index 68cf15eb09..c999ccabe1 100644 --- a/docs/book/component-guide/experiment-trackers/neptune.md +++ b/docs/book/component-guide/experiment-trackers/neptune.md @@ -37,7 +37,7 @@ You need to configure the following credentials for authentication to Neptune: {% tabs %} {% tab title="ZenML Secret (Recommended)" %} -This method requires you to [configure a ZenML secret](../../how-to/interact-with-secrets.md) to store the Neptune tracking service credentials securely. +This method requires you to [configure a ZenML secret](../../how-to/project-setup-and-management/interact-with-secrets.md) to store the Neptune tracking service credentials securely. You can create the secret using the `zenml secret create` command: @@ -61,7 +61,7 @@ zenml stack register neptune_stack -e neptune_experiment_tracker ... --set ``` {% hint style="info" %} -Read more about [ZenML Secrets](../../how-to/interact-with-secrets.md) in the ZenML documentation. +Read more about [ZenML Secrets](../../how-to/project-setup-and-management/interact-with-secrets.md) in the ZenML documentation. {% endhint %} {% endtab %} diff --git a/docs/book/component-guide/experiment-trackers/wandb.md b/docs/book/component-guide/experiment-trackers/wandb.md index ee19b7c049..1f0bbbfd32 100644 --- a/docs/book/component-guide/experiment-trackers/wandb.md +++ b/docs/book/component-guide/experiment-trackers/wandb.md @@ -55,7 +55,7 @@ zenml stack register custom_stack -e wandb_experiment_tracker ... --set {% endtab %} {% tab title="ZenML Secret (Recommended)" %} -This method requires you to [configure a ZenML secret](../../how-to/interact-with-secrets.md) to store the Weights & Biases tracking service credentials securely. +This method requires you to [configure a ZenML secret](../../how-to/project-setup-and-management/interact-with-secrets.md) to store the Weights & Biases tracking service credentials securely. 
You can create the secret using the `zenml secret create` command: @@ -79,7 +79,7 @@ zenml experiment-tracker register wandb_tracker \ ``` {% hint style="info" %} -Read more about [ZenML Secrets](../../how-to/interact-with-secrets.md) in the ZenML documentation. +Read more about [ZenML Secrets](../../how-to/project-setup-and-management/interact-with-secrets.md) in the ZenML documentation. {% endhint %} {% endtab %} {% endtabs %} diff --git a/docs/book/component-guide/image-builders/gcp.md b/docs/book/component-guide/image-builders/gcp.md index 32b8704289..00d9ec937a 100644 --- a/docs/book/component-guide/image-builders/gcp.md +++ b/docs/book/component-guide/image-builders/gcp.md @@ -185,7 +185,7 @@ zenml stack register -i ... --set As described in this [Google Cloud Build documentation page](https://cloud.google.com/build/docs/build-config-file-schema#network), Google Cloud Build uses containers to execute the build steps which are automatically attached to a network called `cloudbuild` that provides some Application Default Credentials (ADC), that allow the container to be authenticated and therefore use other GCP services. -By default, the GCP Image Builder is executing the build command of the ZenML Pipeline Docker image with the option `--network=cloudbuild`, so the ADC provided by the `cloudbuild` network can also be used in the build. This is useful if you want to install a private dependency from a GCP Artifact Registry, but you will also need to use a [custom base parent image](../../how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md) with the [`keyrings.google-artifactregistry-auth`](https://pypi.org/project/keyrings.google-artifactregistry-auth/) installed, so `pip` can connect and authenticate in the private artifact registry to download the dependency. +By default, the GCP Image Builder is executing the build command of the ZenML Pipeline Docker image with the option `--network=cloudbuild`, so the ADC provided by the `cloudbuild` network can also be used in the build. This is useful if you want to install a private dependency from a GCP Artifact Registry, but you will also need to use a [custom base parent image](../../how-to/customize-docker-builds/docker-settings-on-a-pipeline.md) with the [`keyrings.google-artifactregistry-auth`](https://pypi.org/project/keyrings.google-artifactregistry-auth/) installed, so `pip` can connect and authenticate in the private artifact registry to download the dependency. ```dockerfile FROM zenmldocker/zenml:latest diff --git a/docs/book/component-guide/image-builders/kaniko.md b/docs/book/component-guide/image-builders/kaniko.md index 20f0227370..c9c15553b7 100644 --- a/docs/book/component-guide/image-builders/kaniko.md +++ b/docs/book/component-guide/image-builders/kaniko.md @@ -50,7 +50,7 @@ For more information and a full list of configurable attributes of the Kaniko im The Kaniko image builder will create a Kubernetes pod that is running the build. This build pod needs to be able to pull from/push to certain container registries, and depending on the stack component configuration also needs to be able to read from the artifact store: * The pod needs to be authenticated to push to the container registry in your active stack. -* In case the [parent image](../../how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md#using-a-custom-parent-image) you use in your `DockerSettings` is stored in a private registry, the pod needs to be authenticated to pull from this registry. 
+* In case the [parent image](../../how-to/customize-docker-builds/docker-settings-on-a-pipeline.md#using-a-custom-parent-image) you use in your `DockerSettings` is stored in a private registry, the pod needs to be authenticated to pull from this registry.
 * If you configured your image builder to store the build context in the artifact store, the pod needs to be authenticated to read files from the artifact store storage.
 
 ZenML is not yet able to handle setting all of the credentials of the various combinations of container registries and artifact stores on the Kaniko build pod, which is why you're required to set this up yourself for now. The following section outlines how to handle it in the most straightforward (and probably also most common) scenario, when the Kubernetes cluster you're using for the Kaniko build is hosted on the same cloud provider as your container registry (and potentially the artifact store). For all other cases, check out the [official Kaniko repository](https://github.com/GoogleContainerTools/kaniko) for more information.
diff --git a/docs/book/component-guide/model-deployers/seldon.md b/docs/book/component-guide/model-deployers/seldon.md
index 152337bbae..7c2ed3cf01 100644
--- a/docs/book/component-guide/model-deployers/seldon.md
+++ b/docs/book/component-guide/model-deployers/seldon.md
@@ -239,7 +239,7 @@ If you want to use a custom persistent storage with Seldon Core, or if you prefe
 
 **Advanced: Configuring a Custom Seldon Core Secret**
 
-The Seldon Core model deployer stack component allows configuring an additional `secret` attribute that can be used to specify custom credentials that Seldon Core should use to authenticate to the persistent storage service where models are located. This is useful if you want to connect Seldon Core to a persistent storage service that is not supported as a ZenML Artifact Store, or if you don't want to configure or use the same credentials configured for your Artifact Store. The `secret` attribute must be set to the name of [a ZenML secret](../../how-to/interact-with-secrets.md) containing credentials configured in the format supported by Seldon Core.
+The Seldon Core model deployer stack component allows configuring an additional `secret` attribute that can be used to specify custom credentials that Seldon Core should use to authenticate to the persistent storage service where models are located. This is useful if you want to connect Seldon Core to a persistent storage service that is not supported as a ZenML Artifact Store, or if you don't want to configure or use the same credentials configured for your Artifact Store. The `secret` attribute must be set to the name of [a ZenML secret](../../how-to/project-setup-and-management/interact-with-secrets.md) containing credentials configured in the format supported by Seldon Core.
 
 {% hint style="info" %}
 This method is not recommended, because it limits the Seldon Core model deployer to a single persistent storage service, whereas using the Artifact Store credentials gives you more flexibility in combining the Seldon Core model deployer with any Artifact Store in the same ZenML stack.
diff --git a/docs/book/component-guide/orchestrators/airflow.md b/docs/book/component-guide/orchestrators/airflow.md
index a5e9de12dd..7fd0fcb8ea 100644
--- a/docs/book/component-guide/orchestrators/airflow.md
+++ b/docs/book/component-guide/orchestrators/airflow.md
@@ -159,7 +159,7 @@ of your Airflow deployment.
{% hint style="info" %} ZenML will build a Docker image called `/zenml:` which includes your code and use it to run your pipeline steps in Airflow. Check -out [this page](/docs/book/how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn +out [this page](/docs/book/how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. {% endhint %} @@ -210,7 +210,7 @@ more information on how to specify settings. #### Enabling CUDA for GPU-backed hardware Note that if you wish to use this orchestrator to run steps on a GPU, you will need to -follow [the instructions on this page](/docs/book/how-to/advanced-topics/training-with-gpus/README.md) to ensure that it +follow [the instructions on this page](/docs/book/how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. diff --git a/docs/book/component-guide/orchestrators/azureml.md b/docs/book/component-guide/orchestrators/azureml.md index e0c32f5adb..e47b4d8e9f 100644 --- a/docs/book/component-guide/orchestrators/azureml.md +++ b/docs/book/component-guide/orchestrators/azureml.md @@ -80,7 +80,7 @@ assign it the correct permissions and use it to [register a ZenML Azure Service For each pipeline run, ZenML will build a Docker image called `/zenml:` which includes your code and use it to run your pipeline steps in AzureML. Check out -[this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to +[this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. ## AzureML UI diff --git a/docs/book/component-guide/orchestrators/custom.md b/docs/book/component-guide/orchestrators/custom.md index 14f1874483..539aecdd6b 100644 --- a/docs/book/component-guide/orchestrators/custom.md +++ b/docs/book/component-guide/orchestrators/custom.md @@ -215,6 +215,6 @@ To see a full end-to-end worked example of a custom orchestrator, [see here](htt ### Enabling CUDA for GPU-backed hardware -Note that if you wish to use your custom orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use your custom orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
ZenML Scarf
diff --git a/docs/book/component-guide/orchestrators/databricks.md b/docs/book/component-guide/orchestrators/databricks.md index 9f57b5d95e..b87aec6811 100644 --- a/docs/book/component-guide/orchestrators/databricks.md +++ b/docs/book/component-guide/orchestrators/databricks.md @@ -182,7 +182,7 @@ With these settings, the orchestrator will use a GPU-enabled Spark version and a #### Enabling CUDA for GPU-backed hardware -Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
ZenML Scarf
diff --git a/docs/book/component-guide/orchestrators/hyperai.md b/docs/book/component-guide/orchestrators/hyperai.md index 5093d296e5..3baa8ae909 100644 --- a/docs/book/component-guide/orchestrators/hyperai.md +++ b/docs/book/component-guide/orchestrators/hyperai.md @@ -78,6 +78,6 @@ python file_that_runs_a_zenml_pipeline.py #### Enabling CUDA for GPU-backed hardware -Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
ZenML Scarf
diff --git a/docs/book/component-guide/orchestrators/kubeflow.md b/docs/book/component-guide/orchestrators/kubeflow.md index 174cb56e82..505bee559f 100644 --- a/docs/book/component-guide/orchestrators/kubeflow.md +++ b/docs/book/component-guide/orchestrators/kubeflow.md @@ -181,7 +181,7 @@ We can then register the orchestrator and use it in our active stack. This can b {% endtabs %} {% hint style="info" %} -ZenML will build a Docker image called `/zenml:` which includes all required software dependencies and use it to run your pipeline steps in Kubeflow. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. +ZenML will build a Docker image called `/zenml:` which includes all required software dependencies and use it to run your pipeline steps in Kubeflow. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. {% endhint %} You can now run any ZenML pipeline using the Kubeflow orchestrator: @@ -260,7 +260,7 @@ Check out the [SDK docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs #### Enabling CUDA for GPU-backed hardware -Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. ### Important Note for Multi-Tenancy Deployments @@ -346,7 +346,7 @@ kubeflow_settings = KubeflowOrchestratorSettings( ) ``` -See full documentation of using ZenML secrets [here](../../how-to/interact-with-secrets.md). +See full documentation of using ZenML secrets [here](../../how-to/project-setup-and-management/interact-with-secrets.md). For more information and a full list of configurable attributes of the Kubeflow orchestrator, check out the [SDK Docs](https://sdkdocs.zenml.io/latest/integration\_code\_docs/integrations-kubeflow/#zenml.integrations.kubeflow.orchestrators.kubeflow\_orchestrator.KubeflowOrchestrator) . diff --git a/docs/book/component-guide/orchestrators/kubernetes.md b/docs/book/component-guide/orchestrators/kubernetes.md index 65b38fc936..2a6ca6ea60 100644 --- a/docs/book/component-guide/orchestrators/kubernetes.md +++ b/docs/book/component-guide/orchestrators/kubernetes.md @@ -98,7 +98,7 @@ We can then register the orchestrator and use it in our active stack. This can b ``` {% hint style="info" %} -ZenML will build a Docker image called `/zenml:` which includes your code and use it to run your pipeline steps in Kubernetes. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. +ZenML will build a Docker image called `/zenml:` which includes your code and use it to run your pipeline steps in Kubernetes. 
Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. {% endhint %} You can now run any ZenML pipeline using the Kubernetes orchestrator: @@ -296,6 +296,6 @@ For more information and a full list of configurable attributes of the Kubernete #### Enabling CUDA for GPU-backed hardware -Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
ZenML Scarf
diff --git a/docs/book/component-guide/orchestrators/local-docker.md b/docs/book/component-guide/orchestrators/local-docker.md index 076f9e0fb4..52dfcfa1ab 100644 --- a/docs/book/component-guide/orchestrators/local-docker.md +++ b/docs/book/component-guide/orchestrators/local-docker.md @@ -68,6 +68,6 @@ def simple_pipeline(): #### Enabling CUDA for GPU-backed hardware -Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
ZenML Scarf
diff --git a/docs/book/component-guide/orchestrators/orchestrators.md b/docs/book/component-guide/orchestrators/orchestrators.md index f75e915f84..d5e34cec84 100644 --- a/docs/book/component-guide/orchestrators/orchestrators.md +++ b/docs/book/component-guide/orchestrators/orchestrators.md @@ -13,7 +13,7 @@ steps of your pipeline) are available. {% hint style="info" %} Many of ZenML's remote orchestrators build [Docker](https://www.docker.com/) images in order to transport and execute your pipeline code. If you want to learn more about how Docker images are built by ZenML, check -out [this guide](../../how-to/infrastructure-deployment/customize-docker-builds/README.md). +out [this guide](../../how-to/customize-docker-builds/README.md). {% endhint %} ### When to use it diff --git a/docs/book/component-guide/orchestrators/sagemaker.md b/docs/book/component-guide/orchestrators/sagemaker.md index 1e287af471..6464333934 100644 --- a/docs/book/component-guide/orchestrators/sagemaker.md +++ b/docs/book/component-guide/orchestrators/sagemaker.md @@ -101,7 +101,7 @@ python run.py # Authenticates with `default` profile in `~/.aws/config` {% endtabs %} {% hint style="info" %} -ZenML will build a Docker image called `/zenml:` which includes your code and use it to run your pipeline steps in Sagemaker. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. +ZenML will build a Docker image called `/zenml:` which includes your code and use it to run your pipeline steps in Sagemaker. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them. {% endhint %} You can now run any ZenML pipeline using the Sagemaker orchestrator: @@ -337,6 +337,6 @@ This approach allows for more granular tagging, giving you flexibility in how yo ### Enabling CUDA for GPU-backed hardware -Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration. +Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/orchestrators/tekton.md b/docs/book/component-guide/orchestrators/tekton.md
index 507c29ae00..562aeeb912 100644
--- a/docs/book/component-guide/orchestrators/tekton.md
+++ b/docs/book/component-guide/orchestrators/tekton.md
@@ -135,7 +135,7 @@ We can then register the orchestrator and use it in our active stack. This can b
 ```
 
 {% hint style="info" %}
-ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your pipeline steps in Tekton. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
+ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your pipeline steps in Tekton. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
 {% endhint %}
 
 You can now run any ZenML pipeline using the Tekton orchestrator:
@@ -231,6 +231,6 @@ For more information and a full list of configurable attributes of the Tekton or
 
 #### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/orchestrators/vertex.md b/docs/book/component-guide/orchestrators/vertex.md
index 35e52b786d..210d34f931 100644
--- a/docs/book/component-guide/orchestrators/vertex.md
+++ b/docs/book/component-guide/orchestrators/vertex.md
@@ -163,7 +163,7 @@ zenml stack register -o ... --set
 ```
 
 {% hint style="info" %}
-ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your pipeline steps in Vertex AI. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
+ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your pipeline steps in Vertex AI. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
 {% endhint %}
 
 You can now run any ZenML pipeline using the Vertex orchestrator:
@@ -291,6 +291,6 @@ For more information and a full list of configurable attributes of the Vertex or
 
 ### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use this orchestrator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/step-operators/azureml.md b/docs/book/component-guide/step-operators/azureml.md
index 93bc7d0611..55681f151c 100644
--- a/docs/book/component-guide/step-operators/azureml.md
+++ b/docs/book/component-guide/step-operators/azureml.md
@@ -93,7 +93,7 @@ def trainer(...) -> ...:
 ```
 
 {% hint style="info" %}
-ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your steps in AzureML. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
+ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your steps in AzureML. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
 {% endhint %}
 
 #### Additional configuration
@@ -152,6 +152,6 @@ You can check out the [AzureMLStepOperatorSettings SDK docs](https://sdkdocs.zen
 
 #### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/step-operators/custom.md b/docs/book/component-guide/step-operators/custom.md
index a5ad065b23..7328d9314a 100644
--- a/docs/book/component-guide/step-operators/custom.md
+++ b/docs/book/component-guide/step-operators/custom.md
@@ -120,6 +120,6 @@ The design behind this interaction lets us separate the configuration of the fla
 
 #### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use your custom step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use your custom step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/step-operators/kubernetes.md b/docs/book/component-guide/step-operators/kubernetes.md
index c385982987..4ecfe9af27 100644
--- a/docs/book/component-guide/step-operators/kubernetes.md
+++ b/docs/book/component-guide/step-operators/kubernetes.md
@@ -93,7 +93,7 @@ def trainer(...) -> ...:
 ```
 
 {% hint style="info" %}
-ZenML will build a Docker images which includes your code and use it to run your steps in Kubernetes. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
+ZenML will build a Docker image which includes your code and use it to run your steps in Kubernetes. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
 {% endhint %}
 
@@ -225,6 +225,6 @@ For more information and a full list of configurable attributes of the Kubernete
 
 #### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/step-operators/sagemaker.md b/docs/book/component-guide/step-operators/sagemaker.md
index 3bd02eba90..28e285aeb4 100644
--- a/docs/book/component-guide/step-operators/sagemaker.md
+++ b/docs/book/component-guide/step-operators/sagemaker.md
@@ -84,7 +84,7 @@ def trainer(...) -> ...:
 ```
 
 {% hint style="info" %}
-ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your steps in SageMaker. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
+ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your steps in SageMaker. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
 {% endhint %}
 
 #### Additional configuration
@@ -95,6 +95,6 @@ For more information and a full list of configurable attributes of the SageMaker
 
 #### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
diff --git a/docs/book/component-guide/step-operators/step-operators.md b/docs/book/component-guide/step-operators/step-operators.md
index b96b848852..146e91eb91 100644
--- a/docs/book/component-guide/step-operators/step-operators.md
+++ b/docs/book/component-guide/step-operators/step-operators.md
@@ -63,12 +63,12 @@ def my_step(...) -> ...:
 #### Specifying per-step resources
 
 If your steps require additional hardware resources, you can specify them on your steps as
-described [here](../../how-to/advanced-topics/training-with-gpus/README.md).
+described [here](../../how-to/pipeline-development/training-with-gpus/README.md).
 
 #### Enabling CUDA for GPU-backed hardware
 
 Note that if you wish to use step operators to run steps on a GPU, you will need to
-follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure
+follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure
 that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give
 its full acceleration.
 
diff --git a/docs/book/component-guide/step-operators/vertex.md b/docs/book/component-guide/step-operators/vertex.md
index aecfef4944..697f771876 100644
--- a/docs/book/component-guide/step-operators/vertex.md
+++ b/docs/book/component-guide/step-operators/vertex.md
@@ -92,7 +92,7 @@ def trainer(...) -> ...:
 ```
 
 {% hint style="info" %}
-ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your steps in Vertex AI. Check out [this page](../../how-to/infrastructure-deployment/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
+ZenML will build a Docker image called `<container_registry>/zenml:<pipeline_name>` which includes your code and use it to run your steps in Vertex AI. Check out [this page](../../how-to/customize-docker-builds/README.md) if you want to learn more about how ZenML builds these images and how you can customize them.
 {% endhint %}
 
 #### Additional configuration
@@ -133,6 +133,6 @@ For more information and a full list of configurable attributes of the Vertex st
 
 #### Enabling CUDA for GPU-backed hardware
 
-Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/advanced-topics/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
+Note that if you wish to use this step operator to run steps on a GPU, you will need to follow [the instructions on this page](../../how-to/pipeline-development/training-with-gpus/README.md) to ensure that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to give its full acceleration.
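As a concrete companion to the "per-step resources" pointer above, here is a sketch of requesting resources and a step operator on a single step; the operator name is a hypothetical stand-in for whatever is registered in your stack:

```python
from zenml import step
from zenml.config import ResourceSettings

@step(
    step_operator="vertex",  # assumed name of a registered step operator
    settings={"resources": ResourceSettings(cpu_count=8, gpu_count=1, memory="32GB")},
)
def trainer() -> None:
    # The step body runs inside the step operator's environment.
    ...
```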
diff --git a/docs/book/getting-started/system-architectures.md b/docs/book/getting-started/system-architectures.md
index 369fe2dbcf..79fec7edea 100644
--- a/docs/book/getting-started/system-architectures.md
+++ b/docs/book/getting-started/system-architectures.md
@@ -122,7 +122,7 @@ secret store directly to the ZenML server that is managed by us.
 All ZenML secrets used by running pipelines to access infrastructure services
 and resources are stored in the customer secret store. This allows users to
 use [service connectors](../how-to/infrastructure-deployment/auth-management/service-connectors-guide.md)
-and the [secrets API](../how-to/interact-with-secrets.md) to authenticate
+and the [secrets API](../how-to/project-setup-and-management/interact-with-secrets.md) to authenticate
 ZenML pipelines and the ZenML Pro to third-party services and infrastructure
 while ensuring that credentials are always stored on the customer side.
 {% endhint %}
diff --git a/docs/book/how-to/advanced-topics/control-logging/README.md b/docs/book/how-to/advanced-topics/control-logging/README.md
deleted file mode 100644
index 64b775efe2..0000000000
--- a/docs/book/how-to/advanced-topics/control-logging/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-icon: memo-circle-info
-description: Configuring ZenML's default logging behavior
----
-
-# Control logging
-
-ZenML produces various kinds of logs:
-
-* The [ZenML Server](../../../getting-started/deploying-zenml/README.md) produces server logs (like any FastAPI server).
-* The [Client or Runner](../../infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment) environment produces logs, for example after running a pipeline. These are steps that are typically before, after, and during the creation of a pipeline run.
-* The [Execution environment](../../infrastructure-deployment/configure-python-environments/README.md#execution-environments) (on the orchestrator level) produces logs when it executes each step of a pipeline. These are logs that are typically written in your steps using the python `logging` module.
-
-This section talks about how users can control logging behavior in these various environments.
-
diff --git a/docs/book/how-to/control-logging/README.md b/docs/book/how-to/control-logging/README.md
new file mode 100644
index 0000000000..ef2d55e352
--- /dev/null
+++ b/docs/book/how-to/control-logging/README.md
@@ -0,0 +1,16 @@
+---
+icon: memo-circle-info
+description: Configuring ZenML's default logging behavior
+---
+
+# Control logging
+
+ZenML produces various kinds of logs:
+
+* The [ZenML Server](../../getting-started/deploying-zenml/README.md) produces server logs (like any FastAPI server).
+* The [Client or Runner](../pipeline-development/configure-python-environments/README.md#client-environment-or-the-runner-environment) environment produces logs, for example after running a pipeline. These logs are typically produced before, during, and after the creation of a pipeline run.
+* The [Execution environment](../pipeline-development/configure-python-environments/README.md#execution-environments) (on the orchestrator level) produces logs when it executes each step of a pipeline. These are logs that are typically written in your steps using the Python `logging` module.
+
+This section talks about how users can control logging behavior in these various environments.
+
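As a side note on that last bullet, here is a minimal sketch of what step-level logging looks like in practice (the step body is illustrative, not part of this patch):

```python
import logging

from zenml import step

logger = logging.getLogger(__name__)

@step
def my_step() -> None:
    # Messages emitted via the standard logging module inside a step are
    # picked up by ZenML and, by default, stored alongside the run.
    logger.info("Preprocessing finished.")
```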
diff --git a/docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md b/docs/book/how-to/control-logging/disable-colorful-logging.md
similarity index 63%
rename from docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md
rename to docs/book/how-to/control-logging/disable-colorful-logging.md
index e536fa989b..20adaabe1f 100644
--- a/docs/book/how-to/advanced-topics/control-logging/disable-colorful-logging.md
+++ b/docs/book/how-to/control-logging/disable-colorful-logging.md
@@ -10,7 +10,7 @@ By default, ZenML uses colorful logging to make it easier to read logs. However,
 ZENML_LOGGING_COLORS_DISABLED=true
 ```
 
-Note that setting this on the [client environment](../../infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will automatically disable colorful logging on remote pipeline runs. If you wish to only disable it locally, but turn on for remote pipeline runs, you can set the `ZENML_LOGGING_COLORS_DISABLED` environment variable in your pipeline runs environment as follows:
+Note that setting this on the [client environment](../pipeline-development/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will automatically disable colorful logging on remote pipeline runs. If you wish to only disable it locally, but turn it on for remote pipeline runs, you can set the `ZENML_LOGGING_COLORS_DISABLED` environment variable in your pipeline runs environment as follows:
 
 ```python
 docker_settings = DockerSettings(environment={"ZENML_LOGGING_COLORS_DISABLED": "false"})
diff --git a/docs/book/how-to/advanced-topics/control-logging/disable-rich-traceback.md b/docs/book/how-to/control-logging/disable-rich-traceback.md
similarity index 67%
rename from docs/book/how-to/advanced-topics/control-logging/disable-rich-traceback.md
rename to docs/book/how-to/control-logging/disable-rich-traceback.md
index c19cf36257..a47f37c388 100644
--- a/docs/book/how-to/advanced-topics/control-logging/disable-rich-traceback.md
+++ b/docs/book/how-to/control-logging/disable-rich-traceback.md
@@ -12,9 +12,9 @@ export ZENML_ENABLE_RICH_TRACEBACK=false
 
 This will ensure that you see only the plain text traceback output.
 
-Note that setting this on the [client environment](../../infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will **not automatically disable rich tracebacks on remote pipeline runs**. That means setting this variable locally with only effect pipelines that run locally.
+Note that setting this on the [client environment](../pipeline-development/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will **not automatically disable rich tracebacks on remote pipeline runs**. That means setting this variable locally will only affect pipelines that run locally.
 
-If you wish to disable it also for [remote pipeline runs](../../../user-guide/production-guide/cloud-orchestration.md), you can set the `ZENML_ENABLE_RICH_TRACEBACK` environment variable in your pipeline runs environment as follows:
+If you wish to disable it also for [remote pipeline runs](../../user-guide/production-guide/cloud-orchestration.md), you can set the `ZENML_ENABLE_RICH_TRACEBACK` environment variable in your pipeline runs environment as follows:
 
 ```python
 docker_settings = DockerSettings(environment={"ZENML_ENABLE_RICH_TRACEBACK": "false"})
diff --git a/docs/book/how-to/advanced-topics/control-logging/enable-or-disable-logs-storing.md b/docs/book/how-to/control-logging/enable-or-disable-logs-storing.md
similarity index 90%
rename from docs/book/how-to/advanced-topics/control-logging/enable-or-disable-logs-storing.md
rename to docs/book/how-to/control-logging/enable-or-disable-logs-storing.md
index 6e6e45015f..13965f9381 100644
--- a/docs/book/how-to/advanced-topics/control-logging/enable-or-disable-logs-storing.md
+++ b/docs/book/how-to/control-logging/enable-or-disable-logs-storing.md
@@ -15,7 +15,7 @@ def my_step() -> None:
 These logs are stored within the respective artifact store of your stack. You can display the logs in the dashboard
 as follows:
 
-![Displaying step logs on the dashboard](../../../.gitbook/assets/zenml_step_logs.png)
+![Displaying step logs on the dashboard](../../.gitbook/assets/zenml_step_logs.png)
 
 {% hint style="warning" %}
 Note that if you are not connected to a cloud artifact store with a service connector configured then you will not
@@ -37,7 +37,7 @@ If you do not want to store the logs in your artifact store, you can:
     def my_pipeline():
         ...
     ```
-2. Disable it by using the environmental variable `ZENML_DISABLE_STEP_LOGS_STORAGE` and setting it to `true`. This environmental variable takes precedence over the parameters mentioned above. Note this environmental variable needs to be set on the [execution environment](../../infrastructure-deployment/configure-python-environments/README.md#execution-environments), i.e., on the orchestrator level:
+2. Disable it by using the environment variable `ZENML_DISABLE_STEP_LOGS_STORAGE` and setting it to `true`. This environment variable takes precedence over the parameters mentioned above. Note this environment variable needs to be set on the [execution environment](../pipeline-development/configure-python-environments/README.md#execution-environments), i.e., on the orchestrator level:
 
 ```python
 docker_settings = DockerSettings(environment={"ZENML_DISABLE_STEP_LOGS_STORAGE": "true"})
diff --git a/docs/book/how-to/advanced-topics/control-logging/set-logging-verbosity.md b/docs/book/how-to/control-logging/set-logging-verbosity.md
similarity index 60%
rename from docs/book/how-to/advanced-topics/control-logging/set-logging-verbosity.md
rename to docs/book/how-to/control-logging/set-logging-verbosity.md
index b183934669..fa21a318ad 100644
--- a/docs/book/how-to/advanced-topics/control-logging/set-logging-verbosity.md
+++ b/docs/book/how-to/control-logging/set-logging-verbosity.md
@@ -13,9 +13,9 @@ export ZENML_LOGGING_VERBOSITY=INFO
 
 Choose from `INFO`, `WARN`, `ERROR`, `CRITICAL`, `DEBUG`. This will set the logs to whichever level you suggest.
 
-Note that setting this on the [client environment](../../infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will **not automatically set the same logging verbosity for remote pipeline runs**. That means setting this variable locally with only effect pipelines that run locally.
+Note that setting this on the [client environment](../pipeline-development/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will **not automatically set the same logging verbosity for remote pipeline runs**. That means setting this variable locally will only affect pipelines that run locally.
 
-If you wish to control for [remote pipeline runs](../../../user-guide/production-guide/cloud-orchestration.md), you can set the `ZENML_LOGGING_VERBOSITY` environment variable in your pipeline runs environment as follows:
+If you wish to control this for [remote pipeline runs](../../user-guide/production-guide/cloud-orchestration.md), you can set the `ZENML_LOGGING_VERBOSITY` environment variable in your pipeline runs environment as follows:
 
 ```python
 docker_settings = DockerSettings(environment={"ZENML_LOGGING_VERBOSITY": "DEBUG"})
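The `docker_settings` one-liners in the hunks above are fragments; here is a fuller sketch of how such an environment variable actually reaches the execution environment (the variable values are examples only):

```python
from zenml import pipeline
from zenml.config import DockerSettings

# Any of the ZENML_* logging variables discussed above can be passed this way.
docker_settings = DockerSettings(
    environment={
        "ZENML_LOGGING_VERBOSITY": "DEBUG",
        "ZENML_ENABLE_RICH_TRACEBACK": "false",
    }
)

@pipeline(settings={"docker": docker_settings})
def my_pipeline() -> None:
    ...
```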
diff --git a/docs/book/how-to/advanced-topics/control-logging/view-logs-on-the-dasbhoard.md b/docs/book/how-to/control-logging/view-logs-on-the-dasbhoard.md
similarity index 80%
rename from docs/book/how-to/advanced-topics/control-logging/view-logs-on-the-dasbhoard.md
rename to docs/book/how-to/control-logging/view-logs-on-the-dasbhoard.md
index b202fb8c9c..2b803a6d4f 100644
--- a/docs/book/how-to/advanced-topics/control-logging/view-logs-on-the-dasbhoard.md
+++ b/docs/book/how-to/control-logging/view-logs-on-the-dasbhoard.md
@@ -17,14 +17,14 @@ These logs are stored within the respective artifact store of your stack. This m
 *if the deployed ZenML server has direct access to the underlying artifact store*. There are two cases in which this will be true:
 
 * In case of a local ZenML server (via `zenml login --local`), both local and remote artifact stores may be accessible, depending on configuration of the client.
-* In case of a deployed ZenML server, logs for runs on a [local artifact store](../../../component-guide/artifact-stores/local.md) will not be accessible. Logs
-for runs using a [remote artifact store](../../../user-guide/production-guide/remote-storage.md) **may be** accessible, if the artifact store has been configured
-with a [service connector](../../infrastructure-deployment/auth-management/service-connectors-guide.md). Please read [this chapter](../../../user-guide/production-guide/remote-storage.md) of
+* In case of a deployed ZenML server, logs for runs on a [local artifact store](../../component-guide/artifact-stores/local.md) will not be accessible. Logs
+for runs using a [remote artifact store](../../user-guide/production-guide/remote-storage.md) **may be** accessible, if the artifact store has been configured
+with a [service connector](../infrastructure-deployment/auth-management/service-connectors-guide.md). Please read [this chapter](../../user-guide/production-guide/remote-storage.md) of
 the production guide to learn how to configure a remote artifact store with a service connector.
 
 If configured correctly, the logs are displayed in the dashboard as follows:
 
-![Displaying step logs on the dashboard](../../../.gitbook/assets/zenml_step_logs.png)
+![Displaying step logs on the dashboard](../../.gitbook/assets/zenml_step_logs.png)
 
 {% hint style="warning" %}
 If you do not want to store the logs for your pipeline (for example due to performance reduction or storage limits),
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/README.md b/docs/book/how-to/customize-docker-builds/README.md
similarity index 62%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/README.md
rename to docs/book/how-to/customize-docker-builds/README.md
index da604618a9..746c09af3e 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/README.md
+++ b/docs/book/how-to/customize-docker-builds/README.md
@@ -5,7 +5,7 @@ description: Using Docker images to run your pipeline.
 
 # Customize Docker Builds
 
-ZenML executes pipeline steps sequentially in the active Python environment when running locally. However, with remote [orchestrators](../../../user-guide/production-guide/cloud-orchestration.md) or [step operators](../../../component-guide/step-operators/step-operators.md), ZenML builds [Docker](https://www.docker.com/) images to run your pipeline in an isolated, well-defined environment.
+ZenML executes pipeline steps sequentially in the active Python environment when running locally. However, with remote [orchestrators](../../user-guide/production-guide/cloud-orchestration.md) or [step operators](../../component-guide/step-operators/step-operators.md), ZenML builds [Docker](https://www.docker.com/) images to run your pipeline in an isolated, well-defined environment.
 
 This section discusses how to control this dockerization process.
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/define-where-an-image-is-built.md b/docs/book/how-to/customize-docker-builds/define-where-an-image-is-built.md
similarity index 63%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/define-where-an-image-is-built.md
rename to docs/book/how-to/customize-docker-builds/define-where-an-image-is-built.md
index 6c37370535..552af1fc61 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/define-where-an-image-is-built.md
+++ b/docs/book/how-to/customize-docker-builds/define-where-an-image-is-built.md
@@ -4,11 +4,11 @@ description: Defining the image builder.
 
 # 🐳 Define where an image is built
 
-ZenML executes pipeline steps sequentially in the active Python environment when running locally. However, with remote [orchestrators](../../../component-guide/orchestrators/orchestrators.md) or [step operators](../../../component-guide/step-operators/step-operators.md), ZenML builds [Docker](https://www.docker.com/) images to run your pipeline in an isolated, well-defined environment.
+ZenML executes pipeline steps sequentially in the active Python environment when running locally. However, with remote [orchestrators](../../component-guide/orchestrators/orchestrators.md) or [step operators](../../component-guide/step-operators/step-operators.md), ZenML builds [Docker](https://www.docker.com/) images to run your pipeline in an isolated, well-defined environment.
 
-By default, execution environments are created locally in the client environment using the local Docker client. However, this requires Docker installation and permissions. ZenML offers [image builders](../../../component-guide/image-builders/image-builders.md), a special [stack component](../../../component-guide/README.md), allowing users to build and push Docker images in a different specialized _image builder environment_.
+By default, execution environments are created locally in the client environment using the local Docker client. However, this requires Docker installation and permissions. ZenML offers [image builders](../../component-guide/image-builders/image-builders.md), a special [stack component](../../component-guide/README.md), allowing users to build and push Docker images in a different specialized _image builder environment_.
 
-Note that even if you don't configure an image builder in your stack, ZenML still uses the [local image builder](../../../component-guide/image-builders/local.md) to retain consistency across all builds. In this case, the image builder environment is the same as the [client environment](../../infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment).
+Note that even if you don't configure an image builder in your stack, ZenML still uses the [local image builder](../../component-guide/image-builders/local.md) to retain consistency across all builds. In this case, the image builder environment is the same as the [client environment](../pipeline-development/configure-python-environments/README.md#client-environment-or-the-runner-environment).
 
 You don't need to directly interact with any image builder in your code. As long as the image builder that you want to use is part of your active [ZenML stack](/docs/book/user-guide/production-guide/understand-stacks.md), it will be used
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md b/docs/book/how-to/customize-docker-builds/docker-settings-on-a-pipeline.md
similarity index 83%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md
rename to docs/book/how-to/customize-docker-builds/docker-settings-on-a-pipeline.md
index 872cd69124..db342c4c8e 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md
+++ b/docs/book/how-to/customize-docker-builds/docker-settings-on-a-pipeline.md
@@ -4,7 +4,7 @@ description: Using Docker images to run your pipeline.
 
 # Specify Docker settings for a pipeline
 
-When a [pipeline is run with a remote orchestrator](../configure-python-environments/README.md) a [Dockerfile](https://docs.docker.com/engine/reference/builder/) is dynamically generated at runtime. It is then used to build the Docker image using the [image builder](../../infrastructure-deployment/configure-python-environments/README.md#image-builder-environment) component of your stack. The Dockerfile consists of the following steps:
+When a [pipeline is run with a remote orchestrator](../pipeline-development/configure-python-environments/README.md) a [Dockerfile](https://docs.docker.com/engine/reference/builder/) is dynamically generated at runtime. It is then used to build the Docker image using the [image builder](../pipeline-development/configure-python-environments/README.md#image-builder-environment) component of your stack. The Dockerfile consists of the following steps:
 
 * **Starts from a parent image** that has **ZenML installed**. By default, this will use the [official ZenML image](https://hub.docker.com/r/zenmldocker/zenml/) for the Python and ZenML version that you're using in the active Python environment. If you want to use a different image as the base for the following steps, check out [this guide](./docker-settings-on-a-pipeline.md#using-a-custom-parent-image).
 * **Installs additional pip dependencies**. ZenML will automatically detect which integrations are used in your stack and install the required dependencies. If your pipeline needs any additional requirements, check out our [guide on including custom dependencies](specify-pip-dependencies-and-apt-packages.md).
@@ -58,7 +58,7 @@ my_step = my_step.with_options(
 )
 ```
 
-* Using a YAML configuration file as described [here](../../pipeline-development/use-configuration-files/README.md):
+* Using a YAML configuration file as described [here](../pipeline-development/use-configuration-files/README.md):
 
 ```yaml
 settings:
@@ -72,11 +72,11 @@ steps:
   ...
 ```
 
-Check out [this page](../../pipeline-development/use-configuration-files/configuration-hierarchy.md) for more information on the hierarchy and precedence of the various ways in which you can supply the settings.
+Check out [this page](../pipeline-development/use-configuration-files/configuration-hierarchy.md) for more information on the hierarchy and precedence of the various ways in which you can supply the settings.
 
 ### Specifying Docker build options
 
-If you want to specify build options that get passed to the build method of the [image builder](../../infrastructure-deployment/configure-python-environments/README.md#image-builder-environment). For the default local image builder, these options get passed to the [`docker build` command](https://docker-py.readthedocs.io/en/stable/images.html#docker.models.images.ImageCollection.build).
+You can also specify build options that get passed to the build method of the [image builder](../pipeline-development/configure-python-environments/README.md#image-builder-environment). For the default local image builder, these options get passed to the [`docker build` command](https://docker-py.readthedocs.io/en/stable/images.html#docker.models.images.ImageCollection.build).
 
 ```python
 docker_settings = DockerSettings(build_config={"build_options": {...}})
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-step.md b/docs/book/how-to/customize-docker-builds/docker-settings-on-a-step.md
similarity index 100%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-step.md
rename to docs/book/how-to/customize-docker-builds/docker-settings-on-a-step.md
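For completeness, here is a self-contained sketch of the two ways the `docker-settings-on-a-pipeline.md` page attaches these settings, via the decorator and via `with_options`, with illustrative values:

```python
from zenml import pipeline
from zenml.config import DockerSettings

docker_settings = DockerSettings(requirements=["scikit-learn"])

# Option 1: attach the settings when the pipeline is defined.
@pipeline(settings={"docker": docker_settings})
def my_pipeline() -> None:
    ...

# Option 2: attach (or override) the settings later, without editing the definition.
my_pipeline = my_pipeline.with_options(settings={"docker": docker_settings})
```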
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/how-to-reuse-builds.md b/docs/book/how-to/customize-docker-builds/how-to-reuse-builds.md
similarity index 89%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/how-to-reuse-builds.md
rename to docs/book/how-to/customize-docker-builds/how-to-reuse-builds.md
index 17bfe22fc7..20ebe7f4d6 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/how-to-reuse-builds.md
+++ b/docs/book/how-to/customize-docker-builds/how-to-reuse-builds.md
@@ -37,9 +37,9 @@ You can also let ZenML use the artifact store to upload your code. This is the d
 
 ## Use code repositories to speed up Docker build times
 
-One way to speed up Docker builds is to connect a git repository. Registering a [code repository](../../../user-guide/production-guide/connect-code-repository.md) lets you avoid building images each time you run a pipeline **and** quickly iterate on your code. When running a pipeline that is part of a local code repository checkout, ZenML can instead build the Docker images without including any of your source files, and download the files inside the container before running your code. This greatly speeds up the building process and also allows you to reuse images that one of your colleagues might have built for the same stack.
+One way to speed up Docker builds is to connect a git repository. Registering a [code repository](../../user-guide/production-guide/connect-code-repository.md) lets you avoid building images each time you run a pipeline **and** quickly iterate on your code. When running a pipeline that is part of a local code repository checkout, ZenML can instead build the Docker images without including any of your source files, and download the files inside the container before running your code. This greatly speeds up the building process and also allows you to reuse images that one of your colleagues might have built for the same stack.
 
-ZenML will **automatically figure out which builds match your pipeline and reuse the appropriate build id**. Therefore, you **do not** need to explicitly pass in the build id when you have a clean repository state and a connected git repository. This approach is **highly recommended**. See an end to end example [here](../../../user-guide/production-guide/connect-code-repository.md).
+ZenML will **automatically figure out which builds match your pipeline and reuse the appropriate build id**. Therefore, you **do not** need to explicitly pass in the build id when you have a clean repository state and a connected git repository. This approach is **highly recommended**. See an end-to-end example [here](../../user-guide/production-guide/connect-code-repository.md).
 
 {% hint style="warning" %}
 In order to benefit from the advantages of having a code repository in a project, you need to make sure that **the relevant integrations are installed for your ZenML installation**. For instance, let's assume you are working on a project with ZenML and one of your team members has already registered a corresponding code repository of type `github` for it. If you do `zenml code-repository list`, you would also be able to see this repository. However, in order to fully use this repository, you still need to install the corresponding integration for it, in this example the `github` integration.
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/how-to-use-a-private-pypi-repository.md b/docs/book/how-to/customize-docker-builds/how-to-use-a-private-pypi-repository.md
similarity index 100%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/how-to-use-a-private-pypi-repository.md
rename to docs/book/how-to/customize-docker-builds/how-to-use-a-private-pypi-repository.md
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md b/docs/book/how-to/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md
similarity index 90%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md
rename to docs/book/how-to/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md
index b86bfc8f44..5c8794c424 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md
+++ b/docs/book/how-to/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md
@@ -4,7 +4,7 @@
 The configuration for specifying pip and apt dependencies only works in the remote pipeline case, and is disregarded for local pipelines (i.e. pipelines that run locally without having to build a Docker image).
 {% endhint %}
 
-When a [pipeline is run with a remote orchestrator](../../infrastructure-deployment/configure-python-environments/README.md) a [Dockerfile](https://docs.docker.com/engine/reference/builder/) is dynamically generated at runtime. It is then used to build the Docker image using the [image builder](../../infrastructure-deployment/configure-python-environments/README.md#-configure-python-environments) component of your stack.
+When a [pipeline is run with a remote orchestrator](../pipeline-development/configure-python-environments/README.md) a [Dockerfile](https://docs.docker.com/engine/reference/builder/) is dynamically generated at runtime. It is then used to build the Docker image using the [image builder](../pipeline-development/configure-python-environments/README.md#-configure-python-environments) component of your stack.
 
 For all of examples on this page, note that `DockerSettings` can be imported using `from zenml.config import DockerSettings`.
@@ -58,7 +58,7 @@ def my_pipeline(...):
 def my_pipeline(...):
     ...
 ```
-* Specify a list of [ZenML integrations](../../../component-guide/README.md) that you're using in your pipeline:
+* Specify a list of [ZenML integrations](../../component-guide/README.md) that you're using in your pipeline:
 
 ```python
 from zenml.integrations.constants import PYTORCH, EVIDENTLY
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/use-a-prebuilt-image.md b/docs/book/how-to/customize-docker-builds/use-a-prebuilt-image.md
similarity index 96%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/use-a-prebuilt-image.md
rename to docs/book/how-to/customize-docker-builds/use-a-prebuilt-image.md
index 77abf4f29a..052c5dea2a 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/use-a-prebuilt-image.md
+++ b/docs/book/how-to/customize-docker-builds/use-a-prebuilt-image.md
@@ -106,7 +106,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends YOUR_APT_PACKAG
 The files containing your pipeline and step code and all other necessary functions should be available in your execution environment.
 
-- If you have a [code repository](../../../user-guide/production-guide/connect-code-repository.md) registered, you don't need to include your code files in the image yourself. ZenML will download them from the repository to the appropriate location in the image.
+- If you have a [code repository](../../user-guide/production-guide/connect-code-repository.md) registered, you don't need to include your code files in the image yourself. ZenML will download them from the repository to the appropriate location in the image.
 
 - If you don't have a code repository but `allow_download_from_artifact_store` is set to `True` in your `DockerSettings` (`True` by default), ZenML will upload your code to the artifact store and make it available to the image.
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/use-your-own-docker-files.md b/docs/book/how-to/customize-docker-builds/use-your-own-docker-files.md
similarity index 100%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/use-your-own-docker-files.md
rename to docs/book/how-to/customize-docker-builds/use-your-own-docker-files.md
diff --git a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/which-files-are-built-into-the-image.md b/docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md
similarity index 92%
rename from docs/book/how-to/infrastructure-deployment/customize-docker-builds/which-files-are-built-into-the-image.md
rename to docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md
index c0b90ba006..52b8a478f3 100644
--- a/docs/book/how-to/infrastructure-deployment/customize-docker-builds/which-files-are-built-into-the-image.md
+++ b/docs/book/how-to/customize-docker-builds/which-files-are-built-into-the-image.md
@@ -6,7 +6,7 @@ ZenML determines the root directory of your source files in the following order
 * Otherwise, the parent directory of the Python file you're executing will be the source root. For example, running `python /path/to/file.py`, the source root would be `/path/to`.
 
 You can specify how the files inside this root directory are handled using the following three attributes on the [DockerSettings](https://sdkdocs.zenml.io/latest/core_code_docs/core-config/#zenml.config.docker_settings.DockerSettings):
-* `allow_download_from_code_repository`: If this is set to `True` and your files are inside a registered [code repository](../../project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md) and the repository has no local changes, the files will be downloaded from the code repository and not included in the image.
+* `allow_download_from_code_repository`: If this is set to `True` and your files are inside a registered [code repository](../../user-guide/production-guide/connect-code-repository.md) and the repository has no local changes, the files will be downloaded from the code repository and not included in the image.
 * `allow_download_from_artifact_store`: If the previous option is disabled or no code repository without local changes exists for the root directory, ZenML will archive and upload your code to the artifact store if this is set to `True`.
 * `allow_including_files_in_images`: If both previous options were disabled or not possible, ZenML will include your files in the Docker image if this option is enabled. This means a new Docker image has to be built each time you modify one of your code files.
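To make the three `DockerSettings` attributes from the last hunk concrete, here is a hedged sketch of how they combine; the flag values mirror the fallback order the page describes, but treat the exact combination as illustrative:

```python
from zenml import pipeline
from zenml.config import DockerSettings

docker_settings = DockerSettings(
    # First preference: download code from a clean, registered code repository.
    allow_download_from_code_repository=True,
    # Fallback: archive the source files and upload them to the artifact store.
    allow_download_from_artifact_store=True,
    # Last resort: bake the files into the image (forces a rebuild on every change).
    allow_including_files_in_images=False,
)

@pipeline(settings={"docker": docker_settings})
def my_pipeline() -> None:
    ...
```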
diff --git a/docs/book/how-to/data-artifact-management/complex-usecases/README.md b/docs/book/how-to/data-artifact-management/complex-usecases/README.md
new file mode 100644
index 0000000000..75fd292ef6
--- /dev/null
+++ b/docs/book/how-to/data-artifact-management/complex-usecases/README.md
@@ -0,0 +1,3 @@
+---
+icon: sitemap
+---
\ No newline at end of file
diff --git a/docs/book/how-to/data-artifact-management/handle-data-artifacts/datasets.md b/docs/book/how-to/data-artifact-management/complex-usecases/datasets.md
similarity index 100%
rename from docs/book/how-to/data-artifact-management/handle-data-artifacts/datasets.md
rename to docs/book/how-to/data-artifact-management/complex-usecases/datasets.md
diff --git a/docs/book/how-to/data-artifact-management/handle-data-artifacts/manage-big-data.md b/docs/book/how-to/data-artifact-management/complex-usecases/manage-big-data.md
similarity index 100%
rename from docs/book/how-to/data-artifact-management/handle-data-artifacts/manage-big-data.md
rename to docs/book/how-to/data-artifact-management/complex-usecases/manage-big-data.md
diff --git a/docs/book/how-to/data-artifact-management/handle-data-artifacts/passing-artifacts-between-pipelines.md b/docs/book/how-to/data-artifact-management/complex-usecases/passing-artifacts-between-pipelines.md
similarity index 100%
rename from docs/book/how-to/data-artifact-management/handle-data-artifacts/passing-artifacts-between-pipelines.md
rename to docs/book/how-to/data-artifact-management/complex-usecases/passing-artifacts-between-pipelines.md
diff --git a/docs/book/how-to/data-artifact-management/handle-data-artifacts/registering-existing-data.md b/docs/book/how-to/data-artifact-management/complex-usecases/registering-existing-data.md
similarity index 100%
rename from docs/book/how-to/data-artifact-management/handle-data-artifacts/registering-existing-data.md
rename to docs/book/how-to/data-artifact-management/complex-usecases/registering-existing-data.md
diff --git a/docs/book/how-to/data-artifact-management/handle-data-artifacts/unmaterialized-artifacts.md b/docs/book/how-to/data-artifact-management/complex-usecases/unmaterialized-artifacts.md
similarity index 100%
rename from docs/book/how-to/data-artifact-management/handle-data-artifacts/unmaterialized-artifacts.md
rename to docs/book/how-to/data-artifact-management/complex-usecases/unmaterialized-artifacts.md
diff --git a/docs/book/how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md b/docs/book/how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md
index 463438eb88..0c32700cf3 100644
--- a/docs/book/how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md
+++ b/docs/book/how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md
@@ -310,7 +310,7 @@ If you would like to disable artifact metadata extraction altogether, you can se
 
 ## Skipping materialization
 
-You can learn more about skipping materialization [here](unmaterialized-artifacts.md).
+You can learn more about skipping materialization [here](../complex-usecases/unmaterialized-artifacts.md).
 
 ## Interaction with custom artifact stores
diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/README.md b/docs/book/how-to/manage-zenml-server/README.md
similarity index 100%
rename from docs/book/how-to/advanced-topics/manage-zenml-server/README.md
rename to docs/book/how-to/manage-zenml-server/README.md
diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/best-practices-upgrading-zenml.md b/docs/book/how-to/manage-zenml-server/best-practices-upgrading-zenml.md
similarity index 85%
rename from docs/book/how-to/advanced-topics/manage-zenml-server/best-practices-upgrading-zenml.md
rename to docs/book/how-to/manage-zenml-server/best-practices-upgrading-zenml.md
index ca7e4b6ae1..3688c49f5f 100644
--- a/docs/book/how-to/advanced-topics/manage-zenml-server/best-practices-upgrading-zenml.md
+++ b/docs/book/how-to/manage-zenml-server/best-practices-upgrading-zenml.md
@@ -16,16 +16,16 @@ Follow the tips below while upgrading your server to mitigate data losses, downt
 - **Database Backup**: Before upgrading, create a backup of your MySQL database. This allows you to rollback if necessary.
 - **Automated Backups**: Consider setting up automatic daily backups of your database for added security. Most managed services like AWS RDS, Google Cloud SQL, and Azure Database for MySQL offer automated backup options.
 
-![Screenshot of backups in AWS RDS](../../../.gitbook/assets/aws-rds-backups.png)
+![Screenshot of backups in AWS RDS](../../.gitbook/assets/aws-rds-backups.png)
 
 ### Upgrade Strategies
 
 - **Staged Upgrade**: For large organizations or critical systems, consider using two ZenML server instances (old and new) and migrating services one by one to the new version.
 
-![Server Migration Step 1](../../../.gitbook/assets/server_migration_1.png)
+![Server Migration Step 1](../../.gitbook/assets/server_migration_1.png)
 
-![Server Migration Step 2](../../../.gitbook/assets/server_migration_2.png)
+![Server Migration Step 2](../../.gitbook/assets/server_migration_2.png)
 
 - **Team Coordination**: If multiple teams share a ZenML server instance, coordinate the upgrade timing to minimize disruption.
 - **Separate ZenML Servers**: Coordination between teams might be difficult if one team requires new features but the other can't upgrade yet. In such cases, it is recommended to use dedicated ZenML server instances per team or product to allow for more flexible upgrade schedules.
@@ -48,7 +48,7 @@ Sometimes, you might have to upgrade your code to work with a new version of Zen
 - **Local Testing**: It's a good idea to test it locally first after you upgrade (`pip install zenml --upgrade`) and run some old pipelines to check for compatibility issues between the old and new versions.
 - **End-to-End Testing**: You can also develop simple end-to-end tests to ensure that the new version works with your pipeline code and your stack. ZenML already has an [extensive test suite](https://github.com/zenml-io/zenml/tree/main/tests) that we use for releases and you can use it as an example.
-- **Artifact Compatibility**: Be cautious with pickle-based [materializers](../../../how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md), as they can be sensitive to changes in Python versions or libraries. Consider using version-agnostic materialization methods for critical artifacts. You can try to load older artifacts with the new version of ZenML to see if they are compatible. Every artifact has an ID which you can use to load it in the following way:
+- **Artifact Compatibility**: Be cautious with pickle-based [materializers](../../how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md), as they can be sensitive to changes in Python versions or libraries. Consider using version-agnostic materialization methods for critical artifacts. You can try to load older artifacts with the new version of ZenML to see if they are compatible. Every artifact has an ID which you can use to load it in the following way:
 
 ```python
 from zenml.client import Client
@@ -59,7 +59,7 @@ loaded_artifact = artifact.load()
 
 ### Dependency Management
 
-- **Python Version**: Make sure that the Python version you are using is compatible with the ZenML version you are upgrading to. Check out the [installation guide](../../../getting-started/installation.md) to find out which Python version is supported.
+- **Python Version**: Make sure that the Python version you are using is compatible with the ZenML version you are upgrading to. Check out the [installation guide](../../getting-started/installation.md) to find out which Python version is supported.
 - **External Dependencies**: Be mindful of external dependencies (e.g. from integrations) that might be incompatible with the new version of ZenML. This could be the case when some older versions are no longer supported or maintained and the ZenML integration is updated to use a newer version. You can find this information in the [release notes](https://github.com/zenml-io/zenml/releases) for the new version of ZenML.
 
 ### Handling API Changes
diff --git a/docs/book/how-to/project-setup-and-management/connecting-to-zenml/README.md b/docs/book/how-to/manage-zenml-server/connecting-to-zenml/README.md
similarity index 100%
rename from docs/book/how-to/project-setup-and-management/connecting-to-zenml/README.md
rename to docs/book/how-to/manage-zenml-server/connecting-to-zenml/README.md
diff --git a/docs/book/how-to/project-setup-and-management/connecting-to-zenml/connect-in-with-your-user-interactive.md b/docs/book/how-to/manage-zenml-server/connecting-to-zenml/connect-in-with-your-user-interactive.md
similarity index 100%
rename from docs/book/how-to/project-setup-and-management/connecting-to-zenml/connect-in-with-your-user-interactive.md
rename to docs/book/how-to/manage-zenml-server/connecting-to-zenml/connect-in-with-your-user-interactive.md
diff --git a/docs/book/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account.md b/docs/book/how-to/manage-zenml-server/connecting-to-zenml/connect-with-a-service-account.md
similarity index 100%
rename from docs/book/how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account.md
rename to docs/book/how-to/manage-zenml-server/connecting-to-zenml/connect-with-a-service-account.md
diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-guide.md b/docs/book/how-to/manage-zenml-server/migration-guide/migration-guide.md
similarity index 100%
rename from docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-guide.md
rename to docs/book/how-to/manage-zenml-server/migration-guide/migration-guide.md
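The code block in the artifact-compatibility hunk above is split across two hunks and omits the lookup line in between; here is a complete, hedged version of the pattern it describes (the artifact ID is a placeholder):

```python
from zenml.client import Client

# Placeholder ID; substitute the ID of one of your own artifact versions.
artifact = Client().get_artifact_version("YOUR_ARTIFACT_ID")
loaded_artifact = artifact.load()
```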
diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-forty.md b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-forty.md
similarity index 91%
rename from docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-forty.md
rename to docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-forty.md
index a8614bc02f..6fb472182b 100644
--- a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-forty.md
+++ b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-forty.md
@@ -135,7 +135,7 @@ def my_pipeline():
 {% endtab %}
 {% endtabs %}
 
-Check out [this page](../../how-to/pipeline-development/build-pipelines/use-pipeline-step-parameters.md) for more information on how to parameterize your steps.
+Check out [this page](../../pipeline-development/build-pipelines/use-pipeline-step-parameters.md) for more information on how to parameterize your steps.
 
 ## Calling a step outside of a pipeline
@@ -353,7 +353,7 @@ loaded_model = model.load()
 {% endtab %}
 {% endtabs %}
 
-Check out [this page](../../../model-management-metrics/track-metrics-metadata/fetch-metadata-within-steps.md) for more information on how to programmatically fetch information about previous pipeline runs.
+Check out [this page](../../model-management-metrics/track-metrics-metadata/fetch-metadata-within-steps.md) for more information on how to programmatically fetch information about previous pipeline runs.
 
 ## Controlling the step execution order
@@ -385,7 +385,7 @@ def my_pipeline():
 {% endtab %}
 {% endtabs %}
 
-Check out [this page](../../../pipeline-development/build-pipelines/control-execution-order-of-steps.md) for more information on how to control the step execution order.
+Check out [this page](../../pipeline-development/build-pipelines/control-execution-order-of-steps.md) for more information on how to control the step execution order.
 
 ## Defining steps with multiple outputs
@@ -424,7 +424,7 @@ def my_step() -> Tuple[
 {% endtab %}
 {% endtabs %}
 
-Check out [this page](../../../pipeline-development/build-pipelines/step-output-typing-and-annotation.md) for more information on how to annotate your step outputs.
+Check out [this page](../../pipeline-development/build-pipelines/step-output-typing-and-annotation.md) for more information on how to annotate your step outputs.
 
 ## Accessing run information inside steps
@@ -457,6 +457,6 @@ def my_step() -> Any:  # New: StepContext is no longer an argument of the step
 {% endtab %}
 {% endtabs %}
 
-Check out [this page](../../../model-management-metrics/track-metrics-metadata/fetch-metadata-within-steps.md) for more information on how to fetch run information inside your steps using `get_step_context()`.
+Check out [this page](../../model-management-metrics/track-metrics-metadata/fetch-metadata-within-steps.md) for more information on how to fetch run information inside your steps using `get_step_context()`.
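To make the new `get_step_context()` pattern above concrete, here is a minimal sketch (assuming a post-0.40 ZenML version where `get_step_context` and `step` are exported from the top-level `zenml` package; the printed attributes are illustrative):

```python
from typing import Any

from zenml import get_step_context, step


@step
def my_step() -> Any:
    # Fetch run information inside the step body instead of receiving
    # a StepContext argument, which was the pre-0.40 pattern.
    context = get_step_context()
    print(context.pipeline.name)      # enclosing pipeline
    print(context.pipeline_run.name)  # current pipeline run
    print(context.step_run.name)      # this step invocation
    return 1
```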
diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-sixty.md b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-sixty.md similarity index 99% rename from docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-sixty.md rename to docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-sixty.md index a66b8480b0..60b5fc3cb9 100644 --- a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-sixty.md +++ b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-sixty.md @@ -56,7 +56,7 @@ is still using `sqlalchemy` v1 and is incompatible with pydantic v2. As a solution, we have removed the dependencies of the `airflow` integration. Now, you can use ZenML to create your Airflow pipelines and use a separate environment to run them with Airflow. You can check the updated docs -[right here](../../../../component-guide/orchestrators/airflow.md). +[right here](../../../component-guide/orchestrators/airflow.md). ### AWS diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-thirty.md b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-thirty.md similarity index 100% rename from docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-thirty.md rename to docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-thirty.md diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-twenty.md similarity index 99% rename from docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md rename to docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-twenty.md index d0334358d1..e44d4a54a6 100644 --- a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md +++ b/docs/book/how-to/manage-zenml-server/migration-guide/migration-zero-twenty.md @@ -16,7 +16,7 @@ If you have updated to ZenML 0.20.0 by mistake or are experiencing issues with t High-level overview of the changes: -* [ZenML takes over the Metadata Store](migration-zero-twenty.md#zenml-takes-over-the-metadata-store-role) role. All information about your ZenML Stacks, pipelines, and artifacts is tracked by ZenML itself directly. If you are currently using remote Metadata Stores (e.g. deployed in cloud) in your stacks, you will probably need to replace them with a [ZenML server deployment](../../../../getting-started/deploying-zenml/README.md). +* [ZenML takes over the Metadata Store](migration-zero-twenty.md#zenml-takes-over-the-metadata-store-role) role. All information about your ZenML Stacks, pipelines, and artifacts is tracked by ZenML itself directly. If you are currently using remote Metadata Stores (e.g. deployed in cloud) in your stacks, you will probably need to replace them with a [ZenML server deployment](../../../getting-started/deploying-zenml/README.md). * the [new ZenML Dashboard](migration-zero-twenty.md#the-zenml-dashboard-is-now-available) is now available with all ZenML deployments. * [ZenML Profiles have been removed](migration-zero-twenty.md#removal-of-profiles-and-the-local-yaml-database) in favor of ZenML Projects. You need to [manually migrate your existing ZenML Profiles](migration-zero-twenty.md#-how-to-migrate-your-profiles) after the update. 
* the [configuration of Stack Components is now decoupled from their implementation](migration-zero-twenty.md#decoupling-stack-component-configuration-from-implementation). If you extended ZenML with custom stack component implementations, you may need to update the way they are registered in ZenML. @@ -24,7 +24,7 @@ High-level overview of the changes: ## ZenML takes over the Metadata Store role -ZenML can now run [as a server](../../../../getting-started/core-concepts.md#zenml-server-and-dashboard) that can be accessed via a REST API and also comes with a visual user interface (called the ZenML Dashboard). This server can be deployed in arbitrary environments (local, on-prem, via Docker, on AWS, GCP, Azure etc.) and supports user management, workspace scoping, and more. +ZenML can now run [as a server](../../../getting-started/core-concepts.md#zenml-server-and-dashboard) that can be accessed via a REST API and also comes with a visual user interface (called the ZenML Dashboard). This server can be deployed in arbitrary environments (local, on-prem, via Docker, on AWS, GCP, Azure etc.) and supports user management, workspace scoping, and more. The release introduces a series of commands to facilitate managing the lifecycle of the ZenML server and to access the pipeline and pipeline run information: diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/troubleshoot-your-deployed-server.md b/docs/book/how-to/manage-zenml-server/troubleshoot-your-deployed-server.md similarity index 100% rename from docs/book/how-to/advanced-topics/manage-zenml-server/troubleshoot-your-deployed-server.md rename to docs/book/how-to/manage-zenml-server/troubleshoot-your-deployed-server.md diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/upgrade-zenml-server.md b/docs/book/how-to/manage-zenml-server/upgrade-zenml-server.md similarity index 100% rename from docs/book/how-to/advanced-topics/manage-zenml-server/upgrade-zenml-server.md rename to docs/book/how-to/manage-zenml-server/upgrade-zenml-server.md diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/using-zenml-server-in-prod.md b/docs/book/how-to/manage-zenml-server/using-zenml-server-in-prod.md similarity index 95% rename from docs/book/how-to/advanced-topics/manage-zenml-server/using-zenml-server-in-prod.md rename to docs/book/how-to/manage-zenml-server/using-zenml-server-in-prod.md index 6ffadb6496..82bd3265d2 100644 --- a/docs/book/how-to/advanced-topics/manage-zenml-server/using-zenml-server-in-prod.md +++ b/docs/book/how-to/manage-zenml-server/using-zenml-server-in-prod.md @@ -44,7 +44,7 @@ To scale your ZenML server deployed as a service on ECS, you can follow the step - If you scroll down, you will see the "Service auto scaling - optional" section. - Here you can enable autoscaling and set the minimum and maximum number of tasks to run for your service and also the ECS service metric to use for scaling. -![Image showing autoscaling settings for a service](../../../.gitbook/assets/ecs_autoscaling.png) +![Image showing autoscaling settings for a service](../../.gitbook/assets/ecs_autoscaling.png) {% endtab %} @@ -60,7 +60,7 @@ To scale your ZenML server deployed on Cloud Run, you can follow the steps below - Scroll down to the "Revision auto-scaling" section. - Here you can set the minimum and maximum number of instances to run for your service. 
-![Image showing autoscaling settings for a service](../../../.gitbook/assets/cloudrun_autoscaling.png) +![Image showing autoscaling settings for a service](../../.gitbook/assets/cloudrun_autoscaling.png) {% endtab %} {% tab title="Docker Compose" %} @@ -159,7 +159,7 @@ sum by(namespace) (rate(container_cpu_usage_seconds_total{namespace=~"zenml.*"}[ This query would give you the CPU utilization of your server pods in all namespaces that start with `zenml`. The image below shows what this query looks like in Grafana. -![Image showing CPU utilization of ZenML server pods](../../../.gitbook/assets/grafana_dashboard.png) +![Image showing CPU utilization of ZenML server pods](../../.gitbook/assets/grafana_dashboard.png) {% endtab %} @@ -168,7 +168,7 @@ On ECS, you can utilize the [CloudWatch integration](https://docs.aws.amazon.com In the "Health and metrics" section of your ECS console, you should see metrics pertaining to your ZenML service like CPU utilization and Memory utilization. -![Image showing CPU utilization ECS](../../../.gitbook/assets/ecs_cpu_utilization.png) +![Image showing CPU utilization ECS](../../.gitbook/assets/ecs_cpu_utilization.png) {% endtab %} {% tab title="Cloud Run" %} @@ -176,7 +176,7 @@ In Cloud Run, you can utilize the [Cloud Monitoring integration](https://cloud.g The "Metrics" tab in the Cloud Run console will show you metrics like Container CPU utilization, Container memory utilization, and more. -![Image showing metrics in Cloud Run](../../../.gitbook/assets/cloudrun_metrics.png) +![Image showing metrics in Cloud Run](../../.gitbook/assets/cloudrun_metrics.png) {% endtab %} {% endtabs %} diff --git a/docs/book/how-to/infrastructure-deployment/configure-python-environments/README.md b/docs/book/how-to/pipeline-development/configure-python-environments/README.md similarity index 100% rename from docs/book/how-to/infrastructure-deployment/configure-python-environments/README.md rename to docs/book/how-to/pipeline-development/configure-python-environments/README.md diff --git a/docs/book/how-to/infrastructure-deployment/configure-python-environments/configure-the-server-environment.md b/docs/book/how-to/pipeline-development/configure-python-environments/configure-the-server-environment.md similarity index 100% rename from docs/book/how-to/infrastructure-deployment/configure-python-environments/configure-the-server-environment.md rename to docs/book/how-to/pipeline-development/configure-python-environments/configure-the-server-environment.md diff --git a/docs/book/how-to/infrastructure-deployment/configure-python-environments/handling-dependencies.md b/docs/book/how-to/pipeline-development/configure-python-environments/handling-dependencies.md similarity index 100% rename from docs/book/how-to/infrastructure-deployment/configure-python-environments/handling-dependencies.md rename to docs/book/how-to/pipeline-development/configure-python-environments/handling-dependencies.md diff --git a/docs/book/how-to/project-setup-and-management/develop-locally/README.md b/docs/book/how-to/pipeline-development/develop-locally/README.md similarity index 100% rename from docs/book/how-to/project-setup-and-management/develop-locally/README.md rename to docs/book/how-to/pipeline-development/develop-locally/README.md diff --git a/docs/book/how-to/project-setup-and-management/develop-locally/keep-your-dashboard-server-clean.md b/docs/book/how-to/pipeline-development/develop-locally/keep-your-dashboard-server-clean.md similarity index 100% rename from
docs/book/how-to/project-setup-and-management/develop-locally/keep-your-dashboard-server-clean.md rename to docs/book/how-to/pipeline-development/develop-locally/keep-your-dashboard-server-clean.md diff --git a/docs/book/how-to/project-setup-and-management/develop-locally/local-prod-pipeline-variants.md b/docs/book/how-to/pipeline-development/develop-locally/local-prod-pipeline-variants.md similarity index 100% rename from docs/book/how-to/project-setup-and-management/develop-locally/local-prod-pipeline-variants.md rename to docs/book/how-to/pipeline-development/develop-locally/local-prod-pipeline-variants.md diff --git a/docs/book/how-to/advanced-topics/run-remote-notebooks/README.md b/docs/book/how-to/pipeline-development/run-remote-notebooks/README.md similarity index 100% rename from docs/book/how-to/advanced-topics/run-remote-notebooks/README.md rename to docs/book/how-to/pipeline-development/run-remote-notebooks/README.md diff --git a/docs/book/how-to/advanced-topics/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md b/docs/book/how-to/pipeline-development/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md similarity index 100% rename from docs/book/how-to/advanced-topics/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md rename to docs/book/how-to/pipeline-development/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md diff --git a/docs/book/how-to/advanced-topics/run-remote-notebooks/run-a-single-step-from-a-notebook.md b/docs/book/how-to/pipeline-development/run-remote-notebooks/run-a-single-step-from-a-notebook.md similarity index 100% rename from docs/book/how-to/advanced-topics/run-remote-notebooks/run-a-single-step-from-a-notebook.md rename to docs/book/how-to/pipeline-development/run-remote-notebooks/run-a-single-step-from-a-notebook.md diff --git a/docs/book/how-to/advanced-topics/training-with-gpus/README.md b/docs/book/how-to/pipeline-development/training-with-gpus/README.md similarity index 100% rename from docs/book/how-to/advanced-topics/training-with-gpus/README.md rename to docs/book/how-to/pipeline-development/training-with-gpus/README.md diff --git a/docs/book/how-to/advanced-topics/training-with-gpus/accelerate-distributed-training.md b/docs/book/how-to/pipeline-development/training-with-gpus/accelerate-distributed-training.md similarity index 100% rename from docs/book/how-to/advanced-topics/training-with-gpus/accelerate-distributed-training.md rename to docs/book/how-to/pipeline-development/training-with-gpus/accelerate-distributed-training.md diff --git a/docs/book/how-to/pipeline-development/trigger-pipelines/use-templates-python.md b/docs/book/how-to/pipeline-development/trigger-pipelines/use-templates-python.md index 61e3f459f6..a6275ad86a 100644 --- a/docs/book/how-to/pipeline-development/trigger-pipelines/use-templates-python.md +++ b/docs/book/how-to/pipeline-development/trigger-pipelines/use-templates-python.md @@ -110,7 +110,7 @@ def loads_data_and_triggers_training(): Read more about the [PipelineRunConfiguration](https://sdkdocs.zenml.io/latest/core_code_docs/core-config/#zenml.config.pipeline_run_configuration.PipelineRunConfiguration) and [`trigger_pipeline`](https://sdkdocs.zenml.io/latest/core_code_docs/core-client/#zenml.client.Client) function object in the [SDK Docs](https://sdkdocs.zenml.io/). -Read more about Unmaterialized Artifacts [here](../../data-artifact-management/handle-data-artifacts/unmaterialized-artifacts.md). 
+Read more about Unmaterialized Artifacts [here](../../data-artifact-management/complex-usecases/unmaterialized-artifacts.md).
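As a compact illustration of the `trigger_pipeline` API referenced in the hunk above, here is a hedged sketch (the pipeline, step, and parameter names are placeholders, and triggering presupposes that a runnable template exists for the pipeline):

```python
from zenml.client import Client
from zenml.config.pipeline_run_configuration import PipelineRunConfiguration

# Override a single step parameter for the triggered run.
run_config = PipelineRunConfiguration(
    steps={"trainer": {"parameters": {"num_epochs": 5}}}
)

# Trigger a new run of the named pipeline using its run template.
run = Client().trigger_pipeline(
    pipeline_name_or_id="training_pipeline",
    run_configuration=run_config,
)
print(run.id)
```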
diff --git a/docs/book/how-to/pipeline-development/use-configuration-files/what-can-be-configured.md b/docs/book/how-to/pipeline-development/use-configuration-files/what-can-be-configured.md index 5816d6c767..5ec7c57f78 100644 --- a/docs/book/how-to/pipeline-development/use-configuration-files/what-can-be-configured.md +++ b/docs/book/how-to/pipeline-development/use-configuration-files/what-can-be-configured.md @@ -107,10 +107,10 @@ steps: These are boolean flags for various configurations: -* `enable_artifact_metadata`: Whether to [associate metadata with artifacts or not](../handle-data-artifacts/handle-custom-data-types.md#optional-which-metadata-to-extract-for-the-artifact). -* `enable_artifact_visualization`: Whether to [attach visualizations of artifacts](../visualize-artifacts/README.md). +* `enable_artifact_metadata`: Whether to [associate metadata with artifacts or not](../../data-artifact-management/handle-data-artifacts/handle-custom-data-types.md#optional-which-metadata-to-extract-for-the-artifact). +* `enable_artifact_visualization`: Whether to [attach visualizations of artifacts](../../data-artifact-management/visualize-artifacts/README.md). * `enable_cache`: Utilize [caching](../build-pipelines/control-caching-behavior.md) or not. -* `enable_step_logs`: Enable tracking [step logs](../control-logging/enable-or-disable-logs-storing.md). +* `enable_step_logs`: Enable tracking [step logs](../../control-logging/enable-or-disable-logs-storing.md). ```yaml enable_artifact_metadata: True diff --git a/docs/book/how-to/project-setup-and-management/collaborate-with-team/README.md b/docs/book/how-to/project-setup-and-management/collaborate-with-team/README.md new file mode 100644 index 0000000000..3ee43e702f --- /dev/null +++ b/docs/book/how-to/project-setup-and-management/collaborate-with-team/README.md @@ -0,0 +1,3 @@ +--- +icon: people-group +--- \ No newline at end of file diff --git a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/access-management.md b/docs/book/how-to/project-setup-and-management/collaborate-with-team/access-management.md similarity index 100% rename from docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/access-management.md rename to docs/book/how-to/project-setup-and-management/collaborate-with-team/access-management.md diff --git a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/using-project-templates.md b/docs/book/how-to/project-setup-and-management/collaborate-with-team/project-templates/README.md similarity index 100% rename from docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/using-project-templates.md rename to docs/book/how-to/project-setup-and-management/collaborate-with-team/project-templates/README.md diff --git a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/create-your-own-template.md b/docs/book/how-to/project-setup-and-management/collaborate-with-team/project-templates/create-your-own-template.md similarity index 86% rename from docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/create-your-own-template.md rename to docs/book/how-to/project-setup-and-management/collaborate-with-team/project-templates/create-your-own-template.md index 3f65354402..491b850d1a 100644 --- a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/create-your-own-template.md +++ 
b/docs/book/how-to/project-setup-and-management/collaborate-with-team/project-templates/create-your-own-template.md @@ -37,7 +37,7 @@ Replace `v1.0.0` with the git tag of the version you want to use. That's it! Now you have your own ZenML project template that you can use to quickly set up new ML projects. Remember to keep your template up-to-date with the latest best practices and changes in your ML workflows. -Our [Production Guide](../../../user-guide/production-guide/README.md) documentation is built around the `E2E Batch` project template codes. Most examples will be based on it, so we highly recommend you to install the `e2e_batch` template with `--template-with-defaults` flag before diving deeper into this documentation section, so you can follow this guide along using your own local environment. +Our [Production Guide](../../../../user-guide/production-guide/README.md) documentation is built around the `E2E Batch` project template code. Most examples will be based on it, so we highly recommend installing the `e2e_batch` template with the `--template-with-defaults` flag before diving deeper into this documentation section, so that you can follow along with this guide using your own local environment. ```bash mkdir e2e_batch diff --git a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/shared-components-for-teams.md b/docs/book/how-to/project-setup-and-management/collaborate-with-team/shared-components-for-teams.md similarity index 100% rename from docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/shared-components-for-teams.md rename to docs/book/how-to/project-setup-and-management/collaborate-with-team/shared-components-for-teams.md diff --git a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/stacks-pipelines-models.md b/docs/book/how-to/project-setup-and-management/collaborate-with-team/stacks-pipelines-models.md similarity index 100% rename from docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/stacks-pipelines-models.md rename to docs/book/how-to/project-setup-and-management/collaborate-with-team/stacks-pipelines-models.md diff --git a/docs/book/how-to/interact-with-secrets.md b/docs/book/how-to/project-setup-and-management/interact-with-secrets.md similarity index 100% rename from docs/book/how-to/interact-with-secrets.md rename to docs/book/how-to/project-setup-and-management/interact-with-secrets.md diff --git a/docs/book/reference/environment-variables.md b/docs/book/reference/environment-variables.md index a3f14338a3..c6452c26e4 100644 --- a/docs/book/reference/environment-variables.md +++ b/docs/book/reference/environment-variables.md @@ -17,7 +17,7 @@ Choose from `INFO`, `WARN`, `ERROR`, `CRITICAL`, `DEBUG`. ## Disable step logs -Usually, ZenML [stores step logs in the artifact store](../how-to/advanced-topics/control-logging/enable-or-disable-logs-storing.md), but this can sometimes cause performance bottlenecks, especially if the code utilizes progress bars. +Usually, ZenML [stores step logs in the artifact store](../how-to/control-logging/enable-or-disable-logs-storing.md), but this can sometimes cause performance bottlenecks, especially if the code utilizes progress bars. If you want to configure whether logged output from steps is stored or not, set the `ZENML_DISABLE_STEP_LOGS_STORAGE` environment variable to `true`. Note that this will mean that logs from your steps will no longer be stored and thus won't be visible on the dashboard anymore.
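A minimal sketch of applying this variable where the steps actually run (for purely local runs, exporting it in your shell is enough; for remote orchestrators it has to reach the step containers, e.g. via `DockerSettings` as assumed here):

```python
from zenml import pipeline
from zenml.config import DockerSettings

# Propagate the setting into the step containers so that no step logs
# are written to the artifact store.
docker_settings = DockerSettings(
    environment={"ZENML_DISABLE_STEP_LOGS_STORAGE": "true"}
)


@pipeline(settings={"docker": docker_settings})
def my_pipeline() -> None:
    ...
```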
@@ -81,7 +81,7 @@ If you wish to disable colorful logging, set the following environment variable: ZENML_LOGGING_COLORS_DISABLED=true ``` -Note that setting this on the [client environment](../how-to/infrastructure-deployment/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will automatically disable colorful logging on remote orchestrators. If you wish to disable it locally, but turn on for remote orchestrators, you can set the `ZENML_LOGGING_COLORS_DISABLED` environment variable in your orchestrator's environment as follows: +Note that setting this on the [client environment](../how-to/pipeline-development/configure-python-environments/README.md#client-environment-or-the-runner-environment) (e.g. your local machine which runs the pipeline) will automatically disable colorful logging on remote orchestrators. If you wish to disable it locally, but turn on for remote orchestrators, you can set the `ZENML_LOGGING_COLORS_DISABLED` environment variable in your orchestrator's environment as follows: ```python docker_settings = DockerSettings(environment={"ZENML_LOGGING_COLORS_DISABLED": "false"}) diff --git a/docs/book/reference/how-do-i.md b/docs/book/reference/how-do-i.md index d6cef2f9a0..4ac076dd43 100644 --- a/docs/book/reference/how-do-i.md +++ b/docs/book/reference/how-do-i.md @@ -21,7 +21,7 @@ From there, each of the custom stack component types has a dedicated section abo * **dependency clashes** mitigation with ZenML? -Check out [our dedicated documentation page](../how-to/infrastructure-deployment/configure-python-environments/handling-dependencies.md) on some ways you can try to solve these dependency and versioning issues. +Check out [our dedicated documentation page](../how-to/pipeline-development/configure-python-environments/handling-dependencies.md) on some ways you can try to solve these dependency and versioning issues. * **deploy cloud infrastructure** and/or MLOps stacks? diff --git a/docs/book/reference/python-client.md b/docs/book/reference/python-client.md index fad315545b..441f17d112 100644 --- a/docs/book/reference/python-client.md +++ b/docs/book/reference/python-client.md @@ -43,7 +43,7 @@ These are the main ZenML resources that you can interact with via the ZenML Clie * **Step Runs**: The steps of all pipeline runs. Mainly useful for directly fetching a specific step of a run by its ID. * **Artifacts**: Information about all artifacts that were written to your artifact stores as part of pipeline runs. * **Schedules**: Metadata about the schedules that you have used to [schedule pipeline runs](../how-to/pipeline-development/build-pipelines/schedule-a-pipeline.md). -* **Builds**: The pipeline-specific Docker images that were created when [containerizing your pipeline](../how-to/infrastructure-deployment/customize-docker-builds/README.md). +* **Builds**: The pipeline-specific Docker images that were created when [containerizing your pipeline](../how-to/customize-docker-builds/README.md). * **Code Repositories**: The git code repositories that you have connected with your ZenML instance. See [here](../user-guide/production-guide/connect-code-repository.md) for more information. 
{% hint style="info" %} @@ -59,7 +59,7 @@ Checkout the [documentation on fetching runs](../how-to/pipeline-development/bui * Integration-enabled flavors like the [Kubeflow orchestrator](../component-guide/orchestrators/kubeflow.md), * Custom flavors that you have [created yourself](../how-to/infrastructure-deployment/stack-deployment/implement-a-custom-stack-component.md). * **User**: The users registered in your ZenML instance. If you are running locally, there will only be a single `default` user. -* **Secrets**: The infrastructure authentication secrets that you have registered in the [ZenML Secret Store](../how-to/interact-with-secrets.md). +* **Secrets**: The infrastructure authentication secrets that you have registered in the [ZenML Secret Store](../how-to/project-setup-and-management/interact-with-secrets.md). * **Service Connectors**: The service connectors that you have set up to [connect ZenML to your infrastructure](../how-to/infrastructure-deployment/auth-management/README.md). ### Client Methods diff --git a/docs/book/toc.md b/docs/book/toc.md index aff3ce0c7b..193547242a 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -67,23 +67,33 @@ * [Evaluation for finetuning](user-guide/llmops-guide/finetuning-llms/evaluation-for-finetuning.md) * [Deploying finetuned models](user-guide/llmops-guide/finetuning-llms/deploying-finetuned-models.md) * [Next steps](user-guide/llmops-guide/finetuning-llms/next-steps.md) + ## How-To +* [Manage your ZenML server](how-to/manage-zenml-server/README.md) + * [Connect to a server](how-to/manage-zenml-server/connecting-to-zenml/README.md) + * [Connect in with your User (interactive)](how-to/manage-zenml-server/connecting-to-zenml/connect-in-with-your-user-interactive.md) + * [Connect with a Service Account](how-to/manage-zenml-server/connecting-to-zenml/connect-with-a-service-account.md) + * [Upgrade your ZenML server](how-to/manage-zenml-server/upgrade-zenml-server.md) + * [Best practices for upgrading ZenML](how-to/manage-zenml-server/best-practices-upgrading-zenml.md) + * [Using ZenML server in production](how-to/manage-zenml-server/using-zenml-server-in-prod.md) + * [Troubleshoot your ZenML server](how-to/manage-zenml-server/troubleshoot-your-deployed-server.md) + * [Migration guide](how-to/manage-zenml-server/migration-guide/migration-guide.md) + * [Migration guide 0.13.2 → 0.20.0](how-to/manage-zenml-server/migration-guide/migration-zero-twenty.md) + * [Migration guide 0.23.0 → 0.30.0](how-to/manage-zenml-server/migration-guide/migration-zero-thirty.md) + * [Migration guide 0.39.1 → 0.41.0](how-to/manage-zenml-server/migration-guide/migration-zero-forty.md) + * [Migration guide 0.58.2 → 0.60.0](how-to/manage-zenml-server/migration-guide/migration-zero-sixty.md) * [Project Setup and Management](how-to/project-setup-and-management/README.md) * [Set up a ZenML project](how-to/project-setup-and-management/setting-up-a-project-repository/README.md) * [Set up a repository](how-to/project-setup-and-management/setting-up-a-project-repository/set-up-repository.md) * [Connect your git repository](how-to/project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md) - * [Project templates](how-to/project-setup-and-management/setting-up-a-project-repository/using-project-templates.md) - * [Create your own template](how-to/project-setup-and-management/setting-up-a-project-repository/create-your-own-template.md) - * [Shared components for 
teams](how-to/project-setup-and-management/setting-up-a-project-repository/shared-components-for-teams.md) - * [Stacks, pipelines and models](how-to/project-setup-and-management/setting-up-a-project-repository/stacks-pipelines-models.md) - * [Access management](how-to/project-setup-and-management/setting-up-a-project-repository/access-management.md) - * [Develop locally](how-to/project-setup-and-management/develop-locally/README.md) - * [Use config files to develop locally](how-to/project-setup-and-management/develop-locally/local-prod-pipeline-variants.md) - * [Keep your pipelines and dashboard clean](how-to/project-setup-and-management/develop-locally/keep-your-dashboard-server-clean.md) - * [Connect to a server](how-to/project-setup-and-management/connecting-to-zenml/README.md) - * [Connect in with your User (interactive)](how-to/project-setup-and-management/connecting-to-zenml/connect-in-with-your-user-interactive.md) - * [Connect with a Service Account](how-to/project-setup-and-management/connecting-to-zenml/connect-with-a-service-account.md) + * [Collaborate with your team](how-to/project-setup-and-management/collaborate-with-team/README.md) + * [Project templates](how-to/project-setup-and-management/collaborate-with-team/project-templates/README.md) + * [Create your own template](how-to/project-setup-and-management/collaborate-with-team/project-templates/create-your-own-template.md) + * [Shared components for teams](how-to/project-setup-and-management/collaborate-with-team/shared-components-for-teams.md) + * [Setting up Stacks, pipelines and models](how-to/project-setup-and-management/collaborate-with-team/stacks-pipelines-models.md) + * [Access management](how-to/project-setup-and-management/collaborate-with-team/access-management.md) + * [Interact with secrets](how-to/project-setup-and-management/interact-with-secrets.md) * [Pipeline Development](how-to/pipeline-development/README.md) * [Build a pipeline](how-to/pipeline-development/build-pipelines/README.md) * [Use pipeline/step parameters](how-to/pipeline-development/build-pipelines/use-pipeline-step-parameters.md) @@ -106,6 +116,9 @@ * [Run an individual step](how-to/pipeline-development/build-pipelines/run-an-individual-step.md) * [Fetching pipelines](how-to/pipeline-development/build-pipelines/fetching-pipelines.md) * [Get past pipeline/step runs](how-to/pipeline-development/build-pipelines/get-past-pipeline-step-runs.md) + * [Develop locally](how-to/pipeline-development/develop-locally/README.md) + * [Use config files to develop locally](how-to/pipeline-development/develop-locally/local-prod-pipeline-variants.md) + * [Keep your pipelines and dashboard clean](how-to/pipeline-development/develop-locally/keep-your-dashboard-server-clean.md) * [Trigger a pipeline](how-to/pipeline-development/trigger-pipelines/README.md) * [Use templates: Python SDK](how-to/pipeline-development/trigger-pipelines/use-templates-python.md) * [Use templates: CLI](how-to/pipeline-development/trigger-pipelines/use-templates-cli.md) @@ -118,8 +131,26 @@ * [Configuration hierarchy](how-to/pipeline-development/use-configuration-files/configuration-hierarchy.md) * [Find out which configuration was used for a run](how-to/pipeline-development/use-configuration-files/retrieve-used-configuration-of-a-run.md) * [Autogenerate a template yaml file](how-to/pipeline-development/use-configuration-files/autogenerate-a-template-yaml-file.md) + * [Train with GPUs](how-to/pipeline-development/training-with-gpus/README.md) + * [Distributed Training with 🤗 
Accelerate](how-to/pipeline-development/training-with-gpus/accelerate-distributed-training.md) + * [Run remote pipelines from notebooks](how-to/pipeline-development/run-remote-notebooks/README.md) + * [Limitations of defining steps in notebook cells](how-to/pipeline-development/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md) + * [Run a single step from a notebook](how-to/pipeline-development/run-remote-notebooks/run-a-single-step-from-a-notebook.md) + * [Configure Python environments](how-to/pipeline-development/configure-python-environments/README.md) + * [Handling dependencies](how-to/pipeline-development/configure-python-environments/handling-dependencies.md) + * [Configure the server environment](how-to/pipeline-development/configure-python-environments/configure-the-server-environment.md) +* [Customize Docker builds](how-to/customize-docker-builds/README.md) + * [Docker settings on a pipeline](how-to/customize-docker-builds/docker-settings-on-a-pipeline.md) + * [Docker settings on a step](how-to/customize-docker-builds/docker-settings-on-a-step.md) + * [Use a prebuilt image for pipeline execution](how-to/customize-docker-builds/use-a-prebuilt-image.md) + * [Specify pip dependencies and apt packages](how-to/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md) + * [How to use a private PyPI repository](how-to/customize-docker-builds/how-to-use-a-private-pypi-repository.md) + * [Use your own Dockerfiles](how-to/customize-docker-builds/use-your-own-docker-files.md) + * [Which files are built into the image](how-to/customize-docker-builds/which-files-are-built-into-the-image.md) + * [How to reuse builds](how-to/customize-docker-builds/how-to-reuse-builds.md) + * [Define where an image is built](how-to/customize-docker-builds/define-where-an-image-is-built.md) * [Data and Artifact Management](how-to/data-artifact-management/README.md) - * [Handle Data/Artifacts](how-to/data-artifact-management/handle-data-artifacts/README.md) + * [Understand ZenML artifacts](how-to/data-artifact-management/handle-data-artifacts/README.md) * [How ZenML stores data](how-to/data-artifact-management/handle-data-artifacts/artifact-versioning.md) * [Return multiple outputs from a step](how-to/data-artifact-management/handle-data-artifacts/return-multiple-outputs-from-a-step.md) * [Delete an artifact](how-to/data-artifact-management/handle-data-artifacts/delete-an-artifact.md) @@ -128,11 +159,12 @@ * [Get arbitrary artifacts in a step](how-to/data-artifact-management/handle-data-artifacts/get-arbitrary-artifacts-in-a-step.md) * [Handle custom data types](how-to/data-artifact-management/handle-data-artifacts/handle-custom-data-types.md) * [Load artifacts into memory](how-to/data-artifact-management/handle-data-artifacts/load-artifacts-into-memory.md) - * [Datasets in ZenML](how-to/data-artifact-management/handle-data-artifacts/datasets.md) - * [Manage big data](how-to/data-artifact-management/handle-data-artifacts/manage-big-data.md) - * [Skipping materialization](how-to/data-artifact-management/handle-data-artifacts/unmaterialized-artifacts.md) - * [Passing artifacts between pipelines](how-to/data-artifact-management/handle-data-artifacts/passing-artifacts-between-pipelines.md) - * [Register Existing Data as a ZenML Artifact](how-to/data-artifact-management/handle-data-artifacts/registering-existing-data.md) + * [Complex use-cases](how-to/data-artifact-management/complex-usecases/README.md) + * [Datasets in 
ZenML](how-to/data-artifact-management/complex-usecases/datasets.md) + * [Manage big data](how-to/data-artifact-management/complex-usecases/manage-big-data.md) + * [Skipping materialization](how-to/data-artifact-management/complex-usecases/unmaterialized-artifacts.md) + * [Passing artifacts between pipelines](how-to/data-artifact-management/complex-usecases/passing-artifacts-between-pipelines.md) + * [Register Existing Data as a ZenML Artifact](how-to/data-artifact-management/complex-usecases/registering-existing-data.md) * [Visualizing artifacts](how-to/data-artifact-management/visualize-artifacts/README.md) * [Default visualizations](how-to/data-artifact-management/visualize-artifacts/types-of-visualizations.md) * [Creating custom visualizations](how-to/data-artifact-management/visualize-artifacts/creating-custom-visualizations.md) @@ -158,7 +190,7 @@ * [Special Metadata Types](how-to/model-management-metrics/track-metrics-metadata/logging-metadata.md) * [Fetch metadata within steps](how-to/model-management-metrics/track-metrics-metadata/fetch-metadata-within-steps.md) * [Fetch metadata during pipeline composition](how-to/model-management-metrics/track-metrics-metadata/fetch-metadata-within-pipeline.md) -* [Infrastructure and Deployment](how-to/infrastructure-deployment/README.md) +* [Stack infrastructure and deployment](how-to/infrastructure-deployment/README.md) * [Manage stacks & components](how-to/infrastructure-deployment/stack-deployment/README.md) * [Deploy a cloud stack with ZenML](how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack.md) * [Deploy a cloud stack with Terraform](how-to/infrastructure-deployment/stack-deployment/deploy-a-cloud-stack-with-terraform.md) @@ -169,17 +201,7 @@ * [Infrastructure as code](how-to/infrastructure-deployment/infrastructure-as-code/README.md) * [Manage your stacks with Terraform](how-to/infrastructure-deployment/infrastructure-as-code/terraform-stack-management.md) * [ZenML & Terraform Best Practices](how-to/infrastructure-deployment/infrastructure-as-code/best-practices.md) - * [Customize Docker builds](how-to/infrastructure-deployment/customize-docker-builds/README.md) - * [Docker settings on a pipeline](how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md) - * [Docker settings on a step](how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-step.md) - * [Use a prebuilt image for pipeline execution](how-to/infrastructure-deployment/customize-docker-builds/use-a-prebuilt-image.md) - * [Specify pip dependencies and apt packages](how-to/infrastructure-deployment/customize-docker-builds/specify-pip-dependencies-and-apt-packages.md) - * [How to use a private PyPI repository](how-to/infrastructure-deployment/customize-docker-builds/how-to-use-a-private-pypi-repository.md) - * [Use your own Dockerfiles](how-to/infrastructure-deployment/customize-docker-builds/use-your-own-docker-files.md) - * [Which files are built into the image](how-to/infrastructure-deployment/customize-docker-builds/which-files-are-built-into-the-image.md) - * [How to reuse builds](how-to/infrastructure-deployment/customize-docker-builds/how-to-reuse-builds.md) - * [Define where an image is built](how-to/infrastructure-deployment/customize-docker-builds/define-where-an-image-is-built.md) - * [Connect services](how-to/infrastructure-deployment/auth-management/README.md) + * [Connect services via connectors](how-to/infrastructure-deployment/auth-management/README.md) * [Service Connectors 
guide](how-to/infrastructure-deployment/auth-management/service-connectors-guide.md) * [Security best practices](how-to/infrastructure-deployment/auth-management/best-security-practices.md) * [Docker Service Connector](how-to/infrastructure-deployment/auth-management/docker-service-connector.md) @@ -188,31 +210,12 @@ * [GCP Service Connector](how-to/infrastructure-deployment/auth-management/gcp-service-connector.md) * [Azure Service Connector](how-to/infrastructure-deployment/auth-management/azure-service-connector.md) * [HyperAI Service Connector](how-to/infrastructure-deployment/auth-management/hyperai-service-connector.md) - * [Configure Python environments](how-to/infrastructure-deployment/configure-python-environments/README.md) - * [Handling dependencies](how-to/infrastructure-deployment/configure-python-environments/handling-dependencies.md) - * [Configure the server environment](how-to/infrastructure-deployment/configure-python-environments/configure-the-server-environment.md) -* [Advanced Topics](how-to/advanced-topics/README.md) - * [Train with GPUs](how-to/advanced-topics/training-with-gpus/README.md) - * [Distributed Training with 🤗 Accelerate](how-to/advanced-topics/training-with-gpus/accelerate-distributed-training.md) - * [Run remote pipelines from notebooks](how-to/advanced-topics/run-remote-notebooks/README.md) - * [Limitations of defining steps in notebook cells](how-to/advanced-topics/run-remote-notebooks/limitations-of-defining-steps-in-notebook-cells.md) - * [Run a single step from a notebook](how-to/advanced-topics/run-remote-notebooks/run-a-single-step-from-a-notebook.md) - * [Manage your ZenML server](how-to/advanced-topics/manage-zenml-server/README.md) - * [Best practices for upgrading ZenML](how-to/advanced-topics/manage-zenml-server/best-practices-upgrading-zenml.md) - * [Upgrade your ZenML server](how-to/advanced-topics/manage-zenml-server/upgrade-zenml-server.md) - * [Using ZenML server in production](how-to/advanced-topics/manage-zenml-server/using-zenml-server-in-prod.md) - * [Troubleshoot your ZenML server](how-to/advanced-topics/manage-zenml-server/troubleshoot-your-deployed-server.md) - * [Migration guide](how-to/advanced-topics/manage-zenml-server/migration-guide/migration-guide.md) - * [Migration guide 0.13.2 → 0.20.0](how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md) - * [Migration guide 0.23.0 → 0.30.0](how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-thirty.md) - * [Migration guide 0.39.1 → 0.41.0](how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-forty.md) - * [Migration guide 0.58.2 → 0.60.0](how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-sixty.md) - * [Control logging](how-to/advanced-topics/control-logging/README.md) - * [View logs on the dashboard](how-to/advanced-topics/control-logging/view-logs-on-the-dasbhoard.md) - * [Enable or disable logs storage](how-to/advanced-topics/control-logging/enable-or-disable-logs-storing.md) - * [Set logging verbosity](how-to/advanced-topics/control-logging/set-logging-verbosity.md) - * [Disable `rich` traceback output](how-to/advanced-topics/control-logging/disable-rich-traceback.md) - * [Disable colorful logging](how-to/advanced-topics/control-logging/disable-colorful-logging.md) +* [Control logging](how-to/control-logging/README.md) + * [View logs on the dashboard](how-to/control-logging/view-logs-on-the-dasbhoard.md) + * [Enable or disable logs 
storage](how-to/control-logging/enable-or-disable-logs-storing.md) + * [Set logging verbosity](how-to/control-logging/set-logging-verbosity.md) + * [Disable `rich` traceback output](how-to/control-logging/disable-rich-traceback.md) + * [Disable colorful logging](how-to/control-logging/disable-colorful-logging.md) * [Popular integrations](how-to/popular-integrations/README.md) * [Run on AWS](how-to/popular-integrations/aws-guide.md) * [Run on GCP](how-to/popular-integrations/gcp-guide.md) @@ -221,10 +224,9 @@ * [Kubernetes](how-to/popular-integrations/kubernetes.md) * [MLflow](how-to/popular-integrations/mlflow.md) * [Skypilot](how-to/popular-integrations/skypilot.md) -* [Interact with secrets](how-to/interact-with-secrets.md) -* [Debug and solve issues](how-to/debug-and-solve-issues.md) -* [Contribute to ZenML](how-to/contribute-to-zenml/README.md) +* [Contribute to/Extend ZenML](how-to/contribute-to-zenml/README.md) * [Implement a custom integration](how-to/contribute-to-zenml/implement-a-custom-integration.md) +* [Debug and solve issues](how-to/debug-and-solve-issues.md) ## Stack Components diff --git a/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md b/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md index 6f995f7439..def093ac5a 100644 --- a/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md +++ b/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md @@ -186,7 +186,7 @@ def finetuning_pipeline(...): ``` This configuration ensures that your training environment has all the necessary -components for distributed training. For more details, see the [Accelerate documentation](../../../how-to/advanced-topics/training-with-gpus/accelerate-distributed-training.md). +components for distributed training. For more details, see the [Accelerate documentation](../../../how-to/pipeline-development/training-with-gpus/accelerate-distributed-training.md). ## Dataset iteration diff --git a/docs/book/user-guide/production-guide/ci-cd.md b/docs/book/user-guide/production-guide/ci-cd.md index 7470bf9554..eee740d49a 100644 --- a/docs/book/user-guide/production-guide/ci-cd.md +++ b/docs/book/user-guide/production-guide/ci-cd.md @@ -69,8 +69,8 @@ This step is optional, all you'll need for certain is a stack that runs remotely storage). The rest is up to you. You might for example want to parametrize your pipeline to use different data sources for the respective environments. You can also use different [configuration files](../../how-to/configuring-zenml/configuring-zenml.md) for the different environments to configure the [Model](../../how-to/model-management-metrics/model-control-plane/README.md), the -[DockerSettings](../../how-to/infrastructure-deployment/customize-docker-builds/docker-settings-on-a-pipeline.md), the [ResourceSettings like -accelerators](../../how-to/advanced-topics/training-with-gpus/README.md) differently for the different environments. +[DockerSettings](../../how-to/customize-docker-builds/docker-settings-on-a-pipeline.md), the [ResourceSettings like +accelerators](../../how-to/pipeline-development/training-with-gpus/README.md) differently for the different environments. 
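One hedged way to wire up such per-environment configuration (the file layout and the `DEPLOY_ENV` switch are hypothetical, not part of the guide):

```python
# run.py - select a per-environment config file before running the pipeline
import os

from zenml import pipeline


@pipeline
def training_pipeline() -> None:
    ...


if __name__ == "__main__":
    env = os.environ.get("DEPLOY_ENV", "staging")
    # configs/staging.yaml and configs/production.yaml can differ in their
    # Model, DockerSettings, and ResourceSettings sections.
    training_pipeline.with_options(config_path=f"configs/{env}.yaml")()
```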
### Trigger a pipeline on a Pull Request (Merge Request) diff --git a/docs/book/user-guide/production-guide/cloud-orchestration.md b/docs/book/user-guide/production-guide/cloud-orchestration.md index fae93eae61..107d5e9b62 100644 --- a/docs/book/user-guide/production-guide/cloud-orchestration.md +++ b/docs/book/user-guide/production-guide/cloud-orchestration.md @@ -27,7 +27,7 @@ for a shortcut on how to deploy & register a cloud stack. The easiest cloud orchestrator to start with is the [Skypilot](https://skypilot.readthedocs.io/) orchestrator running on a public cloud. The advantage of Skypilot is that it simply provisions a VM to execute the pipeline on your cloud provider. -Coupled with Skypilot, we need a mechanism to package your code and ship it to the cloud for Skypilot to do its thing. ZenML uses [Docker](https://www.docker.com/) to achieve this. Every time you run a pipeline with a remote orchestrator, [ZenML builds an image](../../how-to/setting-up-a-project-repository/connect-your-git-repository.md) for the entire pipeline (and optionally each step of a pipeline depending on your [configuration](../../how-to/infrastructure-deployment/customize-docker-builds/README.md)). This image contains the code, requirements, and everything else needed to run the steps of the pipeline in any environment. ZenML then pushes this image to the container registry configured in your stack, and the orchestrator pulls the image when it's ready to execute a step. +Coupled with Skypilot, we need a mechanism to package your code and ship it to the cloud for Skypilot to do its thing. ZenML uses [Docker](https://www.docker.com/) to achieve this. Every time you run a pipeline with a remote orchestrator, [ZenML builds an image](../../how-to/setting-up-a-project-repository/connect-your-git-repository.md) for the entire pipeline (and optionally each step of a pipeline depending on your [configuration](../../how-to/customize-docker-builds/README.md)). This image contains the code, requirements, and everything else needed to run the steps of the pipeline in any environment. ZenML then pushes this image to the container registry configured in your stack, and the orchestrator pulls the image when it's ready to execute a step. To summarize, here is the broad sequence of events that happen when you run a pipeline with such a cloud stack: diff --git a/docs/book/user-guide/production-guide/configure-pipeline.md b/docs/book/user-guide/production-guide/configure-pipeline.md index ea1b3d375f..cdfd95a261 100644 --- a/docs/book/user-guide/production-guide/configure-pipeline.md +++ b/docs/book/user-guide/production-guide/configure-pipeline.md @@ -148,7 +148,7 @@ steps: {% hint style="info" %} Read more about settings in ZenML [here](../../how-to/pipeline-development/use-configuration-files/runtime-configuration.md) and -[here](../../how-to/advanced-topics/training-with-gpus/README.md) +[here](../../how-to/pipeline-development/training-with-gpus/README.md) {% endhint %} Now let's run the pipeline again: @@ -159,6 +159,6 @@ python run.py --training-pipeline Now you should notice that the machine that gets provisioned on your cloud provider has a different configuration compared to last time. As easy as that! -Bear in mind that not every orchestrator supports `ResourceSettings` directly.
To learn more, you can read about [`ResourceSettings` here](../../how-to/pipeline-development/use-configuration-files/runtime-configuration.md), including the ability to [attach a GPU](../../how-to/advanced-topics/training-with-gpus/README.md#1-specify-a-cuda-enabled-parent-image-in-your-dockersettings). +Bear in mind that not every orchestrator supports `ResourceSettings` directly. To learn more, you can read about [`ResourceSettings` here](../../how-to/pipeline-development/use-configuration-files/runtime-configuration.md), including the ability to [attach a GPU](../../how-to/pipeline-development/training-with-gpus/README.md#1-specify-a-cuda-enabled-parent-image-in-your-dockersettings).
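To ground the `ResourceSettings` pointer above, a minimal sketch (whether each field is honored depends on the orchestrator, as the guide notes; the values are illustrative):

```python
from zenml import pipeline
from zenml.config import ResourceSettings

# Request more hardware for every step of the pipeline; orchestrators that
# do not support a given field will not apply it.
resource_settings = ResourceSettings(cpu_count=8, gpu_count=1, memory="16GB")


@pipeline(settings={"resources": resource_settings})
def training_pipeline() -> None:
    ...
```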
diff --git a/docs/book/user-guide/production-guide/remote-storage.md b/docs/book/user-guide/production-guide/remote-storage.md index a3667e3732..27b2461b83 100644 --- a/docs/book/user-guide/production-guide/remote-storage.md +++ b/docs/book/user-guide/production-guide/remote-storage.md @@ -120,7 +120,7 @@ While you can go ahead and [run your pipeline on your stack](remote-storage.md#r First, let's understand what a service connector does. In simple words, a service connector contains credentials that grant stack components access to cloud infrastructure. These credentials are stored in the form of a -[secret](../../how-to/interact-with-secrets.md), +[secret](../../how-to/project-setup-and-management/interact-with-secrets.md), and are available to the ZenML server to use. Using these credentials, the service connector brokers a short-lived token and grants temporary permissions to the stack component to access that infrastructure. This diagram represents diff --git a/docs/book/user-guide/starter-guide/manage-artifacts.md b/docs/book/user-guide/starter-guide/manage-artifacts.md index d51939798b..e6464d41f0 100644 --- a/docs/book/user-guide/starter-guide/manage-artifacts.md +++ b/docs/book/user-guide/starter-guide/manage-artifacts.md @@ -370,7 +370,7 @@ The artifact produced from the preexisting data will have a `pathlib.Path` type, Even if an artifact is created and stored externally, it can be treated like any other artifact produced by ZenML steps - with all the functionalities described above! -For more details and use-cases check-out detailed docs page [Register Existing Data as a ZenML Artifact](../../how-to/data-artifact-management/handle-data-artifacts/registering-existing-data.md). +For more details and use cases, check out the detailed docs page [Register Existing Data as a ZenML Artifact](../../how-to/data-artifact-management/complex-usecases/registering-existing-data.md). ## Logging metadata for an artifact From b73b567d7d5bd9091d12977d5963804bb8235eb8 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 12 Dec 2024 11:35:00 +0100 Subject: [PATCH 08/18] Add step run unique constraint (#3236) * Add step run unique constraint * Fix alembic order --- ...1d482b9e_add_step_run_unique_constraint.py | 37 +++++++++++++++++++ .../zen_stores/schemas/step_run_schemas.py | 9 ++++- src/zenml/zen_stores/sql_zen_store.py | 18 +++------ 3 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 src/zenml/zen_stores/migrations/versions/26351d482b9e_add_step_run_unique_constraint.py diff --git a/src/zenml/zen_stores/migrations/versions/26351d482b9e_add_step_run_unique_constraint.py b/src/zenml/zen_stores/migrations/versions/26351d482b9e_add_step_run_unique_constraint.py new file mode 100644 index 0000000000..a9f1b31563 --- /dev/null +++ b/src/zenml/zen_stores/migrations/versions/26351d482b9e_add_step_run_unique_constraint.py @@ -0,0 +1,37 @@ +"""Add step run unique constraint [26351d482b9e]. + +Revision ID: 26351d482b9e +Revises: 0.71.0 +Create Date: 2024-12-03 11:46:57.541578 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "26351d482b9e" +down_revision = "0.71.0" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Upgrade database schema and/or data, creating a new revision.""" + # ### commands auto generated by Alembic - please adjust!
### + with op.batch_alter_table("step_run", schema=None) as batch_op: + batch_op.create_unique_constraint( + "unique_step_name_for_pipeline_run", ["name", "pipeline_run_id"] + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade database schema and/or data back to the previous revision.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("step_run", schema=None) as batch_op: + batch_op.drop_constraint( + "unique_step_name_for_pipeline_run", type_="unique" + ) + + # ### end Alembic commands ### diff --git a/src/zenml/zen_stores/schemas/step_run_schemas.py b/src/zenml/zen_stores/schemas/step_run_schemas.py index f878850515..ea01de1ab2 100644 --- a/src/zenml/zen_stores/schemas/step_run_schemas.py +++ b/src/zenml/zen_stores/schemas/step_run_schemas.py @@ -19,7 +19,7 @@ from uuid import UUID from pydantic import ConfigDict -from sqlalchemy import TEXT, Column, String +from sqlalchemy import TEXT, Column, String, UniqueConstraint from sqlalchemy.dialects.mysql import MEDIUMTEXT from sqlmodel import Field, Relationship, SQLModel @@ -67,6 +67,13 @@ class StepRunSchema(NamedSchema, RunMetadataInterface, table=True): """SQL Model for steps of pipeline runs.""" __tablename__ = "step_run" + __table_args__ = ( + UniqueConstraint( + "name", + "pipeline_run_id", + name="unique_step_name_for_pipeline_run", + ), + ) # Fields start_time: Optional[datetime] = Field(nullable=True) diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py index bb3a77befb..464293515b 100644 --- a/src/zenml/zen_stores/sql_zen_store.py +++ b/src/zenml/zen_stores/sql_zen_store.py @@ -8167,25 +8167,17 @@ def create_run_step(self, step_run: StepRunRequest) -> StepRunResponse: f"with ID '{step_run.pipeline_run_id}' found." ) - # Check if the step name already exists in the pipeline run - existing_step_run = session.exec( - select(StepRunSchema) - .where(StepRunSchema.name == step_run.name) - .where( - StepRunSchema.pipeline_run_id == step_run.pipeline_run_id - ) - ).first() - if existing_step_run is not None: + step_schema = StepRunSchema.from_request(step_run) + session.add(step_schema) + try: + session.commit() + except IntegrityError: raise EntityExistsError( f"Unable to create step `{step_run.name}`: A step with " f"this name already exists in the pipeline run with ID " f"'{step_run.pipeline_run_id}'." 
) - # Create the step - step_schema = StepRunSchema.from_request(step_run) - session.add(step_schema) - # Add logs entry for the step if exists if step_run.logs is not None: log_entry = LogsSchema( From df8d0a8490a996662cd2ed5a6cf998abe727ac89 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 12 Dec 2024 16:35:15 +0100 Subject: [PATCH 09/18] Fix build reuse after stack updates (#3251) * Fix build reuse after stack updates * Tests * Linting * Add missing param to client method --- src/zenml/client.py | 7 +++ src/zenml/models/v2/core/pipeline_build.py | 52 +++++++++++++++++++++- src/zenml/pipelines/build_utils.py | 12 +++++ tests/unit/pipelines/test_build_utils.py | 20 ++++++++- 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/src/zenml/client.py b/src/zenml/client.py index 995f2d8bdb..8006a4285c 100644 --- a/src/zenml/client.py +++ b/src/zenml/client.py @@ -2663,11 +2663,13 @@ def list_builds( user_id: Optional[Union[str, UUID]] = None, pipeline_id: Optional[Union[str, UUID]] = None, stack_id: Optional[Union[str, UUID]] = None, + container_registry_id: Optional[Union[UUID, str]] = None, is_local: Optional[bool] = None, contains_code: Optional[bool] = None, zenml_version: Optional[str] = None, python_version: Optional[str] = None, checksum: Optional[str] = None, + stack_checksum: Optional[str] = None, hydrate: bool = False, ) -> Page[PipelineBuildResponse]: """List all builds. @@ -2684,11 +2686,14 @@ def list_builds( user_id: The id of the user to filter by. pipeline_id: The id of the pipeline to filter by. stack_id: The id of the stack to filter by. + container_registry_id: The id of the container registry to + filter by. is_local: Use to filter local builds. contains_code: Use to filter builds that contain code. zenml_version: The version of ZenML to filter by. python_version: The Python version to filter by. checksum: The build checksum to filter by. + stack_checksum: The stack checksum to filter by. hydrate: Flag deciding whether to hydrate the output model(s) by including metadata fields in the response. 
@@ -2707,11 +2712,13 @@ def list_builds( user_id=user_id, pipeline_id=pipeline_id, stack_id=stack_id, + container_registry_id=container_registry_id, is_local=is_local, contains_code=contains_code, zenml_version=zenml_version, python_version=python_version, checksum=checksum, + stack_checksum=stack_checksum, ) build_filter_model.set_scope_workspace(self.active_workspace.id) return self.zen_store.list_builds( diff --git a/src/zenml/models/v2/core/pipeline_build.py b/src/zenml/models/v2/core/pipeline_build.py index 3cb6dcb4e4..93c0ff63a8 100644 --- a/src/zenml/models/v2/core/pipeline_build.py +++ b/src/zenml/models/v2/core/pipeline_build.py @@ -14,7 +14,7 @@ """Models representing pipeline builds.""" import json -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union from uuid import UUID from pydantic import Field @@ -31,6 +31,8 @@ from zenml.models.v2.misc.build_item import BuildItem if TYPE_CHECKING: + from sqlalchemy.sql.elements import ColumnElement + from zenml.models.v2.core.pipeline import PipelineResponse from zenml.models.v2.core.stack import StackResponse @@ -446,6 +448,11 @@ def contains_code(self) -> bool: class PipelineBuildFilter(WorkspaceScopedFilter): """Model to enable advanced filtering of all pipeline builds.""" + FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ + *WorkspaceScopedFilter.FILTER_EXCLUDE_FIELDS, + "container_registry_id", + ] + workspace_id: Optional[Union[UUID, str]] = Field( description="Workspace for this pipeline build.", default=None, @@ -462,7 +469,12 @@ class PipelineBuildFilter(WorkspaceScopedFilter): union_mode="left_to_right", ) stack_id: Optional[Union[UUID, str]] = Field( - description="Stack used for the Pipeline Run", + description="Stack associated with the pipeline build.", + default=None, + union_mode="left_to_right", + ) + container_registry_id: Optional[Union[UUID, str]] = Field( + description="Container registry associated with the pipeline build.", default=None, union_mode="left_to_right", ) @@ -484,3 +496,39 @@ class PipelineBuildFilter(WorkspaceScopedFilter): checksum: Optional[str] = Field( description="The build checksum.", default=None ) + stack_checksum: Optional[str] = Field( + description="The stack checksum.", default=None + ) + + def get_custom_filters( + self, + ) -> List["ColumnElement[bool]"]: + """Get custom filters. + + Returns: + A list of custom filters. 
+        """
+        custom_filters = super().get_custom_filters()
+
+        from sqlmodel import and_
+
+        from zenml.enums import StackComponentType
+        from zenml.zen_stores.schemas import (
+            PipelineBuildSchema,
+            StackComponentSchema,
+            StackCompositionSchema,
+            StackSchema,
+        )
+
+        if self.container_registry_id:
+            container_registry_filter = and_(
+                PipelineBuildSchema.stack_id == StackSchema.id,
+                StackSchema.id == StackCompositionSchema.stack_id,
+                StackCompositionSchema.component_id == StackComponentSchema.id,
+                StackComponentSchema.type
+                == StackComponentType.CONTAINER_REGISTRY.value,
+                StackComponentSchema.id == self.container_registry_id,
+            )
+            custom_filters.append(container_registry_filter)
+
+        return custom_filters
diff --git a/src/zenml/pipelines/build_utils.py b/src/zenml/pipelines/build_utils.py
index eacbd1d07d..810f8d5f17 100644
--- a/src/zenml/pipelines/build_utils.py
+++ b/src/zenml/pipelines/build_utils.py
@@ -249,6 +249,11 @@ def find_existing_build(
     client = Client()
     stack = client.active_stack
 
+    if not stack.container_registry:
+        # There can be no non-local builds that we can reuse if there is no
+        # container registry in the stack.
+        return None
+
     python_version_prefix = ".".join(platform.python_version_tuple()[:2])
     required_builds = stack.get_docker_builds(deployment=deployment)
 
@@ -263,6 +268,13 @@ def find_existing_build(
         sort_by="desc:created",
         size=1,
         stack_id=stack.id,
+        # Until we implement stack versioning, users can still update their
+        # stack to update/remove the container registry. In that case, we might
+        # try to pull an image from a container registry that we don't have
+        # access to. This is why we add an additional check for the container
+        # registry ID here. (This is still not perfect, as users can update the
+        # container registry URI or config, but it is the best we can do.)
+        container_registry_id=stack.container_registry.id,
         # The build is local and it's not clear whether the images
         # exist on the current machine or if they've been overwritten.
# TODO: Should we support this by storing the unique Docker ID for diff --git a/tests/unit/pipelines/test_build_utils.py b/tests/unit/pipelines/test_build_utils.py index 73684af306..de278fac77 100644 --- a/tests/unit/pipelines/test_build_utils.py +++ b/tests/unit/pipelines/test_build_utils.py @@ -518,7 +518,9 @@ def test_local_repo_verification( assert isinstance(code_repo, StubCodeRepository) -def test_finding_existing_build(mocker, sample_deployment_response_model): +def test_finding_existing_build( + mocker, sample_deployment_response_model, remote_container_registry +): """Tests finding an existing build.""" mock_list_builds = mocker.patch( "zenml.client.Client.list_builds", @@ -551,14 +553,30 @@ def test_finding_existing_build(mocker, sample_deployment_response_model): ], ) + build_utils.find_existing_build( + deployment=sample_deployment_response_model, + code_repository=StubCodeRepository(), + ) + # No container registry -> no non-local build to pull + mock_list_builds.assert_not_called() + + mocker.patch.object( + Stack, + "container_registry", + new_callable=mocker.PropertyMock, + return_value=remote_container_registry, + ) + build = build_utils.find_existing_build( deployment=sample_deployment_response_model, code_repository=StubCodeRepository(), ) + mock_list_builds.assert_called_once_with( sort_by="desc:created", size=1, stack_id=Client().active_stack.id, + container_registry_id=remote_container_registry.id, is_local=False, contains_code=False, zenml_version=zenml.__version__, From ec1ac38c985640634ef612c52c91f02897f6eb34 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 12 Dec 2024 17:20:38 +0100 Subject: [PATCH 10/18] Fix fetching run template using the client (#3258) --- src/zenml/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/zenml/client.py b/src/zenml/client.py index 8006a4285c..3b50dc757a 100644 --- a/src/zenml/client.py +++ b/src/zenml/client.py @@ -3495,6 +3495,7 @@ def list_run_templates( logical_operator: LogicalOperators = LogicalOperators.AND, created: Optional[Union[datetime, str]] = None, updated: Optional[Union[datetime, str]] = None, + id: Optional[Union[UUID, str]] = None, name: Optional[str] = None, tag: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, @@ -3517,6 +3518,7 @@ def list_run_templates( logical_operator: Which logical operator to use [and, or]. created: Filter by the creation date. updated: Filter by the last updated date. + id: Filter by run template ID. name: Filter by run template name. tag: Filter by run template tags. workspace_id: Filter by workspace ID. 
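# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the surrounding patches.
# PATCH 10 adds an `id` argument to `Client.list_run_templates` (above) and
# forwards it to the filter model (next hunk), so fetching a run template by ID
# through the client works as expected. The UUID is a hypothetical placeholder.
from uuid import UUID

from zenml.client import Client

templates = Client().list_run_templates(
    id=UUID("12345678-1234-5678-1234-567812345678")
)
if templates.items:
    print(templates.items[0].name)
# ---------------------------------------------------------------------------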
@@ -3541,6 +3543,7 @@ def list_run_templates(
             logical_operator=logical_operator,
             created=created,
             updated=updated,
+            id=id,
             name=name,
             tag=tag,
             workspace_id=workspace_id,

From 96034f99e063cf0295fabd126c27765260c6cf85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bar=C4=B1=C5=9F=20Can=20Durak?=
 <36421093+bcdurak@users.noreply.github.com>
Date: Thu, 12 Dec 2024 17:34:33 +0100
Subject: [PATCH 11/18] Improved deprecation messages for artifact configs and
 run metadata (#3261)

* improved deprecation messages

* Update src/zenml/model/utils.py

Co-authored-by: Alexej Penner

* Update src/zenml/artifacts/utils.py

Co-authored-by: Alexej Penner

* fixed it again

---------

Co-authored-by: Alexej Penner
---
 src/zenml/artifacts/artifact_config.py | 13 ++++++++-----
 src/zenml/artifacts/utils.py           |  4 +++-
 src/zenml/model/utils.py               |  4 +++-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/zenml/artifacts/artifact_config.py b/src/zenml/artifacts/artifact_config.py
index 236d88ec9e..9aa578207d 100644
--- a/src/zenml/artifacts/artifact_config.py
+++ b/src/zenml/artifacts/artifact_config.py
@@ -104,15 +104,18 @@ def _remove_old_attributes(cls, data: Dict[str, Any]) -> Dict[str, Any]:
             )
         elif is_model_artifact:
             logger.warning(
-                "`ArtifactConfig.is_model_artifact` is deprecated and will be "
-                "removed soon. Use `ArtifactConfig.artifact_type` instead."
+                "`ArtifactConfig(..., is_model_artifact=True)` is deprecated "
+                "and will be removed soon. Use `ArtifactConfig(..., "
+                "artifact_type=ArtifactType.MODEL)` instead. For more info: "
+                "https://docs.zenml.io/user-guide/starter-guide/manage-artifacts"
             )
             data.setdefault("artifact_type", ArtifactType.MODEL)
         elif is_deployment_artifact:
             logger.warning(
-                "`ArtifactConfig.is_deployment_artifact` is deprecated and "
-                "will be removed soon. Use `ArtifactConfig.artifact_type` "
-                "instead."
+                "`ArtifactConfig(..., is_deployment_artifact=True)` is "
+                "deprecated and will be removed soon. Use `ArtifactConfig(..., "
+                "artifact_type=ArtifactType.SERVICE)` instead. For more info: "
+                "https://docs.zenml.io/user-guide/starter-guide/manage-artifacts"
             )
             data.setdefault("artifact_type", ArtifactType.SERVICE)
 
diff --git a/src/zenml/artifacts/utils.py b/src/zenml/artifacts/utils.py
index 2573964aa7..7930acc5c7 100644
--- a/src/zenml/artifacts/utils.py
+++ b/src/zenml/artifacts/utils.py
@@ -414,7 +414,9 @@ def log_artifact_metadata(
     """
     logger.warning(
         "The `log_artifact_metadata` function is deprecated and will soon be "
-        "removed. Please use `log_metadata` instead."
+        "removed. Please use "
+        "`log_metadata(metadata={...}, infer_artifact=True, ...)` instead. For more "
+        "info: https://docs.zenml.io/how-to/model-management-metrics/track-metrics-metadata/attach-metadata-to-an-artifact"
     )
 
     from zenml import log_metadata
diff --git a/src/zenml/model/utils.py b/src/zenml/model/utils.py
index a3612fc2c1..2c87d83b6d 100644
--- a/src/zenml/model/utils.py
+++ b/src/zenml/model/utils.py
@@ -56,7 +56,9 @@ def log_model_metadata(
     """
     logger.warning(
         "The `log_model_metadata` function is deprecated and will soon be "
-        "removed. Please use `log_metadata` instead."
+        "removed. Please use "
+        "`log_metadata(metadata={...}, infer_model=True)` instead. 
For more " + "info: https://docs.zenml.io/how-to/model-management-metrics/track-metrics-metadata/attach-metadata-to-a-model" ) from zenml import log_metadata From f738d66e1c50062e506135b49375753acd2aac65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bar=C4=B1=C5=9F=20Can=20Durak?= <36421093+bcdurak@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:04:26 +0100 Subject: [PATCH 12/18] Filtering and sorting (#3230) * first checkpoint * second checkpoint * formatting and linting * Auto-update of LLM Finetuning template * Auto-update of Starter template * Auto-update of E2E template * Auto-update of NLP template * Auto-update of LLM Finetuning template * Auto-update of Starter template * Auto-update of E2E template * Auto-update of NLP template * formatting * fixing the failing cli integration tests * adjusting the models * consistency check * formatting --------- Co-authored-by: GitHub Actions --- src/zenml/client.py | 46 ++- .../kubernetes_step_operator.py | 1 - src/zenml/models/v2/base/filter.py | 20 +- src/zenml/models/v2/base/scoped.py | 263 +++++++++++++++++- src/zenml/models/v2/core/artifact_version.py | 41 +-- src/zenml/models/v2/core/code_repository.py | 13 +- src/zenml/models/v2/core/component.py | 51 +--- src/zenml/models/v2/core/flavor.py | 12 +- src/zenml/models/v2/core/model.py | 58 +--- src/zenml/models/v2/core/model_version.py | 34 +-- .../models/v2/core/model_version_artifact.py | 14 +- .../v2/core/model_version_pipeline_run.py | 17 +- src/zenml/models/v2/core/pipeline.py | 96 +++---- src/zenml/models/v2/core/pipeline_build.py | 31 ++- .../models/v2/core/pipeline_deployment.py | 10 - src/zenml/models/v2/core/pipeline_run.py | 120 ++++++-- src/zenml/models/v2/core/run_template.py | 50 ++-- src/zenml/models/v2/core/schedule.py | 10 - src/zenml/models/v2/core/secret.py | 14 - src/zenml/models/v2/core/service.py | 25 +- src/zenml/models/v2/core/service_connector.py | 11 - src/zenml/models/v2/core/stack.py | 51 ++-- src/zenml/models/v2/core/step_run.py | 32 ++- src/zenml/models/v2/core/trigger.py | 22 +- src/zenml/zen_stores/sql_zen_store.py | 61 +--- 25 files changed, 609 insertions(+), 494 deletions(-) diff --git a/src/zenml/client.py b/src/zenml/client.py index 3b50dc757a..0441db7b97 100644 --- a/src/zenml/client.py +++ b/src/zenml/client.py @@ -1702,6 +1702,7 @@ def list_services( updated: Optional[datetime] = None, type: Optional[str] = None, flavor: Optional[str] = None, + user: Optional[Union[UUID, str]] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, hydrate: bool = False, @@ -1727,6 +1728,7 @@ def list_services( flavor: Use the service flavor for filtering workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. hydrate: Flag deciding whether to hydrate the output model(s) by including metadata fields in the response. running: Use the running status for filtering @@ -1753,6 +1755,7 @@ def list_services( flavor=flavor, workspace_id=workspace_id, user_id=user_id, + user=user, running=running, name=service_name, pipeline_name=pipeline_name, @@ -2249,6 +2252,7 @@ def list_flavors( type: Optional[str] = None, integration: Optional[str] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[FlavorResponse]: """Fetches all the flavor models. 
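# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the surrounding patches.
# PATCH 12 threads a `user` filter through most `Client.list_*` methods; it
# accepts either a user name or a UUID. The user name "alice" is hypothetical.
from zenml.client import Client

client = Client()
services = client.list_services(user="alice")              # filter by name
flavors = client.list_flavors(user=client.active_user.id)  # or by UUID
# ---------------------------------------------------------------------------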
@@ -2262,6 +2266,7 @@ def list_flavors( created: Use to flavors by time of creation updated: Use the last updated date for filtering user_id: The id of the user to filter by. + user: Filter by user name/ID. name: The name of the flavor to filter by. type: The type of the flavor to filter by. integration: The integration of the flavor to filter by. @@ -2277,6 +2282,7 @@ def list_flavors( sort_by=sort_by, logical_operator=logical_operator, user_id=user_id, + user=user, name=name, type=type, integration=integration, @@ -2661,6 +2667,7 @@ def list_builds( updated: Optional[Union[datetime, str]] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, pipeline_id: Optional[Union[str, UUID]] = None, stack_id: Optional[Union[str, UUID]] = None, container_registry_id: Optional[Union[UUID, str]] = None, @@ -2684,6 +2691,7 @@ def list_builds( updated: Use the last updated date for filtering workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. pipeline_id: The id of the pipeline to filter by. stack_id: The id of the stack to filter by. container_registry_id: The id of the container registry to @@ -2710,6 +2718,7 @@ def list_builds( updated=updated, workspace_id=workspace_id, user_id=user_id, + user=user, pipeline_id=pipeline_id, stack_id=stack_id, container_registry_id=container_registry_id, @@ -2778,7 +2787,7 @@ def get_event_source( allow_name_prefix_match: bool = True, hydrate: bool = True, ) -> EventSourceResponse: - """Get a event source by name, ID or prefix. + """Get an event source by name, ID or prefix. Args: name_id_or_prefix: The name, ID or prefix of the stack. @@ -2811,6 +2820,7 @@ def list_event_sources( event_source_type: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[EventSourceResponse]: """Lists all event_sources. @@ -2825,6 +2835,7 @@ def list_event_sources( updated: Use the last updated date for filtering workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. name: The name of the event_source to filter by. flavor: The flavor of the event_source to filter by. event_source_type: The subtype of the event_source to filter by. @@ -2841,6 +2852,7 @@ def list_event_sources( logical_operator=logical_operator, workspace_id=workspace_id, user_id=user_id, + user=user, name=name, flavor=flavor, plugin_subtype=event_source_type, @@ -3008,6 +3020,7 @@ def list_actions( action_type: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[ActionResponse]: """List actions. @@ -3022,6 +3035,7 @@ def list_actions( updated: Use the last updated date for filtering workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. name: The name of the action to filter by. flavor: The flavor of the action to filter by. action_type: The type of the action to filter by. 
@@ -3038,6 +3052,7 @@ def list_actions( logical_operator=logical_operator, workspace_id=workspace_id, user_id=user_id, + user=user, name=name, id=id, flavor=flavor, @@ -3186,6 +3201,7 @@ def list_triggers( action_subtype: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[TriggerResponse]: """Lists all triggers. @@ -3200,6 +3216,7 @@ def list_triggers( updated: Use the last updated date for filtering workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. name: The name of the trigger to filter by. event_source_id: The event source associated with the trigger. action_id: The action associated with the trigger. @@ -3222,6 +3239,7 @@ def list_triggers( logical_operator=logical_operator, workspace_id=workspace_id, user_id=user_id, + user=user, name=name, event_source_id=event_source_id, action_id=action_id, @@ -3372,6 +3390,7 @@ def list_deployments( updated: Optional[Union[datetime, str]] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, pipeline_id: Optional[Union[str, UUID]] = None, stack_id: Optional[Union[str, UUID]] = None, build_id: Optional[Union[str, UUID]] = None, @@ -3390,6 +3409,7 @@ def list_deployments( updated: Use the last updated date for filtering workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. pipeline_id: The id of the pipeline to filter by. stack_id: The id of the stack to filter by. build_id: The id of the build to filter by. @@ -3410,6 +3430,7 @@ def list_deployments( updated=updated, workspace_id=workspace_id, user_id=user_id, + user=user, pipeline_id=pipeline_id, stack_id=stack_id, build_id=build_id, @@ -3660,6 +3681,7 @@ def list_schedules( name: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, pipeline_id: Optional[Union[str, UUID]] = None, orchestrator_id: Optional[Union[str, UUID]] = None, active: Optional[Union[str, bool]] = None, @@ -3684,6 +3706,7 @@ def list_schedules( name: The name of the stack to filter by. workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. pipeline_id: The id of the pipeline to filter by. orchestrator_id: The id of the orchestrator to filter by. active: Use to filter by active status. @@ -3710,6 +3733,7 @@ def list_schedules( name=name, workspace_id=workspace_id, user_id=user_id, + user=user, pipeline_id=pipeline_id, orchestrator_id=orchestrator_id, active=active, @@ -3950,6 +3974,7 @@ def list_run_steps( original_step_run_id: Optional[Union[str, UUID]] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, model_version_id: Optional[Union[str, UUID]] = None, model: Optional[Union[UUID, str]] = None, hydrate: bool = False, @@ -3968,6 +3993,7 @@ def list_run_steps( end_time: Use to filter by the time when the step finished running workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. pipeline_run_id: The id of the pipeline run to filter by. deployment_id: The id of the deployment to filter by. 
original_step_run_id: The id of the original step run to filter by. @@ -4002,6 +4028,7 @@ def list_run_steps( name=name, workspace_id=workspace_id, user_id=user_id, + user=user, model_version_id=model_version_id, model=model, ) @@ -4674,6 +4701,7 @@ def list_secrets( scope: Optional[SecretScope] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[SecretResponse]: """Fetches all the secret models. @@ -4693,6 +4721,7 @@ def list_secrets( scope: The scope of the secret to filter by. workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. hydrate: Flag deciding whether to hydrate the output model(s) by including metadata fields in the response. @@ -4709,6 +4738,7 @@ def list_secrets( sort_by=sort_by, logical_operator=logical_operator, user_id=user_id, + user=user, workspace_id=workspace_id, name=name, scope=scope, @@ -5023,6 +5053,7 @@ def list_code_repositories( name: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[CodeRepositoryResponse]: """List all code repositories. @@ -5038,6 +5069,7 @@ def list_code_repositories( name: The name of the code repository to filter by. workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. hydrate: Flag deciding whether to hydrate the output model(s) by including metadata fields in the response. @@ -5055,6 +5087,7 @@ def list_code_repositories( name=name, workspace_id=workspace_id, user_id=user_id, + user=user, ) filter_model.set_scope_workspace(self.active_workspace.id) return self.zen_store.list_code_repositories( @@ -5415,6 +5448,7 @@ def list_service_connectors( resource_id: Optional[str] = None, workspace_id: Optional[Union[str, UUID]] = None, user_id: Optional[Union[str, UUID]] = None, + user: Optional[Union[UUID, str]] = None, labels: Optional[Dict[str, Optional[str]]] = None, secret_id: Optional[Union[str, UUID]] = None, hydrate: bool = False, @@ -5437,6 +5471,7 @@ def list_service_connectors( they can give access to. workspace_id: The id of the workspace to filter by. user_id: The id of the user to filter by. + user: Filter by user name/ID. name: The name of the service connector to filter by. labels: The labels of the service connector to filter by. secret_id: Filter by the id of the secret that is referenced by the @@ -5454,6 +5489,7 @@ def list_service_connectors( logical_operator=logical_operator, workspace_id=workspace_id or self.active_workspace.id, user_id=user_id, + user=user, name=name, connector_type=connector_type, auth_method=auth_method, @@ -6606,6 +6642,7 @@ def list_authorized_devices( client_id: Union[UUID, str, None] = None, status: Union[OAuthDeviceStatus, str, None] = None, trusted_device: Union[bool, str, None] = None, + user: Optional[Union[UUID, str]] = None, failed_auth_attempts: Union[int, str, None] = None, last_login: Optional[Union[datetime, str, None]] = None, hydrate: bool = False, @@ -6623,6 +6660,7 @@ def list_authorized_devices( expires: Use the expiration date for filtering. client_id: Use the client id for filtering. status: Use the status for filtering. + user: Filter by user name/ID. trusted_device: Use the trusted device flag for filtering. failed_auth_attempts: Use the failed auth attempts for filtering. 
last_login: Use the last login date for filtering. @@ -6642,6 +6680,7 @@ def list_authorized_devices( updated=updated, expires=expires, client_id=client_id, + user=user, status=status, trusted_device=trusted_device, failed_auth_attempts=failed_auth_attempts, @@ -6740,7 +6779,7 @@ def get_trigger_execution( trigger_execution_id: UUID, hydrate: bool = True, ) -> TriggerExecutionResponse: - """Get an trigger execution by ID. + """Get a trigger execution by ID. Args: trigger_execution_id: The ID of the trigger execution to get. @@ -6761,6 +6800,7 @@ def list_trigger_executions( size: int = PAGE_SIZE_DEFAULT, logical_operator: LogicalOperators = LogicalOperators.AND, trigger_id: Optional[UUID] = None, + user: Optional[Union[UUID, str]] = None, hydrate: bool = False, ) -> Page[TriggerExecutionResponse]: """List all trigger executions matching the given filter criteria. @@ -6771,6 +6811,7 @@ def list_trigger_executions( size: The maximum size of all pages. logical_operator: Which logical operator to use [and, or]. trigger_id: ID of the trigger to filter by. + user: Filter by user name/ID. hydrate: Flag deciding whether to hydrate the output model(s) by including metadata fields in the response. @@ -6782,6 +6823,7 @@ def list_trigger_executions( sort_by=sort_by, page=page, size=size, + user=user, logical_operator=logical_operator, ) filter_model.set_scope_workspace(self.active_workspace.id) diff --git a/src/zenml/integrations/kubernetes/step_operators/kubernetes_step_operator.py b/src/zenml/integrations/kubernetes/step_operators/kubernetes_step_operator.py index 0b7b01b546..52b19af2af 100644 --- a/src/zenml/integrations/kubernetes/step_operators/kubernetes_step_operator.py +++ b/src/zenml/integrations/kubernetes/step_operators/kubernetes_step_operator.py @@ -33,7 +33,6 @@ from zenml.step_operators import BaseStepOperator if TYPE_CHECKING: - from zenml.config.base_settings import BaseSettings from zenml.config.step_run_info import StepRunInfo from zenml.models import PipelineDeploymentBase diff --git a/src/zenml/models/v2/base/filter.py b/src/zenml/models/v2/base/filter.py index 1c4d2cccfb..1b79696134 100644 --- a/src/zenml/models/v2/base/filter.py +++ b/src/zenml/models/v2/base/filter.py @@ -436,7 +436,6 @@ class BaseFilter(BaseModel): le=PAGE_SIZE_MAXIMUM, description="Page size", ) - id: Optional[Union[UUID, str]] = Field( default=None, description="Id for this resource", @@ -491,13 +490,13 @@ def validate_sort_by(cls, value: Any) -> Any: ) value = column - if column in cls.FILTER_EXCLUDE_FIELDS: + if column in cls.CUSTOM_SORTING_OPTIONS: + return value + elif column in cls.FILTER_EXCLUDE_FIELDS: raise ValueError( f"This resource can not be sorted by this field: '{value}'" ) - elif column in cls.model_fields: - return value - elif column in cls.CUSTOM_SORTING_OPTIONS: + if column in cls.model_fields: return value else: raise ValueError( @@ -759,7 +758,7 @@ def offset(self) -> int: return self.size * (self.page - 1) def generate_filter( - self, table: Type[SQLModel] + self, table: Type["AnySchema"] ) -> Union["ColumnElement[bool]"]: """Generate the filter for the query. 
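# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the surrounding patches.
# With the reordered checks in `validate_sort_by` above, a column listed in
# `CUSTOM_SORTING_OPTIONS` (such as the new "user" option) is accepted even
# though it is not a field of the filter model, while excluded fields still
# raise a ValueError. Assumes the patched filter classes from this series:
from zenml.models import PipelineFilter

PipelineFilter(sort_by="desc:user")  # custom sorting option, not a model field
PipelineFilter(sort_by="asc:name")   # regular model field still validates
# ---------------------------------------------------------------------------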
@@ -779,7 +778,7 @@ def generate_filter( filters.append( column_filter.generate_query_conditions(table=table) ) - for custom_filter in self.get_custom_filters(): + for custom_filter in self.get_custom_filters(table): filters.append(custom_filter) if self.logical_operator == LogicalOperators.OR: return or_(False, *filters) @@ -788,12 +787,17 @@ def generate_filter( else: raise RuntimeError("No valid logical operator was supplied.") - def get_custom_filters(self) -> List["ColumnElement[bool]"]: + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List["ColumnElement[bool]"]: """Get custom filters. This can be overridden by subclasses to define custom filters that are not based on the columns of the underlying table. + Args: + table: The query table. + Returns: A list of custom filters. """ diff --git a/src/zenml/models/v2/base/scoped.py b/src/zenml/models/v2/base/scoped.py index f563b6dc81..f5267f4840 100644 --- a/src/zenml/models/v2/base/scoped.py +++ b/src/zenml/models/v2/base/scoped.py @@ -23,6 +23,7 @@ Optional, Type, TypeVar, + Union, ) from uuid import UUID @@ -151,16 +152,32 @@ class UserScopedFilter(BaseFilter): FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *BaseFilter.FILTER_EXCLUDE_FIELDS, + "user", "scope_user", ] CLI_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *BaseFilter.CLI_EXCLUDE_FIELDS, + "user_id", "scope_user", ] + CUSTOM_SORTING_OPTIONS: ClassVar[List[str]] = [ + *BaseFilter.CUSTOM_SORTING_OPTIONS, + "user", + ] + scope_user: Optional[UUID] = Field( default=None, description="The user to scope this query to.", ) + user_id: Optional[Union[UUID, str]] = Field( + default=None, + description="UUID of the user that created the entity.", + union_mode="left_to_right", + ) + user: Optional[Union[UUID, str]] = Field( + default=None, + description="Name/ID of the user that created the entity.", + ) def set_scope_user(self, user_id: UUID) -> None: """Set the user that is performing the filtering to scope the response. @@ -170,6 +187,73 @@ def set_scope_user(self, user_id: UUID) -> None: """ self.scope_user = user_id + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List["ColumnElement[bool]"]: + """Get custom filters. + + Args: + table: The query table. + + Returns: + A list of custom filters. + """ + custom_filters = super().get_custom_filters(table) + + from sqlmodel import and_ + + from zenml.zen_stores.schemas import UserSchema + + if self.user: + user_filter = and_( + getattr(table, "user_id") == UserSchema.id, + self.generate_name_or_id_query_conditions( + value=self.user, + table=UserSchema, + additional_columns=["full_name"], + ), + ) + custom_filters.append(user_filter) + + return custom_filters + + def apply_sorting( + self, + query: AnyQuery, + table: Type["AnySchema"], + ) -> AnyQuery: + """Apply sorting to the query. + + Args: + query: The query to which to apply the sorting. + table: The query table. + + Returns: + The query with sorting applied. 
+ """ + from sqlmodel import asc, desc + + from zenml.enums import SorterOps + from zenml.zen_stores.schemas import UserSchema + + sort_by, operand = self.sorting_params + + if sort_by == "user": + column = UserSchema.name + + query = query.join( + UserSchema, getattr(table, "user_id") == UserSchema.id + ) + + if operand == SorterOps.ASCENDING: + query = query.order_by(asc(column)) + else: + query = query.order_by(desc(column)) + + return query + + return super().apply_sorting(query=query, table=table) + def apply_filter( self, query: AnyQuery, @@ -240,21 +324,37 @@ def workspace(self) -> "WorkspaceResponse": return self.get_metadata().workspace -class WorkspaceScopedFilter(BaseFilter): +class WorkspaceScopedFilter(UserScopedFilter): """Model to enable advanced scoping with workspace.""" FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ - *BaseFilter.FILTER_EXCLUDE_FIELDS, + *UserScopedFilter.FILTER_EXCLUDE_FIELDS, + "workspace", "scope_workspace", ] CLI_EXCLUDE_FIELDS: ClassVar[List[str]] = [ - *BaseFilter.CLI_EXCLUDE_FIELDS, + *UserScopedFilter.CLI_EXCLUDE_FIELDS, + "workspace_id", + "workspace", "scope_workspace", ] + CUSTOM_SORTING_OPTIONS: ClassVar[List[str]] = [ + *UserScopedFilter.CUSTOM_SORTING_OPTIONS, + "workspace", + ] scope_workspace: Optional[UUID] = Field( default=None, description="The workspace to scope this query to.", ) + workspace_id: Optional[Union[UUID, str]] = Field( + default=None, + description="UUID of the workspace that this entity belongs to.", + union_mode="left_to_right", + ) + workspace: Optional[Union[UUID, str]] = Field( + default=None, + description="Name/ID of the workspace that this entity belongs to.", + ) def set_scope_workspace(self, workspace_id: UUID) -> None: """Set the workspace to scope this response. @@ -264,6 +364,35 @@ def set_scope_workspace(self, workspace_id: UUID) -> None: """ self.scope_workspace = workspace_id + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List["ColumnElement[bool]"]: + """Get custom filters. + + Args: + table: The query table. + + Returns: + A list of custom filters. + """ + custom_filters = super().get_custom_filters(table) + + from sqlmodel import and_ + + from zenml.zen_stores.schemas import WorkspaceSchema + + if self.workspace: + workspace_filter = and_( + getattr(table, "workspace_id") == WorkspaceSchema.id, + self.generate_name_or_id_query_conditions( + value=self.workspace, + table=WorkspaceSchema, + ), + ) + custom_filters.append(workspace_filter) + + return custom_filters + def apply_filter( self, query: AnyQuery, @@ -291,6 +420,44 @@ def apply_filter( return query + def apply_sorting( + self, + query: AnyQuery, + table: Type["AnySchema"], + ) -> AnyQuery: + """Apply sorting to the query. + + Args: + query: The query to which to apply the sorting. + table: The query table. + + Returns: + The query with sorting applied. 
+ """ + from sqlmodel import asc, desc + + from zenml.enums import SorterOps + from zenml.zen_stores.schemas import WorkspaceSchema + + sort_by, operand = self.sorting_params + + if sort_by == "workspace": + column = WorkspaceSchema.name + + query = query.join( + WorkspaceSchema, + getattr(table, "workspace_id") == WorkspaceSchema.id, + ) + + if operand == SorterOps.ASCENDING: + query = query.order_by(asc(column)) + else: + query = query.order_by(desc(column)) + + return query + + return super().apply_sorting(query=query, table=table) + class WorkspaceScopedTaggableFilter(WorkspaceScopedFilter): """Model to enable advanced scoping with workspace and tagging.""" @@ -304,6 +471,11 @@ class WorkspaceScopedTaggableFilter(WorkspaceScopedFilter): "tag", ] + CUSTOM_SORTING_OPTIONS: ClassVar[List[str]] = [ + *WorkspaceScopedFilter.CUSTOM_SORTING_OPTIONS, + "tag", + ] + def apply_filter( self, query: AnyQuery, @@ -330,15 +502,20 @@ def apply_filter( return query - def get_custom_filters(self) -> List["ColumnElement[bool]"]: + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List["ColumnElement[bool]"]: """Get custom tag filters. + Args: + table: The query table. + Returns: A list of custom filters. """ from zenml.zen_stores.schemas import TagSchema - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) if self.tag: custom_filters.append( self.generate_custom_query_conditions_for_column( @@ -347,3 +524,79 @@ def get_custom_filters(self) -> List["ColumnElement[bool]"]: ) return custom_filters + + def apply_sorting( + self, + query: AnyQuery, + table: Type["AnySchema"], + ) -> AnyQuery: + """Apply sorting to the query. + + Args: + query: The query to which to apply the sorting. + table: The query table. + + Returns: + The query with sorting applied. 
+ """ + sort_by, operand = self.sorting_params + + if sort_by == "tag": + from sqlmodel import and_, asc, desc, func + + from zenml.enums import SorterOps, TaggableResourceTypes + from zenml.zen_stores.schemas import ( + ArtifactSchema, + ArtifactVersionSchema, + ModelSchema, + ModelVersionSchema, + PipelineRunSchema, + PipelineSchema, + RunTemplateSchema, + TagResourceSchema, + TagSchema, + ) + + resource_type_mapping = { + ArtifactSchema: TaggableResourceTypes.ARTIFACT, + ArtifactVersionSchema: TaggableResourceTypes.ARTIFACT_VERSION, + ModelSchema: TaggableResourceTypes.MODEL, + ModelVersionSchema: TaggableResourceTypes.MODEL_VERSION, + PipelineSchema: TaggableResourceTypes.PIPELINE, + PipelineRunSchema: TaggableResourceTypes.PIPELINE_RUN, + RunTemplateSchema: TaggableResourceTypes.RUN_TEMPLATE, + } + + query = ( + query.outerjoin( + TagResourceSchema, + and_( + table.id == TagResourceSchema.resource_id, + TagResourceSchema.resource_type + == resource_type_mapping[table], + ), + ) + .outerjoin(TagSchema, TagResourceSchema.tag_id == TagSchema.id) + .group_by(table.id) + ) + + if operand == SorterOps.ASCENDING: + query = query.order_by( + asc( + func.group_concat(TagSchema.name, ",").label( + "tags_list" + ) + ) + ) + else: + query = query.order_by( + desc( + func.group_concat(TagSchema.name, ",").label( + "tags_list" + ) + ) + ) + + return query + + return super().apply_sorting(query=query, table=table) diff --git a/src/zenml/models/v2/core/artifact_version.py b/src/zenml/models/v2/core/artifact_version.py index cd5089a3db..a6998b92b3 100644 --- a/src/zenml/models/v2/core/artifact_version.py +++ b/src/zenml/models/v2/core/artifact_version.py @@ -20,6 +20,8 @@ Dict, List, Optional, + Type, + TypeVar, Union, ) from uuid import UUID @@ -58,6 +60,10 @@ ) from zenml.models.v2.core.pipeline_run import PipelineRunResponse from zenml.models.v2.core.step_run import StepRunResponse + from zenml.zen_stores.schemas.base_schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) + logger = get_logger(__name__) @@ -471,7 +477,6 @@ class ArtifactVersionFilter(WorkspaceScopedTaggableFilter): "name", "only_unused", "has_custom_name", - "user", "model", "pipeline_run", "model_version_id", @@ -516,19 +521,10 @@ class ArtifactVersionFilter(WorkspaceScopedTaggableFilter): description="Artifact store for this artifact", union_mode="left_to_right", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace for this artifact", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that produced this artifact", - union_mode="left_to_right", - ) model_version_id: Optional[Union[UUID, str]] = Field( default=None, - description="ID of the model version that is associated with this artifact version.", + description="ID of the model version that is associated with this " + "artifact version.", union_mode="left_to_right", ) only_unused: Optional[bool] = Field( @@ -559,13 +555,18 @@ class ArtifactVersionFilter(WorkspaceScopedTaggableFilter): model_config = ConfigDict(protected_namespaces=()) - def get_custom_filters(self) -> List[Union["ColumnElement[bool]"]]: + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List[Union["ColumnElement[bool]"]]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. 
""" - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_, or_, select @@ -581,7 +582,6 @@ def get_custom_filters(self) -> List[Union["ColumnElement[bool]"]]: StepRunInputArtifactSchema, StepRunOutputArtifactSchema, StepRunSchema, - UserSchema, ) if self.name: @@ -629,17 +629,6 @@ def get_custom_filters(self) -> List[Union["ColumnElement[bool]"]]: ) custom_filters.append(custom_name_filter) - if self.user: - user_filter = and_( - ArtifactVersionSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - if self.model: model_filter = and_( ArtifactVersionSchema.id diff --git a/src/zenml/models/v2/core/code_repository.py b/src/zenml/models/v2/core/code_repository.py index c0a5430468..485f710b7d 100644 --- a/src/zenml/models/v2/core/code_repository.py +++ b/src/zenml/models/v2/core/code_repository.py @@ -13,8 +13,7 @@ # permissions and limitations under the License. """Models representing code repositories.""" -from typing import Any, Dict, Optional, Union -from uuid import UUID +from typing import Any, Dict, Optional from pydantic import Field @@ -189,13 +188,3 @@ class CodeRepositoryFilter(WorkspaceScopedFilter): description="Name of the code repository.", default=None, ) - workspace_id: Optional[Union[UUID, str]] = Field( - description="Workspace of the code repository.", - default=None, - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - description="User that created the code repository.", - default=None, - union_mode="left_to_right", - ) diff --git a/src/zenml/models/v2/core/component.py b/src/zenml/models/v2/core/component.py index a4f52be884..9841858922 100644 --- a/src/zenml/models/v2/core/component.py +++ b/src/zenml/models/v2/core/component.py @@ -21,6 +21,7 @@ List, Optional, Type, + TypeVar, Union, ) from uuid import UUID @@ -42,9 +43,11 @@ if TYPE_CHECKING: from sqlalchemy.sql.elements import ColumnElement - from sqlmodel import SQLModel from zenml.models import FlavorResponse, ServiceConnectorResponse + from zenml.zen_stores.schemas.base_schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) # ------------------ Base Model ------------------ @@ -356,7 +359,6 @@ class ComponentFilter(WorkspaceScopedFilter): *WorkspaceScopedFilter.FILTER_EXCLUDE_FIELDS, "scope_type", "stack_id", - "user", ] CLI_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *WorkspaceScopedFilter.CLI_EXCLUDE_FIELDS, @@ -366,7 +368,6 @@ class ComponentFilter(WorkspaceScopedFilter): default=None, description="The type to scope this query to.", ) - name: Optional[str] = Field( default=None, description="Name of the stack component", @@ -379,16 +380,6 @@ class ComponentFilter(WorkspaceScopedFilter): default=None, description="Type of the stack component", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the stack component", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User of the stack component", - union_mode="left_to_right", - ) connector_id: Optional[Union[UUID, str]] = Field( default=None, description="Connector linked to the stack component", @@ -399,10 +390,6 @@ class ComponentFilter(WorkspaceScopedFilter): description="Stack of the stack component", union_mode="left_to_right", ) - user: Optional[Union[UUID, str]] = Field( - 
default=None, - description="Name/ID of the user that created the component.", - ) def set_scope_type(self, component_type: str) -> None: """Set the type of component on which to perform the filtering to scope the response. @@ -413,7 +400,7 @@ def set_scope_type(self, component_type: str) -> None: self.scope_type = component_type def generate_filter( - self, table: Type["SQLModel"] + self, table: Type["AnySchema"] ) -> Union["ColumnElement[bool]"]: """Generate the filter for the query. @@ -449,31 +436,3 @@ def generate_filter( base_filter = operator(base_filter, stack_filter) return base_filter - - def get_custom_filters(self) -> List["ColumnElement[bool]"]: - """Get custom filters. - - Returns: - A list of custom filters. - """ - from sqlmodel import and_ - - from zenml.zen_stores.schemas import ( - StackComponentSchema, - UserSchema, - ) - - custom_filters = super().get_custom_filters() - - if self.user: - user_filter = and_( - StackComponentSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - - return custom_filters diff --git a/src/zenml/models/v2/core/flavor.py b/src/zenml/models/v2/core/flavor.py index fd4110300c..77fe774c07 100644 --- a/src/zenml/models/v2/core/flavor.py +++ b/src/zenml/models/v2/core/flavor.py @@ -13,7 +13,7 @@ # permissions and limitations under the License. """Models representing flavors.""" -from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional from uuid import UUID from pydantic import Field @@ -428,13 +428,3 @@ class FlavorFilter(WorkspaceScopedFilter): default=None, description="Integration associated with the flavor", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the stack", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User of the stack", - union_mode="left_to_right", - ) diff --git a/src/zenml/models/v2/core/model.py b/src/zenml/models/v2/core/model.py index 0eb3b749c8..0b5272ab7e 100644 --- a/src/zenml/models/v2/core/model.py +++ b/src/zenml/models/v2/core/model.py @@ -13,7 +13,7 @@ # permissions and limitations under the License. 
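# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the surrounding patches.
# The deletions above are the point of this refactor: per-model
# `workspace_id`/`user_id` fields move into the shared scoped base filters, so
# a subclass only declares its own columns and inherits `user`, `user_id`,
# `workspace` and `workspace_id` together with the matching joins and sorting.
# `WidgetFilter` is a hypothetical subclass for illustration:
from typing import Optional

from pydantic import Field

from zenml.models.v2.base.scoped import WorkspaceScopedFilter

class WidgetFilter(WorkspaceScopedFilter):
    name: Optional[str] = Field(default=None, description="Widget name.")

WidgetFilter(user="alice", workspace="default", name="my-widget")
# ---------------------------------------------------------------------------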
"""Models representing models.""" -from typing import TYPE_CHECKING, ClassVar, List, Optional, Union +from typing import TYPE_CHECKING, List, Optional from uuid import UUID from pydantic import BaseModel, Field @@ -30,8 +30,6 @@ from zenml.utils.pagination_utils import depaginate if TYPE_CHECKING: - from sqlalchemy.sql.elements import ColumnElement - from zenml.model.model import Model from zenml.models.v2.core.tag import TagResponse @@ -318,61 +316,7 @@ def versions(self) -> List["Model"]: class ModelFilter(WorkspaceScopedTaggableFilter): """Model to enable advanced filtering of all Workspaces.""" - CLI_EXCLUDE_FIELDS: ClassVar[List[str]] = [ - *WorkspaceScopedTaggableFilter.CLI_EXCLUDE_FIELDS, - "workspace_id", - "user_id", - ] - FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ - *WorkspaceScopedTaggableFilter.FILTER_EXCLUDE_FIELDS, - "user", - ] - name: Optional[str] = Field( default=None, description="Name of the Model", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the Model", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User of the Model", - union_mode="left_to_right", - ) - user: Optional[Union[UUID, str]] = Field( - default=None, - description="Name/ID of the user that created the model.", - ) - - def get_custom_filters( - self, - ) -> List["ColumnElement[bool]"]: - """Get custom filters. - - Returns: - A list of custom filters. - """ - custom_filters = super().get_custom_filters() - - from sqlmodel import and_ - - from zenml.zen_stores.schemas import ( - ModelSchema, - UserSchema, - ) - - if self.user: - user_filter = and_( - ModelSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - - return custom_filters diff --git a/src/zenml/models/v2/core/model_version.py b/src/zenml/models/v2/core/model_version.py index d1a7a95197..949d9ce1d1 100644 --- a/src/zenml/models/v2/core/model_version.py +++ b/src/zenml/models/v2/core/model_version.py @@ -585,7 +585,6 @@ class ModelVersionFilter(WorkspaceScopedTaggableFilter): FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *WorkspaceScopedTaggableFilter.FILTER_EXCLUDE_FIELDS, - "user", "run_metadata", ] @@ -597,25 +596,11 @@ class ModelVersionFilter(WorkspaceScopedTaggableFilter): default=None, description="The number of the Model Version", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="The workspace of the Model Version", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="The user of the Model Version", - union_mode="left_to_right", - ) stage: Optional[Union[str, ModelStages]] = Field( description="The model version stage", default=None, union_mode="left_to_right", ) - user: Optional[Union[UUID, str]] = Field( - default=None, - description="Name/ID of the user that created the model version.", - ) run_metadata: Optional[Dict[str, str]] = Field( default=None, description="The run_metadata to filter the model versions by.", @@ -639,14 +624,17 @@ def set_scope_model(self, model_name_or_id: Union[str, UUID]) -> None: self._model_id = model_id def get_custom_filters( - self, + self, table: Type["AnySchema"] ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. 
""" - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_ @@ -654,20 +642,8 @@ def get_custom_filters( ModelVersionSchema, RunMetadataResourceSchema, RunMetadataSchema, - UserSchema, ) - if self.user: - user_filter = and_( - ModelVersionSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - if self.run_metadata is not None: from zenml.enums import MetadataResourceTypes diff --git a/src/zenml/models/v2/core/model_version_artifact.py b/src/zenml/models/v2/core/model_version_artifact.py index f3a677a86e..6c9514b973 100644 --- a/src/zenml/models/v2/core/model_version_artifact.py +++ b/src/zenml/models/v2/core/model_version_artifact.py @@ -13,7 +13,7 @@ # permissions and limitations under the License. """Models representing the link between model versions and artifacts.""" -from typing import TYPE_CHECKING, List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Type, TypeVar, Union from uuid import UUID from pydantic import ConfigDict, Field @@ -32,6 +32,9 @@ from sqlalchemy.sql.elements import ColumnElement from zenml.models.v2.core.artifact_version import ArtifactVersionResponse + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) # ------------------ Request Model ------------------ @@ -164,13 +167,18 @@ class ModelVersionArtifactFilter(BaseFilter): # careful we might overwrite some fields protected by pydantic. model_config = ConfigDict(protected_namespaces=()) - def get_custom_filters(self) -> List[Union["ColumnElement[bool]"]]: + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List[Union["ColumnElement[bool]"]]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. """ - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_, col diff --git a/src/zenml/models/v2/core/model_version_pipeline_run.py b/src/zenml/models/v2/core/model_version_pipeline_run.py index 6181c2ffbb..40e7f823d9 100644 --- a/src/zenml/models/v2/core/model_version_pipeline_run.py +++ b/src/zenml/models/v2/core/model_version_pipeline_run.py @@ -13,7 +13,7 @@ # permissions and limitations under the License. """Models representing the link between model versions and pipeline runs.""" -from typing import List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Type, TypeVar, Union from uuid import UUID from pydantic import ConfigDict, Field @@ -30,6 +30,12 @@ from zenml.models.v2.base.filter import BaseFilter, StrFilter from zenml.models.v2.core.pipeline_run import PipelineRunResponse +if TYPE_CHECKING: + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) + + # ------------------ Request Model ------------------ @@ -147,13 +153,18 @@ class ModelVersionPipelineRunFilter(BaseFilter): # careful we might overwrite some fields protected by pydantic. model_config = ConfigDict(protected_namespaces=()) - def get_custom_filters(self) -> List["ColumnElement[bool]"]: + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. 
""" - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_ diff --git a/src/zenml/models/v2/core/pipeline.py b/src/zenml/models/v2/core/pipeline.py index 5166e0abb9..03a81fbb23 100644 --- a/src/zenml/models/v2/core/pipeline.py +++ b/src/zenml/models/v2/core/pipeline.py @@ -21,7 +21,6 @@ Optional, Type, TypeVar, - Union, ) from uuid import UUID @@ -45,8 +44,6 @@ from zenml.models.v2.core.tag import TagResponse if TYPE_CHECKING: - from sqlalchemy.sql.elements import ColumnElement - from zenml.models.v2.core.pipeline_run import PipelineRunResponse from zenml.zen_stores.schemas import BaseSchema @@ -258,10 +255,12 @@ def tags(self) -> List[TagResponse]: class PipelineFilter(WorkspaceScopedTaggableFilter): """Pipeline filter model.""" - CUSTOM_SORTING_OPTIONS = [SORT_PIPELINES_BY_LATEST_RUN_KEY] + CUSTOM_SORTING_OPTIONS: ClassVar[List[str]] = [ + *WorkspaceScopedTaggableFilter.CUSTOM_SORTING_OPTIONS, + SORT_PIPELINES_BY_LATEST_RUN_KEY, + ] FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *WorkspaceScopedTaggableFilter.FILTER_EXCLUDE_FIELDS, - "user", "latest_run_status", ] @@ -274,20 +273,6 @@ class PipelineFilter(WorkspaceScopedTaggableFilter): description="Filter by the status of the latest run of a pipeline. " "This will always be applied as an `AND` filter for now.", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the Pipeline", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User of the Pipeline", - union_mode="left_to_right", - ) - user: Optional[Union[UUID, str]] = Field( - default=None, - description="Name/ID of the user that created the pipeline.", - ) def apply_filter( self, query: AnyQuery, table: Type["AnySchema"] @@ -343,36 +328,6 @@ def apply_filter( return query - def get_custom_filters( - self, - ) -> List["ColumnElement[bool]"]: - """Get custom filters. - - Returns: - A list of custom filters. - """ - custom_filters = super().get_custom_filters() - - from sqlmodel import and_ - - from zenml.zen_stores.schemas import ( - PipelineSchema, - UserSchema, - ) - - if self.user: - user_filter = and_( - PipelineSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - - return custom_filters - def apply_sorting( self, query: AnyQuery, @@ -387,12 +342,45 @@ def apply_sorting( Returns: The query with sorting applied. 
""" - column, _ = self.sorting_params + from sqlmodel import asc, case, col, desc, func, select + + from zenml.enums import SorterOps + from zenml.zen_stores.schemas import PipelineRunSchema, PipelineSchema + + sort_by, operand = self.sorting_params + + if sort_by == SORT_PIPELINES_BY_LATEST_RUN_KEY: + # Subquery to find the latest run per pipeline + latest_run_subquery = ( + select( + PipelineRunSchema.pipeline_id, + case( + ( + func.max(PipelineRunSchema.created).is_(None), + PipelineSchema.created, + ), + else_=func.max(PipelineRunSchema.created), + ).label("latest_run"), + ) + .group_by(col(PipelineRunSchema.pipeline_id)) + .subquery() + ) + + # Join the subquery with the pipelines + query = query.outerjoin( + latest_run_subquery, + PipelineSchema.id == latest_run_subquery.c.pipeline_id, + ) + + if operand == SorterOps.ASCENDING: + query = query.order_by( + asc(latest_run_subquery.c.latest_run) + ).order_by(col(PipelineSchema.id)) + else: + query = query.order_by( + desc(latest_run_subquery.c.latest_run) + ).order_by(col(PipelineSchema.id)) - if column == SORT_PIPELINES_BY_LATEST_RUN_KEY: - # If sorting by the latest run, the sorting is already done in the - # base query in `SqlZenStore.list_pipelines(...)` and we don't need - # to to anything here return query else: return super().apply_sorting(query=query, table=table) diff --git a/src/zenml/models/v2/core/pipeline_build.py b/src/zenml/models/v2/core/pipeline_build.py index 93c0ff63a8..19dc89ccbf 100644 --- a/src/zenml/models/v2/core/pipeline_build.py +++ b/src/zenml/models/v2/core/pipeline_build.py @@ -14,7 +14,17 @@ """Models representing pipeline builds.""" import json -from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + List, + Optional, + Type, + TypeVar, + Union, +) from uuid import UUID from pydantic import Field @@ -35,6 +45,9 @@ from zenml.models.v2.core.pipeline import PipelineResponse from zenml.models.v2.core.stack import StackResponse + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) # ------------------ Request Model ------------------ @@ -453,16 +466,6 @@ class PipelineBuildFilter(WorkspaceScopedFilter): "container_registry_id", ] - workspace_id: Optional[Union[UUID, str]] = Field( - description="Workspace for this pipeline build.", - default=None, - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - description="User that produced this pipeline build.", - default=None, - union_mode="left_to_right", - ) pipeline_id: Optional[Union[UUID, str]] = Field( description="Pipeline associated with the pipeline build.", default=None, @@ -502,13 +505,17 @@ class PipelineBuildFilter(WorkspaceScopedFilter): def get_custom_filters( self, + table: Type["AnySchema"], ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. 
""" - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_ diff --git a/src/zenml/models/v2/core/pipeline_deployment.py b/src/zenml/models/v2/core/pipeline_deployment.py index 760f65f1a3..94dbc43150 100644 --- a/src/zenml/models/v2/core/pipeline_deployment.py +++ b/src/zenml/models/v2/core/pipeline_deployment.py @@ -358,16 +358,6 @@ def template_id(self) -> Optional[UUID]: class PipelineDeploymentFilter(WorkspaceScopedFilter): """Model to enable advanced filtering of all pipeline deployments.""" - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace for this deployment.", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that created this deployment.", - union_mode="left_to_right", - ) pipeline_id: Optional[Union[UUID, str]] = Field( default=None, description="Pipeline associated with the deployment.", diff --git a/src/zenml/models/v2/core/pipeline_run.py b/src/zenml/models/v2/core/pipeline_run.py index 958d662a51..3a22f64295 100644 --- a/src/zenml/models/v2/core/pipeline_run.py +++ b/src/zenml/models/v2/core/pipeline_run.py @@ -16,10 +16,13 @@ from datetime import datetime from typing import ( TYPE_CHECKING, + Any, ClassVar, Dict, List, Optional, + Type, + TypeVar, Union, cast, ) @@ -55,6 +58,11 @@ from zenml.models.v2.core.schedule import ScheduleResponse from zenml.models.v2.core.stack import StackResponse from zenml.models.v2.core.step_run import StepRunResponse + from zenml.zen_stores.schemas.base_schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) + +AnyQuery = TypeVar("AnyQuery", bound=Any) # ------------------ Request Model ------------------ @@ -584,6 +592,15 @@ def tags(self) -> List[TagResponse]: class PipelineRunFilter(WorkspaceScopedTaggableFilter): """Model to enable advanced filtering of all Workspaces.""" + CUSTOM_SORTING_OPTIONS: ClassVar[List[str]] = [ + *WorkspaceScopedTaggableFilter.CUSTOM_SORTING_OPTIONS, + "tag", + "stack", + "pipeline", + "model", + "model_version", + ] + FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *WorkspaceScopedTaggableFilter.FILTER_EXCLUDE_FIELDS, "unlisted", @@ -592,7 +609,6 @@ class PipelineRunFilter(WorkspaceScopedTaggableFilter): "schedule_id", "stack_id", "template_id", - "user", "pipeline", "stack", "code_repository", @@ -615,16 +631,6 @@ class PipelineRunFilter(WorkspaceScopedTaggableFilter): description="Pipeline associated with the Pipeline Run", union_mode="left_to_right", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the Pipeline Run", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that created the Pipeline Run", - union_mode="left_to_right", - ) stack_id: Optional[Union[UUID, str]] = Field( default=None, description="Stack used for the Pipeline Run", @@ -675,16 +681,12 @@ class PipelineRunFilter(WorkspaceScopedTaggableFilter): union_mode="left_to_right", ) unlisted: Optional[bool] = None - user: Optional[Union[UUID, str]] = Field( - default=None, - description="Name/ID of the user that created the run.", - ) run_metadata: Optional[Dict[str, str]] = Field( default=None, description="The run_metadata to filter the pipeline runs by.", ) # TODO: Remove once frontend is ready for it. This is replaced by the more - # generic `pipeline` filter below. + # generic `pipeline` filter below. 
pipeline_name: Optional[str] = Field( default=None, description="Name of the pipeline associated with the run", @@ -716,13 +718,17 @@ class PipelineRunFilter(WorkspaceScopedTaggableFilter): def get_custom_filters( self, + table: Type["AnySchema"], ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. """ - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_, col, or_ @@ -741,7 +747,6 @@ def get_custom_filters( StackComponentSchema, StackCompositionSchema, StackSchema, - UserSchema, ) if self.unlisted is not None: @@ -792,17 +797,6 @@ def get_custom_filters( ) custom_filters.append(run_template_filter) - if self.user: - user_filter = and_( - PipelineRunSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - if self.pipeline: pipeline_filter = and_( PipelineRunSchema.pipeline_id == PipelineSchema.id, @@ -926,3 +920,71 @@ def get_custom_filters( custom_filters.append(additional_filter) return custom_filters + + def apply_sorting( + self, + query: AnyQuery, + table: Type["AnySchema"], + ) -> AnyQuery: + """Apply sorting to the query. + + Args: + query: The query to which to apply the sorting. + table: The query table. + + Returns: + The query with sorting applied. + """ + from sqlmodel import asc, desc + + from zenml.enums import SorterOps + from zenml.zen_stores.schemas import ( + ModelSchema, + ModelVersionSchema, + PipelineDeploymentSchema, + PipelineRunSchema, + PipelineSchema, + StackSchema, + ) + + sort_by, operand = self.sorting_params + + if sort_by == "pipeline": + query = query.join( + PipelineSchema, + PipelineRunSchema.pipeline_id == PipelineSchema.id, + ) + column = PipelineSchema.name + elif sort_by == "stack": + query = query.join( + PipelineDeploymentSchema, + PipelineRunSchema.deployment_id == PipelineDeploymentSchema.id, + ).join( + StackSchema, + PipelineDeploymentSchema.stack_id == StackSchema.id, + ) + column = StackSchema.name + elif sort_by == "model": + query = query.join( + ModelVersionSchema, + PipelineRunSchema.model_version_id == ModelVersionSchema.id, + ).join( + ModelSchema, + ModelVersionSchema.model_id == ModelSchema.id, + ) + column = ModelSchema.name + elif sort_by == "model_version": + query = query.join( + ModelVersionSchema, + PipelineRunSchema.model_version_id == ModelVersionSchema.id, + ) + column = ModelVersionSchema.name + else: + return super().apply_sorting(query=query, table=table) + + if operand == SorterOps.ASCENDING: + query = query.order_by(asc(column)) + else: + query = query.order_by(desc(column)) + + return query diff --git a/src/zenml/models/v2/core/run_template.py b/src/zenml/models/v2/core/run_template.py index b1aae8a325..2bc177c043 100644 --- a/src/zenml/models/v2/core/run_template.py +++ b/src/zenml/models/v2/core/run_template.py @@ -13,7 +13,17 @@ # permissions and limitations under the License. 
"""Models representing pipeline templates.""" -from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + List, + Optional, + Type, + TypeVar, + Union, +) from uuid import UUID from pydantic import Field @@ -45,6 +55,11 @@ if TYPE_CHECKING: from sqlalchemy.sql.elements import ColumnElement + from zenml.zen_stores.schemas.base_schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) + + # ------------------ Request Model ------------------ @@ -310,16 +325,6 @@ class RunTemplateFilter(WorkspaceScopedTaggableFilter): default=None, description="Name of the run template.", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace associated with the template.", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that created the template.", - union_mode="left_to_right", - ) pipeline_id: Optional[Union[UUID, str]] = Field( default=None, description="Pipeline associated with the template.", @@ -340,10 +345,6 @@ class RunTemplateFilter(WorkspaceScopedTaggableFilter): description="Code repository associated with the template.", union_mode="left_to_right", ) - user: Optional[Union[UUID, str]] = Field( - default=None, - description="Name/ID of the user that created the template.", - ) pipeline: Optional[Union[UUID, str]] = Field( default=None, description="Name/ID of the pipeline associated with the template.", @@ -354,14 +355,17 @@ class RunTemplateFilter(WorkspaceScopedTaggableFilter): ) def get_custom_filters( - self, + self, table: Type["AnySchema"] ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. 
""" - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_ @@ -371,7 +375,6 @@ def get_custom_filters( PipelineSchema, RunTemplateSchema, StackSchema, - UserSchema, ) if self.code_repository_id: @@ -409,17 +412,6 @@ def get_custom_filters( ) custom_filters.append(pipeline_filter) - if self.user: - user_filter = and_( - RunTemplateSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - if self.pipeline: pipeline_filter = and_( RunTemplateSchema.source_deployment_id diff --git a/src/zenml/models/v2/core/schedule.py b/src/zenml/models/v2/core/schedule.py index af838f17cc..0e7dc01c42 100644 --- a/src/zenml/models/v2/core/schedule.py +++ b/src/zenml/models/v2/core/schedule.py @@ -279,16 +279,6 @@ def pipeline_id(self) -> Optional[UUID]: class ScheduleFilter(WorkspaceScopedFilter): """Model to enable advanced filtering of all Users.""" - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace scope of the schedule.", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that created the schedule", - union_mode="left_to_right", - ) pipeline_id: Optional[Union[UUID, str]] = Field( default=None, description="Pipeline that the schedule is attached to.", diff --git a/src/zenml/models/v2/core/secret.py b/src/zenml/models/v2/core/secret.py index 79e50cd184..3f29b57de2 100644 --- a/src/zenml/models/v2/core/secret.py +++ b/src/zenml/models/v2/core/secret.py @@ -15,7 +15,6 @@ from datetime import datetime from typing import Any, ClassVar, Dict, List, Optional, Union -from uuid import UUID from pydantic import Field, SecretStr @@ -253,25 +252,12 @@ class SecretFilter(WorkspaceScopedFilter): default=None, description="Name of the secret", ) - scope: Optional[Union[SecretScope, str]] = Field( default=None, description="Scope in which to filter secrets", union_mode="left_to_right", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the Secret", - union_mode="left_to_right", - ) - - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that created the Secret", - union_mode="left_to_right", - ) - @staticmethod def _get_filtering_value(value: Optional[Any]) -> str: """Convert the value to a string that can be used for lexicographical filtering and sorting. 
diff --git a/src/zenml/models/v2/core/service.py b/src/zenml/models/v2/core/service.py index c3dcbd7cfc..2ad9724b20 100644 --- a/src/zenml/models/v2/core/service.py +++ b/src/zenml/models/v2/core/service.py @@ -15,19 +15,20 @@ from datetime import datetime from typing import ( + TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Type, + TypeVar, Union, ) from uuid import UUID from pydantic import BaseModel, ConfigDict, Field from sqlalchemy.sql.elements import ColumnElement -from sqlmodel import SQLModel from zenml.constants import STR_FIELD_MAX_LENGTH from zenml.models.v2.base.scoped import ( @@ -37,11 +38,15 @@ WorkspaceScopedResponseBody, WorkspaceScopedResponseMetadata, WorkspaceScopedResponseResources, - WorkspaceScopedTaggableFilter, ) from zenml.services.service_status import ServiceState from zenml.services.service_type import ServiceType +if TYPE_CHECKING: + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) + # ------------------ Request Model ------------------ @@ -376,16 +381,6 @@ class ServiceFilter(WorkspaceScopedFilter): description="Name of the service. Use this to filter services by " "their name.", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the service", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User of the service", - union_mode="left_to_right", - ) type: Optional[str] = Field( default=None, description="Type of the service. Filter services by their type.", @@ -457,9 +452,7 @@ def set_flavor(self, flavor: str) -> None: "config", ] CLI_EXCLUDE_FIELDS: ClassVar[List[str]] = [ - *WorkspaceScopedTaggableFilter.CLI_EXCLUDE_FIELDS, - "workspace_id", - "user_id", + *WorkspaceScopedFilter.CLI_EXCLUDE_FIELDS, "flavor", "type", "pipeline_step_name", @@ -468,7 +461,7 @@ def set_flavor(self, flavor: str) -> None: ] def generate_filter( - self, table: Type["SQLModel"] + self, table: Type["AnySchema"] ) -> Union["ColumnElement[bool]"]: """Generate the filter for the query. 
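
The latest-run pipeline sorting moved into `PipelineFilter.apply_sorting` at the top of this patch (and out of `SqlZenStore.list_pipelines`, whose old implementation is removed later in the diff) is the subtlest of these hooks: pipelines without runs must survive the join and still sort by a sensible date, and a tiebreaker is needed so paginated pages never repeat items. Here is a simplified sketch of the same query over stand-in tables (redefined with `created` timestamps so the snippet stands alone); it expresses the fallback as a `coalesce` over the outer join rather than the `case` inside the subquery that the actual change uses.

```python
from datetime import datetime
from uuid import UUID, uuid4

from sqlmodel import Field, SQLModel, asc, col, func, select


class Pipeline(SQLModel, table=True):
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    created: datetime


class Run(SQLModel, table=True):
    id: UUID = Field(default_factory=uuid4, primary_key=True)
    pipeline_id: UUID = Field(foreign_key="pipeline.id")
    created: datetime


# Latest run timestamp per pipeline.
latest_run = (
    select(Run.pipeline_id, func.max(Run.created).label("latest_run"))
    .group_by(col(Run.pipeline_id))
    .subquery()
)

query = (
    select(Pipeline)
    # The outer join keeps pipelines that have no runs at all.
    .outerjoin(latest_run, Pipeline.id == latest_run.c.pipeline_id)
    # Run-less pipelines fall back to their own creation date.
    .order_by(asc(func.coalesce(latest_run.c.latest_run, Pipeline.created)))
    # Stable tiebreaker so subsequent pages never contain the same items.
    .order_by(col(Pipeline.id))
)
```
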
diff --git a/src/zenml/models/v2/core/service_connector.py b/src/zenml/models/v2/core/service_connector.py index 806e610007..8c71106ae2 100644 --- a/src/zenml/models/v2/core/service_connector.py +++ b/src/zenml/models/v2/core/service_connector.py @@ -801,7 +801,6 @@ class ServiceConnectorFilter(WorkspaceScopedFilter): default=None, description="The type to scope this query to.", ) - name: Optional[str] = Field( default=None, description="The name to filter by", @@ -810,16 +809,6 @@ class ServiceConnectorFilter(WorkspaceScopedFilter): default=None, description="The type of service connector to filter by", ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace to filter by", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User to filter by", - union_mode="left_to_right", - ) auth_method: Optional[str] = Field( default=None, title="Filter by the authentication method configured for the " diff --git a/src/zenml/models/v2/core/stack.py b/src/zenml/models/v2/core/stack.py index 3d8ad20a2c..1e49eb1544 100644 --- a/src/zenml/models/v2/core/stack.py +++ b/src/zenml/models/v2/core/stack.py @@ -14,7 +14,17 @@ """Models representing stacks.""" import json -from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + List, + Optional, + Type, + TypeVar, + Union, +) from uuid import UUID from pydantic import Field, model_validator @@ -39,6 +49,9 @@ from sqlalchemy.sql.elements import ColumnElement from zenml.models.v2.core.component import ComponentResponse + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) # ------------------ Request Model ------------------ @@ -323,7 +336,6 @@ class StackFilter(WorkspaceScopedFilter): FILTER_EXCLUDE_FIELDS: ClassVar[List[str]] = [ *WorkspaceScopedFilter.FILTER_EXCLUDE_FIELDS, "component_id", - "user", "component", ] @@ -334,42 +346,32 @@ class StackFilter(WorkspaceScopedFilter): description: Optional[str] = Field( default=None, description="Description of the stack" ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of the stack", - union_mode="left_to_right", - ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User of the stack", - union_mode="left_to_right", - ) component_id: Optional[Union[UUID, str]] = Field( default=None, description="Component in the stack", union_mode="left_to_right", ) - user: Optional[Union[UUID, str]] = Field( - default=None, - description="Name/ID of the user that created the stack.", - ) component: Optional[Union[UUID, str]] = Field( default=None, description="Name/ID of a component in the stack." ) - def get_custom_filters(self) -> List["ColumnElement[bool]"]: + def get_custom_filters( + self, table: Type["AnySchema"] + ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. 
""" - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from zenml.zen_stores.schemas import ( StackComponentSchema, StackCompositionSchema, StackSchema, - UserSchema, ) if self.component_id: @@ -379,17 +381,6 @@ def get_custom_filters(self) -> List["ColumnElement[bool]"]: ) custom_filters.append(component_id_filter) - if self.user: - user_filter = and_( - StackSchema.user_id == UserSchema.id, - self.generate_name_or_id_query_conditions( - value=self.user, - table=UserSchema, - additional_columns=["full_name"], - ), - ) - custom_filters.append(user_filter) - if self.component: component_filter = and_( StackCompositionSchema.stack_id == StackSchema.id, diff --git a/src/zenml/models/v2/core/step_run.py b/src/zenml/models/v2/core/step_run.py index d9ac5e0354..0a505539d0 100644 --- a/src/zenml/models/v2/core/step_run.py +++ b/src/zenml/models/v2/core/step_run.py @@ -14,7 +14,16 @@ """Models representing steps runs.""" from datetime import datetime -from typing import TYPE_CHECKING, ClassVar, Dict, List, Optional, Union +from typing import ( + TYPE_CHECKING, + ClassVar, + Dict, + List, + Optional, + Type, + TypeVar, + Union, +) from uuid import UUID from pydantic import BaseModel, ConfigDict, Field @@ -41,6 +50,9 @@ LogsRequest, LogsResponse, ) + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) class StepRunInputResponse(ArtifactVersionResponse): @@ -553,16 +565,6 @@ class StepRunFilter(WorkspaceScopedFilter): description="Original id for this step run", union_mode="left_to_right", ) - user_id: Optional[Union[UUID, str]] = Field( - default=None, - description="User that produced this step run", - union_mode="left_to_right", - ) - workspace_id: Optional[Union[UUID, str]] = Field( - default=None, - description="Workspace of this step run", - union_mode="left_to_right", - ) model_version_id: Optional[Union[UUID, str]] = Field( default=None, description="Model version associated with the step run.", @@ -576,18 +578,20 @@ class StepRunFilter(WorkspaceScopedFilter): default=None, description="The run_metadata to filter the step runs by.", ) - model_config = ConfigDict(protected_namespaces=()) def get_custom_filters( - self, + self, table: Type["AnySchema"] ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. """ - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) from sqlmodel import and_ diff --git a/src/zenml/models/v2/core/trigger.py b/src/zenml/models/v2/core/trigger.py index daef211ed7..45fc23a501 100644 --- a/src/zenml/models/v2/core/trigger.py +++ b/src/zenml/models/v2/core/trigger.py @@ -13,7 +13,17 @@ # permissions and limitations under the License. 
"""Collection of all models concerning triggers.""" -from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Dict, + List, + Optional, + Type, + TypeVar, + Union, +) from uuid import UUID from pydantic import Field, model_validator @@ -39,6 +49,9 @@ ActionResponse, ) from zenml.models.v2.core.event_source import EventSourceResponse + from zenml.zen_stores.schemas import BaseSchema + + AnySchema = TypeVar("AnySchema", bound=BaseSchema) # ------------------ Request Model ------------------ @@ -358,10 +371,13 @@ class TriggerFilter(WorkspaceScopedFilter): ) def get_custom_filters( - self, + self, table: Type["AnySchema"] ) -> List["ColumnElement[bool]"]: """Get custom filters. + Args: + table: The query table. + Returns: A list of custom filters. """ @@ -373,7 +389,7 @@ def get_custom_filters( TriggerSchema, ) - custom_filters = super().get_custom_filters() + custom_filters = super().get_custom_filters(table) if self.event_source_flavor: event_source_flavor_filter = and_( diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py index 464293515b..ce20d6687f 100644 --- a/src/zenml/zen_stores/sql_zen_store.py +++ b/src/zenml/zen_stores/sql_zen_store.py @@ -55,7 +55,7 @@ field_validator, model_validator, ) -from sqlalchemy import asc, case, desc, func +from sqlalchemy import func from sqlalchemy.engine import URL, Engine, make_url from sqlalchemy.exc import ( ArgumentError, @@ -100,7 +100,6 @@ ENV_ZENML_SERVER, FINISHED_ONBOARDING_SURVEY_KEY, MAX_RETRIES_FOR_VERSIONED_ENTITY_CREATION, - SORT_PIPELINES_BY_LATEST_RUN_KEY, SQL_STORE_BACKUP_DIRECTORY_NAME, TEXT_FIELD_MAX_LENGTH, handle_bool_env_var, @@ -117,7 +116,6 @@ OnboardingStep, SecretScope, SecretsStoreType, - SorterOps, StackComponentType, StackDeploymentProvider, StepRunInputArtifactType, @@ -4358,69 +4356,14 @@ def list_pipelines( Returns: A list of all pipelines matching the filter criteria. """ - query: Union[Select[Any], SelectOfScalar[Any]] = select(PipelineSchema) - _custom_conversion: Optional[Callable[[Any], PipelineResponse]] = None - - column, operand = pipeline_filter_model.sorting_params - if column == SORT_PIPELINES_BY_LATEST_RUN_KEY: - with Session(self.engine) as session: - max_date_subquery = ( - # If no run exists for the pipeline yet, we use the pipeline - # creation date as a fallback, otherwise newly created - # pipeline would always be at the top/bottom - select( - PipelineSchema.id, - case( - ( - func.max(PipelineRunSchema.created).is_(None), - PipelineSchema.created, - ), - else_=func.max(PipelineRunSchema.created), - ).label("run_or_created"), - ) - .outerjoin( - PipelineRunSchema, - PipelineSchema.id == PipelineRunSchema.pipeline_id, # type: ignore[arg-type] - ) - .group_by(col(PipelineSchema.id)) - .subquery() - ) - - if operand == SorterOps.DESCENDING: - sort_clause = desc - else: - sort_clause = asc - - query = ( - # We need to include the subquery in the select here to - # make this query work with the distinct statement. This - # result will be removed in the custom conversion function - # applied later - select(PipelineSchema, max_date_subquery.c.run_or_created) - .where(PipelineSchema.id == max_date_subquery.c.id) - .order_by(sort_clause(max_date_subquery.c.run_or_created)) - # We always add the `id` column as a tiebreaker to ensure a - # stable, repeatable order of items, otherwise subsequent - # pages might contain the same items. 
- .order_by(col(PipelineSchema.id)) - ) - - def _custom_conversion(row: Any) -> PipelineResponse: - return cast( - PipelineResponse, - row[0].to_model( - include_metadata=hydrate, include_resources=True - ), - ) - with Session(self.engine) as session: + query = select(PipelineSchema) return self.filter_and_paginate( session=session, query=query, table=PipelineSchema, filter_model=pipeline_filter_model, hydrate=hydrate, - custom_schema_to_model_conversion=_custom_conversion, ) def count_pipelines(self, filter_model: Optional[PipelineFilter]) -> int: From 87cf23d355ca19b030148a364f4c8b97466cb6f8 Mon Sep 17 00:00:00 2001 From: Stefan Nica Date: Fri, 13 Dec 2024 09:28:12 +0100 Subject: [PATCH 13/18] Fix step name argument in hyperparameter tuning code example (#3259) --- .../build-pipelines/hyper-parameter-tuning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/book/how-to/pipeline-development/build-pipelines/hyper-parameter-tuning.md b/docs/book/how-to/pipeline-development/build-pipelines/hyper-parameter-tuning.md index 35ce3c93c2..49f8ae72a3 100644 --- a/docs/book/how-to/pipeline-development/build-pipelines/hyper-parameter-tuning.md +++ b/docs/book/how-to/pipeline-development/build-pipelines/hyper-parameter-tuning.md @@ -16,7 +16,7 @@ def my_pipeline(step_count: int) -> None: data = load_data_step() after = [] for i in range(step_count): - train_step(data, learning_rate=i * 0.0001, name=f"train_step_{i}") + train_step(data, learning_rate=i * 0.0001, id=f"train_step_{i}") after.append(f"train_step_{i}") model = select_model_step(..., after=after) ``` From 941dc81b23de6a978b68c83da962c4158938b478 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 13 Dec 2024 12:41:14 +0100 Subject: [PATCH 14/18] Include user of latest run in pipeline response (#3262) * Include user of latest run in pipeline response * Linting * Fix optional check --- src/zenml/models/v2/core/pipeline.py | 6 +++++- src/zenml/zen_stores/schemas/pipeline_schemas.py | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/zenml/models/v2/core/pipeline.py b/src/zenml/models/v2/core/pipeline.py index 03a81fbb23..199e9cce95 100644 --- a/src/zenml/models/v2/core/pipeline.py +++ b/src/zenml/models/v2/core/pipeline.py @@ -44,7 +44,7 @@ from zenml.models.v2.core.tag import TagResponse if TYPE_CHECKING: - from zenml.models.v2.core.pipeline_run import PipelineRunResponse + from zenml.models import PipelineRunResponse, UserResponse from zenml.zen_stores.schemas import BaseSchema AnySchema = TypeVar("AnySchema", bound=BaseSchema) @@ -119,6 +119,10 @@ class PipelineResponseMetadata(WorkspaceScopedResponseMetadata): class PipelineResponseResources(WorkspaceScopedResponseResources): """Class for all resource models associated with the pipeline entity.""" + latest_run_user: Optional["UserResponse"] = Field( + default=None, + title="The user that created the latest run of this pipeline.", + ) tags: List[TagResponse] = Field( title="Tags associated with the pipeline.", ) diff --git a/src/zenml/zen_stores/schemas/pipeline_schemas.py b/src/zenml/zen_stores/schemas/pipeline_schemas.py index 1f287720ee..3719a64b20 100644 --- a/src/zenml/zen_stores/schemas/pipeline_schemas.py +++ b/src/zenml/zen_stores/schemas/pipeline_schemas.py @@ -156,7 +156,12 @@ def to_model( resources = None if include_resources: + latest_run_user = self.runs[-1].user if self.runs else None + resources = PipelineResponseResources( + latest_run_user=latest_run_user.to_model() + if latest_run_user + else None, 
tags=[t.tag.to_model() for t in self.tags], ) From d6fae4ef12536e1455c92a0f45882c0e4ad40a10 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 13 Dec 2024 14:29:45 +0100 Subject: [PATCH 15/18] Create model versions server-side to avoid race conditions (#3254) * POC * Log model version if created * Fetch model version if it already exists * Test * Missing docstring * Refactor fetching model version into separate method * Remove FK cycle * Add DB migration * Cleanup * Move to computed property to solve alembic issue * mypy * Some cleanup * More tests and fixes * Typo * Move logs so they don't happen when creating the object * Always log model version * Mysql fix * Linting * More linting * Make columns non-nullable * Formatting * Fix some tests * Refactor * Fix comparison for mysql * Implement RBAC checks inside SQL ZenStore * Docstrings * Try to fix vertex test * Fix alembic order * Ignore generated columns in DB migration * Linting * Rework entire DB stuff to work with mariadb * Revert change to DB backup * Add missing null check * Try to fix vertex again --- src/zenml/model/model.py | 28 +- src/zenml/models/v2/core/model_version.py | 4 - src/zenml/orchestrators/step_launcher.py | 22 +- src/zenml/orchestrators/step_run_utils.py | 212 +---------- .../zen_server/rbac/rbac_sql_zen_store.py | 173 +++++++++ src/zenml/zen_stores/base_zen_store.py | 12 +- ..._add_model_version_producer_run_unique_.py | 68 ++++ src/zenml/zen_stores/schemas/model_schemas.py | 48 ++- .../schemas/pipeline_deployment_schemas.py | 14 +- src/zenml/zen_stores/sql_zen_store.py | 347 ++++++++++++++++-- .../functional/model/test_model_version.py | 12 +- .../pipelines/test_pipeline_context.py | 2 +- .../functional/steps/test_model_version.py | 148 +++++++- .../orchestrators/test_vertex_orchestrator.py | 36 +- tests/unit/model/test_model_version_init.py | 27 -- 15 files changed, 828 insertions(+), 325 deletions(-) create mode 100644 src/zenml/zen_server/rbac/rbac_sql_zen_store.py create mode 100644 src/zenml/zen_stores/migrations/versions/a1237ba94fd8_add_model_version_producer_run_unique_.py delete mode 100644 tests/unit/model/test_model_version_init.py diff --git a/src/zenml/model/model.py b/src/zenml/model/model.py index b7f3c59151..f7987fc1b5 100644 --- a/src/zenml/model/model.py +++ b/src/zenml/model/model.py @@ -509,22 +509,6 @@ def _root_validator(cls, data: Dict[str, Any]) -> Dict[str, Any]: raise ValueError( "`model_version_id` field is for internal use only" ) - - version = data.get("version", None) - - if ( - version in [stage.value for stage in ModelStages] - and not suppress_class_validation_warnings - ): - logger.info( - f"Version `{version}` matches one of the possible " - "`ModelStages` and will be fetched using stage." - ) - if str(version).isnumeric() and not suppress_class_validation_warnings: - logger.info( - f"`version` `{version}` is numeric and will be fetched " - "using version number." - ) data["suppress_class_validation_warnings"] = True return data @@ -603,6 +587,18 @@ def _get_model_version( hydrate=hydrate, ) else: + if self.version in ModelStages.values(): + logger.info( + f"Version `{self.version}` for model {self.name} matches " + "one of the possible `ModelStages` and will be fetched " + "using stage." + ) + if str(self.version).isnumeric(): + logger.info( + f"Version `{self.version}` for model {self.name} is " + "numeric and will be fetched using version number." 
+ ) + mv = zenml_client.get_model_version( model_name_or_id=self.name, model_version_name_or_number_or_id=self.version, diff --git a/src/zenml/models/v2/core/model_version.py b/src/zenml/models/v2/core/model_version.py index 949d9ce1d1..80880f1e70 100644 --- a/src/zenml/models/v2/core/model_version.py +++ b/src/zenml/models/v2/core/model_version.py @@ -77,10 +77,6 @@ class ModelVersionRequest(WorkspaceScopedRequest): default=None, ) - number: Optional[int] = Field( - description="The number of the model version", - default=None, - ) model: UUID = Field( description="The ID of the model containing version", ) diff --git a/src/zenml/orchestrators/step_launcher.py b/src/zenml/orchestrators/step_launcher.py index 6db9c085a8..1141172bf3 100644 --- a/src/zenml/orchestrators/step_launcher.py +++ b/src/zenml/orchestrators/step_launcher.py @@ -179,12 +179,10 @@ def launch(self) -> None: pipeline_run_id=pipeline_run.id, pipeline_run_metadata=pipeline_run_metadata, ) - - pipeline_model_version, pipeline_run = ( - step_run_utils.prepare_pipeline_run_model_version( - pipeline_run - ) - ) + if model_version := pipeline_run.model_version: + step_run_utils.log_model_version_dashboard_url( + model_version=model_version + ) request_factory = step_run_utils.StepRunRequestFactory( deployment=self._deployment, @@ -209,12 +207,10 @@ def launch(self) -> None: step_run = Client().zen_store.create_run_step( step_run_request ) - - step_model_version, step_run = ( - step_run_utils.prepare_step_run_model_version( - step_run=step_run, pipeline_run=pipeline_run + if model_version := step_run.model_version: + step_run_utils.log_model_version_dashboard_url( + model_version=model_version ) - ) if not step_run.status.is_finished: logger.info(f"Step `{self._step_name}` has started.") @@ -289,8 +285,8 @@ def _bypass() -> None: f"Using cached version of step `{self._step_name}`." 
) if ( - model_version := step_model_version - or pipeline_model_version + model_version := step_run.model_version + or pipeline_run.model_version ): step_run_utils.link_output_artifacts_to_model_version( artifacts=step_run.outputs, diff --git a/src/zenml/orchestrators/step_run_utils.py b/src/zenml/orchestrators/step_run_utils.py index e371b4c509..6451a4cc0a 100644 --- a/src/zenml/orchestrators/step_run_utils.py +++ b/src/zenml/orchestrators/step_run_utils.py @@ -14,7 +14,7 @@ """Utilities for creating step runs.""" from datetime import datetime -from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set, Tuple from zenml.client import Client from zenml.config.step_configurations import Step @@ -24,21 +24,13 @@ from zenml.model.utils import link_artifact_version_to_model_version from zenml.models import ( ArtifactVersionResponse, - ModelVersionPipelineRunRequest, ModelVersionResponse, PipelineDeploymentResponse, PipelineRunResponse, - PipelineRunUpdate, StepRunRequest, - StepRunResponse, - StepRunUpdate, ) from zenml.orchestrators import cache_utils, input_utils, utils from zenml.stack import Stack -from zenml.utils import pagination_utils, string_utils - -if TYPE_CHECKING: - from zenml.model.model import Model logger = get_logger(__name__) @@ -293,10 +285,6 @@ def create_cached_step_runs( deployment=deployment, pipeline_run=pipeline_run, stack=stack ) - pipeline_model_version, pipeline_run = prepare_pipeline_run_model_version( - pipeline_run=pipeline_run - ) - while ( cache_candidates := find_cacheable_invocation_candidates( deployment=deployment, @@ -311,7 +299,9 @@ def create_cached_step_runs( # Make sure the request factory has the most up to date pipeline # run to avoid hydration calls - request_factory.pipeline_run = pipeline_run + request_factory.pipeline_run = Client().get_pipeline_run( + pipeline_run.id + ) try: step_run_request = request_factory.create_request( invocation_id @@ -336,15 +326,10 @@ def create_cached_step_runs( step_run = Client().zen_store.create_run_step(step_run_request) - # Refresh the pipeline run here to make sure we have the latest - # state - pipeline_run = Client().get_pipeline_run(pipeline_run.id) - - step_model_version, step_run = prepare_step_run_model_version( - step_run=step_run, pipeline_run=pipeline_run - ) - - if model_version := step_model_version or pipeline_model_version: + if ( + model_version := step_run.model_version + or pipeline_run.model_version + ): link_output_artifacts_to_model_version( artifacts=step_run.outputs, model_version=model_version, @@ -356,169 +341,6 @@ def create_cached_step_runs( return cached_invocations -def get_or_create_model_version_for_pipeline_run( - model: "Model", - pipeline_run: PipelineRunResponse, - substitutions: Dict[str, str], -) -> Tuple[ModelVersionResponse, bool]: - """Get or create a model version as part of a pipeline run. - - Args: - model: The model to get or create. - pipeline_run: The pipeline run for which the model should be created. - substitutions: Substitutions to apply to the model version name. - - Returns: - The model version and a boolean indicating whether it was newly created - or not. 
- """ - # Copy the model before modifying it so we don't accidently modify - # configurations in which the model object is potentially referenced - model = model.model_copy() - - if model.model_version_id: - return model._get_model_version(), False - elif model.version: - if isinstance(model.version, str): - model.version = string_utils.format_name_template( - model.version, - substitutions=substitutions, - ) - model.name = string_utils.format_name_template( - model.name, - substitutions=substitutions, - ) - - return ( - model._get_or_create_model_version(), - model._created_model_version, - ) - - # The model version should be created as part of this run - # -> We first check if it was already created as part of this run, and if - # not we do create it. If this is running in two parallel steps, we might - # run into issues that this will create two versions. Ideally, all model - # versions required for a pipeline run and its steps could be created - # server-side at run creation time before the first step starts. - if model_version := get_model_version_created_by_pipeline_run( - model_name=model.name, pipeline_run=pipeline_run - ): - return model_version, False - else: - return model._get_or_create_model_version(), True - - -def get_model_version_created_by_pipeline_run( - model_name: str, pipeline_run: PipelineRunResponse -) -> Optional[ModelVersionResponse]: - """Get a model version that was created by a specific pipeline run. - - This function does not refresh the pipeline run, so it will only try to - fetch the model version from existing steps if they're already part of the - response. - - Args: - model_name: The model name for which to get the version. - pipeline_run: The pipeline run for which to get the version. - - Returns: - A model version with the given name created by the run, or None if such - a model version does not exist. - """ - if pipeline_run.config.model and pipeline_run.model_version: - if ( - pipeline_run.config.model.name == model_name - and pipeline_run.config.model.version is None - ): - return pipeline_run.model_version - - # We fetch a list of hydrated step runs here in order to avoid hydration - # calls for each step separately. - candidate_step_runs = pagination_utils.depaginate( - Client().list_run_steps, - pipeline_run_id=pipeline_run.id, - model=model_name, - hydrate=True, - ) - for step_run in candidate_step_runs: - if step_run.config.model and step_run.model_version: - if ( - step_run.config.model.name == model_name - and step_run.config.model.version is None - ): - return step_run.model_version - - return None - - -def prepare_pipeline_run_model_version( - pipeline_run: PipelineRunResponse, -) -> Tuple[Optional[ModelVersionResponse], PipelineRunResponse]: - """Prepare the model version for a pipeline run. - - Args: - pipeline_run: The pipeline run for which to prepare the model version. - - Returns: - The prepared model version and the updated pipeline run. 
- """ - model_version = None - - if pipeline_run.model_version: - model_version = pipeline_run.model_version - elif config_model := pipeline_run.config.model: - model_version, _ = get_or_create_model_version_for_pipeline_run( - model=config_model, - pipeline_run=pipeline_run, - substitutions=pipeline_run.config.substitutions, - ) - pipeline_run = Client().zen_store.update_run( - run_id=pipeline_run.id, - run_update=PipelineRunUpdate(model_version_id=model_version.id), - ) - link_pipeline_run_to_model_version( - pipeline_run=pipeline_run, model_version=model_version - ) - log_model_version_dashboard_url(model_version) - - return model_version, pipeline_run - - -def prepare_step_run_model_version( - step_run: StepRunResponse, pipeline_run: PipelineRunResponse -) -> Tuple[Optional[ModelVersionResponse], StepRunResponse]: - """Prepare the model version for a step run. - - Args: - step_run: The step run for which to prepare the model version. - pipeline_run: The pipeline run of the step. - - Returns: - The prepared model version and the updated step run. - """ - model_version = None - - if step_run.model_version: - model_version = step_run.model_version - elif config_model := step_run.config.model: - model_version, created = get_or_create_model_version_for_pipeline_run( - model=config_model, - pipeline_run=pipeline_run, - substitutions=step_run.config.substitutions, - ) - step_run = Client().zen_store.update_run_step( - step_run_id=step_run.id, - step_run_update=StepRunUpdate(model_version_id=model_version.id), - ) - link_pipeline_run_to_model_version( - pipeline_run=pipeline_run, model_version=model_version - ) - if created: - log_model_version_dashboard_url(model_version) - - return model_version, step_run - - def log_model_version_dashboard_url( model_version: ModelVersionResponse, ) -> None: @@ -546,24 +368,6 @@ def log_model_version_dashboard_url( ) -def link_pipeline_run_to_model_version( - pipeline_run: PipelineRunResponse, model_version: ModelVersionResponse -) -> None: - """Link a pipeline run to a model version. - - Args: - pipeline_run: The pipeline run to link. - model_version: The model version to link. - """ - client = Client() - client.zen_store.create_model_version_pipeline_run_link( - ModelVersionPipelineRunRequest( - pipeline_run=pipeline_run.id, - model_version=model_version.id, - ) - ) - - def link_output_artifacts_to_model_version( artifacts: Dict[str, List[ArtifactVersionResponse]], model_version: ModelVersionResponse, diff --git a/src/zenml/zen_server/rbac/rbac_sql_zen_store.py b/src/zenml/zen_server/rbac/rbac_sql_zen_store.py new file mode 100644 index 0000000000..1d6082a9e7 --- /dev/null +++ b/src/zenml/zen_server/rbac/rbac_sql_zen_store.py @@ -0,0 +1,173 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+"""RBAC SQL Zen Store implementation.""" + +from typing import ( + Optional, + Tuple, +) +from uuid import UUID + +from zenml.logger import get_logger +from zenml.models import ( + ModelRequest, + ModelResponse, + ModelVersionRequest, + ModelVersionResponse, +) +from zenml.zen_server.feature_gate.endpoint_utils import ( + check_entitlement, + report_usage, +) +from zenml.zen_server.rbac.models import Action, ResourceType +from zenml.zen_server.rbac.utils import ( + verify_permission, + verify_permission_for_model, +) +from zenml.zen_stores.sql_zen_store import SqlZenStore + +logger = get_logger(__name__) + + +class RBACSqlZenStore(SqlZenStore): + """Wrapper around the SQLZenStore that implements RBAC functionality.""" + + def _get_or_create_model( + self, model_request: ModelRequest + ) -> Tuple[bool, ModelResponse]: + """Get or create a model. + + Args: + model_request: The model request. + + # noqa: DAR401 + Raises: + Exception: If the user is not allowed to create a model. + + Returns: + A boolean whether the model was created or not, and the model. + """ + allow_model_creation = True + error = None + + try: + verify_permission( + resource_type=ResourceType.MODEL, action=Action.CREATE + ) + check_entitlement(resource_type=ResourceType.MODEL) + except Exception as e: + allow_model_creation = False + error = e + + if allow_model_creation: + created, model_response = super()._get_or_create_model( + model_request + ) + else: + try: + model_response = self.get_model(model_request.name) + created = False + except KeyError: + # The model does not exist. We now raise the error that + # explains why the model could not be created, instead of just + # the KeyError that it doesn't exist + assert error + raise error from None + + if created: + report_usage( + resource_type=ResourceType.MODEL, resource_id=model_response.id + ) + else: + verify_permission_for_model(model_response, action=Action.READ) + + return created, model_response + + def _get_model_version( + self, + model_id: UUID, + version_name: Optional[str] = None, + producer_run_id: Optional[UUID] = None, + ) -> ModelVersionResponse: + """Get a model version. + + Args: + model_id: The ID of the model. + version_name: The name of the model version. + producer_run_id: The ID of the producer pipeline run. If this is + set, only numeric versions created as part of the pipeline run + will be returned. + + Returns: + The model version. + """ + model_version = super()._get_model_version( + model_id=model_id, + version_name=version_name, + producer_run_id=producer_run_id, + ) + verify_permission_for_model(model_version, action=Action.READ) + return model_version + + def _get_or_create_model_version( + self, + model_version_request: ModelVersionRequest, + producer_run_id: Optional[UUID] = None, + ) -> Tuple[bool, ModelVersionResponse]: + """Get or create a model version. + + Args: + model_version_request: The model version request. + producer_run_id: ID of the producer pipeline run. + + # noqa: DAR401 + Raises: + Exception: If the authenticated user is not allowed to + create a model version. + + Returns: + A boolean whether the model version was created or not, and the + model version. 
+        """
+        allow_creation = True
+        error = None
+
+        try:
+            verify_permission(
+                resource_type=ResourceType.MODEL_VERSION, action=Action.CREATE
+            )
+        except Exception as e:
+            allow_creation = False
+            error = e
+
+        if allow_creation:
+            created, model_version_response = (
+                super()._get_or_create_model_version(
+                    model_version_request, producer_run_id=producer_run_id
+                )
+            )
+        else:
+            try:
+                model_version_response = self._get_model_version(
+                    model_id=model_version_request.model,
+                    version_name=model_version_request.name,
+                    producer_run_id=producer_run_id,
+                )
+                created = False
+            except KeyError:
+                # The model version does not exist. We now raise the error that
+                # explains why the version could not be created, instead of just
+                # the KeyError that it doesn't exist
+                assert error
+                raise error from None
+
+        return created, model_version_response
diff --git a/src/zenml/zen_stores/base_zen_store.py b/src/zenml/zen_stores/base_zen_store.py
index 210f6b8b1e..11467c4481 100644
--- a/src/zenml/zen_stores/base_zen_store.py
+++ b/src/zenml/zen_stores/base_zen_store.py
@@ -36,6 +36,7 @@
     DEFAULT_STACK_AND_COMPONENT_NAME,
     DEFAULT_WORKSPACE_NAME,
     ENV_ZENML_DEFAULT_WORKSPACE_NAME,
+    ENV_ZENML_SERVER,
     IS_DEBUG_ENV,
 )
 from zenml.enums import (
@@ -155,9 +156,16 @@ def get_store_class(store_type: StoreType) -> Type["BaseZenStore"]:
             TypeError: If the store type is unsupported.
         """
         if store_type == StoreType.SQL:
-            from zenml.zen_stores.sql_zen_store import SqlZenStore
+            if os.environ.get(ENV_ZENML_SERVER):
+                from zenml.zen_server.rbac.rbac_sql_zen_store import (
+                    RBACSqlZenStore,
+                )
+
+                return RBACSqlZenStore
+            else:
+                from zenml.zen_stores.sql_zen_store import SqlZenStore
 
-            return SqlZenStore
+                return SqlZenStore
         elif store_type == StoreType.REST:
             from zenml.zen_stores.rest_zen_store import RestZenStore
 
diff --git a/src/zenml/zen_stores/migrations/versions/a1237ba94fd8_add_model_version_producer_run_unique_.py b/src/zenml/zen_stores/migrations/versions/a1237ba94fd8_add_model_version_producer_run_unique_.py
new file mode 100644
index 0000000000..007b5ddbb8
--- /dev/null
+++ b/src/zenml/zen_stores/migrations/versions/a1237ba94fd8_add_model_version_producer_run_unique_.py
@@ -0,0 +1,68 @@
+"""Add model version producer run unique constraint [a1237ba94fd8].
+
+Revision ID: a1237ba94fd8
+Revises: 26351d482b9e
+Create Date: 2024-12-13 10:28:55.432414
+
+"""
+
+import sqlalchemy as sa
+import sqlmodel
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "a1237ba94fd8"
+down_revision = "26351d482b9e"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Upgrade database schema and/or data, creating a new revision."""
+    # ### commands auto generated by Alembic - please adjust!
### + with op.batch_alter_table("model_version", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "producer_run_id_if_numeric", + sqlmodel.sql.sqltypes.GUID(), + nullable=True, + ) + ) + + # Set the producer_run_id_if_numeric column to the model version ID for + # existing rows + connection = op.get_bind() + metadata = sa.MetaData() + metadata.reflect(only=("model_version",), bind=connection) + model_version_table = sa.Table("model_version", metadata) + + connection.execute( + model_version_table.update().values( + producer_run_id_if_numeric=model_version_table.c.id + ) + ) + + with op.batch_alter_table("model_version", schema=None) as batch_op: + batch_op.alter_column( + "producer_run_id_if_numeric", + existing_type=sqlmodel.sql.sqltypes.GUID(), + nullable=False, + ) + batch_op.create_unique_constraint( + "unique_numeric_version_for_pipeline_run", + ["model_id", "producer_run_id_if_numeric"], + ) + + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade database schema and/or data back to the previous revision.""" + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table("model_version", schema=None) as batch_op: + batch_op.drop_constraint( + "unique_numeric_version_for_pipeline_run", type_="unique" + ) + batch_op.drop_column("producer_run_id_if_numeric") + + # ### end Alembic commands ### diff --git a/src/zenml/zen_stores/schemas/model_schemas.py b/src/zenml/zen_stores/schemas/model_schemas.py index feb4a93dc8..41c186c75c 100644 --- a/src/zenml/zen_stores/schemas/model_schemas.py +++ b/src/zenml/zen_stores/schemas/model_schemas.py @@ -15,10 +15,16 @@ from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast -from uuid import UUID +from uuid import UUID, uuid4 from pydantic import ConfigDict -from sqlalchemy import BOOLEAN, INTEGER, TEXT, Column, UniqueConstraint +from sqlalchemy import ( + BOOLEAN, + INTEGER, + TEXT, + Column, + UniqueConstraint, +) from sqlmodel import Field, Relationship from zenml.enums import ( @@ -228,11 +234,13 @@ class ModelVersionSchema(NamedSchema, RunMetadataInterface, table=True): __tablename__ = MODEL_VERSION_TABLENAME __table_args__ = ( - # We need two unique constraints here: + # We need three unique constraints here: # - The first to ensure that each model version for a # model has a unique version number # - The second one to ensure that explicit names given by # users are unique + # - The third one to ensure that a pipeline run only produces a single + # auto-incremented version per model UniqueConstraint( "number", "model_id", @@ -243,6 +251,11 @@ class ModelVersionSchema(NamedSchema, RunMetadataInterface, table=True): "model_id", name="unique_version_for_model_id", ), + UniqueConstraint( + "model_id", + "producer_run_id_if_numeric", + name="unique_numeric_version_for_pipeline_run", + ), ) workspace_id: UUID = build_foreign_key_field( @@ -312,12 +325,23 @@ class ModelVersionSchema(NamedSchema, RunMetadataInterface, table=True): ), ) pipeline_runs: List["PipelineRunSchema"] = Relationship( - back_populates="model_version" + back_populates="model_version", ) step_runs: List["StepRunSchema"] = Relationship( back_populates="model_version" ) + # We want to make sure each pipeline run only creates a single numeric + # version for each model. To solve this, we need to add a unique constraint. + # If a value of a unique constraint is NULL it is ignored and the + # remaining values in the unique constraint have to be unique. 
In + # our case however, we only want the unique constraint applied in + # case there is a producer run and only for numeric versions. To solve this, + # we fall back to the model version ID (which is the primary key and + # therefore unique) in case there is no producer run or the version is not + # numeric. + producer_run_id_if_numeric: UUID + # TODO: In Pydantic v2, the `model_` is a protected namespaces for all # fields defined under base models. If not handled, this raises a warning. # It is possible to suppress this warning message with the following @@ -328,24 +352,36 @@ class ModelVersionSchema(NamedSchema, RunMetadataInterface, table=True): @classmethod def from_request( - cls, model_version_request: ModelVersionRequest + cls, + model_version_request: ModelVersionRequest, + model_version_number: int, + producer_run_id: Optional[UUID] = None, ) -> "ModelVersionSchema": """Convert an `ModelVersionRequest` to an `ModelVersionSchema`. Args: model_version_request: The request model version to convert. + model_version_number: The model version number. + producer_run_id: The ID of the producer run. Returns: The converted schema. """ + id_ = uuid4() + is_numeric = str(model_version_number) == model_version_request.name + return cls( + id=id_, workspace_id=model_version_request.workspace, user_id=model_version_request.user, model_id=model_version_request.model, name=model_version_request.name, - number=model_version_request.number, + number=model_version_number, description=model_version_request.description, stage=model_version_request.stage, + producer_run_id_if_numeric=producer_run_id + if (producer_run_id and is_numeric) + else id_, ) def to_model( diff --git a/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py b/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py index ae2fe609bc..409bd2eebf 100644 --- a/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py +++ b/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py @@ -228,13 +228,6 @@ def to_model( Returns: The created `PipelineDeploymentResponse`. 
""" - pipeline_configuration = PipelineConfiguration.model_validate_json( - self.pipeline_configuration - ) - step_configurations = json.loads(self.step_configurations) - for s, c in step_configurations.items(): - step_configurations[s] = Step.model_validate(c) - body = PipelineDeploymentResponseBody( user=self.user.to_model() if self.user else None, created=self.created, @@ -242,6 +235,13 @@ def to_model( ) metadata = None if include_metadata: + pipeline_configuration = PipelineConfiguration.model_validate_json( + self.pipeline_configuration + ) + step_configurations = json.loads(self.step_configurations) + for s, c in step_configurations.items(): + step_configurations[s] = Step.model_validate(c) + metadata = PipelineDeploymentResponseMetadata( workspace=self.workspace.to_model(), run_name_template=self.run_name_template, diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py index ce20d6687f..19bdda8b28 100644 --- a/src/zenml/zen_stores/sql_zen_store.py +++ b/src/zenml/zen_stores/sql_zen_store.py @@ -71,6 +71,7 @@ col, create_engine, delete, + desc, or_, select, ) @@ -296,7 +297,11 @@ replace_localhost_with_internal_hostname, ) from zenml.utils.pydantic_utils import before_validator_handler -from zenml.utils.string_utils import random_str, validate_name +from zenml.utils.string_utils import ( + format_name_template, + random_str, + validate_name, +) from zenml.zen_stores import template_utils from zenml.zen_stores.base_zen_store import ( BaseZenStore, @@ -5154,6 +5159,20 @@ def create_run( "already exists." ) + if model_version_id := self._get_or_create_model_version_for_run( + new_run + ): + new_run.model_version_id = model_version_id + session.add(new_run) + session.commit() + + self.create_model_version_pipeline_run_link( + ModelVersionPipelineRunRequest( + model_version=model_version_id, pipeline_run=new_run.id + ) + ) + session.refresh(new_run) + return new_run.to_model( include_metadata=True, include_resources=True ) @@ -8216,6 +8235,21 @@ def create_run_step(self, step_run: StepRunRequest) -> StepRunResponse: session.commit() session.refresh(step_schema) + if model_version_id := self._get_or_create_model_version_for_run( + step_schema + ): + step_schema.model_version_id = model_version_id + session.add(step_schema) + session.commit() + + self.create_model_version_pipeline_run_link( + ModelVersionPipelineRunRequest( + model_version=model_version_id, + pipeline_run=step_schema.pipeline_run_id, + ) + ) + session.refresh(step_schema) + return step_schema.to_model( include_metadata=True, include_resources=True ) @@ -10218,6 +10252,22 @@ def update_model( # ----------------------------- Model Versions ----------------------------- + def _get_or_create_model( + self, model_request: ModelRequest + ) -> Tuple[bool, ModelResponse]: + """Get or create a model. + + Args: + model_request: The model request. + + Returns: + A boolean whether the model was created or not, and the model. 
+ """ + try: + return True, self.create_model(model_request) + except EntityExistsError: + return False, self.get_model(model_request.name) + def _get_next_numeric_version_for_model( self, session: Session, model_id: UUID ) -> int: @@ -10242,55 +10292,276 @@ def _get_next_numeric_version_for_model( else: return int(current_max_version) + 1 - def _model_version_exists(self, model_id: UUID, version: str) -> bool: + def _model_version_exists( + self, + model_id: UUID, + version: Optional[str] = None, + producer_run_id: Optional[UUID] = None, + ) -> bool: """Check if a model version with a certain version exists. Args: model_id: The model ID of the version. version: The version name. + producer_run_id: The producer run ID. If given, checks if a numeric + version for the producer run exists. Returns: - If a model version with the given version name exists. + If a model version for the given arguments exists. """ + query = select(ModelVersionSchema.id).where( + ModelVersionSchema.model_id == model_id + ) + + if version: + query = query.where(ModelVersionSchema.name == version) + + if producer_run_id: + query = query.where( + ModelVersionSchema.producer_run_id_if_numeric + == producer_run_id, + ) + with Session(self.engine) as session: - return ( - session.exec( - select(ModelVersionSchema.id) - .where(ModelVersionSchema.model_id == model_id) - .where(ModelVersionSchema.name == version) - ).first() - is not None + return session.exec(query).first() is not None + + def _get_model_version( + self, + model_id: UUID, + version_name: Optional[str] = None, + producer_run_id: Optional[UUID] = None, + ) -> ModelVersionResponse: + """Get a model version. + + Args: + model_id: The ID of the model. + version_name: The name of the model version. + producer_run_id: The ID of the producer pipeline run. If this is + set, only numeric versions created as part of the pipeline run + will be returned. + + Raises: + ValueError: If no version name or producer run ID was provided. + KeyError: If no model version was found. + + Returns: + The model version. + """ + query = select(ModelVersionSchema).where( + ModelVersionSchema.model_id == model_id + ) + + if version_name: + if version_name.isnumeric(): + query = query.where( + ModelVersionSchema.number == int(version_name) + ) + error_text = ( + f"No version with number {version_name} found " + f"for model {model_id}." + ) + elif version_name in ModelStages.values(): + if version_name == ModelStages.LATEST: + query = query.order_by( + desc(col(ModelVersionSchema.number)) + ).limit(1) + else: + query = query.where( + ModelVersionSchema.stage == version_name + ) + error_text = ( + f"No {version_name} stage version found for " + f"model {model_id}." + ) + else: + query = query.where(ModelVersionSchema.name == version_name) + error_text = ( + f"No {version_name} version found for model {model_id}." + ) + + elif producer_run_id: + query = query.where( + ModelVersionSchema.producer_run_id_if_numeric + == producer_run_id, + ) + error_text = ( + f"No numeric model version found for model {model_id} " + f"and producer run {producer_run_id}." + ) + else: + raise ValueError( + "Version name or producer run id need to be specified." 
) - @track_decorator(AnalyticsEvent.CREATED_MODEL_VERSION) - def create_model_version( - self, model_version: ModelVersionRequest + with Session(self.engine) as session: + schema = session.exec(query).one_or_none() + + if not schema: + raise KeyError(error_text) + + return schema.to_model( + include_metadata=True, include_resources=True + ) + + def _get_or_create_model_version( + self, + model_version_request: ModelVersionRequest, + producer_run_id: Optional[UUID] = None, + ) -> Tuple[bool, ModelVersionResponse]: + """Get or create a model version. + + Args: + model_version_request: The model version request. + producer_run_id: ID of the producer pipeline run. + + Raises: + EntityCreationError: If the model version creation failed. + + Returns: + A boolean whether the model version was created or not, and the + model version. + """ + try: + model_version = self._create_model_version( + model_version=model_version_request, + producer_run_id=producer_run_id, + ) + track(event=AnalyticsEvent.CREATED_MODEL_VERSION) + return True, model_version + except EntityCreationError: + # Need to explicitly re-raise this here as otherwise the catching + # of the RuntimeError would include this + raise + except RuntimeError: + return False, self._get_model_version( + model_id=model_version_request.model, + producer_run_id=producer_run_id, + ) + except EntityExistsError: + return False, self._get_model_version( + model_id=model_version_request.model, + version_name=model_version_request.name, + ) + + def _get_or_create_model_version_for_run( + self, pipeline_or_step_run: Union[PipelineRunSchema, StepRunSchema] + ) -> Optional[UUID]: + """Get or create a model version for a pipeline or step run. + + Args: + pipeline_or_step_run: The pipeline or step run for which to create + the model version. + + Returns: + The model version. 
+ """ + if isinstance(pipeline_or_step_run, PipelineRunSchema): + producer_run_id = pipeline_or_step_run.id + pipeline_run = pipeline_or_step_run.to_model(include_metadata=True) + configured_model = pipeline_run.config.model + substitutions = pipeline_run.config.substitutions + else: + producer_run_id = pipeline_or_step_run.pipeline_run_id + step_run = pipeline_or_step_run.to_model(include_metadata=True) + configured_model = step_run.config.model + substitutions = step_run.config.substitutions + + if not configured_model: + return None + + model_request = ModelRequest( + name=format_name_template( + configured_model.name, substitutions=substitutions + ), + license=configured_model.license, + description=configured_model.description, + audience=configured_model.audience, + use_cases=configured_model.use_cases, + limitations=configured_model.limitations, + trade_offs=configured_model.trade_offs, + ethics=configured_model.ethics, + save_models_to_registry=configured_model.save_models_to_registry, + user=pipeline_or_step_run.user_id, + workspace=pipeline_or_step_run.workspace_id, + ) + + _, model_response = self._get_or_create_model( + model_request=model_request + ) + + version_name = None + if configured_model.version is not None: + version_name = format_name_template( + str(configured_model.version), substitutions=substitutions + ) + + # If the model version was specified to be a numeric version or + # stage we don't try to create it (which will fail because it is not + # allowed) but try to fetch it immediately + if ( + version_name.isnumeric() + or version_name in ModelStages.values() + ): + return self._get_model_version( + model_id=model_response.id, version_name=version_name + ).id + + model_version_request = ModelVersionRequest( + model=model_response.id, + name=version_name, + description=configured_model.description, + tags=configured_model.tags, + user=pipeline_or_step_run.user_id, + workspace=pipeline_or_step_run.workspace_id, + ) + + _, model_version_response = self._get_or_create_model_version( + model_version_request=model_version_request, + producer_run_id=producer_run_id, + ) + return model_version_response.id + + def _create_model_version( + self, + model_version: ModelVersionRequest, + producer_run_id: Optional[UUID] = None, ) -> ModelVersionResponse: """Creates a new model version. Args: model_version: the Model Version to be created. + producer_run_id: ID of the pipeline run that produced this model + version. Returns: The newly created model version. Raises: - ValueError: If `number` is not None during model version creation. + ValueError: If the requested version name is invalid. EntityExistsError: If a model version with the given name already exists. EntityCreationError: If the model version creation failed. + RuntimeError: If an auto-incremented model version already exists + for the producer run. """ - if model_version.number is not None: - raise ValueError( - "`number` field must be None during model version creation." - ) + has_custom_name = False + if model_version.name: + has_custom_name = True + validate_name(model_version) - model = self.get_model(model_version.model) + if model_version.name.isnumeric(): + raise ValueError( + "Can't create model version with custom numeric model " + "version name." 
+ ) - has_custom_name = model_version.name is not None - if has_custom_name: - validate_name(model_version) + if str(model_version.name).lower() in ModelStages.values(): + raise ValueError( + "Can't create model version with a name that is used as a " + f"model version stage ({ModelStages.values()})." + ) + model = self.get_model(model_version.model) model_version_id = None remaining_tries = MAX_RETRIES_FOR_VERSIONED_ENTITY_CREATION @@ -10298,17 +10569,19 @@ def create_model_version( remaining_tries -= 1 try: with Session(self.engine) as session: - model_version.number = ( + model_version_number = ( self._get_next_numeric_version_for_model( session=session, model_id=model.id, ) ) if not has_custom_name: - model_version.name = str(model_version.number) + model_version.name = str(model_version_number) model_version_schema = ModelVersionSchema.from_request( - model_version + model_version, + model_version_number=model_version_number, + producer_run_id=producer_run_id, ) session.add(model_version_schema) session.commit() @@ -10329,6 +10602,13 @@ def create_model_version( f"{model_version.name}): A model with the " "same name and version already exists." ) + elif producer_run_id and self._model_version_exists( + model_id=model.id, producer_run_id=producer_run_id + ): + raise RuntimeError( + "Auto-incremented model version already exists for " + f"producer run {producer_run_id}." + ) elif remaining_tries == 0: raise EntityCreationError( f"Failed to create version for model " @@ -10347,10 +10627,9 @@ def create_model_version( ) logger.debug( "Failed to create model version %s " - "(version %s) due to an integrity error. " + "due to an integrity error. " "Retrying in %f seconds.", model.name, - model_version.number, sleep_duration, ) time.sleep(sleep_duration) @@ -10365,6 +10644,20 @@ def create_model_version( return self.get_model_version(model_version_id) + @track_decorator(AnalyticsEvent.CREATED_MODEL_VERSION) + def create_model_version( + self, model_version: ModelVersionRequest + ) -> ModelVersionResponse: + """Creates a new model version. + + Args: + model_version: the Model Version to be created. + + Returns: + The newly created model version. 
+ """ + return self._create_model_version(model_version=model_version) + def get_model_version( self, model_version_id: UUID, hydrate: bool = True ) -> ModelVersionResponse: diff --git a/tests/integration/functional/model/test_model_version.py b/tests/integration/functional/model/test_model_version.py index d16b9dc31b..9d91e1eb76 100644 --- a/tests/integration/functional/model/test_model_version.py +++ b/tests/integration/functional/model/test_model_version.py @@ -268,13 +268,11 @@ def test_model_fetch_model_and_version_latest(self): def test_init_stage_logic(self): """Test that if version is set to string contained in ModelStages user is informed about it.""" with ModelContext(create_model=False) as (mdl_name, _, _): - with mock.patch("zenml.model.model.logger.info") as logger: - mv = Model( - name=mdl_name, - version=ModelStages.PRODUCTION.value, - ) - logger.assert_called_once() - assert mv.version == ModelStages.PRODUCTION.value + mv = Model( + name=mdl_name, + version=ModelStages.PRODUCTION.value, + ) + assert mv.version == ModelStages.PRODUCTION.value mv = Model(name=mdl_name, version=ModelStages.PRODUCTION) assert mv.version == ModelStages.PRODUCTION diff --git a/tests/integration/functional/pipelines/test_pipeline_context.py b/tests/integration/functional/pipelines/test_pipeline_context.py index f070ca0272..e3049d6cb5 100644 --- a/tests/integration/functional/pipelines/test_pipeline_context.py +++ b/tests/integration/functional/pipelines/test_pipeline_context.py @@ -93,7 +93,7 @@ def test_that_argument_as_get_artifact_of_model_in_pipeline_context_fails_if_not clean_client: "Client", ): producer_pipe(False) - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): consumer_pipe() diff --git a/tests/integration/functional/steps/test_model_version.py b/tests/integration/functional/steps/test_model_version.py index 2100890bd8..3990494a7f 100644 --- a/tests/integration/functional/steps/test_model_version.py +++ b/tests/integration/functional/steps/test_model_version.py @@ -22,7 +22,7 @@ from zenml import get_pipeline_context, get_step_context, pipeline, step from zenml.artifacts.artifact_config import ArtifactConfig from zenml.client import Client -from zenml.enums import ModelStages +from zenml.enums import ExecutionStatus, ModelStages from zenml.model.model import Model @@ -571,7 +571,7 @@ def _inner_pipeline(): # this will run all steps, including one requesting new version run_1 = f"run_{uuid4()}" # model is configured with latest stage, so a warm-up needed - with pytest.raises(RuntimeError): + with pytest.raises(KeyError): _inner_pipeline.with_options(run_name=run_1)() run_2 = f"run_{uuid4()}" Model(name="step")._get_or_create_model_version() @@ -812,3 +812,147 @@ def _inner_pipeline(): assert "{time}" not in versions[1].version assert len(versions[1]._get_model_version().data_artifact_ids["data"]) == 2 assert versions[1].version != first_version_name + + +@step +def noop() -> None: + pass + + +def test_model_version_creation(clean_client: "Client"): + """Tests that model versions get created correctly for a pipeline run.""" + shared_model_name = random_resource_name() + custom_model_name = random_resource_name() + + @pipeline(model=Model(name=shared_model_name), enable_cache=False) + def _inner_pipeline(): + noop.with_options(model=Model(name=shared_model_name))(id="shared") + noop.with_options( + model=Model(name=shared_model_name, version="custom") + )(id="custom_version") + noop.with_options(model=Model(name=custom_model_name))( + id="custom_model" + ) + + run_1 = 
_inner_pipeline() + shared_versions = clean_client.list_model_versions(shared_model_name) + assert len(shared_versions) == 2 + implicit_version = shared_versions[-2] + explicit_version = shared_versions[-1] + + custom_versions = clean_client.list_model_versions(custom_model_name) + assert len(custom_versions) == 1 + custom_version = custom_versions[-1] + + assert run_1.model_version_id == implicit_version.id + for name, step_ in run_1.steps.items(): + if name == "shared": + assert step_.model_version_id == implicit_version.id + elif name == "custom_version": + assert step_.model_version_id == explicit_version.id + else: + assert step_.model_version_id == custom_version.id + links = clean_client.list_model_version_pipeline_run_links( + pipeline_run_id=run_1.id + ) + assert len(links) == 3 + + run_2 = _inner_pipeline() + shared_versions = clean_client.list_model_versions(shared_model_name) + assert len(shared_versions) == 3 + implicit_version = shared_versions[-1] + explicit_version = shared_versions[-2] + + custom_versions = clean_client.list_model_versions(custom_model_name) + assert len(custom_versions) == 2 + custom_version = custom_versions[-1] + + assert run_2.model_version_id == implicit_version.id + for name, step_ in run_2.steps.items(): + if name == "shared": + assert step_.model_version_id == implicit_version.id + elif name == "custom_version": + assert step_.model_version_id == explicit_version.id + else: + assert step_.model_version_id == custom_version.id + links = clean_client.list_model_version_pipeline_run_links( + pipeline_run_id=run_2.id + ) + assert len(links) == 3 + + # Run with caching enabled to see if everything still works + run_3 = _inner_pipeline.with_options(enable_cache=True)() + shared_versions = clean_client.list_model_versions(shared_model_name) + assert len(shared_versions) == 4 + implicit_version = shared_versions[-1] + explicit_version = shared_versions[-3] + + custom_versions = clean_client.list_model_versions(custom_model_name) + assert len(custom_versions) == 3 + custom_version = custom_versions[-1] + + assert run_3.model_version_id == implicit_version.id + for name, step_ in run_3.steps.items(): + assert step_.status == ExecutionStatus.CACHED + + if name == "shared": + assert step_.model_version_id == implicit_version.id + elif name == "custom_version": + assert step_.model_version_id == explicit_version.id + else: + assert step_.model_version_id == custom_version.id + links = clean_client.list_model_version_pipeline_run_links( + pipeline_run_id=run_3.id + ) + assert len(links) == 3 + + +def test_model_version_fetching_by_stage(clean_client: "Client"): + """Tests that model versions can be fetched by number or stage.""" + model_name = random_resource_name() + + @pipeline(model=Model(name=model_name), enable_cache=False) + def _creator_pipeline(): + noop() + + @pipeline(model=Model(name=model_name, version=1), enable_cache=False) + def _fetch_by_version_number_pipeline(): + noop() + + @pipeline( + model=Model(name=model_name, version="latest"), enable_cache=False + ) + def _fetch_latest_version_pipeline(): + noop() + + @pipeline( + model=Model(name=model_name, version="production"), enable_cache=False + ) + def _fetch_prod_version_pipeline(): + noop() + + with pytest.raises(KeyError): + _fetch_by_version_number_pipeline() + + with pytest.raises(KeyError): + _fetch_latest_version_pipeline() + + with pytest.raises(KeyError): + _fetch_prod_version_pipeline() + + _creator_pipeline() + _creator_pipeline() + + versions = 
clean_client.list_model_versions(model_name) + assert len(versions) == 2 + mv_1, mv_2 = versions + mv_1.set_stage("production") + + run = _fetch_by_version_number_pipeline() + assert run.model_version_id == mv_1.id + + run = _fetch_latest_version_pipeline() + assert run.model_version_id == mv_2.id + + run = _fetch_prod_version_pipeline() + assert run.model_version_id == mv_1.id diff --git a/tests/integration/integrations/gcp/orchestrators/test_vertex_orchestrator.py b/tests/integration/integrations/gcp/orchestrators/test_vertex_orchestrator.py index 6ffde7bdda..8e5c41f0d3 100644 --- a/tests/integration/integrations/gcp/orchestrators/test_vertex_orchestrator.py +++ b/tests/integration/integrations/gcp/orchestrators/test_vertex_orchestrator.py @@ -141,9 +141,13 @@ def test_vertex_orchestrator_stack_validation( {"cpu_limit": "4", "gpu_limit": 4, "memory_limit": "1G"}, { "accelerator": { + "count": "1", + "type": "NVIDIA_TESLA_K80", "resourceCount": "1", "resourceType": "NVIDIA_TESLA_K80", }, + "cpuLimit": 1.0, + "memoryLimit": 1.0, "resourceCpuLimit": "1.0", "resourceMemoryLimit": "1G", }, @@ -154,9 +158,13 @@ def test_vertex_orchestrator_stack_validation( {"cpu_limit": "1.0", "gpu_limit": 1, "memory_limit": "1G"}, { "accelerator": { + "count": "1", + "type": "NVIDIA_TESLA_K80", "resourceCount": "1", "resourceType": "NVIDIA_TESLA_K80", }, + "cpuLimit": 1.0, + "memoryLimit": 1.0, "resourceCpuLimit": "1.0", "resourceMemoryLimit": "1G", }, @@ -166,6 +174,8 @@ def test_vertex_orchestrator_stack_validation( ResourceSettings(cpu_count=1, gpu_count=None, memory="1GB"), {"cpu_limit": None, "gpu_limit": None, "memory_limit": None}, { + "cpuLimit": 1.0, + "memoryLimit": 1.0, "resourceCpuLimit": "1.0", "resourceMemoryLimit": "1G", }, @@ -174,7 +184,12 @@ def test_vertex_orchestrator_stack_validation( ( ResourceSettings(cpu_count=1, gpu_count=0, memory="1GB"), {"cpu_limit": None, "gpu_limit": None, "memory_limit": None}, - {"resourceCpuLimit": "1.0", "resourceMemoryLimit": "1G"}, + { + "cpuLimit": 1.0, + "memoryLimit": 1.0, + "resourceCpuLimit": "1.0", + "resourceMemoryLimit": "1G", + }, ), ], ) @@ -233,13 +248,16 @@ def _build_kfp_pipeline() -> None: job_spec = pipeline_json["deploymentSpec"]["executors"][ f"exec-{step_name}" ]["container"] + if "accelerator" in job_spec["resources"]: - if "count" in job_spec["resources"]["accelerator"]: - expected_resources["accelerator"]["count"] = expected_resources[ - "accelerator" - ]["resourceCount"] - if "type" in job_spec["resources"]["accelerator"]: - expected_resources["accelerator"]["type"] = expected_resources[ - "accelerator" - ]["resourceType"] + if "resourceCount" not in job_spec["resources"]["accelerator"]: + expected_resources["accelerator"].pop("resourceCount", None) + if "resourceType" not in job_spec["resources"]["accelerator"]: + expected_resources["accelerator"].pop("resourceType", None) + + if "resourceCpuLimit" not in job_spec["resources"]: + expected_resources.pop("resourceCpuLimit", None) + if "resourceMemoryLimit" not in job_spec["resources"]: + expected_resources.pop("resourceMemoryLimit", None) + assert job_spec["resources"] == expected_resources diff --git a/tests/unit/model/test_model_version_init.py b/tests/unit/model/test_model_version_init.py deleted file mode 100644 index 21009b96d5..0000000000 --- a/tests/unit/model/test_model_version_init.py +++ /dev/null @@ -1,27 +0,0 @@ -from unittest.mock import patch - -import pytest - -from zenml.model.model import Model - - -@pytest.mark.parametrize( - "version_name,logger", - [ - ["staging", 
"info"], - ["1", "info"], - [1, "info"], - ], - ids=[ - "Pick model by text stage", - "Pick model by text version number", - "Pick model by integer version number", - ], -) -def test_init_warns(version_name, logger): - with patch(f"zenml.model.model.logger.{logger}") as logger: - Model( - name="foo", - version=version_name, - ) - logger.assert_called_once() From 384cb8b936f2ec08605cfb321365cf425346a411 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Fri, 13 Dec 2024 16:35:22 +0100 Subject: [PATCH 16/18] Fix request model validation (#3245) * Fix request model validation error * Don't throw value error for invalid uuids * Docstring * Fix tests * Handle invalid uuid values --- src/zenml/models/v2/base/filter.py | 36 ++++++++++--------------- src/zenml/zen_server/utils.py | 7 ++--- tests/unit/models/test_filter_models.py | 12 +-------- 3 files changed, 19 insertions(+), 36 deletions(-) diff --git a/src/zenml/models/v2/base/filter.py b/src/zenml/models/v2/base/filter.py index 1b79696134..d2aa8380be 100644 --- a/src/zenml/models/v2/base/filter.py +++ b/src/zenml/models/v2/base/filter.py @@ -113,7 +113,7 @@ def validate_operation(cls, value: Any) -> Any: def generate_query_conditions( self, table: Type[SQLModel], - ) -> Union["ColumnElement[bool]"]: + ) -> "ColumnElement[bool]": """Generate the query conditions for the database. This method converts the Filter class into an appropriate SQLModel @@ -291,11 +291,19 @@ def generate_query_conditions_from_column(self, column: Any) -> Any: import sqlalchemy from sqlalchemy_utils.functions import cast_if + from zenml.utils import uuid_utils + # For equality checks, compare the UUID directly if self.operation == GenericFilterOps.EQUALS: + if not uuid_utils.is_valid_uuid(self.value): + return False + return column == self.value if self.operation == GenericFilterOps.NOT_EQUALS: + if not uuid_utils.is_valid_uuid(self.value): + return True + return column != self.value # For all other operations, cast and handle the column as string @@ -702,16 +710,10 @@ def generate_name_or_id_query_conditions( conditions = [] - try: - filter_ = FilterGenerator(table).define_filter( - column="id", value=value, operator=operator - ) - conditions.append(filter_.generate_query_conditions(table=table)) - except ValueError: - # UUID filter with equal operators and no full UUID fail with - # a ValueError. In this case, we already know that the filter - # will not produce any result and can simply ignore it. - pass + filter_ = FilterGenerator(table).define_filter( + column="id", value=value, operator=operator + ) + conditions.append(filter_.generate_query_conditions(table=table)) filter_ = FilterGenerator(table).define_filter( column="name", value=value, operator=operator @@ -1105,18 +1107,8 @@ def _define_uuid_filter( A Filter object. Raises: - ValueError: If the value is not a valid UUID. + ValueError: If the value for a oneof filter is not a list. """ - # For equality checks, ensure that the value is a valid UUID. - if operator == GenericFilterOps.EQUALS and not isinstance(value, UUID): - try: - UUID(value) - except ValueError as e: - raise ValueError( - "Invalid value passed as UUID query parameter." - ) from e - - # For equality checks, ensure that the value is a valid UUID. 
if operator == GenericFilterOps.ONEOF and not isinstance(value, list):
            raise ValueError(ONEOF_ERROR)

diff --git a/src/zenml/zen_server/utils.py b/src/zenml/zen_server/utils.py
index ff96c7a640..86414385ff 100644
--- a/src/zenml/zen_server/utils.py
+++ b/src/zenml/zen_server/utils.py
@@ -421,6 +421,8 @@ def f(model: Model = Depends(make_dependable(Model))):
     """
     from fastapi import Query
 
+    from zenml.zen_server.exceptions import error_detail
+
     def init_cls_and_handle_errors(*args: Any, **kwargs: Any) -> BaseModel:
         from fastapi import HTTPException
 
@@ -428,9 +430,8 @@ def init_cls_and_handle_errors(*args: Any, **kwargs: Any) -> BaseModel:
             inspect.signature(init_cls_and_handle_errors).bind(*args, **kwargs)
             return cls(*args, **kwargs)
         except ValidationError as e:
-            for error in e.errors():
-                error["loc"] = tuple(["query"] + list(error["loc"]))
-            raise HTTPException(422, detail=e.errors())
+            detail = error_detail(e, exception_type=ValueError)
+            raise HTTPException(422, detail=detail)
 
     params = {v.name: v for v in inspect.signature(cls).parameters.values()}
     query_params = getattr(cls, "API_MULTI_INPUT_PARAMS", [])
diff --git a/tests/unit/models/test_filter_models.py b/tests/unit/models/test_filter_models.py
index 46b711bb7c..c0d69ea4d2 100644
--- a/tests/unit/models/test_filter_models.py
+++ b/tests/unit/models/test_filter_models.py
@@ -235,21 +235,11 @@ def test_uuid_filter_model():
     )
 
 
-def test_uuid_filter_model_fails_for_invalid_uuids_on_equality():
-    """Test filtering for equality with invalid UUID fails."""
-    with pytest.raises(ValueError):
-        uuid_value = "a92k34"
-        SomeFilterModel(uuid_field=f"{GenericFilterOps.EQUALS}:{uuid_value}")
-
-
 def test_uuid_filter_model_succeeds_for_invalid_uuid_on_non_equality():
     """Test filtering with other UUID operations is possible with non-UUIDs."""
     filter_value = "a92k34"
     for filter_op in UUIDFilter.ALLOWED_OPS:
-        if (
-            filter_op == GenericFilterOps.EQUALS
-            or filter_op == GenericFilterOps.ONEOF
-        ):
+        if filter_op == GenericFilterOps.ONEOF:
             continue
         filter_model = SomeFilterModel(
             uuid_field=f"{filter_op}:{filter_value}"

From 2d8b3544a90130e6a791e94a4f4e3195559d1845 Mon Sep 17 00:00:00 2001
From: Alexej Penner
Date: Fri, 20 Dec 2024 16:12:55 +0100
Subject: [PATCH 17/18] Improve docs to encourage using secrets (#3272)

* Improved docs

* Update docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md

Co-authored-by: hyperlint-ai[bot] <154288675+hyperlint-ai[bot]@users.noreply.github.com>

---------

Co-authored-by: hyperlint-ai[bot] <154288675+hyperlint-ai[bot]@users.noreply.github.com>
---
 .../connect-your-git-repository.md            | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md b/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md
index 063f316fef..d2e82e82a5 100644
--- a/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md
+++ b/docs/book/how-to/project-setup-and-management/setting-up-a-project-repository/connect-your-git-repository.md
@@ -54,6 +54,21 @@ zenml code-repository register --type=github \
 
 where `<NAME>` is the name of the code repository you are registering, `<OWNER>` is the owner of the repository, `<REPOSITORY>` is the name of the repository, `<TOKEN>` is your GitHub Personal Access Token, and `<GITHUB_URL>` is the URL of the GitHub instance, which defaults to `https://github.com`. You will need to set a URL if you are using GitHub Enterprise.
 
+{% hint style="warning" %}
+Please refer to the section on using secrets for stack configuration in order to securely store your GitHub
+Personal Access Token.
+
+```shell
+# Using central secrets management
+zenml secret create github_secret \
+    --pa_token=<YOUR_TOKEN>
+
+# Then reference the token
+zenml code-repository register ... --token={{github_secret.pa_token}}
+    ...
+```
+{% endhint %}
+
 After registering the GitHub code repository, ZenML will automatically detect if your source files are being tracked by GitHub and store the commit hash for each pipeline run.
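A quick way to sanity-check the steps above, assuming the `zenml secret get` and `zenml code-repository list` CLI commands and the illustrative `github_secret` name used in the hint:

```shell
# Show the stored secret to confirm the pa_token key exists
zenml secret get github_secret

# List registered code repositories to confirm the new GitHub entry
zenml code-repository list
```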
@@ -96,6 +111,21 @@ zenml code-repository register --type=gitlab \
 
 where `<NAME>` is the name of the code repository you are registering, `<GROUP_NAME>` is the group of the project, `<PROJECT_NAME>` is the name of the project, `<TOKEN>` is your GitLab Personal Access Token, and `<GITLAB_URL>` is the URL of the GitLab instance, which defaults to `https://gitlab.com`. You will need to set a URL if you have a self-hosted GitLab instance.
 
+{% hint style="warning" %}
+Please refer to the section on using secrets for stack configuration in order to securely store your GitLab
+Personal Access Token.
+
+```shell
+# Using central secrets management
+zenml secret create gitlab_secret \
+    --pa_token=<YOUR_TOKEN>
+
+# Then reference the token
+zenml code-repository register ... --token={{gitlab_secret.pa_token}}
+    ...
+```
+{% endhint %}
+
 After registering the GitLab code repository, ZenML will automatically detect if your source files are being tracked by GitLab and store the commit hash for each pipeline run.
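One design note worth spelling out: because the `{{secret_name.key}}` reference should be resolved when the token is actually used rather than copied at registration time, rotating a token should only require updating the secret, not re-registering the repository. A minimal sketch, assuming the `zenml secret update` command and the illustrative `gitlab_secret` name from above:

```shell
# Rotate the token in place; the registered repository keeps referencing
# {{gitlab_secret.pa_token}} and should pick up the new value
zenml secret update gitlab_secret --pa_token=<NEW_TOKEN>
```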
From 2b65e523d92a65147c02ef26e718d25448a76cb8 Mon Sep 17 00:00:00 2001 From: Michael Schuster Date: Thu, 2 Jan 2025 10:19:01 +0100 Subject: [PATCH 18/18] Include service connector requirements in custom flavor registration (#3267) --- src/zenml/client.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/zenml/client.py b/src/zenml/client.py index 0441db7b97..b3dddd3e1c 100644 --- a/src/zenml/client.py +++ b/src/zenml/client.py @@ -109,7 +109,6 @@ EventSourceResponse, EventSourceUpdate, FlavorFilter, - FlavorRequest, FlavorResponse, ModelFilter, ModelRequest, @@ -2201,17 +2200,8 @@ def create_flavor( "configuration class' docstring." ) - create_flavor_request = FlavorRequest( - source=source, - type=flavor.type, - name=flavor.name, - config_schema=flavor.config_schema, - integration="custom", - user=self.active_user.id, - workspace=self.active_workspace.id, - ) - - return self.zen_store.create_flavor(flavor=create_flavor_request) + flavor_request = flavor.to_model(integration="custom", is_custom=True) + return self.zen_store.create_flavor(flavor=flavor_request) def get_flavor( self,