diff --git a/.github/workflows/docs_summarization_submit.yml b/.github/workflows/docs_summarization_submit.yml new file mode 100644 index 00000000000..e0a0e01e435 --- /dev/null +++ b/.github/workflows/docs_summarization_submit.yml @@ -0,0 +1,59 @@ +--- +name: Summarize and Upload Docs # On pushes to release/** branches: bundle docs/book markdown with repomix, keep the bundles as an artifact, then run the summarize and HuggingFace upload scripts +on: + push: + branches: [release/**] +jobs: + summarize-and-upload: + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install openai huggingface_hub # pathlib is part of the Python standard library, so it is not installed from PyPI + npm install -g repomix + - name: Generate repomix outputs + run: | + # Create directory for outputs + mkdir -p repomix-outputs + + # Full docs + repomix --include "docs/book/**/*.md" + mv repomix-output.txt repomix-outputs/llms-full.txt + + # Component guide + repomix --include "docs/book/component-guide/**/*.md" + mv repomix-output.txt repomix-outputs/component-guide.txt + + # User guide + repomix --include "docs/book/user-guide/**/*.md" + mv repomix-output.txt user-guide.txt + + # Getting started + repomix --include "docs/book/getting-started/**/*.md" + mv repomix-output.txt getting-started.txt + + # Merge user guide and getting started into basics + cat user-guide.txt getting-started.txt > repomix-outputs/basics.txt + rm user-guide.txt getting-started.txt + - name: Upload repomix outputs + uses: actions/upload-artifact@v4 + with: + name: repomix-outputs + path: repomix-outputs + retention-days: 5 + - name: Summarize and upload to HuggingFace + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: |- + python scripts/summarize_docs.py + python scripts/upload_to_huggingface.py diff --git a/examples/e2e/LICENSE b/examples/e2e/LICENSE index 75d01fb4544..909e24e117c 100644 --- a/examples/e2e/LICENSE +++ b/examples/e2e/LICENSE @@ -1,6 +1,6 @@ Apache 
Software License 2.0 -Copyright (c) ZenML GmbH 2024. All rights reserved. +Copyright (c) ZenML GmbH 2025. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/examples/e2e/configs/deployer_config.yaml b/examples/e2e/configs/deployer_config.yaml index 69111971d86..c1a394c45e9 100644 --- a/examples/e2e/configs/deployer_config.yaml +++ b/examples/e2e/configs/deployer_config.yaml @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/configs/inference_config.yaml b/examples/e2e/configs/inference_config.yaml index 69111971d86..c1a394c45e9 100644 --- a/examples/e2e/configs/inference_config.yaml +++ b/examples/e2e/configs/inference_config.yaml @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/configs/train_config.yaml b/examples/e2e/configs/train_config.yaml index f486cd11c69..d55160d0f8f 100644 --- a/examples/e2e/configs/train_config.yaml +++ b/examples/e2e/configs/train_config.yaml @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e/pipelines/__init__.py b/examples/e2e/pipelines/__init__.py index 634503d1978..2d699029b63 100644 --- a/examples/e2e/pipelines/__init__.py +++ b/examples/e2e/pipelines/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/pipelines/batch_inference.py b/examples/e2e/pipelines/batch_inference.py index fbc772273c5..09d2db75cba 100644 --- a/examples/e2e/pipelines/batch_inference.py +++ b/examples/e2e/pipelines/batch_inference.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/pipelines/deployment.py b/examples/e2e/pipelines/deployment.py index 7976cd39da7..bf40d56f601 100644 --- a/examples/e2e/pipelines/deployment.py +++ b/examples/e2e/pipelines/deployment.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/pipelines/training.py b/examples/e2e/pipelines/training.py index ba9a2f75489..60eaf72eef6 100644 --- a/examples/e2e/pipelines/training.py +++ b/examples/e2e/pipelines/training.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/run.py b/examples/e2e/run.py index 5850bc3ed84..0ad5551ba57 100644 --- a/examples/e2e/run.py +++ b/examples/e2e/run.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/__init__.py b/examples/e2e/steps/__init__.py index f58ac6f764d..95392373b01 100644 --- a/examples/e2e/steps/__init__.py +++ b/examples/e2e/steps/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/alerts/__init__.py b/examples/e2e/steps/alerts/__init__.py index 4b202bfbce9..1f272215a7d 100644 --- a/examples/e2e/steps/alerts/__init__.py +++ b/examples/e2e/steps/alerts/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/alerts/notify_on.py b/examples/e2e/steps/alerts/notify_on.py index 381591a3ad9..25294b4d2c9 100644 --- a/examples/e2e/steps/alerts/notify_on.py +++ b/examples/e2e/steps/alerts/notify_on.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/data_quality/__init__.py b/examples/e2e/steps/data_quality/__init__.py index dffcfadaac0..654d55f7205 100644 --- a/examples/e2e/steps/data_quality/__init__.py +++ b/examples/e2e/steps/data_quality/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/data_quality/drift_quality_gate.py b/examples/e2e/steps/data_quality/drift_quality_gate.py index fe8af0555d9..2d552b80689 100644 --- a/examples/e2e/steps/data_quality/drift_quality_gate.py +++ b/examples/e2e/steps/data_quality/drift_quality_gate.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/deployment/__init__.py b/examples/e2e/steps/deployment/__init__.py index e60bc9e5bd4..9cdc112ccd8 100644 --- a/examples/e2e/steps/deployment/__init__.py +++ b/examples/e2e/steps/deployment/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e/steps/deployment/deployment_deploy.py b/examples/e2e/steps/deployment/deployment_deploy.py index dad351e45be..07395edd5db 100644 --- a/examples/e2e/steps/deployment/deployment_deploy.py +++ b/examples/e2e/steps/deployment/deployment_deploy.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,12 +34,10 @@ @step -def deployment_deploy() -> ( - Annotated[ - Optional[MLFlowDeploymentService], - ArtifactConfig(name="mlflow_deployment", is_deployment_artifact=True), - ] -): +def deployment_deploy() -> Annotated[ + Optional[MLFlowDeploymentService], + ArtifactConfig(name="mlflow_deployment", is_deployment_artifact=True), +]: """Predictions step. This is an example of a predictions step that takes the data in and returns diff --git a/examples/e2e/steps/etl/__init__.py b/examples/e2e/steps/etl/__init__.py index 55943c5cf21..80e527cf029 100644 --- a/examples/e2e/steps/etl/__init__.py +++ b/examples/e2e/steps/etl/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/etl/data_loader.py b/examples/e2e/steps/etl/data_loader.py index a509900246d..d263a3ff207 100644 --- a/examples/e2e/steps/etl/data_loader.py +++ b/examples/e2e/steps/etl/data_loader.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/etl/inference_data_preprocessor.py b/examples/e2e/steps/etl/inference_data_preprocessor.py index 5cd50f4d1f0..18f3729fa84 100644 --- a/examples/e2e/steps/etl/inference_data_preprocessor.py +++ b/examples/e2e/steps/etl/inference_data_preprocessor.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/etl/train_data_preprocessor.py b/examples/e2e/steps/etl/train_data_preprocessor.py index f967c09e137..4a3cf341aaa 100644 --- a/examples/e2e/steps/etl/train_data_preprocessor.py +++ b/examples/e2e/steps/etl/train_data_preprocessor.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/etl/train_data_splitter.py b/examples/e2e/steps/etl/train_data_splitter.py index e3d1a1dbb08..4155e30e198 100644 --- a/examples/e2e/steps/etl/train_data_splitter.py +++ b/examples/e2e/steps/etl/train_data_splitter.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e/steps/hp_tuning/__init__.py b/examples/e2e/steps/hp_tuning/__init__.py index b135f8f91f6..7ba28dfa905 100644 --- a/examples/e2e/steps/hp_tuning/__init__.py +++ b/examples/e2e/steps/hp_tuning/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py b/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py index 1fc9a7cdc79..d196ddbb9ca 100644 --- a/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py +++ b/examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/hp_tuning/hp_tuning_single_search.py b/examples/e2e/steps/hp_tuning/hp_tuning_single_search.py index 7b55eebae7a..f64bdc1bda5 100644 --- a/examples/e2e/steps/hp_tuning/hp_tuning_single_search.py +++ b/examples/e2e/steps/hp_tuning/hp_tuning_single_search.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e/steps/inference/__init__.py b/examples/e2e/steps/inference/__init__.py index 76ef96332c4..30a07473f7f 100644 --- a/examples/e2e/steps/inference/__init__.py +++ b/examples/e2e/steps/inference/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/inference/inference_predict.py b/examples/e2e/steps/inference/inference_predict.py index 280e689c0fc..840323d781d 100644 --- a/examples/e2e/steps/inference/inference_predict.py +++ b/examples/e2e/steps/inference/inference_predict.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/promotion/__init__.py b/examples/e2e/steps/promotion/__init__.py index c00281499db..ee62cfee4fd 100644 --- a/examples/e2e/steps/promotion/__init__.py +++ b/examples/e2e/steps/promotion/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e/steps/promotion/compute_performance_metrics_on_current_data.py b/examples/e2e/steps/promotion/compute_performance_metrics_on_current_data.py index 4df16ecd111..98fd9d52d8a 100644 --- a/examples/e2e/steps/promotion/compute_performance_metrics_on_current_data.py +++ b/examples/e2e/steps/promotion/compute_performance_metrics_on_current_data.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/promotion/promote_with_metric_compare.py b/examples/e2e/steps/promotion/promote_with_metric_compare.py index 6bc580f47ba..5a4058226ce 100644 --- a/examples/e2e/steps/promotion/promote_with_metric_compare.py +++ b/examples/e2e/steps/promotion/promote_with_metric_compare.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/steps/training/__init__.py b/examples/e2e/steps/training/__init__.py index fda26b7cf41..eb11f8ef83e 100644 --- a/examples/e2e/steps/training/__init__.py +++ b/examples/e2e/steps/training/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e/steps/training/model_evaluator.py b/examples/e2e/steps/training/model_evaluator.py index 974103d1061..60f4ded534e 100644 --- a/examples/e2e/steps/training/model_evaluator.py +++ b/examples/e2e/steps/training/model_evaluator.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -82,22 +82,22 @@ def model_evaluator( dataset_trn.drop(columns=[target]), dataset_trn[target], ) - logger.info(f"Train accuracy={trn_acc*100:.2f}%") + logger.info(f"Train accuracy={trn_acc * 100:.2f}%") tst_acc = model.score( dataset_tst.drop(columns=[target]), dataset_tst[target], ) - logger.info(f"Test accuracy={tst_acc*100:.2f}%") + logger.info(f"Test accuracy={tst_acc * 100:.2f}%") mlflow.log_metric("testing_accuracy_score", tst_acc) messages = [] if trn_acc < min_train_accuracy: messages.append( - f"Train accuracy {trn_acc*100:.2f}% is below {min_train_accuracy*100:.2f}% !" + f"Train accuracy {trn_acc * 100:.2f}% is below {min_train_accuracy * 100:.2f}% !" ) if tst_acc < min_test_accuracy: messages.append( - f"Test accuracy {tst_acc*100:.2f}% is below {min_test_accuracy*100:.2f}% !" + f"Test accuracy {tst_acc * 100:.2f}% is below {min_test_accuracy * 100:.2f}% !" ) if fail_on_accuracy_quality_gates and messages: raise RuntimeError( diff --git a/examples/e2e/steps/training/model_trainer.py b/examples/e2e/steps/training/model_trainer.py index 87a695f5695..e98263584ce 100644 --- a/examples/e2e/steps/training/model_trainer.py +++ b/examples/e2e/steps/training/model_trainer.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/utils/__init__.py b/examples/e2e/utils/__init__.py index e3b50be89fd..59f461864b7 100644 --- a/examples/e2e/utils/__init__.py +++ b/examples/e2e/utils/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/utils/get_model_from_config.py b/examples/e2e/utils/get_model_from_config.py index 1d12d34e5a6..3b0ee3e0752 100644 --- a/examples/e2e/utils/get_model_from_config.py +++ b/examples/e2e/utils/get_model_from_config.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/utils/preprocess.py b/examples/e2e/utils/preprocess.py index 56cfc47f884..ee67f28d800 100644 --- a/examples/e2e/utils/preprocess.py +++ b/examples/e2e/utils/preprocess.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e/utils/promote_in_model_registry.py b/examples/e2e/utils/promote_in_model_registry.py index 6e8342984ec..0f0cb2bd460 100644 --- a/examples/e2e/utils/promote_in_model_registry.py +++ b/examples/e2e/utils/promote_in_model_registry.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. 
All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/.copier-answers.yml b/examples/e2e_nlp/.copier-answers.yml index b12ebdd786b..796b29bbf6a 100644 --- a/examples/e2e_nlp/.copier-answers.yml +++ b/examples/e2e_nlp/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.11.28 +_commit: 2025.01.08 _src_path: gh:zenml-io/template-nlp accelerator: cpu cloud_of_choice: aws diff --git a/examples/e2e_nlp/LICENSE b/examples/e2e_nlp/LICENSE index 75d01fb4544..909e24e117c 100644 --- a/examples/e2e_nlp/LICENSE +++ b/examples/e2e_nlp/LICENSE @@ -1,6 +1,6 @@ Apache Software License 2.0 -Copyright (c) ZenML GmbH 2024. All rights reserved. +Copyright (c) ZenML GmbH 2025. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/config.yaml b/examples/e2e_nlp/config.yaml index e5c0b0cdfe6..aaf4437bea6 100644 --- a/examples/e2e_nlp/config.yaml +++ b/examples/e2e_nlp/config.yaml @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/gradio/__init__.py b/examples/e2e_nlp/gradio/__init__.py index 8d4e9614e3d..29e36c52861 100644 --- a/examples/e2e_nlp/gradio/__init__.py +++ b/examples/e2e_nlp/gradio/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/pipelines/__init__.py b/examples/e2e_nlp/pipelines/__init__.py index 010efac7a67..567d506aa0d 100644 --- a/examples/e2e_nlp/pipelines/__init__.py +++ b/examples/e2e_nlp/pipelines/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/pipelines/deploying.py b/examples/e2e_nlp/pipelines/deploying.py index cc4fa2abd77..d553194dd5b 100644 --- a/examples/e2e_nlp/pipelines/deploying.py +++ b/examples/e2e_nlp/pipelines/deploying.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/pipelines/promoting.py b/examples/e2e_nlp/pipelines/promoting.py index a2e8424f413..9a317b434b6 100644 --- a/examples/e2e_nlp/pipelines/promoting.py +++ b/examples/e2e_nlp/pipelines/promoting.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/pipelines/training.py b/examples/e2e_nlp/pipelines/training.py index 4ac7cd5bd61..f1ae6a9cf82 100644 --- a/examples/e2e_nlp/pipelines/training.py +++ b/examples/e2e_nlp/pipelines/training.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. 
All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/requirements.txt b/examples/e2e_nlp/requirements.txt index e79245c3df9..e05cb52e240 100644 --- a/examples/e2e_nlp/requirements.txt +++ b/examples/e2e_nlp/requirements.txt @@ -2,3 +2,4 @@ torchvision gradio zenml[server]>=0.56.3 datasets>=2.12.0,<3.0.0 +scikit-learn<1.6.0 \ No newline at end of file diff --git a/examples/e2e_nlp/run.py b/examples/e2e_nlp/run.py index fd09dfc82d3..a4ddc9588a7 100644 --- a/examples/e2e_nlp/run.py +++ b/examples/e2e_nlp/run.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/__init__.py b/examples/e2e_nlp/steps/__init__.py index 7188c33fe86..9e953b05a8f 100644 --- a/examples/e2e_nlp/steps/__init__.py +++ b/examples/e2e_nlp/steps/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/alerts/__init__.py b/examples/e2e_nlp/steps/alerts/__init__.py index 4b202bfbce9..1f272215a7d 100644 --- a/examples/e2e_nlp/steps/alerts/__init__.py +++ b/examples/e2e_nlp/steps/alerts/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/alerts/notify_on.py b/examples/e2e_nlp/steps/alerts/notify_on.py index fecd5e600d2..b3f8ab0887c 100644 --- a/examples/e2e_nlp/steps/alerts/notify_on.py +++ b/examples/e2e_nlp/steps/alerts/notify_on.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/dataset_loader/__init__.py b/examples/e2e_nlp/steps/dataset_loader/__init__.py index d43e59e6bfb..ad5460568b9 100644 --- a/examples/e2e_nlp/steps/dataset_loader/__init__.py +++ b/examples/e2e_nlp/steps/dataset_loader/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/dataset_loader/data_loader.py b/examples/e2e_nlp/steps/dataset_loader/data_loader.py index 22a6fa602ac..99fe0d4c0c1 100644 --- a/examples/e2e_nlp/steps/dataset_loader/data_loader.py +++ b/examples/e2e_nlp/steps/dataset_loader/data_loader.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e_nlp/steps/deploying/__init__.py b/examples/e2e_nlp/steps/deploying/__init__.py index eee0e0e0590..b0a28a2a16f 100644 --- a/examples/e2e_nlp/steps/deploying/__init__.py +++ b/examples/e2e_nlp/steps/deploying/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/deploying/huggingface_deployment.py b/examples/e2e_nlp/steps/deploying/huggingface_deployment.py index 2e9786cd0c8..7c743ce7321 100644 --- a/examples/e2e_nlp/steps/deploying/huggingface_deployment.py +++ b/examples/e2e_nlp/steps/deploying/huggingface_deployment.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,7 +39,9 @@ def deploy_to_huggingface( """ ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ### secret = Client().get_secret("huggingface_creds") - assert secret, "No secret found with name 'huggingface_creds'. Please create one that includes your `username` and `token`." + assert secret, ( + "No secret found with name 'huggingface_creds'. Please create one that includes your `username` and `token`." + ) token = secret.secret_values["token"] api = HfApi(token=token) hf_repo = api.create_repo( diff --git a/examples/e2e_nlp/steps/deploying/local_deployment.py b/examples/e2e_nlp/steps/deploying/local_deployment.py index 31739456ebf..0ac5a1c9906 100644 --- a/examples/e2e_nlp/steps/deploying/local_deployment.py +++ b/examples/e2e_nlp/steps/deploying/local_deployment.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. 
All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/deploying/save_model.py b/examples/e2e_nlp/steps/deploying/save_model.py index 146a94e821c..69ca2c4c61a 100644 --- a/examples/e2e_nlp/steps/deploying/save_model.py +++ b/examples/e2e_nlp/steps/deploying/save_model.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/deploying/skypilot_deployment.py b/examples/e2e_nlp/steps/deploying/skypilot_deployment.py index c8ca4bded1b..65a1b1edede 100644 --- a/examples/e2e_nlp/steps/deploying/skypilot_deployment.py +++ b/examples/e2e_nlp/steps/deploying/skypilot_deployment.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/promotion/__init__.py b/examples/e2e_nlp/steps/promotion/__init__.py index a2fd96c08fc..1123e1b1a7a 100644 --- a/examples/e2e_nlp/steps/promotion/__init__.py +++ b/examples/e2e_nlp/steps/promotion/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e_nlp/steps/promotion/promote_get_metrics.py b/examples/e2e_nlp/steps/promotion/promote_get_metrics.py index b24ac42245c..eec6eb526f1 100644 --- a/examples/e2e_nlp/steps/promotion/promote_get_metrics.py +++ b/examples/e2e_nlp/steps/promotion/promote_get_metrics.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,12 +30,10 @@ @step -def promote_get_metrics() -> ( - Tuple[ - Annotated[Dict[str, Any], "latest_metrics"], - Annotated[Dict[str, Any], "current_metrics`"], - ] -): +def promote_get_metrics() -> Tuple[ + Annotated[Dict[str, Any], "latest_metrics"], + Annotated[Dict[str, Any], "current_metrics`"], +]: """Get metrics for comparison for promoting a model. This is an example of a metric retrieval step. It is used to retrieve diff --git a/examples/e2e_nlp/steps/promotion/promote_metric_compare_promoter.py b/examples/e2e_nlp/steps/promotion/promote_metric_compare_promoter.py index 9d12e824f8d..e43e066a9f4 100644 --- a/examples/e2e_nlp/steps/promotion/promote_metric_compare_promoter.py +++ b/examples/e2e_nlp/steps/promotion/promote_metric_compare_promoter.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/register/__init__.py b/examples/e2e_nlp/steps/register/__init__.py index bea8fac8820..d62adfed28c 100644 --- a/examples/e2e_nlp/steps/register/__init__.py +++ b/examples/e2e_nlp/steps/register/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. 
+# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/register/model_log_register.py b/examples/e2e_nlp/steps/register/model_log_register.py index b79d7ab22b6..566bd1b3510 100644 --- a/examples/e2e_nlp/steps/register/model_log_register.py +++ b/examples/e2e_nlp/steps/register/model_log_register.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/tokenizer_loader/__init__.py b/examples/e2e_nlp/steps/tokenizer_loader/__init__.py index eb096b3de1e..edd0ef98c14 100644 --- a/examples/e2e_nlp/steps/tokenizer_loader/__init__.py +++ b/examples/e2e_nlp/steps/tokenizer_loader/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/tokenizer_loader/tokenizer_loader.py b/examples/e2e_nlp/steps/tokenizer_loader/tokenizer_loader.py index 1b5c38975a2..b6825c51e63 100644 --- a/examples/e2e_nlp/steps/tokenizer_loader/tokenizer_loader.py +++ b/examples/e2e_nlp/steps/tokenizer_loader/tokenizer_loader.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/e2e_nlp/steps/tokenzation/__init__.py b/examples/e2e_nlp/steps/tokenzation/__init__.py index 3a06f1db138..0484b53df1a 100644 --- a/examples/e2e_nlp/steps/tokenzation/__init__.py +++ b/examples/e2e_nlp/steps/tokenzation/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/tokenzation/tokenization.py b/examples/e2e_nlp/steps/tokenzation/tokenization.py index 9964c29ae48..9f740565c01 100644 --- a/examples/e2e_nlp/steps/tokenzation/tokenization.py +++ b/examples/e2e_nlp/steps/tokenzation/tokenization.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/training/__init__.py b/examples/e2e_nlp/steps/training/__init__.py index 112b1b545a4..b3217156411 100644 --- a/examples/e2e_nlp/steps/training/__init__.py +++ b/examples/e2e_nlp/steps/training/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/steps/training/model_trainer.py b/examples/e2e_nlp/steps/training/model_trainer.py index 0a3de574c09..977dd137c58 100644 --- a/examples/e2e_nlp/steps/training/model_trainer.py +++ b/examples/e2e_nlp/steps/training/model_trainer.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. 
All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/e2e_nlp/utils/misc.py b/examples/e2e_nlp/utils/misc.py index bad839b4e3c..a4ade2875c2 100644 --- a/examples/e2e_nlp/utils/misc.py +++ b/examples/e2e_nlp/utils/misc.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ def compute_metrics( """ logits, labels = eval_pred predictions = np.argmax(logits, axis=-1) - # calculate the mertic using the predicted and true value + # calculate the metric using the predicted and true value accuracy = load_metric("accuracy", trust_remote_code=True).compute( predictions=predictions, references=labels ) diff --git a/examples/llm_finetuning/LICENSE b/examples/llm_finetuning/LICENSE index 75d01fb4544..909e24e117c 100644 --- a/examples/llm_finetuning/LICENSE +++ b/examples/llm_finetuning/LICENSE @@ -1,6 +1,6 @@ Apache Software License 2.0 -Copyright (c) ZenML GmbH 2024. All rights reserved. +Copyright (c) ZenML GmbH 2025. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/configs/orchestrator_finetune.yaml b/examples/llm_finetuning/configs/orchestrator_finetune.yaml index 0d76d8ea320..404f9bb918a 100644 --- a/examples/llm_finetuning/configs/orchestrator_finetune.yaml +++ b/examples/llm_finetuning/configs/orchestrator_finetune.yaml @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/configs/remote_finetune.yaml b/examples/llm_finetuning/configs/remote_finetune.yaml index d978fe18561..74d23732053 100644 --- a/examples/llm_finetuning/configs/remote_finetune.yaml +++ b/examples/llm_finetuning/configs/remote_finetune.yaml @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/materializers/__init__.py b/examples/llm_finetuning/materializers/__init__.py index 757bd8418a5..06cf292f376 100644 --- a/examples/llm_finetuning/materializers/__init__.py +++ b/examples/llm_finetuning/materializers/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/materializers/directory_materializer.py b/examples/llm_finetuning/materializers/directory_materializer.py index 05e90e43e18..6db666a475c 100644 --- a/examples/llm_finetuning/materializers/directory_materializer.py +++ b/examples/llm_finetuning/materializers/directory_materializer.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/llm_finetuning/pipelines/train.py b/examples/llm_finetuning/pipelines/train.py index dab1b39962f..cc3a0676a6f 100644 --- a/examples/llm_finetuning/pipelines/train.py +++ b/examples/llm_finetuning/pipelines/train.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/pipelines/train_accelerated.py b/examples/llm_finetuning/pipelines/train_accelerated.py index bae22541293..be5eddfda4a 100644 --- a/examples/llm_finetuning/pipelines/train_accelerated.py +++ b/examples/llm_finetuning/pipelines/train_accelerated.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/run.py b/examples/llm_finetuning/run.py index db5f637b1de..daf7a0df99d 100644 --- a/examples/llm_finetuning/run.py +++ b/examples/llm_finetuning/run.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/steps/__init__.py b/examples/llm_finetuning/steps/__init__.py index 317b6b4c409..d3132fac823 100644 --- a/examples/llm_finetuning/steps/__init__.py +++ b/examples/llm_finetuning/steps/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/steps/evaluate_model.py b/examples/llm_finetuning/steps/evaluate_model.py index 72f053a0e60..63ba198501f 100644 --- a/examples/llm_finetuning/steps/evaluate_model.py +++ b/examples/llm_finetuning/steps/evaluate_model.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/steps/finetune.py b/examples/llm_finetuning/steps/finetune.py index eddf19a3292..b2b9d04ba40 100644 --- a/examples/llm_finetuning/steps/finetune.py +++ b/examples/llm_finetuning/steps/finetune.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/steps/log_metadata.py b/examples/llm_finetuning/steps/log_metadata.py index 90109fdf3c4..4a11d549fe1 100644 --- a/examples/llm_finetuning/steps/log_metadata.py +++ b/examples/llm_finetuning/steps/log_metadata.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/llm_finetuning/steps/prepare_datasets.py b/examples/llm_finetuning/steps/prepare_datasets.py index b9cc13c2261..2638c60f0bf 100644 --- a/examples/llm_finetuning/steps/prepare_datasets.py +++ b/examples/llm_finetuning/steps/prepare_datasets.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/steps/promote.py b/examples/llm_finetuning/steps/promote.py index 12b305d0d84..30333e0ad88 100644 --- a/examples/llm_finetuning/steps/promote.py +++ b/examples/llm_finetuning/steps/promote.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -54,11 +54,11 @@ def promote( msg = ( f"`{metric}` values to compare:\n" - f"base={base_metrics[metric]*100:.2f}%\n" - f"finetuned={ft_metrics[metric]*100:.2f}%" + f"base={base_metrics[metric] * 100:.2f}%\n" + f"finetuned={ft_metrics[metric] * 100:.2f}%" ) if staging_metrics: - msg += f"\nstaging={staging_metrics[metric]*100:.2f}%" + msg += f"\nstaging={staging_metrics[metric] * 100:.2f}%" logger.info(msg) if base_metrics[metric] <= ft_metrics[metric]: diff --git a/examples/llm_finetuning/utils/__init__.py b/examples/llm_finetuning/utils/__init__.py index 8d4e9614e3d..29e36c52861 100644 --- a/examples/llm_finetuning/utils/__init__.py +++ b/examples/llm_finetuning/utils/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/utils/callbacks.py b/examples/llm_finetuning/utils/callbacks.py index 245c498da03..f1b129a0340 100644 --- a/examples/llm_finetuning/utils/callbacks.py +++ b/examples/llm_finetuning/utils/callbacks.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/utils/loaders.py b/examples/llm_finetuning/utils/loaders.py index 837b182410e..253ffa5de8d 100644 --- a/examples/llm_finetuning/utils/loaders.py +++ b/examples/llm_finetuning/utils/loaders.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/llm_finetuning/utils/logging.py b/examples/llm_finetuning/utils/logging.py index 0fd2df7801b..844d3d22efb 100644 --- a/examples/llm_finetuning/utils/logging.py +++ b/examples/llm_finetuning/utils/logging.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/llm_finetuning/utils/tokenizer.py b/examples/llm_finetuning/utils/tokenizer.py index 39d9de11367..0e1cd0af14c 100644 --- a/examples/llm_finetuning/utils/tokenizer.py +++ b/examples/llm_finetuning/utils/tokenizer.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/LICENSE b/examples/mlops_starter/LICENSE index 75d01fb4544..909e24e117c 100644 --- a/examples/mlops_starter/LICENSE +++ b/examples/mlops_starter/LICENSE @@ -1,6 +1,6 @@ Apache Software License 2.0 -Copyright (c) ZenML GmbH 2024. All rights reserved. +Copyright (c) ZenML GmbH 2025. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/pipelines/__init__.py b/examples/mlops_starter/pipelines/__init__.py index 6d8015e257f..e06f785b2b3 100644 --- a/examples/mlops_starter/pipelines/__init__.py +++ b/examples/mlops_starter/pipelines/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/pipelines/feature_engineering.py b/examples/mlops_starter/pipelines/feature_engineering.py index edd87b6d82b..488dedf7897 100644 --- a/examples/mlops_starter/pipelines/feature_engineering.py +++ b/examples/mlops_starter/pipelines/feature_engineering.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/pipelines/inference.py b/examples/mlops_starter/pipelines/inference.py index 4b05c9eef8c..d47726638c5 100644 --- a/examples/mlops_starter/pipelines/inference.py +++ b/examples/mlops_starter/pipelines/inference.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/pipelines/training.py b/examples/mlops_starter/pipelines/training.py index 1e8410c608b..27964a5567c 100644 --- a/examples/mlops_starter/pipelines/training.py +++ b/examples/mlops_starter/pipelines/training.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/run.py b/examples/mlops_starter/run.py index 16a352588d6..14fd779d88f 100644 --- a/examples/mlops_starter/run.py +++ b/examples/mlops_starter/run.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/examples/mlops_starter/steps/__init__.py b/examples/mlops_starter/steps/__init__.py index ce6f59f965f..db551d463aa 100644 --- a/examples/mlops_starter/steps/__init__.py +++ b/examples/mlops_starter/steps/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/steps/data_loader.py b/examples/mlops_starter/steps/data_loader.py index a0345026de3..c6b05d4fe77 100644 --- a/examples/mlops_starter/steps/data_loader.py +++ b/examples/mlops_starter/steps/data_loader.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/steps/data_preprocessor.py b/examples/mlops_starter/steps/data_preprocessor.py index f94d1e85f6d..40b4ecd9b05 100644 --- a/examples/mlops_starter/steps/data_preprocessor.py +++ b/examples/mlops_starter/steps/data_preprocessor.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/steps/data_splitter.py b/examples/mlops_starter/steps/data_splitter.py index d777e023aed..4623a293e13 100644 --- a/examples/mlops_starter/steps/data_splitter.py +++ b/examples/mlops_starter/steps/data_splitter.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. 
All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/steps/model_evaluator.py b/examples/mlops_starter/steps/model_evaluator.py index c63c53109f4..e91383aa1f9 100644 --- a/examples/mlops_starter/steps/model_evaluator.py +++ b/examples/mlops_starter/steps/model_evaluator.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/steps/model_promoter.py b/examples/mlops_starter/steps/model_promoter.py index 43d43ceac1f..0c570488d29 100644 --- a/examples/mlops_starter/steps/model_promoter.py +++ b/examples/mlops_starter/steps/model_promoter.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ def model_promoter(accuracy: float, stage: str = "production") -> bool: if accuracy < 0.8: logger.info( - f"Model accuracy {accuracy*100:.2f}% is below 80% ! Not promoting model." + f"Model accuracy {accuracy * 100:.2f}% is below 80% ! Not promoting model." ) else: logger.info(f"Model promoted to {stage}!") diff --git a/examples/mlops_starter/steps/model_trainer.py b/examples/mlops_starter/steps/model_trainer.py index eeb24f32e2e..7483a302935 100644 --- a/examples/mlops_starter/steps/model_trainer.py +++ b/examples/mlops_starter/steps/model_trainer.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/utils/__init__.py b/examples/mlops_starter/utils/__init__.py index 8d4e9614e3d..29e36c52861 100644 --- a/examples/mlops_starter/utils/__init__.py +++ b/examples/mlops_starter/utils/__init__.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/mlops_starter/utils/preprocess.py b/examples/mlops_starter/utils/preprocess.py index df60bce30d1..9546d163a8f 100644 --- a/examples/mlops_starter/utils/preprocess.py +++ b/examples/mlops_starter/utils/preprocess.py @@ -1,6 +1,6 @@ # Apache Software License 2.0 # -# Copyright (c) ZenML GmbH 2024. All rights reserved. +# Copyright (c) ZenML GmbH 2025. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/summarize_docs.py b/scripts/summarize_docs.py new file mode 100644 index 00000000000..cf759821f1d --- /dev/null +++ b/scripts/summarize_docs.py @@ -0,0 +1,97 @@ +# Copyright (c) ZenML GmbH 2025. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+import os +import re +import json +from openai import OpenAI +from pathlib import Path +from typing import List, Dict + +# Initialize OpenAI client +client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) + +def extract_content_blocks(md_content: str) -> str: + """Extracts content blocks while preserving order and marking code blocks.""" + parts = re.split(r'(```[\s\S]*?```)', md_content) + + processed_content = "" + for part in parts: + if part.startswith('```'): + processed_content += "\n[CODE_BLOCK_START]\n" + part + "\n[CODE_BLOCK_END]\n" + else: + cleaned_text = re.sub(r'\s+', ' ', part).strip() + if cleaned_text: + processed_content += "\n" + cleaned_text + "\n" + + return processed_content + +def summarize_content(content: str, file_path: str) -> str: + """Summarizes content using OpenAI API.""" + try: + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "system", + "content": "You are a technical documentation summarizer." + }, + { + "role": "user", + "content": f"""Please summarize the following documentation text for another LLM to be able to answer questions about it with enough detail. + Keep all important technical information and key points while removing redundancy and verbose explanations. + Make it concise but ensure NO critical information is lost and some details that you think are important are kept. 
+ Make the code shorter where possible keeping only the most important parts while preserving syntax and accuracy: + + {content}""" + } + ], + temperature=0.3, + max_tokens=2000 + ) + return response.choices[0].message.content + except Exception as e: + print(f"Error summarizing {file_path}: {e}") + return "" + +def main(): + docs_dir = "docs/book" + output_file = "summarized_docs.txt" + + # Get markdown files + exclude_files = ["toc.md"] + md_files = list(Path(docs_dir).rglob("*.md")) + md_files = [file for file in md_files if file.name not in exclude_files] + + # Process each file and write summaries + with open(output_file, "w", encoding="utf-8") as out_f: + for file_path in md_files: + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + processed_content = extract_content_blocks(content) + summary = summarize_content(processed_content, str(file_path)) + + if summary: + out_f.write(f"=== File: {file_path} ===\n\n") + out_f.write(summary) + out_f.write("\n\n" + "="*50 + "\n\n") + + print(f"Processed: {file_path}") + + except Exception as e: + print(f"Error processing {file_path}: {e}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/upload_to_huggingface.py b/scripts/upload_to_huggingface.py new file mode 100644 index 00000000000..eb605a309a2 --- /dev/null +++ b/scripts/upload_to_huggingface.py @@ -0,0 +1,25 @@ +from huggingface_hub import HfApi +import os + +def upload_to_huggingface(): + api = HfApi(token=os.environ["HF_TOKEN"]) + + # Upload OpenAI summary + api.upload_file( + path_or_fileobj="summarized_docs.txt", + path_in_repo="how-to-guides.txt", + repo_id="zenml/llms.txt", + repo_type="dataset" + ) + + # Upload repomix outputs + for filename in ["component-guide.txt", "basics.txt", "llms-full.txt"]: + api.upload_file( + path_or_fileobj=f"repomix-outputs/{filename}", + path_in_repo=filename, + repo_id="zenml/llms.txt", + repo_type="dataset" + ) + +if __name__ == "__main__": + 
upload_to_huggingface() \ No newline at end of file