Merge pull request #128 from zenml-io/hamza/step-design

Prepare for 0.5.0 release
zenml-io · Oct 15, 2021 · cf610fa · cf610fa
2 parents b091b27 + a9f1ad6
commit cf610fa
Show file tree

Hide file tree

Showing 54 changed files with 2,511 additions and 1,148 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,4 +1,54 @@
-# 0.5.0rc0
+# 0.5.0
+This long-awaited ZenML release marks a seminal moment in the project's history. We present to you a complete 
+revamp of the internals of ZenML, with a fresh new design and API. While these changes are significant, and have been months 
+in the making, the original vision of ZenML has not wavered. We hope that the ZenML community finds the new 
+design choices easier to grasp and use, and we welcome feedback on the [issues board](https://github.com/zenml-io/zenml/issues).
+
+## Warning
+0.5.0 is a complete API change from the previous versions of ZenML, and is a *breaking* upgrade. Fundamental 
+concepts have been changed, and therefore backwards compatibility is not maintained. Please use this version only 
+with fresh projects.
+
+With such significant changes, we also expect this release to contain bugs. Please report any bugs on the issues board, and 
+they should be addressed in upcoming releases.
+
+## Overview
+
+* Introducing a new functional API for creating pipelines and steps. This is now the default mechanism for building ZenML pipelines. [read more](https://docs.zenml.io/quickstart-guide)
+* Steps now use Materializers to handle artifact serialization/deserialization between steps. This is a powerful change, and will be expanded upon in the future. [read more](https://docs.zenml.io/core/materializers)
+* Introducing the new `Stack` paradigm: Easily transition from one MLOps stack to the next with a few CLI commands [read more](https://docs.zenml.io/core/stacks)
+* Introducing a new `Artifact`, `Typing`, and `Annotation` system, with `pydantic` (and `dataclasses`) support [read more](https://docs.zenml.io/core/artifacts)
+* Deprecating the `pipelines_dir`: Now individual pipelines will be stored in their metadata stores, making the metadata store a single source of truth. [read more](https://docs.zenml.io/core/stacks)
+* Deprecating the YAML config file: ZenML no longer natively compiles to an intermediate YAML-based representation. Instead, it compiles and deploys directly into the selected orchestrator's 
+representation. While we do plan to support running pipelines directly through YAML in the future, it will no longer be
+the default route through which pipelines are run. [read more about orchestrators here](https://docs.zenml.io/core/stacks)
+
+## Technical Improvements
+* A completely new system design, please refer to the [docs](https://docs.zenml.io/core/core-concepts).
+* Better type hints and docstrings.
+* Auto-completion support.
+* Numerous performance improvements and bug fixes, including a smaller dependency footprint.
+
+## What to expect in the coming weeks from the new ZenML
+Currently, this release is bare bones. We are missing some basic features which used to be part of ZenML 0.3.8 (the previous release):
+
+* Standard interfaces for `TrainingPipeline`.
+* Individual step interfaces like `PreprocesserStep`, `TrainerStep`, `DeployerStep` etc. need to be rewritten within the new paradigm. They should
+be included in an upcoming release.
+* A proper production setup with an orchestrator like Airflow.
+* A post-execution workflow to analyze and inspect pipeline runs.
+* The concept of `Backends` will evolve into a simple mechanism of transitioning individual steps into different runners.
+* Support for `KubernetesOrchestrator`, `KubeflowOrchestrator`, `GCPOrchestrator` and `AWSOrchestrator` is also planned.
+* Dependency management including Docker support is planned.
+
+[Our roadmap](https://docs.zenml.io/support/roadmap) goes into further detail on the timeline.
+
+We encourage every user (old or new) to start afresh with this release. Please go over our latest [docs](https://docs.zenml.io) 
+and [examples](examples) to get the hang of the new system.
+
+Onwards and upwards to 1.0.0!
+
+# 0.5.0rc2
 This long-awaited ZenML release marks a seminal moment in the project's history. We present to you a complete 
 revamp of the internals of ZenML, with a fresh new design and API. While these changes are significant, and have been months 
 in the making, the original vision of ZenML has not wavered. We hope that the ZenML community finds the new 

diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb
diff --git a/examples/quickstart/quickstart.py b/examples/quickstart/quickstart.py
@@ -12,56 +12,52 @@
 #  or implied. See the License for the specific language governing
 #  permissions and limitations under the License.
 
-from typing import List
 
 import numpy as np
 import tensorflow as tf
 
-from zenml.annotations import Input, Output, Step
-from zenml.artifacts import DataArtifact, ModelArtifact
 from zenml.pipelines import pipeline
 from zenml.steps import step
+from zenml.steps.base_step_config import BaseStepConfig
+from zenml.steps.step_output import Output
 
 
-@step(name="import_basic_mnist")
-def ImportDataStep() -> List[float]:
+class TrainerConfig(BaseStepConfig):
+    """Trainer params"""
+
+    epochs: int = 1
+
+
+@step
+def importer_mnist() -> Output(
+    X_train=np.ndarray, y_train=np.ndarray, X_test=np.ndarray, y_test=np.ndarray
+):
     """Download the MNIST data store it as an artifact"""
     (X_train, y_train), (
         X_test,
         y_test,
     ) = tf.keras.datasets.mnist.load_data()
-    return [
-        X_train.tolist()[0:100],
-        y_train.tolist()[0:100],
-        X_test.tolist()[0:100],
-        y_test.tolist()[0:100],
-    ]
+    return X_train, y_train, X_test, y_test
 
 
-@step(name="normalize")
-def NormalizeDataStep(data: Input[DataArtifact]) -> List[float]:
+@step
+def normalizer(
+    X_train: np.ndarray, X_test: np.ndarray
+) -> Output(X_train_normed=np.ndarray, X_test_normed=np.ndarray):
     """Normalize the values for all the images so they are between 0 and 1"""
-    import_data = data.materializers.json.read_file()
-    X_train_normed = np.array(import_data[0]) / 255.0
-    X_test_normed = np.array(import_data[2]) / 255.0
-    return [
-        X_train_normed.tolist(),
-        import_data[1],
-        X_test_normed.tolist(),
-        import_data[3],
-    ]
-
-
-@step(name="trainer")
-def MNISTTrainModelStep(
-    data: Input[DataArtifact],
-    model_artifact: Output[ModelArtifact],
-    epochs: int,
-):
+    X_train_normed = X_train / 255.0
+    X_test_normed = X_test / 255.0
+    return X_train_normed, X_test_normed
+
+
+@step
+def trainer(
+    config: TrainerConfig,
+    X_train: np.ndarray,
+    y_train: np.ndarray,
+) -> tf.keras.Model:
     """Train a neural net from scratch to recognise MNIST digits return our
     model or the learner"""
-    import_data = data.materializers.json.read_file()
-
     model = tf.keras.Sequential(
         [
             tf.keras.layers.Flatten(input_shape=(28, 28)),
@@ -77,83 +73,76 @@ def MNISTTrainModelStep(
     )
 
     model.fit(
-        import_data[0],
-        import_data[1],
-        epochs=epochs,
+        X_train,
+        y_train,
+        epochs=config.epochs,
     )
 
     # write model
-    model_artifact.materializers.keras.write_model(model)
+    return model
 
 
-@step(name="evaluate")
-def EvaluateModelStep(
-    data: Input[DataArtifact], model_artifact: Input[ModelArtifact]
-) -> List[float]:
+@step
+def evaluator(
+    X_test: np.ndarray,
+    y_test: np.ndarray,
+    model: tf.keras.Model,
+) -> np.ndarray:
     """Calculate the loss for the model for each epoch in a graph"""
-    model = model_artifact.materializers.keras.read_model()
-    import_data = data.materializers.json.read_file()
 
-    test_loss, test_acc = model.evaluate(
-        import_data[2], import_data[3], verbose=2
-    )
-    return [test_loss, test_acc]
+    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
+    return np.array([test_loss, test_acc])
 
 
 # Define the pipeline
 
 
-@pipeline("mnist")
-def MNISTTrainingPipeline(
-    import_data: Step[ImportDataStep],
-    normalize_data: Step[NormalizeDataStep],
-    trainer: Step[MNISTTrainModelStep],
-    evaluator: Step[EvaluateModelStep],
+@pipeline
+def mnist_pipeline(
+    importer,
+    normalizer: normalizer,
+    trainer,
+    evaluator,
 ):
     # Link all the steps artifacts together
-    normalize_data(data=import_data.outputs.return_output)
-    trainer(data=normalize_data.outputs.return_output)
-    evaluator(
-        data=normalize_data.outputs.return_output,
-        model_artifact=trainer.outputs.model_artifact,
-    )
+    X_train, y_train, X_test, y_test = importer()
+    X_trained_normed, X_test_normed = normalizer(X_train=X_train, X_test=X_test)
+    model = trainer(X_train=X_trained_normed, y_train=y_train)
+    evaluator(X_test=X_test_normed, y_test=y_test, model=model)
 
 
 # Initialise the pipeline
-mnist_pipeline = MNISTTrainingPipeline(
-    import_data=ImportDataStep(),
-    normalize_data=NormalizeDataStep(),
-    trainer=MNISTTrainModelStep(epochs=10),
-    evaluator=EvaluateModelStep(),
+p = mnist_pipeline(
+    importer=importer_mnist(),
+    normalizer=normalizer(),
+    trainer=trainer(config=TrainerConfig(epochs=1)),
+    evaluator=evaluator(),
 )
 
 # Run the pipeline
-mnist_pipeline.run()
+p.run()
 
 
 # Define a new modified import data step to download the Fashion MNIST model
-@step(name="import_fashion_mnist")
-def ImportDataStep() -> List[float]:
-    """Download the Fashion MNIST data store it as an artifact"""
+@step
+def importer_fashion_mnist() -> Output(
+    X_train=np.ndarray, y_train=np.ndarray, X_test=np.ndarray, y_test=np.ndarray
+):
+    """Download the Fashion MNIST data and store it as an artifact"""
     (X_train, y_train), (
         X_test,
         y_test,
-    ) = tf.keras.datasets.fashion_mnist.load_data()  # CHANGING to fashion
-    return [
-        X_train.tolist()[0:100],
-        y_train.tolist()[0:100],
-        X_test.tolist()[0:100],
-        y_test.tolist()[0:100],
-    ]
+    ) = tf.keras.datasets.fashion_mnist.load_data()
+    return X_train, y_train, X_test, y_test
 
 
 # Initialise a new pipeline
-fashion_mnist_trainer = MNISTTrainingPipeline(
-    import_data=ImportDataStep(),
-    normalize_data=NormalizeDataStep(),
-    trainer=MNISTTrainModelStep(epochs=10),
-    evaluator=EvaluateModelStep(),
+fashion_p = mnist_pipeline(
+    importer=importer_fashion_mnist(),
+    normalizer=normalizer(),
+    trainer=trainer(config=TrainerConfig(epochs=1)),
+    evaluator=evaluator(),
 )
 
 # Run the new pipeline
-fashion_mnist_trainer.run()
+fashion_p.run()
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "zenml"
-version = "0.5.0rc2"
+version = "0.5.0"
 packages = [
     { include = "zenml", from = "src" },
 ]
@@ -50,7 +50,7 @@ zenml = 'zenml.cli.cli:cli'
 
 [tool.poetry.dependencies]
 python = ">=3.6.2,<3.9"
-ml-pipelines-sdk = "^1.2.0"
+ml-pipelines-sdk = "^1.3.0"
 panel = "^0.11.3"
 pandas = "^1.1.5"
 apache-beam = "^2.30.0"
@@ -137,7 +137,7 @@ exclude_lines = [
 profile = "black"
 known_third_party = []
 skip_glob = []
-line_length = 79
+line_length = 80
 
 [tool.mypy]
 # --strict
@@ -157,7 +157,7 @@ strict_equality = true
 # --strict end
 
 [tool.black]
-line-length = 79
+line-length = 80
 include = '\.pyi?$'
 exclude = '''
 /(