kubeflow · nameershah · Mar 16, 2026 · Mar 16, 2026 · Mar 18, 2026
diff --git a/kale/kfserving/__main__.py b/kale/kfserving/__main__.py
@@ -12,6 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Entry point for the Kale KFServing Transformer server.
+
+This script initializes and starts a KFServing server with a KaleTransformer
+model. It accepts command-line arguments for the model name and predictor host.
+
+Usage:
+    python -m kale.kfserving --predictor_host <URL> [--model_name <name>]
+
+Args:
+    --model_name (str): The name the model is served under.
+                        Defaults to 'model'.
+    --predictor_host (str): The URL for the model predict function.
+                            Required.
+
+Example:
+    python -m kale.kfserving --predictor_host http://localhost:8080
+"""
+
 import argparse
 
 from kale.kfserving.transformer import KaleTransformer

diff --git a/kale/marshal/backend.py b/kale/marshal/backend.py
@@ -12,6 +12,48 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Core marshalling backend system for serializing Python objects.
+
+This module provides the infrastructure for saving and loading Python objects
+to/from disk using specialized backends. It handles the dispatch logic that
+routes objects to appropriate serialization handlers based on their type.
+
+Key Components:
+    MarshalBackend: Abstract base class for implementing object marshalling.
+                    Subclasses implement save/load for specific libraries.
+
+    Dispatcher: Routes objects to the correct backend based on:
+                - Object type (via regex matching on obj_type_regex)
+                - File extension (via file_type attribute)
+
+    Data Management: Functions to get/set the marshalling data directory
+                     where serialized objects are stored.
+
+Usage:
+    The Dispatcher is the primary interface. Use the top-level functions
+    from kale.marshal module:
+
+    from kale.marshal import save, load
+
+    save(my_dataframe, "my_data")  # Dispatcher routes to PandasBackend
+    loaded = load("my_data")        # Dispatcher finds .pdpkl file
+
+How It Works:
+    1. save() is called with an object
+    2. Dispatcher._dispatch_obj_type() matches object type against all
+       registered backends' obj_type_regex patterns
+    3. The matching backend's wrapped_save() is called
+    4. For load(), Dispatcher._dispatch_file_type() matches the file
+       extension to find the correct backend
+
+Extending:
+    To add support for a new library, see backends.py for examples.
+
+See Also:
+    backends.py: Concrete implementations for popular ML/data libraries
+"""
+
+
 import logging
 import os
 import re
@@ -40,22 +82,38 @@ def get_data_dir():
 
 
 class MarshalBackend:
-    """Base class for marshalling Python objects.
-
-    This class is supposed to be subclassed by specialized backends that
-    implement the `save` and `load` functions to marshal library-specific
-    objects.
-
-    A backend registers itself to specific objects/file types using the
-    following class attributes:
-
-    * `file_type`: The file extension of the files/folders the backend is able
-                   to restore. NOTE: Currently this can be just *one* ext.
-    * `obj_type_regex`: A regex which is matched against the `type` of an
-                        object.
-
-    Take a look at `backend.py` for some examples on how to create custom
-    marshal backends.
+    """Abstract base class for marshalling Python objects.
+
+    This class defines the interface and default behavior for serializing
+    and deserializing Python objects. Subclasses implement library-specific
+    serialization logic.
+
+    Attributes:
+        name (str): Human-readable name of the backend (e.g., "Pandas backend")
+        display_name (str): Short display name, typically the library name
+        file_type (str): File extension without dot (e.g., "pdpkl", "joblib")
+        obj_type_regex (str): Regex pattern to match object types this backend
+                              handles. Matched against the `type` of an object.
+                              Example: r"pandas\\.(core\\.)?frame\\.DataFrame"
+        predictor_type (str): Optional. Used for KFServing model serving.
+        fallback_on_missing_lib (bool): If True, falls back to dill serialization
+                                        when the library import fails.
+
+    How Dispatch Works:
+        1. Dispatcher.save(obj, name) is called
+        2. Dispatcher._dispatch_obj_type(obj) searches all registered backends
+        3. First backend whose obj_type_regex matches the object type is used
+        4. That backend's wrapped_save() is called, which delegates to save()
+
+    Implementing Custom Backends:
+        1. Subclass MarshalBackend
+        2. Set class attributes (name, display_name, file_type, obj_type_regex)
+        3. Implement save(self, obj, path) method
+        4. Implement load(self, file_path) method
+        5. Decorate with @register_backend (see backends.py for examples)
+
+    See Also:
+        backends.py: Concrete implementations for sklearn, pandas, pytorch, etc.
     """
 
     name: str = "Default backend"

diff --git a/kale/marshal/backends.py b/kale/marshal/backends.py
@@ -12,6 +12,54 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Concrete marshalling backend implementations for popular libraries.
+
+This module provides specialized MarshalBackend subclasses that handle
+serialization of objects from popular ML, data science, and deep learning
+libraries. Each backend registers itself with the Dispatcher (defined in
+backend.py) which automatically routes objects to the correct handler.
+
+Available Backends:
+    FunctionBackend: Python functions (pickle-based)
+    SKLearnBackend: scikit-learn estimators/models (joblib format)
+    NumpyBackend: NumPy arrays (.npy binary format)
+    PandasBackend: Pandas DataFrames and Series (.pdpkl pickle format)
+    XGBoostModelBackend: XGBoost Booster models (.bst format)
+    XGBoostDMatrixBackend: XGBoost DMatrix objects (.dmatrix format)
+    PyTorchBackend: PyTorch nn.Module objects (.pt TorchScript format)
+    KerasBackend: Keras models (.keras format)
+    TensorflowKerasBackend: TensorFlow Keras models (.tfkeras format)
+
+How Registration Works:
+    Each backend class is decorated with @register_backend, which:
+    1. Calls get_dispatcher().register(BackendClass)
+    2. Creates an instance of the backend
+    3. Stores it in Dispatcher.backends dict by class name
+    4. The Dispatcher automatically uses it when saving/loading objects
+
+Example - Adding Support for a New Library:
+
+    @register_backend
+    class MyLibraryBackend(MarshalBackend):
+        name = "My Library backend"
+        display_name = "mylib"
+        file_type = "mylib"
+        obj_type_regex = r"mylib\\..*"
+
+        def save(self, obj, path):
+            '''Save object to path using MyLibrary format.'''
+            import mylib
+            mylib.save(obj, path)
+
+        def load(self, file_path):
+            '''Load object from path.'''
+            import mylib
+            return mylib.load(file_path)
+
+See Also:
+    backend.py: Core Dispatcher and MarshalBackend base class
+"""
+
 import logging
 
 from kale.marshal.backend import MarshalBackend, get_dispatcher

diff --git a/kale/step.py b/kale/step.py
@@ -193,6 +193,23 @@ def kfp_outputs(self) -> list[Artifact]:
 
 
 def __default_execution_handler(step: Step, *args, **kwargs):
+    """Default handler for executing a Step when no pipeline is registered.
+
+    Logs an info message and executes the step's source function directly if
+    it is callable. Raises a RuntimeError if the step was created from
+    a Notebook, as local execution is not supported in that case.
+
+    Args:
+        step (Step): The Step object to execute.
+        *args: Positional arguments passed to the step's source function.
+        **kwargs: Keyword arguments passed to the step's source function.
+
+    Returns:
+        Any: The return value of the step's source function.
+
+    Raises:
+        RuntimeError: If step source is not callable (e.g. from a Notebook).
+    """
     log.info("No Pipeline registration handler is set.")
     if not callable(step.source):
         raise RuntimeError(