diff --git a/docs/.rstcheck.cfg b/docs/.rstcheck.cfg index 5d48d421..080a7ccd 100644 --- a/docs/.rstcheck.cfg +++ b/docs/.rstcheck.cfg @@ -1,5 +1,5 @@ [rstcheck] report_level = warning ignore_directives = automodule, autosummary, currentmodule, toctree, ifconfig, tab-set, collapse, tabs, dropdown -ignore_roles = ref, cpp:class, cpp:func, py:func, c:macro +ignore_roles = ref, cpp:class, cpp:func, py:func, c:macro, external+data-api:doc, external+scikit_build_core:doc ignore_languages = cpp, python diff --git a/docs/concepts/abi_overview.md b/docs/concepts/abi_overview.md index c8e0cd56..125de216 100644 --- a/docs/concepts/abi_overview.md +++ b/docs/concepts/abi_overview.md @@ -15,7 +15,7 @@ -# ABI Overview +# ABI Specification This section provides an overview of the ABI convention of TVM FFI. The ABI is designed around the following key principles: diff --git a/docs/conf.py b/docs/conf.py index bb7f1202..a575ee04 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -157,8 +157,10 @@ "pillow": ("https://pillow.readthedocs.io/en/stable", None), "numpy": ("https://numpy.org/doc/stable", None), "torch": ("https://pytorch.org/docs/stable", None), - "torch-cpp": ("https://docs.pytorch.org/cppdocs/", None), + "torch-cpp": ("https://docs.pytorch.org/cppdocs", None), "dlpack": ("https://dmlc.github.io/dlpack/latest", None), + "data-api": ("https://data-apis.org/array-api/latest", None), + "scikit_build_core": ("https://scikit-build-core.readthedocs.io/en/stable/", None), } autosummary_generate = True # actually create stub pages diff --git a/docs/guides/build_from_source.md b/docs/dev/build_from_source.md similarity index 100% rename from docs/guides/build_from_source.md rename to docs/dev/build_from_source.md diff --git a/docs/get_started/quickstart.rst b/docs/get_started/quickstart.rst index 1ebb5338..c702c7cb 100644 --- a/docs/get_started/quickstart.rst +++ b/docs/get_started/quickstart.rst @@ -83,7 +83,7 @@ The class :cpp:class:`tvm::ffi::TensorView` allows zero-copy interop 
with tensor - NumPy, CuPy, - PyTorch, JAX, or -- any array type that supports the standard `DLPack protocol `_. +- any array type that supports the standard :external+data-api:doc:`DLPack protocol `. Finally, :cpp:func:`TVMFFIEnvGetStream` can be used in the CUDA code to launch a kernel on the caller's stream. @@ -127,36 +127,34 @@ TVM-FFI natively integrates with CMake via ``find_package`` as demonstrated belo .. code-block:: cmake - # Run `tvm-ffi-config --cmakedir` to set `tvm_ffi_DIR` + # Run `tvm-ffi-config --cmakedir` to set `tvm_ffi_ROOT` find_package(Python COMPONENTS Interpreter REQUIRED) execute_process(COMMAND "${Python_EXECUTABLE}" -m tvm_ffi.config --cmakedir OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE tvm_ffi_ROOT) find_package(tvm_ffi CONFIG REQUIRED) # Link C++ target to `tvm_ffi_header` and `tvm_ffi_shared` add_library(add_one_cpu SHARED compile/add_one_cpu.cc) - target_link_libraries(add_one_cpu PRIVATE tvm_ffi_header) - target_link_libraries(add_one_cpu PRIVATE tvm_ffi_shared) + tvm_ffi_configure_target(add_one_cpu) .. group-tab:: CUDA .. code-block:: cmake enable_language(CUDA) - # Run `tvm-ffi-config --cmakedir` to set `tvm_ffi_DIR` + # Run `tvm-ffi-config --cmakedir` to set `tvm_ffi_ROOT` find_package(Python COMPONENTS Interpreter REQUIRED) execute_process(COMMAND "${Python_EXECUTABLE}" -m tvm_ffi.config --cmakedir OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE tvm_ffi_ROOT) find_package(tvm_ffi CONFIG REQUIRED) # Link CUDA target to `tvm_ffi_header` and `tvm_ffi_shared` add_library(add_one_cuda SHARED compile/add_one_cuda.cu) - target_link_libraries(add_one_cuda PRIVATE tvm_ffi_header) - target_link_libraries(add_one_cuda PRIVATE tvm_ffi_shared) + tvm_ffi_configure_target(add_one_cuda) **Artifact.** The resulting ``add_one_cpu.so`` and ``add_one_cuda.so`` are minimal libraries that are agnostic to: - Python version/ABI. 
It is not compiled/linked with Python and depends only on TVM-FFI's stable C ABI; - Languages, including C++, Python, Rust or any other language that can interop with C ABI; -- ML frameworks, such as PyTorch, JAX, NumPy, CuPy, or anything with standard `DLPack protocol `_. +- ML frameworks, such as PyTorch, JAX, NumPy, CuPy, or anything with standard :external+data-api:doc:`DLPack protocol `. .. _sec-use-across-framework: @@ -177,60 +175,66 @@ directly. This process is done zero-copy, without any boilerplate code, under ex We can then use these functions in the following ways: -.. tab-set:: +.. _ship-to-pytorch: - .. tab-item:: PyTorch +PyTorch +~~~~~~~ - .. literalinclude:: ../../examples/quickstart/load/load_pytorch.py - :language: python - :start-after: [example.begin] - :end-before: [example.end] +.. literalinclude:: ../../examples/quickstart/load/load_pytorch.py + :language: python + :start-after: [example.begin] + :end-before: [example.end] - .. tab-item:: JAX +.. _ship-to-jax: - Support via `nvidia/jax-tvm-ffi `_. This can be installed via +JAX +~~~ + +Support via `nvidia/jax-tvm-ffi `_. This can be installed via - .. code-block:: bash +.. code-block:: bash - pip install jax-tvm-ffi + pip install jax-tvm-ffi - After installation, ``add_one_cuda`` can be registered as a target to JAX's ``ffi_call``. +After installation, ``add_one_cuda`` can be registered as a target to JAX's ``ffi_call``. - .. code-block:: python +.. code-block:: python - # Step 1. Load `build/add_one_cuda.so` - import tvm_ffi - mod = tvm_ffi.load_module("build/add_one_cuda.so") + # Step 1. Load `build/add_one_cuda.so` + import tvm_ffi + mod = tvm_ffi.load_module("build/add_one_cuda.so") - # Step 2. Register `mod.add_one_cuda` into JAX - import jax_tvm_ffi - jax_tvm_ffi.register_ffi_target("add_one", mod.add_one_cuda, platform="gpu") + # Step 2. Register `mod.add_one_cuda` into JAX + import jax_tvm_ffi + jax_tvm_ffi.register_ffi_target("add_one", mod.add_one_cuda, platform="gpu") - # Step 3. 
Run `mod.add_one_cuda` with JAX - import jax - import jax.numpy as jnp - jax_device, *_ = jax.devices("gpu") - x = jnp.array([1, 2, 3, 4, 5], dtype=jnp.float32, device=jax_device) - y = jax.ffi.ffi_call( - "add_one", # name of the registered function - jax.ShapeDtypeStruct(x.shape, x.dtype), # shape and dtype of the output - vmap_method="broadcast_all", - )(x) - print(y) + # Step 3. Run `mod.add_one_cuda` with JAX + import jax + import jax.numpy as jnp + jax_device, *_ = jax.devices("gpu") + x = jnp.array([1, 2, 3, 4, 5], dtype=jnp.float32, device=jax_device) + y = jax.ffi.ffi_call( + "add_one", # name of the registered function + jax.ShapeDtypeStruct(x.shape, x.dtype), # shape and dtype of the output + vmap_method="broadcast_all", + )(x) + print(y) - .. tab-item:: NumPy +.. _ship-to-numpy: - .. literalinclude:: ../../examples/quickstart/load/load_numpy.py - :language: python - :start-after: [example.begin] - :end-before: [example.end] +NumPy/CuPy +~~~~~~~~~~ - .. tab-item:: CuPy +.. literalinclude:: ../../examples/quickstart/load/load_numpy.py + :language: python + :start-after: [example.begin] + :end-before: [example.end] - .. literalinclude:: ../../examples/quickstart/load/load_cupy.py - :language: python - :start-after: [example.begin] - :end-before: [example.end] + +.. literalinclude:: ../../examples/quickstart/load/load_cupy.py + :language: python + :start-after: [example.begin] + :end-before: [example.end] Ship Across Languages @@ -240,14 +244,16 @@ TVM-FFI's core loading mechanism is ABI stable and works across language boundar A single library can be loaded in every language TVM-FFI supports, without having to recompile different libraries targeting different ABIs or languages. +.. 
_ship-to-python: + Python ~~~~~~ As shown in the :ref:`previous section`, :py:func:`tvm_ffi.load_module` loads a language- and framework-independent ``add_one_cpu.so`` or ``add_one_cuda.so`` and can be used to incorporate it into all Python -array frameworks that implement the standard `DLPack protocol `_. +array frameworks that implement the standard :external+data-api:doc:`DLPack protocol `. -.. _cpp_load: +.. _ship-to-cpp: C++ ~~~ @@ -301,6 +307,8 @@ Compile and run it with: return 0; } +.. _ship-to-rust: + Rust ~~~~ @@ -328,6 +336,15 @@ This procedure is identical to those in C++ and Python: Troubleshooting --------------- -- ``OSError: cannot open shared object file``: Add an rpath (Linux/macOS) or ensure the DLL is on ``PATH`` (Windows). Example run-path: ``-Wl,-rpath,`tvm-ffi-config --libdir```. +- ``OSError: cannot open shared object file``: Add an rpath (Linux/macOS) or ensure the DLL is on ``PATH`` (Windows). Example run-path: ``-Wl,-rpath,$(tvm-ffi-config --libdir)``. - ``undefined symbol: __tvm_ffi_add_one_cpu``: Ensure you used :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC` and compiled with default symbol visibility (``-fvisibility=hidden`` is fine; the macro ensures export). - ``CUDA error: invalid device function``: Rebuild with the correct ``-arch=sm_XX`` for your GPU, or include multiple ``-gencode`` entries. + + +Further Reading +--------------- + +- :doc:`Python Packaging <../packaging/python_packaging>` provides details on ABI-agnostic Python wheel building, as well as + exposing functions, classes and C symbols from TVM-FFI modules. +- :doc:`Stable C ABI ` explains the ABI in depth and how it enables stability guarantee. Its C examples demonstrate + how to interoperate through the stable C ABI from both callee and caller sides. 
diff --git a/docs/get_started/stable_c_abi.rst b/docs/get_started/stable_c_abi.rst index c372a7bb..bcfe4915 100644 --- a/docs/get_started/stable_c_abi.rst +++ b/docs/get_started/stable_c_abi.rst @@ -94,7 +94,7 @@ The following conventions apply when representing values in :cpp:class:`TVMFFIAn - Heap-allocated objects: the last 64 bits store a pointer to the actual object, for example: - * Managed tensor objects that follow `DLPack `_ (i.e. `DLTensor `_) layout. + * Managed tensor objects that follow :external+data-api:doc:`DLPack ` (i.e. `DLTensor `_) layout. - Arbitrary objects: the type index identifies the concrete type, and the last 64 bits store a pointer to a reference-counted object in TVM-FFI's object format, for example: @@ -126,7 +126,7 @@ Stability and Interoperability **Cross-language.** TVM-FFI implements this calling convention in multiple languages (C, C++, Python, Rust, ...), enabling code written in one language—or generated by a DSL targeting the ABI—to be called from another language. -**Cross-framework.** TVM-FFI uses standard data structures such as `DLPack tensors `_ to represent arrays, so compiled functions can be used from any array framework that implements the DLPack protocol (NumPy, PyTorch, TensorFlow, CuPy, JAX, and others). +**Cross-framework.** TVM-FFI uses standard data structures such as :external+data-api:doc:`DLPack tensors ` to represent arrays, so compiled functions can be used from any array framework that implements the DLPack protocol (NumPy, PyTorch, TensorFlow, CuPy, JAX, and others). Stable ABI in C Code @@ -142,7 +142,7 @@ TVM FFI's :ref:`C ABI ` is designed with DSL and ML compilers in This section shows how to write C code that follows the stable C ABI. Specifically, we provide two examples: - Callee side: A CPU ``add_one_cpu`` kernel in C that is equivalent to the :ref:`C++ example `. -- Caller side: A loader and runner in C that invokes the kernel, a direct C translation of the :ref:`C++ example `. 
+- Caller side: A loader and runner in C that invokes the kernel, a direct C translation of the :ref:`C++ example `. The C code is minimal and dependency-free, so it can serve as a direct reference for DSL compilers that want to expose or invoke kernels through the ABI. @@ -200,7 +200,7 @@ Build it with either approach: Caller: Kernel Loader ~~~~~~~~~~~~~~~~~~~~~ -Next, a minimal C loader invokes the ``add_one_cpu`` kernel. It is functionally identical to the :ref:`C++ example ` and performs: +Next, a minimal C loader invokes the ``add_one_cpu`` kernel. It is functionally identical to the :ref:`C++ example ` and performs: - **Step 1**. Load the shared library ``build/add_one_cpu.so`` that contains the kernel; - **Step 2**. Get function ``add_one_cpu`` from the library; @@ -249,6 +249,6 @@ What's Next **ABI specification.** See the complete ABI specification in :doc:`../concepts/abi_overview`. -**Convenient compiler target.** The stable C ABI is a simple, portable codegen target for DSL compilers. Emit C that follows this ABI to integrate with TVM-FFI and call the result from multiple languages and frameworks. See :doc:`../guides/compiler_integration`. +**Convenient compiler target.** The stable C ABI is a simple, portable codegen target for DSL compilers. Emit C that follows this ABI to integrate with TVM-FFI and call the result from multiple languages and frameworks. See :doc:`../concepts/abi_overview`. -**Rich and extensible type system.** TVM-FFI supports a rich set of types in the stable C ABI: primitive types (integers, floats), DLPack tensors, strings, built-in reference-counted objects (functions, arrays, maps), and user-defined reference-counted objects. See :doc:`../guides/cpp_guide`. +**Rich and extensible type system.** TVM-FFI supports a rich set of types in the stable C ABI: primitive types (integers, floats), DLPack tensors, strings, built-in reference-counted objects (functions, arrays, maps), and user-defined reference-counted objects. 
See :doc:`../guides/cpp_lang_guide`. diff --git a/docs/guides/cpp_guide.md b/docs/guides/cpp_lang_guide.md similarity index 100% rename from docs/guides/cpp_guide.md rename to docs/guides/cpp_lang_guide.md diff --git a/docs/guides/python_guide.md b/docs/guides/python_lang_guide.md similarity index 100% rename from docs/guides/python_guide.md rename to docs/guides/python_lang_guide.md diff --git a/docs/guides/python_packaging.md b/docs/guides/python_packaging.md deleted file mode 100644 index 3934b1fb..00000000 --- a/docs/guides/python_packaging.md +++ /dev/null @@ -1,460 +0,0 @@ - - - - - - - - - - - - - - - - -# Python Binding and Packaging - -This guide explains how to leverage tvm-ffi to expose C++ functions into Python and package them into a wheel. -At a high level, packaging with tvm-ffi offers several benefits: - -- **Ship one wheel** that can be used across Python versions, including free-threaded Python. -- **Multi-language access** to functions from Python, C++, Rust and other languages that support the ABI. -- **ML Systems Interop** with ML frameworks, DSLs, and libraries while maintaining minimal dependency. - -## Directly using Exported Library - -If you just need to expose a simple set of functions, -you can declare an exported symbol in C++: - -```c++ -// Compiles to mylib.so -#include - -int add_one(int x) { - return x + 1; -} - -TVM_FFI_DLL_EXPORT_TYPED_FUNC(add_one, add_one) -``` - -You then load the exported function in your Python project via {py:func}`tvm_ffi.load_module`. - -```python -# In your __init__.py -import tvm_ffi - -_LIB = tvm_ffi.load_module("/path/to/mlib.so") - -def add_one(x): - """Expose mylib.add_one""" - return _LIB.add_one(x) -``` - -This approach is like using {py:mod}`ctypes` to load and run DLLs, except we have more powerful features: - -- We can pass in `torch.Tensor` (or any other DLPack-compatible arrays). -- We can pass in a richer set of data structures such as strings, tuples, and dicts. 
-- {py:class}`tvm_ffi.Function` enables natural callbacks to Python lambdas or other languages. -- Exceptions are propagated naturally across language boundaries. - -## Pybind11 and Nanobind style Usage - -For advanced use cases where users may wish to register global functions or custom object types, -we also provide a pybind11/nanobind style API to register functions and custom objects. - -```c++ -#include -#include - -namespace my_ffi_extension { - -namespace ffi = tvm::ffi; - -/*! - * \brief Example of a custom object that is exposed to the FFI library - */ -class IntPairObj : public ffi::Object { - public: - int64_t a; - int64_t b; - - IntPairObj() = default; - IntPairObj(int64_t a, int64_t b) : a(a), b(b) {} - - int64_t GetFirst() const { return this->a; } - - // Required: declare type information - TVM_FFI_DECLARE_OBJECT_INFO_FINAL("my_ffi_extension.IntPair", IntPairObj, ffi::Object); -}; - -/*! - * \brief Defines an explicit reference to IntPairObj - * - * A reference wrapper serves as a reference-counted pointer to the object. - * You can use obj->field to access the fields of the object. 
- */ -class IntPair : public tvm::ffi::ObjectRef { - public: - // Constructor - explicit IntPair(int64_t a, int64_t b) { - data_ = tvm::ffi::make_object(a, b); - } - - // Required: define object reference methods - TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(IntPair, tvm::ffi::ObjectRef, IntPairObj); -}; - -void RaiseError(ffi::String msg) { TVM_FFI_THROW(RuntimeError) << msg; } - -TVM_FFI_STATIC_INIT_BLOCK() { - namespace refl = tvm::ffi::reflection; - refl::GlobalDef() - .def("my_ffi_extension.raise_error", RaiseError); - // register object definition - refl::ObjectDef() - .def(refl::init()) - // Example static method that returns the second element of the pair - .def_static("static_get_second", [](IntPair pair) -> int64_t { return pair->b; }) - // Example to bind an instance method - .def("get_first", &IntPairObj::GetFirst) - .def_ro("a", &IntPairObj::a) - .def_ro("b", &IntPairObj::b); -} -} // namespace my_ffi_extension -``` - -Then these functions and objects can be accessed from Python as long as the library is loaded. -You can use {py:func}`tvm_ffi.load_module` or simply use {py:class}`ctypes.CDLL`. Then you can access -the function through {py:func}`tvm_ffi.get_global_func` or {py:func}`tvm_ffi.init_ffi_api`. -We also allow direct exposure of object via {py:func}`tvm_ffi.register_object`. 
- -```python -# __init__.py -import tvm_ffi - -def raise_error(msg: str): - """Wrap raise error function.""" - # Usually we reorganize these functions into a _ffi_api.py and load once - func = tvm_ffi.get_global_func("my_ffi_extension.raise_error") - func(msg) - - -@tvm_ffi.register_object("my_ffi_extension.IntPair") -class IntPair(tvm_ffi.Object): - """IntPair object.""" - - def __init__(self, a: int, b: int) -> None: - """Construct the object.""" - # __ffi_init__ call into the refl::init<> registered - # in the static initialization block of the extension library - self.__ffi_init__(a, b) - - -def run_example(): - pair = IntPair(1, 2) - # prints 1 - print(pair.get_first()) - # prints 2 - print(IntPair.static_get_second(pair)) - # Raises a RuntimeError("error happens") - raise_error("error happens") -``` - -### Relations to Existing Solutions - -Most current binding systems focus on creating one-to-one bindings -that take a source language and bind to an existing target language runtime and ABI. -We deliberately take a more decoupled approach here: - -- Build stable, minimal ABI convention that is agnostic to the target language. -- Create bindings to connect the source and target language to the ABI. - -The focus of this project is the ABI itself which we believe can help the overall ecosystem. -We also anticipate there are possibilities for existing binding generators to also target the tvm-ffi ABI. - -**Design philosophy**. We have the following design philosophies focusing on ML systems. - -- FFI and cross-language interop should be first-class citizens in ML systems rather than an add-on. -- Enable multi-environment support in both source and target languages. -- The same ABI should be minimal and targetable by DSL compilers. - -Of course, there is always a tradeoff. It is by design impossible to support arbitrary advanced language features -in the target language, as different programming languages have their own design considerations. 
-We do believe it is possible to build a universal, effective, and minimal ABI for machine learning -system use cases. Based on the above design philosophies, we focus our cross-language -interaction interface through the FFI ABI for machine learning systems. - -So if you are building projects related to machine learning compilers, runtimes, -libraries, frameworks, DSLs, or generally scientific computing, we encourage you -to try it out. The extension mechanism can likely support features in other domains as well -and we welcome you to try it out as well. - -### Mix with Existing Solutions - -Because the global registry mechanism only relies on the code being linked, -you can also partially use tvm-ffi-based registration together with pybind11/nanobind in your project. -Just add the related code, link to `libtvm_ffi` and make sure you `import tvm_ffi` before importing -your module to ensure related symbols are available. -This approach may help to quickly leverage some of the cross-language features we have. -It also provides more powerful interaction with the host Python language, but of course the tradeoff -is that the final library will now also depend on the Python ABI. - -## Example Project Walk Through - -To get hands-on experience with the packaging flow, -you can try out an example project in our folder. -First, obtain a copy of the tvm-ffi source code. - -```bash -git clone https://github.com/apache/tvm-ffi --recursive -cd tvm-ffi -``` - -The examples are now in the examples folder. You can quickly build -and install the example using the following commands. - -```bash -cd examples/packaging -pip install -v . -``` - -Then you can run examples that leverage the built wheel package. 
- -```bash -python run_example.py add_one -``` - -## Setup pyproject.toml - -A typical tvm-ffi-based project has the following structure: - -```text -├── CMakeLists.txt # CMake build configuration -├── pyproject.toml # Python packaging configuration -├── src/ -│ └── extension.cc # C++ source code -├── python/ -│ └── my_ffi_extension/ -│ ├── __init__.py # Python package initialization -│ ├── base.py # Library loading logic -│ └── _ffi_api.py # FFI API registration -└── README.md # Project documentation -``` - -The `pyproject.toml` file configures the build system and project metadata. - -```toml -[project] -name = "my-ffi-extension" -version = "0.1.0" -# ... more project metadata omitted ... - -[build-system] -requires = ["scikit-build-core>=0.10.0", "apache-tvm-ffi"] -build-backend = "scikit_build_core.build" - -[tool.scikit-build] -# ABI-agnostic wheel -wheel.py-api = "py3" -# ... more build configuration omitted ... -``` - -We use scikit-build-core for building the wheel. Make sure you add tvm-ffi as a build-system requirement. -Importantly, we should set `wheel.py-api` to `py3` to indicate it is ABI-generic. - -### Setup CMakeLists.txt - -The CMakeLists.txt handles the build and linking of the project. -There are two ways you can build with tvm-ffi: - -- Link the pre-built `libtvm_ffi` shipped from the pip package -- Build tvm-ffi from source - -For common cases, using the pre-built library and linking tvm_ffi_shared is sufficient. -To build with the pre-built library, you can do: - -```cmake -cmake_minimum_required(VERSION 3.18) -project(my_ffi_extension) - -find_package(Python COMPONENTS Interpreter REQUIRED) -# find the prebuilt package -find_package(tvm_ffi CONFIG REQUIRED) - -# ... more cmake configuration omitted ... - -# linking the library -target_link_libraries(my_ffi_extension tvm_ffi_shared) -``` - -There are cases where one may want to cross-compile or bundle part of tvm_ffi objects directly -into the project. 
In such cases, you should build from source. - -```cmake -execute_process( - COMMAND "${Python_EXECUTABLE}" -m tvm_ffi.config --sourcedir - OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE tvm_ffi_ROOT) -# add the shipped source code as a cmake subdirectory -add_subdirectory(${tvm_ffi_ROOT} tvm_ffi) - -# ... more cmake configuration omitted ... - -# linking the library -target_link_libraries(my_ffi_extension tvm_ffi_shared) -``` - -Note that it is always safe to build from source, and the extra cost of building tvm-ffi is small -because tvm-ffi is a lightweight library. If you are in doubt, -you can always choose to build tvm-ffi from source. -In Python or other cases when we dynamically load libtvm_ffi shipped with the dedicated pip package, -you do not need to ship libtvm_ffi.so in your package even if you build tvm-ffi from source. -The built objects are only used to supply the linking information. - -### Exposing C++ Functions - -The C++ implementation is defined in `src/extension.cc`. -There are two ways one can expose a function in C++ to the FFI library. -First, `TVM_FFI_DLL_EXPORT_TYPED_FUNC` can be used to expose the function directly as a C symbol that follows the tvm-ffi ABI, -which can later be accessed via `tvm_ffi.load_module`. - -Here's a basic example of the function implementation: - -```c++ -void AddOne(ffi::TensorView x, ffi::TensorView y) { - // ... implementation omitted ... -} - -TVM_FFI_DLL_EXPORT_TYPED_FUNC(add_one, my_ffi_extension::AddOne); -``` - -We can also register a function into the global function table with a given name: - -```c++ -void RaiseError(ffi::String msg) { - TVM_FFI_THROW(RuntimeError) << msg; -} - -TVM_FFI_STATIC_INIT_BLOCK() { - namespace refl = tvm::ffi::reflection; - refl::GlobalDef() - .def("my_ffi_extension.raise_error", RaiseError); -} -``` - -Make sure to have a unique name across all registered functions when registering a global function. -Always prefix with a package namespace name to avoid name collisions. 
-The function can then be found via `tvm_ffi.get_global_func(name)` -and is expected to stay throughout the lifetime of the program. - -We recommend using `TVM_FFI_DLL_EXPORT_TYPED_FUNC` for functions that are supposed to be dynamically -loaded (such as JIT scenarios) so they won't be exposed to the global function table. - -### Library Loading in Python - -The base module handles loading the compiled extension: - -```python -import tvm_ffi -import os -import sys - -def _load_lib(): - file_dir = os.path.dirname(os.path.realpath(__file__)) - - # Platform-specific library names - if sys.platform.startswith("win32"): - lib_name = "my_ffi_extension.dll" - elif sys.platform.startswith("darwin"): - lib_name = "my_ffi_extension.dylib" - else: - lib_name = "my_ffi_extension.so" - - lib_path = os.path.join(file_dir, lib_name) - return tvm_ffi.load_module(lib_path) - -_LIB = _load_lib() -``` - -Effectively, it leverages the `tvm_ffi.load_module` call to load the library -extension DLL shipped along with the package. The `_ffi_api.py` contains a function -call to `tvm_ffi.init_ffi_api` that registers all global functions prefixed -with `my_ffi_extension` into the module. - -```python -# _ffi_api.py -import tvm_ffi -from .base import _LIB - -# Register all global functions prefixed with 'my_ffi_extension.' -# This makes functions registered via TVM_FFI_STATIC_INIT_BLOCK available -tvm_ffi.init_ffi_api("my_ffi_extension", __name__) -``` - -Then we can redirect the calls to the related functions. - -```python -from .base import _LIB -from . import _ffi_api - -def add_one(x, y): - # ... docstring omitted ... - return _LIB.add_one(x, y) - -def raise_error(msg): - # ... docstring omitted ... - return _ffi_api.raise_error(msg) -``` - -### Build and Use the Package - -First, build the wheel: - -```bash -pip wheel -v -w dist . 
-``` - -Then install the built wheel: - -```bash -pip install dist/*.whl -``` - -Then you can try it out: - -```python -import torch -import my_ffi_extension - -# Create input and output tensors -x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32) -y = torch.empty_like(x) - -# Call the function -my_ffi_extension.add_one(x, y) -print(y) # Output: tensor([2., 3., 4., 5., 6.]) -``` - -You can also run the following command to see how errors are raised and propagated -across language boundaries: - -```bash -python run_example.py raise_error -``` - -When possible, tvm-ffi will try to preserve backtraces across language boundaries. You will see outputs like: - -```text -File "src/extension.cc", line 45, in void my_ffi_extension::RaiseError(tvm::ffi::String) -``` - -## Wheel Auditing - -When using `auditwheel`, exclude `libtvm_ffi` as it will be shipped with the `tvm_ffi` package. - -```bash -auditwheel repair --exclude libtvm_ffi.so dist/*.whl -``` - -As long as you import `tvm_ffi` first before loading the library, the symbols will be available. diff --git a/docs/guides/rust_guide.md b/docs/guides/rust_lang_guide.md similarity index 97% rename from docs/guides/rust_guide.md rename to docs/guides/rust_lang_guide.md index a1b7ac78..f4c19e78 100644 --- a/docs/guides/rust_guide.md +++ b/docs/guides/rust_lang_guide.md @@ -213,5 +213,5 @@ For detailed API documentation, see the [Rust API Reference](../reference/rust/i ## Related Resources - [Quick Start Guide](../get_started/quickstart.rst) - General TVM FFI introduction -- [C++ Guide](cpp_guide.md) - C++ API usage -- [Python Guide](python_guide.md) - Python API usage +- [C++ Guide](./cpp_lang_guide.md) - C++ API usage +- [Python Guide](./python_lang_guide.md) - Python API usage diff --git a/docs/index.rst b/docs/index.rst index 23e67d0b..53f922cc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -25,18 +25,12 @@ or reading through the guides and concepts sections. 
Installation ------------ -To install via pip, run: +To install TVM-FFI via pip or uv, run: .. code-block:: bash pip install apache-tvm-ffi - -We also recommend installing the optional package below for improved -torch tensor conversion performance. - -.. code-block:: bash - - pip install torch-c-dlpack-ext + pip install torch-c-dlpack-ext # compatibility package for torch <= 2.9 Table of Contents @@ -53,16 +47,12 @@ Table of Contents :maxdepth: 1 :caption: Guides - guides/python_packaging.md - guides/cpp_packaging.md - guides/cpp_guide.md - guides/python_guide.md - guides/rust_guide.md - guides/cubin_launcher.rst - guides/compiler_integration.md - guides/build_from_source.md guides/kernel_library_guide.rst - + guides/compiler_integration.md + guides/cubin_launcher.rst + guides/python_lang_guide.md + guides/cpp_lang_guide.md + guides/rust_lang_guide.md .. toctree:: :maxdepth: 1 @@ -70,6 +60,12 @@ Table of Contents concepts/abi_overview.md +.. toctree:: + :maxdepth: 1 + :caption: Packaging + + packaging/python_packaging.rst + packaging/cpp_packaging.md .. toctree:: :maxdepth: 1 @@ -78,3 +74,9 @@ Table of Contents reference/python/index.rst reference/cpp/index.rst reference/rust/index.rst + +.. toctree:: + :maxdepth: 1 + :caption: Developer Manual + + dev/build_from_source.md diff --git a/docs/guides/cpp_packaging.md b/docs/packaging/cpp_packaging.md similarity index 100% rename from docs/guides/cpp_packaging.md rename to docs/packaging/cpp_packaging.md diff --git a/docs/packaging/python_packaging.rst b/docs/packaging/python_packaging.rst new file mode 100644 index 00000000..c768768d --- /dev/null +++ b/docs/packaging/python_packaging.rst @@ -0,0 +1,506 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Python Packaging +================ + +This guide walks through a small but complete workflow for packaging a TVM-FFI extension +as a Python wheel. The goal is to help you wire up a simple extension, produce a wheel, +and ship user-friendly typing annotations without needing to know every detail of TVM +internals. We will cover three checkpoints: + +- Export C++ to Python; +- Build Python wheel; +- Automatic Python package generation tools. + +Export C++ to Python +-------------------- + +TVM-FFI offers three ways to expose code: + +- C symbols in TVM FFI ABI: Export code as plain C symbols. This is the recommended way for + most use cases as it keeps the boundary thin and works well with compiler codegen; +- Functions: Reflect functions via the global registry; +- Classes: Register C++ classes derived from :cpp:class:`tvm::ffi::Object` as Python dataclasses. + +Metadata is automatically captured and is later turned into type hints for proper LSP help. + +TVM-FFI ABI (Recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you prefer to export plain C symbols, TVM-FFI provides helpers to make them accessible +to Python. This option keeps the boundary thin and works well with LLVM compilers where +C symbols are easier to call into. + +.. tabs:: + + .. group-tab:: C++ + + Macro :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC` exports the function ``AddTwo`` as + a C symbol ``__tvm_ffi_add_two`` inside the shared library. 
+ + .. code-block:: cpp + + static int AddTwo(int x) { + return x + 2; + } + + TVM_FFI_DLL_EXPORT_TYPED_FUNC(add_two, AddTwo); + + .. group-tab:: Python (User) + + Symbol ``__tvm_ffi_add_two`` is made available via ``LIB.add_two`` to users. + + .. code-block:: python + + import my_ffi_extension + my_ffi_extension.LIB.add_two(1) # -> 3 + + .. group-tab:: Python (Generated) + + The shared library is loaded by :py:func:`tvm_ffi.libinfo.load_lib_module`. + + .. code-block:: python + + # File: my_ffi_extension/_ffi_api.py + + LIB = tvm_ffi.libinfo.load_lib_module( + package="my-ffi-extension", + target_name="my_ffi_extension", + ) + + +Global Function +~~~~~~~~~~~~~~~ + +This example registers a function into the global registry and then calls it from Python. +The registry handles type translation, error handling, and metadata. + +.. tabs:: + + .. group-tab:: C++ + + C++ function ``AddOne`` is registered with name ``my_ffi_extension.add_one`` + in the global registry using :cpp:class:`tvm::ffi::reflection::GlobalDef`. + + .. code-block:: cpp + + static int AddOne(int x) { + return x + 1; + } + + TVM_FFI_STATIC_INIT_BLOCK() { + namespace refl = tvm::ffi::reflection; + refl::GlobalDef() + .def("my_ffi_extension.add_one", AddOne); + } + + .. group-tab:: Python (User) + + The global function is accessible after importing the extension, + and the import path matches the registered name, i.e. ``my_ffi_extension.add_one``. + + .. code-block:: python + + import my_ffi_extension + + my_ffi_extension.add_one(3) # -> 4 + + .. group-tab:: Python (Generated) + + Under the hood, the shared library is loaded by :py:func:`tvm_ffi.init_ffi_api` + during package initialization. + + .. code-block:: python + + # File: my_ffi_extension/_ffi_api.py + + tvm_ffi.init_ffi_api( + namespace="my_ffi_extension", + target_module_name=__name__, + ) + + def add_one(x: int) -> int: ... 
+ + +Class +~~~~~ + +Any class derived from :cpp:class:`tvm::ffi::Object` can be registered, exported and +instantiated from Python. The reflection helper :cpp:class:`tvm::ffi::reflection::ObjectDef` +makes it easy to expose: + +- Fields + + * Immutable field via :cpp:func:`ObjectDef::def_ro `; + * Mutable field via :cpp:func:`ObjectDef::def_rw `; + +- Methods + + * Member method via :cpp:func:`ObjectDef::def `. + * Static method via :cpp:func:`ObjectDef::def_static `; + * Constructors via :cpp:class:`tvm::ffi::reflection::init`. + + +.. tabs:: + + .. group-tab:: C++ + + The example below defines a class ``my_ffi_extension.IntPair`` with + + - two integer fields ``a``, ``b``, + - a constructor, and + - a method ``Sum`` that returns the sum of the two fields. + + .. code-block:: cpp + + class IntPairObj : public ffi::Object { + public: + int64_t a; + int64_t b; + IntPairObj(int64_t a, int64_t b) : a(a), b(b) {} + + int64_t Sum() const { + return a + b; + } + + TVM_FFI_DECLARE_OBJECT_INFO_FINAL( + /*type_key=*/"my_ffi_extension.IntPair", + /*class=*/IntPairObj, + /*parent_class=*/ffi::Object + ); + }; + + TVM_FFI_STATIC_INIT_BLOCK() { + namespace refl = tvm::ffi::reflection; + refl::ObjectDef() + .def(refl::init()) + .def_rw("a", &IntPairObj::a, "the first field") + .def_rw("b", &IntPairObj::b, "the second field") + .def("sum", &IntPairObj::Sum, "IntPairObj::Sum() method"); + } + + .. group-tab:: Python (User) + + The class is available immediately after importing the extension, + with the import path matching the registered name, i.e. ``my_ffi_extension.IntPair``. + + .. code-block:: python + + import my_ffi_extension + + pair = my_ffi_extension.IntPair(1, 2) + pair.sum() # -> 3 + + .. group-tab:: Python (Generated) + + Type hints are generated for both fields and methods. + + .. 
code-block:: python + + # File: my_ffi_extension/_ffi_api.py (auto generated) + + @tvm_ffi.register_object("my_ffi_extension.IntPair") + class IntPair(tvm_ffi.Object): + a: int + b: int + + def __init__(self, a: int, b: int) -> None: ... + def sum(self) -> int: ... + + +Build Python Wheel +------------------ + +Once the C++ side is ready, TVM-FFI provides convenient helpers to build and ship +ABI-agnostic Python extensions using any standard packaging tool. + +The flow below uses :external+scikit_build_core:doc:`scikit-build-core ` +that drives CMake build, but the same ideas translate to setuptools or other :pep:`517` backends. + +CMake Target +~~~~~~~~~~~~ + +Assume the source tree contains ``src/extension.cc``. Create a ``CMakeLists.txt`` that +creates a shared target ``my_ffi_extension`` and configures it against TVM-FFI. + +.. code-block:: cmake + + add_library(my_ffi_extension SHARED src/extension.cc) + tvm_ffi_configure_target(my_ffi_extension STUB_DIR "./python") + install(TARGETS my_ffi_extension DESTINATION .) + tvm_ffi_install(my_ffi_extension DESTINATION .) + +Function ``tvm_ffi_configure_target`` sets up TVM-FFI include paths, links against the TVM-FFI library, +generates stubs under the specified directory, and optionally debug symbols. + +Function ``tvm_ffi_install`` places necessary information, e.g. debug symbols on macOS, next to +the shared library for proper packaging. + +Python Build Backend +~~~~~~~~~~~~~~~~~~~~ + +Define a :pep:`517` build backend in ``pyproject.toml``, with the following steps: + +- Specify ``apache-tvm-ffi`` as a build requirement, so that CMake can find TVM-FFI; +- Configure ``wheel.py-api`` that indicates a Python ABI-agnostic wheel; +- Specify the source directory of the package via ``wheel.packages``, and the installation + destination via ``wheel.install-dir``. + +.. 
code-block:: toml + + [build-system] + requires = ["scikit-build-core>=0.10.0", "apache-tvm-ffi"] + build-backend = "scikit_build_core.build" + + [tool.scikit-build] + # The wheel is Python ABI-agnostic + wheel.py-api = "py3" + # The package contains the Python module at `python/my_ffi_extension` + wheel.packages = ["python/my_ffi_extension"] + # The install dir matches the import name + wheel.install-dir = "my_ffi_extension" + +Once fully specified, scikit-build-core will invoke CMake and drive the extension building process. + + +Wheel Auditing +~~~~~~~~~~~~~~ + +**Build wheels**. The wheel can be built using the standard workflows, e.g.: + +- `pip workflow `_ or `editable install `_ + +.. code-block:: bash + + # editable install + pip install -e . + # standard wheel build + pip wheel -w dist . + +- `uv workflow `_ + +.. code-block:: bash + + uv build --wheel --out-dir dist . + +- `cibuildwheel `_ for multi-platform build + +.. code-block:: bash + + cibuildwheel --output-dir dist + +**Audit wheels**. In practice, an extra step is usually necessary to remove redundant +and error-prone shared library dependencies. In our case, given ``libtvm_ffi.so`` +(or its respective platform variants) is guaranteed to be loaded by importing ``tvm_ffi``, +we can safely exclude this dependency from the final wheel. + +.. code-block:: bash + + # Linux + auditwheel repair --exclude libtvm_ffi.so dist/*.whl + # macOS + delocate-wheel -w dist -v --exclude libtvm_ffi.dylib dist/*.whl + # Windows + delvewheel repair --exclude tvm_ffi.dll -w dist dist\\*.whl + +Stub Generation Tool +-------------------- + +TVM-FFI comes with a command-line tool ``tvm-ffi-stubgen`` that automates +the generation of type stubs for both global functions and classes. +It turns reflection metadata into proper Python type hints, and generates +corresponding Python code **inline** and **statically**. 
+ +Inline Directives +~~~~~~~~~~~~~~~~~ + +Similar to linter tools, ``tvm-ffi-stubgen`` uses special comments +to identify what to generate and where to write generated code. + +**Directive 1 (Global functions)**. Example below shows a directive +``global/${prefix}`` marking a type stub section of global functions. + +.. code-block:: python + + # tvm-ffi-stubgen(begin): global/my_ext.arith + tvm_ffi.init_ffi_api("my_ext.arith", __name__) + if TYPE_CHECKING: + def add_one(_0: int, /) -> int: ... + def add_two(_0: int, /) -> int: ... + def add_three(_0: int, /) -> int: ... + # tvm-ffi-stubgen(end) + +Running ``tvm-ffi-stubgen`` fills in the function stubs between the +``begin`` and ``end`` markers based on the loaded registry, and in this case +introduces all the global functions named ``my_ext.arith.*``. + +**Directive 2 (Classes)**. Example below shows a directive +``object/${type_key}`` marking the fields and methods of a registered class. + +.. code-block:: python + + @tvm_ffi.register_object("my_ffi_extension.IntPair") + class IntPair(_ffi_Object): + # tvm-ffi-stubgen(begin): object/my_ffi_extension.IntPair + a: int + b: int + if TYPE_CHECKING: + def __init__(self, a: int, b: int) -> None: ... + def sum(self) -> int: ... + # tvm-ffi-stubgen(end) + +Directive-based Generation +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +After the TVM-FFI extension is built as a shared library, say at +``build/libmy_ffi_extension.so``, stubs can be generated as follows. + +**Command line tool**. The command below generates stubs for +the package located at ``python/my_ffi_extension``, updating +all sections marked by the directives. + +.. code-block:: bash + + tvm-ffi-stubgen \ + python/my_ffi_extension \ + --dlls build/libmy_ffi_extension.so \ + + +**CMake Integration**. CMake function ``tvm_ffi_configure_target`` +is integrated with this command and can be used to keep stubs up to date +every time the target is built. + +.. 
code-block:: cmake + + tvm_ffi_configure_target(my_ffi_extension + STUB_DIR "python" + ) + +Inside the function, CMake manages to find proper ``--dlls`` arguments +via ``$``. + +Scaffold Missing Directives +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Command line tool**. Beyond updating existing directives, ``tvm-ffi-stubgen`` +can be used to scaffold missing directives if they are not yet present in the +package with a few extra flags. + +.. code-block:: bash + + tvm-ffi-stubgen \ + python/my_ffi_extension \ + --dlls build/libmy_ffi_extension.so \ + --init-pypkg my-ffi-extension \ + --init-lib my_ffi_extension \ + --init-prefix "my_ffi_extension." \ + +- ``--init-pypkg ``: Specifies the name of the Python package to initialize, e.g. ``apache-tvm-ffi``, ``my-ffi-extension``; +- ``--init-lib ``: Specifies the name of the CMake target (shared library) to load for reflection metadata; +- ``--init-prefix ``: Specifies the registry prefix to include for stub generation, e.g. ``my_ffi_extension.``. If names of global functions or classes start with this prefix, they will be included in the generated stubs. + +**CMake Integration**. CMake function ``tvm_ffi_configure_target`` +also supports scaffolding missing directives via the extra options +``STUB_INIT``, ``STUB_PKG``, and ``STUB_PREFIX``. + +.. code-block:: cmake + + tvm_ffi_configure_target(my_ffi_extension + STUB_DIR "python" + STUB_INIT ON + ) + +The ``STUB_INIT`` option instructs CMake to scaffold missing directives +based on the target and package information already specified. + +Other Directives +~~~~~~~~~~~~~~~~ + +All the supported directives are documented via: + +.. code-block:: bash + + tvm-ffi-stubgen --help + + +It includes: + +**Directive 3 (Import section)**. It populates all the imported names used by generated stubs. Example: + +.. 
code-block:: python + + # tvm-ffi-stubgen(begin): import-section + from __future__ import annotations + from ..registry import init_ffi_api as _FFI_INIT_FUNC + from typing import TYPE_CHECKING + if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + from tvm_ffi import Device, Object, Tensor, dtype + from tvm_ffi.testing import TestIntPair + from typing import Any, Callable + # tvm-ffi-stubgen(end) + +**Directive 4 (Export)**. It re-exports names defined in `_ffi_api.__all__` into the current file. Usually +used in ``__init__.py`` to aggregate all exported names. Example: + +.. code-block:: python + + # tvm-ffi-stubgen(begin): export/_ffi_api + from ._ffi_api import * # noqa: F403 + from ._ffi_api import __all__ as _ffi_api__all__ + if "__all__" not in globals(): + __all__ = [] + __all__.extend(_ffi_api__all__) + # tvm-ffi-stubgen(end) + +**Directive 5 (__all__)**. It populates the ``__all__`` variable with all generated +classes and functions, as well as ``LIB`` if present. It's usually placed at the end of +``_ffi_api.py``. Example: + +.. code-block:: python + + __all__ = [ + # tvm-ffi-stubgen(begin): __all__ + "LIB", + "IntPair", + "raise_error", + # tvm-ffi-stubgen(end) + ] + +**Directive 6 (ty-map)**. It maps the type key of a class to Python types used in generation. Example: + +.. code-block:: python + + # tvm-ffi-stubgen(ty-map): ffi.reflection.AccessStep -> ffi.access_path.AccessStep + +means the class with type key ``ffi.reflection.AccessStep``, is instead class ``ffi.access_path.AccessStep`` +in Python. + +**Directive 7 (Import object)**. It injects a custom import into generated code, optionally +TYPE_CHECKING-only. Example: + + +.. code-block:: python + + # tvm-ffi-stubgen(import-object): ffi.Object;False;_ffi_Object + +imports ``ffi.Object`` as ``_ffi_Object`` for use in generated code, +where the second field ``False`` indicates the import is not TYPE_CHECKING-only. + +**Directive 8 (Skip file)**. 
It prevents the stub generation tool from modifying the file. +This is useful when the file contains custom code that should not be altered. diff --git a/docs/reference/python/index.rst b/docs/reference/python/index.rst index 99b652f4..93144d90 100644 --- a/docs/reference/python/index.rst +++ b/docs/reference/python/index.rst @@ -103,7 +103,7 @@ Stream Context C++ Extension --------------- +------------- C++ integration helpers for building and loading inline modules. @@ -114,6 +114,7 @@ C++ integration helpers for building and loading inline modules. cpp.build_inline cpp.load cpp.build + libinfo.load_lib_module NVRTC Utilities --------------- diff --git a/include/tvm/ffi/base_details.h b/include/tvm/ffi/base_details.h index f6280200..7224ac11 100644 --- a/include/tvm/ffi/base_details.h +++ b/include/tvm/ffi/base_details.h @@ -92,10 +92,10 @@ /// \cond Doxygen_Suppress #define TVM_FFI_STATIC_INIT_BLOCK_DEF_(FnName) __attribute__((constructor)) static void FnName() /// \endcond -/* +/*! * \brief Macro that defines a block that will be called during static initialization. * - * \code + * \code{.cpp} * TVM_FFI_STATIC_INIT_BLOCK() { * RegisterFunctions(); * } diff --git a/include/tvm/ffi/c_api.h b/include/tvm/ffi/c_api.h index 1610376f..27d6e8cf 100644 --- a/include/tvm/ffi/c_api.h +++ b/include/tvm/ffi/c_api.h @@ -802,7 +802,7 @@ typedef enum { * * The meta-data record comparison method in tree node and DAG node. * - * \code + * \code{.cpp} * x = VarNode() * v0 = AddNode(x, 1) * v1 = AddNode(x, 1) diff --git a/include/tvm/ffi/container/container_details.h b/include/tvm/ffi/container/container_details.h index 397209f3..09f513b1 100644 --- a/include/tvm/ffi/container/container_details.h +++ b/include/tvm/ffi/container/container_details.h @@ -47,7 +47,7 @@ namespace details { * \tparam ElemType The type of objects stored in the array right after * ArrayType. 
* - * \code + * \code{.cpp} * // Example usage of the template to define a simple array wrapper * class ArrayObj : public tvm::ffi::details::InplaceArrayBase { * public: @@ -72,7 +72,6 @@ namespace details { * // Access the 0th element in the array. * assert(ptr->operator[](0) == fields[0]); * } - * * \endcode */ template diff --git a/include/tvm/ffi/container/tensor.h b/include/tvm/ffi/container/tensor.h index 857bd6bd..3675bb5a 100644 --- a/include/tvm/ffi/container/tensor.h +++ b/include/tvm/ffi/container/tensor.h @@ -30,8 +30,6 @@ #include #include -#include -#include #include #include #include @@ -406,7 +404,7 @@ class Tensor : public ObjectRef { * to create Tensors. * * Example usage: - * \code + * \code{.cpp} * // CPU Allocator * struct CPUNDAlloc { * void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); } @@ -431,20 +429,20 @@ class Tensor : public ObjectRef { * } * }; * - * // NVSHMEM Allocator - * struct NVSHMEMNDAlloc { - * void AllocData(DLTensor* tensor) { - * size_t size = tvm::ffi::GetDataSize(*tensor); - * tensor->data = nvshmem_malloc(size); - * TVM_FFI_ICHECK_NE(tensor->data, nullptr) << "nvshmem_malloc failed. size: " << size; - * } - * void FreeData(DLTensor* tensor) { nvshmem_free(tensor->data); } - * }; + * // NVSHMEM Allocator + * struct NVSHMEMNDAlloc { + * void AllocData(DLTensor* tensor) { + * size_t size = tvm::ffi::GetDataSize(*tensor); + * tensor->data = nvshmem_malloc(size); + * TVM_FFI_ICHECK_NE(tensor->data, nullptr) << "nvshmem_malloc failed. 
size: " << size; + * } + * void FreeData(DLTensor* tensor) { nvshmem_free(tensor->data); } + * }; * - * // Allocator usage - * ffi::Tensor cpu_tensor = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...); - * ffi::Tensor cuda_tensor = ffi::Tensor::FromNDAlloc(CUDANDAlloc(), ...); - * ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...); + * // Allocator usage + * ffi::Tensor cpu_tensor = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...); + * ffi::Tensor cuda_tensor = ffi::Tensor::FromNDAlloc(CUDANDAlloc(), ...); + * ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...); * \endcode * * \param alloc The NDAllocator. @@ -507,12 +505,8 @@ class Tensor : public ObjectRef { * in the extra/c_env_api.h to create a Tensor from the thread-local environment allocator. * We explicitly pass TVMFFIEnvTensorAlloc to maintain explicit dependency on extra/c_env_api.h * - * \code - * - * ffi::Tensor tensor = ffi::Tensor::FromEnvAlloc( - * TVMFFIEnvTensorAlloc, shape, dtype, device - * ); - * + * \code{.cpp} + * ffi::Tensor tensor = ffi::Tensor::FromEnvAlloc(TVMFFIEnvTensorAlloc, shape, dtype, device); * \endcode * * \param env_alloc TVMFFIEnvTensorAlloc function pointer. diff --git a/include/tvm/ffi/error.h b/include/tvm/ffi/error.h index 95602379..91d55c67 100644 --- a/include/tvm/ffi/error.h +++ b/include/tvm/ffi/error.h @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -69,15 +68,13 @@ namespace ffi { * and return a proper code to tell the frontend caller about * this fact. * - * \code - * + * \code{.cpp} * void ExampleLongRunningFunction() { * if (TVMFFIEnvCheckSignals() != 0) { * throw ::tvm::ffi::EnvErrorAlreadySet(); * } * // do work here * } - * * \endcode */ struct EnvErrorAlreadySet : public std::exception {}; @@ -295,12 +292,10 @@ class ErrorBuilder { /*! 
* \brief Helper macro to throw an error with backtrace and message * - * \code - * - * void ThrowError() { - * TVM_FFI_THROW(RuntimeError) << "error message"; - * } - * + * \code{.cpp} + * void ThrowError() { + * TVM_FFI_THROW(RuntimeError) << "error message"; + * } * \endcode */ #define TVM_FFI_THROW(ErrorKind) \ diff --git a/include/tvm/ffi/extra/cuda/device_guard.h b/include/tvm/ffi/extra/cuda/device_guard.h index 083580f7..01586886 100644 --- a/include/tvm/ffi/extra/cuda/device_guard.h +++ b/include/tvm/ffi/extra/cuda/device_guard.h @@ -34,7 +34,7 @@ namespace ffi { * current CUDA device back to original device index. * * Example usage: - * \code + * \code{.cpp} * void kernel(ffi::TensorView x) { * ffi::CUDADeviceGuard guard(x.device().device_id); * ... diff --git a/include/tvm/ffi/extra/module.h b/include/tvm/ffi/extra/module.h index 6af26c25..5c2142ec 100644 --- a/include/tvm/ffi/extra/module.h +++ b/include/tvm/ffi/extra/module.h @@ -87,7 +87,7 @@ class TVM_FFI_EXTRA_CXX_API ModuleObj : public Object { * \param name The name of the function. * \return The metadata as JSON string if available, nullopt otherwise. * - * \code + * \code{.cpp} * Module mod = Module::LoadFromFile("lib.so"); * Optional metadata = mod->GetFunctionMetadata("my_func"); * if (metadata.has_value()) { @@ -208,7 +208,7 @@ class TVM_FFI_EXTRA_CXX_API ModuleObj : public Object { * When invoking a function on a ModuleObj, such as GetFunction, * use operator-> to get the ModuleObj pointer and invoke the member functions. 
* - * \code + * \code{.cpp} * ffi::Module mod = ffi::Module::LoadFromFile("path/to/module.so"); * ffi::Function func = mod->GetFunction(name); * \endcode diff --git a/include/tvm/ffi/function.h b/include/tvm/ffi/function.h index f2cd61fa..d1cc6933 100644 --- a/include/tvm/ffi/function.h +++ b/include/tvm/ffi/function.h @@ -40,7 +40,6 @@ #include #include -#include #include #include #include @@ -55,7 +54,7 @@ namespace ffi { * \brief Marks the beginning of the safe call that catches exception explicitly * \sa TVM_FFI_SAFE_CALL_END * - * \code + * \code{.cpp} * int TVMFFICStyleFunction() { * TVM_FFI_SAFE_CALL_BEGIN(); * // c++ code region here @@ -90,7 +89,7 @@ namespace ffi { * \brief Macro to check a call to TVMFFISafeCallType and raise exception if error happens. * \param func The function to check. * - * \code + * \code{.cpp} * // calls TVMFFIFunctionCall and raises exception if error happens * TVM_FFI_CHECK_SAFE_CALL(TVMFFITypeKeyToIndex(&type_key_arr, &type_index)); * \endcode @@ -545,18 +544,15 @@ class Function : public ObjectRef { * * This function can be useful to turn an existing exported symbol into a typed function. * - * \code - * + * \code{.cpp} * // An extern "C" function, matching TVMFFISafeCallType * extern "C" int __tvm_ffi_add( * void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny*result * ); - * * // redirect an existing symbol into a typed function * inline int add(int a, int b) { * return tvm::ffi::Function::InvokeExternC(nullptr, __tvm_ffi_add, a, b).cast(); * } - * * \endcode * * \tparam Args The types of the arguments to the extern function. @@ -583,13 +579,13 @@ class Function : public ObjectRef { * \param args Arguments to be passed. * \tparam Args arguments to be passed. 
* - * \code - * // Example code on how to call packed function - * void CallFFIFunction(tvm::ffi::Function f) { - * // call like normal functions by pass in arguments - * // return value is automatically converted back - * int rvalue = f(1, 2.0); - * } + * \code{.cpp} + * // Example code on how to call packed function + * void CallFFIFunction(tvm::ffi::Function f) { + * // call like normal functions by pass in arguments + * // return value is automatically converted back + * int rvalue = f(1, 2.0); + * } * \endcode */ template @@ -669,11 +665,9 @@ class TypedFunction; * We can construct a TypedFunction from a lambda function * with the same signature. * - * \code + * \code{.cpp} * // user defined lambda function. - * auto addone = [](int x)->int { - * return x + 1; - * }; + * auto addone = [](int x)->int { return x + 1; }; * // We can directly convert * // lambda function to TypedFunction * TypedFunction ftyped(addone); @@ -703,7 +697,7 @@ class TypedFunction { * \brief construct from a lambda function with the same signature. * * Example usage: - * \code + * \code{.cpp} * auto typed_lambda = [](int x)->int { return x + 1; } * // construct from packed function * TypedFunction ftyped(typed_lambda, "add_one"); @@ -727,7 +721,7 @@ class TypedFunction { * version that takes a name for the lambda. 
* * Example usage: - * \code + * \code{.cpp} * auto typed_lambda = [](int x)->int { return x + 1; } * // construct from packed function * TypedFunction ftyped(typed_lambda); @@ -748,7 +742,7 @@ class TypedFunction { * \brief copy assignment operator from typed lambda * * Example usage: - * \code + * \code{.cpp} * // construct from packed function * TypedFunction ftyped; * ftyped = [](int x) { return x + 1; } @@ -901,15 +895,12 @@ inline int32_t TypeKeyToIndex(std::string_view type_key) { * * \sa ffi::TypedFunction, TVM_FFI_DLL_EXPORT_TYPED_FUNC_DOC * - * \code - * + * \code{.cpp} * int AddOne_(int x) { * return x + 1; * } - * * // Expose the function as "AddOne" * TVM_FFI_DLL_EXPORT_TYPED_FUNC(AddOne, AddOne_); - * * // Expose the function as "SubOne" * TVM_FFI_DLL_EXPORT_TYPED_FUNC(SubOne, [](int x) { * return x - 1; @@ -957,8 +948,7 @@ inline int32_t TypeKeyToIndex(std::string_view type_key) { * * \sa ffi::TypedFunction, TVM_FFI_DLL_EXPORT_TYPED_FUNC * - * \code - * + * \code{.cpp} * int Add(int a, int b) { * return a + b; * } @@ -979,7 +969,6 @@ inline int32_t TypeKeyToIndex(std::string_view type_key) { * ------- * result : int * Sum of a and b)"); - * * \endcode * * \note The exported symbol name is `__tvm_ffi__doc_` (docstring getter function). diff --git a/include/tvm/ffi/reflection/registry.h b/include/tvm/ffi/reflection/registry.h index 3014108c..3224a9fd 100644 --- a/include/tvm/ffi/reflection/registry.h +++ b/include/tvm/ffi/reflection/registry.h @@ -322,9 +322,9 @@ class ReflectionDefBase { /*! * \brief GlobalDef helper to register a global function. 
* - * \code - * namespace refl = tvm::ffi::reflection; - * refl::GlobalDef().def("my_ffi_extension.my_function", MyFunction); + * \code{.cpp} + * namespace refl = tvm::ffi::reflection; + * refl::GlobalDef().def("my_ffi_extension.my_function", MyFunction); * \endcode */ class GlobalDef : public ReflectionDefBase { @@ -415,19 +415,20 @@ class GlobalDef : public ReflectionDefBase { * \tparam Args The argument types for the constructor. * * Example usage: - * \code - * class ExampleObject : public Object { - * public: - * int64_t v_i64; - * int32_t v_i32; * - * ExampleObject(int64_t v_i64, int32_t v_i32) : v_i64(v_i64), v_i32(v_i32) {} - * TVM_FFI_DECLARE_OBJECT_INFO("example.ExampleObject", ExampleObject, Object); - * }; + * \code{.cpp} + * class ExampleObject : public Object { + * public: + * int64_t v_i64; + * int32_t v_i32; * - * // Register the constructor - * refl::ObjectDef() - * .def(refl::init()); + * ExampleObject(int64_t v_i64, int32_t v_i32) : v_i64(v_i64), v_i32(v_i32) {} + * TVM_FFI_DECLARE_OBJECT_INFO("example.ExampleObject", ExampleObject, Object); + * }; + * + * // Register the constructor + * refl::ObjectDef() + * .def(refl::init()); * \endcode * * \note The object type is automatically deduced from the `ObjectDef` context. @@ -460,9 +461,9 @@ struct init { * \brief Helper to register Object's reflection metadata. * \tparam Class The class type. * - * \code - * namespace refl = tvm::ffi::reflection; - * refl::ObjectDef().def_ro("my_field", &MyClass::my_field); + * \code{.cpp} + * namespace refl = tvm::ffi::reflection; + * refl::ObjectDef().def_ro("my_field", &MyClass::my_field); * \endcode */ template @@ -570,9 +571,10 @@ class ObjectDef : public ReflectionDefBase { * \return Reference to this `ObjectDef` for method chaining. 
* * Example: - * \code - * refl::ObjectDef() - * .def(refl::init(), "Constructor docstring"); + * + * \code{.cpp} + * refl::ObjectDef() + * .def(refl::init(), "Constructor docstring"); * \endcode */ template @@ -662,11 +664,10 @@ class ObjectDef : public ReflectionDefBase { * \tparam Class The class type. * \tparam ExtraArgs The extra arguments. * - * \code - * namespace refl = tvm::ffi::reflection; - * refl::TypeAttrDef().def("func_attr", MyFunc); + * \code{.cpp} + * namespace refl = tvm::ffi::reflection; + * refl::TypeAttrDef().def("func_attr", MyFunc); * \endcode - * */ template >> class TypeAttrDef : public ReflectionDefBase { diff --git a/include/tvm/ffi/rvalue_ref.h b/include/tvm/ffi/rvalue_ref.h index aca5840f..c34a12e2 100644 --- a/include/tvm/ffi/rvalue_ref.h +++ b/include/tvm/ffi/rvalue_ref.h @@ -50,8 +50,7 @@ namespace ffi { * This design allows us to still leverage move semantics for parameters that * need copy on write scenarios (and requires an unique copy). * - * \code - * + * \code{.cpp} * void Example() { * auto append = Function::FromTyped([](RValueRef> ref, int val) -> Array { * Array arr = *std::move(ref); @@ -65,7 +64,6 @@ namespace ffi { * a = append(RvalueRef(std::move(a)), 3); * assert(a.size() == 3); * } - * * \endcode */ template >> diff --git a/python/tvm_ffi/cython/tensor.pxi b/python/tvm_ffi/cython/tensor.pxi index 1f4973d9..8b78c809 100644 --- a/python/tvm_ffi/cython/tensor.pxi +++ b/python/tvm_ffi/cython/tensor.pxi @@ -195,10 +195,10 @@ def from_dlpack( Parameters ---------- - ext_tensor : object - An object supporting `__dlpack__ `_ - and `__dlpack_device__ `_. - require_alignment : int, optional + ext_tensor + An object supporting :py:meth:`__dlpack__ ` + and :py:meth:`__dlpack_device__ `. + require_alignment If greater than zero, require the underlying data pointer to be aligned to this many bytes. Misaligned inputs raise :class:`ValueError`. 
@@ -314,7 +314,7 @@ cdef class Tensor(Object): dltensor, _c_str_dltensor_versioned, _c_dlpack_versioned_deleter) def __dlpack_device__(self) -> tuple[int, int]: - """Implement the standard `__dlpack_device__ `_ protocol.""" # noqa: E501 + """Implement the standard :py:meth:`__dlpack_device__ ` protocol.""" cdef int device_type = self.cdltensor.device.device_type cdef int device_id = self.cdltensor.device.device_id return (device_type, device_id) @@ -327,7 +327,7 @@ cdef class Tensor(Object): dl_device: tuple[int, int] | None = None, copy: bool | None = None, ) -> object: - """Implement the standard `__dlpack__ `_ protocol. + """Implement the standard :py:meth:`__dlpack__ ` protocol. Parameters ---------- diff --git a/python/tvm_ffi/stub/cli.py b/python/tvm_ffi/stub/cli.py index 9b9786c6..f2d34a9f 100644 --- a/python/tvm_ffi/stub/cli.py +++ b/python/tvm_ffi/stub/cli.py @@ -141,14 +141,15 @@ def _find_or_insert_file(path: Path) -> FileInfo: } | C.BUILTIN_TYPE_KEYS # Step 0. Generate missing `_ffi_api.py` and `__init__.py` under each prefix. + prefix_filter = init_cfg.prefix.strip() + if prefix_filter and not prefix_filter.endswith("."): + prefix_filter += "." 
+ root_prefix = prefix_filter.rstrip(".") prefixes: dict[str, list[str]] = collect_type_keys() for prefix in global_funcs: prefixes.setdefault(prefix, []) - - root_ffi_api_py = init_path / init_cfg.prefix.rstrip(".") / "_ffi_api.py" for prefix, obj_names in prefixes.items(): - # TODO(@junrushao): control the prefix to generate stubs for - if prefix.startswith("testing") or prefix.startswith("ffi"): + if not (prefix == root_prefix or prefix.startswith(prefix_filter)): continue funcs = sorted( [] if prefix in defined_func_prefixes else global_funcs.get(prefix, []), @@ -172,7 +173,7 @@ def _find_or_insert_file(path: Path) -> FileInfo: prefix, object_infos, init_cfg, - is_root=root_ffi_api_py.samefile(target_path), + is_root=prefix == root_prefix, ) ) target_file.reload() @@ -448,7 +449,7 @@ def _split_list_arg(arg: str | None) -> list[str]: default="", help=( "Python package name to generate stubs for (e.g. apache-tvm-ffi). " - "Required together with --init-lib, --init-path, and --init-prefix." + "Required together with --init-lib and --init-prefix." 
), ) parser.add_argument( diff --git a/tests/python/test_stubgen.py b/tests/python/test_stubgen.py index c0d3dd5d..0c2eb0cf 100644 --- a/tests/python/test_stubgen.py +++ b/tests/python/test_stubgen.py @@ -19,9 +19,10 @@ from pathlib import Path import pytest +import tvm_ffi.stub.cli as stub_cli from tvm_ffi.core import TypeSchema from tvm_ffi.stub import consts as C -from tvm_ffi.stub.cli import _stage_3 +from tvm_ffi.stub.cli import _stage_2, _stage_3 from tvm_ffi.stub.codegen import ( generate_all, generate_export, @@ -604,3 +605,50 @@ def test_generate_ffi_api_with_objects_imports_parents() -> None: f"{C.STUB_IMPORT_OBJECT} {parent_key};False;_{parent_key.replace('.', '_')}" ) assert parent_import_prompt in code + + +def test_stage_2_filters_prefix_and_marks_root( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + prefixes: dict[str, list[FuncInfo]] = {"demo.sub": [], "demo": [], "other": []} + monkeypatch.setattr(stub_cli, "collect_type_keys", lambda: prefixes) + monkeypatch.setattr(stub_cli, "toposort_objects", lambda objs: []) + + global_funcs = { + "demo.sub": [ + FuncInfo.from_schema( + "demo.sub.add_one", + TypeSchema("Callable", (TypeSchema("int"), TypeSchema("int"))), + ) + ], + "demo": [ + FuncInfo.from_schema( + "demo.add_one", + TypeSchema("Callable", (TypeSchema("int"), TypeSchema("int"))), + ) + ], + "other": [ + FuncInfo.from_schema( + "other.add_one", + TypeSchema("Callable", (TypeSchema("int"), TypeSchema("int"))), + ) + ], + } + _stage_2( + files=[], + ty_map=_default_ty_map(), + init_cfg=InitConfig(pkg="demo-pkg", shared_target="demo_shared", prefix="demo."), + init_path=tmp_path, + global_funcs=global_funcs, + ) + + root_api = tmp_path / "demo" / "_ffi_api.py" + sub_api = tmp_path / "demo" / "sub" / "_ffi_api.py" + other_api = tmp_path / "other" / "_ffi_api.py" + assert root_api.exists() + assert sub_api.exists() + assert not other_api.exists() + root_text = root_api.read_text(encoding="utf-8") + sub_text = 
sub_api.read_text(encoding="utf-8") + assert 'LIB = _FFI_LOAD_LIB("demo-pkg", "demo_shared")' in root_text + assert "LIB =" not in sub_text