2 changes: 1 addition & 1 deletion cmake/Utils/AddGoogleTest.cmake
@@ -48,7 +48,7 @@ macro(tvm_ffi_add_googletest target_name)
target_link_libraries(${target_name} PRIVATE gtest_main)
gtest_discover_tests(${target_name}
  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-  TEST_DISCOVERY_TIMEOUT 300
+  TEST_DISCOVERY_TIMEOUT 600
  DISCOVERY_MODE PRE_TEST
  PROPERTIES
    VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
72 changes: 69 additions & 3 deletions docs/get_started/quick_start.md
@@ -72,6 +72,7 @@ tensor and expose that function as TVM FFI compatible function. The key file str
examples/quick_start/
├── src/
│ ├── add_one_cpu.cc # CPU implementation
│ ├── add_one_c.c # A low-level C-based implementation
│ ├── add_one_cuda.cu # CUDA implementation
│ └── run_example.cc # C++ usage example
├── run_example.py # Python usage example
@@ -201,16 +202,81 @@ shows how to run the example exported function in C++.
#include <tvm/ffi/container/tensor.h>
#include <tvm/ffi/extra/module.h>

-void CallAddOne(DLTensor* x, DLTensor* y) {
-  namespace ffi = tvm::ffi;
+namespace ffi = tvm::ffi;
+
+void CallAddOne(ffi::Tensor x, ffi::Tensor y) {
  ffi::Module mod = ffi::Module::LoadFromFile("build/add_one_cpu.so");
  ffi::Function add_one_cpu = mod->GetFunction("add_one_cpu").value();
  add_one_cpu(x, y);
}
```

## Advanced: Minimal C ABI demonstration

For those who need to understand the low-level C ABI, or who are implementing
compiler codegen, we also provide the following C-only example:

```c
#include <tvm/ffi/c_api.h>
#include <tvm/ffi/extra/c_env_api.h>

// Helper to extract DLTensor from TVMFFIAny
int ReadDLTensorPtr(const TVMFFIAny* value, DLTensor** out) {
  if (value->type_index == kTVMFFIDLTensorPtr) {
    *out = (DLTensor*)(value->v_ptr);
    return 0;
  }
  if (value->type_index != kTVMFFITensor) {
    TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
    return -1;
  }
  *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
  return 0;
}

// Raw C FFI function following the TVMFFISafeCallType signature
int __tvm_ffi_add_one_c(
    void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result
) {
  DLTensor *x, *y;

  // Extract tensor arguments
  if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
  if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;

  // Get current stream for device synchronization (e.g., CUDA);
  // not needed for CPU, kept here for demonstration purposes
  void* stream = TVMFFIEnvGetStream(x->device.device_type, x->device.device_id);

  // Perform computation
  for (int i = 0; i < x->shape[0]; ++i) {
    ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
  }
  return 0;  // Success
}
```
To compile this code, add the directories returned by {py:func}`tvm_ffi.libinfo.find_include_paths` to your
include path and link against the shared library located via {py:func}`tvm_ffi.libinfo.find_libtvm_ffi`.
We also provide a command line tool that emits the needed flags, so you can compile with the following command:

```bash
gcc -shared -fPIC `tvm-ffi-config --cflags` \
src/add_one_c.c -o build/add_one_c.so \
`tvm-ffi-config --ldflags` `tvm-ffi-config --libs`
```

The main takeaway points are:
- Function symbols follow the naming convention `int __tvm_ffi_<name>`
- The function follows the signature of `TVMFFISafeCallType`
- Use `TVMFFIAny` to handle dynamic argument types
- Return `0` for success, `-1` for error (set via `TVMFFIErrorSetRaisedFromCStr`)
- The function can be compiled with a plain C compiler and loaded in the same way as
  the other libraries in this example, as the sketch below shows.
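
The snippet below is a condensed version of `run_example.py` from this PR; it shows the
compiled C library being loaded and called from Python in exactly the same way as the
C++-based modules:

```python
import numpy
import tvm_ffi

# load the C-ABI library like any other tvm-ffi module
mod = tvm_ffi.load_module("build/add_one_c.so")

x = numpy.array([1, 2, 3, 4, 5], dtype=numpy.float32)
y = numpy.empty_like(x)
mod.add_one_c(x, y)
print(y)  # expected: [2. 3. 4. 5. 6.]
```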

## Summary: Key Concepts

- **TVM_FFI_DLL_EXPORT_TYPED_FUNC** exposes a C++ function through the tvm-ffi C ABI
-- **DLTensor** is a universal tensor structure that enables zero-copy exchange of array data
+- **ffi::Tensor** is a universal tensor structure that enables zero-copy exchange of array data
- **Module loading** is provided by tvm-ffi APIs in multiple languages
- **C ABI** is provided for easy low-level integration

34 changes: 20 additions & 14 deletions docs/guides/compiler_integration.md
@@ -35,43 +35,49 @@ following options:
use {c:macro}`TVM_FFI_DLL_EXPORT_TYPED_FUNC` to expose the symbol.

The following code snippet shows C code that corresponds to a
-function performing `add_one` under the ABI. It is reasonably straightforward for
+function performing `add_one_c` under the ABI. It is reasonably straightforward for
low-level code generators to replicate this C logic.
You can run this code as part of the [quick start example](https://github.com/apache/tvm-ffi/tree/dev/examples/quick_start).

```c
#include <tvm/ffi/c_api.h>
#include <tvm/ffi/extra/c_env_api.h>

// Helper function to extract DLTensor from TVMFFIAny (can be inlined into generated code)
-int ReadDLTensorPtr(const TVMFFIAny *value, DLTensor* out) {
+int ReadDLTensorPtr(const TVMFFIAny *value, DLTensor** out) {
  if (value->type_index == kTVMFFIDLTensorPtr) {
-    *out = static_cast<DLTensor*>(value->v_ptr);
+    *out = (DLTensor*)(value->v_ptr);
    return 0;
  }
-  if (value->type_index == kTVMFFITensor) {
+  if (value->type_index != kTVMFFITensor) {
    // Use TVMFFIErrorSetRaisedFromCStr to set an error which will
    // be propagated to the caller
    TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
    return -1;
  }
-  *out = reinterpret_cast<DLTensor*>(
-      reinterpret_cast<char*>(value->v_obj) + sizeof(TVMFFIObject));
+  *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
  return 0;
}

// FFI function implementing add_one operation
-int __tvm_ffi_add_one(
+int __tvm_ffi_add_one_c(
    void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result
) {
-  DLTensor *a, *b, *c;
+  DLTensor *x, *y;
  // Extract tensor arguments
-  if (ReadDLTensorPtr(&args[0], &a) == -1) return -1;
-  if (ReadDLTensorPtr(&args[1], &b) == -1) return -1;
-  if (ReadDLTensorPtr(&args[2], &c) == -1) return -1;
+  // return -1 for error, error is set through TVMFFIErrorSetRaisedFromCStr
+  if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
+  if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;

  // Get current stream for device synchronization (e.g., CUDA)
-  void* stream = TVMFFIEnvGetStream(a->device.device_type, a->device.device_id);
+  // not needed for CPU, kept here for demonstration purposes
+  void* stream = TVMFFIEnvGetStream(x->device.device_type, x->device.device_id);

-  // Generated computation code would follow here to perform the actual operation
-  // on tensors a, b, c and store result in c
+  // perform the actual operation
+  for (int i = 0; i < x->shape[0]; ++i) {
+    ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
+  }
+  // return 0 for a successful run
  return 0;
}
```
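
Once loaded, the generated function behaves like any other tvm-ffi module. The sketch below
(reusing `build/add_one_c.so` from the quick start example) exercises both paths through the
function; treating the `"ValueError"` error kind as resurfacing on the Python side is an
assumption here, so the except clause is kept broad:

```python
import numpy
import tvm_ffi

mod = tvm_ffi.load_module("build/add_one_c.so")

x = numpy.array([1, 2, 3], dtype=numpy.float32)
y = numpy.empty_like(x)
mod.add_one_c(x, y)  # success path: the C function returns 0

# failure path: a non-tensor argument trips the kTVMFFITensor check and the
# error set via TVMFFIErrorSetRaisedFromCStr propagates across the ABI
# (assumed to resurface as a Python exception)
try:
    mod.add_one_c(x, "not a tensor")
except Exception as err:
    print("caught:", type(err).__name__, err)
```
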
7 changes: 7 additions & 0 deletions examples/quick_start/CMakeLists.txt
@@ -31,14 +31,21 @@ find_package(tvm_ffi CONFIG REQUIRED)

# use the projects as usual
add_library(add_one_cpu SHARED src/add_one_cpu.cc)
add_library(add_one_c SHARED src/add_one_c.c)
target_link_libraries(add_one_cpu tvm_ffi_header)
target_link_libraries(add_one_cpu tvm_ffi_shared)
target_link_libraries(add_one_c tvm_ffi_shared)
# show as add_one_cpu.so
set_target_properties(
  add_one_cpu PROPERTIES
  PREFIX ""
  SUFFIX ".so"
)
set_target_properties(
  add_one_c PROPERTIES
  PREFIX ""
  SUFFIX ".so"
)

# Check if CUDA is available
if(NOT WIN32)
4 changes: 2 additions & 2 deletions examples/quick_start/README.md
@@ -52,7 +52,7 @@ You can also compile the modules directly using
flags provided by the `tvm-ffi-config` tool.

```bash
-g++ -shared -fPIC `tvm-ffi-config --cxxflags` \
-  src/add_one_cpu.cc -o build/add_one_cpu.so \
+gcc -shared -fPIC `tvm-ffi-config --cflags` \
+  src/add_one_c.c -o build/add_one_c.so \
`tvm-ffi-config --ldflags` `tvm-ffi-config --libs`
```
21 changes: 21 additions & 0 deletions examples/quick_start/run_example.py
@@ -52,6 +52,26 @@ def run_add_one_cpu():
    print(y)


def run_add_one_c():
    """Load the add_one_c module and call the add_one_c function."""
    mod = tvm_ffi.load_module("build/add_one_c.so")

    x = numpy.array([1, 2, 3, 4, 5], dtype=numpy.float32)
    y = numpy.empty_like(x)
    mod.add_one_c(x, y)
    print("numpy.result after add_one_c(x, y)")
    print(y)

    if torch is None:
        return

    x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
    y = torch.empty_like(x)
    mod.add_one_c(x, y)
    print("torch.result after add_one_c(x, y)")
    print(y)


def run_add_one_cuda():
"""Load the add_one_cuda module and call the add_one_cuda function."""
if torch is None or not torch.cuda.is_available():
@@ -76,6 +96,7 @@ def run_add_one_cuda():
def main():
"""Main function to run the example."""
run_add_one_cpu()
run_add_one_c()
run_add_one_cuda()


72 changes: 72 additions & 0 deletions examples/quick_start/src/add_one_c.c
@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <tvm/ffi/c_api.h>
#include <tvm/ffi/extra/c_env_api.h>

// This is a raw C variant of the add_one_cpu function.
// It demonstrates how the low-level mechanism works
// to construct a tvm ffi compatible function.
//
// This function can also serve as a reference for how to implement
// a compiler codegen to target tvm ffi.
//
// If you are looking for a higher-level way to construct a tvm ffi
// compatible function, please refer to add_one_cpu.cc instead.
/*!
* \brief Helper code to read DLTensor from TVMFFIAny, can be inlined into generated code
* \param value The TVMFFIAny to read from
* \param out The DLTensor to read into
* \return 0 on success, -1 on error
*/
int ReadDLTensorPtr(const TVMFFIAny* value, DLTensor** out) {
  if (value->type_index == kTVMFFIDLTensorPtr) {
    *out = (DLTensor*)(value->v_ptr);
    return 0;
  }
  if (value->type_index != kTVMFFITensor) {
    // Use TVMFFIErrorSetRaisedFromCStr to set an error which will
    // be propagated to the caller
    TVMFFIErrorSetRaisedFromCStr("ValueError", "Expects a Tensor input");
    return -1;
  }
  *out = (DLTensor*)((char*)(value->v_obj) + sizeof(TVMFFIObject));
  return 0;
}

// FFI function implementing add_one operation
int __tvm_ffi_add_one_c(                                                      //
    void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* result  //
) {
  DLTensor *x, *y;
  // Extract tensor arguments
  // return -1 for error, error is set through TVMFFIErrorSetRaisedFromCStr
  if (ReadDLTensorPtr(&args[0], &x) == -1) return -1;
  if (ReadDLTensorPtr(&args[1], &y) == -1) return -1;

  // Get current stream for device synchronization (e.g., CUDA);
  // not needed for CPU, kept here for demonstration purposes
  void* stream = TVMFFIEnvGetStream(x->device.device_type, x->device.device_id);

  // perform the actual operation
  for (int i = 0; i < x->shape[0]; ++i) {
    ((float*)(y->data))[i] = ((float*)(x->data))[i] + 1;
  }
  // return 0 for a successful run
  return 0;
}
35 changes: 18 additions & 17 deletions include/tvm/ffi/c_api.h
@@ -27,21 +27,6 @@
#include <dlpack/dlpack.h>
#include <stdint.h>

-/*
- * \brief C-style Allocator that allocates memory for a DLPack tensor.
- * \param prototype The prototype DLTensor to offer details about device and shape.
- * \param out The output DLManagedTensorVersioned.
- * \param error_ctx The context to set the error.
- * \param SetError The function to set the error.
- * \return 0 on success, -1 on failure.
- *         call SetError(error_ctx, kind, message) to set the error kind and message.
- * \note Error propagation via SetError.
- */
-typedef int (*DLPackTensorAllocator)(                                         //
-    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,     //
-    void (*SetError)(void* error_ctx, const char* kind, const char* message)  //
-);

// Macros to do weak linking
#ifdef _MSC_VER
#define TVM_FFI_WEAK __declspec(selectany)
@@ -75,12 +60,29 @@
extern "C" {
#endif

+// TODO(tqchen): remove this once dlpack.h is updated
+typedef struct DLManagedTensorVersioned DLManagedTensorVersioned;
+
+/*
+ * \brief C-style Allocator that allocates memory for a DLPack tensor.
+ * \param prototype The prototype DLTensor to offer details about device and shape.
+ * \param out The output DLManagedTensorVersioned.
+ * \param error_ctx The context to set the error.
+ * \param SetError The function to set the error.
+ * \return 0 on success, -1 on failure.
+ *         call SetError(error_ctx, kind, message) to set the error kind and message.
+ * \note Error propagation via SetError.
+ */
+typedef int (*DLPackTensorAllocator)(                                         //
+    DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,     //
+    void (*SetError)(void* error_ctx, const char* kind, const char* message)  //
+);

#ifdef __cplusplus
enum TVMFFITypeIndex : int32_t {
#else
typedef enum {
#endif

/*
* \brief The root type of all FFI objects.
*
@@ -279,7 +281,6 @@ typedef struct {
    DLDataType v_dtype;    // data type
    DLDevice v_device;     // device
    char v_bytes[8];       // small string
-    char32_t v_char32[2];  // small UCS4 string and Unicode
    uint64_t v_uint64;     // uint64 repr mainly used for hashing
  };
} TVMFFIAny;
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -17,7 +17,7 @@

[project]
name = "apache-tvm-ffi"
version = "0.1.0b1"
version = "0.1.0b2"
description = "tvm ffi"

authors = [{ name = "TVM FFI team" }]