Skip to content

Commit 7ac3498

Browse files
committed
Implement the latest C dlpack exchange API, refer to apache/tvm-ffi#96
1 parent 81582f0 commit 7ac3498

File tree

7 files changed

+147
-17
lines changed

7 files changed

+147
-17
lines changed

paddle/fluid/framework/dlpack_tensor.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,22 @@ DLManagedTensorVersioned *ToDLPackVersioned(const phi::DenseTensor &src,
358358
return ToDLPackImpl<DLManagedTensorVersioned>(src, flags);
359359
}
360360

361+
// Populate a caller-provided DLTensor with non-owning views into `tensor`.
//
// No ownership or reference is taken: the caller must keep `tensor` alive
// (and its metadata unchanged) for as long as `out` is in use. Both
// `out.shape` and `out.strides` alias storage owned by the tensor itself.
void ToDLPackNonOwningImpl(const phi::DenseTensor &tensor,
                           ::DLTensor &out) {  // NOLINT
  // Raw data pointer plus device / dtype / rank metadata.
  out.data = const_cast<void *>(tensor.data());
  out.device = PlaceToDLDevice(tensor.place());
  out.dtype = PhiDataTypeToDLDataType(tensor.dtype());
  out.ndim = static_cast<int32_t>(tensor.dims().size());
  // dims() and strides() expose int64 arrays held by the tensor's
  // metadata, which stays valid while the tensor is alive, so we can
  // alias them directly instead of copying.
  out.shape = const_cast<int64_t *>(tensor.dims().Get());
  out.strides = const_cast<int64_t *>(tensor.strides().Get());
  out.byte_offset = 0;
}
376+
361377
template <typename T>
362378
phi::DenseTensor FromDLPackImpl(T *src, Deleter deleter) {
363379
std::vector<int64_t> shape_vec;

paddle/fluid/framework/dlpack_tensor.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,12 @@ phi::DataType DLDataTypeToPhiDataType(::DLDataType type);
3434
phi::Place DLDeviceToPlace(const ::DLDevice& device);
3535
::DLDevice PlaceToDLDevice(const phi::Place& place);
3636

37-
TEST_API DLManagedTensor* ToDLPack(const phi::DenseTensor& src,
38-
uint64_t flags = 0);
37+
TEST_API ::DLManagedTensor* ToDLPack(const phi::DenseTensor& src,
38+
uint64_t flags = 0);
3939
::DLManagedTensorVersioned* ToDLPackVersioned(const phi::DenseTensor& src,
4040
uint64_t flags = 0);
41+
void ToDLPackNonOwningImpl(const phi::DenseTensor& tensor,
42+
::DLTensor& out); // NOLINT
4143
TEST_API phi::DenseTensor FromDLPack(::DLManagedTensor* src);
4244
phi::DenseTensor FromDLPackVersioned(::DLManagedTensorVersioned* src);
4345

paddle/fluid/pybind/pybind.cc

Lines changed: 119 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -763,9 +763,9 @@ class PyLayerBlockContextManager {
763763
PyLayerBlockContextManager() = default;
764764
};
765765

766-
int DLPackFromPyObject(void *py_obj,
767-
DLManagedTensorVersioned **out,
768-
void **env_stream) {
766+
int DLPackFromPyObjectLegacy(void *py_obj,
767+
DLManagedTensorVersioned **out,
768+
void **env_stream) {
769769
try {
770770
py::handle handle(static_cast<PyObject *>(py_obj));
771771
paddle::Tensor tensor = handle.cast<paddle::Tensor>();
@@ -786,7 +786,7 @@ int DLPackFromPyObject(void *py_obj,
786786
}
787787
}
788788

789-
int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
789+
int DLPackToPyObjectLegacy(DLManagedTensorVersioned *src, void **py_obj_out) {
790790
try {
791791
phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
792792
paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
@@ -799,12 +799,12 @@ int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
799799
}
800800
}
801801

802-
int DLPackTensorAllocator(::DLTensor *prototype,
803-
::DLManagedTensorVersioned **out,
804-
void *error_ctx,
805-
void (*SetError)(void *error_ctx,
806-
const char *kind,
807-
const char *message)) {
802+
int DLPackTensorAllocatorLegacy(::DLTensor *prototype,
803+
::DLManagedTensorVersioned **out,
804+
void *error_ctx,
805+
void (*SetError)(void *error_ctx,
806+
const char *kind,
807+
const char *message)) {
808808
try {
809809
phi::IntArray shape(prototype->shape, prototype->ndim);
810810
phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
@@ -821,6 +821,108 @@ int DLPackTensorAllocator(::DLTensor *prototype,
821821
}
822822
}
823823

824+
int DLPackDLTensorFromPyObjectNoSync(void *py_obj, DLTensor *out) {
825+
try {
826+
// Use handle (non-owning) to avoid unnecessary refcount operations
827+
py::handle handle(static_cast<PyObject *>(py_obj));
828+
paddle::Tensor tensor = handle.cast<paddle::Tensor>();
829+
std::shared_ptr<phi::DenseTensor> dense_tensor =
830+
std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
831+
paddle::framework::ToDLPackNonOwningImpl(*dense_tensor, *out);
832+
return 0;
833+
} catch (const std::exception &e) {
834+
PyErr_SetString(PyExc_RuntimeError, e.what());
835+
return -1;
836+
}
837+
}
838+
839+
int DLPackManagedTensorFromPyObjectNoSync(void *py_obj,
840+
DLManagedTensorVersioned **out) {
841+
try {
842+
py::handle handle(static_cast<PyObject *>(py_obj));
843+
paddle::Tensor tensor = handle.cast<paddle::Tensor>();
844+
std::shared_ptr<phi::DenseTensor> dense_tensor =
845+
std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
846+
*out = paddle::framework::ToDLPackVersioned(*dense_tensor);
847+
return 0;
848+
} catch (const std::exception &e) {
849+
PyErr_SetString(PyExc_RuntimeError, e.what());
850+
return -1;
851+
}
852+
}
853+
854+
// Exchange-API hook: wrap an incoming DLManagedTensorVersioned as a new
// paddle.Tensor Python object, performing no stream sync.
//
// Returns 0 on success with *py_obj_out set to a new reference; on failure
// sets a Python RuntimeError and returns -1.
int DLPackManagedTensorToPyObjectNoSync(DLManagedTensorVersioned *src,
                                        void **py_obj_out) {
  try {
    // Import the managed tensor, then wrap the resulting DenseTensor in a
    // paddle::Tensor so it can cross into Python.
    paddle::Tensor wrapped(std::make_shared<phi::DenseTensor>(
        paddle::framework::FromDLPackVersioned(src)));
    // Imported tensors are ordinary, non-persistable eager tensors.
    egr::EagerUtils::autograd_meta(&wrapped)->SetPersistable(false);
    *py_obj_out = ToPyObject(wrapped);
    return 0;
  } catch (const std::exception &e) {
    PyErr_SetString(PyExc_RuntimeError, e.what());
    return -1;
  }
}
867+
868+
// Exchange-API hook: allocate a fresh Paddle tensor matching the shape,
// dtype, and device described by `prototype`, and hand it back as an
// owning DLManagedTensorVersioned.
//
// Returns 0 on success; on failure reports through the SetError callback
// (not the Python error state) and returns -1.
int DLPackManagedTensorAllocator(::DLTensor *prototype,
                                 ::DLManagedTensorVersioned **out,
                                 void *error_ctx,
                                 void (*SetError)(void *error_ctx,
                                                  const char *kind,
                                                  const char *message)) {
  try {
    // Translate the DLPack description into Paddle's own vocabulary.
    const phi::IntArray requested_shape(prototype->shape, prototype->ndim);
    const phi::Place target_place =
        paddle::framework::DLDeviceToPlace(prototype->device);
    const phi::DataType target_dtype =
        paddle::framework::DLDataTypeToPhiDataType(prototype->dtype);
    // Allocate uninitialized storage and export it as an owning capsule.
    paddle::Tensor allocated =
        paddle::empty(requested_shape, target_dtype, target_place);
    auto dense = std::static_pointer_cast<phi::DenseTensor>(allocated.impl());
    *out = paddle::framework::ToDLPackVersioned(*dense);
    return 0;
  } catch (const std::exception &e) {
    SetError(error_ctx, "DLPackManagedTensorAllocator", e.what());
    return -1;
  }
}
889+
890+
// Exchange-API hook: report Paddle's current work stream for
// (device_type, device_id) so a consumer can synchronize with pending
// work before touching exchanged memory.
//
// Writes the raw stream handle into *out_stream; a null stream means no
// device stream is tracked for this device type (e.g. CPU, or a build
// without GPU support). Returns 0 on success; on failure sets a Python
// RuntimeError and returns -1.
int DLPackCurrentWorkStream(DLDeviceType device_type,
                            int32_t device_id,
                            void **out_stream) {
  try {
    // Fix: always initialize the output, so callers never read an
    // indeterminate pointer when the device type is not matched below
    // (or when none of the GPU build flags are enabled).
    *out_stream = nullptr;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
    defined(PADDLE_WITH_CUSTOM_DEVICE)
    if (device_type == kDLCUDA || device_type == kDLROCM) {
      *out_stream = platform::get_current_stream(device_id)->raw_stream();
    }
#endif
    return 0;
  } catch (const std::exception &e) {
    PyErr_SetString(PyExc_RuntimeError, e.what());
    return -1;
  }
}
906+
907+
struct PaddleDLPackExchangeAPI : public ::DLPackExchangeAPI {
908+
PaddleDLPackExchangeAPI() {
909+
header.version.major = DLPACK_MAJOR_VERSION;
910+
header.version.minor = DLPACK_MINOR_VERSION;
911+
header.prev_api = nullptr;
912+
managed_tensor_allocator = DLPackManagedTensorAllocator;
913+
managed_tensor_from_py_object_no_sync =
914+
DLPackManagedTensorFromPyObjectNoSync;
915+
managed_tensor_to_py_object_no_sync = DLPackManagedTensorToPyObjectNoSync;
916+
dltensor_from_py_object_no_sync = DLPackDLTensorFromPyObjectNoSync;
917+
current_work_stream = DLPackCurrentWorkStream;
918+
}
919+
920+
static const DLPackExchangeAPI *Instance() {
921+
static PaddleDLPackExchangeAPI inst;
922+
return &inst;
923+
}
924+
};
925+
824926
// NOTE: use to load file by Mmap
825927
enum MMapLoadModes {
826928
ALLOCATOR_MAPPED_SHARED = 1,
@@ -1832,15 +1934,19 @@ PYBIND11_MODULE(libpaddle, m) {
18321934
});
18331935

18341936
m.def("dlpack_from_pyobject_ptr", []() -> int64_t {
1835-
return reinterpret_cast<int64_t>(DLPackFromPyObject);
1937+
return reinterpret_cast<int64_t>(DLPackFromPyObjectLegacy);
18361938
});
18371939

18381940
m.def("dlpack_to_pyobject_ptr", []() -> int64_t {
1839-
return reinterpret_cast<int64_t>(DLPackToPyObject);
1941+
return reinterpret_cast<int64_t>(DLPackToPyObjectLegacy);
18401942
});
18411943

18421944
m.def("dlpack_tensor_allocator_ptr", []() -> int64_t {
1843-
return reinterpret_cast<int64_t>(DLPackTensorAllocator);
1945+
return reinterpret_cast<int64_t>(DLPackTensorAllocatorLegacy);
1946+
});
1947+
1948+
m.def("dlpack_exchange_api_ptr", []() -> int64_t {
1949+
return reinterpret_cast<int64_t>(PaddleDLPackExchangeAPI::Instance());
18441950
});
18451951

18461952
m.def("from_dlpack", [](py::object data) {

python/paddle/base/dygraph/tensor_patch_methods.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,6 +1589,7 @@ def __tvm_ffi_env_stream__(self) -> int:
15891589
("__c_dlpack_from_pyobject__", core.dlpack_from_pyobject_ptr()),
15901590
("__c_dlpack_to_pyobject__", core.dlpack_to_pyobject_ptr()),
15911591
("__c_dlpack_tensor_allocator__", core.dlpack_tensor_allocator_ptr()),
1592+
("__c_dlpack_exchange_api__", core.dlpack_exchange_api_ptr()),
15921593
):
15931594
setattr(core.eager.Tensor, method_name, method)
15941595

python/paddle/utils/dlpack.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class DLDeviceType(enum.IntEnum):
7575
kDLWebGPU = (15,)
7676
kDLHexagon = (16,)
7777
kDLMAIA = (17,)
78+
kDLTrn = (18,)
7879

7980

8081
def to_dlpack(x: Tensor) -> CapsuleType:
@@ -215,7 +216,7 @@ def from_dlpack(
215216

216217
if hasattr(dlpack, "__dlpack__"):
217218
kwargs = {}
218-
kwargs["max_version"] = (1, 1)
219+
kwargs["max_version"] = (1, 2)
219220
if copy is not None:
220221
kwargs["copy"] = copy
221222

test/dygraph_to_static/test_tensor_attr_consistency.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@
8181
'__dlpack__',
8282
"__dlpack_device__",
8383
"__tvm_ffi_env_stream__",
84+
"__c_dlpack_from_pyobject__",
85+
"__c_dlpack_to_pyobject__",
86+
"__c_dlpack_tensor_allocator__",
87+
"__c_dlpack_exchange_api__",
8488
]
8589
)
8690
STATIC_ONLY_TENSOR_ATTRS_ALLOW_LIST = OrderedSet(

0 commit comments

Comments
 (0)