@@ -763,9 +763,9 @@ class PyLayerBlockContextManager {
   PyLayerBlockContextManager() = default;
 };
 
-int DLPackFromPyObject(void *py_obj,
-                       DLManagedTensorVersioned **out,
-                       void **env_stream) {
+int DLPackFromPyObjectLegacy(void *py_obj,
+                             DLManagedTensorVersioned **out,
+                             void **env_stream) {
   try {
     py::handle handle(static_cast<PyObject *>(py_obj));
     paddle::Tensor tensor = handle.cast<paddle::Tensor>();
@@ -786,7 +786,7 @@ int DLPackFromPyObject(void *py_obj,
   }
 }
 
-int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
+int DLPackToPyObjectLegacy(DLManagedTensorVersioned *src, void **py_obj_out) {
   try {
     phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
     paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
@@ -799,12 +799,12 @@ int DLPackToPyObject(DLManagedTensorVersioned *src, void **py_obj_out) {
   }
 }
 
-int DLPackTensorAllocator(::DLTensor *prototype,
-                          ::DLManagedTensorVersioned **out,
-                          void *error_ctx,
-                          void (*SetError)(void *error_ctx,
-                                           const char *kind,
-                                           const char *message)) {
+int DLPackTensorAllocatorLegacy(::DLTensor *prototype,
+                                ::DLManagedTensorVersioned **out,
+                                void *error_ctx,
+                                void (*SetError)(void *error_ctx,
+                                                 const char *kind,
+                                                 const char *message)) {
   try {
     phi::IntArray shape(prototype->shape, prototype->ndim);
     phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
@@ -821,6 +821,108 @@ int DLPackTensorAllocator(::DLTensor *prototype,
   }
 }
 
+int DLPackDLTensorFromPyObjectNoSync(void *py_obj, DLTensor *out) {
+  try {
+    // Use handle (non-owning) to avoid unnecessary refcount operations
+    py::handle handle(static_cast<PyObject *>(py_obj));
+    paddle::Tensor tensor = handle.cast<paddle::Tensor>();
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    paddle::framework::ToDLPackNonOwningImpl(*dense_tensor, *out);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackManagedTensorFromPyObjectNoSync(void *py_obj,
+                                          DLManagedTensorVersioned **out) {
+  try {
+    py::handle handle(static_cast<PyObject *>(py_obj));
+    paddle::Tensor tensor = handle.cast<paddle::Tensor>();
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackManagedTensorToPyObjectNoSync(DLManagedTensorVersioned *src,
+                                        void **py_obj_out) {
+  try {
+    phi::DenseTensor dense_tensor = paddle::framework::FromDLPackVersioned(src);
+    paddle::Tensor tensor(std::make_shared<phi::DenseTensor>(dense_tensor));
+    egr::EagerUtils::autograd_meta(&tensor)->SetPersistable(false);
+    *py_obj_out = ToPyObject(tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+int DLPackManagedTensorAllocator(::DLTensor *prototype,
+                                 ::DLManagedTensorVersioned **out,
+                                 void *error_ctx,
+                                 void (*SetError)(void *error_ctx,
+                                                  const char *kind,
+                                                  const char *message)) {
+  try {
+    phi::IntArray shape(prototype->shape, prototype->ndim);
+    phi::Place place(paddle::framework::DLDeviceToPlace(prototype->device));
+    phi::DataType dtype =
+        paddle::framework::DLDataTypeToPhiDataType(prototype->dtype);
+    paddle::Tensor tensor = paddle::empty(shape, dtype, place);
+    std::shared_ptr<phi::DenseTensor> dense_tensor =
+        std::static_pointer_cast<phi::DenseTensor>(tensor.impl());
+    *out = paddle::framework::ToDLPackVersioned(*dense_tensor);
+    return 0;
+  } catch (const std::exception &e) {
+    SetError(error_ctx, "DLPackManagedTensorAllocator", e.what());
+    return -1;
+  }
+}
+
+int DLPackCurrentWorkStream(DLDeviceType device_type,
+                            int32_t device_id,
+                            void **out_stream) {
+  try {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \
+    defined(PADDLE_WITH_CUSTOM_DEVICE)
+    if (device_type == kDLCUDA || device_type == kDLROCM) {
+      *out_stream = platform::get_current_stream(device_id)->raw_stream();
+    }
+#endif
+    return 0;
+  } catch (const std::exception &e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
+}
+
+struct PaddleDLPackExchangeAPI : public ::DLPackExchangeAPI {
+  PaddleDLPackExchangeAPI() {
+    header.version.major = DLPACK_MAJOR_VERSION;
+    header.version.minor = DLPACK_MINOR_VERSION;
+    header.prev_api = nullptr;
+    managed_tensor_allocator = DLPackManagedTensorAllocator;
+    managed_tensor_from_py_object_no_sync =
+        DLPackManagedTensorFromPyObjectNoSync;
+    managed_tensor_to_py_object_no_sync = DLPackManagedTensorToPyObjectNoSync;
+    dltensor_from_py_object_no_sync = DLPackDLTensorFromPyObjectNoSync;
+    current_work_stream = DLPackCurrentWorkStream;
+  }
+
+  static const DLPackExchangeAPI *Instance() {
+    static PaddleDLPackExchangeAPI inst;
+    return &inst;
+  }
+};
+
 // NOTE: use to load file by Mmap
 enum MMapLoadModes {
   ALLOCATOR_MAPPED_SHARED = 1,
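Note: the hunk above adds the no-sync DLPack entry points and wires them into a process-wide PaddleDLPackExchangeAPI singleton. Below is a minimal consumer-side sketch, not part of the patch, assuming the caller already holds the table address exposed by the new dlpack_exchange_api_ptr binding and that dlpack.h plus the header declaring DLPackExchangeAPI are on the include path; the helper name ConsumeManagedTensor is hypothetical.

// Hypothetical consumer-side sketch (not part of the patch). api_addr is
// assumed to be the int64_t value returned by dlpack_exchange_api_ptr(),
// and paddle_tensor_pyobj a borrowed PyObject* holding a paddle.Tensor.
#include <cstdint>

int ConsumeManagedTensor(int64_t api_addr, void *paddle_tensor_pyobj) {
  const auto *api = reinterpret_cast<const DLPackExchangeAPI *>(api_addr);
  DLManagedTensorVersioned *managed = nullptr;
  // No-sync export: the producer does not synchronize its work stream, so a
  // consumer that launches kernels must order them against the stream
  // reported by api->current_work_stream.
  if (api->managed_tensor_from_py_object_no_sync(paddle_tensor_pyobj,
                                                 &managed) != 0) {
    return -1;  // the callee has already set a Python error
  }
  // ... read managed->dl_tensor here ...
  if (managed->deleter != nullptr) {
    managed->deleter(managed);  // hand the tensor back to Paddle
  }
  return 0;
}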
@@ -1832,15 +1934,19 @@ PYBIND11_MODULE(libpaddle, m) {
   });
 
   m.def("dlpack_from_pyobject_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackFromPyObject);
+    return reinterpret_cast<int64_t>(DLPackFromPyObjectLegacy);
   });
 
   m.def("dlpack_to_pyobject_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackToPyObject);
+    return reinterpret_cast<int64_t>(DLPackToPyObjectLegacy);
   });
 
   m.def("dlpack_tensor_allocator_ptr", []() -> int64_t {
-    return reinterpret_cast<int64_t>(DLPackTensorAllocator);
+    return reinterpret_cast<int64_t>(DLPackTensorAllocatorLegacy);
+  });
+
+  m.def("dlpack_exchange_api_ptr", []() -> int64_t {
+    return reinterpret_cast<int64_t>(PaddleDLPackExchangeAPI::Instance());
   });
 
   m.def("from_dlpack", [](py::object data) {
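The last hunk keeps the three per-function pointer bindings pointing at their renamed Legacy variants and exposes the whole table through dlpack_exchange_api_ptr. For the allocator slot, failures are reported through the SetError callback rather than a Python exception; a hedged sketch of driving that slot, with illustrative names ReportError and MakeLike, could look like this:

// Hypothetical sketch (not part of the patch): exercise the allocator slot of
// the exchange table. ReportError and MakeLike are illustrative names only.
#include <cstdio>

static void ReportError(void *error_ctx, const char *kind,
                        const char *message) {
  (void)error_ctx;  // no per-call context in this sketch
  std::fprintf(stderr, "[%s] %s\n", kind, message);
}

int MakeLike(const DLPackExchangeAPI *api,
             DLTensor *prototype,
             DLManagedTensorVersioned **out) {
  // On success the patch allocates an empty Paddle tensor matching the
  // prototype's shape, dtype, and device and returns it as a versioned
  // DLPack capsule; on failure the error is routed through ReportError.
  return api->managed_tensor_allocator(prototype, out,
                                       /*error_ctx=*/nullptr, ReportError);
}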