update

Kathryn-cat · Kathryn-cat · commit a0121dae403e · 2025-10-10T05:15:34.000-04:00
diff --git a/python/tvm_ffi/_optional_torch_c_dlpack.py b/python/tvm_ffi/_optional_torch_c_dlpack.py
@@ -467,15 +467,10 @@ def load_torch_c_dlpack_extension() -> Any:
 } // namespace
 } // namespace at
 
-int TorchDLPackFromPyObject(void* py_obj, DLManagedTensorVersioned** out, void** env_stream) {
+int TorchDLPackFromPyObject(void* py_obj, DLManagedTensorVersioned** out) {
   try {
     py::handle handle(static_cast<PyObject*>(py_obj));
     at::Tensor tensor = handle.cast<at::Tensor>();
-#ifdef BUILD_WITH_CUDA
-    if (env_stream != nullptr && tensor.is_cuda()) {
-      *env_stream = at::cuda::getCurrentCUDAStream(tensor.device().index()).stream();
-    }
-#endif
     *out = at::toDLPackImpl<DLManagedTensorVersioned>(tensor);
     return 0;
   } catch (const std::exception& e) {
@@ -513,16 +508,66 @@ def load_torch_c_dlpack_extension() -> Any:
   }
 }
 
-int64_t TorchDLPackFromPyObjectPtr() {
-  return reinterpret_cast<int64_t>(TorchDLPackFromPyObject);
+// Fills the caller-allocated DLTensor `out` with a non-owning view of the torch.Tensor py_obj.
+// Returns 0 on success; on a C++ exception sets a Python RuntimeError and returns -1.
+int TorchDLTensorFromPyObject(void* py_obj, DLTensor* out) {
+  try {
+    // A py::handle is non-owning, so wrapping py_obj does not touch its Python refcount.
+    py::handle handle(static_cast<PyObject*>(py_obj));
+    // Cast by value: a reference cast would refer to a value local to pybind11's type caster.
+    // The copy is only another handle on the same TensorImpl; no tensor data is duplicated.
+    at::Tensor tensor = handle.cast<at::Tensor>();
+    // Everything written to `out` is borrowed; the caller keeps py_obj alive while using it.
+    out->data = tensor.data_ptr();
+    out->device = torchDeviceToDLDeviceForDLPackv1(tensor.device());
+    out->ndim = static_cast<int32_t>(tensor.dim());
+    out->dtype = getDLDataTypeForDLPackv1(tensor);
+    // sizes()/strides() point into the TensorImpl, valid while the original PyObject is alive.
+    out->shape = const_cast<int64_t*>(tensor.sizes().data());
+    out->strides = const_cast<int64_t*>(tensor.strides().data());
+    // NOTE(review): presumes data_ptr() already includes the storage offset - confirm.
+    out->byte_offset = 0;
+    return 0;
+  } catch (const std::exception& e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
 }
 
-int64_t TorchDLPackToPyObjectPtr() {
-  return reinterpret_cast<int64_t>(TorchDLPackToPyObject);
+// Sets *out_stream to the device's current stream (nullptr if none). Returns 0, or -1 on error.
+int TorchCurrentWorkStream(DLDeviceType device_type, int32_t device_id, void** out_stream) {
+  try {
+    *out_stream = nullptr;  // never leave the output unset (CPU device, or built without CUDA)
+#ifdef BUILD_WITH_CUDA
+    if (device_type != kDLCPU) *out_stream = at::cuda::getCurrentCUDAStream(device_id).stream();
+#endif
+    return 0;
+  } catch (const std::exception& e) {
+    PyErr_SetString(PyExc_RuntimeError, e.what());
+    return -1;
+  }
 }
 
-int64_t TorchDLPackTensorAllocatorPtr() {
-  return reinterpret_cast<int64_t>(TorchDLPackTensorAllocator);
+// DLPackExchangeAPI table whose entries are bound to the Torch* functions of this extension.
+struct TorchDLPackExchangeAPI : public DLPackExchangeAPI {
+  TorchDLPackExchangeAPI() {
+    prev_version_api = nullptr;
+    version.major = DLPACK_MAJOR_VERSION;
+    version.minor = DLPACK_MINOR_VERSION;
+    current_work_stream = TorchCurrentWorkStream;
+    dltensor_from_py_object_no_sync = TorchDLTensorFromPyObject;
+    managed_tensor_to_py_object_no_sync = TorchDLPackToPyObject;
+    managed_tensor_from_py_object_no_sync = TorchDLPackFromPyObject;
+    managed_tensor_allocator = TorchDLPackTensorAllocator;
+  }
+  // Returns the single shared table, constructed on first call.
+  static const DLPackExchangeAPI* Global() {
+    static TorchDLPackExchangeAPI singleton;
+    return &singleton;
+  }
+};
+// Returns the address of the shared exchange-API table as an integer.
+int64_t TorchDLPackExchangeAPIPtr() {
+  const DLPackExchangeAPI* table = TorchDLPackExchangeAPI::Global();
+  return reinterpret_cast<int64_t>(table);
 }
     """
     try:
@@ -541,17 +586,13 @@ def load_torch_c_dlpack_extension() -> Any:
             name="c_dlpack",
             cpp_sources=cpp_source,
             functions=[
-                "TorchDLPackFromPyObjectPtr",
-                "TorchDLPackToPyObjectPtr",
-                "TorchDLPackTensorAllocatorPtr",
+                "TorchDLPackExchangeAPIPtr",
             ],
             extra_cflags=extra_cflags,
             extra_include_paths=include_paths,
         )
-        # set the dlpack related flags
-        setattr(torch.Tensor, "__c_dlpack_from_pyobject__", mod.TorchDLPackFromPyObjectPtr())
-        setattr(torch.Tensor, "__c_dlpack_to_pyobject__", mod.TorchDLPackToPyObjectPtr())
-        setattr(torch.Tensor, "__c_dlpack_tensor_allocator__", mod.TorchDLPackTensorAllocatorPtr())
+        # Set the DLPackExchangeAPI pointer on the class
+        setattr(torch.Tensor, "__c_dlpack_exchange_api__", mod.TorchDLPackExchangeAPIPtr())
         return mod
     except ImportError:
         pass