diff --git a/crates/kornia-core/src/allocator.rs b/crates/kornia-core/src/allocator.rs
index 823b8098..14234c59 100644
--- a/crates/kornia-core/src/allocator.rs
+++ b/crates/kornia-core/src/allocator.rs
@@ -1,4 +1,5 @@
-use std::alloc::{GlobalAlloc, Layout, System};
+use std::alloc;
+use std::alloc::Layout;
 
 use thiserror::Error;
 
@@ -55,7 +56,7 @@ impl TensorAllocator for CpuAllocator {
     ///
     /// A non-null pointer to the allocated memory if successful, otherwise an error.
     fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
-        let ptr = unsafe { System.alloc(layout) };
+        let ptr = unsafe { alloc::alloc(layout) };
         if ptr.is_null() {
             Err(TensorAllocatorError::NullPointer)?
         }
@@ -74,7 +75,7 @@ impl TensorAllocator for CpuAllocator {
     /// The pointer must be non-null and the layout must be correct.
     #[allow(clippy::not_unsafe_ptr_arg_deref)]
     fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-        unsafe { System.dealloc(ptr, layout) }
+        unsafe { alloc::dealloc(ptr, layout) }
     }
 }
diff --git a/crates/kornia-core/src/serde.rs b/crates/kornia-core/src/serde.rs
index f72f0069..db2b0625 100644
--- a/crates/kornia-core/src/serde.rs
+++ b/crates/kornia-core/src/serde.rs
@@ -7,9 +7,10 @@ use crate::{
 use serde::ser::SerializeStruct;
 use serde::Deserialize;
 
-impl<T, const N: usize, A: TensorAllocator> serde::Serialize for Tensor<T, N, A>
+impl<T, const N: usize, A> serde::Serialize for Tensor<T, N, A>
 where
     T: serde::Serialize + SafeTensorType,
+    A: TensorAllocator + 'static,
 {
     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
@@ -23,7 +24,7 @@ where
     }
 }
 
-impl<'de, T, const N: usize, A: TensorAllocator + Default> serde::Deserialize<'de>
+impl<'de, T, const N: usize, A: TensorAllocator + Default + 'static> serde::Deserialize<'de>
     for Tensor<T, N, A>
 where
     T: serde::Deserialize<'de> + SafeTensorType,
diff --git a/crates/kornia-core/src/storage.rs b/crates/kornia-core/src/storage.rs
index 0eaa127b..c9b6ec32 100644
--- a/crates/kornia-core/src/storage.rs
+++ b/crates/kornia-core/src/storage.rs
@@ -18,6 +18,30 @@ impl SafeTensorType for i64 {}
 impl SafeTensorType for f32 {}
 impl SafeTensorType for f64 {}
 
+/// Represents the owner of custom Arrow Buffer memory allocations.
+///
+/// This struct is used to facilitate the automatic deallocation of the memory it owns,
+/// using the `Drop` trait.
+pub struct TensorCustomAllocationOwner<A: TensorAllocator> {
+    /// The allocator used to allocate the tensor storage.
+    alloc: Arc<A>,
+    /// The layout used for the allocation.
+    layout: Layout,
+    /// The pointer to the allocated memory
+    ptr: NonNull<u8>,
+}
+
+// SAFETY: TensorCustomAllocationOwner is never modified from multiple threads.
+impl<A: TensorAllocator> std::panic::RefUnwindSafe for TensorCustomAllocationOwner<A> {}
+unsafe impl<A: TensorAllocator> Sync for TensorCustomAllocationOwner<A> {}
+unsafe impl<A: TensorAllocator> Send for TensorCustomAllocationOwner<A> {}
+
+impl<A: TensorAllocator> Drop for TensorCustomAllocationOwner<A> {
+    fn drop(&mut self) {
+        self.alloc.dealloc(self.ptr.as_ptr(), self.layout);
+    }
+}
+
 /// Represents a contiguous memory region that can be shared with other buffers and across thread boundaries.
 ///
 /// This struct provides methods to create, access, and manage tensor storage using a custom allocator.
@@ -32,12 +56,13 @@ where
     /// The buffer containing the tensor storage.
     data: ScalarBuffer<T>,
     /// The allocator used to allocate the tensor storage.
-    alloc: A,
+    alloc: Arc<A>,
 }
 
-impl<T, A: TensorAllocator> TensorStorage<T, A>
+impl<T, A> TensorStorage<T, A>
 where
     T: SafeTensorType + Clone,
+    A: TensorAllocator + 'static,
 {
     /// Creates a new tensor storage with the given length and allocator.
     ///
@@ -51,17 +76,19 @@ where
     /// A new tensor storage if successful, otherwise an error.
     pub fn new(len: usize, alloc: A) -> Result<Self, TensorAllocatorError> {
         // allocate memory for tensor storage
-        let ptr =
-            alloc.alloc(Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?)?;
+        let layout = Layout::array::<T>(len).map_err(TensorAllocatorError::LayoutError)?;
+        let ptr = NonNull::new(alloc.alloc(layout)?).ok_or(TensorAllocatorError::NullPointer)?;
+        let alloc = Arc::new(alloc);
+        let owner = TensorCustomAllocationOwner {
+            alloc: alloc.clone(),
+            layout,
+            ptr,
+        };
 
         // create the buffer
         let buffer = unsafe {
             // SAFETY: `ptr` is non-null and properly aligned, and `len` is the correct size.
-            Buffer::from_custom_allocation(
-                NonNull::new_unchecked(ptr),
-                len * std::mem::size_of::<T>(),
-                Arc::new(Vec::<T>::with_capacity(len)),
-            )
+            Buffer::from_custom_allocation(ptr, len * std::mem::size_of::<T>(), Arc::new(owner))
         };
 
         Ok(Self {
@@ -99,7 +126,7 @@ where
         // create tensor storage
         Self {
             data: buffer.into(),
-            alloc,
+            alloc: Arc::new(alloc),
         }
     }
 
@@ -223,13 +250,13 @@ where
         let buffer = Buffer::from_custom_allocation(
             NonNull::new_unchecked(ptr as *mut u8),
             len * std::mem::size_of::<T>(),
-            Arc::new(Vec::<T>::with_capacity(len)),
+            Arc::new(()),
         );
 
         // create tensor storage
         Self {
             data: buffer.into(),
-            alloc: alloc.clone(),
+            alloc: Arc::new(alloc.clone()),
         }
     }
 }
@@ -238,10 +265,10 @@ where
 impl<T, A> Clone for TensorStorage<T, A>
 where
     T: SafeTensorType + Clone,
-    A: TensorAllocator + Clone,
+    A: TensorAllocator + Clone + 'static,
 {
     fn clone(&self) -> Self {
-        let mut new_storage = Self::new(self.len(), self.alloc.clone())
+        let mut new_storage = Self::new(self.len(), (*self.alloc).clone())
             .expect("Failed to allocate memory for cloned TensorStorage");
         new_storage.as_mut_slice().clone_from_slice(self.as_slice());
         new_storage
@@ -253,6 +280,8 @@ mod tests {
     use super::*;
     use crate::allocator::CpuAllocator;
     use std::alloc::Layout;
+    use std::cell::RefCell;
+    use std::rc::Rc;
 
     #[test]
     fn test_tensor_storage() -> Result<(), TensorAllocatorError> {
@@ -365,4 +394,88 @@
         assert_eq!(result_vec.capacity(), original_vec_capacity);
         assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
     }
+
+    #[test]
+    fn test_tensor_storage_allocator() -> Result<(), TensorAllocatorError> {
+        // A test TensorAllocator that keeps a count of the bytes that are allocated but not yet
+        // deallocated via the allocator.
+        #[derive(Clone)]
+        struct TestAllocator {
+            bytes_allocated: Rc<RefCell<i32>>,
+        }
+        impl TensorAllocator for TestAllocator {
+            fn alloc(&self, layout: Layout) -> Result<*mut u8, TensorAllocatorError> {
+                *self.bytes_allocated.borrow_mut() += layout.size() as i32;
+                CpuAllocator.alloc(layout)
+            }
+            fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+                *self.bytes_allocated.borrow_mut() -= layout.size() as i32;
+                CpuAllocator.dealloc(ptr, layout)
+            }
+        }
+
+        let allocator = TestAllocator {
+            bytes_allocated: Rc::new(RefCell::new(0)),
+        };
+        let len = 1024;
+
+        // TensorStorage::new()
+        // Deallocation should happen when `storage` goes out of scope.
+        {
+            let _storage = TensorStorage::<u8, TestAllocator>::new(len, allocator.clone())?;
+            assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);
+        }
+        assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+
+        // TensorStorage::new() -> TensorStorage::into_vec()
+        // TensorStorage::into_vec() consumes the storage and creates a copy (in this case).
+        // This should cause deallocation of the original memory.
+        {
+            let storage = TensorStorage::<u8, TestAllocator>::new(len, allocator.clone())?;
+            assert_eq!(*allocator.bytes_allocated.borrow(), len as i32);
+
+            let _vec = storage.into_vec();
+            assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+        }
+        assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+
+        // TensorStorage::from_vec() -> TensorStorage::into_vec()
+        // TensorStorage::from_vec() currently does not use the custom allocator, so the
+        // bytes_allocated value should not change.
+        {
+            let original_vec = Vec::<u8>::with_capacity(len);
+            let original_vec_ptr = original_vec.as_ptr();
+            let original_vec_capacity = original_vec.capacity();
+
+            let storage = TensorStorage::<u8, TestAllocator>::from_vec(original_vec, allocator.clone());
+            assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+
+            let result_vec = storage.into_vec();
+            assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+
+            assert_eq!(result_vec.capacity(), original_vec_capacity);
+            assert!(std::ptr::eq(result_vec.as_ptr(), original_vec_ptr));
+        }
+        assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+
+        // TensorStorage::from_ptr()
+        // TensorStorage::from_ptr() does not take ownership of the buffer. So the memory should not be
+        // deallocated when the TensorStorage goes out of scope.
+        // In this case, the memory will be deallocated when the vector goes out of scope.
+        {
+            let mut original_vec = Vec::<u8>::with_capacity(len);
+            let original_ptr = original_vec.as_ptr();
+            {
+                let storage = unsafe {
+                    TensorStorage::<u8, TestAllocator>::from_ptr(original_vec.as_mut_ptr(), len, &allocator)
+                };
+                assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+
+                assert_eq!(storage.as_ptr(), original_ptr);
+            }
+            assert_eq!(*allocator.bytes_allocated.borrow(), 0);
+        }
+
+        Ok(())
+    }
 }
diff --git a/crates/kornia-core/src/tensor.rs b/crates/kornia-core/src/tensor.rs
index e18592ae..983bdb4a 100644
--- a/crates/kornia-core/src/tensor.rs
+++ b/crates/kornia-core/src/tensor.rs
@@ -90,7 +90,7 @@ where
 impl<T, const N: usize, A> Tensor<T, N, A>
 where
     T: SafeTensorType,
-    A: TensorAllocator,
+    A: TensorAllocator + 'static,
 {
     /// Create a new `Tensor` with uninitialized data.
     ///
@@ -875,7 +875,7 @@ where
 impl<T, const N: usize, A> Clone for Tensor<T, N, A>
 where
     T: SafeTensorType + Clone,
-    A: TensorAllocator + Clone,
+    A: TensorAllocator + Clone + 'static,
 {
     fn clone(&self) -> Self {
         Self {
@@ -889,7 +889,7 @@ where
 impl<T, const N: usize, A> std::fmt::Display for Tensor<T, N, A>
 where
     T: SafeTensorType + std::fmt::Display + std::fmt::LowerExp,
-    A: TensorAllocator,
+    A: TensorAllocator + 'static,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let width = self
diff --git a/crates/kornia-core/src/view.rs b/crates/kornia-core/src/view.rs
index e777d898..6cafd2df 100644
--- a/crates/kornia-core/src/view.rs
+++ b/crates/kornia-core/src/view.rs
@@ -14,7 +14,7 @@ pub struct TensorView<'a, T: SafeTensorType, const N: usize, A: TensorAllocator>
     pub strides: [usize; N],
 }
 
-impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator> TensorView<'a, T, N, A> {
+impl<'a, T: SafeTensorType, const N: usize, A: TensorAllocator + 'static> TensorView<'a, T, N, A> {
     /// Returns the data slice of the tensor.
     #[inline]
     pub fn as_slice(&self) -> &[T] {