diff --git a/dali/c_api_2/data_objects.cc b/dali/c_api_2/data_objects.cc
new file mode 100644
index 0000000000..a579cbb70d
--- /dev/null
+++ b/dali/c_api_2/data_objects.cc
@@ -0,0 +1,362 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cassert>
+#include "dali/c_api_2/data_objects.h"
+#include "dali/c_api_2/error_handling.h"
+
+namespace dali::c_api {
+
+RefCountedPtr<ITensor> ITensor::Create(daliBufferPlacement_t placement) {
+  Validate(placement);
+  switch (placement.device_type) {
+    case DALI_STORAGE_CPU:
+      {
+        auto t = std::make_shared<Tensor<CPUBackend>>();
+        t->set_pinned(placement.pinned);
+        if (placement.pinned)
+          t->set_device_id(placement.device_id);
+        return Wrap(std::move(t));
+      }
+    case DALI_STORAGE_GPU:
+      {
+        auto t = std::make_shared<Tensor<GPUBackend>>();
+        t->set_pinned(placement.pinned);
+        t->set_device_id(placement.device_id);
+        return Wrap(std::move(t));
+      }
+    default:
+      assert(!"Unreachable code");
+      return {};
+  }
+}
+
+RefCountedPtr<ITensorList> ITensorList::Create(daliBufferPlacement_t placement) {
+  Validate(placement);
+  switch (placement.device_type) {
+    case DALI_STORAGE_CPU:
+      {
+        auto tl = std::make_shared<TensorList<CPUBackend>>();
+        tl->set_pinned(placement.pinned);
+        if (placement.pinned)
+          tl->set_device_id(placement.device_id);
+        return Wrap(std::move(tl));
+      }
+    case DALI_STORAGE_GPU:
+      {
+        auto tl = std::make_shared<TensorList<GPUBackend>>();
+        tl->set_pinned(placement.pinned);
+        tl->set_device_id(placement.device_id);
+        return Wrap(std::move(tl));
+      }
+    default:
+      assert(!"Unreachable code");
+      return {};
+  }
+}
+
+ITensor *ToPointer(daliTensor_h handle) {
+  if (!handle)
+    throw NullHandle("Tensor");
+  return static_cast<ITensor *>(handle);
+}
+
+ITensorList *ToPointer(daliTensorList_h handle) {
+  if (!handle)
+    throw NullHandle("TensorList");
+  return static_cast<ITensorList *>(handle);
+}
+
+}  // namespace dali::c_api
+
+using namespace dali::c_api;  // NOLINT
+
+template <typename T>
+std::optional<T> ToOptional(const T *nullable) {
+  if (nullable == nullptr)
+    return std::nullopt;
+  else
+    return *nullable;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Tensor
+//////////////////////////////////////////////////////////////////////////////
+
+daliResult_t daliTensorCreate(daliTensor_h *out, daliBufferPlacement_t placement) {
+  DALI_PROLOG();
+  if (!out)
+    throw std::invalid_argument("The output parameter must not be NULL.");
+  auto t = dali::c_api::ITensor::Create(placement);
+  *out = t.release();  // no throwing allowed after this line!
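+  // The sole reference produced by Create() is handed over to the output
+  // handle; the caller releases it with daliTensorDecRef.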
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorIncRef(daliTensor_h t, int *new_ref) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(t);
+  int r = ptr->IncRef();
+  if (new_ref)
+    *new_ref = r;
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorDecRef(daliTensor_h t, int *new_ref) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(t);
+  int r = ptr->DecRef();
+  if (new_ref)
+    *new_ref = r;
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorRefCount(daliTensor_h t, int *ref) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(t);
+  if (!ref)
+    throw std::invalid_argument("The output parameter must not be NULL.");
+  *ref = ptr->RefCount();
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorAttachBuffer(
+    daliTensor_h tensor,
+    int ndim,
+    const int64_t *shape,
+    daliDataType_t dtype,
+    const char *layout,
+    void *data,
+    daliDeleter_t deleter) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  ptr->AttachBuffer(ndim, shape, dtype, layout, data, deleter);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorResize(
+    daliTensor_h tensor,
+    int ndim,
+    const int64_t *shape,
+    daliDataType_t dtype,
+    const char *layout) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  ptr->Resize(ndim, shape, dtype, layout);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorSetLayout(
+    daliTensor_h tensor,
+    const char *layout) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  ptr->SetLayout(layout);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorGetLayout(
+    daliTensor_h tensor,
+    const char **layout) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  if (!layout)
+    throw std::invalid_argument("The output parameter `layout` must not be NULL.");
+  *layout = ptr->GetLayout();
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorGetStream(
+    daliTensor_h tensor,
+    cudaStream_t *out_stream) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  if (!out_stream)
+    throw std::invalid_argument("The output parameter `out_stream` must not be NULL.");
+  auto str = ptr->GetStream();
+  *out_stream = str.has_value() ? *str : cudaStream_t(-1);
+  return str.has_value() ? DALI_SUCCESS : DALI_NO_DATA;
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorSetStream(
+    daliTensor_h tensor,
+    const cudaStream_t *stream,
+    daliBool synchronize) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  ptr->SetStream(ToOptional(stream), synchronize);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorGetDesc(
+    daliTensor_h tensor,
+    daliTensorDesc_t *out_desc) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor);
+  if (!out_desc)
+    throw std::invalid_argument("The output parameter `out_desc` must not be NULL.");
+  *out_desc = ptr->GetDesc();
+  DALI_EPILOG();
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// TensorList
+//////////////////////////////////////////////////////////////////////////////
+
+daliResult_t daliTensorListCreate(daliTensorList_h *out, daliBufferPlacement_t placement) {
+  DALI_PROLOG();
+  if (!out)
+    throw std::invalid_argument("The output parameter must not be NULL.");
+  auto tl = dali::c_api::ITensorList::Create(placement);
+  *out = tl.release();  // no throwing allowed after this line!
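+  // As in daliTensorCreate, ownership of the reference is transferred to the
+  // caller, who releases it with daliTensorListDecRef.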
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListIncRef(daliTensorList_h tl, int *new_ref) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tl);
+  int r = ptr->IncRef();
+  if (new_ref)
+    *new_ref = r;
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListDecRef(daliTensorList_h tl, int *new_ref) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tl);
+  int r = ptr->DecRef();
+  if (new_ref)
+    *new_ref = r;
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListRefCount(daliTensorList_h tl, int *ref) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tl);
+  if (!ref)
+    throw std::invalid_argument("The output pointer must not be NULL.");
+  *ref = ptr->RefCount();
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListAttachBuffer(
+    daliTensorList_h tensor_list,
+    int num_samples,
+    int ndim,
+    const int64_t *shapes,
+    daliDataType_t dtype,
+    const char *layout,
+    void *data,
+    const ptrdiff_t *sample_offsets,
+    daliDeleter_t deleter) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  ptr->AttachBuffer(num_samples, ndim, shapes, dtype, layout, data, sample_offsets, deleter);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListAttachSamples(
+    daliTensorList_h tensor_list,
+    int num_samples,
+    int ndim,
+    daliDataType_t dtype,
+    const char *layout,
+    const daliTensorDesc_t *samples,
+    const daliDeleter_t *sample_deleters) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  ptr->AttachSamples(num_samples, ndim, dtype, layout, samples, sample_deleters);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListResize(
+    daliTensorList_h tensor_list,
+    int num_samples,
+    int ndim,
+    const int64_t *shapes,
+    daliDataType_t dtype,
+    const char *layout) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  ptr->Resize(num_samples, ndim, shapes, dtype, layout);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListSetLayout(
+    daliTensorList_h tensor_list,
+    const char *layout) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  ptr->SetLayout(layout);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListGetLayout(
+    daliTensorList_h tensor_list,
+    const char **layout) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  if (!layout)
+    throw std::invalid_argument("The output parameter `layout` must not be NULL.");
+  *layout = ptr->GetLayout();
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListGetStream(
+    daliTensorList_h tensor_list,
+    cudaStream_t *out_stream) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  if (!out_stream)
+    throw std::invalid_argument("The output parameter `out_stream` must not be NULL.");
+  auto str = ptr->GetStream();
+  *out_stream = str.has_value() ? *str : cudaStream_t(-1);
+  return str.has_value() ? DALI_SUCCESS : DALI_NO_DATA;
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListSetStream(
+    daliTensorList_h tensor_list,
+    const cudaStream_t *stream,
+    daliBool synchronize) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  ptr->SetStream(ToOptional(stream), synchronize);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListGetTensorDesc(
+    daliTensorList_h tensor_list,
+    daliTensorDesc_t *out_tensor,
+    int sample_idx) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  if (!out_tensor)
+    throw std::invalid_argument("The output parameter `out_tensor` must not be NULL.");
+  *out_tensor = ptr->GetTensorDesc(sample_idx);
+  DALI_EPILOG();
+}
+
+daliResult_t daliTensorListViewAsTensor(
+    daliTensorList_h tensor_list,
+    daliTensor_h *out_tensor) {
+  DALI_PROLOG();
+  auto *ptr = ToPointer(tensor_list);
+  if (!out_tensor)
+    throw std::invalid_argument("The output parameter `out_tensor` must not be NULL.");
+  auto t = ptr->ViewAsTensor();
+  *out_tensor = t.release();  // no throwing allowed after this line
+  DALI_EPILOG();
+}
diff --git a/dali/c_api_2/data_objects.h b/dali/c_api_2/data_objects.h
new file mode 100644
index 0000000000..72b1a3eac0
--- /dev/null
+++ b/dali/c_api_2/data_objects.h
@@ -0,0 +1,613 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
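+//
+// Reference-counted implementations of the opaque daliTensor_h and
+// daliTensorList_h handles, wrapping DALI's Tensor<Backend> and
+// TensorList<Backend>.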
+
+#ifndef DALI_C_API_2_DATA_OBJECTS_H_
+#define DALI_C_API_2_DATA_OBJECTS_H_
+
+#include <memory>
+#include <optional>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+#define DALI_ALLOW_NEW_C_API
+#include "dali/dali.h"
+#include "dali/pipeline/data/tensor_list.h"
+#include "dali/c_api_2/ref_counting.h"
+#include "dali/c_api_2/validation.h"
+
+
+struct _DALITensorList {};
+struct _DALITensor {};
+
+namespace dali {
+namespace c_api {
+
+//////////////////////////////////////////////////////////////////////////////
+// Interfaces
+//////////////////////////////////////////////////////////////////////////////
+
+class ITensor : public _DALITensor, public RefCountedObject {
+ public:
+  virtual ~ITensor() = default;
+
+  virtual void Resize(
+      int ndim,
+      const int64_t *shape,
+      daliDataType_t dtype,
+      const char *layout) = 0;
+
+  virtual void AttachBuffer(
+      int ndim,
+      const int64_t *shape,
+      daliDataType_t dtype,
+      const char *layout,
+      void *data,
+      daliDeleter_t deleter) = 0;
+
+  virtual daliBufferPlacement_t GetBufferPlacement() const = 0;
+
+  virtual const char *GetLayout() const = 0;
+
+  virtual void SetLayout(const char *layout) = 0;
+
+  virtual void SetStream(std::optional<cudaStream_t> stream, bool synchronize) = 0;
+
+  virtual std::optional<cudaStream_t> GetStream() const = 0;
+
+  virtual std::optional<cudaEvent_t> GetReadyEvent() const = 0;
+
+  virtual cudaEvent_t GetOrCreateReadyEvent() = 0;
+
+  virtual daliTensorDesc_t GetDesc() const = 0;
+
+  static RefCountedPtr<ITensor> Create(daliBufferPlacement_t placement);
+};
+
+
+class ITensorList : public _DALITensorList, public RefCountedObject {
+ public:
+  virtual ~ITensorList() = default;
+
+  virtual void Resize(
+      int num_samples,
+      int ndim,
+      const int64_t *shapes,
+      daliDataType_t dtype,
+      const char *layout) = 0;
+
+  virtual void AttachBuffer(
+      int num_samples,
+      int ndim,
+      const int64_t *shapes,
+      daliDataType_t dtype,
+      const char *layout,
+      void *data,
+      const ptrdiff_t *sample_offsets,
+      daliDeleter_t deleter) = 0;
+
+  virtual void AttachSamples(
+      int num_samples,
+      int ndim,
+      daliDataType_t dtype,
+      const char *layout,
+      const daliTensorDesc_t *samples,
+      const daliDeleter_t *sample_deleters) = 0;
+
+  virtual daliBufferPlacement_t GetBufferPlacement() const = 0;
+
+  virtual const char *GetLayout() const = 0;
+
+  virtual void SetLayout(const char *layout) = 0;
+
+  virtual void SetStream(std::optional<cudaStream_t> stream, bool synchronize) = 0;
+
+  virtual std::optional<cudaStream_t> GetStream() const = 0;
+
+  virtual std::optional<cudaEvent_t> GetReadyEvent() const = 0;
+
+  virtual cudaEvent_t GetOrCreateReadyEvent() = 0;
+
+  virtual daliTensorDesc_t GetTensorDesc(int sample) const = 0;
+
+  virtual RefCountedPtr<ITensor> ViewAsTensor() const = 0;
+
+  static RefCountedPtr<ITensorList> Create(daliBufferPlacement_t placement);
+};
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Implementation
+//////////////////////////////////////////////////////////////////////////////
+
+
+struct BufferDeleter {
+  daliDeleter_t deleter;
+  AccessOrder deletion_order;
+
+  void operator()(void *data) {
+    if (deleter.delete_buffer) {
+      cudaStream_t stream = deletion_order.stream();
+      deleter.delete_buffer(deleter.deleter_ctx, data,
+                            deletion_order.is_device() ? &stream : nullptr);
+    }
+    if (deleter.destroy_context) {
+      deleter.destroy_context(deleter.deleter_ctx);
+    }
+  }
+};
+
+template <typename Backend>
+class TensorWrapper : public ITensor {
+ public:
+  explicit TensorWrapper(std::shared_ptr<Tensor<Backend>> t) : t_(std::move(t)) {}
+
+  void Resize(
+      int ndim,
+      const int64_t *shape,
+      daliDataType_t dtype,
+      const char *layout) override {
+    ValidateShape(ndim, shape);
+    Validate(dtype);
+    if (layout)
+      Validate(TensorLayout(layout), ndim);
+    t_->Resize(TensorShape<>(make_cspan(shape, ndim)), dtype);
+    if (layout)
+      t_->SetLayout(layout);
+  }
+
+  void AttachBuffer(
+      int ndim,
+      const int64_t *shape,
+      daliDataType_t dtype,
+      const char *layout,
+      void *data,
+      daliDeleter_t deleter) override {
+    ValidateShape(ndim, shape);
+    Validate(dtype);
+    if (layout)
+      Validate(TensorLayout(layout), ndim);
+
+    TensorShape<> tshape(make_cspan(shape, ndim));
+    size_t num_elements = volume(tshape);
+    if (num_elements > 0 && !data)
+      throw std::invalid_argument("The data buffer must not be NULL for a non-empty tensor.");
+
+    TensorLayout new_layout = {};
+
+    if (!layout) {
+      if (ndim == t_->ndim())
+        new_layout = t_->GetLayout();
+    } else {
+      new_layout = layout;
+      Validate(new_layout, ndim);
+    }
+
+    t_->Reset();
+    auto type_info = TypeTable::GetTypeInfo(dtype);
+    auto element_size = type_info.size();
+
+    std::shared_ptr<void> buffer;
+    if (!deleter.delete_buffer && !deleter.destroy_context) {
+      buffer = std::shared_ptr<void>(data, [](void *) {});
+    } else {
+      buffer = std::shared_ptr<void>(data, BufferDeleter{deleter, t_->order()});
+    }
+
+    t_->ShareData(
+        std::move(buffer),
+        num_elements * element_size,
+        t_->is_pinned(),
+        tshape,
+        dtype,
+        t_->device_id(),
+        t_->order());
+
+    if (layout)
+      t_->SetLayout(new_layout);
+  }
+
+  daliBufferPlacement_t GetBufferPlacement() const override {
+    daliBufferPlacement_t placement;
+    placement.device_id = t_->device_id();
+    StorageDevice dev = backend_to_storage_device<Backend>::value;
+    placement.device_type = static_cast<daliStorageDevice_t>(dev);
+    placement.pinned = t_->is_pinned();
+    return placement;
+  }
+
+  void SetStream(std::optional<cudaStream_t> stream, bool synchronize) override {
+    t_->set_order(stream.has_value() ? AccessOrder(*stream) : AccessOrder::host(), synchronize);
+  }
+
+  void SetLayout(const char *layout_string) override {
+    if (layout_string) {
+      TensorLayout layout(layout_string);
+      Validate(layout, t_->ndim());
+      t_->SetLayout(layout);
+    } else {
+      t_->SetLayout("");
+    }
+  }
+
+  const char *GetLayout() const override {
+    auto &layout = t_->GetLayout();
+    return !layout.empty() ? layout.data() : nullptr;
+  }
+
+  std::optional<cudaStream_t> GetStream() const override {
+    auto o = t_->order();
+    if (o.is_device())
+      return o.stream();
+    else
+      return std::nullopt;
+  }
+
+  std::optional<cudaEvent_t> GetReadyEvent() const override {
+    auto &e = t_->ready_event();
+    if (e)
+      return e.get();
+    else
+      return std::nullopt;
+  }
+
+  cudaEvent_t GetOrCreateReadyEvent() override {
+    auto &e = t_->ready_event();
+    if (e)
+      return e.get();
+    int device_id = t_->device_id();
+    if (device_id < 0)
+      throw std::runtime_error("The tensor is not associated with a CUDA device.");
+    t_->set_ready_event(CUDASharedEvent::Create(device_id));
+    return t_->ready_event().get();
+  }
+
+  daliTensorDesc_t GetDesc() const override {
+    auto &shape = t_->shape();
+    daliTensorDesc_t desc{};
+    desc.ndim = shape.sample_dim();
+    desc.data = t_->raw_mutable_data();
+    desc.dtype = t_->type();
+    desc.layout = GetLayout();
+    desc.shape = shape.data();
+    return desc;
+  }
+
+ private:
+  std::shared_ptr<Tensor<Backend>> t_;
+};
+
+template <typename Backend>
+RefCountedPtr<TensorWrapper<Backend>> Wrap(std::shared_ptr<Tensor<Backend>> t) {
+  return RefCountedPtr<TensorWrapper<Backend>>(new TensorWrapper<Backend>(std::move(t)));
+}
+
+template <typename Backend>
+class TensorListWrapper : public ITensorList {
+ public:
+  explicit TensorListWrapper(std::shared_ptr<TensorList<Backend>> tl) : tl_(std::move(tl)) {}
+
+  void Resize(
+      int num_samples,
+      int ndim,
+      const int64_t *shapes,
+      daliDataType_t dtype,
+      const char *layout) override {
+    Validate(dtype);
+    ValidateShape(num_samples, ndim, shapes);
+    if (layout)
+      Validate(TensorLayout(layout), ndim);
+    std::vector<int64_t> shape_data(shapes, shapes + ndim * num_samples);
+    tl_->Resize(TensorListShape<>(std::move(shape_data), num_samples, ndim), dtype);
+    if (layout)
+      tl_->SetLayout(layout);
+  }
+
+  void AttachBuffer(
+      int num_samples,
+      int ndim,
+      const int64_t *shapes,
+      daliDataType_t dtype,
+      const char *layout,
+      void *data,
+      const ptrdiff_t *sample_offsets,
+      daliDeleter_t deleter) override {
+    ValidateShape(num_samples, ndim, shapes);
+    Validate(dtype);
+
+    if (!data && num_samples > 0) {
+      for (int i = 0; i < num_samples; i++) {
+        auto sample_shape = make_cspan(&shapes[i*ndim], ndim);
+
+        if (volume(sample_shape) > 0)
+          throw std::invalid_argument(
+            "The pointer to the data buffer must not be null for a non-empty tensor list.");
+
+        if (sample_offsets && sample_offsets[i])
+          throw std::invalid_argument(
+            "All sample_offsets must be zero when the data pointer is NULL.");
+      }
+    }
+
+    TensorLayout new_layout = {};
+
+    if (!layout) {
+      if (ndim == tl_->sample_dim())
+        new_layout = tl_->GetLayout();
+    } else {
+      new_layout = layout;
+      Validate(new_layout, ndim);
+    }
+
+    tl_->Reset();
+    tl_->SetSize(num_samples);
+    tl_->set_sample_dim(ndim);
+    tl_->SetLayout(new_layout);
+    tl_->set_type(dtype);
+    ptrdiff_t next_offset = 0;
+    auto type_info = TypeTable::GetTypeInfo(dtype);
+    auto element_size = type_info.size();
+
+    std::shared_ptr<void> buffer;
+    if (!deleter.delete_buffer && !deleter.destroy_context) {
+      buffer = std::shared_ptr<void>(data, [](void *) {});
+    } else {
+      buffer = std::shared_ptr<void>(data, BufferDeleter{deleter, tl_->order()});
+    }
+
+    // The total size is accumulated even when no sample_offsets are given
+    // (NULL offsets imply a densely packed, contiguous buffer).
+    bool is_contiguous = true;
+    for (int i = 0; i < num_samples; i++) {
+      if (sample_offsets && sample_offsets[i] != next_offset) {
+        is_contiguous = false;
+        break;
+      }
+      auto num_elements = volume(make_cspan(&shapes[i*ndim], ndim));
+      next_offset += num_elements * element_size;
+    }
+
+    if (is_contiguous) {
+      tl_->SetContiguity(BatchContiguity::Contiguous);
+      std::vector<int64_t> shape_data(shapes, shapes + ndim * num_samples);
+      TensorListShape<> tl_shape(shape_data, num_samples, ndim);
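+      // The whole batch aliases one allocation, so a single ShareData call
+      // suffices; next_offset now holds the total payload size in bytes.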
+      tl_->ShareData(
+          std::move(buffer),
+          next_offset,
+          tl_->is_pinned(),
+          tl_shape,
+          dtype,
+          tl_->device_id(),
+          tl_->order(),
+          new_layout);
+    } else {
+      tl_->SetContiguity(BatchContiguity::Automatic);
+      next_offset = 0;
+
+      for (int i = 0; i < num_samples; i++) {
+        TensorShape<> sample_shape(make_cspan(&shapes[i*ndim], ndim));
+        void *sample_data;
+        size_t sample_bytes = volume(sample_shape) * element_size;
+        if (sample_offsets) {
+          sample_data = static_cast<char *>(data) + sample_offsets[i];
+        } else {
+          sample_data = static_cast<char *>(data) + next_offset;
+          next_offset += sample_bytes;
+        }
+        tl_->SetSample(
+            i,
+            std::shared_ptr<void>(buffer, sample_data),
+            sample_bytes,
+            tl_->is_pinned(),
+            sample_shape,
+            dtype,
+            tl_->device_id(),
+            tl_->order(),
+            new_layout);
+      }
+    }
+  }
+
+  void AttachSamples(
+      int num_samples,
+      int ndim,
+      daliDataType_t dtype,
+      const char *layout,
+      const daliTensorDesc_t *samples,
+      const daliDeleter_t *sample_deleters) override {
+    ValidateNumSamples(num_samples);
+    if (num_samples > 0 && !samples)
+      throw std::invalid_argument("The pointer to sample descriptors must not be NULL.");
+    if (ndim < 0) {
+      if (num_samples == 0)
+        throw std::invalid_argument(
+          "The number of dimensions must not be negative when num_samples is 0.");
+      else
+        ndim = samples[0].ndim;
+    }
+    if (dtype == DALI_NO_TYPE) {
+      if (num_samples == 0)
+        throw std::invalid_argument(
+          "A valid data type must be provided when there's no sample to take it from.");
+      dtype = samples[0].dtype;
+    }
+    Validate(dtype);
+
+    for (int i = 0; i < num_samples; i++) {
+      if (ndim && !samples[i].shape)
+        throw std::invalid_argument(make_string("Got NULL shape in sample ", i, "."));
+      if (samples[i].dtype != dtype)
+        throw std::invalid_argument(make_string("Unexpected data type in sample ", i, ". Got: ",
+          samples[i].dtype, ", expected ", dtype, "."));
+      ValidateSampleShape(i, make_cspan(samples[i].shape, samples[i].ndim), ndim);
+
+      if (!samples[i].data && volume(make_cspan(samples[i].shape, ndim)))
+        throw std::invalid_argument(make_string(
+          "Got NULL data pointer in a non-empty sample ", i, "."));
+    }
+
+    TensorLayout new_layout = {};
+
+    if (!layout) {
+      if (ndim == tl_->sample_dim())
+        new_layout = tl_->GetLayout();
+    } else {
+      new_layout = layout;
+      Validate(new_layout, ndim);
+    }
+
+    tl_->Reset();
+    tl_->SetSize(num_samples);
+    tl_->set_sample_dim(ndim);
+    tl_->SetLayout(new_layout);
+
+    auto deletion_order = tl_->order();
+
+    auto type_info = TypeTable::GetTypeInfo(dtype);
+    auto element_size = type_info.size();
+    for (int i = 0; i < num_samples; i++) {
+      TensorShape<> sample_shape(make_cspan(samples[i].shape, samples[i].ndim));
+      size_t sample_bytes = volume(sample_shape) * element_size;
+      std::shared_ptr<void> sample_ptr;
+      if (sample_deleters) {
+        sample_ptr = std::shared_ptr<void>(
+          samples[i].data,
+          BufferDeleter{sample_deleters[i], deletion_order});
+      } else {
+        sample_ptr = std::shared_ptr<void>(samples[i].data, [](void *) {});
+      }
+
+      tl_->SetSample(
+          i,
+          sample_ptr,
+          sample_bytes,
+          tl_->is_pinned(),
+          sample_shape,
+          dtype,
+          tl_->device_id(),
+          tl_->order(),
+          new_layout);
+    }
+  }
+
+  daliBufferPlacement_t GetBufferPlacement() const override {
+    daliBufferPlacement_t placement;
+    placement.device_id = tl_->device_id();
+    StorageDevice dev = backend_to_storage_device<Backend>::value;
+    placement.device_type = static_cast<daliStorageDevice_t>(dev);
+    placement.pinned = tl_->is_pinned();
+    return placement;
+  }
+
+  void SetStream(std::optional<cudaStream_t> stream, bool synchronize) override {
+    tl_->set_order(stream.has_value() ? AccessOrder(*stream) : AccessOrder::host(), synchronize);
+  }
+
+  void SetLayout(const char *layout_string) override {
+    if (layout_string) {
+      TensorLayout layout(layout_string);
+      Validate(layout, tl_->sample_dim());
+      tl_->SetLayout(layout);
+    } else {
+      tl_->SetLayout("");
+    }
+  }
+
+  const char *GetLayout() const override {
+    auto &layout = tl_->GetLayout();
+    return !layout.empty() ? layout.data() : nullptr;
+  }
+
+  std::optional<cudaStream_t> GetStream() const override {
+    auto o = tl_->order();
+    if (o.is_device())
+      return o.stream();
+    else
+      return std::nullopt;
+  }
+
+  std::optional<cudaEvent_t> GetReadyEvent() const override {
+    auto &e = tl_->ready_event();
+    if (e)
+      return e.get();
+    else
+      return std::nullopt;
+  }
+
+  cudaEvent_t GetOrCreateReadyEvent() override {
+    auto &e = tl_->ready_event();
+    if (e)
+      return e.get();
+    int device_id = tl_->device_id();
+    if (device_id < 0)
+      throw std::runtime_error("The tensor list is not associated with a CUDA device.");
+    tl_->set_ready_event(CUDASharedEvent::Create(device_id));
+    return tl_->ready_event().get();
+  }
+
+  daliTensorDesc_t GetTensorDesc(int sample) const override {
+    auto &shape = tl_->shape();
+    if (sample < 0 || sample >= shape.num_samples())
+      throw std::out_of_range(make_string("The sample index ", sample, " is out of range. "
+        "Valid indices are [0..", shape.num_samples() - 1, "]."));
+    daliTensorDesc_t desc{};
+    desc.ndim = shape.sample_dim();
+    desc.data = tl_->raw_mutable_tensor(sample);
+    desc.dtype = tl_->type();
+    desc.layout = GetLayout();
+    desc.shape = shape.tensor_shape_span(sample).data();
+    return desc;
+  }
+
+  RefCountedPtr<ITensor> ViewAsTensor() const override {
+    if (!tl_->IsContiguous())
+      throw std::runtime_error(
+        "The TensorList is not contiguous and cannot be viewed as a Tensor.");
+
+    auto t = std::make_shared<Tensor<Backend>>();
+    auto buf = unsafe_owner(*tl_);
+    auto &lshape = tl_->shape();
+    TensorShape<> tshape = shape_cat(lshape.num_samples(), lshape[0]);
+    t->ShareData(
+        std::move(buf),
+        tl_->nbytes(),
+        tl_->is_pinned(),
+        tshape,
+        tl_->type(),
+        tl_->device_id(),
+        tl_->order(),
+        tl_->ready_event());
+    TensorLayout layout = tl_->GetLayout();
+    if (layout.size() == lshape.sample_dim()) {
+      t->SetLayout("N" + layout);
+    }
+    return Wrap(std::move(t));
+  }
+
+ private:
+  std::shared_ptr<TensorList<Backend>> tl_;
+};
+
+template <typename Backend>
+RefCountedPtr<TensorListWrapper<Backend>> Wrap(std::shared_ptr<TensorList<Backend>> tl) {
+  return RefCountedPtr<TensorListWrapper<Backend>>(new TensorListWrapper<Backend>(std::move(tl)));
+}
+
+
+ITensor *ToPointer(daliTensor_h handle);
+ITensorList *ToPointer(daliTensorList_h handle);
+
+}  // namespace c_api
+}  // namespace dali
+
+#endif  // DALI_C_API_2_DATA_OBJECTS_H_
diff --git a/dali/c_api_2/data_objects_test.cc b/dali/c_api_2/data_objects_test.cc
new file mode 100644
index 0000000000..2102ae1f54
--- /dev/null
+++ b/dali/c_api_2/data_objects_test.cc
@@ -0,0 +1,414 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
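+//
+// Tests for the C API v2 data objects: handle life cycle, reference counting,
+// resizing, buffer attachment and viewing a tensor list as a tensor.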
+ +#include "dali/c_api_2/data_objects.h" +#include +#include "dali/c_api_2/managed_handle.h" +#include "dali/core/span.h" + +TEST(CAPI2_TensorListTest, NullHandle) { + daliTensorList_h h = nullptr; + int ref = 0; + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListIncRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListDecRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListRefCount(h, &ref)); +} + +TEST(CAPI2_TensorListTest, CreateDestroy) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + placement.pinned = false; + daliTensorList_h h = nullptr; + daliResult_t r = daliTensorListCreate(&h, placement); + ASSERT_NE(h, nullptr); + dali::c_api::TensorListHandle tl(h); + ASSERT_EQ(h, tl.get()); + ASSERT_EQ(r, DALI_SUCCESS); + + int ref = -1; + EXPECT_EQ(daliTensorListRefCount(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 1); + ref = -1; + + h = tl.release(); + EXPECT_EQ(daliTensorListDecRef(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 0); +} + +inline auto CreateTensorList(daliBufferPlacement_t placement) { + daliTensorList_h handle; + auto err = daliTensorListCreate(&handle, placement); + switch (err) { + case DALI_SUCCESS: + break; + case DALI_ERROR_OUT_OF_MEMORY: + throw std::bad_alloc(); + case DALI_ERROR_INVALID_ARGUMENT: + throw std::invalid_argument(daliGetLastErrorMessage()); + default: + throw std::runtime_error(daliGetLastErrorMessage()); + } + return dali::c_api::TensorListHandle(handle); +} + +void TestTensorListResize(daliStorageDevice_t storage_device) { + daliBufferPlacement_t placement{}; + placement.device_type = storage_device; + int64_t shapes[] = { + 480, 640, 3, + 600, 800, 4, + 348, 720, 1, + 1080, 1920, 3 + }; + daliDataType_t dtype = DALI_UINT32; + + auto tl = CreateTensorList(placement); + EXPECT_EQ(daliTensorListResize(tl, 4, 3, nullptr, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, -1, 3, shapes, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, -1, shapes, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, "ABCD"), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = -1; + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, "HWC"), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = 480; + EXPECT_EQ(daliTensorListResize(tl, 1, 3, shapes, dtype, "HWC"), DALI_SUCCESS); + // resize, but keep the layout + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, nullptr), DALI_SUCCESS); + + size_t element_size = dali::TypeTable::GetTypeInfo(dtype).size(); + + ptrdiff_t offset = 0; + const char *base; + for (int i = 0; i < 4; i++) { + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorListGetTensorDesc(tl, &desc, i), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + if (i == 0) + base = static_cast(desc.data); + EXPECT_EQ(desc.data, base + offset); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], shapes[3 * i + j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * element_size; + if (storage_device == DALI_STORAGE_GPU) { + // Check that the data is accessible for the GPU + EXPECT_EQ(cudaMemset(desc.data, 0, sample_bytes), cudaSuccess); + } else { + // Check that the data is accessible for the CPU + memset(desc.data, 0, sample_bytes); // just not crashing is OK + } + offset += sample_bytes; + } + if (storage_device == DALI_STORAGE_GPU) { + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); + } +} + +struct 
+struct TestDeleterCtx {
+  void *expected_data;
+  int buffer_delete_count;
+  int context_delete_count;
+};
+
+template <typename element_t>
+inline std::pair<daliDeleter_t, std::unique_ptr<TestDeleterCtx>>
+MakeTestDeleter(element_t *expected_data) {
+  auto ctx = std::unique_ptr<TestDeleterCtx>(new TestDeleterCtx{ expected_data, 0, 0 });
+  daliDeleter_t deleter = {};
+  deleter.deleter_ctx = ctx.get();
+  deleter.delete_buffer = [](void *vctx, void *data, const cudaStream_t *stream) {
+    ASSERT_NE(data, nullptr);
+    auto *ctx = static_cast<TestDeleterCtx *>(vctx);
+    EXPECT_EQ(ctx->context_delete_count, 0);
+    EXPECT_EQ(ctx->buffer_delete_count, 0);
+    EXPECT_EQ(data, ctx->expected_data);
+    ctx->buffer_delete_count++;
+    delete [] static_cast<element_t *>(data);
+  };
+  deleter.destroy_context = [](void *vctx) {
+    auto *ctx = static_cast<TestDeleterCtx *>(vctx);
+    EXPECT_EQ(ctx->context_delete_count, 0);
+    EXPECT_EQ(ctx->buffer_delete_count, 1);
+    ctx->context_delete_count++;
+  };
+  return { deleter, std::move(ctx) };
+}
+
+TEST(CAPI2_TensorListTest, AttachBuffer) {
+  daliBufferPlacement_t placement{};
+  placement.device_type = DALI_STORAGE_CPU;
+  using element_t = int;
+  daliDataType_t dtype = dali::type2id<element_t>::value;
+  dali::TensorListShape<> lshape({
+    { 480, 640, 3 },
+    { 600, 800, 4 },
+    { 348, 720, 1 },
+    { 1080, 1920, 3 }
+  });
+  auto size = lshape.num_elements();
+  std::unique_ptr<element_t[]> data(new element_t[size]);
+
+  ptrdiff_t offsets[4] = {};
+  for (int i = 1; i < 4; i++)
+    offsets[i] = offsets[i - 1] + volume(lshape[i - 1]) * sizeof(element_t);
+
+  auto [deleter, ctx] = MakeTestDeleter(data.get());
+
+  auto tl = CreateTensorList(placement);
+  ASSERT_EQ(daliTensorListAttachBuffer(
+      tl,
+      lshape.num_samples(),
+      lshape.sample_dim(),
+      lshape.data(),
+      dtype,
+      "HWC",
+      data.get(),
+      offsets,
+      deleter), DALI_SUCCESS);
+
+  void *data_ptr = data.release();  // the buffer is now owned by the tensor list
+
+  ptrdiff_t offset = 0;
+  const char *base = static_cast<const char *>(data_ptr);
+  for (int i = 0; i < 4; i++) {
+    daliTensorDesc_t desc{};
+    EXPECT_EQ(daliTensorListGetTensorDesc(tl, &desc, i), DALI_SUCCESS);
+    ASSERT_EQ(desc.ndim, 3);
+    ASSERT_NE(desc.data, nullptr);
+    EXPECT_EQ(desc.data, base + offset);
+    EXPECT_EQ(desc.dtype, dtype);
+    for (int j = 0; j < 3; j++)
+      EXPECT_EQ(desc.shape[j], lshape[i][j]);
+    size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * sizeof(element_t);
+    offset += sample_bytes;
+  }
+
+  tl.reset();
+
+  EXPECT_EQ(ctx->buffer_delete_count, 1) << "Buffer deleter not called";
+  EXPECT_EQ(ctx->context_delete_count, 1) << "Deleter context not destroyed";
+}
+
+
+TEST(CAPI2_TensorListTest, ViewAsTensor) {
+  daliBufferPlacement_t placement{};
+  placement.device_type = DALI_STORAGE_CPU;
+  using element_t = int;
+  daliDataType_t dtype = dali::type2id<element_t>::value;
+  dali::TensorListShape<> lshape = dali::uniform_list_shape(4, { 480, 640, 3 });
+  auto size = lshape.num_elements();
+  std::unique_ptr<element_t[]> data(new element_t[size]);
+
+  ptrdiff_t sample_size = volume(lshape[0]) * sizeof(element_t);
+
+  ptrdiff_t offsets[4] = {
+    0,
+    1 * sample_size,
+    2 * sample_size,
+    3 * sample_size,
+  };
+
+  auto [deleter, ctx] = MakeTestDeleter(data.get());
+
+  auto tl = CreateTensorList(placement);
+  ASSERT_EQ(daliTensorListAttachBuffer(
+      tl,
+      lshape.num_samples(),
+      lshape.sample_dim(),
+      lshape.data(),
+      dtype,
+      "HWC",
+      data.get(),
+      offsets,
+      deleter), DALI_SUCCESS);
+
+  void *data_ptr = data.release();  // the buffer is now owned by the tensor list
+
+  daliTensor_h ht = nullptr;
+  EXPECT_EQ(daliTensorListViewAsTensor(tl, &ht), DALI_SUCCESS);
+  ASSERT_NE(ht, nullptr);
+  dali::c_api::TensorHandle t(ht);
+
+  daliTensorDesc_t desc{};
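+  // The tensor view prepends the sample dimension: shape (N, H, W, C) and
+  // layout "NHWC" are expected for a uniform HWC batch of N samples.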
+  EXPECT_EQ(daliTensorGetDesc(t, &desc), DALI_SUCCESS);
+  EXPECT_EQ(desc.data, data_ptr);
+  ASSERT_EQ(desc.ndim, 4);
+  ASSERT_NE(desc.shape, nullptr);
+  EXPECT_EQ(desc.shape[0], lshape.num_samples());
+  EXPECT_EQ(desc.shape[1], lshape[0][0]);
+  EXPECT_EQ(desc.shape[2], lshape[0][1]);
+  EXPECT_EQ(desc.shape[3], lshape[0][2]);
+  EXPECT_STREQ(desc.layout, "NHWC");
+  EXPECT_EQ(desc.dtype, dtype);
+
+  tl.reset();
+
+  EXPECT_EQ(ctx->buffer_delete_count, 0) << "Buffer prematurely destroyed";
+  EXPECT_EQ(ctx->context_delete_count, 0) << "Deleter context prematurely destroyed";
+
+  t.reset();
+
+  EXPECT_EQ(ctx->buffer_delete_count, 1) << "Buffer deleter not called";
+  EXPECT_EQ(ctx->context_delete_count, 1) << "Deleter context not destroyed";
+}
+
+
+TEST(CAPI2_TensorListTest, ViewAsTensorError) {
+  daliBufferPlacement_t placement{};
+  placement.device_type = DALI_STORAGE_CPU;
+  using element_t = int;
+  daliDataType_t dtype = dali::type2id<element_t>::value;
+  dali::TensorListShape<> lshape = dali::uniform_list_shape(4, { 480, 640, 3 });
+  auto size = lshape.num_elements();
+  std::unique_ptr<element_t[]> data(new element_t[size]);
+
+  ptrdiff_t sample_size = volume(lshape[0]) * sizeof(element_t);
+
+  // The samples are not in order
+  ptrdiff_t offsets[4] = {
+    0,
+    2 * sample_size,
+    1 * sample_size,
+    3 * sample_size,
+  };
+
+  auto [deleter, ctx] = MakeTestDeleter(data.get());
+
+  auto tl = CreateTensorList(placement);
+  ASSERT_EQ(daliTensorListAttachBuffer(
+      tl,
+      lshape.num_samples(),
+      lshape.sample_dim(),
+      lshape.data(),
+      dtype,
+      "HWC",
+      data.get(),
+      offsets,
+      deleter), DALI_SUCCESS);
+
+  data.release();  // the buffer is now owned by the tensor list
+
+  daliTensor_h ht = nullptr;
+  EXPECT_EQ(daliTensorListViewAsTensor(tl, &ht), DALI_ERROR_INVALID_OPERATION);
+}
+
+
+TEST(CAPI2_TensorListTest, ResizeCPU) {
+  TestTensorListResize(DALI_STORAGE_CPU);
+}
+
+TEST(CAPI2_TensorListTest, ResizeGPU) {
+  TestTensorListResize(DALI_STORAGE_GPU);
+}
+
+
+TEST(CAPI2_TensorTest, NullHandle) {
+  daliTensor_h h = nullptr;
+  int ref = 0;
+  EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorIncRef(h, &ref));
+  EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorDecRef(h, &ref));
+  EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorRefCount(h, &ref));
+}
+
+TEST(CAPI2_TensorTest, CreateDestroy) {
+  daliBufferPlacement_t placement{};
+  placement.device_type = DALI_STORAGE_CPU;
+  placement.pinned = false;
+  daliTensor_h h = nullptr;
+  daliResult_t r = daliTensorCreate(&h, placement);
+  ASSERT_NE(h, nullptr);
+  dali::c_api::TensorHandle t(h);
+  ASSERT_EQ(h, t.get());
+  ASSERT_EQ(r, DALI_SUCCESS);
+
+  int ref = -1;
+  EXPECT_EQ(daliTensorRefCount(h, &ref), DALI_SUCCESS);
+  EXPECT_EQ(ref, 1);
+  ref = -1;
+
+  h = t.release();
+  EXPECT_EQ(daliTensorDecRef(h, &ref), DALI_SUCCESS);
+  EXPECT_EQ(ref, 0);
+}
+
+
+inline auto CreateTensor(daliBufferPlacement_t placement) {
+  daliTensor_h handle;
+  auto err = daliTensorCreate(&handle, placement);
+  switch (err) {
+    case DALI_SUCCESS:
+      break;
+    case DALI_ERROR_OUT_OF_MEMORY:
+      throw std::bad_alloc();
+    case DALI_ERROR_INVALID_ARGUMENT:
+      throw std::invalid_argument(daliGetLastErrorMessage());
+    default:
+      throw std::runtime_error(daliGetLastErrorMessage());
+  }
+  return dali::c_api::TensorHandle(handle);
+}
+
+void TestTensorResize(daliStorageDevice_t storage_device) {
+  daliBufferPlacement_t placement{};
+  placement.device_type = storage_device;
+  auto t = CreateTensor(placement);
+  int64_t shape[] = {
+    1080, 1920, 3
+  };
+  daliDataType_t dtype = DALI_INT16;
+
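+  // A NULL shape, a negative ndim, a layout that does not match ndim and a
+  // negative extent must all be rejected.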
+  EXPECT_EQ(daliTensorResize(t, 3, nullptr, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT);
+  EXPECT_EQ(daliTensorResize(t, -1, shape, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT);
+  EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "ABCD"), DALI_ERROR_INVALID_ARGUMENT);
+  shape[0] = -1;
+  EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "HWC"), DALI_ERROR_INVALID_ARGUMENT);
+  shape[0] = 1;
+  EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "HWC"), DALI_SUCCESS);
+
+  shape[0] = 1080;
+  EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, nullptr), DALI_SUCCESS);
+
+  size_t element_size = dali::TypeTable::GetTypeInfo(dtype).size();
+
+  daliTensorDesc_t desc{};
+  EXPECT_EQ(daliTensorGetDesc(t, &desc), DALI_SUCCESS);
+  ASSERT_EQ(desc.ndim, 3);
+  ASSERT_NE(desc.data, nullptr);
+  EXPECT_STREQ(desc.layout, "HWC");
+  EXPECT_EQ(desc.dtype, dtype);
+  for (int j = 0; j < 3; j++)
+    EXPECT_EQ(desc.shape[j], shape[j]);
+  size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * element_size;
+  if (storage_device == DALI_STORAGE_GPU) {
+    // Check that the data is accessible for the GPU
+    EXPECT_EQ(cudaMemset(desc.data, 0, sample_bytes), cudaSuccess);
+    EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess);
+  } else {
+    // Check that the data is accessible for the CPU
+    memset(desc.data, 0, sample_bytes);  // just not crashing is OK
+  }
+}
+
+TEST(CAPI2_TensorTest, ResizeCPU) {
+  TestTensorResize(DALI_STORAGE_CPU);
+}
+
+TEST(CAPI2_TensorTest, ResizeGPU) {
+  TestTensorResize(DALI_STORAGE_GPU);
+}