From e1c14ebc2dcb7a53f77cccd196c866301275d002 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Mon, 27 Jan 2025 17:25:59 +0100 Subject: [PATCH] Tensor list. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: MichaƂ Zientkiewicz --- dali/c_api_2/data_objects.cc | 175 +++++++++++++++++++++++ dali/c_api_2/data_objects.h | 229 +++++++++++++++++++++++++++--- dali/c_api_2/data_objects_test.cc | 71 +++++++++ dali/c_api_2/error_handling.h | 6 + dali/c_api_2/managed_handle.h | 122 ++++++++++++++++ dali/c_api_2/ref_counting.h | 92 ++++++++++++ include/dali/dali.h | 16 +++ 7 files changed, 694 insertions(+), 17 deletions(-) create mode 100644 dali/c_api_2/data_objects.cc create mode 100644 dali/c_api_2/data_objects_test.cc create mode 100644 dali/c_api_2/managed_handle.h diff --git a/dali/c_api_2/data_objects.cc b/dali/c_api_2/data_objects.cc new file mode 100644 index 0000000000..fda220f4c8 --- /dev/null +++ b/dali/c_api_2/data_objects.cc @@ -0,0 +1,175 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dali/c_api_2/data_objects.h" +#include "dali/c_api_2/error_handling.h" + +namespace dali::c_api { + +RefCountedPtr TensorListInterface::Create(daliBufferPlacement_t placement) { + switch (placement.device_type) { + case DALI_STORAGE_CPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + if (placement.pinned) + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + case DALI_STORAGE_GPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + default: + throw std::invalid_argument(make_string("Invalid storage device: ", placement.device_type)); + } +} + +TensorListInterface *ToPointer(daliTensorList_h handle) { + if (!handle) + throw NullHandle("TensorList"); + return static_cast(handle); +} + +} // namespace dali::c_api + +using namespace dali::c_api; // NOLINT + +daliResult_t daliTensorListCreate(daliTensorList_h *out, daliBufferPlacement_t placement) { + DALI_PROLOG(); + auto tl = dali::c_api::TensorListInterface::Create(placement); + *out = tl.release(); // no throwing allowed after this line! + DALI_EPILOG(); +} + +daliResult_t daliTensorListIncRef(daliTensorList_h tl, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + int r = ptr->IncRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListDecRef(daliTensorList_h tl, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + int r = ptr->DecRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListRefCount(daliTensorList_h tl, int *ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + if (!ref) + throw std::invalid_argument("The output pointer must not be NULL."); + int r = ptr->RefCount(); + *ref = r; + DALI_EPILOG(); +} + +DALI_API daliResult_t daliTensorListAttachBuffer( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const int64_t *shapes, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->AttachBuffer(num_samples, ndim, dtype, layout, shapes, data, sample_offsets, deleter); + DALI_EPILOG(); +} + +DALI_API daliResult_t daliTensorListAttachSamples( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->AttachSamples(num_samples, ndim, dtype, layout, samples, sample_deleters); + DALI_EPILOG(); +} + +daliResult_t daliTensorListResize( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const int64_t *shapes) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->Resize(num_samples, ndim, dtype, shapes); + DALI_EPILOG(); +} + +daliResult_t daliTensorListSetLayout( + daliTensorList_h tensor_list, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->SetLayout(layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetLayout( + daliTensorList_h tensor_list, + const char **layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!layout) + throw std::invalid_argument("The output parameter `layout` must not be be NULL"); + *layout = ptr->GetLayout(); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetStream( + daliTensorList_h tensor_list, + cudaStream_t *out_stream) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_stream) + throw std::invalid_argument("The output parameter `out_stream` must not be NULL"); + auto str = ptr->GetStream(); + *out_stream = str.has_value() ? *str : cudaStream_t(-1); + return str.has_value() ? DALI_SUCCESS : DALI_NO_DATA; + DALI_EPILOG(); +} + +daliResult_t daliTensorListSetStream( + daliTensorList_h tensor_list, + const cudaStream_t *stream, + daliBool synchronize) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + std::optional opt_str; + if (stream) + opt_str = *stream; + else + opt_str = std::nullopt; + ptr->SetStream(opt_str, synchronize); + DALI_EPILOG(); +} diff --git a/dali/c_api_2/data_objects.h b/dali/c_api_2/data_objects.h index bacbe7167f..583fdbf5b5 100644 --- a/dali/c_api_2/data_objects.h +++ b/dali/c_api_2/data_objects.h @@ -15,15 +15,20 @@ #ifndef DALI_C_API_2_DATA_OBJECTS_H_ #define DALI_C_API_2_DATA_OBJECTS_H_ +#include +#include #include #include "dali/dali.h" #include "dali/pipeline/data/tensor_list.h" #include "dali/c_api_2/ref_counting.h" +#include "dali/core/tensor_shape_print.h" + +struct _DALITensorList {}; namespace dali { namespace c_api { -class TensorListInterface : public RefCountedObject { +class TensorListInterface : public _DALITensorList, public RefCountedObject { public: virtual ~TensorListInterface() = default; @@ -37,6 +42,7 @@ class TensorListInterface : public RefCountedObject { int num_samples, int ndim, daliDataType_t dtype, + const char *layout, const int64_t *shapes, void *data, const ptrdiff_t *sample_offsets, @@ -46,21 +52,28 @@ class TensorListInterface : public RefCountedObject { int num_samples, int ndim, daliDataType_t dtype, + const char *layout, const daliTensorDesc_t *samples, const daliDeleter_t *sample_deleters) = 0; virtual daliBufferPlacement_t GetBufferPlacement() const = 0; + virtual const char *GetLayout() const = 0; + + virtual void SetLayout(const char *layout) = 0; + virtual void SetStream(std::optional stream, bool synchronize) = 0; virtual std::optional GetStream() const = 0; - virtual std::optional GetReadyEvent() const() = 0; + virtual std::optional GetReadyEvent() const = 0; virtual cudaEvent_t GetOrCreateReadyEvent() = 0; + + static RefCountedPtr Create(daliBufferPlacement_t placement); }; -struct TensorListDeleter { +struct BufferDeleter { daliDeleter_t deleter; AccessOrder deletion_order; @@ -71,7 +84,7 @@ struct TensorListDeleter { deletion_order.is_device() ? &stream : nullptr); } if (deleter.destroy_context) { - deleter.destroy_context(deleter.destroy_context); + deleter.destroy_context(deleter.deleter_ctx); } } }; @@ -86,38 +99,90 @@ class TensorListWrapper : public TensorListInterface { int ndim, daliDataType_t dtype, const int64_t *shapes) override { - tl_->Resize(TensorListShape<>(make_cspan(shapes, num_samples*ndim), num_samples, ndim), dtype); + std::vector shape_data(shapes, shapes + ndim * num_samples); + tl_->Resize(TensorListShape<>(shape_data, num_samples, ndim), dtype); } void AttachBuffer( int num_samples, int ndim, daliDataType_t dtype, + const char *layout, const int64_t *shapes, void *data, const ptrdiff_t *sample_offsets, daliDeleter_t deleter) override { + + if (num_samples < 0) + throw std::invalid_argument("The number of samples must not be negative."); + if (ndim < 0) + throw std::invalid_argument("The number of dimensions must not be negative."); + if (!shapes && ndim >= 0) + throw std::invalid_argument("The `shapes` are required for non-scalar (ndim>=0) samples."); + if (!data && num_samples > 0) { + for (int i = 0; i < num_samples; i++) { + auto sample_shape = make_cspan(&shapes[i*ndim], ndim); + for (int j = 0; j < ndim; j++) + if (sample_shape[j] < 0) + throw std::invalid_argument(make_string( + "Negative extent encountered in the shape of sample ", i, ". Offending shape: ", + TensorShape<-1>(sample_shape))); + if (volume(sample_shape) > 0) + throw std::invalid_argument( + "The pointer to the data buffer must not be null for a non-empty tensor list."); + if (sample_offsets && sample_offsets[i]) + throw std::invalid_argument( + "All sample_offsets must be zero when the data pointer is NULL."); + } + } + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == tl_->sample_dim()) + new_layout = tl_->GetLayout(); + } else { + new_layout = layout; + if (new_layout.ndim() != ndim) + throw std::invalid_argument(make_string( + "The layout '", new_layout, "' cannot describe ", ndim, "-dimensional data.")); + } + tl_->Reset(); tl_->SetSize(num_samples); tl_->set_sample_dim(ndim); - ptridff_t next_offset = 0; + tl_->SetLayout(new_layout); + ptrdiff_t next_offset = 0; auto type_info = TypeTable::GetTypeInfo(dtype); auto element_size = type_info.size(); - std::shared_ptr buffer; + + std::shared_ptr buffer; if (!deleter.delete_buffer && !deleter.destroy_context) { - buffer.reset(buffer, [](void *){}); + buffer = std::shared_ptr(data, [](void *){}); } else { - buffer.reset(buffer, TensorListDeleter{deleter, order()}); + buffer = std::shared_ptr(data, BufferDeleter{deleter, tl_->order()}); } + for (int i = 0; i < num_samples; i++) { - TensorShape<> sample_shape(make_cspan(&shapes[i*ndim]. ndim)); + TensorShape<> sample_shape(make_cspan(&shapes[i*ndim], ndim)); void *sample_data; + size_t sample_bytes = volume(sample_shape) * element_size; if (sample_offsets) { sample_data = static_cast(data) + sample_offsets[i]; } else { sample_data = static_cast(data) + next_offset; - next_offset += volme(sample_shape) * element_size; + next_offset += sample_bytes; } + tl_->SetSample( + i, + std::shared_ptr(buffer, sample_data), + sample_bytes, + tl_->is_pinned(), + sample_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); } } @@ -125,24 +190,154 @@ class TensorListWrapper : public TensorListInterface { int num_samples, int ndim, daliDataType_t dtype, + const char *layout, const daliTensorDesc_t *samples, const daliDeleter_t *sample_deleters) { + if (num_samples < 0) + throw std::invalid_argument("The number of samples must not be negative."); + if (num_samples > 0 && !samples) + throw std::invalid_argument("The pointer to sample descriptors must not be NULL."); + if (ndim < 0) { + if (num_samples == 0) + throw std::invalid_argument( + "The number of dimensions must not be negative when num_samples is 0."); + else + ndim = samples[0].ndim; + } + + for (int i = 0; i < num_samples; i++) { + if (samples[i].ndim != ndim) + throw std::invalid_argument(make_string( + "Invalid `ndim` at sample ", i, ": got ", samples[i].ndim, ", expected ", ndim, ".")); + if (ndim && !samples[i].shape) + throw std::invalid_argument(make_string("Got NULL shape in sample ", i, ".")); + + for (int j = 0; j < ndim; j++) + if (samples[i].shape[j] < 0) { + TensorShape<> sample_shape(make_cspan(samples[i].shape, samples[i].ndim)); + throw std::invalid_argument(make_string( + "Negative extent encountered in the shape of sample ", i, ". Offending shape: ", + sample_shape)); + } + + if (!samples[i].data && volume(make_cspan(samples[i].shape, ndim))) + throw std::invalid_argument(make_string( + "Got NULL data pointer in a non-empty sample ", i, ".")); + } + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == tl_->sample_dim()) + new_layout = tl_->GetLayout(); + } else { + new_layout = layout; + if (new_layout.ndim() != ndim) + throw std::invalid_argument(make_string( + "The layout '", new_layout, "' cannot describe ", ndim, "-dimensional data.")); + } + + tl_->Reset(); + tl_->SetSize(num_samples); + tl_->set_sample_dim(ndim); + tl_->SetLayout(new_layout); + + auto deletion_order = tl_->order(); + + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + for (int i = 0; i < num_samples; i++) { + TensorShape<> sample_shape(make_cspan(samples[i].shape, samples[i].ndim)); + size_t sample_bytes = volume(sample_shape) * element_size; + std::shared_ptr sample_ptr; + if (sample_deleters) { + sample_ptr = std::shared_ptr( + samples[i].data, + BufferDeleter{sample_deleters[i], deletion_order}); + } else { + sample_ptr = std::shared_ptr(samples[i].data, [](void*) {}); + } + tl_->SetSample( + i, + sample_ptr, + sample_bytes, + tl_->is_pinned(), + sample_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } } - virtual daliBufferPlacement_t GetBufferPlacement() const = 0; + daliBufferPlacement_t GetBufferPlacement() const override { + daliBufferPlacement_t placement; + placement.device_id = tl_->device_id(); + StorageDevice dev = backend_to_storage_device::value; + placement.device_type = static_cast(dev); + placement.pinned = tl_->is_pinned(); + return placement; + } - virtual void SetStream(std::optional stream, bool synchronize) = 0; + void SetStream(std::optional stream, bool synchronize) override { + tl_->set_order(stream.has_value() ? AccessOrder(*stream) : AccessOrder::host(), synchronize); + } - virtual std::optional GetStream() const = 0; + void SetLayout(const char *layout_string) { + if (layout_string) { + TensorLayout layout(layout_string); + if (layout.ndim() != tl_->sample_dim()) + throw std::invalid_argument(make_string( + "The layout '", layout, "' cannot describe ", tl_->sample_dim(), "-dimensional data.")); + tl_->SetLayout(layout); + } else { + tl_->SetLayout(""); + } + } - virtual std::optional GetReadyEvent() const() = 0; + const char *GetLayout() const override { + auto &layout = tl_->GetLayout(); + return !layout.empty() ? layout.data() : nullptr; + } - virtual cudaEvent_t GetOrCreateReadyEvent() = 0; + std::optional GetStream() const override { + auto o = tl_->order(); + if (o.is_device()) + return o.stream(); + else + return std::nullopt; + } + + std::optional GetReadyEvent() const override { + auto &e = tl_->ready_event(); + if (e) + return e.get(); + else + return std::nullopt; + } + + cudaEvent_t GetOrCreateReadyEvent() override { + auto &e = tl_->ready_event(); + if (e) + return e.get(); + int device_id = tl_->device_id(); + if (device_id < 0) + throw std::runtime_error("The tensor list is not associated with a CUDA device."); + tl_->set_ready_event(CUDASharedEvent::Create(device_id)); + return tl_->ready_event().get(); + } private: - std::shared_ptr> impl_; + std::shared_ptr> tl_; }; +template +RefCountedPtr> Wrap(std::shared_ptr> tl) { + return RefCountedPtr>(new TensorListWrapper(std::move(tl))); +} + +TensorListInterface *ToPointer(daliTensorList_h handle); + } // namespace c_api } // namespace dali diff --git a/dali/c_api_2/data_objects_test.cc b/dali/c_api_2/data_objects_test.cc new file mode 100644 index 0000000000..053fbf16e0 --- /dev/null +++ b/dali/c_api_2/data_objects_test.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dali/c_api_2/data_objects.h" +#include +#include "dali/c_api_2/managed_handle.h" + +TEST(CAPI2_TensorListTest, NullHandle) { + daliTensorList_h h = nullptr; + int ref = 0; + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListIncRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListDecRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListRefCount(h, &ref)); +} + +TEST(CAPI2_TensorListTest, CreateDestroy) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + placement.pinned = false; + daliTensorList_h h = nullptr; + daliResult_t r = daliTensorListCreate(&h, placement); + ASSERT_NE(h, nullptr); + dali::c_api::TensorListHandle tl(h); + ASSERT_EQ(h, tl.get()); + ASSERT_EQ(r, DALI_SUCCESS); + + int ref = -1; + EXPECT_EQ(daliTensorListRefCount(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 1); + ref = -1; + + h = tl.release(); + EXPECT_EQ(daliTensorListDecRef(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 0); +} + +inline auto CreateTensorList(daliBufferPlacement_t placement) { + auto tl = dali::c_api::TensorListInterface::Create(placement); + return dali::c_api::TensorListHandle(tl.release()); +} + +TEST(CAPI2_TensorListTest, Resize) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_GPU; + auto tl = CreateTensorList(placement); + int64_t shapes[] = { + 480, 640, 3, + 600, 800, 3, + 348, 720, 1, // + }; + EXPECT_EQ(daliTensorListResize(tl, 4, 3, DALI_UINT32, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, -1, 3, DALI_UINT32, shapes), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, -1, DALI_UINT32, shapes), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = -1; + EXPECT_EQ(daliTensorListResize(tl, 4, 3, DALI_UINT32, shapes), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = 480; + EXPECT_EQ(daliTensorListResize(tl, 4, 3, DALI_UINT32, shapes), DALI_SUCCESS); + + +} diff --git a/dali/c_api_2/error_handling.h b/dali/c_api_2/error_handling.h index 30482e0aed..9101a448dd 100644 --- a/dali/c_api_2/error_handling.h +++ b/dali/c_api_2/error_handling.h @@ -32,6 +32,12 @@ class InvalidHandle : public std::invalid_argument { InvalidHandle(const char *what) : std::invalid_argument(what) {} }; +inline InvalidHandle NullHandle() { return InvalidHandle("The handle must not be NULL."); } + +inline InvalidHandle NullHandle(const char *what_handle) { + return InvalidHandle(make_string("The ", what_handle, " handle must not be NULL.")); +} + } // namespace c_api } // namespace dali diff --git a/dali/c_api_2/managed_handle.h b/dali/c_api_2/managed_handle.h new file mode 100644 index 0000000000..b230df8f68 --- /dev/null +++ b/dali/c_api_2/managed_handle.h @@ -0,0 +1,122 @@ + // Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_MANAGED_HANDLE_H_ +#define DALI_C_API_2_MANAGED_HANDLE_H_ + +#include "dali/dali.h" +#include "dali/core/unique_handle.h" + +namespace dali::c_api { + +template +class RefCountedHandle { + public: + using handle_type = HandleType; + static constexpr handle_type null_handle() { return 0; } + + constexpr RefCountedHandle() : handle_(Actual::null_handle()) {} + constexpr explicit RefCountedHandle(handle_type h) : handle_(h) {} + ~RefCountedHandle() { reset(); } + + RefCountedHandle(const RefCountedHandle &h) { + handle_ = h.handle_; + if (*this) + Actual::IncRef(handle_); + } + + RefCountedHandle(RefCountedHandle &&h) noexcept { + handle_ = h.handle_; + h.handle_ = Actual::null_handle(); + } + + RefCountedHandle &operator=(const RefCountedHandle &other) { + if (other.handle_) { + Actual::IncRef(other.handle_); + } + reset(); + handle_ = other.handle_; + return *this; + } + + RefCountedHandle &operator=(RefCountedHandle &&other) noexcept { + std::swap(handle_, other.handle_); + other.reset(); + return *this; + } + + void reset() noexcept { + if (*this) + Actual::DecRef(handle_); + handle_ = Actual::null_handle(); + } + + [[nodiscard]] handle_type release() noexcept { + auto h = handle_; + handle_ = Actual::null_handle(); + return h; + } + + handle_type get() const noexcept { return handle_; } + operator handle_type() const noexcept { return get(); } + + explicit operator bool() const noexcept { return handle_ != Actual::null_handle(); } + + private: + handle_type handle_; +}; + +#define DALI_C_UNIQUE_HANDLE(Resource) \ +class Resource##Handle : public dali::UniqueHandle { \ + public: \ + using UniqueHandle::UniqueHandle; \ + static void DestroyHandle(dali##Resource##_h h) { \ + auto result = dali##Resource##Destroy(h); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + } \ +} + +#define DALI_C_REF_HANDLE(Resource) \ +class Resource##Handle \ +: public dali::c_api::RefCountedHandle { \ + public: \ + using RefCountedHandle::RefCountedHandle; \ + static int IncRef(dali##Resource##_h h) { \ + int ref = 0; \ + auto result = dali##Resource##IncRef(h, &ref); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + return ref; \ + } \ + static int DecRef(dali##Resource##_h h) { \ + int ref = 0; \ + auto result = dali##Resource##DecRef(h, &ref); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + return ref; \ + } \ +} + +DALI_C_UNIQUE_HANDLE(Pipeline); +DALI_C_UNIQUE_HANDLE(PipelineOutputs); +DALI_C_REF_HANDLE(TensorList); + + +} // namespace dali::c_api + +#endif // DALI_C_API_2_MANAGED_HANDLE_H_ diff --git a/dali/c_api_2/ref_counting.h b/dali/c_api_2/ref_counting.h index 611f4bf8e0..11df801388 100644 --- a/dali/c_api_2/ref_counting.h +++ b/dali/c_api_2/ref_counting.h @@ -15,10 +15,102 @@ #ifndef DALI_C_API_2_REF_COUNTING_H_ #define DALI_C_API_2_REF_COUNTING_H_ +#include +#include +#include + namespace dali::c_api { class RefCountedObject { public: + int IncRef() noexcept { + return std::atomic_fetch_add_explicit(&ref_, 1, std::memory_order_relaxed) + 1; + } + + int DecRef() noexcept { + int ret = std::atomic_fetch_sub_explicit(&ref_, 1, std::memory_order_acq_rel) - 1; + if (!ret) + delete this; + return ret; + } + + int RefCount() const noexcept { + return ref_.load(std::memory_order_relaxed); + } + + virtual ~RefCountedObject() = default; + private: + std::atomic ref_{1}; +}; + +template +class RefCountedPtr { + public: + constexpr RefCountedPtr() noexcept = default; + + explicit RefCountedPtr(T *ptr, bool inc_ref = false) noexcept : ptr_(ptr) { + if (inc_ref && ptr_) + ptr_->IncRef(); + } + + ~RefCountedPtr() { + reset(); + } + + template , int> = 0> + RefCountedPtr(const RefCountedPtr &other) noexcept : ptr_(other.ptr_) { + if (ptr_) + ptr_->IncRef(); + } + + template , int> = 0> + RefCountedPtr(RefCountedPtr &&other) noexcept : ptr_(other.ptr_) { + other.ptr_ = nullptr; + } + + template + std::enable_if_t, RefCountedPtr> & + operator=(const RefCountedPtr &other) noexcept { + if (ptr_ == other.ptr_) + return *this; + if (other.ptr_) + other.ptr_->IncRef(); + ptr_->DecRef(); + ptr_ = other.ptr_; + return *this; + } + + template + std::enable_if_t, RefCountedPtr> & + operator=(RefCountedPtr &&other) noexcept { + if (&other == this) + return *this; + std::swap(ptr_, other.ptr_); + other.reset(); + } + + void reset() noexcept { + if (ptr_) + ptr_->DecRef(); + ptr_= nullptr; + } + + [[nodiscard]] T *release() noexcept { + T *p = ptr_; + ptr_ = nullptr; + return p; + } + + constexpr T *operator->() const & noexcept { return ptr_; } + + constexpr T &operator*() const & noexcept { return *ptr_; } + + constexpr T *get() const & noexcept { return ptr_; } + + private: + template + friend class RefCountedPtr; + T *ptr_ = nullptr; }; } // namespace dali::c_api diff --git a/include/dali/dali.h b/include/dali/dali.h index 0d62e26beb..5b2877943b 100644 --- a/include/dali/dali.h +++ b/include/dali/dali.h @@ -281,6 +281,9 @@ DALI_API daliResult_t daliPipelineCreate( daliPipeline_h *out_pipe_handle, const daliPipelineParams_t *params); +/** Destroys a DALI pipeline. */ +DALI_API daliResult_t daliPipelineDestroy(daliPipeline_h pipeline); + /** Creates a DALI pipeline from a serialized one. * * This function creates and deserializes a pipeline. The parameters are used to override @@ -555,6 +558,8 @@ typedef struct _DALIBufferPlacement { daliStorageDevice_t device_type; /** CUDA device ordinal, as returned by CUDA runtime API. + * + * The value of this field is meaningful only if `device_type` is GPU or `pinned` is `true`. * * WARNING: The device_id returned by NVML (and thus, nvidia-smi) may be different. */ @@ -596,6 +601,10 @@ DALI_API daliResult_t daliTensorListResize( * @param num_samples the number of samples in the list * @param ndim the number of dimensions in the sample * @param dtype the element type + * @param layout a layout string describing the order of axes in each sample (e.g. HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared * @param shapes the concatenated shapes of the samples; * must contain num_samples*ndim extents * @param data the pointer to the data buffer @@ -608,6 +617,7 @@ DALI_API daliResult_t daliTensorListAttachBuffer( int num_samples, int ndim, daliDataType_t dtype, + const char *layout, const int64_t *shapes, void *data, const ptrdiff_t *sample_offsets, @@ -629,6 +639,10 @@ DALI_API daliResult_t daliTensorListAttachBuffer( * @param dtype the type of the element of the tensor; * if dtype is DALI_NO_TYPE, then the type is taken from samples[0].dtype; * if set, the dtype in the samples can be left at -1 + * @param layout a layout string describing the order of axes in each sample (e.g. HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared * @param samples the descriptors of the tensors to be attached to the TensorList; * the `ndim` and `dtype` of the samples must match and they must match the * values of `ndim` and `dtype` parameters. @@ -642,6 +656,7 @@ DALI_API daliResult_t daliTensorListAttachSamples( int num_samples, int ndim, daliDataType_t dtype, + const char *layout, const daliTensorDesc_t *samples, const daliDeleter_t *sample_deleters); @@ -699,6 +714,7 @@ DALI_API daliResult_t daliTensorListGetReadyEvent( * * The function ensures that a readiness event is associated with the tensor list. * It can also get the event handle, if the output parameter pointer is not NULL. + * The function fails if the tensor list is not associated with a CUDA device. */ DALI_API daliResult_t daliTensorListGetOrCreateReadyEvent( daliTensorList_h tensor_list,