From 3b841389ef536cf3ee3d8dc80e2b7afd68fd6878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:37:07 +0100 Subject: [PATCH 1/5] Add C API header and C language build test. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/core/c_api_language_test.c | 16 + include/dali/dali.h | 1063 +++++++++++++++++++++++++++++++ 2 files changed, 1079 insertions(+) create mode 100644 dali/core/c_api_language_test.c create mode 100644 include/dali/dali.h diff --git a/dali/core/c_api_language_test.c b/dali/core/c_api_language_test.c new file mode 100644 index 00000000000..a3526353a18 --- /dev/null +++ b/dali/core/c_api_language_test.c @@ -0,0 +1,16 @@ +// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" diff --git a/include/dali/dali.h b/include/dali/dali.h new file mode 100644 index 00000000000..5b174998429 --- /dev/null +++ b/include/dali/dali.h @@ -0,0 +1,1063 @@ +// Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_DALI_H_ +#define DALI_DALI_H_ + +#ifdef DALI_C_API_H_ +#error The new DALI C API is incompatible with the old one. Please do not include both headers in one translation unit. // NOLINT +#endif + +#ifndef DALI_ALLOW_NEW_C_API +#error The new DALI C API is work in progress and incomplete. +#endif + +#if (defined(__cplusplus) && __cplusplus < 201402L) || \ + (!defined(__cplusplus) && __STDC_VERSION__ < 199901L) +#error The DALI C API requires a C99 or a C++14 compiler. 
+#endif
+
+#include <cuda_runtime_api.h>
+#include <stdint.h>
+#include "dali/core/api_helper.h"
+#include "dali/core/dali_data_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DALI_API DLL_PUBLIC
+
+typedef struct _DALIPipeline *daliPipeline_h;
+typedef struct _DALIPipelineOutputs *daliPipelineOutputs_h;
+typedef struct _DALITensor *daliTensor_h;
+typedef struct _DALITensorList *daliTensorList_h;
+
+typedef enum {
+  DALI_STORAGE_CPU = 0,
+  DALI_STORAGE_GPU = 1,
+  DALI_STORAGE_FORCE_INT32 = 0x7fffffff
+} daliStorageDevice_t;
+
+/** Error codes returned by DALI functions */
+typedef enum {
+  /** The call succeeded */
+  DALI_SUCCESS = 0,
+  /** The call succeeded, but didn't return a value */
+  DALI_NO_DATA = 1,
+  /** The call succeeded, but the queried object is not ready */
+  DALI_NOT_READY = 2,
+
+  DALI_ERROR = (int32_t)0x80000000,  // NOLINT
+  /** The handle is not valid. */
+  DALI_ERROR_INVALID_HANDLE,
+  /** The argument is invalid. Check error message for details. */
+  DALI_ERROR_INVALID_ARGUMENT,
+  /** An invalid type was specified. */
+  DALI_ERROR_INVALID_TYPE,
+  /** A generic user error */
+  DALI_ERROR_INVALID_OPERATION,
+  /** The index is out of valid range */
+  DALI_ERROR_OUT_OF_RANGE,
+  /** The key is not found (when getting) or is not a valid key (when setting) */
+  DALI_ERROR_INVALID_KEY,
+
+  /** An operating system routine failed. */
+  DALI_ERROR_SYSTEM,
+  /** A path to a file or other OS resource is invalid */
+  DALI_ERROR_PATH_NOT_FOUND,
+  /** An I/O operation failed */
+  DALI_ERROR_IO_ERROR,
+  /** An operation timed out */
+  DALI_ERROR_TIMEOUT,
+
+  /** A memory allocation failed */
+  DALI_ERROR_OUT_OF_MEMORY = DALI_ERROR + 0x100,
+
+  /** Internal error - logic error in DALI code */
+  DALI_ERROR_INTERNAL = DALI_ERROR + 0x200,
+  /** The library is shutting down or has shut down */
+  DALI_ERROR_UNLOADING,
+
+  /** A CUDA API call has failed */
+  DALI_ERROR_CUDA_ERROR = DALI_ERROR + 0x10000,
+
+  DALI_ERROR_FORCE_INT32 = 0x7fffffff
+} daliResult_t;
+
+/** A custom deleter
+ *
+ * This object aggregates a custom memory deleter, a context and a destructor.
+ *
+ * NOTE: This structure is typically passed by value for convenience.
+ */
+typedef struct _DALIDeleter {
+  /** A custom user-provided context.
+   *
+   * If the deleter is an object, then `deleter_ctx` is its `this` pointer.
+   * Stateless deleters may set it to NULL.
+   */
+  void *deleter_ctx;
+
+  /** Destroys the user-provided context.
+   *
+   * This function is called by DALI when the deleter is no longer necessary.
+   * The call is omitted if either `deleter_ctx` or `destroy_context` is NULL.
+   *
+   * @param deleter_ctx a custom user-provided context for the deleter
+   */
+  void (*destroy_context)(void *deleter_ctx);
+
+  /** Deletes a memory buffer `data`.
+   *
+   * @param deleter_ctx a custom user-provided context for the deleter
+   * @param data        the buffer to delete
+   * @param stream      If present, the deletion must be ordered after all operations
+   *                    scheduled in *stream; the deleter may either use stream-ordered deletion
+   *                    or otherwise ensure that the memory is valid until all operations
+   *                    scheduled on *stream prior to the call are complete.
+   *                    No operations in any stream scheduled after this call may use `data`.
+   */
+  void (*delete_buffer)(void *deleter_ctx, void *data, const cudaStream_t *stream);
+} daliDeleter_t;
+
+/** Returns the last error code.
+ *
+ * Returns the error code associated with the most recent unsuccessful call in the calling thread.
+ * Successful calls do not overwrite the value.
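+ *
+ * A minimal error-checking sketch (`pipe` is a hypothetical, previously created
+ * pipeline handle; it is not part of this function's API):
+ *
+ *   if (daliPipelineRun(pipe) != DALI_SUCCESS) {
+ *     fprintf(stderr, "%s: %s\n",
+ *             daliGetErrorName(daliGetLastError()),
+ *             daliGetLastErrorMessage());
+ *   }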
+ */ +DALI_API daliResult_t daliGetLastError(); + +/** Returns the last error message. + * + * Returns the detailed, context-specific message associated with the recent unsuccessful call + * in the callling thread. + * Succesful calls do not overwrite the value. + * The pointer is invalidated by intervening DALI calls in the same thread. + */ +DALI_API const char *daliGetLastErrorMessage(); + +/** Clears the last error for the calling thread. */ +DALI_API void daliClearLastError(); + +/** Returns a human-readable name of a given error + * + * The value is a pointer to a string literal. It's not invalidated other than by unloading DALI. + */ +DALI_API const char *daliGetErrorName(daliResult_t error); + +/** Returns a human-readable description of a given error. + * + * The value is a pointer to a string literal. It's not invalidated other than by unloading DALI. + */ +DALI_API const char *daliGetErrorDescription(daliResult_t error); + + +/** Initializes DALI or increments initialization count. + * + * @remark If this function is not called, DALI will be initialized implicitly on the first + * call to DALI APIs. When using implicit initialization, `daliShutdown` should not be used. + */ +DALI_API daliResult_t daliInit(); + +/** Decrements initialization counts and shuts down the library when the count reaches 0. + * + * Calling this function is optional. DALI will be shut down automatically when the program exits. + */ +DALI_API daliResult_t daliShutdown(); + +DALI_API daliResult_t daliPreallocateDeviceMemory2(size_t bytes, int device_id); + +/** Allocates `bytes` bytes of device memory on device `device_id`. + * + * The function works by allocating and immediately freeing the specified amount of device + * memory. This will typically release the memory back to DALI's memory pool, speeding up + * subsequent allocations. + */ +inline daliResult_t daliPreallocateDeviceMemory(size_t bytes, int device_id) { + return daliPreallocateDeviceMemory2(bytes, device_id); +} + +DALI_API daliResult_t daliPreallocatePinnedMemory2(size_t bytes); + +/** Allocates `bytes` bytes of device-accessible host memory. + * + * The function works by allocating and immediately freeing the specified amount of pinned + * memory. This will typically release the memory back to DALI's memory pool, speeding up + * subsequent allocations. + */ +inline daliResult_t daliPreallocatePinnedMemory(size_t bytes) { + return daliPreallocatePinnedMemory2(bytes); +} + +DALI_API daliResult_t daliReleaseUnusedMemory2(); + +/** Releases unused memory from DALI memory pools to the operating system. + * + * NOTE: Some of the memory pool implementations allocate memory from the OS in large chunks. + * If the chunk is occupied by even a tiny allocation, it will not be freed by this function. 
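+ *
+ * A typical usage sketch (the size below is an arbitrary example, not a recommendation):
+ *
+ *   daliPreallocateDeviceMemory(256 << 20, 0);  // warm up the pool on device 0
+ *   // ... build and run pipelines ...
+ *   daliReleaseUnusedMemory();                  // hand unused pool chunks back to the OS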
+ */ +inline daliResult_t daliReleaseUnusedMemory() { + return daliReleaseUnusedMemory2(); +} + + +/****************************************************************************/ +/*** Pipeline API ***********************************************************/ +/****************************************************************************/ + +typedef enum _DALIExecType { + /** The exeuctor processes data ahead, overlapping CPU/Mixed/GPU operators */ + DALI_EXEC_IS_PIPELINED = 1, + /** The executor operates in thread(s) other than the one that calls the pipeline Run */ + DALI_EXEC_IS_ASYNC = 2, + /** Deprecated: The executor uses separate CPU/GPU queues */ + DALI_EXEC_IS_SEPARATED = 4, + /** Use dynamic executor, with unrestricted operator order and aggressive memory reuse */ + DALI_EXEC_IS_DYNAMIC = 8, + + /** Use a synchronous, non-pipelined executor; useful for debugging. */ + DALI_EXEC_SIMPLE = 0, + /** Use an asynchronous pipelined executor, the default one. */ + DALI_EXEC_ASYNC_PIPELINED = DALI_EXEC_IS_PIPELINED | DALI_EXEC_IS_ASYNC, + /** Use the dynamic executor. + * + * The dynamic executor offers more flexibility, better memory efficiency and unrestricted + * lifetime of the pipeline outputs at the expense of more overhead in simple pipelines. */ + DALI_EXEC_DYNAMIC = DALI_EXEC_ASYNC_PIPELINED | DALI_EXEC_IS_DYNAMIC, +} daliExecType_t; + +typedef struct _DALIVersion { + int16_t major, minor; + int32_t patch; +} daliVersion_t; + + +/** DALI Pipeline construction parameters */ +typedef struct _DALIPipelineParams { + /** The version of this structure */ + daliVersion_t version; + + struct { + uint64_t max_batch_size_present : 1; + uint64_t num_threads_present : 1; + uint64_t device_id_present : 1; + uint64_t seed_present : 1; + uint64_t exec_flags_present : 1; + uint64_t exec_type_present : 1; + uint64_t enable_checkpointing_present : 1; + uint64_t enable_memory_stats_present : 1; + }; + int batch_size; + int num_threads; + int device_id; + int64_t seed; + daliExecType_t exec_type; + daliBool enable_checkpointing; + daliBool enable_memory_stats; +} daliPipelineParams_t; + +/** Describes an output of a DALI Pipeline */ +typedef struct _DALIPipelineOutputDesc { + const char *name; + daliStorageDevice_t device; + struct { + unsigned dtype_present : 1; + unsigned ndim_present : 1; + }; + daliDataType_t dtype; + int ndim; +} daliPipelineOutputDesc_t; + +/** Creates an empty pipeline. */ +DALI_API daliResult_t daliPipelineCreate( + daliPipeline_h *out_pipe_handle, + const daliPipelineParams_t *params); + +/** Destroys a DALI pipeline. */ +DALI_API daliResult_t daliPipelineDestroy(daliPipeline_h pipeline); + +/** Creates a DALI pipeline from a serialized one. + * + * This function creates and deserializes a pipeline. The parameters are used to override + * the serialized ones. + * + * @param out_pipe_handle [out] points to a value which will receive the handle to the newly + * created pipeline + * @param serialized_pipeline [in] a raw memory buffer containing the pipeline as protobuf + * @param serialized_pipeline_length the length, in bytes, of the `serialized_pipeline` buffer + * @param param_overrides [in] contains (partial) pipeline construction parameters; + * the parameters specified in this structure override the corresponding + * parameters deserialized from the buffer. 
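+ *
+ * A usage sketch (`buffer` and `size` are assumed to hold a pipeline serialized elsewhere,
+ * e.g. by the Python API; error handling is omitted for brevity):
+ *
+ *   daliPipeline_h pipe = NULL;
+ *   daliPipelineParams_t params = {0};
+ *   params.device_id_present = 1;
+ *   params.device_id = 0;
+ *   if (daliPipelineDeserialize(&pipe, buffer, size, &params) == DALI_SUCCESS) {
+ *     daliPipelineBuild(pipe);
+ *   }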
+ */ +DALI_API daliResult_t daliPipelineDeserialize( + daliPipeline_h *out_pipe_handle, + const void *serialized_pipeline, + size_t serialized_pipeline_size, + const daliPipelineParams_t *param_overrides); + + +/** Prepares the pipeline for execution */ +DALI_API daliResult_t daliPipelineBuild(daliPipeline_h pipeline); + +/** Runs the pipeline to fill the queues. + * + * DALI Pipeline can process several iterations ahead. This function pre-fills the queues. + * If the pipeline has ExternalSource operators (or other external inputs), they need to be + * supplied with enough data. + * + * @see daliPipelineFeedInput + * @see daliPipelineGetInputFeedCount + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_OPERATION + * @retval DALI_ERROR_OUT_OF_MEMORY + * + */ +DALI_API daliResult_t daliPipelinePrefetch(daliPipeline_h pipeline); + +/** Schedules one iteration. + * + * If the executor doesn't have DALI_EXEC_IS_ASYNC flag, the function will block until the + * operation is complete on host. + * + * NOTE: The relevant device code may still be running after this function returns. + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_OPERATION + * @retval DALI_ERROR_OUT_OF_MEMORY + */ +DALI_API daliResult_t daliPipelineRun(daliPipeline_h pipeline); + +/** Gets the required feed count for the specified input of the pipeline. + * + * @param pipeline [in] The pipeline + * @param out_feed_count [out] The number of batches to feed into the specified input before + * `daliPipelinePrefetch` can be called. + * @param input_name [in] The name of the input. + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_KEY if `input_name` is not a valid name of an input of the + * pipeline + */ +DALI_API daliResult_t daliPipelineGetFeedCount( + daliPipeline_h pipeline, + int *out_feed_count, + const char *input_name); + +typedef enum _DALIFeedInputFlags { + /** Do not make a copy of the input, use it directly instead. + * + * When daliTensorList_h is passed to daliFeedInput, a reference count is incremented + */ + DALI_FEED_INPUT_NO_COPY = 1, +} daliFeedInputFlags_t; + +/** Feeds the input `input_name` with data from `input_data`. + * + * @param pipeline the pipeline + * @param input_name the name of the input + * @param input_data the tensor list containing the data + * @param data_id an identifier of this data batch + * @param options + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_KEY if `input_name` is not a valid name of an input of the + * pipeline*/ +DALI_API daliResult_t daliPipelineFeedInput( + daliPipeline_h pipeline, + const char *input_name, + daliTensorList_h input_data, + const char *data_id, + daliFeedInputFlags_t options, + const cudaStream_t *stream); + +/** Gets the number of pipeline outputs. + * + * @param pipeline [in] The pipeline + * @param out_count [out] A pointer to a place where the number of pipeline outputs is stored. + */ +DALI_API daliResult_t daliPipelineGetOutputCount(daliPipeline_h pipeline, int *out_count); + +/** Gets a descriptor of the specified pipeline output. + * + * @param pipeline [in] The pipeline + * @param out_desc [out] A pointer to the returned descriptor. + * @param index [in] The 0-based index of the output. See `daliPipelineGetOutputCount`. + * + * NOTE: The names returned by this function match those specified when defining the pipeline, + * but don't necessarily indicate the output operators. When building the pipeline, + * operators may be added (e.g. 
to guarantee dense storage of the outputs) or removed + * (in the process of graph optimization). + */ +DALI_API daliResult_t daliPipelineGetOutputDesc( + daliPipeline_h pipeline, + daliPipelineOutputDesc_t *out_desc, + int index); + + +/** Pops the pipeline outputs from the pipeline's output queue. + * + * The outputs are ready for use on any stream. + * When no longer used, the outputs must be freed by destroying the `daliPipelineOutput` object. + * + * @param pipeline [in] The pipeline whose outputs are to be obtained + * @param out [out] A pointer to the output handle. The handle is NULL if the function + * reports an error. + * + * @return This function will report errors that occurred asynchronously when preparing the + * relevant data batch. If an error is reported, the output handle is NULL. + * + */ +DALI_API daliResult_t daliPipelinePopOutputs(daliPipeline_h pipeline, daliPipelineOutputs_h *out); + +/** Pops the pipeline outputs from the pipeline's output queue. + * + * The outputs are ready for use on the provided stream. + * When no longer used, the outputs must be freed by destroying the daliPipelineOutput object. + * + * This function works only with DALI_EXEC_IS_DYNAMIC. + * + * @param pipeline [in] The pipeline whose outputs are to be obtained + * @param out [out] A pointer to the output handle. The handle is NULL if the function + * reports an error. + * + * @return This function will report errors that occurred asynchronously when preparing the + * relevant data batch. If an error is reported, the output handle is NULL. + */ +DALI_API daliResult_t daliPipelinePopOutputsAsync( + daliPipeline_h pipeline, + daliPipelineOutputs_h *out, + cudaStream_t stream); + +/** Releases the pipeline outputs. + * + * @param pipeline [in] The pipeline outputs which are being released. + * + * This function destroys the daliPipelineOutputObject. The availability of the outputs differs + * between different executors. + * If DALI_EXEC_IS_DYNAMIC is used, the outputs may be used until their handles are destroyed. + * Otherwise, the outputs must not be used after this call has been made. + * + * @warning When NOT using DALI_EXEC_IS_DYNAMIC, the maximum number of live daliPipelineOutputs_h + * obtained from a single pipeline must not exceed the prefetch_queue_depth. An attempt + * to run the pipeline again after the + */ +DALI_API daliResult_t daliPipelineOutputsDestroy(daliPipelineOutputs_h out); + +typedef struct _DALIOperatorTrace { + const char *operator_name; + const char *trace; + const char *value; +} daliOperatorTrace_t; + +/** Gets all operator "traces" that were set when producing this set of outputs. + * + * @param outputs [in] The outputs + * @param out_traces [out] A return value pointer where, the a pointer to the beginning of an + * array of operator traces is stored. + * @param out_trace_count [out] A pointer that receives the number of traces. + * + * The output array is valid until the `outputs` handle is destroyed. + */ +DALI_API daliResult_t daliPipelineOutputsGetTraces( + daliPipelineOutputs_h outputs, + const daliOperatorTrace_t **out_traces, + int *out_trace_count); + +/** Gets a single operator "trace", identified by operator instance name and a trace name. + * + * @param outputs [in] The outputs + * @param out_trace [out] A pointer which receives a ppointer to the trace. + * @param operator_name [in] The name of the operator whose trace is being obtained. + * @param trace_name [in] The name of the trace. 
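+ *
+ * For example, reading a trace named "next_output_data_id" set by an operator instance
+ * named "reader" (both names are illustrative and depend on the pipeline definition;
+ * `outs` is a handle obtained from daliPipelinePopOutputs):
+ *
+ *   const char *value = NULL;
+ *   if (daliPipelineOutputsGetTrace(outs, &value, "reader",
+ *                                   "next_output_data_id") == DALI_SUCCESS)
+ *     printf("%s\n", value);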
+ * + * @retval DALI_SUCCESS On success + * @retval DALI_ERROR_INVALID_KEY When there's no trace that matches the names + */ +DALI_API daliResult_t daliPipelineOutputsGetTrace( + daliPipelineOutputs_h outputs, + const char **out_trace, + const char *operator_name, + const char *trace_name); + +/** Gets index-th output. + * + * The handle returned by this function must be released with a call to daliTensorListDecRef. + * + * Unless the pipeline uses DALI_EXEC_IS_DYNAMIC flag, the returned tensor list must not be used + * after the `outputs` handle is destroyed. + * + * @param outputs [in] The pipeline outputs object + * @param out [out] A pointer to a TensorList handle + * @param index [in] The index of the output to get a handle to. + */ +DALI_API daliResult_t daliPipelineOutputsGet( + daliPipelineOutputs_h outputs, + daliTensorList_h *out, + int index); + +/****************************************************************************/ +/*** Tensor and TensorList API **********************************************/ +/****************************************************************************/ + +typedef struct _DALITensorDesc { + /** The number of dimensions of the tensor. + * + * 0 denotes a scalar value. Negative values are invalid. + */ + int ndim; + + /** The shape of the tensor. + * + * The shape starts with the "slowest" dimension - a row-major 640x480 interleaved RGB image + * would have the shape [480, 640, 3]. + * + * The shape can be NULL if ndim == 0 + */ + const int64_t *shape; + + /** The type of the elements of the tensor */ + daliDataType_t dtype; + + /** The layout string of the tensor. + * + * A layout string consists of exactly `ndim` single-character axis labels. The entries in layout + * correspond to the dimension in the shape. A row-major interleaved image + * would have a layout "HWC" + */ + const char *layout; + + /** A pointer to the first element in the tensor. + * + * The data pointer can be NULL if the total volume of the tensor is 0. + * It must not be NULL if ndim == 0. + */ + void *data; +} daliTensorDesc_t; + +/** The specification of the buffer storage location */ +typedef struct _DALIBufferPlacement { + /** The type of the storage device (CPU or GPU). */ + daliStorageDevice_t device_type; + + /** CUDA device ordinal, as returned by CUDA runtime API. + * + * The value of this field is meaningful only if `device_type` is GPU or `pinned` is `true`. + * + * WARNING: The device_id returned by NVML (and thus, nvidia-smi) may be different. + */ + int device_id; + + /** Whether the CPU storage is "pinned" - e.g. allocated with cudaMallocHost */ + daliBool pinned; +} daliBufferPlacement_t; + +/****************************************************************************/ +/*** TensorList *************************************************************/ +/****************************************************************************/ + +/** Creates a TensorList on the specified device */ +DALI_API daliResult_t daliTensorListCreate( + daliTensorList_h *out, + daliBufferPlacement_t placement); + +/** Changes the size of the tensor, allocating more data if necessary. + * + * @param num_samples the number of samples in the batch + * @param ndim the number of dimensions of a sample + * @param shapes the concatenated shapes of the samples; + * must contain num_samples*ndim extents + * @param dtype the element type + * @param layout a layout string describing the order of axes in each sample (e.g. 
HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared * + */ +DALI_API daliResult_t daliTensorListResize( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout); + +/** Attaches an externally allocated buffer to a TensorList. + * + * Attaches an externally allocated buffer and a deleter to a TensorList. + * The deleter is called when the TensorList object is destroyed. + * + * The shape and sample offsets are used only during this function call and may be safely + * disposed of after the function returns. + * + * @param tensor_list the TensorList to attach the data to + * @param num_samples the number of samples in the list + * @param ndim the number of dimensions in the sample + * @param shapes the concatenated shapes of the samples; + * must contain num_samples*ndim extents + * @param dtype the element type + * @param layout a layout string describing the order of axes in each sample (e.g. HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared + * @param data the pointer to the data buffer + * @param sample_offsets optional; the offsets, in bytes, of the samples in the batch from the + * base pointer `data`; if NULL, the samples are assumed to be densely + * packed, with the 0-th sample starting at the address `data`. + * @param deleter an optional deleter called when the buffer reference count goes to zero + */ +DALI_API daliResult_t daliTensorListAttachBuffer( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter); + +/** Attaches externally allocated tensors to a TensorList. + * + * Attaches externally allocated buffers to a TensorList. + * If provided, the deleters are called on all buffers when the samples are destroyed. + * + * The shape and sample offsets are used only during this function call and may be safely + * disposed of after the function returns. + * + * @param tensor_list the TensorList to attach the data to + * @param num_samples the new number of samples in the batch + * @param ndim the number of dimensions in each sample; + * if num_samples > 0, this value can be set to -1 and the number of + * dimensions will be taken from samples[0].ndim + * @param dtype the type of the element of the tensor; + * if dtype is DALI_NO_TYPE, then the type is taken from samples[0].dtype + * @param layout a layout string describing the order of axes in each sample (e.g. HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared + * @param samples the descriptors of the tensors to be attached to the TensorList; + * the `ndim` and `dtype` of the samples must match and they must match the + * values of `ndim` and `dtype` parameters. + * @param sample_deleters optional deleters, one for each sample + * + * NOTE: If the sample_deleters specify the same object multiple times, its destructor must + * internally use reference counting to avoid multiple deletion. 
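+ *
+ * A simplified sketch attaching two externally owned CPU samples (`tl`, `buf0` and `buf1`
+ * are assumed to exist; no deleters are passed, so the caller retains ownership):
+ *
+ *   int64_t shape0[] = {480, 640, 3}, shape1[] = {600, 800, 3};
+ *   daliTensorDesc_t samples[2] = {
+ *     { 3, shape0, DALI_UINT8, "HWC", buf0 },
+ *     { 3, shape1, DALI_UINT8, "HWC", buf1 }
+ *   };
+ *   daliTensorListAttachSamples(tl, 2, 3, DALI_UINT8, "HWC", samples, NULL);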
+ */ +DALI_API daliResult_t daliTensorListAttachSamples( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters); + + +/** Returns the placement of the TensorLists's underlying buffer. + * + * @param tensor_list [in] the TensorList whose buffer placement is queried + * @param out_placement [out] a pointer to a place where the return value is stored. + */ +DALI_API daliResult_t daliTensorListGetBufferPlacement( + daliTensorList_h tensor_list, + daliBufferPlacement_t *out_placement); + +/** Associates a stream with the TensorList. + * + * @param stream an optional CUDA stream handle; if the handle poitner is NULL, + * host-synchronous behavior is prescribed. + * @param synchronize if true, the new stream (or host, if NULL), will be synchronized with the + * currently associated stream + */ +DALI_API daliResult_t daliTensorListSetStream( + daliTensorList_h tensor_list, + const cudaStream_t *stream, + daliBool synchronize +); + +/** Gets the stream associated with the TensorList. + * + * @retval DALI_SUCCESS if the stream handle was stored in *out_stream + * @retval DALI_NO_DATA if the tensor list is not associated with any stream + * error code otherwise + */ +DALI_API daliResult_t daliTensorListGetStream( + daliTensorList_h tensor_list, + cudaStream_t *out_stream +); + +/** Gets the readiness event associated with the TensorList. + * + * @param tensor_list [in] the tenosr list whose ready event is to be obtained + * @param out_event [out] the pointer to the return value + * + * @retval DALI_SUCCESS if the ready event handle was stored in *out_event + * @retval DALI_NO_DATA if the tensor list is not associated with a readiness event + * error code otherwise + */ +DALI_API daliResult_t daliTensorListGetReadyEvent( + daliTensorList_h tensor_list, + cudaEvent_t *out_event); + +/** Gets the readiness event associated with the TensorList or creates a new one. + * + * @param tensor_list [in] the tensor list to associate an even twith + * @param out_event [out] optional, the event handle + * + * The function ensures that a readiness event is associated with the tensor list. + * It can also get the event handle, if the output parameter pointer is not NULL. + * The function fails if the tensor list is not associated with a CUDA device. + */ +DALI_API daliResult_t daliTensorListGetOrCreateReadyEvent( + daliTensorList_h tensor_list, + cudaEvent_t *out_event); + + +/** Gets the shape of the tensor list + * + * @param tensor_list [in] the tensor list whose shape to obtain + * @param out_num_samples [out] optional; the number of samples in the batch + * @param out_ndim [out] optional; the number of dimensions in a sample + * @param out_shape [out] optional; the pointer to the concatenated array of sample shapes; + * contains (*out_num_samples) * (*out_ndim) elements + * + * The pointer returned in `out_shape` remains valid until the TensorList is destroyed or modified. + * If the caller is not intersted in some of the values, the pointers can be NULL. + */ +DALI_API daliResult_t daliTensorListGetShape( + daliTensorList_h tensor_list, + int *out_num_samples, + int *out_ndim, + const int64_t **out_shape); + +/** Gets a layout string describing the samples in the TensorList. 
+ * + * @param tensor_list [in] the tensor list whose layout to obtain + * @param out_layout [out] a pointer to the place where a pointer to the the layout string of + * the samples in the tensor list is stored + * + * When present, the layout string consists of exactly `sample_ndim` single-character _axis labels_. + * The layout does not contain the leading "sample" dimension (typically denoted as `N`), + * for example, a batch of images would typically have a "HWC" layout. + * The axis labels can be any character except the null character '\0'. + * If there's no layout set, the returned pointer is NULL. + * + * The pointer remains valid until the tensor list is destroyed, cleared, resized or its layout + * changed. + */ +DALI_API daliResult_t daliTensorListGetLayout( + daliTensorList_h tensor_list, + const char **out_layout); + +/** Sets the layout of the samples in the TensorList. + * + * Sets the axis labels that describe the layout of the data in the TensorList. The layout must not + * contain the leading sample dimension (typically `N`). For example, a batch of images would + * typically have a layout "HWC". + * If the layout string is NULL or empty, the layout is cleared; otherwise it must contain exactly + * sample_ndim nonzero characters. The axis labels don't have to be unique. + */ +DALI_API daliResult_t daliTensorListSetLayout( + daliTensorList_h tensor_list, + const char *layout +); + +/** Gets the "source info" metadata of a sample. + * + * Each sample can be associated with a "source info" string, which typically is the file name, + * but can also contain an index in a container, key, etc. + * + * @param tensor_list [in] The tensor list + * @param out_source_info [out] A pointer to a place where the pointer to the source_info string + * is stored. On success, `*out_source_info` contains a pointer to the + * beginning of a null-terminated string. If the sample doesn't have + * associated source info, a NULL pointer is returned. + * @param sample_idx [in] The index of a sample whose source info is queried. + * + * The return value is a string pointer. It is invalidated by destroying, clearing or resizing + * the TensorList as well as by assigning a new source info. + */ +DALI_API daliResult_t daliTensorListGetSourceInfo( + daliTensorList_h tensor_list, + const char **out_source_info, + int sample_idx); + +/** Gets the tensor descriptor of the specified sample. + * + * @param tensor_list [in] The tensor list + * @param out_desc [out] A poitner to a decriptor filled by this funciton. + * @param sample_idx [in] The index of the sample, whose descriptor to get. + * + * The descriptor stored in `out_desc` contains pointers. These pointers are invalidated by + * destroying, clearing or resizing the TensorList or re-attaching new data to it. + */ +DALI_API daliResult_t daliTensorListGetTensorDesc( + daliTensorList_h tensor_list, + daliTensorDesc_t *out_desc, + int sample_idx); + +/** Increments the reference count of the tensor list. + * + * @param tensor_list [in] A handle to the tensor list. + * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorListIncRef(daliTensorList_h tensor_list, int *new_count); + +/** Decrements the reference count of the tensor list. + * + * The handle is destroyed if the reference count reaches 0. + * When the client code no longer needs the handle, it must call daliTensorDecRef. + * + * + * @param tensor_list [in] A handle to the tensor list. 
+ * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorListDecRef(daliTensorList_h tensor_list, int *new_count); + +/** Decrements the reference count of the tensor list */ +DALI_API daliResult_t daliTensorListRefCount(daliTensorList_h tensor_list, int *count); + +/** Views a TensorList as a Tensor. + * + * Creates a new Tensor that points to the same data as the TensorList. The samples in the + * TensorList must have a uniform shape and the data in the TensorList must be contiguous. + * + * The tensor holds a reference to the data in the TensorList - it is safe to destroy the + * TensorList and continue using the resulting Tensor. + * + * @retval DALI_SUCCESS on success + * @retval DALI_ERROR_INVALID_OPERATION if the data is not contiguous + * @retval DALI_ERROR_INVALID_HANDLE the tensor list handle is invalid + * @return DALI_ERROR_INVALID_ARGUMENT the tensor handle pointer is NULL + * @return DALI_ERROR_OUT_OF_MEMORY + */ +DALI_API daliResult_t daliTensorListViewAsTensor( + daliTensorList_h tensor_list, + daliTensor_h *out_tensor); + +/***************************************************************************/ +/*** Tensor ****************************************************************/ +/***************************************************************************/ + +/** Creates a Tensor on the specified device */ +DALI_API daliResult_t daliTensorCreate( + daliTensor_h *out, + daliBufferPlacement_t placement); + +/** Changes the size of the tensor, allocating more data if necessary. + * + * @param num_samples the number of samples in the batch + * @param ndim the number of dimensions of a sample + * @param shape the shape of the tensor; can be NULL if ndim == 0 + * @param dtype the element type + * @param layout a layout string describing the order of axes in the tensor (e.g. HWC), + * if NULL, and the Tensor's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor's layout is cleared + */ +DALI_API daliResult_t daliTensorResize( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout); + +/** Attaches an externally allocated buffer to a Tensor. + * + * Attaches an externally allocated buffer and a deleter to a Tensor. + * The deleter is called when the Tensor object is destroyed. + * + * The shape and layout are used only during this function call and may be safely + * disposed of after the function returns. + * + * @param tensor the Tensor to attach the data to + * @param ndim the number of dimensions in the sample + * @param dtype the element type + * @param shape the shape of the tensor; ndim extents; can be NULL if ndim == 0 + * @param layout a layout string describing the order of axes in the tensor (e.g. HWC), + * if NULL, and the Tensor's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor's layout is cleared + * @param data the pointer to the data buffer + * @param deleter the deleter to be called when the tensor is destroyed + */ +DALI_API daliResult_t daliTensorAttachBuffer( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter); + +/** Returns the placement of the Tensor's underlying buffer. 
+ * + * @param tensor [in] the Tensor whose buffer placement is queried + * @param out_placement [out] a pointer to a place where the return value is stored. + */ +DALI_API daliResult_t daliTensorGetBufferPlacement( + daliTensor_h tensor, + daliBufferPlacement_t *out_placement); + +/** Associates a stream with the Tensor. + * + * @param stream an optional CUDA stream handle; if the handle poitner is NULL, + * host-synchronous behavior is prescribed. + * @param synchronize if true, the new stream (or host, if NULL), will be synchronized with the + * currently associated stream + */ +DALI_API daliResult_t daliTensorSetStream( + daliTensor_h tensor, + const cudaStream_t *stream, + daliBool synchronize +); + +/** Gets the stream associated with the Tensor. + * + * @retval DALI_SUCCESS if the stream handle was stored in *out_stream + * @retval DALI_NO_DATA if the tensor is not associated with any stream + * error code otherwise + */ +DALI_API daliResult_t daliTensorGetStream( + daliTensor_h tensor, + cudaStream_t *out_stream +); + +/** Gets the readiness event associated with the Tensor. + * + * @param tensor [in] the tenosr list whose ready event is to be obtained + * @param out_event [out] the pointer to the return value + * + * @retval DALI_SUCCESS if the ready event handle was stored in *out_event + * @retval DALI_NO_DATA if the tensor is not associated with a readiness event + * error code otherwise + */ +DALI_API daliResult_t daliTensorGetReadyEvent( + daliTensor_h tensor, + cudaEvent_t *out_event); + +/** Gets the readiness event associated with the Tensor or creates a new one. + * + * @param tensor [in] the tensor to associate an even twith + * @param out_event [out] optional, the event handle + * + * The function ensures that a readiness event is associated with the tensor. + * It can also get the event handle, if the output parameter pointer is not NULL. + * The function fails if the tensor is not associated with a CUDA device. + */ +DALI_API daliResult_t daliTensorGetOrCreateReadyEvent( + daliTensor_h tensor, + cudaEvent_t *out_event); + + +/** Gets the shape of the tensor + * + * @param tensor [in] the tensor whose shape to obtain + * @param out_ndim [out] optional; receives the number of dimensions + * @param out_shape [out] optional; receives the the pointer to the shape (array of extents) + * + * The pointer returned in `out_shape` remains valid until the Tensor is destroyed or modified. + * If the caller is not intersted in some of the values, the pointers can be NULL. + */ +DALI_API daliResult_t daliTensorGetShape( + daliTensor_h tensor, + int *out_ndim, + const int64_t **out_shape); + +/** Gets a layout string describing the data in the Tensor. + * + * @param tensor [in] the tensor whose layout to obtain + * @param out_layout [out] a pointer to the place where a pointer to the the layout string of + * the samples in the tensor is stored + * + * When present, the layout string consists of exactly `ndim` single-character _axis labels_. + * for example, an image would typically have a "HWC" layout. + * The axis labels can be any character except the null character '\0'. + * If there's no layout set, the returned pointer is NULL. + * + * The pointer remains valid until the tensor is destroyed, cleared, resized or its layout + * changed. + */ +DALI_API daliResult_t daliTensorGetLayout( + daliTensor_h tensor, + const char **out_layout); + +/** Sets the layout of the data in the Tensor. + * + * Sets the axis labels that describe the layout of the data in the Tensor. 
+ * If the layout string is NULL or empty, the layout is cleared; otherwise it must contain exactly + * sample_ndim nonzero characters. The axis labels don't have to be unique. + */ +DALI_API daliResult_t daliTensorSetLayout( + daliTensor_h tensor, + const char *layout +); + +/** Gets the "source info" metadata of a tensor. + * + * A tensor can be associated with a "source info" string, which typically is the file name, + * but can also contain an index in a container, key, etc. + * + * @param tensor [in] The tensor + * @param out_source_info [out] A pointer to a place where the pointer to the source_info string + * is stored. On success, `*out_source_info` contains a pointer to the + * beginning of a null-terminated string. If the sample doesn't have + * associated source info, a NULL pointer is returned. + * + * The return value is a string pointer. It is invalidated by destroying, clearing or resizing + * the Tensor as well as by assigning a new source info. + */ +DALI_API daliResult_t daliTensorGetSourceInfo( + daliTensor_h tensor, + const char **out_source_info); + +/** Gets the descriptor of the data in the tensor. + * + * @param tensor [in] The tensor + * @param out_desc [out] A poitner to a decriptor filled by this funciton. + * + * The descriptor stored in `out_desc` contains pointers. These pointers are invalidated by + * destroying, clearing or resizing the Tensor or re-attaching new data to it. + */ +DALI_API daliResult_t daliTensorGetDesc( + daliTensor_h tensor, + daliTensorDesc_t *out_desc); + +/** Increments the reference count of the tensor. + * + * @param tensor [in] A handle to the tensor. + * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorIncRef(daliTensor_h tensor, int *new_count); + +/** Decrements the reference count of the tensor. + * + * The handle is destroyed if the reference count reaches 0. + * When the client code no longer needs the handle, it must call daliTensorDecRef. + * + * + * @param tensor [in] A handle to the tensor. + * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorDecRef(daliTensor_h tensor, int *new_count); + +/** Decrements the reference count of the tensor */ +DALI_API daliResult_t daliTensorRefCount(daliTensor_h tensor, int *count); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // DALI_DALI_H_ From be303856b25911f8a79e5df97fe85c5305d907d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Sat, 1 Feb 2025 16:48:31 +0100 Subject: [PATCH 2/5] Fix documentation for XxxRefCount. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- include/dali/dali.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/dali/dali.h b/include/dali/dali.h index 5b174998429..1d36adbfe0c 100644 --- a/include/dali/dali.h +++ b/include/dali/dali.h @@ -826,7 +826,11 @@ DALI_API daliResult_t daliTensorListIncRef(daliTensorList_h tensor_list, int *ne */ DALI_API daliResult_t daliTensorListDecRef(daliTensorList_h tensor_list, int *new_count); -/** Decrements the reference count of the tensor list */ +/** Reads the current reference count of the tensor list. + * + * @param tensor_list [in] A handle to the tensor list. + * @param count [out] The ouput parameter that receives the reference count. 
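+ *
+ * A sketch of the reference-counting lifecycle (`tl` is a hypothetical TensorList handle):
+ *
+ *   daliTensorListIncRef(tl, NULL);    // share the handle with another consumer
+ *   int count = 0;
+ *   daliTensorListRefCount(tl, &count);
+ *   daliTensorListDecRef(tl, NULL);    // the object is destroyed when the count reaches 0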
+ */ DALI_API daliResult_t daliTensorListRefCount(daliTensorList_h tensor_list, int *count); /** Views a TensorList as a Tensor. @@ -1037,7 +1041,7 @@ DALI_API daliResult_t daliTensorGetDesc( /** Increments the reference count of the tensor. * - * @param tensor [in] A handle to the tensor. + * @param tensor [in] A handle to the tensor. * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. */ DALI_API daliResult_t daliTensorIncRef(daliTensor_h tensor, int *new_count); @@ -1048,12 +1052,16 @@ DALI_API daliResult_t daliTensorIncRef(daliTensor_h tensor, int *new_count); * When the client code no longer needs the handle, it must call daliTensorDecRef. * * - * @param tensor [in] A handle to the tensor. + * @param tensor [in] A handle to the tensor. * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. */ DALI_API daliResult_t daliTensorDecRef(daliTensor_h tensor, int *new_count); -/** Decrements the reference count of the tensor */ +/** Reads the current reference count of the tensor. + * + * @param tensor [in] A handle to the tensor. + * @param count [out] The ouput parameter that receives the reference count. + */ DALI_API daliResult_t daliTensorRefCount(daliTensor_h tensor, int *count); #ifdef __cplusplus From d159abfadbdeedfcd59a083ace8f37ae7ebf3a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:42:08 +0100 Subject: [PATCH 3/5] C API error handling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/CMakeLists.txt | 3 +- dali/c_api_2/CMakeLists.txt | 21 ++++ dali/c_api_2/c_api_internal_test.cc | 75 ++++++++++++ dali/c_api_2/error_handling.cc | 182 ++++++++++++++++++++++++++++ dali/c_api_2/error_handling.h | 68 +++++++++++ dali/c_api_2/init.cc | 69 +++++++++++ 6 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 dali/c_api_2/CMakeLists.txt create mode 100644 dali/c_api_2/c_api_internal_test.cc create mode 100644 dali/c_api_2/error_handling.cc create mode 100644 dali/c_api_2/error_handling.h create mode 100644 dali/c_api_2/init.cc diff --git a/dali/CMakeLists.txt b/dali/CMakeLists.txt index 4b4a3fd547d..e126c1d278e 100644 --- a/dali/CMakeLists.txt +++ b/dali/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2017-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,6 +46,7 @@ if (BUILD_DALI_PIPELINE) add_subdirectory(util) add_subdirectory(plugin) add_subdirectory(c_api) + add_subdirectory(c_api_2) endif() if(BUILD_DALI_OPERATORS) diff --git a/dali/c_api_2/CMakeLists.txt b/dali/c_api_2/CMakeLists.txt new file mode 100644 index 00000000000..77dd70d568a --- /dev/null +++ b/dali/c_api_2/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
+
+# Get all the source files
+
+# The headers here are private and should not be installed.
+# collect_headers(DALI_INST_HDRS PARENT_SCOPE)
+
+collect_sources(DALI_SRCS PARENT_SCOPE)
+collect_test_sources(DALI_TEST_SRCS PARENT_SCOPE)
diff --git a/dali/c_api_2/c_api_internal_test.cc b/dali/c_api_2/c_api_internal_test.cc
new file mode 100644
index 00000000000..1752c58d3af
--- /dev/null
+++ b/dali/c_api_2/c_api_internal_test.cc
@@ -0,0 +1,75 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <stdexcept>
+#define DALI_ALLOW_NEW_C_API
+#include "dali/dali.h"
+#include "dali/c_api_2/error_handling.h"
+#include "dali/core/cuda_error.h"
+
+namespace dali {
+
+template <typename ExceptionType>
+daliResult_t ThrowAndTranslate(ExceptionType &&ex) {
+  DALI_PROLOG();
+  throw std::forward<ExceptionType>(ex);
+  DALI_EPILOG();
+}
+
+template <typename ExceptionType>
+void CheckException(ExceptionType &&ex, daliResult_t expected_result) {
+  std::string message(ex.what());
+  daliResult_t ret = ThrowAndTranslate(std::forward<ExceptionType>(ex));
+  EXPECT_EQ(ret, expected_result);
+  EXPECT_EQ(daliGetLastError(), expected_result);
+  EXPECT_EQ(daliGetLastErrorMessage(), message);
+  std::cout << daliGetErrorName(ret) << " "
+            << daliGetLastErrorMessage() << std::endl;
+  daliClearLastError();
+  EXPECT_EQ(daliGetLastError(), DALI_SUCCESS);
+  EXPECT_STREQ(daliGetLastErrorMessage(), "");
+}
+
+TEST(CAPI2InternalTest, ErrorTranslation) {
+  CheckException(std::runtime_error("Runtime Error"), DALI_ERROR_INVALID_OPERATION);
+  CheckException(std::bad_alloc(), DALI_ERROR_OUT_OF_MEMORY);
+  CheckException(CUDABadAlloc(), DALI_ERROR_OUT_OF_MEMORY);
+  CheckException(std::logic_error("Logic dictates that it's an error"), DALI_ERROR_INTERNAL);
+  CheckException(std::out_of_range("Bullet left the shooting range"), DALI_ERROR_OUT_OF_RANGE);
+  CheckException(invalid_key("This key doesn't fit into the keyhole."), DALI_ERROR_INVALID_KEY);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::no_such_file_or_directory)),
+                 DALI_ERROR_PATH_NOT_FOUND);
+  CheckException(std::system_error(std::make_error_code(std::errc::no_such_device_or_address)),
+                 DALI_ERROR_PATH_NOT_FOUND);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::no_space_on_device)),
+                 DALI_ERROR_IO_ERROR);
+  CheckException(std::system_error(
+                   std::make_error_code(std::errc::inappropriate_io_control_operation)),
+                 DALI_ERROR_IO_ERROR);
+  CheckException(std::system_error(std::make_error_code(std::io_errc::stream)),
+                 DALI_ERROR_IO_ERROR);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::not_enough_memory)),
+                 DALI_ERROR_OUT_OF_MEMORY);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::bad_file_descriptor)),
+                 DALI_ERROR_SYSTEM);
+  CheckException(std::system_error(std::make_error_code(std::errc::too_many_files_open)),
+                 DALI_ERROR_SYSTEM);
+}
+
+}  // namespace dali
diff --git 
a/dali/c_api_2/error_handling.cc b/dali/c_api_2/error_handling.cc new file mode 100644 index 00000000000..79296eb7bd1 --- /dev/null +++ b/dali/c_api_2/error_handling.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "dali/c_api_2/error_handling.h" +#include "dali/core/error_handling.h" +#include "dali/core/cuda_error.h" + +namespace dali::c_api { + +struct ErrorInfo { + inline ErrorInfo() { + message.reserve(1024); // should help with properly reporting OOMs + } + daliResult_t result = DALI_SUCCESS; + std::string message; +}; + +thread_local ErrorInfo g_daliLastError; + +struct ErrorDesc { + const char *name, *description; +}; + +ErrorDesc GetErrorDesc(daliResult_t result) { + #define RESULT_DESC(name, desc) case DALI_##name: return { "DALI_" #name, desc } + #define ERROR_DESC(name, desc) RESULT_DESC(ERROR_##name, desc) + + switch (result) { + RESULT_DESC(SUCCESS, "The operation was successful."); + RESULT_DESC(NO_DATA, "The operation was successful, but didn't return any data."); + RESULT_DESC(NOT_READY, "The query succeeded, but the operation queried is still pending."); + ERROR_DESC(INVALID_HANDLE, "The operation received an invalid DALI handle."); + ERROR_DESC(INVALID_ARGUMENT, "An invalid argument was specified."); + ERROR_DESC(INVALID_TYPE, "An argument of invalid type encountered."); + ERROR_DESC(INVALID_OPERATION, "An invalid operation was requested."); + ERROR_DESC(OUT_OF_RANGE, "An argument is out of valid range."); + ERROR_DESC(INVALID_KEY, "The operation received an invalid dictionary key."); + + ERROR_DESC(SYSTEM, "An operating system routine failed."); + ERROR_DESC(PATH_NOT_FOUND, "A non-existent or non-accessible file path was encountered."); + ERROR_DESC(IO_ERROR, "An I/O operation failed"); + ERROR_DESC(OUT_OF_MEMORY, "Cannot allocate memory"); + ERROR_DESC(INTERNAL, "An internal error occurred"); + ERROR_DESC(UNLOADING, "DALI is unloading - either daliShutdown was called or " + "the process is shutting down."); + ERROR_DESC(CUDA_ERROR, "A CUDA call has failed."); + default: + return { "", "" }; + } +} + +daliResult_t SetLastError(daliResult_t result, const char *message) { + g_daliLastError.result = result; + g_daliLastError.message = message; + return result; +} + +daliResult_t HandleError(std::exception_ptr ex) { + try { + std::rethrow_exception(std::move(ex)); + } catch (dali::c_api::InvalidHandle &e) { + return SetLastError(DALI_ERROR_INVALID_HANDLE, e.what()); + } catch (std::invalid_argument &e) { + return SetLastError(DALI_ERROR_INVALID_ARGUMENT, e.what()); + } catch (dali::CUDAError &e) { + if (e.is_rt_api()) { + if (e.rt_error() == cudaErrorNotReady) + return SetLastError(DALI_NOT_READY, e.what()); + } else if (e.is_drv_api()) { + if (e.drv_error() == CUDA_ERROR_NOT_READY) + return SetLastError(DALI_NOT_READY, e.what()); + } + return SetLastError(DALI_ERROR_CUDA_ERROR, e.what()); + } catch (dali::CUDABadAlloc &e) { + 
return SetLastError(DALI_ERROR_OUT_OF_MEMORY, e.what()); + } catch (std::bad_alloc &e) { + return SetLastError(DALI_ERROR_OUT_OF_MEMORY, e.what()); + } catch (dali::invalid_key &e) { + return SetLastError(DALI_ERROR_INVALID_KEY, e.what()); + } catch (std::out_of_range &e) { + return SetLastError(DALI_ERROR_OUT_OF_RANGE, e.what()); + } catch (std::system_error &e) { + if (e.code().category() == std::generic_category()) { + daliResult_t result = [&]() { + switch (static_cast(e.code().value())) { + case std::errc::no_such_file_or_directory: + case std::errc::no_such_device: + case std::errc::no_such_device_or_address: + return DALI_ERROR_PATH_NOT_FOUND; + case std::errc::not_enough_memory: + return DALI_ERROR_OUT_OF_MEMORY; + case std::errc::timed_out: + return DALI_ERROR_TIMEOUT; + case std::errc::address_family_not_supported: + case std::errc::address_in_use: + case std::errc::address_not_available: + case std::errc::already_connected: + case std::errc::broken_pipe: + case std::errc::connection_aborted: + case std::errc::connection_already_in_progress: + case std::errc::connection_refused: + case std::errc::connection_reset: + case std::errc::device_or_resource_busy: + case std::errc::directory_not_empty: + case std::errc::file_exists: + case std::errc::file_too_large: + case std::errc::filename_too_long: + case std::errc::host_unreachable: + case std::errc::inappropriate_io_control_operation: + case std::errc::io_error: + case std::errc::is_a_directory: + case std::errc::message_size: + case std::errc::network_down: + case std::errc::network_reset: + case std::errc::network_unreachable: + case std::errc::no_buffer_space: + case std::errc::no_message: + case std::errc::no_space_on_device: + case std::errc::not_a_directory: + case std::errc::not_a_socket: + case std::errc::read_only_file_system: + return DALI_ERROR_IO_ERROR; + default: + return DALI_ERROR_SYSTEM; + } + }(); + return SetLastError(result, e.what()); + } else if (e.code().category() == std::iostream_category()) { + return SetLastError(DALI_ERROR_IO_ERROR, e.what()); + } else { + return SetLastError(DALI_ERROR_SYSTEM, e.what()); + } + } catch (std::runtime_error &e) { + return SetLastError(DALI_ERROR_INVALID_OPERATION, e.what()); + } catch (std::exception &e) { + return SetLastError(DALI_ERROR_INTERNAL, e.what()); + } catch (const char *e) { // handle strings thrown as exceptions + return SetLastError(DALI_ERROR_INTERNAL, e); + } catch (const std::string &e) { // handle strings thrown as exceptions + return SetLastError(DALI_ERROR_INTERNAL, e.c_str()); + } catch (...) { + return SetLastError(DALI_ERROR_INTERNAL, ""); + } +} + +} // namespace dali::c_api + +using namespace dali; // NOLINT + +daliResult_t daliGetLastError() { + return c_api::g_daliLastError.result; +} + +const char *daliGetLastErrorMessage() { + return c_api::g_daliLastError.message.c_str(); +} + +void daliClearLastError() { + c_api::g_daliLastError = {}; +} + +const char *daliGetErrorName(daliResult_t result) { + return c_api::GetErrorDesc(result).name; +} + +const char *daliGetErrorDescription(daliResult_t result) { + return c_api::GetErrorDesc(result).description; +} diff --git a/dali/c_api_2/error_handling.h b/dali/c_api_2/error_handling.h new file mode 100644 index 00000000000..e9cdcaf990d --- /dev/null +++ b/dali/c_api_2/error_handling.h @@ -0,0 +1,68 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_ERROR_HANDLING_H_ +#define DALI_C_API_2_ERROR_HANDLING_H_ + +#include +#include +#include +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/core/error_handling.h" + +inline std::ostream &operator<<(std::ostream &os, daliResult_t result) { + const char *e = daliGetErrorName(result); + if (e[0] == '<') + os << "(result) << ">"; + else + os << e; + return os; +} + +inline std::string to_string(daliResult_t result) { + std::stringstream ss; + ss << result; + return ss.str(); +} + +namespace dali { +namespace c_api { + +DLL_PUBLIC daliResult_t HandleError(std::exception_ptr ex); +DLL_PUBLIC daliResult_t CheckInit(); + +class InvalidHandle : public std::invalid_argument { + public: + InvalidHandle() : std::invalid_argument("The handle is invalid") {} + explicit InvalidHandle(const std::string &what) : std::invalid_argument(what) {} + explicit InvalidHandle(const char *what) : std::invalid_argument(what) {} +}; + +inline InvalidHandle NullHandle() { return InvalidHandle("The handle must not be NULL."); } + +inline InvalidHandle NullHandle(const char *what_handle) { + return InvalidHandle(make_string("The ", what_handle, " handle must not be NULL.")); +} + +} // namespace c_api +} // namespace dali + +#define DALI_PROLOG() try { if (auto err = dali::c_api::CheckInit()) return err; else; +#define DALI_EPILOG() return DALI_SUCCESS; } catch (...) { \ + return ::dali::c_api::HandleError(std::current_exception()); \ +} + +#endif // DALI_C_API_2_ERROR_HANDLING_H_ diff --git a/dali/c_api_2/init.cc b/dali/c_api_2/init.cc new file mode 100644 index 00000000000..33fbe4695d2 --- /dev/null +++ b/dali/c_api_2/init.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/c_api_2/error_handling.h" +#include "dali/pipeline/init.h" +#include "dali/pipeline/operator/op_spec.h" + +using namespace dali; // NOLINT + +namespace { +std::atomic g_init_count; +std::atomic g_was_initialized; +} // namespace + +namespace dali::c_api { + daliResult_t CheckInit() { + if (g_init_count <= 0) { + if (g_was_initialized) + return DALI_ERROR_UNLOADING; + else + return daliInit(); + } + return DALI_SUCCESS; + } +} // namespace dali::c_api + +daliResult_t daliInit() { + try { // cannot use DALI_PROLOG in this function, since DALI isn't initialized yet + static int init = []() { + DALIInit(OpSpec("CPUAllocator"), + OpSpec("PinnedCPUAllocator"), + OpSpec("GPUAllocator")); + return 0; + }(); + (void)init; + g_init_count++; + g_was_initialized = true; + return DALI_SUCCESS; + } catch (...) { + return ::dali::c_api::HandleError(std::current_exception()); \ + } +} + +daliResult_t daliShutdown() { + DALI_PROLOG(); + int init_count = --g_init_count; + if (init_count < 0) { + ++g_init_count; + return DALI_ERROR_UNLOADING; + } + if (init_count == 0) { + // actual shutdown code goes here + } + DALI_EPILOG(); +} From 2ac315024b90bbdcc14424a6ec3cb9a36f58ff7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:43:03 +0100 Subject: [PATCH 4/5] C API utilities. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/c_api_2/managed_handle.h | 124 ++++++++++++++++++++++++++++++++++ dali/c_api_2/ref_counting.h | 119 ++++++++++++++++++++++++++++++++ dali/c_api_2/validation.cc | 41 +++++++++++ dali/c_api_2/validation.h | 112 ++++++++++++++++++++++++++++++ 4 files changed, 396 insertions(+) create mode 100644 dali/c_api_2/managed_handle.h create mode 100644 dali/c_api_2/ref_counting.h create mode 100644 dali/c_api_2/validation.cc create mode 100644 dali/c_api_2/validation.h diff --git a/dali/c_api_2/managed_handle.h b/dali/c_api_2/managed_handle.h new file mode 100644 index 00000000000..ce365072d7e --- /dev/null +++ b/dali/c_api_2/managed_handle.h @@ -0,0 +1,124 @@ + // Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
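Note: the initialization above is reference-counted: daliInit() bumps a counter, daliShutdown() drops it, an extra daliShutdown() reports DALI_ERROR_UNLOADING, and every other entry point lazily initializes through CheckInit(). A minimal sketch of the intended pairing on the client side (illustrative only, not part of the patch):

#define DALI_ALLOW_NEW_C_API
#include "dali/dali.h"
#include <cstdio>

int RunClientExample() {
  if (daliInit() != DALI_SUCCESS) {              // explicit init; entry points would also lazy-init
    std::fprintf(stderr, "%s\n", daliGetLastErrorMessage());
    return 1;
  }
  // ... use the API ...
  daliShutdown();                                // the last matching call shuts the library down
  return 0;
}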
+ +#ifndef DALI_C_API_2_MANAGED_HANDLE_H_ +#define DALI_C_API_2_MANAGED_HANDLE_H_ + +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/core/unique_handle.h" + +namespace dali::c_api { + +template +class RefCountedHandle { + public: + using handle_type = HandleType; + static constexpr handle_type null_handle() { return 0; } + + constexpr RefCountedHandle() : handle_(Actual::null_handle()) {} + constexpr explicit RefCountedHandle(handle_type h) : handle_(h) {} + ~RefCountedHandle() { reset(); } + + RefCountedHandle(const RefCountedHandle &h) { + handle_ = h.handle_; + if (*this) + Actual::IncRef(handle_); + } + + RefCountedHandle(RefCountedHandle &&h) noexcept { + handle_ = h.handle_; + h.handle_ = Actual::null_handle(); + } + + RefCountedHandle &operator=(const RefCountedHandle &other) { + if (other.handle_) { + Actual::IncRef(other.handle_); + } + reset(); + handle_ = other.handle_; + return *this; + } + + RefCountedHandle &operator=(RefCountedHandle &&other) noexcept { + std::swap(handle_, other.handle_); + other.reset(); + return *this; + } + + void reset() noexcept { + if (*this) + Actual::DecRef(handle_); + handle_ = Actual::null_handle(); + } + + [[nodiscard]] handle_type release() noexcept { + auto h = handle_; + handle_ = Actual::null_handle(); + return h; + } + + handle_type get() const noexcept { return handle_; } + operator handle_type() const noexcept { return get(); } + + explicit operator bool() const noexcept { return handle_ != Actual::null_handle(); } + + private: + handle_type handle_; +}; + +#define DALI_C_UNIQUE_HANDLE(Resource) \ +class Resource##Handle : public dali::UniqueHandle { \ + public: \ + using UniqueHandle::UniqueHandle; \ + static void DestroyHandle(dali##Resource##_h h) { \ + auto result = dali##Resource##Destroy(h); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + } \ +} + +#define DALI_C_REF_HANDLE(Resource) \ +class Resource##Handle \ +: public dali::c_api::RefCountedHandle { \ + public: \ + using RefCountedHandle::RefCountedHandle; \ + static int IncRef(dali##Resource##_h h) { \ + int ref = 0; \ + auto result = dali##Resource##IncRef(h, &ref); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + return ref; \ + } \ + static int DecRef(dali##Resource##_h h) { \ + int ref = 0; \ + auto result = dali##Resource##DecRef(h, &ref); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + return ref; \ + } \ +} + +DALI_C_UNIQUE_HANDLE(Pipeline); +DALI_C_UNIQUE_HANDLE(PipelineOutputs); +DALI_C_REF_HANDLE(TensorList); +DALI_C_REF_HANDLE(Tensor); + + +} // namespace dali::c_api + +#endif // DALI_C_API_2_MANAGED_HANDLE_H_ diff --git a/dali/c_api_2/ref_counting.h b/dali/c_api_2/ref_counting.h new file mode 100644 index 00000000000..2db76671d14 --- /dev/null +++ b/dali/c_api_2/ref_counting.h @@ -0,0 +1,119 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_REF_COUNTING_H_ +#define DALI_C_API_2_REF_COUNTING_H_ + +#include +#include +#include + +namespace dali::c_api { + +class RefCountedObject { + public: + int IncRef() noexcept { + return std::atomic_fetch_add_explicit(&ref_, 1, std::memory_order_relaxed) + 1; + } + + int DecRef() noexcept { + int ret = std::atomic_fetch_sub_explicit(&ref_, 1, std::memory_order_acq_rel) - 1; + if (!ret) + delete this; + return ret; + } + + int RefCount() const noexcept { + return ref_.load(std::memory_order_relaxed); + } + + virtual ~RefCountedObject() = default; + private: + std::atomic ref_{1}; +}; + +template +class RefCountedPtr { + public: + constexpr RefCountedPtr() noexcept = default; + + explicit RefCountedPtr(T *ptr, bool inc_ref = false) noexcept : ptr_(ptr) { + if (inc_ref && ptr_) + ptr_->IncRef(); + } + + ~RefCountedPtr() { + reset(); + } + + template , int> = 0> + RefCountedPtr(const RefCountedPtr &other) noexcept : ptr_(other.ptr_) { + if (ptr_) + ptr_->IncRef(); + } + + template , int> = 0> + RefCountedPtr(RefCountedPtr &&other) noexcept : ptr_(other.ptr_) { + other.ptr_ = nullptr; + } + + template + std::enable_if_t, RefCountedPtr> & + operator=(const RefCountedPtr &other) noexcept { + if (ptr_ == other.ptr_) + return *this; + if (other.ptr_) + other.ptr_->IncRef(); + ptr_->DecRef(); + ptr_ = other.ptr_; + return *this; + } + + template + std::enable_if_t, RefCountedPtr> & + operator=(RefCountedPtr &&other) noexcept { + if (&other == this) + return *this; + std::swap(ptr_, other.ptr_); + other.reset(); + } + + void reset() noexcept { + if (ptr_) + ptr_->DecRef(); + ptr_ = nullptr; + } + + [[nodiscard]] T *release() noexcept { + T *p = ptr_; + ptr_ = nullptr; + return p; + } + + constexpr T *operator->() const & noexcept { return ptr_; } + + constexpr T &operator*() const & noexcept { return *ptr_; } + + constexpr T *get() const & noexcept { return ptr_; } + + private: + template + friend class RefCountedPtr; + T *ptr_ = nullptr; +}; + +} // namespace dali::c_api + +#endif // DALI_C_API_2_REF_COUNTING_H_ + diff --git a/dali/c_api_2/validation.cc b/dali/c_api_2/validation.cc new file mode 100644 index 00000000000..7513ae68a44 --- /dev/null +++ b/dali/c_api_2/validation.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "dali/c_api_2/validation.h" + +namespace dali::c_api { + +void ValidateDeviceId(int device_id, bool allow_cpu_only) { + if (device_id == CPU_ONLY_DEVICE_ID && allow_cpu_only) + return; + + static int dev_count = []() { + int ndevs = 0; + CUDA_CALL(cudaGetDeviceCount(&ndevs)); + return ndevs; + }(); + + if (dev_count < 1) + throw std::runtime_error("No CUDA device found."); + + if (device_id < 0 || device_id >= dev_count) { + throw std::out_of_range(make_string( + "The device id ", device_id, " is invalid." 
+ " Valid device ids are [0..", dev_count-1, "].")); + } +} + +} // namespace dali::c_api diff --git a/dali/c_api_2/validation.h b/dali/c_api_2/validation.h new file mode 100644 index 00000000000..62c9f3425aa --- /dev/null +++ b/dali/c_api_2/validation.h @@ -0,0 +1,112 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_VALIDATION_H_ +#define DALI_C_API_2_VALIDATION_H_ + +#include +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/core/format.h" +#include "dali/core/span.h" +#include "dali/core/tensor_shape_print.h" +#include "dali/pipeline/data/types.h" + +namespace dali::c_api { + +inline void Validate(daliDataType_t dtype) { + if (!TypeTable::TryGetTypeInfo(dtype)) + throw std::invalid_argument(make_string("Invalid data type: ", dtype)); +} + +inline void Validate(const TensorLayout &layout, int ndim, bool allow_empty = true) { + if (layout.empty() && allow_empty) + return; + if (layout.ndim() != ndim) + throw std::invalid_argument(make_string( + "The layout '", layout, "' cannot describe ", ndim, "-dimensional data.")); +} + +template +void ValidateSampleShape( + int sample_index, + ShapeLike &&sample_shape, + std::optional expected_ndim = std::nullopt) { + int ndim = std::size(sample_shape); + if (expected_ndim.has_value() && ndim != *expected_ndim) + throw std::invalid_argument(make_string( + "Unexpected number of dimensions (", ndim, ") in sample ", sample_index, + ". Expected ", *expected_ndim, ".")); + + for (int j = 0; j < ndim; j++) + if (sample_shape[j] < 0) + throw std::invalid_argument(make_string( + "Negative extent encountered in the shape of sample ", sample_index, ". Offending shape: ", + TensorShape<-1>(sample_shape))); +} + +inline void ValidateNumSamples(int num_samples) { + if (num_samples < 0) + throw std::invalid_argument("The number of samples must not be negative."); +} + +inline void ValidateNDim(int ndim) { + if (ndim < 0) + throw std::invalid_argument("The number of dimensions must not be negative."); +} + + +inline void ValidateShape( + int ndim, + const int64_t *shape) { + ValidateNDim(ndim); + if (ndim > 0 && !shape) + throw std::invalid_argument("The `shape` must not be NULL when ndim > 0."); + + for (int j = 0; j < ndim; j++) + if (shape[j] < 0) + throw std::invalid_argument(make_string( + "The tensor shape must not contain negative extents. 
Got: ", + TensorShape<-1>(make_cspan(shape, ndim)))); +} + +inline void ValidateShape(int num_samples, int ndim, const int64_t *shapes) { + ValidateNumSamples(num_samples); + ValidateNDim(ndim); + if (!shapes && num_samples > 0 && ndim > 0) + throw std::invalid_argument("The `shapes` are required for non-scalar (ndim>=0) samples."); + + if (ndim > 0) { + for (int i = 0; i < num_samples; i++) + ValidateSampleShape(i, make_cspan(&shapes[i*ndim], ndim)); + } +} + +inline void Validate(daliStorageDevice_t device_type) { + if (device_type != DALI_STORAGE_CPU && device_type != DALI_STORAGE_GPU) + throw std::invalid_argument(make_string("Invalid storage device type: ", device_type)); +} + +void ValidateDeviceId(int device_id, bool allow_cpu_only); + +inline void Validate(const daliBufferPlacement_t &placement) { + Validate(placement.device_type); + if (placement.device_type == DALI_STORAGE_GPU || placement.pinned) + ValidateDeviceId(placement.device_id, placement.pinned); +} + +} // namespace dali::c_api + +#endif // DALI_C_API_2_VALIDATION_H_ From e27aa118bd861428eeed7f3e6aaec5bb1ae6f97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:44:51 +0100 Subject: [PATCH 5/5] Tensor and TensorList C API wrappers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/c_api_2/data_objects.cc | 362 ++++++++++++++++++ dali/c_api_2/data_objects.h | 613 ++++++++++++++++++++++++++++++ dali/c_api_2/data_objects_test.cc | 414 ++++++++++++++++++++ 3 files changed, 1389 insertions(+) create mode 100644 dali/c_api_2/data_objects.cc create mode 100644 dali/c_api_2/data_objects.h create mode 100644 dali/c_api_2/data_objects_test.cc diff --git a/dali/c_api_2/data_objects.cc b/dali/c_api_2/data_objects.cc new file mode 100644 index 00000000000..a579cbb70d7 --- /dev/null +++ b/dali/c_api_2/data_objects.cc @@ -0,0 +1,362 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "dali/c_api_2/data_objects.h" +#include "dali/c_api_2/error_handling.h" + +namespace dali::c_api { + +RefCountedPtr ITensor::Create(daliBufferPlacement_t placement) { + Validate(placement); + switch (placement.device_type) { + case DALI_STORAGE_CPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + if (placement.pinned) + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + case DALI_STORAGE_GPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + default: + assert(!"Unreachable code"); + return {}; + } +} + +RefCountedPtr ITensorList::Create(daliBufferPlacement_t placement) { + Validate(placement); + switch (placement.device_type) { + case DALI_STORAGE_CPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + if (placement.pinned) + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + case DALI_STORAGE_GPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + default: + assert(!"Unreachable code"); + return {}; + } +} + +ITensor *ToPointer(daliTensor_h handle) { + if (!handle) + throw NullHandle("Tensor"); + return static_cast(handle); +} + +ITensorList *ToPointer(daliTensorList_h handle) { + if (!handle) + throw NullHandle("TensorList"); + return static_cast(handle); +} + +} // namespace dali::c_api + +using namespace dali::c_api; // NOLINT + +template +std::optional ToOptional(const T *nullable) { + if (nullable == nullptr) + return std::nullopt; + else + return *nullable; +} + +////////////////////////////////////////////////////////////////////////////// +// Tensor +////////////////////////////////////////////////////////////////////////////// + +daliResult_t daliTensorCreate(daliTensor_h *out, daliBufferPlacement_t placement) { + DALI_PROLOG(); + if (!out) + throw std::invalid_argument("The output parameter must not be NULL."); + auto t = dali::c_api::ITensor::Create(placement); + *out = t.release(); // no throwing allowed after this line! 
+ DALI_EPILOG(); +} + +daliResult_t daliTensorIncRef(daliTensor_h t, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(t); + int r = ptr->IncRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorDecRef(daliTensor_h t, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(t); + int r = ptr->DecRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorRefCount(daliTensor_h t, int *ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(t); + int r = ptr->RefCount(); + if (!ref) + throw std::invalid_argument("The output parameter must not be NULL."); + *ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorAttachBuffer( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->AttachBuffer(ndim, shape, dtype, layout, data, deleter); + DALI_EPILOG(); +} + +daliResult_t daliTensorResize( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->Resize(ndim, shape, dtype, layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorSetLayout( + daliTensor_h tensor, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->SetLayout(layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorGetLayout( + daliTensor_h tensor, + const char **layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + if (!layout) + throw std::invalid_argument("The output parameter `layout` must not be be NULL"); + *layout = ptr->GetLayout(); + DALI_EPILOG(); +} + +daliResult_t daliTensorGetStream( + daliTensor_h tensor, + cudaStream_t *out_stream) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + if (!out_stream) + throw std::invalid_argument("The output parameter `out_stream` must not be NULL"); + auto str = ptr->GetStream(); + *out_stream = str.has_value() ? *str : cudaStream_t(-1); + return str.has_value() ? DALI_SUCCESS : DALI_NO_DATA; + DALI_EPILOG(); +} + +daliResult_t daliTensorSetStream( + daliTensor_h tensor, + const cudaStream_t *stream, + daliBool synchronize) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->SetStream(ToOptional(stream), synchronize); + DALI_EPILOG(); +} + +daliResult_t daliTensorGetDesc( + daliTensor_h tensor, + daliTensorDesc_t *out_desc) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + if (!out_desc) + throw std::invalid_argument("The output parameter `out_desc` must not be NULL."); + *out_desc = ptr->GetDesc(); + DALI_EPILOG(); +} + +////////////////////////////////////////////////////////////////////////////// +// TensorList +////////////////////////////////////////////////////////////////////////////// + +daliResult_t daliTensorListCreate(daliTensorList_h *out, daliBufferPlacement_t placement) { + DALI_PROLOG(); + if (!out) + throw std::invalid_argument("The output parameter must not be NULL."); + auto tl = dali::c_api::ITensorList::Create(placement); + *out = tl.release(); // no throwing allowed after this line! 
+ DALI_EPILOG(); +} + +daliResult_t daliTensorListIncRef(daliTensorList_h tl, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + int r = ptr->IncRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListDecRef(daliTensorList_h tl, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + int r = ptr->DecRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListRefCount(daliTensorList_h tl, int *ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + if (!ref) + throw std::invalid_argument("The output pointer must not be NULL."); + int r = ptr->RefCount(); + *ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListAttachBuffer( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->AttachBuffer(num_samples, ndim, shapes, dtype, layout, data, sample_offsets, deleter); + DALI_EPILOG(); +} + +daliResult_t daliTensorListAttachSamples( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->AttachSamples(num_samples, ndim, dtype, layout, samples, sample_deleters); + DALI_EPILOG(); +} + +daliResult_t daliTensorListResize( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->Resize(num_samples, ndim, shapes, dtype, layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorListSetLayout( + daliTensorList_h tensor_list, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->SetLayout(layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetLayout( + daliTensorList_h tensor_list, + const char **layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!layout) + throw std::invalid_argument("The output parameter `layout` must not be be NULL"); + *layout = ptr->GetLayout(); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetStream( + daliTensorList_h tensor_list, + cudaStream_t *out_stream) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_stream) + throw std::invalid_argument("The output parameter `out_stream` must not be NULL"); + auto str = ptr->GetStream(); + *out_stream = str.has_value() ? *str : cudaStream_t(-1); + return str.has_value() ? 
DALI_SUCCESS : DALI_NO_DATA; + DALI_EPILOG(); +} + +daliResult_t daliTensorListSetStream( + daliTensorList_h tensor_list, + const cudaStream_t *stream, + daliBool synchronize) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->SetStream(ToOptional(stream), synchronize); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetTensorDesc( + daliTensorList_h tensor_list, + daliTensorDesc_t *out_tensor, + int sample_idx) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_tensor) + throw std::invalid_argument("The output parameter `out_tensor` must not be NULL."); + *out_tensor = ptr->GetTensorDesc(sample_idx); + DALI_EPILOG(); +} + +daliResult_t daliTensorListViewAsTensor( + daliTensorList_h tensor_list, + daliTensor_h *out_tensor) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_tensor) + throw std::invalid_argument("The output parameter `out_tensor` must not be NULL."); + auto t = ptr->ViewAsTensor(); + *out_tensor = t.release(); // no throwing allowed after this line + DALI_EPILOG(); +} diff --git a/dali/c_api_2/data_objects.h b/dali/c_api_2/data_objects.h new file mode 100644 index 00000000000..72b1a3eac03 --- /dev/null +++ b/dali/c_api_2/data_objects.h @@ -0,0 +1,613 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
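Note: for reference, a hedged sketch of driving the attach-buffer entry point defined above from client code. The shapes and sizes are illustrative; a zero-initialized daliDeleter_t means the wrapper does not take ownership, since ownership is only transferred when a delete_buffer callback is supplied:

#define DALI_ALLOW_NEW_C_API
#include "dali/dali.h"
#include <cstdint>
#include <cstdio>
#include <vector>

void AttachExample() {
  daliBufferPlacement_t placement{};
  placement.device_type = DALI_STORAGE_CPU;

  // Two HWC samples stored back-to-back in one caller-owned buffer.
  int64_t shapes[] = { 2, 3, 1,
                       4, 5, 1 };
  std::vector<uint32_t> data(2 * 3 * 1 + 4 * 5 * 1);
  ptrdiff_t offsets[] = { 0, static_cast<ptrdiff_t>(2 * 3 * 1 * sizeof(uint32_t)) };

  daliTensorList_h tl = nullptr;
  daliDeleter_t no_deleter{};   // no callbacks: the caller keeps ownership
  if (daliTensorListCreate(&tl, placement) != DALI_SUCCESS ||
      daliTensorListAttachBuffer(tl, 2, 3, shapes, DALI_UINT32, "HWC",
                                 data.data(), offsets, no_deleter) != DALI_SUCCESS) {
    std::fprintf(stderr, "%s\n", daliGetLastErrorMessage());
  }
  if (tl)
    daliTensorListDecRef(tl, nullptr);
}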
+ +#ifndef DALI_C_API_2_DATA_OBJECTS_H_ +#define DALI_C_API_2_DATA_OBJECTS_H_ + +#include +#include +#include +#include +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/pipeline/data/tensor_list.h" +#include "dali/c_api_2/ref_counting.h" +#include "dali/c_api_2/validation.h" + + +struct _DALITensorList {}; +struct _DALITensor {}; + +namespace dali { +namespace c_api { + +////////////////////////////////////////////////////////////////////////////// +// Interfaces +////////////////////////////////////////////////////////////////////////////// + +class ITensor : public _DALITensor, public RefCountedObject { + public: + virtual ~ITensor() = default; + + virtual void Resize( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout) = 0; + + virtual void AttachBuffer( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter) = 0; + + virtual daliBufferPlacement_t GetBufferPlacement() const = 0; + + virtual const char *GetLayout() const = 0; + + virtual void SetLayout(const char *layout) = 0; + + virtual void SetStream(std::optional stream, bool synchronize) = 0; + + virtual std::optional GetStream() const = 0; + + virtual std::optional GetReadyEvent() const = 0; + + virtual cudaEvent_t GetOrCreateReadyEvent() = 0; + + virtual daliTensorDesc_t GetDesc() const = 0; + + static RefCountedPtr Create(daliBufferPlacement_t placement); +}; + + +class ITensorList : public _DALITensorList, public RefCountedObject { + public: + virtual ~ITensorList() = default; + + virtual void Resize( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout) = 0; + + virtual void AttachBuffer( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) = 0; + + virtual void AttachSamples( + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) = 0; + + virtual daliBufferPlacement_t GetBufferPlacement() const = 0; + + virtual const char *GetLayout() const = 0; + + virtual void SetLayout(const char *layout) = 0; + + virtual void SetStream(std::optional stream, bool synchronize) = 0; + + virtual std::optional GetStream() const = 0; + + virtual std::optional GetReadyEvent() const = 0; + + virtual cudaEvent_t GetOrCreateReadyEvent() = 0; + + virtual daliTensorDesc_t GetTensorDesc(int sample) const = 0; + + virtual RefCountedPtr ViewAsTensor() const = 0; + + static RefCountedPtr Create(daliBufferPlacement_t placement); +}; + + +////////////////////////////////////////////////////////////////////////////// +// Implementation +////////////////////////////////////////////////////////////////////////////// + + +struct BufferDeleter { + daliDeleter_t deleter; + AccessOrder deletion_order; + + void operator()(void *data) { + if (deleter.delete_buffer) { + cudaStream_t stream = deletion_order.stream(); + deleter.delete_buffer(deleter.deleter_ctx, data, + deletion_order.is_device() ? 
&stream : nullptr); + } + if (deleter.destroy_context) { + deleter.destroy_context(deleter.deleter_ctx); + } + } +}; + +template +class TensorWrapper : public ITensor { + public: + explicit TensorWrapper(std::shared_ptr> t) : t_(std::move(t)) {} + + void Resize( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout) override { + ValidateShape(ndim, shape); + Validate(dtype); + if (layout) + Validate(TensorLayout(layout), ndim); + t_->Resize(TensorShape<>(make_cspan(shape, ndim)), dtype); + if (layout) + t_->SetLayout(layout); + } + + void AttachBuffer( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter) override { + ValidateShape(ndim, shape); + Validate(dtype); + if (layout) + Validate(TensorLayout(layout), ndim); + + TensorShape<> tshape(make_cspan(shape, ndim)); + size_t num_elements = volume(tshape); + if (num_elements > 0 && !data) + throw std::invalid_argument("The data buffer must not be NULL for a non-empty tensor."); + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == t_->ndim()) + new_layout = t_->GetLayout(); + } else { + new_layout = layout; + Validate(new_layout, ndim); + } + + t_->Reset(); + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + + std::shared_ptr buffer; + if (!deleter.delete_buffer && !deleter.destroy_context) { + buffer = std::shared_ptr(data, [](void *){}); + } else { + buffer = std::shared_ptr(data, BufferDeleter{deleter, t_->order()}); + } + + t_->ShareData( + std::move(buffer), + num_elements * element_size, + t_->is_pinned(), + tshape, + dtype, + t_->device_id(), + t_->order()); + + if (layout) + t_->SetLayout(new_layout); + } + + daliBufferPlacement_t GetBufferPlacement() const override { + daliBufferPlacement_t placement; + placement.device_id = t_->device_id(); + StorageDevice dev = backend_to_storage_device::value; + placement.device_type = static_cast(dev); + placement.pinned = t_->is_pinned(); + return placement; + } + + void SetStream(std::optional stream, bool synchronize) override { + t_->set_order(stream.has_value() ? AccessOrder(*stream) : AccessOrder::host(), synchronize); + } + + void SetLayout(const char *layout_string) { + if (layout_string) { + TensorLayout layout(layout_string); + Validate(layout, t_->ndim()); + t_->SetLayout(layout); + } else { + t_->SetLayout(""); + } + } + + const char *GetLayout() const override { + auto &layout = t_->GetLayout(); + return !layout.empty() ? 
layout.data() : nullptr; + } + + std::optional GetStream() const override { + auto o = t_->order(); + if (o.is_device()) + return o.stream(); + else + return std::nullopt; + } + + std::optional GetReadyEvent() const override { + auto &e = t_->ready_event(); + if (e) + return e.get(); + else + return std::nullopt; + } + + cudaEvent_t GetOrCreateReadyEvent() override { + auto &e = t_->ready_event(); + if (e) + return e.get(); + int device_id = t_->device_id(); + if (device_id < 0) + throw std::runtime_error("The tensor list is not associated with a CUDA device."); + t_->set_ready_event(CUDASharedEvent::Create(device_id)); + return t_->ready_event().get(); + } + + daliTensorDesc_t GetDesc() const override { + auto &shape = t_->shape(); + daliTensorDesc_t desc{}; + desc.ndim = shape.sample_dim(); + desc.data = t_->raw_mutable_data(); + desc.dtype = t_->type(); + desc.layout = GetLayout(); + desc.shape = shape.data(); + return desc; + } + + private: + std::shared_ptr> t_; +}; + +template +RefCountedPtr> Wrap(std::shared_ptr> tl) { + return RefCountedPtr>(new TensorWrapper(std::move(tl))); +} + +template +class TensorListWrapper : public ITensorList { + public: + explicit TensorListWrapper(std::shared_ptr> tl) : tl_(std::move(tl)) {} + + void Resize( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout) override { + Validate(dtype); + ValidateShape(num_samples, ndim, shapes); + if (layout) + Validate(TensorLayout(layout), ndim); + std::vector shape_data(shapes, shapes + ndim * num_samples); + tl_->Resize(TensorListShape<>(std::move(shape_data), num_samples, ndim), dtype); + if (layout) + tl_->SetLayout(layout); + } + + void AttachBuffer( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) override { + ValidateShape(num_samples, ndim, shapes); + Validate(dtype); + + if (!data && num_samples > 0) { + for (int i = 0; i < num_samples; i++) { + auto sample_shape = make_cspan(&shapes[i*ndim], ndim); + + if (volume(sample_shape) > 0) + throw std::invalid_argument( + "The pointer to the data buffer must not be null for a non-empty tensor list."); + + if (sample_offsets && sample_offsets[i]) + throw std::invalid_argument( + "All sample_offsets must be zero when the data pointer is NULL."); + } + } + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == tl_->sample_dim()) + new_layout = tl_->GetLayout(); + } else { + new_layout = layout; + Validate(new_layout, ndim); + } + + tl_->Reset(); + tl_->SetSize(num_samples); + tl_->set_sample_dim(ndim); + tl_->SetLayout(new_layout); + tl_->set_type(dtype); + ptrdiff_t next_offset = 0; + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + + std::shared_ptr buffer; + if (!deleter.delete_buffer && !deleter.destroy_context) { + buffer = std::shared_ptr(data, [](void *){}); + } else { + buffer = std::shared_ptr(data, BufferDeleter{deleter, tl_->order()}); + } + + bool is_contiguous = true; + if (sample_offsets) { + for (int i = 0; i < num_samples; i++) { + if (sample_offsets[i] != next_offset) { + is_contiguous = false; + break; + } + auto num_elements = volume(make_cspan(&shapes[i*ndim], ndim)); + next_offset += num_elements * element_size; + } + } + + if (is_contiguous) { + tl_->SetContiguity(BatchContiguity::Contiguous); + std::vector shape_data(shapes, shapes + ndim * num_samples); + TensorListShape<> tl_shape(shape_data, num_samples, ndim); + 
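+      // Samples are laid out back-to-back, so the whole batch shares one allocation below.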
tl_->ShareData( + std::move(buffer), + next_offset, + tl_->is_pinned(), + tl_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } else { + tl_->SetContiguity(BatchContiguity::Automatic); + next_offset = 0; + + for (int i = 0; i < num_samples; i++) { + TensorShape<> sample_shape(make_cspan(&shapes[i*ndim], ndim)); + void *sample_data; + size_t sample_bytes = volume(sample_shape) * element_size; + if (sample_offsets) { + sample_data = static_cast(data) + sample_offsets[i]; + } else { + sample_data = static_cast(data) + next_offset; + next_offset += sample_bytes; + } + tl_->SetSample( + i, + std::shared_ptr(buffer, sample_data), + sample_bytes, + tl_->is_pinned(), + sample_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } + } + } + + void AttachSamples( + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) override { + ValidateNumSamples(num_samples); + if (num_samples > 0 && !samples) + throw std::invalid_argument("The pointer to sample descriptors must not be NULL."); + if (ndim < 0) { + if (num_samples == 0) + throw std::invalid_argument( + "The number of dimensions must not be negative when num_samples is 0."); + else + ndim = samples[0].ndim; + } + if (dtype == DALI_NO_TYPE) { + if (num_samples == 0) + throw std::invalid_argument( + "A valid data type must be provided when there's no sample to take it from."); + dtype = samples[0].dtype; + } + Validate(dtype); + + for (int i = 0; i < num_samples; i++) { + if (ndim && !samples[i].shape) + throw std::invalid_argument(make_string("Got NULL shape in sample ", i, ".")); + if (samples[i].dtype != dtype) + throw std::invalid_argument(make_string("Unexpected data type in sample ", i, ". Got: ", + samples[i].dtype, ", expected ", dtype, ".")); + ValidateSampleShape(i, make_cspan(samples[i].shape, samples[i].ndim), ndim);; + + if (!samples[i].data && volume(make_cspan(samples[i].shape, ndim))) + throw std::invalid_argument(make_string( + "Got NULL data pointer in a non-empty sample ", i, ".")); + } + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == tl_->sample_dim()) + new_layout = tl_->GetLayout(); + } else { + new_layout = layout; + Validate(new_layout, ndim); + } + + tl_->Reset(); + tl_->SetSize(num_samples); + tl_->set_sample_dim(ndim); + tl_->SetLayout(new_layout); + + auto deletion_order = tl_->order(); + + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + for (int i = 0; i < num_samples; i++) { + TensorShape<> sample_shape(make_cspan(samples[i].shape, samples[i].ndim)); + size_t sample_bytes = volume(sample_shape) * element_size; + std::shared_ptr sample_ptr; + if (sample_deleters) { + sample_ptr = std::shared_ptr( + samples[i].data, + BufferDeleter{sample_deleters[i], deletion_order}); + } else { + sample_ptr = std::shared_ptr(samples[i].data, [](void*) {}); + } + + tl_->SetSample( + i, + sample_ptr, + sample_bytes, + tl_->is_pinned(), + sample_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } + } + + daliBufferPlacement_t GetBufferPlacement() const override { + daliBufferPlacement_t placement; + placement.device_id = tl_->device_id(); + StorageDevice dev = backend_to_storage_device::value; + placement.device_type = static_cast(dev); + placement.pinned = tl_->is_pinned(); + return placement; + } + + void SetStream(std::optional stream, bool synchronize) override { + tl_->set_order(stream.has_value() ? 
AccessOrder(*stream) : AccessOrder::host(), synchronize); + } + + void SetLayout(const char *layout_string) { + if (layout_string) { + TensorLayout layout(layout_string); + Validate(layout, tl_->sample_dim()); + tl_->SetLayout(layout); + } else { + tl_->SetLayout(""); + } + } + + const char *GetLayout() const override { + auto &layout = tl_->GetLayout(); + return !layout.empty() ? layout.data() : nullptr; + } + + std::optional GetStream() const override { + auto o = tl_->order(); + if (o.is_device()) + return o.stream(); + else + return std::nullopt; + } + + std::optional GetReadyEvent() const override { + auto &e = tl_->ready_event(); + if (e) + return e.get(); + else + return std::nullopt; + } + + cudaEvent_t GetOrCreateReadyEvent() override { + auto &e = tl_->ready_event(); + if (e) + return e.get(); + int device_id = tl_->device_id(); + if (device_id < 0) + throw std::runtime_error("The tensor list is not associated with a CUDA device."); + tl_->set_ready_event(CUDASharedEvent::Create(device_id)); + return tl_->ready_event().get(); + } + + daliTensorDesc_t GetTensorDesc(int sample) const override { + auto &shape = tl_->shape(); + if (sample < 0 || sample >= shape.num_samples()) + throw std::out_of_range(make_string("The sample index ", sample, " is out of range. " + "Valid indices are [0..", shape.num_samples() - 1, "].")); + daliTensorDesc_t desc{}; + desc.ndim = shape.sample_dim(); + desc.data = tl_->raw_mutable_tensor(sample); + desc.dtype = tl_->type(); + desc.layout = GetLayout(); + desc.shape = shape.tensor_shape_span(sample).data(); + return desc; + } + + RefCountedPtr ViewAsTensor() const override { + if (!tl_->IsContiguous()) + throw std::runtime_error( + "The TensorList is not contiguous and cannot be viewed as a Tensor."); + + auto t = std::make_shared>(); + auto buf = unsafe_owner(*tl_); + auto &lshape = tl_->shape(); + TensorShape<> tshape = shape_cat(lshape.num_samples(), lshape[0]); + t->ShareData( + std::move(buf), + tl_->nbytes(), + tl_->is_pinned(), + tshape, + tl_->type(), + tl_->device_id(), + tl_->order(), + tl_->ready_event()); + TensorLayout layout = tl_->GetLayout(); + if (layout.size() == lshape.sample_dim()) { + t->SetLayout("N" + layout); + } + return Wrap(std::move(t)); + } + + private: + std::shared_ptr> tl_; +}; + +template +RefCountedPtr> Wrap(std::shared_ptr> tl) { + return RefCountedPtr>(new TensorListWrapper(std::move(tl))); +} + + +ITensor *ToPointer(daliTensor_h handle); +ITensorList *ToPointer(daliTensorList_h handle); + +} // namespace c_api +} // namespace dali + +#endif // DALI_C_API_2_DATA_OBJECTS_H_ diff --git a/dali/c_api_2/data_objects_test.cc b/dali/c_api_2/data_objects_test.cc new file mode 100644 index 00000000000..2102ae1f548 --- /dev/null +++ b/dali/c_api_2/data_objects_test.cc @@ -0,0 +1,414 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
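Note: a hedged sketch of how the wrapper types above cross the C boundary: an internal TensorList is wrapped once, and the single reference held by the returned RefCountedPtr is released into the opaque handle that a client later drops with daliTensorListDecRef. Only the names declared above are real; the function below is illustrative:

#include <memory>
#include "dali/c_api_2/data_objects.h"

daliTensorList_h MakeHandleExample() {
  auto tl = std::make_shared<dali::TensorList<dali::CPUBackend>>();
  auto wrapped = dali::c_api::Wrap(std::move(tl));   // RefCountedPtr holding the only reference
  return wrapped.release();                          // ownership moves to the returned handle
}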
+ +#include "dali/c_api_2/data_objects.h" +#include +#include "dali/c_api_2/managed_handle.h" +#include "dali/core/span.h" + +TEST(CAPI2_TensorListTest, NullHandle) { + daliTensorList_h h = nullptr; + int ref = 0; + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListIncRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListDecRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListRefCount(h, &ref)); +} + +TEST(CAPI2_TensorListTest, CreateDestroy) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + placement.pinned = false; + daliTensorList_h h = nullptr; + daliResult_t r = daliTensorListCreate(&h, placement); + ASSERT_NE(h, nullptr); + dali::c_api::TensorListHandle tl(h); + ASSERT_EQ(h, tl.get()); + ASSERT_EQ(r, DALI_SUCCESS); + + int ref = -1; + EXPECT_EQ(daliTensorListRefCount(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 1); + ref = -1; + + h = tl.release(); + EXPECT_EQ(daliTensorListDecRef(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 0); +} + +inline auto CreateTensorList(daliBufferPlacement_t placement) { + daliTensorList_h handle; + auto err = daliTensorListCreate(&handle, placement); + switch (err) { + case DALI_SUCCESS: + break; + case DALI_ERROR_OUT_OF_MEMORY: + throw std::bad_alloc(); + case DALI_ERROR_INVALID_ARGUMENT: + throw std::invalid_argument(daliGetLastErrorMessage()); + default: + throw std::runtime_error(daliGetLastErrorMessage()); + } + return dali::c_api::TensorListHandle(handle); +} + +void TestTensorListResize(daliStorageDevice_t storage_device) { + daliBufferPlacement_t placement{}; + placement.device_type = storage_device; + int64_t shapes[] = { + 480, 640, 3, + 600, 800, 4, + 348, 720, 1, + 1080, 1920, 3 + }; + daliDataType_t dtype = DALI_UINT32; + + auto tl = CreateTensorList(placement); + EXPECT_EQ(daliTensorListResize(tl, 4, 3, nullptr, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, -1, 3, shapes, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, -1, shapes, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, "ABCD"), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = -1; + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, "HWC"), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = 480; + EXPECT_EQ(daliTensorListResize(tl, 1, 3, shapes, dtype, "HWC"), DALI_SUCCESS); + // resize, but keep the layout + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, nullptr), DALI_SUCCESS); + + size_t element_size = dali::TypeTable::GetTypeInfo(dtype).size(); + + ptrdiff_t offset = 0; + const char *base; + for (int i = 0; i < 4; i++) { + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorListGetTensorDesc(tl, &desc, i), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + if (i == 0) + base = static_cast(desc.data); + EXPECT_EQ(desc.data, base + offset); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], shapes[3 * i + j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * element_size; + if (storage_device == DALI_STORAGE_GPU) { + // Check that the data is accessible for the GPU + EXPECT_EQ(cudaMemset(desc.data, 0, sample_bytes), cudaSuccess); + } else { + // Check that the data is accessible for the CPU + memset(desc.data, 0, sample_bytes); // just not crashing is OK + } + offset += sample_bytes; + } + if (storage_device == DALI_STORAGE_GPU) { + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); + } +} + +struct 
TestDeleterCtx { + void *expected_data; + int buffer_delete_count; + int context_delete_count; +}; + +template +inline std::pair> +MakeTestDeleter(element_t *expected_data) { + auto ctx = std::unique_ptr(new TestDeleterCtx{ expected_data, 0, 0 }); + daliDeleter_t deleter = {}; + deleter.deleter_ctx = ctx.get(); + deleter.delete_buffer = [](void *vctx, void *data, const cudaStream_t *stream) { + ASSERT_NE(data, nullptr); + auto *ctx = static_cast(vctx); + EXPECT_EQ(ctx->context_delete_count, 0); + EXPECT_EQ(ctx->buffer_delete_count, 0); + EXPECT_EQ(data, ctx->expected_data); + ctx->buffer_delete_count++; + delete [] static_cast(data); + }; + deleter.destroy_context = [](void *vctx) { + auto *ctx = static_cast(vctx); + EXPECT_EQ(ctx->context_delete_count, 0); + EXPECT_EQ(ctx->buffer_delete_count, 1); + ctx->context_delete_count++; + }; + return { deleter, std::move(ctx) }; +} + +TEST(CAPI2_TensorListTest, AttachBuffer) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + using element_t = int; + daliDataType_t dtype = dali::type2id::value; + dali::TensorListShape<> lshape({ + { 480, 640, 3 }, + { 600, 800, 4 }, + { 348, 720, 1 }, + { 1080, 1920, 3 } + }); + auto size = lshape.num_elements(); + std::unique_ptr data(new element_t[size]); + + ptrdiff_t offsets[4] = {}; + for (int i = 1; i < 4; i++) + offsets[i] = offsets[i - 1] + volume(lshape[i - 1]) * sizeof(element_t); + + auto [deleter, ctx] = MakeTestDeleter(data.get()); + + auto tl = CreateTensorList(placement); + ASSERT_EQ(daliTensorListAttachBuffer( + tl, + lshape.num_samples(), + lshape.sample_dim(), + lshape.data(), + dtype, + "HWC", + data.get(), + offsets, + deleter), DALI_SUCCESS); + + void *data_ptr = data.release(); // the buffer is now owned by the tensor list + + ptrdiff_t offset = 0; + const char *base = static_cast(data_ptr); + for (int i = 0; i < 4; i++) { + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorListGetTensorDesc(tl, &desc, i), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + EXPECT_EQ(desc.data, base + offset); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], lshape[i][j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * sizeof(element_t); + offset += sample_bytes; + } + + tl.reset(); + + EXPECT_EQ(ctx->buffer_delete_count, 1) << "Buffer deleter not called"; + EXPECT_EQ(ctx->context_delete_count, 1) << "Deleter context not destroyed"; +} + + +TEST(CAPI2_TensorListTest, ViewAsTensor) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + using element_t = int; + daliDataType_t dtype = dali::type2id::value; + dali::TensorListShape<> lshape = dali::uniform_list_shape(4, { 480, 640, 3 }); + auto size = lshape.num_elements(); + std::unique_ptr data(new element_t[size]); + + ptrdiff_t sample_size = volume(lshape[0]) * sizeof(element_t); + + ptrdiff_t offsets[4] = { + 0, + 1 * sample_size, + 2 * sample_size, + 3 * sample_size, + }; + + auto [deleter, ctx] = MakeTestDeleter(data.get()); + + auto tl = CreateTensorList(placement); + ASSERT_EQ(daliTensorListAttachBuffer( + tl, + lshape.num_samples(), + lshape.sample_dim(), + lshape.data(), + dtype, + "HWC", + data.get(), + offsets, + deleter), DALI_SUCCESS); + + void *data_ptr = data.release(); // the buffer is now owned by the tensor list + + daliTensor_h ht = nullptr; + EXPECT_EQ(daliTensorListViewAsTensor(tl, &ht), DALI_SUCCESS); + ASSERT_NE(ht, nullptr); + dali::c_api::TensorHandle t(ht); + + daliTensorDesc_t desc{}; + 
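+  // The view should cover the whole contiguous batch: same base pointer, a new
+  // leading sample dimension, and the layout promoted to "NHWC".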
EXPECT_EQ(daliTensorGetDesc(t, &desc), DALI_SUCCESS); + EXPECT_EQ(desc.data, data_ptr); + EXPECT_EQ(desc.shape[0], lshape.num_samples()); + ASSERT_EQ(desc.ndim, 4); + ASSERT_NE(desc.shape, nullptr); + EXPECT_EQ(desc.shape[1], lshape[0][0]); + EXPECT_EQ(desc.shape[2], lshape[0][1]); + EXPECT_EQ(desc.shape[3], lshape[0][2]); + EXPECT_STREQ(desc.layout, "NHWC"); + EXPECT_EQ(desc.dtype, dtype); + + tl.reset(); + + EXPECT_EQ(ctx->buffer_delete_count, 0) << "Buffer prematurely destroyed"; + EXPECT_EQ(ctx->context_delete_count, 0) << "Deleter context prematurely destroyed"; + + t.reset(); + + EXPECT_EQ(ctx->buffer_delete_count, 1) << "Buffer deleter not called"; + EXPECT_EQ(ctx->context_delete_count, 1) << "Deleter context not destroyed"; +} + + +TEST(CAPI2_TensorListTest, ViewAsTensorError) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + using element_t = int; + daliDataType_t dtype = dali::type2id::value; + dali::TensorListShape<> lshape = dali::uniform_list_shape(4, { 480, 640, 3 }); + auto size = lshape.num_elements(); + std::unique_ptr data(new element_t[size]); + + ptrdiff_t sample_size = volume(lshape[0]) * sizeof(element_t); + + // The samples are not in order + ptrdiff_t offsets[4] = { + 0, + 2 * sample_size, + 1 * sample_size, + 3 * sample_size, + }; + + auto [deleter, ctx] = MakeTestDeleter(data.get()); + + auto tl = CreateTensorList(placement); + ASSERT_EQ(daliTensorListAttachBuffer( + tl, + lshape.num_samples(), + lshape.sample_dim(), + lshape.data(), + dtype, + "HWC", + data.get(), + offsets, + deleter), DALI_SUCCESS); + + void *data_ptr = data.release(); // the buffer is now owned by the tensor list + + daliTensor_h ht = nullptr; + EXPECT_EQ(daliTensorListViewAsTensor(tl, &ht), DALI_ERROR_INVALID_OPERATION); +} + + +TEST(CAPI2_TensorListTest, ResizeCPU) { + TestTensorListResize(DALI_STORAGE_CPU); +} + +TEST(CAPI2_TensorListTest, ResizeGPU) { + TestTensorListResize(DALI_STORAGE_GPU); +} + + + + +TEST(CAPI2_TensorTest, NullHandle) { + daliTensor_h h = nullptr; + int ref = 0; + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorIncRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorDecRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorRefCount(h, &ref)); +} + +TEST(CAPI2_TensorTest, CreateDestroy) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + placement.pinned = false; + daliTensor_h h = nullptr; + daliResult_t r = daliTensorCreate(&h, placement); + ASSERT_NE(h, nullptr); + dali::c_api::TensorHandle tl(h); + ASSERT_EQ(h, tl.get()); + ASSERT_EQ(r, DALI_SUCCESS); + + int ref = -1; + EXPECT_EQ(daliTensorRefCount(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 1); + ref = -1; + + h = tl.release(); + EXPECT_EQ(daliTensorDecRef(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 0); +} + + +inline auto CreateTensor(daliBufferPlacement_t placement) { + daliTensor_h handle; + auto err = daliTensorCreate(&handle, placement); + switch (err) { + case DALI_SUCCESS: + break; + case DALI_ERROR_OUT_OF_MEMORY: + throw std::bad_alloc(); + case DALI_ERROR_INVALID_ARGUMENT: + throw std::invalid_argument(daliGetLastErrorMessage()); + default: + throw std::runtime_error(daliGetLastErrorMessage()); + } + return dali::c_api::TensorHandle(handle); +} + +void TestTensorResize(daliStorageDevice_t storage_device) { + daliBufferPlacement_t placement{}; + placement.device_type = storage_device; + auto t = CreateTensor(placement); + int64_t shape[] = { + 1080, 1920, 3 + }; + daliDataType_t dtype = DALI_INT16; + + 
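+  // Invalid requests first: a null shape, a negative ndim, a layout of the wrong
+  // length and a negative extent must all be rejected as invalid arguments.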
EXPECT_EQ(daliTensorResize(t, 3, nullptr, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorResize(t, -1, shape, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "ABCD"), DALI_ERROR_INVALID_ARGUMENT); + shape[0] = -1; + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "HWC"), DALI_ERROR_INVALID_ARGUMENT); + shape[0] = 1; + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "HWC"), DALI_SUCCESS); + + shape[0] = 1080; + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, nullptr), DALI_SUCCESS); + + size_t element_size = dali::TypeTable::GetTypeInfo(dtype).size(); + + ptrdiff_t offset = 0; + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorGetDesc(t, &desc), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + EXPECT_STREQ(desc.layout, "HWC"); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], shape[j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * element_size; + if (storage_device == DALI_STORAGE_GPU) { + // Check that the data is accessible for the GPU + EXPECT_EQ(cudaMemset(desc.data, 0, sample_bytes), cudaSuccess); + } else { + // Check that the data is accessible for the CPU + memset(desc.data, 0, sample_bytes); // just not crashing is OK + } + if (storage_device == DALI_STORAGE_GPU) { + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); + } +} + +TEST(CAPI2_TensorTest, ResizeCPU) { + TestTensorResize(DALI_STORAGE_CPU); +} + +TEST(CAPI2_TensorTest, ResizeGPU) { + TestTensorResize(DALI_STORAGE_GPU); +}
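Taken together, the series is enough for a small end-to-end exercise of the new API. The sketch below is illustrative only; it uses the entry points introduced in these patches and collapses error handling into a single helper:

#define DALI_ALLOW_NEW_C_API
#include "dali/dali.h"
#include <cstdint>
#include <cstdio>

static void Check(daliResult_t r) {
  if (r != DALI_SUCCESS && r != DALI_NO_DATA)
    std::fprintf(stderr, "%s: %s\n", daliGetErrorName(r), daliGetLastErrorMessage());
}

int main() {
  Check(daliInit());

  daliBufferPlacement_t placement{};
  placement.device_type = DALI_STORAGE_CPU;

  daliTensor_h t = nullptr;
  Check(daliTensorCreate(&t, placement));

  int64_t shape[] = {480, 640, 3};
  Check(daliTensorResize(t, 3, shape, DALI_UINT32, "HWC"));

  daliTensorDesc_t desc{};
  Check(daliTensorGetDesc(t, &desc));
  std::printf("ndim = %d, first extent = %lld\n", desc.ndim, (long long)desc.shape[0]);

  Check(daliTensorDecRef(t, nullptr));
  Check(daliShutdown());
  return 0;
}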