From 3b841389ef536cf3ee3d8dc80e2b7afd68fd6878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:37:07 +0100 Subject: [PATCH 1/5] Add C API header and C language build test. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/core/c_api_language_test.c | 16 + include/dali/dali.h | 1063 +++++++++++++++++++++++++++++++ 2 files changed, 1079 insertions(+) create mode 100644 dali/core/c_api_language_test.c create mode 100644 include/dali/dali.h diff --git a/dali/core/c_api_language_test.c b/dali/core/c_api_language_test.c new file mode 100644 index 00000000000..a3526353a18 --- /dev/null +++ b/dali/core/c_api_language_test.c @@ -0,0 +1,16 @@ +// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" diff --git a/include/dali/dali.h b/include/dali/dali.h new file mode 100644 index 00000000000..5b174998429 --- /dev/null +++ b/include/dali/dali.h @@ -0,0 +1,1063 @@ +// Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_DALI_H_ +#define DALI_DALI_H_ + +#ifdef DALI_C_API_H_ +#error The new DALI C API is incompatible with the old one. Please do not include both headers in one translation unit. // NOLINT +#endif + +#ifndef DALI_ALLOW_NEW_C_API +#error The new DALI C API is work in progress and incomplete. +#endif + +#if (defined(__cplusplus) && __cplusplus < 201402L) || \ + (!defined(__cplusplus) && __STDC_VERSION__ < 199901L) +#error The DALI C API requires a C99 or a C++14 compiler. 
+#endif
+
+#include <cuda_runtime_api.h>
+#include <stdint.h>
+#include "dali/core/api_helper.h"
+#include "dali/core/dali_data_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DALI_API DLL_PUBLIC
+
+typedef struct _DALIPipeline *daliPipeline_h;
+typedef struct _DALIPipelineOutputs *daliPipelineOutputs_h;
+typedef struct _DALITensor *daliTensor_h;
+typedef struct _DALITensorList *daliTensorList_h;
+
+typedef enum {
+  DALI_STORAGE_CPU = 0,
+  DALI_STORAGE_GPU = 1,
+  DALI_STORAGE_FORCE_INT32 = 0x7fffffff
+} daliStorageDevice_t;
+
+/** Error codes returned by DALI functions */
+typedef enum {
+  /** The call succeeded */
+  DALI_SUCCESS = 0,
+  /** The call succeeded, but didn't return a value */
+  DALI_NO_DATA = 1,
+  /** The call succeeded, but the queried object is not ready */
+  DALI_NOT_READY = 2,
+
+  DALI_ERROR = (int32_t)0x80000000,  // NOLINT
+  /** The handle is not valid. */
+  DALI_ERROR_INVALID_HANDLE,
+  /** The argument is invalid. Check error message for details. */
+  DALI_ERROR_INVALID_ARGUMENT,
+  /** An invalid type was specified. */
+  DALI_ERROR_INVALID_TYPE,
+  /** A generic user error */
+  DALI_ERROR_INVALID_OPERATION,
+  /** The index is out of valid range */
+  DALI_ERROR_OUT_OF_RANGE,
+  /** The key is not found (when getting) or is not a valid key (when setting) */
+  DALI_ERROR_INVALID_KEY,
+
+  /** An operating system routine failed. */
+  DALI_ERROR_SYSTEM,
+  /** A path to a file or other OS resource is invalid */
+  DALI_ERROR_PATH_NOT_FOUND,
+  /** An I/O operation failed */
+  DALI_ERROR_IO_ERROR,
+  /** An operation timed out */
+  DALI_ERROR_TIMEOUT,
+
+  /** A memory allocation failed */
+  DALI_ERROR_OUT_OF_MEMORY = DALI_ERROR + 0x100,
+
+  /** Internal error - logic error in DALI code */
+  DALI_ERROR_INTERNAL = DALI_ERROR + 0x200,
+  /** The library is shutting down or has shut down */
+  DALI_ERROR_UNLOADING,
+
+  /** A CUDA API call has failed */
+  DALI_ERROR_CUDA_ERROR = DALI_ERROR + 0x10000,
+
+  DALI_ERROR_FORCE_INT32 = 0x7fffffff
+} daliResult_t;
+
+/** A custom deleter
+ *
+ * This object aggregates a custom memory deleter, a context and a destructor.
+ *
+ * NOTE: This structure is typically passed by value for convenience.
+ */
+typedef struct _DALIDeleter {
+  /** A custom user-provided context.
+   *
+   * If the deleter is an object, then `deleter_ctx` is its `this` pointer.
+   * Stateless deleters may set it to NULL.
+   */
+  void *deleter_ctx;
+
+  /** Destroys the user-provided context.
+   *
+   * This function is called by DALI when the deleter is no longer necessary.
+   * The call is omitted if either `deleter_ctx` or `destroy_context` is NULL.
+   *
+   * @param deleter_ctx a custom user-provided context for the deleter
+   */
+  void (*destroy_context)(void *deleter_ctx);
+
+  /** Deletes a memory buffer `data`.
+   *
+   * @param deleter_ctx a custom user-provided context for the deleter
+   * @param data        the buffer to delete
+   * @param stream      If present, the deletion must be ordered after all operations
+   *                    scheduled in *stream; the deleter may either use stream-ordered deletion
+   *                    or otherwise ensure that the memory is valid until all operations
+   *                    scheduled on *stream prior to the call are complete.
+   *                    No operations in any stream scheduled after this call may use `data`.
+   */
+  void (*delete_buffer)(void *deleter_ctx, void *data, const cudaStream_t *stream);
+} daliDeleter_t;
+
+/** Returns the last error code.
+ *
+ * Returns the error code associated with the most recent unsuccessful call in the calling thread.
+ * Successful calls do not overwrite the value.
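+ *
+ * A minimal error-checking sketch (`pipe` is a hypothetical, previously created
+ * pipeline handle; it is not part of this function's API):
+ *
+ *   if (daliPipelineRun(pipe) != DALI_SUCCESS) {
+ *     fprintf(stderr, "%s: %s\n",
+ *             daliGetErrorName(daliGetLastError()),
+ *             daliGetLastErrorMessage());
+ *   }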
+ */ +DALI_API daliResult_t daliGetLastError(); + +/** Returns the last error message. + * + * Returns the detailed, context-specific message associated with the recent unsuccessful call + * in the callling thread. + * Succesful calls do not overwrite the value. + * The pointer is invalidated by intervening DALI calls in the same thread. + */ +DALI_API const char *daliGetLastErrorMessage(); + +/** Clears the last error for the calling thread. */ +DALI_API void daliClearLastError(); + +/** Returns a human-readable name of a given error + * + * The value is a pointer to a string literal. It's not invalidated other than by unloading DALI. + */ +DALI_API const char *daliGetErrorName(daliResult_t error); + +/** Returns a human-readable description of a given error. + * + * The value is a pointer to a string literal. It's not invalidated other than by unloading DALI. + */ +DALI_API const char *daliGetErrorDescription(daliResult_t error); + + +/** Initializes DALI or increments initialization count. + * + * @remark If this function is not called, DALI will be initialized implicitly on the first + * call to DALI APIs. When using implicit initialization, `daliShutdown` should not be used. + */ +DALI_API daliResult_t daliInit(); + +/** Decrements initialization counts and shuts down the library when the count reaches 0. + * + * Calling this function is optional. DALI will be shut down automatically when the program exits. + */ +DALI_API daliResult_t daliShutdown(); + +DALI_API daliResult_t daliPreallocateDeviceMemory2(size_t bytes, int device_id); + +/** Allocates `bytes` bytes of device memory on device `device_id`. + * + * The function works by allocating and immediately freeing the specified amount of device + * memory. This will typically release the memory back to DALI's memory pool, speeding up + * subsequent allocations. + */ +inline daliResult_t daliPreallocateDeviceMemory(size_t bytes, int device_id) { + return daliPreallocateDeviceMemory2(bytes, device_id); +} + +DALI_API daliResult_t daliPreallocatePinnedMemory2(size_t bytes); + +/** Allocates `bytes` bytes of device-accessible host memory. + * + * The function works by allocating and immediately freeing the specified amount of pinned + * memory. This will typically release the memory back to DALI's memory pool, speeding up + * subsequent allocations. + */ +inline daliResult_t daliPreallocatePinnedMemory(size_t bytes) { + return daliPreallocatePinnedMemory2(bytes); +} + +DALI_API daliResult_t daliReleaseUnusedMemory2(); + +/** Releases unused memory from DALI memory pools to the operating system. + * + * NOTE: Some of the memory pool implementations allocate memory from the OS in large chunks. + * If the chunk is occupied by even a tiny allocation, it will not be freed by this function. 
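+ *
+ * A typical usage sketch (the size below is an arbitrary example, not a recommendation):
+ *
+ *   daliPreallocateDeviceMemory(256 << 20, 0);  // warm up the pool on device 0
+ *   // ... build and run pipelines ...
+ *   daliReleaseUnusedMemory();                  // hand unused pool chunks back to the OS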
+ */ +inline daliResult_t daliReleaseUnusedMemory() { + return daliReleaseUnusedMemory2(); +} + + +/****************************************************************************/ +/*** Pipeline API ***********************************************************/ +/****************************************************************************/ + +typedef enum _DALIExecType { + /** The exeuctor processes data ahead, overlapping CPU/Mixed/GPU operators */ + DALI_EXEC_IS_PIPELINED = 1, + /** The executor operates in thread(s) other than the one that calls the pipeline Run */ + DALI_EXEC_IS_ASYNC = 2, + /** Deprecated: The executor uses separate CPU/GPU queues */ + DALI_EXEC_IS_SEPARATED = 4, + /** Use dynamic executor, with unrestricted operator order and aggressive memory reuse */ + DALI_EXEC_IS_DYNAMIC = 8, + + /** Use a synchronous, non-pipelined executor; useful for debugging. */ + DALI_EXEC_SIMPLE = 0, + /** Use an asynchronous pipelined executor, the default one. */ + DALI_EXEC_ASYNC_PIPELINED = DALI_EXEC_IS_PIPELINED | DALI_EXEC_IS_ASYNC, + /** Use the dynamic executor. + * + * The dynamic executor offers more flexibility, better memory efficiency and unrestricted + * lifetime of the pipeline outputs at the expense of more overhead in simple pipelines. */ + DALI_EXEC_DYNAMIC = DALI_EXEC_ASYNC_PIPELINED | DALI_EXEC_IS_DYNAMIC, +} daliExecType_t; + +typedef struct _DALIVersion { + int16_t major, minor; + int32_t patch; +} daliVersion_t; + + +/** DALI Pipeline construction parameters */ +typedef struct _DALIPipelineParams { + /** The version of this structure */ + daliVersion_t version; + + struct { + uint64_t max_batch_size_present : 1; + uint64_t num_threads_present : 1; + uint64_t device_id_present : 1; + uint64_t seed_present : 1; + uint64_t exec_flags_present : 1; + uint64_t exec_type_present : 1; + uint64_t enable_checkpointing_present : 1; + uint64_t enable_memory_stats_present : 1; + }; + int batch_size; + int num_threads; + int device_id; + int64_t seed; + daliExecType_t exec_type; + daliBool enable_checkpointing; + daliBool enable_memory_stats; +} daliPipelineParams_t; + +/** Describes an output of a DALI Pipeline */ +typedef struct _DALIPipelineOutputDesc { + const char *name; + daliStorageDevice_t device; + struct { + unsigned dtype_present : 1; + unsigned ndim_present : 1; + }; + daliDataType_t dtype; + int ndim; +} daliPipelineOutputDesc_t; + +/** Creates an empty pipeline. */ +DALI_API daliResult_t daliPipelineCreate( + daliPipeline_h *out_pipe_handle, + const daliPipelineParams_t *params); + +/** Destroys a DALI pipeline. */ +DALI_API daliResult_t daliPipelineDestroy(daliPipeline_h pipeline); + +/** Creates a DALI pipeline from a serialized one. + * + * This function creates and deserializes a pipeline. The parameters are used to override + * the serialized ones. + * + * @param out_pipe_handle [out] points to a value which will receive the handle to the newly + * created pipeline + * @param serialized_pipeline [in] a raw memory buffer containing the pipeline as protobuf + * @param serialized_pipeline_length the length, in bytes, of the `serialized_pipeline` buffer + * @param param_overrides [in] contains (partial) pipeline construction parameters; + * the parameters specified in this structure override the corresponding + * parameters deserialized from the buffer. 
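+ *
+ * A usage sketch (`buffer` and `size` are assumed to hold a pipeline serialized elsewhere,
+ * e.g. by the Python API; error handling is omitted for brevity):
+ *
+ *   daliPipeline_h pipe = NULL;
+ *   daliPipelineParams_t params = {0};
+ *   params.device_id_present = 1;
+ *   params.device_id = 0;
+ *   if (daliPipelineDeserialize(&pipe, buffer, size, &params) == DALI_SUCCESS) {
+ *     daliPipelineBuild(pipe);
+ *   }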
+ */ +DALI_API daliResult_t daliPipelineDeserialize( + daliPipeline_h *out_pipe_handle, + const void *serialized_pipeline, + size_t serialized_pipeline_size, + const daliPipelineParams_t *param_overrides); + + +/** Prepares the pipeline for execution */ +DALI_API daliResult_t daliPipelineBuild(daliPipeline_h pipeline); + +/** Runs the pipeline to fill the queues. + * + * DALI Pipeline can process several iterations ahead. This function pre-fills the queues. + * If the pipeline has ExternalSource operators (or other external inputs), they need to be + * supplied with enough data. + * + * @see daliPipelineFeedInput + * @see daliPipelineGetInputFeedCount + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_OPERATION + * @retval DALI_ERROR_OUT_OF_MEMORY + * + */ +DALI_API daliResult_t daliPipelinePrefetch(daliPipeline_h pipeline); + +/** Schedules one iteration. + * + * If the executor doesn't have DALI_EXEC_IS_ASYNC flag, the function will block until the + * operation is complete on host. + * + * NOTE: The relevant device code may still be running after this function returns. + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_OPERATION + * @retval DALI_ERROR_OUT_OF_MEMORY + */ +DALI_API daliResult_t daliPipelineRun(daliPipeline_h pipeline); + +/** Gets the required feed count for the specified input of the pipeline. + * + * @param pipeline [in] The pipeline + * @param out_feed_count [out] The number of batches to feed into the specified input before + * `daliPipelinePrefetch` can be called. + * @param input_name [in] The name of the input. + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_KEY if `input_name` is not a valid name of an input of the + * pipeline + */ +DALI_API daliResult_t daliPipelineGetFeedCount( + daliPipeline_h pipeline, + int *out_feed_count, + const char *input_name); + +typedef enum _DALIFeedInputFlags { + /** Do not make a copy of the input, use it directly instead. + * + * When daliTensorList_h is passed to daliFeedInput, a reference count is incremented + */ + DALI_FEED_INPUT_NO_COPY = 1, +} daliFeedInputFlags_t; + +/** Feeds the input `input_name` with data from `input_data`. + * + * @param pipeline the pipeline + * @param input_name the name of the input + * @param input_data the tensor list containing the data + * @param data_id an identifier of this data batch + * @param options + * + * @retval DALI_SUCCESS + * @retval DALI_ERROR_INVALID_KEY if `input_name` is not a valid name of an input of the + * pipeline*/ +DALI_API daliResult_t daliPipelineFeedInput( + daliPipeline_h pipeline, + const char *input_name, + daliTensorList_h input_data, + const char *data_id, + daliFeedInputFlags_t options, + const cudaStream_t *stream); + +/** Gets the number of pipeline outputs. + * + * @param pipeline [in] The pipeline + * @param out_count [out] A pointer to a place where the number of pipeline outputs is stored. + */ +DALI_API daliResult_t daliPipelineGetOutputCount(daliPipeline_h pipeline, int *out_count); + +/** Gets a descriptor of the specified pipeline output. + * + * @param pipeline [in] The pipeline + * @param out_desc [out] A pointer to the returned descriptor. + * @param index [in] The 0-based index of the output. See `daliPipelineGetOutputCount`. + * + * NOTE: The names returned by this function match those specified when defining the pipeline, + * but don't necessarily indicate the output operators. When building the pipeline, + * operators may be added (e.g. 
to guarantee dense storage of the outputs) or removed + * (in the process of graph optimization). + */ +DALI_API daliResult_t daliPipelineGetOutputDesc( + daliPipeline_h pipeline, + daliPipelineOutputDesc_t *out_desc, + int index); + + +/** Pops the pipeline outputs from the pipeline's output queue. + * + * The outputs are ready for use on any stream. + * When no longer used, the outputs must be freed by destroying the `daliPipelineOutput` object. + * + * @param pipeline [in] The pipeline whose outputs are to be obtained + * @param out [out] A pointer to the output handle. The handle is NULL if the function + * reports an error. + * + * @return This function will report errors that occurred asynchronously when preparing the + * relevant data batch. If an error is reported, the output handle is NULL. + * + */ +DALI_API daliResult_t daliPipelinePopOutputs(daliPipeline_h pipeline, daliPipelineOutputs_h *out); + +/** Pops the pipeline outputs from the pipeline's output queue. + * + * The outputs are ready for use on the provided stream. + * When no longer used, the outputs must be freed by destroying the daliPipelineOutput object. + * + * This function works only with DALI_EXEC_IS_DYNAMIC. + * + * @param pipeline [in] The pipeline whose outputs are to be obtained + * @param out [out] A pointer to the output handle. The handle is NULL if the function + * reports an error. + * + * @return This function will report errors that occurred asynchronously when preparing the + * relevant data batch. If an error is reported, the output handle is NULL. + */ +DALI_API daliResult_t daliPipelinePopOutputsAsync( + daliPipeline_h pipeline, + daliPipelineOutputs_h *out, + cudaStream_t stream); + +/** Releases the pipeline outputs. + * + * @param pipeline [in] The pipeline outputs which are being released. + * + * This function destroys the daliPipelineOutputObject. The availability of the outputs differs + * between different executors. + * If DALI_EXEC_IS_DYNAMIC is used, the outputs may be used until their handles are destroyed. + * Otherwise, the outputs must not be used after this call has been made. + * + * @warning When NOT using DALI_EXEC_IS_DYNAMIC, the maximum number of live daliPipelineOutputs_h + * obtained from a single pipeline must not exceed the prefetch_queue_depth. An attempt + * to run the pipeline again after the + */ +DALI_API daliResult_t daliPipelineOutputsDestroy(daliPipelineOutputs_h out); + +typedef struct _DALIOperatorTrace { + const char *operator_name; + const char *trace; + const char *value; +} daliOperatorTrace_t; + +/** Gets all operator "traces" that were set when producing this set of outputs. + * + * @param outputs [in] The outputs + * @param out_traces [out] A return value pointer where, the a pointer to the beginning of an + * array of operator traces is stored. + * @param out_trace_count [out] A pointer that receives the number of traces. + * + * The output array is valid until the `outputs` handle is destroyed. + */ +DALI_API daliResult_t daliPipelineOutputsGetTraces( + daliPipelineOutputs_h outputs, + const daliOperatorTrace_t **out_traces, + int *out_trace_count); + +/** Gets a single operator "trace", identified by operator instance name and a trace name. + * + * @param outputs [in] The outputs + * @param out_trace [out] A pointer which receives a ppointer to the trace. + * @param operator_name [in] The name of the operator whose trace is being obtained. + * @param trace_name [in] The name of the trace. 
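+ *
+ * For example, reading a trace named "next_output_data_id" set by an operator instance
+ * named "reader" (both names are illustrative and depend on the pipeline definition;
+ * `outs` is a handle obtained from daliPipelinePopOutputs):
+ *
+ *   const char *value = NULL;
+ *   if (daliPipelineOutputsGetTrace(outs, &value, "reader",
+ *                                   "next_output_data_id") == DALI_SUCCESS)
+ *     printf("%s\n", value);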
+ * + * @retval DALI_SUCCESS On success + * @retval DALI_ERROR_INVALID_KEY When there's no trace that matches the names + */ +DALI_API daliResult_t daliPipelineOutputsGetTrace( + daliPipelineOutputs_h outputs, + const char **out_trace, + const char *operator_name, + const char *trace_name); + +/** Gets index-th output. + * + * The handle returned by this function must be released with a call to daliTensorListDecRef. + * + * Unless the pipeline uses DALI_EXEC_IS_DYNAMIC flag, the returned tensor list must not be used + * after the `outputs` handle is destroyed. + * + * @param outputs [in] The pipeline outputs object + * @param out [out] A pointer to a TensorList handle + * @param index [in] The index of the output to get a handle to. + */ +DALI_API daliResult_t daliPipelineOutputsGet( + daliPipelineOutputs_h outputs, + daliTensorList_h *out, + int index); + +/****************************************************************************/ +/*** Tensor and TensorList API **********************************************/ +/****************************************************************************/ + +typedef struct _DALITensorDesc { + /** The number of dimensions of the tensor. + * + * 0 denotes a scalar value. Negative values are invalid. + */ + int ndim; + + /** The shape of the tensor. + * + * The shape starts with the "slowest" dimension - a row-major 640x480 interleaved RGB image + * would have the shape [480, 640, 3]. + * + * The shape can be NULL if ndim == 0 + */ + const int64_t *shape; + + /** The type of the elements of the tensor */ + daliDataType_t dtype; + + /** The layout string of the tensor. + * + * A layout string consists of exactly `ndim` single-character axis labels. The entries in layout + * correspond to the dimension in the shape. A row-major interleaved image + * would have a layout "HWC" + */ + const char *layout; + + /** A pointer to the first element in the tensor. + * + * The data pointer can be NULL if the total volume of the tensor is 0. + * It must not be NULL if ndim == 0. + */ + void *data; +} daliTensorDesc_t; + +/** The specification of the buffer storage location */ +typedef struct _DALIBufferPlacement { + /** The type of the storage device (CPU or GPU). */ + daliStorageDevice_t device_type; + + /** CUDA device ordinal, as returned by CUDA runtime API. + * + * The value of this field is meaningful only if `device_type` is GPU or `pinned` is `true`. + * + * WARNING: The device_id returned by NVML (and thus, nvidia-smi) may be different. + */ + int device_id; + + /** Whether the CPU storage is "pinned" - e.g. allocated with cudaMallocHost */ + daliBool pinned; +} daliBufferPlacement_t; + +/****************************************************************************/ +/*** TensorList *************************************************************/ +/****************************************************************************/ + +/** Creates a TensorList on the specified device */ +DALI_API daliResult_t daliTensorListCreate( + daliTensorList_h *out, + daliBufferPlacement_t placement); + +/** Changes the size of the tensor, allocating more data if necessary. + * + * @param num_samples the number of samples in the batch + * @param ndim the number of dimensions of a sample + * @param shapes the concatenated shapes of the samples; + * must contain num_samples*ndim extents + * @param dtype the element type + * @param layout a layout string describing the order of axes in each sample (e.g. 
HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared * + */ +DALI_API daliResult_t daliTensorListResize( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout); + +/** Attaches an externally allocated buffer to a TensorList. + * + * Attaches an externally allocated buffer and a deleter to a TensorList. + * The deleter is called when the TensorList object is destroyed. + * + * The shape and sample offsets are used only during this function call and may be safely + * disposed of after the function returns. + * + * @param tensor_list the TensorList to attach the data to + * @param num_samples the number of samples in the list + * @param ndim the number of dimensions in the sample + * @param shapes the concatenated shapes of the samples; + * must contain num_samples*ndim extents + * @param dtype the element type + * @param layout a layout string describing the order of axes in each sample (e.g. HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared + * @param data the pointer to the data buffer + * @param sample_offsets optional; the offsets, in bytes, of the samples in the batch from the + * base pointer `data`; if NULL, the samples are assumed to be densely + * packed, with the 0-th sample starting at the address `data`. + * @param deleter an optional deleter called when the buffer reference count goes to zero + */ +DALI_API daliResult_t daliTensorListAttachBuffer( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter); + +/** Attaches externally allocated tensors to a TensorList. + * + * Attaches externally allocated buffers to a TensorList. + * If provided, the deleters are called on all buffers when the samples are destroyed. + * + * The shape and sample offsets are used only during this function call and may be safely + * disposed of after the function returns. + * + * @param tensor_list the TensorList to attach the data to + * @param num_samples the new number of samples in the batch + * @param ndim the number of dimensions in each sample; + * if num_samples > 0, this value can be set to -1 and the number of + * dimensions will be taken from samples[0].ndim + * @param dtype the type of the element of the tensor; + * if dtype is DALI_NO_TYPE, then the type is taken from samples[0].dtype + * @param layout a layout string describing the order of axes in each sample (e.g. HWC), + * if NULL, and the TensorList's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor list's layout is cleared + * @param samples the descriptors of the tensors to be attached to the TensorList; + * the `ndim` and `dtype` of the samples must match and they must match the + * values of `ndim` and `dtype` parameters. + * @param sample_deleters optional deleters, one for each sample + * + * NOTE: If the sample_deleters specify the same object multiple times, its destructor must + * internally use reference counting to avoid multiple deletion. 
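+ *
+ * A simplified sketch attaching two externally owned CPU samples (`tl`, `buf0` and `buf1`
+ * are assumed to exist; no deleters are passed, so the caller retains ownership):
+ *
+ *   int64_t shape0[] = {480, 640, 3}, shape1[] = {600, 800, 3};
+ *   daliTensorDesc_t samples[2] = {
+ *     { 3, shape0, DALI_UINT8, "HWC", buf0 },
+ *     { 3, shape1, DALI_UINT8, "HWC", buf1 }
+ *   };
+ *   daliTensorListAttachSamples(tl, 2, 3, DALI_UINT8, "HWC", samples, NULL);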
+ */ +DALI_API daliResult_t daliTensorListAttachSamples( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters); + + +/** Returns the placement of the TensorLists's underlying buffer. + * + * @param tensor_list [in] the TensorList whose buffer placement is queried + * @param out_placement [out] a pointer to a place where the return value is stored. + */ +DALI_API daliResult_t daliTensorListGetBufferPlacement( + daliTensorList_h tensor_list, + daliBufferPlacement_t *out_placement); + +/** Associates a stream with the TensorList. + * + * @param stream an optional CUDA stream handle; if the handle poitner is NULL, + * host-synchronous behavior is prescribed. + * @param synchronize if true, the new stream (or host, if NULL), will be synchronized with the + * currently associated stream + */ +DALI_API daliResult_t daliTensorListSetStream( + daliTensorList_h tensor_list, + const cudaStream_t *stream, + daliBool synchronize +); + +/** Gets the stream associated with the TensorList. + * + * @retval DALI_SUCCESS if the stream handle was stored in *out_stream + * @retval DALI_NO_DATA if the tensor list is not associated with any stream + * error code otherwise + */ +DALI_API daliResult_t daliTensorListGetStream( + daliTensorList_h tensor_list, + cudaStream_t *out_stream +); + +/** Gets the readiness event associated with the TensorList. + * + * @param tensor_list [in] the tenosr list whose ready event is to be obtained + * @param out_event [out] the pointer to the return value + * + * @retval DALI_SUCCESS if the ready event handle was stored in *out_event + * @retval DALI_NO_DATA if the tensor list is not associated with a readiness event + * error code otherwise + */ +DALI_API daliResult_t daliTensorListGetReadyEvent( + daliTensorList_h tensor_list, + cudaEvent_t *out_event); + +/** Gets the readiness event associated with the TensorList or creates a new one. + * + * @param tensor_list [in] the tensor list to associate an even twith + * @param out_event [out] optional, the event handle + * + * The function ensures that a readiness event is associated with the tensor list. + * It can also get the event handle, if the output parameter pointer is not NULL. + * The function fails if the tensor list is not associated with a CUDA device. + */ +DALI_API daliResult_t daliTensorListGetOrCreateReadyEvent( + daliTensorList_h tensor_list, + cudaEvent_t *out_event); + + +/** Gets the shape of the tensor list + * + * @param tensor_list [in] the tensor list whose shape to obtain + * @param out_num_samples [out] optional; the number of samples in the batch + * @param out_ndim [out] optional; the number of dimensions in a sample + * @param out_shape [out] optional; the pointer to the concatenated array of sample shapes; + * contains (*out_num_samples) * (*out_ndim) elements + * + * The pointer returned in `out_shape` remains valid until the TensorList is destroyed or modified. + * If the caller is not intersted in some of the values, the pointers can be NULL. + */ +DALI_API daliResult_t daliTensorListGetShape( + daliTensorList_h tensor_list, + int *out_num_samples, + int *out_ndim, + const int64_t **out_shape); + +/** Gets a layout string describing the samples in the TensorList. 
+ * + * @param tensor_list [in] the tensor list whose layout to obtain + * @param out_layout [out] a pointer to the place where a pointer to the the layout string of + * the samples in the tensor list is stored + * + * When present, the layout string consists of exactly `sample_ndim` single-character _axis labels_. + * The layout does not contain the leading "sample" dimension (typically denoted as `N`), + * for example, a batch of images would typically have a "HWC" layout. + * The axis labels can be any character except the null character '\0'. + * If there's no layout set, the returned pointer is NULL. + * + * The pointer remains valid until the tensor list is destroyed, cleared, resized or its layout + * changed. + */ +DALI_API daliResult_t daliTensorListGetLayout( + daliTensorList_h tensor_list, + const char **out_layout); + +/** Sets the layout of the samples in the TensorList. + * + * Sets the axis labels that describe the layout of the data in the TensorList. The layout must not + * contain the leading sample dimension (typically `N`). For example, a batch of images would + * typically have a layout "HWC". + * If the layout string is NULL or empty, the layout is cleared; otherwise it must contain exactly + * sample_ndim nonzero characters. The axis labels don't have to be unique. + */ +DALI_API daliResult_t daliTensorListSetLayout( + daliTensorList_h tensor_list, + const char *layout +); + +/** Gets the "source info" metadata of a sample. + * + * Each sample can be associated with a "source info" string, which typically is the file name, + * but can also contain an index in a container, key, etc. + * + * @param tensor_list [in] The tensor list + * @param out_source_info [out] A pointer to a place where the pointer to the source_info string + * is stored. On success, `*out_source_info` contains a pointer to the + * beginning of a null-terminated string. If the sample doesn't have + * associated source info, a NULL pointer is returned. + * @param sample_idx [in] The index of a sample whose source info is queried. + * + * The return value is a string pointer. It is invalidated by destroying, clearing or resizing + * the TensorList as well as by assigning a new source info. + */ +DALI_API daliResult_t daliTensorListGetSourceInfo( + daliTensorList_h tensor_list, + const char **out_source_info, + int sample_idx); + +/** Gets the tensor descriptor of the specified sample. + * + * @param tensor_list [in] The tensor list + * @param out_desc [out] A poitner to a decriptor filled by this funciton. + * @param sample_idx [in] The index of the sample, whose descriptor to get. + * + * The descriptor stored in `out_desc` contains pointers. These pointers are invalidated by + * destroying, clearing or resizing the TensorList or re-attaching new data to it. + */ +DALI_API daliResult_t daliTensorListGetTensorDesc( + daliTensorList_h tensor_list, + daliTensorDesc_t *out_desc, + int sample_idx); + +/** Increments the reference count of the tensor list. + * + * @param tensor_list [in] A handle to the tensor list. + * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorListIncRef(daliTensorList_h tensor_list, int *new_count); + +/** Decrements the reference count of the tensor list. + * + * The handle is destroyed if the reference count reaches 0. + * When the client code no longer needs the handle, it must call daliTensorDecRef. + * + * + * @param tensor_list [in] A handle to the tensor list. 
+ * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorListDecRef(daliTensorList_h tensor_list, int *new_count); + +/** Decrements the reference count of the tensor list */ +DALI_API daliResult_t daliTensorListRefCount(daliTensorList_h tensor_list, int *count); + +/** Views a TensorList as a Tensor. + * + * Creates a new Tensor that points to the same data as the TensorList. The samples in the + * TensorList must have a uniform shape and the data in the TensorList must be contiguous. + * + * The tensor holds a reference to the data in the TensorList - it is safe to destroy the + * TensorList and continue using the resulting Tensor. + * + * @retval DALI_SUCCESS on success + * @retval DALI_ERROR_INVALID_OPERATION if the data is not contiguous + * @retval DALI_ERROR_INVALID_HANDLE the tensor list handle is invalid + * @return DALI_ERROR_INVALID_ARGUMENT the tensor handle pointer is NULL + * @return DALI_ERROR_OUT_OF_MEMORY + */ +DALI_API daliResult_t daliTensorListViewAsTensor( + daliTensorList_h tensor_list, + daliTensor_h *out_tensor); + +/***************************************************************************/ +/*** Tensor ****************************************************************/ +/***************************************************************************/ + +/** Creates a Tensor on the specified device */ +DALI_API daliResult_t daliTensorCreate( + daliTensor_h *out, + daliBufferPlacement_t placement); + +/** Changes the size of the tensor, allocating more data if necessary. + * + * @param num_samples the number of samples in the batch + * @param ndim the number of dimensions of a sample + * @param shape the shape of the tensor; can be NULL if ndim == 0 + * @param dtype the element type + * @param layout a layout string describing the order of axes in the tensor (e.g. HWC), + * if NULL, and the Tensor's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor's layout is cleared + */ +DALI_API daliResult_t daliTensorResize( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout); + +/** Attaches an externally allocated buffer to a Tensor. + * + * Attaches an externally allocated buffer and a deleter to a Tensor. + * The deleter is called when the Tensor object is destroyed. + * + * The shape and layout are used only during this function call and may be safely + * disposed of after the function returns. + * + * @param tensor the Tensor to attach the data to + * @param ndim the number of dimensions in the sample + * @param dtype the element type + * @param shape the shape of the tensor; ndim extents; can be NULL if ndim == 0 + * @param layout a layout string describing the order of axes in the tensor (e.g. HWC), + * if NULL, and the Tensor's number of dimensions is equal to `ndim, + * then the current layout is kept; + * if `layout` is an empty string, the tensor's layout is cleared + * @param data the pointer to the data buffer + * @param deleter the deleter to be called when the tensor is destroyed + */ +DALI_API daliResult_t daliTensorAttachBuffer( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter); + +/** Returns the placement of the Tensor's underlying buffer. 
+ * + * @param tensor [in] the Tensor whose buffer placement is queried + * @param out_placement [out] a pointer to a place where the return value is stored. + */ +DALI_API daliResult_t daliTensorGetBufferPlacement( + daliTensor_h tensor, + daliBufferPlacement_t *out_placement); + +/** Associates a stream with the Tensor. + * + * @param stream an optional CUDA stream handle; if the handle poitner is NULL, + * host-synchronous behavior is prescribed. + * @param synchronize if true, the new stream (or host, if NULL), will be synchronized with the + * currently associated stream + */ +DALI_API daliResult_t daliTensorSetStream( + daliTensor_h tensor, + const cudaStream_t *stream, + daliBool synchronize +); + +/** Gets the stream associated with the Tensor. + * + * @retval DALI_SUCCESS if the stream handle was stored in *out_stream + * @retval DALI_NO_DATA if the tensor is not associated with any stream + * error code otherwise + */ +DALI_API daliResult_t daliTensorGetStream( + daliTensor_h tensor, + cudaStream_t *out_stream +); + +/** Gets the readiness event associated with the Tensor. + * + * @param tensor [in] the tenosr list whose ready event is to be obtained + * @param out_event [out] the pointer to the return value + * + * @retval DALI_SUCCESS if the ready event handle was stored in *out_event + * @retval DALI_NO_DATA if the tensor is not associated with a readiness event + * error code otherwise + */ +DALI_API daliResult_t daliTensorGetReadyEvent( + daliTensor_h tensor, + cudaEvent_t *out_event); + +/** Gets the readiness event associated with the Tensor or creates a new one. + * + * @param tensor [in] the tensor to associate an even twith + * @param out_event [out] optional, the event handle + * + * The function ensures that a readiness event is associated with the tensor. + * It can also get the event handle, if the output parameter pointer is not NULL. + * The function fails if the tensor is not associated with a CUDA device. + */ +DALI_API daliResult_t daliTensorGetOrCreateReadyEvent( + daliTensor_h tensor, + cudaEvent_t *out_event); + + +/** Gets the shape of the tensor + * + * @param tensor [in] the tensor whose shape to obtain + * @param out_ndim [out] optional; receives the number of dimensions + * @param out_shape [out] optional; receives the the pointer to the shape (array of extents) + * + * The pointer returned in `out_shape` remains valid until the Tensor is destroyed or modified. + * If the caller is not intersted in some of the values, the pointers can be NULL. + */ +DALI_API daliResult_t daliTensorGetShape( + daliTensor_h tensor, + int *out_ndim, + const int64_t **out_shape); + +/** Gets a layout string describing the data in the Tensor. + * + * @param tensor [in] the tensor whose layout to obtain + * @param out_layout [out] a pointer to the place where a pointer to the the layout string of + * the samples in the tensor is stored + * + * When present, the layout string consists of exactly `ndim` single-character _axis labels_. + * for example, an image would typically have a "HWC" layout. + * The axis labels can be any character except the null character '\0'. + * If there's no layout set, the returned pointer is NULL. + * + * The pointer remains valid until the tensor is destroyed, cleared, resized or its layout + * changed. + */ +DALI_API daliResult_t daliTensorGetLayout( + daliTensor_h tensor, + const char **out_layout); + +/** Sets the layout of the data in the Tensor. + * + * Sets the axis labels that describe the layout of the data in the Tensor. 
+ * If the layout string is NULL or empty, the layout is cleared; otherwise it must contain exactly + * sample_ndim nonzero characters. The axis labels don't have to be unique. + */ +DALI_API daliResult_t daliTensorSetLayout( + daliTensor_h tensor, + const char *layout +); + +/** Gets the "source info" metadata of a tensor. + * + * A tensor can be associated with a "source info" string, which typically is the file name, + * but can also contain an index in a container, key, etc. + * + * @param tensor [in] The tensor + * @param out_source_info [out] A pointer to a place where the pointer to the source_info string + * is stored. On success, `*out_source_info` contains a pointer to the + * beginning of a null-terminated string. If the sample doesn't have + * associated source info, a NULL pointer is returned. + * + * The return value is a string pointer. It is invalidated by destroying, clearing or resizing + * the Tensor as well as by assigning a new source info. + */ +DALI_API daliResult_t daliTensorGetSourceInfo( + daliTensor_h tensor, + const char **out_source_info); + +/** Gets the descriptor of the data in the tensor. + * + * @param tensor [in] The tensor + * @param out_desc [out] A poitner to a decriptor filled by this funciton. + * + * The descriptor stored in `out_desc` contains pointers. These pointers are invalidated by + * destroying, clearing or resizing the Tensor or re-attaching new data to it. + */ +DALI_API daliResult_t daliTensorGetDesc( + daliTensor_h tensor, + daliTensorDesc_t *out_desc); + +/** Increments the reference count of the tensor. + * + * @param tensor [in] A handle to the tensor. + * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorIncRef(daliTensor_h tensor, int *new_count); + +/** Decrements the reference count of the tensor. + * + * The handle is destroyed if the reference count reaches 0. + * When the client code no longer needs the handle, it must call daliTensorDecRef. + * + * + * @param tensor [in] A handle to the tensor. + * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. + */ +DALI_API daliResult_t daliTensorDecRef(daliTensor_h tensor, int *new_count); + +/** Decrements the reference count of the tensor */ +DALI_API daliResult_t daliTensorRefCount(daliTensor_h tensor, int *count); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // DALI_DALI_H_ From be303856b25911f8a79e5df97fe85c5305d907d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Sat, 1 Feb 2025 16:48:31 +0100 Subject: [PATCH 2/5] Fix documentation for XxxRefCount. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- include/dali/dali.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/dali/dali.h b/include/dali/dali.h index 5b174998429..1d36adbfe0c 100644 --- a/include/dali/dali.h +++ b/include/dali/dali.h @@ -826,7 +826,11 @@ DALI_API daliResult_t daliTensorListIncRef(daliTensorList_h tensor_list, int *ne */ DALI_API daliResult_t daliTensorListDecRef(daliTensorList_h tensor_list, int *new_count); -/** Decrements the reference count of the tensor list */ +/** Reads the current reference count of the tensor list. + * + * @param tensor_list [in] A handle to the tensor list. + * @param count [out] The ouput parameter that receives the reference count. 
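+ *
+ * A sketch of the reference-counting lifecycle (`tl` is a hypothetical TensorList handle):
+ *
+ *   daliTensorListIncRef(tl, NULL);    // share the handle with another consumer
+ *   int count = 0;
+ *   daliTensorListRefCount(tl, &count);
+ *   daliTensorListDecRef(tl, NULL);    // the object is destroyed when the count reaches 0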
+ */ DALI_API daliResult_t daliTensorListRefCount(daliTensorList_h tensor_list, int *count); /** Views a TensorList as a Tensor. @@ -1037,7 +1041,7 @@ DALI_API daliResult_t daliTensorGetDesc( /** Increments the reference count of the tensor. * - * @param tensor [in] A handle to the tensor. + * @param tensor [in] A handle to the tensor. * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. */ DALI_API daliResult_t daliTensorIncRef(daliTensor_h tensor, int *new_count); @@ -1048,12 +1052,16 @@ DALI_API daliResult_t daliTensorIncRef(daliTensor_h tensor, int *new_count); * When the client code no longer needs the handle, it must call daliTensorDecRef. * * - * @param tensor [in] A handle to the tensor. + * @param tensor [in] A handle to the tensor. * @param new_count [out] If not NULL, the incremented reference count is returned in *new_count. */ DALI_API daliResult_t daliTensorDecRef(daliTensor_h tensor, int *new_count); -/** Decrements the reference count of the tensor */ +/** Reads the current reference count of the tensor. + * + * @param tensor [in] A handle to the tensor. + * @param count [out] The ouput parameter that receives the reference count. + */ DALI_API daliResult_t daliTensorRefCount(daliTensor_h tensor, int *count); #ifdef __cplusplus From d159abfadbdeedfcd59a083ace8f37ae7ebf3a2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:42:08 +0100 Subject: [PATCH 3/5] C API error handling. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/CMakeLists.txt | 3 +- dali/c_api_2/CMakeLists.txt | 21 ++++ dali/c_api_2/c_api_internal_test.cc | 75 ++++++++++++ dali/c_api_2/error_handling.cc | 182 ++++++++++++++++++++++++++++ dali/c_api_2/error_handling.h | 68 +++++++++++ dali/c_api_2/init.cc | 69 +++++++++++ 6 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 dali/c_api_2/CMakeLists.txt create mode 100644 dali/c_api_2/c_api_internal_test.cc create mode 100644 dali/c_api_2/error_handling.cc create mode 100644 dali/c_api_2/error_handling.h create mode 100644 dali/c_api_2/init.cc diff --git a/dali/CMakeLists.txt b/dali/CMakeLists.txt index 4b4a3fd547d..e126c1d278e 100644 --- a/dali/CMakeLists.txt +++ b/dali/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2017-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -46,6 +46,7 @@ if (BUILD_DALI_PIPELINE) add_subdirectory(util) add_subdirectory(plugin) add_subdirectory(c_api) + add_subdirectory(c_api_2) endif() if(BUILD_DALI_OPERATORS) diff --git a/dali/c_api_2/CMakeLists.txt b/dali/c_api_2/CMakeLists.txt new file mode 100644 index 00000000000..77dd70d568a --- /dev/null +++ b/dali/c_api_2/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
+
+# Get all the source files
+
+# The headers here are private and should not be installed.
+# collect_headers(DALI_INST_HDRS PARENT_SCOPE)
+
+collect_sources(DALI_SRCS PARENT_SCOPE)
+collect_test_sources(DALI_TEST_SRCS PARENT_SCOPE)
diff --git a/dali/c_api_2/c_api_internal_test.cc b/dali/c_api_2/c_api_internal_test.cc
new file mode 100644
index 00000000000..1752c58d3af
--- /dev/null
+++ b/dali/c_api_2/c_api_internal_test.cc
@@ -0,0 +1,75 @@
+// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <stdexcept>
+#define DALI_ALLOW_NEW_C_API
+#include "dali/dali.h"
+#include "dali/c_api_2/error_handling.h"
+#include "dali/core/cuda_error.h"
+
+namespace dali {
+
+template <typename ExceptionType>
+daliResult_t ThrowAndTranslate(ExceptionType &&ex) {
+  DALI_PROLOG();
+  throw std::forward<ExceptionType>(ex);
+  DALI_EPILOG();
+}
+
+template <typename ExceptionType>
+void CheckException(ExceptionType &&ex, daliResult_t expected_result) {
+  std::string message(ex.what());
+  daliResult_t ret = ThrowAndTranslate(std::forward<ExceptionType>(ex));
+  EXPECT_EQ(ret, expected_result);
+  EXPECT_EQ(daliGetLastError(), expected_result);
+  EXPECT_EQ(daliGetLastErrorMessage(), message);
+  std::cout << daliGetErrorName(ret) << " "
+            << daliGetLastErrorMessage() << std::endl;
+  daliClearLastError();
+  EXPECT_EQ(daliGetLastError(), DALI_SUCCESS);
+  EXPECT_STREQ(daliGetLastErrorMessage(), "");
+}
+
+TEST(CAPI2InternalTest, ErrorTranslation) {
+  CheckException(std::runtime_error("Runtime Error"), DALI_ERROR_INVALID_OPERATION);
+  CheckException(std::bad_alloc(), DALI_ERROR_OUT_OF_MEMORY);
+  CheckException(CUDABadAlloc(), DALI_ERROR_OUT_OF_MEMORY);
+  CheckException(std::logic_error("Logic dictates that it's an error"), DALI_ERROR_INTERNAL);
+  CheckException(std::out_of_range("Bullet left the shooting range"), DALI_ERROR_OUT_OF_RANGE);
+  CheckException(invalid_key("This key doesn't fit into the keyhole."), DALI_ERROR_INVALID_KEY);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::no_such_file_or_directory)),
+                 DALI_ERROR_PATH_NOT_FOUND);
+  CheckException(std::system_error(std::make_error_code(std::errc::no_such_device_or_address)),
+                 DALI_ERROR_PATH_NOT_FOUND);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::no_space_on_device)),
+                 DALI_ERROR_IO_ERROR);
+  CheckException(std::system_error(
+                   std::make_error_code(std::errc::inappropriate_io_control_operation)),
+                 DALI_ERROR_IO_ERROR);
+  CheckException(std::system_error(std::make_error_code(std::io_errc::stream)),
+                 DALI_ERROR_IO_ERROR);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::not_enough_memory)),
+                 DALI_ERROR_OUT_OF_MEMORY);
+
+  CheckException(std::system_error(std::make_error_code(std::errc::bad_file_descriptor)),
+                 DALI_ERROR_SYSTEM);
+  CheckException(std::system_error(std::make_error_code(std::errc::too_many_files_open)),
+                 DALI_ERROR_SYSTEM);
+}
+
+}  // namespace dali
diff --git 
a/dali/c_api_2/error_handling.cc b/dali/c_api_2/error_handling.cc new file mode 100644 index 00000000000..79296eb7bd1 --- /dev/null +++ b/dali/c_api_2/error_handling.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "dali/c_api_2/error_handling.h" +#include "dali/core/error_handling.h" +#include "dali/core/cuda_error.h" + +namespace dali::c_api { + +struct ErrorInfo { + inline ErrorInfo() { + message.reserve(1024); // should help with properly reporting OOMs + } + daliResult_t result = DALI_SUCCESS; + std::string message; +}; + +thread_local ErrorInfo g_daliLastError; + +struct ErrorDesc { + const char *name, *description; +}; + +ErrorDesc GetErrorDesc(daliResult_t result) { + #define RESULT_DESC(name, desc) case DALI_##name: return { "DALI_" #name, desc } + #define ERROR_DESC(name, desc) RESULT_DESC(ERROR_##name, desc) + + switch (result) { + RESULT_DESC(SUCCESS, "The operation was successful."); + RESULT_DESC(NO_DATA, "The operation was successful, but didn't return any data."); + RESULT_DESC(NOT_READY, "The query succeeded, but the operation queried is still pending."); + ERROR_DESC(INVALID_HANDLE, "The operation received an invalid DALI handle."); + ERROR_DESC(INVALID_ARGUMENT, "An invalid argument was specified."); + ERROR_DESC(INVALID_TYPE, "An argument of invalid type encountered."); + ERROR_DESC(INVALID_OPERATION, "An invalid operation was requested."); + ERROR_DESC(OUT_OF_RANGE, "An argument is out of valid range."); + ERROR_DESC(INVALID_KEY, "The operation received an invalid dictionary key."); + + ERROR_DESC(SYSTEM, "An operating system routine failed."); + ERROR_DESC(PATH_NOT_FOUND, "A non-existent or non-accessible file path was encountered."); + ERROR_DESC(IO_ERROR, "An I/O operation failed"); + ERROR_DESC(OUT_OF_MEMORY, "Cannot allocate memory"); + ERROR_DESC(INTERNAL, "An internal error occurred"); + ERROR_DESC(UNLOADING, "DALI is unloading - either daliShutdown was called or " + "the process is shutting down."); + ERROR_DESC(CUDA_ERROR, "A CUDA call has failed."); + default: + return { "", "" }; + } +} + +daliResult_t SetLastError(daliResult_t result, const char *message) { + g_daliLastError.result = result; + g_daliLastError.message = message; + return result; +} + +daliResult_t HandleError(std::exception_ptr ex) { + try { + std::rethrow_exception(std::move(ex)); + } catch (dali::c_api::InvalidHandle &e) { + return SetLastError(DALI_ERROR_INVALID_HANDLE, e.what()); + } catch (std::invalid_argument &e) { + return SetLastError(DALI_ERROR_INVALID_ARGUMENT, e.what()); + } catch (dali::CUDAError &e) { + if (e.is_rt_api()) { + if (e.rt_error() == cudaErrorNotReady) + return SetLastError(DALI_NOT_READY, e.what()); + } else if (e.is_drv_api()) { + if (e.drv_error() == CUDA_ERROR_NOT_READY) + return SetLastError(DALI_NOT_READY, e.what()); + } + return SetLastError(DALI_ERROR_CUDA_ERROR, e.what()); + } catch (dali::CUDABadAlloc &e) { + 
return SetLastError(DALI_ERROR_OUT_OF_MEMORY, e.what()); + } catch (std::bad_alloc &e) { + return SetLastError(DALI_ERROR_OUT_OF_MEMORY, e.what()); + } catch (dali::invalid_key &e) { + return SetLastError(DALI_ERROR_INVALID_KEY, e.what()); + } catch (std::out_of_range &e) { + return SetLastError(DALI_ERROR_OUT_OF_RANGE, e.what()); + } catch (std::system_error &e) { + if (e.code().category() == std::generic_category()) { + daliResult_t result = [&]() { + switch (static_cast(e.code().value())) { + case std::errc::no_such_file_or_directory: + case std::errc::no_such_device: + case std::errc::no_such_device_or_address: + return DALI_ERROR_PATH_NOT_FOUND; + case std::errc::not_enough_memory: + return DALI_ERROR_OUT_OF_MEMORY; + case std::errc::timed_out: + return DALI_ERROR_TIMEOUT; + case std::errc::address_family_not_supported: + case std::errc::address_in_use: + case std::errc::address_not_available: + case std::errc::already_connected: + case std::errc::broken_pipe: + case std::errc::connection_aborted: + case std::errc::connection_already_in_progress: + case std::errc::connection_refused: + case std::errc::connection_reset: + case std::errc::device_or_resource_busy: + case std::errc::directory_not_empty: + case std::errc::file_exists: + case std::errc::file_too_large: + case std::errc::filename_too_long: + case std::errc::host_unreachable: + case std::errc::inappropriate_io_control_operation: + case std::errc::io_error: + case std::errc::is_a_directory: + case std::errc::message_size: + case std::errc::network_down: + case std::errc::network_reset: + case std::errc::network_unreachable: + case std::errc::no_buffer_space: + case std::errc::no_message: + case std::errc::no_space_on_device: + case std::errc::not_a_directory: + case std::errc::not_a_socket: + case std::errc::read_only_file_system: + return DALI_ERROR_IO_ERROR; + default: + return DALI_ERROR_SYSTEM; + } + }(); + return SetLastError(result, e.what()); + } else if (e.code().category() == std::iostream_category()) { + return SetLastError(DALI_ERROR_IO_ERROR, e.what()); + } else { + return SetLastError(DALI_ERROR_SYSTEM, e.what()); + } + } catch (std::runtime_error &e) { + return SetLastError(DALI_ERROR_INVALID_OPERATION, e.what()); + } catch (std::exception &e) { + return SetLastError(DALI_ERROR_INTERNAL, e.what()); + } catch (const char *e) { // handle strings thrown as exceptions + return SetLastError(DALI_ERROR_INTERNAL, e); + } catch (const std::string &e) { // handle strings thrown as exceptions + return SetLastError(DALI_ERROR_INTERNAL, e.c_str()); + } catch (...) { + return SetLastError(DALI_ERROR_INTERNAL, ""); + } +} + +} // namespace dali::c_api + +using namespace dali; // NOLINT + +daliResult_t daliGetLastError() { + return c_api::g_daliLastError.result; +} + +const char *daliGetLastErrorMessage() { + return c_api::g_daliLastError.message.c_str(); +} + +void daliClearLastError() { + c_api::g_daliLastError = {}; +} + +const char *daliGetErrorName(daliResult_t result) { + return c_api::GetErrorDesc(result).name; +} + +const char *daliGetErrorDescription(daliResult_t result) { + return c_api::GetErrorDesc(result).description; +} diff --git a/dali/c_api_2/error_handling.h b/dali/c_api_2/error_handling.h new file mode 100644 index 00000000000..e9cdcaf990d --- /dev/null +++ b/dali/c_api_2/error_handling.h @@ -0,0 +1,68 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_ERROR_HANDLING_H_ +#define DALI_C_API_2_ERROR_HANDLING_H_ + +#include +#include +#include +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/core/error_handling.h" + +inline std::ostream &operator<<(std::ostream &os, daliResult_t result) { + const char *e = daliGetErrorName(result); + if (e[0] == '<') + os << "(result) << ">"; + else + os << e; + return os; +} + +inline std::string to_string(daliResult_t result) { + std::stringstream ss; + ss << result; + return ss.str(); +} + +namespace dali { +namespace c_api { + +DLL_PUBLIC daliResult_t HandleError(std::exception_ptr ex); +DLL_PUBLIC daliResult_t CheckInit(); + +class InvalidHandle : public std::invalid_argument { + public: + InvalidHandle() : std::invalid_argument("The handle is invalid") {} + explicit InvalidHandle(const std::string &what) : std::invalid_argument(what) {} + explicit InvalidHandle(const char *what) : std::invalid_argument(what) {} +}; + +inline InvalidHandle NullHandle() { return InvalidHandle("The handle must not be NULL."); } + +inline InvalidHandle NullHandle(const char *what_handle) { + return InvalidHandle(make_string("The ", what_handle, " handle must not be NULL.")); +} + +} // namespace c_api +} // namespace dali + +#define DALI_PROLOG() try { if (auto err = dali::c_api::CheckInit()) return err; else; +#define DALI_EPILOG() return DALI_SUCCESS; } catch (...) { \ + return ::dali::c_api::HandleError(std::current_exception()); \ +} + +#endif // DALI_C_API_2_ERROR_HANDLING_H_ diff --git a/dali/c_api_2/init.cc b/dali/c_api_2/init.cc new file mode 100644 index 00000000000..33fbe4695d2 --- /dev/null +++ b/dali/c_api_2/init.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/c_api_2/error_handling.h" +#include "dali/pipeline/init.h" +#include "dali/pipeline/operator/op_spec.h" + +using namespace dali; // NOLINT + +namespace { +std::atomic g_init_count; +std::atomic g_was_initialized; +} // namespace + +namespace dali::c_api { + daliResult_t CheckInit() { + if (g_init_count <= 0) { + if (g_was_initialized) + return DALI_ERROR_UNLOADING; + else + return daliInit(); + } + return DALI_SUCCESS; + } +} // namespace dali::c_api + +daliResult_t daliInit() { + try { // cannot use DALI_PROLOG in this function, since DALI isn't initialized yet + static int init = []() { + DALIInit(OpSpec("CPUAllocator"), + OpSpec("PinnedCPUAllocator"), + OpSpec("GPUAllocator")); + return 0; + }(); + (void)init; + g_init_count++; + g_was_initialized = true; + return DALI_SUCCESS; + } catch (...) { + return ::dali::c_api::HandleError(std::current_exception()); \ + } +} + +daliResult_t daliShutdown() { + DALI_PROLOG(); + int init_count = --g_init_count; + if (init_count < 0) { + ++g_init_count; + return DALI_ERROR_UNLOADING; + } + if (init_count == 0) { + // actual shutdown code goes here + } + DALI_EPILOG(); +} From 2ac315024b90bbdcc14424a6ec3cb9a36f58ff7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:43:03 +0100 Subject: [PATCH 4/5] C API utilities. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/c_api_2/managed_handle.h | 124 ++++++++++++++++++++++++++++++++++ dali/c_api_2/ref_counting.h | 119 ++++++++++++++++++++++++++++++++ dali/c_api_2/validation.cc | 41 +++++++++++ dali/c_api_2/validation.h | 112 ++++++++++++++++++++++++++++++ 4 files changed, 396 insertions(+) create mode 100644 dali/c_api_2/managed_handle.h create mode 100644 dali/c_api_2/ref_counting.h create mode 100644 dali/c_api_2/validation.cc create mode 100644 dali/c_api_2/validation.h diff --git a/dali/c_api_2/managed_handle.h b/dali/c_api_2/managed_handle.h new file mode 100644 index 00000000000..ce365072d7e --- /dev/null +++ b/dali/c_api_2/managed_handle.h @@ -0,0 +1,124 @@ + // Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
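Note: the initialization above is reference-counted: daliInit() bumps a counter, daliShutdown() drops it, an extra daliShutdown() reports DALI_ERROR_UNLOADING, and every other entry point lazily initializes through CheckInit(). A minimal sketch of the intended pairing on the client side (illustrative only, not part of the patch):

#define DALI_ALLOW_NEW_C_API
#include "dali/dali.h"
#include <cstdio>

int RunClientExample() {
  if (daliInit() != DALI_SUCCESS) {              // explicit init; entry points would also lazy-init
    std::fprintf(stderr, "%s\n", daliGetLastErrorMessage());
    return 1;
  }
  // ... use the API ...
  daliShutdown();                                // the last matching call shuts the library down
  return 0;
}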
+ +#ifndef DALI_C_API_2_MANAGED_HANDLE_H_ +#define DALI_C_API_2_MANAGED_HANDLE_H_ + +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/core/unique_handle.h" + +namespace dali::c_api { + +template +class RefCountedHandle { + public: + using handle_type = HandleType; + static constexpr handle_type null_handle() { return 0; } + + constexpr RefCountedHandle() : handle_(Actual::null_handle()) {} + constexpr explicit RefCountedHandle(handle_type h) : handle_(h) {} + ~RefCountedHandle() { reset(); } + + RefCountedHandle(const RefCountedHandle &h) { + handle_ = h.handle_; + if (*this) + Actual::IncRef(handle_); + } + + RefCountedHandle(RefCountedHandle &&h) noexcept { + handle_ = h.handle_; + h.handle_ = Actual::null_handle(); + } + + RefCountedHandle &operator=(const RefCountedHandle &other) { + if (other.handle_) { + Actual::IncRef(other.handle_); + } + reset(); + handle_ = other.handle_; + return *this; + } + + RefCountedHandle &operator=(RefCountedHandle &&other) noexcept { + std::swap(handle_, other.handle_); + other.reset(); + return *this; + } + + void reset() noexcept { + if (*this) + Actual::DecRef(handle_); + handle_ = Actual::null_handle(); + } + + [[nodiscard]] handle_type release() noexcept { + auto h = handle_; + handle_ = Actual::null_handle(); + return h; + } + + handle_type get() const noexcept { return handle_; } + operator handle_type() const noexcept { return get(); } + + explicit operator bool() const noexcept { return handle_ != Actual::null_handle(); } + + private: + handle_type handle_; +}; + +#define DALI_C_UNIQUE_HANDLE(Resource) \ +class Resource##Handle : public dali::UniqueHandle { \ + public: \ + using UniqueHandle::UniqueHandle; \ + static void DestroyHandle(dali##Resource##_h h) { \ + auto result = dali##Resource##Destroy(h); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + } \ +} + +#define DALI_C_REF_HANDLE(Resource) \ +class Resource##Handle \ +: public dali::c_api::RefCountedHandle { \ + public: \ + using RefCountedHandle::RefCountedHandle; \ + static int IncRef(dali##Resource##_h h) { \ + int ref = 0; \ + auto result = dali##Resource##IncRef(h, &ref); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + return ref; \ + } \ + static int DecRef(dali##Resource##_h h) { \ + int ref = 0; \ + auto result = dali##Resource##DecRef(h, &ref); \ + if (result != DALI_SUCCESS) { \ + throw std::runtime_error(daliGetLastErrorMessage()); \ + } \ + return ref; \ + } \ +} + +DALI_C_UNIQUE_HANDLE(Pipeline); +DALI_C_UNIQUE_HANDLE(PipelineOutputs); +DALI_C_REF_HANDLE(TensorList); +DALI_C_REF_HANDLE(Tensor); + + +} // namespace dali::c_api + +#endif // DALI_C_API_2_MANAGED_HANDLE_H_ diff --git a/dali/c_api_2/ref_counting.h b/dali/c_api_2/ref_counting.h new file mode 100644 index 00000000000..2db76671d14 --- /dev/null +++ b/dali/c_api_2/ref_counting.h @@ -0,0 +1,119 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_REF_COUNTING_H_ +#define DALI_C_API_2_REF_COUNTING_H_ + +#include +#include +#include + +namespace dali::c_api { + +class RefCountedObject { + public: + int IncRef() noexcept { + return std::atomic_fetch_add_explicit(&ref_, 1, std::memory_order_relaxed) + 1; + } + + int DecRef() noexcept { + int ret = std::atomic_fetch_sub_explicit(&ref_, 1, std::memory_order_acq_rel) - 1; + if (!ret) + delete this; + return ret; + } + + int RefCount() const noexcept { + return ref_.load(std::memory_order_relaxed); + } + + virtual ~RefCountedObject() = default; + private: + std::atomic ref_{1}; +}; + +template +class RefCountedPtr { + public: + constexpr RefCountedPtr() noexcept = default; + + explicit RefCountedPtr(T *ptr, bool inc_ref = false) noexcept : ptr_(ptr) { + if (inc_ref && ptr_) + ptr_->IncRef(); + } + + ~RefCountedPtr() { + reset(); + } + + template , int> = 0> + RefCountedPtr(const RefCountedPtr &other) noexcept : ptr_(other.ptr_) { + if (ptr_) + ptr_->IncRef(); + } + + template , int> = 0> + RefCountedPtr(RefCountedPtr &&other) noexcept : ptr_(other.ptr_) { + other.ptr_ = nullptr; + } + + template + std::enable_if_t, RefCountedPtr> & + operator=(const RefCountedPtr &other) noexcept { + if (ptr_ == other.ptr_) + return *this; + if (other.ptr_) + other.ptr_->IncRef(); + ptr_->DecRef(); + ptr_ = other.ptr_; + return *this; + } + + template + std::enable_if_t, RefCountedPtr> & + operator=(RefCountedPtr &&other) noexcept { + if (&other == this) + return *this; + std::swap(ptr_, other.ptr_); + other.reset(); + } + + void reset() noexcept { + if (ptr_) + ptr_->DecRef(); + ptr_ = nullptr; + } + + [[nodiscard]] T *release() noexcept { + T *p = ptr_; + ptr_ = nullptr; + return p; + } + + constexpr T *operator->() const & noexcept { return ptr_; } + + constexpr T &operator*() const & noexcept { return *ptr_; } + + constexpr T *get() const & noexcept { return ptr_; } + + private: + template + friend class RefCountedPtr; + T *ptr_ = nullptr; +}; + +} // namespace dali::c_api + +#endif // DALI_C_API_2_REF_COUNTING_H_ + diff --git a/dali/c_api_2/validation.cc b/dali/c_api_2/validation.cc new file mode 100644 index 00000000000..7513ae68a44 --- /dev/null +++ b/dali/c_api_2/validation.cc @@ -0,0 +1,41 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "dali/c_api_2/validation.h" + +namespace dali::c_api { + +void ValidateDeviceId(int device_id, bool allow_cpu_only) { + if (device_id == CPU_ONLY_DEVICE_ID && allow_cpu_only) + return; + + static int dev_count = []() { + int ndevs = 0; + CUDA_CALL(cudaGetDeviceCount(&ndevs)); + return ndevs; + }(); + + if (dev_count < 1) + throw std::runtime_error("No CUDA device found."); + + if (device_id < 0 || device_id >= dev_count) { + throw std::out_of_range(make_string( + "The device id ", device_id, " is invalid." 
+ " Valid device ids are [0..", dev_count-1, "].")); + } +} + +} // namespace dali::c_api diff --git a/dali/c_api_2/validation.h b/dali/c_api_2/validation.h new file mode 100644 index 00000000000..62c9f3425aa --- /dev/null +++ b/dali/c_api_2/validation.h @@ -0,0 +1,112 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef DALI_C_API_2_VALIDATION_H_ +#define DALI_C_API_2_VALIDATION_H_ + +#include +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/core/format.h" +#include "dali/core/span.h" +#include "dali/core/tensor_shape_print.h" +#include "dali/pipeline/data/types.h" + +namespace dali::c_api { + +inline void Validate(daliDataType_t dtype) { + if (!TypeTable::TryGetTypeInfo(dtype)) + throw std::invalid_argument(make_string("Invalid data type: ", dtype)); +} + +inline void Validate(const TensorLayout &layout, int ndim, bool allow_empty = true) { + if (layout.empty() && allow_empty) + return; + if (layout.ndim() != ndim) + throw std::invalid_argument(make_string( + "The layout '", layout, "' cannot describe ", ndim, "-dimensional data.")); +} + +template +void ValidateSampleShape( + int sample_index, + ShapeLike &&sample_shape, + std::optional expected_ndim = std::nullopt) { + int ndim = std::size(sample_shape); + if (expected_ndim.has_value() && ndim != *expected_ndim) + throw std::invalid_argument(make_string( + "Unexpected number of dimensions (", ndim, ") in sample ", sample_index, + ". Expected ", *expected_ndim, ".")); + + for (int j = 0; j < ndim; j++) + if (sample_shape[j] < 0) + throw std::invalid_argument(make_string( + "Negative extent encountered in the shape of sample ", sample_index, ". Offending shape: ", + TensorShape<-1>(sample_shape))); +} + +inline void ValidateNumSamples(int num_samples) { + if (num_samples < 0) + throw std::invalid_argument("The number of samples must not be negative."); +} + +inline void ValidateNDim(int ndim) { + if (ndim < 0) + throw std::invalid_argument("The number of dimensions must not be negative."); +} + + +inline void ValidateShape( + int ndim, + const int64_t *shape) { + ValidateNDim(ndim); + if (ndim > 0 && !shape) + throw std::invalid_argument("The `shape` must not be NULL when ndim > 0."); + + for (int j = 0; j < ndim; j++) + if (shape[j] < 0) + throw std::invalid_argument(make_string( + "The tensor shape must not contain negative extents. 
Got: ", + TensorShape<-1>(make_cspan(shape, ndim)))); +} + +inline void ValidateShape(int num_samples, int ndim, const int64_t *shapes) { + ValidateNumSamples(num_samples); + ValidateNDim(ndim); + if (!shapes && num_samples > 0 && ndim > 0) + throw std::invalid_argument("The `shapes` are required for non-scalar (ndim>=0) samples."); + + if (ndim > 0) { + for (int i = 0; i < num_samples; i++) + ValidateSampleShape(i, make_cspan(&shapes[i*ndim], ndim)); + } +} + +inline void Validate(daliStorageDevice_t device_type) { + if (device_type != DALI_STORAGE_CPU && device_type != DALI_STORAGE_GPU) + throw std::invalid_argument(make_string("Invalid storage device type: ", device_type)); +} + +void ValidateDeviceId(int device_id, bool allow_cpu_only); + +inline void Validate(const daliBufferPlacement_t &placement) { + Validate(placement.device_type); + if (placement.device_type == DALI_STORAGE_GPU || placement.pinned) + ValidateDeviceId(placement.device_id, placement.pinned); +} + +} // namespace dali::c_api + +#endif // DALI_C_API_2_VALIDATION_H_ From e27aa118bd861428eeed7f3e6aaec5bb1ae6f97d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zientkiewicz?= Date: Fri, 31 Jan 2025 17:44:51 +0100 Subject: [PATCH 5/5] Tensor and TensorList C API wrappers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Zientkiewicz --- dali/c_api_2/data_objects.cc | 362 ++++++++++++++++++ dali/c_api_2/data_objects.h | 613 ++++++++++++++++++++++++++++++ dali/c_api_2/data_objects_test.cc | 414 ++++++++++++++++++++ 3 files changed, 1389 insertions(+) create mode 100644 dali/c_api_2/data_objects.cc create mode 100644 dali/c_api_2/data_objects.h create mode 100644 dali/c_api_2/data_objects_test.cc diff --git a/dali/c_api_2/data_objects.cc b/dali/c_api_2/data_objects.cc new file mode 100644 index 00000000000..a579cbb70d7 --- /dev/null +++ b/dali/c_api_2/data_objects.cc @@ -0,0 +1,362 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "dali/c_api_2/data_objects.h" +#include "dali/c_api_2/error_handling.h" + +namespace dali::c_api { + +RefCountedPtr ITensor::Create(daliBufferPlacement_t placement) { + Validate(placement); + switch (placement.device_type) { + case DALI_STORAGE_CPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + if (placement.pinned) + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + case DALI_STORAGE_GPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + default: + assert(!"Unreachable code"); + return {}; + } +} + +RefCountedPtr ITensorList::Create(daliBufferPlacement_t placement) { + Validate(placement); + switch (placement.device_type) { + case DALI_STORAGE_CPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + if (placement.pinned) + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + case DALI_STORAGE_GPU: + { + auto tl = std::make_shared>(); + tl->set_pinned(placement.pinned); + tl->set_device_id(placement.device_id); + return Wrap(std::move(tl)); + } + default: + assert(!"Unreachable code"); + return {}; + } +} + +ITensor *ToPointer(daliTensor_h handle) { + if (!handle) + throw NullHandle("Tensor"); + return static_cast(handle); +} + +ITensorList *ToPointer(daliTensorList_h handle) { + if (!handle) + throw NullHandle("TensorList"); + return static_cast(handle); +} + +} // namespace dali::c_api + +using namespace dali::c_api; // NOLINT + +template +std::optional ToOptional(const T *nullable) { + if (nullable == nullptr) + return std::nullopt; + else + return *nullable; +} + +////////////////////////////////////////////////////////////////////////////// +// Tensor +////////////////////////////////////////////////////////////////////////////// + +daliResult_t daliTensorCreate(daliTensor_h *out, daliBufferPlacement_t placement) { + DALI_PROLOG(); + if (!out) + throw std::invalid_argument("The output parameter must not be NULL."); + auto t = dali::c_api::ITensor::Create(placement); + *out = t.release(); // no throwing allowed after this line! 
+ DALI_EPILOG(); +} + +daliResult_t daliTensorIncRef(daliTensor_h t, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(t); + int r = ptr->IncRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorDecRef(daliTensor_h t, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(t); + int r = ptr->DecRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorRefCount(daliTensor_h t, int *ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(t); + int r = ptr->RefCount(); + if (!ref) + throw std::invalid_argument("The output parameter must not be NULL."); + *ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorAttachBuffer( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->AttachBuffer(ndim, shape, dtype, layout, data, deleter); + DALI_EPILOG(); +} + +daliResult_t daliTensorResize( + daliTensor_h tensor, + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->Resize(ndim, shape, dtype, layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorSetLayout( + daliTensor_h tensor, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->SetLayout(layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorGetLayout( + daliTensor_h tensor, + const char **layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + if (!layout) + throw std::invalid_argument("The output parameter `layout` must not be be NULL"); + *layout = ptr->GetLayout(); + DALI_EPILOG(); +} + +daliResult_t daliTensorGetStream( + daliTensor_h tensor, + cudaStream_t *out_stream) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + if (!out_stream) + throw std::invalid_argument("The output parameter `out_stream` must not be NULL"); + auto str = ptr->GetStream(); + *out_stream = str.has_value() ? *str : cudaStream_t(-1); + return str.has_value() ? DALI_SUCCESS : DALI_NO_DATA; + DALI_EPILOG(); +} + +daliResult_t daliTensorSetStream( + daliTensor_h tensor, + const cudaStream_t *stream, + daliBool synchronize) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + ptr->SetStream(ToOptional(stream), synchronize); + DALI_EPILOG(); +} + +daliResult_t daliTensorGetDesc( + daliTensor_h tensor, + daliTensorDesc_t *out_desc) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor); + if (!out_desc) + throw std::invalid_argument("The output parameter `out_desc` must not be NULL."); + *out_desc = ptr->GetDesc(); + DALI_EPILOG(); +} + +////////////////////////////////////////////////////////////////////////////// +// TensorList +////////////////////////////////////////////////////////////////////////////// + +daliResult_t daliTensorListCreate(daliTensorList_h *out, daliBufferPlacement_t placement) { + DALI_PROLOG(); + if (!out) + throw std::invalid_argument("The output parameter must not be NULL."); + auto tl = dali::c_api::ITensorList::Create(placement); + *out = tl.release(); // no throwing allowed after this line! 
+ DALI_EPILOG(); +} + +daliResult_t daliTensorListIncRef(daliTensorList_h tl, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + int r = ptr->IncRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListDecRef(daliTensorList_h tl, int *new_ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + int r = ptr->DecRef(); + if (new_ref) + *new_ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListRefCount(daliTensorList_h tl, int *ref) { + DALI_PROLOG(); + auto *ptr = ToPointer(tl); + if (!ref) + throw std::invalid_argument("The output pointer must not be NULL."); + int r = ptr->RefCount(); + *ref = r; + DALI_EPILOG(); +} + +daliResult_t daliTensorListAttachBuffer( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->AttachBuffer(num_samples, ndim, shapes, dtype, layout, data, sample_offsets, deleter); + DALI_EPILOG(); +} + +daliResult_t daliTensorListAttachSamples( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->AttachSamples(num_samples, ndim, dtype, layout, samples, sample_deleters); + DALI_EPILOG(); +} + +daliResult_t daliTensorListResize( + daliTensorList_h tensor_list, + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->Resize(num_samples, ndim, shapes, dtype, layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorListSetLayout( + daliTensorList_h tensor_list, + const char *layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->SetLayout(layout); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetLayout( + daliTensorList_h tensor_list, + const char **layout) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!layout) + throw std::invalid_argument("The output parameter `layout` must not be be NULL"); + *layout = ptr->GetLayout(); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetStream( + daliTensorList_h tensor_list, + cudaStream_t *out_stream) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_stream) + throw std::invalid_argument("The output parameter `out_stream` must not be NULL"); + auto str = ptr->GetStream(); + *out_stream = str.has_value() ? *str : cudaStream_t(-1); + return str.has_value() ? 
DALI_SUCCESS : DALI_NO_DATA; + DALI_EPILOG(); +} + +daliResult_t daliTensorListSetStream( + daliTensorList_h tensor_list, + const cudaStream_t *stream, + daliBool synchronize) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + ptr->SetStream(ToOptional(stream), synchronize); + DALI_EPILOG(); +} + +daliResult_t daliTensorListGetTensorDesc( + daliTensorList_h tensor_list, + daliTensorDesc_t *out_tensor, + int sample_idx) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_tensor) + throw std::invalid_argument("The output parameter `out_tensor` must not be NULL."); + *out_tensor = ptr->GetTensorDesc(sample_idx); + DALI_EPILOG(); +} + +daliResult_t daliTensorListViewAsTensor( + daliTensorList_h tensor_list, + daliTensor_h *out_tensor) { + DALI_PROLOG(); + auto *ptr = ToPointer(tensor_list); + if (!out_tensor) + throw std::invalid_argument("The output parameter `out_tensor` must not be NULL."); + auto t = ptr->ViewAsTensor(); + *out_tensor = t.release(); // no throwing allowed after this line + DALI_EPILOG(); +} diff --git a/dali/c_api_2/data_objects.h b/dali/c_api_2/data_objects.h new file mode 100644 index 00000000000..72b1a3eac03 --- /dev/null +++ b/dali/c_api_2/data_objects.h @@ -0,0 +1,613 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
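Note: for reference, a hedged sketch of driving the attach-buffer entry point defined above from client code. The shapes and sizes are illustrative; a zero-initialized daliDeleter_t means the wrapper does not take ownership, since ownership is only transferred when a delete_buffer callback is supplied:

#define DALI_ALLOW_NEW_C_API
#include "dali/dali.h"
#include <cstdint>
#include <cstdio>
#include <vector>

void AttachExample() {
  daliBufferPlacement_t placement{};
  placement.device_type = DALI_STORAGE_CPU;

  // Two HWC samples stored back-to-back in one caller-owned buffer.
  int64_t shapes[] = { 2, 3, 1,
                       4, 5, 1 };
  std::vector<uint32_t> data(2 * 3 * 1 + 4 * 5 * 1);
  ptrdiff_t offsets[] = { 0, static_cast<ptrdiff_t>(2 * 3 * 1 * sizeof(uint32_t)) };

  daliTensorList_h tl = nullptr;
  daliDeleter_t no_deleter{};   // no callbacks: the caller keeps ownership
  if (daliTensorListCreate(&tl, placement) != DALI_SUCCESS ||
      daliTensorListAttachBuffer(tl, 2, 3, shapes, DALI_UINT32, "HWC",
                                 data.data(), offsets, no_deleter) != DALI_SUCCESS) {
    std::fprintf(stderr, "%s\n", daliGetLastErrorMessage());
  }
  if (tl)
    daliTensorListDecRef(tl, nullptr);
}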
+ +#ifndef DALI_C_API_2_DATA_OBJECTS_H_ +#define DALI_C_API_2_DATA_OBJECTS_H_ + +#include +#include +#include +#include +#include +#define DALI_ALLOW_NEW_C_API +#include "dali/dali.h" +#include "dali/pipeline/data/tensor_list.h" +#include "dali/c_api_2/ref_counting.h" +#include "dali/c_api_2/validation.h" + + +struct _DALITensorList {}; +struct _DALITensor {}; + +namespace dali { +namespace c_api { + +////////////////////////////////////////////////////////////////////////////// +// Interfaces +////////////////////////////////////////////////////////////////////////////// + +class ITensor : public _DALITensor, public RefCountedObject { + public: + virtual ~ITensor() = default; + + virtual void Resize( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout) = 0; + + virtual void AttachBuffer( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter) = 0; + + virtual daliBufferPlacement_t GetBufferPlacement() const = 0; + + virtual const char *GetLayout() const = 0; + + virtual void SetLayout(const char *layout) = 0; + + virtual void SetStream(std::optional stream, bool synchronize) = 0; + + virtual std::optional GetStream() const = 0; + + virtual std::optional GetReadyEvent() const = 0; + + virtual cudaEvent_t GetOrCreateReadyEvent() = 0; + + virtual daliTensorDesc_t GetDesc() const = 0; + + static RefCountedPtr Create(daliBufferPlacement_t placement); +}; + + +class ITensorList : public _DALITensorList, public RefCountedObject { + public: + virtual ~ITensorList() = default; + + virtual void Resize( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout) = 0; + + virtual void AttachBuffer( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) = 0; + + virtual void AttachSamples( + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) = 0; + + virtual daliBufferPlacement_t GetBufferPlacement() const = 0; + + virtual const char *GetLayout() const = 0; + + virtual void SetLayout(const char *layout) = 0; + + virtual void SetStream(std::optional stream, bool synchronize) = 0; + + virtual std::optional GetStream() const = 0; + + virtual std::optional GetReadyEvent() const = 0; + + virtual cudaEvent_t GetOrCreateReadyEvent() = 0; + + virtual daliTensorDesc_t GetTensorDesc(int sample) const = 0; + + virtual RefCountedPtr ViewAsTensor() const = 0; + + static RefCountedPtr Create(daliBufferPlacement_t placement); +}; + + +////////////////////////////////////////////////////////////////////////////// +// Implementation +////////////////////////////////////////////////////////////////////////////// + + +struct BufferDeleter { + daliDeleter_t deleter; + AccessOrder deletion_order; + + void operator()(void *data) { + if (deleter.delete_buffer) { + cudaStream_t stream = deletion_order.stream(); + deleter.delete_buffer(deleter.deleter_ctx, data, + deletion_order.is_device() ? 
&stream : nullptr); + } + if (deleter.destroy_context) { + deleter.destroy_context(deleter.deleter_ctx); + } + } +}; + +template +class TensorWrapper : public ITensor { + public: + explicit TensorWrapper(std::shared_ptr> t) : t_(std::move(t)) {} + + void Resize( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout) override { + ValidateShape(ndim, shape); + Validate(dtype); + if (layout) + Validate(TensorLayout(layout), ndim); + t_->Resize(TensorShape<>(make_cspan(shape, ndim)), dtype); + if (layout) + t_->SetLayout(layout); + } + + void AttachBuffer( + int ndim, + const int64_t *shape, + daliDataType_t dtype, + const char *layout, + void *data, + daliDeleter_t deleter) override { + ValidateShape(ndim, shape); + Validate(dtype); + if (layout) + Validate(TensorLayout(layout), ndim); + + TensorShape<> tshape(make_cspan(shape, ndim)); + size_t num_elements = volume(tshape); + if (num_elements > 0 && !data) + throw std::invalid_argument("The data buffer must not be NULL for a non-empty tensor."); + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == t_->ndim()) + new_layout = t_->GetLayout(); + } else { + new_layout = layout; + Validate(new_layout, ndim); + } + + t_->Reset(); + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + + std::shared_ptr buffer; + if (!deleter.delete_buffer && !deleter.destroy_context) { + buffer = std::shared_ptr(data, [](void *){}); + } else { + buffer = std::shared_ptr(data, BufferDeleter{deleter, t_->order()}); + } + + t_->ShareData( + std::move(buffer), + num_elements * element_size, + t_->is_pinned(), + tshape, + dtype, + t_->device_id(), + t_->order()); + + if (layout) + t_->SetLayout(new_layout); + } + + daliBufferPlacement_t GetBufferPlacement() const override { + daliBufferPlacement_t placement; + placement.device_id = t_->device_id(); + StorageDevice dev = backend_to_storage_device::value; + placement.device_type = static_cast(dev); + placement.pinned = t_->is_pinned(); + return placement; + } + + void SetStream(std::optional stream, bool synchronize) override { + t_->set_order(stream.has_value() ? AccessOrder(*stream) : AccessOrder::host(), synchronize); + } + + void SetLayout(const char *layout_string) { + if (layout_string) { + TensorLayout layout(layout_string); + Validate(layout, t_->ndim()); + t_->SetLayout(layout); + } else { + t_->SetLayout(""); + } + } + + const char *GetLayout() const override { + auto &layout = t_->GetLayout(); + return !layout.empty() ? 
layout.data() : nullptr; + } + + std::optional GetStream() const override { + auto o = t_->order(); + if (o.is_device()) + return o.stream(); + else + return std::nullopt; + } + + std::optional GetReadyEvent() const override { + auto &e = t_->ready_event(); + if (e) + return e.get(); + else + return std::nullopt; + } + + cudaEvent_t GetOrCreateReadyEvent() override { + auto &e = t_->ready_event(); + if (e) + return e.get(); + int device_id = t_->device_id(); + if (device_id < 0) + throw std::runtime_error("The tensor list is not associated with a CUDA device."); + t_->set_ready_event(CUDASharedEvent::Create(device_id)); + return t_->ready_event().get(); + } + + daliTensorDesc_t GetDesc() const override { + auto &shape = t_->shape(); + daliTensorDesc_t desc{}; + desc.ndim = shape.sample_dim(); + desc.data = t_->raw_mutable_data(); + desc.dtype = t_->type(); + desc.layout = GetLayout(); + desc.shape = shape.data(); + return desc; + } + + private: + std::shared_ptr> t_; +}; + +template +RefCountedPtr> Wrap(std::shared_ptr> tl) { + return RefCountedPtr>(new TensorWrapper(std::move(tl))); +} + +template +class TensorListWrapper : public ITensorList { + public: + explicit TensorListWrapper(std::shared_ptr> tl) : tl_(std::move(tl)) {} + + void Resize( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout) override { + Validate(dtype); + ValidateShape(num_samples, ndim, shapes); + if (layout) + Validate(TensorLayout(layout), ndim); + std::vector shape_data(shapes, shapes + ndim * num_samples); + tl_->Resize(TensorListShape<>(std::move(shape_data), num_samples, ndim), dtype); + if (layout) + tl_->SetLayout(layout); + } + + void AttachBuffer( + int num_samples, + int ndim, + const int64_t *shapes, + daliDataType_t dtype, + const char *layout, + void *data, + const ptrdiff_t *sample_offsets, + daliDeleter_t deleter) override { + ValidateShape(num_samples, ndim, shapes); + Validate(dtype); + + if (!data && num_samples > 0) { + for (int i = 0; i < num_samples; i++) { + auto sample_shape = make_cspan(&shapes[i*ndim], ndim); + + if (volume(sample_shape) > 0) + throw std::invalid_argument( + "The pointer to the data buffer must not be null for a non-empty tensor list."); + + if (sample_offsets && sample_offsets[i]) + throw std::invalid_argument( + "All sample_offsets must be zero when the data pointer is NULL."); + } + } + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == tl_->sample_dim()) + new_layout = tl_->GetLayout(); + } else { + new_layout = layout; + Validate(new_layout, ndim); + } + + tl_->Reset(); + tl_->SetSize(num_samples); + tl_->set_sample_dim(ndim); + tl_->SetLayout(new_layout); + tl_->set_type(dtype); + ptrdiff_t next_offset = 0; + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + + std::shared_ptr buffer; + if (!deleter.delete_buffer && !deleter.destroy_context) { + buffer = std::shared_ptr(data, [](void *){}); + } else { + buffer = std::shared_ptr(data, BufferDeleter{deleter, tl_->order()}); + } + + bool is_contiguous = true; + if (sample_offsets) { + for (int i = 0; i < num_samples; i++) { + if (sample_offsets[i] != next_offset) { + is_contiguous = false; + break; + } + auto num_elements = volume(make_cspan(&shapes[i*ndim], ndim)); + next_offset += num_elements * element_size; + } + } + + if (is_contiguous) { + tl_->SetContiguity(BatchContiguity::Contiguous); + std::vector shape_data(shapes, shapes + ndim * num_samples); + TensorListShape<> tl_shape(shape_data, num_samples, ndim); + 
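+      // Samples are laid out back-to-back, so the whole batch shares one allocation below.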
tl_->ShareData( + std::move(buffer), + next_offset, + tl_->is_pinned(), + tl_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } else { + tl_->SetContiguity(BatchContiguity::Automatic); + next_offset = 0; + + for (int i = 0; i < num_samples; i++) { + TensorShape<> sample_shape(make_cspan(&shapes[i*ndim], ndim)); + void *sample_data; + size_t sample_bytes = volume(sample_shape) * element_size; + if (sample_offsets) { + sample_data = static_cast(data) + sample_offsets[i]; + } else { + sample_data = static_cast(data) + next_offset; + next_offset += sample_bytes; + } + tl_->SetSample( + i, + std::shared_ptr(buffer, sample_data), + sample_bytes, + tl_->is_pinned(), + sample_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } + } + } + + void AttachSamples( + int num_samples, + int ndim, + daliDataType_t dtype, + const char *layout, + const daliTensorDesc_t *samples, + const daliDeleter_t *sample_deleters) override { + ValidateNumSamples(num_samples); + if (num_samples > 0 && !samples) + throw std::invalid_argument("The pointer to sample descriptors must not be NULL."); + if (ndim < 0) { + if (num_samples == 0) + throw std::invalid_argument( + "The number of dimensions must not be negative when num_samples is 0."); + else + ndim = samples[0].ndim; + } + if (dtype == DALI_NO_TYPE) { + if (num_samples == 0) + throw std::invalid_argument( + "A valid data type must be provided when there's no sample to take it from."); + dtype = samples[0].dtype; + } + Validate(dtype); + + for (int i = 0; i < num_samples; i++) { + if (ndim && !samples[i].shape) + throw std::invalid_argument(make_string("Got NULL shape in sample ", i, ".")); + if (samples[i].dtype != dtype) + throw std::invalid_argument(make_string("Unexpected data type in sample ", i, ". Got: ", + samples[i].dtype, ", expected ", dtype, ".")); + ValidateSampleShape(i, make_cspan(samples[i].shape, samples[i].ndim), ndim);; + + if (!samples[i].data && volume(make_cspan(samples[i].shape, ndim))) + throw std::invalid_argument(make_string( + "Got NULL data pointer in a non-empty sample ", i, ".")); + } + + TensorLayout new_layout = {}; + + if (!layout) { + if (ndim == tl_->sample_dim()) + new_layout = tl_->GetLayout(); + } else { + new_layout = layout; + Validate(new_layout, ndim); + } + + tl_->Reset(); + tl_->SetSize(num_samples); + tl_->set_sample_dim(ndim); + tl_->SetLayout(new_layout); + + auto deletion_order = tl_->order(); + + auto type_info = TypeTable::GetTypeInfo(dtype); + auto element_size = type_info.size(); + for (int i = 0; i < num_samples; i++) { + TensorShape<> sample_shape(make_cspan(samples[i].shape, samples[i].ndim)); + size_t sample_bytes = volume(sample_shape) * element_size; + std::shared_ptr sample_ptr; + if (sample_deleters) { + sample_ptr = std::shared_ptr( + samples[i].data, + BufferDeleter{sample_deleters[i], deletion_order}); + } else { + sample_ptr = std::shared_ptr(samples[i].data, [](void*) {}); + } + + tl_->SetSample( + i, + sample_ptr, + sample_bytes, + tl_->is_pinned(), + sample_shape, + dtype, + tl_->device_id(), + tl_->order(), + new_layout); + } + } + + daliBufferPlacement_t GetBufferPlacement() const override { + daliBufferPlacement_t placement; + placement.device_id = tl_->device_id(); + StorageDevice dev = backend_to_storage_device::value; + placement.device_type = static_cast(dev); + placement.pinned = tl_->is_pinned(); + return placement; + } + + void SetStream(std::optional stream, bool synchronize) override { + tl_->set_order(stream.has_value() ? 
AccessOrder(*stream) : AccessOrder::host(), synchronize); + } + + void SetLayout(const char *layout_string) { + if (layout_string) { + TensorLayout layout(layout_string); + Validate(layout, tl_->sample_dim()); + tl_->SetLayout(layout); + } else { + tl_->SetLayout(""); + } + } + + const char *GetLayout() const override { + auto &layout = tl_->GetLayout(); + return !layout.empty() ? layout.data() : nullptr; + } + + std::optional GetStream() const override { + auto o = tl_->order(); + if (o.is_device()) + return o.stream(); + else + return std::nullopt; + } + + std::optional GetReadyEvent() const override { + auto &e = tl_->ready_event(); + if (e) + return e.get(); + else + return std::nullopt; + } + + cudaEvent_t GetOrCreateReadyEvent() override { + auto &e = tl_->ready_event(); + if (e) + return e.get(); + int device_id = tl_->device_id(); + if (device_id < 0) + throw std::runtime_error("The tensor list is not associated with a CUDA device."); + tl_->set_ready_event(CUDASharedEvent::Create(device_id)); + return tl_->ready_event().get(); + } + + daliTensorDesc_t GetTensorDesc(int sample) const override { + auto &shape = tl_->shape(); + if (sample < 0 || sample >= shape.num_samples()) + throw std::out_of_range(make_string("The sample index ", sample, " is out of range. " + "Valid indices are [0..", shape.num_samples() - 1, "].")); + daliTensorDesc_t desc{}; + desc.ndim = shape.sample_dim(); + desc.data = tl_->raw_mutable_tensor(sample); + desc.dtype = tl_->type(); + desc.layout = GetLayout(); + desc.shape = shape.tensor_shape_span(sample).data(); + return desc; + } + + RefCountedPtr ViewAsTensor() const override { + if (!tl_->IsContiguous()) + throw std::runtime_error( + "The TensorList is not contiguous and cannot be viewed as a Tensor."); + + auto t = std::make_shared>(); + auto buf = unsafe_owner(*tl_); + auto &lshape = tl_->shape(); + TensorShape<> tshape = shape_cat(lshape.num_samples(), lshape[0]); + t->ShareData( + std::move(buf), + tl_->nbytes(), + tl_->is_pinned(), + tshape, + tl_->type(), + tl_->device_id(), + tl_->order(), + tl_->ready_event()); + TensorLayout layout = tl_->GetLayout(); + if (layout.size() == lshape.sample_dim()) { + t->SetLayout("N" + layout); + } + return Wrap(std::move(t)); + } + + private: + std::shared_ptr> tl_; +}; + +template +RefCountedPtr> Wrap(std::shared_ptr> tl) { + return RefCountedPtr>(new TensorListWrapper(std::move(tl))); +} + + +ITensor *ToPointer(daliTensor_h handle); +ITensorList *ToPointer(daliTensorList_h handle); + +} // namespace c_api +} // namespace dali + +#endif // DALI_C_API_2_DATA_OBJECTS_H_ diff --git a/dali/c_api_2/data_objects_test.cc b/dali/c_api_2/data_objects_test.cc new file mode 100644 index 00000000000..2102ae1f548 --- /dev/null +++ b/dali/c_api_2/data_objects_test.cc @@ -0,0 +1,414 @@ +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
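Note: a hedged sketch of how the wrapper types above cross the C boundary: an internal TensorList is wrapped once, and the single reference held by the returned RefCountedPtr is released into the opaque handle that a client later drops with daliTensorListDecRef. Only the names declared above are real; the function below is illustrative:

#include <memory>
#include "dali/c_api_2/data_objects.h"

daliTensorList_h MakeHandleExample() {
  auto tl = std::make_shared<dali::TensorList<dali::CPUBackend>>();
  auto wrapped = dali::c_api::Wrap(std::move(tl));   // RefCountedPtr holding the only reference
  return wrapped.release();                          // ownership moves to the returned handle
}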
+ +#include "dali/c_api_2/data_objects.h" +#include +#include "dali/c_api_2/managed_handle.h" +#include "dali/core/span.h" + +TEST(CAPI2_TensorListTest, NullHandle) { + daliTensorList_h h = nullptr; + int ref = 0; + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListIncRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListDecRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorListRefCount(h, &ref)); +} + +TEST(CAPI2_TensorListTest, CreateDestroy) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + placement.pinned = false; + daliTensorList_h h = nullptr; + daliResult_t r = daliTensorListCreate(&h, placement); + ASSERT_NE(h, nullptr); + dali::c_api::TensorListHandle tl(h); + ASSERT_EQ(h, tl.get()); + ASSERT_EQ(r, DALI_SUCCESS); + + int ref = -1; + EXPECT_EQ(daliTensorListRefCount(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 1); + ref = -1; + + h = tl.release(); + EXPECT_EQ(daliTensorListDecRef(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 0); +} + +inline auto CreateTensorList(daliBufferPlacement_t placement) { + daliTensorList_h handle; + auto err = daliTensorListCreate(&handle, placement); + switch (err) { + case DALI_SUCCESS: + break; + case DALI_ERROR_OUT_OF_MEMORY: + throw std::bad_alloc(); + case DALI_ERROR_INVALID_ARGUMENT: + throw std::invalid_argument(daliGetLastErrorMessage()); + default: + throw std::runtime_error(daliGetLastErrorMessage()); + } + return dali::c_api::TensorListHandle(handle); +} + +void TestTensorListResize(daliStorageDevice_t storage_device) { + daliBufferPlacement_t placement{}; + placement.device_type = storage_device; + int64_t shapes[] = { + 480, 640, 3, + 600, 800, 4, + 348, 720, 1, + 1080, 1920, 3 + }; + daliDataType_t dtype = DALI_UINT32; + + auto tl = CreateTensorList(placement); + EXPECT_EQ(daliTensorListResize(tl, 4, 3, nullptr, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, -1, 3, shapes, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, -1, shapes, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, "ABCD"), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = -1; + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, "HWC"), DALI_ERROR_INVALID_ARGUMENT); + shapes[0] = 480; + EXPECT_EQ(daliTensorListResize(tl, 1, 3, shapes, dtype, "HWC"), DALI_SUCCESS); + // resize, but keep the layout + EXPECT_EQ(daliTensorListResize(tl, 4, 3, shapes, dtype, nullptr), DALI_SUCCESS); + + size_t element_size = dali::TypeTable::GetTypeInfo(dtype).size(); + + ptrdiff_t offset = 0; + const char *base; + for (int i = 0; i < 4; i++) { + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorListGetTensorDesc(tl, &desc, i), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + if (i == 0) + base = static_cast(desc.data); + EXPECT_EQ(desc.data, base + offset); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], shapes[3 * i + j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * element_size; + if (storage_device == DALI_STORAGE_GPU) { + // Check that the data is accessible for the GPU + EXPECT_EQ(cudaMemset(desc.data, 0, sample_bytes), cudaSuccess); + } else { + // Check that the data is accessible for the CPU + memset(desc.data, 0, sample_bytes); // just not crashing is OK + } + offset += sample_bytes; + } + if (storage_device == DALI_STORAGE_GPU) { + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); + } +} + +struct 
TestDeleterCtx { + void *expected_data; + int buffer_delete_count; + int context_delete_count; +}; + +template +inline std::pair> +MakeTestDeleter(element_t *expected_data) { + auto ctx = std::unique_ptr(new TestDeleterCtx{ expected_data, 0, 0 }); + daliDeleter_t deleter = {}; + deleter.deleter_ctx = ctx.get(); + deleter.delete_buffer = [](void *vctx, void *data, const cudaStream_t *stream) { + ASSERT_NE(data, nullptr); + auto *ctx = static_cast(vctx); + EXPECT_EQ(ctx->context_delete_count, 0); + EXPECT_EQ(ctx->buffer_delete_count, 0); + EXPECT_EQ(data, ctx->expected_data); + ctx->buffer_delete_count++; + delete [] static_cast(data); + }; + deleter.destroy_context = [](void *vctx) { + auto *ctx = static_cast(vctx); + EXPECT_EQ(ctx->context_delete_count, 0); + EXPECT_EQ(ctx->buffer_delete_count, 1); + ctx->context_delete_count++; + }; + return { deleter, std::move(ctx) }; +} + +TEST(CAPI2_TensorListTest, AttachBuffer) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + using element_t = int; + daliDataType_t dtype = dali::type2id::value; + dali::TensorListShape<> lshape({ + { 480, 640, 3 }, + { 600, 800, 4 }, + { 348, 720, 1 }, + { 1080, 1920, 3 } + }); + auto size = lshape.num_elements(); + std::unique_ptr data(new element_t[size]); + + ptrdiff_t offsets[4] = {}; + for (int i = 1; i < 4; i++) + offsets[i] = offsets[i - 1] + volume(lshape[i - 1]) * sizeof(element_t); + + auto [deleter, ctx] = MakeTestDeleter(data.get()); + + auto tl = CreateTensorList(placement); + ASSERT_EQ(daliTensorListAttachBuffer( + tl, + lshape.num_samples(), + lshape.sample_dim(), + lshape.data(), + dtype, + "HWC", + data.get(), + offsets, + deleter), DALI_SUCCESS); + + void *data_ptr = data.release(); // the buffer is now owned by the tensor list + + ptrdiff_t offset = 0; + const char *base = static_cast(data_ptr); + for (int i = 0; i < 4; i++) { + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorListGetTensorDesc(tl, &desc, i), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + EXPECT_EQ(desc.data, base + offset); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], lshape[i][j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * sizeof(element_t); + offset += sample_bytes; + } + + tl.reset(); + + EXPECT_EQ(ctx->buffer_delete_count, 1) << "Buffer deleter not called"; + EXPECT_EQ(ctx->context_delete_count, 1) << "Deleter context not destroyed"; +} + + +TEST(CAPI2_TensorListTest, ViewAsTensor) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + using element_t = int; + daliDataType_t dtype = dali::type2id::value; + dali::TensorListShape<> lshape = dali::uniform_list_shape(4, { 480, 640, 3 }); + auto size = lshape.num_elements(); + std::unique_ptr data(new element_t[size]); + + ptrdiff_t sample_size = volume(lshape[0]) * sizeof(element_t); + + ptrdiff_t offsets[4] = { + 0, + 1 * sample_size, + 2 * sample_size, + 3 * sample_size, + }; + + auto [deleter, ctx] = MakeTestDeleter(data.get()); + + auto tl = CreateTensorList(placement); + ASSERT_EQ(daliTensorListAttachBuffer( + tl, + lshape.num_samples(), + lshape.sample_dim(), + lshape.data(), + dtype, + "HWC", + data.get(), + offsets, + deleter), DALI_SUCCESS); + + void *data_ptr = data.release(); // the buffer is now owned by the tensor list + + daliTensor_h ht = nullptr; + EXPECT_EQ(daliTensorListViewAsTensor(tl, &ht), DALI_SUCCESS); + ASSERT_NE(ht, nullptr); + dali::c_api::TensorHandle t(ht); + + daliTensorDesc_t desc{}; + 
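+  // The view should cover the whole contiguous batch: same base pointer, a new
+  // leading sample dimension, and the layout promoted to "NHWC".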
EXPECT_EQ(daliTensorGetDesc(t, &desc), DALI_SUCCESS); + EXPECT_EQ(desc.data, data_ptr); + EXPECT_EQ(desc.shape[0], lshape.num_samples()); + ASSERT_EQ(desc.ndim, 4); + ASSERT_NE(desc.shape, nullptr); + EXPECT_EQ(desc.shape[1], lshape[0][0]); + EXPECT_EQ(desc.shape[2], lshape[0][1]); + EXPECT_EQ(desc.shape[3], lshape[0][2]); + EXPECT_STREQ(desc.layout, "NHWC"); + EXPECT_EQ(desc.dtype, dtype); + + tl.reset(); + + EXPECT_EQ(ctx->buffer_delete_count, 0) << "Buffer prematurely destroyed"; + EXPECT_EQ(ctx->context_delete_count, 0) << "Deleter context prematurely destroyed"; + + t.reset(); + + EXPECT_EQ(ctx->buffer_delete_count, 1) << "Buffer deleter not called"; + EXPECT_EQ(ctx->context_delete_count, 1) << "Deleter context not destroyed"; +} + + +TEST(CAPI2_TensorListTest, ViewAsTensorError) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + using element_t = int; + daliDataType_t dtype = dali::type2id::value; + dali::TensorListShape<> lshape = dali::uniform_list_shape(4, { 480, 640, 3 }); + auto size = lshape.num_elements(); + std::unique_ptr data(new element_t[size]); + + ptrdiff_t sample_size = volume(lshape[0]) * sizeof(element_t); + + // The samples are not in order + ptrdiff_t offsets[4] = { + 0, + 2 * sample_size, + 1 * sample_size, + 3 * sample_size, + }; + + auto [deleter, ctx] = MakeTestDeleter(data.get()); + + auto tl = CreateTensorList(placement); + ASSERT_EQ(daliTensorListAttachBuffer( + tl, + lshape.num_samples(), + lshape.sample_dim(), + lshape.data(), + dtype, + "HWC", + data.get(), + offsets, + deleter), DALI_SUCCESS); + + void *data_ptr = data.release(); // the buffer is now owned by the tensor list + + daliTensor_h ht = nullptr; + EXPECT_EQ(daliTensorListViewAsTensor(tl, &ht), DALI_ERROR_INVALID_OPERATION); +} + + +TEST(CAPI2_TensorListTest, ResizeCPU) { + TestTensorListResize(DALI_STORAGE_CPU); +} + +TEST(CAPI2_TensorListTest, ResizeGPU) { + TestTensorListResize(DALI_STORAGE_GPU); +} + + + + +TEST(CAPI2_TensorTest, NullHandle) { + daliTensor_h h = nullptr; + int ref = 0; + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorIncRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorDecRef(h, &ref)); + EXPECT_EQ(DALI_ERROR_INVALID_HANDLE, daliTensorRefCount(h, &ref)); +} + +TEST(CAPI2_TensorTest, CreateDestroy) { + daliBufferPlacement_t placement{}; + placement.device_type = DALI_STORAGE_CPU; + placement.pinned = false; + daliTensor_h h = nullptr; + daliResult_t r = daliTensorCreate(&h, placement); + ASSERT_NE(h, nullptr); + dali::c_api::TensorHandle tl(h); + ASSERT_EQ(h, tl.get()); + ASSERT_EQ(r, DALI_SUCCESS); + + int ref = -1; + EXPECT_EQ(daliTensorRefCount(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 1); + ref = -1; + + h = tl.release(); + EXPECT_EQ(daliTensorDecRef(h, &ref), DALI_SUCCESS); + EXPECT_EQ(ref, 0); +} + + +inline auto CreateTensor(daliBufferPlacement_t placement) { + daliTensor_h handle; + auto err = daliTensorCreate(&handle, placement); + switch (err) { + case DALI_SUCCESS: + break; + case DALI_ERROR_OUT_OF_MEMORY: + throw std::bad_alloc(); + case DALI_ERROR_INVALID_ARGUMENT: + throw std::invalid_argument(daliGetLastErrorMessage()); + default: + throw std::runtime_error(daliGetLastErrorMessage()); + } + return dali::c_api::TensorHandle(handle); +} + +void TestTensorResize(daliStorageDevice_t storage_device) { + daliBufferPlacement_t placement{}; + placement.device_type = storage_device; + auto t = CreateTensor(placement); + int64_t shape[] = { + 1080, 1920, 3 + }; + daliDataType_t dtype = DALI_INT16; + + 
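+  // Invalid requests first: a null shape, a negative ndim, a layout of the wrong
+  // length and a negative extent must all be rejected as invalid arguments.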
EXPECT_EQ(daliTensorResize(t, 3, nullptr, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorResize(t, -1, shape, dtype, nullptr), DALI_ERROR_INVALID_ARGUMENT); + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "ABCD"), DALI_ERROR_INVALID_ARGUMENT); + shape[0] = -1; + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "HWC"), DALI_ERROR_INVALID_ARGUMENT); + shape[0] = 1; + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, "HWC"), DALI_SUCCESS); + + shape[0] = 1080; + EXPECT_EQ(daliTensorResize(t, 3, shape, dtype, nullptr), DALI_SUCCESS); + + size_t element_size = dali::TypeTable::GetTypeInfo(dtype).size(); + + ptrdiff_t offset = 0; + daliTensorDesc_t desc{}; + EXPECT_EQ(daliTensorGetDesc(t, &desc), DALI_SUCCESS); + ASSERT_EQ(desc.ndim, 3); + ASSERT_NE(desc.data, nullptr); + EXPECT_STREQ(desc.layout, "HWC"); + EXPECT_EQ(desc.dtype, dtype); + for (int j = 0; j < 3; j++) + EXPECT_EQ(desc.shape[j], shape[j]); + size_t sample_bytes = volume(dali::make_cspan(desc.shape, desc.ndim)) * element_size; + if (storage_device == DALI_STORAGE_GPU) { + // Check that the data is accessible for the GPU + EXPECT_EQ(cudaMemset(desc.data, 0, sample_bytes), cudaSuccess); + } else { + // Check that the data is accessible for the CPU + memset(desc.data, 0, sample_bytes); // just not crashing is OK + } + if (storage_device == DALI_STORAGE_GPU) { + EXPECT_EQ(cudaDeviceSynchronize(), cudaSuccess); + } +} + +TEST(CAPI2_TensorTest, ResizeCPU) { + TestTensorResize(DALI_STORAGE_CPU); +} + +TEST(CAPI2_TensorTest, ResizeGPU) { + TestTensorResize(DALI_STORAGE_GPU); +}
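Taken together, the series is enough for a small end-to-end exercise of the new API. The sketch below is illustrative only; it uses the entry points introduced in these patches and collapses error handling into a single helper:

#define DALI_ALLOW_NEW_C_API
#include "dali/dali.h"
#include <cstdint>
#include <cstdio>

static void Check(daliResult_t r) {
  if (r != DALI_SUCCESS && r != DALI_NO_DATA)
    std::fprintf(stderr, "%s: %s\n", daliGetErrorName(r), daliGetLastErrorMessage());
}

int main() {
  Check(daliInit());

  daliBufferPlacement_t placement{};
  placement.device_type = DALI_STORAGE_CPU;

  daliTensor_h t = nullptr;
  Check(daliTensorCreate(&t, placement));

  int64_t shape[] = {480, 640, 3};
  Check(daliTensorResize(t, 3, shape, DALI_UINT32, "HWC"));

  daliTensorDesc_t desc{};
  Check(daliTensorGetDesc(t, &desc));
  std::printf("ndim = %d, first extent = %lld\n", desc.ndim, (long long)desc.shape[0]);

  Check(daliTensorDecRef(t, nullptr));
  Check(daliShutdown());
  return 0;
}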