From 3adbb45170ff85b0afe8bf6303cc2806625fd6e9 Mon Sep 17 00:00:00 2001 From: Joaquin Anton Guirao Date: Tue, 4 Feb 2025 09:49:00 +0100 Subject: [PATCH] Ensure allocated temp memory is usable by nvImageCodec streams, as we are skipping pre-sync due to unnecessary overhead in the general case Signed-off-by: Joaquin Anton Guirao --- dali/operators/imgcodec/image_decoder.h | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/dali/operators/imgcodec/image_decoder.h b/dali/operators/imgcodec/image_decoder.h index 129b198e65..efeb80c787 100644 --- a/dali/operators/imgcodec/image_decoder.h +++ b/dali/operators/imgcodec/image_decoder.h @@ -552,15 +552,6 @@ class ImageDecoder : public StatelessOperator { MAKE_SEMANTIC_VERSION(req_major, req_minor, req_patch); } - /** - * @brief nvImageCodec up to 0.2 doesn't synchronize with the user stream before decoding. - * Because of that, we need to host synchronize before passing the async allocated buffer - * to the decoding function - */ - bool need_host_sync_alloc() { - return !version_at_least(0, 3, 0); - } - void PrepareOutput(SampleState &st, void *out_ptr, const ROI &roi, const Workspace &ws) { // Make a copy of the parsed img info. We might modify it // (for example, request planar vs. interleaved, etc) @@ -794,7 +785,9 @@ class ImageDecoder : public StatelessOperator { size_t nsamples_decode = batch_images_.size(); size_t nsamples_cache = nsamples - nsamples_decode; - if (ws.has_stream() && need_host_sync_alloc() && any_need_processing) { + // Ensure allocated memory is usable by the decoder's internal streams, + // as we are intentionally skipping pre-sync to avoid slowing down the general case. + if (ws.has_stream() && any_need_processing) { DomainTimeRange tr("alloc sync", DomainTimeRange::kOrange); CUDA_CALL(cudaStreamSynchronize(ws.stream())); }