Skip to content

Commit

Permalink
Ensure allocated temp memory is usable by nvImageCodec streams, as we are skipping pre-sync due to unnecessary overhead in the general case
Browse files: browse the repository at this point in the history

Signed-off-by: Joaquin Anton Guirao <[email protected]>
  • Loading branch information
jantonguirao committed Feb 4, 2025
1 parent cefc16b commit 3adbb45
Showing 1 changed file with 3 additions and 10 deletions.
13 changes: 3 additions & 10 deletions dali/operators/imgcodec/image_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,15 +552,6 @@ class ImageDecoder : public StatelessOperator<Backend> {
MAKE_SEMANTIC_VERSION(req_major, req_minor, req_patch);
}

/**
 * @brief Tells whether an async-allocated buffer must be host-synchronized before decoding.
 *
 * nvImageCodec releases older than 0.3.0 do not synchronize with the user stream before
 * decoding, so the buffer has to be host-synchronized before it is handed to the
 * decoding function.
 *
 * @return true when version_at_least(0, 3, 0) does not hold, false otherwise.
 */
bool need_host_sync_alloc() {
  const bool is_0_3_or_newer = version_at_least(0, 3, 0);
  return !is_0_3_or_newer;
}

void PrepareOutput(SampleState &st, void *out_ptr, const ROI &roi, const Workspace &ws) {
// Make a copy of the parsed img info. We might modify it
// (for example, request planar vs. interleaved, etc)
Expand Down Expand Up @@ -794,7 +785,9 @@ class ImageDecoder : public StatelessOperator<Backend> {
size_t nsamples_decode = batch_images_.size();
size_t nsamples_cache = nsamples - nsamples_decode;

if (ws.has_stream() && need_host_sync_alloc() && any_need_processing) {
// Ensure allocated memory is usable by the decoder's internal streams,
// as we are intentionally skipping pre-sync to avoid slowing down the general case.
if (ws.has_stream() && any_need_processing) {
DomainTimeRange tr("alloc sync", DomainTimeRange::kOrange);
CUDA_CALL(cudaStreamSynchronize(ws.stream()));
}
Expand Down

0 comments on commit 3adbb45

Please sign in to comment.