Skip to content

Commit d54ce56

Browse files
Add an environment variable to control the VCN hardware color space conversion enablement (#167) (#168)
* Add an environment variable to control the VCN hardware color space conversion enablement
* Update version
* Optimize the mem_alignment logic for hip memory allocation
1 parent 7890bae commit d54ce56

File tree

4 files changed

+36
-33
lines changed

4 files changed

+36
-33
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Documentation for rocJPEG is available at
44
[https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/)
55

6-
## rocjpeg 1.0.0 for ROCm 7.0.0
6+
## rocjpeg 1.1.0 for ROCm 7.0.0
77

88
## Added
99
* cmake config files

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ endif()
4242

4343
# rocjpeg Version
4444
# NOTE: package version and rocjpeg_version.h is generated with this version
45-
set(VERSION "1.0.0")
45+
set(VERSION "1.1.0")
4646

4747
# Set Project Version and Language
4848
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)

samples/rocjpeg_samples_utils.h

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -331,30 +331,30 @@ class RocJpegUtils {
331331
switch (subsampling) {
332332
case ROCJPEG_CSS_444:
333333
num_channels = 3;
334-
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
335-
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
334+
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
335+
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
336336
break;
337337
case ROCJPEG_CSS_440:
338338
num_channels = 3;
339-
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
340-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
341-
channel_sizes[2] = channel_sizes[1] = align(output_image.pitch[0] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment);
339+
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
340+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
341+
channel_sizes[2] = channel_sizes[1] = output_image.pitch[0] * ((is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)) >> 1);
342342
break;
343343
case ROCJPEG_CSS_422:
344344
num_channels = 1;
345-
output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 2;
346-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
345+
output_image.pitch[0] = (is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment)) * 2;
346+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
347347
break;
348348
case ROCJPEG_CSS_420:
349349
num_channels = 2;
350-
output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
351-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
352-
channel_sizes[1] = align(output_image.pitch[1] * ((is_roi_valid ? roi_height : heights[0]) >> 1), mem_alignment);
350+
output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
351+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
352+
channel_sizes[1] = output_image.pitch[1] * ((is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment)) >> 1);
353353
break;
354354
case ROCJPEG_CSS_400:
355355
num_channels = 1;
356-
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
357-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
356+
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
357+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
358358
break;
359359
default:
360360
std::cout << "Unknown chroma subsampling!" << std::endl;
@@ -364,32 +364,32 @@ class RocJpegUtils {
364364
case ROCJPEG_OUTPUT_YUV_PLANAR:
365365
if (subsampling == ROCJPEG_CSS_400) {
366366
num_channels = 1;
367-
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
368-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
367+
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
368+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
369369
} else {
370370
num_channels = 3;
371-
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
372-
output_image.pitch[1] = is_roi_valid ? roi_width : widths[1];
373-
output_image.pitch[2] = is_roi_valid ? roi_width : widths[2];
374-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
375-
channel_sizes[1] = align(output_image.pitch[1] * (is_roi_valid ? roi_height : heights[1]), mem_alignment);
376-
channel_sizes[2] = align(output_image.pitch[2] * (is_roi_valid ? roi_height : heights[2]), mem_alignment);
371+
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
372+
output_image.pitch[1] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[1], mem_alignment);
373+
output_image.pitch[2] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[2], mem_alignment);
374+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
375+
channel_sizes[1] = output_image.pitch[1] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[1], mem_alignment));
376+
channel_sizes[2] = output_image.pitch[2] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[2], mem_alignment));
377377
}
378378
break;
379379
case ROCJPEG_OUTPUT_Y:
380380
num_channels = 1;
381-
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
382-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
381+
output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
382+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
383383
break;
384384
case ROCJPEG_OUTPUT_RGB:
385385
num_channels = 1;
386-
output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3;
387-
channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
386+
output_image.pitch[0] = (is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment)) * 3;
387+
channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
388388
break;
389389
case ROCJPEG_OUTPUT_RGB_PLANAR:
390390
num_channels = 3;
391-
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
392-
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]), mem_alignment);
391+
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = is_roi_valid ? align(roi_width, mem_alignment) : align(widths[0], mem_alignment);
392+
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * (is_roi_valid ? align(roi_height, mem_alignment) : align(heights[0], mem_alignment));
393393
break;
394394
default:
395395
std::cout << "Unknown output format!" << std::endl;
@@ -567,9 +567,9 @@ class RocJpegUtils {
567567
return;
568568
}
569569

570-
uint32_t channel0_size = output_image->pitch[0] * heights[0];
571-
uint32_t channel1_size = output_image->pitch[1] * heights[1];
572-
uint32_t channel2_size = output_image->pitch[2] * heights[2];
570+
uint32_t channel0_size = output_image->pitch[0] * align(heights[0], mem_alignment);
571+
uint32_t channel1_size = output_image->pitch[1] * align(heights[1], mem_alignment);
572+
uint32_t channel2_size = output_image->pitch[2] * align(heights[2], mem_alignment);
573573

574574
uint32_t output_image_size = channel0_size + channel1_size + channel2_size;
575575

@@ -628,7 +628,7 @@ class RocJpegUtils {
628628
}
629629

630630
private:
631-
static const int mem_alignment = 4 * 1024 * 1024;
631+
static const int mem_alignment = 16;
632632
/**
633633
* @brief Shows the help message and exits.
634634
*

src/rocjpeg_vaapi_decoder.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,8 @@ void RocJpegVappiDecoder::GetNumJpegCores() {
429429
uint32_t major_version = 0, minor_version = 0;
430430
uint32_t num_jpeg_cores = 0;
431431
int error_code = 0;
432+
const char *enable_vcn_hw_csc_str = std::getenv("ROCJPEG_ENABLE_VCN_HW_CSC");
433+
bool enable_vcn_hw_csc = (enable_vcn_hw_csc_str != nullptr && strcmp(enable_vcn_hw_csc_str, "1") == 0);
432434
if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) {
433435
ERR("amdgpu_device_initialize failed!");
434436
return;
@@ -437,7 +439,8 @@ void RocJpegVappiDecoder::GetNumJpegCores() {
437439
if (!error_code) {
438440
current_vcn_jpeg_spec_.num_jpeg_cores = num_jpeg_cores;
439441
// Set the capabilities based on the number of JPEG cores
440-
current_vcn_jpeg_spec_.can_roi_decode = current_vcn_jpeg_spec_.can_convert_to_rgb = (num_jpeg_cores >= 8);
442+
current_vcn_jpeg_spec_.can_roi_decode = (num_jpeg_cores >= 8);
443+
current_vcn_jpeg_spec_.can_convert_to_rgb = (num_jpeg_cores >= 8) && enable_vcn_hw_csc;
441444
} else {
442445
ERR("Failed to get the number of jpeg cores.");
443446
}

0 commit comments

Comments
 (0)