NVIDIA · JanuszL · Jan 27, 2025 · Apr 30, 2024 · Jan 27, 2025 · mzient
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -352,7 +352,7 @@ set(CMAKE_C_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 17)
 
 # CXX flags
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-free-nonheap-object -Wno-unused-variable -Wno-unused-function -Wno-strict-overflow -fno-strict-aliasing -fPIC -fvisibility=hidden")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-free-nonheap-object -Wno-unused-variable -Wno-unused-function -Wno-strict-overflow -fno-strict-aliasing -fPIC -fvisibility=hidden -Wno-array-bounds")
 
 if (WERROR)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")

diff --git a/cmake/CUDA_utils.cmake b/cmake/CUDA_utils.cmake
@@ -40,7 +40,7 @@ endfunction()
 # List of currently used arch values
 if (${ARCH} MATCHES "aarch64-")
   # aarch64-linux
-  set(CUDA_known_archs "53" "62" "72" "75" "87")
+  set(CUDA_known_archs "53" "62" "72" "75" "87" "90a")
 elseif (${ARCH} MATCHES "aarch64")
   # aarch64 SBSA, only >=Volta
   # from the whole list/; "70" "75" "80" "86"

diff --git a/dali/kernels/test/scatter_gather_test.cc b/dali/kernels/test/scatter_gather_test.cc
@@ -15,6 +15,7 @@
 #include <gtest/gtest.h>
 #include <vector>
 #include <algorithm>
+#include <random>
 
 #include "dali/core/cuda_error.h"
 #include "dali/core/mm/memory.h"
@@ -132,8 +133,10 @@ class ScatterGatherTest : public testing::Test {
       j += l;
     }
 
-    std::random_shuffle(ranges.begin(), ranges.end());
-    std::random_shuffle(back_ranges.begin(), back_ranges.end());
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(ranges.begin(), ranges.end(), g);
+    std::shuffle(back_ranges.begin(), back_ranges.end(), g);
 
     this->template Memcpy<kind>(in_ptr.get(), in.data(), in.size(), cudaMemcpyHostToDevice);
     this->template Memset<kind>(out_ptr.get(), 0, out.size());

diff --git a/dali/operators/image/crop/bbox_crop.cc b/dali/operators/image/crop/bbox_crop.cc
@@ -679,7 +679,9 @@ class RandomBBoxCropImpl : public OpImplBase<CPUBackend> {
 
     std::array<int, ndim> order;
     std::iota(order.begin(), order.end(), 0);
-    std::random_shuffle(order.begin(), order.end());
+    std::random_device rd;
+    std::mt19937 g(rd());
+    std::shuffle(order.begin(), order.end(), g);
 
     float max_extent = 0.0f;
     for (int d = 0; d < ndim; d++) {

diff --git a/docker/Dockerfile.build.aarch64-linux b/docker/Dockerfile.build.aarch64-linux
@@ -1,9 +1,9 @@
-ARG AARCH64_BASE_IMAGE=nvidia/cuda:11.8.0-devel-ubuntu20.04
+ARG AARCH64_BASE_IMAGE=nvidia/cuda:12.6.3-devel-ubuntu20.04
 FROM ${AARCH64_BASE_IMAGE}
 
 ENV DEBIAN_FRONTEND=noninteractive \
-    CUDA_CROSS_VERSION=11-8 \
-    CUDA_CROSS_VERSION_DOT=11.8
+    CUDA_CROSS_VERSION=12-6 \
+    CUDA_CROSS_VERSION_DOT=12.6
 
 RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub && \
     apt-get update && apt-get install software-properties-common -y --no-install-recommends && \
@@ -33,7 +33,7 @@ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/
     python3.12 python3.12-dev \
     python3.13 python3.13-dev && \
     apt-key adv --fetch-key http://repo.download.nvidia.com/jetson/jetson-ota-public.asc && \
-    add-apt-repository 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/cross-linux-aarch64/ /' && \
+    add-apt-repository 'deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/cross-linux-aarch64/ /' && \
     apt-get update && \
     apt-get install -y cuda-cudart-cross-aarch64-${CUDA_CROSS_VERSION} \
                        libcufft-cross-aarch64-${CUDA_CROSS_VERSION} \

diff --git a/include/dali/core/small_vector.h b/include/dali/core/small_vector.h
@@ -23,6 +23,18 @@
 #include "dali/core/util.h"
 #include "dali/core/cuda_utils.h"
 
+#pragma GCC diagnostic push
+#if __GNUC__ > 11
+  // GCC 12+ appears to report false -Wuse-after-free warnings for SmallVector; suppress them
+  #if defined(__has_warning)
+    #if __has_warning("-Wuse-after-free")
+      #pragma GCC diagnostic ignored "-Wuse-after-free"
+    #endif
+  #else
+    #pragma GCC diagnostic ignored "-Wuse-after-free"
+  #endif
+#endif
+
 namespace dali {
 
 template <typename T, typename Allocator, bool Contextless = std::is_empty<Allocator>::value>
@@ -732,4 +744,6 @@ class SmallVector : SmallVectorAlloc<T, allocator>, SmallVectorBase<T> {
 
 }  // namespace dali
 
+#pragma GCC diagnostic pop
+
 #endif  // DALI_CORE_SMALL_VECTOR_H_
diff --git a/qa/TL0_python-self-test_xavier/test.sh → qa/TL0_python-self-test_tegra/test.sh b/qa/TL0_python-self-test_xavier/test.sh → qa/TL0_python-self-test_tegra/test.sh
diff --git a/qa/TL0_python-self-test_xavier/test_body.sh → qa/TL0_python-self-test_tegra/test_body.sh b/qa/TL0_python-self-test_xavier/test_body.sh → qa/TL0_python-self-test_tegra/test_body.sh
diff --git a/qa/TL0_python-self-test_xavier/test_nofw.sh → qa/TL0_python-self-test_tegra/test_nofw.sh b/qa/TL0_python-self-test_xavier/test_nofw.sh → qa/TL0_python-self-test_tegra/test_nofw.sh
diff --git a/qa/TL0_self-test-exec2_xavier/test.sh → qa/TL0_self-test-exec2_tegra/test.sh b/qa/TL0_self-test-exec2_xavier/test.sh → qa/TL0_self-test-exec2_tegra/test.sh
diff --git a/qa/TL0_self-test_xavier/test.sh → qa/TL0_self-test_tegra/test.sh b/qa/TL0_self-test_xavier/test.sh → qa/TL0_self-test_tegra/test.sh
diff --git a/qa/setup_test_common.sh b/qa/setup_test_common.sh
@@ -49,10 +49,10 @@ echo "LD_LIBRARY_PATH is $LD_LIBRARY_PATH"
 # /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0: cannot allocate memory in static TLS block
 # Seems there's an issue with libc:
 # https://bugzilla.redhat.com/show_bug.cgi?id=1722181
-# A fix has been proposed here: 
+# A fix has been proposed here:
 # https://sourceware.org/ml/libc-alpha/2020-01/msg00099.html
 preload_static_tls_libs() {
-    if [ "$(uname -m)" = "aarch64" ]; then
+    if [ "$(uname -m)" = "aarch64" ] && [ -f /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0 ] ; then
         if [ -z "$LD_PRELOAD" ]; then
             export LD_PRELOAD="/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0"
         else