microsoft · jchen351 · Oct 6, 2023 · Oct 6, 2023 · Oct 6, 2023 · Oct 13, 2023
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -42,7 +42,7 @@ include(CheckFunctionExists)
 include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables
 
 # TODO: update this once all system adapt c++20
-if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+if(UNIX)
 set(CMAKE_CXX_STANDARD 20)
 else()
 set(CMAKE_CXX_STANDARD 17)

diff --git a/include/onnxruntime/core/common/eigen_common_wrapper.h b/include/onnxruntime/core/common/eigen_common_wrapper.h
@@ -10,6 +10,7 @@
 // error: ignoring attributes on template argument "Eigen::PacketType<const float, Eigen::DefaultDevice>::type {aka __vector(4) float}" [-Werror=ignored-attributes]
 #if defined(__GNUC__)
 #pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
 #if __GNUC__ >= 6
 #pragma GCC diagnostic ignored "-Wignored-attributes"
 #endif

diff --git a/include/onnxruntime/core/platform/ort_mutex.h b/include/onnxruntime/core/platform/ort_mutex.h
@@ -108,7 +108,14 @@ std::cv_status OrtCondVar::wait_for(std::unique_lock<OrtMutex>& cond_mutex,
 namespace onnxruntime {
 
 class OrtMutex {
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-pragma"
+#endif
   nsync::nsync_mu data_ = NSYNC_MU_INIT;
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic pop
+#endif
 
  public:
   constexpr OrtMutex() = default;
@@ -125,8 +132,15 @@ class OrtMutex {
 };
 
 class OrtCondVar {
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-pragma"
+#endif
   nsync::nsync_cv native_cv_object = NSYNC_CV_INIT;
+#if defined(__clang__) && __cplusplus >= 202002L
+#pragma clang diagnostic pop
+#endif
 
  public:
   constexpr OrtCondVar() noexcept = default;
 

diff --git a/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc b/onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc
@@ -87,7 +87,18 @@ Status EmbedLayerNorm<T>::Compute(OpKernelContext* context) const {
 
     int n = batch_size * sequence_length;
     concurrency::ThreadPool::TryBatchParallelFor(
+#if __cplusplus >= 202002L
+        context->GetOperatorThreadPool(), n, [=, this, &failed](ptrdiff_t index) {
+#else
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
         context->GetOperatorThreadPool(), n, [=, &failed](ptrdiff_t index) {
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
           int word_col_index = input_ids_data[index];
           if (word_col_index < 0 || word_col_index >= word_embedding_length) {
             failed.store(true, std::memory_order_release);

diff --git a/onnxruntime/core/common/logging/sinks/ostream_sink.cc b/onnxruntime/core/common/logging/sinks/ostream_sink.cc
@@ -33,7 +33,11 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger
   // Going with #2 as it should scale better at the cost of creating the message in memory first
   // before sending to the stream.
 
+#ifdef _WIN32
+  std::wostringstream msg;
+#else
   std::ostringstream msg;
+#endif
 
 #ifndef ORT_MINIMAL_BUILD
   if (message.Severity() == Severity::kWARNING) {

diff --git a/onnxruntime/core/providers/cpu/text/string_normalizer.cc b/onnxruntime/core/providers/cpu/text/string_normalizer.cc
@@ -220,7 +220,14 @@ class Utf8ConverterGeneric {
   }
 
  private:
+#if defined(__GNUC__) && __cplusplus >= 202002L
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
   std::codecvt_utf8<wchar_t> converter_;
+#if defined(__GNUC__) && __cplusplus >= 202002L
+#pragma GCC diagnostic pop
+#endif
 };
 
 // We need to specialize for MS as there is

diff --git a/onnxruntime/core/providers/cuda/math/topk_impl.cuh b/onnxruntime/core/providers/cuda/math/topk_impl.cuh
@@ -421,7 +421,7 @@
       });
     }
 
-    auto XPT = static_cast<int64_t>(ceil(static_cast<double>(dimension) / GridDim::maxThreadsPerBlock));
+    auto XPT = static_cast<int64_t>(ceil(static_cast<double>(dimension) / static_cast<double>(GridDim::maxThreadsPerBlock)));
     if (BT * 2 >= K || 0 == sorted) {
       RadixTopK<CudaT, BT, 2><<<N, BT, 256 * sizeof(uint32_t), stream>>>(
           input_x_ptr, output_v_ptr, output_i, elem_nums, size, axis, K, largest, sorted, dimension, XPT,
@@ -452,8 +452,8 @@
     CUDA_RETURN_IF_ERROR(cub::DeviceRadixSort::SortPairs(nullptr, temp_bytes, input_key, output_key, input_value, output_value, dimension, 0, sizeof(T) * 8, stream));
     auto temp_storage_buffer = kernel->GetScratchBuffer<char>(temp_bytes, ort_stream);
     auto* temp_storage = temp_storage_buffer.get();
-    auto blocks_per_grid_D = (int)(ceil(static_cast<float>(dimension) / BT));
-    auto blocks_per_grid_K = (int)(ceil(static_cast<float>(K) / BT));
+    auto blocks_per_grid_D = (int)(ceil(static_cast<float>(dimension) / static_cast<float>(BT)));
+    auto blocks_per_grid_K = (int)(ceil(static_cast<float>(K) / static_cast<float>(BT)));
     for (int64_t i = 0; i < N; i++) {
       FillInput<CudaT><<<blocks_per_grid_D, BT, 0, stream>>>(input_x_ptr, input_key, input_value, elem_nums, size, axis, K, i, dimension);
       CUDA_RETURN_IF_ERROR(1 == largest ? cub::DeviceRadixSort::SortPairsDescending(temp_storage, temp_bytes, input_key, output_key, input_value, output_value, dimension, 0, sizeof(T) * 8, stream)

diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
@@ -1210,7 +1210,7 @@
     map_input_index_[fused_node.Name()] = input_name_index;
     map_no_input_shape_[fused_node.Name()] = no_input_shape;
     NodeComputeInfo compute_info;
-    compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
+    compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
       std::unique_ptr<MIGraphXFuncState> p = std::make_unique<MIGraphXFuncState>();
       *p = {context->allocate_func, context->release_func, context->allocator_handle, map_progs_[context->node_name],
             map_onnx_string_[context->node_name], options, t_, map_input_index_[context->node_name], &mgx_mu_,

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -3381,7 +3381,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   // Create function state
   // TODO: remove default capture
   NodeComputeInfo compute_info;
-  compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
+  compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
     std::unique_ptr<TensorrtFuncState> p = std::make_unique<TensorrtFuncState>();
     // translate tactic sources string to nvinfer1::TacticSources
     nvinfer1::TacticSources tactics = 0;
@@ -4056,7 +4056,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
   // Create function state
   // TODO: remove default capture
   NodeComputeInfo compute_info;
-  compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
+  compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
     std::unique_ptr<TensorrtShortFuncState> p = std::make_unique<TensorrtShortFuncState>();
     *p = {context->allocate_func,
           context->release_func,

diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
@@ -2770,7 +2770,18 @@ common::Status InferenceSession::RunAsync(const RunOptions* run_options,
   if (!tp || concurrency::ThreadPool::DegreeOfParallelism(tp) < 2) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "intra op thread pool must have at least one thread for RunAsync");
   }
+#if __cplusplus >= 202002L
+  std::function<void()> run_fn = [=, this]() {
+#else
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated"
+#endif
   std::function<void()> run_fn = [=]() {
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+#endif
     Status status = Status::OK();
     ORT_TRY {
       if (run_options) {

diff --git a/onnxruntime/test/onnx/microbenchmark/eigen.cc b/onnxruntime/test/onnx/microbenchmark/eigen.cc
@@ -8,6 +8,7 @@
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #pragma GCC diagnostic ignored "-Wunused-result"
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#pragma GCC diagnostic ignored "-Wdeprecated"
 
 // _deps/eigen-src/unsupported/Eigen/CXX11/../../../Eigen/src/Core/arch/NEON/PacketMath.h:1671:9:
 // error: ‘void* memcpy(void*, const void*, size_t)’ copying an object of non-trivial type ‘Eigen::internal::Packet4c’

diff --git a/onnxruntime/test/onnx/microbenchmark/tptest.cc b/onnxruntime/test/onnx/microbenchmark/tptest.cc
@@ -102,7 +102,8 @@ static void BM_ThreadPoolSimpleParallelFor(benchmark::State& state) {
   for (auto _ : state) {
     for (int j = 0; j < 100; j++) {
       ThreadPool::TrySimpleParallelFor(tp.get(), len, [&](size_t) {
-        for (volatile size_t x = 0; x < body; x++) {
+        // Use `x = x + 1` instead of `x++`: incrementing a volatile is deprecated in C++20.
+        for (volatile size_t x = 0; x < body; x = x + 1) {
         }
       });
     }