Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
81fbc65
Trying out c++20 on linux
jchen351 Oct 6, 2023
c72c40e
Enabling "set_property(TARGET onnxruntime_test_all APPEND_STRING PROP…
jchen351 Oct 6, 2023
512cd4d
Revert "Enabling "set_property(TARGET onnxruntime_test_all APPEND_STR…
jchen351 Oct 6, 2023
e9fd1ef
Merge branch 'main' into Cjian/linux_c++20
jchen351 Oct 13, 2023
a3a69d0
ignore "-Werror=deprecated" for Eigen CXX11 Tensor
jchen351 Oct 13, 2023
2e795b4
-Wdeprecated-declarations
jchen351 Oct 13, 2023
1a7efec
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 May 24, 2024
c46ab4a
Merge remote-tracking branch 'origin/main' into Cjian/linux_c++20
Jun 3, 2024
11f8d5b
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Jun 3, 2024
107788f
#pragma GCC diagnostic ignored "-Wdeprecated"
jchen351 Jun 3, 2024
7646254
#pragma GCC diagnostic ignored "-Wdeprecated"
jchen351 Jun 3, 2024
661a465
TreeAggregatorMin
jchen351 Jun 3, 2024
450d994
#pragma GCC diagnostic ignored "-Wdeprecated"
jchen351 Jun 4, 2024
168beb8
[=
jchen351 Jun 6, 2024
3736658
qembed_layer_norm.cc:97:50
jchen351 Jun 6, 2024
89d8e59
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Jun 17, 2024
2d6abd5
is_trivial
jchen351 Jun 17, 2024
c66de02
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Jun 19, 2024
31cb371
Merge remote-tracking branch 'refs/remotes/origin/main' into Cjian/li…
jchen351 Jun 20, 2024
d3a385a
Merge remote-tracking branch 'origin/main' into Cjian/linux_c++20
jchen351 Jun 20, 2024
41c821e
onnxruntime/core/session/inference_session.cc lintrunner
jchen351 Jun 20, 2024
cc3f411
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
jchen351 Jun 20, 2024
84122a5
#if __cplusplus >= 202002L
jchen351 Jun 20, 2024
5244cc4
std::function<void()> run_fn = [=, this]() {
jchen351 Jun 20, 2024
0e10f73
std::function<void()> run_fn = [this]() {
jchen351 Jun 20, 2024
c669ead
std::function<void()> run_fn = [this]() {
jchen351 Jun 20, 2024
55edc2d
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Jul 19, 2024
33d08f3
#pragma GCC diagnostic ignored "-Wdeprecated"
jchen351 Jul 19, 2024
2452708
volatile size_t volatile_x = 0;
jchen351 Jul 19, 2024
0a2d22b
implicit capture of ‘this’ via ‘[=]’ is deprecated in C++20
jchen351 Jul 19, 2024
c5a3e64
#ifdef __GNUC__
jchen351 Jul 22, 2024
6c105be
compute_info.create_state_func = [=](ComputeContext* context, Functio…
jchen351 Jul 22, 2024
f356538
Special case where c++20 is defined but was undefined again
jchen351 Jul 22, 2024
6177120
#pragma GCC diagnostic push
jchen351 Jul 22, 2024
254c8c1
#pragma GCC diagnostic push
jchen351 Jul 22, 2024
7386423
if(UNIX)
jchen351 Jul 22, 2024
9750089
if(UNIX)
jchen351 Jul 22, 2024
42ee12f
-Wdeprecated-pragma
jchen351 Jul 22, 2024
d837bfa
-Wdeprecated-pragma
jchen351 Jul 22, 2024
a6b56f3
google nsync
jchen351 Jul 23, 2024
78c7b4a
#if defined(
jchen351 Jul 23, 2024
4f2ebed
#if defined(__clang__) && __cplusplus >= 202002L
jchen351 Jul 23, 2024
7de09f7
[=, this]
jchen351 Jul 23, 2024
524decf
maxThreadsPerBlock
jchen351 Jul 23, 2024
0fe8720
#ifdef __GNUC__
jchen351 Jul 23, 2024
6eb70d5
deprecated
jchen351 Jul 24, 2024
fa8a8cc
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Aug 27, 2024
1471383
Changing ostringstream to wostringstream
jchen351 Aug 28, 2024
fb7ee24
Using wostringstream only on Windows
jchen351 Aug 30, 2024
fc63b36
change ifndef to ifdef
jchen351 Aug 30, 2024
3c31842
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Sep 10, 2024
9e30f73
Merge branch 'refs/heads/main' into Cjian/linux_c++20
jchen351 Sep 17, 2024
bed798e
Merge branch 'main' into Cjian/linux_c++20
jchen351 Mar 31, 2025
2fab000
Update namespace timestamp_ns = std::chrono; for linux
jchen351 Apr 1, 2025
782ebaa
return static_cast<int>(flags) & static_cast<int>(MissingTrack::kTrue);
jchen351 Apr 1, 2025
e21fe01
Merge with main
jchen351 Apr 2, 2025
ba15042
context->GetOperatorThreadPool(), n,
jchen351 Apr 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ include(CheckFunctionExists)
include(GNUInstallDirs) # onnxruntime_providers_* require CMAKE_INSTALL_* variables

# TODO: update this once all systems adopt C++20
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
if(UNIX)
set(CMAKE_CXX_STANDARD 20)
else()
set(CMAKE_CXX_STANDARD 17)
Expand Down
1 change: 1 addition & 0 deletions include/onnxruntime/core/common/eigen_common_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
// error: ignoring attributes on template argument "Eigen::PacketType<const float, Eigen::DefaultDevice>::type {aka __vector(4) float}" [-Werror=ignored-attributes]
#if defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated"
#if __GNUC__ >= 6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/onnxruntime/core/common/logging/logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ using Timestamp = std::chrono::time_point<std::chrono::system_clock>;
#endif
#endif // __APPLE__

#if ORT_USE_CXX20_STD_CHRONO
#if ORT_USE_CXX20_STD_CHRONO || defined(__linux__)
namespace timestamp_ns = std::chrono;
#else
namespace timestamp_ns = ::date;
Expand Down
29 changes: 29 additions & 0 deletions onnxruntime/contrib_ops/cpu/bert/embed_layer_norm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,36 @@ Status EmbedLayerNorm<T>::Compute(OpKernelContext* context) const {
const float epsilon_value = epsilon();

concurrency::ThreadPool::TryBatchParallelFor(
#if __cplusplus >= 202002L
context->GetOperatorThreadPool(), n,
[input_ids_data,
word_embedding_length,
sequence_length,
position_ids_data,
broadcast_position_ids,
position_embedding_data,
word_embedding_data,
segment_ids_data,
segment_embedding_data,
position_embedding_length,
segment_embedding_length,
output_data,
embedding_sum_data,
gamma_data,
beta_data,
epsilon_value,
hidden_size,
&failed](ptrdiff_t index) {
#else
Comment on lines +96 to +115

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
context->GetOperatorThreadPool(), n,
[input_ids_data,
word_embedding_length,
sequence_length,
position_ids_data,
broadcast_position_ids,
position_embedding_data,
word_embedding_data,
segment_ids_data,
segment_embedding_data,
position_embedding_length,
segment_embedding_length,
output_data,
embedding_sum_data,
gamma_data,
beta_data,
epsilon_value,
hidden_size,
&failed](ptrdiff_t index) {
#else
context->GetOperatorThreadPool(), n,
[input_ids_data,
word_embedding_length,
sequence_length,
position_ids_data,
broadcast_position_ids,
position_embedding_data,
word_embedding_data,
segment_ids_data,
segment_embedding_data,
position_embedding_length,
segment_embedding_length,
output_data,
embedding_sum_data,
gamma_data,
beta_data,
epsilon_value,
hidden_size,
&failed](ptrdiff_t index) {
#else

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated"
#endif
context->GetOperatorThreadPool(), n, [=, &failed](ptrdiff_t index) {
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
#endif
int word_col_index = input_ids_data[index];
if (word_col_index < 0 || word_col_index >= word_embedding_length) {
failed.store(true, std::memory_order_release);
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/common/logging/sinks/ostream_sink.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ void OStreamSink::SendImpl(const Timestamp& timestamp, const std::string& logger
// Going with #2 as it should scale better at the cost of creating the message in memory first
// before sending to the stream.

#ifdef _WIN32
std::wostringstream msg;
#else
std::ostringstream msg;
#endif

#ifndef ORT_MINIMAL_BUILD
if (message.Severity() == Severity::kWARNING) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ struct TreeNodeElement {

inline NODE_MODE_ORT mode() const { return NODE_MODE_ORT(flags & 0xF); }
inline bool is_not_leaf() const { return !(flags & NODE_MODE_ORT::LEAF); }
inline bool is_missing_track_true() const { return flags & MissingTrack::kTrue; }
inline bool is_missing_track_true() const { return static_cast<int>(flags) & static_cast<int>(MissingTrack::kTrue);}
};
Comment thread
jchen351 marked this conversation as resolved.

template <typename InputType, typename ThresholdType, typename OutputType>
Expand Down
7 changes: 7 additions & 0 deletions onnxruntime/core/providers/cpu/text/string_normalizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,14 @@ class Utf8ConverterGeneric {
}

private:
#if __cplusplus >= 202002L
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
std::codecvt_utf8<wchar_t> converter_;
#if __cplusplus >= 202002L
#pragma GCC diagnostic pop
#endif
};

// We need to specialize for MS as there is
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/core/providers/cuda/math/topk_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ Status TopKImpl(const CudaKernel* kernel, bool use_deterministic_compute,
});
}

auto XPT = static_cast<int64_t>(ceil(static_cast<double>(dimension) / GridDim::maxThreadsPerBlock));
auto XPT = static_cast<int64_t>(ceil(static_cast<double>(dimension) / static_cast<double>(GridDim::maxThreadsPerBlock)));
if (BT * 2 >= K || 0 == sorted) {
RadixTopK<CudaT, BT, 2><<<N, BT, 256 * sizeof(uint32_t), stream>>>(
input_x_ptr, output_v_ptr, output_i, elem_nums, size, axis, K, largest, sorted, dimension, XPT,
Expand Down Expand Up @@ -452,8 +452,8 @@ Status TopKImpl(const CudaKernel* kernel, bool use_deterministic_compute,
CUDA_RETURN_IF_ERROR(cub::DeviceRadixSort::SortPairs(nullptr, temp_bytes, input_key, output_key, input_value, output_value, dimension, 0, sizeof(T) * 8, stream));
auto temp_storage_buffer = kernel->GetScratchBuffer<char>(temp_bytes, ort_stream);
auto* temp_storage = temp_storage_buffer.get();
auto blocks_per_grid_D = (int)(ceil(static_cast<float>(dimension) / BT));
auto blocks_per_grid_K = (int)(ceil(static_cast<float>(K) / BT));
auto blocks_per_grid_D = (int)(ceil(static_cast<float>(dimension) / static_cast<float>(BT)));
auto blocks_per_grid_K = (int)(ceil(static_cast<float>(K) / static_cast<float>(BT)));
for (int64_t i = 0; i < N; i++) {
FillInput<CudaT><<<blocks_per_grid_D, BT, 0, stream>>>(input_x_ptr, input_key, input_value, elem_nums, size, axis, K, i, dimension);
CUDA_RETURN_IF_ERROR(1 == largest ? cub::DeviceRadixSort::SortPairsDescending(temp_storage, temp_bytes, input_key, output_key, input_value, output_value, dimension, 0, sizeof(T) * 8, stream)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,7 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
map_input_index_[fused_node.Name()] = input_name_index;
map_no_input_shape_[fused_node.Name()] = no_input_shape;
NodeComputeInfo compute_info;
compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
compute_info.create_state_func = [=,this](ComputeContext* context, FunctionState* state) {
std::unique_ptr<MIGraphXFuncState> p = std::make_unique<MIGraphXFuncState>();
Comment on lines 1219 to 1221

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
NodeComputeInfo compute_info;
compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
compute_info.create_state_func = [=,this](ComputeContext* context, FunctionState* state) {
std::unique_ptr<MIGraphXFuncState> p = std::make_unique<MIGraphXFuncState>();
NodeComputeInfo compute_info;
compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
std::unique_ptr<MIGraphXFuncState> p = std::make_unique<MIGraphXFuncState>();

*p = {context->allocate_func, context->release_func, context->allocator_handle, map_progs_[context->node_name],
map_onnx_string_[context->node_name], options, t_, map_input_index_[context->node_name], &mgx_mu_,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3510,7 +3510,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
// Create function state
// TODO: remove default capture
NodeComputeInfo compute_info;
compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
std::unique_ptr<TensorrtFuncState> p = std::make_unique<TensorrtFuncState>();
// translate tactic sources string to nvinfer1::TacticSources
nvinfer1::TacticSources tactics = 0;
Expand All @@ -3537,7 +3537,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
};

// Create compute function
compute_info.compute_func = [this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
compute_info.compute_func = [=, this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
// The GPU device is set again here to handle multithreading scenarios.
// Consider the following:
// Users can create multiple threads to initialize separate inference sessions on different devices (not just the default device 0)
Expand Down Expand Up @@ -4197,7 +4197,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
// Create function state
// TODO: remove default capture
NodeComputeInfo compute_info;
compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) {
compute_info.create_state_func = [=, this](ComputeContext* context, FunctionState* state) {
std::unique_ptr<TensorrtShortFuncState> p = std::make_unique<TensorrtShortFuncState>();
*p = {context->allocate_func,
context->release_func,
Expand All @@ -4220,7 +4220,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(con
};

// Create compute function
compute_info.compute_func = [this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
compute_info.compute_func = [=, this](FunctionState state, const OrtApi* api, OrtKernelContext* context) {
// The GPU device is set again here to handle multithreading scenarios.
// Consider the following:
// Users can create multiple threads to initialize separate inference sessions on different devices (not just the default device 0)
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/test/onnx/microbenchmark/eigen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-result"
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wdeprecated"

// _deps/eigen-src/unsupported/Eigen/CXX11/../../../Eigen/src/Core/arch/NEON/PacketMath.h:1671:9:
// error: ‘void* memcpy(void*, const void*, size_t)’ copying an object of non-trivial type ‘Eigen::internal::Packet4c’
Expand Down
7 changes: 7 additions & 0 deletions onnxruntime/test/onnx/microbenchmark/tptest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,14 @@ static void BM_ThreadPoolSimpleParallelFor(benchmark::State& state) {
for (auto _ : state) {
for (int j = 0; j < 100; j++) {
ThreadPool::TrySimpleParallelFor(tp.get(), len, [&](size_t) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wvolatile"
for (volatile size_t x = 0; x < body; x++) {
#pragma GCC diagnostic pop
#else
for (volatile size_t x = 0; x < body; x++) {
#endif
}
});
}
Expand Down