Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed Sep 20, 2024
1 parent d635782 commit a9a07e1
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 48 deletions.
1 change: 0 additions & 1 deletion onnxruntime/contrib_ops/cpu/bert/attention_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "core/platform/threadpool.h"
#include "core/providers/common.h"
#include "core/mlas/inc/mlas.h"
#include <stdio.h>

using onnxruntime::concurrency::ThreadPool;

Expand Down
94 changes: 47 additions & 47 deletions onnxruntime/contrib_ops/cpu/bert/attention_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,43 +63,43 @@ Status AddBiasTranspose(const Tensor* qkv, // Input: Q/K/V dat
constexpr size_t element_size = sizeof(T);
ProcessBroadcastSpanFuncs add_funcs{
[](BroadcastHelper& per_iter_bh) {
// per_iter_bh.OutputEigen<T>() = per_iter_bh.ScalarInput0<T>() + per_iter_bh.EigenInput1<T>().array();
auto num_elements = per_iter_bh.NumOutputElements();
per_iter_bh.OutputEigen<float>() = per_iter_bh.ScalarInput0<float>() + per_iter_bh.EigenInput1<float>().array();
// auto num_elements = per_iter_bh.NumOutputElements();

const auto* input_1 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput1<T>().data());
ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_1_vec_map(input_1, num_elements);
// const auto* input_1 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput1<T>().data());

Check warning on line 69 in onnxruntime/contrib_ops/cpu/bert/attention_utils.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/contrib_ops/cpu/bert/attention_utils.cc:69: Lines should be <= 120 characters long [whitespace/line_length] [2]
// ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_1_vec_map(input_1, num_elements);

auto* output = reinterpret_cast<typename EigenType<T>::Type*>(per_iter_bh.OutputEigen<T>().data());
EigenVectorArrayMap<typename EigenType<T>::Type> output_vec_map(output, num_elements);
// auto* output = reinterpret_cast<typename EigenType<T>::Type*>(per_iter_bh.OutputEigen<T>().data());
// EigenVectorArrayMap<typename EigenType<T>::Type> output_vec_map(output, num_elements);

output_vec_map = input_1_vec_map + static_cast<typename EigenType<T>::Type>(per_iter_bh.ScalarInput0<T>());
// output_vec_map = input_1_vec_map + static_cast<typename EigenType<T>::Type>(per_iter_bh.ScalarInput0<T>());
},
[](BroadcastHelper& per_iter_bh) {
// per_iter_bh.OutputEigen<T>() = per_iter_bh.EigenInput0<T>().array() + per_iter_bh.ScalarInput1<T>();
auto num_elements = per_iter_bh.NumOutputElements();
per_iter_bh.OutputEigen<float>() = per_iter_bh.EigenInput0<float>().array() + per_iter_bh.ScalarInput1<float>();
// auto num_elements = per_iter_bh.NumOutputElements();

const auto* input_0 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput0<T>().data());
ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_0_vec_map(input_0, num_elements);
// const auto* input_0 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput0<T>().data());

Check warning on line 81 in onnxruntime/contrib_ops/cpu/bert/attention_utils.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/contrib_ops/cpu/bert/attention_utils.cc:81: Lines should be <= 120 characters long [whitespace/line_length] [2]
// ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_0_vec_map(input_0, num_elements);

auto* output = reinterpret_cast<typename EigenType<T>::Type*>(per_iter_bh.OutputEigen<T>().data());
EigenVectorArrayMap<typename EigenType<T>::Type> output_vec_map(output, num_elements);
// auto* output = reinterpret_cast<typename EigenType<T>::Type*>(per_iter_bh.OutputEigen<T>().data());
// EigenVectorArrayMap<typename EigenType<T>::Type> output_vec_map(output, num_elements);

output_vec_map = input_0_vec_map + static_cast<typename EigenType<T>::Type>(per_iter_bh.ScalarInput1<T>());
// output_vec_map = input_0_vec_map + static_cast<typename EigenType<T>::Type>(per_iter_bh.ScalarInput1<T>());
},
[](BroadcastHelper& per_iter_bh) {
// per_iter_bh.OutputEigen<T>() = per_iter_bh.EigenInput0<T>() + per_iter_bh.EigenInput1<T>();
auto num_elements = per_iter_bh.NumOutputElements();
per_iter_bh.OutputEigen<float>() = per_iter_bh.EigenInput0<float>() + per_iter_bh.EigenInput1<float>();
// auto num_elements = per_iter_bh.NumOutputElements();

const auto* input_0 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput0<T>().data());
ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_0_vec_map(input_0, num_elements);
// const auto* input_0 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput0<T>().data());

Check warning on line 93 in onnxruntime/contrib_ops/cpu/bert/attention_utils.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/contrib_ops/cpu/bert/attention_utils.cc:93: Lines should be <= 120 characters long [whitespace/line_length] [2]
// ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_0_vec_map(input_0, num_elements);

const auto* input_1 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput1<T>().data());
ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_1_vec_map(input_1, num_elements);
// const auto* input_1 = reinterpret_cast<const typename EigenType<T>::Type*>(per_iter_bh.EigenInput1<T>().data());

Check warning on line 96 in onnxruntime/contrib_ops/cpu/bert/attention_utils.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/contrib_ops/cpu/bert/attention_utils.cc:96: Lines should be <= 120 characters long [whitespace/line_length] [2]
// ConstEigenVectorArrayMap<typename EigenType<T>::Type> input_1_vec_map(input_1, num_elements);

auto* output = reinterpret_cast<typename EigenType<T>::Type*>(per_iter_bh.OutputEigen<T>().data());
EigenVectorArrayMap<typename EigenType<T>::Type> output_vec_map(output, num_elements);
// auto* output = reinterpret_cast<typename EigenType<T>::Type*>(per_iter_bh.OutputEigen<T>().data());
// EigenVectorArrayMap<typename EigenType<T>::Type> output_vec_map(output, num_elements);

output_vec_map = input_0_vec_map + input_1_vec_map;
// output_vec_map = input_0_vec_map + input_1_vec_map;
}}; // For element-wise add

// Allocate space for output of Q(BS, D) + bias(D)
Expand Down Expand Up @@ -175,47 +175,47 @@ Status AddBiasReshape(const Tensor* qkv, // Input: Q/K/V data - query is
OpKernelContext* context) {
// Note: the comments below will refer to Q's dimensions for simplicity
auto element_type = DataTypeImpl::GetType<T>();
using eigen_type = typename EigenType<T>::Type;
//using eigen_type = typename EigenType<T>::Type;

Check warning on line 178 in onnxruntime/contrib_ops/cpu/bert/attention_utils.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Should have a space between // and comment [whitespace/comments] [4] Raw Output: onnxruntime/contrib_ops/cpu/bert/attention_utils.cc:178: Should have a space between // and comment [whitespace/comments] [4]
constexpr size_t element_size = sizeof(T);
ProcessBroadcastSpanFuncs add_funcs{
[](BroadcastHelper& per_iter_bh) {
//per_iter_bh.OutputEigen<T>() = per_iter_bh.ScalarInput0<T>() + per_iter_bh.EigenInput1<T>().array();
auto num_elements = per_iter_bh.NumOutputElements();
per_iter_bh.OutputEigen<float>() = per_iter_bh.ScalarInput0<float>() + per_iter_bh.EigenInput1<float>().array();
// auto num_elements = per_iter_bh.NumOutputElements();

const auto* input_1 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput1<T>().data());
ConstEigenVectorArrayMap<eigen_type> input_1_vec_map(input_1, num_elements);
// const auto* input_1 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput1<T>().data());
// ConstEigenVectorArrayMap<eigen_type> input_1_vec_map(input_1, num_elements);

auto* output = reinterpret_cast<eigen_type*>(per_iter_bh.OutputEigen<T>().data());
EigenVectorArrayMap<eigen_type> output_vec_map(output, num_elements);
// auto* output = reinterpret_cast<eigen_type*>(per_iter_bh.OutputEigen<T>().data());
// EigenVectorArrayMap<eigen_type> output_vec_map(output, num_elements);

output_vec_map = input_1_vec_map + static_cast<eigen_type>(per_iter_bh.ScalarInput0<T>());
// output_vec_map = input_1_vec_map + static_cast<eigen_type>(per_iter_bh.ScalarInput0<T>());
},
[](BroadcastHelper& per_iter_bh) {
// per_iter_bh.OutputEigen<T>() = per_iter_bh.EigenInput0<T>().array() + per_iter_bh.ScalarInput1<T>();
auto num_elements = per_iter_bh.NumOutputElements();
per_iter_bh.OutputEigen<float>() = per_iter_bh.EigenInput0<float>().array() + per_iter_bh.ScalarInput1<float>();
// auto num_elements = per_iter_bh.NumOutputElements();

const auto* input_0 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput0<T>().data());
ConstEigenVectorArrayMap<eigen_type> input_0_vec_map(input_0, num_elements);
// const auto* input_0 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput0<T>().data());
// ConstEigenVectorArrayMap<eigen_type> input_0_vec_map(input_0, num_elements);

auto* output = reinterpret_cast<eigen_type*>(per_iter_bh.OutputEigen<T>().data());
EigenVectorArrayMap<eigen_type> output_vec_map(output, num_elements);
// auto* output = reinterpret_cast<eigen_type*>(per_iter_bh.OutputEigen<T>().data());
// EigenVectorArrayMap<eigen_type> output_vec_map(output, num_elements);

output_vec_map = input_0_vec_map + static_cast<eigen_type>(per_iter_bh.ScalarInput1<T>());
// output_vec_map = input_0_vec_map + static_cast<eigen_type>(per_iter_bh.ScalarInput1<T>());
},
[](BroadcastHelper& per_iter_bh) {
// per_iter_bh.OutputEigen<T>() = per_iter_bh.EigenInput0<T>() + per_iter_bh.EigenInput1<T>();
auto num_elements = per_iter_bh.NumOutputElements();
per_iter_bh.OutputEigen<float>() = per_iter_bh.EigenInput0<float>() + per_iter_bh.EigenInput1<float>();
// auto num_elements = per_iter_bh.NumOutputElements();

const auto* input_0 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput0<T>().data());
ConstEigenVectorArrayMap<eigen_type> input_0_vec_map(input_0, num_elements);
// const auto* input_0 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput0<T>().data());
// ConstEigenVectorArrayMap<eigen_type> input_0_vec_map(input_0, num_elements);

const auto* input_1 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput1<T>().data());
ConstEigenVectorArrayMap<eigen_type> input_1_vec_map(input_1, num_elements);
// const auto* input_1 = reinterpret_cast<const eigen_type*>(per_iter_bh.EigenInput1<T>().data());
// ConstEigenVectorArrayMap<eigen_type> input_1_vec_map(input_1, num_elements);

auto* output = reinterpret_cast<eigen_type*>(per_iter_bh.OutputEigen<T>().data());
EigenVectorArrayMap<eigen_type> output_vec_map(output, num_elements);
// auto* output = reinterpret_cast<eigen_type*>(per_iter_bh.OutputEigen<T>().data());
// EigenVectorArrayMap<eigen_type> output_vec_map(output, num_elements);

output_vec_map = input_0_vec_map + input_1_vec_map;
// output_vec_map = input_0_vec_map + input_1_vec_map;
}}; // For element-wise add

// Get Q's bias from combined bias
Expand Down

0 comments on commit a9a07e1

Please sign in to comment.