Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions onnxruntime/core/providers/cpu/llm/attention.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT License.

#include "core/providers/cpu/llm/attention.h"
+#include "core/providers/cpu/llm/attention_helper.h"

#include "core/common/common.h"
#include "core/common/safeint.h"
Expand Down Expand Up @@ -125,10 +126,10 @@ Status Attention<T>::Compute(OpKernelContext* context) const {
const Tensor* past_value = context->Input<Tensor>(5);

AttentionParameters parameters;
-std::vector<int64_t> y_shape;
-std::vector<int64_t> present_key_shape;
-std::vector<int64_t> present_value_shape;
-std::vector<int64_t> output_qk_shape;
+TensorShape y_shape;
+TensorShape present_key_shape;
+TensorShape present_value_shape;
+TensorShape output_qk_shape;

ORT_ENFORCE(attention_helper::ComputeOutputShapeForAttention(
Q,
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/cpu/llm/attention.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "core/common/common.h"
#include "core/framework/op_kernel.h"
#include "core/platform/threadpool.h"
-#include "core/providers/cpu/llm/attention_helper.h"
+#include "core/providers/cpu/llm/attention_parameters.h"

namespace onnxruntime {

Expand Down
156 changes: 0 additions & 156 deletions onnxruntime/core/providers/cpu/llm/attention_helper.cc

This file was deleted.

Loading
Loading