Skip to content

Commit 79d013c

Browse files
committed
[FOR DRAFT-PR ONLY] Enable -ffp-mode=fast for armclang
-ffp-mode=fast enables extra compiler optimizations for floating-point operations, which increases performance. It was previously set to -ffp-mode=std because fast mode is incompatible with std::numeric_limits::quiet_NaN/infinity. See https://developer.arm.com/documentation/dui0774/latest/Compiler-Command-line-Options/-ffp-mode for more info. This pull request guards all incompatible code behind the TFLITE_EMULATE_FLOAT flag, which is not defined on Arm targets. Change-Id: Ic8fab0f11497ef4fd834a3a731a8a5625913486e
1 parent 9245002 commit 79d013c

File tree

4 files changed

+10
-11
lines changed

4 files changed

+10
-11
lines changed

tensorflow/lite/kernels/internal/quantization_util.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
1+
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ limitations under the License.
2424

2525
namespace tflite {
2626

27+
#ifdef TFLITE_EMULATE_FLOAT
2728
namespace {
2829
// These constants are used to manipulate the binary representation of doubles.
2930
// Double-precision binary64 floating point format is:
@@ -49,6 +50,7 @@ constexpr uint32_t kFractionShift = 22;
4950
constexpr uint32_t kFractionRoundingMask = 0x003fffff;
5051
constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
5152
} // namespace
53+
#endif
5254

5355
void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
5456
int* shift) {
@@ -122,6 +124,7 @@ void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
122124
*left_shift = shift;
123125
}
124126

127+
#ifdef TFLITE_EMULATE_FLOAT
125128
int64_t IntegerFrExp(double input, int* shift) {
126129
// Make sure our assumptions about the double layout hold.
127130
TFLITE_CHECK_EQ(8, sizeof(double));
@@ -278,6 +281,7 @@ int IntegerDoubleCompare(double a, double b) {
278281
return 0;
279282
}
280283
}
284+
#endif
281285

282286
void PreprocessSoftmaxScaling(double beta, double input_scale,
283287
int input_integer_bits,

tensorflow/lite/micro/kernels/activations_common.cc

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
1+
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -54,7 +54,6 @@ template <typename T>
5454
void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
5555
ReluOpData* data) {
5656
float act_min = 0.0;
57-
float act_max = std::numeric_limits<float>::infinity();
5857
double real_multiplier =
5958
static_cast<double>(input->params.scale / output->params.scale);
6059

@@ -69,12 +68,7 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
6968
output->params.zero_point +
7069
static_cast<int32_t>(roundf(act_min / output->params.scale)));
7170
data->params.quantized_activation_max =
72-
act_max == std::numeric_limits<float>::infinity()
73-
? static_cast<int32_t>(std::numeric_limits<T>::max())
74-
: std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
75-
output->params.zero_point +
76-
static_cast<int32_t>(
77-
roundf(act_max / output->params.scale)));
71+
static_cast<int32_t>(std::numeric_limits<T>::max());
7872
data->params.input_offset = input->params.zero_point;
7973
data->params.output_offset = output->params.zero_point;
8074
}

tensorflow/lite/micro/kernels/quantization_util_test.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
1+
/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -232,6 +232,7 @@ TF_LITE_MICRO_TEST(
232232
TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 255);
233233
}
234234

235+
#ifdef TFLITE_EMULATE_FLOAT
235236
TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerFrExp) {
236237
int shift;
237238
int64_t result = tflite::IntegerFrExp(0.0, &shift);
@@ -412,6 +413,7 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_CalculateInputRadius) {
412413
TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(3, 28), 7);
413414
TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(4, 2), 503316480);
414415
}
416+
#endif
415417

416418
TF_LITE_MICRO_TEST(QuantizationUtilTest_QuantizeMultiplierArray) {
417419
const double weights[] = {-4, -2, -1, -0.5, -0.25, -0.125, 0,

tensorflow/lite/micro/tools/make/Makefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,6 @@ $(BINDIR)%.test_target: $(BINDIR)%_test
855855
# These are microcontroller-specific rules for converting the ELF output
856856
# of the linker into a binary image that can be loaded directly.
857857
ifeq ($(TOOLCHAIN), armclang)
858-
CXXFLAGS += -ffp-mode=full
859858
FROMELF := ${TARGET_TOOLCHAIN_ROOT}$(TARGET_TOOLCHAIN_PREFIX)fromelf
860859
$(BINDIR)%.bin: $(BINDIR)%
861860
@mkdir -p $(dir $@)

0 commit comments

Comments
 (0)