[FOR DRAFT-PR ONLY] Enable -ffp-mode=fast for armclang

AdrianLundell · AdrianLundell · commit 79d013ce3831 · 2024-11-07T15:23:38.000+01:00
-ffp-mode=fast enables extra compiler optimizations for floating-point operations, which increases performance. The mode was previously set to -ffp-mode=std because -ffp-mode=fast is incompatible with std::numeric_limits::quiet_NaN/infinity. See https://developer.arm.com/documentation/dui0774/latest/Compiler-Command-line-Options/-ffp-mode for more info. This pull request puts all incompatible code behind the TFLITE_EMULATE_FLOAT flag, which is not defined on Arm targets. Change-Id: Ic8fab0f11497ef4fd834a3a731a8a5625913486e
diff --git a/tensorflow/lite/kernels/internal/quantization_util.cc b/tensorflow/lite/kernels/internal/quantization_util.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ limitations under the License.
 
 namespace tflite {
 
+#ifdef TFLITE_EMULATE_FLOAT
 namespace {
 // These constants are used to manipulate the binary representation of doubles.
 // Double-precision binary64 floating point format is:
@@ -49,6 +50,7 @@ constexpr uint32_t kFractionShift = 22;
 constexpr uint32_t kFractionRoundingMask = 0x003fffff;
 constexpr uint32_t kFractionRoundingThreshold = 0x00200000;
 }  // namespace
+#endif
 
 void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier,
                         int* shift) {
@@ -122,6 +124,7 @@ void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
   *left_shift = shift;
 }
 
+#ifdef TFLITE_EMULATE_FLOAT
 int64_t IntegerFrExp(double input, int* shift) {
   // Make sure our assumptions about the double layout hold.
   TFLITE_CHECK_EQ(8, sizeof(double));
@@ -278,6 +281,7 @@ int IntegerDoubleCompare(double a, double b) {
     return 0;
   }
 }
+#endif
 
 void PreprocessSoftmaxScaling(double beta, double input_scale,
                               int input_integer_bits,
diff --git a/tensorflow/lite/micro/kernels/activations_common.cc b/tensorflow/lite/micro/kernels/activations_common.cc
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -54,7 +54,6 @@ template <typename T>
 void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
                          ReluOpData* data) {
   float act_min = 0.0;
-  float act_max = std::numeric_limits<float>::infinity();
   double real_multiplier =
       static_cast<double>(input->params.scale / output->params.scale);
 
@@ -69,12 +68,7 @@ void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output,
       output->params.zero_point +
           static_cast<int32_t>(roundf(act_min / output->params.scale)));
   data->params.quantized_activation_max =
-      act_max == std::numeric_limits<float>::infinity()
-          ? static_cast<int32_t>(std::numeric_limits<T>::max())
-          : std::min(static_cast<int32_t>(std::numeric_limits<T>::max()),
-                     output->params.zero_point +
-                         static_cast<int32_t>(
-                             roundf(act_max / output->params.scale)));
+      static_cast<int32_t>(std::numeric_limits<T>::max());
   data->params.input_offset = input->params.zero_point;
   data->params.output_offset = output->params.zero_point;
 }
diff --git a/tensorflow/lite/micro/kernels/quantization_util_test.cc b/tensorflow/lite/micro/kernels/quantization_util_test.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2024 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -232,6 +232,7 @@ TF_LITE_MICRO_TEST(
   TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 255);
 }
 
+#ifdef TFLITE_EMULATE_FLOAT
 TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerFrExp) {
   int shift;
   int64_t result = tflite::IntegerFrExp(0.0, &shift);
@@ -412,6 +413,7 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_CalculateInputRadius) {
   TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(3, 28), 7);
   TF_LITE_MICRO_EXPECT_EQ(tflite::CalculateInputRadius(4, 2), 503316480);
 }
+#endif
 
 TF_LITE_MICRO_TEST(QuantizationUtilTest_QuantizeMultiplierArray) {
   const double weights[] = {-4,    -2,   -1,  -0.5, -0.25, -0.125, 0,
diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
@@ -855,7 +855,6 @@ $(BINDIR)%.test_target: $(BINDIR)%_test
 # These are microcontroller-specific rules for converting the ELF output
 # of the linker into a binary image that can be loaded directly.
 ifeq ($(TOOLCHAIN), armclang)
-  CXXFLAGS += -ffp-mode=full
   FROMELF := ${TARGET_TOOLCHAIN_ROOT}$(TARGET_TOOLCHAIN_PREFIX)fromelf
   $(BINDIR)%.bin: $(BINDIR)%
 		@mkdir -p $(dir $@)