Skip to content

Commit

Permalink
No inlining, fix bugs in the hypotheses, improve the macros
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Dec 31, 2024
1 parent ce7bc2a commit 8a54073
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 67 deletions.
15 changes: 15 additions & 0 deletions numerics/numerics.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,21 @@
<Import Project="..\shared\geometry.vcxitems" Label="Shared" />
<Import Project="..\shared\testing_utilities.vcxitems" Label="Shared" />
</ImportGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_LLVM|x64'">
<ClCompile>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="angle_reduction.hpp" />
<ClInclude Include="angle_reduction_body.hpp" />
Expand Down
109 changes: 59 additions & 50 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,36 @@
#include "numerics/polynomial_evaluators.hpp"
#include "quantities/elementary_functions.hpp"

#define OSACA_ANALYSED_FUNCTION Cos
#define UNDER_OSACA_HYPOTHESES(expression) \
[] { \
constexpr bool UseHardwareFMA = true; \
constexpr double θ = 3; \
/* From argument reduction. */ \
constexpr std::int64_t n = \
static_cast<std::int64_t>(θ * (2 / π) + 0.5); \
constexpr double reduction_value = θ - n * π_over_2_high; \
constexpr double reduction_error = n * π_over_2_low; \
/* Used to determine whether a better argument reduction is needed. */ \
constexpr DoublePrecision<double> θ_reduced = \
QuickTwoDifference(reduction_value, reduction_error); \
/* Used in Sin to detect the near-0 case. */ \
constexpr double abs_x = \
θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \
/* Used throughout the top-level functions. */ \
constexpr std::int64_t quadrant = n & 0b11; \
/* Used in DetectDangerousRounding. */ \
constexpr double normalized_error = 0; \
/* Not NaN is the only part that matters; used at the end of the */ \
/* top-level functions to determine whether to call the slow path. */ \
constexpr double value = 1; \
return expression; \
}()

// Defines `PRINCIPIA_USE_OSACA` from `OSACA_ANALYSED_FUNCTION`: it is the
// negation of `PRINCIPIA_MACRO_IS_EMPTY` applied to that macro (helper defined
// elsewhere; presumably it tests whether the expansion is empty, so that an
// empty definition disables the instrumentation).  If
// `OSACA_ANALYSED_FUNCTION` is not defined at all, `PRINCIPIA_USE_OSACA` is
// left undefined and evaluates to 0 in the `#if` that follows.
#if defined(OSACA_ANALYSED_FUNCTION)
#define PRINCIPIA_USE_OSACA !PRINCIPIA_MACRO_IS_EMPTY(OSACA_ANALYSED_FUNCTION)
#endif

#if PRINCIPIA_USE_OSACA

#include "intel/iacaMarks.h"
Expand Down Expand Up @@ -97,12 +127,14 @@
// them, so they cannot be the end of a loop started unconditionally. Instead
// we loop with goto.
// — Some volatile reads and writes are used to clarify identity of the
// registers in the generated code (where the names of `OSACA_input` and
// 'OSACA_result' appear in movsd instructions) and to improve the structure
// of the generated graph.
// registers in the generated code (where the names of `OSACA_result` and, if
// `OSACA_CARRY_LOOP_THROUGH_REGISTER`, `OSACA_loop_carry` appear in movsd
// instructions) and to improve the structure of the generated graph.
//
// Putting a load of the input from memory in the analysed section makes the
// OSACA dependency graph clearer. However:
// Putting a load of the input from memory in the analysed section prevents the
// compiler from reusing intermediate values in the next iteration, e.g., if the
// absolute value of the result is computed first, the compiler might reuse it
// instead of computing the absolute value of the input. However:
// — it adds a spurious move to the latency;
// — some tools (IACA, LLVM-MCA) cannot see the dependency through memory.
// Set OSACA_CARRY_LOOP_THROUGH_REGISTER to 1 to carry the loop dependency
Expand All @@ -114,38 +146,42 @@
static bool OSACA_loop_terminator = false;

#define OSACA_FUNCTION_BEGIN(arg) \
double OSACA_INPUT_QUALIFIER OSACA_input = arg; \
double OSACA_LOOP_CARRY_QUALIFIER OSACA_loop_carry = arg; \
if constexpr (std::string_view(__func__) == \
STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \
IACA_VC64_START; \
} \
double OSACA_loop_carry = OSACA_input; \
_Pragma("warning(push)"); \
_Pragma("warning(disable : 4102)"); \
OSACA_loop: \
_Pragma("warning(pop)"); \
arg = OSACA_loop_carry

#define OSACA_RETURN(result) \
do { \
if constexpr (std::string_view(__func__) == \
STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \
OSACA_loop_carry = (result); \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
double volatile OSACA_result = OSACA_loop_carry; \
IACA_VC64_END; \
return OSACA_result; \
} else { \
return (result); \
} \
// Returns `result` from the enclosing function.  In the function whose name
// matches `OSACA_ANALYSED_FUNCTION`, `result` is instead stored in
// `OSACA_loop_carry` and control jumps back to the `OSACA_loop` label for as
// long as `OSACA_loop_terminator` is false, so that each result feeds the next
// iteration; the end marker `IACA_VC64_END` is placed after that loop.  In any
// other function this is a plain `return (result)`.
#define OSACA_RETURN(result) \
do { \
if constexpr (std::string_view(__func__) == \
STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \
OSACA_loop_carry = (result); \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
double volatile OSACA_result = OSACA_loop_carry; \
IACA_VC64_END; \
/* The second goto prevents the end marker from being interleaved */ \
/* with register-restoring moves. */ \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
return OSACA_result; \
} else { \
return (result); \
} \
} while (false)

#if OSACA_CARRY_LOOP_THROUGH_REGISTER
#define OSACA_INPUT_QUALIFIER
#define OSACA_LOOP_CARRY_QUALIFIER
#else
#define OSACA_INPUT_QUALIFIER volatile
#define OSACA_LOOP_CARRY_QUALIFIER volatile
#endif

// The branch not taken, determined by evaluating the condition
Expand Down Expand Up @@ -177,33 +213,6 @@ static bool OSACA_loop_terminator = false;

#define OSACA_ELSE_IF else OSACA_IF // NOLINT

// Sin- and Cos-specific definitions:

#define UNDER_OSACA_HYPOTHESES(expression) \
[&] { \
constexpr bool UseHardwareFMA = true; \
constexpr double θ = 0.1; \
/* From argument reduction. */ \
constexpr double n_double = θ * (2 / π); \
constexpr double reduction_value = θ - n_double * π_over_2_high; \
constexpr double reduction_error = n_double * π_over_2_low; \
/* Used to determine whether a better argument reduction is needed. */ \
constexpr DoublePrecision<double> θ_reduced = \
QuickTwoDifference(reduction_value, reduction_error); \
/* Used in Sin to detect the near-0 case. */ \
constexpr double abs_x = \
θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \
/* Used throughout the top-level functions. */ \
constexpr std::int64_t quadrant = \
static_cast<std::int64_t>(n_double) & 0b11; \
/* Used in DetectDangerousRounding. */ \
constexpr double normalized_error = 0; \
/* Not NaN is the only part that matters; used at the end of the */ \
/* top-level functions to determine whether to call the slow path. */ \
constexpr double value = 1; \
return expression; \
}()


namespace principia {
namespace numerics {
Expand Down
17 changes: 0 additions & 17 deletions numerics/sin_cos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,7 @@ namespace numerics {
namespace _sin_cos {
namespace internal {

#define PRINCIPIA_INLINE_SIN_COS 0
#define OSACA_ANALYSED_FUNCTION

#if defined(OSACA_ANALYSED_FUNCTION)
#define PRINCIPIA_USE_OSACA !PRINCIPIA_MACRO_IS_EMPTY(OSACA_ANALYSED_FUNCTION)
#endif

#if PRINCIPIA_INLINE_SIN_COS
FORCE_INLINE(inline)
#endif
double __cdecl Sin(double x);
#if PRINCIPIA_INLINE_SIN_COS
FORCE_INLINE(inline)
#endif
double __cdecl Cos(double x);

} // namespace internal
Expand All @@ -31,7 +18,3 @@ using internal::Sin;
} // namespace _sin_cos
} // namespace numerics
} // namespace principia

#if PRINCIPIA_INLINE_SIN_COS
#include "numerics/sin_cos.cpp"
#endif

0 comments on commit 8a54073

Please sign in to comment.