Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the OSACA macros #4152

Merged
merged 5 commits into from
Jan 1, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
No inlining, fix bugs in the hypotheses, improve the macros
eggrobin committed Dec 31, 2024
commit 8a540732cfe4c1a8ccf8411b3c5c6b5cba4ec070
15 changes: 15 additions & 0 deletions numerics/numerics.vcxproj
Original file line number Diff line number Diff line change
@@ -10,6 +10,21 @@
<Import Project="..\shared\geometry.vcxitems" Label="Shared" />
<Import Project="..\shared\testing_utilities.vcxitems" Label="Shared" />
</ImportGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release_LLVM|x64'">
<ClCompile>
<AssemblerOutput>AssemblyCode</AssemblerOutput>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="angle_reduction.hpp" />
<ClInclude Include="angle_reduction_body.hpp" />
109 changes: 59 additions & 50 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
@@ -14,6 +14,36 @@
#include "numerics/polynomial_evaluators.hpp"
#include "quantities/elementary_functions.hpp"

#define OSACA_ANALYSED_FUNCTION Cos
// Evaluates `expression` inside an immediately-invoked lambda that captures
// nothing and that declares, as constexpr locals, the hypothesized values of
// the names listed below for the fixed sample argument θ = 3.  These
// declarations shadow any same-named entities visible at the point of
// expansion, so `expression` is computed from the hypothesized constants.
// The per-name comments inside the macro say where each name is used.
#define UNDER_OSACA_HYPOTHESES(expression) \
[] { \
constexpr bool UseHardwareFMA = true; \
constexpr double θ = 3; \
/* From argument reduction. */ \
constexpr std::int64_t n = \
static_cast<std::int64_t>(θ * (2 / π) + 0.5); \
constexpr double reduction_value = θ - n * π_over_2_high; \
constexpr double reduction_error = n * π_over_2_low; \
/* Used to determine whether a better argument reduction is needed. */ \
constexpr DoublePrecision<double> θ_reduced = \
QuickTwoDifference(reduction_value, reduction_error); \
/* Used in Sin to detect the near-0 case. */ \
constexpr double abs_x = \
θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \
/* Used throughout the top-level functions. */ \
constexpr std::int64_t quadrant = n & 0b11; \
/* Used in DetectDangerousRounding. */ \
constexpr double normalized_error = 0; \
/* Not NaN is the only part that matters; used at the end of the */ \
/* top-level functions to determine whether to call the slow path. */ \
constexpr double value = 1; \
return expression; \
}()

#if defined(OSACA_ANALYSED_FUNCTION)
#define PRINCIPIA_USE_OSACA !PRINCIPIA_MACRO_IS_EMPTY(OSACA_ANALYSED_FUNCTION)
#endif

#if PRINCIPIA_USE_OSACA

#include "intel/iacaMarks.h"
@@ -97,12 +127,14 @@
// them, so they cannot be the end of a loop started unconditionally. Instead
// we loop with goto.
// — Some volatile reads and writes are used to clarify identity of the
// registers in the generated code (where the names of `OSACA_input` and
// 'OSACA_result' appear in movsd instructions) and to improve the structure
// of the generated graph.
// registers in the generated code (where the names of `OSACA_result` and, if
// `OSACA_CARRY_LOOP_THROUGH_REGISTER`, `OSACA_loop_carry` appear in movsd
// instructions) and to improve the structure of the generated graph.
//
// Putting a load of the input from memory in the analysed section makes the
// OSACA dependency graph clearer. However:
// Putting a load of the input from memory in the analysed section prevents the
// compiler from reusing intermediate values in the next iteration, e.g., if the
// absolute value of the result is computed first, the compiler might reuse it
// instead of computing the absolute value of the input. However:
// — it adds a spurious move to the latency;
// — some tools (IACA, LLVM-MCA) cannot see the dependency through memory.
// Set OSACA_CARRY_LOOP_THROUGH_REGISTER to 1 to carry the loop dependency
@@ -114,38 +146,42 @@
static bool OSACA_loop_terminator = false;

#define OSACA_FUNCTION_BEGIN(arg) \
double OSACA_INPUT_QUALIFIER OSACA_input = arg; \
double OSACA_LOOP_CARRY_QUALIFIER OSACA_loop_carry = arg; \
if constexpr (std::string_view(__func__) == \
STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \
IACA_VC64_START; \
} \
double OSACA_loop_carry = OSACA_input; \
_Pragma("warning(push)"); \
_Pragma("warning(disable : 4102)"); \
OSACA_loop: \
_Pragma("warning(pop)"); \
arg = OSACA_loop_carry

#define OSACA_RETURN(result) \
do { \
if constexpr (std::string_view(__func__) == \
STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \
OSACA_loop_carry = (result); \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
double volatile OSACA_result = OSACA_loop_carry; \
IACA_VC64_END; \
return OSACA_result; \
} else { \
return (result); \
} \
#define OSACA_RETURN(result) \
do { \
if constexpr (std::string_view(__func__) == \
STRINGIFY_EXPANSION(OSACA_ANALYSED_FUNCTION)) { \
OSACA_loop_carry = (result); \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
double volatile OSACA_result = OSACA_loop_carry; \
IACA_VC64_END; \
/* The second goto prevents the end marker from being interleaved */ \
/* with register restoring moves. */ \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
return OSACA_result; \
} else { \
return (result); \
} \
} while (false)

#if OSACA_CARRY_LOOP_THROUGH_REGISTER
#define OSACA_INPUT_QUALIFIER
#define OSACA_LOOP_CARRY_QUALIFIER
#else
#define OSACA_INPUT_QUALIFIER volatile
#define OSACA_LOOP_CARRY_QUALIFIER volatile
#endif

// The branch not taken, determined by evaluating the condition
@@ -177,33 +213,6 @@ static bool OSACA_loop_terminator = false;

#define OSACA_ELSE_IF else OSACA_IF // NOLINT

// Sin- and Cos-specific definitions:

#define UNDER_OSACA_HYPOTHESES(expression) \
[&] { \
constexpr bool UseHardwareFMA = true; \
constexpr double θ = 0.1; \
/* From argument reduction. */ \
constexpr double n_double = θ * (2 / π); \
constexpr double reduction_value = θ - n_double * π_over_2_high; \
constexpr double reduction_error = n_double * π_over_2_low; \
/* Used to determine whether a better argument reduction is needed. */ \
constexpr DoublePrecision<double> θ_reduced = \
QuickTwoDifference(reduction_value, reduction_error); \
/* Used in Sin to detect the near-0 case. */ \
constexpr double abs_x = \
θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \
/* Used throughout the top-level functions. */ \
constexpr std::int64_t quadrant = \
static_cast<std::int64_t>(n_double) & 0b11; \
/* Used in DetectDangerousRounding. */ \
constexpr double normalized_error = 0; \
/* Not NaN is the only part that matters; used at the end of the */ \
/* top-level functions to determine whether to call the slow path. */ \
constexpr double value = 1; \
return expression; \
}()


namespace principia {
namespace numerics {
17 changes: 0 additions & 17 deletions numerics/sin_cos.hpp
Original file line number Diff line number Diff line change
@@ -7,20 +7,7 @@ namespace numerics {
namespace _sin_cos {
namespace internal {

#define PRINCIPIA_INLINE_SIN_COS 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The references to this symbol in sin_cos.cpp must be removed too.

#define OSACA_ANALYSED_FUNCTION

#if defined(OSACA_ANALYSED_FUNCTION)
#define PRINCIPIA_USE_OSACA !PRINCIPIA_MACRO_IS_EMPTY(OSACA_ANALYSED_FUNCTION)
#endif

#if PRINCIPIA_INLINE_SIN_COS
FORCE_INLINE(inline)
#endif
double __cdecl Sin(double x);
#if PRINCIPIA_INLINE_SIN_COS
FORCE_INLINE(inline)
#endif
double __cdecl Cos(double x);

} // namespace internal
@@ -31,7 +18,3 @@ using internal::Sin;
} // namespace _sin_cos
} // namespace numerics
} // namespace principia

#if PRINCIPIA_INLINE_SIN_COS
#include "numerics/sin_cos.cpp"
#endif