Skip to content

Commit 37cf1b8

Browse files
Ravenwater and claude committed
perf(elreal,ci): cut sanitizer/Debug CI time ~20x (depth, -O1, ctest -j)
The ASan/UBSan jobs grew past an hour because the elreal transcendental tests (el_math_trigonometry/hyperbolic/exponent/constants) run a heavy O(depth^4) workload at -O0 with no test parallelism. This commit applies four independent, multiplicative levers, none of which loses any asserted coverage: 1) Test generation depth (the dominant, quartic lever). The math functions default to depth 4 (~212 bits) for high-precision use, but the tests assert only loose host tolerances (1e-10/1e-5) plus structural identities and the 0-overlap invariant -- all satisfied with wide margin at far smaller depth (depth d gives ~ d*53 bits on a double host). Thread an explicit small test depth (LIBRARY DEFAULTS UNCHANGED): single-series fns (atan/sin/cos/tan/sinh/cosh/tanh) at depth 2 (~106 bits); the deepest compositions (asin/acos, pow=exp(y*log(x))) at depth 3, which the exp/log checks share because they live in the same verify_explog as general pow; the integer-pow fast path (a pure multiply) at depth 2; constants generated at depth 6 instead of the 16/32 defaults, with one deliberately-deep pi check kept at 8. Full elreal Debug+assertions ctest drops 178s -> 8.2s locally (constants alone 175s -> 8.3s), 32/32 pass on gcc + clang. 2) Sanitizer builds at -O1 (CMakeLists.txt). AddressSanitizer's recommended setting: 2-4x faster than -O0 on this template-heavy header-only library with equal-or-better ASan detection (Debug -g retained for stacks). UBSan loses a few UB sites the optimizer elides before instrumentation -- an accepted trade. 3) ctest -j 2 in sanitizers.yml (was serial). Matches the 2-vCPU ubuntu-latest runner; safe under ASan's ~2-3x shadow-memory multiplier in 7 GB (the sibling cmake.yml already runs ctest -j 4 on the same runner). 4) --timeout 180 in sanitizers.yml: caps a hung instrumented test at 180s instead of CTest's 1500s default (tail-latency insurance, no effect on green runs). Also tightens the priestRenorm iterate-to-fixpoint loop bound from 6 to 3 (threeAdd.hpp): a cancellation residual converges in two extra passes, so 3 is ample headroom and removes the recently-added worst-case rescue cost.
Validated: full elreal suite (32 tests) passes on gcc and clang in Release and Debug (assertions on); ASCII clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent ab4d8e4 commit 37cf1b8

7 files changed

Lines changed: 125 additions & 82 deletions

File tree

.github/workflows/sanitizers.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,18 @@ jobs:
6969
env:
7070
ASAN_OPTIONS: ${{ matrix.sanitizer == 'ASan' && 'detect_leaks=0:halt_on_error=1' || '' }}
7171
UBSAN_OPTIONS: ${{ matrix.sanitizer == 'UBSan' && 'print_stacktrace=1:halt_on_error=1' || '' }}
72-
run: ctest --output-on-failure
72+
# -j 2 matches the 2-vCPU ubuntu-latest runner; safe under ASan's ~2-3x
73+
# shadow-memory multiplier in 7 GB. --timeout caps any hung instrumented
74+
# test at 180s instead of CTest's 1500s default.
75+
run: ctest -j 2 --timeout 180 --output-on-failure
7376

7477
- name: Rerun failed tests
7578
if: failure()
7679
working-directory: ${{github.workspace}}/build
7780
env:
7881
ASAN_OPTIONS: ${{ matrix.sanitizer == 'ASan' && 'detect_leaks=0:halt_on_error=1' || '' }}
7982
UBSAN_OPTIONS: ${{ matrix.sanitizer == 'UBSan' && 'print_stacktrace=1:halt_on_error=1' || '' }}
80-
run: ctest --rerun-failed --output-on-failure
83+
run: ctest -j 2 --timeout 180 --rerun-failed --output-on-failure
8184

8285
- name: Upload test logs
8386
if: failure()

CMakeLists.txt

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -478,16 +478,20 @@ if(CMAKE_COMPILER_IS_GNUCXX OR MINGW OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
478478
set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -ffp-contract=off")
479479
endif()
480480

481-
# Sanitizer support (GCC/Clang only)
481+
# Sanitizer support (GCC/Clang only). Compile at -O1: this is the AddressSanitizer
482+
# project's recommended setting -- it runs 2-4x faster than -O0 (a large win for
483+
# this template-heavy header-only library) with equal-or-better ASan detection,
484+
# and keeps the Debug -g for symbolized stacks. UBSan loses a few UB sites the
485+
# optimizer elides before instrumentation, an accepted trade for the speedup.
482486
if(UNIVERSAL_ENABLE_ASAN)
483-
message(STATUS "AddressSanitizer enabled")
484-
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
487+
message(STATUS "AddressSanitizer enabled (-O1)")
488+
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer -O1")
485489
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
486490
endif()
487491

488492
if(UNIVERSAL_ENABLE_UBSAN)
489-
message(STATUS "UndefinedBehaviorSanitizer enabled")
490-
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer")
493+
message(STATUS "UndefinedBehaviorSanitizer enabled (-O1)")
494+
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer -O1")
491495
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
492496
endif()
493497

elastic/elreal/math/constants.cpp

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,46 +53,54 @@ int check_value(const sw::universal::ZBCL<FpType>& z, double ref, double tol, co
5353
return n;
5454
}
5555

56+
// Test generation depth. The constant generators default to deep precision (e=32,
57+
// pi=16 blocks) for high-precision *use*, but these tests only compare against the
58+
// host-double std::numbers reference (a ~1e-12 tolerance). Six blocks is ~300+ bits
59+
// on a double host -- far past the reference -- while cutting generation cost by
60+
// ~(6/16)^4 (and much more for e) vs the defaults, which is the dominant saving on
61+
// this, the most expensive elreal math test, under the -O0/instrumented CI tiers.
62+
static constexpr std::size_t kConstDepth = 6;
63+
5664
// Series constants -- tested on double / float only.
5765
template <typename FpType>
58-
int verify_series(double tol, const std::string& host) {
66+
int verify_series(double tol, const std::string& host, std::size_t depth) {
5967
using namespace sw::universal;
6068
int n = 0;
61-
n += check_value(e_zbcl<FpType>(), std::numbers::e_v<double>, tol, host + " e");
62-
n += check_value(ln2_zbcl<FpType>(), std::numbers::ln2_v<double>, tol, host + " ln2");
63-
n += check_value(ln10_zbcl<FpType>(), std::numbers::ln10_v<double>, tol, host + " ln10");
64-
n += check_value(log2_10_zbcl<FpType>(), std::log2(10.0), tol, host + " log2(10)");
65-
n += check_value(pi_zbcl<FpType>(), std::numbers::pi_v<double>, tol, host + " pi");
69+
n += check_value(e_zbcl<FpType>(depth), std::numbers::e_v<double>, tol, host + " e");
70+
n += check_value(ln2_zbcl<FpType>(depth), std::numbers::ln2_v<double>, tol, host + " ln2");
71+
n += check_value(ln10_zbcl<FpType>(depth), std::numbers::ln10_v<double>, tol, host + " ln10");
72+
n += check_value(log2_10_zbcl<FpType>(depth), std::log2(10.0), tol, host + " log2(10)");
73+
n += check_value(pi_zbcl<FpType>(depth), std::numbers::pi_v<double>, tol, host + " pi");
6674
return n;
6775
}
6876

6977
// Radical constants + euler_gamma -- tested on all hosts (stay in range).
7078
template <typename FpType>
71-
int verify_radicals(double tol, const std::string& host) {
79+
int verify_radicals(double tol, const std::string& host, std::size_t depth) {
7280
using namespace sw::universal;
7381
int n = 0;
7482
struct { ZBCL<FpType> z; double ref; const char* name; } rad[] = {
75-
{ sqrt2_zbcl<FpType>(), std::numbers::sqrt2_v<double>, "sqrt2" },
76-
{ sqrt3_zbcl<FpType>(), std::numbers::sqrt3_v<double>, "sqrt3" },
77-
{ sqrt5_zbcl<FpType>(), std::sqrt(5.0), "sqrt5" },
78-
{ phi_zbcl<FpType>(), std::numbers::phi_v<double>, "phi" },
83+
{ sqrt2_zbcl<FpType>(depth), std::numbers::sqrt2_v<double>, "sqrt2" },
84+
{ sqrt3_zbcl<FpType>(depth), std::numbers::sqrt3_v<double>, "sqrt3" },
85+
{ sqrt5_zbcl<FpType>(depth), std::sqrt(5.0), "sqrt5" },
86+
{ phi_zbcl<FpType>(depth), std::numbers::phi_v<double>, "phi" },
7987
};
8088
for (auto& r : rad) n += check_value(r.z, r.ref, tol, host + " " + r.name);
8189

8290
// Algebraic identities (tolerance-based; sqrt/phi are approximate streams).
8391
for (double v : { 2.0, 3.0, 5.0 }) {
84-
ZBCL<FpType> s = sqrt(from_native<FpType>(v));
92+
ZBCL<FpType> s = sqrt(from_native<FpType>(v), depth);
8593
if (std::abs(est::approx(mul(s, s)) - v) > tol) {
8694
std::cout << host << " sqrt(" << v << ")^2 != " << v << '\n'; ++n;
8795
}
8896
}
8997
{ // phi^2 == phi + 1
90-
ZBCL<FpType> phi = phi_zbcl<FpType>();
98+
ZBCL<FpType> phi = phi_zbcl<FpType>(depth);
9199
double lhs = est::approx(mul(phi, phi));
92100
double rhs = est::approx(add(phi, from_native<FpType>(1.0)));
93101
if (std::abs(lhs - rhs) > tol) { std::cout << host << " phi^2 != phi+1\n"; ++n; }
94102
}
95-
n += check_value(euler_gamma_zbcl<FpType>(), std::numbers::egamma_v<double>, tol, host + " egamma");
103+
n += check_value(euler_gamma_zbcl<FpType>(depth), std::numbers::egamma_v<double>, tol, host + " egamma");
96104
return n;
97105
}
98106

@@ -108,7 +116,9 @@ int verify_highprec_double() {
108116
block<double>{ -2.9947698097183397e-33, 0 },
109117
};
110118
ZBCL<double> ref = zbcl_from_blocks<double>(priestRenorm(pilimbs));
111-
ZBCL<double> diff = add(pi_zbcl<double>(16), negate(ref));
119+
// depth 8 (~424 bits) is the suite's one deliberately-deep generation, well
120+
// past the 3-limb (~159-bit) reference it is compared against.
121+
ZBCL<double> diff = add(pi_zbcl<double>(8), negate(ref));
112122
// Agreement to > 100 bits (~30 digits) demonstrates precision well beyond the
113123
// host double (the 3-limb reference itself caps the check at ~159 bits). The
114124
// leading limb cancels exactly, so the residual magnitude is the real signal
@@ -130,12 +140,12 @@ try {
130140
bool reportTestCases = false;
131141
ReportTestSuiteHeader(test_suite, reportTestCases);
132142

133-
nrOfFailedTestCases += verify_series<double>(1e-12, "const<double>");
134-
nrOfFailedTestCases += verify_series<float>(1e-6, "const<float>");
143+
nrOfFailedTestCases += verify_series<double>(1e-12, "const<double>", kConstDepth);
144+
nrOfFailedTestCases += verify_series<float>(1e-6, "const<float>", kConstDepth);
135145

136-
nrOfFailedTestCases += verify_radicals<double>(1e-12, "const<double>");
137-
nrOfFailedTestCases += verify_radicals<float>(1e-6, "const<float>");
138-
nrOfFailedTestCases += verify_radicals<bfloat16>(1e-2, "const<bfloat16>");
146+
nrOfFailedTestCases += verify_radicals<double>(1e-12, "const<double>", kConstDepth);
147+
nrOfFailedTestCases += verify_radicals<float>(1e-6, "const<float>", kConstDepth);
148+
nrOfFailedTestCases += verify_radicals<bfloat16>(1e-2, "const<bfloat16>", kConstDepth);
139149

140150
nrOfFailedTestCases += verify_highprec_double();
141151

elastic/elreal/math/exponent.cpp

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,51 +43,58 @@ int near(const sw::universal::ZBCL<FpType>& z, double ref, double tol, const std
4343
return n;
4444
}
4545

46+
// Test depths. exp/log are single-series and fine at depth 2, but verify_explog
47+
// also exercises general pow = exp(y*log(x)) (two stacked series), so it runs at
48+
// depth 3 (~159 bits) for guard bits; still only ~(3/4)^4 (~1/3) the cost of the default 4.
49+
// The integer pow path is a pure multiply and needs no series headroom (depth 2).
50+
static constexpr std::size_t kExpDepth = 3;
51+
static constexpr std::size_t kPowIntDepth = 2;
52+
4653
// exp/log/general-pow -- {double, float}.
4754
template <typename FpType>
48-
int verify_explog(double tol, const std::string& host) {
55+
int verify_explog(double tol, const std::string& host, std::size_t depth) {
4956
using namespace sw::universal;
5057
int n = 0;
5158

5259
for (double v : { 0.0, 1.0, 2.0, -1.0, 0.5, 3.5 })
53-
n += near(exp(from_native<FpType>(v)), std::exp(v), tol, host + " exp(" + std::to_string(v) + ")");
60+
n += near(exp(from_native<FpType>(v), depth), std::exp(v), tol, host + " exp(" + std::to_string(v) + ")");
5461
for (double v : { 1.0, 2.0, 5.0, 10.0, 0.5, 100.0 })
55-
n += near(log(from_native<FpType>(v)), std::log(v), tol, host + " log(" + std::to_string(v) + ")");
62+
n += near(log(from_native<FpType>(v), depth), std::log(v), tol, host + " log(" + std::to_string(v) + ")");
5663

5764
// round trips
5865
for (double v : { 0.5, 2.0, 3.0 }) {
59-
n += near(log(exp(from_native<FpType>(v))), v, tol, host + " log(exp)");
66+
n += near(log(exp(from_native<FpType>(v), depth), depth), v, tol, host + " log(exp)");
6067
ZBCL<FpType> pos = from_native<FpType>(std::exp(v)); // x = e^v > 0
61-
n += near(exp(log(pos)), std::exp(v), tol * std::exp(v), host + " exp(log)");
68+
n += near(exp(log(pos, depth), depth), std::exp(v), tol * std::exp(v), host + " exp(log)");
6269
}
6370
// exp(a+b) == exp(a)*exp(b)
6471
{
6572
ZBCL<FpType> a = from_native<FpType>(1.0), b = from_native<FpType>(0.5);
66-
double lhs = est::approx(exp(add(a, b)));
67-
double rhs = est::approx(mul(exp(a), exp(b)));
73+
double lhs = est::approx(exp(add(a, b), depth));
74+
double rhs = est::approx(mul(exp(a, depth), exp(b, depth)));
6875
if (std::abs(lhs - rhs) > tol * std::exp(1.5)) { std::cout << host << " exp(a+b)!=exp(a)exp(b)\n"; ++n; }
6976
}
7077
// log(x*y) == log(x)+log(y)
7178
{
7279
ZBCL<FpType> x = from_native<FpType>(3.0), y = from_native<FpType>(7.0);
73-
double lhs = est::approx(log(mul(x, y)));
74-
double rhs = est::approx(add(log(x), log(y)));
80+
double lhs = est::approx(log(mul(x, y), depth));
81+
double rhs = est::approx(add(log(x, depth), log(y, depth)));
7582
if (std::abs(lhs - rhs) > tol) { std::cout << host << " log(xy)!=log(x)+log(y)\n"; ++n; }
7683
}
7784
// general pow via exp(y*log(x))
78-
n += near(pow(from_native<FpType>(2.0), from_native<FpType>(0.5)), std::sqrt(2.0), tol, host + " pow(2,0.5)");
79-
n += near(pow(from_native<FpType>(9.0), from_native<FpType>(0.5)), 3.0, tol, host + " pow(9,0.5)");
85+
n += near(pow(from_native<FpType>(2.0), from_native<FpType>(0.5), depth), std::sqrt(2.0), tol, host + " pow(2,0.5)");
86+
n += near(pow(from_native<FpType>(9.0), from_native<FpType>(0.5), depth), 3.0, tol, host + " pow(9,0.5)");
8087
return n;
8188
}
8289

8390
// pow integer fast path (pure multiply) -- all hosts.
8491
template <typename FpType>
85-
int verify_pow_int(double tol, const std::string& host) {
92+
int verify_pow_int(double tol, const std::string& host, std::size_t depth) {
8693
using namespace sw::universal;
8794
int n = 0;
8895
struct { double b, e, r; } cases[] = { {2,10,1024}, {3,4,81}, {5,3,125}, {2,0,1}, {7,2,49} };
8996
for (auto& c : cases) {
90-
ZBCL<FpType> p = pow(from_native<FpType>(c.b), from_native<FpType>(c.e));
97+
ZBCL<FpType> p = pow(from_native<FpType>(c.b), from_native<FpType>(c.e), depth);
9198
if (std::abs(est::approx(p) - c.r) > tol * std::max(1.0, c.r)) {
9299
std::cout << host << " pow(" << c.b << "," << c.e << ") = " << est::approx(p)
93100
<< " != " << c.r << '\n'; ++n;
@@ -107,12 +114,12 @@ try {
107114
bool reportTestCases = false;
108115
ReportTestSuiteHeader(test_suite, reportTestCases);
109116

110-
nrOfFailedTestCases += verify_explog<double>(1e-10, "explog<double>");
111-
nrOfFailedTestCases += verify_explog<float>(1e-5, "explog<float>");
117+
nrOfFailedTestCases += verify_explog<double>(1e-10, "explog<double>", kExpDepth);
118+
nrOfFailedTestCases += verify_explog<float>(1e-5, "explog<float>", kExpDepth);
112119

113-
nrOfFailedTestCases += verify_pow_int<double>(1e-12, "pow<double>");
114-
nrOfFailedTestCases += verify_pow_int<float>(1e-5, "pow<float>");
115-
nrOfFailedTestCases += verify_pow_int<bfloat16>(1e-1, "pow<bfloat16>");
120+
nrOfFailedTestCases += verify_pow_int<double>(1e-12, "pow<double>", kPowIntDepth);
121+
nrOfFailedTestCases += verify_pow_int<float>(1e-5, "pow<float>", kPowIntDepth);
122+
nrOfFailedTestCases += verify_pow_int<bfloat16>(1e-1, "pow<bfloat16>", kPowIntDepth);
116123

117124
ReportTestSuiteResults(test_suite, nrOfFailedTestCases);
118125
return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS);

elastic/elreal/math/hyperbolic.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,35 +37,41 @@ int near(const sw::universal::ZBCL<FpType>& z, double ref, double tol, const std
3737
return n;
3838
}
3939

40+
// Test depth. sinh/cosh/tanh are single-series (exp-based) functions; depth 2
41+
// (~106 bits on a double host) clears the 1e-10/1e-5 tolerances and the
42+
// cosh^2-sinh^2 identity with wide margin, at ~(2/4)^4 = 1/16 the cost of the
43+
// default depth 4 -- the dominant lever on the instrumented CI tiers.
44+
static constexpr std::size_t kHypDepth = 2;
45+
4046
template <typename FpType>
41-
int verify_all(double tol, const std::string& host) {
47+
int verify_all(double tol, const std::string& host, std::size_t depth) {
4248
using namespace sw::universal;
4349
int n = 0;
4450

4551
for (double x : { 0.0, 0.5, 1.0, -1.5, 2.0, -0.25 }) {
4652
const std::string sx = std::to_string(x);
47-
n += near(sinh(from_native<FpType>(x)), std::sinh(x), tol, host + " sinh(" + sx + ")");
48-
n += near(cosh(from_native<FpType>(x)), std::cosh(x), tol, host + " cosh(" + sx + ")");
49-
n += near(tanh(from_native<FpType>(x)), std::tanh(x), tol, host + " tanh(" + sx + ")");
53+
n += near(sinh(from_native<FpType>(x), depth), std::sinh(x), tol, host + " sinh(" + sx + ")");
54+
n += near(cosh(from_native<FpType>(x), depth), std::cosh(x), tol, host + " cosh(" + sx + ")");
55+
n += near(tanh(from_native<FpType>(x), depth), std::tanh(x), tol, host + " tanh(" + sx + ")");
5056
}
5157

5258
// cosh^2 - sinh^2 == 1
5359
for (double x : { 0.5, 1.3, 2.0 }) {
54-
ZBCL<FpType> c = cosh(from_native<FpType>(x)), s = sinh(from_native<FpType>(x));
60+
ZBCL<FpType> c = cosh(from_native<FpType>(x), depth), s = sinh(from_native<FpType>(x), depth);
5561
double id = est::approx(add(mul(c, c), negate(mul(s, s))));
5662
if (std::abs(id - 1.0) > tol) { std::cout << host << " cosh^2-sinh^2 != 1 at " << x << " (" << id << ")\n"; ++n; }
5763
}
5864
// tanh == sinh/cosh
5965
for (double x : { 0.7, 1.5 }) {
60-
ZBCL<FpType> th = tanh(from_native<FpType>(x));
61-
ZBCL<FpType> sc = div(sinh(from_native<FpType>(x)), cosh(from_native<FpType>(x)));
66+
ZBCL<FpType> th = tanh(from_native<FpType>(x), depth);
67+
ZBCL<FpType> sc = div(sinh(from_native<FpType>(x), depth), cosh(from_native<FpType>(x), depth));
6268
if (std::abs(est::approx(th) - est::approx(sc)) > tol) { std::cout << host << " tanh!=sinh/cosh at " << x << '\n'; ++n; }
6369
}
6470
// parity
6571
{
6672
ZBCL<FpType> a = from_native<FpType>(1.1);
67-
if (std::abs(est::approx(sinh(negate(a))) + est::approx(sinh(a))) > tol) { std::cout << host << " sinh parity\n"; ++n; }
68-
if (std::abs(est::approx(cosh(negate(a))) - est::approx(cosh(a))) > tol) { std::cout << host << " cosh parity\n"; ++n; }
73+
if (std::abs(est::approx(sinh(negate(a), depth)) + est::approx(sinh(a, depth))) > tol) { std::cout << host << " sinh parity\n"; ++n; }
74+
if (std::abs(est::approx(cosh(negate(a), depth)) - est::approx(cosh(a, depth))) > tol) { std::cout << host << " cosh parity\n"; ++n; }
6975
}
7076
return n;
7177
}
@@ -80,8 +86,8 @@ try {
8086
bool reportTestCases = false;
8187
ReportTestSuiteHeader(test_suite, reportTestCases);
8288

83-
nrOfFailedTestCases += verify_all<double>(1e-10, "hyp<double>");
84-
nrOfFailedTestCases += verify_all<float>(1e-5, "hyp<float>");
89+
nrOfFailedTestCases += verify_all<double>(1e-10, "hyp<double>", kHypDepth);
90+
nrOfFailedTestCases += verify_all<float>(1e-5, "hyp<float>", kHypDepth);
8591

8692
ReportTestSuiteResults(test_suite, nrOfFailedTestCases);
8793
return (nrOfFailedTestCases > 0 ? EXIT_FAILURE : EXIT_SUCCESS);

0 commit comments

Comments
 (0)