Skip to content

Commit

Permalink
more flags
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Dec 29, 2024
1 parent c3b3502 commit 5026a76
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 61 deletions.
20 changes: 11 additions & 9 deletions nanobenchmarks/function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,27 @@ namespace internal {

bool FunctionRegistry::Register(std::string_view name,
BenchmarkedFunction function) {
CHECK(names_by_function_.emplace(function, name).second)
CHECK(singleton_.names_by_function_.emplace(function, name).second)
<< " Registering function " << function << " as " << name << ": "
<< "function already registered as " << names_by_function_[function];
CHECK(functions_by_name_.emplace(name, function).second)
<< "function already registered as "
<< singleton_.names_by_function_[function];
CHECK(singleton_.functions_by_name_.emplace(name, function).second)
<< " Registering function " << function << " as " << name << ": "
<< " name already taken by " << functions_by_name_.find(name)->second;
<< " name already taken by "
<< singleton_.functions_by_name_.find(name)->second;
return true;
}

FunctionRegistry& FunctionRegistry::singleton = *new FunctionRegistry();
FunctionRegistry& FunctionRegistry::singleton_ = *new FunctionRegistry();

std::map<std::string, BenchmarkedFunction, std::less<>> const&

Check warning on line 25 in nanobenchmarks/function_registry.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

build/include_what_you_use

Add #include <functional> for less<>
FunctionRegistry::functions_by_name() const {
return functions_by_name_;
FunctionRegistry::functions_by_name() {
return singleton_.functions_by_name_;
}

std::map<BenchmarkedFunction, std::string> const&

Check warning on line 30 in nanobenchmarks/function_registry.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

build/include_what_you_use

Add #include <string> for string

Check warning on line 30 in nanobenchmarks/function_registry.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

build/include_what_you_use

Add #include <map> for map<>
FunctionRegistry::names_by_function() const {
return names_by_function_;
FunctionRegistry::names_by_function() {
return singleton_.names_by_function_;
}

} // namespace internal
Expand Down
35 changes: 16 additions & 19 deletions nanobenchmarks/function_registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,44 @@
#include <string>
#include <string_view>

#include "base/macros.hpp"

namespace principia {
namespace nanobenchmarks {
namespace _function_registry {
namespace internal {

#define BENCHMARK_CALLING_CONVENTION
using BenchmarkedFunction = double(BENCHMARK_CALLING_CONVENTION*)(double);
using BenchmarkedFunction = double (BENCHMARK_CALLING_CONVENTION*)(double);

Check warning on line 15 in nanobenchmarks/function_registry.hpp

View workflow job for this annotation

GitHub Actions / check-cpp

readability/casting

Using C-style cast. Use reinterpret_cast<BENCHMARK_CALLING_CONVENTION*>(...) instead

class FunctionRegistry {
public:
bool Register(std::string_view name, BenchmarkedFunction function);
static FunctionRegistry& singleton;
std::map<std::string, BenchmarkedFunction, std::less<>> const&
functions_by_name() const;
std::map<BenchmarkedFunction, std::string> const& names_by_function() const;
static bool Register(std::string_view name, BenchmarkedFunction function);
static std::map<std::string, BenchmarkedFunction, std::less<>> const&
functions_by_name();
static std::map<BenchmarkedFunction, std::string> const& names_by_function();

private:
FunctionRegistry() = default;
static FunctionRegistry& singleton_;
std::map<std::string, BenchmarkedFunction, std::less<>> functions_by_name_;

Check warning on line 27 in nanobenchmarks/function_registry.hpp

View workflow job for this annotation

GitHub Actions / check-cpp

build/include_what_you_use

Add #include <functional> for less<>
std::map<BenchmarkedFunction, std::string> names_by_function_;
};

#define BENCHMARK_FUNCTION(f) \
static bool registered_##f = \
::principia::nanobenchmarks::_function_registry::FunctionRegistry:: \
singleton.Register(#f, &(f))

#define EXPAND(x) x


#define BENCHMARK_FUNCTION_WITH_NAME(name, ...) \
BENCHMARK_FUNCTION_WITH_NAME_INTERNAL(__LINE__, name, __VA_ARGS__)
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL(line, name, ...) \
BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, __VA_ARGS__)
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, ...) \
namespace { \
static bool registered##line = \
::principia::nanobenchmarks::_function_registry::FunctionRegistry:: \
singleton.Register(name, &(__VA_ARGS__)); \
#define BENCHMARK_FUNCTION_WITH_NAME_INTERNAL2(line, name, ...) \
namespace { \
static bool registered##line = ::principia::nanobenchmarks:: \
_function_registry::FunctionRegistry::Register(name, &(__VA_ARGS__)); \
}


#define BENCHMARK_FUNCTION(...) \
BENCHMARK_FUNCTION_WITH_NAME(#__VA_ARGS__, __VA_ARGS__)

#define BENCHMARKED_FUNCTION(f) \
double BENCHMARK_CALLING_CONVENTION f(double x); \
BENCHMARK_FUNCTION(f); \
Expand Down
162 changes: 129 additions & 33 deletions nanobenchmarks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,78 @@
#include <map>
#include <print>
#include <ranges>
#include <regex>
#include <sstream>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <intrin.h>

#include "absl/flags/flag.h"
#include "absl/flags/parse.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "base/array.hpp"
#include "base/cpuid.hpp"
#include "mathematica/logger.hpp"
#include "nanobenchmarks/function_registry.hpp"
#include "nanobenchmarks/performance_settings_controller.hpp"
#include "numerics/cbrt.hpp"
#include "testing_utilities/statistics.hpp"

// Number of consecutive calls to the benchmarked function made between the two
// TSC reads of a single measurement; the recorded sample is the TSC difference
// divided by this count.
ABSL_FLAG(std::size_t,
loop_iterations,
100,
"Number of iterations of the measured loop");
// Number of measurements taken by each call to Benchmark; this is also the
// size of the vector of samples that gets sorted to extract the quantiles.
ABSL_FLAG(std::size_t,
samples,
1'000'000,
"Number of measurements to perform for each benchmarked function");
// A registered function is benchmarked if its whole name matches this
// expression (std::regex_match); the default matches every name.
ABSL_FLAG(std::string,
benchmark_filter,
".*",
"Regular expression matching the names of functions to benchmark");
// If non-empty, a mathematica Logger is created for this file and the samples
// of each benchmarked function are appended to it under the function's name.
ABSL_FLAG(std::string,
log_to_mathematica,
"",
"File to which to log the measurements");

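// Abseil only finds AbslParseFlag and AbslUnparseFlag for a flag type through
// ADL (or if they are declared in marshalling.h).  For std::vector<int> that
// means declaring them in namespace std, which is undefined behaviour.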
namespace std {

// Parses a comma-separated list of integers (e.g., "1000,100,20") into *flag.
// An empty text yields an empty vector, so that the result of AbslUnparseFlag
// on an empty vector parses back to an empty vector.
// Returns false if an element fails to parse, with *error as filled in by
// absl::ParseFlag; *flag is then left partially filled.
bool AbslParseFlag(absl::string_view const text,
                   std::vector<int>* const flag,
                   std::string* const error) {
  flag->clear();
  if (text.empty()) {
    // Splitting an empty string yields a single empty element, which would not
    // parse as an int; an empty value denotes an empty list instead.
    return true;
  }
  for (absl::string_view const element : absl::StrSplit(text, ',')) {
    if (!absl::ParseFlag(element, &flag->emplace_back(), error)) {
      return false;
    }
  }
  return true;
}

// Formats flag as a comma-separated list; the inverse of AbslParseFlag.
std::string AbslUnparseFlag(std::vector<int> const& flag) {
  return absl::StrJoin(flag, ",");
}

}  // namespace std

// For each value q, Benchmark reports the element at index sample_count / q of
// the sorted samples, i.e., the 1/q quantile (2 is the median).  The values
// also drive the column headings of LatencyDistributionTable.
// NOTE(review): q = 1 indexes one past the end of the samples and q = 0
// divides by zero; neither is rejected when the flag is parsed.
ABSL_FLAG(std::vector<int>,
quantiles,
(std::vector{1000, 100, 20, 10, 4, 2}),
"Inverses of the quantiles to report");

namespace principia {
namespace nanobenchmarks {
namespace _main {
namespace {

using namespace principia::mathematica::_logger;
using namespace principia::mathematica::_mathematica;
using namespace principia::nanobenchmarks::_function_registry;
using namespace principia::nanobenchmarks::_performance_settings_controller;
using namespace principia::testing_utilities::_statistics;
Expand All @@ -34,12 +91,11 @@ BENCHMARK_EXTERN_C_FUNCTION(mulsd_xmm0_xmm0_4x);
struct LatencyDistributionTable {
double min;
std::vector<double> quantiles;
static std::vector<double>& quantile_definitions;

static std::string heading() {
std::stringstream out;
std::print(out, "{:>8}", "min");
for (auto const& n : quantile_definitions) {
for (auto const& n : absl::GetFlag(FLAGS_quantiles)) {
if (n > 1000) {
std::print(out, "{:>7}‱", 10'000.0 / n);
} else if (n > 100) {
Expand All @@ -61,9 +117,6 @@ struct LatencyDistributionTable {
}
};

std::vector<double>& LatencyDistributionTable::quantile_definitions =
*new std::vector<double>();

LatencyDistributionTable operator*(double a, LatencyDistributionTable x) {
LatencyDistributionTable result{a * x.min};
for (double const quantile : x.quantiles) {
Expand All @@ -80,29 +133,36 @@ LatencyDistributionTable operator+(LatencyDistributionTable x, double b) {
return result;
}

__declspec(noinline) LatencyDistributionTable benchmark(BenchmarkedFunction f) {
constexpr int k = 1'000'000;
static double* durations = new double[k];
__declspec(noinline) LatencyDistributionTable
Benchmark(BenchmarkedFunction f, Logger* logger) {
std::size_t const sample_count = absl::GetFlag(FLAGS_samples);
std::size_t const loop_iterations = absl::GetFlag(FLAGS_loop_iterations);
static std::vector<double>& samples = *new std::vector<double>(
sample_count, std::numeric_limits<double>::quiet_NaN());
int registers[4]{};
int leaf = 0;
for (int j = 0; j < k; ++j) {
constexpr int n = 100;
for (int j = 0; j < sample_count; ++j) {
__cpuid(registers, leaf);
auto const tsc = __rdtsc();
double x = 5 + tsc % 2 + registers[0] % 2;
for (int i = 0; i < n; ++i) {
for (int i = 0; i < loop_iterations; ++i) {
x = f(x);
x += 5 - x;
}
__cpuid(registers, x);
double const δtsc = __rdtsc() - tsc;
durations[j] = δtsc / n;
samples[j] = δtsc / loop_iterations;
}
if (logger != nullptr) {
logger->Append(
"samples",
std::tuple{FunctionRegistry::names_by_function().at(f),
samples});
}
std::sort(durations, durations + k);
LatencyDistributionTable result {
durations[0]};
for (int const q : LatencyDistributionTable::quantile_definitions) {
result.quantiles.push_back(durations[k / q]);
std::ranges::sort(samples);
LatencyDistributionTable result{samples[0]};
for (int const q : absl::GetFlag(FLAGS_quantiles)) {
result.quantiles.push_back(samples[sample_count / q]);
}
return result;
}
Expand All @@ -126,36 +186,59 @@ BENCHMARK_FUNCTION_WITH_NAME(
BENCHMARK_FUNCTION_WITH_NAME("Cbrt",
principia::numerics::_cbrt::Cbrt);

int __cdecl main(int argc, char** argv) {
absl::ParseCommandLine(argc, argv);
// Returns the width of s in columns, as estimated by std::format when padding
// a field, rather than its size in code units.  Returns 0 for an empty string.
std::size_t FormattedWidth(std::string const& s) {
  if (s.empty()) {
    // The computation below would build the format string "{:0}", which is not
    // a valid specification for a string and makes std::vformat throw.
    return 0;
  }
  // Two columns per code unit is wide enough, since field width is at most 2
  // per extended grapheme cluster.
  std::size_t const field_width = 2 * s.size();
  // There is no vformatted_size, so we actually format.
  std::string const padded = std::vformat(
      "{:" + std::to_string(field_width) + "}", std::make_format_args(s));
  // Formatting appended enough spaces to fill the field; the actual width is
  // the field width minus that padding.
  std::size_t const padding = padded.size() - s.size();
  return field_width - padding;
}

void Main() {
std::regex const name_matcher(absl::GetFlag(FLAGS_benchmark_filter));
auto controller = PerformanceSettingsController::Make();
LatencyDistributionTable::quantile_definitions = {1000, 100, 20, 10, 4, 2};
std::unique_ptr<Logger> logger;
std::string const& filename = absl::GetFlag(FLAGS_log_to_mathematica);
if (!filename.empty()) {
logger = std::make_unique<Logger>(filename, /*make_unique=*/false);

Check warning on line 209 in nanobenchmarks/main.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

build/include_what_you_use

Add #include <memory> for make_unique<>
}
std::println("{} {}",
principia::base::_cpuid::CPUVendorIdentificationString(),
principia::base::_cpuid::ProcessorBrandString());
std::println("Features: {}", principia::base::_cpuid::CPUFeatures());
auto name_widths =
std::views::keys(FunctionRegistry::singleton.functions_by_name()) |
std::views::transform(&std::string::size);
std::size_t name_width = *std::ranges::max_element(name_widths);
std::vector reference_functions{
std::pair{&identity, 0},
std::pair{&mulsd_xmm0_xmm0, 4},
std::pair{&mulsd_xmm0_xmm0_4x, 4 * 4},
std::pair{&sqrtps_xmm0_xmm0, 12},
};
auto name_widths =
FunctionRegistry::functions_by_name() |
std::views::filter([&](auto const& pair) {
auto const& [name, f] = pair;
return std::regex_match(name, name_matcher) ||
std::ranges::contains(std::views::keys(reference_functions), f);
}) |
std::views::keys | std::views::transform(&FormattedWidth);
std::size_t name_width = *std::ranges::max_element(name_widths);
std::map<BenchmarkedFunction, LatencyDistributionTable>
reference_measurements;
std::vprint_unicode(
"{:<" + std::to_string(name_width + 2) + "}{}\n",
std::make_format_args("RAW TSC:", LatencyDistributionTable::heading()));
for (auto const& [function, _] : reference_functions) {
auto const result = benchmark(function);
auto const result = Benchmark(function, logger.get());
reference_measurements.emplace(function, result);
std::vprint_unicode(
"{:>" + std::to_string(name_width + 2) + "}{}\n",
std::make_format_args(
FunctionRegistry::singleton.names_by_function().at(function),
FunctionRegistry::names_by_function().at(function),
result.Row()));
}
std::vector<double> tsc;
Expand All @@ -166,25 +249,38 @@ int __cdecl main(int argc, char** argv) {
}
double const a = Slope(tsc, expected_cycles);
double const b = Mean(expected_cycles) - a * Mean(tsc);
auto benchmark_cycles = [&](BenchmarkedFunction const f) {
return a * Benchmark(f, logger.get()) + b;
};
std::println("Slope: {:0.6f} cycle/TSC", a);
std::println(
"Correlation coefficient: {:0.6f}",
PearsonProductMomentCorrelationCoefficient(tsc, expected_cycles));
std::vprint_unicode(
"{:<" + std::to_string(name_width + 2) + "}{}\n",
std::make_format_args("Cycles:", LatencyDistributionTable::heading()));
auto bm_cycles = [&](BenchmarkedFunction f) {
return a * benchmark(f) + b;
};
for (auto const& [name, f] : FunctionRegistry::singleton.functions_by_name()) {
auto const result = benchmark(f);
for (auto const& [name, f] :
FunctionRegistry::functions_by_name()) {
if (!std::regex_match(name, name_matcher)) {
continue;
}
std::vprint_unicode(
"{}{:>" + std::to_string(name_width + 1) + "}{}\n",
"{} {:>" + std::to_string(name_width) + "}{}\n",
std::make_format_args(
std::ranges::contains(std::views::keys(reference_functions), f)
? "R"
: " ",
name,
(a * result + b).Row()));
benchmark_cycles(f).Row()));
}
}

} // namespace
} // namespace _main
} // namespace nanobenchmarks
} // namespace principia

// Program entry point: lets Abseil parse the command-line flags defined in
// this file, then runs the benchmarks.
int __cdecl main(int const argc, char** const argv) {
  // Flags must be parsed before Main reads them with absl::GetFlag.
  absl::ParseCommandLine(argc, argv);
  principia::nanobenchmarks::_main::Main();
  return 0;
}

Check warning on line 286 in nanobenchmarks/main.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

whitespace/ending_newline

Could not find a newline character at the end of the file.

0 comments on commit 5026a76

Please sign in to comment.