Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
c8d1816
Add per-file-handle pool
kingcrimsontianyu Nov 17, 2025
f65776c
Add seq read benchmark
kingcrimsontianyu Nov 18, 2025
8375846
Add seq read benchmark
kingcrimsontianyu Nov 19, 2025
60d6a63
Update
kingcrimsontianyu Nov 19, 2025
a938ebd
Update
kingcrimsontianyu Nov 19, 2025
2ed42d5
Update
kingcrimsontianyu Nov 20, 2025
a9fb20e
Merge branch 'main' into seq-read-bench
kingcrimsontianyu Nov 20, 2025
02f92fd
Merge branch 'main' into per-file-pool
kingcrimsontianyu Nov 20, 2025
77a1c7a
Update
kingcrimsontianyu Nov 20, 2025
c782739
Update
kingcrimsontianyu Nov 20, 2025
4bd39fd
Update
kingcrimsontianyu Nov 20, 2025
65e6f35
Update
kingcrimsontianyu Nov 21, 2025
8f88010
Update
kingcrimsontianyu Nov 21, 2025
cc4a614
Merge branch 'per-file-pool' into seq-read-bench
kingcrimsontianyu Nov 21, 2025
c7425ce
Update
kingcrimsontianyu Nov 21, 2025
4d00073
Merge branch 'main' into seq-read-bench
kingcrimsontianyu Nov 24, 2025
bb2d8bd
Merge branch 'main' into per-file-pool
kingcrimsontianyu Nov 24, 2025
4d7cc3f
Update
kingcrimsontianyu Nov 24, 2025
c595042
Cleanup
kingcrimsontianyu Nov 24, 2025
59e6078
Update
kingcrimsontianyu Nov 24, 2025
afb670d
Add comments
kingcrimsontianyu Nov 24, 2025
c4c22ce
Add nullity check
kingcrimsontianyu Nov 24, 2025
e73d4d6
Add missing comments
kingcrimsontianyu Nov 24, 2025
78489e8
Update
kingcrimsontianyu Nov 24, 2025
19f8af9
Update
kingcrimsontianyu Nov 24, 2025
a92fbbc
Merge branch 'per-file-pool' into seq-read-bench
kingcrimsontianyu Nov 24, 2025
732367a
Merge remote-tracking branch 'origin/seq-read-bench' into seq-read-bench
kingcrimsontianyu Nov 24, 2025
7306407
Fix build error
kingcrimsontianyu Nov 24, 2025
45b3cc7
Update
kingcrimsontianyu Nov 25, 2025
6543709
Merge branch 'main' into per-file-pool
kingcrimsontianyu Nov 25, 2025
c348cbb
Add unit tests
kingcrimsontianyu Nov 25, 2025
bbf240b
Add comments
kingcrimsontianyu Nov 25, 2025
d35d2bb
Merge branch 'per-file-pool' into seq-read-bench
kingcrimsontianyu Nov 26, 2025
b1a9f7c
Merge branch 'main' into seq-read-bench
kingcrimsontianyu Nov 26, 2025
71837f4
Update
kingcrimsontianyu Nov 26, 2025
5e3102c
Implement per-block-device pool
kingcrimsontianyu Nov 27, 2025
f3bec0a
Merge branch 'per-drive-pool' into seq-read-bench
kingcrimsontianyu Nov 27, 2025
52276a1
Update
kingcrimsontianyu Nov 28, 2025
43993d0
Merge branch 'main' into seq-read-bench
kingcrimsontianyu Dec 10, 2025
df7501a
Update
kingcrimsontianyu Dec 10, 2025
e66aae3
Merge branch 'main' into seq-read-bench
kingcrimsontianyu Dec 17, 2025
a31feeb
Big update
kingcrimsontianyu Dec 19, 2025
439c163
Update
kingcrimsontianyu Dec 19, 2025
e6d2e07
Update
kingcrimsontianyu Dec 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ function(kvikio_add_benchmark)
CUDA_STANDARD_REQUIRED ON
)

target_link_libraries(${_KVIKIO_NAME} PUBLIC benchmark::benchmark kvikio::kvikio)
target_link_libraries(
${_KVIKIO_NAME} PUBLIC benchmark::benchmark kvikio::kvikio CUDA::cudart_static
)

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
Expand All @@ -60,3 +62,7 @@ function(kvikio_add_benchmark)
endfunction()

kvikio_add_benchmark(NAME THREADPOOL_BENCHMARK SOURCES "threadpool/threadpool_benchmark.cpp")

kvikio_add_benchmark(
NAME SEQUENTIAL_BENCHMARK SOURCES "io/sequential_benchmark.cpp" "io/common.cpp"
)
302 changes: 302 additions & 0 deletions cpp/benchmarks/io/common.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#include "common.hpp"

#include <getopt.h>

#include <algorithm>
#include <sstream>
#include <stdexcept>

#include <kvikio/detail/utils.hpp>

namespace kvikio::benchmark {

Backend parse_backend(std::string const& str)
{
if (str.empty()) { throw std::invalid_argument("Empty size string"); }

// Normalize to lowercase for case-insensitive comparison
auto backend{str};
std::transform(backend.begin(), backend.end(), backend.begin(), [](unsigned char c) {
return std::tolower(c);
});
if (backend == "filehandle") {
return Backend::FILEHANDLE;
} else if (backend == "cufile") {
return Backend::CUFILE;
} else {
throw std::invalid_argument("Invalid flag: '" + str + "' (use \"FileHandle\", \"cuFile\") ");
}
}

std::size_t parse_size(std::string const& str)
{
if (str.empty()) { throw std::invalid_argument("Empty size string"); }

// Parse the numeric part
std::size_t pos{};
double value{};
try {
value = std::stod(str, &pos);
} catch (std::exception const& e) {
throw std::invalid_argument("Invalid size format: " + str);
}

if (value < 0) { throw std::invalid_argument("Size cannot be negative"); }

// Extract suffix (everything after the number)
auto suffix = str.substr(pos);

// No suffix means raw bytes
if (suffix.empty()) { return static_cast<std::size_t>(value); }

// Normalize to lowercase for case-insensitive comparison
std::transform(
suffix.begin(), suffix.end(), suffix.begin(), [](unsigned char c) { return std::tolower(c); });

// All multipliers use 1024 (binary), not 1000
std::size_t multiplier{1};

// Support both K/Ki, M/Mi, etc. as synonyms (all 1024-based)
std::size_t constexpr one_Ki{1024ULL};
std::size_t constexpr one_Mi{1024ULL * one_Ki};
std::size_t constexpr one_Gi{1024ULL * one_Mi};
std::size_t constexpr one_Ti{1024ULL * one_Gi};
std::size_t constexpr one_Pi{1024ULL * one_Ti};
if (suffix == "k" || suffix == "ki" || suffix == "kb" || suffix == "kib") {
multiplier = one_Ki;
} else if (suffix == "m" || suffix == "mi" || suffix == "mb" || suffix == "mib") {
multiplier = one_Mi;
} else if (suffix == "g" || suffix == "gi" || suffix == "gb" || suffix == "gib") {
multiplier = one_Gi;
} else if (suffix == "t" || suffix == "ti" || suffix == "tb" || suffix == "tib") {
multiplier = one_Ti;
} else if (suffix == "p" || suffix == "pi" || suffix == "pb" || suffix == "pib") {
multiplier = one_Pi;
} else {
throw std::invalid_argument(
"Invalid size suffix: '" + suffix +
"' (use K/Ki/KB/KiB, M/Mi/MB/MiB, G/Gi/GB/GiB, T/Ti/TB/TiB, or P/Pi/PB/PiB)");
}

return static_cast<std::size_t>(value * multiplier);
}

bool parse_flag(std::string const& str)
{
if (str.empty()) { throw std::invalid_argument("Empty flag"); }

// Normalize to lowercase for case-insensitive comparison
auto result{str};
std::transform(
result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); });

if (result == "true" || result == "on" || result == "yes" || result == "1") {
return true;
} else if (result == "false" || result == "off" || result == "no" || result == "0") {
return false;
} else {
throw std::invalid_argument("Invalid flag: '" + str +
"' (use true/false, on/off, yes/no, or 1/0)");
}
}

Backend parse_backend(int argc, char** argv)
{
constexpr int BACKEND = 1000;
Backend result{FILEHANDLE};
static option long_options[] = {
{"backend", required_argument, nullptr, BACKEND}, {0, 0, 0, 0}
// Sentinel to mark the end of the array. Needed by getopt_long()
};

int opt{0};
int option_index{-1};
while ((opt = getopt_long(argc, argv, "-:", long_options, &option_index)) != -1) {
switch (opt) {
case BACKEND: {
result = parse_backend(optarg);
break;
}
case ':': {
// The parsed option has missing argument
std::stringstream ss;
ss << "Missing argument for option " << argv[optind - 1] << " (-"
<< static_cast<char>(optopt) << ")";
throw std::runtime_error(ss.str());
break;
}
default: {
// Unknown option is deferred to subsequent parsing, if any
break;
}
}
}

// Reset getopt state for second pass in the future
optind = 0;

return result;
}

void Config::parse_args(int argc, char** argv)
{
enum LongOnlyOpts {
O_DIRECT = 1000,
ALIGN_BUFFER,
DROP_CACHE,
OPEN_ONCE,
};

static option long_options[] = {
{"file", required_argument, nullptr, 'f'},
{"size", required_argument, nullptr, 's'},
{"threads", required_argument, nullptr, 't'},
{"use-gpu-buffer", required_argument, nullptr, 'g'},
{"gpu-index", required_argument, nullptr, 'd'},
{"repetitions", required_argument, nullptr, 'r'},
{"o-direct", required_argument, nullptr, LongOnlyOpts::O_DIRECT},
{"align-buffer", required_argument, nullptr, LongOnlyOpts::ALIGN_BUFFER},
{"drop-cache", required_argument, nullptr, LongOnlyOpts::DROP_CACHE},
{"overwrite", required_argument, nullptr, 'w'},
{"open-once", required_argument, nullptr, LongOnlyOpts::OPEN_ONCE},
{"help", no_argument, nullptr, 'h'},
{0, 0, 0, 0} // Sentinel to mark the end of the array. Needed by getopt_long()
};

int opt{0};
int option_index{-1};

// - By default getopt_long() returns '?' to indicate errors if an option has missing argument or
// if an unknown option is encountered. The starting ':' in the optstring modifies this behavior.
// Missing argument error now causes the return value to be ':'. Unknow option still leads to '?'
// and its processing is deferred.
// - "f:" means option "-f" takes an argument
// - "c" means option "-c" does not take an argument
while ((opt = getopt_long(argc, argv, "-:f:s:t:g:d:r:w:h", long_options, &option_index)) != -1) {
switch (opt) {
case 'f': {
filepaths.push_back(optarg);
break;
}
case 's': {
num_bytes = parse_size(optarg); // Helper to parse "1G", "500M", etc.
break;
}
case 't': {
num_threads = std::stoul(optarg);
break;
}
case 'g': {
use_gpu_buffer = parse_flag(optarg);
break;
}
case 'd': {
gpu_index = std::stoi(optarg);
break;
}
case 'r': {
repetition = std::stoi(optarg);
break;
}
case 'w': {
overwrite_file = parse_flag(optarg);
break;
}
case LongOnlyOpts::O_DIRECT: {
o_direct = parse_flag(optarg);
break;
}
case LongOnlyOpts::ALIGN_BUFFER: {
align_buffer = parse_flag(optarg);
break;
}
case LongOnlyOpts::DROP_CACHE: {
drop_file_cache = parse_flag(optarg);
break;
}
case LongOnlyOpts::OPEN_ONCE: {
open_file_once = parse_flag(optarg);
break;
}
case 'h': {
print_usage(argv[0]);
std::exit(0);
break;
}
case ':': {
// The parsed option has missing argument
std::stringstream ss;
ss << "Missing argument for option " << argv[optind - 1] << " (-"
<< static_cast<char>(optopt) << ")";
throw std::runtime_error(ss.str());
break;
}
default: {
// Unknown option is deferred to subsequent parsing, if any
break;
}
}
}

// Validation
if (filepaths.empty()) { throw std::invalid_argument("--file is required"); }

// Reset getopt state for second pass in the future
optind = 0;
}

void Config::print_usage(std::string const& program_name)
{
std::cout
<< "Usage: " << program_name << " [OPTIONS]\n\n"
<< "Options:\n"
<< " -f, --file PATH File path to benchmark (required, repeatable)\n"
<< " -s, --size SIZE Number of bytes to read (default: 4G)\n"
<< " Supports suffixes: K, M, G, T, P\n"
<< " -t, --threads NUM Number of threads (default: 1)\n"
<< " -r, --repetitions NUM Number of repetitions (default: 5)\n"
<< " -g, --use-gpu-buffer BOOL Use GPU device memory (default: false)\n"
<< " -d, --gpu-index INDEX GPU device index (default: 0)\n"
<< " -w, --overwrite BOOL Overwrite existing file (default: false)\n"
<< " --o-direct BOOL Use O_DIRECT (default: true)\n"
<< " --align-buffer BOOL Use aligned buffer (default: true)\n"
<< " --drop-cache BOOL Drop page cache before each run (default: false)\n"
<< " --open-once BOOL Open file once, not per repetition (default: false)\n"
<< " -h, --help Show this help message\n";
}

void* CudaPageAlignedDeviceAllocator::allocate(std::size_t size)
{
void* buffer{};
auto const page_size = get_page_size();
auto const up_size = size + page_size;
KVIKIO_CHECK_CUDA(cudaMalloc(&buffer, up_size));
auto* aligned_buffer = detail::align_up(buffer, page_size);
return aligned_buffer;
}

void CudaPageAlignedDeviceAllocator::deallocate(void* buffer, std::size_t /*size*/) {}

CuFileHandle::CuFileHandle(std::string const& file_path,
std::string const& flags,
bool o_direct,
mode_t mode)
: _file_wrapper(file_path, flags, o_direct, mode)
{
_cufile_handle_wrapper.register_handle(_file_wrapper.fd());
}

void CuFileHandle::close()
{
_cufile_handle_wrapper.unregister_handle();
_file_wrapper.close();
}

CUfileHandle_t CuFileHandle::handle() const noexcept { return _cufile_handle_wrapper.handle(); }

} // namespace kvikio::benchmark
Loading