Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 112 additions & 21 deletions cpp/include/kvikio/defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@

#include <cstddef>
#include <cstdlib>
#include <functional>
#include <initializer_list>
#include <map>
#include <optional>
#include <sstream>
#include <stdexcept>
#include <string>
Expand All @@ -35,23 +38,98 @@
*/
namespace kvikio {

namespace detail {
// Convert the textual value `env_val` to type T by formatted stream extraction (operator>>).
// Returns the extracted value when the extraction succeeds. When it fails, an empty string is
// returned if T is std::string; for every other T the result is std::nullopt.
// NOTE(review): this span comes from a rendered diff. The `T getenv_or(...)` line right below the
// template header looks like the pre-change signature displayed next to its replacement
// (`std::optional<T> from_string(...)`) - confirm against the actual header.
template <typename T>
T getenv_or(std::string_view env_var_name, T default_val)
std::optional<T> from_string(std::string const& env_val)
{
std::stringstream ss(env_val);
T converted_val;
// Failure of the extraction is reported through the stream's fail state, checked below.
ss >> converted_val;

if (!ss.fail()) { return converted_val; }

// An exception: for string, empty value is allowed
if constexpr (std::is_same_v<T, std::string>) { return std::optional<std::string>{""}; }

// For all other cases, return std::nullopt
return {};
}

// Resolve one environment variable to a value of type T.
//
// Order of resolution, as written below:
//   0. If the variable is not set, `default_val` is returned.
//   1. If `user_callback` is non-empty it is invoked with the raw value; a non-empty optional
//      result is returned immediately.
//   2. If `dictionary` is non-empty, the raw value is (optionally) lowercased, reduced to its
//      first whitespace-delimited token and looked up among the dictionary's strings.
//   3. Otherwise KVIKIO_FAIL is invoked with std::invalid_argument (presumably throwing - the
//      macro is defined outside this view).
//
// `dictionary` maps each result value to the list of strings that select it.
// `case_sensitive == false` lowercases only the environment value; the dictionary strings are
// compared as given, so they are presumably expected to be lowercase already - TODO confirm.
//
// NOTE(review): this span comes from a rendered diff, so a few pre-change lines (the `sstream`
// extraction, the `if constexpr` / KVIKIO_EXPECT lines and `return converted_val;`) appear
// interleaved with the new body - confirm against the actual header.
template <typename T>
[[nodiscard]] T process_single_env_var(
std::string_view env_var_name,
T default_val,
std::function<std::optional<T>(std::string const&)> user_callback,
std::map<T, std::vector<std::string>> const& dictionary,
bool case_sensitive)
{
// Step 0: If the name does not exist, use default value
// NOTE(review): std::string_view::data() is not guaranteed to be null-terminated, while
// std::getenv expects a C string - confirm that callers always pass null-terminated names.
auto const* env_val = std::getenv(env_var_name.data());
if (env_val == nullptr) { return default_val; }

std::stringstream sstream(env_val);
T converted_val;
sstream >> converted_val;
// Step 1: try to convert to type T
std::optional<T> converted_val;
// An empty std::function converts to false, so passing `{}` as the callback skips this step.
if (user_callback) {
converted_val = std::invoke(user_callback, env_val);
if (converted_val.has_value()) { return converted_val.value(); }
}

// Step 2: look up in the user-provided dictionary
std::string str{env_val};
if (!dictionary.empty()) {
// Convert to lowercase
if (!case_sensitive) {
// Special considerations regarding the case conversion:
// - std::tolower() is not an addressable function. Passing it to std::transform() as
// a function pointer, if the compile turns out successful, causes the program behavior
// "unspecified (possibly ill-formed)", hence the lambda. ::tolower() is addressable
// and does not have this problem, but the following item still applies.
// - To avoid UB in std::tolower() or ::tolower(), the character must be cast to unsigned
// char.
std::transform(
str.begin(), str.end(), str.begin(), [](unsigned char c) { return std::tolower(c); });
}

if constexpr (!std::is_same_v<T, std::string>) {
KVIKIO_EXPECT(!sstream.fail(),
"unknown config value " + std::string{env_var_name} + "=" + std::string{env_val},
std::invalid_argument);
// Trim whitespaces
// Round-tripping through a stream keeps only the first whitespace-delimited token of `str`.
std::stringstream trimmer;
trimmer << str;
str.clear();
trimmer >> str;

// Convert the dictionary to an easier format
// Example:
// dictionary is (v_1, {a, b}), (v_2, {d})
// then flat_dictionary is (a, v_1), (b, v_1), (d, v_2)
// and there must be no duplicate among a, b, d
std::map<std::string, T> flat_dictionary;
for (auto const& [dst, src_list] : dictionary) {
for (auto const& src : src_list) {
if (auto const it = flat_dictionary.find(src); it == flat_dictionary.end()) {
flat_dictionary[src] = dst;
} else {
KVIKIO_FAIL("Duplicate environment variable values.");
}
}
}

// Look up in the dictionary
// NOTE(review): `it` already refers to the matching entry; operator[] performs a second lookup.
if (auto it = flat_dictionary.find(str); it != flat_dictionary.end()) {
return flat_dictionary[str];
}
}

return converted_val;
// Reached when neither the callback nor the dictionary produced a value.
KVIKIO_FAIL("unknown config value " + std::string{env_var_name} + "=" + str,
std::invalid_argument);

// Value-initialized fallback placed after KVIKIO_FAIL.
return {};
}
} // namespace detail

template <typename T>
T getenv_or(std::string_view env_var_name, T default_val)
{
return detail::process_single_env_var(
env_var_name, default_val, detail::from_string<T>, {}, false);
}

template <>
Expand Down Expand Up @@ -86,34 +164,42 @@ std::vector<int> getenv_or(std::string_view env_var_name, std::vector<int> defau
*/
// Alias-aware lookup: walks `env_var_names` in order, converts every variable that is set, and
// fails with std::invalid_argument (via KVIKIO_FAIL) when two set aliases convert to different
// values. The returned tuple holds (name of the last set alias, value, true) when at least one
// alias is set, and (empty name, default_val, false) otherwise.
// NOTE(review): this span comes from a rendered diff, so the pre-change parameter list, loop
// header, comparison, assignments and final `getenv_or<T>` call appear interleaved with their
// replacements - confirm against the actual header.
template <typename T>
std::tuple<std::string_view, T, bool> getenv_or(
std::initializer_list<std::string_view> env_var_names, T default_val)
std::initializer_list<std::string_view> env_var_names,
T default_val,
std::function<std::optional<T>(std::string const&)> conversion_callback = detail::from_string<T>,
std::map<T, std::vector<std::string>> dictionary = {},
bool case_sensitive = false)
{
KVIKIO_EXPECT(env_var_names.size() > 0,
"`env_var_names` must contain at least one environment variable name.",
std::invalid_argument);
// Name of the most recently seen alias that is set; stays empty while none has been found.
std::string_view env_name_target;
std::string_view env_val_target;
// Raw text of that alias, kept only for the error message below.
std::string_view env_val_str_target;
// Converted value of that alias. It is default-initialized here and only read after
// `env_name_target` became non-empty, i.e. after the assignments at the end of the loop body.
T env_val_target;

for (auto const& env_var_name : env_var_names) {
auto const* env_val = std::getenv(env_var_name.data());
if (env_val == nullptr) { continue; }
for (auto const& current_env_var_name : env_var_names) {
// NOTE(review): std::string_view::data() is not guaranteed to be null-terminated - confirm
// that callers always pass null-terminated names.
auto const* current_env_val_str = std::getenv(current_env_var_name.data());
if (current_env_val_str == nullptr) { continue; }

if (!env_name_target.empty() && env_val_target != env_val) {
// Aliases are compared on their converted values, not on their raw text.
auto current_env_val = detail::process_single_env_var(
current_env_var_name, default_val, conversion_callback, dictionary, case_sensitive);

if (!env_name_target.empty() && env_val_target != current_env_val) {
std::stringstream ss;
ss << "Environment variable " << env_var_name << " (" << env_val
<< ") has already been set by its alias " << env_name_target << " (" << env_val_target
ss << "Environment variable " << current_env_var_name << " (" << current_env_val_str
<< ") has already been set by its alias " << env_name_target << " (" << env_val_str_target
<< ") with a different value.";
KVIKIO_FAIL(ss.str(), std::invalid_argument);
}

env_name_target = env_var_name;
env_val_target = env_val;
env_name_target = current_env_var_name;
env_val_target = current_env_val;
env_val_str_target = current_env_val_str;
}

// None of the aliases is set.
if (env_name_target.empty()) { return {env_name_target, default_val, false}; }

auto res = getenv_or<T>(env_name_target, default_val);
return {env_name_target, res, true};
return {env_name_target, env_val_target, true};
}

/**
Expand All @@ -130,6 +216,7 @@ class defaults {
std::size_t _http_max_attempts;
long _http_timeout;
std::vector<int> _http_status_codes;
std::size_t _mmap_task_size;

static unsigned int get_num_threads_from_env();

Expand Down Expand Up @@ -367,6 +454,10 @@ class defaults {
* @param status_codes The HTTP status codes to retry.
*/
static void set_http_status_codes(std::vector<int> status_codes);

[[nodiscard]] static std::size_t mmap_task_size();

static void set_mmap_task_size(std::size_t nbytes);
};

} // namespace kvikio
24 changes: 0 additions & 24 deletions cpp/src/compat_mode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
* limitations under the License.
*/

#include <algorithm>
#include <cassert>
#include <stdexcept>

Expand All @@ -27,29 +26,6 @@

namespace kvikio {

namespace detail {
/**
 * @brief Map a textual compatibility mode onto the CompatMode enumeration.
 *
 * Matching is case-insensitive. "on", "true", "yes" and "1" select CompatMode::ON; "off",
 * "false", "no" and "0" select CompatMode::OFF; "auto" selects CompatMode::AUTO. Any other
 * text is reported through KVIKIO_FAIL with std::invalid_argument.
 *
 * @param compat_mode_str Text to interpret.
 * @return The matching CompatMode value.
 */
CompatMode parse_compat_mode_str(std::string_view compat_mode_str)
{
  KVIKIO_NVTX_FUNC_RANGE();

  // Lowercase a private copy. Each character goes through unsigned char before reaching
  // std::tolower() to avoid undefined behavior for negative char values.
  std::string lowered{compat_mode_str};
  for (auto& ch : lowered) {
    ch = std::tolower(static_cast<unsigned char>(ch));
  }

  bool const means_on =
    lowered == "on" || lowered == "true" || lowered == "yes" || lowered == "1";
  if (means_on) { return CompatMode::ON; }

  bool const means_off =
    lowered == "off" || lowered == "false" || lowered == "no" || lowered == "0";
  if (means_off) { return CompatMode::OFF; }

  if (lowered == "auto") { return CompatMode::AUTO; }

  // No spelling matched: report the lowercased text.
  KVIKIO_FAIL("Unknown compatibility mode: " + std::string{lowered}, std::invalid_argument);
  return {};
}

}  // namespace detail

CompatMode CompatModeManager::infer_compat_mode_if_auto(CompatMode compat_mode) noexcept
{
KVIKIO_NVTX_FUNC_RANGE();
Expand Down
62 changes: 22 additions & 40 deletions cpp/src/defaults.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@

#include <cstddef>
#include <cstdlib>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>

#include <BS_thread_pool.hpp>

Expand All @@ -28,62 +27,45 @@
#include <kvikio/error.hpp>
#include <kvikio/http_status_codes.hpp>
#include <kvikio/shim/cufile.hpp>
#include <string_view>

namespace kvikio {

// Specialization of getenv_or for bool.
// NOTE(review): this span comes from a rendered diff. The pre-change body (integer parsing via
// std::stoi followed by a hand-written keyword match) appears first; the replacement is the
// final `detail::process_single_env_var<bool>(...)` call - confirm against the actual source.
template <>
bool getenv_or(std::string_view env_var_name, bool default_val)
{
KVIKIO_NVTX_FUNC_RANGE();
auto const* env_val = std::getenv(env_var_name.data());
if (env_val == nullptr) { return default_val; }
try {
// Try parsing `env_var_name` as a integer
return static_cast<bool>(std::stoi(env_val));
} catch (std::invalid_argument const&) {
}
// Convert to lowercase
std::string str{env_val};
// Special considerations regarding the case conversion:
// - std::tolower() is not an addressable function. Passing it to std::transform() as
// a function pointer, if the compile turns out successful, causes the program behavior
// "unspecified (possibly ill-formed)", hence the lambda. ::tolower() is addressable
// and does not have this problem, but the following item still applies.
// - To avoid UB in std::tolower() or ::tolower(), the character must be cast to unsigned char.
std::transform(
str.begin(), str.end(), str.begin(), [](unsigned char c) { return std::tolower(c); });
// Trim whitespaces
std::stringstream trimmer;
trimmer << str;
str.clear();
trimmer >> str;
// Match value
if (str == "true" || str == "on" || str == "yes") { return true; }
if (str == "false" || str == "off" || str == "no") { return false; }
KVIKIO_FAIL("unknown config value " + std::string{env_var_name} + "=" + std::string{env_val},
std::invalid_argument);
return {};
// The callback argument is `{}`, so only the dictionary below decides the result; the final
// `false` requests case-insensitive matching.
// NOTE(review): the std::stoi path above accepted any integer text, whereas this dictionary
// lists only "1" and "0" as numeric spellings - confirm the narrowing is intended.
return detail::process_single_env_var<bool>(
env_var_name,
default_val,
{},
{{true, {"true", "on", "yes", "1"}}, {false, {"false", "off", "no", "0"}}},
false);
}

// Specialization of getenv_or for CompatMode.
// NOTE(review): this span comes from a rendered diff. The `std::getenv` /
// `detail::parse_compat_mode_str` lines appear to be the pre-change body; the replacement is the
// `detail::process_single_env_var<CompatMode>(...)` call - confirm against the actual source.
template <>
CompatMode getenv_or(std::string_view env_var_name, CompatMode default_val)
{
KVIKIO_NVTX_FUNC_RANGE();
auto* env_val = std::getenv(env_var_name.data());
if (env_val == nullptr) { return default_val; }
return detail::parse_compat_mode_str(env_val);
// The callback argument is `{}`, so the dictionary alone maps text to a mode; the final `false`
// requests case-insensitive matching.
return detail::process_single_env_var<CompatMode>(env_var_name,
default_val,
{},
{{CompatMode::ON, {"true", "on", "yes", "1"}},
{CompatMode::OFF, {"false", "off", "no", "0"}},
{CompatMode::AUTO, {"auto"}}},
false);
}

// Specialization of getenv_or for std::vector<int>.
// NOTE(review): this span comes from a rendered diff. The direct `std::getenv` handling and the
// first `detail::parse_http_status_codes` return appear to be the pre-change body; the
// replacement is the callback plus the `detail::process_single_env_var` call - confirm against
// the actual source.
template <>
std::vector<int> getenv_or(std::string_view env_var_name, std::vector<int> default_val)
{
KVIKIO_NVTX_FUNC_RANGE();
auto* const env_val = std::getenv(env_var_name.data());
if (env_val == nullptr) { return std::move(default_val); }
std::string const int_str(env_val);
if (int_str.empty()) { return std::move(default_val); }

return detail::parse_http_status_codes(env_var_name, int_str);
// The lambda captures `env_var_name` and `default_val` by copy (`[=]`). An empty value yields
// the default; anything else is handed to detail::parse_http_status_codes().
auto callback = [=](std::string const& env_val) -> std::optional<std::vector<int>> {
std::string const int_str(env_val);
if (int_str.empty()) { return default_val; }
return detail::parse_http_status_codes(env_var_name, int_str);
};
// No dictionary is supplied (`{}`), so the callback is the only conversion step.
return detail::process_single_env_var<std::vector<int>>(
env_var_name, default_val, callback, {}, false);
}

unsigned int defaults::get_num_threads_from_env()
Expand Down
Loading