diff --git a/cpp/include/kvikio/defaults.hpp b/cpp/include/kvikio/defaults.hpp index 200f248d79..d214649da7 100644 --- a/cpp/include/kvikio/defaults.hpp +++ b/cpp/include/kvikio/defaults.hpp @@ -18,7 +18,10 @@ #include #include +#include #include +#include +#include #include #include #include @@ -35,23 +38,98 @@ */ namespace kvikio { +namespace detail { template -T getenv_or(std::string_view env_var_name, T default_val) +std::optional from_string(std::string const& env_val) { + std::stringstream ss(env_val); + T converted_val; + ss >> converted_val; + + if (!ss.fail()) { return converted_val; } + + // An exception: for string, empty value is allowed + if constexpr (std::is_same_v) { return std::optional{""}; } + + // For all other cases, return std::nullopt + return {}; +} + +template +[[nodiscard]] T process_single_env_var( + std::string_view env_var_name, + T default_val, + std::function(std::string const&)> user_callback, + std::map> const& dictionary, + bool case_sensitive) +{ + // Step 0: If the name does not exist, use default value auto const* env_val = std::getenv(env_var_name.data()); if (env_val == nullptr) { return default_val; } - std::stringstream sstream(env_val); - T converted_val; - sstream >> converted_val; + // Step 1: try to convert to type T + std::optional converted_val; + if (user_callback) { + converted_val = std::invoke(user_callback, env_val); + if (converted_val.has_value()) { return converted_val.value(); } + } + + // Step 2: look up in the user-provided dictionary + std::string str{env_val}; + if (!dictionary.empty()) { + // Convert to lowercase + if (!case_sensitive) { + // Special considerations regarding the case conversion: + // - std::tolower() is not an addressable function. Passing it to std::transform() as + // a function pointer, if the compile turns out successful, causes the program behavior + // "unspecified (possibly ill-formed)", hence the lambda. ::tolower() is addressable + // and does not have this problem, but the following item still applies. + // - To avoid UB in std::tolower() or ::tolower(), the character must be cast to unsigned + // char. + std::transform( + str.begin(), str.end(), str.begin(), [](unsigned char c) { return std::tolower(c); }); + } - if constexpr (!std::is_same_v) { - KVIKIO_EXPECT(!sstream.fail(), - "unknown config value " + std::string{env_var_name} + "=" + std::string{env_val}, - std::invalid_argument); + // Trim whitespaces + std::stringstream trimmer; + trimmer << str; + str.clear(); + trimmer >> str; + + // Convert the dictionary to an easier format + // Example: + // dictionary is (v_1, {a, b}), (v_2, {d}) + // then flat_dictionary is (a, v_1), (b, v_1), (d, v_2) + // and there must be no duplicate among a, b, d + std::map flat_dictionary; + for (auto const& [dst, src_list] : dictionary) { + for (auto const& src : src_list) { + if (auto const it = flat_dictionary.find(src); it == flat_dictionary.end()) { + flat_dictionary[src] = dst; + } else { + KVIKIO_FAIL("Duplicate environment variable values."); + } + } + } + + // Look up in the dictionary + if (auto it = flat_dictionary.find(str); it != flat_dictionary.end()) { + return flat_dictionary[str]; + } } - return converted_val; + KVIKIO_FAIL("unknown config value " + std::string{env_var_name} + "=" + str, + std::invalid_argument); + + return {}; +} +} // namespace detail + +template +T getenv_or(std::string_view env_var_name, T default_val) +{ + return detail::process_single_env_var( + env_var_name, default_val, detail::from_string, {}, false); } template <> @@ -86,34 +164,42 @@ std::vector getenv_or(std::string_view env_var_name, std::vector defau */ template std::tuple getenv_or( - std::initializer_list env_var_names, T default_val) + std::initializer_list env_var_names, + T default_val, + std::function(std::string const&)> conversion_callback = detail::from_string, + std::map> dictionary = {}, + bool case_sensitive = false) { KVIKIO_EXPECT(env_var_names.size() > 0, "`env_var_names` must contain at least one environment variable name.", std::invalid_argument); std::string_view env_name_target; - std::string_view env_val_target; + std::string_view env_val_str_target; + T env_val_target; - for (auto const& env_var_name : env_var_names) { - auto const* env_val = std::getenv(env_var_name.data()); - if (env_val == nullptr) { continue; } + for (auto const& current_env_var_name : env_var_names) { + auto const* current_env_val_str = std::getenv(current_env_var_name.data()); + if (current_env_val_str == nullptr) { continue; } - if (!env_name_target.empty() && env_val_target != env_val) { + auto current_env_val = detail::process_single_env_var( + current_env_var_name, default_val, conversion_callback, dictionary, case_sensitive); + + if (!env_name_target.empty() && env_val_target != current_env_val) { std::stringstream ss; - ss << "Environment variable " << env_var_name << " (" << env_val - << ") has already been set by its alias " << env_name_target << " (" << env_val_target + ss << "Environment variable " << current_env_var_name << " (" << current_env_val_str + << ") has already been set by its alias " << env_name_target << " (" << env_val_str_target << ") with a different value."; KVIKIO_FAIL(ss.str(), std::invalid_argument); } - env_name_target = env_var_name; - env_val_target = env_val; + env_name_target = current_env_var_name; + env_val_target = current_env_val; + env_val_str_target = current_env_val_str; } if (env_name_target.empty()) { return {env_name_target, default_val, false}; } - auto res = getenv_or(env_name_target, default_val); - return {env_name_target, res, true}; + return {env_name_target, env_val_target, true}; } /** @@ -130,6 +216,7 @@ class defaults { std::size_t _http_max_attempts; long _http_timeout; std::vector _http_status_codes; + std::size_t _mmap_task_size; static unsigned int get_num_threads_from_env(); @@ -367,6 +454,10 @@ class defaults { * @param status_codes The HTTP status codes to retry. */ static void set_http_status_codes(std::vector status_codes); + + [[nodiscard]] static std::size_t mmap_task_size(); + + static void set_mmap_task_size(std::size_t nbytes); }; } // namespace kvikio diff --git a/cpp/src/compat_mode.cpp b/cpp/src/compat_mode.cpp index 78a96c66be..016c969796 100644 --- a/cpp/src/compat_mode.cpp +++ b/cpp/src/compat_mode.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include @@ -27,29 +26,6 @@ namespace kvikio { -namespace detail { -CompatMode parse_compat_mode_str(std::string_view compat_mode_str) -{ - KVIKIO_NVTX_FUNC_RANGE(); - // Convert to lowercase - std::string tmp{compat_mode_str}; - std::transform( - tmp.begin(), tmp.end(), tmp.begin(), [](unsigned char c) { return std::tolower(c); }); - - if (tmp == "on" || tmp == "true" || tmp == "yes" || tmp == "1") { - return CompatMode::ON; - } else if (tmp == "off" || tmp == "false" || tmp == "no" || tmp == "0") { - return CompatMode::OFF; - } else if (tmp == "auto") { - return CompatMode::AUTO; - } else { - KVIKIO_FAIL("Unknown compatibility mode: " + std::string{tmp}, std::invalid_argument); - } - return {}; -} - -} // namespace detail - CompatMode CompatModeManager::infer_compat_mode_if_auto(CompatMode compat_mode) noexcept { KVIKIO_NVTX_FUNC_RANGE(); diff --git a/cpp/src/defaults.cpp b/cpp/src/defaults.cpp index 24a4de0898..4e585072bf 100644 --- a/cpp/src/defaults.cpp +++ b/cpp/src/defaults.cpp @@ -16,10 +16,9 @@ #include #include -#include -#include #include #include +#include #include @@ -28,62 +27,45 @@ #include #include #include -#include namespace kvikio { + template <> bool getenv_or(std::string_view env_var_name, bool default_val) { KVIKIO_NVTX_FUNC_RANGE(); - auto const* env_val = std::getenv(env_var_name.data()); - if (env_val == nullptr) { return default_val; } - try { - // Try parsing `env_var_name` as a integer - return static_cast(std::stoi(env_val)); - } catch (std::invalid_argument const&) { - } - // Convert to lowercase - std::string str{env_val}; - // Special considerations regarding the case conversion: - // - std::tolower() is not an addressable function. Passing it to std::transform() as - // a function pointer, if the compile turns out successful, causes the program behavior - // "unspecified (possibly ill-formed)", hence the lambda. ::tolower() is addressable - // and does not have this problem, but the following item still applies. - // - To avoid UB in std::tolower() or ::tolower(), the character must be cast to unsigned char. - std::transform( - str.begin(), str.end(), str.begin(), [](unsigned char c) { return std::tolower(c); }); - // Trim whitespaces - std::stringstream trimmer; - trimmer << str; - str.clear(); - trimmer >> str; - // Match value - if (str == "true" || str == "on" || str == "yes") { return true; } - if (str == "false" || str == "off" || str == "no") { return false; } - KVIKIO_FAIL("unknown config value " + std::string{env_var_name} + "=" + std::string{env_val}, - std::invalid_argument); - return {}; + return detail::process_single_env_var( + env_var_name, + default_val, + {}, + {{true, {"true", "on", "yes", "1"}}, {false, {"false", "off", "no", "0"}}}, + false); } template <> CompatMode getenv_or(std::string_view env_var_name, CompatMode default_val) { KVIKIO_NVTX_FUNC_RANGE(); - auto* env_val = std::getenv(env_var_name.data()); - if (env_val == nullptr) { return default_val; } - return detail::parse_compat_mode_str(env_val); + return detail::process_single_env_var(env_var_name, + default_val, + {}, + {{CompatMode::ON, {"true", "on", "yes", "1"}}, + {CompatMode::OFF, {"false", "off", "no", "0"}}, + {CompatMode::AUTO, {"auto"}}}, + false); } template <> std::vector getenv_or(std::string_view env_var_name, std::vector default_val) { KVIKIO_NVTX_FUNC_RANGE(); - auto* const env_val = std::getenv(env_var_name.data()); - if (env_val == nullptr) { return std::move(default_val); } - std::string const int_str(env_val); - if (int_str.empty()) { return std::move(default_val); } - - return detail::parse_http_status_codes(env_var_name, int_str); + auto callback = [=](std::string const& env_val) -> std::optional> { + std::string const int_str(env_val); + if (int_str.empty()) { return default_val; } + return detail::parse_http_status_codes(env_var_name, int_str); + }; + return detail::process_single_env_var>( + env_var_name, default_val, callback, {}, false); } unsigned int defaults::get_num_threads_from_env() diff --git a/cpp/tests/test_defaults.cpp b/cpp/tests/test_defaults.cpp index 9c283d658a..14449817cf 100644 --- a/cpp/tests/test_defaults.cpp +++ b/cpp/tests/test_defaults.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include @@ -27,38 +28,38 @@ using ::testing::HasSubstr; using ::testing::ThrowsMessage; -TEST(DefaultsTest, parse_compat_mode_str) -{ - { - std::vector inputs{ - "ON", "on", "On", "TRUE", "true", "True", "YES", "yes", "Yes", "1"}; - for (auto const& input : inputs) { - EXPECT_EQ(kvikio::detail::parse_compat_mode_str(input), kvikio::CompatMode::ON); - } - } +// TEST(DefaultsTest, parse_compat_mode_str) +// { +// { +// std::vector inputs{ +// "ON", "on", "On", "TRUE", "true", "True", "YES", "yes", "Yes", "1"}; +// for (auto const& input : inputs) { +// EXPECT_EQ(kvikio::detail::parse_compat_mode_str(input), kvikio::CompatMode::ON); +// } +// } - { - std::vector inputs{ - "OFF", "off", "oFf", "FALSE", "false", "False", "NO", "no", "No", "0"}; - for (auto const& input : inputs) { - EXPECT_EQ(kvikio::detail::parse_compat_mode_str(input), kvikio::CompatMode::OFF); - } - } +// { +// std::vector inputs{ +// "OFF", "off", "oFf", "FALSE", "false", "False", "NO", "no", "No", "0"}; +// for (auto const& input : inputs) { +// EXPECT_EQ(kvikio::detail::parse_compat_mode_str(input), kvikio::CompatMode::OFF); +// } +// } - { - std::vector inputs{"AUTO", "auto", "aUtO"}; - for (auto const& input : inputs) { - EXPECT_EQ(kvikio::detail::parse_compat_mode_str(input), kvikio::CompatMode::AUTO); - } - } +// { +// std::vector inputs{"AUTO", "auto", "aUtO"}; +// for (auto const& input : inputs) { +// EXPECT_EQ(kvikio::detail::parse_compat_mode_str(input), kvikio::CompatMode::AUTO); +// } +// } - { - std::vector inputs{"", "invalidOption", "11", "*&^Yes"}; - for (auto const& input : inputs) { - EXPECT_THROW(kvikio::detail::parse_compat_mode_str(input), std::invalid_argument); - } - } -} +// { +// std::vector inputs{"", "invalidOption", "11", "*&^Yes"}; +// for (auto const& input : inputs) { +// EXPECT_THROW(kvikio::detail::parse_compat_mode_str(input), std::invalid_argument); +// } +// } +// } TEST(DefaultsTest, parse_http_status_codes) { @@ -104,7 +105,7 @@ TEST(DefaultsTest, alias_for_getenv_or) {{"KVIKIO_TEST_ALIAS_1", ""}, {"KVIKIO_TEST_ALIAS_2", ""}}}; EXPECT_THAT( [=] { kvikio::getenv_or({"KVIKIO_TEST_ALIAS_1", "KVIKIO_TEST_ALIAS_2"}, 123); }, - ThrowsMessage(HasSubstr("unknown config value KVIKIO_TEST_ALIAS_2="))); + ThrowsMessage(HasSubstr("unknown config value KVIKIO_TEST_ALIAS_1="))); } // String env var has an empty value @@ -188,34 +189,42 @@ TEST(DefaultsTest, alias_for_getenv_or) } // Special type: bool + std::map> dictionary_bool{{true, {"true", "on", "yes", "1"}}, + {false, {"false", "off", "no", "0"}}}; { kvikio::test::EnvVarContext env_var_ctx{{{"KVIKIO_TEST_ALIAS", "yes"}}}; - auto const [env_var_name, result, has_found] = kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, false); + auto const [env_var_name, result, has_found] = + kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, false, {}, dictionary_bool); EXPECT_EQ(env_var_name, std::string_view{"KVIKIO_TEST_ALIAS"}); EXPECT_TRUE(result); EXPECT_TRUE(has_found); } { kvikio::test::EnvVarContext env_var_ctx{{{"KVIKIO_TEST_ALIAS", "OFF"}}}; - auto const [env_var_name, result, has_found] = kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, false); + auto const [env_var_name, result, has_found] = + kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, false, {}, dictionary_bool); EXPECT_EQ(env_var_name, std::string_view{"KVIKIO_TEST_ALIAS"}); EXPECT_FALSE(result); EXPECT_TRUE(has_found); } // Special type: CompatMode + std::map> dictionary_compat_mode{ + {kvikio::CompatMode::ON, {"true", "on", "yes", "1"}}, + {kvikio::CompatMode::OFF, {"false", "off", "no", "0"}}, + {kvikio::CompatMode::AUTO, {"auto"}}}; { kvikio::test::EnvVarContext env_var_ctx{{{"KVIKIO_TEST_ALIAS", "yes"}}}; - auto const [env_var_name, result, has_found] = - kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, kvikio::CompatMode::AUTO); + auto const [env_var_name, result, has_found] = kvikio::getenv_or( + {"KVIKIO_TEST_ALIAS"}, kvikio::CompatMode::AUTO, {}, dictionary_compat_mode); EXPECT_EQ(env_var_name, std::string_view{"KVIKIO_TEST_ALIAS"}); EXPECT_EQ(result, kvikio::CompatMode::ON); EXPECT_TRUE(has_found); } { kvikio::test::EnvVarContext env_var_ctx{{{"KVIKIO_TEST_ALIAS", "FALSE"}}}; - auto const [env_var_name, result, has_found] = - kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, kvikio::CompatMode::AUTO); + auto const [env_var_name, result, has_found] = kvikio::getenv_or( + {"KVIKIO_TEST_ALIAS"}, kvikio::CompatMode::AUTO, {}, dictionary_compat_mode); EXPECT_EQ(env_var_name, std::string_view{"KVIKIO_TEST_ALIAS"}); EXPECT_EQ(result, kvikio::CompatMode::OFF); EXPECT_TRUE(has_found); @@ -223,7 +232,7 @@ TEST(DefaultsTest, alias_for_getenv_or) { kvikio::test::EnvVarContext env_var_ctx{{{"KVIKIO_TEST_ALIAS", "aUtO"}}}; auto const [env_var_name, result, has_found] = - kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, kvikio::CompatMode::ON); + kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, kvikio::CompatMode::ON, {}, dictionary_compat_mode); EXPECT_EQ(env_var_name, std::string_view{"KVIKIO_TEST_ALIAS"}); EXPECT_EQ(result, kvikio::CompatMode::AUTO); EXPECT_TRUE(has_found); @@ -231,9 +240,15 @@ TEST(DefaultsTest, alias_for_getenv_or) // Special type: std::vector { + std::vector default_val{111, 112, 113}; + auto callback = [=](std::string const& env_val) -> std::optional> { + std::string const int_str(env_val); + if (int_str.empty()) { return default_val; } + return kvikio::detail::parse_http_status_codes("KVIKIO_TEST_ALIAS", int_str); + }; kvikio::test::EnvVarContext env_var_ctx{{{"KVIKIO_TEST_ALIAS", "109, 108, 107"}}}; auto const [env_var_name, result, has_found] = - kvikio::getenv_or({"KVIKIO_TEST_ALIAS"}, std::vector{111, 112, 113}); + kvikio::getenv_or>({"KVIKIO_TEST_ALIAS"}, default_val, callback, {}); EXPECT_EQ(env_var_name, std::string_view{"KVIKIO_TEST_ALIAS"}); std::vector expected{109, 108, 107}; EXPECT_EQ(result, expected);