diff --git a/dbms/src/Common/getNumberOfLogicalCPUCores.cpp b/dbms/src/Common/getNumberOfLogicalCPUCores.cpp new file mode 100644 index 00000000000..73d8f6f7d43 --- /dev/null +++ b/dbms/src/Common/getNumberOfLogicalCPUCores.cpp @@ -0,0 +1,74 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +namespace CPUCores +{ +UInt16 number_of_logical_cpu_cores = std::thread::hardware_concurrency(); +UInt16 number_of_physical_cpu_cores = std::thread::hardware_concurrency() / 2; +} // namespace CPUCores + + +UInt16 getNumberOfLogicalCPUCores() +{ + return CPUCores::number_of_logical_cpu_cores; +} + +UInt16 getNumberOfPhysicalCPUCores() +{ + return CPUCores::number_of_physical_cpu_cores; +} + +// We should call this function before Context has been created, +// which will call `getNumberOfLogicalCPUCores`, or we can not +// set cpu cores any more. +void setNumberOfLogicalCPUCores(UInt16 number_of_logical_cpu_cores_) +{ + CPUCores::number_of_logical_cpu_cores = number_of_logical_cpu_cores_; +} + +void computeAndSetNumberOfPhysicalCPUCores(UInt16 number_of_logical_cpu_cores_, UInt16 number_of_hardware_physical_cores) +{ + // First of all, we need to take consideration of two situation: + // 1. tiflash on physical machine. + // In old codes, tiflash needs to set max_threads which is equal to + // physical cpu cores, so we need to ensure this behavior is not broken. + // 2. tiflash on virtual environment. + // In virtual environment, when setting max_threads, we can't directly + // get physical cpu cores to set this variable because only host machine's + // physical cpu core can be reached. So, number of physical cpus cores can + // only be assigned by calculated with logical cpu cores. + // + // - `number_of_logical_cpu_cores_` which represents how many logical cpu cores a tiflash could use(no matter in physical or virtual environment) is assigned from ServerInfo. + // - `hardware_logical_cpu_cores` represents how many logical cpu cores the host physical machine has. + // - `number_of_hardware_physical_cores` represents how many physical cpu cores the host physical machine has. + // - `(hardware_logical_cpu_cores / number_of_hardware_physical_cores)` means how many logical cpu core a physical cpu core has. + // - `number_of_logical_cpu_cores_ / (hardware_logical_cpu_cores / number_of_hardware_physical_cores)` means how many physical cpu cores the tiflash process could use. (Actually, it's needless to get physical cpu cores in virtual environment, but we must ensure the behavior `1` is not broken) + auto hardware_logical_cpu_cores = std::thread::hardware_concurrency(); + UInt16 physical_cpu_cores = number_of_logical_cpu_cores_ / (hardware_logical_cpu_cores / number_of_hardware_physical_cores); + CPUCores::number_of_physical_cpu_cores = physical_cpu_cores > 0 ? physical_cpu_cores : 1; + auto log = DB::Logger::get("CPUCores"); + LOG_FMT_INFO( + log, + "logical cpu cores: {}, hardware logical cpu cores: {}, hardware physical cpu cores: {}, physical cpu cores: {}, number_of_physical_cpu_cores: {}", + number_of_logical_cpu_cores_, + hardware_logical_cpu_cores, + number_of_hardware_physical_cores, + physical_cpu_cores, + CPUCores::number_of_physical_cpu_cores); +} diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.h b/dbms/src/Common/getNumberOfLogicalCPUCores.h similarity index 56% rename from dbms/src/Common/getNumberOfPhysicalCPUCores.h rename to dbms/src/Common/getNumberOfLogicalCPUCores.h index 6f7eaef4bb4..e6b0f5b84ca 100644 --- a/dbms/src/Common/getNumberOfPhysicalCPUCores.h +++ b/dbms/src/Common/getNumberOfLogicalCPUCores.h @@ -14,5 +14,16 @@ #pragma once -/// Get number of CPU cores without hyper-threading. -unsigned getNumberOfPhysicalCPUCores(); +#include + +#include + +UInt16 getNumberOfLogicalCPUCores(); +UInt16 getNumberOfPhysicalCPUCores(); + +// We should call this function before Context has been created, +// which will call `getNumberOfLogicalCPUCores`, or we can not +// set cpu cores any more. +void setNumberOfLogicalCPUCores(UInt16 number_of_logical_cpu_cores_); + +void computeAndSetNumberOfPhysicalCPUCores(UInt16 number_of_logical_cpu_cores, UInt16 number_of_hardware_physical_cores); diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp deleted file mode 100644 index de98f4faa10..00000000000 --- a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include - -#if defined(__x86_64__) -#include -#endif - -namespace DB -{ -namespace ErrorCodes -{ -extern const int CPUID_ERROR; -} -} // namespace DB - -unsigned getNumberOfPhysicalCPUCores() -{ - unsigned res = 0; -#if defined(__x86_64__) - - cpu_raw_data_t raw_data; - cpu_id_t data; - if (0 == cpuid_get_raw_data(&raw_data) && 0 == cpu_identify(&raw_data, &data) && data.num_logical_cpus != 0) - { - res = data.num_cores * data.total_logical_cpus / data.num_logical_cpus; - } - - /// Also, libcpuid gives strange result on Google Compute Engine VMs. - /// Example: - /// num_cores = 12, /// number of physical cores on current CPU socket - /// total_logical_cpus = 1, /// total number of logical cores on all sockets - /// num_logical_cpus = 24. /// number of logical cores on current CPU socket - /// It means two-way hyper-threading (24 / 12), but contradictory, 'total_logical_cpus' == 1. - if (res != 0) - { - return res; - } - // else fallback -#endif - - /// As a fallback (also for non-x86 architectures) assume there are no hyper-threading on the system. - /// (Actually, only Aarch64 is supported). - res = std::thread::hardware_concurrency(); - if (res == 0) - { - throw DB::Exception("Cannot Identify CPU: Unsupported processor", DB::ErrorCodes::CPUID_ERROR); - } - return res; -} diff --git a/dbms/src/Flash/FlashService.cpp b/dbms/src/Flash/FlashService.cpp index 422c6b47cfa..63c75e126fc 100644 --- a/dbms/src/Flash/FlashService.cpp +++ b/dbms/src/Flash/FlashService.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ FlashService::FlashService(IServer & server_) auto settings = server_.context().getSettingsRef(); enable_local_tunnel = settings.enable_local_tunnel; enable_async_grpc_client = settings.enable_async_grpc_client; - const size_t default_size = 2 * getNumberOfPhysicalCPUCores(); + const size_t default_size = getNumberOfLogicalCPUCores(); auto cop_pool_size = static_cast(settings.cop_pool_size); cop_pool_size = cop_pool_size ? cop_pool_size : default_size; diff --git a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp index 940aca349eb..7eb16b2c32c 100644 --- a/dbms/src/Flash/Mpp/MinTSOScheduler.cpp +++ b/dbms/src/Flash/Mpp/MinTSOScheduler.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include @@ -28,8 +29,12 @@ MinTSOScheduler::MinTSOScheduler(UInt64 soft_limit, UInt64 hard_limit) , estimated_thread_usage(0) , log(&Poco::Logger::get("MinTSOScheduler")) { - auto cores = getNumberOfPhysicalCPUCores(); - active_set_soft_limit = (cores + 2) / 2; /// at least 1 + auto cores = static_cast(getNumberOfLogicalCPUCores() / 2); + if (active_set_soft_limit == 0 || active_set_soft_limit > 10 * cores) + { + /// set active_set_soft_limit to a reasonable value + active_set_soft_limit = (cores + 2) / 2; /// at least 1 + } if (isDisabled()) { LOG_FMT_INFO(log, "MinTSOScheduler is disabled!"); diff --git a/dbms/src/Interpreters/SettingsCommon.h b/dbms/src/Interpreters/SettingsCommon.h index cdc7b1b6fbd..4510159da57 100644 --- a/dbms/src/Interpreters/SettingsCommon.h +++ b/dbms/src/Interpreters/SettingsCommon.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include @@ -26,7 +26,6 @@ #include #include - namespace DB { namespace ErrorCodes @@ -166,18 +165,12 @@ struct SettingMaxThreads is_auto = true; } - UInt64 getAutoValue() const + static UInt64 getAutoValue() { - static auto res = getAutoValueImpl(); + static auto res = getNumberOfLogicalCPUCores(); return res; } - /// Executed once for all time. Executed from one thread. - UInt64 getAutoValueImpl() const - { - return getNumberOfPhysicalCPUCores(); - } - UInt64 get() const { return value; diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index 6c3d289dea6..7543b60d1af 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -32,6 +32,7 @@ add_library (clickhouse-server-lib NotFoundHandler.cpp PingRequestHandler.cpp RootRequestHandler.cpp + ServerInfo.cpp Server.cpp StatusFile.cpp TCPHandler.cpp diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index a71830ef390..685382d94bb 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -1049,7 +1049,26 @@ int Server::main(const std::vector & /*args*/) LOG_FMT_INFO(log, "tiflash proxy thread is joined"); }); - CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::get()); + /// get CPU/memory/disk info of this server + if (tiflash_instance_wrap.proxy_helper) + { + diagnosticspb::ServerInfoRequest request; + request.set_tp(static_cast(1)); + diagnosticspb::ServerInfoResponse response; + std::string req = request.SerializeAsString(); + auto * helper = tiflash_instance_wrap.proxy_helper; + helper->fn_server_info(helper->proxy_ptr, strIntoView(&req), &response); + server_info.parseSysInfo(response); + setNumberOfLogicalCPUCores(server_info.cpu_info.logical_cores); + computeAndSetNumberOfPhysicalCPUCores(server_info.cpu_info.logical_cores, server_info.cpu_info.physical_cores); + LOG_FMT_INFO(log, "ServerInfo: {}", server_info.debugString()); + } + else + { + setNumberOfLogicalCPUCores(std::thread::hardware_concurrency()); + computeAndSetNumberOfPhysicalCPUCores(std::thread::hardware_concurrency(), std::thread::hardware_concurrency() / 2); + LOG_FMT_INFO(log, "TiFlashRaftProxyHelper is null, failed to get server info"); + } // print necessary grpc log. grpc_log = &Poco::Logger::get("grpc"); diff --git a/dbms/src/Server/Server.h b/dbms/src/Server/Server.h index 278349f2aa4..3710a3410e1 100644 --- a/dbms/src/Server/Server.h +++ b/dbms/src/Server/Server.h @@ -15,8 +15,9 @@ #pragma once #include +#include +#include -#include "IServer.h" /** Server provides three interfaces: * 1. HTTP - simple interface for any applications. @@ -70,6 +71,8 @@ class Server : public BaseDaemon TiFlashSecurityConfig security_config; + ServerInfo server_info; + class FlashGrpcServerHolder; class TcpHttpServersHolder; }; diff --git a/dbms/src/Server/ServerInfo.cpp b/dbms/src/Server/ServerInfo.cpp new file mode 100644 index 00000000000..47dce5a413d --- /dev/null +++ b/dbms/src/Server/ServerInfo.cpp @@ -0,0 +1,199 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +namespace DB +{ +using diagnosticspb::ServerInfoItem; +using diagnosticspb::ServerInfoResponse; + +void ServerInfo::parseCPUInfo(const diagnosticspb::ServerInfoItem & cpu_info_item) +{ + for (const auto & pair : cpu_info_item.pairs()) + { + const auto & key = pair.key(); + if (key == "cpu-logical-cores") + { + cpu_info.logical_cores = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-physical-cores") + { + cpu_info.physical_cores = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-frequency") + { + cpu_info.frequency = pair.value(); + } + else if (key == "l1-cache-size") + { + cpu_info.l1_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l1-cache-line-size") + { + cpu_info.l1_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "l2-cache-size") + { + cpu_info.l2_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l2-cache-line-size") + { + cpu_info.l2_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "l3-cache-size") + { + cpu_info.l3_cache_size = static_cast(std::stoull(pair.value())); + } + else if (key == "l3-cache-line-size") + { + cpu_info.l3_cache_line_size = static_cast(std::stoi(pair.value())); + } + else if (key == "cpu-arch") + { + cpu_info.arch = pair.value(); + } + } +} + +void ServerInfo::parseDiskInfo(const diagnosticspb::ServerInfoItem & disk_info_item) +{ + Disk disk; + disk.name = disk_info_item.name(); + for (const auto & pair : disk_info_item.pairs()) + { + const auto & key = pair.key(); + if (key == "type") + { + if (pair.value() == "HDD") + { + disk.disk_type = Disk::DiskType::HDD; + } + else if (pair.value() == "SSD") + { + disk.disk_type = Disk::DiskType::SSD; + } + else + { + disk.disk_type = Disk::DiskType::UNKNOWN; + } + } + else if (key == "total") + { + disk.total_space = static_cast(std::stoull(pair.value())); + } + else if (key == "free") + { + disk.free_space = static_cast(std::stoull(pair.value())); + } + else if (key == "path") + { + disk.mount_point = pair.value(); + } + else if (key == "fstype") + { + disk.fs_type = pair.value(); + } + } + disk_infos.push_back(disk); +} + +void ServerInfo::parseMemoryInfo(const diagnosticspb::ServerInfoItem & memory_info_item) +{ + for (const auto & pair : memory_info_item.pairs()) + { + if (pair.key() == "capacity") + { + memory_info.capacity = std::stoull(pair.value()); + } + } +} + +void ServerInfo::parseSysInfo(const diagnosticspb::ServerInfoResponse & sys_info_response) +{ + for (const auto & item : sys_info_response.items()) + { + const auto & tp = item.tp(); + if (tp == "cpu") + { + parseCPUInfo(item); + } + else if (tp == "disk") + { + parseDiskInfo(item); + } + else if (tp == "memory") + { + parseMemoryInfo(item); + } + } +} + +String ServerInfo::debugString() const +{ + FmtBuffer fmt_buf; + // append cpu info + fmt_buf.fmtAppend("CPU: \n" + " logical cores: {}\n" + " physical cores: {}\n" + " frequency: {}\n" + " l1 cache size: {}\n" + " l1 cache line size: {}\n" + " l2 cache size: {}\n" + " l2 cache line size: {}\n" + " l3 cache size: {}\n" + " l3 cache line size: {}\n" + " arch: {}\n", + cpu_info.logical_cores, + cpu_info.physical_cores, + cpu_info.frequency, + cpu_info.l1_cache_size, + cpu_info.l1_cache_line_size, + cpu_info.l2_cache_size, + cpu_info.l2_cache_line_size, + cpu_info.l3_cache_size, + cpu_info.l3_cache_line_size, + cpu_info.arch); + // append disk info + { + const static String disk_type_str[] = {"UNKNOWN", "HDD", "SSD"}; + for (const auto & disk_info : disk_infos) + { + fmt_buf.fmtAppend("Disk: \n" + " name: {}\n" + " type: {}\n" + " total space: {}\n" + " free space: {}\n" + " mount point: {}\n" + " fstype: {}\n", + disk_info.name, + disk_type_str[static_cast(disk_info.disk_type)], + disk_info.total_space, + disk_info.free_space, + disk_info.mount_point, + disk_info.fs_type); + } + } + // append memory info + fmt_buf.fmtAppend("Memory: \n" + " capacity: {}\n", + memory_info.capacity); + + return fmt_buf.toString(); +} + +} // namespace DB diff --git a/dbms/src/Server/ServerInfo.h b/dbms/src/Server/ServerInfo.h new file mode 100644 index 00000000000..d9731e37bb8 --- /dev/null +++ b/dbms/src/Server/ServerInfo.h @@ -0,0 +1,98 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include + +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#ifdef __clang__ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#endif +#include +#pragma GCC diagnostic pop + +namespace DB +{ +class ServerInfo +{ +public: + struct CPUInfo + { + /// number of logical CPU cores + UInt16 logical_cores = std::thread::hardware_concurrency(); + /// number of physical CPU cores + UInt16 physical_cores = std::thread::hardware_concurrency() / 2; + /// number of L1 cache size + /// units: Byte + UInt32 l1_cache_size = 16384; // 16KB (typical value) + /// number of L2 cache size + /// units: Byte + UInt32 l2_cache_size = 65536; // 64KB (typical value) + /// number of L3 cache size + /// units: Byte + UInt32 l3_cache_size = 2097152; // 2MB (typical value) + /// number of L1 cache line size + UInt8 l1_cache_line_size = 64; // 64B (typical value) + /// number of L2 cache line size + UInt8 l2_cache_line_size = 64; // 64B (typical value) + /// number of L3 cache line size + UInt8 l3_cache_line_size = 64; // 64B (typical value) + /// CPU architecture + String arch; + /// CPU frequency + String frequency; + }; + + struct Disk + { + String name; + enum DiskType + { + UNKNOWN = 0, + HDD = 1, + SSD = 2 + }; + DiskType disk_type; + UInt64 total_space = 0; + UInt64 free_space = 0; + String mount_point; + String fs_type; + }; + using DiskInfo = std::vector; + + struct MemoryInfo + { + /// total memory size + /// units: Byte + UInt64 capacity = getMemoryAmount(); + }; + + ServerInfo() = default; + ~ServerInfo() = default; + void parseCPUInfo(const diagnosticspb::ServerInfoItem & cpu_info_item); + void parseDiskInfo(const diagnosticspb::ServerInfoItem & disk_info_item); + void parseMemoryInfo(const diagnosticspb::ServerInfoItem & memory_info_item); + void parseSysInfo(const diagnosticspb::ServerInfoResponse & sys_info_response); + String debugString() const; + + CPUInfo cpu_info; + DiskInfo disk_infos; + MemoryInfo memory_info; +}; +} // namespace DB