diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc index 87f837f623a1c..d47dd21251055 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc @@ -1365,192 +1365,19 @@ Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_ return Status::OK(); } -Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id, - HtpPerformanceMode htp_performance_mode) { +Status QnnBackendManager::SetHtpPowerConfigs(uint32_t htp_power_config_client_id, + HtpPerformanceMode htp_performance_mode, + uint32_t rpc_polling_time, + uint32_t rpc_control_latency) { // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned // to a different EP. Therefore, we have to check that backend setup actually completed before trying to // set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded. 
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete."); - QnnDevice_Infrastructure_t qnn_device_infra = nullptr; - auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); - ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); - - auto* htp_infra = static_cast(qnn_device_infra); - ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType, - "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type."); - QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra; - constexpr const int kNumConfigs = 1; - std::vector power_configs( - kNumConfigs); - QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0]; - dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; - QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config; - dcvs_v3.contextId = htp_power_config_client_id; - dcvs_v3.setSleepDisable = 0; - dcvs_v3.sleepDisable = 0; - dcvs_v3.setDcvsEnable = 1; - dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE; - // choose performance mode - switch (htp_performance_mode) { - case HtpPerformanceMode::kHtpBurst: - case HtpPerformanceMode::kHtpSustainedHighPerformance: - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMinLatency; - dcvs_v3.dcvsEnable = kDcvsDisable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; - break; - case HtpPerformanceMode::kHtpHighPerformance: - 
dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepLowLatency; - dcvs_v3.dcvsEnable = kDcvsDisable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO; - break; - case HtpPerformanceMode::kHtpBalanced: - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMediumLatency; - dcvs_v3.dcvsEnable = kDcvsEnable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS; - break; - case HtpPerformanceMode::kHtpLowBalanced: - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMediumLatency; - dcvs_v3.dcvsEnable = kDcvsEnable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM; - break; - case HtpPerformanceMode::kHtpHighPowerSaver: - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMediumLatency; - dcvs_v3.dcvsEnable = kDcvsEnable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = 
DCVS_VOLTAGE_VCORNER_SVS_PLUS; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS; - break; - case HtpPerformanceMode::kHtpPowerSaver: - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMediumLatency; - dcvs_v3.dcvsEnable = kDcvsEnable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; - break; - case HtpPerformanceMode::kHtpLowPowerSaver: - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMediumLatency; - dcvs_v3.dcvsEnable = kDcvsEnable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2; - dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2; - break; - case HtpPerformanceMode::kHtpExtremePowerSaver: - dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; - dcvs_v3.setSleepLatency = 1; // true - dcvs_v3.sleepLatency = kSleepMediumLatency; - dcvs_v3.dcvsEnable = kDcvsEnable; - dcvs_v3.setBusParams = 1; - dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; - dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; - 
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; - dcvs_v3.setCoreParams = 1; - dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; - dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; - dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; - break; - default: - ORT_THROW("Invalid performance profile %d", static_cast(htp_performance_mode)); - break; - } - std::vector perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs); - status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data()); - ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode."); - - return Status::OK(); -} - -Status QnnBackendManager::SetRpcPowerConfigs(uint32_t htp_power_config_client_id, - uint32_t rpc_control_latency, - uint32_t rpc_polling_time) { - // This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned - // to a different EP. Therefore, we have to check that backend setup actually completed before trying to - // set RPC control latency. Otherwise, this causes a segfault because the QNN backend library is unloaded. 
- ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP RPC control latency if backend setup is not complete."); - - constexpr int kNumRpcPollingPowerConfigs = 2; - std::vector rpc_power_configs; - rpc_power_configs.reserve(kNumRpcPollingPowerConfigs); - - // Set rpc control latency here - if (rpc_control_latency != 0) { - auto& rpc_control_latency_cfg = rpc_power_configs.emplace_back(); - rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY; - rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency; - } - - // Note: v68 does not support rpc polling mode - if (rpc_polling_time != 0) { - auto& rpc_polling_time_cfg = rpc_power_configs.emplace_back(); - rpc_polling_time_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME; - rpc_polling_time_cfg.rpcPollingTimeConfig = rpc_polling_time; - } - - if (rpc_power_configs.size() > 0) { - QnnDevice_Infrastructure_t qnn_device_infra = nullptr; - auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra); - ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); - - auto* htp_infra = static_cast(qnn_device_infra); - ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType, - "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type."); - QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra; - - std::vector perf_power_configs_ptr = - ObtainNullTermPtrVector(rpc_power_configs); - status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data()); - ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency."); - } + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(rpc_polling_time)); + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(rpc_control_latency)); + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(htp_performance_mode, 
htp_power_config_client_id)); + ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id, GetQnnInterface())); return Status::OK(); } @@ -1564,18 +1391,24 @@ Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id& thr auto htp_power_config_id = htp_power_configs.power_config_id; if (pre_run) { if (htp_power_configs.pre_run_perf_mode.has_value()) { - ORT_RETURN_IF_ERROR(SetHtpPowerConfig(htp_power_config_id, *htp_power_configs.pre_run_perf_mode)); + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.pre_run_perf_mode, + htp_power_config_id)); + } + + if (htp_power_configs.rpc_control_latency.has_value()) { + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(*htp_power_configs.rpc_control_latency)); } - if (htp_power_configs.rpc_configs.has_value()) { - ORT_RETURN_IF_ERROR(SetRpcPowerConfigs(htp_power_config_id, - htp_power_configs.rpc_configs->rpc_control_latency, - htp_power_configs.rpc_configs->rpc_polling_time)); + if (htp_power_configs.rpc_polling_time.has_value()) { + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(*htp_power_configs.rpc_polling_time)); } } else if (htp_power_configs.post_run_perf_mode.has_value()) { - ORT_RETURN_IF_ERROR(SetHtpPowerConfig(htp_power_config_id, *htp_power_configs.post_run_perf_mode)); + ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.post_run_perf_mode, + htp_power_config_id)); } + ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_id, GetQnnInterface())); + return Status::OK(); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h index 9dd16694875a7..f1c6c19bb1311 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h @@ -28,6 +28,7 @@ #include 
"core/providers/qnn/builder/op_builder_factory.h" #include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h" #include "core/providers/qnn/builder/qnn_def.h" +#include "core/providers/qnn/builder/qnn_htp_power_config_manager.h" #include "core/providers/qnn/builder/qnn_profile_serializer.h" #include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h" @@ -166,12 +167,10 @@ class QnnBackendManager : public std::enable_shared_from_this Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id); - Status SetHtpPowerConfig(uint32_t htp_power_config_client_id, - HtpPerformanceMode htp_performance_mode); - - Status SetRpcPowerConfigs(uint32_t htp_power_config_client_id, - uint32_t rpc_control_latency, - uint32_t rpc_polling_time); + Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id, + HtpPerformanceMode htp_performance_mode, + uint32_t rpc_polling_time, + uint32_t rpc_control_latency); Status SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run); @@ -310,16 +309,6 @@ class QnnBackendManager : public std::enable_shared_from_this bool IsDevicePropertySupported(); - template - std::vector>> ObtainNullTermPtrVector(const std::vector& vec) { - std::vector>> ret; - for (auto& elem : vec) { - ret.push_back(&elem); - } - ret.push_back(nullptr); - return ret; - } - std::string GetBackendBuildId() { char* backend_build_id{nullptr}; if (QNN_SUCCESS != qnn_interface_.backendGetBuildId((const char**)&backend_build_id)) { @@ -432,6 +421,7 @@ class QnnBackendManager : public std::enable_shared_from_this QnnBackend_Config_t** backend_config_ = nullptr; Qnn_LogHandle_t log_handle_ = nullptr; Qnn_DeviceHandle_t device_handle_ = nullptr; + power::HtpPowerConfigManager htp_power_config_manager_; // Map of Qnn_ContextHandle_t to QnnContextHandleRecord. // The QnnContextHandleRecord has ownership of the Qnn_ContextHandle_t. 
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h index 86a991516dc08..625166f62d166 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_def.h +++ b/onnxruntime/core/providers/qnn/builder/qnn_def.h @@ -71,15 +71,11 @@ enum class HtpPerformanceMode : uint8_t { kHtpExtremePowerSaver, }; -typedef struct RpcPowerConfigs { - uint32_t rpc_control_latency = 0; - uint32_t rpc_polling_time = 0; -} RpcPowerConfigs_t; - typedef struct PerThreadHtpPowerConfigs { std::optional pre_run_perf_mode; std::optional post_run_perf_mode; - std::optional rpc_configs; + std::optional rpc_control_latency; + std::optional rpc_polling_time; uint32_t power_config_id = 0; } PerThreadHtpPowerConfigs_t; @@ -126,6 +122,9 @@ constexpr const int kSleepMediumLatency = 1000; constexpr const int kSleepHighLatency = 2000; constexpr const int kDcvsDisable = 0; constexpr const int kDcvsEnable = 1; +constexpr const uint32_t kDisableRpcPolling = 0; +constexpr const uint32_t kDisableRpcControlLatency = 0; +constexpr const uint32_t kMaxRpcPolling = 9999; struct OnnxTensorInfo { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTensorInfo); diff --git a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc new file mode 100644 index 0000000000000..e8c4dcd13f8a4 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.cc @@ -0,0 +1,276 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License + +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_def.h" +#include "core/providers/qnn/builder/qnn_htp_power_config_manager.h" + +#include + +#include + +namespace onnxruntime { +namespace qnn { +namespace power { + +HtpPowerConfigManager::HtpPowerConfigManager() { + constexpr int kMaxNumConfigs = 3; + power_configs_.reserve(kMaxNumConfigs); +} + +HtpPowerConfigManager::~HtpPowerConfigManager() {} + +Status HtpPowerConfigManager::AddRpcPollingTime(uint32_t rpc_polling_time) { + ORT_RETURN_IF(rpc_polling_time > kMaxRpcPolling, "Cannot set RPC polling time to ", + std::to_string(rpc_polling_time), + ". Max allowable RPC polling time is: ", + std::to_string(kMaxRpcPolling)); + + ORT_RETURN_IF(rpc_polling_time_set_, "There is already a pending RPC polling time config"); + + if (rpc_polling_time == last_set_rpc_polling_time_) { + LOGS_DEFAULT(VERBOSE) << "Requested rpc polling time is the same as last set (" + << last_set_rpc_polling_time_ + << "). Ignoring request"; + } else { + LOGS_DEFAULT(VERBOSE) << "Updating rpc polling time to: " << rpc_polling_time << "us."; + auto& rpc_polling_time_cfg = power_configs_.emplace_back(); + rpc_polling_time_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME; + rpc_polling_time_cfg.rpcPollingTimeConfig = rpc_polling_time; + + last_set_rpc_polling_time_ = rpc_polling_time; + rpc_polling_time_set_ = true; + } + return Status::OK(); +} + +Status HtpPowerConfigManager::AddRpcControlLatency(uint32_t rpc_control_latency) { + ORT_RETURN_IF(rpc_control_latency_set_, "There is already a pending RPC control latency config"); + if (rpc_control_latency == last_set_rpc_control_latency_) { + LOGS_DEFAULT(VERBOSE) << "Requested rpc control latency is the same as last set (" + << last_set_rpc_control_latency_ + << "). 
Ignoring request"; + } else { + LOGS_DEFAULT(VERBOSE) << "Updating rpc control latency to: " << rpc_control_latency << "us."; + auto& rpc_control_latency_cfg = power_configs_.emplace_back(); + rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY; + rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency; + + last_set_rpc_control_latency_ = rpc_control_latency; + rpc_control_latency_set_ = true; + } + + return Status::OK(); +} + +static std::string_view PerformanceModeToString(HtpPerformanceMode htp_performance_mode) { + constexpr std::array, 10> perf_string_map = {{{HtpPerformanceMode::kHtpDefault, "default"}, + {HtpPerformanceMode::kHtpSustainedHighPerformance, "sustained_high_performance"}, + {HtpPerformanceMode::kHtpBurst, "burst"}, + {HtpPerformanceMode::kHtpHighPerformance, "high_performance"}, + {HtpPerformanceMode::kHtpPowerSaver, "power_saver"}, + {HtpPerformanceMode::kHtpLowPowerSaver, "low_power_saver"}, + {HtpPerformanceMode::kHtpHighPowerSaver, "high_power_saver"}, + {HtpPerformanceMode::kHtpLowBalanced, "low_balanced"}, + {HtpPerformanceMode::kHtpBalanced, "balanced"}, + {HtpPerformanceMode::kHtpExtremePowerSaver, "extreme_power_saver"}}}; + + auto it = std::find_if(perf_string_map.begin(), perf_string_map.end(), + [htp_performance_mode](const auto& mapping) { + return mapping.first == htp_performance_mode; + }); + + if (it != perf_string_map.end()) { + return it->second; + } + + return "UNKNOWN"; +} + +Status HtpPowerConfigManager::AddHtpPerformanceMode(HtpPerformanceMode htp_performance_mode, + uint32_t htp_power_config_client_id) { + ORT_RETURN_IF(htp_performance_mode_set_, "There is already a pending HTP performance mode config"); + if (htp_performance_mode == last_set_htp_performance_mode_) { + LOGS_DEFAULT(VERBOSE) << "Requested htp performance mode is the same as last set (" + << PerformanceModeToString(last_set_htp_performance_mode_) + << "). 
Ignoring request"; + } else { + LOGS_DEFAULT(VERBOSE) << "Updating htp performance mode to: " + << PerformanceModeToString(htp_performance_mode) << "."; + + QnnHtpPerfInfrastructure_PowerConfig_t htp_performance_cfg{}; + ORT_RETURN_IF_ERROR(SetHtpPerformancePowerConfig(htp_performance_cfg, + htp_power_config_client_id, + htp_performance_mode)); + + power_configs_.emplace_back(std::move(htp_performance_cfg)); + + last_set_htp_performance_mode_ = htp_performance_mode; + htp_performance_mode_set_ = true; + } + + return Status::OK(); +} + +Status HtpPowerConfigManager::SetPowerConfig(uint32_t htp_power_config_client_id, + const QNN_INTERFACE_VER_TYPE& qnn_interface) { + if (!power_configs_.empty()) { + QnnDevice_Infrastructure_t qnn_device_infra = nullptr; + auto status = qnn_interface.deviceGetInfrastructure(&qnn_device_infra); + ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed."); + + auto* htp_infra = static_cast(qnn_device_infra); + ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType, + "HTP infra type = ", htp_infra->infraType, ", which is not perf infra type."); + QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra; + + std::vector perf_power_configs_ptr; + + for (const auto& power_config : power_configs_) { + perf_power_configs_ptr.push_back(&power_config); + } + perf_power_configs_ptr.push_back(nullptr); + + status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data()); + ORT_RETURN_IF(QNN_SUCCESS != status, "SetPowerConfig failed."); + + rpc_polling_time_set_ = false; + rpc_control_latency_set_ = false; + htp_performance_mode_set_ = false; + power_configs_.clear(); + } else { + LOGS_DEFAULT(VERBOSE) << "SetPowerConfig called but no configs to be set."; + } + + return Status::OK(); +} + +Status HtpPowerConfigManager::SetHtpPerformancePowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, + uint32_t htp_power_config_client_id, + const 
HtpPerformanceMode& htp_performance_mode) { + power_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3; + QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = power_config.dcvsV3Config; + dcvs_v3.contextId = htp_power_config_client_id; + dcvs_v3.setSleepDisable = 0; + dcvs_v3.sleepDisable = 0; + dcvs_v3.setDcvsEnable = 1; + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE; + // choose performance mode + switch (htp_performance_mode) { + case HtpPerformanceMode::kHtpBurst: + case HtpPerformanceMode::kHtpSustainedHighPerformance: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMinLatency; + dcvs_v3.dcvsEnable = kDcvsDisable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER; + break; + case HtpPerformanceMode::kHtpHighPerformance: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepLowLatency; + dcvs_v3.dcvsEnable = kDcvsDisable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO; + break; + case HtpPerformanceMode::kHtpBalanced: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.dcvsEnable = kDcvsEnable; + dcvs_v3.setBusParams = 1; + 
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS; + break; + case HtpPerformanceMode::kHtpLowBalanced: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.dcvsEnable = kDcvsEnable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM; + break; + case HtpPerformanceMode::kHtpHighPowerSaver: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.dcvsEnable = kDcvsEnable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS; + break; + case HtpPerformanceMode::kHtpPowerSaver: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.dcvsEnable = kDcvsEnable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.busVoltageCornerMax = 
DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS; + break; + case HtpPerformanceMode::kHtpLowPowerSaver: + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.dcvsEnable = kDcvsEnable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2; + break; + case HtpPerformanceMode::kHtpExtremePowerSaver: + dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE; + dcvs_v3.setSleepLatency = 1; // true + dcvs_v3.sleepLatency = kSleepMediumLatency; + dcvs_v3.dcvsEnable = kDcvsEnable; + dcvs_v3.setBusParams = 1; + dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.setCoreParams = 1; + dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE; + dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE; + break; + default: + ORT_THROW("Invalid performance profile %d", static_cast(htp_performance_mode)); + break; + } + + return Status::OK(); +} + +} // namespace power +} // namespace qnn +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h new file mode 100644 index 0000000000000..4bbfc6ec45c09 --- /dev/null +++ 
b/onnxruntime/core/providers/qnn/builder/qnn_htp_power_config_manager.h @@ -0,0 +1,66 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License + +#pragma once + +#include "core/providers/qnn/ort_api.h" +#include "core/providers/qnn/builder/qnn_def.h" + +#include + +#include +#include +#include + +namespace onnxruntime { +namespace qnn { +namespace power { + +// Manages staging of any new power configurations and +// updates power configurations for the HTP backend +class HtpPowerConfigManager { + public: + HtpPowerConfigManager(); + ~HtpPowerConfigManager(); + + // Stages a new rpc polling time for next power config update + // If the value is the same as the last previously set, then + // there will be no new rpc polling time staged + Status AddRpcPollingTime(uint32_t rpc_polling_time); + + // Stages a new rpc control latency for next power config update + // If the value is the same as the last previously set, then + // there will be no new rpc control latency staged + Status AddRpcControlLatency(uint32_t rpc_control_latency); + + // Stages a new performance mode for next power config update + // If the value is the same as the last previously set, then + // there will be no new performance mode staged + Status AddHtpPerformanceMode(HtpPerformanceMode htp_performance_mode, + uint32_t htp_power_config_client_id); + + // Takes all configs staged for update and attempts to update + // the HTP power configurations. If there is nothing staged, + // then no attempt will be made. 
+ Status SetPowerConfig(uint32_t htp_power_config_client_id, + const QNN_INTERFACE_VER_TYPE& qnn_interface); + + private: + // Sets voltage corner votes for HTP based on the given performance mode + Status SetHtpPerformancePowerConfig(QnnHtpPerfInfrastructure_PowerConfig_t& power_config, + uint32_t htp_power_config_client_id, + const HtpPerformanceMode& htp_performance_mode); + + uint32_t last_set_rpc_polling_time_ = kDisableRpcPolling; + uint32_t last_set_rpc_control_latency_ = kDisableRpcControlLatency; + HtpPerformanceMode last_set_htp_performance_mode_ = HtpPerformanceMode::kHtpDefault; + + bool rpc_polling_time_set_ = false; + bool rpc_control_latency_set_ = false; + bool htp_performance_mode_set_ = false; + + std::vector power_configs_; +}; +} // namespace power +} // namespace qnn +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc index b63a6a1ebbca3..737216b81139c 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc @@ -425,6 +425,10 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio auto htp_performance_mode_pos = provider_options_map.find(HTP_PERFORMANCE_MODE); if (htp_performance_mode_pos != provider_options_map.end()) { ParseHtpPerformanceMode(htp_performance_mode_pos->second, default_htp_performance_mode_); + + if (qnn::HtpPerformanceMode::kHtpBurst == default_htp_performance_mode_) { + default_rpc_polling_time_ = 9999; + } } htp_graph_finalization_opt_mode_ = qnn::HtpGraphFinalizationOptimizationMode::kDefault; @@ -1374,12 +1378,12 @@ static bool TryGetConfigEntry(const ConfigOptions& config_options, const std::st return true; } -qnn::PerThreadHtpPowerConfigs_t QNNExecutionProvider::GetPerThreadHtpPowerConfigs(const ConfigOptions& config_options) { +bool 
QNNExecutionProvider::GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& per_thread_htp_power_configs, + const ConfigOptions& config_options) { qnn::HtpPerformanceMode pre_run_htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; qnn::HtpPerformanceMode post_run_htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; - qnn::PerThreadHtpPowerConfigs_t per_thread_htp_power_configs; - + bool configs_set = false; std::string htp_perf_mode = ""; if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnPerfMode, htp_perf_mode)) { ParseHtpPerformanceMode(htp_perf_mode, pre_run_htp_performance_mode); @@ -1393,6 +1397,9 @@ qnn::PerThreadHtpPowerConfigs_t QNNExecutionProvider::GetPerThreadHtpPowerConfig uint32_t rpc_control_latency = 0; if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnRpcControlLatency, rpc_latency)) { rpc_control_latency = static_cast(std::stoul(rpc_latency)); + per_thread_htp_power_configs.rpc_control_latency = rpc_control_latency; + configs_set = true; + LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency; } @@ -1403,18 +1410,17 @@ qnn::PerThreadHtpPowerConfigs_t QNNExecutionProvider::GetPerThreadHtpPowerConfig if (qnn::HtpPerformanceMode::kHtpDefault != pre_run_htp_performance_mode) { per_thread_htp_power_configs.pre_run_perf_mode = pre_run_htp_performance_mode; + // rpc polling time will only be updated with perf mode changes + per_thread_htp_power_configs.rpc_polling_time = rpc_polling_time; + configs_set = true; } if (qnn::HtpPerformanceMode::kHtpDefault != post_run_htp_performance_mode) { per_thread_htp_power_configs.post_run_perf_mode = post_run_htp_performance_mode; + configs_set = true; } - if (rpc_control_latency > 0 || rpc_polling_time > 0) { - per_thread_htp_power_configs.rpc_configs = {rpc_control_latency, - rpc_polling_time}; - } - - return per_thread_htp_power_configs; + return configs_set; } Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_options) { @@ 
-1428,10 +1434,12 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio uint32_t htp_power_config_id = 0; if (GetHtpPowerConfigId(htp_power_config_id)) { auto thread_id = std::this_thread::get_id(); - auto per_thread_htp_power_configs = GetPerThreadHtpPowerConfigs(config_options); - per_thread_htp_power_configs.power_config_id = htp_power_config_id; - ORT_RETURN_IF_ERROR(qnn_backend_manager_->AddPerThreadHtpPowerConfigMapping(thread_id, - per_thread_htp_power_configs)); + qnn::PerThreadHtpPowerConfigs_t per_thread_htp_power_configs; + if (GetPerThreadHtpPowerConfigs(per_thread_htp_power_configs, config_options)) { + per_thread_htp_power_configs.power_config_id = htp_power_config_id; + ORT_RETURN_IF_ERROR(qnn_backend_manager_->AddPerThreadHtpPowerConfigMapping(thread_id, + per_thread_htp_power_configs)); + } } std::string lora_config = ""; @@ -1520,10 +1528,17 @@ Status QNNExecutionProvider::SetEpDynamicOptions(gsl::span<const char* const> ke qnn::HtpPerformanceMode htp_performance_mode = qnn::HtpPerformanceMode::kHtpDefault; ParseHtpPerformanceMode(value, htp_performance_mode); + uint32_t rpc_polling_time = 0; + if (htp_performance_mode == qnn::HtpPerformanceMode::kHtpBurst) { + rpc_polling_time = 9999; + } + uint32_t htp_power_config_id = 0; if (GetHtpPowerConfigId(htp_power_config_id)) { - ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetHtpPowerConfig(htp_power_config_id, - htp_performance_mode)); + ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetHtpPowerConfigs(htp_power_config_id, + htp_performance_mode, + rpc_polling_time, + default_rpc_control_latency_)); } } else { LOGS_DEFAULT(ERROR) << "EP Dynamic Option \"" << key << "\" is not currently supported."; @@ -1558,18 +1573,19 @@ void QNNExecutionProvider::CreateHtpPowerConfigId() const { Status rt = qnn_backend_manager_->CreateHtpPowerCfgId(device_id_, core_id, htp_power_config_id); - if (rt == Status::OK()) { + if (rt.IsOK()) { htp_power_config_id_ = htp_power_config_id; - if
(qnn::HtpPerformanceMode::kHtpDefault != default_htp_performance_mode_) { - ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetHtpPowerConfig(htp_power_config_id, - default_htp_performance_mode_)); - } - if (default_rpc_control_latency_ > 0 || default_rpc_polling_time_ > 0) { - ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetRpcPowerConfigs(htp_power_config_id, - default_rpc_control_latency_, - default_rpc_polling_time_)); + rt = qnn_backend_manager_->SetHtpPowerConfigs(htp_power_config_id, + default_htp_performance_mode_, + default_rpc_polling_time_, + default_rpc_control_latency_); + + if (!rt.IsOK()) { + LOGS_DEFAULT(ERROR) << "Unable to set HTP power configurations."; } + } else { + LOGS_DEFAULT(ERROR) << "Failed to create HTP power config id."; } } diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.h b/onnxruntime/core/providers/qnn/qnn_execution_provider.h index da43c9619f604..dd301d7915935 100644 --- a/onnxruntime/core/providers/qnn/qnn_execution_provider.h +++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.h @@ -83,7 +83,9 @@ class QNNExecutionProvider : public IExecutionProvider { bool IsHtpSharedMemoryAllocatorAvailable() const { return rpcmem_library_ != nullptr; } private: - qnn::PerThreadHtpPowerConfigs_t GetPerThreadHtpPowerConfigs(const ConfigOptions& config_options); + // Will return true if any power config options need to be updated + bool GetPerThreadHtpPowerConfigs(qnn::PerThreadHtpPowerConfigs_t& per_thread_htp_power_configs, + const ConfigOptions& config_options); void CreateHtpPowerConfigId() const; // Will return false if htp_power_config_id_ has no value