Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 20 additions & 187 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1365,192 +1365,19 @@ Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_
return Status::OK();
}

// NOTE(review): two function headers appear back to back below (SetHtpPowerConfig, then
// SetHtpPowerConfigs), each opening a brace, followed by a single body. This looks like the
// removed and added sides of a diff rendered without +/- markers -- confirm which header
// belongs to this body before building.
//
// What the body below does: fills in one DCVS v3 power config for the HTP power-config client
// `htp_power_config_client_id`, choosing sleep latency, the DCVS enable flag and the bus/core
// voltage corners from `htp_performance_mode`, then applies it through the HTP perf
// infrastructure's setPowerConfig(). Returns Status::OK() on success and an error Status if
// backend setup is incomplete or a QNN call fails; throws (ORT_THROW) on an unknown mode.
// `rpc_polling_time` and `rpc_control_latency` from the second header are not referenced
// anywhere in this body.
Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode) {
Status QnnBackendManager::SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode,
uint32_t rpc_polling_time,
uint32_t rpc_control_latency) {
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
// set an HTP power config ID. Otherwise, this causes a segfault because the QNN backend lib is unloaded.
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP power config ID if backend setup is not complete.");
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
// NOTE(review): the message names backendGetPerfInfrastructure, but the call that failed is
// deviceGetInfrastructure.
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

// The opaque device infrastructure handle is reinterpreted as the HTP-specific struct; the
// infraType check below rejects anything that is not the perf infrastructure.
auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;

// Exactly one config entry (DCVS v3) is sent.
constexpr const int kNumConfigs = 1;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> power_configs(
kNumConfigs);
QnnHtpPerfInfrastructure_PowerConfig_t& dcvs_config = power_configs[0];
dcvs_config.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_DCVS_V3;
QnnHtpPerfInfrastructure_DcvsV3_t& dcvs_v3 = dcvs_config.dcvsV3Config;
// Settings shared by every mode; only kHtpExtremePowerSaver overrides powerMode below.
dcvs_v3.contextId = htp_power_config_client_id;
dcvs_v3.setSleepDisable = 0;
dcvs_v3.sleepDisable = 0;
dcvs_v3.setDcvsEnable = 1;
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_PERFORMANCE_MODE;
// choose performance mode
// Every case sets the same group of fields: a sleep latency, the DCVS enable flag, and the
// bus and core voltage corners with min == target == max. Burst, sustained-high and
// high-performance set dcvsEnable to kDcvsDisable; all other modes use kDcvsEnable.
switch (htp_performance_mode) {
case HtpPerformanceMode::kHtpBurst:
case HtpPerformanceMode::kHtpSustainedHighPerformance:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMinLatency;
dcvs_v3.dcvsEnable = kDcvsDisable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_MAX_VOLTAGE_CORNER;
break;
case HtpPerformanceMode::kHtpHighPerformance:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepLowLatency;
dcvs_v3.dcvsEnable = kDcvsDisable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_TURBO;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_TURBO;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_TURBO;
break;
case HtpPerformanceMode::kHtpBalanced:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.dcvsEnable = kDcvsEnable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM_PLUS;
break;
case HtpPerformanceMode::kHtpLowBalanced:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.dcvsEnable = kDcvsEnable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_NOM;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_NOM;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_NOM;
break;
case HtpPerformanceMode::kHtpHighPowerSaver:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.dcvsEnable = kDcvsEnable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS_PLUS;
break;
case HtpPerformanceMode::kHtpPowerSaver:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.dcvsEnable = kDcvsEnable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS;
break;
case HtpPerformanceMode::kHtpLowPowerSaver:
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.dcvsEnable = kDcvsEnable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_VCORNER_SVS2;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_VCORNER_SVS2;
break;
case HtpPerformanceMode::kHtpExtremePowerSaver:
// The only mode that replaces the PERFORMANCE_MODE power mode set above.
dcvs_v3.powerMode = QNN_HTP_PERF_INFRASTRUCTURE_POWERMODE_POWER_SAVER_MODE;
dcvs_v3.setSleepLatency = 1; // true
dcvs_v3.sleepLatency = kSleepMediumLatency;
dcvs_v3.dcvsEnable = kDcvsEnable;
dcvs_v3.setBusParams = 1;
dcvs_v3.busVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.busVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.busVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.setCoreParams = 1;
dcvs_v3.coreVoltageCornerMin = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.coreVoltageCornerTarget = DCVS_VOLTAGE_CORNER_DISABLE;
dcvs_v3.coreVoltageCornerMax = DCVS_VOLTAGE_CORNER_DISABLE;
break;
default:
// NOTE(review): the message uses a printf-style "%d". ORT_THROW presumably concatenates its
// arguments rather than formatting them, in which case "%d" is emitted literally -- verify.
// Also note this path throws while every other failure in this function returns a Status.
ORT_THROW("Invalid performance profile %d", static_cast<int>(htp_performance_mode));
break;
}
// setPowerConfig receives a nullptr-terminated array of config pointers.
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr = ObtainNullTermPtrVector(power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for HTP performance mode.");

return Status::OK();
}

// Applies the RPC-related HTP power settings for the power-config client
// `htp_power_config_client_id`.
//
// Up to two config entries are collected -- RPC control latency and RPC polling time -- and a
// value of 0 for either one means "do not send this entry". If at least one entry was
// collected, both are applied in a single setPowerConfig() call on the HTP perf infrastructure.
// Returns an error Status if backend setup is incomplete or a QNN call fails.
Status QnnBackendManager::SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
uint32_t rpc_control_latency,
uint32_t rpc_polling_time) {
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
// set RPC control latency. Otherwise, this causes a segfault because the QNN backend library is unloaded.
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP RPC control latency if backend setup is not complete.");

// Reserve room for both possible entries; the vector may still end up empty.
constexpr int kNumRpcPollingPowerConfigs = 2;
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs;
rpc_power_configs.reserve(kNumRpcPollingPowerConfigs);

// Set rpc control latency here
if (rpc_control_latency != 0) {
auto& rpc_control_latency_cfg = rpc_power_configs.emplace_back();
rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency;
}

// Note: v68 does not support rpc polling mode
if (rpc_polling_time != 0) {
auto& rpc_polling_time_cfg = rpc_power_configs.emplace_back();
rpc_polling_time_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
rpc_polling_time_cfg.rpcPollingTimeConfig = rpc_polling_time;
}

// The device infrastructure is only queried when there is something to apply.
if (rpc_power_configs.size() > 0) {
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
// NOTE(review): the message names backendGetPerfInfrastructure, but the call that failed is
// deviceGetInfrastructure.
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");

auto* htp_infra = static_cast<QnnHtpDevice_Infrastructure_t*>(qnn_device_infra);
ORT_RETURN_IF(QNN_HTP_DEVICE_INFRASTRUCTURE_TYPE_PERF != htp_infra->infraType,
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;

// setPowerConfig receives a nullptr-terminated array of config pointers.
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr =
ObtainNullTermPtrVector(rpc_power_configs);
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());
// NOTE(review): this message mentions only RPC control latency, but the same call also
// carries the polling-time entry when one was added.
ORT_RETURN_IF(QNN_SUCCESS != status, "setPowerConfig failed for RPC control latency.");
}
// NOTE(review): the next four statements use `htp_performance_mode`, which is not a parameter
// of this function. They appear to be the added side of a diff (the new SetHtpPowerConfigs
// body, which queues the three settings on htp_power_config_manager_ and applies them with
// one SetPowerConfig call) rendered here without +/- markers -- confirm before building.
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(rpc_polling_time));
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(rpc_control_latency));
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(htp_performance_mode, htp_power_config_client_id));
ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_client_id, GetQnnInterface()));

return Status::OK();
}
Expand All @@ -1564,18 +1391,24 @@ Status QnnBackendManager::SetPerThreadHtpPowerConfigs(const std::thread::id& thr
auto htp_power_config_id = htp_power_configs.power_config_id;
if (pre_run) {
if (htp_power_configs.pre_run_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(SetHtpPowerConfig(htp_power_config_id, *htp_power_configs.pre_run_perf_mode));
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.pre_run_perf_mode,
htp_power_config_id));
}

if (htp_power_configs.rpc_control_latency.has_value()) {
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcControlLatency(*htp_power_configs.rpc_control_latency));
}

if (htp_power_configs.rpc_configs.has_value()) {
ORT_RETURN_IF_ERROR(SetRpcPowerConfigs(htp_power_config_id,
htp_power_configs.rpc_configs->rpc_control_latency,
htp_power_configs.rpc_configs->rpc_polling_time));
if (htp_power_configs.rpc_polling_time.has_value()) {
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddRpcPollingTime(*htp_power_configs.rpc_polling_time));
}
} else if (htp_power_configs.post_run_perf_mode.has_value()) {
ORT_RETURN_IF_ERROR(SetHtpPowerConfig(htp_power_config_id, *htp_power_configs.post_run_perf_mode));
ORT_RETURN_IF_ERROR(htp_power_config_manager_.AddHtpPerformanceMode(*htp_power_configs.post_run_perf_mode,
htp_power_config_id));
}

ORT_RETURN_IF_ERROR(htp_power_config_manager_.SetPowerConfig(htp_power_config_id, GetQnnInterface()));

return Status::OK();
}

Expand Down
22 changes: 6 additions & 16 deletions onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/providers/qnn/builder/qnn_context_mem_handle_manager.h"
#include "core/providers/qnn/builder/qnn_def.h"
#include "core/providers/qnn/builder/qnn_htp_power_config_manager.h"
#include "core/providers/qnn/builder/qnn_profile_serializer.h"
#include "core/providers/qnn/builder/qnn_node_group/qnn_node_group.h"

Expand Down Expand Up @@ -166,12 +167,10 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>

// Produces an HTP power-config client id for the given device/core through the
// `htp_power_config_id` out-parameter; that id is what the Set* functions below take as
// `htp_power_config_client_id`.
Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);

// NOTE(review): SetHtpPowerConfig/SetRpcPowerConfigs and SetHtpPowerConfigs look like the
// removed and added sides of a diff shown together -- confirm which declarations are current.

// Applies the DCVS settings that correspond to `htp_performance_mode` for the given client id.
Status SetHtpPowerConfig(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode);

// Applies RPC control latency and RPC polling time for the given client id.
Status SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
uint32_t rpc_control_latency,
uint32_t rpc_polling_time);
// Combined form taking the performance mode and both RPC settings in one call.
// NOTE(review): the two RPC arguments are in the opposite order from SetRpcPowerConfigs above
// (polling time first, then control latency). Both are uint32_t, so a call site ported from
// the old function compiles unchanged with the values swapped -- check every caller.
Status SetHtpPowerConfigs(uint32_t htp_power_config_client_id,
HtpPerformanceMode htp_performance_mode,
uint32_t rpc_polling_time,
uint32_t rpc_control_latency);

// Applies the power settings stored for `thread_id`; `pre_run` selects between the pre-run
// and post-run settings.
Status SetPerThreadHtpPowerConfigs(const std::thread::id& thread_id, bool pre_run);

Expand Down Expand Up @@ -310,16 +309,6 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>

bool IsDevicePropertySupported();

/// Builds a nullptr-terminated array of pointers to the elements of `vec`.
///
/// The result holds one `const T*` per element, in order, followed by a single trailing
/// `nullptr`, so it always has `vec.size() + 1` entries (an empty input yields `{nullptr}`).
/// Callers pass `result.data()` to APIs that take a null-terminated pointer list.
///
/// The pointers refer to the elements stored in `vec` itself; they are only valid while `vec`
/// is alive and is not resized or reallocated.
///
/// @param vec  Elements to point at. Not copied.
/// @return     Pointers to each element of `vec`, terminated by `nullptr`.
template <typename T>
std::vector<std::add_pointer_t<std::add_const_t<T>>> ObtainNullTermPtrVector(const std::vector<T>& vec) {
  std::vector<std::add_pointer_t<std::add_const_t<T>>> ret;
  // Final size is known up front: one pointer per element plus the terminator.
  ret.reserve(vec.size() + 1);
  for (const auto& elem : vec) {
    ret.push_back(&elem);
  }
  ret.push_back(nullptr);
  return ret;
}

std::string GetBackendBuildId() {
char* backend_build_id{nullptr};
if (QNN_SUCCESS != qnn_interface_.backendGetBuildId((const char**)&backend_build_id)) {
Expand Down Expand Up @@ -432,6 +421,7 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
QnnBackend_Config_t** backend_config_ = nullptr;
Qnn_LogHandle_t log_handle_ = nullptr;
Qnn_DeviceHandle_t device_handle_ = nullptr;
power::HtpPowerConfigManager htp_power_config_manager_;

// Map of Qnn_ContextHandle_t to QnnContextHandleRecord.
// The QnnContextHandleRecord has ownership of the Qnn_ContextHandle_t.
Expand Down
11 changes: 5 additions & 6 deletions onnxruntime/core/providers/qnn/builder/qnn_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,11 @@ enum class HtpPerformanceMode : uint8_t {
kHtpExtremePowerSaver,
};

/// Pair of RPC-related HTP power settings that are applied together.
/// Both fields default to 0.
struct RpcPowerConfigs {
  uint32_t rpc_control_latency{0};
  uint32_t rpc_polling_time{0};
};
using RpcPowerConfigs_t = RpcPowerConfigs;

// HTP power settings kept per thread. Every optional member is applied only when it holds a
// value; an empty optional means "leave that setting alone".
typedef struct PerThreadHtpPowerConfigs {
// Performance mode applied before a run (the pre_run == true path).
std::optional<HtpPerformanceMode> pre_run_perf_mode;
// Performance mode applied after a run (the pre_run == false path).
std::optional<HtpPerformanceMode> post_run_perf_mode;
// NOTE(review): rpc_configs carries the same two values as the separate rpc_control_latency
// and rpc_polling_time members below. This looks like the removed and added sides of a diff
// shown together -- confirm which representation is current.
std::optional<RpcPowerConfigs_t> rpc_configs;
// RPC control latency; applied on the pre-run path only.
std::optional<uint32_t> rpc_control_latency;
// RPC polling time; applied on the pre-run path only.
std::optional<uint32_t> rpc_polling_time;

// HTP power-config client id that the settings above are applied to.
uint32_t power_config_id = 0;
} PerThreadHtpPowerConfigs_t;
Expand Down Expand Up @@ -126,6 +122,9 @@ constexpr const int kSleepMediumLatency = 1000;
// Sleep-latency preset. NOTE(review): units are not visible here (presumably microseconds) --
// confirm against the QNN HTP headers.
constexpr const int kSleepHighLatency = 2000;
// Values written to the DCVS v3 `dcvsEnable` field.
constexpr const int kDcvsDisable = 0;
constexpr const int kDcvsEnable = 1;
// A value of 0 means "do not set RPC polling time" / "do not set RPC control latency".
constexpr const uint32_t kDisableRpcPolling = 0;
constexpr const uint32_t kDisableRpcControlLatency = 0;
// Presumably the largest RPC polling time accepted -- TODO confirm units and where it is enforced.
constexpr const uint32_t kMaxRpcPolling = 9999;

struct OnnxTensorInfo {
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OnnxTensorInfo);
Expand Down
Loading
Loading