Skip to content

Commit cee25ba

Browse files
QNN-EP: DSPQueue Polling (microsoft#25361)
### Description Enable DSP queue polling when performance profile is burst
1 parent 9fc41c3 commit cee25ba

File tree

4 files changed

+45
-22
lines changed

4 files changed

+45
-22
lines changed

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,13 +1426,33 @@ Status QnnBackendManager::SetHtpPowerConfig(uint32_t htp_power_config_client_id,
14261426
return Status::OK();
14271427
}
14281428

1429-
Status QnnBackendManager::SetRpcControlLatency(uint32_t htp_power_config_client_id,
1430-
uint32_t rpc_control_latency) {
1429+
Status QnnBackendManager::SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
1430+
uint32_t rpc_control_latency,
1431+
uint32_t rpc_polling_time) {
14311432
// This function is called in QNN EP's OnRunStart() even if QNN backend setup failed and the model is assigned
14321433
// to a different EP. Therefore, we have to check that backend setup actually completed before trying to
14331434
// set RPC control latency. Otherwise, this causes a segfault because the QNN backend library is unloaded.
14341435
ORT_RETURN_IF_NOT(backend_setup_completed_, "Cannot set HTP RPC control latency if backend setup is not complete.");
1436+
1437+
constexpr int kNumRpcPollingPowerConfigs = 2;
1438+
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs;
1439+
rpc_power_configs.reserve(kNumRpcPollingPowerConfigs);
1440+
1441+
// Set rpc control latency here
14351442
if (rpc_control_latency != 0) {
1443+
auto& rpc_control_latency_cfg = rpc_power_configs.emplace_back();
1444+
rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
1445+
rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency;
1446+
}
1447+
1448+
// Note: v68 does not support rpc polling mode
1449+
if (rpc_polling_time != 0) {
1450+
auto& rpc_polling_time_cfg = rpc_power_configs.emplace_back();
1451+
rpc_polling_time_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
1452+
rpc_polling_time_cfg.rpcPollingTimeConfig = rpc_polling_time;
1453+
}
1454+
1455+
if (rpc_power_configs.size() > 0) {
14361456
QnnDevice_Infrastructure_t qnn_device_infra = nullptr;
14371457
auto status = qnn_interface_.deviceGetInfrastructure(&qnn_device_infra);
14381458
ORT_RETURN_IF(QNN_SUCCESS != status, "backendGetPerfInfrastructure failed.");
@@ -1442,15 +1462,6 @@ Status QnnBackendManager::SetRpcControlLatency(uint32_t htp_power_config_client_
14421462
"HTP infra type = ", htp_infra->infraType, ", which is not perf infra type.");
14431463
QnnHtpDevice_PerfInfrastructure_t& htp_perf_infra = htp_infra->perfInfra;
14441464

1445-
// Set rpc control latency here, but note that v68 doesn't support rpc polling mode.
1446-
constexpr int kNumRpcPollingPowerConfigs = 2;
1447-
std::vector<QnnHtpPerfInfrastructure_PowerConfig_t> rpc_power_configs(kNumRpcPollingPowerConfigs);
1448-
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_control_latency_cfg = rpc_power_configs[0];
1449-
// v68 doesn't support this.
1450-
QnnHtpPerfInfrastructure_PowerConfig_t& rpc_polling_time = rpc_power_configs[1];
1451-
rpc_control_latency_cfg.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_CONTROL_LATENCY;
1452-
rpc_polling_time.option = QNN_HTP_PERF_INFRASTRUCTURE_POWER_CONFIGOPTION_RPC_POLLING_TIME;
1453-
rpc_control_latency_cfg.rpcControlLatencyConfig = rpc_control_latency;
14541465
std::vector<const QnnHtpPerfInfrastructure_PowerConfig_t*> perf_power_configs_ptr =
14551466
ObtainNullTermPtrVector(rpc_power_configs);
14561467
status = htp_perf_infra.setPowerConfig(htp_power_config_client_id, perf_power_configs_ptr.data());

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,9 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
159159
Status SetHtpPowerConfig(uint32_t htp_power_config_client_id,
160160
HtpPerformanceMode htp_performance_mode);
161161

162-
Status SetRpcControlLatency(uint32_t htp_power_config_client_id,
163-
uint32_t rpc_control_latency);
162+
// Applies the RPC-related HTP power configs (RPC control latency and RPC polling time)
// for the given HTP power config client.
// Per the implementation in qnn_backend_manager.cc, passing 0 for rpc_control_latency or
// rpc_polling_time leaves that option out of the request; if both are 0 nothing is sent.
Status SetRpcPowerConfigs(uint32_t htp_power_config_client_id,
163+
uint32_t rpc_control_latency,
164+
uint32_t rpc_polling_time);
164165

165166
const QNN_INTERFACE_VER_TYPE& GetQnnInterface() { return qnn_interface_; }
166167

onnxruntime/core/providers/qnn/qnn_execution_provider.cc

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,7 +1356,8 @@ QNNExecutionProvider::PerThreadContext::PerThreadContext(qnn::QnnBackendManager*
13561356
uint32_t device_id,
13571357
uint32_t core_id,
13581358
qnn::HtpPerformanceMode default_htp_performance_mode,
1359-
uint32_t default_rpc_control_latency)
1359+
uint32_t default_rpc_control_latency,
1360+
uint32_t default_rpc_polling_time)
13601361
: qnn_backend_manager_(qnn_backend_manager) {
13611362
Status rt = qnn_backend_manager_->CreateHtpPowerCfgId(device_id, core_id, htp_power_config_id_);
13621363
is_htp_power_config_id_valid_ = rt.IsOK();
@@ -1367,9 +1368,10 @@ QNNExecutionProvider::PerThreadContext::PerThreadContext(qnn::QnnBackendManager*
13671368
ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetHtpPowerConfig(htp_power_config_id_,
13681369
default_htp_performance_mode));
13691370
}
1370-
if (default_rpc_control_latency > 0) {
1371-
ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetRpcControlLatency(htp_power_config_id_,
1372-
default_rpc_control_latency));
1371+
if (default_rpc_control_latency > 0 || default_rpc_polling_time > 0) {
1372+
ORT_IGNORE_RETURN_VALUE(qnn_backend_manager_->SetRpcPowerConfigs(htp_power_config_id_,
1373+
default_rpc_control_latency,
1374+
default_rpc_polling_time));
13731375
}
13741376
}
13751377
}
@@ -1400,7 +1402,8 @@ QNNExecutionProvider::PerThreadContext& QNNExecutionProvider::GetPerThreadContex
14001402
if (context_state_.retired_context_pool.empty()) {
14011403
uint32_t core_id = 0;
14021404
context = std::make_shared<PerThreadContext>(qnn_backend_manager_.get(), device_id_, core_id,
1403-
default_htp_performance_mode_, default_rpc_control_latency_);
1405+
default_htp_performance_mode_, default_rpc_control_latency_,
1406+
default_rpc_polling_time_);
14041407
} else {
14051408
context = context_state_.retired_context_pool.back();
14061409
context_state_.retired_context_pool.pop_back();
@@ -1468,15 +1471,21 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio
14681471
LOGS_DEFAULT(VERBOSE) << "rpc_control_latency: " << rpc_control_latency;
14691472
}
14701473

1474+
uint32_t rpc_polling_time = 0;  // 0 means no RPC polling config is sent to the backend
1475+
if (qnn::HtpPerformanceMode::kHtpBurst == htp_performance_mode) {  // DSP queue polling is enabled only for the burst profile
1476+
rpc_polling_time = 9999;
1477+
}
1478+
14711479
if (GetPerThreadContext().IsHtpPowerConfigIdValid()) {
14721480
if (qnn::HtpPerformanceMode::kHtpDefault != htp_performance_mode) {
14731481
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetHtpPowerConfig(GetPerThreadContext().GetHtpPowerConfigId(),
14741482
htp_performance_mode));
14751483
}
14761484

1477-
if (rpc_control_latency > 0) {
1478-
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetRpcControlLatency(GetPerThreadContext().GetHtpPowerConfigId(),
1479-
rpc_control_latency));
1485+
if (rpc_control_latency > 0 || rpc_polling_time > 0) {
1486+
ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetRpcPowerConfigs(GetPerThreadContext().GetHtpPowerConfigId(),
1487+
rpc_control_latency,
1488+
rpc_polling_time));
14801489
}
14811490
}
14821491

onnxruntime/core/providers/qnn/qnn_execution_provider.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ class QNNExecutionProvider : public IExecutionProvider {
9696
uint32_t device_id_ = 0;
9797
qnn::HtpPerformanceMode default_htp_performance_mode_ = qnn::HtpPerformanceMode::kHtpDefault;
9898
uint32_t default_rpc_control_latency_ = 0;
99+
uint32_t default_rpc_polling_time_ = 0;
99100
bool enable_HTP_FP16_precision_ = true;
100101
bool share_ep_contexts_ = false;
101102
bool stop_share_ep_contexts_ = false;
@@ -116,7 +117,8 @@ class QNNExecutionProvider : public IExecutionProvider {
116117
PerThreadContext(qnn::QnnBackendManager* qnn_backend_manager,
117118
uint32_t device_id, uint32_t core_id,
118119
qnn::HtpPerformanceMode default_htp_performance_mode,
119-
uint32_t default_rpc_control_latency);
120+
uint32_t default_rpc_control_latency,
121+
uint32_t default_rpc_polling_time);
120122
~PerThreadContext();
121123
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(PerThreadContext);
122124

0 commit comments

Comments
 (0)