Skip to content

Commit 7bcfe9f

Browse files
samwebsteradrianlizarraga
authored andcommitted
[QNN EP] Make sure everything gets cleaned up (#23275)
### Description Always make sure resources and callbacks are cleaned up ### Motivation and Context We've seen problems where the log callback isn't deregistered which can lead to crashes --------- Co-authored-by: Adrian Lizarraga <adrianlm2@gmail.com>
1 parent 0a6585e commit 7bcfe9f

2 files changed

Lines changed: 67 additions & 39 deletions

File tree

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 61 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ QnnLog_Level_t QnnBackendManager::MapOrtSeverityToQNNLogLevel(logging::Severity
306306
}
307307

308308
Status QnnBackendManager::ResetQnnLogLevel(std::optional<logging::Severity> ort_log_level) {
309-
std::lock_guard<std::mutex> lock(logger_mutex_);
309+
std::lock_guard<std::recursive_mutex> lock(logger_recursive_mutex_);
310310
if (!backend_setup_completed_ || logger_ == nullptr) {
311311
return Status::OK();
312312
}
@@ -796,50 +796,78 @@ Status QnnBackendManager::LoadCachedQnnContextFromBuffer(char* buffer, uint64_t
796796
Status QnnBackendManager::SetupBackend(const logging::Logger& logger,
797797
bool load_from_cached_context,
798798
bool need_load_system_lib) {
799-
std::lock_guard<std::mutex> lock(logger_mutex_);
799+
std::lock_guard<std::recursive_mutex> lock(logger_recursive_mutex_);
800800
if (backend_setup_completed_) {
801801
LOGS(logger, VERBOSE) << "Backend setup already!";
802802
return Status::OK();
803803
}
804804

805+
Status status = Status::OK();
805806
if (qnn_saver_path_.empty()) {
806-
ORT_RETURN_IF_ERROR(LoadBackend());
807+
status = LoadBackend();
807808
} else {
808-
ORT_RETURN_IF_ERROR(LoadQnnSaverBackend());
809+
status = LoadQnnSaverBackend();
810+
}
811+
if (status.IsOK()) {
812+
LOGS(logger, VERBOSE) << "LoadBackend succeed.";
809813
}
810814

811-
LOGS(logger, VERBOSE) << "LoadBackend succeed.";
812-
813-
if (load_from_cached_context || need_load_system_lib) {
814-
ORT_RETURN_IF_ERROR(LoadQnnSystemLib());
815+
if (status.IsOK() && (load_from_cached_context || need_load_system_lib)) {
816+
status = LoadQnnSystemLib();
815817
}
816818

817-
sdk_build_version_ = GetBackendBuildId();
818-
LOGS(logger, VERBOSE) << "Backend build version: "
819-
<< sdk_build_version_;
819+
if (status.IsOK()) {
820+
sdk_build_version_ = GetBackendBuildId();
821+
LOGS(logger, VERBOSE) << "Backend build version: "
822+
<< sdk_build_version_;
823+
}
820824

821-
ORT_RETURN_IF_ERROR(InitializeQnnLog(logger));
822-
LOGS(logger, VERBOSE) << "SetLogger succeed.";
825+
if (status.IsOK()) {
826+
status = InitializeQnnLog(logger);
827+
}
828+
if (status.IsOK()) {
829+
LOGS(logger, VERBOSE) << "SetLogger succeed.";
830+
}
823831

824-
ORT_RETURN_IF_ERROR(InitializeBackend());
825-
LOGS(logger, VERBOSE) << "InitializeBackend succeed.";
832+
if (status.IsOK()) {
833+
status = InitializeBackend();
834+
}
835+
if (status.IsOK()) {
836+
LOGS(logger, VERBOSE) << "InitializeBackend succeed.";
837+
}
826838

827-
ORT_RETURN_IF_ERROR(CreateDevice());
828-
LOGS(logger, VERBOSE) << "CreateDevice succeed.";
839+
if (status.IsOK()) {
840+
status = CreateDevice();
841+
}
842+
if (status.IsOK()) {
843+
LOGS(logger, VERBOSE) << "CreateDevice succeed.";
844+
}
829845

830-
ORT_RETURN_IF_ERROR(InitializeProfiling());
831-
LOGS(logger, VERBOSE) << "InitializeProfiling succeed.";
846+
if (status.IsOK()) {
847+
status = InitializeProfiling();
848+
}
849+
if (status.IsOK()) {
850+
LOGS(logger, VERBOSE) << "InitializeProfiling succeed.";
851+
}
832852

833853
if (!load_from_cached_context) {
834-
ORT_RETURN_IF_ERROR(CreateContext());
835-
LOGS(logger, VERBOSE) << "CreateContext succeed.";
854+
if (status.IsOK()) {
855+
status = CreateContext();
856+
}
857+
if (status.IsOK()) {
858+
LOGS(logger, VERBOSE) << "CreateContext succeed.";
859+
}
836860
}
837861

838-
LOGS(logger, VERBOSE) << "QNN SetupBackend succeed";
839-
840-
backend_setup_completed_ = true;
862+
if (status.IsOK()) {
863+
LOGS(logger, VERBOSE) << "QNN SetupBackend succeed";
864+
backend_setup_completed_ = true;
865+
} else {
866+
LOGS_DEFAULT(WARNING) << "Failed to setup so cleaning up";
867+
ReleaseResources();
868+
}
841869

842-
return Status::OK();
870+
return status;
843871
}
844872

845873
Status QnnBackendManager::CreateHtpPowerCfgId(uint32_t device_id, uint32_t core_id, uint32_t& htp_power_config_id) {
@@ -1045,15 +1073,15 @@ Status QnnBackendManager::DestroyHTPPowerConfigID(uint32_t htp_power_config_id)
10451073
}
10461074

10471075
Status QnnBackendManager::TerminateQnnLog() {
1048-
std::lock_guard<std::mutex> lock(logger_mutex_);
1076+
std::lock_guard<std::recursive_mutex> lock(logger_recursive_mutex_);
10491077
if (logger_ == nullptr) {
10501078
return Status::OK();
10511079
}
10521080

10531081
if (nullptr != qnn_interface_.logFree && nullptr != log_handle_) {
10541082
auto ret_val = qnn_interface_.logFree(log_handle_);
10551083

1056-
// Reset QNN log handle to nullptr so other threads that are waiting on logger_mutex_ know it was freed.
1084+
// Reset QNN log handle to nullptr so other threads that are waiting on logger_recursive_mutex_ know it was freed.
10571085
log_handle_ = nullptr;
10581086
ORT_RETURN_IF(QNN_SUCCESS != ret_val,
10591087
"Unable to terminate logging in the backend.");
@@ -1069,33 +1097,33 @@ void QnnBackendManager::ReleaseResources() {
10691097

10701098
auto result = ReleaseContext();
10711099
if (Status::OK() != result) {
1072-
ORT_THROW("Failed to ReleaseContext.");
1100+
LOGS_DEFAULT(ERROR) << "Failed to ReleaseContext.";
10731101
}
10741102

10751103
result = ReleaseProfilehandle();
10761104
if (Status::OK() != result) {
1077-
ORT_THROW("Failed to ReleaseProfilehandle.");
1105+
LOGS_DEFAULT(ERROR) << "Failed to ReleaseProfilehandle.";
10781106
}
10791107

10801108
result = ReleaseDevice();
10811109
if (Status::OK() != result) {
1082-
ORT_THROW("Failed to ReleaseDevice.");
1110+
LOGS_DEFAULT(ERROR) << "Failed to ReleaseDevice.";
10831111
}
10841112

10851113
result = ShutdownBackend();
10861114
if (Status::OK() != result) {
1087-
ORT_THROW("Failed to ShutdownBackend.");
1115+
LOGS_DEFAULT(ERROR) << "Failed to ShutdownBackend.";
10881116
}
10891117

10901118
result = TerminateQnnLog();
10911119
if (Status::OK() != result) {
1092-
ORT_THROW("Failed to TerminateQnnLog.");
1120+
LOGS_DEFAULT(ERROR) << "Failed to TerminateQnnLog.";
10931121
}
10941122

10951123
if (backend_lib_handle_) {
10961124
result = UnloadLib(backend_lib_handle_);
10971125
if (Status::OK() != result) {
1098-
ORT_THROW("Failed to unload backend library.");
1126+
LOGS_DEFAULT(ERROR) << "Failed to unload backend library.";
10991127
}
11001128
}
11011129

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class QnnBackendManager {
9797
int64_t max_spill_fill_size);
9898

9999
// Initializes handles to QNN resources (device, logger, etc.).
100-
// NOTE: This function locks the internal `logger_mutex_`.
100+
// NOTE: This function locks the internal `logger_recursive_mutex_`.
101101
Status SetupBackend(const logging::Logger& logger, bool load_from_cached_context, bool need_load_system_lib);
102102

103103
Status CreateHtpPowerCfgId(uint32_t deviceId, uint32_t coreId, uint32_t& htp_power_config_id);
@@ -125,7 +125,7 @@ class QnnBackendManager {
125125

126126
// Resets the QNN log level to the given ORT log level or to the default log level if the argument is
127127
// std::nullopt.
128-
// NOTE: This function locks the internal `logger_mutex_`.
128+
// NOTE: This function locks the internal `logger_recursive_mutex_`.
129129
Status ResetQnnLogLevel(std::optional<logging::Severity> ort_log_level = std::nullopt);
130130

131131
Status ExtractBackendProfilingInfo();
@@ -150,15 +150,15 @@ class QnnBackendManager {
150150

151151
private:
152152
// Sets the ORT logger and creates a corresponding QNN logger with the same log level.
153-
// NOTE: caller must lock the `logger_mutex_` before calling this function.
153+
// NOTE: caller must lock the `logger_recursive_mutex_` before calling this function.
154154
Status InitializeQnnLog(const logging::Logger& logger);
155155

156156
// Terminate logging in the backend
157-
// NOTE: This function locks the internal `logger_mutex_`.
157+
// NOTE: This function locks the internal `logger_recursive_mutex_`.
158158
Status TerminateQnnLog();
159159

160160
// Releases all QNN resources. Called in the destructor.
161-
// NOTE: This function indirectly locks the internal `logger_mutex_` via nested function calls.
161+
// NOTE: This function indirectly locks the internal `logger_recursive_mutex_` via nested function calls.
162162
void ReleaseResources();
163163

164164
void* LoadLib(const char* file_name, int flags, std::string& error_msg);
@@ -232,7 +232,7 @@ class QnnBackendManager {
232232

233233
private:
234234
const std::string backend_path_;
235-
std::mutex logger_mutex_;
235+
std::recursive_mutex logger_recursive_mutex_;
236236
const logging::Logger* logger_ = nullptr;
237237
QNN_INTERFACE_VER_TYPE qnn_interface_ = QNN_INTERFACE_VER_TYPE_INIT;
238238
QNN_SYSTEM_INTERFACE_VER_TYPE qnn_sys_interface_ = QNN_SYSTEM_INTERFACE_VER_TYPE_INIT;

0 commit comments

Comments
 (0)