From 3071892b2a59efffbb072665a9574b0a9d0486f0 Mon Sep 17 00:00:00 2001
From: Ilia Yastrebov
Date: Tue, 6 Feb 2024 17:27:55 +0200
Subject: [PATCH] ompi/oshmem/spml/ucx: Fix cleanup when disconnected: avoid double free

Signed-off-by: Ilia Yastrebov
---
 oshmem/mca/spml/ucx/spml_ucx.h           | 1 +
 oshmem/mca/spml/ucx/spml_ucx_component.c | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h
index 2fec131ad2d..938129dd00b 100644
--- a/oshmem/mca/spml/ucx/spml_ucx.h
+++ b/oshmem/mca/spml/ucx/spml_ucx.h
@@ -43,6 +43,7 @@ BEGIN_C_DECLS
 
 #define SPML_UCX_ASSERT MCA_COMMON_UCX_ASSERT
 #define SPML_UCX_ERROR MCA_COMMON_UCX_ERROR
+#define SPML_UCX_WARN MCA_COMMON_UCX_WARN
 #define SPML_UCX_VERBOSE MCA_COMMON_UCX_VERBOSE
 #define SPML_UCX_TRANSP_IDX 0
 #define SPML_UCX_TRANSP_CNT 1
diff --git a/oshmem/mca/spml/ucx/spml_ucx_component.c b/oshmem/mca/spml/ucx/spml_ucx_component.c
index e44a800a8be..8747b39fb8d 100644
--- a/oshmem/mca/spml/ucx/spml_ucx_component.c
+++ b/oshmem/mca/spml/ucx/spml_ucx_component.c
@@ -458,7 +458,8 @@ static void mca_spml_ucx_ctx_fini(mca_spml_ucx_ctx_t *ctx)
 
 static int mca_spml_ucx_component_fini(void)
 {
-    int fenced = 0, i;
+    volatile int fenced = 0;
+    int i;
     int ret = OSHMEM_SUCCESS;
     mca_spml_ucx_ctx_t *ctx;
 
@@ -491,8 +492,10 @@ static int mca_spml_ucx_component_fini(void)
 
 
     ret = opal_common_ucx_mca_pmix_fence_nb(&fenced);
-    if (OPAL_SUCCESS != ret) {
-        return ret;
+    if (ret != PMIX_SUCCESS) {
+        SPML_UCX_WARN("pmix fence failed: %s", PMIx_Error_string(ret));
+        /* In case of pmix fence failure just continue cleanup */
+        fenced = 1;
     }
 
     while (!fenced) {