Skip to content

Commit f726dae

Browse files
committed
Ensure we have HNP node aliases
We captured the HNP's aliases in prte_process_info, but that happened _after_ we had already copied them to the HNP's node object. So when we then checked the node aliases, they were missing from that node. Ensure we capture the HNP's aliases on the node object. Simplify the check for the local node by including the "localhost" and "127.0.0.1" aliases, being sure not to include them in the nidmap. Correct the check in dash-host for matching node names. Thanks to Alexey Novikov for the report. Signed-off-by: Ralph Castain <[email protected]> (cherry picked from commit 8070277)
1 parent 88eca13 commit f726dae

File tree

7 files changed

+25
-16
lines changed

7 files changed

+25
-16
lines changed

src/mca/ess/base/ess_base_std_prted.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "src/event/event-internal.h"
4444
#include "src/hwloc/hwloc-internal.h"
4545
#include "src/pmix/pmix-internal.h"
46+
#include "src/util/pmix_if.h"
4647
#include "src/util/pmix_os_path.h"
4748
#include "src/util/pmix_environ.h"
4849

@@ -303,6 +304,10 @@ int prte_ess_base_prted_setup(void)
303304
goto error;
304305
}
305306

307+
/* add network aliases to our list of alias hostnames - must
308+
* wait until after we init PMIx before getting them */
309+
pmix_ifgetaliases(&prte_process_info.aliases);
310+
306311
/* Setup the communication infrastructure */
307312
if (PRTE_SUCCESS
308313
!= (ret = pmix_mca_base_framework_open(&prte_prtereachable_base_framework,

src/mca/ess/hnp/ess_hnp_module.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,6 @@ static int rte_init(int argc, char **argv)
222222
node->daemon = proc;
223223
PRTE_FLAG_SET(node, PRTE_NODE_FLAG_DAEMON_LAUNCHED);
224224
node->state = PRTE_NODE_STATE_UP;
225-
/* get our aliases - will include all the interface aliases captured in prte_init */
226-
node->aliases = PMIX_ARGV_COPY_COMPAT(prte_process_info.aliases);
227225
/* record that the daemon job is running */
228226
jdata->num_procs = 1;
229227
jdata->state = PRTE_JOB_STATE_RUNNING;
@@ -253,6 +251,13 @@ static int rte_init(int argc, char **argv)
253251
goto error;
254252
}
255253

254+
/* add network aliases to our list of alias hostnames - must
255+
* wait until after we init PMIx before getting them */
256+
pmix_ifgetaliases(&prte_process_info.aliases);
257+
258+
/* get our aliases - will include all the interface aliases captured just above */
259+
node->aliases = PMIX_ARGV_COPY_COMPAT(prte_process_info.aliases);
260+
256261
/* if we are using xml for output, put a start tag */
257262
if (prte_xml_output) {
258263
fprintf(stdout, "<%s>\n", prte_tool_basename);

src/runtime/prte_init.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,6 @@ int prte_init(int *pargc, char ***pargv, prte_proc_type_t flags)
420420
error = "prte_ess_init";
421421
goto error;
422422
}
423-
/* add network aliases to our list of alias hostnames */
424-
pmix_ifgetaliases(&prte_process_info.aliases);
425423

426424
/* initialize the cache */
427425
prte_cache = PMIX_NEW(pmix_pointer_array_t);

src/tools/prted/prted.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -573,9 +573,9 @@ int main(int argc, char *argv[])
573573

574574
/* include any non-loopback aliases for this node */
575575
for (n = 0; NULL != prte_process_info.aliases[n]; n++) {
576-
if (0 != strcmp(prte_process_info.aliases[n], "localhost")
577-
&& 0 != strcmp(prte_process_info.aliases[n], "127.0.0.1")
578-
&& 0 != strcmp(prte_process_info.aliases[n], prte_process_info.nodename)) {
576+
if (0 != strcmp(prte_process_info.aliases[n], "localhost") &&
577+
0 != strcmp(prte_process_info.aliases[n], "127.0.0.1") &&
578+
0 != strcmp(prte_process_info.aliases[n], prte_process_info.nodename)) {
579579
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&nonlocal, prte_process_info.aliases[n]);
580580
}
581581
}

src/util/dash_host/dash_host.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,6 @@ static bool quickmatch(prte_node_t *nd, char *name)
5050
if (0 == strcmp(nd->name, name)) {
5151
return true;
5252
}
53-
if (0 == strcmp(nd->name, prte_process_info.nodename) &&
54-
(0 == strcmp(name, "localhost") ||
55-
0 == strcmp(name, "127.0.0.1"))) {
56-
return true;
57-
}
5853
if (NULL != nd->aliases) {
5954
for (n=0; NULL != nd->aliases[n]; n++) {
6055
if (0 == strcmp(nd->aliases[n], name)) {

src/util/nidmap.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
66
* Copyright (c) 2020 Triad National Security, LLC. All rights
77
* reserved.
8-
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
8+
* Copyright (c) 2021-2025 Nanook Consulting. All rights reserved.
99
* $COPYRIGHT$
1010
*
1111
* Additional copyrights may follow
@@ -94,6 +94,11 @@ int prte_util_nidmap_create(pmix_pointer_array_t *pool, pmix_data_buffer_t *buff
9494
als = NULL;
9595
if (NULL != nptr->aliases) {
9696
for (m=0; NULL != nptr->aliases[m]; m++) {
97+
// skip any localhost entries
98+
if (0 == strcmp(nptr->aliases[m], "localhost") ||
99+
0 == strcmp(nptr->aliases[m], "127.0.0.1")) {
100+
continue;
101+
}
97102
PMIX_ARGV_APPEND_NOSIZE_COMPAT(&als, nptr->aliases[m]);
98103
}
99104
raw = PMIX_ARGV_JOIN_COMPAT(als, ',');

src/util/proc_info.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,16 @@ void prte_setup_hostname(void)
153153
}
154154
}
155155

156+
// add the localhost names
157+
PMIX_ARGV_APPEND_UNIQUE_COMPAT(&prte_process_info.aliases, "localhost");
158+
PMIX_ARGV_APPEND_UNIQUE_COMPAT(&prte_process_info.aliases, "127.0.0.1");
156159
}
157160

158161
bool prte_check_host_is_local(const char *name)
159162
{
160163
int i;
161164

162-
if (0 == strcmp(name, prte_process_info.nodename) ||
163-
0 == strcmp(name, "localhost") ||
164-
0 == strcmp(name, "127.0.0.1")) {
165+
if (0 == strcmp(name, prte_process_info.nodename)) {
165166
return true;
166167
}
167168

0 commit comments

Comments
 (0)