From be39f1be69ed07709a9778ced5583bfde288e48f Mon Sep 17 00:00:00 2001 From: Wenduo Wang Date: Mon, 25 Dec 2023 17:53:04 +0000 Subject: [PATCH] coll/libnbc/ialltoallv: skip send/recv 0-byte data As per MPI specification the amount of data sent must be equal to the amount of data received for each communication pair, and therefore both count and datatype size should be accounted for to determine if the data is 0-byte and therefore skippable. Signed-off-by: Wenduo Wang --- ompi/mca/coll/libnbc/nbc_ialltoallv.c | 59 ++++++++++++++++++--------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/ompi/mca/coll/libnbc/nbc_ialltoallv.c b/ompi/mca/coll/libnbc/nbc_ialltoallv.c index a52c2675acf..76f1d5f6364 100644 --- a/ompi/mca/coll/libnbc/nbc_ialltoallv.c +++ b/ompi/mca/coll/libnbc/nbc_ialltoallv.c @@ -48,6 +48,7 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const mca_coll_base_module_t *module, bool persistent) { int rank, p, res; + size_t sndsz, rcvsz; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; char *rbuf, *sbuf, inplace; @@ -60,6 +61,7 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const rank = ompi_comm_rank (comm); p = ompi_comm_size (comm); + ompi_datatype_type_size(recvtype, &rcvsz); res = ompi_datatype_type_extent (recvtype, &rcvext); if (MPI_SUCCESS != res) { NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); @@ -92,7 +94,9 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const sendcounts = recvcounts; sdispls = rdispls; sndext = rcvext; + sndsz = rcvsz; } else { + ompi_datatype_type_size(sendtype, &sndsz); res = ompi_datatype_type_extent (sendtype, &sndext); if (MPI_SUCCESS != res) { NBC_Error("MPI Error in ompi_datatype_type_extent() (%i)", res); @@ -100,14 +104,19 @@ static int nbc_alltoallv_init(const void* sendbuf, const int *sendcounts, const } } + if (0 == sndsz || 0 == rcvsz) { + /* Nothing to exchange */ + ompi_coll_base_nbc_reserve_tags(comm, 1); + return nbc_get_noop_request(persistent, request); + } + schedule = OBJ_NEW(NBC_Schedule); if (OPAL_UNLIKELY(NULL == schedule)) { free(tmpbuf); return OMPI_ERR_OUT_OF_RESOURCE; } - - if (!inplace && sendcounts[rank] != 0) { + if (!inplace && 0 < sendcounts[rank]) { rbuf = (char *) recvbuf + rdispls[rank] * rcvext; sbuf = (char *) sendbuf + sdispls[rank] * sndext; res = NBC_Sched_copy (sbuf, false, sendcounts[rank], sendtype, @@ -177,10 +186,18 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, mca_coll_base_module_t *module, bool persistent) { int res, rsize; + size_t sndsz, rcvsz; MPI_Aint sndext, rcvext; NBC_Schedule *schedule; ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module; + ompi_datatype_type_size(sendtype, &sndsz); + ompi_datatype_type_size(recvtype, &rcvsz); + if (0 == sndsz || 0 == rcvsz) { + /* Nothing to exchange */ + ompi_coll_base_nbc_reserve_tags(comm, 1); + return nbc_get_noop_request(persistent, request); + } res = ompi_datatype_type_extent(sendtype, &sndext); if (MPI_SUCCESS != res) { @@ -203,7 +220,7 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, for (int i = 0; i < rsize; i++) { /* post all sends */ - if (sendcounts[i] != 0) { + if (0 < sendcounts[i]) { char *sbuf = (char *) sendbuf + sdispls[i] * sndext; res = NBC_Sched_send (sbuf, false, sendcounts[i], sendtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -212,7 +229,7 @@ static int nbc_alltoallv_inter_init (const void* sendbuf, const int *sendcounts, } } /* post all receives */ - if (recvcounts[i] != 0) { + if (0 < recvcounts[i]) { char *rbuf = (char *) recvbuf + rdispls[i] * rcvext; res = NBC_Sched_recv (rbuf, false, recvcounts[i], recvtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -272,7 +289,7 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, } /* post send */ - if (sendcounts[i] != 0) { + if (0 < sendcounts[i]) { char *sbuf = ((char *) sendbuf) + (sdispls[i] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[i], sendtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -281,7 +298,7 @@ static inline int a2av_sched_linear(int rank, int p, NBC_Schedule *schedule, } /* post receive */ - if (recvcounts[i] != 0) { + if (0 < recvcounts[i]) { char *rbuf = ((char *) recvbuf) + (rdispls[i] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[i], recvtype, i, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -306,7 +323,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, int rcvpeer = (rank + p - i) %p; /* post send */ - if (sendcounts[sndpeer] != 0) { + if (0 < sendcounts[sndpeer]) { char *sbuf = ((char *) sendbuf) + (sdispls[sndpeer] * sndext); res = NBC_Sched_send(sbuf, false, sendcounts[sndpeer], sendtype, sndpeer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -315,7 +332,7 @@ static inline int a2av_sched_pairwise(int rank, int p, NBC_Schedule *schedule, } /* post receive */ - if (recvcounts[rcvpeer] != 0) { + if (0 < recvcounts[rcvpeer]) { char *rbuf = ((char *) recvbuf) + (rdispls[rcvpeer] * rcvext); res = NBC_Sched_recv(rbuf, false, recvcounts[rcvpeer], recvtype, rcvpeer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { @@ -338,7 +355,7 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, char *sbuf = (char *) buf + displs[speer] * ext; char *rbuf = (char *) buf + displs[rpeer] * ext; - if (0 != counts[rpeer]) { + if (0 < counts[rpeer]) { res = NBC_Sched_copy (rbuf, false, counts[rpeer], type, (void *)(-gap), true, counts[rpeer], type, schedule, true); @@ -346,26 +363,26 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, return res; } } - if (0 != counts[speer]) { + if (0 < counts[speer]) { res = NBC_Sched_send (sbuf, false , counts[speer], type, speer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } } - if (0 != counts[rpeer]) { + if (0 < counts[rpeer]) { res = NBC_Sched_recv (rbuf, false , counts[rpeer], type, rpeer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } } - if (0 != counts[rpeer]) { + if (0 < counts[rpeer]) { res = NBC_Sched_send ((void *)(-gap), true, counts[rpeer], type, rpeer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; } } - if (0 != counts[speer]) { + if (0 < counts[speer]) { res = NBC_Sched_recv (sbuf, false, counts[speer], type, speer, schedule, true); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res; @@ -374,15 +391,17 @@ static inline int a2av_sched_inplace(int rank, int p, NBC_Schedule *schedule, } if (0 == (p%2)) { int peer = (rank + p/2) % p; - char *tbuf = (char *) buf + displs[peer] * ext; - res = NBC_Sched_copy (tbuf, false, counts[peer], type, - (void *)(-gap), true, counts[peer], type, - schedule, true); - if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { - return res; + + if (0 < counts[peer]) { + res = NBC_Sched_copy(tbuf, false, counts[peer], type, (void *) (-gap), true, counts[peer], + type, schedule, true); + if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { + return res; + } } - if (0 != counts[peer]) { + + if (0 < counts[peer]) { res = NBC_Sched_send ((void *)(-gap), true , counts[peer], type, peer, schedule, false); if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { return res;