forked from open-mpi/ompi
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SHMEM_LOCKS: MCS implementation of SHMEM LOCKS Adding MCS algorithm-based implementation for shmem_locks to improve performance for large scale SHMEM applications using locks. MCS lock is now the default algorithm, use the following MCA parameter to disable. --mca oshmem_enable_mcs_lock 0 to disable mcs locks and revert to default ticket locking. --mca oshmem_api_verbose 10 for debug information on shmem_locks. Signed-off-by: Vishwanath Venkatesan <[email protected]>
- Loading branch information
Vishwanath Venkatesan
committed
Oct 18, 2023
1 parent
386b88d
commit 4cecc52
Showing
8 changed files
with
298 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
/* | ||
* Copyright (c) 2023 NVIDIA Corporation. | ||
* All rights reserved. | ||
* | ||
* $COPYRIGHT$ | ||
* | ||
* Additional copyrights may follow | ||
* | ||
* $HEADER$ | ||
*/ | ||
|
||
#include "oshmem_config.h" | ||
|
||
#include "oshmem/constants.h" | ||
#include "oshmem/include/shmem.h" | ||
#include "oshmem/runtime/params.h" | ||
#include "oshmem/runtime/runtime.h" | ||
#include <stdlib.h> | ||
#include <memory.h> | ||
|
||
#include "oshmem/shmem/shmem_api_logger.h" | ||
#include "oshmem/shmem/shmem_lock.h" | ||
#include "oshmem/mca/memheap/memheap.h" | ||
#include "oshmem/mca/memheap/base/base.h" | ||
#include "oshmem/mca/atomic/atomic.h" | ||
|
||
#define OPAL_BITWISE_SIZEOF_LONG (SIZEOF_LONG * 8) | ||
|
||
|
||
/** Use basic MCS distributed lock algorithm for lock */ | ||
struct shmem_mcs_lock { | ||
/** has meaning only on MCSQ_TAIL OWNER */ | ||
int tail; | ||
/** It has meaning on all PEs */ | ||
/** The next pointer is a combination of the PE ID and wait signal */ | ||
int next; | ||
}; | ||
typedef struct shmem_mcs_lock shmem_mcs_lock_t; | ||
|
||
#define SHMEM_MCSL_TAIL_OWNER(lock_ptr)\ | ||
(((uintptr_t)(lock_ptr) / sizeof(long)) % shmem_n_pes()) | ||
|
||
#define SHMEM_MCSL_NEXT_MASK 0x7FFFFFFFU | ||
#define SHMEM_MCSL_SIGNAL_MASK 0x80000000U /** Wait signal mask */ | ||
#define SHMEM_MCSL_NEXT(lock_val) ((lock_val) & SHMEM_MCSL_NEXT_MASK) | ||
/** Improve readability */ | ||
#define SHMEM_MCSL_GET_PE(tail_val) ((tail_val) & SHMEM_MCSL_NEXT_MASK) | ||
#define SHMEM_MCSL_SIGNAL(lock_val) ((lock_val) & SHMEM_MCSL_SIGNAL_MASK) | ||
#define SHMEM_MCSL_SET_SIGNAL(lock_val) ((lock_val) | SHMEM_MCSL_SIGNAL_MASK) | ||
|
||
void | ||
_shmem_mcs_set_lock(long *lockp) | ||
{ | ||
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp; | ||
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock); | ||
int new_tail_req = 0; | ||
int *tail = &(lock->tail); | ||
int *next = &(lock->next); | ||
int my_pe = shmem_my_pe(); | ||
int curr = 0; | ||
int out_value = 0; | ||
int prev_tail = 0; | ||
int prev_tailpe = 0; | ||
int tval = 0; | ||
int tmp_val = 0; | ||
int retv = 0; | ||
uint64_t value_tmp = 0; | ||
|
||
RUNTIME_CHECK_INIT(); | ||
/** | ||
* Initializing next pointer to next mask | ||
* Done atomically to avoid races as NEXT pointer | ||
* can be modified by other PEs while acquiring or | ||
* releasing it. | ||
*/ | ||
/** | ||
* Can make this to be shmem_atomic_set to be safe | ||
* in non-cc architectures | ||
* has an impact on performance | ||
*/ | ||
value_tmp = SHMEM_MCSL_NEXT_MASK; | ||
out_value = SHMEM_MCSL_NEXT_MASK; | ||
retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void*)next, | ||
(void*)&out_value, value_tmp, | ||
sizeof(int), my_pe)); | ||
RUNTIME_CHECK_RC(retv); | ||
MCA_SPML_CALL(quiet(oshmem_ctx_default)); | ||
|
||
/** Signal for setting lock */ | ||
new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe); | ||
/** | ||
* Swap and make me the new tail and update in tail owner | ||
* Get the previous tail PE. | ||
*/ | ||
retv = MCA_ATOMIC_CALL(swap(oshmem_ctx_default, (void *)tail, | ||
(void*)&prev_tail, | ||
OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req, | ||
sizeof(new_tail_req)), | ||
sizeof(int), mcs_tail_owner)); | ||
RUNTIME_CHECK_RC(retv); | ||
|
||
prev_tailpe = SHMEM_MCSL_GET_PE(prev_tail); | ||
if (SHMEM_MCSL_SIGNAL(prev_tail)) { | ||
/** | ||
* Someone else has got the lock before this PE | ||
* Adding this PE to the previous tail PE's Next pointer | ||
* Substract the SIGNAL Bit to avoid changing it. | ||
*/ | ||
tmp_val = my_pe - SHMEM_MCSL_NEXT_MASK; | ||
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void*)next, tmp_val, | ||
sizeof(int), prev_tailpe)); | ||
RUNTIME_CHECK_RC(retv); | ||
/** | ||
* This value to be changed eventually by predecessor | ||
* when its lock is released. | ||
* Need to be done atomically to avoid any races where | ||
* next pointer is modified by another PE acquiring or | ||
* releasing this. | ||
*/ | ||
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, (void *)next, | ||
SHMEM_MCSL_SIGNAL_MASK, sizeof(int), | ||
my_pe)); | ||
RUNTIME_CHECK_RC(retv); | ||
MCA_SPML_CALL(quiet(oshmem_ctx_default)); | ||
/** Wait for predecessor release lock to this PE signal to false. */ | ||
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next, | ||
(void*)&curr, tval, sizeof(int), my_pe)); | ||
RUNTIME_CHECK_RC(retv); | ||
|
||
while (SHMEM_MCSL_SIGNAL(curr)) { | ||
retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE, | ||
(void*)&curr, SHMEM_INT)); | ||
RUNTIME_CHECK_RC(retv); | ||
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next, | ||
(void*)&curr, tval, sizeof(int), | ||
my_pe)); | ||
RUNTIME_CHECK_RC(retv); | ||
} | ||
} | ||
/** else.. this pe has got the lock as no one else had it */ | ||
} | ||
|
||
void | ||
_shmem_mcs_clear_lock(long *lockp) | ||
{ | ||
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp; | ||
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock); | ||
int *tail = &(lock->tail); | ||
int *next = &(lock->next); | ||
int my_pe = shmem_my_pe(); | ||
int next_value = 0; | ||
int swap_cond = 0; | ||
int prev_value = 0; | ||
int tval = 0; | ||
int val_tmp = 0; | ||
int nmask = 0; | ||
int a_val = 0; | ||
int retv = 0; | ||
|
||
/** | ||
* Can make atomic fetch to be safe in non-cc architectures | ||
* Has impact on performance | ||
*/ | ||
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next, | ||
(void*)&next_value, tval, sizeof(int), | ||
my_pe)); | ||
RUNTIME_CHECK_RC(retv); | ||
MCA_SPML_CALL(quiet(oshmem_ctx_default)); | ||
|
||
if (next_value == SHMEM_MCSL_NEXT_MASK) { | ||
swap_cond = SHMEM_MCSL_SET_SIGNAL(my_pe); | ||
retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default, | ||
(void *)tail, (uint64_t *)&(prev_value), | ||
OSHMEM_ATOMIC_PTR_2_INT(&swap_cond, | ||
sizeof(swap_cond)), | ||
OSHMEM_ATOMIC_PTR_2_INT(&val_tmp, | ||
sizeof(val_tmp)), sizeof(int), | ||
mcs_tail_owner)); | ||
RUNTIME_CHECK_RC(retv); | ||
|
||
/** I am the tail.. and lock is released */ | ||
if (prev_value == swap_cond) { | ||
return; | ||
} | ||
/** | ||
* I am not the tail, another PE maybe racing to acquire lock, | ||
* let them complete setting themselves as our next | ||
*/ | ||
nmask = SHMEM_MCSL_NEXT_MASK; | ||
while(next_value == nmask) { | ||
retv = MCA_SPML_CALL(wait((void*)next, SHMEM_CMP_NE, | ||
(void*)&nmask, SHMEM_INT)); | ||
RUNTIME_CHECK_RC(retv); | ||
retv = MCA_ATOMIC_CALL(fadd(oshmem_ctx_default, (void*)next, | ||
(void*)&next_value, tval, | ||
sizeof(int), my_pe)); | ||
RUNTIME_CHECK_RC(retv); | ||
} | ||
} | ||
/** There is a successor release lock to the successor */ | ||
a_val = SHMEM_MCSL_SIGNAL_MASK; | ||
retv = MCA_ATOMIC_CALL(add(oshmem_ctx_default, | ||
(void *)next, a_val, sizeof(a_val), | ||
SHMEM_MCSL_NEXT(next_value))); | ||
RUNTIME_CHECK_RC(retv); | ||
MCA_SPML_CALL(quiet(oshmem_ctx_default)); | ||
} | ||
|
||
int | ||
_shmem_mcs_test_lock(long *lockp) | ||
{ | ||
shmem_mcs_lock_t *lock = (shmem_mcs_lock_t *) lockp; | ||
int mcs_tail_owner = SHMEM_MCSL_TAIL_OWNER(lock); | ||
int new_tail_req = 0; | ||
int prev_tail = 0; | ||
int tmp_cond = 0; | ||
int *tail = &(lock->tail); | ||
int *next = &(lock->next); | ||
int my_pe = shmem_my_pe(); | ||
int retv = 0; | ||
|
||
/** Initializing next pointer to next mask */ | ||
*next = SHMEM_MCSL_NEXT_MASK; | ||
|
||
/** Signal for setting lock */ | ||
new_tail_req = SHMEM_MCSL_SET_SIGNAL(my_pe); | ||
|
||
/** Check if previously cleared before swapping */ | ||
retv = MCA_ATOMIC_CALL(cswap(oshmem_ctx_default, | ||
(void *)tail, (uint64_t *)&(prev_tail), | ||
OSHMEM_ATOMIC_PTR_2_INT(&tmp_cond, | ||
sizeof(tmp_cond)), | ||
OSHMEM_ATOMIC_PTR_2_INT(&new_tail_req, | ||
sizeof(new_tail_req)), | ||
sizeof(int), mcs_tail_owner)); | ||
RUNTIME_CHECK_RC(retv); | ||
|
||
return (0 != prev_tail); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters