-
Notifications
You must be signed in to change notification settings - Fork 489
UCT/MM: Replace CAS with FAA #10705
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
arun-chandran-edarath
wants to merge
1
commit into
openucx:master
Choose a base branch
from
arun-chandran-edarath:replace_cas_with_faa
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
UCT/MM: Replace CAS with FAA #10705
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -200,6 +200,8 @@ static UCS_CLASS_INIT_FUNC(uct_mm_ep_t, const uct_ep_params_t *params) | |
| self->cached_tail = self->fifo_ctl->tail; | ||
| ucs_arbiter_elem_init(&self->arb_elem); | ||
|
|
||
| self->reserved_head = 0; | ||
|
|
||
| status = uct_ep_keepalive_init(&self->keepalive, self->fifo_ctl->pid); | ||
| if (status != UCS_OK) { | ||
| goto err_free_segs; | ||
|
|
@@ -230,26 +232,15 @@ UCS_CLASS_DEFINE_NEW_FUNC(uct_mm_ep_t, uct_ep_t, const uct_ep_params_t *); | |
| UCS_CLASS_DEFINE_DELETE_FUNC(uct_mm_ep_t, uct_ep_t); | ||
|
|
||
|
|
||
| static inline ucs_status_t | ||
| static inline void | ||
| uct_mm_ep_get_remote_elem(uct_mm_ep_t *ep, uint64_t head, | ||
| uct_mm_fifo_element_t **elem) | ||
| { | ||
| uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); | ||
| uint64_t new_head, prev_head; | ||
| uint64_t elem_index; /* index of the element to write */ | ||
|
|
||
| elem_index = head & iface->fifo_mask; | ||
| *elem = UCT_MM_IFACE_GET_FIFO_ELEM(iface, ep->fifo_elems, elem_index); | ||
| new_head = (head + 1) & ~UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED; | ||
|
|
||
| /* try to get ownership of the head element */ | ||
| prev_head = ucs_atomic_cswap64(ucs_unaligned_ptr(&ep->fifo_ctl->head), head, | ||
| new_head); | ||
| if (prev_head != head) { | ||
| return UCS_ERR_NO_RESOURCE; | ||
| } | ||
|
|
||
| return UCS_OK; | ||
| } | ||
|
|
||
| static inline void uct_mm_ep_update_cached_tail(uct_mm_ep_t *ep) | ||
|
|
@@ -288,14 +279,20 @@ static UCS_F_ALWAYS_INLINE ssize_t uct_mm_ep_am_common_send( | |
| ucs_status_t status; | ||
| void *base_address; | ||
| uint8_t elem_flags; | ||
| uint64_t head; | ||
| uint64_t head, prev_head; | ||
| ucs_iov_iter_t iov_iter; | ||
| void *desc_data; | ||
|
|
||
| UCT_CHECK_AM_ID(am_id); | ||
|
|
||
| retry: | ||
| head = ep->fifo_ctl->head; | ||
| if (!ep->reserved_head) { | ||
| /* Do a atomic add and get reserve a FIFO element */ | ||
| ep->reserved_head = ucs_atomic_fadd64(&ep->fifo_ctl->head, 1); | ||
| } | ||
|
|
||
|
|
||
| head = ep->reserved_head; | ||
|
|
||
| /* check if there is room in the remote process's receive FIFO to write */ | ||
| if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail, iface->config.fifo_size)) { | ||
| if (!ucs_arbiter_group_is_empty(&ep->arb_group)) { | ||
|
|
@@ -315,12 +312,9 @@ static UCS_F_ALWAYS_INLINE ssize_t uct_mm_ep_am_common_send( | |
| } | ||
| } | ||
|
|
||
| status = uct_mm_ep_get_remote_elem(ep, head, &elem); | ||
| if (status != UCS_OK) { | ||
| ucs_assert(status == UCS_ERR_NO_RESOURCE); | ||
| ucs_trace_poll("couldn't get an available FIFO element. retrying"); | ||
| goto retry; | ||
| } | ||
| /* reset reserved_head */ | ||
| ep->reserved_head = 0; | ||
| uct_mm_ep_get_remote_elem(ep, head, &elem); | ||
|
|
||
| switch (send_op) { | ||
| case UCT_MM_SEND_AM_SHORT: | ||
|
|
@@ -382,7 +376,14 @@ static UCS_F_ALWAYS_INLINE ssize_t uct_mm_ep_am_common_send( | |
| elem->flags = elem_flags; | ||
|
|
||
| if (ucs_unlikely(head & UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED)) { | ||
| uct_mm_ep_signal_remote(ep); | ||
| /* Try to minimize number of ranks that do _fetch_and_and_ in | ||
| * the case of heavy contention */ | ||
| if ((ep->fifo_ctl->head - head) == 1) { | ||
| prev_head = ucs_atomic_fand64(&ep->fifo_ctl->head, ~UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED); | ||
| if (prev_head & UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED) { | ||
| uct_mm_ep_signal_remote(ep); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| uct_mm_ep_peer_check(ep, flags); | ||
|
|
@@ -453,8 +454,9 @@ ucs_status_t uct_mm_ep_pending_add(uct_ep_h tl_ep, uct_pending_req_t *n, | |
| uct_mm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_mm_iface_t); | ||
| uct_mm_ep_t *ep = ucs_derived_of(tl_ep, uct_mm_ep_t); | ||
|
|
||
| /* check if resources became available */ | ||
| if (uct_mm_ep_has_tx_resources(ep)) { | ||
| /* check if we can use the already reserved slot in FIFO */ | ||
| if (UCT_MM_EP_IS_ABLE_TO_SEND(ep->reserved_head, ep->cached_tail, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe uct_mm_ep_has_tx_resources should be updated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will try this way, seems better. |
||
| iface->config.fifo_size)) { | ||
| ucs_assert(ucs_arbiter_group_is_empty(&ep->arb_group)); | ||
| return UCS_ERR_BUSY; | ||
| } | ||
|
|
@@ -475,15 +477,24 @@ ucs_arbiter_cb_result_t uct_mm_ep_process_pending(ucs_arbiter_t *arbiter, | |
| void *arg) | ||
| { | ||
| uct_mm_ep_t *ep = ucs_container_of(group, uct_mm_ep_t, arb_group); | ||
| uct_mm_iface_t *iface = ucs_derived_of(ep->super.super.iface, uct_mm_iface_t); | ||
| unsigned *count = (unsigned*)arg; | ||
| uct_pending_req_t *req; | ||
| ucs_status_t status; | ||
| uint64_t fifo_head; | ||
|
|
||
| /* update the local tail with its actual value from the remote peer | ||
| * making sure that the pending sends would use the real tail value */ | ||
| uct_mm_ep_update_cached_tail(ep); | ||
|
|
||
| if (!uct_mm_ep_has_tx_resources(ep)) { | ||
| if (!ep->reserved_head) { | ||
| fifo_head = ep->fifo_ctl->head; | ||
| } else { | ||
| fifo_head = ep->reserved_head; | ||
| } | ||
|
|
||
| if (!UCT_MM_EP_IS_ABLE_TO_SEND(fifo_head, ep->cached_tail, | ||
| iface->config.fifo_size)) { | ||
| return UCS_ARBITER_CB_RESULT_RESCHED_GROUP; | ||
| } | ||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this check really needed given that
if (prev_head & UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED)already makes sure only one rank would call uct_mm_ep_signal_remote?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's assume multiple writers are simultaneously trying to communicate with a reader whose FIFO is already 'ARMED'.
In this case, as we are using FAA instead of CAS, all writers will see the 'ARM' bit.
If we don't have the '((ep->fifo_ctl->head - head) == 1)' check, all writers would perform 'ucs_atomic_fand64(&ep->fifo_ctl->head, ~UCT_MM_IFACE_FIFO_HEAD_EVENT_ARMED)', increasing unnecessary atomic operations.
This check helps reduce the number of writers doing atomic_fetch_and_and.
We could avoid it if signaling is infrequent among participants.
Can you comment on how often signaling typically occurs among workers (ranks)?