Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/ucp/core/ucp_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -684,12 +684,18 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
ucs_status_t status;
void *address_buffer;
size_t address_length;
ucp_tl_bitmap_t mem_access_tls;
ucp_tl_bitmap_t mem_access_tls, host_mem_access_tls;
char ep_name[UCP_WORKER_ADDRESS_NAME_MAX];
unsigned addr_indices[UCP_MAX_LANES];
ucp_lane_index_t num_lanes;

ucs_memory_type_for_each(mem_type) {
ucp_context_memaccess_tl_bitmap(context, mem_type, 0, &mem_access_tls);
/* Mem type EP requires host memory support */
ucp_context_memaccess_tl_bitmap(context, UCS_MEMORY_TYPE_HOST, 0,
&host_mem_access_tls);
UCS_STATIC_BITMAP_AND_INPLACE(&mem_access_tls, host_mem_access_tls);

if (UCP_MEM_IS_HOST(mem_type) ||
UCS_STATIC_BITMAP_IS_ZERO(mem_access_tls)) {
continue;
Expand Down Expand Up @@ -725,6 +731,9 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
goto err_free_address_list;
}

/* Mem type EP cannot have more than one lane */
num_lanes = ucp_ep_num_lanes(worker->mem_type_ep[mem_type]);
ucs_assertv_always(num_lanes == 1, "num_lanes=%u", num_lanes);
UCS_ASYNC_UNBLOCK(&worker->async);

ucs_free(local_address.address_list);
Expand Down
2 changes: 1 addition & 1 deletion src/ucp/rma/flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ UCS_PROFILE_FUNC(ucs_status_ptr_t, ucp_ep_flush_nbx, (ep, param),
return request;
}

static ucs_status_t ucp_worker_flush_check(ucp_worker_h worker)
ucs_status_t ucp_worker_flush_check(ucp_worker_h worker)
{
ucp_rsc_index_t iface_id;
ucp_worker_iface_t *wiface;
Expand Down
1 change: 1 addition & 0 deletions src/ucp/rma/rma.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ extern ucp_amo_proto_t ucp_amo_sw_proto;
extern const ucp_rma_proto_t *ucp_rma_proto_list[];
extern const ucp_amo_proto_t *ucp_amo_proto_list[];

ucs_status_t ucp_worker_flush_check(ucp_worker_h worker);

ucs_status_t ucp_rma_request_advance(ucp_request_t *req, ssize_t frag_length,
ucs_status_t status,
Expand Down
10 changes: 0 additions & 10 deletions src/uct/cuda/cuda_ipc/cuda_ipc_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,6 @@ static ucs_config_field_t uct_cuda_ipc_iface_config_table[] = {
"Estimated CPU overhead for transferring GPU memory",
ucs_offsetof(uct_cuda_ipc_iface_config_t, params.overhead), UCS_CONFIG_TYPE_TIME},

{"ENABLE_SAME_PROCESS", "n",
"Enable same process same device communication for cuda_ipc",
ucs_offsetof(uct_cuda_ipc_iface_config_t, params.enable_same_process), UCS_CONFIG_TYPE_BOOL},

{NULL}
};

Expand Down Expand Up @@ -146,12 +142,6 @@ uct_cuda_ipc_iface_is_reachable_v2(const uct_iface_h tl_iface,
dev_addr = (const uct_cuda_ipc_device_addr_t *)params->device_addr;
same_uuid = (ucs_get_system_id() == dev_addr->system_uuid);

if ((getpid() == *(pid_t*)params->iface_addr) && same_uuid &&
!iface->config.enable_same_process) {
uct_iface_fill_info_str_buf(params, "same process");
return 0;
}

if (same_uuid ||
uct_cuda_ipc_iface_mnnvl_supported(md, dev_addr, dev_addr_len)) {
return uct_iface_scope_is_reachable(tl_iface, params);
Expand Down
1 change: 0 additions & 1 deletion src/uct/cuda/cuda_ipc/cuda_ipc_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ typedef struct {
double bandwidth; /* estimated bandwidth */
double latency; /* estimated latency */
double overhead; /* estimated CPU overhead */
int enable_same_process; /* enable cuda_ipc for same pid same device */
} uct_cuda_ipc_iface_config_params_t;


Expand Down
1 change: 0 additions & 1 deletion test/gtest/ucp/test_ucp_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ void test_ucp_device::get_test_variants(std::vector<ucp_test_variant> &variants)

void test_ucp_device::init()
{
m_env.push_back(new ucs::scoped_setenv("UCX_CUDA_IPC_ENABLE_SAME_PROCESS", "y"));
m_env.push_back(new ucs::scoped_setenv("UCX_IB_GDA_MAX_SYS_LATENCY", "1us"));
ucp_test::init();
sender().connect(&receiver(), get_ep_params());
Expand Down
7 changes: 7 additions & 0 deletions test/gtest/ucp/test_ucp_memheap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#include <common/mem_buffer.h>
#include <common/test_helpers.h>
#include <ucs/sys/sys.h>
extern "C" {
#include <ucp/rma/rma.h>
}

#include <ucs/sys/ptr_arith.h>


Expand Down Expand Up @@ -95,6 +99,9 @@ void test_ucp_memheap::test_xfer(send_func_t send_func, size_t size,
flush_ep(sender());
} else {
flush_worker(sender());
while(ucp_worker_flush_check(sender().worker()) != UCS_OK) {
progress();
}
}

/* Validate data */
Expand Down
2 changes: 1 addition & 1 deletion test/gtest/ucp/test_ucp_peer_failure.cc
Original file line number Diff line number Diff line change
Expand Up @@ -987,7 +987,7 @@ UCS_TEST_P(test_ucp_peer_failure_rndv_put_ppln_abort, rtr_mtype)
}

UCS_TEST_P(test_ucp_peer_failure_rndv_put_ppln_abort, pipeline,
"RNDV_FRAG_SIZE=host:8K")
"RNDV_FRAG_SIZE=host:8K,cuda:8K")
{
rndv_progress_failure_test(rndv_mode::put_ppln, true);
}
Expand Down
23 changes: 2 additions & 21 deletions test/gtest/uct/test_uct_iface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@ class test_uct_iface : public uct_test {
}

void test_is_reachable();

// Expected reachability result for this fixture: test_is_reachable() compares
// this value against what uct_iface_is_reachable_v2() returns. The base
// fixture expects "reachable"; subclasses may override the expectation.
virtual bool is_self_reachable() const
{
return true;
}
};

void test_uct_iface::test_is_reachable()
Expand Down Expand Up @@ -63,7 +58,7 @@ void test_uct_iface::test_is_reachable()
ASSERT_UCS_OK(status);

bool is_reachable = uct_iface_is_reachable_v2(iface, &params);
EXPECT_EQ(is_self_reachable(), is_reachable);
EXPECT_TRUE(is_reachable);

// Allocate corrupted address buffers, make it larger than the correct
// buffer size in case the corrupted data indicates a larger address length
Expand Down Expand Up @@ -98,18 +93,4 @@ UCS_TEST_P(test_uct_iface, is_reachable)
}

UCT_INSTANTIATE_TEST_CASE(test_uct_iface)

// Variant of test_uct_iface whose is_self_reachable() expectation is false,
// i.e. the reachability check is expected to report "not reachable".
class test_uct_iface_self_unreachable : public test_uct_iface {
protected:
// Overrides the base-class expectation (which returns true).
bool is_self_reachable() const override
{
return false;
}
};

// Runs the shared test_is_reachable() body with the
// test_uct_iface_self_unreachable fixture.
UCS_TEST_P(test_uct_iface_self_unreachable, is_reachable)
{
test_is_reachable();
}

UCT_INSTANTIATE_CUDA_IPC_TEST_CASE(test_uct_iface_self_unreachable)
UCT_INSTANTIATE_CUDA_IPC_TEST_CASE(test_uct_iface)
Loading