Skip to content

Commit

Permalink
Make up commit
Browse files (browse the repository at this point in the history)
  • Loading branch information
mdemoret-nv committed Jun 14, 2024
1 parent f37268b commit 716127f
Show file tree
Hide file tree
Showing 10 changed files with 34 additions and 47 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ option(MRC_USE_CONDA "Enables finding dependencies via conda. All dependencies m
environment" ON)
option(MRC_USE_IWYU "Enable running include-what-you-use as part of the build process" OFF)

set(MRC_RAPIDS_VERSION "23.12" CACHE STRING "Which version of RAPIDS to build for. Sets default versions for RAPIDS CMake and RMM.")
set(MRC_RAPIDS_VERSION "24.04" CACHE STRING "Which version of RAPIDS to build for. Sets default versions for RAPIDS CMake and RMM.")

set(MRC_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data")
mark_as_advanced(MRC_CACHE_DIR)
Expand Down
6 changes: 3 additions & 3 deletions cmake/Configure_UCXX.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function(morpheus_utils_configure_UCXX)
list(APPEND CMAKE_MESSAGE_CONTEXT "UCXX")

morpheus_utils_assert_cpm_initialized()
set(UCXX_VERSION "0.37.00" CACHE STRING "Which version of UCXX to use.")
set(UCXX_VERSION "0.38.00" CACHE STRING "Which version of UCXX to use.")

find_package(ucx REQUIRED)

Expand All @@ -34,8 +34,8 @@ function(morpheus_utils_configure_UCXX)
INSTALL_EXPORT_SET
${PROJECT_NAME}-core-exports
CPM_ARGS
GIT_REPOSITORY https://github.com/pentschev/ucxx.git
GIT_TAG mrc-all
GIT_REPOSITORY https://github.com/rapidsai/ucxx.git
GIT_TAG branch-0.38
GIT_SHALLOW TRUE
SOURCE_SUBDIR cpp
OPTIONS "UCXX_ENABLE_RMM ON"
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dependencies:
- libclang=17
- libgrpc=1.54.0
- libhwloc=2.9.2
- librmm=23.12
- librmm=24.04
- llvmdev=17
- ninja=1.10
- nlohmann_json=3.9
Expand All @@ -53,6 +53,6 @@ dependencies:
- pytest-timeout
- python=3.10
- scikit-build>=0.17
- ucx=1.14
- ucx>=1.15
- yapf
name: all_cuda-118_arch-x86_64
4 changes: 2 additions & 2 deletions conda/environments/ci_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies:
- include-what-you-use=0.21
- libgrpc=1.54.0
- libhwloc=2.9.2
- librmm=23.12
- librmm=24.04
- ninja=1.10
- nlohmann_json=3.9
- nodejs=18
Expand All @@ -43,6 +43,6 @@ dependencies:
- pytest-timeout
- python=3.10
- scikit-build>=0.17
- ucx=1.14
- ucx>=1.15
- yapf
name: ci_cuda-118_arch-x86_64
9 changes: 9 additions & 0 deletions cpp/mrc/src/internal/data_plane/data_plane_resources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,15 @@ std::shared_ptr<ucxx::Endpoint> DataPlaneResources2::create_endpoint(const ucx::
m_endpoints_by_address[address] = endpoint;
m_endpoints_by_id[instance_id] = endpoint;

auto close_request = endpoint->close();

endpoint->cancelInflightRequests();

while (close_request->isCompleted() == false)
{
this->progress();
}

DVLOG(10) << "Created endpoint with address: " << address;

return endpoint;
Expand Down
4 changes: 3 additions & 1 deletion cpp/mrc/src/internal/data_plane/data_plane_resources.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class DataPlaneResources2
{
public:
DataPlaneResources2();
~DataPlaneResources2();
virtual ~DataPlaneResources2();

void set_instance_id(uint64_t instance_id);
bool has_instance_id() const;
Expand Down Expand Up @@ -222,6 +222,8 @@ class DataPlaneResources2
channel::Egress<std::unique_ptr<runtime::RemoteDescriptor2>>& get_inbound_channel() const;

private:
virtual std::shared_ptr<runtime::RemoteDescriptorImpl2> get_descriptor(uint64_t object_id);

std::optional<uint64_t> m_instance_id; // Global ID used to identify this instance

std::shared_ptr<ucxx::Context> m_context;
Expand Down
6 changes: 2 additions & 4 deletions cpp/mrc/src/tests/test_network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,8 @@
#include "internal/ucx/registration_cache.hpp"

#include "mrc/channel/status.hpp"
#include "mrc/codable/codable_protocol.hpp"
#include "mrc/codable/decode.hpp"
#include "mrc/codable/fundamental_types.hpp"
#include "mrc/codable/fundamental_types.hpp" // IWYU pragma: keep
#include "mrc/codable/type_traits.hpp"
#include "mrc/edge/edge_builder.hpp"
#include "mrc/memory/adaptors.hpp"
Expand Down Expand Up @@ -82,7 +81,6 @@
#include <optional>
#include <ostream>
#include <stop_token>
#include <thread>
#include <utility>

using namespace mrc;
Expand All @@ -91,7 +89,7 @@ using namespace mrc::memory::literals;
class DataPlaneResources2Tester : public data_plane::DataPlaneResources2
{
public:
std::shared_ptr<runtime::RemoteDescriptorImpl2> get_descriptor(uint64_t object_id)
std::shared_ptr<runtime::RemoteDescriptorImpl2> get_descriptor(uint64_t object_id) override
{
return m_remote_descriptor_by_id[object_id];
}
Expand Down
2 changes: 2 additions & 0 deletions cpp/mrc/tests/test_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ class TestExecutor : public ::testing::Test
// #endif
}

VLOG(10) << "Sent all messages";

s.on_completed();
});

Expand Down
4 changes: 2 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dependencies:
- gxx=11.2
- libgrpc=1.54.0
- libhwloc=2.9.2
- librmm=23.12
- librmm=24.04
- ninja=1.10
- nlohmann_json=3.9
- nodejs=18
Expand All @@ -69,7 +69,7 @@ dependencies:
- pybind11-stubgen=0.10
- python=3.10
- scikit-build>=0.17
- ucx=1.14
- ucx>=1.15

checks:
common:
Expand Down
40 changes: 8 additions & 32 deletions mrc.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,10 @@
"python.testing.pytestEnabled": true,
"python.testing.unittestEnabled": false,
"testMate.cpp.debug.configTemplate": {
"name": "Debug Test Runner (${parentLabel} > ${label})",
"request": "launch",
"type": "lldb",
"program": "${exec}",
"args": "${argsArray}",
"cwd": "${cwd}",
"darwin": {
Expand All @@ -266,44 +270,14 @@
},
"env": "${envObj}",
"environment": "${envObjArray}",
"linux": {
"MIMode": "gdb",
"symbolLoadInfo": {
"exceptionList": "*libmrc*.so",
"loadAll": false
},
"type": "cppdbg"
},
"program": "${exec}",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing"
},
{
"description": "Skip stdio-common files",
"text": "-interpreter-exec console \"skip -gfi **/bits/*.h\""
}
// {
// "description": "Stay on same thread when debugging",
// "text": "-interpreter-exec console \"set scheduler-locking step\""
// }
],
"sourceFileMap": {
"${workspaceFolder}": {
"editorPath": "${workspaceFolder}",
"useForBreakpoints": "true"
}
},
"testMate.cpp.debug.setEnv": {
"GLOG_v": "10",
"GTEST_CATCH_EXCEPTIONS": "0", // Allow the debugger to catch exceptions
"UCX_ERROR_SIGNALS": "" // Prevent UCX from capturing errors
},
"type": "cppvsdbg",
"win32": {
"type": "cppvsdbg"
}
},
},
"testMate.cpp.log.logpanel": true,
"testMate.cpp.test.executables": "{build,Build,BUILD,out,Out,OUT}/**/*{test,Test,TEST}_*.x",
Expand All @@ -312,6 +286,8 @@
"typescript.referencesCodeLens.enabled": true,
"yapf.args": [
"--style=${workspaceFolder}/python/setup.cfg"
]
],
// "lldb.library": "/home/mdemoret/mambaforge/envs/mrc_dev2/lib/liblldb.so",
"lldb.launch.terminal": "integrated"
}
}

0 comments on commit 716127f

Please sign in to comment.