Draft
38 commits
0bf9823
initial commit for remote work
Kh4ster Jul 2, 2025
975da23
partially working batched PDHG
Kh4ster Jul 16, 2025
efafee7
removed tmp primal from batch
Kh4ster Jul 16, 2025
9d7aebf
remove potential next dual copy in adaptive
Kh4ster Jul 16, 2025
5208d8a
use batch dual solution in pdhg
Kh4ster Jul 16, 2025
61eac97
use batched next primal and batch potential next primal in pdhg
Kh4ster Jul 16, 2025
b0aef9c
add s to batch solutions
Kh4ster Jul 17, 2025
787827a
use batch delta primal
Kh4ster Jul 17, 2025
69ecc1d
use batch delta dual in adaptive and batch delta primal for regular mode
Kh4ster Jul 17, 2025
26d2f35
add batch tmp primal to adaptive
Kh4ster Jul 17, 2025
dc497d9
moved interaction, movement, norm square primal/dual to vectors
Kh4ster Jul 17, 2025
37a73c9
move primal and dual step size to vectors instead of scalars
Kh4ster Jul 17, 2025
3a91210
run the dual projection on the batch with wrapped around functor
Kh4ster Jul 18, 2025
7fac69d
run the primal projection on the batch with wrapped around functor
Kh4ster Jul 18, 2025
f8f495c
propagate the for now scalar primal weight and step size to the vecto…
Kh4ster Jul 18, 2025
84bc401
fix average propagation to the whole batch solutions, throw exception…
Kh4ster Jul 18, 2025
934d643
move step size and primal weight to uvector
Kh4ster Jul 18, 2025
e4cc3f8
access the primal weight vector per cell
Kh4ster Jul 18, 2025
cebc973
access step size and primal weight as an array where it's needed
Kh4ster Jul 21, 2025
fb0f8d2
convert valid step size and interaction to device span and remove use…
Kh4ster Jul 21, 2025
71d874b
support batch average
Kh4ster Jul 21, 2025
a8cb2da
improve functor for tma
Kh4ster Jul 21, 2025
d0dc5bd
remove batch primal and batch dual solution to directly use primal an…
Kh4ster Jul 21, 2025
8ac1e90
fix: use an actual batch for the primal and dual solutions
Kh4ster Jul 22, 2025
6b94b93
use same vector for delta for batch and non batch
Kh4ster Jul 22, 2025
dfb3c92
move current and next aty to use regular just wider vectors
Kh4ster Jul 22, 2025
e1008d7
move batch dual gradient to use regular just wider vectors
Kh4ster Jul 22, 2025
2543ab5
tmp not fully working batch potential primal and dual just using wide…
Kh4ster Jul 22, 2025
4417b3b
fix using a wider vector and switching to swap instead of copy
Kh4ster Jul 22, 2025
0183819
use wider tmp primal instead of a batch vector
Kh4ster Jul 22, 2025
574dbc0
unique per solution distance traveled and thus primal weight
Kh4ster Jul 24, 2025
5b7318d
convert most now vector access to span
Kh4ster Jul 24, 2025
d069a2f
unique convergence information and termination strategy per climber
Kh4ster Jul 29, 2025
34f4225
put back cuda graph
Kh4ster Jul 29, 2025
4501eda
per climber interaction and movement
Kh4ster Jul 29, 2025
f3d450d
return and print information of best solution among climbers
Kh4ster Jul 30, 2025
263c1f1
put back private and use getter setter in pdhg
Kh4ster Jul 30, 2025
23955ef
working multi climber restart kkt strategy
Kh4ster Aug 1, 2025
7 changes: 7 additions & 0 deletions benchmarks/linear_programming/cuopt/run_pdlp.cu
@@ -78,6 +78,12 @@ static void parse_arguments(argparse::ArgumentParser& program)
"Path to PDLP hyper-params file to configure PDLP solver. Has priority over PDLP solver "
"modes.");

program.add_argument("--batch-mode")
.help("Batch mode for PDLP. Possible values: 0 (default), 1")
.default_value(0)
.scan<'i', int>()
.choices(0, 1);

program.add_argument("--solution-path").help("Path where solution file will be generated");
}

@@ -106,6 +112,7 @@ static cuopt::linear_programming::pdlp_solver_settings_t<int, double> create_sol
string_to_pdlp_solver_mode(program.get<std::string>("--pdlp-solver-mode"));
settings.method = static_cast<cuopt::linear_programming::method_t>(program.get<int>("--method"));
settings.crossover = program.get<int>("--crossover");
settings.batch_mode = program.get<int>("--batch-mode");

return settings;
}
618 changes: 618 additions & 0 deletions benchmarks/linear_programming/cuopt/test4.cu

Large diffs are not rendered by default.

@@ -206,6 +206,7 @@ class pdlp_solver_settings_t {
bool save_best_primal_so_far{false};
bool first_primal_feasible{false};
method_t method{method_t::Concurrent};
bool batch_mode{false};
// For concurrent termination
std::atomic<i_t>* concurrent_halt;
static constexpr f_t minimal_absolute_tolerance = 1.0e-12;
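A minimal caller-side sketch (not part of this diff) of enabling the new flag through the settings struct above; everything except the batch_mode field itself is an assumption:

// Hypothetical usage of the new setting; the solver invocation is omitted.
cuopt::linear_programming::pdlp_solver_settings_t<int, double> settings{};
settings.batch_mode = true;  // opt into the batched (multi-climber) PDLP path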
228 changes: 223 additions & 5 deletions cpp/src/linear_programming/cusparse_view.cu

Large diffs are not rendered by default.

28 changes: 26 additions & 2 deletions cpp/src/linear_programming/cusparse_view.hpp
@@ -34,7 +34,8 @@ class cusparse_view_t {
saddle_point_state_t<i_t, f_t>& current_saddle_point_state,
rmm::device_uvector<f_t>& _tmp_primal,
rmm::device_uvector<f_t>& _tmp_dual,
rmm::device_uvector<f_t>& _potential_next_dual_solution);
rmm::device_uvector<f_t>& _potential_next_dual_solution,
bool batch_mode);

cusparse_view_t(raft::handle_t const* handle_ptr,
const problem_t<i_t, f_t>& op_problem,
@@ -44,7 +45,8 @@ rmm::device_uvector<f_t>& _tmp_dual,
rmm::device_uvector<f_t>& _tmp_dual,
const rmm::device_uvector<f_t>& _A_T,
const rmm::device_uvector<i_t>& _A_T_offsets,
const rmm::device_uvector<i_t>& _A_T_indices);
const rmm::device_uvector<i_t>& _A_T_indices,
bool batch_mode);

cusparse_view_t(raft::handle_t const* handle_ptr,
const problem_t<i_t, f_t>& op_problem,
@@ -70,25 +72,45 @@ cusparse_view_t(raft::handle_t const* handle_ptr,
cusparseDnVecDescr_t primal_solution;
cusparseDnVecDescr_t dual_solution;

// cusparse view of batch solutions
cusparseDnMatDescr_t batch_primal_solutions;
cusparseDnMatDescr_t batch_dual_solutions;
cusparseDnMatDescr_t batch_potential_next_dual_solution;
cusparseDnMatDescr_t batch_next_AtYs;
cusparseDnMatDescr_t batch_tmp_duals;

// cusparse view of gradients
cusparseDnVecDescr_t primal_gradient;
cusparseDnVecDescr_t dual_gradient;

// cusparse view of batch gradients
cusparseDnMatDescr_t batch_dual_gradients;

// cusparse view of At * Y computation
cusparseDnVecDescr_t
current_AtY; // Only used at very first iteration and after each restart to average
cusparseDnVecDescr_t next_AtY; // Next value is swapped out with current after each valid PDHG
// step to save the first AtY SpMV in compute next primal
cusparseDnVecDescr_t potential_next_dual_solution;

// cusparse view of At * Y batch computation
cusparseDnMatDescr_t batch_current_AtYs;

// cusparse view of auxiliary space needed for some spmv computations
cusparseDnVecDescr_t tmp_primal;
cusparseDnVecDescr_t tmp_dual;

// cusparse view of auxiliary space needed for some spmm computations
cusparseDnMatDescr_t batch_tmp_primals;

// reuse buffers for cusparse spmv
rmm::device_uvector<uint8_t> buffer_non_transpose;
rmm::device_uvector<uint8_t> buffer_transpose;

// reuse buffers for cusparse spmm
rmm::device_uvector<uint8_t> buffer_transpose_batch;
rmm::device_uvector<uint8_t> buffer_non_transpose_batch;

// Ref to the A_T found in either
// Initial problem, we use it to have an unscaled A_T
// PDLP copy of the problem which holds the scaled version
@@ -102,5 +124,7 @@ class cusparse_view_t {
const rmm::device_uvector<f_t>& A_;
const rmm::device_uvector<i_t>& A_offsets_;
const rmm::device_uvector<i_t>& A_indices_;

bool batch_mode_{false};
};
} // namespace cuopt::linear_programming::detail
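The new cusparseDnMatDescr_t members mirror the existing per-solution vector descriptors but describe the whole batch at once, so a single SpMM can stand in for one SpMV per climber. A minimal sketch of how such a descriptor could be built, assuming the batch is stored as one contiguous column-major buffer with one solution per column (the buffer name, batch_size, and layout are assumptions, not taken from this diff):

// Assumed layout: batch_size solutions of primal_size doubles each,
// stored back to back in a single rmm::device_uvector<double>.
cusparseDnMatDescr_t batch_primal_solutions;
cusparseCreateDnMat(&batch_primal_solutions,
                    primal_size,          // rows
                    batch_size,           // one column per climber
                    primal_size,          // leading dimension (column-major)
                    batch_primal.data(),  // hypothetical wide device buffer
                    CUDA_R_64F,
                    CUSPARSE_ORDER_COL);
// A * X over the whole batch then goes through cusparseSpMM with this
// descriptor instead of looping cusparseSpMV over the climbers.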
@@ -43,7 +43,9 @@ pdlp_initial_scaling_strategy_t<i_t, f_t>::pdlp_initial_scaling_strategy_t(
rmm::device_uvector<f_t>& A_T,
rmm::device_uvector<i_t>& A_T_offsets,
rmm::device_uvector<i_t>& A_T_indices,
bool running_mip)
bool running_mip,
bool batch_mode
)
: handle_ptr_(handle_ptr),
stream_view_(handle_ptr_->get_stream()),
primal_size_h_(op_problem_scaled.n_variables),
Expand All @@ -57,7 +59,8 @@ pdlp_initial_scaling_strategy_t<i_t, f_t>::pdlp_initial_scaling_strategy_t(
iteration_constraint_matrix_scaling_{static_cast<size_t>(dual_size_h_), stream_view_},
iteration_variable_scaling_{static_cast<size_t>(primal_size_h_), stream_view_},
cummulative_constraint_matrix_scaling_{static_cast<size_t>(dual_size_h_), stream_view_},
cummulative_variable_scaling_{static_cast<size_t>(primal_size_h_), stream_view_}
cummulative_variable_scaling_{static_cast<size_t>(primal_size_h_), stream_view_},
batch_mode_(batch_mode)
{
raft::common::nvtx::range fun_scope("Initializing initial_scaling_strategy");
#ifdef PDLP_DEBUG_MODE
@@ -412,16 +415,24 @@ void pdlp_initial_scaling_strategy_t<i_t, f_t>::scale_solutions(
rmm::device_uvector<f_t>& primal_solution, rmm::device_uvector<f_t>& dual_solution) const
{
// scale solutions
raft::linalg::eltwiseDivideCheckZero(primal_solution.data(),
cub::DeviceTransform::Transform(cuda::std::make_tuple(primal_solution.data(),
thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
problem_wrapped_iterator<f_t>(cummulative_variable_scaling_.data(), primal_size_h_)
)),
primal_solution.data(),
cummulative_variable_scaling_.data(),
primal_size_h_,
primal_solution.size(),
batch_safe_div<f_t>(),
stream_view_);
if (dual_solution.size()) {
raft::linalg::eltwiseDivideCheckZero(dual_solution.data(),
dual_solution.data(),
cummulative_constraint_matrix_scaling_.data(),
dual_size_h_,
cub::DeviceTransform::Transform(cuda::std::make_tuple(dual_solution.data(),
thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
problem_wrapped_iterator<f_t>(cummulative_constraint_matrix_scaling_.data(), dual_size_h_)
)),
dual_solution.data(),
dual_solution.size(),
batch_safe_div<f_t>(),
stream_view_);
}
}
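The transform above leans on two helpers defined elsewhere in this PR; their bodies are not shown in this hunk, so the following is only a sketch of the shape they would need (the names match the diff, the implementations are assumptions):

// problem_wrapped_iterator: maps a flat index over the batch-wide vector back
// into the single per-problem scaling vector, so one pass covers every climber.
template <typename f_t>
struct problem_wrapped_iterator {
  __host__ __device__ problem_wrapped_iterator(const f_t* values_, int problem_size_)
    : values(values_), problem_size(problem_size_)
  {
  }
  __host__ __device__ f_t operator()(int i) const { return values[i % problem_size]; }
  const f_t* values;
  int problem_size;
};

// batch_safe_div: element-wise division guarded against a zero scaling factor,
// standing in for raft::linalg::eltwiseDivideCheckZero (zero handling assumed).
template <typename f_t>
struct batch_safe_div {
  __host__ __device__ f_t operator()(f_t x, f_t s) const
  {
    return s == f_t{0} ? f_t{0} : x / s;
  }
};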
@@ -461,25 +472,38 @@ void pdlp_initial_scaling_strategy_t<i_t, f_t>::unscale_solutions(
rmm::device_uvector<f_t>& primal_solution, rmm::device_uvector<f_t>& dual_solution) const
{
// if there are some tails in the solution, don't scale that
cuopt_expects(primal_solution.size() == static_cast<size_t>(primal_size_h_),
// TODO tmp change in the condition
cuopt_expects(primal_solution.size() == static_cast<size_t>(primal_size_h_) || primal_solution.size() == static_cast<size_t>((0 + 3)/*@@*/) * static_cast<size_t>(primal_size_h_),
error_type_t::RuntimeError,
"Unscale primal didn't get a vector of size primal");
// unscale avg solutions
raft::linalg::eltwiseMultiply(primal_solution.data(),
primal_solution.data(),
cummulative_variable_scaling_.data(),
primal_size_h_,
stream_view_);
cub::DeviceTransform::Transform(cuda::std::make_tuple(primal_solution.data(),
thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
problem_wrapped_iterator<f_t>(cummulative_variable_scaling_.data(), primal_size_h_)
)
),
primal_solution.data(),
primal_solution.size(),
mul_op<f_t>(),
stream_view_);

if (dual_solution.size()) {
cuopt_expects(dual_solution.size() == static_cast<size_t>(dual_size_h_),
// TODO tmp change in the condition
cuopt_expects(dual_solution.size() == static_cast<size_t>(dual_size_h_) || dual_solution.size() == static_cast<size_t>((0 + 3)/*@@*/) * static_cast<size_t>(dual_size_h_),
error_type_t::RuntimeError,
"Unscale dual didn't get a vector of size dual");
raft::linalg::eltwiseMultiply(dual_solution.data(),
dual_solution.data(),
cummulative_constraint_matrix_scaling_.data(),
dual_size_h_,
stream_view_);
cub::DeviceTransform::Transform(cuda::std::make_tuple(
dual_solution.data(),
thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
problem_wrapped_iterator<f_t>(cummulative_constraint_matrix_scaling_.data(), dual_size_h_)
)
),
dual_solution.data(),
dual_solution.size(),
mul_op<f_t>(),
stream_view_);
}
}

@@ -59,7 +59,8 @@ class pdlp_initial_scaling_strategy_t {
rmm::device_uvector<f_t>& A_T,
rmm::device_uvector<i_t>& A_T_offsets,
rmm::device_uvector<i_t>& A_T_indices,
bool running_mip = false);
bool running_mip = false,
bool batch_mode = false);

void scale_problem();

@@ -103,5 +104,6 @@ rmm::device_uvector<i_t>& A_T_offsets_;
rmm::device_uvector<i_t>& A_T_offsets_;
rmm::device_uvector<i_t>& A_T_indices_;
bool running_mip_;
bool batch_mode_;
};
} // namespace cuopt::linear_programming::detail