Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update the wrappers for cuDSS v0.5.0 #73

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/Aqua.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@latest
with:
version: '1'
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[compat]
CEnum = "0.5"
CUDA = "5.4.0"
CUDSS_jll = "0.4.0"
CUDSS_jll = "0.5.0"
julia = "1.6"
LinearAlgebra = "1.6"
SparseArrays = "1.6"
Expand Down
6 changes: 3 additions & 3 deletions gen/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31"
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"

[compat]
CUDA_SDK_jll = "12.6.3"
CUDSS_jll = "0.4.0"
julia = "1.6"
CUDA_SDK_jll = "12.8.0"
CUDSS_jll = "0.5.0"
julia = "1.10"
2 changes: 1 addition & 1 deletion gen/wrapper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ function main()

# create context
headers = ["$cudss/cudss.h"]
targets = ["$cudss/cudss.h", "$cudss/cudss_distributed_interface.h"]
targets = ["$cudss/cudss.h"] # ["$cudss/cudss_distributed_interface.h", "$cudss/cudss_threading_interface.h"]
ctx = create_context(headers, args, options)

# run generator
Expand Down
6 changes: 3 additions & 3 deletions src/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ mutable struct CudssBatchedMatrix{T,M}
ld = nrows
Mptrs = unsafe_cudss_batch(b)
M = typeof(Mptrs)
cudssMatrixCreateBatchDn(matrix_ref, nbatch, nrows, ncols, ld, Mptrs, T, 'C')
cudssMatrixCreateBatchDn(matrix_ref, nbatch, nrows, ncols, ld, Mptrs, Cint, T, 'C')
obj = new{T,M}(T, matrix_ref[], nbatch, nrows, ncols, Cint[], Mptrs)
finalizer(cudssBatchedMatrixDestroy, obj)
obj
Expand All @@ -147,9 +147,9 @@ mutable struct CudssBatchedMatrix{T,M}
Mptrs = unsafe_cudss_batch(B)
M = typeof(Mptrs)
if transposed
cudssMatrixCreateBatchDn(matrix_ref, nbatch, ncols, nrows, ld, Mptrs, T, 'R')
cudssMatrixCreateBatchDn(matrix_ref, nbatch, ncols, nrows, ld, Mptrs, Cint, T, 'R')
else
cudssMatrixCreateBatchDn(matrix_ref, nbatch, nrows, ncols, ld, Mptrs, T, 'C')
cudssMatrixCreateBatchDn(matrix_ref, nbatch, nrows, ncols, ld, Mptrs, Cint, T, 'C')
end
obj = new{T,M}(T, matrix_ref[], nbatch, nrows, ncols, Cint[], Mptrs)
finalizer(cudssBatchedMatrixDestroy, obj)
Expand Down
10 changes: 8 additions & 2 deletions src/interfaces.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ The available configuration parameters are:
- `"max_lu_nnz"`: Upper limit on the number of nonzero entries in LU factors for non-symmetric matrices;
- `"hybrid_mode"`: Memory mode -- `0` (default = device-only) or `1` (hybrid = host/device);
- `"hybrid_device_memory_limit"`: User-defined device memory limit (number of bytes) for the hybrid memory mode;
- `"use_cuda_register_memory"`: A flag to enable (`1`) or disable (`0`) usage of `cudaHostRegister()` by the hybrid memory mode.
- `"use_cuda_register_memory"`: A flag to enable (`1`) or disable (`0`) usage of `cudaHostRegister()` by the hybrid memory mode;
- `"host_nthreads"`: Number of threads to be used by cuDSS in multi-threaded mode;
- `"hybrid_execute_mode"`: Hybrid execute mode -- `0` (default = device-only) or `1` (hybrid = host/device);
- `"pivot_epsilon_alg"`: Algorithm for the pivot epsilon calculation.

The available data parameters are:
- `"info"`: Device-side error information;
Expand Down Expand Up @@ -230,7 +233,10 @@ The available configuration parameters are:
- `"max_lu_nnz"`: Upper limit on the number of nonzero entries in LU factors for non-symmetric matrices;
- `"hybrid_mode"`: Memory mode -- `0` (default = device-only) or `1` (hybrid = host/device);
- `"hybrid_device_memory_limit"`: User-defined device memory limit (number of bytes) for the hybrid memory mode;
- `"use_cuda_register_memory"`: A flag to enable (`1`) or disable (`0`) usage of `cudaHostRegister()` by the hybrid memory mode.
- `"use_cuda_register_memory"`: A flag to enable (`1`) or disable (`0`) usage of `cudaHostRegister()` by the hybrid memory mode;
- `"host_nthreads"`: Number of threads to be used by cuDSS in multi-threaded mode;
- `"hybrid_execute_mode"`: Hybrid execute mode -- `0` (default = device-only) or `1` (hybrid = host/device);
- `"pivot_epsilon_alg"`: Algorithm for the pivot epsilon calculation.

The available data parameters are:
- `"info"`: Device-side error information;
Expand Down
45 changes: 19 additions & 26 deletions src/libcudss.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,6 @@ const cudaStream_t = CUstream
const cudaDataType_t = cudaDataType
const CUPTR_C_NULL = CuPtr{Ptr{Cvoid}}(0)

@cenum cudssOpType_t::UInt32 begin
CUDSS_SUM = 0
CUDSS_MAX = 1
CUDSS_MIN = 2
end

struct cudssDistributedInterface_t
cudssCommRank::Ptr{Cvoid}
cudssCommSize::Ptr{Cvoid}
cudssSend::Ptr{Cvoid}
cudssRecv::Ptr{Cvoid}
cudssBcast::Ptr{Cvoid}
cudssReduce::Ptr{Cvoid}
cudssAllreduce::Ptr{Cvoid}
cudssScatterv::Ptr{Cvoid}
cudssCommSplit::Ptr{Cvoid}
cudssCommFree::Ptr{Cvoid}
end

mutable struct cudssContext end

const cudssHandle_t = Ptr{cudssContext}
Expand Down Expand Up @@ -55,6 +36,9 @@ const cudssConfig_t = Ptr{cudssConfig}
CUDSS_CONFIG_HYBRID_MODE = 11
CUDSS_CONFIG_HYBRID_DEVICE_MEMORY_LIMIT = 12
CUDSS_CONFIG_USE_CUDA_REGISTER_MEMORY = 13
CUDSS_CONFIG_HOST_NTHREADS = 14
CUDSS_CONFIG_HYBRID_EXECUTE_MODE = 15
CUDSS_CONFIG_PIVOT_EPSILON_ALG = 16
end

@cenum cudssDataParam_t::UInt32 begin
Expand Down Expand Up @@ -131,8 +115,9 @@ end
end

@cenum cudssMatrixFormat_t::UInt32 begin
CUDSS_MFORMAT_DENSE = 0
CUDSS_MFORMAT_CSR = 1
CUDSS_MFORMAT_DENSE = 1
CUDSS_MFORMAT_CSR = 2
CUDSS_MFORMAT_BATCH = 4
end

struct cudssDeviceMemHandler_t
Expand Down Expand Up @@ -193,6 +178,12 @@ end
commLibFileName::Cstring)::cudssStatus_t
end

# Low-level binding for the C entry point `cudssSetThreadingLayer` in libcudss.
# `handle` is passed to C as a `cudssHandle_t` and `thrLibFileName` as a `Cstring`;
# the foreign call returns a `cudssStatus_t`.
# NOTE(review): `thrLibFileName` is presumably the file name of the threading-layer
# shared library that cuDSS should load -- confirm against the cuDSS documentation.
# NOTE(review): `@checked` presumably converts a non-success status into an error,
# confirm against the macro's definition.
@checked function cudssSetThreadingLayer(handle, thrLibFileName)
# Runs before the foreign call, as in the other wrappers of this file.
initialize_context()
@gcsafe_ccall libcudss.cudssSetThreadingLayer(handle::cudssHandle_t,
thrLibFileName::Cstring)::cudssStatus_t
end

@checked function cudssConfigCreate(solverConfig)
initialize_context()
@gcsafe_ccall libcudss.cudssConfigCreate(solverConfig::Ptr{cudssConfig_t})::cudssStatus_t
Expand Down Expand Up @@ -257,12 +248,13 @@ end
end

@checked function cudssMatrixCreateBatchDn(matrix, batchCount, nrows, ncols, ld, values,
valueType, layout)
indexType, valueType, layout)
initialize_context()
@gcsafe_ccall libcudss.cudssMatrixCreateBatchDn(matrix::Ptr{cudssMatrix_t},
batchCount::Int64, nrows::Ptr{Cvoid},
ncols::Ptr{Cvoid}, ld::Ptr{Cvoid},
values::CuPtr{Ptr{Cvoid}},
indexType::cudaDataType_t,
valueType::cudaDataType_t,
layout::cudssLayout_t)::cudssStatus_t
end
Expand Down Expand Up @@ -330,16 +322,17 @@ end
values::CuPtr{Cvoid})::cudssStatus_t
end

@checked function cudssMatrixGetBatchDn(matrix, batchCount, nrows, ncols, ld, values, type,
layout)
@checked function cudssMatrixGetBatchDn(matrix, batchCount, nrows, ncols, ld, values,
indexType, valueType, layout)
initialize_context()
@gcsafe_ccall libcudss.cudssMatrixGetBatchDn(matrix::cudssMatrix_t,
batchCount::Ptr{Int64},
nrows::Ptr{Ptr{Cvoid}},
ncols::Ptr{Ptr{Cvoid}},
ld::Ptr{Ptr{Cvoid}},
values::Ptr{CuPtr{Ptr{Cvoid}}},
type::Ptr{cudaDataType_t},
indexType::Ptr{cudaDataType_t},
valueType::Ptr{cudaDataType_t},
layout::Ptr{cudssLayout_t})::cudssStatus_t
end

Expand Down Expand Up @@ -382,7 +375,7 @@ end
@checked function cudssMatrixGetFormat(matrix, format)
initialize_context()
@gcsafe_ccall libcudss.cudssMatrixGetFormat(matrix::cudssMatrix_t,
format::Ptr{cudssMatrixFormat_t})::cudssStatus_t
format::Ptr{Cint})::cudssStatus_t
end

@checked function cudssGetDeviceMemHandler(handle, handler)
Expand Down
16 changes: 14 additions & 2 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ const CUDSS_DATA_PARAMETERS = ("info", "lu_nnz", "npivots", "inertia", "perm_reo
const CUDSS_CONFIG_PARAMETERS = ("reordering_alg", "factorization_alg", "solve_alg", "matching_type",
"solve_mode", "ir_n_steps", "ir_tol", "pivot_type", "pivot_threshold",
"pivot_epsilon", "max_lu_nnz", "hybrid_mode", "hybrid_device_memory_limit",
"use_cuda_register_memory")
"use_cuda_register_memory", "host_nthreads", "hybrid_execute_mode",
"pivot_epsilon_alg")

const CUDSS_TYPES = Dict{String, DataType}(
# data type
Expand Down Expand Up @@ -38,7 +39,10 @@ const CUDSS_TYPES = Dict{String, DataType}(
"max_lu_nnz" => Int64,
"hybrid_mode" => Cint,
"hybrid_device_memory_limit" => Int64,
"use_cuda_register_memory" => Cint
"use_cuda_register_memory" => Cint,
"host_nthreads" => Cint,
"hybrid_execute_mode" => Cint,
"pivot_epsilon_alg" => cudssAlgType_t,
)

## config type
Expand Down Expand Up @@ -72,6 +76,12 @@ function Base.convert(::Type{cudssConfigParam_t}, config::String)
return CUDSS_CONFIG_HYBRID_DEVICE_MEMORY_LIMIT
elseif config == "use_cuda_register_memory"
return CUDSS_CONFIG_USE_CUDA_REGISTER_MEMORY
elseif config == "host_nthreads"
return CUDSS_CONFIG_HOST_NTHREADS
elseif config == "hybrid_execute_mode"
return CUDSS_CONFIG_HYBRID_EXECUTE_MODE
elseif config == "pivot_epsilon_alg"
return CUDSS_CONFIG_PIVOT_EPSILON_ALG
else
throw(ArgumentError("Unknown config parameter $config"))
end
Expand Down Expand Up @@ -226,6 +236,8 @@ function Base.convert(::Type{cudssMatrixFormat_t}, format::Char)
return CUDSS_MFORMAT_DENSE
elseif format == 'S'
return CUDSS_MFORMAT_CSR
elseif format == 'B'
return CUDSS_MFORMAT_BATCH
else
throw(ArgumentError("Unknown format $format"))
end
Expand Down
12 changes: 6 additions & 6 deletions test/test_batched_cudss.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ function cudss_batched_dense()
A_cpu = rand(T, n)
A_gpu = [CuVector(A_cpu)]
matrix = CudssBatchedMatrix(A_gpu)
format = Ref{CUDSS.cudssMatrixFormat_t}()
format = Ref{Cint}()
CUDSS.cudssMatrixGetFormat(matrix, format)
@test format[] == CUDSS.CUDSS_MFORMAT_DENSE
@test format[] == CUDSS.CUDSS_MFORMAT_BATCH

A_cpu2 = rand(T, n)
A_gpu2 = [CuVector(A_cpu2)]
Expand All @@ -19,9 +19,9 @@ function cudss_batched_dense()
A_cpu = rand(T, n, p)
A_gpu = [CuMatrix(A_cpu)]
matrix = CudssBatchedMatrix(A_gpu)
format = Ref{CUDSS.cudssMatrixFormat_t}()
format = Ref{Cint}()
CUDSS.cudssMatrixGetFormat(matrix, format)
@test format[] == CUDSS.CUDSS_MFORMAT_DENSE
@test format[] == CUDSS.CUDSS_MFORMAT_BATCH

A_cpu2 = rand(T, n, p)
A_gpu2 = [CuMatrix(A_cpu2)]
Expand All @@ -39,9 +39,9 @@ function cudss_batched_sparse()
@testset "view = $view" for view in ('L', 'U', 'F')
@testset "structure = $structure" for structure in ("G", "S", "H", "SPD", "HPD")
matrix = CudssBatchedMatrix(A_gpu, structure, view)
format = Ref{CUDSS.cudssMatrixFormat_t}()
format = Ref{Cint}()
CUDSS.cudssMatrixGetFormat(matrix, format)
@test format[] == CUDSS.CUDSS_MFORMAT_CSR
@test format[] == CUDSS.CUDSS_MFORMAT_BATCH

A_cpu2 = sprand(T, n, n, 1.0)
A_cpu2 = A_cpu2 + A_cpu2'
Expand Down
15 changes: 10 additions & 5 deletions test/test_cudss.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function cudss_version()
@test CUDSS.version() >= v"0.4.0"
@test CUDSS.version() >= v"0.5.0"
end

function cudss_dense()
Expand All @@ -10,7 +10,7 @@ function cudss_dense()
A_cpu = rand(T, n)
A_gpu = CuVector(A_cpu)
matrix = CudssMatrix(A_gpu)
format = Ref{CUDSS.cudssMatrixFormat_t}()
format = Ref{Cint}()
CUDSS.cudssMatrixGetFormat(matrix, format)
@test format[] == CUDSS.CUDSS_MFORMAT_DENSE

Expand All @@ -23,7 +23,7 @@ function cudss_dense()
A_cpu = rand(T, n, p)
A_gpu = CuMatrix(A_cpu)
matrix = CudssMatrix(A_gpu)
format = Ref{CUDSS.cudssMatrixFormat_t}()
format = Ref{Cint}()
CUDSS.cudssMatrixGetFormat(matrix, format)
@test format[] == CUDSS.CUDSS_MFORMAT_DENSE

Expand All @@ -43,7 +43,7 @@ function cudss_sparse()
@testset "view = $view" for view in ('L', 'U', 'F')
@testset "structure = $structure" for structure in ("G", "S", "H", "SPD", "HPD")
matrix = CudssMatrix(A_gpu, structure, view)
format = Ref{CUDSS.cudssMatrixFormat_t}()
format = Ref{Cint}()
CUDSS.cudssMatrixGetFormat(matrix, format)
@test format[] == CUDSS.CUDSS_MFORMAT_CSR

Expand Down Expand Up @@ -93,7 +93,9 @@ function cudss_solver()

@testset "config parameter = $parameter" for parameter in CUDSS_CONFIG_PARAMETERS
@testset "cudss_get" begin
val = cudss_get(solver, parameter)
if parameter != "host_nthreads"
val = cudss_get(solver, parameter)
end
end
@testset "cudss_set" begin
(parameter == "matching_type") && cudss_set(solver, parameter, 0)
Expand All @@ -104,13 +106,16 @@ function cudss_solver()
(parameter == "pivot_epsilon") && cudss_set(solver, parameter, 1e-12)
(parameter == "max_lu_nnz") && cudss_set(solver, parameter, 10)
(parameter == "hybrid_device_memory_limit") && cudss_set(solver, parameter, 2048)
(parameter == "host_nthreads") && cudss_set(solver, parameter, 0)
for algo in ("default", "algo1", "algo2", "algo3")
(parameter == "reordering_alg") && cudss_set(solver, parameter, algo)
(parameter == "factorization_alg") && cudss_set(solver, parameter, algo)
(parameter == "solve_alg") && cudss_set(solver, parameter, algo)
(parameter == "pivot_epsilon_alg") && cudss_set(solver, parameter, algo)
end
for flag in (0, 1)
(parameter == "hybrid_mode") && cudss_set(solver, parameter, flag)
(parameter == "hybrid_execute_mode") && cudss_set(solver, parameter, flag)
(parameter == "use_cuda_register_memory") && cudss_set(solver, parameter, flag)
end
for pivoting in ('C', 'R', 'N')
Expand Down
Loading