Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable device_type to be none #79

Merged
merged 3 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/init_global_grid.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ Initialize a Cartesian grid of MPI processes (and also MPI itself by default) de
- `reorder::Integer=1`: the reorder argument to `MPI.Cart_create` in order to create the Cartesian process topology.
- `comm::MPI.Comm=MPI.COMM_WORLD`: the input communicator argument to `MPI.Cart_create` in order to create the Cartesian process topology.
- `init_MPI::Bool=true`: whether to initialize MPI (`true`) or not (`false`).
- `device_type::String="auto"`: the type of the device to be used if available: "CUDA", "AMDGPU" or "auto". If `device_type` is "auto" (default), it is automatically determined, depending on which of the modules used for programming the devices (CUDA.jl or AMDGPU.jl) is functional; if both are functional, an error will be given if `device_type` is set as "auto".
- `select_device::Bool=true`: whether to automatically select the device (GPU) (`true`) or not (`false`) if CUDA is functional. If `true`, it selects the device corresponding to the node-local MPI rank. This method of device selection suits both single and multi-device compute nodes and is recommended in general. It is also the default method of device selection of the *function* [`select_device`](@ref).
- `device_type::String="auto"`: the type of the device to be used if available: `"CUDA"`, `"AMDGPU"`, `"none"` or `"auto"`. Set `device_type="none"` if you want to use only CPUs on a system having also GPUs. If `device_type` is `"auto"` (default), it is automatically determined, depending on which of the modules used for programming the devices (CUDA.jl or AMDGPU.jl) is functional; if both are functional, an error will be given if `device_type` is set as `"auto"`.
- `select_device::Bool=true`: whether to automatically select the device (GPU) (`true`) or not (`false`) if CUDA or AMDGPU is functional and `device_type` not `"none"`. If `true`, it selects the device corresponding to the node-local MPI rank. This method of device selection suits both single and multi-device compute nodes and is recommended in general. It is also the default method of device selection of the *function* [`select_device`](@ref).
For more information, refer to the documentation of MPI.jl / MPI.

# Return values
Expand Down Expand Up @@ -68,10 +68,12 @@ function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0
if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMY") loopvectorization[2] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMY"]) > 0); end
if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMZ") loopvectorization[3] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMZ"]) > 0); end
end
if !(device_type in [DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_AUTO") end
if !(device_type in [DEVICE_TYPE_NONE, DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_NONE, $DEVICE_TYPE_AUTO") end
if ((device_type == DEVICE_TYPE_AUTO) && cuda_functional() && amdgpu_functional()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU are functional. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_functional() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
if (device_type != DEVICE_TYPE_NONE)
if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_functional() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
end
if (any(nxyz .< 1)) error("Invalid arguments: nx, ny, and nz cannot be less than 1."); end
if (any(dims .< 0)) error("Invalid arguments: dimx, dimy, and dimz cannot be negative."); end
if (any(periods .∉ ((0,1),))) error("Invalid arguments: periodx, periody, and periodz must be either 0 or 1."); end
Expand Down
1 change: 1 addition & 0 deletions src/shared.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ const NDIMS_MPI = 3 # Internally, we set the number of dimens
const NNEIGHBORS_PER_DIM = 2 # Number of neighbors per dimension (left neighbor + right neighbor).
const GG_ALLOC_GRANULARITY = 32 # Internal buffers are allocated with a granularity of GG_ALLOC_GRANULARITY elements in order to ensure correct reinterpretation when used for different types and to reduce amount of re-allocations.
const GG_THREADCOPY_THRESHOLD = 32768 # When LoopVectorization is deactivated, then the GG_THREADCOPY_THRESHOLD defines the size in bytes upon which memory copy is performed with multiple threads.
const DEVICE_TYPE_NONE = "none"
const DEVICE_TYPE_AUTO = "auto"
const DEVICE_TYPE_CUDA = "CUDA"
const DEVICE_TYPE_AMDGPU = "AMDGPU"
Expand Down
61 changes: 49 additions & 12 deletions test/test_select_device.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,57 @@ nprocs = MPI.Comm_size(MPI.COMM_WORLD); # NOTE: these tests can run with any num

@testset "$(basename(@__FILE__)) (processes: $nprocs)" begin
@testset "1. select_device" begin
@static if test_cuda
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="CUDA");
gpu_id = select_device();
@test gpu_id < length(CUDA.devices())
finalize_global_grid(finalize_MPI=false);
@static if test_cuda && !test_amdgpu
@testset "\"CUDA\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="CUDA");
gpu_id = select_device();
@test gpu_id < length(CUDA.devices())
finalize_global_grid(finalize_MPI=false);
end;
@testset "\"auto\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="auto");
gpu_id = select_device();
@test gpu_id < length(CUDA.devices())
finalize_global_grid(finalize_MPI=false);
end;
end
@static if test_amdgpu
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="AMDGPU");
gpu_id = select_device();
@test gpu_id < length(AMDGPU.devices())
finalize_global_grid(finalize_MPI=false);
@static if test_amdgpu && !test_cuda
@testset "\"AMDGPU\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="AMDGPU");
gpu_id = select_device();
@test gpu_id < length(AMDGPU.devices())
finalize_global_grid(finalize_MPI=false);
end;
@testset "\"auto\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="auto");
gpu_id = select_device();
@test gpu_id < length(AMDGPU.devices())
finalize_global_grid(finalize_MPI=false);
end;
end
@static if !(test_cuda || test_amdgpu) || (test_cuda && test_amdgpu)
@testset "\"auto\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="auto");
@test_throws ErrorException select_device()
finalize_global_grid(finalize_MPI=false);
end;
end
@static if !test_cuda
@testset "\"CUDA\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="CUDA");
@test_throws ErrorException select_device()
finalize_global_grid(finalize_MPI=false);
end;
end
@static if !test_amdgpu
@testset "\"AMDGPU\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="AMDGPU");
@test_throws ErrorException select_device()
finalize_global_grid(finalize_MPI=false);
end;
end
@static if !(test_cuda || test_amdgpu)
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false);
@testset "\"none\"" begin
me, = init_global_grid(3, 4, 5; quiet=true, init_MPI=false, device_type="none");
@test_throws ErrorException select_device()
finalize_global_grid(finalize_MPI=false);
end
Expand Down
Loading