update handling of gpu support checking if extension is loaded
omlins committed Jan 18, 2024
1 parent 0a4dc18 commit 441aacd
Showing 6 changed files with 32 additions and 25 deletions.
6 changes: 6 additions & 0 deletions src/AMDGPUExt/shared.jl
@@ -10,6 +10,12 @@ using AMDGPU
const ROCField{T,N} = GGField{T,N,ROCArray{T,N}}


##------------------------------------
## HANDLING OF CUDA AND AMDGPU SUPPORT

ImplicitGlobalGrid.is_loaded(::Val{:ImplicitGlobalGrid_AMDGPUExt}) = (@assert AMDGPU.functional(); return true)


##-------------
## SYNTAX SUGAR

6 changes: 6 additions & 0 deletions src/CUDAExt/shared.jl
@@ -10,6 +10,12 @@ using CUDA
const CuField{T,N} = GGField{T,N,CuArray{T,N}}


##------------------------------------
## HANDLING OF CUDA AND AMDGPU SUPPORT

ImplicitGlobalGrid.is_loaded(::Val{:ImplicitGlobalGrid_CUDAExt}) = (@assert CUDA.functional(true); return true)


##-------------
## SYNTAX SUGAR

12 changes: 7 additions & 5 deletions src/init_global_grid.jl
@@ -18,8 +18,8 @@ Initialize a Cartesian grid of MPI processes (and also MPI itself by default) de
- `reorder::Integer=1`: the reorder argument to `MPI.Cart_create` in order to create the Cartesian process topology.
- `comm::MPI.Comm=MPI.COMM_WORLD`: the input communicator argument to `MPI.Cart_create` in order to create the Cartesian process topology.
- `init_MPI::Bool=true`: whether to initialize MPI (`true`) or not (`false`).
- `device_type::String="auto"`: the type of the device to be used if available: `"CUDA"`, `"AMDGPU"`, `"none"` or `"auto"`. Set `device_type="none"` if you want to use only CPUs on a system having also GPUs. If `device_type` is `"auto"` (default), it is automatically determined, depending on which of the modules used for programming the devices (CUDA.jl or AMDGPU.jl) is functional; if both are functional, an error will be given if `device_type` is set as `"auto"`.
- `select_device::Bool=true`: whether to automatically select the device (GPU) (`true`) or not (`false`) if CUDA or AMDGPU is functional and `device_type` not `"none"`. If `true`, it selects the device corresponding to the node-local MPI rank. This method of device selection suits both single and multi-device compute nodes and is recommended in general. It is also the default method of device selection of the *function* [`select_device`](@ref).
- `device_type::String="auto"`: the type of the device to be used if available: `"CUDA"`, `"AMDGPU"`, `"none"` or `"auto"`. Set `device_type="none"` if you want to use only CPUs on a system having also GPUs. If `device_type` is `"auto"` (default), it is automatically determined, depending on which of the modules used for programming the devices (CUDA.jl or AMDGPU.jl) was imported before ImplicitGlobalGrid; if both were imported, an error will be given if `device_type` is set as `"auto"`.
- `select_device::Bool=true`: whether to automatically select the device (GPU) (`true`) or not (`false`) if CUDA or AMDGPU was imported and `device_type` is not `"none"`. If `true`, it selects the device corresponding to the node-local MPI rank. This method of device selection suits both single and multi-device compute nodes and is recommended in general. It is also the default method of device selection of the *function* [`select_device`](@ref).
For more information, refer to the documentation of MPI.jl / MPI.
# Return values
@@ -40,6 +40,8 @@ See also: [`finalize_global_grid`](@ref), [`select_device`](@ref)
"""
function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0, dimy::Integer=0, dimz::Integer=0, periodx::Integer=0, periody::Integer=0, periodz::Integer=0, overlaps::Tuple{Int,Int,Int}=(2,2,2), halowidths::Tuple{Int,Int,Int}=max.(1,overlaps.÷2), disp::Integer=1, reorder::Integer=1, comm::MPI.Comm=MPI.COMM_WORLD, init_MPI::Bool=true, device_type::String=DEVICE_TYPE_AUTO, select_device::Bool=true, quiet::Bool=false)
    if grid_is_initialized() error("The global grid has already been initialized.") end
    set_cuda_loaded()
    set_amdgpu_loaded()
    nxyz    = [nx, ny, nz];
    dims    = [dimx, dimy, dimz];
    periods = [periodx, periody, periodz];
@@ -69,10 +71,10 @@ function init_global_grid(nx::Integer, ny::Integer, nz::Integer; dimx::Integer=0
        if haskey(ENV, "IGG_LOOPVECTORIZATION_DIMZ") loopvectorization[3] = (parse(Int64, ENV["IGG_LOOPVECTORIZATION_DIMZ"]) > 0); end
    end
    if !(device_type in [DEVICE_TYPE_NONE, DEVICE_TYPE_AUTO, DEVICE_TYPE_CUDA, DEVICE_TYPE_AMDGPU]) error("Argument `device_type`: invalid value obtained ($device_type). Valid values are: $DEVICE_TYPE_CUDA, $DEVICE_TYPE_AMDGPU, $DEVICE_TYPE_NONE, $DEVICE_TYPE_AUTO") end
    if ((device_type == DEVICE_TYPE_AUTO) && cuda_functional() && amdgpu_functional()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU are functional. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
    if ((device_type == DEVICE_TYPE_AUTO) && cuda_loaded() && amdgpu_loaded()) error("Automatic detection of the device type to be used not possible: both CUDA and AMDGPU extensions are loaded. Set keyword argument `device_type` to $DEVICE_TYPE_CUDA or $DEVICE_TYPE_AMDGPU.") end
    if (device_type != DEVICE_TYPE_NONE)
        if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_functional() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
        if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_functional() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
        if (device_type in [DEVICE_TYPE_CUDA, DEVICE_TYPE_AUTO]) cuda_enabled = cuda_loaded() end # NOTE: cuda could be enabled/disabled depending on some additional criteria.
        if (device_type in [DEVICE_TYPE_AMDGPU, DEVICE_TYPE_AUTO]) amdgpu_enabled = amdgpu_loaded() end # NOTE: amdgpu could be enabled/disabled depending on some additional criteria.
    end
    if (any(nxyz .< 1)) error("Invalid arguments: nx, ny, and nz cannot be less than 1."); end
    if (any(dims .< 0)) error("Invalid arguments: dimx, dimy, and dimz cannot be negative."); end
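For context, a minimal usage sketch of the behavior documented in the docstring above (grid sizes and the two-value destructuring are illustrative; with this commit, CUDA.jl must be imported before ImplicitGlobalGrid so that the CUDA extension gets loaded at all):

```julia
using CUDA                 # import first: this makes the ImplicitGlobalGrid_CUDAExt extension load
using ImplicitGlobalGrid

# device_type="auto" resolves to CUDA here because only the CUDA extension
# is loaded; it would error if both CUDA.jl and AMDGPU.jl were imported.
me, dims = init_global_grid(64, 64, 64; device_type="auto")
# ... allocate fields, compute, update_halo!(...) ...
finalize_global_grid()
```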
4 changes: 1 addition & 3 deletions src/select_device.jl
@@ -16,10 +16,8 @@ function select_device()
    if cuda_enabled() || amdgpu_enabled()
        check_initialized();
        if cuda_enabled()
            @assert CUDA.functional(true)
            nb_devices = length(CUDA.devices())
        elseif amdgpu_enabled()
            @assert AMDGPU.functional()
            nb_devices = length(AMDGPU.devices())
        end
        comm_l = MPI.Comm_split_type(comm(), MPI.COMM_TYPE_SHARED, me())
@@ -31,7 +29,7 @@
        end
        return device_id
    else
        error("Cannot select a device because neither CUDA nor AMDGPU is enabled (possibly detected non functional when the ImplicitGlobalGrid module was loaded).")
        error("Cannot select a device because neither CUDA nor AMDGPU is enabled (meaning that the corresponding module was not imported before ImplicitGlobalGrid).")
    end
end

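The node-local device selection that `select_device` performs (see the `MPI.Comm_split_type` line above) can be shown standalone. A sketch of the pattern, assuming a CUDA-capable node rather than reproducing the package's exact implementation:

```julia
using MPI, CUDA

MPI.Init()
comm = MPI.COMM_WORLD

# Group the ranks that share a node (shared memory), then use the
# node-local rank to pick a GPU; this suits both single- and
# multi-device compute nodes.
comm_l    = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, MPI.Comm_rank(comm))
me_l      = MPI.Comm_rank(comm_l)
device_id = me_l % length(CUDA.devices())
CUDA.device!(device_id)
```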
26 changes: 11 additions & 15 deletions src/shared.jl
@@ -1,24 +1,20 @@
import MPI
using CUDA #TODO: to be removed!
using AMDGPU #TODO: to be removed!
using Base.Threads


##-------------------------
##------------------------------------
## HANDLING OF CUDA AND AMDGPU SUPPORT
let
    global cuda_functional, amdgpu_functional, set_cuda_functional, set_amdgpu_functional
    _cuda_functional::Bool = false
    _amdgpu_functional::Bool = false
    cuda_functional()::Bool = _cuda_functional
    amdgpu_functional()::Bool = _amdgpu_functional
    set_cuda_functional(val::Bool) = (_cuda_functional = val;)
    set_amdgpu_functional(val::Bool) = (_amdgpu_functional = val;)
end

function __init__()
    set_cuda_functional(CUDA.functional())
    set_amdgpu_functional(AMDGPU.functional())
is_loaded(arg) = false #TODO: this would not work as it should be the caller module...: (Base.get_extension(@__MODULE__, ext) !== nothing)

let
    global cuda_loaded, amdgpu_loaded, set_cuda_loaded, set_amdgpu_loaded
    _cuda_loaded::Bool = false
    _amdgpu_loaded::Bool = false
    cuda_loaded()::Bool = _cuda_loaded
    amdgpu_loaded()::Bool = _amdgpu_loaded
    set_cuda_loaded() = (_cuda_loaded = is_loaded(Val(:ImplicitGlobalGrid_CUDAExt)))
    set_amdgpu_loaded() = (_amdgpu_loaded = is_loaded(Val(:ImplicitGlobalGrid_AMDGPUExt)))
end


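The new mechanism in `shared.jl` is plain multiple dispatch: the parent module provides the fallback `is_loaded(arg) = false`, and each package extension adds a method for its own `Val` tag, so the mere existence of that method signals that the extension was loaded. A minimal self-contained sketch of the pattern (module and extension names are illustrative, not taken from this repository):

```julia
module MiniGrid                               # stand-in for ImplicitGlobalGrid
is_loaded(arg) = false                        # fallback: extension not loaded
end

# A package extension would define this method when Julia loads it;
# here it is defined directly to simulate a loaded extension:
MiniGrid.is_loaded(::Val{:MiniGrid_CUDAExt}) = true

MiniGrid.is_loaded(Val(:MiniGrid_CUDAExt))    # -> true  (extension method hit)
MiniGrid.is_loaded(Val(:MiniGrid_AMDGPUExt))  # -> false (fallback)
```

The `@assert CUDA.functional(true)` / `@assert AMDGPU.functional()` in the real extension methods adds a safety net: querying `is_loaded` for a loaded but non-functional GPU stack fails loudly instead of silently reporting the backend as available.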
3 changes: 1 addition & 2 deletions src/update_halo.jl
@@ -35,8 +35,7 @@ function update_halo!(A::Union{GGArray, GGField, GGFieldConvertible}...)
end

function _update_halo!(fields::GGField...)
    if (any_cuarray(fields...) && !cuda_enabled()) error("CUDA is not enabled (possibly detected non functional when the ImplicitGlobalGrid module was loaded)."); end #NOTE: in the following, it is only required to check for `cuda_enabled()` when the context does not imply `any_cuarray(fields...)` or `is_cuarray(A)`.
    if (any_rocarray(fields...) && !amdgpu_enabled()) error("AMDGPU is not enabled (possibly detected non functional when the ImplicitGlobalGrid module was loaded)."); end #NOTE: in the following, it is only required to check for `amdgpu_enabled()` when the context does not imply `any_rocarray(fields...)` or `is_rocarray(A)`.
    if (!cuda_enabled() && !amdgpu_enabled() && !all_arrays(fields...)) error("Not all arrays are CPU arrays, but no GPU extension is loaded.") end #NOTE: in the following, it is only required to check for `cuda_enabled()`/`amdgpu_enabled()` when the context does not imply `any_cuarray(fields...)` or `is_cuarray(A)` or the corresponding for AMDGPU. # NOTE: the case where only one of the two extensions is loaded but an array that would require the other extension is passed is very unlikely and therefore not explicitly checked here (but could be added later).
    allocate_bufs(fields...);
    if any_array(fields...) allocate_tasks(fields...); end
    if any_cuarray(fields...) allocate_custreams(fields...); end
