Commit d24de3b

Katharine Hyatt (kshyatt) authored and committed

Sparse GPU array and broadcasting support

1 parent 8a27677 · commit d24de3b

File tree

11 files changed: +1840, -2 lines

Project.toml

Lines changed: 2 additions & 0 deletions

@@ -13,6 +13,7 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63"
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[weakdeps]

@@ -33,5 +34,6 @@ Random = "1"
Reexport = "1"
ScopedValues = "1"
Serialization = "1"
SparseArrays = "1"
Statistics = "1"
julia = "1.10"

README.md

Lines changed: 60 additions & 0 deletions

@@ -31,3 +31,63 @@ This package is the counterpart of Julia's `AbstractArray` interface, but for GPU
types: It provides functionality and tooling to speed up development of new GPU array types.
**This package is not intended for end users!** Instead, you should use one of the packages
that build on GPUArrays.jl, such as [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), or [Metal.jl](https://github.com/JuliaGPU/Metal.jl).

## Interface methods

To support a new GPU backend, you will need to implement various interface methods for your backend's array types.
Some (CPU-based) examples can be seen in the testing library `JLArrays` (located in the `lib` directory of this package).

### Dense array support

### Sparse array support (optional)

`GPUArrays.jl` provides **device-side** array types for `CSC`, `CSR`, `COO`, and `BSR` matrices, as well as sparse vectors.
It also provides abstract types for these layouts; by making your backend's host-side types concrete subtypes of them, you benefit from the
backend-agnostic wrappers. In particular, `GPUArrays.jl` provides out-of-the-box support for broadcasting and `mapreduce` over
GPU sparse arrays.

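As a rough illustration of what those wrappers buy a backend, here is a minimal sketch using the CPU-backed `JLArrays` reference backend (container names come from `lib/JLArrays`; the exact container returned by a broadcast is left to the wrappers):

```julia
using SparseArrays, JLArrays

# Build a host-side sparse matrix and wrap it in the JLArrays CSC container.
A = JLSparseMatrixCSC(sprand(Float32, 100, 100, 0.05))

# Broadcasting and mapreduce go through the backend-agnostic sparse wrappers,
# so neither should require backend-specific kernels.
B = A .* 2f0
s = mapreduce(abs, +, A)
```
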
For **host-side** types, your custom sparse types should implement the following (a minimal sketch for a hypothetical backend follows the list):

- `dense_array_type` - the corresponding dense array type. For example, for a `CuSparseVector` or `CuSparseMatrixCXX`, the `dense_array_type` is `CuArray`.
- `sparse_array_type` - the **untyped** sparse array type corresponding to a given parametrized type. A `CuSparseVector{Tv, Ti}` would have a `sparse_array_type` of `CuSparseVector` -- note the lack of type parameters!
- `csc_type(::Type{T})` - the compressed sparse column type for your backend. A `CuSparseMatrixCSR` would have a `csc_type` of `CuSparseMatrixCSC`.
- `csr_type(::Type{T})` - the compressed sparse row type for your backend. A `CuSparseMatrixCSC` would have a `csr_type` of `CuSparseMatrixCSR`.
- `coo_type(::Type{T})` - the coordinate sparse matrix type for your backend. A `CuSparseMatrixCSC` would have a `coo_type` of `CuSparseMatrixCOO`.

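For a hypothetical backend with a dense array type `MyArray` and sparse types `MySparseVector`, `MySparseMatrixCSC`, `MySparseMatrixCSR`, and `MySparseMatrixCOO` (illustrative names, not part of `GPUArrays.jl`), the methods might look like the following sketch, mirroring what `JLArrays` does:

```julia
import GPUArrays

# Assumes MyArray, MySparseVector, MySparseMatrixCSC, MySparseMatrixCSR, and
# MySparseMatrixCOO are already defined by the backend.

# Dense counterpart of each sparse container.
GPUArrays.dense_array_type(::Type{<:MySparseVector})    = MyArray
GPUArrays.dense_array_type(::Type{<:MySparseMatrixCSC}) = MyArray
GPUArrays.dense_array_type(::Type{<:MySparseMatrixCSR}) = MyArray

# Untyped sparse container corresponding to a parametrized type.
GPUArrays.sparse_array_type(::Type{<:MySparseVector})    = MySparseVector
GPUArrays.sparse_array_type(::Type{<:MySparseMatrixCSC}) = MySparseMatrixCSC
GPUArrays.sparse_array_type(::Type{<:MySparseMatrixCSR}) = MySparseMatrixCSR

# Format conversions between the storage layouts this backend supports.
GPUArrays.csc_type(::Type{<:MySparseMatrixCSR}) = MySparseMatrixCSC
GPUArrays.csr_type(::Type{<:MySparseMatrixCSC}) = MySparseMatrixCSR
GPUArrays.coo_type(::Type{<:MySparseMatrixCSC}) = MySparseMatrixCOO
```
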
To use `SparseArrays.findnz`, your host-side type **must** implement `sortperm`. This can be done with scalar indexing, but it will be very slow; a naive sketch follows.

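One deliberately naive way to meet that requirement, assuming a hypothetical dense host-side vector type `MyVector` (illustrative; a real backend would use a device-side sort rather than a round trip through the CPU):

```julia
# Copy to the host, compute the permutation there, and upload the result again.
# Correct but slow, since it moves the whole vector across the device boundary.
function Base.sortperm(v::MyVector)
    return MyVector(sortperm(Array(v)))
end
```
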
Additionally, you need to teach `GPUArrays.jl` how to translate your backend's specific types onto the device. `GPUArrays.jl` provides the device-side types:

- `GPUSparseDeviceVector`
- `GPUSparseDeviceMatrixCSC`
- `GPUSparseDeviceMatrixCSR`
- `GPUSparseDeviceMatrixBSR`
- `GPUSparseDeviceMatrixCOO`

You will need to create a method of `Adapt.adapt_structure` for each format your backend supports. **Note** that if your backend supports separate address spaces, as CUDA and ROCm do, you need to provide a parameter to these device-side arrays to indicate in which address space the underlying pointers live. An example of adapting an array to the device-side struct:

```julia
function GPUArrays.GPUSparseDeviceVector(iPtr::MyDeviceVector{Ti, A},
                                         nzVal::MyDeviceVector{Tv, A},
                                         len::Int,
                                         nnz::Ti) where {Ti, Tv, A}
    GPUArrays.GPUSparseDeviceVector{Tv, Ti, MyDeviceVector{Ti, A}, MyDeviceVector{Tv, A}, A}(iPtr, nzVal, len, nnz)
end

function Adapt.adapt_structure(to::MyAdaptor, x::MySparseVector)
    return GPUArrays.GPUSparseDeviceVector(
        adapt(to, x.iPtr),
        adapt(to, x.nzVal),
        length(x), x.nnz
    )
end
```

You'll also need to inform `GPUArrays.jl` and `GPUCompiler.jl` which backend your sparse arrays belong to by extending `KernelAbstractions.jl`'s `get_backend()`:

```julia
KA.get_backend(::MySparseVector) = MyBackend()
```

lib/JLArrays/Project.toml

Lines changed: 4 additions & 0 deletions

@@ -7,11 +7,15 @@ version = "0.3.0"
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[compat]
Adapt = "2.0, 3.0, 4.0"
GPUArrays = "11.1"
KernelAbstractions = "0.9, 0.10"
LinearAlgebra = "1"
Random = "1"
SparseArrays = "1"
julia = "1.8"

lib/JLArrays/src/JLArrays.jl

Lines changed: 224 additions & 1 deletion

@@ -6,11 +6,14 @@

module JLArrays

-export JLArray, JLVector, JLMatrix, jl, JLBackend
+export JLArray, JLVector, JLMatrix, jl, JLBackend, JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR

using GPUArrays

using Adapt
using SparseArrays, LinearAlgebra

import GPUArrays: dense_array_type

import KernelAbstractions
import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config

@@ -19,6 +22,11 @@ import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, Dyn
    import KernelAbstractions: POCL
end

module AS

const Generic = 0

end

#
# Device functionality

@@ -115,7 +123,141 @@ mutable struct JLArray{T, N} <: AbstractGPUArray{T, N}
    end
end

mutable struct JLSparseVector{Tv, Ti} <: GPUArrays.AbstractGPUSparseVector{Tv, Ti}
    iPtr::JLArray{Ti, 1}
    nzVal::JLArray{Tv, 1}
    len::Int
    nnz::Ti

    function JLSparseVector{Tv, Ti}(iPtr::JLArray{<:Integer, 1}, nzVal::JLArray{Tv, 1},
                                    len::Integer) where {Tv, Ti <: Integer}
        new{Tv, Ti}(iPtr, nzVal, len, length(nzVal))
    end
end
SparseArrays.nnz(x::JLSparseVector) = x.nnz
SparseArrays.nonzeroinds(x::JLSparseVector) = x.iPtr
SparseArrays.nonzeros(x::JLSparseVector) = x.nzVal

mutable struct JLSparseMatrixCSC{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSC{Tv, Ti}
    colPtr::JLArray{Ti, 1}
    rowVal::JLArray{Ti, 1}
    nzVal::JLArray{Tv, 1}
    dims::NTuple{2,Int}
    nnz::Ti

    function JLSparseMatrixCSC{Tv, Ti}(colPtr::JLArray{<:Integer, 1}, rowVal::JLArray{<:Integer, 1},
                                       nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
        new{Tv, Ti}(colPtr, rowVal, nzVal, dims, length(nzVal))
    end
end
function JLSparseMatrixCSC(colPtr::JLArray{Ti, 1}, rowVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
    return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, dims)
end
SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSC) = SparseMatrixCSC(size(x)..., Array(x.colPtr), Array(x.rowVal), Array(x.nzVal))

JLSparseMatrixCSC(A::JLSparseMatrixCSC) = A

function Base.getindex(A::JLSparseMatrixCSC{Tv, Ti}, i::Integer, j::Integer) where {Tv, Ti}
    @boundscheck checkbounds(A, i, j)
    r1 = Int(@inbounds A.colPtr[j])
    r2 = Int(@inbounds A.colPtr[j+1]-1)
    (r1 > r2) && return zero(Tv)
    r1 = searchsortedfirst(view(A.rowVal, r1:r2), i) + r1 - 1
    ((r1 > r2) || (A.rowVal[r1] != i)) ? zero(Tv) : A.nzVal[r1]
end

mutable struct JLSparseMatrixCSR{Tv, Ti} <: GPUArrays.AbstractGPUSparseMatrixCSR{Tv, Ti}
    rowPtr::JLArray{Ti, 1}
    colVal::JLArray{Ti, 1}
    nzVal::JLArray{Tv, 1}
    dims::NTuple{2,Int}
    nnz::Ti

    function JLSparseMatrixCSR{Tv, Ti}(rowPtr::JLArray{<:Integer, 1}, colVal::JLArray{<:Integer, 1},
                                       nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti<:Integer}
        new{Tv, Ti}(rowPtr, colVal, nzVal, dims, length(nzVal))
    end
end
function JLSparseMatrixCSR(rowPtr::JLArray{Ti, 1}, colVal::JLArray{Ti, 1}, nzVal::JLArray{Tv, 1}, dims::NTuple{2,<:Integer}) where {Tv, Ti <: Integer}
    return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, dims)
end
function SparseArrays.SparseMatrixCSC(x::JLSparseMatrixCSR)
    x_transpose = SparseMatrixCSC(size(x, 2), size(x, 1), Array(x.rowPtr), Array(x.colVal), Array(x.nzVal))
    return SparseMatrixCSC(transpose(x_transpose))
end

JLSparseMatrixCSC(Mat::Union{Transpose{Tv, <:SparseMatrixCSC}, Adjoint{Tv, <:SparseMatrixCSC}}) where {Tv} = JLSparseMatrixCSC(JLSparseMatrixCSR(Mat))

function Base.size(g::JLSparseMatrixCSR, d::Integer)
    if 1 <= d <= 2
        return g.dims[d]
    elseif d > 1
        return 1
    else
        throw(ArgumentError("dimension must be ≥ 1, got $d"))
    end
end

JLSparseMatrixCSR(Mat::Transpose{Tv, <:SparseMatrixCSC}) where {Tv} =
    JLSparseMatrixCSR(JLVector{Cint}(parent(Mat).colptr), JLVector{Cint}(parent(Mat).rowval),
                      JLVector(parent(Mat).nzval), size(Mat))
JLSparseMatrixCSR(Mat::Adjoint{Tv, <:SparseMatrixCSC}) where {Tv} =
    JLSparseMatrixCSR(JLVector{Cint}(parent(Mat).colptr), JLVector{Cint}(parent(Mat).rowval),
                      JLVector(conj.(parent(Mat).nzval)), size(Mat))

JLSparseMatrixCSR(A::JLSparseMatrixCSR) = A

function Base.getindex(A::JLSparseMatrixCSR{Tv, Ti}, i0::Integer, i1::Integer) where {Tv, Ti}
    @boundscheck checkbounds(A, i0, i1)
    c1 = Int(A.rowPtr[i0])
    c2 = Int(A.rowPtr[i0+1]-1)
    (c1 > c2) && return zero(Tv)
    c1 = searchsortedfirst(A.colVal, i1, c1, c2, Base.Order.Forward)
    (c1 > c2 || A.colVal[c1] != i1) && return zero(Tv)
    nonzeros(A)[c1]
end

GPUArrays.storage(a::JLArray) = a.data
GPUArrays.dense_array_type(a::JLArray{T, N}) where {T, N} = JLArray{T, N}
GPUArrays.dense_array_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, N}
GPUArrays.dense_vector_type(a::JLArray{T, N}) where {T, N} = JLArray{T, 1}
GPUArrays.dense_vector_type(::Type{JLArray{T, N}}) where {T, N} = JLArray{T, 1}

GPUArrays.sparse_array_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSC
GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSC}) = JLSparseMatrixCSC
GPUArrays.sparse_array_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSR
GPUArrays.sparse_array_type(::Type{<:JLSparseMatrixCSR}) = JLSparseMatrixCSR
GPUArrays.sparse_array_type(sa::JLSparseVector) = JLSparseVector
GPUArrays.sparse_array_type(::Type{<:JLSparseVector}) = JLSparseVector

GPUArrays.dense_array_type(sa::JLSparseVector) = JLArray
GPUArrays.dense_array_type(::Type{<:JLSparseVector}) = JLArray
GPUArrays.dense_array_type(sa::JLSparseMatrixCSC) = JLArray
GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSC}) = JLArray
GPUArrays.dense_array_type(sa::JLSparseMatrixCSR) = JLArray
GPUArrays.dense_array_type(::Type{<:JLSparseMatrixCSR}) = JLArray

GPUArrays.csc_type(sa::JLSparseMatrixCSR) = JLSparseMatrixCSC
GPUArrays.csr_type(sa::JLSparseMatrixCSC) = JLSparseMatrixCSR

Base.similar(Mat::JLSparseMatrixCSR) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat))
Base.similar(Mat::JLSparseMatrixCSR, T::Type) = JLSparseMatrixCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), size(Mat))

Base.similar(Mat::JLSparseMatrixCSC, T::Type, N::Int, M::Int) = JLSparseMatrixCSC(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M))
Base.similar(Mat::JLSparseMatrixCSR, T::Type, N::Int, M::Int) = JLSparseMatrixCSR(JLVector([zero(Int32)]), JLVector{Int32}(undef, 0), JLVector{T}(undef, 0), (N, M))

Base.similar(Mat::JLSparseMatrixCSC{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M)
Base.similar(Mat::JLSparseMatrixCSR{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M)

Base.similar(Mat::JLSparseMatrixCSC, T::Type, dims::Tuple{Int, Int}) = similar(Mat, T, dims...)
Base.similar(Mat::JLSparseMatrixCSR, T::Type, dims::Tuple{Int, Int}) = similar(Mat, T, dims...)

Base.similar(Mat::JLSparseMatrixCSC, dims::Tuple{Int, Int}) = similar(Mat, dims...)
Base.similar(Mat::JLSparseMatrixCSR, dims::Tuple{Int, Int}) = similar(Mat, dims...)

JLArray(x::JLSparseVector) = JLArray(collect(SparseVector(x)))
JLArray(x::JLSparseMatrixCSC) = JLArray(collect(SparseMatrixCSC(x)))
JLArray(x::JLSparseMatrixCSR) = JLArray(collect(SparseMatrixCSC(x)))

# conversion of untyped data to a typed Array
function typed_data(x::JLArray{T}) where {T}
@@ -217,6 +359,79 @@ JLArray{T}(xs::AbstractArray{S,N}) where {T,N,S} = JLArray{T,N}(xs)
(::Type{JLArray{T,N} where T})(x::AbstractArray{S,N}) where {S,N} = JLArray{S,N}(x)
JLArray(A::AbstractArray{T,N}) where {T,N} = JLArray{T,N}(A)

function JLSparseVector(xs::SparseVector{Tv, Ti}) where {Ti, Tv}
    iPtr = JLVector{Ti}(undef, length(xs.nzind))
    nzVal = JLVector{Tv}(undef, length(xs.nzval))
    copyto!(iPtr, convert(Vector{Ti}, xs.nzind))
    copyto!(nzVal, convert(Vector{Tv}, xs.nzval))
    return JLSparseVector{Tv, Ti}(iPtr, nzVal, length(xs))
end
Base.length(x::JLSparseVector) = x.len
Base.size(x::JLSparseVector) = (x.len,)

function JLSparseMatrixCSC(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv}
    colPtr = JLVector{Ti}(undef, length(xs.colptr))
    rowVal = JLVector{Ti}(undef, length(xs.rowval))
    nzVal = JLVector{Tv}(undef, length(xs.nzval))
    copyto!(colPtr, convert(Vector{Ti}, xs.colptr))
    copyto!(rowVal, convert(Vector{Ti}, xs.rowval))
    copyto!(nzVal, convert(Vector{Tv}, xs.nzval))
    return JLSparseMatrixCSC{Tv, Ti}(colPtr, rowVal, nzVal, (xs.m, xs.n))
end
JLSparseMatrixCSC(xs::SparseVector) = JLSparseMatrixCSC(SparseMatrixCSC(xs))
Base.length(x::JLSparseMatrixCSC) = prod(x.dims)
Base.size(x::JLSparseMatrixCSC) = x.dims

function JLSparseMatrixCSR(xs::SparseMatrixCSC{Tv, Ti}) where {Ti, Tv}
    csr_xs = SparseMatrixCSC(transpose(xs))
    rowPtr = JLVector{Ti}(undef, length(csr_xs.colptr))
    colVal = JLVector{Ti}(undef, length(csr_xs.rowval))
    nzVal = JLVector{Tv}(undef, length(csr_xs.nzval))
    copyto!(rowPtr, convert(Vector{Ti}, csr_xs.colptr))
    copyto!(colVal, convert(Vector{Ti}, csr_xs.rowval))
    copyto!(nzVal, convert(Vector{Tv}, csr_xs.nzval))
    return JLSparseMatrixCSR{Tv, Ti}(rowPtr, colVal, nzVal, (xs.m, xs.n))
end
JLSparseMatrixCSR(xs::SparseVector{Tv, Ti}) where {Ti, Tv} = JLSparseMatrixCSR(SparseMatrixCSC(xs))
function JLSparseMatrixCSR(xs::JLSparseMatrixCSC{Tv, Ti}) where {Ti, Tv}
    return JLSparseMatrixCSR(SparseMatrixCSC(xs))
end
function JLSparseMatrixCSC(xs::JLSparseMatrixCSR{Tv, Ti}) where {Ti, Tv}
    return JLSparseMatrixCSC(SparseMatrixCSC(xs))
end
function Base.copyto!(dst::JLSparseMatrixCSR, src::JLSparseMatrixCSR)
    if size(dst) != size(src)
        throw(ArgumentError("Inconsistent Sparse Matrix size"))
    end
    resize!(dst.rowPtr, length(src.rowPtr))
    resize!(dst.colVal, length(src.colVal))
    resize!(SparseArrays.nonzeros(dst), length(SparseArrays.nonzeros(src)))
    copyto!(dst.rowPtr, src.rowPtr)
    copyto!(dst.colVal, src.colVal)
    copyto!(SparseArrays.nonzeros(dst), SparseArrays.nonzeros(src))
    dst.nnz = src.nnz
    dst
end
Base.length(x::JLSparseMatrixCSR) = prod(x.dims)
Base.size(x::JLSparseMatrixCSR) = x.dims

function GPUArrays._spadjoint(A::JLSparseMatrixCSR)
    Aᴴ = JLSparseMatrixCSC(A.rowPtr, A.colVal, conj(A.nzVal), reverse(size(A)))
    JLSparseMatrixCSR(Aᴴ)
end
function GPUArrays._sptranspose(A::JLSparseMatrixCSR)
    Aᵀ = JLSparseMatrixCSC(A.rowPtr, A.colVal, A.nzVal, reverse(size(A)))
    JLSparseMatrixCSR(Aᵀ)
end
function GPUArrays._spadjoint(A::JLSparseMatrixCSC)
    Aᴴ = JLSparseMatrixCSR(A.colPtr, A.rowVal, conj(A.nzVal), reverse(size(A)))
    JLSparseMatrixCSC(Aᴴ)
end
function GPUArrays._sptranspose(A::JLSparseMatrixCSC)
    Aᵀ = JLSparseMatrixCSR(A.colPtr, A.rowVal, A.nzVal, reverse(size(A)))
    JLSparseMatrixCSC(Aᵀ)
end

# idempotency
JLArray{T,N}(xs::JLArray{T,N}) where {T,N} = xs

@@ -358,9 +573,17 @@ function GPUArrays.mapreducedim!(f, op, R::AnyJLArray, A::Union{AbstractArray,Br
    R
end

Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} =
    GPUSparseDeviceMatrixCSC{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.colPtr), adapt(to, x.rowVal), adapt(to, x.nzVal), x.dims, x.nnz)
Adapt.adapt_structure(to::Adaptor, x::JLSparseMatrixCSR{Tv,Ti}) where {Tv,Ti} =
    GPUSparseDeviceMatrixCSR{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.rowPtr), adapt(to, x.colVal), adapt(to, x.nzVal), x.dims, x.nnz)
Adapt.adapt_structure(to::Adaptor, x::JLSparseVector{Tv,Ti}) where {Tv,Ti} =
    GPUSparseDeviceVector{Tv,Ti,JLDeviceArray{Ti, 1}, JLDeviceArray{Tv, 1}, AS.Generic}(adapt(to, x.iPtr), adapt(to, x.nzVal), x.len, x.nnz)

## KernelAbstractions interface

KernelAbstractions.get_backend(a::JLA) where JLA <: JLArray = JLBackend()
KernelAbstractions.get_backend(a::JLA) where JLA <: Union{JLSparseMatrixCSC, JLSparseMatrixCSR, JLSparseVector} = JLBackend()

function KernelAbstractions.mkcontext(kernel::Kernel{JLBackend}, I, _ndrange, iterspace, ::Dynamic) where Dynamic
    return KernelAbstractions.CompilerMetadata{KernelAbstractions.ndrange(kernel), Dynamic}(I, _ndrange, iterspace)

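Taken together, the definitions above give the `JLArrays` reference backend host-side sparse containers that round-trip to and from `SparseArrays`. A small hypothetical exercise of those constructors (assuming the methods behave as shown in this diff):

```julia
using SparseArrays, JLArrays

A  = sprand(Float64, 8, 8, 0.25)   # host CSC matrix from SparseArrays
dA = JLSparseMatrixCSC(A)          # wrap it in the JLArrays CSC container
dR = JLSparseMatrixCSR(A)          # or build the CSR layout directly

dA[2, 3]                           # scalar getindex, via the CSC lookup defined above
B  = SparseMatrixCSC(dA)           # round-trip back to a SparseArrays matrix
D  = JLArray(dA)                   # densify into the backend's dense array type
```
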
src/GPUArrays.jl

Lines changed: 2 additions & 0 deletions

@@ -19,6 +19,7 @@ using KernelAbstractions

# device functionality
include("device/abstractarray.jl")
include("device/sparse.jl")

# host abstractions
include("host/abstractarray.jl")

@@ -34,6 +35,7 @@ include("host/random.jl")
include("host/quirks.jl")
include("host/uniformscaling.jl")
include("host/statistics.jl")
include("host/sparse.jl")
include("host/alloc_cache.jl")