Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancements to cuda array creation #19

Open
wants to merge 37 commits into
base: cuda_main_temp
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4ab2e9f
Add cudaMemcpyKind enum and fix typo in printing
Pinkyboi Nov 18, 2022
6057987
Merge branch 'cuda_main_temp' into cuda_memcopy
Pinkyboi Nov 18, 2022
69cb9d8
Addition of context in array creation
Pinkyboi Nov 18, 2022
77b25a0
Choose context using decorators
Pinkyboi Nov 18, 2022
521d199
Add cuda to_host and to_device
Pinkyboi Nov 19, 2022
ca9c7d1
Raise error when creating host array from kernel
Pinkyboi Nov 19, 2022
938c732
Create temporary variables when using numpy/cuda/cupy array as arg
Pinkyboi Nov 19, 2022
423571a
Add docstring to CudaToHost, CudaToDevice and CudaGrid
Pinkyboi Nov 19, 2022
7a703ae
Add test for cuda.to_device and cuda.to_host
Pinkyboi Nov 19, 2022
f4d773b
Remove numba functions from cudaext
Pinkyboi Dec 3, 2022
07adae4
Merge branch 'cuda_main_temp' into cuda_memcopy
Pinkyboi Dec 3, 2022
cb5c4cc
Split test to_host and to device
Pinkyboi Dec 29, 2022
c0d4d32
Try reinstall
EmilyBourne Dec 30, 2022
4905573
Add docstring for memory_location and current_context
Pinkyboi Dec 31, 2022
c427bb5
Use f-string in copy_CudaArray_Data
Pinkyboi Dec 31, 2022
b4e5670
Disable missing docstring and add missing final new line in ccuda tests
Pinkyboi Dec 31, 2022
b58fb7a
Merge with cuda_main_temp
Pinkyboi Dec 31, 2022
de6dc5c
Use f-string in memcpy_kind
Pinkyboi Dec 31, 2022
bb1dbba
Try to fix django not configured pylint error
Pinkyboi Jan 1, 2023
41fa424
Remove disable=django-not-configure
Pinkyboi Jan 1, 2023
f0330fb
Merge branch 'cuda_main_temp' into cuda_memcopy
Pinkyboi Jan 4, 2023
0f0a1fc
Merge branch 'cuda_main_temp' into cuda_memcopy
Pinkyboi Jan 7, 2023
9ea3abf
Revert change on action.yml
Pinkyboi Jan 7, 2023
bec781c
Separate and / or with parentheses
Pinkyboi Jan 7, 2023
6f16b4e
Use get to check if array's memory location
Pinkyboi Jan 7, 2023
8b89bba
Change CudaGrid doc string
Pinkyboi Jan 7, 2023
b8643d2
Use tuple instead of list for variable
Pinkyboi Jan 7, 2023
38b2d10
Add current_context to variable in _infere_type
Pinkyboi Jan 9, 2023
4a8fab5
Put current context check in _assign_lhs_variable
Pinkyboi Jan 10, 2023
7b099cc
Merge branch 'cuda_main_temp' into cuda_memcopy
Pinkyboi Jan 11, 2023
db8d76a
Revert "Put current context check in _assign_lhs_variable"
Pinkyboi Jan 13, 2023
b63f075
Fix previous failed merge
Pinkyboi Jan 13, 2023
1e66985
Add CudaArray import
Pinkyboi Jan 13, 2023
68ad097
Remove already defined cudaMemcpyKind enum
Pinkyboi Jan 14, 2023
2896396
Remove old to_host / to_device test functions
Pinkyboi Jan 14, 2023
3933de5
Add CupyArray/CudaArray to _infere_type
Pinkyboi Jan 14, 2023
587b1f8
Merge remote-tracking branch 'origin/cuda_main_temp' into cuda_memcopy
EmilyBourne Mar 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions pyccel/ast/cudaext.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ class CudaArray(CudaNewArray):
arg : list, tuple, PythonList

"""
__slots__ = ('_arg','_dtype','_precision','_shape','_rank','_order','_memory_location')
__slots__ = ('_arg','_dtype','_precision','_shape','_rank','_order','_memory_location', '_current_context')
_attribute_nodes = ('_arg',)
name = 'array'

def __init__(self, arg, dtype=None, order='C', memory_location='managed'):
def __init__(self, arg, dtype=None, order='C', current_context='host', memory_location='managed'):

if not isinstance(arg, (PythonTuple, PythonList, Variable)):
raise TypeError('Unknown type of %s.' % type(arg))
Expand Down Expand Up @@ -119,13 +119,18 @@ def __init__(self, arg, dtype=None, order='C', memory_location='managed'):
#Verify memory location
if memory_location not in ('host', 'device', 'managed'):
raise ValueError("memory_location must be 'host', 'device' or 'managed'")
# ...
if current_context not in ('host', 'device'):
raise ValueError("The current context can only be 'host' or 'device'")

self._arg = arg
self._shape = shape
self._rank = rank
self._dtype = dtype
self._order = order
self._precision = prec
self._memory_location = memory_location
self._current_context = current_context
super().__init__()

def __str__(self):
Expand All @@ -136,8 +141,16 @@ def arg(self):
return self._arg
@property
def memory_location(self):
""" Indicates if the array is in the host or device memory
"""
return self._memory_location

@property
def current_context(self):
""" Indicates if the array created is in a host or device context
"""
return self._current_context

class CudaSynchronize(PyccelInternalFunction):
"Represents a call to Cuda.deviceSynchronize for code generation."

Expand Down Expand Up @@ -281,8 +294,6 @@ def __new__(cls, dim=0):
return expr[0]
return PythonTuple(*expr)



cuda_funcs = {
'array' : PyccelFunctionDef('array' , CudaArray),
'copy' : PyccelFunctionDef('copy' , CudaCopy),
Expand Down
21 changes: 18 additions & 3 deletions pyccel/ast/cupyext.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ class CupyArray(CupyNewArray):
arg : list, tuple, PythonList

"""
__slots__ = ('_arg','_dtype','_precision','_shape','_rank','_order')
__slots__ = ('_arg', '_dtype','_precision','_shape','_rank','_order', '_memory_location', '_current_context')
_attribute_nodes = ('_arg',)
name = 'array'

def __init__(self, arg, dtype=None, order='C'):
def __init__(self, arg, dtype=None, order='C', current_context='host'):

if not isinstance(arg, (PythonTuple, PythonList, Variable)):
raise TypeError('Unknown type of %s.' % type(arg))
Expand All @@ -110,7 +110,8 @@ def __init__(self, arg, dtype=None, order='C'):

shape = process_shape(False, arg.shape)
rank = len(shape)

if current_context not in ('host', 'device'):
raise ValueError("The current context can only be 'host' or 'device'")
if rank < 2:
order = None
else:
Expand All @@ -131,6 +132,8 @@ def __init__(self, arg, dtype=None, order='C'):
self._dtype = dtype
self._order = order
self._precision = prec
self._current_context = current_context
self._memory_location = 'device'
super().__init__()

def __str__(self):
Expand All @@ -140,6 +143,18 @@ def __str__(self):
def arg(self):
return self._arg

@property
def current_context(self):
""" Indicates if the array created is in a host or device context
"""
return self._current_context

@property
def memory_location(self):
""" Indicates if the array is in the host or device memory
"""
return self._memory_location

#==============================================================================
class CupyArange(CupyNewArray):
"""
Expand Down
15 changes: 13 additions & 2 deletions pyccel/ast/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ class base if variable is an object or an object member [Default value: None]
>>> Variable('int', DottedName('matrix', 'n_rows'))
matrix.n_rows
"""
__slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_is_const',
'_is_target', '_is_optional', '_allows_negative_indexes',
__slots__ = ('_name', '_alloc_shape', '_memory_handling', '_memory_location', '_current_context',
'_is_const', '_is_target', '_is_optional', '_allows_negative_indexes',
'_cls_base', '_is_argument', '_is_kwonly', '_is_temp','_dtype','_precision',
'_rank','_shape','_order','_is_private')
_attribute_nodes = ()
Expand All @@ -123,6 +123,7 @@ def __init__(
rank=0,
memory_handling='stack',
memory_location='host',
current_context = 'host',
is_const=False,
is_target=False,
is_optional=False,
Expand Down Expand Up @@ -162,6 +163,10 @@ def __init__(
raise ValueError("memory_location must be 'host', 'device' or 'managed'")
self._memory_location = memory_location

if current_context not in ('host', 'device'):
raise ValueError("The current context can only be 'host' or 'device'")
self._current_context = current_context

if not isinstance(is_const, bool):
raise TypeError('is_const must be a boolean.')
self._is_const = is_const
Expand Down Expand Up @@ -323,6 +328,12 @@ def memory_location(self):
"""
return self._memory_location

@property
def current_context(self):
""" Indicates if the variable is currently in the host or device context
"""
return self._current_context

@memory_location.setter
def memory_location(self, memory_location):
if memory_location not in ('host', 'device', 'managed'):
Expand Down
10 changes: 8 additions & 2 deletions pyccel/codegen/printing/ccudacode.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,19 +521,25 @@ def copy_CudaArray_Data(self, expr):
declare_dtype = self.find_in_dtype_registry(self._print(rhs.dtype), rhs.precision)
dtype = self.find_in_ndarray_type_registry(self._print(rhs.dtype), rhs.precision)
arg = rhs.arg if isinstance(rhs, (CudaArray, CupyArray)) else rhs

memcpy_kind_src = str(rhs.current_context).capitalize()
memcpy_kind_dest = 'Host' if rhs.memory_location == 'host' else 'Device'
memcpy_kind = f"cudaMemcpy{memcpy_kind_src}To{memcpy_kind_dest}"

if rhs.rank > 1:
# flattening the args to use them in C initialization.
arg = self._flatten_list(arg)

self.add_import(c_imports['string'])
if isinstance(arg, Variable):
arg = self._print(arg)
cpy_data = "cudaMemcpy({0}.raw_data, {1}.{2}, {0}.buffer_size, cudaMemcpyHostToDevice);".format(lhs, arg, dtype)
cpy_data = f"cudaMemcpy({lhs}.raw_data, {arg}.{dtype}, {lhs}.buffer_size, {memcpy_kind});"
return '%s\n' % (cpy_data)
else :
arg = ', '.join(self._print(i) for i in arg)
dummy_array = "%s %s[] = {%s};\n" % (declare_dtype, dummy_array_name, arg)
cpy_data = "cudaMemcpy({0}.raw_data, {1}, {0}.buffer_size, cudaMemcpyHostToDevice);".format(self._print(lhs), dummy_array_name, dtype)
target_array_name = self._print(lhs)
cpy_data = f"cudaMemcpy({target_array_name}.raw_data, {dummy_array_name}, {target_array_name}.buffer_size, {memcpy_kind});"
return '%s%s\n' % (dummy_array, cpy_data)

def _print_CudaSynchronize(self, expr):
Expand Down
44 changes: 42 additions & 2 deletions pyccel/parser/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
from pyccel.ast.core import Assert

from pyccel.ast.class_defs import NumpyArrayClass, TupleClass, get_cls_base, CudaArrayClass
from pyccel.ast.cupyext import CupyArray

from pyccel.ast.datatypes import NativeRange, str_dtype
from pyccel.ast.datatypes import NativeSymbol
Expand Down Expand Up @@ -96,7 +97,8 @@
from pyccel.ast.numpyext import DtypePrecisionToCastFunction

from pyccel.ast.cupyext import CupyNewArray
from pyccel.ast.cudaext import CudaNewArray, CudaThreadIdx, CudaBlockDim, CudaBlockIdx, CudaGridDim
from pyccel.ast.cudaext import CudaArray, CudaNewArray, CudaThreadIdx, CudaBlockDim, CudaBlockIdx, CudaGridDim


from pyccel.ast.omp import (OMP_For_Loop, OMP_Simd_Construct, OMP_Distribute_Construct,
OMP_TaskLoop_Construct, OMP_Sections_Construct, Omp_End_Clause,
Expand Down Expand Up @@ -436,6 +438,7 @@ def _infere_type(self, expr, **settings):
d_var['datatype' ] = expr.dtype
d_var['memory_handling'] = expr.memory_handling
d_var['memory_location'] = expr.memory_location
d_var['current_context'] = expr.current_context
d_var['shape' ] = expr.shape
d_var['rank' ] = expr.rank
d_var['cls_base' ] = expr.cls_base
Expand Down Expand Up @@ -493,6 +496,18 @@ def _infere_type(self, expr, **settings):
d_var['cls_base' ] = NumpyArrayClass
return d_var

elif isinstance(expr, CupyArray):
d_var['datatype' ] = expr.dtype
d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
d_var['memory_location'] = expr.memory_location
d_var['current_context'] = expr.current_context
d_var['shape' ] = expr.shape
d_var['rank' ] = expr.rank
d_var['order' ] = expr.order
d_var['precision' ] = expr.precision
d_var['cls_base' ] = CudaArrayClass
return d_var

elif isinstance(expr, CupyNewArray):
d_var['datatype' ] = expr.dtype
d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
Expand All @@ -504,6 +519,18 @@ def _infere_type(self, expr, **settings):
d_var['cls_base' ] = CudaArrayClass
return d_var

elif isinstance(expr, CudaArray):
d_var['datatype' ] = expr.dtype
d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
d_var['memory_location'] = expr.memory_location
d_var['current_context'] = expr.current_context
d_var['shape' ] = expr.shape
d_var['rank' ] = expr.rank
d_var['order' ] = expr.order
d_var['precision' ] = expr.precision
d_var['cls_base' ] = CudaArrayClass
return d_var

elif isinstance(expr, CudaNewArray):
d_var['datatype' ] = expr.dtype
d_var['memory_handling'] = 'heap' if expr.rank > 0 else 'stack'
Expand Down Expand Up @@ -731,7 +758,8 @@ def _handle_function_args(self, arguments, **settings):
if isinstance(a.value, StarredArguments):
args.extend([FunctionCallArgument(av) for av in a.value.args_var])
else:
if isinstance(a.value, PyccelArithmeticOperator) and a.value.rank:
if (isinstance(a.value, PyccelArithmeticOperator) and a.value.rank)\
or isinstance(a.value, (NumpyNewArray, CudaNewArray)):
tmp_var = PyccelSymbol(self.scope.get_new_name(), is_temp=True)
assign = self._visit(Assign(tmp_var, arg.value, fst= arg.value.fst))
self._additional_exprs[-1].append(assign)
Expand Down Expand Up @@ -837,6 +865,18 @@ def _handle_function(self, expr, func, args, **settings):
severity = 'fatal')

args, kwargs = split_positional_keyword_arguments(*args)
if func in (CupyArray, CudaArray, NumpyArray):
if 'device' in self.scope.decorators or 'kernel' in self.scope.decorators:
current_context = 'device'
if kwargs.get('memory_location', 'host') == 'host':
errors.report("Host arrays cannot be allocated on the Device",
symbol = expr,
severity = 'fatal')
else:
current_context = 'host'
if func in (CudaArray, CupyArray):
kwargs['current_context'] = current_context

for a in args:
if getattr(a,'dtype',None) == 'tuple':
self._infere_type(a, **settings)
Expand Down
19 changes: 19 additions & 0 deletions tests/internal/scripts/ccuda/cuda_array_variable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring

from pyccel.decorators import kernel, types
from pyccel import cuda

@kernel
@types('int[:]')
def square(a):
    # Global thread index across the 1-D launch grid:
    # block index * threads-per-block + thread index within the block.
    index = cuda.blockIdx(0) * cuda.blockDim(0) + cuda.threadIdx(0)
    # Square the element in place. No bounds check is performed here —
    # assumes the launch configuration exactly covers len(a) (TODO confirm).
    a[index] = a[index] * a[index]

if __name__ == '__main__':
    # One block of five threads — matches the five-element array below,
    # so every element gets exactly one thread.
    threads_per_block = 5
    n_blocks = 1
    # Create the device array from a tuple variable rather than a literal:
    # this script exercises pyccel's array-creation-from-variable path.
    arr_var = (0,1,2,3,4)
    arr = cuda.array(arr_var)
    cuda.synchronize()
    # Kernel launch syntax: kernel[grid_size, block_size](args).
    square[n_blocks, threads_per_block](arr)
    # Wait for the kernel to finish before the program exits.
    cuda.synchronize()
2 changes: 2 additions & 0 deletions tests/internal/scripts/ccuda/cuda_copy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring/

from pyccel.decorators import kernel, types
from pyccel import cuda

Expand Down
2 changes: 2 additions & 0 deletions tests/internal/scripts/ccuda/cuda_grid.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring

from pyccel.decorators import kernel, types
from pyccel import cuda

Expand Down
2 changes: 2 additions & 0 deletions tests/internal/scripts/ccuda/kernel.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring

from pyccel.decorators import kernel, types
from pyccel import cuda

Expand Down
18 changes: 18 additions & 0 deletions tests/internal/scripts/ccuda/kernel_launch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# pylint: disable=missing-function-docstring, disable=unused-variable, missing-module-docstring

from pyccel.decorators import kernel, types
from pyccel import cuda

@kernel
@types('int[:]')
def func(a):
    # Global thread index across the 1-D launch grid.
    i = cuda.threadIdx(0) + cuda.blockIdx(0) * cuda.blockDim(0)
    # Print from device code; assumes the grid exactly covers len(a),
    # so a[i] is never out of bounds (TODO confirm launch config).
    print("Hello World! ", a[i])

if __name__ == '__main__':
    # One block of five threads — one thread per element of the array below.
    threads_per_block = 5
    n_blocks = 1
    arr = cuda.array([0, 1, 2, 3, 4])
    cuda.synchronize()
    # Kernel launch syntax: kernel[grid_size, block_size](args).
    func[n_blocks, threads_per_block](arr)
    # Ensure all device-side prints complete before exit.
    cuda.synchronize()
cuda.synchronize()