diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c1dcffc55..191b21e28e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,10 @@ All notable changes to this project will be documented in this file. - #32 : Add support for `nvcc` Compiler and `cuda` language as a possible option. - #48 : Fix incorrect handling of imports in `cuda`. +- #59 : Update `cuda` clash checker. - #42 : Add support for custom kernel in`cuda`. - #42 : Add Cuda module to Pyccel. Add support for `cuda.synchronize` function. +- #41 : Add support for custom device in `cuda`. ## \[UNRELEASED\] diff --git a/docs/cuda.md b/docs/cuda.md index de30d52b80..7643a4ac02 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -20,4 +20,27 @@ threadsperblock = 1 # Call your kernel function my_kernel[blockspergrid, threadsperblock]() -``` \ No newline at end of file +``` + +### device + +Device functions are similar to kernels, but are executed within the context of a kernel. They can be called only from kernels or device functions, and are typically used for operations that are too small to justify launching a separate kernel, or for operations that need to be performed repeatedly within the context of a kernel. + +```python +from pyccel.decorators import device, kernel + +@device +def add(x, y): + return x + y + +@kernel +def my_kernel(): + x = 1 + y = 2 + z = add(x, y) + print(z) + +my_kernel[1, 1]() + +``` + diff --git a/pyccel/codegen/printing/cucode.py b/pyccel/codegen/printing/cucode.py index cd26843017..7c01d93c47 100644 --- a/pyccel/codegen/printing/cucode.py +++ b/pyccel/codegen/printing/cucode.py @@ -86,9 +86,10 @@ def function_signature(self, expr, print_arg_names = True): str Signature of the function. 
""" - cuda_decorater = '__global__' if 'kernel' in expr.decorators else '' + cuda_decorator = '__global__' if 'kernel' in expr.decorators else \ + '__device__' if 'device' in expr.decorators else '' c_function_signature = super().function_signature(expr, print_arg_names) - return f'{cuda_decorater} {c_function_signature}' + return f'{cuda_decorator} {c_function_signature}' def _print_KernelCall(self, expr): func = expr.funcdef @@ -109,7 +110,7 @@ def _print_ModuleHeader(self, expr): cuda_headers = "" for f in expr.module.funcs: if not f.is_inline: - if 'kernel' in f.decorators: # Checking for 'kernel' decorator + if 'kernel' in f.decorators or 'device' in f.decorators: cuda_headers += self.function_signature(f) + ';\n' else: funcs += self.function_signature(f) + ';\n' diff --git a/pyccel/decorators.py b/pyccel/decorators.py index 77717a991f..ff413fe443 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -11,6 +11,7 @@ __all__ = ( 'allow_negative_index', 'bypass', + 'device', 'elemental', 'inline', 'private', @@ -141,3 +142,21 @@ def __getitem__(self, args): return self._f return KernelAccessor(f) + +def device(f): + """ + Decorator for marking a function as a GPU device function. + + This decorator is used to mark a Python function as a GPU device function. + + Parameters + ---------- + f : Function + The function to be marked as a device. + + Returns + ------- + f + The function marked as a device. 
+ """ + return f diff --git a/pyccel/errors/messages.py b/pyccel/errors/messages.py index 09966d810c..5fe622c29b 100644 --- a/pyccel/errors/messages.py +++ b/pyccel/errors/messages.py @@ -166,7 +166,7 @@ INVALID_KERNEL_LAUNCH_CONFIG = 'Expected exactly 2 parameters for kernel launch' INVALID_KERNEL_CALL_BP_GRID = 'Invalid Block per grid parameter for Kernel call' INVALID_KERNEL_CALL_TP_BLOCK = 'Invalid Thread per Block parameter for Kernel call' - +INVAlID_DEVICE_CALL = 'A function decorated with "device" should be called only from a "kernel" or another "device" function.' diff --git a/pyccel/naming/cudanameclashchecker.py b/pyccel/naming/cudanameclashchecker.py index 971204e912..c7aaa4952f 100644 --- a/pyccel/naming/cudanameclashchecker.py +++ b/pyccel/naming/cudanameclashchecker.py @@ -16,6 +16,7 @@ class CudaNameClashChecker(LanguageNameClashChecker): verify that they do not cause name clashes. Name clashes may be due to new variables, or due to the use of reserved keywords. """ + # Keywords as mentioned on https://en.cppreference.com/w/c/keyword keywords = set(['isign', 'fsign', 'csign', 'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', @@ -37,7 +38,40 @@ class CudaNameClashChecker(LanguageNameClashChecker): 'GET_INDEX_FUNC_H2', 'GET_INDEX_FUNC', 'GET_INDEX', 'INDEX', 'GET_ELEMENT', 'free_array', 'free_pointer', 'get_index', 'numpy_to_ndarray_strides', - 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data']) + 'numpy_to_ndarray_shape', 'get_size', 'order_f', 'order_c', 'array_copy_data', + '__global__', '__device__', '__host__','__constant__', '__shared__', + '__managed__','threadIdx', 'blockIdx', 'blockDim', 'gridDim', + 'warpSize', 'cudaMalloc', 'cudaFree', 'cudaMemcpy', 'cudaMemset', + 'cudaMallocHost', 'cudaFreeHost', 'cudaMallocPitch', + 'cudaMallocArray', 'cudaFreeArray', 'cudaHostAlloc', + 'cudaHostRegister', 'cudaHostUnregister', 'cudaHostGetDevicePointer', + 'cudaHostGetFlags', 
'cudaDeviceSynchronize', 'cudaDeviceReset', + 'cudaSetDevice', 'cudaGetDeviceCount', 'cudaGetDeviceProperties', + 'cudaChooseDevice', 'cudaSetDeviceFlags', 'cudaGetDevice', + 'cudaStreamCreate', 'cudaStreamDestroy', 'cudaStreamSynchronize', + 'cudaStreamWaitEvent', 'cudaEventCreate', 'cudaEventDestroy', 'cudaEventRecord', + 'cudaEventSynchronize', 'cudaEventElapsedTime', 'cuInit', 'cuDeviceGet', + 'cuDeviceGetCount', 'cuDeviceGetName', + 'cuDeviceComputeCapability', 'cuCtxCreate', 'cuCtxDestroy', + 'cuCtxSynchronize', 'cuModuleLoad', 'cuModuleUnload', + 'cuModuleGetFunction', 'cuModuleGetGlobal', 'cuModuleGetTexRef', + 'cuMemAlloc', 'cuMemFree', 'cuMemcpyHtoD', 'cuMemcpyDtoH', + 'cuMemcpyDtoD', 'cuMemcpyHtoDAsync', 'cuMemcpyDtoHAsync', + 'cuMemcpyDtoDAsync', 'cuMemsetD8', 'cuMemsetD16', 'cuMemsetD32', + 'cuMemsetD2D8', 'cuMemsetD2D16', 'cuMemsetD2D32', 'cuParamSetSize', + 'cuParamSeti', 'cuParamSetf', 'cuParamSetv', 'cuLaunch', 'cuLaunchGrid', + 'cuLaunchGridAsync', 'cuEventCreate', 'cuEventRecord', 'cuEventQuery', + 'cuEventSynchronize', 'cuEventDestroy', 'cuEventElapsedTime', + 'cuStreamCreate', 'cuStreamQuery', 'cuStreamSynchronize', + 'cuStreamDestroy', 'cuFuncSetBlockShape', 'cuFuncSetSharedSize', + 'cuFuncGetAttribute', 'cuTexRefCreate', 'cuTexRefDestroy', + 'cuTexRefSetArray', 'cuTexRefSetAddress', 'cuTexRefSetAddress2D', + 'cuTexRefSetFormat', 'cuTexRefSetAddressMode', 'cuTexRefSetFilterMode', + 'cuTexRefSetFlags', 'cuTexRefGetAddress', 'cuTexRefGetArray', + 'cuTexRefGetAddressMode', 'cuTexRefGetFilterMode', 'cuTexRefGetFormat', + 'cuTexRefGetFlags', 'cuLaunchKernel', 'cuOccupancyMaxActiveBlocksPerMultiprocessor', + 'cuOccupancyMaxPotentialBlockSize', 'cuOccupancyMaxPotentialBlockSizeWithFlags' + ]) def has_clash(self, name, symbols): """ diff --git a/pyccel/naming/languagenameclashchecker.py b/pyccel/naming/languagenameclashchecker.py index fa672a905b..d6415e6449 100644 --- a/pyccel/naming/languagenameclashchecker.py +++ 
b/pyccel/naming/languagenameclashchecker.py @@ -19,6 +19,11 @@ class LanguageNameClashChecker(metaclass = Singleton): """ keywords = None + def __init__(self): #pylint: disable=useless-parent-delegation + # This __init__ function is required so the ArgumentSingleton can + # always detect a signature + super().__init__() + def _get_collisionless_name(self, name, symbols): """ Get a name which doesn't collision with keywords or symbols. diff --git a/pyccel/parser/semantic.py b/pyccel/parser/semantic.py index fde10d6317..7e8dd11bb4 100644 --- a/pyccel/parser/semantic.py +++ b/pyccel/parser/semantic.py @@ -136,9 +136,10 @@ UNUSED_DECORATORS, UNSUPPORTED_POINTER_RETURN_VALUE, PYCCEL_RESTRICTION_OPTIONAL_NONE, PYCCEL_RESTRICTION_PRIMITIVE_IMMUTABLE, PYCCEL_RESTRICTION_IS_ISNOT, FOUND_DUPLICATED_IMPORT, UNDEFINED_WITH_ACCESS, MACRO_MISSING_HEADER_OR_FUNC, PYCCEL_RESTRICTION_INHOMOG_SET, - MISSING_KERNEL_CONFIGURATION, + MISSING_KERNEL_CONFIGURATION, INVAlID_DEVICE_CALL, INVALID_KERNEL_LAUNCH_CONFIG, INVALID_KERNEL_CALL_BP_GRID, INVALID_KERNEL_CALL_TP_BLOCK) + from pyccel.parser.base import BasicParser from pyccel.parser.syntactic import SyntaxParser @@ -1061,6 +1062,10 @@ def _handle_function(self, expr, func, args, *, is_method = False, use_build_fun FunctionCall/PyccelFunction The semantic representation of the call. 
""" + + if isinstance(func, FunctionDef) and 'device' in func.decorators: + if 'kernel' not in self.scope.decorators and 'device' not in self.scope.decorators: + errors.report(INVAlID_DEVICE_CALL,symbol=expr, severity='fatal') if isinstance(func, PyccelFunctionDef): if use_build_functions: annotation_method = '_build_' + func.cls_name.__name__ diff --git a/tests/cuda/test_device_semantic.py b/tests/cuda/test_device_semantic.py new file mode 100644 index 0000000000..5723991961 --- /dev/null +++ b/tests/cuda/test_device_semantic.py @@ -0,0 +1,31 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +import pytest + +from pyccel import epyccel +from pyccel.decorators import device +from pyccel.errors.errors import Errors, PyccelSemanticError +from pyccel.errors.messages import (INVAlID_DEVICE_CALL,) + + +@pytest.mark.cuda +def test_invalid_device_call(): + def invalid_device_call(): + @device + def device_call(): + pass + def fake_kernel_call(): + device_call() + + fake_kernel_call() + + errors = Errors() + + with pytest.raises(PyccelSemanticError): + epyccel(invalid_device_call, language="cuda") + + assert errors.has_errors() + + assert errors.num_messages() == 1 + + error_info = [*errors.error_info_map.values()][0][0] + assert INVAlID_DEVICE_CALL == error_info.message diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py new file mode 100644 index 0000000000..a4762a6242 --- /dev/null +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -0,0 +1,18 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import device, kernel +from pyccel import cuda + +@device +def device_call(): + print("Hello from device") + +@kernel +def kernel_call(): + device_call() + +def f(): + kernel_call[1,1]() + cuda.synchronize() + +if __name__ == '__main__': + f() diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index b4757a3c31..2d55c6e1cb 100644 --- 
a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -742,6 +742,14 @@ def test_kernel_collision(gpu_available): pyccel_test("scripts/kernel/kernel_name_collision.py", language="cuda", execute_code=gpu_available) +#------------------------------------------------------------------------------ + +@pytest.mark.cuda +def test_device_call(gpu_available): + types = str + pyccel_test("scripts/kernel/device_test.py", + language="cuda", output_dtype=types, execute_code=gpu_available) + #------------------------------------------------------------------------------ def test_print_strings(language): types = str