diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 42e78fc96e4445..1d434d512d0c5c 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -14,6 +14,7 @@ #include "flang/Evaluate/tools.h" #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/ConvertType.h" +#include "flang/Lower/ConvertVariable.h" #include "flang/Lower/IterationSpace.h" #include "flang/Lower/Mangler.h" #include "flang/Lower/OpenACC.h" @@ -368,20 +369,17 @@ class AllocateStmtHelper { [&](const Fortran::parser::AllocOpt::Mold &mold) { moldExpr = Fortran::semantics::GetExpr(mold.v.value()); }, - [&](const Fortran::parser::AllocOpt::Stream &) { - TODO(loc, "CUDA ALLOCATE(STREAM=)"); + [&](const Fortran::parser::AllocOpt::Stream &stream) { + streamExpr = Fortran::semantics::GetExpr(stream.v.value()); }, - [&](const Fortran::parser::AllocOpt::Pinned &) { - TODO(loc, "CUDA ALLOCATE(PINNED=)"); + [&](const Fortran::parser::AllocOpt::Pinned &pinned) { + pinnedExpr = Fortran::semantics::GetExpr(pinned.v.value()); }, }, allocOption.u); } void lowerAllocation(const Allocation &alloc) { - if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) - TODO(loc, "Allocation of variable with CUDA attributes"); - fir::MutableBoxValue boxAddr = genMutableBoxValue(converter, loc, alloc.getAllocObj()); @@ -456,7 +454,8 @@ class AllocateStmtHelper { const fir::MutableBoxValue &box) { if (!box.isDerived() && !errorManager.hasStatSpec() && !alloc.type.IsPolymorphic() && !alloc.hasCoarraySpec() && - !useAllocateRuntime && !box.isPointer()) { + !useAllocateRuntime && !box.isPointer() && + !Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) { // Pointers must use PointerAllocate so that their deallocations // can be validated. genInlinedAllocation(alloc, box); @@ -472,7 +471,12 @@ class AllocateStmtHelper { genSetType(alloc, box, loc); genSetDeferredLengthParameters(alloc, box); genAllocateObjectBounds(alloc, box); - mlir::Value stat = genRuntimeAllocate(builder, loc, box, errorManager); + mlir::Value stat; + if (!Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) + stat = genRuntimeAllocate(builder, loc, box, errorManager); + else + stat = + genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol()); fir::factory::syncMutableBoxFromIRBox(builder, loc, box); postAllocationAction(alloc); errorManager.assignStat(builder, loc, stat); @@ -602,7 +606,10 @@ class AllocateStmtHelper { genSetDeferredLengthParameters(alloc, box); genAllocateObjectBounds(alloc, box); mlir::Value stat; - if (isSource) + if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol())) + stat = + genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol()); + else if (isSource) stat = genRuntimeAllocateSource(builder, loc, box, exv, errorManager); else stat = genRuntimeAllocate(builder, loc, box, errorManager); @@ -717,6 +724,34 @@ class AllocateStmtHelper { return nullptr; } + mlir::Value genCudaAllocate(fir::FirOpBuilder &builder, mlir::Location loc, + const fir::MutableBoxValue &box, + ErrorManager &errorManager, + const Fortran::semantics::Symbol &sym) { + Fortran::lower::StatementContext stmtCtx; + fir::CUDADataAttributeAttr cudaAttr = + Fortran::lower::translateSymbolCUDADataAttribute(builder.getContext(), + sym); + mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr; + mlir::Value stream = + streamExpr + ? fir::getBase(converter.genExprValue(loc, *streamExpr, stmtCtx)) + : nullptr; + mlir::Value pinned = + pinnedExpr + ? fir::getBase(converter.genExprAddr(loc, *pinnedExpr, stmtCtx)) + : nullptr; + mlir::Value source = sourceExpr ? fir::getBase(sourceExv) : nullptr; + + // Keep return type the same as a standard AllocatableAllocate call. + mlir::Type retTy = fir::runtime::getModel()(builder.getContext()); + return builder + .create( + loc, retTy, box.getAddr(), errmsg, stream, pinned, source, cudaAttr, + errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr) + .getResult(); + } + Fortran::lower::AbstractConverter &converter; fir::FirOpBuilder &builder; const Fortran::parser::AllocateStmt &stmt; @@ -724,6 +759,8 @@ class AllocateStmtHelper { const Fortran::lower::SomeExpr *moldExpr{nullptr}; const Fortran::lower::SomeExpr *statExpr{nullptr}; const Fortran::lower::SomeExpr *errMsgExpr{nullptr}; + const Fortran::lower::SomeExpr *pinnedExpr{nullptr}; + const Fortran::lower::SomeExpr *streamExpr{nullptr}; // If the allocate has a type spec, lenParams contains the // value of the length parameters that were specified inside. llvm::SmallVector lenParams; diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf new file mode 100644 index 00000000000000..55223011e8d9e9 --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -0,0 +1,107 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +! Test lowering of CUDA allocatable allocate/deallocate statements. + +subroutine sub1() + real, allocatable, device :: a(:) + allocate(a(10)) +end subroutine + +! CHECK-LABEL: func.func @_QPsub1() +! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: fir.call @_FortranAAllocatableSetBounds +! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 + +subroutine sub2() + real, allocatable, managed :: a(:) + integer :: istat + allocate(a(10), stat=istat) +end subroutine + +! CHECK-LABEL: func.func @_QPsub2() +! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub2Ea"} +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub2Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub2Eistat"} +! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub2Eistat"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: fir.call @_FortranAAllocatableSetBounds +! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda, hasStat} -> i32 +! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref + +subroutine sub3() + integer, allocatable, pinned :: a(:,:) + logical :: plog + allocate(a(20,30), pinned = plog) +end subroutine + +! CHECK-LABEL: func.func @_QPsub3() +! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub3Ea"} +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub3Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[PLOG:.*]] = fir.alloca !fir.logical<4> {bindc_name = "plog", uniq_name = "_QFsub3Eplog"} +! CHECK: %[[PLOG_DECL:.*]]:2 = hlfir.declare %5 {uniq_name = "_QFsub3Eplog"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK-2: fir.call @_FortranAAllocatableSetBounds +! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> pinned(%[[PLOG_DECL]]#1 : !fir.ref>) {cuda_attr = #fir.cuda} -> i32 + +subroutine sub4() + real, allocatable, unified :: a(:) + integer :: istream + allocate(a(10), stream=istream) +end subroutine + +! CHECK-LABEL: func.func @_QPsub4() +! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub4Ea"} +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %0 {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub4Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"} +! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: fir.call @_FortranAAllocatableSetBounds +! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref +! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> stream(%[[STREAM]] : i32) {cuda_attr = #fir.cuda} -> i32 + +subroutine sub5() + real, allocatable, device :: a(:) + real, allocatable :: b(:) + allocate(a, source=b) +end subroutine + +! CHECK-LABEL: func.func @_QPsub5() +! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub5Ea"} +! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub5Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box>> {bindc_name = "b", uniq_name = "_QFsub5Eb"} +! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub5Eb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref>>> +! CHECK: fir.call @_FortranAAllocatableSetBounds +! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref>>> source(%[[LOAD_B]] : !fir.box>>) {cuda_attr = #fir.cuda} -> i32 + +subroutine sub6() + real, allocatable, device :: a(:) + real, allocatable :: b(:) + allocate(a, mold=b) +end subroutine + +! CHECK-LABEL: func.func @_QPsub6() +! CHECK: %[[BOX_A:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub6Ea"} +! CHECK: %[[BOX_A_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub6Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[BOX_B:.*]] = fir.alloca !fir.box>> {bindc_name = "b", uniq_name = "_QFsub6Eb"} +! CHECK: %[[BOX_B_DECL:.*]]:2 = hlfir.declare %[[BOX_B]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub6Eb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[LOAD_B:.*]] = fir.load %[[BOX_B_DECL]]#1 : !fir.ref>>> +! CHECK: fir.call @_FortranAAllocatableApplyMold +! CHECK: %{{.*}} = fir.cuda_allocate %[[BOX_A_DECL]]#1 : !fir.ref>>> {cuda_attr = #fir.cuda} -> i32 + +subroutine sub7() + real, allocatable, device :: a(:) + integer :: istat + character(50) :: err + allocate(a(100), stat=istat, errmsg=err) +end subroutine + +! CHECK-LABEL: func.func @_QPsub7() +! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub7Ea"} +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub7Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[ERR:.*]] = fir.alloca !fir.char<1,50> {bindc_name = "err", uniq_name = "_QFsub7Eerr"} +! CHECK: %[[ERR_DECL:.*]]:2 = hlfir.declare %[[ERR]] typeparams %{{.*}} {uniq_name = "_QFsub7Eerr"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[ISTAT:.*]] = fir.alloca i32 {bindc_name = "istat", uniq_name = "_QFsub7Eistat"} +! CHECK: %[[ISTAT_DECL:.*]]:2 = hlfir.declare %[[ISTAT]] {uniq_name = "_QFsub7Eistat"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ERR_BOX:.*]] = fir.embox %[[ERR_DECL]]#1 : (!fir.ref>) -> !fir.box> +! CHECK: fir.call @_FortranAAllocatableSetBounds +! CHECK: %[[STAT:.*]] = fir.cuda_allocate %[[BOX_DECL]]#1 : !fir.ref>>> errmsg(%[[ERR_BOX]] : !fir.box>) {cuda_attr = #fir.cuda, hasStat} -> i32 +! CHECK: fir.store %[[STAT]] to %[[ISTAT_DECL]]#1 : !fir.ref