From 41cece8c86399dd1ffcb6b7a8b50c10083fe5a40 Mon Sep 17 00:00:00 2001 From: David Truby Date: Thu, 20 Feb 2025 13:59:29 +0000 Subject: [PATCH] [flang] Add -f[no-]vectorize flags (#119718) This patch adds the -fvectorize and -fno-vectorize flags to flang. Note that this also changes the behaviour of `flang -fc1` to match that of `clang -cc1`, which is that vectorization is only enabled in the presence of the `-vectorize-loops` flag. Additionally, this patch changes the behaviour of the default optimisation levels to match clang, such that vectorization only happens at the same levels as it does there. This patch is in draft while I write an RFC to discuss the above two changes. --- clang/include/clang/Driver/Options.td | 11 +++++-- clang/lib/Driver/ToolChains/Clang.cpp | 33 ------------------- clang/lib/Driver/ToolChains/CommonArgs.cpp | 33 +++++++++++++++++++ clang/lib/Driver/ToolChains/CommonArgs.h | 2 ++ clang/lib/Driver/ToolChains/Flang.cpp | 10 ++++++ .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 4 +++ flang/lib/Frontend/FrontendActions.cpp | 2 ++ flang/test/Driver/optimization-remark.f90 | 22 ++++++------- flang/test/Integration/unroll-loops.f90 | 4 +-- flang/test/Lower/HLFIR/unroll-loops.fir | 4 +-- 11 files changed, 75 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bb74defaa28bf..e6932821d8af8 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3997,11 +3997,15 @@ defm assumptions : BoolFOption<"assumptions", "Disable codegen and compile-time checks for C++23's [[assume]] attribute">, PosFlag>; + +let Visibility = [ClangOption, FlangOption] in { def fvectorize : Flag<["-"], "fvectorize">, Group, HelpText<"Enable the loop vectorization passes">; def fno_vectorize : Flag<["-"], "fno-vectorize">, Group; def : Flag<["-"], "ftree-vectorize">, Alias; def : Flag<["-"], "fno-tree-vectorize">, Alias; +} + def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group, HelpText<"Enable the superword-level parallelism vectorization passes">; def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group; @@ -7343,6 +7347,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">, def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">, HelpText<"Link the given bitcode file before performing optimizations.">; + +def vectorize_loops : Flag<["-"], "vectorize-loops">, + HelpText<"Run the Loop vectorization passes">, + MarshallingInfoFlag>; } // let Visibility = [CC1Option, FC1Option] let Visibility = [CC1Option] in { @@ -7458,9 +7466,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt", PosFlag, NegFlag>; -def vectorize_loops : Flag<["-"], "vectorize-loops">, - HelpText<"Run the Loop vectorization passes">, - MarshallingInfoFlag>; def vectorize_slp : Flag<["-"], "vectorize-slp">, HelpText<"Run the SLP vectorization passes">, MarshallingInfoFlag>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 96af466e067a8..7c50970068fa9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args, } } -/// Vectorize at all optimization levels greater than 1 except for -Oz. -/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is -/// enabled. -static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - return true; - - if (A->getOption().matches(options::OPT_O0)) - return false; - - assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); - - // Vectorize -Os. - StringRef S(A->getValue()); - if (S == "s") - return true; - - // Don't vectorize -Oz, unless it's the slp vectorizer. - if (S == "z") - return isSlpVec; - - unsigned OptLevel = 0; - if (S.getAsInteger(10, OptLevel)) - return false; - - return OptLevel > 1; - } - - return false; -} - /// Add -x lang to \p CmdArgs for \p Input. static void addDashXForInput(const ArgList &Args, const InputInfo &Input, ArgStringList &CmdArgs) { diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2d01943ca1ac4..1a2299a92c54e 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3133,3 +3133,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args, if (use_fwrapv_pointer) CmdArgs.push_back("-fwrapv-pointer"); } + +/// Vectorize at all optimization levels greater than 1 except for -Oz. +/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is +/// enabled. +bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O4) || + A->getOption().matches(options::OPT_Ofast)) + return true; + + if (A->getOption().matches(options::OPT_O0)) + return false; + + assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); + + // Vectorize -Os. + StringRef S(A->getValue()); + if (S == "s") + return true; + + // Don't vectorize -Oz, unless it's the slp vectorizer. + if (S == "z") + return isSlpVec; + + unsigned OptLevel = 0; + if (S.getAsInteger(10, OptLevel)) + return false; + + return OptLevel > 1; + } + + return false; +} diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index b6ddd99b87279..783a1f834b33d 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC, void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); +bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args, + bool isSlpVec); } // end namespace tools } // end namespace driver } // end namespace clang diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 9ad795edd724d..45c26cf8c3159 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + // Enable vectorization per default according to the optimization level + // selected. For optimization levels that want vectorization we use the alias + // option to simplify the hasFlag logic. + bool enableVec = shouldEnableVectorizerAtOLevel(Args, false); + OptSpecifier vectorizeAliasOption = + enableVec ? options::OPT_O_Group : options::OPT_fvectorize; + if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption, + options::OPT_fno_vectorize, enableVec)) + CmdArgs.push_back("-vectorize-loops"); + if (shouldLoopVersion(Args)) CmdArgs.push_back("-fversion-loops-for-stride"); diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index deb8d1aede518..44cb5a2cdd497 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the ///< compile step. CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) +CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index f3d9432c62d3b..724316f1b30c7 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -23,6 +23,7 @@ #include "clang/Basic/AllDiagnostics.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/OptionUtils.h" #include "clang/Driver/Options.h" @@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) + opts.VectorizeLoop = 1; + if (args.hasFlag(clang::driver::options::OPT_floop_versioning, clang::driver::options::OPT_fno_loop_versioning, false)) opts.LoopVersioning = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 763c810ace0eb..76d329d043731 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -1037,6 +1037,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { si.getTimePasses().setOutStream(ci.getTimingStreamLLVM()); pto.LoopUnrolling = opts.UnrollLoops; pto.LoopInterleaving = opts.UnrollLoops; + pto.LoopVectorization = opts.VectorizeLoop; + llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic); // Attempt to load pass plugins and register their callbacks with PB. diff --git a/flang/test/Driver/optimization-remark.f90 b/flang/test/Driver/optimization-remark.f90 index e90baa892f46a..90e310d36c807 100644 --- a/flang/test/Driver/optimization-remark.f90 +++ b/flang/test/Driver/optimization-remark.f90 @@ -5,33 +5,33 @@ ! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1 ! Check fc1 can handle -Rpass -! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS ! Check that we can override -Rpass= with -Rno-pass. -! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted -! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS -! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS +! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS ! Check valid -Rpass regex -! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY ! Check valid -Rpass-missed regex -! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY ! Check valid -Rpass-analysis regex -! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY +! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY ! Check full -Rpass message is emitted -! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS +! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS ! Check full -Rpass-missed message is emitted -! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED +! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED ! Check full -Rpass-analysis message is emitted -! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS +! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS ! REMARKS: remark: ! NO-REMARKS-NOT: remark: diff --git a/flang/test/Integration/unroll-loops.f90 b/flang/test/Integration/unroll-loops.f90 index c3fcf1c3a7cf3..debe45e0ec359 100644 --- a/flang/test/Integration/unroll-loops.f90 +++ b/flang/test/Integration/unroll-loops.f90 @@ -1,8 +1,8 @@ ! FIXME: https://github.com/llvm/llvm-project/issues/123668 ! ! DEFINE: %{triple} = -! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL ! ! REDEFINE: %{triple} = aarch64-unknown-linux-gnu ! RUN: %if aarch64-registered-target %{ %{check-unroll} %} diff --git a/flang/test/Lower/HLFIR/unroll-loops.fir b/flang/test/Lower/HLFIR/unroll-loops.fir index 6a9dd28a37b6d..1321f39677405 100644 --- a/flang/test/Lower/HLFIR/unroll-loops.fir +++ b/flang/test/Lower/HLFIR/unroll-loops.fir @@ -1,6 +1,6 @@ // DEFINE: %{triple} = -// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL -// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL +// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL +// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL // REDEFINE: %{triple} = aarch64-unknown-linux-gnu // RUN: %if aarch64-registered-target %{ %{check-unroll} %}