Skip to content

Commit

Permalink
[flang] Add -f[no-]vectorize flags (#119718)
Browse files Browse the repository at this point in the history
This patch adds the -fvectorize and -fno-vectorize flags to flang. 

Note that this also changes the behaviour of `flang -fc1` to match that
of `clang -cc1`, which is that vectorization is only enabled in the
presence of the `-vectorize-loops` flag.

Additionally, this patch changes the behaviour of the default
optimisation levels to match clang, such that vectorization only happens
at the same levels as it does there.

This patch is in draft while I write an RFC to discuss the above two
changes.
  • Loading branch information
DavidTruby authored Feb 20, 2025
1 parent a03f064 commit 41cece8
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 51 deletions.
11 changes: 8 additions & 3 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3997,11 +3997,15 @@ defm assumptions : BoolFOption<"assumptions",
"Disable codegen and compile-time checks for C++23's [[assume]] attribute">,
PosFlag<SetTrue>>;


// Loop-vectorization driver flags. The Visibility list below exposes them
// under both ClangOption and FlangOption.
// -ftree-vectorize and -fno-tree-vectorize are declared as aliases of
// -fvectorize and -fno-vectorize respectively.
let Visibility = [ClangOption, FlangOption] in {
def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>,
HelpText<"Enable the loop vectorization passes">;
def fno_vectorize : Flag<["-"], "fno-vectorize">, Group<f_Group>;
def : Flag<["-"], "ftree-vectorize">, Alias<fvectorize>;
def : Flag<["-"], "fno-tree-vectorize">, Alias<fno_vectorize>;
}

def fslp_vectorize : Flag<["-"], "fslp-vectorize">, Group<f_Group>,
HelpText<"Enable the superword-level parallelism vectorization passes">;
def fno_slp_vectorize : Flag<["-"], "fno-slp-vectorize">, Group<f_Group>;
Expand Down Expand Up @@ -7343,6 +7347,10 @@ def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">,
def mlink_bitcode_file
: Separate<["-"], "mlink-bitcode-file">,
HelpText<"Link the given bitcode file before performing optimizations.">;

def vectorize_loops : Flag<["-"], "vectorize-loops">,
HelpText<"Run the Loop vectorization passes">,
MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
} // let Visibility = [CC1Option, FC1Option]

let Visibility = [CC1Option] in {
Expand Down Expand Up @@ -7458,9 +7466,6 @@ defm link_builtin_bitcode_postopt: BoolMOption<"link-builtin-bitcode-postopt",
PosFlag<SetTrue, [], [ClangOption], "Link builtin bitcodes after the "
"optimization pipeline">,
NegFlag<SetFalse, [], [ClangOption]>>;
def vectorize_loops : Flag<["-"], "vectorize-loops">,
HelpText<"Run the Loop vectorization passes">,
MarshallingInfoFlag<CodeGenOpts<"VectorizeLoop">>;
def vectorize_slp : Flag<["-"], "vectorize-slp">,
HelpText<"Run the SLP vectorization passes">,
MarshallingInfoFlag<CodeGenOpts<"VectorizeSLP">>;
Expand Down
33 changes: 0 additions & 33 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,39 +511,6 @@ static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
}
}

/// Vectorize at all optimization levels greater than 1 except for -Oz.
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
/// enabled.
static bool shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
return true;

if (A->getOption().matches(options::OPT_O0))
return false;

assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");

// Vectorize -Os.
StringRef S(A->getValue());
if (S == "s")
return true;

// Don't vectorize -Oz, unless it's the slp vectorizer.
if (S == "z")
return isSlpVec;

unsigned OptLevel = 0;
if (S.getAsInteger(10, OptLevel))
return false;

return OptLevel > 1;
}

return false;
}

/// Add -x lang to \p CmdArgs for \p Input.
static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
ArgStringList &CmdArgs) {
Expand Down
33 changes: 33 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3133,3 +3133,36 @@ void tools::renderCommonIntegerOverflowOptions(const ArgList &Args,
if (use_fwrapv_pointer)
CmdArgs.push_back("-fwrapv-pointer");
}

/// Vectorize at all optimization levels greater than 1 except for -Oz.
/// For -Oz the loop vectorizer is disabled, while the slp vectorizer is
/// enabled.
bool tools::shouldEnableVectorizerAtOLevel(const ArgList &Args, bool isSlpVec) {
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
return true;

if (A->getOption().matches(options::OPT_O0))
return false;

assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag");

// Vectorize -Os.
StringRef S(A->getValue());
if (S == "s")
return true;

// Don't vectorize -Oz, unless it's the slp vectorizer.
if (S == "z")
return isSlpVec;

unsigned OptLevel = 0;
if (S.getAsInteger(10, OptLevel))
return false;

return OptLevel > 1;
}

return false;
}
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ bool shouldRecordCommandLine(const ToolChain &TC,
void renderCommonIntegerOverflowOptions(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);

bool shouldEnableVectorizerAtOLevel(const llvm::opt::ArgList &Args,
bool isSlpVec);
} // end namespace tools
} // end namespace driver
} // end namespace clang
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,16 @@ void Flang::addCodegenOptions(const ArgList &Args,
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
CmdArgs.push_back("-fstack-arrays");

// Enable vectorization by default according to the optimization level
// selected. For optimization levels that want vectorization we use the alias
// option to simplify the hasFlag logic.
bool enableVec = shouldEnableVectorizerAtOLevel(Args, false);
OptSpecifier vectorizeAliasOption =
enableVec ? options::OPT_O_Group : options::OPT_fvectorize;
if (Args.hasFlag(options::OPT_fvectorize, vectorizeAliasOption,
options::OPT_fno_vectorize, enableVec))
CmdArgs.push_back("-vectorize-loops");

if (shouldLoopVersion(Args))
CmdArgs.push_back("-fversion-loops-for-stride");

Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Frontend/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ CODEGENOPT(PrepareForFullLTO , 1, 0) ///< Set when -flto is enabled on the
CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
///< compile step.
CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "clang/Basic/AllDiagnostics.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/OptionUtils.h"
#include "clang/Driver/Options.h"
Expand Down Expand Up @@ -242,6 +243,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
clang::driver::options::OPT_fno_stack_arrays, false))
opts.StackArrays = 1;

if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
opts.VectorizeLoop = 1;

if (args.hasFlag(clang::driver::options::OPT_floop_versioning,
clang::driver::options::OPT_fno_loop_versioning, false))
opts.LoopVersioning = 1;
Expand Down
2 changes: 2 additions & 0 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
pto.LoopUnrolling = opts.UnrollLoops;
pto.LoopInterleaving = opts.UnrollLoops;
pto.LoopVectorization = opts.VectorizeLoop;

llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);

// Attempt to load pass plugins and register their callbacks with PB.
Expand Down
22 changes: 11 additions & 11 deletions flang/test/Driver/optimization-remark.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,33 @@
! DEFINE: %{output} = -emit-llvm -flang-deprecated-no-hlfir -o /dev/null 2>&1

! Check fc1 can handle -Rpass
! RUN: %flang_fc1 %s -O1 -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS
! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass %{output} 2>&1 | FileCheck %s --check-prefix=REMARKS

! Check that we can override -Rpass= with -Rno-pass.
! RUN: %flang_fc1 %s -O1 -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! RUN: %flang_fc1 %s -O1 -vectorize-loops -Rpass -Rno-pass %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS

! Check -Rno-pass, -Rno-pass-analysis, -Rno-pass-missed nothing emitted
! RUN: %flang %s -O1 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! RUN: %flang %s -O1 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! RUN: %flang %s -O1 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! RUN: %flang %s -O2 -Rno-pass -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! RUN: %flang %s -O2 -Rno-pass-missed -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS
! RUN: %flang %s -O2 -Rno-pass-analysis -S %{output} 2>&1 | FileCheck %s --allow-empty --check-prefix=NO-REMARKS

! Check valid -Rpass regex
! RUN: %flang %s -O1 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY
! RUN: %flang %s -O2 -Rpass=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS-REGEX-LOOP-ONLY

! Check valid -Rpass-missed regex
! RUN: %flang %s -O1 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY
! RUN: %flang %s -O2 -Rpass-missed=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED-REGEX-LOOP-ONLY

! Check valid -Rpass-analysis regex
! RUN: %flang %s -O1 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY
! RUN: %flang %s -O2 -Rpass-analysis=loop -S %{output} 2>&1 | FileCheck %s --check-prefix=ANALYSIS-REGEX-LOOP-ONLY

! Check full -Rpass message is emitted
! RUN: %flang %s -O1 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS
! RUN: %flang %s -O2 -Rpass -S %{output} 2>&1 | FileCheck %s --check-prefix=PASS

! Check full -Rpass-missed message is emitted
! RUN: %flang %s -O1 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED
! RUN: %flang %s -O2 -Rpass-missed -S %{output} 2>&1 | FileCheck %s --check-prefix=MISSED

! Check full -Rpass-analysis message is emitted
! RUN: %flang %s -O1 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS
! RUN: %flang %s -O2 -Rpass-analysis -S -o /dev/null 2>&1 | FileCheck %s --check-prefix=ANALYSIS

! REMARKS: remark:
! NO-REMARKS-NOT: remark:
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Integration/unroll-loops.f90
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
! FIXME: https://github.com/llvm/llvm-project/issues/123668
!
! DEFINE: %{triple} =
! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
! DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
! DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
!
! REDEFINE: %{triple} = aarch64-unknown-linux-gnu
! RUN: %if aarch64-registered-target %{ %{check-unroll} %}
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/HLFIR/unroll-loops.fir
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// DEFINE: %{triple} =
// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL
// DEFINE: %{check-unroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -funroll-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,UNROLL
// DEFINE: %{check-nounroll} = %flang_fc1 -emit-llvm -O1 -vectorize-loops -mllvm -force-vector-width=2 -triple %{triple} -o- %s | FileCheck %s --check-prefixes=CHECK,NO-UNROLL

// REDEFINE: %{triple} = aarch64-unknown-linux-gnu
// RUN: %if aarch64-registered-target %{ %{check-unroll} %}
Expand Down

0 comments on commit 41cece8

Please sign in to comment.