Manual type legalization.

LLVM's own type legalization promotes floating point operations without truncation of intermediate results. We rely on that truncation, so run our own pass to legalize manually before LLVM's legalization runs.
uxlfoundation · Dec 7, 2023 · b3ad276 · b3ad276
1 parent d0f09c8
commit b3ad276
Show file tree

Hide file tree

Showing 13 changed files with 263 additions and 25 deletions.
diff --git a/doc/modules/compiler/utils.rst b/doc/modules/compiler/utils.rst
@@ -1199,6 +1199,20 @@ about some of the above types, such as the type of images passed to any of the
 it may be required to skip other passes such as the
 ``compiler::ImageArgumentSubstitutionPass``.
 
+ManualTypeLegalizationPass
+--------------------------
+
+The ``ManualTypeLegalizationPass`` pass replaces ``half`` operations with
+``float`` operations, inserting conversions as needed. It does this to work
+around LLVM issue 73805, where LLVM's own legalization replaces whole chains of
+operations rather than each operation individually, thus leaving out rounding
+operations implied by the LLVM IR.
+
+This replacement is only done on targets that promote ``half`` to ``float``
+during type legalization. On targets where ``half`` is a native type, or where
+``half`` is known to be promoted using "soft-promotion" rules, LLVM is presumed
+to translate ``half`` correctly.
+
 Metadata Utilities
 ------------------
 

diff --git a/examples/refsi/refsi_g1_wi/compiler/refsi_g1_wi/source/refsi_pass_machinery.cpp b/examples/refsi/refsi_g1_wi/compiler/refsi_g1_wi/source/refsi_pass_machinery.cpp
@@ -23,6 +23,7 @@
 #include <compiler/utils/define_mux_dma_pass.h>
 #include <compiler/utils/encode_kernel_metadata_pass.h>
 #include <compiler/utils/link_builtins_pass.h>
+#include <compiler/utils/manual_type_legalization_pass.h>
 #include <compiler/utils/metadata_analysis.h>
 #include <compiler/utils/replace_address_space_qualifier_functions_pass.h>
 #include <compiler/utils/replace_mem_intrinsics_pass.h>
@@ -156,6 +157,9 @@ llvm::ModulePassManager RefSiG1PassMachinery::getLateTargetPasses() {
 
   addLLVMDefaultPerModulePipeline(PM, getPB(), options);
 
+  PM.addPass(llvm::createModuleToFunctionPassAdaptor(
+      compiler::utils::ManualTypeLegalizationPass()));
+
   if (env_debug_prefix) {
     // With all passes scheduled, add a callback pass to view the
     // assembly/object file, if requested.

diff --git a/examples/refsi/refsi_m1/compiler/refsi_m1/source/refsi_pass_machinery.cpp b/examples/refsi/refsi_m1/compiler/refsi_m1/source/refsi_pass_machinery.cpp
@@ -21,6 +21,7 @@
 #include <compiler/utils/cl_builtin_info.h>
 #include <compiler/utils/encode_kernel_metadata_pass.h>
 #include <compiler/utils/link_builtins_pass.h>
+#include <compiler/utils/manual_type_legalization_pass.h>
 #include <compiler/utils/metadata_analysis.h>
 #include <compiler/utils/replace_address_space_qualifier_functions_pass.h>
 #include <compiler/utils/replace_local_module_scope_variables_pass.h>
@@ -159,6 +160,9 @@ llvm::ModulePassManager RefSiM1PassMachinery::getLateTargetPasses() {
 
   addLLVMDefaultPerModulePipeline(PM, getPB(), options);
 
+  PM.addPass(llvm::createModuleToFunctionPassAdaptor(
+      compiler::utils::ManualTypeLegalizationPass()));
+
   if (env_debug_prefix) {
     // With all passes scheduled, add a callback pass to view the
     // assembly/object file, if requested.

diff --git a/...ookie/{{cookiecutter.target_name}}/source/{{cookiecutter.target_name}}_pass_machinery.cpp b/...ookie/{{cookiecutter.target_name}}/source/{{cookiecutter.target_name}}_pass_machinery.cpp
@@ -26,6 +26,7 @@
 #include <compiler/utils/attributes.h>
 #include <compiler/utils/encode_kernel_metadata_pass.h>
 #include <compiler/utils/link_builtins_pass.h>
+#include <compiler/utils/manual_type_legalization_pass.h>
 #include <compiler/utils/metadata.h>
 #include <compiler/utils/metadata_analysis.h>
 #include <compiler/utils/replace_local_module_scope_variables_pass.h>
@@ -222,6 +223,9 @@ llvm::ModulePassManager {{cookiecutter.target_name.capitalize()}}PassMachinery::
 
   addLLVMDefaultPerModulePipeline(PM, getPB(), options);
 
+  PM.addPass(llvm::createModuleToFunctionPassAdaptor(
+      compiler::utils::ManualTypeLegalizationPass()));
+
   if (env_debug_prefix) {
     // With all passes scheduled, add a callback pass to view the
     // assembly/object file, if requested.

diff --git a/modules/compiler/riscv/source/riscv_pass_machinery.cpp b/modules/compiler/riscv/source/riscv_pass_machinery.cpp
@@ -21,6 +21,7 @@
 #include <compiler/utils/attributes.h>
 #include <compiler/utils/encode_kernel_metadata_pass.h>
 #include <compiler/utils/link_builtins_pass.h>
+#include <compiler/utils/manual_type_legalization_pass.h>
 #include <compiler/utils/metadata.h>
 #include <compiler/utils/metadata_analysis.h>
 #include <compiler/utils/replace_address_space_qualifier_functions_pass.h>
@@ -255,6 +256,9 @@ llvm::ModulePassManager RiscvPassMachinery::getLateTargetPasses() {
 
   addLLVMDefaultPerModulePipeline(PM, getPB(), options);
 
+  PM.addPass(llvm::createModuleToFunctionPassAdaptor(
+      compiler::utils::ManualTypeLegalizationPass()));
+
   if (env_debug_prefix) {
     // With all passes scheduled, add a callback pass to view the
     // assembly/object file, if requested.

diff --git a/modules/compiler/source/base/source/base_module_pass_machinery.cpp b/modules/compiler/source/base/source/base_module_pass_machinery.cpp
@@ -42,6 +42,7 @@
 #include <compiler/utils/link_builtins_pass.h>
 #include <compiler/utils/lower_to_mux_builtins_pass.h>
 #include <compiler/utils/make_function_name_unique_pass.h>
+#include <compiler/utils/manual_type_legalization_pass.h>
 #include <compiler/utils/metadata_analysis.h>
 #include <compiler/utils/optimal_builtin_replacement_pass.h>
 #include <compiler/utils/pipeline_parse_helpers.h>

diff --git a/modules/compiler/source/base/source/base_module_pass_registry.def b/modules/compiler/source/base/source/base_module_pass_registry.def
@@ -172,6 +172,7 @@ FUNCTION_PASS("bit-shift-fixup", compiler::BitShiftFixupPass())
 FUNCTION_PASS("ca-mem2reg", compiler::MemToRegPass())
 FUNCTION_PASS("check-unsupported-types", compiler::CheckForUnsupportedTypesPass())
 FUNCTION_PASS("combine-fpext-fptrunc", compiler::CombineFPExtFPTruncPass())
+FUNCTION_PASS("manual-type-legalization", compiler::utils::ManualTypeLegalizationPass())
 FUNCTION_PASS("software-div", compiler::SoftwareDivisionPass())
 FUNCTION_PASS("replace-addrspace-fns", compiler::utils::ReplaceAddressSpaceQualifierFunctionsPass())
 FUNCTION_PASS("remove-lifetime", compiler::utils::RemoveLifetimeIntrinsicsPass())

diff --git a/modules/compiler/targets/host/source/HostPassMachinery.cpp b/modules/compiler/targets/host/source/HostPassMachinery.cpp
@@ -27,6 +27,7 @@
 #include <compiler/utils/compute_local_memory_usage_pass.h>
 #include <compiler/utils/define_mux_builtins_pass.h>
 #include <compiler/utils/make_function_name_unique_pass.h>
+#include <compiler/utils/manual_type_legalization_pass.h>
 #include <compiler/utils/metadata.h>
 #include <compiler/utils/metadata_analysis.h>
 #include <compiler/utils/pipeline_parse_helpers.h>
@@ -330,6 +331,9 @@ llvm::ModulePassManager HostPassMachinery::getKernelFinalizationPasses(
              compiler::utils::VectorizeMetadataAnalysis,
              handler::VectorizeInfoMetadataHandler>());
 
+  PM.addPass(llvm::createModuleToFunctionPassAdaptor(
+      compiler::utils::ManualTypeLegalizationPass()));
+
   return PM;
 }
 

diff --git a/modules/compiler/test/lit/passes/manual_type_legalization.ll b/modules/compiler/test/lit/passes/manual_type_legalization.ll
@@ -0,0 +1,38 @@
+; Copyright (C) Codeplay Software Limited
+;
+; Licensed under the Apache License, Version 2.0 (the "License") with LLVM
+; Exceptions; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+; License for the specific language governing permissions and limitations
+; under the License.
+;
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+; RUN: muxc --passes manual-type-legalization,verify -S %s | FileCheck %s
+
+; Make sure we use a triple that does not have half as a legal type.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "spir64-unknown-unknown"
+
+; CHECK-LABEL: define half @f
+; CHECK-DAG: [[AEXT:%.*]] = fpext half %a to float
+; CHECK-DAG: [[BEXT:%.*]] = fpext half %b to float
+; CHECK-DAG: [[CEXT:%.*]] = fpext half %c to float
+; CHECK-DAG: [[DADD:%.*]] = fadd float [[AEXT]], [[BEXT]]
+; CHECK-DAG: [[DTRUNC:%.*]] = fptrunc float [[DADD]] to half
+; CHECK-DAG: [[DEXT:%.*]] = fpext half [[DTRUNC]] to float
+; CHECK-DAG: [[EADD:%.*]] = fadd float [[DEXT]], [[CEXT]]
+; CHECK-DAG: [[ETRUNC:%.*]] = fptrunc float [[EADD]] to half
+; CHECK: ret half [[ETRUNC]]
+; Two chained half additions: %e consumes %d, so the promoted form has to round
+; %d back to half (fptrunc) and extend it again (fpext) before the second add.
+define half @f(half %a, half %b, half %c) {
+entry:
+  %d = fadd half %a, %b
+  %e = fadd half %d, %c
+  ret half %e
+}
diff --git a/modules/compiler/utils/CMakeLists.txt b/modules/compiler/utils/CMakeLists.txt
@@ -39,6 +39,7 @@ add_ca_library(compiler-utils STATIC
   ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/llvm_global_mutex.h
   ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/lower_to_mux_builtins_pass.h
   ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/make_function_name_unique_pass.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/manual_type_legalization_pass.h
   ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/mangling.h
   ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/memory_buffer.h
   ${CMAKE_CURRENT_SOURCE_DIR}/include/compiler/utils/metadata.h
@@ -92,6 +93,7 @@ add_ca_library(compiler-utils STATIC
   ${CMAKE_CURRENT_SOURCE_DIR}/source/lower_to_mux_builtins_pass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/source/make_function_name_unique_pass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/source/mangling.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/source/manual_type_legalization_pass.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/source/metadata.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/source/metadata_analysis.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/source/metadata_hooks.cpp

diff --git a/modules/compiler/utils/include/compiler/utils/manual_type_legalization_pass.h b/modules/compiler/utils/include/compiler/utils/manual_type_legalization_pass.h
@@ -0,0 +1,56 @@
+// Copyright (C) Codeplay Software Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License") with LLVM
+// Exceptions; you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef COMPILER_UTILS_MANUAL_TYPE_LEGALIZATION_PASS_H_INCLUDED
+#define COMPILER_UTILS_MANUAL_TYPE_LEGALIZATION_PASS_H_INCLUDED
+
+#include <llvm/IR/PassManager.h>
+
+namespace compiler {
+namespace utils {
+
+/// Manual type legalization pass.
+///
+/// On targets that do not natively support \c half, promote operations on \c
+/// half to \c float instead.
+///
+/// When LLVM encounters floating point operations in a type it does not support
+/// natively, it extends its operands to an extended precision floating point
+/// type, performs the operation in that extended type, and rounds the result
+/// back to the original type. However, when it extends its operands to an
+/// extended precision floating point type, if an operand itself was a floating
+/// point operation that was also so extended, its rounding and re-extension are
+/// skipped. This causes issues for code that relies on exact rounding of
+/// intermediate results, which we avoid by manually doing this promotion
+/// ourselves.
+///
+/// Simply performing operations in a wider floating point type and rounding
+/// back to the narrow floating point type is not, in general, correct, due to
+/// double rounding. For addition, subtraction, and multiplication, \c float
+/// provides enough additional precision that double rounding is known not to be
+/// an issue. For other operations, this pass may generate incorrect results,
+/// but this should only happen in cases where letting the operation pass
+/// through to LLVM would result in the same incorrect results.
+struct ManualTypeLegalizationPass final
+    : llvm::PassInfoMixin<ManualTypeLegalizationPass> {
+  /// Runs the pass over a single function.
+  ///
+  /// \param F Function to process.
+  /// \param FAM Function analysis manager associated with \p F.
+  /// \return The set of analyses that remain valid after the pass has run.
+  llvm::PreservedAnalyses run(llvm::Function &F,
+                              llvm::FunctionAnalysisManager &FAM);
+};
+
+}  // namespace utils
+}  // namespace compiler
+
+#endif  // COMPILER_UTILS_MANUAL_TYPE_LEGALIZATION_PASS_H_INCLUDED
diff --git a/modules/compiler/utils/source/manual_type_legalization_pass.cpp b/modules/compiler/utils/source/manual_type_legalization_pass.cpp
@@ -0,0 +1,129 @@
+// Copyright (C) Codeplay Software Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License") with LLVM
+// Exceptions; you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <compiler/utils/manual_type_legalization_pass.h>
+#include <llvm/ADT/DenseMap.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/InstrTypes.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/PassManager.h>
+#include <llvm/IR/Type.h>
+#include <llvm/Support/Casting.h>
+#include <llvm/TargetParser/Triple.h>
+#include <multi_llvm/llvm_version.h>
+
+using namespace llvm;
+
+/// Rewrites every binary operator in \p F whose scalar or vector element type
+/// is \c half so that it is computed in \c float instead: both operands are
+/// extended with \c fpext, the operation is repeated on the extended values
+/// with the original fast-math flags, and the result is rounded back to the
+/// original type with \c fptrunc.
+///
+/// The function is left untouched when the target reports \c half as a legal
+/// type, or when the target triple is x86 or RISC-V.
+///
+/// \param F Function to legalize.
+/// \param FAM Analysis manager used to obtain \c TargetIRAnalysis for \p F.
+/// \return \c PreservedAnalyses::all() if no instruction was rewritten;
+/// otherwise only the CFG analyses are preserved.
+PreservedAnalyses compiler::utils::ManualTypeLegalizationPass::run(
+    Function &F, FunctionAnalysisManager &FAM) {
+  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+
+  auto *HalfT = Type::getHalfTy(F.getContext());
+  auto *FloatT = Type::getFloatTy(F.getContext());
+
+  // Targets where half is a legal type do not need this pass. Targets where
+  // half is promoted using "soft promotion" rules also do not need this pass.
+  // We cannot reliably determine which targets these are, so x86 and RISC-V
+  // are hard-coded here. That is okay: on any other target where this pass is
+  // not needed it does no harm, it merely wastes time.
+  llvm::Triple TT(F.getParent()->getTargetTriple());
+  if (TTI.isTypeLegal(HalfT) || TT.isX86() || TT.isRISCV()) {
+    return PreservedAnalyses::all();
+  }
+
+  // Cache of extensions already created, keyed on the value being extended, so
+  // that each value is extended at most once per function.
+  DenseMap<Value *, Value *> FPExtVals;
+  IRBuilder<> B(F.getContext());
+
+  // Returns the extension of V to ExtTy, creating it on first request. The
+  // extension is placed directly after V's definition when V is an
+  // instruction, and after the entry block's allocas otherwise, so that it is
+  // available to every user of V.
+  auto CreateFPExt = [&](Value *V, Type *ExtTy) {
+    auto *&FPExt = FPExtVals[V];
+    if (!FPExt) {
+      if (auto *I = dyn_cast<Instruction>(V)) {
+        // getInsertionPointAfterDef() changed its return type in LLVM 18;
+        // normalize both forms to an optional insertion point.
+#if LLVM_VERSION_GREATER_EQUAL(18, 0)
+        std::optional<BasicBlock::iterator> IPAD;
+        IPAD = I->getInsertionPointAfterDef();
+#else
+        std::optional<Instruction *> IPAD;
+        if (auto *IPADRaw = I->getInsertionPointAfterDef()) {
+          IPAD = IPADRaw;
+        }
+#endif
+        assert(IPAD &&
+               "getInsertionPointAfterDef() should return an insertion point "
+               "for all FP16 instructions");
+        B.SetInsertPoint(*IPAD);
+      } else {
+        B.SetInsertPointPastAllocas(&F);
+      }
+      FPExt = B.CreateFPExt(V, ExtTy, V->getName() + ".fpext");
+    }
+    return FPExt;
+  };
+
+  bool Changed = false;
+
+  for (auto &BB : F) {
+    // Early-increment iteration: the current instruction is erased below.
+    for (auto &I : make_early_inc_range(BB)) {
+      auto *BO = dyn_cast<BinaryOperator>(&I);
+      if (!BO) continue;
+
+      auto *T = BO->getType();
+      auto *VecT = dyn_cast<VectorType>(T);
+      auto *ElT = VecT ? VecT->getElementType() : T;
+
+      if (ElT != HalfT) continue;
+
+      auto *LHS = BO->getOperand(0);
+      auto *RHS = BO->getOperand(1);
+      assert(LHS->getType() == T &&
+             "Expected matching types for floating point operation");
+      assert(RHS->getType() == T &&
+             "Expected matching types for floating point operation");
+
+      // Extended type: float, or a float vector with the same element count.
+      auto *ExtElT = FloatT;
+      auto *ExtT =
+          VecT ? VectorType::get(ExtElT, VecT->getElementCount()) : ExtElT;
+
+      auto *LHSExt = CreateFPExt(LHS, ExtT);
+      auto *RHSExt = CreateFPExt(RHS, ExtT);
+
+      B.SetInsertPoint(BO);
+
+      // Carry the original fast-math flags over to the promoted operation
+      // only; they are cleared again before the truncation is created.
+      B.setFastMathFlags(BO->getFastMathFlags());
+      auto *OpExt = B.CreateBinOp(BO->getOpcode(), LHSExt, RHSExt,
+                                  BO->getName() + ".fpext");
+      B.clearFastMathFlags();
+
+      auto *Trunc = B.CreateFPTrunc(OpExt, T);
+      Trunc->takeName(BO);
+
+      BO->replaceAllUsesWith(Trunc);
+      // Blocks are not necessarily visited in dominance order, so a user
+      // processed earlier may already have cached an extension keyed on BO.
+      // Drop that entry before BO is destroyed: otherwise the map would keep a
+      // dangling key that a later allocation could alias, handing back the
+      // extension of an unrelated value.
+      FPExtVals.erase(BO);
+      BO->eraseFromParent();
+
+      Changed = true;
+    }
+  }
+
+  // Only instructions are inserted and removed; the CFG itself is untouched.
+  PreservedAnalyses PA;
+  if (Changed) {
+    PA = PreservedAnalyses::none();
+    PA.preserveSet<CFGAnalyses>();
+  } else {
+    PA = PreservedAnalyses::all();
+  }
+  return PA;
+}